xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (revision f76f6674d8221f59f9e515e3cc03ad07fa72fe46)
1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
20 #include "clang/Analysis/FlowSensitive/Value.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/DenseSet.h"
23 #include "llvm/ADT/MapVector.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include <cassert>
28 #include <memory>
29 #include <utility>
30 
31 namespace clang {
32 namespace dataflow {
33 
34 // FIXME: convert these to parameters of the analysis or environment. Current
35 // settings have been experimentaly validated, but only for a particular
36 // analysis.
37 static constexpr int MaxCompositeValueDepth = 3;
38 static constexpr int MaxCompositeValueSize = 1000;
39 
40 /// Returns a map consisting of key-value entries that are present in both maps.
41 template <typename K, typename V>
42 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
43                                         const llvm::DenseMap<K, V> &Map2) {
44   llvm::DenseMap<K, V> Result;
45   for (auto &Entry : Map1) {
46     auto It = Map2.find(Entry.first);
47     if (It != Map2.end() && Entry.second == It->second)
48       Result.insert({Entry.first, Entry.second});
49   }
50   return Result;
51 }
52 
53 static bool compareDistinctValues(QualType Type, Value &Val1,
54                                   const Environment &Env1, Value &Val2,
55                                   const Environment &Env2,
56                                   Environment::ValueModel &Model) {
57   // Note: Potentially costly, but, for booleans, we could check whether both
58   // can be proven equivalent in their respective environments.
59 
60   // FIXME: move the reference/pointers logic from `areEquivalentValues` to here
61   // and implement separate, join/widen specific handling for
62   // reference/pointers.
63   switch (Model.compare(Type, Val1, Env1, Val2, Env2)) {
64   case ComparisonResult::Same:
65     return true;
66   case ComparisonResult::Different:
67     return false;
68   case ComparisonResult::Unknown:
69     switch (Val1.getKind()) {
70     case Value::Kind::Integer:
71     case Value::Kind::Pointer:
72     case Value::Kind::Struct:
73       // FIXME: this choice intentionally introduces unsoundness to allow
74       // for convergence. Once we have widening support for the
75       // reference/pointer and struct built-in models, this should be
76       // `false`.
77       return true;
78     default:
79       return false;
80     }
81   }
82   llvm_unreachable("All cases covered in switch");
83 }
84 
85 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
86 /// respectively, of the same type `Type`. Merging generally produces a single
87 /// value that (soundly) approximates the two inputs, although the actual
88 /// meaning depends on `Model`.
89 static Value *mergeDistinctValues(QualType Type, Value &Val1,
90                                   const Environment &Env1, Value &Val2,
91                                   const Environment &Env2,
92                                   Environment &MergedEnv,
93                                   Environment::ValueModel &Model) {
94   // Join distinct boolean values preserving information about the constraints
95   // in the respective path conditions.
96   if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
97     // FIXME: Checking both values should be unnecessary, since they should have
98     // a consistent shape.  However, right now we can end up with BoolValue's in
99     // integer-typed variables due to our incorrect handling of
100     // boolean-to-integer casts (we just propagate the BoolValue to the result
101     // of the cast). So, a join can encounter an integer in one branch but a
102     // bool in the other.
103     // For example:
104     // ```
105     // std::optional<bool> o;
106     // int x;
107     // if (o.has_value())
108     //   x = o.value();
109     // ```
110     auto &Expr1 = cast<BoolValue>(Val1).formula();
111     auto &Expr2 = cast<BoolValue>(Val2).formula();
112     auto &A = MergedEnv.arena();
113     auto &MergedVal = A.makeAtomRef(A.makeAtom());
114     MergedEnv.addToFlowCondition(
115         A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
116                            A.makeEquals(MergedVal, Expr1)),
117                  A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
118                            A.makeEquals(MergedVal, Expr2))));
119     return &A.makeBoolValue(MergedVal);
120   }
121 
122   Value *MergedVal = nullptr;
123   if (auto *StructVal1 = dyn_cast<StructValue>(&Val1)) {
124     [[maybe_unused]] auto *StructVal2 = cast<StructValue>(&Val2);
125 
126     // Values to be merged are always associated with the same location in
127     // `LocToVal`. The location stored in `StructVal` should therefore also
128     // be the same.
129     assert(&StructVal1->getAggregateLoc() == &StructVal2->getAggregateLoc());
130 
131     // `StructVal1` and `StructVal2` may have different properties associated
132     // with them. Create a new `StructValue` without any properties so that we
133     // soundly approximate both values. If a particular analysis needs to merge
134     // properties, it should do so in `DataflowAnalysis::merge()`.
135     MergedVal = &MergedEnv.create<StructValue>(StructVal1->getAggregateLoc());
136   } else {
137     MergedVal = MergedEnv.createValue(Type);
138   }
139 
140   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
141   // returns false to avoid storing unneeded values in `DACtx`.
142   if (MergedVal)
143     if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
144       return MergedVal;
145 
146   return nullptr;
147 }
148 
149 // When widening does not change `Current`, return value will equal `&Prev`.
150 static Value &widenDistinctValues(QualType Type, Value &Prev,
151                                   const Environment &PrevEnv, Value &Current,
152                                   Environment &CurrentEnv,
153                                   Environment::ValueModel &Model) {
154   // Boolean-model widening.
155   if (isa<BoolValue>(&Prev)) {
156     assert(isa<BoolValue>(Current));
157     // Widen to Top, because we know they are different values. If previous was
158     // already Top, re-use that to (implicitly) indicate that no change occured.
159     if (isa<TopBoolValue>(Prev))
160       return Prev;
161     return CurrentEnv.makeTopBoolValue();
162   }
163 
164   // FIXME: Add other built-in model widening.
165 
166   // Custom-model widening.
167   if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv))
168     return *W;
169 
170   // Default of widening is a no-op: leave the current value unchanged.
171   return Current;
172 }
173 
174 /// Initializes a global storage value.
175 static void insertIfGlobal(const Decl &D,
176                            llvm::DenseSet<const VarDecl *> &Vars) {
177   if (auto *V = dyn_cast<VarDecl>(&D))
178     if (V->hasGlobalStorage())
179       Vars.insert(V);
180 }
181 
182 static void insertIfFunction(const Decl &D,
183                              llvm::DenseSet<const FunctionDecl *> &Funcs) {
184   if (auto *FD = dyn_cast<FunctionDecl>(&D))
185     Funcs.insert(FD);
186 }
187 
188 static void
189 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields,
190                          llvm::DenseSet<const VarDecl *> &Vars,
191                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
192   insertIfGlobal(D, Vars);
193   insertIfFunction(D, Funcs);
194   if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D))
195     for (const auto *B : Decomp->bindings())
196       if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding()))
197         // FIXME: should we be using `E->getFoundDecl()`?
198         if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
199           Fields.insert(FD);
200 }
201 
202 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields,
203 /// global variables and functions that are declared in or referenced from
204 /// sub-statements.
205 static void
206 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
207                          llvm::DenseSet<const VarDecl *> &Vars,
208                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
209   for (auto *Child : S.children())
210     if (Child != nullptr)
211       getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs);
212   if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
213     getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs);
214 
215   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
216     if (DS->isSingleDecl())
217       getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs);
218     else
219       for (auto *D : DS->getDeclGroup())
220         getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs);
221   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
222     insertIfGlobal(*E->getDecl(), Vars);
223     insertIfFunction(*E->getDecl(), Funcs);
224   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
225     // FIXME: should we be using `E->getFoundDecl()`?
226     const ValueDecl *VD = E->getMemberDecl();
227     insertIfGlobal(*VD, Vars);
228     insertIfFunction(*VD, Funcs);
229     if (const auto *FD = dyn_cast<FieldDecl>(VD))
230       Fields.insert(FD);
231   } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
232     if (RecordDecl *RD = InitList->getType()->getAsRecordDecl())
233       for (const auto *FD : getFieldsForInitListExpr(RD))
234         Fields.insert(FD);
235   }
236 }
237 
238 // FIXME: Add support for resetting globals after function calls to enable
239 // the implementation of sound analyses.
240 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
241   assert(FuncDecl->getBody() != nullptr);
242 
243   FieldSet Fields;
244   llvm::DenseSet<const VarDecl *> Vars;
245   llvm::DenseSet<const FunctionDecl *> Funcs;
246 
247   // Look for global variable and field references in the
248   // constructor-initializers.
249   if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) {
250     for (const auto *Init : CtorDecl->inits()) {
251       if (Init->isMemberInitializer()) {
252         Fields.insert(Init->getMember());
253       } else if (Init->isIndirectMemberInitializer()) {
254         for (const auto *I : Init->getIndirectMember()->chain())
255           Fields.insert(cast<FieldDecl>(I));
256       }
257       const Expr *E = Init->getInit();
258       assert(E != nullptr);
259       getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs);
260     }
261     // Add all fields mentioned in default member initializers.
262     for (const FieldDecl *F : CtorDecl->getParent()->fields())
263       if (const auto *I = F->getInClassInitializer())
264           getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs);
265   }
266   getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs);
267 
268   // These have to be added before the lines that follow to ensure that
269   // `create*` work correctly for structs.
270   DACtx->addModeledFields(Fields);
271 
272   for (const VarDecl *D : Vars) {
273     if (getStorageLocation(*D) != nullptr)
274       continue;
275 
276     setStorageLocation(*D, createObject(*D));
277   }
278 
279   for (const FunctionDecl *FD : Funcs) {
280     if (getStorageLocation(*FD) != nullptr)
281       continue;
282     auto &Loc = createStorageLocation(FD->getType());
283     setStorageLocation(*FD, Loc);
284   }
285 }
286 
287 Environment::Environment(DataflowAnalysisContext &DACtx)
288     : DACtx(&DACtx),
289       FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
290 
291 Environment Environment::fork() const {
292   Environment Copy(*this);
293   Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken);
294   return Copy;
295 }
296 
297 Environment::Environment(DataflowAnalysisContext &DACtx,
298                          const DeclContext &DeclCtx)
299     : Environment(DACtx) {
300   CallStack.push_back(&DeclCtx);
301 
302   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
303     assert(FuncDecl->getBody() != nullptr);
304 
305     initFieldsGlobalsAndFuncs(FuncDecl);
306 
307     for (const auto *ParamDecl : FuncDecl->parameters()) {
308       assert(ParamDecl != nullptr);
309       setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr));
310     }
311   }
312 
313   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
314     auto *Parent = MethodDecl->getParent();
315     assert(Parent != nullptr);
316     if (Parent->isLambda())
317       MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext());
318 
319     // FIXME: Initialize the ThisPointeeLoc of lambdas too.
320     if (MethodDecl && !MethodDecl->isStatic()) {
321       QualType ThisPointeeType = MethodDecl->getThisObjectType();
322       ThisPointeeLoc =
323           &cast<StructValue>(createValue(ThisPointeeType))->getAggregateLoc();
324     }
325   }
326 }
327 
328 bool Environment::canDescend(unsigned MaxDepth,
329                              const DeclContext *Callee) const {
330   return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee);
331 }
332 
333 Environment Environment::pushCall(const CallExpr *Call) const {
334   Environment Env(*this);
335 
336   if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) {
337     if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) {
338       if (!isa<CXXThisExpr>(Arg))
339           Env.ThisPointeeLoc =
340               cast<AggregateStorageLocation>(getStorageLocationStrict(*Arg));
341       // Otherwise (when the argument is `this`), retain the current
342       // environment's `ThisPointeeLoc`.
343     }
344   }
345 
346   Env.pushCallInternal(Call->getDirectCallee(),
347                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
348 
349   return Env;
350 }
351 
352 Environment Environment::pushCall(const CXXConstructExpr *Call) const {
353   Environment Env(*this);
354 
355   Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call);
356 
357   Env.pushCallInternal(Call->getConstructor(),
358                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
359 
360   return Env;
361 }
362 
363 void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
364                                    ArrayRef<const Expr *> Args) {
365   // Canonicalize to the definition of the function. This ensures that we're
366   // putting arguments into the same `ParamVarDecl`s` that the callee will later
367   // be retrieving them from.
368   assert(FuncDecl->getDefinition() != nullptr);
369   FuncDecl = FuncDecl->getDefinition();
370 
371   CallStack.push_back(FuncDecl);
372 
373   initFieldsGlobalsAndFuncs(FuncDecl);
374 
375   const auto *ParamIt = FuncDecl->param_begin();
376 
377   // FIXME: Parameters don't always map to arguments 1:1; examples include
378   // overloaded operators implemented as member functions, and parameter packs.
379   for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) {
380     assert(ParamIt != FuncDecl->param_end());
381     const VarDecl *Param = *ParamIt;
382     setStorageLocation(*Param, createObject(*Param, Args[ArgIndex]));
383   }
384 }
385 
386 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) {
387   // We ignore `DACtx` because it's already the same in both. We don't want the
388   // callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or `ThisPointeeLoc`. We don't
389   // bring back `DeclToLoc` and `ExprToLoc` because we want to be able to later
390   // analyze the same callee in a different context, and `setStorageLocation`
391   // requires there to not already be a storage location assigned. Conceptually,
392   // these maps capture information from the local scope, so when popping that
393   // scope, we do not propagate the maps.
394   this->LocToVal = std::move(CalleeEnv.LocToVal);
395   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
396 
397   if (Call->isGLValue()) {
398     if (CalleeEnv.ReturnLoc != nullptr)
399       setStorageLocationStrict(*Call, *CalleeEnv.ReturnLoc);
400   } else if (!Call->getType()->isVoidType()) {
401     if (CalleeEnv.ReturnVal != nullptr)
402       setValueStrict(*Call, *CalleeEnv.ReturnVal);
403   }
404 }
405 
406 void Environment::popCall(const CXXConstructExpr *Call,
407                           const Environment &CalleeEnv) {
408   // See also comment in `popCall(const CallExpr *, const Environment &)` above.
409   this->LocToVal = std::move(CalleeEnv.LocToVal);
410   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
411 
412   if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) {
413     setValueStrict(*Call, *Val);
414   }
415 }
416 
417 bool Environment::equivalentTo(const Environment &Other,
418                                Environment::ValueModel &Model) const {
419   assert(DACtx == Other.DACtx);
420 
421   if (ReturnVal != Other.ReturnVal)
422     return false;
423 
424   if (ReturnLoc != Other.ReturnLoc)
425     return false;
426 
427   if (ThisPointeeLoc != Other.ThisPointeeLoc)
428     return false;
429 
430   if (DeclToLoc != Other.DeclToLoc)
431     return false;
432 
433   if (ExprToLoc != Other.ExprToLoc)
434     return false;
435 
436   // Compare the contents for the intersection of their domains.
437   for (auto &Entry : LocToVal) {
438     const StorageLocation *Loc = Entry.first;
439     assert(Loc != nullptr);
440 
441     Value *Val = Entry.second;
442     assert(Val != nullptr);
443 
444     auto It = Other.LocToVal.find(Loc);
445     if (It == Other.LocToVal.end())
446       continue;
447     assert(It->second != nullptr);
448 
449     if (!areEquivalentValues(*Val, *It->second) &&
450         !compareDistinctValues(Loc->getType(), *Val, *this, *It->second, Other,
451                                Model))
452       return false;
453   }
454 
455   return true;
456 }
457 
458 LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
459                                      Environment::ValueModel &Model) {
460   assert(DACtx == PrevEnv.DACtx);
461   assert(ReturnVal == PrevEnv.ReturnVal);
462   assert(ReturnLoc == PrevEnv.ReturnLoc);
463   assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
464   assert(CallStack == PrevEnv.CallStack);
465 
466   auto Effect = LatticeJoinEffect::Unchanged;
467 
468   // By the API, `PrevEnv` is a previous version of the environment for the same
469   // block, so we have some guarantees about its shape. In particular, it will
470   // be the result of a join or widen operation on previous values for this
471   // block. For `DeclToLoc` and `ExprToLoc`, join guarantees that these maps are
472   // subsets of the maps in `PrevEnv`. So, as long as we maintain this property
473   // here, we don't need change their current values to widen.
474   assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size());
475   assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size());
476 
477   llvm::MapVector<const StorageLocation *, Value *> WidenedLocToVal;
478   for (auto &Entry : LocToVal) {
479     const StorageLocation *Loc = Entry.first;
480     assert(Loc != nullptr);
481 
482     Value *Val = Entry.second;
483     assert(Val != nullptr);
484 
485     auto PrevIt = PrevEnv.LocToVal.find(Loc);
486     if (PrevIt == PrevEnv.LocToVal.end())
487       continue;
488     assert(PrevIt->second != nullptr);
489 
490     if (areEquivalentValues(*Val, *PrevIt->second)) {
491       WidenedLocToVal.insert({Loc, Val});
492       continue;
493     }
494 
495     Value &WidenedVal = widenDistinctValues(Loc->getType(), *PrevIt->second,
496                                             PrevEnv, *Val, *this, Model);
497     WidenedLocToVal.insert({Loc, &WidenedVal});
498     if (&WidenedVal != PrevIt->second)
499       Effect = LatticeJoinEffect::Changed;
500   }
501   LocToVal = std::move(WidenedLocToVal);
502   if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() ||
503       ExprToLoc.size() != PrevEnv.ExprToLoc.size() ||
504       LocToVal.size() != PrevEnv.LocToVal.size())
505     Effect = LatticeJoinEffect::Changed;
506 
507   return Effect;
508 }
509 
510 Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
511                               Environment::ValueModel &Model) {
512   assert(EnvA.DACtx == EnvB.DACtx);
513   assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
514   assert(EnvA.CallStack == EnvB.CallStack);
515 
516   Environment JoinedEnv(*EnvA.DACtx);
517 
518   JoinedEnv.CallStack = EnvA.CallStack;
519   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
520 
521   if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) {
522     // `ReturnVal` might not always get set -- for example if we have a return
523     // statement of the form `return some_other_func()` and we decide not to
524     // analyze `some_other_func()`.
525     // In this case, we can't say anything about the joined return value -- we
526     // don't simply want to propagate the return value that we do have, because
527     // it might not be the correct one.
528     // This occurs for example in the test `ContextSensitiveMutualRecursion`.
529     JoinedEnv.ReturnVal = nullptr;
530   } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) {
531     JoinedEnv.ReturnVal = EnvA.ReturnVal;
532   } else {
533     assert(!EnvA.CallStack.empty());
534     // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
535     // cast.
536     auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
537     assert(Func != nullptr);
538     if (Value *MergedVal =
539             mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
540                                 *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
541       JoinedEnv.ReturnVal = MergedVal;
542   }
543 
544   if (EnvA.ReturnLoc == EnvB.ReturnLoc)
545     JoinedEnv.ReturnLoc = EnvA.ReturnLoc;
546   else
547     JoinedEnv.ReturnLoc = nullptr;
548 
549   // FIXME: Once we're able to remove declarations from `DeclToLoc` when their
550   // lifetime ends, add an assertion that there aren't any entries in
551   // `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to
552   // different storage locations.
553   JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc);
554 
555   JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc);
556 
557   // FIXME: update join to detect backedges and simplify the flow condition
558   // accordingly.
559   JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions(
560       EnvA.FlowConditionToken, EnvB.FlowConditionToken);
561 
562   for (auto &Entry : EnvA.LocToVal) {
563     const StorageLocation *Loc = Entry.first;
564     assert(Loc != nullptr);
565 
566     Value *Val = Entry.second;
567     assert(Val != nullptr);
568 
569     auto It = EnvB.LocToVal.find(Loc);
570     if (It == EnvB.LocToVal.end())
571       continue;
572     assert(It->second != nullptr);
573 
574     if (areEquivalentValues(*Val, *It->second)) {
575       JoinedEnv.LocToVal.insert({Loc, Val});
576       continue;
577     }
578 
579     if (Value *MergedVal = mergeDistinctValues(
580             Loc->getType(), *Val, EnvA, *It->second, EnvB, JoinedEnv, Model)) {
581       JoinedEnv.LocToVal.insert({Loc, MergedVal});
582     }
583   }
584 
585   return JoinedEnv;
586 }
587 
588 StorageLocation &Environment::createStorageLocation(QualType Type) {
589   return DACtx->createStorageLocation(Type);
590 }
591 
592 StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
593   // Evaluated declarations are always assigned the same storage locations to
594   // ensure that the environment stabilizes across loop iterations. Storage
595   // locations for evaluated declarations are stored in the analysis context.
596   return DACtx->getStableStorageLocation(D);
597 }
598 
599 StorageLocation &Environment::createStorageLocation(const Expr &E) {
600   // Evaluated expressions are always assigned the same storage locations to
601   // ensure that the environment stabilizes across loop iterations. Storage
602   // locations for evaluated expressions are stored in the analysis context.
603   return DACtx->getStableStorageLocation(E);
604 }
605 
606 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
607   assert(!DeclToLoc.contains(&D));
608   DeclToLoc[&D] = &Loc;
609 }
610 
611 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const {
612   auto It = DeclToLoc.find(&D);
613   if (It == DeclToLoc.end())
614     return nullptr;
615 
616   StorageLocation *Loc = It->second;
617 
618   return Loc;
619 }
620 
621 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
622   const Expr &CanonE = ignoreCFGOmittedNodes(E);
623   assert(!ExprToLoc.contains(&CanonE));
624   ExprToLoc[&CanonE] = &Loc;
625 }
626 
627 void Environment::setStorageLocationStrict(const Expr &E,
628                                            StorageLocation &Loc) {
629   // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason,
630   // but we still want to be able to associate a `StorageLocation` with them,
631   // so allow these as an exception.
632   assert(E.isGLValue() ||
633          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
634   setStorageLocation(E, Loc);
635 }
636 
637 StorageLocation *Environment::getStorageLocation(const Expr &E,
638                                                  SkipPast SP) const {
639   // FIXME: Add a test with parens.
640   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
641   return It == ExprToLoc.end() ? nullptr : &*It->second;
642 }
643 
644 StorageLocation *Environment::getStorageLocationStrict(const Expr &E) const {
645   // See comment in `setStorageLocationStrict()`.
646   assert(E.isGLValue() ||
647          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
648   StorageLocation *Loc = getStorageLocation(E, SkipPast::None);
649 
650   if (Loc == nullptr)
651     return nullptr;
652 
653   return Loc;
654 }
655 
656 AggregateStorageLocation *Environment::getThisPointeeStorageLocation() const {
657   return ThisPointeeLoc;
658 }
659 
660 AggregateStorageLocation &
661 Environment::getResultObjectLocation(const Expr &RecordPRValue) {
662   assert(RecordPRValue.getType()->isRecordType());
663   assert(RecordPRValue.isPRValue());
664 
665   if (StorageLocation *ExistingLoc =
666           getStorageLocation(RecordPRValue, SkipPast::None))
667     return *cast<AggregateStorageLocation>(ExistingLoc);
668   auto &Loc = cast<AggregateStorageLocation>(
669       DACtx->getStableStorageLocation(RecordPRValue));
670   setStorageLocation(RecordPRValue, Loc);
671   return Loc;
672 }
673 
674 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) {
675   return DACtx->getOrCreateNullPointerValue(PointeeType);
676 }
677 
678 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
679   assert(!isa<StructValue>(&Val) ||
680          &cast<StructValue>(&Val)->getAggregateLoc() == &Loc);
681 
682   LocToVal[&Loc] = &Val;
683 }
684 
685 void Environment::setValueStrict(const Expr &E, Value &Val) {
686   assert(E.isPRValue());
687 
688   if (auto *StructVal = dyn_cast<StructValue>(&Val)) {
689     if ([[maybe_unused]] auto *ExistingVal =
690             cast_or_null<StructValue>(getValue(E)))
691       assert(&ExistingVal->getAggregateLoc() == &StructVal->getAggregateLoc());
692     if ([[maybe_unused]] StorageLocation *ExistingLoc =
693             getStorageLocation(E, SkipPast::None))
694       assert(ExistingLoc == &StructVal->getAggregateLoc());
695     else
696       setStorageLocation(E, StructVal->getAggregateLoc());
697     setValue(StructVal->getAggregateLoc(), Val);
698     return;
699   }
700 
701   StorageLocation *Loc = getStorageLocation(E, SkipPast::None);
702   if (Loc == nullptr) {
703     Loc = &createStorageLocation(E);
704     setStorageLocation(E, *Loc);
705   }
706   setValue(*Loc, Val);
707 }
708 
709 Value *Environment::getValue(const StorageLocation &Loc) const {
710   return LocToVal.lookup(&Loc);
711 }
712 
713 Value *Environment::getValue(const ValueDecl &D) const {
714   auto *Loc = getStorageLocation(D);
715   if (Loc == nullptr)
716     return nullptr;
717   return getValue(*Loc);
718 }
719 
720 Value *Environment::getValue(const Expr &E, SkipPast SP) const {
721   auto *Loc = getStorageLocation(E, SP);
722   if (Loc == nullptr)
723     return nullptr;
724   return getValue(*Loc);
725 }
726 
727 Value *Environment::getValueStrict(const Expr &E) const {
728   assert(E.isPRValue());
729   return getValue(E);
730 }
731 
732 Value *Environment::createValue(QualType Type) {
733   llvm::DenseSet<QualType> Visited;
734   int CreatedValuesCount = 0;
735   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
736                                                 CreatedValuesCount);
737   if (CreatedValuesCount > MaxCompositeValueSize) {
738     llvm::errs() << "Attempting to initialize a huge value of type: " << Type
739                  << '\n';
740   }
741   return Val;
742 }
743 
744 Value *Environment::createValueUnlessSelfReferential(
745     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
746     int &CreatedValuesCount) {
747   assert(!Type.isNull());
748   assert(!Type->isReferenceType());
749 
750   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
751   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
752       Depth > MaxCompositeValueDepth)
753     return nullptr;
754 
755   if (Type->isBooleanType()) {
756     CreatedValuesCount++;
757     return &makeAtomicBoolValue();
758   }
759 
760   if (Type->isIntegerType()) {
761     // FIXME: consider instead `return nullptr`, given that we do nothing useful
762     // with integers, and so distinguishing them serves no purpose, but could
763     // prevent convergence.
764     CreatedValuesCount++;
765     return &arena().create<IntegerValue>();
766   }
767 
768   if (Type->isPointerType()) {
769     CreatedValuesCount++;
770     QualType PointeeType = Type->getPointeeType();
771     StorageLocation &PointeeLoc =
772         createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount);
773 
774     return &arena().create<PointerValue>(PointeeLoc);
775   }
776 
777   if (Type->isRecordType()) {
778     CreatedValuesCount++;
779     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
780     for (const FieldDecl *Field : DACtx->getModeledFields(Type)) {
781       assert(Field != nullptr);
782 
783       QualType FieldType = Field->getType();
784 
785       FieldLocs.insert(
786           {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1,
787                                           CreatedValuesCount)});
788     }
789 
790     AggregateStorageLocation &Loc =
791         arena().create<AggregateStorageLocation>(Type, std::move(FieldLocs));
792     StructValue &StructVal = create<StructValue>(Loc);
793 
794     // As we already have a storage location for the `StructValue`, we can and
795     // should associate them in the environment.
796     setValue(Loc, StructVal);
797 
798     return &StructVal;
799   }
800 
801   return nullptr;
802 }
803 
804 StorageLocation &
805 Environment::createLocAndMaybeValue(QualType Ty,
806                                     llvm::DenseSet<QualType> &Visited,
807                                     int Depth, int &CreatedValuesCount) {
808   if (!Visited.insert(Ty.getCanonicalType()).second)
809     return createStorageLocation(Ty.getNonReferenceType());
810   Value *Val = createValueUnlessSelfReferential(
811       Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount);
812   Visited.erase(Ty.getCanonicalType());
813 
814   Ty = Ty.getNonReferenceType();
815 
816   if (Val == nullptr)
817     return createStorageLocation(Ty);
818 
819   if (Ty->isRecordType())
820     return cast<StructValue>(Val)->getAggregateLoc();
821 
822   StorageLocation &Loc = createStorageLocation(Ty);
823   setValue(Loc, *Val);
824   return Loc;
825 }
826 
827 StorageLocation &Environment::createObjectInternal(const VarDecl *D,
828                                                    QualType Ty,
829                                                    const Expr *InitExpr) {
830   if (Ty->isReferenceType()) {
831     // Although variables of reference type always need to be initialized, it
832     // can happen that we can't see the initializer, so `InitExpr` may still
833     // be null.
834     if (InitExpr) {
835       if (auto *InitExprLoc = getStorageLocationStrict(*InitExpr))
836           return *InitExprLoc;
837     }
838 
839     // Even though we have an initializer, we might not get an
840     // InitExprLoc, for example if the InitExpr is a CallExpr for which we
841     // don't have a function body. In this case, we just invent a storage
842     // location and value -- it's the best we can do.
843     return createObjectInternal(D, Ty.getNonReferenceType(), nullptr);
844   }
845 
846   Value *Val = nullptr;
847   if (InitExpr)
848     // In the (few) cases where an expression is intentionally
849     // "uninterpreted", `InitExpr` is not associated with a value.  There are
850     // two ways to handle this situation: propagate the status, so that
851     // uninterpreted initializers result in uninterpreted variables, or
852     // provide a default value. We choose the latter so that later refinements
853     // of the variable can be used for reasoning about the surrounding code.
854     // For this reason, we let this case be handled by the `createValue()`
855     // call below.
856     //
857     // FIXME. If and when we interpret all language cases, change this to
858     // assert that `InitExpr` is interpreted, rather than supplying a
859     // default value (assuming we don't update the environment API to return
860     // references).
861     Val = getValue(*InitExpr);
862   if (!Val)
863     Val = createValue(Ty);
864 
865   if (Ty->isRecordType())
866     return cast<StructValue>(Val)->getAggregateLoc();
867 
868   StorageLocation &Loc =
869       D ? createStorageLocation(*D) : createStorageLocation(Ty);
870 
871   if (Val)
872     setValue(Loc, *Val);
873 
874   return Loc;
875 }
876 
877 void Environment::addToFlowCondition(const Formula &Val) {
878   DACtx->addFlowConditionConstraint(FlowConditionToken, Val);
879 }
880 
881 bool Environment::flowConditionImplies(const Formula &Val) const {
882   return DACtx->flowConditionImplies(FlowConditionToken, Val);
883 }
884 
885 void Environment::dump(raw_ostream &OS) const {
886   // FIXME: add printing for remaining fields and allow caller to decide what
887   // fields are printed.
888   OS << "DeclToLoc:\n";
889   for (auto [D, L] : DeclToLoc)
890     OS << "  [" << D->getNameAsString() << ", " << L << "]\n";
891 
892   OS << "ExprToLoc:\n";
893   for (auto [E, L] : ExprToLoc)
894     OS << "  [" << E << ", " << L << "]\n";
895 
896   OS << "LocToVal:\n";
897   for (auto [L, V] : LocToVal) {
898     OS << "  [" << L << ", " << V << ": " << *V << "]\n";
899   }
900 
901   OS << "FlowConditionToken:\n";
902   DACtx->dumpFlowCondition(FlowConditionToken, OS);
903 }
904 
905 void Environment::dump() const {
906   dump(llvm::dbgs());
907 }
908 
909 AggregateStorageLocation *
910 getImplicitObjectLocation(const CXXMemberCallExpr &MCE,
911                           const Environment &Env) {
912   Expr *ImplicitObject = MCE.getImplicitObjectArgument();
913   if (ImplicitObject == nullptr)
914     return nullptr;
915   if (ImplicitObject->getType()->isPointerType()) {
916     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*ImplicitObject)))
917       return &cast<AggregateStorageLocation>(Val->getPointeeLoc());
918     return nullptr;
919   }
920   return cast_or_null<AggregateStorageLocation>(
921       Env.getStorageLocationStrict(*ImplicitObject));
922 }
923 
924 AggregateStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
925                                                 const Environment &Env) {
926   Expr *Base = ME.getBase();
927   if (Base == nullptr)
928     return nullptr;
929   if (ME.isArrow()) {
930     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Base)))
931       return &cast<AggregateStorageLocation>(Val->getPointeeLoc());
932     return nullptr;
933   }
934   return cast_or_null<AggregateStorageLocation>(
935       Env.getStorageLocationStrict(*Base));
936 }
937 
938 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) {
939   // Unnamed bitfields are only used for padding and do not appear in
940   // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
941   // field list, and we thus need to remove them before mapping inits to
942   // fields to avoid mapping inits to the wrongs fields.
943   std::vector<FieldDecl *> Fields;
944   llvm::copy_if(
945       RD->fields(), std::back_inserter(Fields),
946       [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); });
947   return Fields;
948 }
949 
950 StructValue &refreshStructValue(AggregateStorageLocation &Loc,
951                                 Environment &Env) {
952   auto &NewVal = Env.create<StructValue>(Loc);
953   Env.setValue(Loc, NewVal);
954   return NewVal;
955 }
956 
957 StructValue &refreshStructValue(const Expr &Expr, Environment &Env) {
958   assert(Expr.getType()->isRecordType());
959 
960   if (Expr.isPRValue()) {
961     if (auto *ExistingVal = cast_or_null<StructValue>(Env.getValue(Expr))) {
962       auto &NewVal = Env.create<StructValue>(ExistingVal->getAggregateLoc());
963       Env.setValueStrict(Expr, NewVal);
964       return NewVal;
965     }
966 
967     auto &NewVal = *cast<StructValue>(Env.createValue(Expr.getType()));
968     Env.setValueStrict(Expr, NewVal);
969     return NewVal;
970   }
971 
972   if (auto *Loc = cast_or_null<AggregateStorageLocation>(
973           Env.getStorageLocationStrict(Expr))) {
974     auto &NewVal = Env.create<StructValue>(*Loc);
975     Env.setValue(*Loc, NewVal);
976     return NewVal;
977   }
978 
979   auto &NewVal = *cast<StructValue>(Env.createValue(Expr.getType()));
980   Env.setStorageLocationStrict(Expr, NewVal.getAggregateLoc());
981   return NewVal;
982 }
983 
984 } // namespace dataflow
985 } // namespace clang
986