xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (revision 17ba278f76116b12eb7a954c8963ee5a5a19c32b)
1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
20 #include "clang/Analysis/FlowSensitive/Value.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/DenseSet.h"
23 #include "llvm/ADT/MapVector.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include <cassert>
28 #include <memory>
29 #include <utility>
30 
31 namespace clang {
32 namespace dataflow {
33 
34 // FIXME: convert these to parameters of the analysis or environment. Current
35 // settings have been experimentaly validated, but only for a particular
36 // analysis.
37 static constexpr int MaxCompositeValueDepth = 3;
38 static constexpr int MaxCompositeValueSize = 1000;
39 
40 /// Returns a map consisting of key-value entries that are present in both maps.
41 template <typename K, typename V>
42 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
43                                         const llvm::DenseMap<K, V> &Map2) {
44   llvm::DenseMap<K, V> Result;
45   for (auto &Entry : Map1) {
46     auto It = Map2.find(Entry.first);
47     if (It != Map2.end() && Entry.second == It->second)
48       Result.insert({Entry.first, Entry.second});
49   }
50   return Result;
51 }
52 
53 static bool compareDistinctValues(QualType Type, Value &Val1,
54                                   const Environment &Env1, Value &Val2,
55                                   const Environment &Env2,
56                                   Environment::ValueModel &Model) {
57   // Note: Potentially costly, but, for booleans, we could check whether both
58   // can be proven equivalent in their respective environments.
59 
60   // FIXME: move the reference/pointers logic from `areEquivalentValues` to here
61   // and implement separate, join/widen specific handling for
62   // reference/pointers.
63   switch (Model.compare(Type, Val1, Env1, Val2, Env2)) {
64   case ComparisonResult::Same:
65     return true;
66   case ComparisonResult::Different:
67     return false;
68   case ComparisonResult::Unknown:
69     switch (Val1.getKind()) {
70     case Value::Kind::Integer:
71     case Value::Kind::Pointer:
72     case Value::Kind::Struct:
73       // FIXME: this choice intentionally introduces unsoundness to allow
74       // for convergence. Once we have widening support for the
75       // reference/pointer and struct built-in models, this should be
76       // `false`.
77       return true;
78     default:
79       return false;
80     }
81   }
82   llvm_unreachable("All cases covered in switch");
83 }
84 
85 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
86 /// respectively, of the same type `Type`. Merging generally produces a single
87 /// value that (soundly) approximates the two inputs, although the actual
88 /// meaning depends on `Model`.
89 static Value *mergeDistinctValues(QualType Type, Value &Val1,
90                                   const Environment &Env1, Value &Val2,
91                                   const Environment &Env2,
92                                   Environment &MergedEnv,
93                                   Environment::ValueModel &Model) {
94   // Join distinct boolean values preserving information about the constraints
95   // in the respective path conditions.
96   if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
97     // FIXME: Checking both values should be unnecessary, since they should have
98     // a consistent shape.  However, right now we can end up with BoolValue's in
99     // integer-typed variables due to our incorrect handling of
100     // boolean-to-integer casts (we just propagate the BoolValue to the result
101     // of the cast). So, a join can encounter an integer in one branch but a
102     // bool in the other.
103     // For example:
104     // ```
105     // std::optional<bool> o;
106     // int x;
107     // if (o.has_value())
108     //   x = o.value();
109     // ```
110     auto &Expr1 = cast<BoolValue>(Val1).formula();
111     auto &Expr2 = cast<BoolValue>(Val2).formula();
112     auto &A = MergedEnv.arena();
113     auto &MergedVal = A.makeAtomRef(A.makeAtom());
114     MergedEnv.addToFlowCondition(
115         A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
116                            A.makeEquals(MergedVal, Expr1)),
117                  A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
118                            A.makeEquals(MergedVal, Expr2))));
119     return &A.makeBoolValue(MergedVal);
120   }
121 
122   Value *MergedVal = nullptr;
123   if (auto *StructVal1 = dyn_cast<StructValue>(&Val1)) {
124     [[maybe_unused]] auto *StructVal2 = cast<StructValue>(&Val2);
125 
126     // Values to be merged are always associated with the same location in
127     // `LocToVal`. The location stored in `StructVal` should therefore also
128     // be the same.
129     assert(&StructVal1->getAggregateLoc() == &StructVal2->getAggregateLoc());
130 
131     // `StructVal1` and `StructVal2` may have different properties associated
132     // with them. Create a new `StructValue` without any properties so that we
133     // soundly approximate both values. If a particular analysis needs to merge
134     // properties, it should do so in `DataflowAnalysis::merge()`.
135     MergedVal = &MergedEnv.create<StructValue>(StructVal1->getAggregateLoc());
136   } else {
137     MergedVal = MergedEnv.createValue(Type);
138   }
139 
140   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
141   // returns false to avoid storing unneeded values in `DACtx`.
142   if (MergedVal)
143     if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
144       return MergedVal;
145 
146   return nullptr;
147 }
148 
149 // When widening does not change `Current`, return value will equal `&Prev`.
150 static Value &widenDistinctValues(QualType Type, Value &Prev,
151                                   const Environment &PrevEnv, Value &Current,
152                                   Environment &CurrentEnv,
153                                   Environment::ValueModel &Model) {
154   // Boolean-model widening.
155   if (isa<BoolValue>(&Prev)) {
156     assert(isa<BoolValue>(Current));
157     // Widen to Top, because we know they are different values. If previous was
158     // already Top, re-use that to (implicitly) indicate that no change occured.
159     if (isa<TopBoolValue>(Prev))
160       return Prev;
161     return CurrentEnv.makeTopBoolValue();
162   }
163 
164   // FIXME: Add other built-in model widening.
165 
166   // Custom-model widening.
167   if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv))
168     return *W;
169 
170   // Default of widening is a no-op: leave the current value unchanged.
171   return Current;
172 }
173 
174 /// Initializes a global storage value.
175 static void insertIfGlobal(const Decl &D,
176                            llvm::DenseSet<const VarDecl *> &Vars) {
177   if (auto *V = dyn_cast<VarDecl>(&D))
178     if (V->hasGlobalStorage())
179       Vars.insert(V);
180 }
181 
182 static void insertIfFunction(const Decl &D,
183                              llvm::DenseSet<const FunctionDecl *> &Funcs) {
184   if (auto *FD = dyn_cast<FunctionDecl>(&D))
185     Funcs.insert(FD);
186 }
187 
188 static void
189 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields,
190                          llvm::DenseSet<const VarDecl *> &Vars,
191                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
192   insertIfGlobal(D, Vars);
193   insertIfFunction(D, Funcs);
194   if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D))
195     for (const auto *B : Decomp->bindings())
196       if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding()))
197         // FIXME: should we be using `E->getFoundDecl()`?
198         if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
199           Fields.insert(FD);
200 }
201 
202 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields,
203 /// global variables and functions that are declared in or referenced from
204 /// sub-statements.
205 static void
206 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
207                          llvm::DenseSet<const VarDecl *> &Vars,
208                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
209   for (auto *Child : S.children())
210     if (Child != nullptr)
211       getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs);
212   if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
213     getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs);
214 
215   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
216     if (DS->isSingleDecl())
217       getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs);
218     else
219       for (auto *D : DS->getDeclGroup())
220         getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs);
221   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
222     insertIfGlobal(*E->getDecl(), Vars);
223     insertIfFunction(*E->getDecl(), Funcs);
224   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
225     // FIXME: should we be using `E->getFoundDecl()`?
226     const ValueDecl *VD = E->getMemberDecl();
227     insertIfGlobal(*VD, Vars);
228     insertIfFunction(*VD, Funcs);
229     if (const auto *FD = dyn_cast<FieldDecl>(VD))
230       Fields.insert(FD);
231   } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
232     if (RecordDecl *RD = InitList->getType()->getAsRecordDecl())
233       for (const auto *FD : getFieldsForInitListExpr(RD))
234         Fields.insert(FD);
235   }
236 }
237 
238 // FIXME: Add support for resetting globals after function calls to enable
239 // the implementation of sound analyses.
240 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
241   assert(FuncDecl->getBody() != nullptr);
242 
243   FieldSet Fields;
244   llvm::DenseSet<const VarDecl *> Vars;
245   llvm::DenseSet<const FunctionDecl *> Funcs;
246 
247   // Look for global variable and field references in the
248   // constructor-initializers.
249   if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) {
250     for (const auto *Init : CtorDecl->inits()) {
251       if (Init->isMemberInitializer()) {
252         Fields.insert(Init->getMember());
253       } else if (Init->isIndirectMemberInitializer()) {
254         for (const auto *I : Init->getIndirectMember()->chain())
255           Fields.insert(cast<FieldDecl>(I));
256       }
257       const Expr *E = Init->getInit();
258       assert(E != nullptr);
259       getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs);
260     }
261     // Add all fields mentioned in default member initializers.
262     for (const FieldDecl *F : CtorDecl->getParent()->fields())
263       if (const auto *I = F->getInClassInitializer())
264           getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs);
265   }
266   getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs);
267 
268   // These have to be added before the lines that follow to ensure that
269   // `create*` work correctly for structs.
270   DACtx->addModeledFields(Fields);
271 
272   for (const VarDecl *D : Vars) {
273     if (getStorageLocation(*D) != nullptr)
274       continue;
275 
276     setStorageLocation(*D, createObject(*D));
277   }
278 
279   for (const FunctionDecl *FD : Funcs) {
280     if (getStorageLocation(*FD) != nullptr)
281       continue;
282     auto &Loc = createStorageLocation(FD->getType());
283     setStorageLocation(*FD, Loc);
284   }
285 }
286 
287 Environment::Environment(DataflowAnalysisContext &DACtx)
288     : DACtx(&DACtx),
289       FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
290 
291 Environment Environment::fork() const {
292   Environment Copy(*this);
293   Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken);
294   return Copy;
295 }
296 
297 Environment::Environment(DataflowAnalysisContext &DACtx,
298                          const DeclContext &DeclCtx)
299     : Environment(DACtx) {
300   CallStack.push_back(&DeclCtx);
301 
302   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
303     assert(FuncDecl->getBody() != nullptr);
304 
305     initFieldsGlobalsAndFuncs(FuncDecl);
306 
307     for (const auto *ParamDecl : FuncDecl->parameters()) {
308       assert(ParamDecl != nullptr);
309       setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr));
310     }
311   }
312 
313   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
314     auto *Parent = MethodDecl->getParent();
315     assert(Parent != nullptr);
316     if (Parent->isLambda())
317       MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext());
318 
319     // FIXME: Initialize the ThisPointeeLoc of lambdas too.
320     if (MethodDecl && !MethodDecl->isStatic()) {
321       QualType ThisPointeeType = MethodDecl->getThisObjectType();
322       ThisPointeeLoc =
323           &cast<StructValue>(createValue(ThisPointeeType))->getAggregateLoc();
324     }
325   }
326 }
327 
328 bool Environment::canDescend(unsigned MaxDepth,
329                              const DeclContext *Callee) const {
330   return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee);
331 }
332 
333 Environment Environment::pushCall(const CallExpr *Call) const {
334   Environment Env(*this);
335 
336   if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) {
337     if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) {
338       if (!isa<CXXThisExpr>(Arg))
339           Env.ThisPointeeLoc =
340               cast<AggregateStorageLocation>(getStorageLocationStrict(*Arg));
341       // Otherwise (when the argument is `this`), retain the current
342       // environment's `ThisPointeeLoc`.
343     }
344   }
345 
346   Env.pushCallInternal(Call->getDirectCallee(),
347                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
348 
349   return Env;
350 }
351 
352 Environment Environment::pushCall(const CXXConstructExpr *Call) const {
353   Environment Env(*this);
354 
355   Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call);
356 
357   Env.pushCallInternal(Call->getConstructor(),
358                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
359 
360   return Env;
361 }
362 
363 void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
364                                    ArrayRef<const Expr *> Args) {
365   // Canonicalize to the definition of the function. This ensures that we're
366   // putting arguments into the same `ParamVarDecl`s` that the callee will later
367   // be retrieving them from.
368   assert(FuncDecl->getDefinition() != nullptr);
369   FuncDecl = FuncDecl->getDefinition();
370 
371   CallStack.push_back(FuncDecl);
372 
373   initFieldsGlobalsAndFuncs(FuncDecl);
374 
375   const auto *ParamIt = FuncDecl->param_begin();
376 
377   // FIXME: Parameters don't always map to arguments 1:1; examples include
378   // overloaded operators implemented as member functions, and parameter packs.
379   for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) {
380     assert(ParamIt != FuncDecl->param_end());
381     const VarDecl *Param = *ParamIt;
382     setStorageLocation(*Param, createObject(*Param, Args[ArgIndex]));
383   }
384 }
385 
386 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) {
387   // We ignore `DACtx` because it's already the same in both. We don't want the
388   // callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or `ThisPointeeLoc`. We don't
389   // bring back `DeclToLoc` and `ExprToLoc` because we want to be able to later
390   // analyze the same callee in a different context, and `setStorageLocation`
391   // requires there to not already be a storage location assigned. Conceptually,
392   // these maps capture information from the local scope, so when popping that
393   // scope, we do not propagate the maps.
394   this->LocToVal = std::move(CalleeEnv.LocToVal);
395   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
396 
397   if (Call->isGLValue()) {
398     if (CalleeEnv.ReturnLoc != nullptr)
399       setStorageLocationStrict(*Call, *CalleeEnv.ReturnLoc);
400   } else if (!Call->getType()->isVoidType()) {
401     if (CalleeEnv.ReturnVal != nullptr)
402       setValueStrict(*Call, *CalleeEnv.ReturnVal);
403   }
404 }
405 
406 void Environment::popCall(const CXXConstructExpr *Call,
407                           const Environment &CalleeEnv) {
408   // See also comment in `popCall(const CallExpr *, const Environment &)` above.
409   this->LocToVal = std::move(CalleeEnv.LocToVal);
410   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
411 
412   if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) {
413     setValueStrict(*Call, *Val);
414   }
415 }
416 
417 bool Environment::equivalentTo(const Environment &Other,
418                                Environment::ValueModel &Model) const {
419   assert(DACtx == Other.DACtx);
420 
421   if (ReturnVal != Other.ReturnVal)
422     return false;
423 
424   if (ReturnLoc != Other.ReturnLoc)
425     return false;
426 
427   if (ThisPointeeLoc != Other.ThisPointeeLoc)
428     return false;
429 
430   if (DeclToLoc != Other.DeclToLoc)
431     return false;
432 
433   if (ExprToLoc != Other.ExprToLoc)
434     return false;
435 
436   // Compare the contents for the intersection of their domains.
437   for (auto &Entry : LocToVal) {
438     const StorageLocation *Loc = Entry.first;
439     assert(Loc != nullptr);
440 
441     Value *Val = Entry.second;
442     assert(Val != nullptr);
443 
444     auto It = Other.LocToVal.find(Loc);
445     if (It == Other.LocToVal.end())
446       continue;
447     assert(It->second != nullptr);
448 
449     if (!areEquivalentValues(*Val, *It->second) &&
450         !compareDistinctValues(Loc->getType(), *Val, *this, *It->second, Other,
451                                Model))
452       return false;
453   }
454 
455   return true;
456 }
457 
458 LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
459                                      Environment::ValueModel &Model) {
460   assert(DACtx == PrevEnv.DACtx);
461   assert(ReturnVal == PrevEnv.ReturnVal);
462   assert(ReturnLoc == PrevEnv.ReturnLoc);
463   assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
464   assert(CallStack == PrevEnv.CallStack);
465 
466   auto Effect = LatticeJoinEffect::Unchanged;
467 
468   // By the API, `PrevEnv` is a previous version of the environment for the same
469   // block, so we have some guarantees about its shape. In particular, it will
470   // be the result of a join or widen operation on previous values for this
471   // block. For `DeclToLoc` and `ExprToLoc`, join guarantees that these maps are
472   // subsets of the maps in `PrevEnv`. So, as long as we maintain this property
473   // here, we don't need change their current values to widen.
474   assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size());
475   assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size());
476 
477   llvm::MapVector<const StorageLocation *, Value *> WidenedLocToVal;
478   for (auto &Entry : LocToVal) {
479     const StorageLocation *Loc = Entry.first;
480     assert(Loc != nullptr);
481 
482     Value *Val = Entry.second;
483     assert(Val != nullptr);
484 
485     auto PrevIt = PrevEnv.LocToVal.find(Loc);
486     if (PrevIt == PrevEnv.LocToVal.end())
487       continue;
488     assert(PrevIt->second != nullptr);
489 
490     if (areEquivalentValues(*Val, *PrevIt->second)) {
491       WidenedLocToVal.insert({Loc, Val});
492       continue;
493     }
494 
495     Value &WidenedVal = widenDistinctValues(Loc->getType(), *PrevIt->second,
496                                             PrevEnv, *Val, *this, Model);
497     WidenedLocToVal.insert({Loc, &WidenedVal});
498     if (&WidenedVal != PrevIt->second)
499       Effect = LatticeJoinEffect::Changed;
500   }
501   LocToVal = std::move(WidenedLocToVal);
502   if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() ||
503       ExprToLoc.size() != PrevEnv.ExprToLoc.size() ||
504       LocToVal.size() != PrevEnv.LocToVal.size())
505     Effect = LatticeJoinEffect::Changed;
506 
507   return Effect;
508 }
509 
510 Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
511                               Environment::ValueModel &Model) {
512   assert(EnvA.DACtx == EnvB.DACtx);
513   assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
514   assert(EnvA.CallStack == EnvB.CallStack);
515 
516   Environment JoinedEnv(*EnvA.DACtx);
517 
518   JoinedEnv.CallStack = EnvA.CallStack;
519   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
520 
521   if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) {
522     // `ReturnVal` might not always get set -- for example if we have a return
523     // statement of the form `return some_other_func()` and we decide not to
524     // analyze `some_other_func()`.
525     // In this case, we can't say anything about the joined return value -- we
526     // don't simply want to propagate the return value that we do have, because
527     // it might not be the correct one.
528     // This occurs for example in the test `ContextSensitiveMutualRecursion`.
529     JoinedEnv.ReturnVal = nullptr;
530   } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) {
531     JoinedEnv.ReturnVal = EnvA.ReturnVal;
532   } else {
533     assert(!EnvA.CallStack.empty());
534     // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
535     // cast.
536     auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
537     assert(Func != nullptr);
538     if (Value *MergedVal =
539             mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
540                                 *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
541       JoinedEnv.ReturnVal = MergedVal;
542   }
543 
544   if (EnvA.ReturnLoc == EnvB.ReturnLoc)
545     JoinedEnv.ReturnLoc = EnvA.ReturnLoc;
546   else
547     JoinedEnv.ReturnLoc = nullptr;
548 
549   // FIXME: Once we're able to remove declarations from `DeclToLoc` when their
550   // lifetime ends, add an assertion that there aren't any entries in
551   // `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to
552   // different storage locations.
553   JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc);
554 
555   JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc);
556 
557   // FIXME: update join to detect backedges and simplify the flow condition
558   // accordingly.
559   JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions(
560       EnvA.FlowConditionToken, EnvB.FlowConditionToken);
561 
562   for (auto &Entry : EnvA.LocToVal) {
563     const StorageLocation *Loc = Entry.first;
564     assert(Loc != nullptr);
565 
566     Value *Val = Entry.second;
567     assert(Val != nullptr);
568 
569     auto It = EnvB.LocToVal.find(Loc);
570     if (It == EnvB.LocToVal.end())
571       continue;
572     assert(It->second != nullptr);
573 
574     if (areEquivalentValues(*Val, *It->second)) {
575       JoinedEnv.LocToVal.insert({Loc, Val});
576       continue;
577     }
578 
579     if (Value *MergedVal = mergeDistinctValues(
580             Loc->getType(), *Val, EnvA, *It->second, EnvB, JoinedEnv, Model)) {
581       JoinedEnv.LocToVal.insert({Loc, MergedVal});
582     }
583   }
584 
585   return JoinedEnv;
586 }
587 
588 StorageLocation &Environment::createStorageLocation(QualType Type) {
589   return DACtx->createStorageLocation(Type);
590 }
591 
592 StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
593   // Evaluated declarations are always assigned the same storage locations to
594   // ensure that the environment stabilizes across loop iterations. Storage
595   // locations for evaluated declarations are stored in the analysis context.
596   return DACtx->getStableStorageLocation(D);
597 }
598 
599 StorageLocation &Environment::createStorageLocation(const Expr &E) {
600   // Evaluated expressions are always assigned the same storage locations to
601   // ensure that the environment stabilizes across loop iterations. Storage
602   // locations for evaluated expressions are stored in the analysis context.
603   return DACtx->getStableStorageLocation(E);
604 }
605 
606 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
607   assert(!DeclToLoc.contains(&D));
608   DeclToLoc[&D] = &Loc;
609 }
610 
611 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const {
612   auto It = DeclToLoc.find(&D);
613   if (It == DeclToLoc.end())
614     return nullptr;
615 
616   StorageLocation *Loc = It->second;
617 
618   return Loc;
619 }
620 
621 void Environment::setStorageLocationStrict(const Expr &E,
622                                            StorageLocation &Loc) {
623   // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason,
624   // but we still want to be able to associate a `StorageLocation` with them,
625   // so allow these as an exception.
626   assert(E.isGLValue() ||
627          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
628   setStorageLocationInternal(E, Loc);
629 }
630 
631 StorageLocation *Environment::getStorageLocationStrict(const Expr &E) const {
632   // See comment in `setStorageLocationStrict()`.
633   assert(E.isGLValue() ||
634          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
635   return getStorageLocationInternal(E);
636 }
637 
638 AggregateStorageLocation *Environment::getThisPointeeStorageLocation() const {
639   return ThisPointeeLoc;
640 }
641 
642 AggregateStorageLocation &
643 Environment::getResultObjectLocation(const Expr &RecordPRValue) {
644   assert(RecordPRValue.getType()->isRecordType());
645   assert(RecordPRValue.isPRValue());
646 
647   if (StorageLocation *ExistingLoc = getStorageLocationInternal(RecordPRValue))
648     return *cast<AggregateStorageLocation>(ExistingLoc);
649   auto &Loc = cast<AggregateStorageLocation>(
650       DACtx->getStableStorageLocation(RecordPRValue));
651   setStorageLocationInternal(RecordPRValue, Loc);
652   return Loc;
653 }
654 
655 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) {
656   return DACtx->getOrCreateNullPointerValue(PointeeType);
657 }
658 
659 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
660   assert(!isa<StructValue>(&Val) ||
661          &cast<StructValue>(&Val)->getAggregateLoc() == &Loc);
662 
663   LocToVal[&Loc] = &Val;
664 }
665 
666 void Environment::setValueStrict(const Expr &E, Value &Val) {
667   assert(E.isPRValue());
668 
669   if (auto *StructVal = dyn_cast<StructValue>(&Val)) {
670     if ([[maybe_unused]] auto *ExistingVal =
671             cast_or_null<StructValue>(getValue(E)))
672       assert(&ExistingVal->getAggregateLoc() == &StructVal->getAggregateLoc());
673     if ([[maybe_unused]] StorageLocation *ExistingLoc =
674             getStorageLocationInternal(E))
675       assert(ExistingLoc == &StructVal->getAggregateLoc());
676     else
677       setStorageLocationInternal(E, StructVal->getAggregateLoc());
678     setValue(StructVal->getAggregateLoc(), Val);
679     return;
680   }
681 
682   StorageLocation *Loc = getStorageLocationInternal(E);
683   if (Loc == nullptr) {
684     Loc = &createStorageLocation(E);
685     setStorageLocationInternal(E, *Loc);
686   }
687   setValue(*Loc, Val);
688 }
689 
690 Value *Environment::getValue(const StorageLocation &Loc) const {
691   return LocToVal.lookup(&Loc);
692 }
693 
694 Value *Environment::getValue(const ValueDecl &D) const {
695   auto *Loc = getStorageLocation(D);
696   if (Loc == nullptr)
697     return nullptr;
698   return getValue(*Loc);
699 }
700 
701 Value *Environment::getValue(const Expr &E) const {
702   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
703   if (It == ExprToLoc.end())
704     return nullptr;
705   return getValue(*It->second);
706 }
707 
708 Value *Environment::createValue(QualType Type) {
709   llvm::DenseSet<QualType> Visited;
710   int CreatedValuesCount = 0;
711   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
712                                                 CreatedValuesCount);
713   if (CreatedValuesCount > MaxCompositeValueSize) {
714     llvm::errs() << "Attempting to initialize a huge value of type: " << Type
715                  << '\n';
716   }
717   return Val;
718 }
719 
720 void Environment::setStorageLocationInternal(const Expr &E,
721                                              StorageLocation &Loc) {
722   const Expr &CanonE = ignoreCFGOmittedNodes(E);
723   assert(!ExprToLoc.contains(&CanonE));
724   ExprToLoc[&CanonE] = &Loc;
725 }
726 
727 StorageLocation *Environment::getStorageLocationInternal(const Expr &E) const {
728   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
729   return It == ExprToLoc.end() ? nullptr : &*It->second;
730 }
731 
732 Value *Environment::createValueUnlessSelfReferential(
733     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
734     int &CreatedValuesCount) {
735   assert(!Type.isNull());
736   assert(!Type->isReferenceType());
737 
738   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
739   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
740       Depth > MaxCompositeValueDepth)
741     return nullptr;
742 
743   if (Type->isBooleanType()) {
744     CreatedValuesCount++;
745     return &makeAtomicBoolValue();
746   }
747 
748   if (Type->isIntegerType()) {
749     // FIXME: consider instead `return nullptr`, given that we do nothing useful
750     // with integers, and so distinguishing them serves no purpose, but could
751     // prevent convergence.
752     CreatedValuesCount++;
753     return &arena().create<IntegerValue>();
754   }
755 
756   if (Type->isPointerType()) {
757     CreatedValuesCount++;
758     QualType PointeeType = Type->getPointeeType();
759     StorageLocation &PointeeLoc =
760         createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount);
761 
762     return &arena().create<PointerValue>(PointeeLoc);
763   }
764 
765   if (Type->isRecordType()) {
766     CreatedValuesCount++;
767     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
768     for (const FieldDecl *Field : DACtx->getModeledFields(Type)) {
769       assert(Field != nullptr);
770 
771       QualType FieldType = Field->getType();
772 
773       FieldLocs.insert(
774           {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1,
775                                           CreatedValuesCount)});
776     }
777 
778     AggregateStorageLocation &Loc =
779         arena().create<AggregateStorageLocation>(Type, std::move(FieldLocs));
780     StructValue &StructVal = create<StructValue>(Loc);
781 
782     // As we already have a storage location for the `StructValue`, we can and
783     // should associate them in the environment.
784     setValue(Loc, StructVal);
785 
786     return &StructVal;
787   }
788 
789   return nullptr;
790 }
791 
792 StorageLocation &
793 Environment::createLocAndMaybeValue(QualType Ty,
794                                     llvm::DenseSet<QualType> &Visited,
795                                     int Depth, int &CreatedValuesCount) {
796   if (!Visited.insert(Ty.getCanonicalType()).second)
797     return createStorageLocation(Ty.getNonReferenceType());
798   Value *Val = createValueUnlessSelfReferential(
799       Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount);
800   Visited.erase(Ty.getCanonicalType());
801 
802   Ty = Ty.getNonReferenceType();
803 
804   if (Val == nullptr)
805     return createStorageLocation(Ty);
806 
807   if (Ty->isRecordType())
808     return cast<StructValue>(Val)->getAggregateLoc();
809 
810   StorageLocation &Loc = createStorageLocation(Ty);
811   setValue(Loc, *Val);
812   return Loc;
813 }
814 
815 StorageLocation &Environment::createObjectInternal(const VarDecl *D,
816                                                    QualType Ty,
817                                                    const Expr *InitExpr) {
818   if (Ty->isReferenceType()) {
819     // Although variables of reference type always need to be initialized, it
820     // can happen that we can't see the initializer, so `InitExpr` may still
821     // be null.
822     if (InitExpr) {
823       if (auto *InitExprLoc = getStorageLocationStrict(*InitExpr))
824           return *InitExprLoc;
825     }
826 
827     // Even though we have an initializer, we might not get an
828     // InitExprLoc, for example if the InitExpr is a CallExpr for which we
829     // don't have a function body. In this case, we just invent a storage
830     // location and value -- it's the best we can do.
831     return createObjectInternal(D, Ty.getNonReferenceType(), nullptr);
832   }
833 
834   Value *Val = nullptr;
835   if (InitExpr)
836     // In the (few) cases where an expression is intentionally
837     // "uninterpreted", `InitExpr` is not associated with a value.  There are
838     // two ways to handle this situation: propagate the status, so that
839     // uninterpreted initializers result in uninterpreted variables, or
840     // provide a default value. We choose the latter so that later refinements
841     // of the variable can be used for reasoning about the surrounding code.
842     // For this reason, we let this case be handled by the `createValue()`
843     // call below.
844     //
845     // FIXME. If and when we interpret all language cases, change this to
846     // assert that `InitExpr` is interpreted, rather than supplying a
847     // default value (assuming we don't update the environment API to return
848     // references).
849     Val = getValue(*InitExpr);
850   if (!Val)
851     Val = createValue(Ty);
852 
853   if (Ty->isRecordType())
854     return cast<StructValue>(Val)->getAggregateLoc();
855 
856   StorageLocation &Loc =
857       D ? createStorageLocation(*D) : createStorageLocation(Ty);
858 
859   if (Val)
860     setValue(Loc, *Val);
861 
862   return Loc;
863 }
864 
865 void Environment::addToFlowCondition(const Formula &Val) {
866   DACtx->addFlowConditionConstraint(FlowConditionToken, Val);
867 }
868 
869 bool Environment::flowConditionImplies(const Formula &Val) const {
870   return DACtx->flowConditionImplies(FlowConditionToken, Val);
871 }
872 
873 void Environment::dump(raw_ostream &OS) const {
874   // FIXME: add printing for remaining fields and allow caller to decide what
875   // fields are printed.
876   OS << "DeclToLoc:\n";
877   for (auto [D, L] : DeclToLoc)
878     OS << "  [" << D->getNameAsString() << ", " << L << "]\n";
879 
880   OS << "ExprToLoc:\n";
881   for (auto [E, L] : ExprToLoc)
882     OS << "  [" << E << ", " << L << "]\n";
883 
884   OS << "LocToVal:\n";
885   for (auto [L, V] : LocToVal) {
886     OS << "  [" << L << ", " << V << ": " << *V << "]\n";
887   }
888 
889   OS << "FlowConditionToken:\n";
890   DACtx->dumpFlowCondition(FlowConditionToken, OS);
891 }
892 
893 void Environment::dump() const {
894   dump(llvm::dbgs());
895 }
896 
897 AggregateStorageLocation *
898 getImplicitObjectLocation(const CXXMemberCallExpr &MCE,
899                           const Environment &Env) {
900   Expr *ImplicitObject = MCE.getImplicitObjectArgument();
901   if (ImplicitObject == nullptr)
902     return nullptr;
903   if (ImplicitObject->getType()->isPointerType()) {
904     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*ImplicitObject)))
905       return &cast<AggregateStorageLocation>(Val->getPointeeLoc());
906     return nullptr;
907   }
908   return cast_or_null<AggregateStorageLocation>(
909       Env.getStorageLocationStrict(*ImplicitObject));
910 }
911 
912 AggregateStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
913                                                 const Environment &Env) {
914   Expr *Base = ME.getBase();
915   if (Base == nullptr)
916     return nullptr;
917   if (ME.isArrow()) {
918     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Base)))
919       return &cast<AggregateStorageLocation>(Val->getPointeeLoc());
920     return nullptr;
921   }
922   return cast_or_null<AggregateStorageLocation>(
923       Env.getStorageLocationStrict(*Base));
924 }
925 
926 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) {
927   // Unnamed bitfields are only used for padding and do not appear in
928   // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
929   // field list, and we thus need to remove them before mapping inits to
930   // fields to avoid mapping inits to the wrongs fields.
931   std::vector<FieldDecl *> Fields;
932   llvm::copy_if(
933       RD->fields(), std::back_inserter(Fields),
934       [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); });
935   return Fields;
936 }
937 
938 StructValue &refreshStructValue(AggregateStorageLocation &Loc,
939                                 Environment &Env) {
940   auto &NewVal = Env.create<StructValue>(Loc);
941   Env.setValue(Loc, NewVal);
942   return NewVal;
943 }
944 
945 StructValue &refreshStructValue(const Expr &Expr, Environment &Env) {
946   assert(Expr.getType()->isRecordType());
947 
948   if (Expr.isPRValue()) {
949     if (auto *ExistingVal = cast_or_null<StructValue>(Env.getValue(Expr))) {
950       auto &NewVal = Env.create<StructValue>(ExistingVal->getAggregateLoc());
951       Env.setValueStrict(Expr, NewVal);
952       return NewVal;
953     }
954 
955     auto &NewVal = *cast<StructValue>(Env.createValue(Expr.getType()));
956     Env.setValueStrict(Expr, NewVal);
957     return NewVal;
958   }
959 
960   if (auto *Loc = cast_or_null<AggregateStorageLocation>(
961           Env.getStorageLocationStrict(Expr))) {
962     auto &NewVal = Env.create<StructValue>(*Loc);
963     Env.setValue(*Loc, NewVal);
964     return NewVal;
965   }
966 
967   auto &NewVal = *cast<StructValue>(Env.createValue(Expr.getType()));
968   Env.setStorageLocationStrict(Expr, NewVal.getAggregateLoc());
969   return NewVal;
970 }
971 
972 } // namespace dataflow
973 } // namespace clang
974