xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (revision 9ecdbe3855a8048989a507ff8d470aee4d407589)
1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
20 #include "clang/Analysis/FlowSensitive/Value.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/DenseSet.h"
23 #include "llvm/ADT/MapVector.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include <cassert>
28 #include <memory>
29 #include <utility>
30 
31 namespace clang {
32 namespace dataflow {
33 
34 // FIXME: convert these to parameters of the analysis or environment. Current
35 // settings have been experimentaly validated, but only for a particular
36 // analysis.
37 static constexpr int MaxCompositeValueDepth = 3;
38 static constexpr int MaxCompositeValueSize = 1000;
39 
40 /// Returns a map consisting of key-value entries that are present in both maps.
41 template <typename K, typename V>
42 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
43                                         const llvm::DenseMap<K, V> &Map2) {
44   llvm::DenseMap<K, V> Result;
45   for (auto &Entry : Map1) {
46     auto It = Map2.find(Entry.first);
47     if (It != Map2.end() && Entry.second == It->second)
48       Result.insert({Entry.first, Entry.second});
49   }
50   return Result;
51 }
52 
53 static bool compareDistinctValues(QualType Type, Value &Val1,
54                                   const Environment &Env1, Value &Val2,
55                                   const Environment &Env2,
56                                   Environment::ValueModel &Model) {
57   // Note: Potentially costly, but, for booleans, we could check whether both
58   // can be proven equivalent in their respective environments.
59 
60   // FIXME: move the reference/pointers logic from `areEquivalentValues` to here
61   // and implement separate, join/widen specific handling for
62   // reference/pointers.
63   switch (Model.compare(Type, Val1, Env1, Val2, Env2)) {
64   case ComparisonResult::Same:
65     return true;
66   case ComparisonResult::Different:
67     return false;
68   case ComparisonResult::Unknown:
69     switch (Val1.getKind()) {
70     case Value::Kind::Integer:
71     case Value::Kind::Pointer:
72     case Value::Kind::Record:
73       // FIXME: this choice intentionally introduces unsoundness to allow
74       // for convergence. Once we have widening support for the
75       // reference/pointer and struct built-in models, this should be
76       // `false`.
77       return true;
78     default:
79       return false;
80     }
81   }
82   llvm_unreachable("All cases covered in switch");
83 }
84 
85 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
86 /// respectively, of the same type `Type`. Merging generally produces a single
87 /// value that (soundly) approximates the two inputs, although the actual
88 /// meaning depends on `Model`.
89 static Value *mergeDistinctValues(QualType Type, Value &Val1,
90                                   const Environment &Env1, Value &Val2,
91                                   const Environment &Env2,
92                                   Environment &MergedEnv,
93                                   Environment::ValueModel &Model) {
94   // Join distinct boolean values preserving information about the constraints
95   // in the respective path conditions.
96   if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
97     // FIXME: Checking both values should be unnecessary, since they should have
98     // a consistent shape.  However, right now we can end up with BoolValue's in
99     // integer-typed variables due to our incorrect handling of
100     // boolean-to-integer casts (we just propagate the BoolValue to the result
101     // of the cast). So, a join can encounter an integer in one branch but a
102     // bool in the other.
103     // For example:
104     // ```
105     // std::optional<bool> o;
106     // int x;
107     // if (o.has_value())
108     //   x = o.value();
109     // ```
110     auto &Expr1 = cast<BoolValue>(Val1).formula();
111     auto &Expr2 = cast<BoolValue>(Val2).formula();
112     auto &A = MergedEnv.arena();
113     auto &MergedVal = A.makeAtomRef(A.makeAtom());
114     MergedEnv.addToFlowCondition(
115         A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
116                            A.makeEquals(MergedVal, Expr1)),
117                  A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
118                            A.makeEquals(MergedVal, Expr2))));
119     return &A.makeBoolValue(MergedVal);
120   }
121 
122   Value *MergedVal = nullptr;
123   if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) {
124     [[maybe_unused]] auto *RecordVal2 = cast<RecordValue>(&Val2);
125 
126     // Values to be merged are always associated with the same location in
127     // `LocToVal`. The location stored in `RecordVal` should therefore also
128     // be the same.
129     assert(&RecordVal1->getLoc() == &RecordVal2->getLoc());
130 
131     // `RecordVal1` and `RecordVal2` may have different properties associated
132     // with them. Create a new `RecordValue` without any properties so that we
133     // soundly approximate both values. If a particular analysis needs to merge
134     // properties, it should do so in `DataflowAnalysis::merge()`.
135     MergedVal = &MergedEnv.create<RecordValue>(RecordVal1->getLoc());
136   } else {
137     MergedVal = MergedEnv.createValue(Type);
138   }
139 
140   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
141   // returns false to avoid storing unneeded values in `DACtx`.
142   if (MergedVal)
143     if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
144       return MergedVal;
145 
146   return nullptr;
147 }
148 
149 // When widening does not change `Current`, return value will equal `&Prev`.
150 static Value &widenDistinctValues(QualType Type, Value &Prev,
151                                   const Environment &PrevEnv, Value &Current,
152                                   Environment &CurrentEnv,
153                                   Environment::ValueModel &Model) {
154   // Boolean-model widening.
155   if (isa<BoolValue>(&Prev)) {
156     assert(isa<BoolValue>(Current));
157     // Widen to Top, because we know they are different values. If previous was
158     // already Top, re-use that to (implicitly) indicate that no change occured.
159     if (isa<TopBoolValue>(Prev))
160       return Prev;
161     return CurrentEnv.makeTopBoolValue();
162   }
163 
164   // FIXME: Add other built-in model widening.
165 
166   // Custom-model widening.
167   if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv))
168     return *W;
169 
170   // Default of widening is a no-op: leave the current value unchanged.
171   return Current;
172 }
173 
174 /// Initializes a global storage value.
175 static void insertIfGlobal(const Decl &D,
176                            llvm::DenseSet<const VarDecl *> &Vars) {
177   if (auto *V = dyn_cast<VarDecl>(&D))
178     if (V->hasGlobalStorage())
179       Vars.insert(V);
180 }
181 
182 static void insertIfFunction(const Decl &D,
183                              llvm::DenseSet<const FunctionDecl *> &Funcs) {
184   if (auto *FD = dyn_cast<FunctionDecl>(&D))
185     Funcs.insert(FD);
186 }
187 
188 static void
189 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields,
190                          llvm::DenseSet<const VarDecl *> &Vars,
191                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
192   insertIfGlobal(D, Vars);
193   insertIfFunction(D, Funcs);
194   if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D))
195     for (const auto *B : Decomp->bindings())
196       if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding()))
197         // FIXME: should we be using `E->getFoundDecl()`?
198         if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
199           Fields.insert(FD);
200 }
201 
202 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields,
203 /// global variables and functions that are declared in or referenced from
204 /// sub-statements.
205 static void
206 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
207                          llvm::DenseSet<const VarDecl *> &Vars,
208                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
209   for (auto *Child : S.children())
210     if (Child != nullptr)
211       getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs);
212   if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
213     getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs);
214 
215   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
216     if (DS->isSingleDecl())
217       getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs);
218     else
219       for (auto *D : DS->getDeclGroup())
220         getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs);
221   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
222     insertIfGlobal(*E->getDecl(), Vars);
223     insertIfFunction(*E->getDecl(), Funcs);
224   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
225     // FIXME: should we be using `E->getFoundDecl()`?
226     const ValueDecl *VD = E->getMemberDecl();
227     insertIfGlobal(*VD, Vars);
228     insertIfFunction(*VD, Funcs);
229     if (const auto *FD = dyn_cast<FieldDecl>(VD))
230       Fields.insert(FD);
231   } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
232     if (RecordDecl *RD = InitList->getType()->getAsRecordDecl())
233       for (const auto *FD : getFieldsForInitListExpr(RD))
234         Fields.insert(FD);
235   }
236 }
237 
238 // FIXME: Add support for resetting globals after function calls to enable
239 // the implementation of sound analyses.
240 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
241   assert(FuncDecl->getBody() != nullptr);
242 
243   FieldSet Fields;
244   llvm::DenseSet<const VarDecl *> Vars;
245   llvm::DenseSet<const FunctionDecl *> Funcs;
246 
247   // Look for global variable and field references in the
248   // constructor-initializers.
249   if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) {
250     for (const auto *Init : CtorDecl->inits()) {
251       if (Init->isMemberInitializer()) {
252         Fields.insert(Init->getMember());
253       } else if (Init->isIndirectMemberInitializer()) {
254         for (const auto *I : Init->getIndirectMember()->chain())
255           Fields.insert(cast<FieldDecl>(I));
256       }
257       const Expr *E = Init->getInit();
258       assert(E != nullptr);
259       getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs);
260     }
261     // Add all fields mentioned in default member initializers.
262     for (const FieldDecl *F : CtorDecl->getParent()->fields())
263       if (const auto *I = F->getInClassInitializer())
264           getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs);
265   }
266   getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs);
267 
268   // These have to be added before the lines that follow to ensure that
269   // `create*` work correctly for structs.
270   DACtx->addModeledFields(Fields);
271 
272   for (const VarDecl *D : Vars) {
273     if (getStorageLocation(*D) != nullptr)
274       continue;
275 
276     setStorageLocation(*D, createObject(*D));
277   }
278 
279   for (const FunctionDecl *FD : Funcs) {
280     if (getStorageLocation(*FD) != nullptr)
281       continue;
282     auto &Loc = createStorageLocation(FD->getType());
283     setStorageLocation(*FD, Loc);
284   }
285 }
286 
287 Environment::Environment(DataflowAnalysisContext &DACtx)
288     : DACtx(&DACtx),
289       FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
290 
291 Environment Environment::fork() const {
292   Environment Copy(*this);
293   Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken);
294   return Copy;
295 }
296 
297 Environment::Environment(DataflowAnalysisContext &DACtx,
298                          const DeclContext &DeclCtx)
299     : Environment(DACtx) {
300   CallStack.push_back(&DeclCtx);
301 
302   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
303     assert(FuncDecl->getBody() != nullptr);
304 
305     initFieldsGlobalsAndFuncs(FuncDecl);
306 
307     for (const auto *ParamDecl : FuncDecl->parameters()) {
308       assert(ParamDecl != nullptr);
309       setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr));
310     }
311   }
312 
313   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
314     auto *Parent = MethodDecl->getParent();
315     assert(Parent != nullptr);
316     if (Parent->isLambda())
317       MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext());
318 
319     // FIXME: Initialize the ThisPointeeLoc of lambdas too.
320     if (MethodDecl && !MethodDecl->isStatic()) {
321       QualType ThisPointeeType = MethodDecl->getThisObjectType();
322       ThisPointeeLoc =
323           &cast<RecordValue>(createValue(ThisPointeeType))->getLoc();
324     }
325   }
326 }
327 
328 bool Environment::canDescend(unsigned MaxDepth,
329                              const DeclContext *Callee) const {
330   return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee);
331 }
332 
333 Environment Environment::pushCall(const CallExpr *Call) const {
334   Environment Env(*this);
335 
336   if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) {
337     if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) {
338       if (!isa<CXXThisExpr>(Arg))
339           Env.ThisPointeeLoc =
340               cast<RecordStorageLocation>(getStorageLocation(*Arg));
341       // Otherwise (when the argument is `this`), retain the current
342       // environment's `ThisPointeeLoc`.
343     }
344   }
345 
346   Env.pushCallInternal(Call->getDirectCallee(),
347                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
348 
349   return Env;
350 }
351 
352 Environment Environment::pushCall(const CXXConstructExpr *Call) const {
353   Environment Env(*this);
354 
355   Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call);
356 
357   Env.pushCallInternal(Call->getConstructor(),
358                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
359 
360   return Env;
361 }
362 
363 void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
364                                    ArrayRef<const Expr *> Args) {
365   // Canonicalize to the definition of the function. This ensures that we're
366   // putting arguments into the same `ParamVarDecl`s` that the callee will later
367   // be retrieving them from.
368   assert(FuncDecl->getDefinition() != nullptr);
369   FuncDecl = FuncDecl->getDefinition();
370 
371   CallStack.push_back(FuncDecl);
372 
373   initFieldsGlobalsAndFuncs(FuncDecl);
374 
375   const auto *ParamIt = FuncDecl->param_begin();
376 
377   // FIXME: Parameters don't always map to arguments 1:1; examples include
378   // overloaded operators implemented as member functions, and parameter packs.
379   for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) {
380     assert(ParamIt != FuncDecl->param_end());
381     const VarDecl *Param = *ParamIt;
382     setStorageLocation(*Param, createObject(*Param, Args[ArgIndex]));
383   }
384 }
385 
386 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) {
387   // We ignore `DACtx` because it's already the same in both. We don't want the
388   // callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or `ThisPointeeLoc`. We don't
389   // bring back `DeclToLoc` and `ExprToLoc` because we want to be able to later
390   // analyze the same callee in a different context, and `setStorageLocation`
391   // requires there to not already be a storage location assigned. Conceptually,
392   // these maps capture information from the local scope, so when popping that
393   // scope, we do not propagate the maps.
394   this->LocToVal = std::move(CalleeEnv.LocToVal);
395   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
396 
397   if (Call->isGLValue()) {
398     if (CalleeEnv.ReturnLoc != nullptr)
399       setStorageLocation(*Call, *CalleeEnv.ReturnLoc);
400   } else if (!Call->getType()->isVoidType()) {
401     if (CalleeEnv.ReturnVal != nullptr)
402       setValue(*Call, *CalleeEnv.ReturnVal);
403   }
404 }
405 
406 void Environment::popCall(const CXXConstructExpr *Call,
407                           const Environment &CalleeEnv) {
408   // See also comment in `popCall(const CallExpr *, const Environment &)` above.
409   this->LocToVal = std::move(CalleeEnv.LocToVal);
410   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
411 
412   if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) {
413     setValue(*Call, *Val);
414   }
415 }
416 
417 bool Environment::equivalentTo(const Environment &Other,
418                                Environment::ValueModel &Model) const {
419   assert(DACtx == Other.DACtx);
420 
421   if (ReturnVal != Other.ReturnVal)
422     return false;
423 
424   if (ReturnLoc != Other.ReturnLoc)
425     return false;
426 
427   if (ThisPointeeLoc != Other.ThisPointeeLoc)
428     return false;
429 
430   if (DeclToLoc != Other.DeclToLoc)
431     return false;
432 
433   if (ExprToLoc != Other.ExprToLoc)
434     return false;
435 
436   // Compare the contents for the intersection of their domains.
437   for (auto &Entry : LocToVal) {
438     const StorageLocation *Loc = Entry.first;
439     assert(Loc != nullptr);
440 
441     Value *Val = Entry.second;
442     assert(Val != nullptr);
443 
444     auto It = Other.LocToVal.find(Loc);
445     if (It == Other.LocToVal.end())
446       continue;
447     assert(It->second != nullptr);
448 
449     if (!areEquivalentValues(*Val, *It->second) &&
450         !compareDistinctValues(Loc->getType(), *Val, *this, *It->second, Other,
451                                Model))
452       return false;
453   }
454 
455   return true;
456 }
457 
458 LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
459                                      Environment::ValueModel &Model) {
460   assert(DACtx == PrevEnv.DACtx);
461   assert(ReturnVal == PrevEnv.ReturnVal);
462   assert(ReturnLoc == PrevEnv.ReturnLoc);
463   assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
464   assert(CallStack == PrevEnv.CallStack);
465 
466   auto Effect = LatticeJoinEffect::Unchanged;
467 
468   // By the API, `PrevEnv` is a previous version of the environment for the same
469   // block, so we have some guarantees about its shape. In particular, it will
470   // be the result of a join or widen operation on previous values for this
471   // block. For `DeclToLoc` and `ExprToLoc`, join guarantees that these maps are
472   // subsets of the maps in `PrevEnv`. So, as long as we maintain this property
473   // here, we don't need change their current values to widen.
474   assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size());
475   assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size());
476 
477   llvm::MapVector<const StorageLocation *, Value *> WidenedLocToVal;
478   for (auto &Entry : LocToVal) {
479     const StorageLocation *Loc = Entry.first;
480     assert(Loc != nullptr);
481 
482     Value *Val = Entry.second;
483     assert(Val != nullptr);
484 
485     auto PrevIt = PrevEnv.LocToVal.find(Loc);
486     if (PrevIt == PrevEnv.LocToVal.end())
487       continue;
488     assert(PrevIt->second != nullptr);
489 
490     if (areEquivalentValues(*Val, *PrevIt->second)) {
491       WidenedLocToVal.insert({Loc, Val});
492       continue;
493     }
494 
495     Value &WidenedVal = widenDistinctValues(Loc->getType(), *PrevIt->second,
496                                             PrevEnv, *Val, *this, Model);
497     WidenedLocToVal.insert({Loc, &WidenedVal});
498     if (&WidenedVal != PrevIt->second)
499       Effect = LatticeJoinEffect::Changed;
500   }
501   LocToVal = std::move(WidenedLocToVal);
502   if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() ||
503       ExprToLoc.size() != PrevEnv.ExprToLoc.size() ||
504       LocToVal.size() != PrevEnv.LocToVal.size())
505     Effect = LatticeJoinEffect::Changed;
506 
507   return Effect;
508 }
509 
510 Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
511                               Environment::ValueModel &Model) {
512   assert(EnvA.DACtx == EnvB.DACtx);
513   assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
514   assert(EnvA.CallStack == EnvB.CallStack);
515 
516   Environment JoinedEnv(*EnvA.DACtx);
517 
518   JoinedEnv.CallStack = EnvA.CallStack;
519   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
520 
521   if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) {
522     // `ReturnVal` might not always get set -- for example if we have a return
523     // statement of the form `return some_other_func()` and we decide not to
524     // analyze `some_other_func()`.
525     // In this case, we can't say anything about the joined return value -- we
526     // don't simply want to propagate the return value that we do have, because
527     // it might not be the correct one.
528     // This occurs for example in the test `ContextSensitiveMutualRecursion`.
529     JoinedEnv.ReturnVal = nullptr;
530   } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) {
531     JoinedEnv.ReturnVal = EnvA.ReturnVal;
532   } else {
533     assert(!EnvA.CallStack.empty());
534     // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
535     // cast.
536     auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
537     assert(Func != nullptr);
538     if (Value *MergedVal =
539             mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
540                                 *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
541       JoinedEnv.ReturnVal = MergedVal;
542   }
543 
544   if (EnvA.ReturnLoc == EnvB.ReturnLoc)
545     JoinedEnv.ReturnLoc = EnvA.ReturnLoc;
546   else
547     JoinedEnv.ReturnLoc = nullptr;
548 
549   // FIXME: Once we're able to remove declarations from `DeclToLoc` when their
550   // lifetime ends, add an assertion that there aren't any entries in
551   // `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to
552   // different storage locations.
553   JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc);
554 
555   JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc);
556 
557   // FIXME: update join to detect backedges and simplify the flow condition
558   // accordingly.
559   JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions(
560       EnvA.FlowConditionToken, EnvB.FlowConditionToken);
561 
562   for (auto &Entry : EnvA.LocToVal) {
563     const StorageLocation *Loc = Entry.first;
564     assert(Loc != nullptr);
565 
566     Value *Val = Entry.second;
567     assert(Val != nullptr);
568 
569     auto It = EnvB.LocToVal.find(Loc);
570     if (It == EnvB.LocToVal.end())
571       continue;
572     assert(It->second != nullptr);
573 
574     if (areEquivalentValues(*Val, *It->second)) {
575       JoinedEnv.LocToVal.insert({Loc, Val});
576       continue;
577     }
578 
579     if (Value *MergedVal = mergeDistinctValues(
580             Loc->getType(), *Val, EnvA, *It->second, EnvB, JoinedEnv, Model)) {
581       JoinedEnv.LocToVal.insert({Loc, MergedVal});
582     }
583   }
584 
585   return JoinedEnv;
586 }
587 
588 StorageLocation &Environment::createStorageLocation(QualType Type) {
589   return DACtx->createStorageLocation(Type);
590 }
591 
592 StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
593   // Evaluated declarations are always assigned the same storage locations to
594   // ensure that the environment stabilizes across loop iterations. Storage
595   // locations for evaluated declarations are stored in the analysis context.
596   return DACtx->getStableStorageLocation(D);
597 }
598 
599 StorageLocation &Environment::createStorageLocation(const Expr &E) {
600   // Evaluated expressions are always assigned the same storage locations to
601   // ensure that the environment stabilizes across loop iterations. Storage
602   // locations for evaluated expressions are stored in the analysis context.
603   return DACtx->getStableStorageLocation(E);
604 }
605 
606 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
607   assert(!DeclToLoc.contains(&D));
608   DeclToLoc[&D] = &Loc;
609 }
610 
611 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const {
612   auto It = DeclToLoc.find(&D);
613   if (It == DeclToLoc.end())
614     return nullptr;
615 
616   StorageLocation *Loc = It->second;
617 
618   return Loc;
619 }
620 
621 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
622   // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason,
623   // but we still want to be able to associate a `StorageLocation` with them,
624   // so allow these as an exception.
625   assert(E.isGLValue() ||
626          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
627   setStorageLocationInternal(E, Loc);
628 }
629 
630 StorageLocation *Environment::getStorageLocation(const Expr &E) const {
631   // See comment in `setStorageLocation()`.
632   assert(E.isGLValue() ||
633          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
634   return getStorageLocationInternal(E);
635 }
636 
637 RecordStorageLocation *Environment::getThisPointeeStorageLocation() const {
638   return ThisPointeeLoc;
639 }
640 
641 RecordStorageLocation &
642 Environment::getResultObjectLocation(const Expr &RecordPRValue) {
643   assert(RecordPRValue.getType()->isRecordType());
644   assert(RecordPRValue.isPRValue());
645 
646   if (StorageLocation *ExistingLoc = getStorageLocationInternal(RecordPRValue))
647     return *cast<RecordStorageLocation>(ExistingLoc);
648   auto &Loc = cast<RecordStorageLocation>(
649       DACtx->getStableStorageLocation(RecordPRValue));
650   setStorageLocationInternal(RecordPRValue, Loc);
651   return Loc;
652 }
653 
654 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) {
655   return DACtx->getOrCreateNullPointerValue(PointeeType);
656 }
657 
658 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
659   assert(!isa<RecordValue>(&Val) || &cast<RecordValue>(&Val)->getLoc() == &Loc);
660 
661   LocToVal[&Loc] = &Val;
662 }
663 
664 void Environment::setValue(const Expr &E, Value &Val) {
665   assert(E.isPRValue());
666 
667   if (auto *RecordVal = dyn_cast<RecordValue>(&Val)) {
668     if ([[maybe_unused]] auto *ExistingVal =
669             cast_or_null<RecordValue>(getValue(E)))
670       assert(&ExistingVal->getLoc() == &RecordVal->getLoc());
671     if ([[maybe_unused]] StorageLocation *ExistingLoc =
672             getStorageLocationInternal(E))
673       assert(ExistingLoc == &RecordVal->getLoc());
674     else
675       setStorageLocationInternal(E, RecordVal->getLoc());
676     setValue(RecordVal->getLoc(), Val);
677     return;
678   }
679 
680   StorageLocation *Loc = getStorageLocationInternal(E);
681   if (Loc == nullptr) {
682     Loc = &createStorageLocation(E);
683     setStorageLocationInternal(E, *Loc);
684   }
685   setValue(*Loc, Val);
686 }
687 
688 Value *Environment::getValue(const StorageLocation &Loc) const {
689   return LocToVal.lookup(&Loc);
690 }
691 
692 Value *Environment::getValue(const ValueDecl &D) const {
693   auto *Loc = getStorageLocation(D);
694   if (Loc == nullptr)
695     return nullptr;
696   return getValue(*Loc);
697 }
698 
699 Value *Environment::getValue(const Expr &E) const {
700   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
701   if (It == ExprToLoc.end())
702     return nullptr;
703   return getValue(*It->second);
704 }
705 
706 Value *Environment::createValue(QualType Type) {
707   llvm::DenseSet<QualType> Visited;
708   int CreatedValuesCount = 0;
709   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
710                                                 CreatedValuesCount);
711   if (CreatedValuesCount > MaxCompositeValueSize) {
712     llvm::errs() << "Attempting to initialize a huge value of type: " << Type
713                  << '\n';
714   }
715   return Val;
716 }
717 
718 void Environment::setStorageLocationInternal(const Expr &E,
719                                              StorageLocation &Loc) {
720   const Expr &CanonE = ignoreCFGOmittedNodes(E);
721   assert(!ExprToLoc.contains(&CanonE));
722   ExprToLoc[&CanonE] = &Loc;
723 }
724 
725 StorageLocation *Environment::getStorageLocationInternal(const Expr &E) const {
726   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
727   return It == ExprToLoc.end() ? nullptr : &*It->second;
728 }
729 
730 Value *Environment::createValueUnlessSelfReferential(
731     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
732     int &CreatedValuesCount) {
733   assert(!Type.isNull());
734   assert(!Type->isReferenceType());
735 
736   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
737   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
738       Depth > MaxCompositeValueDepth)
739     return nullptr;
740 
741   if (Type->isBooleanType()) {
742     CreatedValuesCount++;
743     return &makeAtomicBoolValue();
744   }
745 
746   if (Type->isIntegerType()) {
747     // FIXME: consider instead `return nullptr`, given that we do nothing useful
748     // with integers, and so distinguishing them serves no purpose, but could
749     // prevent convergence.
750     CreatedValuesCount++;
751     return &arena().create<IntegerValue>();
752   }
753 
754   if (Type->isPointerType()) {
755     CreatedValuesCount++;
756     QualType PointeeType = Type->getPointeeType();
757     StorageLocation &PointeeLoc =
758         createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount);
759 
760     return &arena().create<PointerValue>(PointeeLoc);
761   }
762 
763   if (Type->isRecordType()) {
764     CreatedValuesCount++;
765     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
766     for (const FieldDecl *Field : DACtx->getModeledFields(Type)) {
767       assert(Field != nullptr);
768 
769       QualType FieldType = Field->getType();
770 
771       FieldLocs.insert(
772           {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1,
773                                           CreatedValuesCount)});
774     }
775 
776     RecordStorageLocation &Loc =
777         arena().create<RecordStorageLocation>(Type, std::move(FieldLocs));
778     RecordValue &RecordVal = create<RecordValue>(Loc);
779 
780     // As we already have a storage location for the `RecordValue`, we can and
781     // should associate them in the environment.
782     setValue(Loc, RecordVal);
783 
784     return &RecordVal;
785   }
786 
787   return nullptr;
788 }
789 
790 StorageLocation &
791 Environment::createLocAndMaybeValue(QualType Ty,
792                                     llvm::DenseSet<QualType> &Visited,
793                                     int Depth, int &CreatedValuesCount) {
794   if (!Visited.insert(Ty.getCanonicalType()).second)
795     return createStorageLocation(Ty.getNonReferenceType());
796   Value *Val = createValueUnlessSelfReferential(
797       Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount);
798   Visited.erase(Ty.getCanonicalType());
799 
800   Ty = Ty.getNonReferenceType();
801 
802   if (Val == nullptr)
803     return createStorageLocation(Ty);
804 
805   if (Ty->isRecordType())
806     return cast<RecordValue>(Val)->getLoc();
807 
808   StorageLocation &Loc = createStorageLocation(Ty);
809   setValue(Loc, *Val);
810   return Loc;
811 }
812 
813 StorageLocation &Environment::createObjectInternal(const VarDecl *D,
814                                                    QualType Ty,
815                                                    const Expr *InitExpr) {
816   if (Ty->isReferenceType()) {
817     // Although variables of reference type always need to be initialized, it
818     // can happen that we can't see the initializer, so `InitExpr` may still
819     // be null.
820     if (InitExpr) {
821       if (auto *InitExprLoc = getStorageLocation(*InitExpr))
822           return *InitExprLoc;
823     }
824 
825     // Even though we have an initializer, we might not get an
826     // InitExprLoc, for example if the InitExpr is a CallExpr for which we
827     // don't have a function body. In this case, we just invent a storage
828     // location and value -- it's the best we can do.
829     return createObjectInternal(D, Ty.getNonReferenceType(), nullptr);
830   }
831 
832   Value *Val = nullptr;
833   if (InitExpr)
834     // In the (few) cases where an expression is intentionally
835     // "uninterpreted", `InitExpr` is not associated with a value.  There are
836     // two ways to handle this situation: propagate the status, so that
837     // uninterpreted initializers result in uninterpreted variables, or
838     // provide a default value. We choose the latter so that later refinements
839     // of the variable can be used for reasoning about the surrounding code.
840     // For this reason, we let this case be handled by the `createValue()`
841     // call below.
842     //
843     // FIXME. If and when we interpret all language cases, change this to
844     // assert that `InitExpr` is interpreted, rather than supplying a
845     // default value (assuming we don't update the environment API to return
846     // references).
847     Val = getValue(*InitExpr);
848   if (!Val)
849     Val = createValue(Ty);
850 
851   if (Ty->isRecordType())
852     return cast<RecordValue>(Val)->getLoc();
853 
854   StorageLocation &Loc =
855       D ? createStorageLocation(*D) : createStorageLocation(Ty);
856 
857   if (Val)
858     setValue(Loc, *Val);
859 
860   return Loc;
861 }
862 
863 void Environment::addToFlowCondition(const Formula &Val) {
864   DACtx->addFlowConditionConstraint(FlowConditionToken, Val);
865 }
866 
867 bool Environment::flowConditionImplies(const Formula &Val) const {
868   return DACtx->flowConditionImplies(FlowConditionToken, Val);
869 }
870 
871 void Environment::dump(raw_ostream &OS) const {
872   // FIXME: add printing for remaining fields and allow caller to decide what
873   // fields are printed.
874   OS << "DeclToLoc:\n";
875   for (auto [D, L] : DeclToLoc)
876     OS << "  [" << D->getNameAsString() << ", " << L << "]\n";
877 
878   OS << "ExprToLoc:\n";
879   for (auto [E, L] : ExprToLoc)
880     OS << "  [" << E << ", " << L << "]\n";
881 
882   OS << "LocToVal:\n";
883   for (auto [L, V] : LocToVal) {
884     OS << "  [" << L << ", " << V << ": " << *V << "]\n";
885   }
886 
887   OS << "FlowConditionToken:\n";
888   DACtx->dumpFlowCondition(FlowConditionToken, OS);
889 }
890 
891 void Environment::dump() const {
892   dump(llvm::dbgs());
893 }
894 
895 RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE,
896                                                  const Environment &Env) {
897   Expr *ImplicitObject = MCE.getImplicitObjectArgument();
898   if (ImplicitObject == nullptr)
899     return nullptr;
900   if (ImplicitObject->getType()->isPointerType()) {
901     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*ImplicitObject)))
902       return &cast<RecordStorageLocation>(Val->getPointeeLoc());
903     return nullptr;
904   }
905   return cast_or_null<RecordStorageLocation>(
906       Env.getStorageLocation(*ImplicitObject));
907 }
908 
909 RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
910                                              const Environment &Env) {
911   Expr *Base = ME.getBase();
912   if (Base == nullptr)
913     return nullptr;
914   if (ME.isArrow()) {
915     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Base)))
916       return &cast<RecordStorageLocation>(Val->getPointeeLoc());
917     return nullptr;
918   }
919   return cast_or_null<RecordStorageLocation>(Env.getStorageLocation(*Base));
920 }
921 
922 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) {
923   // Unnamed bitfields are only used for padding and do not appear in
924   // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
925   // field list, and we thus need to remove them before mapping inits to
926   // fields to avoid mapping inits to the wrongs fields.
927   std::vector<FieldDecl *> Fields;
928   llvm::copy_if(
929       RD->fields(), std::back_inserter(Fields),
930       [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); });
931   return Fields;
932 }
933 
934 RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) {
935   auto &NewVal = Env.create<RecordValue>(Loc);
936   Env.setValue(Loc, NewVal);
937   return NewVal;
938 }
939 
940 RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) {
941   assert(Expr.getType()->isRecordType());
942 
943   if (Expr.isPRValue()) {
944     if (auto *ExistingVal = cast_or_null<RecordValue>(Env.getValue(Expr))) {
945       auto &NewVal = Env.create<RecordValue>(ExistingVal->getLoc());
946       Env.setValue(Expr, NewVal);
947       return NewVal;
948     }
949 
950     auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType()));
951     Env.setValue(Expr, NewVal);
952     return NewVal;
953   }
954 
955   if (auto *Loc =
956           cast_or_null<RecordStorageLocation>(Env.getStorageLocation(Expr))) {
957     auto &NewVal = Env.create<RecordValue>(*Loc);
958     Env.setValue(*Loc, NewVal);
959     return NewVal;
960   }
961 
962   auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType()));
963   Env.setStorageLocation(Expr, NewVal.getLoc());
964   return NewVal;
965 }
966 
967 } // namespace dataflow
968 } // namespace clang
969