xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (revision 80f0dc3aa4bf2097932fb789904c33d985767ecd)
1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
20 #include "clang/Analysis/FlowSensitive/Value.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/DenseSet.h"
23 #include "llvm/ADT/MapVector.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include <cassert>
27 #include <utility>
28 
29 namespace clang {
30 namespace dataflow {
31 
32 // FIXME: convert these to parameters of the analysis or environment. Current
33 // settings have been experimentaly validated, but only for a particular
34 // analysis.
35 static constexpr int MaxCompositeValueDepth = 3;
36 static constexpr int MaxCompositeValueSize = 1000;
37 
38 /// Returns a map consisting of key-value entries that are present in both maps.
39 template <typename K, typename V>
40 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
41                                         const llvm::DenseMap<K, V> &Map2) {
42   llvm::DenseMap<K, V> Result;
43   for (auto &Entry : Map1) {
44     auto It = Map2.find(Entry.first);
45     if (It != Map2.end() && Entry.second == It->second)
46       Result.insert({Entry.first, Entry.second});
47   }
48   return Result;
49 }
50 
51 // Whether to consider equivalent two values with an unknown relation.
52 //
53 // FIXME: this function is a hack enabling unsoundness to support
54 // convergence. Once we have widening support for the reference/pointer and
55 // struct built-in models, this should be unconditionally `false` (and inlined
56 // as such at its call sites).
57 static bool equateUnknownValues(Value::Kind K) {
58   switch (K) {
59   case Value::Kind::Integer:
60   case Value::Kind::Pointer:
61   case Value::Kind::Record:
62     return true;
63   default:
64     return false;
65   }
66 }
67 
68 static bool compareDistinctValues(QualType Type, Value &Val1,
69                                   const Environment &Env1, Value &Val2,
70                                   const Environment &Env2,
71                                   Environment::ValueModel &Model) {
72   // Note: Potentially costly, but, for booleans, we could check whether both
73   // can be proven equivalent in their respective environments.
74 
75   // FIXME: move the reference/pointers logic from `areEquivalentValues` to here
76   // and implement separate, join/widen specific handling for
77   // reference/pointers.
78   switch (Model.compare(Type, Val1, Env1, Val2, Env2)) {
79   case ComparisonResult::Same:
80     return true;
81   case ComparisonResult::Different:
82     return false;
83   case ComparisonResult::Unknown:
84     return equateUnknownValues(Val1.getKind());
85   }
86   llvm_unreachable("All cases covered in switch");
87 }
88 
89 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
90 /// respectively, of the same type `Type`. Merging generally produces a single
91 /// value that (soundly) approximates the two inputs, although the actual
92 /// meaning depends on `Model`.
93 static Value *mergeDistinctValues(QualType Type, Value &Val1,
94                                   const Environment &Env1, Value &Val2,
95                                   const Environment &Env2,
96                                   Environment &MergedEnv,
97                                   Environment::ValueModel &Model) {
98   // Join distinct boolean values preserving information about the constraints
99   // in the respective path conditions.
100   if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
101     // FIXME: Checking both values should be unnecessary, since they should have
102     // a consistent shape.  However, right now we can end up with BoolValue's in
103     // integer-typed variables due to our incorrect handling of
104     // boolean-to-integer casts (we just propagate the BoolValue to the result
105     // of the cast). So, a join can encounter an integer in one branch but a
106     // bool in the other.
107     // For example:
108     // ```
109     // std::optional<bool> o;
110     // int x;
111     // if (o.has_value())
112     //   x = o.value();
113     // ```
114     auto &Expr1 = cast<BoolValue>(Val1).formula();
115     auto &Expr2 = cast<BoolValue>(Val2).formula();
116     auto &A = MergedEnv.arena();
117     auto &MergedVal = A.makeAtomRef(A.makeAtom());
118     MergedEnv.addToFlowCondition(
119         A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
120                            A.makeEquals(MergedVal, Expr1)),
121                  A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
122                            A.makeEquals(MergedVal, Expr2))));
123     return &A.makeBoolValue(MergedVal);
124   }
125 
126   Value *MergedVal = nullptr;
127   if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) {
128     [[maybe_unused]] auto *RecordVal2 = cast<RecordValue>(&Val2);
129 
130     // Values to be merged are always associated with the same location in
131     // `LocToVal`. The location stored in `RecordVal` should therefore also
132     // be the same.
133     assert(&RecordVal1->getLoc() == &RecordVal2->getLoc());
134 
135     // `RecordVal1` and `RecordVal2` may have different properties associated
136     // with them. Create a new `RecordValue` without any properties so that we
137     // soundly approximate both values. If a particular analysis needs to merge
138     // properties, it should do so in `DataflowAnalysis::merge()`.
139     MergedVal = &MergedEnv.create<RecordValue>(RecordVal1->getLoc());
140   } else {
141     MergedVal = MergedEnv.createValue(Type);
142   }
143 
144   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
145   // returns false to avoid storing unneeded values in `DACtx`.
146   if (MergedVal)
147     if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
148       return MergedVal;
149 
150   return nullptr;
151 }
152 
153 // When widening does not change `Current`, return value will equal `&Prev`.
154 static Value &widenDistinctValues(QualType Type, Value &Prev,
155                                   const Environment &PrevEnv, Value &Current,
156                                   Environment &CurrentEnv,
157                                   Environment::ValueModel &Model) {
158   // Boolean-model widening.
159   if (isa<BoolValue>(&Prev)) {
160     assert(isa<BoolValue>(Current));
161     // Widen to Top, because we know they are different values. If previous was
162     // already Top, re-use that to (implicitly) indicate that no change occured.
163     if (isa<TopBoolValue>(Prev))
164       return Prev;
165     return CurrentEnv.makeTopBoolValue();
166   }
167 
168   // FIXME: Add other built-in model widening.
169 
170   // Custom-model widening.
171   if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv))
172     return *W;
173 
174   return equateUnknownValues(Prev.getKind()) ? Prev : Current;
175 }
176 
177 // Returns whether the values in `Map1` and `Map2` compare equal for those
178 // keys that `Map1` and `Map2` have in common.
179 template <typename Key>
180 bool compareKeyToValueMaps(const llvm::MapVector<Key, Value *> &Map1,
181                            const llvm::MapVector<Key, Value *> &Map2,
182                            const Environment &Env1, const Environment &Env2,
183                            Environment::ValueModel &Model) {
184   for (auto &Entry : Map1) {
185     Key K = Entry.first;
186     assert(K != nullptr);
187 
188     Value *Val = Entry.second;
189     assert(Val != nullptr);
190 
191     auto It = Map2.find(K);
192     if (It == Map2.end())
193       continue;
194     assert(It->second != nullptr);
195 
196     if (!areEquivalentValues(*Val, *It->second) &&
197         !compareDistinctValues(K->getType(), *Val, Env1, *It->second, Env2,
198                                Model))
199       return false;
200   }
201 
202   return true;
203 }
204 
205 // Perform a join on either `LocToVal` or `ExprToVal`. `Key` must be either
206 // `const StorageLocation *` or `const Expr *`.
207 template <typename Key>
208 llvm::MapVector<Key, Value *>
209 joinKeyToValueMap(const llvm::MapVector<Key, Value *> &Map1,
210                   const llvm::MapVector<Key, Value *> &Map2,
211                   const Environment &Env1, const Environment &Env2,
212                   Environment &JoinedEnv, Environment::ValueModel &Model) {
213   llvm::MapVector<Key, Value *> MergedMap;
214   for (auto &Entry : Map1) {
215     Key K = Entry.first;
216     assert(K != nullptr);
217 
218     Value *Val = Entry.second;
219     assert(Val != nullptr);
220 
221     auto It = Map2.find(K);
222     if (It == Map2.end())
223       continue;
224     assert(It->second != nullptr);
225 
226     if (areEquivalentValues(*Val, *It->second)) {
227       MergedMap.insert({K, Val});
228       continue;
229     }
230 
231     if (Value *MergedVal = mergeDistinctValues(
232             K->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) {
233       MergedMap.insert({K, MergedVal});
234     }
235   }
236 
237   return MergedMap;
238 }
239 
240 // Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either
241 // `const StorageLocation *` or `const Expr *`.
242 template <typename Key>
243 llvm::MapVector<Key, Value *>
244 widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap,
245                    const llvm::MapVector<Key, Value *> &PrevMap,
246                    Environment &CurEnv, const Environment &PrevEnv,
247                    Environment::ValueModel &Model, LatticeJoinEffect &Effect) {
248   llvm::MapVector<Key, Value *> WidenedMap;
249   for (auto &Entry : CurMap) {
250     Key K = Entry.first;
251     assert(K != nullptr);
252 
253     Value *Val = Entry.second;
254     assert(Val != nullptr);
255 
256     auto PrevIt = PrevMap.find(K);
257     if (PrevIt == PrevMap.end())
258       continue;
259     assert(PrevIt->second != nullptr);
260 
261     if (areEquivalentValues(*Val, *PrevIt->second)) {
262       WidenedMap.insert({K, Val});
263       continue;
264     }
265 
266     Value &WidenedVal = widenDistinctValues(K->getType(), *PrevIt->second,
267                                             PrevEnv, *Val, CurEnv, Model);
268     WidenedMap.insert({K, &WidenedVal});
269     if (&WidenedVal != PrevIt->second)
270       Effect = LatticeJoinEffect::Changed;
271   }
272 
273   return WidenedMap;
274 }
275 
276 /// Initializes a global storage value.
277 static void insertIfGlobal(const Decl &D,
278                            llvm::DenseSet<const VarDecl *> &Vars) {
279   if (auto *V = dyn_cast<VarDecl>(&D))
280     if (V->hasGlobalStorage())
281       Vars.insert(V);
282 }
283 
284 static void insertIfFunction(const Decl &D,
285                              llvm::DenseSet<const FunctionDecl *> &Funcs) {
286   if (auto *FD = dyn_cast<FunctionDecl>(&D))
287     Funcs.insert(FD);
288 }
289 
290 static void
291 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields,
292                          llvm::DenseSet<const VarDecl *> &Vars,
293                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
294   insertIfGlobal(D, Vars);
295   insertIfFunction(D, Funcs);
296   if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D))
297     for (const auto *B : Decomp->bindings())
298       if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding()))
299         // FIXME: should we be using `E->getFoundDecl()`?
300         if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
301           Fields.insert(FD);
302 }
303 
304 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields,
305 /// global variables and functions that are declared in or referenced from
306 /// sub-statements.
307 static void
308 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
309                          llvm::DenseSet<const VarDecl *> &Vars,
310                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
311   for (auto *Child : S.children())
312     if (Child != nullptr)
313       getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs);
314   if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
315     getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs);
316 
317   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
318     if (DS->isSingleDecl())
319       getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs);
320     else
321       for (auto *D : DS->getDeclGroup())
322         getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs);
323   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
324     insertIfGlobal(*E->getDecl(), Vars);
325     insertIfFunction(*E->getDecl(), Funcs);
326   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
327     // FIXME: should we be using `E->getFoundDecl()`?
328     const ValueDecl *VD = E->getMemberDecl();
329     insertIfGlobal(*VD, Vars);
330     insertIfFunction(*VD, Funcs);
331     if (const auto *FD = dyn_cast<FieldDecl>(VD))
332       Fields.insert(FD);
333   } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
334     if (RecordDecl *RD = InitList->getType()->getAsRecordDecl())
335       for (const auto *FD : getFieldsForInitListExpr(RD))
336         Fields.insert(FD);
337   }
338 }
339 
340 // FIXME: Add support for resetting globals after function calls to enable
341 // the implementation of sound analyses.
342 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
343   assert(FuncDecl->getBody() != nullptr);
344 
345   FieldSet Fields;
346   llvm::DenseSet<const VarDecl *> Vars;
347   llvm::DenseSet<const FunctionDecl *> Funcs;
348 
349   // Look for global variable and field references in the
350   // constructor-initializers.
351   if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) {
352     for (const auto *Init : CtorDecl->inits()) {
353       if (Init->isMemberInitializer()) {
354         Fields.insert(Init->getMember());
355       } else if (Init->isIndirectMemberInitializer()) {
356         for (const auto *I : Init->getIndirectMember()->chain())
357           Fields.insert(cast<FieldDecl>(I));
358       }
359       const Expr *E = Init->getInit();
360       assert(E != nullptr);
361       getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs);
362     }
363     // Add all fields mentioned in default member initializers.
364     for (const FieldDecl *F : CtorDecl->getParent()->fields())
365       if (const auto *I = F->getInClassInitializer())
366           getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs);
367   }
368   getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs);
369 
370   // These have to be added before the lines that follow to ensure that
371   // `create*` work correctly for structs.
372   DACtx->addModeledFields(Fields);
373 
374   for (const VarDecl *D : Vars) {
375     if (getStorageLocation(*D) != nullptr)
376       continue;
377 
378     setStorageLocation(*D, createObject(*D));
379   }
380 
381   for (const FunctionDecl *FD : Funcs) {
382     if (getStorageLocation(*FD) != nullptr)
383       continue;
384     auto &Loc = createStorageLocation(FD->getType());
385     setStorageLocation(*FD, Loc);
386   }
387 }
388 
389 Environment::Environment(DataflowAnalysisContext &DACtx)
390     : DACtx(&DACtx),
391       FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
392 
393 Environment Environment::fork() const {
394   Environment Copy(*this);
395   Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken);
396   return Copy;
397 }
398 
399 Environment::Environment(DataflowAnalysisContext &DACtx,
400                          const DeclContext &DeclCtx)
401     : Environment(DACtx) {
402   CallStack.push_back(&DeclCtx);
403 
404   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
405     assert(FuncDecl->getBody() != nullptr);
406 
407     initFieldsGlobalsAndFuncs(FuncDecl);
408 
409     for (const auto *ParamDecl : FuncDecl->parameters()) {
410       assert(ParamDecl != nullptr);
411       setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr));
412     }
413   }
414 
415   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
416     auto *Parent = MethodDecl->getParent();
417     assert(Parent != nullptr);
418     if (Parent->isLambda())
419       MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext());
420 
421     // FIXME: Initialize the ThisPointeeLoc of lambdas too.
422     if (MethodDecl && !MethodDecl->isStatic()) {
423       QualType ThisPointeeType = MethodDecl->getThisObjectType();
424       ThisPointeeLoc =
425           &cast<RecordValue>(createValue(ThisPointeeType))->getLoc();
426     }
427   }
428 }
429 
430 bool Environment::canDescend(unsigned MaxDepth,
431                              const DeclContext *Callee) const {
432   return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee);
433 }
434 
435 Environment Environment::pushCall(const CallExpr *Call) const {
436   Environment Env(*this);
437 
438   if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) {
439     if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) {
440       if (!isa<CXXThisExpr>(Arg))
441           Env.ThisPointeeLoc =
442               cast<RecordStorageLocation>(getStorageLocation(*Arg));
443       // Otherwise (when the argument is `this`), retain the current
444       // environment's `ThisPointeeLoc`.
445     }
446   }
447 
448   Env.pushCallInternal(Call->getDirectCallee(),
449                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
450 
451   return Env;
452 }
453 
454 Environment Environment::pushCall(const CXXConstructExpr *Call) const {
455   Environment Env(*this);
456 
457   Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call);
458 
459   Env.pushCallInternal(Call->getConstructor(),
460                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
461 
462   return Env;
463 }
464 
465 void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
466                                    ArrayRef<const Expr *> Args) {
467   // Canonicalize to the definition of the function. This ensures that we're
468   // putting arguments into the same `ParamVarDecl`s` that the callee will later
469   // be retrieving them from.
470   assert(FuncDecl->getDefinition() != nullptr);
471   FuncDecl = FuncDecl->getDefinition();
472 
473   CallStack.push_back(FuncDecl);
474 
475   initFieldsGlobalsAndFuncs(FuncDecl);
476 
477   const auto *ParamIt = FuncDecl->param_begin();
478 
479   // FIXME: Parameters don't always map to arguments 1:1; examples include
480   // overloaded operators implemented as member functions, and parameter packs.
481   for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) {
482     assert(ParamIt != FuncDecl->param_end());
483     const VarDecl *Param = *ParamIt;
484     setStorageLocation(*Param, createObject(*Param, Args[ArgIndex]));
485   }
486 }
487 
488 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) {
489   // We ignore some entries of `CalleeEnv`:
490   // - `DACtx` because is already the same in both
491   // - We don't want the callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or
492   //   `ThisPointeeLoc` because they don't apply to us.
493   // - `DeclToLoc`, `ExprToLoc`, and `ExprToVal` capture information from the
494   //   callee's local scope, so when popping that scope, we do not propagate
495   //   the maps.
496   this->LocToVal = std::move(CalleeEnv.LocToVal);
497   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
498 
499   if (Call->isGLValue()) {
500     if (CalleeEnv.ReturnLoc != nullptr)
501       setStorageLocation(*Call, *CalleeEnv.ReturnLoc);
502   } else if (!Call->getType()->isVoidType()) {
503     if (CalleeEnv.ReturnVal != nullptr)
504       setValue(*Call, *CalleeEnv.ReturnVal);
505   }
506 }
507 
508 void Environment::popCall(const CXXConstructExpr *Call,
509                           const Environment &CalleeEnv) {
510   // See also comment in `popCall(const CallExpr *, const Environment &)` above.
511   this->LocToVal = std::move(CalleeEnv.LocToVal);
512   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
513 
514   if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) {
515     setValue(*Call, *Val);
516   }
517 }
518 
519 bool Environment::equivalentTo(const Environment &Other,
520                                Environment::ValueModel &Model) const {
521   assert(DACtx == Other.DACtx);
522 
523   if (ReturnVal != Other.ReturnVal)
524     return false;
525 
526   if (ReturnLoc != Other.ReturnLoc)
527     return false;
528 
529   if (ThisPointeeLoc != Other.ThisPointeeLoc)
530     return false;
531 
532   if (DeclToLoc != Other.DeclToLoc)
533     return false;
534 
535   if (ExprToLoc != Other.ExprToLoc)
536     return false;
537 
538   if (!compareKeyToValueMaps(ExprToVal, Other.ExprToVal, *this, Other, Model))
539     return false;
540 
541   if (!compareKeyToValueMaps(LocToVal, Other.LocToVal, *this, Other, Model))
542     return false;
543 
544   return true;
545 }
546 
547 LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
548                                      Environment::ValueModel &Model) {
549   assert(DACtx == PrevEnv.DACtx);
550   assert(ReturnVal == PrevEnv.ReturnVal);
551   assert(ReturnLoc == PrevEnv.ReturnLoc);
552   assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
553   assert(CallStack == PrevEnv.CallStack);
554 
555   auto Effect = LatticeJoinEffect::Unchanged;
556 
557   // By the API, `PrevEnv` is a previous version of the environment for the same
558   // block, so we have some guarantees about its shape. In particular, it will
559   // be the result of a join or widen operation on previous values for this
560   // block. For `DeclToLoc`, `ExprToVal`, and `ExprToLoc`, join guarantees that
561   // these maps are subsets of the maps in `PrevEnv`. So, as long as we maintain
562   // this property here, we don't need change their current values to widen.
563   assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size());
564   assert(ExprToVal.size() <= PrevEnv.ExprToVal.size());
565   assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size());
566 
567   ExprToVal = widenKeyToValueMap(ExprToVal, PrevEnv.ExprToVal, *this, PrevEnv,
568                                  Model, Effect);
569 
570   LocToVal = widenKeyToValueMap(LocToVal, PrevEnv.LocToVal, *this, PrevEnv,
571                                 Model, Effect);
572   if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() ||
573       ExprToLoc.size() != PrevEnv.ExprToLoc.size() ||
574       ExprToVal.size() != PrevEnv.ExprToVal.size() ||
575       LocToVal.size() != PrevEnv.LocToVal.size())
576     Effect = LatticeJoinEffect::Changed;
577 
578   return Effect;
579 }
580 
581 Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
582                               Environment::ValueModel &Model) {
583   assert(EnvA.DACtx == EnvB.DACtx);
584   assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
585   assert(EnvA.CallStack == EnvB.CallStack);
586 
587   Environment JoinedEnv(*EnvA.DACtx);
588 
589   JoinedEnv.CallStack = EnvA.CallStack;
590   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
591 
592   if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) {
593     // `ReturnVal` might not always get set -- for example if we have a return
594     // statement of the form `return some_other_func()` and we decide not to
595     // analyze `some_other_func()`.
596     // In this case, we can't say anything about the joined return value -- we
597     // don't simply want to propagate the return value that we do have, because
598     // it might not be the correct one.
599     // This occurs for example in the test `ContextSensitiveMutualRecursion`.
600     JoinedEnv.ReturnVal = nullptr;
601   } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) {
602     JoinedEnv.ReturnVal = EnvA.ReturnVal;
603   } else {
604     assert(!EnvA.CallStack.empty());
605     // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
606     // cast.
607     auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
608     assert(Func != nullptr);
609     if (Value *MergedVal =
610             mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
611                                 *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
612       JoinedEnv.ReturnVal = MergedVal;
613   }
614 
615   if (EnvA.ReturnLoc == EnvB.ReturnLoc)
616     JoinedEnv.ReturnLoc = EnvA.ReturnLoc;
617   else
618     JoinedEnv.ReturnLoc = nullptr;
619 
620   // FIXME: Once we're able to remove declarations from `DeclToLoc` when their
621   // lifetime ends, add an assertion that there aren't any entries in
622   // `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to
623   // different storage locations.
624   JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc);
625 
626   JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc);
627 
628   // FIXME: update join to detect backedges and simplify the flow condition
629   // accordingly.
630   JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions(
631       EnvA.FlowConditionToken, EnvB.FlowConditionToken);
632 
633   JoinedEnv.ExprToVal = joinKeyToValueMap(EnvA.ExprToVal, EnvB.ExprToVal, EnvA,
634                                           EnvB, JoinedEnv, Model);
635 
636   JoinedEnv.LocToVal = joinKeyToValueMap(EnvA.LocToVal, EnvB.LocToVal, EnvA,
637                                          EnvB, JoinedEnv, Model);
638 
639   return JoinedEnv;
640 }
641 
642 StorageLocation &Environment::createStorageLocation(QualType Type) {
643   return DACtx->createStorageLocation(Type);
644 }
645 
646 StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
647   // Evaluated declarations are always assigned the same storage locations to
648   // ensure that the environment stabilizes across loop iterations. Storage
649   // locations for evaluated declarations are stored in the analysis context.
650   return DACtx->getStableStorageLocation(D);
651 }
652 
653 StorageLocation &Environment::createStorageLocation(const Expr &E) {
654   // Evaluated expressions are always assigned the same storage locations to
655   // ensure that the environment stabilizes across loop iterations. Storage
656   // locations for evaluated expressions are stored in the analysis context.
657   return DACtx->getStableStorageLocation(E);
658 }
659 
660 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
661   assert(!DeclToLoc.contains(&D));
662   DeclToLoc[&D] = &Loc;
663 }
664 
665 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const {
666   auto It = DeclToLoc.find(&D);
667   if (It == DeclToLoc.end())
668     return nullptr;
669 
670   StorageLocation *Loc = It->second;
671 
672   return Loc;
673 }
674 
675 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
676   // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason,
677   // but we still want to be able to associate a `StorageLocation` with them,
678   // so allow these as an exception.
679   assert(E.isGLValue() ||
680          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
681   setStorageLocationInternal(E, Loc);
682 }
683 
684 StorageLocation *Environment::getStorageLocation(const Expr &E) const {
685   // See comment in `setStorageLocation()`.
686   assert(E.isGLValue() ||
687          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
688   return getStorageLocationInternal(E);
689 }
690 
691 RecordStorageLocation *Environment::getThisPointeeStorageLocation() const {
692   return ThisPointeeLoc;
693 }
694 
695 RecordStorageLocation &
696 Environment::getResultObjectLocation(const Expr &RecordPRValue) {
697   assert(RecordPRValue.getType()->isRecordType());
698   assert(RecordPRValue.isPRValue());
699 
700   if (StorageLocation *ExistingLoc = getStorageLocationInternal(RecordPRValue))
701     return *cast<RecordStorageLocation>(ExistingLoc);
702   auto &Loc = cast<RecordStorageLocation>(
703       DACtx->getStableStorageLocation(RecordPRValue));
704   setStorageLocationInternal(RecordPRValue, Loc);
705   return Loc;
706 }
707 
708 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) {
709   return DACtx->getOrCreateNullPointerValue(PointeeType);
710 }
711 
712 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
713   assert(!isa<RecordValue>(&Val) || &cast<RecordValue>(&Val)->getLoc() == &Loc);
714 
715   LocToVal[&Loc] = &Val;
716 }
717 
718 void Environment::setValue(const Expr &E, Value &Val) {
719   assert(E.isPRValue());
720   ExprToVal[&E] = &Val;
721 }
722 
723 Value *Environment::getValue(const StorageLocation &Loc) const {
724   return LocToVal.lookup(&Loc);
725 }
726 
727 Value *Environment::getValue(const ValueDecl &D) const {
728   auto *Loc = getStorageLocation(D);
729   if (Loc == nullptr)
730     return nullptr;
731   return getValue(*Loc);
732 }
733 
734 Value *Environment::getValue(const Expr &E) const {
735   if (E.isPRValue()) {
736     auto It = ExprToVal.find(&ignoreCFGOmittedNodes(E));
737     return It == ExprToVal.end() ? nullptr : It->second;
738   }
739 
740   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
741   if (It == ExprToLoc.end())
742     return nullptr;
743   return getValue(*It->second);
744 }
745 
746 Value *Environment::createValue(QualType Type) {
747   llvm::DenseSet<QualType> Visited;
748   int CreatedValuesCount = 0;
749   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
750                                                 CreatedValuesCount);
751   if (CreatedValuesCount > MaxCompositeValueSize) {
752     llvm::errs() << "Attempting to initialize a huge value of type: " << Type
753                  << '\n';
754   }
755   return Val;
756 }
757 
758 void Environment::setStorageLocationInternal(const Expr &E,
759                                              StorageLocation &Loc) {
760   const Expr &CanonE = ignoreCFGOmittedNodes(E);
761   assert(!ExprToLoc.contains(&CanonE));
762   ExprToLoc[&CanonE] = &Loc;
763 }
764 
765 StorageLocation *Environment::getStorageLocationInternal(const Expr &E) const {
766   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
767   return It == ExprToLoc.end() ? nullptr : &*It->second;
768 }
769 
770 Value *Environment::createValueUnlessSelfReferential(
771     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
772     int &CreatedValuesCount) {
773   assert(!Type.isNull());
774   assert(!Type->isReferenceType());
775 
776   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
777   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
778       Depth > MaxCompositeValueDepth)
779     return nullptr;
780 
781   if (Type->isBooleanType()) {
782     CreatedValuesCount++;
783     return &makeAtomicBoolValue();
784   }
785 
786   if (Type->isIntegerType()) {
787     // FIXME: consider instead `return nullptr`, given that we do nothing useful
788     // with integers, and so distinguishing them serves no purpose, but could
789     // prevent convergence.
790     CreatedValuesCount++;
791     return &arena().create<IntegerValue>();
792   }
793 
794   if (Type->isPointerType()) {
795     CreatedValuesCount++;
796     QualType PointeeType = Type->getPointeeType();
797     StorageLocation &PointeeLoc =
798         createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount);
799 
800     return &arena().create<PointerValue>(PointeeLoc);
801   }
802 
803   if (Type->isRecordType()) {
804     CreatedValuesCount++;
805     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
806     for (const FieldDecl *Field : DACtx->getModeledFields(Type)) {
807       assert(Field != nullptr);
808 
809       QualType FieldType = Field->getType();
810 
811       FieldLocs.insert(
812           {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1,
813                                           CreatedValuesCount)});
814     }
815 
816     RecordStorageLocation &Loc =
817         arena().create<RecordStorageLocation>(Type, std::move(FieldLocs));
818     RecordValue &RecordVal = create<RecordValue>(Loc);
819 
820     // As we already have a storage location for the `RecordValue`, we can and
821     // should associate them in the environment.
822     setValue(Loc, RecordVal);
823 
824     return &RecordVal;
825   }
826 
827   return nullptr;
828 }
829 
830 StorageLocation &
831 Environment::createLocAndMaybeValue(QualType Ty,
832                                     llvm::DenseSet<QualType> &Visited,
833                                     int Depth, int &CreatedValuesCount) {
834   if (!Visited.insert(Ty.getCanonicalType()).second)
835     return createStorageLocation(Ty.getNonReferenceType());
836   Value *Val = createValueUnlessSelfReferential(
837       Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount);
838   Visited.erase(Ty.getCanonicalType());
839 
840   Ty = Ty.getNonReferenceType();
841 
842   if (Val == nullptr)
843     return createStorageLocation(Ty);
844 
845   if (Ty->isRecordType())
846     return cast<RecordValue>(Val)->getLoc();
847 
848   StorageLocation &Loc = createStorageLocation(Ty);
849   setValue(Loc, *Val);
850   return Loc;
851 }
852 
853 StorageLocation &Environment::createObjectInternal(const VarDecl *D,
854                                                    QualType Ty,
855                                                    const Expr *InitExpr) {
856   if (Ty->isReferenceType()) {
857     // Although variables of reference type always need to be initialized, it
858     // can happen that we can't see the initializer, so `InitExpr` may still
859     // be null.
860     if (InitExpr) {
861       if (auto *InitExprLoc = getStorageLocation(*InitExpr))
862           return *InitExprLoc;
863     }
864 
865     // Even though we have an initializer, we might not get an
866     // InitExprLoc, for example if the InitExpr is a CallExpr for which we
867     // don't have a function body. In this case, we just invent a storage
868     // location and value -- it's the best we can do.
869     return createObjectInternal(D, Ty.getNonReferenceType(), nullptr);
870   }
871 
872   Value *Val = nullptr;
873   if (InitExpr)
874     // In the (few) cases where an expression is intentionally
875     // "uninterpreted", `InitExpr` is not associated with a value.  There are
876     // two ways to handle this situation: propagate the status, so that
877     // uninterpreted initializers result in uninterpreted variables, or
878     // provide a default value. We choose the latter so that later refinements
879     // of the variable can be used for reasoning about the surrounding code.
880     // For this reason, we let this case be handled by the `createValue()`
881     // call below.
882     //
883     // FIXME. If and when we interpret all language cases, change this to
884     // assert that `InitExpr` is interpreted, rather than supplying a
885     // default value (assuming we don't update the environment API to return
886     // references).
887     Val = getValue(*InitExpr);
888   if (!Val)
889     Val = createValue(Ty);
890 
891   if (Ty->isRecordType())
892     return cast<RecordValue>(Val)->getLoc();
893 
894   StorageLocation &Loc =
895       D ? createStorageLocation(*D) : createStorageLocation(Ty);
896 
897   if (Val)
898     setValue(Loc, *Val);
899 
900   return Loc;
901 }
902 
903 void Environment::addToFlowCondition(const Formula &Val) {
904   DACtx->addFlowConditionConstraint(FlowConditionToken, Val);
905 }
906 
907 bool Environment::flowConditionImplies(const Formula &Val) const {
908   return DACtx->flowConditionImplies(FlowConditionToken, Val);
909 }
910 
911 void Environment::dump(raw_ostream &OS) const {
912   // FIXME: add printing for remaining fields and allow caller to decide what
913   // fields are printed.
914   OS << "DeclToLoc:\n";
915   for (auto [D, L] : DeclToLoc)
916     OS << "  [" << D->getNameAsString() << ", " << L << "]\n";
917 
918   OS << "ExprToLoc:\n";
919   for (auto [E, L] : ExprToLoc)
920     OS << "  [" << E << ", " << L << "]\n";
921 
922   OS << "ExprToVal:\n";
923   for (auto [E, V] : ExprToVal)
924     OS << "  [" << E << ", " << V << ": " << *V << "]\n";
925 
926   OS << "LocToVal:\n";
927   for (auto [L, V] : LocToVal) {
928     OS << "  [" << L << ", " << V << ": " << *V << "]\n";
929   }
930 
931   OS << "FlowConditionToken:\n";
932   DACtx->dumpFlowCondition(FlowConditionToken, OS);
933 }
934 
935 void Environment::dump() const {
936   dump(llvm::dbgs());
937 }
938 
939 RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE,
940                                                  const Environment &Env) {
941   Expr *ImplicitObject = MCE.getImplicitObjectArgument();
942   if (ImplicitObject == nullptr)
943     return nullptr;
944   if (ImplicitObject->getType()->isPointerType()) {
945     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*ImplicitObject)))
946       return &cast<RecordStorageLocation>(Val->getPointeeLoc());
947     return nullptr;
948   }
949   return cast_or_null<RecordStorageLocation>(
950       Env.getStorageLocation(*ImplicitObject));
951 }
952 
953 RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
954                                              const Environment &Env) {
955   Expr *Base = ME.getBase();
956   if (Base == nullptr)
957     return nullptr;
958   if (ME.isArrow()) {
959     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Base)))
960       return &cast<RecordStorageLocation>(Val->getPointeeLoc());
961     return nullptr;
962   }
963   return cast_or_null<RecordStorageLocation>(Env.getStorageLocation(*Base));
964 }
965 
966 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) {
967   // Unnamed bitfields are only used for padding and do not appear in
968   // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
969   // field list, and we thus need to remove them before mapping inits to
970   // fields to avoid mapping inits to the wrongs fields.
971   std::vector<FieldDecl *> Fields;
972   llvm::copy_if(
973       RD->fields(), std::back_inserter(Fields),
974       [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); });
975   return Fields;
976 }
977 
978 RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) {
979   auto &NewVal = Env.create<RecordValue>(Loc);
980   Env.setValue(Loc, NewVal);
981   return NewVal;
982 }
983 
984 RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) {
985   assert(Expr.getType()->isRecordType());
986 
987   if (Expr.isPRValue()) {
988     if (auto *ExistingVal = cast_or_null<RecordValue>(Env.getValue(Expr))) {
989       auto &NewVal = Env.create<RecordValue>(ExistingVal->getLoc());
990       Env.setValue(Expr, NewVal);
991       return NewVal;
992     }
993 
994     auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType()));
995     Env.setValue(Expr, NewVal);
996     return NewVal;
997   }
998 
999   if (auto *Loc =
1000           cast_or_null<RecordStorageLocation>(Env.getStorageLocation(Expr))) {
1001     auto &NewVal = Env.create<RecordValue>(*Loc);
1002     Env.setValue(*Loc, NewVal);
1003     return NewVal;
1004   }
1005 
1006   auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType()));
1007   Env.setStorageLocation(Expr, NewVal.getLoc());
1008   return NewVal;
1009 }
1010 
1011 } // namespace dataflow
1012 } // namespace clang
1013