xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (revision 0f6cf555674959d0b21769fc1c46e23584561f2a)
1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
20 #include "clang/Analysis/FlowSensitive/Value.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/DenseSet.h"
23 #include "llvm/ADT/MapVector.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include <cassert>
28 #include <memory>
29 #include <utility>
30 
31 namespace clang {
32 namespace dataflow {
33 
34 // FIXME: convert these to parameters of the analysis or environment. Current
35 // settings have been experimentaly validated, but only for a particular
36 // analysis.
37 static constexpr int MaxCompositeValueDepth = 3;
38 static constexpr int MaxCompositeValueSize = 1000;
39 
40 /// Returns a map consisting of key-value entries that are present in both maps.
41 template <typename K, typename V>
42 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
43                                         const llvm::DenseMap<K, V> &Map2) {
44   llvm::DenseMap<K, V> Result;
45   for (auto &Entry : Map1) {
46     auto It = Map2.find(Entry.first);
47     if (It != Map2.end() && Entry.second == It->second)
48       Result.insert({Entry.first, Entry.second});
49   }
50   return Result;
51 }
52 
53 static bool compareDistinctValues(QualType Type, Value &Val1,
54                                   const Environment &Env1, Value &Val2,
55                                   const Environment &Env2,
56                                   Environment::ValueModel &Model) {
57   // Note: Potentially costly, but, for booleans, we could check whether both
58   // can be proven equivalent in their respective environments.
59 
60   // FIXME: move the reference/pointers logic from `areEquivalentValues` to here
61   // and implement separate, join/widen specific handling for
62   // reference/pointers.
63   switch (Model.compare(Type, Val1, Env1, Val2, Env2)) {
64   case ComparisonResult::Same:
65     return true;
66   case ComparisonResult::Different:
67     return false;
68   case ComparisonResult::Unknown:
69     switch (Val1.getKind()) {
70     case Value::Kind::Integer:
71     case Value::Kind::Reference:
72     case Value::Kind::Pointer:
73     case Value::Kind::Struct:
74       // FIXME: this choice intentionally introduces unsoundness to allow
75       // for convergence. Once we have widening support for the
76       // reference/pointer and struct built-in models, this should be
77       // `false`.
78       return true;
79     default:
80       return false;
81     }
82   }
83   llvm_unreachable("All cases covered in switch");
84 }
85 
86 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
87 /// respectively, of the same type `Type`. Merging generally produces a single
88 /// value that (soundly) approximates the two inputs, although the actual
89 /// meaning depends on `Model`.
90 static Value *mergeDistinctValues(QualType Type, Value &Val1,
91                                   const Environment &Env1, Value &Val2,
92                                   const Environment &Env2,
93                                   Environment &MergedEnv,
94                                   Environment::ValueModel &Model) {
95   // Join distinct boolean values preserving information about the constraints
96   // in the respective path conditions.
97   if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
98     // FIXME: Checking both values should be unnecessary, since they should have
99     // a consistent shape.  However, right now we can end up with BoolValue's in
100     // integer-typed variables due to our incorrect handling of
101     // boolean-to-integer casts (we just propagate the BoolValue to the result
102     // of the cast). So, a join can encounter an integer in one branch but a
103     // bool in the other.
104     // For example:
105     // ```
106     // std::optional<bool> o;
107     // int x;
108     // if (o.has_value())
109     //   x = o.value();
110     // ```
111     auto &Expr1 = cast<BoolValue>(Val1).formula();
112     auto &Expr2 = cast<BoolValue>(Val2).formula();
113     auto &A = MergedEnv.arena();
114     auto &MergedVal = A.makeAtomRef(A.makeAtom());
115     MergedEnv.addToFlowCondition(
116         A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
117                            A.makeEquals(MergedVal, Expr1)),
118                  A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
119                            A.makeEquals(MergedVal, Expr2))));
120     return &A.makeBoolValue(MergedVal);
121   }
122 
123   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
124   // returns false to avoid storing unneeded values in `DACtx`.
125   // FIXME: Creating the value based on the type alone creates misshapen values
126   // for lvalues, since the type does not reflect the need for `ReferenceValue`.
127   // This issue will be resolved when `ReferenceValue` is eliminated as part
128   // of the ongoing migration to strict handling of value categories (see
129   // https://discourse.llvm.org/t/70086 for details).
130   if (Value *MergedVal = MergedEnv.createValue(Type))
131     if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
132       return MergedVal;
133 
134   return nullptr;
135 }
136 
137 // When widening does not change `Current`, return value will equal `&Prev`.
138 static Value &widenDistinctValues(QualType Type, Value &Prev,
139                                   const Environment &PrevEnv, Value &Current,
140                                   Environment &CurrentEnv,
141                                   Environment::ValueModel &Model) {
142   // Boolean-model widening.
143   if (isa<BoolValue>(&Prev)) {
144     assert(isa<BoolValue>(Current));
145     // Widen to Top, because we know they are different values. If previous was
146     // already Top, re-use that to (implicitly) indicate that no change occured.
147     if (isa<TopBoolValue>(Prev))
148       return Prev;
149     return CurrentEnv.makeTopBoolValue();
150   }
151 
152   // FIXME: Add other built-in model widening.
153 
154   // Custom-model widening.
155   if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv))
156     return *W;
157 
158   // Default of widening is a no-op: leave the current value unchanged.
159   return Current;
160 }
161 
162 /// Initializes a global storage value.
163 static void insertIfGlobal(const Decl &D,
164                            llvm::DenseSet<const VarDecl *> &Vars) {
165   if (auto *V = dyn_cast<VarDecl>(&D))
166     if (V->hasGlobalStorage())
167       Vars.insert(V);
168 }
169 
170 static void insertIfFunction(const Decl &D,
171                              llvm::DenseSet<const FunctionDecl *> &Funcs) {
172   if (auto *FD = dyn_cast<FunctionDecl>(&D))
173     Funcs.insert(FD);
174 }
175 
176 static void
177 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields,
178                          llvm::DenseSet<const VarDecl *> &Vars,
179                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
180   insertIfGlobal(D, Vars);
181   insertIfFunction(D, Funcs);
182   if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D))
183     for (const auto *B : Decomp->bindings())
184       if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding()))
185         // FIXME: should we be using `E->getFoundDecl()`?
186         if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
187           Fields.insert(FD);
188 }
189 
190 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields,
191 /// global variables and functions that are declared in or referenced from
192 /// sub-statements.
193 static void
194 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
195                          llvm::DenseSet<const VarDecl *> &Vars,
196                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
197   for (auto *Child : S.children())
198     if (Child != nullptr)
199       getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs);
200   if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
201     getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs);
202 
203   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
204     if (DS->isSingleDecl())
205       getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs);
206     else
207       for (auto *D : DS->getDeclGroup())
208         getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs);
209   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
210     insertIfGlobal(*E->getDecl(), Vars);
211     insertIfFunction(*E->getDecl(), Funcs);
212   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
213     // FIXME: should we be using `E->getFoundDecl()`?
214     const ValueDecl *VD = E->getMemberDecl();
215     insertIfGlobal(*VD, Vars);
216     insertIfFunction(*VD, Funcs);
217     if (const auto *FD = dyn_cast<FieldDecl>(VD))
218       Fields.insert(FD);
219   } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
220     if (RecordDecl *RD = InitList->getType()->getAsRecordDecl())
221       for (const auto *FD : getFieldsForInitListExpr(RD))
222         Fields.insert(FD);
223   }
224 }
225 
226 // FIXME: Add support for resetting globals after function calls to enable
227 // the implementation of sound analyses.
228 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
229   assert(FuncDecl->getBody() != nullptr);
230 
231   FieldSet Fields;
232   llvm::DenseSet<const VarDecl *> Vars;
233   llvm::DenseSet<const FunctionDecl *> Funcs;
234 
235   // Look for global variable and field references in the
236   // constructor-initializers.
237   if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) {
238     for (const auto *Init : CtorDecl->inits()) {
239       if (Init->isMemberInitializer()) {
240         Fields.insert(Init->getMember());
241       } else if (Init->isIndirectMemberInitializer()) {
242         for (const auto *I : Init->getIndirectMember()->chain())
243           Fields.insert(cast<FieldDecl>(I));
244       }
245       const Expr *E = Init->getInit();
246       assert(E != nullptr);
247       getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs);
248     }
249     // Add all fields mentioned in default member initializers.
250     for (const FieldDecl *F : CtorDecl->getParent()->fields())
251       if (const auto *I = F->getInClassInitializer())
252           getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs);
253   }
254   getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs);
255 
256   // These have to be added before the lines that follow to ensure that
257   // `create*` work correctly for structs.
258   DACtx->addModeledFields(Fields);
259 
260   for (const VarDecl *D : Vars) {
261     if (getStorageLocation(*D) != nullptr)
262       continue;
263 
264     setStorageLocation(*D, createObject(*D));
265   }
266 
267   for (const FunctionDecl *FD : Funcs) {
268     if (getStorageLocation(*FD) != nullptr)
269       continue;
270     auto &Loc = createStorageLocation(FD->getType());
271     setStorageLocation(*FD, Loc);
272   }
273 }
274 
275 Environment::Environment(DataflowAnalysisContext &DACtx)
276     : DACtx(&DACtx),
277       FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
278 
279 Environment Environment::fork() const {
280   Environment Copy(*this);
281   Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken);
282   return Copy;
283 }
284 
285 Environment::Environment(DataflowAnalysisContext &DACtx,
286                          const DeclContext &DeclCtx)
287     : Environment(DACtx) {
288   CallStack.push_back(&DeclCtx);
289 
290   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
291     assert(FuncDecl->getBody() != nullptr);
292 
293     initFieldsGlobalsAndFuncs(FuncDecl);
294 
295     for (const auto *ParamDecl : FuncDecl->parameters()) {
296       assert(ParamDecl != nullptr);
297       setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr));
298     }
299   }
300 
301   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
302     auto *Parent = MethodDecl->getParent();
303     assert(Parent != nullptr);
304     if (Parent->isLambda())
305       MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext());
306 
307     // FIXME: Initialize the ThisPointeeLoc of lambdas too.
308     if (MethodDecl && !MethodDecl->isStatic()) {
309       QualType ThisPointeeType = MethodDecl->getThisObjectType();
310       ThisPointeeLoc = &cast<AggregateStorageLocation>(
311           createStorageLocation(ThisPointeeType));
312       if (Value *ThisPointeeVal = createValue(ThisPointeeType))
313         setValue(*ThisPointeeLoc, *ThisPointeeVal);
314     }
315   }
316 }
317 
318 bool Environment::canDescend(unsigned MaxDepth,
319                              const DeclContext *Callee) const {
320   return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee);
321 }
322 
323 Environment Environment::pushCall(const CallExpr *Call) const {
324   Environment Env(*this);
325 
326   if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) {
327     if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) {
328       if (!isa<CXXThisExpr>(Arg))
329         Env.ThisPointeeLoc = cast<AggregateStorageLocation>(
330             getStorageLocation(*Arg, SkipPast::Reference));
331       // Otherwise (when the argument is `this`), retain the current
332       // environment's `ThisPointeeLoc`.
333     }
334   }
335 
336   Env.pushCallInternal(Call->getDirectCallee(),
337                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
338 
339   return Env;
340 }
341 
342 Environment Environment::pushCall(const CXXConstructExpr *Call) const {
343   Environment Env(*this);
344 
345   Env.ThisPointeeLoc = &cast<AggregateStorageLocation>(
346       Env.createStorageLocation(Call->getType()));
347   if (Value *Val = Env.createValue(Call->getType()))
348     Env.setValue(*Env.ThisPointeeLoc, *Val);
349 
350   Env.pushCallInternal(Call->getConstructor(),
351                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
352 
353   return Env;
354 }
355 
356 void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
357                                    ArrayRef<const Expr *> Args) {
358   // Canonicalize to the definition of the function. This ensures that we're
359   // putting arguments into the same `ParamVarDecl`s` that the callee will later
360   // be retrieving them from.
361   assert(FuncDecl->getDefinition() != nullptr);
362   FuncDecl = FuncDecl->getDefinition();
363 
364   CallStack.push_back(FuncDecl);
365 
366   initFieldsGlobalsAndFuncs(FuncDecl);
367 
368   const auto *ParamIt = FuncDecl->param_begin();
369 
370   // FIXME: Parameters don't always map to arguments 1:1; examples include
371   // overloaded operators implemented as member functions, and parameter packs.
372   for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) {
373     assert(ParamIt != FuncDecl->param_end());
374     const VarDecl *Param = *ParamIt;
375     setStorageLocation(*Param, createObject(*Param, Args[ArgIndex]));
376   }
377 }
378 
379 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) {
380   // We ignore `DACtx` because it's already the same in both. We don't want the
381   // callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or `ThisPointeeLoc`. We don't
382   // bring back `DeclToLoc` and `ExprToLoc` because we want to be able to later
383   // analyze the same callee in a different context, and `setStorageLocation`
384   // requires there to not already be a storage location assigned. Conceptually,
385   // these maps capture information from the local scope, so when popping that
386   // scope, we do not propagate the maps.
387   this->LocToVal = std::move(CalleeEnv.LocToVal);
388   this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct);
389   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
390 
391   if (Call->isGLValue()) {
392     if (CalleeEnv.ReturnLoc != nullptr)
393       setStorageLocationStrict(*Call, *CalleeEnv.ReturnLoc);
394   } else if (!Call->getType()->isVoidType()) {
395     if (CalleeEnv.ReturnVal != nullptr)
396       setValueStrict(*Call, *CalleeEnv.ReturnVal);
397   }
398 }
399 
400 void Environment::popCall(const CXXConstructExpr *Call,
401                           const Environment &CalleeEnv) {
402   // See also comment in `popCall(const CallExpr *, const Environment &)` above.
403   this->LocToVal = std::move(CalleeEnv.LocToVal);
404   this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct);
405   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
406 
407   if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) {
408     setValueStrict(*Call, *Val);
409   }
410 }
411 
412 bool Environment::equivalentTo(const Environment &Other,
413                                Environment::ValueModel &Model) const {
414   assert(DACtx == Other.DACtx);
415 
416   if (ReturnVal != Other.ReturnVal)
417     return false;
418 
419   if (ReturnLoc != Other.ReturnLoc)
420     return false;
421 
422   if (ThisPointeeLoc != Other.ThisPointeeLoc)
423     return false;
424 
425   if (DeclToLoc != Other.DeclToLoc)
426     return false;
427 
428   if (ExprToLoc != Other.ExprToLoc)
429     return false;
430 
431   // Compare the contents for the intersection of their domains.
432   for (auto &Entry : LocToVal) {
433     const StorageLocation *Loc = Entry.first;
434     assert(Loc != nullptr);
435 
436     Value *Val = Entry.second;
437     assert(Val != nullptr);
438 
439     auto It = Other.LocToVal.find(Loc);
440     if (It == Other.LocToVal.end())
441       continue;
442     assert(It->second != nullptr);
443 
444     if (!areEquivalentValues(*Val, *It->second) &&
445         !compareDistinctValues(Loc->getType(), *Val, *this, *It->second, Other,
446                                Model))
447       return false;
448   }
449 
450   return true;
451 }
452 
453 LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
454                                      Environment::ValueModel &Model) {
455   assert(DACtx == PrevEnv.DACtx);
456   assert(ReturnVal == PrevEnv.ReturnVal);
457   assert(ReturnLoc == PrevEnv.ReturnLoc);
458   assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
459   assert(CallStack == PrevEnv.CallStack);
460 
461   auto Effect = LatticeJoinEffect::Unchanged;
462 
463   // By the API, `PrevEnv` is a previous version of the environment for the same
464   // block, so we have some guarantees about its shape. In particular, it will
465   // be the result of a join or widen operation on previous values for this
466   // block. For `DeclToLoc` and `ExprToLoc`, join guarantees that these maps are
467   // subsets of the maps in `PrevEnv`. So, as long as we maintain this property
468   // here, we don't need change their current values to widen.
469   //
470   // FIXME: `MemberLocToStruct` does not share the above property, because
471   // `join` can cause the map size to increase (when we add fresh data in places
472   // of conflict). Once this issue with join is resolved, re-enable the
473   // assertion below or replace with something that captures the desired
474   // invariant.
475   assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size());
476   assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size());
477   // assert(MemberLocToStruct.size() <= PrevEnv.MemberLocToStruct.size());
478 
479   llvm::MapVector<const StorageLocation *, Value *> WidenedLocToVal;
480   for (auto &Entry : LocToVal) {
481     const StorageLocation *Loc = Entry.first;
482     assert(Loc != nullptr);
483 
484     Value *Val = Entry.second;
485     assert(Val != nullptr);
486 
487     auto PrevIt = PrevEnv.LocToVal.find(Loc);
488     if (PrevIt == PrevEnv.LocToVal.end())
489       continue;
490     assert(PrevIt->second != nullptr);
491 
492     if (areEquivalentValues(*Val, *PrevIt->second)) {
493       WidenedLocToVal.insert({Loc, Val});
494       continue;
495     }
496 
497     Value &WidenedVal = widenDistinctValues(Loc->getType(), *PrevIt->second,
498                                             PrevEnv, *Val, *this, Model);
499     WidenedLocToVal.insert({Loc, &WidenedVal});
500     if (&WidenedVal != PrevIt->second)
501       Effect = LatticeJoinEffect::Changed;
502   }
503   LocToVal = std::move(WidenedLocToVal);
504   // FIXME: update the equivalence calculation for `MemberLocToStruct`, once we
505   // have a systematic way of soundly comparing this map.
506   if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() ||
507       ExprToLoc.size() != PrevEnv.ExprToLoc.size() ||
508       LocToVal.size() != PrevEnv.LocToVal.size() ||
509       MemberLocToStruct.size() != PrevEnv.MemberLocToStruct.size())
510     Effect = LatticeJoinEffect::Changed;
511 
512   return Effect;
513 }
514 
515 Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
516                               Environment::ValueModel &Model) {
517   assert(EnvA.DACtx == EnvB.DACtx);
518   assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
519   assert(EnvA.CallStack == EnvB.CallStack);
520 
521   Environment JoinedEnv(*EnvA.DACtx);
522 
523   JoinedEnv.CallStack = EnvA.CallStack;
524   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
525 
526   if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) {
527     // `ReturnVal` might not always get set -- for example if we have a return
528     // statement of the form `return some_other_func()` and we decide not to
529     // analyze `some_other_func()`.
530     // In this case, we can't say anything about the joined return value -- we
531     // don't simply want to propagate the return value that we do have, because
532     // it might not be the correct one.
533     // This occurs for example in the test `ContextSensitiveMutualRecursion`.
534     JoinedEnv.ReturnVal = nullptr;
535   } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) {
536     JoinedEnv.ReturnVal = EnvA.ReturnVal;
537   } else {
538     assert(!EnvA.CallStack.empty());
539     // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
540     // cast.
541     auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
542     assert(Func != nullptr);
543     if (Value *MergedVal =
544             mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
545                                 *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
546       JoinedEnv.ReturnVal = MergedVal;
547   }
548 
549   if (EnvA.ReturnLoc == EnvB.ReturnLoc)
550     JoinedEnv.ReturnLoc = EnvA.ReturnLoc;
551   else
552     JoinedEnv.ReturnLoc = nullptr;
553 
554   // FIXME: Once we're able to remove declarations from `DeclToLoc` when their
555   // lifetime ends, add an assertion that there aren't any entries in
556   // `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to
557   // different storage locations.
558   JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc);
559 
560   JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc);
561 
562   JoinedEnv.MemberLocToStruct =
563       intersectDenseMaps(EnvA.MemberLocToStruct, EnvB.MemberLocToStruct);
564 
565   // FIXME: update join to detect backedges and simplify the flow condition
566   // accordingly.
567   JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions(
568       EnvA.FlowConditionToken, EnvB.FlowConditionToken);
569 
570   for (auto &Entry : EnvA.LocToVal) {
571     const StorageLocation *Loc = Entry.first;
572     assert(Loc != nullptr);
573 
574     Value *Val = Entry.second;
575     assert(Val != nullptr);
576 
577     auto It = EnvB.LocToVal.find(Loc);
578     if (It == EnvB.LocToVal.end())
579       continue;
580     assert(It->second != nullptr);
581 
582     if (areEquivalentValues(*Val, *It->second)) {
583       JoinedEnv.LocToVal.insert({Loc, Val});
584       continue;
585     }
586 
587     if (Value *MergedVal = mergeDistinctValues(
588             Loc->getType(), *Val, EnvA, *It->second, EnvB, JoinedEnv, Model)) {
589       JoinedEnv.LocToVal.insert({Loc, MergedVal});
590     }
591   }
592 
593   return JoinedEnv;
594 }
595 
596 StorageLocation &Environment::createStorageLocation(QualType Type) {
597   return DACtx->createStorageLocation(Type);
598 }
599 
600 StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
601   // Evaluated declarations are always assigned the same storage locations to
602   // ensure that the environment stabilizes across loop iterations. Storage
603   // locations for evaluated declarations are stored in the analysis context.
604   return DACtx->getStableStorageLocation(D);
605 }
606 
607 StorageLocation &Environment::createStorageLocation(const Expr &E) {
608   // Evaluated expressions are always assigned the same storage locations to
609   // ensure that the environment stabilizes across loop iterations. Storage
610   // locations for evaluated expressions are stored in the analysis context.
611   return DACtx->getStableStorageLocation(E);
612 }
613 
614 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
615   assert(!DeclToLoc.contains(&D));
616   assert(!isa_and_nonnull<ReferenceValue>(getValue(Loc)));
617   DeclToLoc[&D] = &Loc;
618 }
619 
620 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const {
621   auto It = DeclToLoc.find(&D);
622   if (It == DeclToLoc.end())
623     return nullptr;
624 
625   StorageLocation *Loc = It->second;
626 
627   assert(!isa_and_nonnull<ReferenceValue>(getValue(*Loc)));
628 
629   return Loc;
630 }
631 
632 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
633   const Expr &CanonE = ignoreCFGOmittedNodes(E);
634   assert(!ExprToLoc.contains(&CanonE));
635   ExprToLoc[&CanonE] = &Loc;
636 }
637 
638 void Environment::setStorageLocationStrict(const Expr &E,
639                                            StorageLocation &Loc) {
640   // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason,
641   // but we still want to be able to associate a `StorageLocation` with them,
642   // so allow these as an exception.
643   assert(E.isGLValue() ||
644          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
645   setStorageLocation(E, Loc);
646 }
647 
648 StorageLocation *Environment::getStorageLocation(const Expr &E,
649                                                  SkipPast SP) const {
650   // FIXME: Add a test with parens.
651   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
652   return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP);
653 }
654 
655 StorageLocation *Environment::getStorageLocationStrict(const Expr &E) const {
656   // See comment in `setStorageLocationStrict()`.
657   assert(E.isGLValue() ||
658          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
659   StorageLocation *Loc = getStorageLocation(E, SkipPast::None);
660 
661   if (Loc == nullptr)
662     return nullptr;
663 
664   if (auto *RefVal = dyn_cast_or_null<ReferenceValue>(getValue(*Loc)))
665     return &RefVal->getReferentLoc();
666 
667   return Loc;
668 }
669 
670 AggregateStorageLocation *Environment::getThisPointeeStorageLocation() const {
671   return ThisPointeeLoc;
672 }
673 
674 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) {
675   return DACtx->getOrCreateNullPointerValue(PointeeType);
676 }
677 
678 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
679   LocToVal[&Loc] = &Val;
680 
681   if (auto *StructVal = dyn_cast<StructValue>(&Val)) {
682     auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc);
683 
684     const QualType Type = AggregateLoc.getType();
685     assert(Type->isRecordType());
686 
687     for (const FieldDecl *Field : DACtx->getModeledFields(Type)) {
688       assert(Field != nullptr);
689       StorageLocation &FieldLoc = AggregateLoc.getChild(*Field);
690       MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field);
691       if (auto *FieldVal = StructVal->getChild(*Field))
692         setValue(FieldLoc, *FieldVal);
693     }
694   }
695 
696   auto It = MemberLocToStruct.find(&Loc);
697   if (It != MemberLocToStruct.end()) {
698     // `Loc` is the location of a struct member so we need to also update the
699     // value of the member in the corresponding `StructValue`.
700 
701     assert(It->second.first != nullptr);
702     StructValue &StructVal = *It->second.first;
703 
704     assert(It->second.second != nullptr);
705     const ValueDecl &Member = *It->second.second;
706 
707     StructVal.setChild(Member, Val);
708   }
709 }
710 
711 void Environment::clearValue(const StorageLocation &Loc) {
712   LocToVal.erase(&Loc);
713 
714   if (auto It = MemberLocToStruct.find(&Loc); It != MemberLocToStruct.end()) {
715     // `Loc` is the location of a struct member so we need to also clear the
716     // member in the corresponding `StructValue`.
717 
718     assert(It->second.first != nullptr);
719     StructValue &StructVal = *It->second.first;
720 
721     assert(It->second.second != nullptr);
722     const ValueDecl &Member = *It->second.second;
723 
724     StructVal.clearChild(Member);
725   }
726 }
727 
728 void Environment::setValueStrict(const Expr &E, Value &Val) {
729   assert(E.isPRValue());
730   assert(!isa<ReferenceValue>(Val));
731 
732   StorageLocation *Loc = getStorageLocation(E, SkipPast::None);
733   if (Loc == nullptr) {
734     Loc = &createStorageLocation(E);
735     setStorageLocation(E, *Loc);
736   }
737   setValue(*Loc, Val);
738 }
739 
740 Value *Environment::getValue(const StorageLocation &Loc) const {
741   return LocToVal.lookup(&Loc);
742 }
743 
744 Value *Environment::getValue(const ValueDecl &D) const {
745   auto *Loc = getStorageLocation(D);
746   if (Loc == nullptr)
747     return nullptr;
748   return getValue(*Loc);
749 }
750 
751 Value *Environment::getValue(const Expr &E, SkipPast SP) const {
752   auto *Loc = getStorageLocation(E, SP);
753   if (Loc == nullptr)
754     return nullptr;
755   return getValue(*Loc);
756 }
757 
758 Value *Environment::getValueStrict(const Expr &E) const {
759   assert(E.isPRValue());
760   Value *Val = getValue(E, SkipPast::None);
761 
762   assert(Val == nullptr || !isa<ReferenceValue>(Val));
763 
764   return Val;
765 }
766 
767 Value *Environment::createValue(QualType Type) {
768   llvm::DenseSet<QualType> Visited;
769   int CreatedValuesCount = 0;
770   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
771                                                 CreatedValuesCount);
772   if (CreatedValuesCount > MaxCompositeValueSize) {
773     llvm::errs() << "Attempting to initialize a huge value of type: " << Type
774                  << '\n';
775   }
776   return Val;
777 }
778 
779 Value *Environment::createValueUnlessSelfReferential(
780     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
781     int &CreatedValuesCount) {
782   assert(!Type.isNull());
783 
784   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
785   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
786       Depth > MaxCompositeValueDepth)
787     return nullptr;
788 
789   if (Type->isBooleanType()) {
790     CreatedValuesCount++;
791     return &makeAtomicBoolValue();
792   }
793 
794   if (Type->isIntegerType()) {
795     // FIXME: consider instead `return nullptr`, given that we do nothing useful
796     // with integers, and so distinguishing them serves no purpose, but could
797     // prevent convergence.
798     CreatedValuesCount++;
799     return &arena().create<IntegerValue>();
800   }
801 
802   if (Type->isReferenceType() || Type->isPointerType()) {
803     CreatedValuesCount++;
804     QualType PointeeType = Type->getPointeeType();
805     auto &PointeeLoc = createStorageLocation(PointeeType);
806 
807     if (Visited.insert(PointeeType.getCanonicalType()).second) {
808       Value *PointeeVal = createValueUnlessSelfReferential(
809           PointeeType, Visited, Depth, CreatedValuesCount);
810       Visited.erase(PointeeType.getCanonicalType());
811 
812       if (PointeeVal != nullptr)
813         setValue(PointeeLoc, *PointeeVal);
814     }
815 
816     if (Type->isReferenceType())
817       return &arena().create<ReferenceValue>(PointeeLoc);
818     else
819       return &arena().create<PointerValue>(PointeeLoc);
820   }
821 
822   if (Type->isRecordType()) {
823     CreatedValuesCount++;
824     llvm::DenseMap<const ValueDecl *, Value *> FieldValues;
825     for (const FieldDecl *Field : DACtx->getModeledFields(Type)) {
826       assert(Field != nullptr);
827 
828       QualType FieldType = Field->getType();
829       if (Visited.contains(FieldType.getCanonicalType()))
830         continue;
831 
832       Visited.insert(FieldType.getCanonicalType());
833       if (auto *FieldValue = createValueUnlessSelfReferential(
834               FieldType, Visited, Depth + 1, CreatedValuesCount))
835         FieldValues.insert({Field, FieldValue});
836       Visited.erase(FieldType.getCanonicalType());
837     }
838 
839     return &arena().create<StructValue>(std::move(FieldValues));
840   }
841 
842   return nullptr;
843 }
844 
845 StorageLocation &Environment::createObjectInternal(const VarDecl *D,
846                                                    QualType Ty,
847                                                    const Expr *InitExpr) {
848   if (Ty->isReferenceType()) {
849     // Although variables of reference type always need to be initialized, it
850     // can happen that we can't see the initializer, so `InitExpr` may still
851     // be null.
852     if (InitExpr) {
853       if (auto *InitExprLoc =
854               getStorageLocation(*InitExpr, SkipPast::Reference))
855         return *InitExprLoc;
856     }
857 
858     // Even though we have an initializer, we might not get an
859     // InitExprLoc, for example if the InitExpr is a CallExpr for which we
860     // don't have a function body. In this case, we just invent a storage
861     // location and value -- it's the best we can do.
862     return createObjectInternal(D, Ty.getNonReferenceType(), nullptr);
863   }
864 
865   Value *Val = nullptr;
866   if (InitExpr)
867     // In the (few) cases where an expression is intentionally
868     // "uninterpreted", `InitExpr` is not associated with a value.  There are
869     // two ways to handle this situation: propagate the status, so that
870     // uninterpreted initializers result in uninterpreted variables, or
871     // provide a default value. We choose the latter so that later refinements
872     // of the variable can be used for reasoning about the surrounding code.
873     // For this reason, we let this case be handled by the `createValue()`
874     // call below.
875     //
876     // FIXME. If and when we interpret all language cases, change this to
877     // assert that `InitExpr` is interpreted, rather than supplying a
878     // default value (assuming we don't update the environment API to return
879     // references).
880     Val = getValueStrict(*InitExpr);
881   if (!Val)
882     Val = createValue(Ty);
883 
884   StorageLocation &Loc =
885       D ? createStorageLocation(*D) : createStorageLocation(Ty);
886 
887   if (Val)
888     setValue(Loc, *Val);
889 
890   return Loc;
891 }
892 
893 StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const {
894   switch (SP) {
895   case SkipPast::None:
896     return Loc;
897   case SkipPast::Reference:
898     // References cannot be chained so we only need to skip past one level of
899     // indirection.
900     if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc)))
901       return Val->getReferentLoc();
902     return Loc;
903   }
904   llvm_unreachable("bad SkipPast kind");
905 }
906 
907 const StorageLocation &Environment::skip(const StorageLocation &Loc,
908                                          SkipPast SP) const {
909   return skip(*const_cast<StorageLocation *>(&Loc), SP);
910 }
911 
912 void Environment::addToFlowCondition(const Formula &Val) {
913   DACtx->addFlowConditionConstraint(FlowConditionToken, Val);
914 }
915 
916 bool Environment::flowConditionImplies(const Formula &Val) const {
917   return DACtx->flowConditionImplies(FlowConditionToken, Val);
918 }
919 
920 void Environment::dump(raw_ostream &OS) const {
921   // FIXME: add printing for remaining fields and allow caller to decide what
922   // fields are printed.
923   OS << "DeclToLoc:\n";
924   for (auto [D, L] : DeclToLoc)
925     OS << "  [" << D->getNameAsString() << ", " << L << "]\n";
926 
927   OS << "ExprToLoc:\n";
928   for (auto [E, L] : ExprToLoc)
929     OS << "  [" << E << ", " << L << "]\n";
930 
931   OS << "LocToVal:\n";
932   for (auto [L, V] : LocToVal) {
933     OS << "  [" << L << ", " << V << ": " << *V << "]\n";
934   }
935 
936   OS << "FlowConditionToken:\n";
937   DACtx->dumpFlowCondition(FlowConditionToken, OS);
938 }
939 
940 void Environment::dump() const {
941   dump(llvm::dbgs());
942 }
943 
944 AggregateStorageLocation *
945 getImplicitObjectLocation(const CXXMemberCallExpr &MCE,
946                           const Environment &Env) {
947   Expr *ImplicitObject = MCE.getImplicitObjectArgument();
948   if (ImplicitObject == nullptr)
949     return nullptr;
950   StorageLocation *Loc =
951       Env.getStorageLocation(*ImplicitObject, SkipPast::Reference);
952   if (Loc == nullptr)
953     return nullptr;
954   if (ImplicitObject->getType()->isPointerType()) {
955     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Loc)))
956       return &cast<AggregateStorageLocation>(Val->getPointeeLoc());
957     return nullptr;
958   }
959   return cast<AggregateStorageLocation>(Loc);
960 }
961 
962 AggregateStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
963                                                 const Environment &Env) {
964   Expr *Base = ME.getBase();
965   if (Base == nullptr)
966     return nullptr;
967   StorageLocation *Loc = Env.getStorageLocation(*Base, SkipPast::Reference);
968   if (Loc == nullptr)
969     return nullptr;
970   if (ME.isArrow()) {
971     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Loc)))
972       return &cast<AggregateStorageLocation>(Val->getPointeeLoc());
973     return nullptr;
974   }
975   return cast<AggregateStorageLocation>(Loc);
976 }
977 
978 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) {
979   // Unnamed bitfields are only used for padding and do not appear in
980   // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
981   // field list, and we thus need to remove them before mapping inits to
982   // fields to avoid mapping inits to the wrongs fields.
983   std::vector<FieldDecl *> Fields;
984   llvm::copy_if(
985       RD->fields(), std::back_inserter(Fields),
986       [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); });
987   return Fields;
988 }
989 
990 StructValue &refreshStructValue(AggregateStorageLocation &Loc,
991                                 Environment &Env) {
992   auto &NewVal = *cast<StructValue>(Env.createValue(Loc.getType()));
993   Env.setValue(Loc, NewVal);
994   return NewVal;
995 }
996 
997 StructValue &refreshStructValue(const Expr &Expr, Environment &Env) {
998   assert(Expr.getType()->isRecordType());
999 
1000   auto &NewVal = *cast<StructValue>(Env.createValue(Expr.getType()));
1001 
1002   if (Expr.isPRValue()) {
1003     Env.setValueStrict(Expr, NewVal);
1004   } else {
1005     StorageLocation *Loc = Env.getStorageLocationStrict(Expr);
1006     if (Loc == nullptr) {
1007       Loc = &Env.createStorageLocation(Expr);
1008       Env.setStorageLocation(Expr, *Loc);
1009     }
1010     Env.setValue(*Loc, NewVal);
1011   }
1012 
1013   return NewVal;
1014 }
1015 
1016 } // namespace dataflow
1017 } // namespace clang
1018