xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (revision c4c59192e6e913dcb0c68ad1632efb277d329cc7)
1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
20 #include "clang/Analysis/FlowSensitive/Value.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/DenseSet.h"
23 #include "llvm/ADT/MapVector.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include <cassert>
27 #include <utility>
28 
29 namespace clang {
30 namespace dataflow {
31 
32 // FIXME: convert these to parameters of the analysis or environment. Current
33 // settings have been experimentaly validated, but only for a particular
34 // analysis.
35 static constexpr int MaxCompositeValueDepth = 3;
36 static constexpr int MaxCompositeValueSize = 1000;
37 
38 /// Returns a map consisting of key-value entries that are present in both maps.
39 static llvm::DenseMap<const ValueDecl *, StorageLocation *> intersectDeclToLoc(
40     const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc1,
41     const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc2) {
42   llvm::DenseMap<const ValueDecl *, StorageLocation *> Result;
43   for (auto &Entry : DeclToLoc1) {
44     auto It = DeclToLoc2.find(Entry.first);
45     if (It != DeclToLoc2.end() && Entry.second == It->second)
46       Result.insert({Entry.first, Entry.second});
47   }
48   return Result;
49 }
50 
51 // Whether to consider equivalent two values with an unknown relation.
52 //
53 // FIXME: this function is a hack enabling unsoundness to support
54 // convergence. Once we have widening support for the reference/pointer and
55 // struct built-in models, this should be unconditionally `false` (and inlined
56 // as such at its call sites).
57 static bool equateUnknownValues(Value::Kind K) {
58   switch (K) {
59   case Value::Kind::Integer:
60   case Value::Kind::Pointer:
61   case Value::Kind::Record:
62     return true;
63   default:
64     return false;
65   }
66 }
67 
68 static bool compareDistinctValues(QualType Type, Value &Val1,
69                                   const Environment &Env1, Value &Val2,
70                                   const Environment &Env2,
71                                   Environment::ValueModel &Model) {
72   // Note: Potentially costly, but, for booleans, we could check whether both
73   // can be proven equivalent in their respective environments.
74 
75   // FIXME: move the reference/pointers logic from `areEquivalentValues` to here
76   // and implement separate, join/widen specific handling for
77   // reference/pointers.
78   switch (Model.compare(Type, Val1, Env1, Val2, Env2)) {
79   case ComparisonResult::Same:
80     return true;
81   case ComparisonResult::Different:
82     return false;
83   case ComparisonResult::Unknown:
84     return equateUnknownValues(Val1.getKind());
85   }
86   llvm_unreachable("All cases covered in switch");
87 }
88 
89 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
90 /// respectively, of the same type `Type`. Merging generally produces a single
91 /// value that (soundly) approximates the two inputs, although the actual
92 /// meaning depends on `Model`.
93 static Value *mergeDistinctValues(QualType Type, Value &Val1,
94                                   const Environment &Env1, Value &Val2,
95                                   const Environment &Env2,
96                                   Environment &MergedEnv,
97                                   Environment::ValueModel &Model) {
98   // Join distinct boolean values preserving information about the constraints
99   // in the respective path conditions.
100   if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
101     // FIXME: Checking both values should be unnecessary, since they should have
102     // a consistent shape.  However, right now we can end up with BoolValue's in
103     // integer-typed variables due to our incorrect handling of
104     // boolean-to-integer casts (we just propagate the BoolValue to the result
105     // of the cast). So, a join can encounter an integer in one branch but a
106     // bool in the other.
107     // For example:
108     // ```
109     // std::optional<bool> o;
110     // int x;
111     // if (o.has_value())
112     //   x = o.value();
113     // ```
114     auto &Expr1 = cast<BoolValue>(Val1).formula();
115     auto &Expr2 = cast<BoolValue>(Val2).formula();
116     auto &A = MergedEnv.arena();
117     auto &MergedVal = A.makeAtomRef(A.makeAtom());
118     MergedEnv.assume(
119         A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
120                            A.makeEquals(MergedVal, Expr1)),
121                  A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
122                            A.makeEquals(MergedVal, Expr2))));
123     return &A.makeBoolValue(MergedVal);
124   }
125 
126   Value *MergedVal = nullptr;
127   if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) {
128     auto *RecordVal2 = cast<RecordValue>(&Val2);
129 
130     if (&RecordVal1->getLoc() == &RecordVal2->getLoc())
131       // `RecordVal1` and `RecordVal2` may have different properties associated
132       // with them. Create a new `RecordValue` with the same location but
133       // without any properties so that we soundly approximate both values. If a
134       // particular analysis needs to merge properties, it should do so in
135       // `DataflowAnalysis::merge()`.
136       MergedVal = &MergedEnv.create<RecordValue>(RecordVal1->getLoc());
137     else
138       // If the locations for the two records are different, need to create a
139       // completely new value.
140       MergedVal = MergedEnv.createValue(Type);
141   } else {
142     MergedVal = MergedEnv.createValue(Type);
143   }
144 
145   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
146   // returns false to avoid storing unneeded values in `DACtx`.
147   if (MergedVal)
148     if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
149       return MergedVal;
150 
151   return nullptr;
152 }
153 
154 // When widening does not change `Current`, return value will equal `&Prev`.
155 static Value &widenDistinctValues(QualType Type, Value &Prev,
156                                   const Environment &PrevEnv, Value &Current,
157                                   Environment &CurrentEnv,
158                                   Environment::ValueModel &Model) {
159   // Boolean-model widening.
160   if (isa<BoolValue>(&Prev)) {
161     assert(isa<BoolValue>(Current));
162     // Widen to Top, because we know they are different values. If previous was
163     // already Top, re-use that to (implicitly) indicate that no change occured.
164     if (isa<TopBoolValue>(Prev))
165       return Prev;
166     return CurrentEnv.makeTopBoolValue();
167   }
168 
169   // FIXME: Add other built-in model widening.
170 
171   // Custom-model widening.
172   if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv))
173     return *W;
174 
175   return equateUnknownValues(Prev.getKind()) ? Prev : Current;
176 }
177 
178 // Returns whether the values in `Map1` and `Map2` compare equal for those
179 // keys that `Map1` and `Map2` have in common.
180 template <typename Key>
181 bool compareKeyToValueMaps(const llvm::MapVector<Key, Value *> &Map1,
182                            const llvm::MapVector<Key, Value *> &Map2,
183                            const Environment &Env1, const Environment &Env2,
184                            Environment::ValueModel &Model) {
185   for (auto &Entry : Map1) {
186     Key K = Entry.first;
187     assert(K != nullptr);
188 
189     Value *Val = Entry.second;
190     assert(Val != nullptr);
191 
192     auto It = Map2.find(K);
193     if (It == Map2.end())
194       continue;
195     assert(It->second != nullptr);
196 
197     if (!areEquivalentValues(*Val, *It->second) &&
198         !compareDistinctValues(K->getType(), *Val, Env1, *It->second, Env2,
199                                Model))
200       return false;
201   }
202 
203   return true;
204 }
205 
206 // Perform a join on two `LocToVal` maps.
207 static llvm::MapVector<const StorageLocation *, Value *>
208 joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal,
209              const llvm::MapVector<const StorageLocation *, Value *> &LocToVal2,
210              const Environment &Env1, const Environment &Env2,
211              Environment &JoinedEnv, Environment::ValueModel &Model) {
212   llvm::MapVector<const StorageLocation *, Value *> Result;
213   for (auto &Entry : LocToVal) {
214     const StorageLocation *Loc = Entry.first;
215     assert(Loc != nullptr);
216 
217     Value *Val = Entry.second;
218     assert(Val != nullptr);
219 
220     auto It = LocToVal2.find(Loc);
221     if (It == LocToVal2.end())
222       continue;
223     assert(It->second != nullptr);
224 
225     if (areEquivalentValues(*Val, *It->second)) {
226       Result.insert({Loc, Val});
227       continue;
228     }
229 
230     if (Value *MergedVal = mergeDistinctValues(
231             Loc->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) {
232       Result.insert({Loc, MergedVal});
233     }
234   }
235 
236   return Result;
237 }
238 
239 // Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either
240 // `const StorageLocation *` or `const Expr *`.
241 template <typename Key>
242 llvm::MapVector<Key, Value *>
243 widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap,
244                    const llvm::MapVector<Key, Value *> &PrevMap,
245                    Environment &CurEnv, const Environment &PrevEnv,
246                    Environment::ValueModel &Model, LatticeJoinEffect &Effect) {
247   llvm::MapVector<Key, Value *> WidenedMap;
248   for (auto &Entry : CurMap) {
249     Key K = Entry.first;
250     assert(K != nullptr);
251 
252     Value *Val = Entry.second;
253     assert(Val != nullptr);
254 
255     auto PrevIt = PrevMap.find(K);
256     if (PrevIt == PrevMap.end())
257       continue;
258     assert(PrevIt->second != nullptr);
259 
260     if (areEquivalentValues(*Val, *PrevIt->second)) {
261       WidenedMap.insert({K, Val});
262       continue;
263     }
264 
265     Value &WidenedVal = widenDistinctValues(K->getType(), *PrevIt->second,
266                                             PrevEnv, *Val, CurEnv, Model);
267     WidenedMap.insert({K, &WidenedVal});
268     if (&WidenedVal != PrevIt->second)
269       Effect = LatticeJoinEffect::Changed;
270   }
271 
272   return WidenedMap;
273 }
274 
275 /// Initializes a global storage value.
276 static void insertIfGlobal(const Decl &D,
277                            llvm::DenseSet<const VarDecl *> &Vars) {
278   if (auto *V = dyn_cast<VarDecl>(&D))
279     if (V->hasGlobalStorage())
280       Vars.insert(V);
281 }
282 
283 static void insertIfFunction(const Decl &D,
284                              llvm::DenseSet<const FunctionDecl *> &Funcs) {
285   if (auto *FD = dyn_cast<FunctionDecl>(&D))
286     Funcs.insert(FD);
287 }
288 
289 static MemberExpr *getMemberForAccessor(const CXXMemberCallExpr &C) {
290   if (!C.getMethodDecl())
291     return nullptr;
292   auto *Body = dyn_cast_or_null<CompoundStmt>(C.getMethodDecl()->getBody());
293   if (!Body || Body->size() != 1)
294     return nullptr;
295   if (auto *RS = dyn_cast<ReturnStmt>(*Body->body_begin()))
296     if (auto *Return = RS->getRetValue())
297       return dyn_cast<MemberExpr>(Return->IgnoreParenImpCasts());
298   return nullptr;
299 }
300 
301 static void
302 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields,
303                          llvm::DenseSet<const VarDecl *> &Vars,
304                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
305   insertIfGlobal(D, Vars);
306   insertIfFunction(D, Funcs);
307   if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D))
308     for (const auto *B : Decomp->bindings())
309       if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding()))
310         // FIXME: should we be using `E->getFoundDecl()`?
311         if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
312           Fields.insert(FD);
313 }
314 
315 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields,
316 /// global variables and functions that are declared in or referenced from
317 /// sub-statements.
318 static void
319 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
320                          llvm::DenseSet<const VarDecl *> &Vars,
321                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
322   for (auto *Child : S.children())
323     if (Child != nullptr)
324       getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs);
325   if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
326     getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs);
327 
328   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
329     if (DS->isSingleDecl())
330       getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs);
331     else
332       for (auto *D : DS->getDeclGroup())
333         getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs);
334   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
335     insertIfGlobal(*E->getDecl(), Vars);
336     insertIfFunction(*E->getDecl(), Funcs);
337   } else if (const auto *C = dyn_cast<CXXMemberCallExpr>(&S)) {
338     // If this is a method that returns a member variable but does nothing else,
339     // model the field of the return value.
340     if (MemberExpr *E = getMemberForAccessor(*C))
341       if (const auto *FD = dyn_cast<FieldDecl>(E->getMemberDecl()))
342         Fields.insert(FD);
343   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
344     // FIXME: should we be using `E->getFoundDecl()`?
345     const ValueDecl *VD = E->getMemberDecl();
346     insertIfGlobal(*VD, Vars);
347     insertIfFunction(*VD, Funcs);
348     if (const auto *FD = dyn_cast<FieldDecl>(VD))
349       Fields.insert(FD);
350   } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
351     if (RecordDecl *RD = InitList->getType()->getAsRecordDecl())
352       for (const auto *FD : getFieldsForInitListExpr(RD))
353         Fields.insert(FD);
354   }
355 }
356 
357 // FIXME: Add support for resetting globals after function calls to enable
358 // the implementation of sound analyses.
359 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
360   assert(FuncDecl->getBody() != nullptr);
361 
362   FieldSet Fields;
363   llvm::DenseSet<const VarDecl *> Vars;
364   llvm::DenseSet<const FunctionDecl *> Funcs;
365 
366   // Look for global variable and field references in the
367   // constructor-initializers.
368   if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) {
369     for (const auto *Init : CtorDecl->inits()) {
370       if (Init->isMemberInitializer()) {
371         Fields.insert(Init->getMember());
372       } else if (Init->isIndirectMemberInitializer()) {
373         for (const auto *I : Init->getIndirectMember()->chain())
374           Fields.insert(cast<FieldDecl>(I));
375       }
376       const Expr *E = Init->getInit();
377       assert(E != nullptr);
378       getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs);
379     }
380     // Add all fields mentioned in default member initializers.
381     for (const FieldDecl *F : CtorDecl->getParent()->fields())
382       if (const auto *I = F->getInClassInitializer())
383           getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs);
384   }
385   getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs);
386 
387   // These have to be added before the lines that follow to ensure that
388   // `create*` work correctly for structs.
389   DACtx->addModeledFields(Fields);
390 
391   for (const VarDecl *D : Vars) {
392     if (getStorageLocation(*D) != nullptr)
393       continue;
394 
395     setStorageLocation(*D, createObject(*D));
396   }
397 
398   for (const FunctionDecl *FD : Funcs) {
399     if (getStorageLocation(*FD) != nullptr)
400       continue;
401     auto &Loc = createStorageLocation(FD->getType());
402     setStorageLocation(*FD, Loc);
403   }
404 }
405 
406 Environment::Environment(DataflowAnalysisContext &DACtx)
407     : DACtx(&DACtx),
408       FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
409 
410 Environment Environment::fork() const {
411   Environment Copy(*this);
412   Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken);
413   return Copy;
414 }
415 
416 Environment::Environment(DataflowAnalysisContext &DACtx,
417                          const DeclContext &DeclCtx)
418     : Environment(DACtx) {
419   CallStack.push_back(&DeclCtx);
420 
421   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
422     assert(FuncDecl->getBody() != nullptr);
423 
424     initFieldsGlobalsAndFuncs(FuncDecl);
425 
426     for (const auto *ParamDecl : FuncDecl->parameters()) {
427       assert(ParamDecl != nullptr);
428       setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr));
429     }
430   }
431 
432   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
433     auto *Parent = MethodDecl->getParent();
434     assert(Parent != nullptr);
435 
436     if (Parent->isLambda()) {
437       for (auto Capture : Parent->captures()) {
438         if (Capture.capturesVariable()) {
439           const auto *VarDecl = Capture.getCapturedVar();
440           assert(VarDecl != nullptr);
441           setStorageLocation(*VarDecl, createObject(*VarDecl, nullptr));
442         } else if (Capture.capturesThis()) {
443           const auto *SurroundingMethodDecl =
444               cast<CXXMethodDecl>(DeclCtx.getNonClosureAncestor());
445           QualType ThisPointeeType =
446               SurroundingMethodDecl->getFunctionObjectParameterType();
447           ThisPointeeLoc =
448               &cast<RecordValue>(createValue(ThisPointeeType))->getLoc();
449         }
450       }
451     } else if (MethodDecl->isImplicitObjectMemberFunction()) {
452       QualType ThisPointeeType = MethodDecl->getFunctionObjectParameterType();
453       ThisPointeeLoc =
454           &cast<RecordValue>(createValue(ThisPointeeType))->getLoc();
455     }
456   }
457 }
458 
459 bool Environment::canDescend(unsigned MaxDepth,
460                              const DeclContext *Callee) const {
461   return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee);
462 }
463 
464 Environment Environment::pushCall(const CallExpr *Call) const {
465   Environment Env(*this);
466 
467   if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) {
468     if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) {
469       if (!isa<CXXThisExpr>(Arg))
470           Env.ThisPointeeLoc =
471               cast<RecordStorageLocation>(getStorageLocation(*Arg));
472       // Otherwise (when the argument is `this`), retain the current
473       // environment's `ThisPointeeLoc`.
474     }
475   }
476 
477   Env.pushCallInternal(Call->getDirectCallee(),
478                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
479 
480   return Env;
481 }
482 
483 Environment Environment::pushCall(const CXXConstructExpr *Call) const {
484   Environment Env(*this);
485 
486   Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call);
487 
488   Env.pushCallInternal(Call->getConstructor(),
489                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
490 
491   return Env;
492 }
493 
494 void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
495                                    ArrayRef<const Expr *> Args) {
496   // Canonicalize to the definition of the function. This ensures that we're
497   // putting arguments into the same `ParamVarDecl`s` that the callee will later
498   // be retrieving them from.
499   assert(FuncDecl->getDefinition() != nullptr);
500   FuncDecl = FuncDecl->getDefinition();
501 
502   CallStack.push_back(FuncDecl);
503 
504   initFieldsGlobalsAndFuncs(FuncDecl);
505 
506   const auto *ParamIt = FuncDecl->param_begin();
507 
508   // FIXME: Parameters don't always map to arguments 1:1; examples include
509   // overloaded operators implemented as member functions, and parameter packs.
510   for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) {
511     assert(ParamIt != FuncDecl->param_end());
512     const VarDecl *Param = *ParamIt;
513     setStorageLocation(*Param, createObject(*Param, Args[ArgIndex]));
514   }
515 }
516 
517 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) {
518   // We ignore some entries of `CalleeEnv`:
519   // - `DACtx` because is already the same in both
520   // - We don't want the callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or
521   //   `ThisPointeeLoc` because they don't apply to us.
522   // - `DeclToLoc`, `ExprToLoc`, and `ExprToVal` capture information from the
523   //   callee's local scope, so when popping that scope, we do not propagate
524   //   the maps.
525   this->LocToVal = std::move(CalleeEnv.LocToVal);
526   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
527 
528   if (Call->isGLValue()) {
529     if (CalleeEnv.ReturnLoc != nullptr)
530       setStorageLocation(*Call, *CalleeEnv.ReturnLoc);
531   } else if (!Call->getType()->isVoidType()) {
532     if (CalleeEnv.ReturnVal != nullptr)
533       setValue(*Call, *CalleeEnv.ReturnVal);
534   }
535 }
536 
537 void Environment::popCall(const CXXConstructExpr *Call,
538                           const Environment &CalleeEnv) {
539   // See also comment in `popCall(const CallExpr *, const Environment &)` above.
540   this->LocToVal = std::move(CalleeEnv.LocToVal);
541   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
542 
543   if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) {
544     setValue(*Call, *Val);
545   }
546 }
547 
548 bool Environment::equivalentTo(const Environment &Other,
549                                Environment::ValueModel &Model) const {
550   assert(DACtx == Other.DACtx);
551 
552   if (ReturnVal != Other.ReturnVal)
553     return false;
554 
555   if (ReturnLoc != Other.ReturnLoc)
556     return false;
557 
558   if (ThisPointeeLoc != Other.ThisPointeeLoc)
559     return false;
560 
561   if (DeclToLoc != Other.DeclToLoc)
562     return false;
563 
564   if (ExprToLoc != Other.ExprToLoc)
565     return false;
566 
567   if (!compareKeyToValueMaps(ExprToVal, Other.ExprToVal, *this, Other, Model))
568     return false;
569 
570   if (!compareKeyToValueMaps(LocToVal, Other.LocToVal, *this, Other, Model))
571     return false;
572 
573   return true;
574 }
575 
576 LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
577                                      Environment::ValueModel &Model) {
578   assert(DACtx == PrevEnv.DACtx);
579   assert(ReturnVal == PrevEnv.ReturnVal);
580   assert(ReturnLoc == PrevEnv.ReturnLoc);
581   assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
582   assert(CallStack == PrevEnv.CallStack);
583 
584   auto Effect = LatticeJoinEffect::Unchanged;
585 
586   // By the API, `PrevEnv` is a previous version of the environment for the same
587   // block, so we have some guarantees about its shape. In particular, it will
588   // be the result of a join or widen operation on previous values for this
589   // block. For `DeclToLoc`, `ExprToVal`, and `ExprToLoc`, join guarantees that
590   // these maps are subsets of the maps in `PrevEnv`. So, as long as we maintain
591   // this property here, we don't need change their current values to widen.
592   assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size());
593   assert(ExprToVal.size() <= PrevEnv.ExprToVal.size());
594   assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size());
595 
596   ExprToVal = widenKeyToValueMap(ExprToVal, PrevEnv.ExprToVal, *this, PrevEnv,
597                                  Model, Effect);
598 
599   LocToVal = widenKeyToValueMap(LocToVal, PrevEnv.LocToVal, *this, PrevEnv,
600                                 Model, Effect);
601   if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() ||
602       ExprToLoc.size() != PrevEnv.ExprToLoc.size() ||
603       ExprToVal.size() != PrevEnv.ExprToVal.size() ||
604       LocToVal.size() != PrevEnv.LocToVal.size())
605     Effect = LatticeJoinEffect::Changed;
606 
607   return Effect;
608 }
609 
610 Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
611                               Environment::ValueModel &Model) {
612   assert(EnvA.DACtx == EnvB.DACtx);
613   assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
614   assert(EnvA.CallStack == EnvB.CallStack);
615 
616   Environment JoinedEnv(*EnvA.DACtx);
617 
618   JoinedEnv.CallStack = EnvA.CallStack;
619   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
620 
621   if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) {
622     // `ReturnVal` might not always get set -- for example if we have a return
623     // statement of the form `return some_other_func()` and we decide not to
624     // analyze `some_other_func()`.
625     // In this case, we can't say anything about the joined return value -- we
626     // don't simply want to propagate the return value that we do have, because
627     // it might not be the correct one.
628     // This occurs for example in the test `ContextSensitiveMutualRecursion`.
629     JoinedEnv.ReturnVal = nullptr;
630   } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) {
631     JoinedEnv.ReturnVal = EnvA.ReturnVal;
632   } else {
633     assert(!EnvA.CallStack.empty());
634     // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
635     // cast.
636     auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
637     assert(Func != nullptr);
638     if (Value *MergedVal =
639             mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
640                                 *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
641       JoinedEnv.ReturnVal = MergedVal;
642   }
643 
644   if (EnvA.ReturnLoc == EnvB.ReturnLoc)
645     JoinedEnv.ReturnLoc = EnvA.ReturnLoc;
646   else
647     JoinedEnv.ReturnLoc = nullptr;
648 
649   JoinedEnv.DeclToLoc = intersectDeclToLoc(EnvA.DeclToLoc, EnvB.DeclToLoc);
650 
651   // FIXME: update join to detect backedges and simplify the flow condition
652   // accordingly.
653   JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions(
654       EnvA.FlowConditionToken, EnvB.FlowConditionToken);
655 
656   JoinedEnv.LocToVal =
657       joinLocToVal(EnvA.LocToVal, EnvB.LocToVal, EnvA, EnvB, JoinedEnv, Model);
658 
659   // We intentionally leave `JoinedEnv.ExprToLoc` and `JoinedEnv.ExprToVal`
660   // empty, as we never need to access entries in these maps outside of the
661   // basic block that sets them.
662 
663   return JoinedEnv;
664 }
665 
666 StorageLocation &Environment::createStorageLocation(QualType Type) {
667   return DACtx->createStorageLocation(Type);
668 }
669 
670 StorageLocation &Environment::createStorageLocation(const ValueDecl &D) {
671   // Evaluated declarations are always assigned the same storage locations to
672   // ensure that the environment stabilizes across loop iterations. Storage
673   // locations for evaluated declarations are stored in the analysis context.
674   return DACtx->getStableStorageLocation(D);
675 }
676 
677 StorageLocation &Environment::createStorageLocation(const Expr &E) {
678   // Evaluated expressions are always assigned the same storage locations to
679   // ensure that the environment stabilizes across loop iterations. Storage
680   // locations for evaluated expressions are stored in the analysis context.
681   return DACtx->getStableStorageLocation(E);
682 }
683 
684 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
685   assert(!DeclToLoc.contains(&D));
686   DeclToLoc[&D] = &Loc;
687 }
688 
689 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const {
690   auto It = DeclToLoc.find(&D);
691   if (It == DeclToLoc.end())
692     return nullptr;
693 
694   StorageLocation *Loc = It->second;
695 
696   return Loc;
697 }
698 
699 void Environment::removeDecl(const ValueDecl &D) { DeclToLoc.erase(&D); }
700 
701 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
702   // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason,
703   // but we still want to be able to associate a `StorageLocation` with them,
704   // so allow these as an exception.
705   assert(E.isGLValue() ||
706          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
707   setStorageLocationInternal(E, Loc);
708 }
709 
710 StorageLocation *Environment::getStorageLocation(const Expr &E) const {
711   // See comment in `setStorageLocation()`.
712   assert(E.isGLValue() ||
713          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
714   return getStorageLocationInternal(E);
715 }
716 
717 RecordStorageLocation *Environment::getThisPointeeStorageLocation() const {
718   return ThisPointeeLoc;
719 }
720 
721 RecordStorageLocation &
722 Environment::getResultObjectLocation(const Expr &RecordPRValue) {
723   assert(RecordPRValue.getType()->isRecordType());
724   assert(RecordPRValue.isPRValue());
725 
726   if (StorageLocation *ExistingLoc = getStorageLocationInternal(RecordPRValue))
727     return *cast<RecordStorageLocation>(ExistingLoc);
728   auto &Loc = cast<RecordStorageLocation>(
729       DACtx->getStableStorageLocation(RecordPRValue));
730   setStorageLocationInternal(RecordPRValue, Loc);
731   return Loc;
732 }
733 
734 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) {
735   return DACtx->getOrCreateNullPointerValue(PointeeType);
736 }
737 
738 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
739   assert(!isa<RecordValue>(&Val) || &cast<RecordValue>(&Val)->getLoc() == &Loc);
740 
741   LocToVal[&Loc] = &Val;
742 }
743 
744 void Environment::setValue(const Expr &E, Value &Val) {
745   assert(E.isPRValue());
746   ExprToVal[&E] = &Val;
747 }
748 
749 Value *Environment::getValue(const StorageLocation &Loc) const {
750   return LocToVal.lookup(&Loc);
751 }
752 
753 Value *Environment::getValue(const ValueDecl &D) const {
754   auto *Loc = getStorageLocation(D);
755   if (Loc == nullptr)
756     return nullptr;
757   return getValue(*Loc);
758 }
759 
760 Value *Environment::getValue(const Expr &E) const {
761   if (E.isPRValue()) {
762     auto It = ExprToVal.find(&ignoreCFGOmittedNodes(E));
763     return It == ExprToVal.end() ? nullptr : It->second;
764   }
765 
766   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
767   if (It == ExprToLoc.end())
768     return nullptr;
769   return getValue(*It->second);
770 }
771 
772 Value *Environment::createValue(QualType Type) {
773   llvm::DenseSet<QualType> Visited;
774   int CreatedValuesCount = 0;
775   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
776                                                 CreatedValuesCount);
777   if (CreatedValuesCount > MaxCompositeValueSize) {
778     llvm::errs() << "Attempting to initialize a huge value of type: " << Type
779                  << '\n';
780   }
781   return Val;
782 }
783 
784 void Environment::setStorageLocationInternal(const Expr &E,
785                                              StorageLocation &Loc) {
786   const Expr &CanonE = ignoreCFGOmittedNodes(E);
787   assert(!ExprToLoc.contains(&CanonE));
788   ExprToLoc[&CanonE] = &Loc;
789 }
790 
791 StorageLocation *Environment::getStorageLocationInternal(const Expr &E) const {
792   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
793   return It == ExprToLoc.end() ? nullptr : &*It->second;
794 }
795 
796 Value *Environment::createValueUnlessSelfReferential(
797     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
798     int &CreatedValuesCount) {
799   assert(!Type.isNull());
800   assert(!Type->isReferenceType());
801 
802   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
803   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
804       Depth > MaxCompositeValueDepth)
805     return nullptr;
806 
807   if (Type->isBooleanType()) {
808     CreatedValuesCount++;
809     return &makeAtomicBoolValue();
810   }
811 
812   if (Type->isIntegerType()) {
813     // FIXME: consider instead `return nullptr`, given that we do nothing useful
814     // with integers, and so distinguishing them serves no purpose, but could
815     // prevent convergence.
816     CreatedValuesCount++;
817     return &arena().create<IntegerValue>();
818   }
819 
820   if (Type->isPointerType()) {
821     CreatedValuesCount++;
822     QualType PointeeType = Type->getPointeeType();
823     StorageLocation &PointeeLoc =
824         createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount);
825 
826     return &arena().create<PointerValue>(PointeeLoc);
827   }
828 
829   if (Type->isRecordType()) {
830     CreatedValuesCount++;
831     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
832     for (const FieldDecl *Field : DACtx->getModeledFields(Type)) {
833       assert(Field != nullptr);
834 
835       QualType FieldType = Field->getType();
836 
837       FieldLocs.insert(
838           {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1,
839                                           CreatedValuesCount)});
840     }
841 
842     RecordStorageLocation &Loc =
843         arena().create<RecordStorageLocation>(Type, std::move(FieldLocs));
844     RecordValue &RecordVal = create<RecordValue>(Loc);
845 
846     // As we already have a storage location for the `RecordValue`, we can and
847     // should associate them in the environment.
848     setValue(Loc, RecordVal);
849 
850     return &RecordVal;
851   }
852 
853   return nullptr;
854 }
855 
856 StorageLocation &
857 Environment::createLocAndMaybeValue(QualType Ty,
858                                     llvm::DenseSet<QualType> &Visited,
859                                     int Depth, int &CreatedValuesCount) {
860   if (!Visited.insert(Ty.getCanonicalType()).second)
861     return createStorageLocation(Ty.getNonReferenceType());
862   Value *Val = createValueUnlessSelfReferential(
863       Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount);
864   Visited.erase(Ty.getCanonicalType());
865 
866   Ty = Ty.getNonReferenceType();
867 
868   if (Val == nullptr)
869     return createStorageLocation(Ty);
870 
871   if (Ty->isRecordType())
872     return cast<RecordValue>(Val)->getLoc();
873 
874   StorageLocation &Loc = createStorageLocation(Ty);
875   setValue(Loc, *Val);
876   return Loc;
877 }
878 
879 StorageLocation &Environment::createObjectInternal(const ValueDecl *D,
880                                                    QualType Ty,
881                                                    const Expr *InitExpr) {
882   if (Ty->isReferenceType()) {
883     // Although variables of reference type always need to be initialized, it
884     // can happen that we can't see the initializer, so `InitExpr` may still
885     // be null.
886     if (InitExpr) {
887       if (auto *InitExprLoc = getStorageLocation(*InitExpr))
888           return *InitExprLoc;
889     }
890 
891     // Even though we have an initializer, we might not get an
892     // InitExprLoc, for example if the InitExpr is a CallExpr for which we
893     // don't have a function body. In this case, we just invent a storage
894     // location and value -- it's the best we can do.
895     return createObjectInternal(D, Ty.getNonReferenceType(), nullptr);
896   }
897 
898   Value *Val = nullptr;
899   if (InitExpr)
900     // In the (few) cases where an expression is intentionally
901     // "uninterpreted", `InitExpr` is not associated with a value.  There are
902     // two ways to handle this situation: propagate the status, so that
903     // uninterpreted initializers result in uninterpreted variables, or
904     // provide a default value. We choose the latter so that later refinements
905     // of the variable can be used for reasoning about the surrounding code.
906     // For this reason, we let this case be handled by the `createValue()`
907     // call below.
908     //
909     // FIXME. If and when we interpret all language cases, change this to
910     // assert that `InitExpr` is interpreted, rather than supplying a
911     // default value (assuming we don't update the environment API to return
912     // references).
913     Val = getValue(*InitExpr);
914   if (!Val)
915     Val = createValue(Ty);
916 
917   if (Ty->isRecordType())
918     return cast<RecordValue>(Val)->getLoc();
919 
920   StorageLocation &Loc =
921       D ? createStorageLocation(*D) : createStorageLocation(Ty);
922 
923   if (Val)
924     setValue(Loc, *Val);
925 
926   return Loc;
927 }
928 
929 void Environment::assume(const Formula &F) {
930   DACtx->addFlowConditionConstraint(FlowConditionToken, F);
931 }
932 
933 bool Environment::proves(const Formula &F) const {
934   return DACtx->flowConditionImplies(FlowConditionToken, F);
935 }
936 
937 bool Environment::allows(const Formula &F) const {
938   return DACtx->flowConditionAllows(FlowConditionToken, F);
939 }
940 
941 void Environment::dump(raw_ostream &OS) const {
942   // FIXME: add printing for remaining fields and allow caller to decide what
943   // fields are printed.
944   OS << "DeclToLoc:\n";
945   for (auto [D, L] : DeclToLoc)
946     OS << "  [" << D->getNameAsString() << ", " << L << "]\n";
947 
948   OS << "ExprToLoc:\n";
949   for (auto [E, L] : ExprToLoc)
950     OS << "  [" << E << ", " << L << "]\n";
951 
952   OS << "ExprToVal:\n";
953   for (auto [E, V] : ExprToVal)
954     OS << "  [" << E << ", " << V << ": " << *V << "]\n";
955 
956   OS << "LocToVal:\n";
957   for (auto [L, V] : LocToVal) {
958     OS << "  [" << L << ", " << V << ": " << *V << "]\n";
959   }
960 
961   OS << "\n";
962   DACtx->dumpFlowCondition(FlowConditionToken, OS);
963 }
964 
965 void Environment::dump() const {
966   dump(llvm::dbgs());
967 }
968 
969 RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE,
970                                                  const Environment &Env) {
971   Expr *ImplicitObject = MCE.getImplicitObjectArgument();
972   if (ImplicitObject == nullptr)
973     return nullptr;
974   if (ImplicitObject->getType()->isPointerType()) {
975     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*ImplicitObject)))
976       return &cast<RecordStorageLocation>(Val->getPointeeLoc());
977     return nullptr;
978   }
979   return cast_or_null<RecordStorageLocation>(
980       Env.getStorageLocation(*ImplicitObject));
981 }
982 
983 RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
984                                              const Environment &Env) {
985   Expr *Base = ME.getBase();
986   if (Base == nullptr)
987     return nullptr;
988   if (ME.isArrow()) {
989     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Base)))
990       return &cast<RecordStorageLocation>(Val->getPointeeLoc());
991     return nullptr;
992   }
993   return cast_or_null<RecordStorageLocation>(Env.getStorageLocation(*Base));
994 }
995 
996 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) {
997   // Unnamed bitfields are only used for padding and do not appear in
998   // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
999   // field list, and we thus need to remove them before mapping inits to
1000   // fields to avoid mapping inits to the wrongs fields.
1001   std::vector<FieldDecl *> Fields;
1002   llvm::copy_if(
1003       RD->fields(), std::back_inserter(Fields),
1004       [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); });
1005   return Fields;
1006 }
1007 
1008 RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) {
1009   auto &NewVal = Env.create<RecordValue>(Loc);
1010   Env.setValue(Loc, NewVal);
1011   return NewVal;
1012 }
1013 
1014 RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) {
1015   assert(Expr.getType()->isRecordType());
1016 
1017   if (Expr.isPRValue()) {
1018     if (auto *ExistingVal = cast_or_null<RecordValue>(Env.getValue(Expr))) {
1019       auto &NewVal = Env.create<RecordValue>(ExistingVal->getLoc());
1020       Env.setValue(Expr, NewVal);
1021       return NewVal;
1022     }
1023 
1024     auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType()));
1025     Env.setValue(Expr, NewVal);
1026     return NewVal;
1027   }
1028 
1029   if (auto *Loc =
1030           cast_or_null<RecordStorageLocation>(Env.getStorageLocation(Expr))) {
1031     auto &NewVal = Env.create<RecordValue>(*Loc);
1032     Env.setValue(*Loc, NewVal);
1033     return NewVal;
1034   }
1035 
1036   auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType()));
1037   Env.setStorageLocation(Expr, NewVal.getLoc());
1038   return NewVal;
1039 }
1040 
1041 } // namespace dataflow
1042 } // namespace clang
1043