xref: /freebsd-src/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (revision eb5fd01b53a6579f3bd5769ed41964683335a7fc)
1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
20 #include "clang/Analysis/FlowSensitive/Value.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/DenseSet.h"
23 #include "llvm/ADT/MapVector.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include <cassert>
27 #include <utility>
28 
29 namespace clang {
30 namespace dataflow {
31 
32 // FIXME: convert these to parameters of the analysis or environment. Current
33 // settings have been experimentaly validated, but only for a particular
34 // analysis.
35 static constexpr int MaxCompositeValueDepth = 3;
36 static constexpr int MaxCompositeValueSize = 1000;
37 
38 /// Returns a map consisting of key-value entries that are present in both maps.
39 static llvm::DenseMap<const ValueDecl *, StorageLocation *> intersectDeclToLoc(
40     const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc1,
41     const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc2) {
42   llvm::DenseMap<const ValueDecl *, StorageLocation *> Result;
43   for (auto &Entry : DeclToLoc1) {
44     auto It = DeclToLoc2.find(Entry.first);
45     if (It != DeclToLoc2.end() && Entry.second == It->second)
46       Result.insert({Entry.first, Entry.second});
47   }
48   return Result;
49 }
50 
51 // Whether to consider equivalent two values with an unknown relation.
52 //
53 // FIXME: this function is a hack enabling unsoundness to support
54 // convergence. Once we have widening support for the reference/pointer and
55 // struct built-in models, this should be unconditionally `false` (and inlined
56 // as such at its call sites).
57 static bool equateUnknownValues(Value::Kind K) {
58   switch (K) {
59   case Value::Kind::Integer:
60   case Value::Kind::Pointer:
61   case Value::Kind::Record:
62     return true;
63   default:
64     return false;
65   }
66 }
67 
68 static bool compareDistinctValues(QualType Type, Value &Val1,
69                                   const Environment &Env1, Value &Val2,
70                                   const Environment &Env2,
71                                   Environment::ValueModel &Model) {
72   // Note: Potentially costly, but, for booleans, we could check whether both
73   // can be proven equivalent in their respective environments.
74 
75   // FIXME: move the reference/pointers logic from `areEquivalentValues` to here
76   // and implement separate, join/widen specific handling for
77   // reference/pointers.
78   switch (Model.compare(Type, Val1, Env1, Val2, Env2)) {
79   case ComparisonResult::Same:
80     return true;
81   case ComparisonResult::Different:
82     return false;
83   case ComparisonResult::Unknown:
84     return equateUnknownValues(Val1.getKind());
85   }
86   llvm_unreachable("All cases covered in switch");
87 }
88 
89 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
90 /// respectively, of the same type `Type`. Merging generally produces a single
91 /// value that (soundly) approximates the two inputs, although the actual
92 /// meaning depends on `Model`.
93 static Value *mergeDistinctValues(QualType Type, Value &Val1,
94                                   const Environment &Env1, Value &Val2,
95                                   const Environment &Env2,
96                                   Environment &MergedEnv,
97                                   Environment::ValueModel &Model) {
98   // Join distinct boolean values preserving information about the constraints
99   // in the respective path conditions.
100   if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
101     // FIXME: Checking both values should be unnecessary, since they should have
102     // a consistent shape.  However, right now we can end up with BoolValue's in
103     // integer-typed variables due to our incorrect handling of
104     // boolean-to-integer casts (we just propagate the BoolValue to the result
105     // of the cast). So, a join can encounter an integer in one branch but a
106     // bool in the other.
107     // For example:
108     // ```
109     // std::optional<bool> o;
110     // int x;
111     // if (o.has_value())
112     //   x = o.value();
113     // ```
114     auto &Expr1 = cast<BoolValue>(Val1).formula();
115     auto &Expr2 = cast<BoolValue>(Val2).formula();
116     auto &A = MergedEnv.arena();
117     auto &MergedVal = A.makeAtomRef(A.makeAtom());
118     MergedEnv.assume(
119         A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
120                            A.makeEquals(MergedVal, Expr1)),
121                  A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
122                            A.makeEquals(MergedVal, Expr2))));
123     return &A.makeBoolValue(MergedVal);
124   }
125 
126   Value *MergedVal = nullptr;
127   if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) {
128     auto *RecordVal2 = cast<RecordValue>(&Val2);
129 
130     if (&RecordVal1->getLoc() == &RecordVal2->getLoc())
131       // `RecordVal1` and `RecordVal2` may have different properties associated
132       // with them. Create a new `RecordValue` with the same location but
133       // without any properties so that we soundly approximate both values. If a
134       // particular analysis needs to merge properties, it should do so in
135       // `DataflowAnalysis::merge()`.
136       MergedVal = &MergedEnv.create<RecordValue>(RecordVal1->getLoc());
137     else
138       // If the locations for the two records are different, need to create a
139       // completely new value.
140       MergedVal = MergedEnv.createValue(Type);
141   } else {
142     MergedVal = MergedEnv.createValue(Type);
143   }
144 
145   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
146   // returns false to avoid storing unneeded values in `DACtx`.
147   if (MergedVal)
148     if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
149       return MergedVal;
150 
151   return nullptr;
152 }
153 
154 // When widening does not change `Current`, return value will equal `&Prev`.
155 static Value &widenDistinctValues(QualType Type, Value &Prev,
156                                   const Environment &PrevEnv, Value &Current,
157                                   Environment &CurrentEnv,
158                                   Environment::ValueModel &Model) {
159   // Boolean-model widening.
160   if (auto *PrevBool = dyn_cast<BoolValue>(&Prev)) {
161     // If previous value was already Top, re-use that to (implicitly) indicate
162     // that no change occurred.
163     if (isa<TopBoolValue>(Prev))
164       return Prev;
165 
166     // We may need to widen to Top, but before we do so, check whether both
167     // values are implied to be either true or false in the current environment.
168     // In that case, we can simply return a literal instead.
169     auto &CurBool = cast<BoolValue>(Current);
170     bool TruePrev = PrevEnv.proves(PrevBool->formula());
171     bool TrueCur = CurrentEnv.proves(CurBool.formula());
172     if (TruePrev && TrueCur)
173       return CurrentEnv.getBoolLiteralValue(true);
174     if (!TruePrev && !TrueCur &&
175         PrevEnv.proves(PrevEnv.arena().makeNot(PrevBool->formula())) &&
176         CurrentEnv.proves(CurrentEnv.arena().makeNot(CurBool.formula())))
177       return CurrentEnv.getBoolLiteralValue(false);
178 
179     return CurrentEnv.makeTopBoolValue();
180   }
181 
182   // FIXME: Add other built-in model widening.
183 
184   // Custom-model widening.
185   if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv))
186     return *W;
187 
188   return equateUnknownValues(Prev.getKind()) ? Prev : Current;
189 }
190 
191 // Returns whether the values in `Map1` and `Map2` compare equal for those
192 // keys that `Map1` and `Map2` have in common.
193 template <typename Key>
194 bool compareKeyToValueMaps(const llvm::MapVector<Key, Value *> &Map1,
195                            const llvm::MapVector<Key, Value *> &Map2,
196                            const Environment &Env1, const Environment &Env2,
197                            Environment::ValueModel &Model) {
198   for (auto &Entry : Map1) {
199     Key K = Entry.first;
200     assert(K != nullptr);
201 
202     Value *Val = Entry.second;
203     assert(Val != nullptr);
204 
205     auto It = Map2.find(K);
206     if (It == Map2.end())
207       continue;
208     assert(It->second != nullptr);
209 
210     if (!areEquivalentValues(*Val, *It->second) &&
211         !compareDistinctValues(K->getType(), *Val, Env1, *It->second, Env2,
212                                Model))
213       return false;
214   }
215 
216   return true;
217 }
218 
219 // Perform a join on two `LocToVal` maps.
220 static llvm::MapVector<const StorageLocation *, Value *>
221 joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal,
222              const llvm::MapVector<const StorageLocation *, Value *> &LocToVal2,
223              const Environment &Env1, const Environment &Env2,
224              Environment &JoinedEnv, Environment::ValueModel &Model) {
225   llvm::MapVector<const StorageLocation *, Value *> Result;
226   for (auto &Entry : LocToVal) {
227     const StorageLocation *Loc = Entry.first;
228     assert(Loc != nullptr);
229 
230     Value *Val = Entry.second;
231     assert(Val != nullptr);
232 
233     auto It = LocToVal2.find(Loc);
234     if (It == LocToVal2.end())
235       continue;
236     assert(It->second != nullptr);
237 
238     if (areEquivalentValues(*Val, *It->second)) {
239       Result.insert({Loc, Val});
240       continue;
241     }
242 
243     if (Value *MergedVal = mergeDistinctValues(
244             Loc->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) {
245       Result.insert({Loc, MergedVal});
246     }
247   }
248 
249   return Result;
250 }
251 
252 // Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either
253 // `const StorageLocation *` or `const Expr *`.
254 template <typename Key>
255 llvm::MapVector<Key, Value *>
256 widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap,
257                    const llvm::MapVector<Key, Value *> &PrevMap,
258                    Environment &CurEnv, const Environment &PrevEnv,
259                    Environment::ValueModel &Model, LatticeJoinEffect &Effect) {
260   llvm::MapVector<Key, Value *> WidenedMap;
261   for (auto &Entry : CurMap) {
262     Key K = Entry.first;
263     assert(K != nullptr);
264 
265     Value *Val = Entry.second;
266     assert(Val != nullptr);
267 
268     auto PrevIt = PrevMap.find(K);
269     if (PrevIt == PrevMap.end())
270       continue;
271     assert(PrevIt->second != nullptr);
272 
273     if (areEquivalentValues(*Val, *PrevIt->second)) {
274       WidenedMap.insert({K, Val});
275       continue;
276     }
277 
278     Value &WidenedVal = widenDistinctValues(K->getType(), *PrevIt->second,
279                                             PrevEnv, *Val, CurEnv, Model);
280     WidenedMap.insert({K, &WidenedVal});
281     if (&WidenedVal != PrevIt->second)
282       Effect = LatticeJoinEffect::Changed;
283   }
284 
285   return WidenedMap;
286 }
287 
288 /// Initializes a global storage value.
289 static void insertIfGlobal(const Decl &D,
290                            llvm::DenseSet<const VarDecl *> &Vars) {
291   if (auto *V = dyn_cast<VarDecl>(&D))
292     if (V->hasGlobalStorage())
293       Vars.insert(V);
294 }
295 
296 static void insertIfFunction(const Decl &D,
297                              llvm::DenseSet<const FunctionDecl *> &Funcs) {
298   if (auto *FD = dyn_cast<FunctionDecl>(&D))
299     Funcs.insert(FD);
300 }
301 
302 static MemberExpr *getMemberForAccessor(const CXXMemberCallExpr &C) {
303   // Use getCalleeDecl instead of getMethodDecl in order to handle
304   // pointer-to-member calls.
305   const auto *MethodDecl = dyn_cast_or_null<CXXMethodDecl>(C.getCalleeDecl());
306   if (!MethodDecl)
307     return nullptr;
308   auto *Body = dyn_cast_or_null<CompoundStmt>(MethodDecl->getBody());
309   if (!Body || Body->size() != 1)
310     return nullptr;
311   if (auto *RS = dyn_cast<ReturnStmt>(*Body->body_begin()))
312     if (auto *Return = RS->getRetValue())
313       return dyn_cast<MemberExpr>(Return->IgnoreParenImpCasts());
314   return nullptr;
315 }
316 
317 static void
318 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields,
319                          llvm::DenseSet<const VarDecl *> &Vars,
320                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
321   insertIfGlobal(D, Vars);
322   insertIfFunction(D, Funcs);
323   if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D))
324     for (const auto *B : Decomp->bindings())
325       if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding()))
326         // FIXME: should we be using `E->getFoundDecl()`?
327         if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
328           Fields.insert(FD);
329 }
330 
331 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields,
332 /// global variables and functions that are declared in or referenced from
333 /// sub-statements.
334 static void
335 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
336                          llvm::DenseSet<const VarDecl *> &Vars,
337                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
338   for (auto *Child : S.children())
339     if (Child != nullptr)
340       getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs);
341   if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
342     getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs);
343 
344   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
345     if (DS->isSingleDecl())
346       getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs);
347     else
348       for (auto *D : DS->getDeclGroup())
349         getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs);
350   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
351     insertIfGlobal(*E->getDecl(), Vars);
352     insertIfFunction(*E->getDecl(), Funcs);
353   } else if (const auto *C = dyn_cast<CXXMemberCallExpr>(&S)) {
354     // If this is a method that returns a member variable but does nothing else,
355     // model the field of the return value.
356     if (MemberExpr *E = getMemberForAccessor(*C))
357       if (const auto *FD = dyn_cast<FieldDecl>(E->getMemberDecl()))
358         Fields.insert(FD);
359   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
360     // FIXME: should we be using `E->getFoundDecl()`?
361     const ValueDecl *VD = E->getMemberDecl();
362     insertIfGlobal(*VD, Vars);
363     insertIfFunction(*VD, Funcs);
364     if (const auto *FD = dyn_cast<FieldDecl>(VD))
365       Fields.insert(FD);
366   } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
367     if (RecordDecl *RD = InitList->getType()->getAsRecordDecl())
368       for (const auto *FD : getFieldsForInitListExpr(RD))
369         Fields.insert(FD);
370   }
371 }
372 
373 Environment::Environment(DataflowAnalysisContext &DACtx)
374     : DACtx(&DACtx),
375       FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
376 
377 Environment::Environment(DataflowAnalysisContext &DACtx,
378                          const DeclContext &DeclCtx)
379     : Environment(DACtx) {
380   CallStack.push_back(&DeclCtx);
381 }
382 
383 void Environment::initialize() {
384   const DeclContext *DeclCtx = getDeclCtx();
385   if (DeclCtx == nullptr)
386     return;
387 
388   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(DeclCtx)) {
389     assert(FuncDecl->doesThisDeclarationHaveABody());
390 
391     initFieldsGlobalsAndFuncs(FuncDecl);
392 
393     for (const auto *ParamDecl : FuncDecl->parameters()) {
394       assert(ParamDecl != nullptr);
395       setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr));
396     }
397   }
398 
399   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(DeclCtx)) {
400     auto *Parent = MethodDecl->getParent();
401     assert(Parent != nullptr);
402 
403     if (Parent->isLambda()) {
404       for (auto Capture : Parent->captures()) {
405         if (Capture.capturesVariable()) {
406           const auto *VarDecl = Capture.getCapturedVar();
407           assert(VarDecl != nullptr);
408           setStorageLocation(*VarDecl, createObject(*VarDecl, nullptr));
409         } else if (Capture.capturesThis()) {
410           const auto *SurroundingMethodDecl =
411               cast<CXXMethodDecl>(DeclCtx->getNonClosureAncestor());
412           QualType ThisPointeeType =
413               SurroundingMethodDecl->getFunctionObjectParameterType();
414           setThisPointeeStorageLocation(
415               cast<RecordValue>(createValue(ThisPointeeType))->getLoc());
416         }
417       }
418     } else if (MethodDecl->isImplicitObjectMemberFunction()) {
419       QualType ThisPointeeType = MethodDecl->getFunctionObjectParameterType();
420       setThisPointeeStorageLocation(
421           cast<RecordValue>(createValue(ThisPointeeType))->getLoc());
422     }
423   }
424 }
425 
426 // FIXME: Add support for resetting globals after function calls to enable
427 // the implementation of sound analyses.
428 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
429   assert(FuncDecl->doesThisDeclarationHaveABody());
430 
431   FieldSet Fields;
432   llvm::DenseSet<const VarDecl *> Vars;
433   llvm::DenseSet<const FunctionDecl *> Funcs;
434 
435   // Look for global variable and field references in the
436   // constructor-initializers.
437   if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) {
438     for (const auto *Init : CtorDecl->inits()) {
439       if (Init->isMemberInitializer()) {
440         Fields.insert(Init->getMember());
441       } else if (Init->isIndirectMemberInitializer()) {
442         for (const auto *I : Init->getIndirectMember()->chain())
443           Fields.insert(cast<FieldDecl>(I));
444       }
445       const Expr *E = Init->getInit();
446       assert(E != nullptr);
447       getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs);
448     }
449     // Add all fields mentioned in default member initializers.
450     for (const FieldDecl *F : CtorDecl->getParent()->fields())
451       if (const auto *I = F->getInClassInitializer())
452           getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs);
453   }
454   getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs);
455 
456   // These have to be added before the lines that follow to ensure that
457   // `create*` work correctly for structs.
458   DACtx->addModeledFields(Fields);
459 
460   for (const VarDecl *D : Vars) {
461     if (getStorageLocation(*D) != nullptr)
462       continue;
463 
464     setStorageLocation(*D, createObject(*D));
465   }
466 
467   for (const FunctionDecl *FD : Funcs) {
468     if (getStorageLocation(*FD) != nullptr)
469       continue;
470     auto &Loc = createStorageLocation(FD->getType());
471     setStorageLocation(*FD, Loc);
472   }
473 }
474 
475 Environment Environment::fork() const {
476   Environment Copy(*this);
477   Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken);
478   return Copy;
479 }
480 
481 bool Environment::canDescend(unsigned MaxDepth,
482                              const DeclContext *Callee) const {
483   return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee);
484 }
485 
486 Environment Environment::pushCall(const CallExpr *Call) const {
487   Environment Env(*this);
488 
489   if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) {
490     if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) {
491       if (!isa<CXXThisExpr>(Arg))
492           Env.ThisPointeeLoc =
493               cast<RecordStorageLocation>(getStorageLocation(*Arg));
494       // Otherwise (when the argument is `this`), retain the current
495       // environment's `ThisPointeeLoc`.
496     }
497   }
498 
499   Env.pushCallInternal(Call->getDirectCallee(),
500                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
501 
502   return Env;
503 }
504 
505 Environment Environment::pushCall(const CXXConstructExpr *Call) const {
506   Environment Env(*this);
507 
508   Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call);
509 
510   Env.pushCallInternal(Call->getConstructor(),
511                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
512 
513   return Env;
514 }
515 
516 void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
517                                    ArrayRef<const Expr *> Args) {
518   // Canonicalize to the definition of the function. This ensures that we're
519   // putting arguments into the same `ParamVarDecl`s` that the callee will later
520   // be retrieving them from.
521   assert(FuncDecl->getDefinition() != nullptr);
522   FuncDecl = FuncDecl->getDefinition();
523 
524   CallStack.push_back(FuncDecl);
525 
526   initFieldsGlobalsAndFuncs(FuncDecl);
527 
528   const auto *ParamIt = FuncDecl->param_begin();
529 
530   // FIXME: Parameters don't always map to arguments 1:1; examples include
531   // overloaded operators implemented as member functions, and parameter packs.
532   for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) {
533     assert(ParamIt != FuncDecl->param_end());
534     const VarDecl *Param = *ParamIt;
535     setStorageLocation(*Param, createObject(*Param, Args[ArgIndex]));
536   }
537 }
538 
539 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) {
540   // We ignore some entries of `CalleeEnv`:
541   // - `DACtx` because is already the same in both
542   // - We don't want the callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or
543   //   `ThisPointeeLoc` because they don't apply to us.
544   // - `DeclToLoc`, `ExprToLoc`, and `ExprToVal` capture information from the
545   //   callee's local scope, so when popping that scope, we do not propagate
546   //   the maps.
547   this->LocToVal = std::move(CalleeEnv.LocToVal);
548   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
549 
550   if (Call->isGLValue()) {
551     if (CalleeEnv.ReturnLoc != nullptr)
552       setStorageLocation(*Call, *CalleeEnv.ReturnLoc);
553   } else if (!Call->getType()->isVoidType()) {
554     if (CalleeEnv.ReturnVal != nullptr)
555       setValue(*Call, *CalleeEnv.ReturnVal);
556   }
557 }
558 
559 void Environment::popCall(const CXXConstructExpr *Call,
560                           const Environment &CalleeEnv) {
561   // See also comment in `popCall(const CallExpr *, const Environment &)` above.
562   this->LocToVal = std::move(CalleeEnv.LocToVal);
563   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
564 
565   if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) {
566     setValue(*Call, *Val);
567   }
568 }
569 
570 bool Environment::equivalentTo(const Environment &Other,
571                                Environment::ValueModel &Model) const {
572   assert(DACtx == Other.DACtx);
573 
574   if (ReturnVal != Other.ReturnVal)
575     return false;
576 
577   if (ReturnLoc != Other.ReturnLoc)
578     return false;
579 
580   if (ThisPointeeLoc != Other.ThisPointeeLoc)
581     return false;
582 
583   if (DeclToLoc != Other.DeclToLoc)
584     return false;
585 
586   if (ExprToLoc != Other.ExprToLoc)
587     return false;
588 
589   if (!compareKeyToValueMaps(ExprToVal, Other.ExprToVal, *this, Other, Model))
590     return false;
591 
592   if (!compareKeyToValueMaps(LocToVal, Other.LocToVal, *this, Other, Model))
593     return false;
594 
595   return true;
596 }
597 
598 LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
599                                      Environment::ValueModel &Model) {
600   assert(DACtx == PrevEnv.DACtx);
601   assert(ReturnVal == PrevEnv.ReturnVal);
602   assert(ReturnLoc == PrevEnv.ReturnLoc);
603   assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
604   assert(CallStack == PrevEnv.CallStack);
605 
606   auto Effect = LatticeJoinEffect::Unchanged;
607 
608   // By the API, `PrevEnv` is a previous version of the environment for the same
609   // block, so we have some guarantees about its shape. In particular, it will
610   // be the result of a join or widen operation on previous values for this
611   // block. For `DeclToLoc`, `ExprToVal`, and `ExprToLoc`, join guarantees that
612   // these maps are subsets of the maps in `PrevEnv`. So, as long as we maintain
613   // this property here, we don't need change their current values to widen.
614   assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size());
615   assert(ExprToVal.size() <= PrevEnv.ExprToVal.size());
616   assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size());
617 
618   ExprToVal = widenKeyToValueMap(ExprToVal, PrevEnv.ExprToVal, *this, PrevEnv,
619                                  Model, Effect);
620 
621   LocToVal = widenKeyToValueMap(LocToVal, PrevEnv.LocToVal, *this, PrevEnv,
622                                 Model, Effect);
623   if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() ||
624       ExprToLoc.size() != PrevEnv.ExprToLoc.size() ||
625       ExprToVal.size() != PrevEnv.ExprToVal.size() ||
626       LocToVal.size() != PrevEnv.LocToVal.size())
627     Effect = LatticeJoinEffect::Changed;
628 
629   return Effect;
630 }
631 
632 Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
633                               Environment::ValueModel &Model) {
634   assert(EnvA.DACtx == EnvB.DACtx);
635   assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
636   assert(EnvA.CallStack == EnvB.CallStack);
637 
638   Environment JoinedEnv(*EnvA.DACtx);
639 
640   JoinedEnv.CallStack = EnvA.CallStack;
641   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
642 
643   if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) {
644     // `ReturnVal` might not always get set -- for example if we have a return
645     // statement of the form `return some_other_func()` and we decide not to
646     // analyze `some_other_func()`.
647     // In this case, we can't say anything about the joined return value -- we
648     // don't simply want to propagate the return value that we do have, because
649     // it might not be the correct one.
650     // This occurs for example in the test `ContextSensitiveMutualRecursion`.
651     JoinedEnv.ReturnVal = nullptr;
652   } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) {
653     JoinedEnv.ReturnVal = EnvA.ReturnVal;
654   } else {
655     assert(!EnvA.CallStack.empty());
656     // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
657     // cast.
658     auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
659     assert(Func != nullptr);
660     if (Value *MergedVal =
661             mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
662                                 *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
663       JoinedEnv.ReturnVal = MergedVal;
664   }
665 
666   if (EnvA.ReturnLoc == EnvB.ReturnLoc)
667     JoinedEnv.ReturnLoc = EnvA.ReturnLoc;
668   else
669     JoinedEnv.ReturnLoc = nullptr;
670 
671   JoinedEnv.DeclToLoc = intersectDeclToLoc(EnvA.DeclToLoc, EnvB.DeclToLoc);
672 
673   // FIXME: update join to detect backedges and simplify the flow condition
674   // accordingly.
675   JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions(
676       EnvA.FlowConditionToken, EnvB.FlowConditionToken);
677 
678   JoinedEnv.LocToVal =
679       joinLocToVal(EnvA.LocToVal, EnvB.LocToVal, EnvA, EnvB, JoinedEnv, Model);
680 
681   // We intentionally leave `JoinedEnv.ExprToLoc` and `JoinedEnv.ExprToVal`
682   // empty, as we never need to access entries in these maps outside of the
683   // basic block that sets them.
684 
685   return JoinedEnv;
686 }
687 
688 StorageLocation &Environment::createStorageLocation(QualType Type) {
689   return DACtx->createStorageLocation(Type);
690 }
691 
692 StorageLocation &Environment::createStorageLocation(const ValueDecl &D) {
693   // Evaluated declarations are always assigned the same storage locations to
694   // ensure that the environment stabilizes across loop iterations. Storage
695   // locations for evaluated declarations are stored in the analysis context.
696   return DACtx->getStableStorageLocation(D);
697 }
698 
699 StorageLocation &Environment::createStorageLocation(const Expr &E) {
700   // Evaluated expressions are always assigned the same storage locations to
701   // ensure that the environment stabilizes across loop iterations. Storage
702   // locations for evaluated expressions are stored in the analysis context.
703   return DACtx->getStableStorageLocation(E);
704 }
705 
706 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
707   assert(!DeclToLoc.contains(&D));
708   DeclToLoc[&D] = &Loc;
709 }
710 
711 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const {
712   auto It = DeclToLoc.find(&D);
713   if (It == DeclToLoc.end())
714     return nullptr;
715 
716   StorageLocation *Loc = It->second;
717 
718   return Loc;
719 }
720 
721 void Environment::removeDecl(const ValueDecl &D) { DeclToLoc.erase(&D); }
722 
723 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
724   // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason,
725   // but we still want to be able to associate a `StorageLocation` with them,
726   // so allow these as an exception.
727   assert(E.isGLValue() ||
728          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
729   const Expr &CanonE = ignoreCFGOmittedNodes(E);
730   assert(!ExprToLoc.contains(&CanonE));
731   ExprToLoc[&CanonE] = &Loc;
732 }
733 
734 StorageLocation *Environment::getStorageLocation(const Expr &E) const {
735   // See comment in `setStorageLocation()`.
736   assert(E.isGLValue() ||
737          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
738   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
739   return It == ExprToLoc.end() ? nullptr : &*It->second;
740 }
741 
742 // Returns whether a prvalue of record type is the one that originally
743 // constructs the object (i.e. it doesn't propagate it from one of its
744 // children).
745 static bool isOriginalRecordConstructor(const Expr &RecordPRValue) {
746   if (auto *Init = dyn_cast<InitListExpr>(&RecordPRValue))
747     return !Init->isSemanticForm() || !Init->isTransparent();
748   return isa<CXXConstructExpr>(RecordPRValue) || isa<CallExpr>(RecordPRValue) ||
749          isa<LambdaExpr>(RecordPRValue) ||
750          isa<CXXDefaultInitExpr>(RecordPRValue) ||
751          // The framework currently does not propagate the objects created in
752          // the two branches of a `ConditionalOperator` because there is no way
753          // to reconcile their storage locations, which are different. We
754          // therefore claim that the `ConditionalOperator` is the expression
755          // that originally constructs the object.
756          // Ultimately, this will be fixed by propagating locations down from
757          // the result object, rather than up from the original constructor as
758          // we do now (see also the FIXME in the documentation for
759          // `getResultObjectLocation()`).
760          isa<ConditionalOperator>(RecordPRValue);
761 }
762 
763 RecordStorageLocation &
764 Environment::getResultObjectLocation(const Expr &RecordPRValue) const {
765   assert(RecordPRValue.getType()->isRecordType());
766   assert(RecordPRValue.isPRValue());
767 
768   // Returns a storage location that we can use if assertions fail.
769   auto FallbackForAssertFailure =
770       [this, &RecordPRValue]() -> RecordStorageLocation & {
771     return cast<RecordStorageLocation>(
772         DACtx->getStableStorageLocation(RecordPRValue));
773   };
774 
775   if (isOriginalRecordConstructor(RecordPRValue)) {
776     auto *Val = cast_or_null<RecordValue>(getValue(RecordPRValue));
777     // The builtin transfer function should have created a `RecordValue` for all
778     // original record constructors.
779     assert(Val);
780     if (!Val)
781       return FallbackForAssertFailure();
782     return Val->getLoc();
783   }
784 
785   if (auto *Op = dyn_cast<BinaryOperator>(&RecordPRValue);
786       Op && Op->isCommaOp()) {
787     return getResultObjectLocation(*Op->getRHS());
788   }
789 
790   // All other expression nodes that propagate a record prvalue should have
791   // exactly one child.
792   llvm::SmallVector<const Stmt *> children(RecordPRValue.child_begin(),
793                                            RecordPRValue.child_end());
794   assert(children.size() == 1);
795   if (children.empty())
796     return FallbackForAssertFailure();
797 
798   return getResultObjectLocation(*cast<Expr>(children[0]));
799 }
800 
801 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) {
802   return DACtx->getOrCreateNullPointerValue(PointeeType);
803 }
804 
805 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
806   assert(!isa<RecordValue>(&Val) || &cast<RecordValue>(&Val)->getLoc() == &Loc);
807 
808   LocToVal[&Loc] = &Val;
809 }
810 
811 void Environment::setValue(const Expr &E, Value &Val) {
812   const Expr &CanonE = ignoreCFGOmittedNodes(E);
813 
814   if (auto *RecordVal = dyn_cast<RecordValue>(&Val)) {
815     assert(isOriginalRecordConstructor(CanonE) ||
816            &RecordVal->getLoc() == &getResultObjectLocation(CanonE));
817   }
818 
819   assert(CanonE.isPRValue());
820   ExprToVal[&CanonE] = &Val;
821 }
822 
823 Value *Environment::getValue(const StorageLocation &Loc) const {
824   return LocToVal.lookup(&Loc);
825 }
826 
827 Value *Environment::getValue(const ValueDecl &D) const {
828   auto *Loc = getStorageLocation(D);
829   if (Loc == nullptr)
830     return nullptr;
831   return getValue(*Loc);
832 }
833 
834 Value *Environment::getValue(const Expr &E) const {
835   if (E.isPRValue()) {
836     auto It = ExprToVal.find(&ignoreCFGOmittedNodes(E));
837     return It == ExprToVal.end() ? nullptr : It->second;
838   }
839 
840   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
841   if (It == ExprToLoc.end())
842     return nullptr;
843   return getValue(*It->second);
844 }
845 
846 Value *Environment::createValue(QualType Type) {
847   llvm::DenseSet<QualType> Visited;
848   int CreatedValuesCount = 0;
849   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
850                                                 CreatedValuesCount);
851   if (CreatedValuesCount > MaxCompositeValueSize) {
852     llvm::errs() << "Attempting to initialize a huge value of type: " << Type
853                  << '\n';
854   }
855   return Val;
856 }
857 
858 Value *Environment::createValueUnlessSelfReferential(
859     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
860     int &CreatedValuesCount) {
861   assert(!Type.isNull());
862   assert(!Type->isReferenceType());
863 
864   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
865   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
866       Depth > MaxCompositeValueDepth)
867     return nullptr;
868 
869   if (Type->isBooleanType()) {
870     CreatedValuesCount++;
871     return &makeAtomicBoolValue();
872   }
873 
874   if (Type->isIntegerType()) {
875     // FIXME: consider instead `return nullptr`, given that we do nothing useful
876     // with integers, and so distinguishing them serves no purpose, but could
877     // prevent convergence.
878     CreatedValuesCount++;
879     return &arena().create<IntegerValue>();
880   }
881 
882   if (Type->isPointerType()) {
883     CreatedValuesCount++;
884     QualType PointeeType = Type->getPointeeType();
885     StorageLocation &PointeeLoc =
886         createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount);
887 
888     return &arena().create<PointerValue>(PointeeLoc);
889   }
890 
891   if (Type->isRecordType()) {
892     CreatedValuesCount++;
893     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
894     for (const FieldDecl *Field : DACtx->getModeledFields(Type)) {
895       assert(Field != nullptr);
896 
897       QualType FieldType = Field->getType();
898 
899       FieldLocs.insert(
900           {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1,
901                                           CreatedValuesCount)});
902     }
903 
904     RecordStorageLocation::SyntheticFieldMap SyntheticFieldLocs;
905     for (const auto &Entry : DACtx->getSyntheticFields(Type)) {
906       SyntheticFieldLocs.insert(
907           {Entry.getKey(),
908            &createLocAndMaybeValue(Entry.getValue(), Visited, Depth + 1,
909                                    CreatedValuesCount)});
910     }
911 
912     RecordStorageLocation &Loc = DACtx->createRecordStorageLocation(
913         Type, std::move(FieldLocs), std::move(SyntheticFieldLocs));
914     RecordValue &RecordVal = create<RecordValue>(Loc);
915 
916     // As we already have a storage location for the `RecordValue`, we can and
917     // should associate them in the environment.
918     setValue(Loc, RecordVal);
919 
920     return &RecordVal;
921   }
922 
923   return nullptr;
924 }
925 
926 StorageLocation &
927 Environment::createLocAndMaybeValue(QualType Ty,
928                                     llvm::DenseSet<QualType> &Visited,
929                                     int Depth, int &CreatedValuesCount) {
930   if (!Visited.insert(Ty.getCanonicalType()).second)
931     return createStorageLocation(Ty.getNonReferenceType());
932   Value *Val = createValueUnlessSelfReferential(
933       Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount);
934   Visited.erase(Ty.getCanonicalType());
935 
936   Ty = Ty.getNonReferenceType();
937 
938   if (Val == nullptr)
939     return createStorageLocation(Ty);
940 
941   if (Ty->isRecordType())
942     return cast<RecordValue>(Val)->getLoc();
943 
944   StorageLocation &Loc = createStorageLocation(Ty);
945   setValue(Loc, *Val);
946   return Loc;
947 }
948 
949 StorageLocation &Environment::createObjectInternal(const ValueDecl *D,
950                                                    QualType Ty,
951                                                    const Expr *InitExpr) {
952   if (Ty->isReferenceType()) {
953     // Although variables of reference type always need to be initialized, it
954     // can happen that we can't see the initializer, so `InitExpr` may still
955     // be null.
956     if (InitExpr) {
957       if (auto *InitExprLoc = getStorageLocation(*InitExpr))
958           return *InitExprLoc;
959     }
960 
961     // Even though we have an initializer, we might not get an
962     // InitExprLoc, for example if the InitExpr is a CallExpr for which we
963     // don't have a function body. In this case, we just invent a storage
964     // location and value -- it's the best we can do.
965     return createObjectInternal(D, Ty.getNonReferenceType(), nullptr);
966   }
967 
968   Value *Val = nullptr;
969   if (InitExpr)
970     // In the (few) cases where an expression is intentionally
971     // "uninterpreted", `InitExpr` is not associated with a value.  There are
972     // two ways to handle this situation: propagate the status, so that
973     // uninterpreted initializers result in uninterpreted variables, or
974     // provide a default value. We choose the latter so that later refinements
975     // of the variable can be used for reasoning about the surrounding code.
976     // For this reason, we let this case be handled by the `createValue()`
977     // call below.
978     //
979     // FIXME. If and when we interpret all language cases, change this to
980     // assert that `InitExpr` is interpreted, rather than supplying a
981     // default value (assuming we don't update the environment API to return
982     // references).
983     Val = getValue(*InitExpr);
984   if (!Val)
985     Val = createValue(Ty);
986 
987   if (Ty->isRecordType())
988     return cast<RecordValue>(Val)->getLoc();
989 
990   StorageLocation &Loc =
991       D ? createStorageLocation(*D) : createStorageLocation(Ty);
992 
993   if (Val)
994     setValue(Loc, *Val);
995 
996   return Loc;
997 }
998 
999 void Environment::assume(const Formula &F) {
1000   DACtx->addFlowConditionConstraint(FlowConditionToken, F);
1001 }
1002 
1003 bool Environment::proves(const Formula &F) const {
1004   return DACtx->flowConditionImplies(FlowConditionToken, F);
1005 }
1006 
1007 bool Environment::allows(const Formula &F) const {
1008   return DACtx->flowConditionAllows(FlowConditionToken, F);
1009 }
1010 
1011 void Environment::dump(raw_ostream &OS) const {
1012   // FIXME: add printing for remaining fields and allow caller to decide what
1013   // fields are printed.
1014   OS << "DeclToLoc:\n";
1015   for (auto [D, L] : DeclToLoc)
1016     OS << "  [" << D->getNameAsString() << ", " << L << "]\n";
1017 
1018   OS << "ExprToLoc:\n";
1019   for (auto [E, L] : ExprToLoc)
1020     OS << "  [" << E << ", " << L << "]\n";
1021 
1022   OS << "ExprToVal:\n";
1023   for (auto [E, V] : ExprToVal)
1024     OS << "  [" << E << ", " << V << ": " << *V << "]\n";
1025 
1026   OS << "LocToVal:\n";
1027   for (auto [L, V] : LocToVal) {
1028     OS << "  [" << L << ", " << V << ": " << *V << "]\n";
1029   }
1030 
1031   OS << "\n";
1032   DACtx->dumpFlowCondition(FlowConditionToken, OS);
1033 }
1034 
1035 void Environment::dump() const {
1036   dump(llvm::dbgs());
1037 }
1038 
1039 RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE,
1040                                                  const Environment &Env) {
1041   Expr *ImplicitObject = MCE.getImplicitObjectArgument();
1042   if (ImplicitObject == nullptr)
1043     return nullptr;
1044   if (ImplicitObject->getType()->isPointerType()) {
1045     if (auto *Val = Env.get<PointerValue>(*ImplicitObject))
1046       return &cast<RecordStorageLocation>(Val->getPointeeLoc());
1047     return nullptr;
1048   }
1049   return cast_or_null<RecordStorageLocation>(
1050       Env.getStorageLocation(*ImplicitObject));
1051 }
1052 
1053 RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
1054                                              const Environment &Env) {
1055   Expr *Base = ME.getBase();
1056   if (Base == nullptr)
1057     return nullptr;
1058   if (ME.isArrow()) {
1059     if (auto *Val = Env.get<PointerValue>(*Base))
1060       return &cast<RecordStorageLocation>(Val->getPointeeLoc());
1061     return nullptr;
1062   }
1063   return Env.get<RecordStorageLocation>(*Base);
1064 }
1065 
1066 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) {
1067   // Unnamed bitfields are only used for padding and do not appear in
1068   // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
1069   // field list, and we thus need to remove them before mapping inits to
1070   // fields to avoid mapping inits to the wrongs fields.
1071   std::vector<FieldDecl *> Fields;
1072   llvm::copy_if(
1073       RD->fields(), std::back_inserter(Fields),
1074       [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); });
1075   return Fields;
1076 }
1077 
1078 RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) {
1079   auto &NewVal = Env.create<RecordValue>(Loc);
1080   Env.setValue(Loc, NewVal);
1081   return NewVal;
1082 }
1083 
1084 RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) {
1085   assert(Expr.getType()->isRecordType());
1086 
1087   if (Expr.isPRValue()) {
1088     if (auto *ExistingVal = Env.get<RecordValue>(Expr)) {
1089       auto &NewVal = Env.create<RecordValue>(ExistingVal->getLoc());
1090       Env.setValue(Expr, NewVal);
1091       Env.setValue(NewVal.getLoc(), NewVal);
1092       return NewVal;
1093     }
1094 
1095     auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType()));
1096     Env.setValue(Expr, NewVal);
1097     return NewVal;
1098   }
1099 
1100   if (auto *Loc = Env.get<RecordStorageLocation>(Expr)) {
1101     auto &NewVal = Env.create<RecordValue>(*Loc);
1102     Env.setValue(*Loc, NewVal);
1103     return NewVal;
1104   }
1105 
1106   auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType()));
1107   Env.setStorageLocation(Expr, NewVal.getLoc());
1108   return NewVal;
1109 }
1110 
1111 } // namespace dataflow
1112 } // namespace clang
1113