xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (revision 5bd643e1456bd2acc53ac0bf594d285be89a44fe)
1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
20 #include "clang/Analysis/FlowSensitive/Value.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/DenseSet.h"
23 #include "llvm/ADT/MapVector.h"
24 #include "llvm/ADT/STLExtras.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include <cassert>
27 #include <utility>
28 
29 namespace clang {
30 namespace dataflow {
31 
32 // FIXME: convert these to parameters of the analysis or environment. Current
33 // settings have been experimentaly validated, but only for a particular
34 // analysis.
35 static constexpr int MaxCompositeValueDepth = 3;
36 static constexpr int MaxCompositeValueSize = 1000;
37 
38 /// Returns a map consisting of key-value entries that are present in both maps.
39 static llvm::DenseMap<const ValueDecl *, StorageLocation *> intersectDeclToLoc(
40     const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc1,
41     const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc2) {
42   llvm::DenseMap<const ValueDecl *, StorageLocation *> Result;
43   for (auto &Entry : DeclToLoc1) {
44     auto It = DeclToLoc2.find(Entry.first);
45     if (It != DeclToLoc2.end() && Entry.second == It->second)
46       Result.insert({Entry.first, Entry.second});
47   }
48   return Result;
49 }
50 
51 // Whether to consider equivalent two values with an unknown relation.
52 //
53 // FIXME: this function is a hack enabling unsoundness to support
54 // convergence. Once we have widening support for the reference/pointer and
55 // struct built-in models, this should be unconditionally `false` (and inlined
56 // as such at its call sites).
57 static bool equateUnknownValues(Value::Kind K) {
58   switch (K) {
59   case Value::Kind::Integer:
60   case Value::Kind::Pointer:
61   case Value::Kind::Record:
62     return true;
63   default:
64     return false;
65   }
66 }
67 
68 static bool compareDistinctValues(QualType Type, Value &Val1,
69                                   const Environment &Env1, Value &Val2,
70                                   const Environment &Env2,
71                                   Environment::ValueModel &Model) {
72   // Note: Potentially costly, but, for booleans, we could check whether both
73   // can be proven equivalent in their respective environments.
74 
75   // FIXME: move the reference/pointers logic from `areEquivalentValues` to here
76   // and implement separate, join/widen specific handling for
77   // reference/pointers.
78   switch (Model.compare(Type, Val1, Env1, Val2, Env2)) {
79   case ComparisonResult::Same:
80     return true;
81   case ComparisonResult::Different:
82     return false;
83   case ComparisonResult::Unknown:
84     return equateUnknownValues(Val1.getKind());
85   }
86   llvm_unreachable("All cases covered in switch");
87 }
88 
89 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
90 /// respectively, of the same type `Type`. Merging generally produces a single
91 /// value that (soundly) approximates the two inputs, although the actual
92 /// meaning depends on `Model`.
93 static Value *mergeDistinctValues(QualType Type, Value &Val1,
94                                   const Environment &Env1, Value &Val2,
95                                   const Environment &Env2,
96                                   Environment &MergedEnv,
97                                   Environment::ValueModel &Model) {
98   // Join distinct boolean values preserving information about the constraints
99   // in the respective path conditions.
100   if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
101     // FIXME: Checking both values should be unnecessary, since they should have
102     // a consistent shape.  However, right now we can end up with BoolValue's in
103     // integer-typed variables due to our incorrect handling of
104     // boolean-to-integer casts (we just propagate the BoolValue to the result
105     // of the cast). So, a join can encounter an integer in one branch but a
106     // bool in the other.
107     // For example:
108     // ```
109     // std::optional<bool> o;
110     // int x;
111     // if (o.has_value())
112     //   x = o.value();
113     // ```
114     auto &Expr1 = cast<BoolValue>(Val1).formula();
115     auto &Expr2 = cast<BoolValue>(Val2).formula();
116     auto &A = MergedEnv.arena();
117     auto &MergedVal = A.makeAtomRef(A.makeAtom());
118     MergedEnv.assume(
119         A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
120                            A.makeEquals(MergedVal, Expr1)),
121                  A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
122                            A.makeEquals(MergedVal, Expr2))));
123     return &A.makeBoolValue(MergedVal);
124   }
125 
126   Value *MergedVal = nullptr;
127   if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) {
128     auto *RecordVal2 = cast<RecordValue>(&Val2);
129 
130     if (&RecordVal1->getLoc() == &RecordVal2->getLoc())
131       // `RecordVal1` and `RecordVal2` may have different properties associated
132       // with them. Create a new `RecordValue` with the same location but
133       // without any properties so that we soundly approximate both values. If a
134       // particular analysis needs to merge properties, it should do so in
135       // `DataflowAnalysis::merge()`.
136       MergedVal = &MergedEnv.create<RecordValue>(RecordVal1->getLoc());
137     else
138       // If the locations for the two records are different, need to create a
139       // completely new value.
140       MergedVal = MergedEnv.createValue(Type);
141   } else {
142     MergedVal = MergedEnv.createValue(Type);
143   }
144 
145   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
146   // returns false to avoid storing unneeded values in `DACtx`.
147   if (MergedVal)
148     if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
149       return MergedVal;
150 
151   return nullptr;
152 }
153 
154 // When widening does not change `Current`, return value will equal `&Prev`.
155 static Value &widenDistinctValues(QualType Type, Value &Prev,
156                                   const Environment &PrevEnv, Value &Current,
157                                   Environment &CurrentEnv,
158                                   Environment::ValueModel &Model) {
159   // Boolean-model widening.
160   if (auto *PrevBool = dyn_cast<BoolValue>(&Prev)) {
161     // If previous value was already Top, re-use that to (implicitly) indicate
162     // that no change occurred.
163     if (isa<TopBoolValue>(Prev))
164       return Prev;
165 
166     // We may need to widen to Top, but before we do so, check whether both
167     // values are implied to be either true or false in the current environment.
168     // In that case, we can simply return a literal instead.
169     auto &CurBool = cast<BoolValue>(Current);
170     bool TruePrev = PrevEnv.proves(PrevBool->formula());
171     bool TrueCur = CurrentEnv.proves(CurBool.formula());
172     if (TruePrev && TrueCur)
173       return CurrentEnv.getBoolLiteralValue(true);
174     if (!TruePrev && !TrueCur &&
175         PrevEnv.proves(PrevEnv.arena().makeNot(PrevBool->formula())) &&
176         CurrentEnv.proves(CurrentEnv.arena().makeNot(CurBool.formula())))
177       return CurrentEnv.getBoolLiteralValue(false);
178 
179     return CurrentEnv.makeTopBoolValue();
180   }
181 
182   // FIXME: Add other built-in model widening.
183 
184   // Custom-model widening.
185   if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv))
186     return *W;
187 
188   return equateUnknownValues(Prev.getKind()) ? Prev : Current;
189 }
190 
191 // Returns whether the values in `Map1` and `Map2` compare equal for those
192 // keys that `Map1` and `Map2` have in common.
193 template <typename Key>
194 bool compareKeyToValueMaps(const llvm::MapVector<Key, Value *> &Map1,
195                            const llvm::MapVector<Key, Value *> &Map2,
196                            const Environment &Env1, const Environment &Env2,
197                            Environment::ValueModel &Model) {
198   for (auto &Entry : Map1) {
199     Key K = Entry.first;
200     assert(K != nullptr);
201 
202     Value *Val = Entry.second;
203     assert(Val != nullptr);
204 
205     auto It = Map2.find(K);
206     if (It == Map2.end())
207       continue;
208     assert(It->second != nullptr);
209 
210     if (!areEquivalentValues(*Val, *It->second) &&
211         !compareDistinctValues(K->getType(), *Val, Env1, *It->second, Env2,
212                                Model))
213       return false;
214   }
215 
216   return true;
217 }
218 
219 // Perform a join on two `LocToVal` maps.
220 static llvm::MapVector<const StorageLocation *, Value *>
221 joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal,
222              const llvm::MapVector<const StorageLocation *, Value *> &LocToVal2,
223              const Environment &Env1, const Environment &Env2,
224              Environment &JoinedEnv, Environment::ValueModel &Model) {
225   llvm::MapVector<const StorageLocation *, Value *> Result;
226   for (auto &Entry : LocToVal) {
227     const StorageLocation *Loc = Entry.first;
228     assert(Loc != nullptr);
229 
230     Value *Val = Entry.second;
231     assert(Val != nullptr);
232 
233     auto It = LocToVal2.find(Loc);
234     if (It == LocToVal2.end())
235       continue;
236     assert(It->second != nullptr);
237 
238     if (areEquivalentValues(*Val, *It->second)) {
239       Result.insert({Loc, Val});
240       continue;
241     }
242 
243     if (Value *MergedVal = mergeDistinctValues(
244             Loc->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) {
245       Result.insert({Loc, MergedVal});
246     }
247   }
248 
249   return Result;
250 }
251 
252 // Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either
253 // `const StorageLocation *` or `const Expr *`.
254 template <typename Key>
255 llvm::MapVector<Key, Value *>
256 widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap,
257                    const llvm::MapVector<Key, Value *> &PrevMap,
258                    Environment &CurEnv, const Environment &PrevEnv,
259                    Environment::ValueModel &Model, LatticeJoinEffect &Effect) {
260   llvm::MapVector<Key, Value *> WidenedMap;
261   for (auto &Entry : CurMap) {
262     Key K = Entry.first;
263     assert(K != nullptr);
264 
265     Value *Val = Entry.second;
266     assert(Val != nullptr);
267 
268     auto PrevIt = PrevMap.find(K);
269     if (PrevIt == PrevMap.end())
270       continue;
271     assert(PrevIt->second != nullptr);
272 
273     if (areEquivalentValues(*Val, *PrevIt->second)) {
274       WidenedMap.insert({K, Val});
275       continue;
276     }
277 
278     Value &WidenedVal = widenDistinctValues(K->getType(), *PrevIt->second,
279                                             PrevEnv, *Val, CurEnv, Model);
280     WidenedMap.insert({K, &WidenedVal});
281     if (&WidenedVal != PrevIt->second)
282       Effect = LatticeJoinEffect::Changed;
283   }
284 
285   return WidenedMap;
286 }
287 
288 /// Initializes a global storage value.
289 static void insertIfGlobal(const Decl &D,
290                            llvm::DenseSet<const VarDecl *> &Vars) {
291   if (auto *V = dyn_cast<VarDecl>(&D))
292     if (V->hasGlobalStorage())
293       Vars.insert(V);
294 }
295 
296 static void insertIfFunction(const Decl &D,
297                              llvm::DenseSet<const FunctionDecl *> &Funcs) {
298   if (auto *FD = dyn_cast<FunctionDecl>(&D))
299     Funcs.insert(FD);
300 }
301 
302 static MemberExpr *getMemberForAccessor(const CXXMemberCallExpr &C) {
303   if (!C.getMethodDecl())
304     return nullptr;
305   auto *Body = dyn_cast_or_null<CompoundStmt>(C.getMethodDecl()->getBody());
306   if (!Body || Body->size() != 1)
307     return nullptr;
308   if (auto *RS = dyn_cast<ReturnStmt>(*Body->body_begin()))
309     if (auto *Return = RS->getRetValue())
310       return dyn_cast<MemberExpr>(Return->IgnoreParenImpCasts());
311   return nullptr;
312 }
313 
314 static void
315 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields,
316                          llvm::DenseSet<const VarDecl *> &Vars,
317                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
318   insertIfGlobal(D, Vars);
319   insertIfFunction(D, Funcs);
320   if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D))
321     for (const auto *B : Decomp->bindings())
322       if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding()))
323         // FIXME: should we be using `E->getFoundDecl()`?
324         if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
325           Fields.insert(FD);
326 }
327 
328 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields,
329 /// global variables and functions that are declared in or referenced from
330 /// sub-statements.
331 static void
332 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
333                          llvm::DenseSet<const VarDecl *> &Vars,
334                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
335   for (auto *Child : S.children())
336     if (Child != nullptr)
337       getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs);
338   if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
339     getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs);
340 
341   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
342     if (DS->isSingleDecl())
343       getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs);
344     else
345       for (auto *D : DS->getDeclGroup())
346         getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs);
347   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
348     insertIfGlobal(*E->getDecl(), Vars);
349     insertIfFunction(*E->getDecl(), Funcs);
350   } else if (const auto *C = dyn_cast<CXXMemberCallExpr>(&S)) {
351     // If this is a method that returns a member variable but does nothing else,
352     // model the field of the return value.
353     if (MemberExpr *E = getMemberForAccessor(*C))
354       if (const auto *FD = dyn_cast<FieldDecl>(E->getMemberDecl()))
355         Fields.insert(FD);
356   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
357     // FIXME: should we be using `E->getFoundDecl()`?
358     const ValueDecl *VD = E->getMemberDecl();
359     insertIfGlobal(*VD, Vars);
360     insertIfFunction(*VD, Funcs);
361     if (const auto *FD = dyn_cast<FieldDecl>(VD))
362       Fields.insert(FD);
363   } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
364     if (RecordDecl *RD = InitList->getType()->getAsRecordDecl())
365       for (const auto *FD : getFieldsForInitListExpr(RD))
366         Fields.insert(FD);
367   }
368 }
369 
370 // FIXME: Add support for resetting globals after function calls to enable
371 // the implementation of sound analyses.
372 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
373   assert(FuncDecl->getBody() != nullptr);
374 
375   FieldSet Fields;
376   llvm::DenseSet<const VarDecl *> Vars;
377   llvm::DenseSet<const FunctionDecl *> Funcs;
378 
379   // Look for global variable and field references in the
380   // constructor-initializers.
381   if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) {
382     for (const auto *Init : CtorDecl->inits()) {
383       if (Init->isMemberInitializer()) {
384         Fields.insert(Init->getMember());
385       } else if (Init->isIndirectMemberInitializer()) {
386         for (const auto *I : Init->getIndirectMember()->chain())
387           Fields.insert(cast<FieldDecl>(I));
388       }
389       const Expr *E = Init->getInit();
390       assert(E != nullptr);
391       getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs);
392     }
393     // Add all fields mentioned in default member initializers.
394     for (const FieldDecl *F : CtorDecl->getParent()->fields())
395       if (const auto *I = F->getInClassInitializer())
396           getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs);
397   }
398   getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs);
399 
400   // These have to be added before the lines that follow to ensure that
401   // `create*` work correctly for structs.
402   DACtx->addModeledFields(Fields);
403 
404   for (const VarDecl *D : Vars) {
405     if (getStorageLocation(*D) != nullptr)
406       continue;
407 
408     setStorageLocation(*D, createObject(*D));
409   }
410 
411   for (const FunctionDecl *FD : Funcs) {
412     if (getStorageLocation(*FD) != nullptr)
413       continue;
414     auto &Loc = createStorageLocation(FD->getType());
415     setStorageLocation(*FD, Loc);
416   }
417 }
418 
419 Environment::Environment(DataflowAnalysisContext &DACtx)
420     : DACtx(&DACtx),
421       FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
422 
423 Environment Environment::fork() const {
424   Environment Copy(*this);
425   Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken);
426   return Copy;
427 }
428 
429 Environment::Environment(DataflowAnalysisContext &DACtx,
430                          const DeclContext &DeclCtx)
431     : Environment(DACtx) {
432   CallStack.push_back(&DeclCtx);
433 
434   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
435     assert(FuncDecl->getBody() != nullptr);
436 
437     initFieldsGlobalsAndFuncs(FuncDecl);
438 
439     for (const auto *ParamDecl : FuncDecl->parameters()) {
440       assert(ParamDecl != nullptr);
441       setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr));
442     }
443   }
444 
445   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
446     auto *Parent = MethodDecl->getParent();
447     assert(Parent != nullptr);
448 
449     if (Parent->isLambda()) {
450       for (auto Capture : Parent->captures()) {
451         if (Capture.capturesVariable()) {
452           const auto *VarDecl = Capture.getCapturedVar();
453           assert(VarDecl != nullptr);
454           setStorageLocation(*VarDecl, createObject(*VarDecl, nullptr));
455         } else if (Capture.capturesThis()) {
456           const auto *SurroundingMethodDecl =
457               cast<CXXMethodDecl>(DeclCtx.getNonClosureAncestor());
458           QualType ThisPointeeType =
459               SurroundingMethodDecl->getFunctionObjectParameterType();
460           ThisPointeeLoc =
461               &cast<RecordValue>(createValue(ThisPointeeType))->getLoc();
462         }
463       }
464     } else if (MethodDecl->isImplicitObjectMemberFunction()) {
465       QualType ThisPointeeType = MethodDecl->getFunctionObjectParameterType();
466       ThisPointeeLoc =
467           &cast<RecordValue>(createValue(ThisPointeeType))->getLoc();
468     }
469   }
470 }
471 
472 bool Environment::canDescend(unsigned MaxDepth,
473                              const DeclContext *Callee) const {
474   return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee);
475 }
476 
477 Environment Environment::pushCall(const CallExpr *Call) const {
478   Environment Env(*this);
479 
480   if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) {
481     if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) {
482       if (!isa<CXXThisExpr>(Arg))
483           Env.ThisPointeeLoc =
484               cast<RecordStorageLocation>(getStorageLocation(*Arg));
485       // Otherwise (when the argument is `this`), retain the current
486       // environment's `ThisPointeeLoc`.
487     }
488   }
489 
490   Env.pushCallInternal(Call->getDirectCallee(),
491                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
492 
493   return Env;
494 }
495 
496 Environment Environment::pushCall(const CXXConstructExpr *Call) const {
497   Environment Env(*this);
498 
499   Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call);
500 
501   Env.pushCallInternal(Call->getConstructor(),
502                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
503 
504   return Env;
505 }
506 
507 void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
508                                    ArrayRef<const Expr *> Args) {
509   // Canonicalize to the definition of the function. This ensures that we're
510   // putting arguments into the same `ParamVarDecl`s` that the callee will later
511   // be retrieving them from.
512   assert(FuncDecl->getDefinition() != nullptr);
513   FuncDecl = FuncDecl->getDefinition();
514 
515   CallStack.push_back(FuncDecl);
516 
517   initFieldsGlobalsAndFuncs(FuncDecl);
518 
519   const auto *ParamIt = FuncDecl->param_begin();
520 
521   // FIXME: Parameters don't always map to arguments 1:1; examples include
522   // overloaded operators implemented as member functions, and parameter packs.
523   for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) {
524     assert(ParamIt != FuncDecl->param_end());
525     const VarDecl *Param = *ParamIt;
526     setStorageLocation(*Param, createObject(*Param, Args[ArgIndex]));
527   }
528 }
529 
530 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) {
531   // We ignore some entries of `CalleeEnv`:
532   // - `DACtx` because is already the same in both
533   // - We don't want the callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or
534   //   `ThisPointeeLoc` because they don't apply to us.
535   // - `DeclToLoc`, `ExprToLoc`, and `ExprToVal` capture information from the
536   //   callee's local scope, so when popping that scope, we do not propagate
537   //   the maps.
538   this->LocToVal = std::move(CalleeEnv.LocToVal);
539   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
540 
541   if (Call->isGLValue()) {
542     if (CalleeEnv.ReturnLoc != nullptr)
543       setStorageLocation(*Call, *CalleeEnv.ReturnLoc);
544   } else if (!Call->getType()->isVoidType()) {
545     if (CalleeEnv.ReturnVal != nullptr)
546       setValue(*Call, *CalleeEnv.ReturnVal);
547   }
548 }
549 
550 void Environment::popCall(const CXXConstructExpr *Call,
551                           const Environment &CalleeEnv) {
552   // See also comment in `popCall(const CallExpr *, const Environment &)` above.
553   this->LocToVal = std::move(CalleeEnv.LocToVal);
554   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
555 
556   if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) {
557     setValue(*Call, *Val);
558   }
559 }
560 
561 bool Environment::equivalentTo(const Environment &Other,
562                                Environment::ValueModel &Model) const {
563   assert(DACtx == Other.DACtx);
564 
565   if (ReturnVal != Other.ReturnVal)
566     return false;
567 
568   if (ReturnLoc != Other.ReturnLoc)
569     return false;
570 
571   if (ThisPointeeLoc != Other.ThisPointeeLoc)
572     return false;
573 
574   if (DeclToLoc != Other.DeclToLoc)
575     return false;
576 
577   if (ExprToLoc != Other.ExprToLoc)
578     return false;
579 
580   if (!compareKeyToValueMaps(ExprToVal, Other.ExprToVal, *this, Other, Model))
581     return false;
582 
583   if (!compareKeyToValueMaps(LocToVal, Other.LocToVal, *this, Other, Model))
584     return false;
585 
586   return true;
587 }
588 
589 LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
590                                      Environment::ValueModel &Model) {
591   assert(DACtx == PrevEnv.DACtx);
592   assert(ReturnVal == PrevEnv.ReturnVal);
593   assert(ReturnLoc == PrevEnv.ReturnLoc);
594   assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
595   assert(CallStack == PrevEnv.CallStack);
596 
597   auto Effect = LatticeJoinEffect::Unchanged;
598 
599   // By the API, `PrevEnv` is a previous version of the environment for the same
600   // block, so we have some guarantees about its shape. In particular, it will
601   // be the result of a join or widen operation on previous values for this
602   // block. For `DeclToLoc`, `ExprToVal`, and `ExprToLoc`, join guarantees that
603   // these maps are subsets of the maps in `PrevEnv`. So, as long as we maintain
604   // this property here, we don't need change their current values to widen.
605   assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size());
606   assert(ExprToVal.size() <= PrevEnv.ExprToVal.size());
607   assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size());
608 
609   ExprToVal = widenKeyToValueMap(ExprToVal, PrevEnv.ExprToVal, *this, PrevEnv,
610                                  Model, Effect);
611 
612   LocToVal = widenKeyToValueMap(LocToVal, PrevEnv.LocToVal, *this, PrevEnv,
613                                 Model, Effect);
614   if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() ||
615       ExprToLoc.size() != PrevEnv.ExprToLoc.size() ||
616       ExprToVal.size() != PrevEnv.ExprToVal.size() ||
617       LocToVal.size() != PrevEnv.LocToVal.size())
618     Effect = LatticeJoinEffect::Changed;
619 
620   return Effect;
621 }
622 
623 Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
624                               Environment::ValueModel &Model) {
625   assert(EnvA.DACtx == EnvB.DACtx);
626   assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
627   assert(EnvA.CallStack == EnvB.CallStack);
628 
629   Environment JoinedEnv(*EnvA.DACtx);
630 
631   JoinedEnv.CallStack = EnvA.CallStack;
632   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
633 
634   if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) {
635     // `ReturnVal` might not always get set -- for example if we have a return
636     // statement of the form `return some_other_func()` and we decide not to
637     // analyze `some_other_func()`.
638     // In this case, we can't say anything about the joined return value -- we
639     // don't simply want to propagate the return value that we do have, because
640     // it might not be the correct one.
641     // This occurs for example in the test `ContextSensitiveMutualRecursion`.
642     JoinedEnv.ReturnVal = nullptr;
643   } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) {
644     JoinedEnv.ReturnVal = EnvA.ReturnVal;
645   } else {
646     assert(!EnvA.CallStack.empty());
647     // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
648     // cast.
649     auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
650     assert(Func != nullptr);
651     if (Value *MergedVal =
652             mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
653                                 *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
654       JoinedEnv.ReturnVal = MergedVal;
655   }
656 
657   if (EnvA.ReturnLoc == EnvB.ReturnLoc)
658     JoinedEnv.ReturnLoc = EnvA.ReturnLoc;
659   else
660     JoinedEnv.ReturnLoc = nullptr;
661 
662   JoinedEnv.DeclToLoc = intersectDeclToLoc(EnvA.DeclToLoc, EnvB.DeclToLoc);
663 
664   // FIXME: update join to detect backedges and simplify the flow condition
665   // accordingly.
666   JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions(
667       EnvA.FlowConditionToken, EnvB.FlowConditionToken);
668 
669   JoinedEnv.LocToVal =
670       joinLocToVal(EnvA.LocToVal, EnvB.LocToVal, EnvA, EnvB, JoinedEnv, Model);
671 
672   // We intentionally leave `JoinedEnv.ExprToLoc` and `JoinedEnv.ExprToVal`
673   // empty, as we never need to access entries in these maps outside of the
674   // basic block that sets them.
675 
676   return JoinedEnv;
677 }
678 
679 StorageLocation &Environment::createStorageLocation(QualType Type) {
680   return DACtx->createStorageLocation(Type);
681 }
682 
683 StorageLocation &Environment::createStorageLocation(const ValueDecl &D) {
684   // Evaluated declarations are always assigned the same storage locations to
685   // ensure that the environment stabilizes across loop iterations. Storage
686   // locations for evaluated declarations are stored in the analysis context.
687   return DACtx->getStableStorageLocation(D);
688 }
689 
690 StorageLocation &Environment::createStorageLocation(const Expr &E) {
691   // Evaluated expressions are always assigned the same storage locations to
692   // ensure that the environment stabilizes across loop iterations. Storage
693   // locations for evaluated expressions are stored in the analysis context.
694   return DACtx->getStableStorageLocation(E);
695 }
696 
697 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
698   assert(!DeclToLoc.contains(&D));
699   DeclToLoc[&D] = &Loc;
700 }
701 
702 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const {
703   auto It = DeclToLoc.find(&D);
704   if (It == DeclToLoc.end())
705     return nullptr;
706 
707   StorageLocation *Loc = It->second;
708 
709   return Loc;
710 }
711 
712 void Environment::removeDecl(const ValueDecl &D) { DeclToLoc.erase(&D); }
713 
714 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
715   // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason,
716   // but we still want to be able to associate a `StorageLocation` with them,
717   // so allow these as an exception.
718   assert(E.isGLValue() ||
719          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
720   setStorageLocationInternal(E, Loc);
721 }
722 
723 StorageLocation *Environment::getStorageLocation(const Expr &E) const {
724   // See comment in `setStorageLocation()`.
725   assert(E.isGLValue() ||
726          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
727   return getStorageLocationInternal(E);
728 }
729 
730 RecordStorageLocation *Environment::getThisPointeeStorageLocation() const {
731   return ThisPointeeLoc;
732 }
733 
734 RecordStorageLocation &
735 Environment::getResultObjectLocation(const Expr &RecordPRValue) {
736   assert(RecordPRValue.getType()->isRecordType());
737   assert(RecordPRValue.isPRValue());
738 
739   if (StorageLocation *ExistingLoc = getStorageLocationInternal(RecordPRValue))
740     return *cast<RecordStorageLocation>(ExistingLoc);
741   auto &Loc = cast<RecordStorageLocation>(
742       DACtx->getStableStorageLocation(RecordPRValue));
743   setStorageLocationInternal(RecordPRValue, Loc);
744   return Loc;
745 }
746 
747 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) {
748   return DACtx->getOrCreateNullPointerValue(PointeeType);
749 }
750 
751 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
752   assert(!isa<RecordValue>(&Val) || &cast<RecordValue>(&Val)->getLoc() == &Loc);
753 
754   LocToVal[&Loc] = &Val;
755 }
756 
757 void Environment::setValue(const Expr &E, Value &Val) {
758   assert(E.isPRValue());
759   ExprToVal[&E] = &Val;
760 }
761 
762 Value *Environment::getValue(const StorageLocation &Loc) const {
763   return LocToVal.lookup(&Loc);
764 }
765 
766 Value *Environment::getValue(const ValueDecl &D) const {
767   auto *Loc = getStorageLocation(D);
768   if (Loc == nullptr)
769     return nullptr;
770   return getValue(*Loc);
771 }
772 
773 Value *Environment::getValue(const Expr &E) const {
774   if (E.isPRValue()) {
775     auto It = ExprToVal.find(&ignoreCFGOmittedNodes(E));
776     return It == ExprToVal.end() ? nullptr : It->second;
777   }
778 
779   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
780   if (It == ExprToLoc.end())
781     return nullptr;
782   return getValue(*It->second);
783 }
784 
785 Value *Environment::createValue(QualType Type) {
786   llvm::DenseSet<QualType> Visited;
787   int CreatedValuesCount = 0;
788   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
789                                                 CreatedValuesCount);
790   if (CreatedValuesCount > MaxCompositeValueSize) {
791     llvm::errs() << "Attempting to initialize a huge value of type: " << Type
792                  << '\n';
793   }
794   return Val;
795 }
796 
797 void Environment::setStorageLocationInternal(const Expr &E,
798                                              StorageLocation &Loc) {
799   const Expr &CanonE = ignoreCFGOmittedNodes(E);
800   assert(!ExprToLoc.contains(&CanonE));
801   ExprToLoc[&CanonE] = &Loc;
802 }
803 
804 StorageLocation *Environment::getStorageLocationInternal(const Expr &E) const {
805   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
806   return It == ExprToLoc.end() ? nullptr : &*It->second;
807 }
808 
809 Value *Environment::createValueUnlessSelfReferential(
810     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
811     int &CreatedValuesCount) {
812   assert(!Type.isNull());
813   assert(!Type->isReferenceType());
814 
815   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
816   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
817       Depth > MaxCompositeValueDepth)
818     return nullptr;
819 
820   if (Type->isBooleanType()) {
821     CreatedValuesCount++;
822     return &makeAtomicBoolValue();
823   }
824 
825   if (Type->isIntegerType()) {
826     // FIXME: consider instead `return nullptr`, given that we do nothing useful
827     // with integers, and so distinguishing them serves no purpose, but could
828     // prevent convergence.
829     CreatedValuesCount++;
830     return &arena().create<IntegerValue>();
831   }
832 
833   if (Type->isPointerType()) {
834     CreatedValuesCount++;
835     QualType PointeeType = Type->getPointeeType();
836     StorageLocation &PointeeLoc =
837         createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount);
838 
839     return &arena().create<PointerValue>(PointeeLoc);
840   }
841 
842   if (Type->isRecordType()) {
843     CreatedValuesCount++;
844     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
845     for (const FieldDecl *Field : DACtx->getModeledFields(Type)) {
846       assert(Field != nullptr);
847 
848       QualType FieldType = Field->getType();
849 
850       FieldLocs.insert(
851           {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1,
852                                           CreatedValuesCount)});
853     }
854 
855     RecordStorageLocation &Loc =
856         arena().create<RecordStorageLocation>(Type, std::move(FieldLocs));
857     RecordValue &RecordVal = create<RecordValue>(Loc);
858 
859     // As we already have a storage location for the `RecordValue`, we can and
860     // should associate them in the environment.
861     setValue(Loc, RecordVal);
862 
863     return &RecordVal;
864   }
865 
866   return nullptr;
867 }
868 
869 StorageLocation &
870 Environment::createLocAndMaybeValue(QualType Ty,
871                                     llvm::DenseSet<QualType> &Visited,
872                                     int Depth, int &CreatedValuesCount) {
873   if (!Visited.insert(Ty.getCanonicalType()).second)
874     return createStorageLocation(Ty.getNonReferenceType());
875   Value *Val = createValueUnlessSelfReferential(
876       Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount);
877   Visited.erase(Ty.getCanonicalType());
878 
879   Ty = Ty.getNonReferenceType();
880 
881   if (Val == nullptr)
882     return createStorageLocation(Ty);
883 
884   if (Ty->isRecordType())
885     return cast<RecordValue>(Val)->getLoc();
886 
887   StorageLocation &Loc = createStorageLocation(Ty);
888   setValue(Loc, *Val);
889   return Loc;
890 }
891 
892 StorageLocation &Environment::createObjectInternal(const ValueDecl *D,
893                                                    QualType Ty,
894                                                    const Expr *InitExpr) {
895   if (Ty->isReferenceType()) {
896     // Although variables of reference type always need to be initialized, it
897     // can happen that we can't see the initializer, so `InitExpr` may still
898     // be null.
899     if (InitExpr) {
900       if (auto *InitExprLoc = getStorageLocation(*InitExpr))
901           return *InitExprLoc;
902     }
903 
904     // Even though we have an initializer, we might not get an
905     // InitExprLoc, for example if the InitExpr is a CallExpr for which we
906     // don't have a function body. In this case, we just invent a storage
907     // location and value -- it's the best we can do.
908     return createObjectInternal(D, Ty.getNonReferenceType(), nullptr);
909   }
910 
911   Value *Val = nullptr;
912   if (InitExpr)
913     // In the (few) cases where an expression is intentionally
914     // "uninterpreted", `InitExpr` is not associated with a value.  There are
915     // two ways to handle this situation: propagate the status, so that
916     // uninterpreted initializers result in uninterpreted variables, or
917     // provide a default value. We choose the latter so that later refinements
918     // of the variable can be used for reasoning about the surrounding code.
919     // For this reason, we let this case be handled by the `createValue()`
920     // call below.
921     //
922     // FIXME. If and when we interpret all language cases, change this to
923     // assert that `InitExpr` is interpreted, rather than supplying a
924     // default value (assuming we don't update the environment API to return
925     // references).
926     Val = getValue(*InitExpr);
927   if (!Val)
928     Val = createValue(Ty);
929 
930   if (Ty->isRecordType())
931     return cast<RecordValue>(Val)->getLoc();
932 
933   StorageLocation &Loc =
934       D ? createStorageLocation(*D) : createStorageLocation(Ty);
935 
936   if (Val)
937     setValue(Loc, *Val);
938 
939   return Loc;
940 }
941 
942 void Environment::assume(const Formula &F) {
943   DACtx->addFlowConditionConstraint(FlowConditionToken, F);
944 }
945 
946 bool Environment::proves(const Formula &F) const {
947   return DACtx->flowConditionImplies(FlowConditionToken, F);
948 }
949 
950 bool Environment::allows(const Formula &F) const {
951   return DACtx->flowConditionAllows(FlowConditionToken, F);
952 }
953 
954 void Environment::dump(raw_ostream &OS) const {
955   // FIXME: add printing for remaining fields and allow caller to decide what
956   // fields are printed.
957   OS << "DeclToLoc:\n";
958   for (auto [D, L] : DeclToLoc)
959     OS << "  [" << D->getNameAsString() << ", " << L << "]\n";
960 
961   OS << "ExprToLoc:\n";
962   for (auto [E, L] : ExprToLoc)
963     OS << "  [" << E << ", " << L << "]\n";
964 
965   OS << "ExprToVal:\n";
966   for (auto [E, V] : ExprToVal)
967     OS << "  [" << E << ", " << V << ": " << *V << "]\n";
968 
969   OS << "LocToVal:\n";
970   for (auto [L, V] : LocToVal) {
971     OS << "  [" << L << ", " << V << ": " << *V << "]\n";
972   }
973 
974   OS << "\n";
975   DACtx->dumpFlowCondition(FlowConditionToken, OS);
976 }
977 
978 void Environment::dump() const {
979   dump(llvm::dbgs());
980 }
981 
982 RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE,
983                                                  const Environment &Env) {
984   Expr *ImplicitObject = MCE.getImplicitObjectArgument();
985   if (ImplicitObject == nullptr)
986     return nullptr;
987   if (ImplicitObject->getType()->isPointerType()) {
988     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*ImplicitObject)))
989       return &cast<RecordStorageLocation>(Val->getPointeeLoc());
990     return nullptr;
991   }
992   return cast_or_null<RecordStorageLocation>(
993       Env.getStorageLocation(*ImplicitObject));
994 }
995 
996 RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
997                                              const Environment &Env) {
998   Expr *Base = ME.getBase();
999   if (Base == nullptr)
1000     return nullptr;
1001   if (ME.isArrow()) {
1002     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Base)))
1003       return &cast<RecordStorageLocation>(Val->getPointeeLoc());
1004     return nullptr;
1005   }
1006   return cast_or_null<RecordStorageLocation>(Env.getStorageLocation(*Base));
1007 }
1008 
1009 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) {
1010   // Unnamed bitfields are only used for padding and do not appear in
1011   // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
1012   // field list, and we thus need to remove them before mapping inits to
1013   // fields to avoid mapping inits to the wrongs fields.
1014   std::vector<FieldDecl *> Fields;
1015   llvm::copy_if(
1016       RD->fields(), std::back_inserter(Fields),
1017       [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); });
1018   return Fields;
1019 }
1020 
1021 RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) {
1022   auto &NewVal = Env.create<RecordValue>(Loc);
1023   Env.setValue(Loc, NewVal);
1024   return NewVal;
1025 }
1026 
1027 RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) {
1028   assert(Expr.getType()->isRecordType());
1029 
1030   if (Expr.isPRValue()) {
1031     if (auto *ExistingVal = cast_or_null<RecordValue>(Env.getValue(Expr))) {
1032       auto &NewVal = Env.create<RecordValue>(ExistingVal->getLoc());
1033       Env.setValue(Expr, NewVal);
1034       return NewVal;
1035     }
1036 
1037     auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType()));
1038     Env.setValue(Expr, NewVal);
1039     return NewVal;
1040   }
1041 
1042   if (auto *Loc =
1043           cast_or_null<RecordStorageLocation>(Env.getStorageLocation(Expr))) {
1044     auto &NewVal = Env.create<RecordValue>(*Loc);
1045     Env.setValue(*Loc, NewVal);
1046     return NewVal;
1047   }
1048 
1049   auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType()));
1050   Env.setStorageLocation(Expr, NewVal.getLoc());
1051   return NewVal;
1052 }
1053 
1054 } // namespace dataflow
1055 } // namespace clang
1056