xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (revision b47bdcbc7207aac617d3c35dfc029f79b0a46fd8)
1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
20 #include "clang/Analysis/FlowSensitive/Value.h"
21 #include "llvm/ADT/DenseMap.h"
22 #include "llvm/ADT/DenseSet.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/Support/Casting.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include <cassert>
27 #include <memory>
28 #include <utility>
29 
30 namespace clang {
31 namespace dataflow {
32 
33 // FIXME: convert these to parameters of the analysis or environment. Current
34 // settings have been experimentaly validated, but only for a particular
35 // analysis.
36 static constexpr int MaxCompositeValueDepth = 3;
37 static constexpr int MaxCompositeValueSize = 1000;
38 
39 /// Returns a map consisting of key-value entries that are present in both maps.
40 template <typename K, typename V>
41 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
42                                         const llvm::DenseMap<K, V> &Map2) {
43   llvm::DenseMap<K, V> Result;
44   for (auto &Entry : Map1) {
45     auto It = Map2.find(Entry.first);
46     if (It != Map2.end() && Entry.second == It->second)
47       Result.insert({Entry.first, Entry.second});
48   }
49   return Result;
50 }
51 
52 static bool compareDistinctValues(QualType Type, Value &Val1,
53                                   const Environment &Env1, Value &Val2,
54                                   const Environment &Env2,
55                                   Environment::ValueModel &Model) {
56   // Note: Potentially costly, but, for booleans, we could check whether both
57   // can be proven equivalent in their respective environments.
58 
59   // FIXME: move the reference/pointers logic from `areEquivalentValues` to here
60   // and implement separate, join/widen specific handling for
61   // reference/pointers.
62   switch (Model.compare(Type, Val1, Env1, Val2, Env2)) {
63   case ComparisonResult::Same:
64     return true;
65   case ComparisonResult::Different:
66     return false;
67   case ComparisonResult::Unknown:
68     switch (Val1.getKind()) {
69     case Value::Kind::Integer:
70     case Value::Kind::Reference:
71     case Value::Kind::Pointer:
72     case Value::Kind::Struct:
73       // FIXME: this choice intentionally introduces unsoundness to allow
74       // for convergence. Once we have widening support for the
75       // reference/pointer and struct built-in models, this should be
76       // `false`.
77       return true;
78     default:
79       return false;
80     }
81   }
82   llvm_unreachable("All cases covered in switch");
83 }
84 
85 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
86 /// respectively, of the same type `Type`. Merging generally produces a single
87 /// value that (soundly) approximates the two inputs, although the actual
88 /// meaning depends on `Model`.
89 static Value *mergeDistinctValues(QualType Type, Value &Val1,
90                                   const Environment &Env1, Value &Val2,
91                                   const Environment &Env2,
92                                   Environment &MergedEnv,
93                                   Environment::ValueModel &Model) {
94   // Join distinct boolean values preserving information about the constraints
95   // in the respective path conditions.
96   if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
97     // FIXME: Checking both values should be unnecessary, since they should have
98     // a consistent shape.  However, right now we can end up with BoolValue's in
99     // integer-typed variables due to our incorrect handling of
100     // boolean-to-integer casts (we just propagate the BoolValue to the result
101     // of the cast). So, a join can encounter an integer in one branch but a
102     // bool in the other.
103     // For example:
104     // ```
105     // std::optional<bool> o;
106     // int x;
107     // if (o.has_value())
108     //   x = o.value();
109     // ```
110     auto &Expr1 = cast<BoolValue>(Val1).formula();
111     auto &Expr2 = cast<BoolValue>(Val2).formula();
112     auto &A = MergedEnv.arena();
113     auto &MergedVal = A.makeAtomRef(A.makeAtom());
114     MergedEnv.addToFlowCondition(
115         A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
116                            A.makeEquals(MergedVal, Expr1)),
117                  A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
118                            A.makeEquals(MergedVal, Expr2))));
119     return &A.makeBoolValue(MergedVal);
120   }
121 
122   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
123   // returns false to avoid storing unneeded values in `DACtx`.
124   // FIXME: Creating the value based on the type alone creates misshapen values
125   // for lvalues, since the type does not reflect the need for `ReferenceValue`.
126   // This issue will be resolved when `ReferenceValue` is eliminated as part
127   // of the ongoing migration to strict handling of value categories (see
128   // https://discourse.llvm.org/t/70086 for details).
129   if (Value *MergedVal = MergedEnv.createValue(Type))
130     if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
131       return MergedVal;
132 
133   return nullptr;
134 }
135 
136 // When widening does not change `Current`, return value will equal `&Prev`.
137 static Value &widenDistinctValues(QualType Type, Value &Prev,
138                                   const Environment &PrevEnv, Value &Current,
139                                   Environment &CurrentEnv,
140                                   Environment::ValueModel &Model) {
141   // Boolean-model widening.
142   if (isa<BoolValue>(&Prev)) {
143     assert(isa<BoolValue>(Current));
144     // Widen to Top, because we know they are different values. If previous was
145     // already Top, re-use that to (implicitly) indicate that no change occured.
146     if (isa<TopBoolValue>(Prev))
147       return Prev;
148     return CurrentEnv.makeTopBoolValue();
149   }
150 
151   // FIXME: Add other built-in model widening.
152 
153   // Custom-model widening.
154   if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv))
155     return *W;
156 
157   // Default of widening is a no-op: leave the current value unchanged.
158   return Current;
159 }
160 
161 /// Initializes a global storage value.
162 static void insertIfGlobal(const Decl &D,
163                            llvm::DenseSet<const VarDecl *> &Vars) {
164   if (auto *V = dyn_cast<VarDecl>(&D))
165     if (V->hasGlobalStorage())
166       Vars.insert(V);
167 }
168 
169 static void insertIfFunction(const Decl &D,
170                              llvm::DenseSet<const FunctionDecl *> &Funcs) {
171   if (auto *FD = dyn_cast<FunctionDecl>(&D))
172     Funcs.insert(FD);
173 }
174 
175 static void
176 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields,
177                          llvm::DenseSet<const VarDecl *> &Vars,
178                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
179   insertIfGlobal(D, Vars);
180   insertIfFunction(D, Funcs);
181   if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D))
182     for (const auto *B : Decomp->bindings())
183       if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding()))
184         // FIXME: should we be using `E->getFoundDecl()`?
185         if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
186           Fields.insert(FD);
187 }
188 
189 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields,
190 /// global variables and functions that are declared in or referenced from
191 /// sub-statements.
192 static void
193 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
194                          llvm::DenseSet<const VarDecl *> &Vars,
195                          llvm::DenseSet<const FunctionDecl *> &Funcs) {
196   for (auto *Child : S.children())
197     if (Child != nullptr)
198       getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs);
199   if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
200     getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs);
201 
202   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
203     if (DS->isSingleDecl())
204       getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs);
205     else
206       for (auto *D : DS->getDeclGroup())
207         getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs);
208   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
209     insertIfGlobal(*E->getDecl(), Vars);
210     insertIfFunction(*E->getDecl(), Funcs);
211   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
212     // FIXME: should we be using `E->getFoundDecl()`?
213     const ValueDecl *VD = E->getMemberDecl();
214     insertIfGlobal(*VD, Vars);
215     insertIfFunction(*VD, Funcs);
216     if (const auto *FD = dyn_cast<FieldDecl>(VD))
217       Fields.insert(FD);
218   } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
219     if (RecordDecl *RD = InitList->getType()->getAsRecordDecl())
220       for (const auto *FD : getFieldsForInitListExpr(RD))
221         Fields.insert(FD);
222   }
223 }
224 
225 // FIXME: Add support for resetting globals after function calls to enable
226 // the implementation of sound analyses.
227 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
228   assert(FuncDecl->getBody() != nullptr);
229 
230   FieldSet Fields;
231   llvm::DenseSet<const VarDecl *> Vars;
232   llvm::DenseSet<const FunctionDecl *> Funcs;
233 
234   // Look for global variable and field references in the
235   // constructor-initializers.
236   if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) {
237     for (const auto *Init : CtorDecl->inits()) {
238       if (Init->isMemberInitializer()) {
239         Fields.insert(Init->getMember());
240       } else if (Init->isIndirectMemberInitializer()) {
241         for (const auto *I : Init->getIndirectMember()->chain())
242           Fields.insert(cast<FieldDecl>(I));
243       }
244       const Expr *E = Init->getInit();
245       assert(E != nullptr);
246       getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs);
247     }
248     // Add all fields mentioned in default member initializers.
249     for (const FieldDecl *F : CtorDecl->getParent()->fields())
250       if (const auto *I = F->getInClassInitializer())
251           getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs);
252   }
253   getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs);
254 
255   // These have to be added before the lines that follow to ensure that
256   // `create*` work correctly for structs.
257   DACtx->addModeledFields(Fields);
258 
259   for (const VarDecl *D : Vars) {
260     if (getStorageLocation(*D) != nullptr)
261       continue;
262     auto &Loc = createStorageLocation(D->getType().getNonReferenceType());
263     setStorageLocation(*D, Loc);
264     if (auto *Val = createValue(D->getType().getNonReferenceType()))
265       setValue(Loc, *Val);
266   }
267 
268   for (const FunctionDecl *FD : Funcs) {
269     if (getStorageLocation(*FD) != nullptr)
270       continue;
271     auto &Loc = createStorageLocation(FD->getType());
272     setStorageLocation(*FD, Loc);
273   }
274 }
275 
276 Environment::Environment(DataflowAnalysisContext &DACtx)
277     : DACtx(&DACtx),
278       FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}
279 
280 Environment Environment::fork() const {
281   Environment Copy(*this);
282   Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken);
283   return Copy;
284 }
285 
286 Environment::Environment(DataflowAnalysisContext &DACtx,
287                          const DeclContext &DeclCtx)
288     : Environment(DACtx) {
289   CallStack.push_back(&DeclCtx);
290 
291   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
292     assert(FuncDecl->getBody() != nullptr);
293 
294     initFieldsGlobalsAndFuncs(FuncDecl);
295 
296     for (const auto *ParamDecl : FuncDecl->parameters()) {
297       assert(ParamDecl != nullptr);
298       // References aren't objects, so the reference itself doesn't have a
299       // storage location. Instead, the storage location for a reference refers
300       // directly to an object of the referenced type -- so strip off any
301       // reference from the type.
302       auto &ParamLoc =
303           createStorageLocation(ParamDecl->getType().getNonReferenceType());
304       setStorageLocation(*ParamDecl, ParamLoc);
305       if (Value *ParamVal =
306               createValue(ParamDecl->getType().getNonReferenceType()))
307           setValue(ParamLoc, *ParamVal);
308     }
309   }
310 
311   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
312     auto *Parent = MethodDecl->getParent();
313     assert(Parent != nullptr);
314     if (Parent->isLambda())
315       MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext());
316 
317     // FIXME: Initialize the ThisPointeeLoc of lambdas too.
318     if (MethodDecl && !MethodDecl->isStatic()) {
319       QualType ThisPointeeType = MethodDecl->getThisObjectType();
320       ThisPointeeLoc = &cast<AggregateStorageLocation>(
321           createStorageLocation(ThisPointeeType));
322       if (Value *ThisPointeeVal = createValue(ThisPointeeType))
323         setValue(*ThisPointeeLoc, *ThisPointeeVal);
324     }
325   }
326 }
327 
328 bool Environment::canDescend(unsigned MaxDepth,
329                              const DeclContext *Callee) const {
330   return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee);
331 }
332 
333 Environment Environment::pushCall(const CallExpr *Call) const {
334   Environment Env(*this);
335 
336   if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) {
337     if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) {
338       if (!isa<CXXThisExpr>(Arg))
339         Env.ThisPointeeLoc = cast<AggregateStorageLocation>(
340             getStorageLocation(*Arg, SkipPast::Reference));
341       // Otherwise (when the argument is `this`), retain the current
342       // environment's `ThisPointeeLoc`.
343     }
344   }
345 
346   Env.pushCallInternal(Call->getDirectCallee(),
347                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
348 
349   return Env;
350 }
351 
352 Environment Environment::pushCall(const CXXConstructExpr *Call) const {
353   Environment Env(*this);
354 
355   Env.ThisPointeeLoc = &cast<AggregateStorageLocation>(
356       Env.createStorageLocation(Call->getType()));
357   if (Value *Val = Env.createValue(Call->getType()))
358     Env.setValue(*Env.ThisPointeeLoc, *Val);
359 
360   Env.pushCallInternal(Call->getConstructor(),
361                        llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));
362 
363   return Env;
364 }
365 
366 void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
367                                    ArrayRef<const Expr *> Args) {
368   // Canonicalize to the definition of the function. This ensures that we're
369   // putting arguments into the same `ParamVarDecl`s` that the callee will later
370   // be retrieving them from.
371   assert(FuncDecl->getDefinition() != nullptr);
372   FuncDecl = FuncDecl->getDefinition();
373 
374   CallStack.push_back(FuncDecl);
375 
376   initFieldsGlobalsAndFuncs(FuncDecl);
377 
378   const auto *ParamIt = FuncDecl->param_begin();
379 
380   // FIXME: Parameters don't always map to arguments 1:1; examples include
381   // overloaded operators implemented as member functions, and parameter packs.
382   for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) {
383     assert(ParamIt != FuncDecl->param_end());
384 
385     const Expr *Arg = Args[ArgIndex];
386     auto *ArgLoc = getStorageLocation(*Arg, SkipPast::Reference);
387     if (ArgLoc == nullptr)
388       continue;
389 
390     const VarDecl *Param = *ParamIt;
391 
392     QualType ParamType = Param->getType();
393     if (ParamType->isReferenceType()) {
394       setStorageLocation(*Param, *ArgLoc);
395     } else {
396       auto &Loc = createStorageLocation(*Param);
397       setStorageLocation(*Param, Loc);
398 
399       if (auto *ArgVal = getValue(*ArgLoc)) {
400         setValue(Loc, *ArgVal);
401       } else if (Value *Val = createValue(ParamType)) {
402         setValue(Loc, *Val);
403       }
404     }
405   }
406 }
407 
408 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) {
409   // We ignore `DACtx` because it's already the same in both. We don't want the
410   // callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or `ThisPointeeLoc`. We don't
411   // bring back `DeclToLoc` and `ExprToLoc` because we want to be able to later
412   // analyze the same callee in a different context, and `setStorageLocation`
413   // requires there to not already be a storage location assigned. Conceptually,
414   // these maps capture information from the local scope, so when popping that
415   // scope, we do not propagate the maps.
416   this->LocToVal = std::move(CalleeEnv.LocToVal);
417   this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct);
418   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
419 
420   if (Call->isGLValue()) {
421     if (CalleeEnv.ReturnLoc != nullptr)
422       setStorageLocationStrict(*Call, *CalleeEnv.ReturnLoc);
423   } else if (!Call->getType()->isVoidType()) {
424     if (CalleeEnv.ReturnVal != nullptr)
425       setValueStrict(*Call, *CalleeEnv.ReturnVal);
426   }
427 }
428 
429 void Environment::popCall(const CXXConstructExpr *Call,
430                           const Environment &CalleeEnv) {
431   // See also comment in `popCall(const CallExpr *, const Environment &)` above.
432   this->LocToVal = std::move(CalleeEnv.LocToVal);
433   this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct);
434   this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);
435 
436   if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) {
437     setValueStrict(*Call, *Val);
438   }
439 }
440 
441 bool Environment::equivalentTo(const Environment &Other,
442                                Environment::ValueModel &Model) const {
443   assert(DACtx == Other.DACtx);
444 
445   if (ReturnVal != Other.ReturnVal)
446     return false;
447 
448   if (ReturnLoc != Other.ReturnLoc)
449     return false;
450 
451   if (ThisPointeeLoc != Other.ThisPointeeLoc)
452     return false;
453 
454   if (DeclToLoc != Other.DeclToLoc)
455     return false;
456 
457   if (ExprToLoc != Other.ExprToLoc)
458     return false;
459 
460   // Compare the contents for the intersection of their domains.
461   for (auto &Entry : LocToVal) {
462     const StorageLocation *Loc = Entry.first;
463     assert(Loc != nullptr);
464 
465     Value *Val = Entry.second;
466     assert(Val != nullptr);
467 
468     auto It = Other.LocToVal.find(Loc);
469     if (It == Other.LocToVal.end())
470       continue;
471     assert(It->second != nullptr);
472 
473     if (!areEquivalentValues(*Val, *It->second) &&
474         !compareDistinctValues(Loc->getType(), *Val, *this, *It->second, Other,
475                                Model))
476       return false;
477   }
478 
479   return true;
480 }
481 
482 LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
483                                      Environment::ValueModel &Model) {
484   assert(DACtx == PrevEnv.DACtx);
485   assert(ReturnVal == PrevEnv.ReturnVal);
486   assert(ReturnLoc == PrevEnv.ReturnLoc);
487   assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
488   assert(CallStack == PrevEnv.CallStack);
489 
490   auto Effect = LatticeJoinEffect::Unchanged;
491 
492   // By the API, `PrevEnv` is a previous version of the environment for the same
493   // block, so we have some guarantees about its shape. In particular, it will
494   // be the result of a join or widen operation on previous values for this
495   // block. For `DeclToLoc` and `ExprToLoc`, join guarantees that these maps are
496   // subsets of the maps in `PrevEnv`. So, as long as we maintain this property
497   // here, we don't need change their current values to widen.
498   //
499   // FIXME: `MemberLocToStruct` does not share the above property, because
500   // `join` can cause the map size to increase (when we add fresh data in places
501   // of conflict). Once this issue with join is resolved, re-enable the
502   // assertion below or replace with something that captures the desired
503   // invariant.
504   assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size());
505   assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size());
506   // assert(MemberLocToStruct.size() <= PrevEnv.MemberLocToStruct.size());
507 
508   llvm::DenseMap<const StorageLocation *, Value *> WidenedLocToVal;
509   for (auto &Entry : LocToVal) {
510     const StorageLocation *Loc = Entry.first;
511     assert(Loc != nullptr);
512 
513     Value *Val = Entry.second;
514     assert(Val != nullptr);
515 
516     auto PrevIt = PrevEnv.LocToVal.find(Loc);
517     if (PrevIt == PrevEnv.LocToVal.end())
518       continue;
519     assert(PrevIt->second != nullptr);
520 
521     if (areEquivalentValues(*Val, *PrevIt->second)) {
522       WidenedLocToVal.insert({Loc, Val});
523       continue;
524     }
525 
526     Value &WidenedVal = widenDistinctValues(Loc->getType(), *PrevIt->second,
527                                             PrevEnv, *Val, *this, Model);
528     WidenedLocToVal.insert({Loc, &WidenedVal});
529     if (&WidenedVal != PrevIt->second)
530       Effect = LatticeJoinEffect::Changed;
531   }
532   LocToVal = std::move(WidenedLocToVal);
533   // FIXME: update the equivalence calculation for `MemberLocToStruct`, once we
534   // have a systematic way of soundly comparing this map.
535   if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() ||
536       ExprToLoc.size() != PrevEnv.ExprToLoc.size() ||
537       LocToVal.size() != PrevEnv.LocToVal.size() ||
538       MemberLocToStruct.size() != PrevEnv.MemberLocToStruct.size())
539     Effect = LatticeJoinEffect::Changed;
540 
541   return Effect;
542 }
543 
544 Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
545                               Environment::ValueModel &Model) {
546   assert(EnvA.DACtx == EnvB.DACtx);
547   assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
548   assert(EnvA.CallStack == EnvB.CallStack);
549 
550   Environment JoinedEnv(*EnvA.DACtx);
551 
552   JoinedEnv.CallStack = EnvA.CallStack;
553   JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;
554 
555   if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) {
556     // `ReturnVal` might not always get set -- for example if we have a return
557     // statement of the form `return some_other_func()` and we decide not to
558     // analyze `some_other_func()`.
559     // In this case, we can't say anything about the joined return value -- we
560     // don't simply want to propagate the return value that we do have, because
561     // it might not be the correct one.
562     // This occurs for example in the test `ContextSensitiveMutualRecursion`.
563     JoinedEnv.ReturnVal = nullptr;
564   } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) {
565     JoinedEnv.ReturnVal = EnvA.ReturnVal;
566   } else {
567     assert(!EnvA.CallStack.empty());
568     // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
569     // cast.
570     auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
571     assert(Func != nullptr);
572     if (Value *MergedVal =
573             mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
574                                 *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
575       JoinedEnv.ReturnVal = MergedVal;
576   }
577 
578   if (EnvA.ReturnLoc == EnvB.ReturnLoc)
579     JoinedEnv.ReturnLoc = EnvA.ReturnLoc;
580   else
581     JoinedEnv.ReturnLoc = nullptr;
582 
583   // FIXME: Once we're able to remove declarations from `DeclToLoc` when their
584   // lifetime ends, add an assertion that there aren't any entries in
585   // `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to
586   // different storage locations.
587   JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc);
588 
589   JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc);
590 
591   JoinedEnv.MemberLocToStruct =
592       intersectDenseMaps(EnvA.MemberLocToStruct, EnvB.MemberLocToStruct);
593 
594   // FIXME: update join to detect backedges and simplify the flow condition
595   // accordingly.
596   JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions(
597       EnvA.FlowConditionToken, EnvB.FlowConditionToken);
598 
599   for (auto &Entry : EnvA.LocToVal) {
600     const StorageLocation *Loc = Entry.first;
601     assert(Loc != nullptr);
602 
603     Value *Val = Entry.second;
604     assert(Val != nullptr);
605 
606     auto It = EnvB.LocToVal.find(Loc);
607     if (It == EnvB.LocToVal.end())
608       continue;
609     assert(It->second != nullptr);
610 
611     if (areEquivalentValues(*Val, *It->second)) {
612       JoinedEnv.LocToVal.insert({Loc, Val});
613       continue;
614     }
615 
616     if (Value *MergedVal = mergeDistinctValues(
617             Loc->getType(), *Val, EnvA, *It->second, EnvB, JoinedEnv, Model)) {
618       JoinedEnv.LocToVal.insert({Loc, MergedVal});
619     }
620   }
621 
622   return JoinedEnv;
623 }
624 
625 StorageLocation &Environment::createStorageLocation(QualType Type) {
626   return DACtx->createStorageLocation(Type);
627 }
628 
629 StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
630   // Evaluated declarations are always assigned the same storage locations to
631   // ensure that the environment stabilizes across loop iterations. Storage
632   // locations for evaluated declarations are stored in the analysis context.
633   return DACtx->getStableStorageLocation(D);
634 }
635 
636 StorageLocation &Environment::createStorageLocation(const Expr &E) {
637   // Evaluated expressions are always assigned the same storage locations to
638   // ensure that the environment stabilizes across loop iterations. Storage
639   // locations for evaluated expressions are stored in the analysis context.
640   return DACtx->getStableStorageLocation(E);
641 }
642 
643 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
644   assert(!DeclToLoc.contains(&D));
645   assert(!isa_and_nonnull<ReferenceValue>(getValue(Loc)));
646   DeclToLoc[&D] = &Loc;
647 }
648 
649 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const {
650   auto It = DeclToLoc.find(&D);
651   if (It == DeclToLoc.end())
652     return nullptr;
653 
654   StorageLocation *Loc = It->second;
655 
656   assert(!isa_and_nonnull<ReferenceValue>(getValue(*Loc)));
657 
658   return Loc;
659 }
660 
661 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
662   const Expr &CanonE = ignoreCFGOmittedNodes(E);
663   assert(!ExprToLoc.contains(&CanonE));
664   ExprToLoc[&CanonE] = &Loc;
665 }
666 
667 void Environment::setStorageLocationStrict(const Expr &E,
668                                            StorageLocation &Loc) {
669   // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason,
670   // but we still want to be able to associate a `StorageLocation` with them,
671   // so allow these as an exception.
672   assert(E.isGLValue() ||
673          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
674   setStorageLocation(E, Loc);
675 }
676 
677 StorageLocation *Environment::getStorageLocation(const Expr &E,
678                                                  SkipPast SP) const {
679   // FIXME: Add a test with parens.
680   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
681   return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP);
682 }
683 
684 StorageLocation *Environment::getStorageLocationStrict(const Expr &E) const {
685   // See comment in `setStorageLocationStrict()`.
686   assert(E.isGLValue() ||
687          E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
688   StorageLocation *Loc = getStorageLocation(E, SkipPast::None);
689 
690   if (Loc == nullptr)
691     return nullptr;
692 
693   if (auto *RefVal = dyn_cast_or_null<ReferenceValue>(getValue(*Loc)))
694     return &RefVal->getReferentLoc();
695 
696   return Loc;
697 }
698 
699 AggregateStorageLocation *Environment::getThisPointeeStorageLocation() const {
700   return ThisPointeeLoc;
701 }
702 
703 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) {
704   return DACtx->getOrCreateNullPointerValue(PointeeType);
705 }
706 
707 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
708   LocToVal[&Loc] = &Val;
709 
710   if (auto *StructVal = dyn_cast<StructValue>(&Val)) {
711     auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc);
712 
713     const QualType Type = AggregateLoc.getType();
714     assert(Type->isRecordType());
715 
716     for (const FieldDecl *Field : DACtx->getModeledFields(Type)) {
717       assert(Field != nullptr);
718       StorageLocation &FieldLoc = AggregateLoc.getChild(*Field);
719       MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field);
720       if (auto *FieldVal = StructVal->getChild(*Field))
721         setValue(FieldLoc, *FieldVal);
722     }
723   }
724 
725   auto It = MemberLocToStruct.find(&Loc);
726   if (It != MemberLocToStruct.end()) {
727     // `Loc` is the location of a struct member so we need to also update the
728     // value of the member in the corresponding `StructValue`.
729 
730     assert(It->second.first != nullptr);
731     StructValue &StructVal = *It->second.first;
732 
733     assert(It->second.second != nullptr);
734     const ValueDecl &Member = *It->second.second;
735 
736     StructVal.setChild(Member, Val);
737   }
738 }
739 
740 void Environment::clearValue(const StorageLocation &Loc) {
741   LocToVal.erase(&Loc);
742 
743   if (auto It = MemberLocToStruct.find(&Loc); It != MemberLocToStruct.end()) {
744     // `Loc` is the location of a struct member so we need to also clear the
745     // member in the corresponding `StructValue`.
746 
747     assert(It->second.first != nullptr);
748     StructValue &StructVal = *It->second.first;
749 
750     assert(It->second.second != nullptr);
751     const ValueDecl &Member = *It->second.second;
752 
753     StructVal.clearChild(Member);
754   }
755 }
756 
757 void Environment::setValueStrict(const Expr &E, Value &Val) {
758   assert(E.isPRValue());
759   assert(!isa<ReferenceValue>(Val));
760 
761   StorageLocation *Loc = getStorageLocation(E, SkipPast::None);
762   if (Loc == nullptr) {
763     Loc = &createStorageLocation(E);
764     setStorageLocation(E, *Loc);
765   }
766   setValue(*Loc, Val);
767 }
768 
769 Value *Environment::getValue(const StorageLocation &Loc) const {
770   return LocToVal.lookup(&Loc);
771 }
772 
773 Value *Environment::getValue(const ValueDecl &D) const {
774   auto *Loc = getStorageLocation(D);
775   if (Loc == nullptr)
776     return nullptr;
777   return getValue(*Loc);
778 }
779 
780 Value *Environment::getValue(const Expr &E, SkipPast SP) const {
781   auto *Loc = getStorageLocation(E, SP);
782   if (Loc == nullptr)
783     return nullptr;
784   return getValue(*Loc);
785 }
786 
787 Value *Environment::getValueStrict(const Expr &E) const {
788   assert(E.isPRValue());
789   Value *Val = getValue(E, SkipPast::None);
790 
791   assert(Val == nullptr || !isa<ReferenceValue>(Val));
792 
793   return Val;
794 }
795 
796 Value *Environment::createValue(QualType Type) {
797   llvm::DenseSet<QualType> Visited;
798   int CreatedValuesCount = 0;
799   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
800                                                 CreatedValuesCount);
801   if (CreatedValuesCount > MaxCompositeValueSize) {
802     llvm::errs() << "Attempting to initialize a huge value of type: " << Type
803                  << '\n';
804   }
805   return Val;
806 }
807 
808 Value *Environment::createValueUnlessSelfReferential(
809     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
810     int &CreatedValuesCount) {
811   assert(!Type.isNull());
812 
813   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
814   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
815       Depth > MaxCompositeValueDepth)
816     return nullptr;
817 
818   if (Type->isBooleanType()) {
819     CreatedValuesCount++;
820     return &makeAtomicBoolValue();
821   }
822 
823   if (Type->isIntegerType()) {
824     // FIXME: consider instead `return nullptr`, given that we do nothing useful
825     // with integers, and so distinguishing them serves no purpose, but could
826     // prevent convergence.
827     CreatedValuesCount++;
828     return &arena().create<IntegerValue>();
829   }
830 
831   if (Type->isReferenceType() || Type->isPointerType()) {
832     CreatedValuesCount++;
833     QualType PointeeType = Type->getPointeeType();
834     auto &PointeeLoc = createStorageLocation(PointeeType);
835 
836     if (Visited.insert(PointeeType.getCanonicalType()).second) {
837       Value *PointeeVal = createValueUnlessSelfReferential(
838           PointeeType, Visited, Depth, CreatedValuesCount);
839       Visited.erase(PointeeType.getCanonicalType());
840 
841       if (PointeeVal != nullptr)
842         setValue(PointeeLoc, *PointeeVal);
843     }
844 
845     if (Type->isReferenceType())
846       return &arena().create<ReferenceValue>(PointeeLoc);
847     else
848       return &arena().create<PointerValue>(PointeeLoc);
849   }
850 
851   if (Type->isRecordType()) {
852     CreatedValuesCount++;
853     llvm::DenseMap<const ValueDecl *, Value *> FieldValues;
854     for (const FieldDecl *Field : DACtx->getModeledFields(Type)) {
855       assert(Field != nullptr);
856 
857       QualType FieldType = Field->getType();
858       if (Visited.contains(FieldType.getCanonicalType()))
859         continue;
860 
861       Visited.insert(FieldType.getCanonicalType());
862       if (auto *FieldValue = createValueUnlessSelfReferential(
863               FieldType, Visited, Depth + 1, CreatedValuesCount))
864         FieldValues.insert({Field, FieldValue});
865       Visited.erase(FieldType.getCanonicalType());
866     }
867 
868     return &arena().create<StructValue>(std::move(FieldValues));
869   }
870 
871   return nullptr;
872 }
873 
874 StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const {
875   switch (SP) {
876   case SkipPast::None:
877     return Loc;
878   case SkipPast::Reference:
879     // References cannot be chained so we only need to skip past one level of
880     // indirection.
881     if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc)))
882       return Val->getReferentLoc();
883     return Loc;
884   }
885   llvm_unreachable("bad SkipPast kind");
886 }
887 
888 const StorageLocation &Environment::skip(const StorageLocation &Loc,
889                                          SkipPast SP) const {
890   return skip(*const_cast<StorageLocation *>(&Loc), SP);
891 }
892 
893 void Environment::addToFlowCondition(const Formula &Val) {
894   DACtx->addFlowConditionConstraint(FlowConditionToken, Val);
895 }
896 void Environment::addToFlowCondition(BoolValue &Val) {
897   addToFlowCondition(Val.formula());
898 }
899 
900 bool Environment::flowConditionImplies(const Formula &Val) const {
901   return DACtx->flowConditionImplies(FlowConditionToken, Val);
902 }
903 bool Environment::flowConditionImplies(BoolValue &Val) const {
904   return flowConditionImplies(Val.formula());
905 }
906 
907 void Environment::dump(raw_ostream &OS) const {
908   // FIXME: add printing for remaining fields and allow caller to decide what
909   // fields are printed.
910   OS << "DeclToLoc:\n";
911   for (auto [D, L] : DeclToLoc)
912     OS << "  [" << D->getNameAsString() << ", " << L << "]\n";
913 
914   OS << "ExprToLoc:\n";
915   for (auto [E, L] : ExprToLoc)
916     OS << "  [" << E << ", " << L << "]\n";
917 
918   OS << "LocToVal:\n";
919   for (auto [L, V] : LocToVal) {
920     OS << "  [" << L << ", " << V << ": " << *V << "]\n";
921   }
922 
923   OS << "FlowConditionToken:\n";
924   DACtx->dumpFlowCondition(FlowConditionToken, OS);
925 }
926 
927 void Environment::dump() const {
928   dump(llvm::dbgs());
929 }
930 
931 AggregateStorageLocation *
932 getImplicitObjectLocation(const CXXMemberCallExpr &MCE,
933                           const Environment &Env) {
934   Expr *ImplicitObject = MCE.getImplicitObjectArgument();
935   if (ImplicitObject == nullptr)
936     return nullptr;
937   StorageLocation *Loc =
938       Env.getStorageLocation(*ImplicitObject, SkipPast::Reference);
939   if (Loc == nullptr)
940     return nullptr;
941   if (ImplicitObject->getType()->isPointerType()) {
942     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Loc)))
943       return &cast<AggregateStorageLocation>(Val->getPointeeLoc());
944     return nullptr;
945   }
946   return cast<AggregateStorageLocation>(Loc);
947 }
948 
949 AggregateStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
950                                                 const Environment &Env) {
951   Expr *Base = ME.getBase();
952   if (Base == nullptr)
953     return nullptr;
954   StorageLocation *Loc = Env.getStorageLocation(*Base, SkipPast::Reference);
955   if (Loc == nullptr)
956     return nullptr;
957   if (ME.isArrow()) {
958     if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Loc)))
959       return &cast<AggregateStorageLocation>(Val->getPointeeLoc());
960     return nullptr;
961   }
962   return cast<AggregateStorageLocation>(Loc);
963 }
964 
965 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) {
966   // Unnamed bitfields are only used for padding and do not appear in
967   // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
968   // field list, and we thus need to remove them before mapping inits to
969   // fields to avoid mapping inits to the wrongs fields.
970   std::vector<FieldDecl *> Fields;
971   llvm::copy_if(
972       RD->fields(), std::back_inserter(Fields),
973       [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); });
974   return Fields;
975 }
976 
977 } // namespace dataflow
978 } // namespace clang
979