xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (revision 67136d0e8fb57251dece4be0907414fdbe081f7a)
1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/ExprCXX.h"
19 #include "clang/AST/Type.h"
20 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
21 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
22 #include "clang/Analysis/FlowSensitive/Value.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/DenseSet.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include <cassert>
28 #include <memory>
29 #include <utility>
30 
31 namespace clang {
32 namespace dataflow {
33 
// FIXME: convert these to parameters of the analysis or environment. Current
// settings have been experimentally validated, but only for a particular
// analysis.
// Upper bound on the field-nesting depth explored while creating a value:
// `createValueUnlessSelfReferential` returns null once its `Depth` argument
// exceeds this.
static constexpr int MaxCompositeValueDepth = 3;
// Threshold on the number of values created by a single `createValue` call.
// Once exceeded, value creation stops at nesting depths greater than 1, and
// `createValue` prints a diagnostic.
static constexpr int MaxCompositeValueSize = 1000;
39 
40 /// Returns a map consisting of key-value entries that are present in both maps.
41 template <typename K, typename V>
42 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
43                                         const llvm::DenseMap<K, V> &Map2) {
44   llvm::DenseMap<K, V> Result;
45   for (auto &Entry : Map1) {
46     auto It = Map2.find(Entry.first);
47     if (It != Map2.end() && Entry.second == It->second)
48       Result.insert({Entry.first, Entry.second});
49   }
50   return Result;
51 }
52 
53 /// Returns true if and only if `Val1` is equivalent to `Val2`.
54 static bool equivalentValues(QualType Type, Value *Val1,
55                              const Environment &Env1, Value *Val2,
56                              const Environment &Env2,
57                              Environment::ValueModel &Model) {
58   if (Val1 == Val2)
59     return true;
60 
61   if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) {
62     auto *IndVal2 = cast<IndirectionValue>(Val2);
63     assert(IndVal1->getKind() == IndVal2->getKind());
64     if (&IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc())
65       return true;
66   }
67 
68   return Model.compareEquivalent(Type, *Val1, Env1, *Val2, Env2);
69 }
70 
71 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
72 /// respectively, of the same type `Type`. Merging generally produces a single
73 /// value that (soundly) approximates the two inputs, although the actual
74 /// meaning depends on `Model`.
75 static Value *mergeDistinctValues(QualType Type, Value *Val1,
76                                   const Environment &Env1, Value *Val2,
77                                   const Environment &Env2,
78                                   Environment &MergedEnv,
79                                   Environment::ValueModel &Model) {
80   // Join distinct boolean values preserving information about the constraints
81   // in the respective path conditions.
82   //
83   // FIXME: Does not work for backedges, since the two (or more) paths will not
84   // have mutually exclusive conditions.
85   if (auto *Expr1 = dyn_cast<BoolValue>(Val1)) {
86     auto *Expr2 = cast<BoolValue>(Val2);
87     return &Env1.makeOr(Env1.makeAnd(Env1.getFlowConditionToken(), *Expr1),
88                         Env1.makeAnd(Env2.getFlowConditionToken(), *Expr2));
89   }
90 
91   // FIXME: add unit tests that cover this statement.
92   if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) {
93     auto *IndVal2 = cast<IndirectionValue>(Val2);
94     assert(IndVal1->getKind() == IndVal2->getKind());
95     if (&IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc()) {
96       return Val1;
97     }
98   }
99 
100   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
101   // returns false to avoid storing unneeded values in `DACtx`.
102   if (Value *MergedVal = MergedEnv.createValue(Type))
103     if (Model.merge(Type, *Val1, Env1, *Val2, Env2, *MergedVal, MergedEnv))
104       return MergedVal;
105 
106   return nullptr;
107 }
108 
109 /// Initializes a global storage value.
110 static void initGlobalVar(const VarDecl &D, Environment &Env) {
111   if (!D.hasGlobalStorage() ||
112       Env.getStorageLocation(D, SkipPast::None) != nullptr)
113     return;
114 
115   auto &Loc = Env.createStorageLocation(D);
116   Env.setStorageLocation(D, Loc);
117   if (auto *Val = Env.createValue(D.getType()))
118     Env.setValue(Loc, *Val);
119 }
120 
121 /// Initializes a global storage value.
122 static void initGlobalVar(const Decl &D, Environment &Env) {
123   if (auto *V = dyn_cast<VarDecl>(&D))
124     initGlobalVar(*V, Env);
125 }
126 
127 /// Initializes global storage values that are declared or referenced from
128 /// sub-statements of `S`.
129 // FIXME: Add support for resetting globals after function calls to enable
130 // the implementation of sound analyses.
131 static void initGlobalVars(const Stmt &S, Environment &Env) {
132   for (auto *Child : S.children()) {
133     if (Child != nullptr)
134       initGlobalVars(*Child, Env);
135   }
136 
137   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
138     if (DS->isSingleDecl()) {
139       initGlobalVar(*DS->getSingleDecl(), Env);
140     } else {
141       for (auto *D : DS->getDeclGroup())
142         initGlobalVar(*D, Env);
143     }
144   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
145     initGlobalVar(*E->getDecl(), Env);
146   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
147     initGlobalVar(*E->getMemberDecl(), Env);
148   }
149 }
150 
151 // FIXME: Does not precisely handle non-virtual diamond inheritance. A single
152 // field decl will be modeled for all instances of the inherited field.
153 static void
154 getFieldsFromClassHierarchy(QualType Type,
155                             llvm::DenseSet<const FieldDecl *> &Fields) {
156   if (Type->isIncompleteType() || Type->isDependentType() ||
157       !Type->isRecordType())
158     return;
159 
160   for (const FieldDecl *Field : Type->getAsRecordDecl()->fields())
161     Fields.insert(Field);
162   if (auto *CXXRecord = Type->getAsCXXRecordDecl())
163     for (const CXXBaseSpecifier &Base : CXXRecord->bases())
164       getFieldsFromClassHierarchy(Base.getType(), Fields);
165 }
166 
167 /// Gets the set of all fields in the type.
168 static llvm::DenseSet<const FieldDecl *> getObjectFields(QualType Type) {
169   llvm::DenseSet<const FieldDecl *> Fields;
170   getFieldsFromClassHierarchy(Type, Fields);
171   return Fields;
172 }
173 
174 Environment::Environment(DataflowAnalysisContext &DACtx)
175     : DACtx(&DACtx), FlowConditionToken(&DACtx.makeFlowConditionToken()) {}
176 
177 Environment::Environment(const Environment &Other)
178     : DACtx(Other.DACtx), DeclToLoc(Other.DeclToLoc),
179       ExprToLoc(Other.ExprToLoc), LocToVal(Other.LocToVal),
180       MemberLocToStruct(Other.MemberLocToStruct),
181       FlowConditionToken(&DACtx->forkFlowCondition(*Other.FlowConditionToken)) {
182 }
183 
184 Environment &Environment::operator=(const Environment &Other) {
185   Environment Copy(Other);
186   *this = std::move(Copy);
187   return *this;
188 }
189 
190 Environment::Environment(DataflowAnalysisContext &DACtx,
191                          const DeclContext &DeclCtx)
192     : Environment(DACtx) {
193   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
194     assert(FuncDecl->getBody() != nullptr);
195     initGlobalVars(*FuncDecl->getBody(), *this);
196     for (const auto *ParamDecl : FuncDecl->parameters()) {
197       assert(ParamDecl != nullptr);
198       auto &ParamLoc = createStorageLocation(*ParamDecl);
199       setStorageLocation(*ParamDecl, ParamLoc);
200       if (Value *ParamVal = createValue(ParamDecl->getType()))
201         setValue(ParamLoc, *ParamVal);
202     }
203   }
204 
205   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
206     auto *Parent = MethodDecl->getParent();
207     assert(Parent != nullptr);
208     if (Parent->isLambda())
209       MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext());
210 
211     if (MethodDecl && !MethodDecl->isStatic()) {
212       QualType ThisPointeeType = MethodDecl->getThisObjectType();
213       // FIXME: Add support for union types.
214       if (!ThisPointeeType->isUnionType()) {
215         auto &ThisPointeeLoc = createStorageLocation(ThisPointeeType);
216         DACtx.setThisPointeeStorageLocation(ThisPointeeLoc);
217         if (Value *ThisPointeeVal = createValue(ThisPointeeType))
218           setValue(ThisPointeeLoc, *ThisPointeeVal);
219       }
220     }
221   }
222 }
223 
224 bool Environment::equivalentTo(const Environment &Other,
225                                Environment::ValueModel &Model) const {
226   assert(DACtx == Other.DACtx);
227 
228   if (DeclToLoc != Other.DeclToLoc)
229     return false;
230 
231   if (ExprToLoc != Other.ExprToLoc)
232     return false;
233 
234   // Compare the contents for the intersection of their domains.
235   for (auto &Entry : LocToVal) {
236     const StorageLocation *Loc = Entry.first;
237     assert(Loc != nullptr);
238 
239     Value *Val = Entry.second;
240     assert(Val != nullptr);
241 
242     auto It = Other.LocToVal.find(Loc);
243     if (It == Other.LocToVal.end())
244       continue;
245     assert(It->second != nullptr);
246 
247     if (!equivalentValues(Loc->getType(), Val, *this, It->second, Other, Model))
248       return false;
249   }
250 
251   return true;
252 }
253 
254 LatticeJoinEffect Environment::join(const Environment &Other,
255                                     Environment::ValueModel &Model) {
256   assert(DACtx == Other.DACtx);
257 
258   auto Effect = LatticeJoinEffect::Unchanged;
259 
260   Environment JoinedEnv(*DACtx);
261 
262   JoinedEnv.DeclToLoc = intersectDenseMaps(DeclToLoc, Other.DeclToLoc);
263   if (DeclToLoc.size() != JoinedEnv.DeclToLoc.size())
264     Effect = LatticeJoinEffect::Changed;
265 
266   JoinedEnv.ExprToLoc = intersectDenseMaps(ExprToLoc, Other.ExprToLoc);
267   if (ExprToLoc.size() != JoinedEnv.ExprToLoc.size())
268     Effect = LatticeJoinEffect::Changed;
269 
270   JoinedEnv.MemberLocToStruct =
271       intersectDenseMaps(MemberLocToStruct, Other.MemberLocToStruct);
272   if (MemberLocToStruct.size() != JoinedEnv.MemberLocToStruct.size())
273     Effect = LatticeJoinEffect::Changed;
274 
275   // FIXME: set `Effect` as needed.
276   JoinedEnv.FlowConditionToken = &DACtx->joinFlowConditions(
277       *FlowConditionToken, *Other.FlowConditionToken);
278 
279   for (auto &Entry : LocToVal) {
280     const StorageLocation *Loc = Entry.first;
281     assert(Loc != nullptr);
282 
283     Value *Val = Entry.second;
284     assert(Val != nullptr);
285 
286     auto It = Other.LocToVal.find(Loc);
287     if (It == Other.LocToVal.end())
288       continue;
289     assert(It->second != nullptr);
290 
291     if (Val == It->second) {
292       JoinedEnv.LocToVal.insert({Loc, Val});
293       continue;
294     }
295 
296     if (Value *MergedVal = mergeDistinctValues(
297             Loc->getType(), Val, *this, It->second, Other, JoinedEnv, Model))
298       JoinedEnv.LocToVal.insert({Loc, MergedVal});
299   }
300   if (LocToVal.size() != JoinedEnv.LocToVal.size())
301     Effect = LatticeJoinEffect::Changed;
302 
303   *this = std::move(JoinedEnv);
304 
305   return Effect;
306 }
307 
308 StorageLocation &Environment::createStorageLocation(QualType Type) {
309   assert(!Type.isNull());
310   if (Type->isStructureOrClassType() || Type->isUnionType()) {
311     // FIXME: Explore options to avoid eager initialization of fields as some of
312     // them might not be needed for a particular analysis.
313     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
314     for (const FieldDecl *Field : getObjectFields(Type))
315       FieldLocs.insert({Field, &createStorageLocation(Field->getType())});
316     return takeOwnership(
317         std::make_unique<AggregateStorageLocation>(Type, std::move(FieldLocs)));
318   }
319   return takeOwnership(std::make_unique<ScalarStorageLocation>(Type));
320 }
321 
322 StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
323   // Evaluated declarations are always assigned the same storage locations to
324   // ensure that the environment stabilizes across loop iterations. Storage
325   // locations for evaluated declarations are stored in the analysis context.
326   if (auto *Loc = DACtx->getStorageLocation(D))
327     return *Loc;
328   auto &Loc = createStorageLocation(D.getType());
329   DACtx->setStorageLocation(D, Loc);
330   return Loc;
331 }
332 
333 StorageLocation &Environment::createStorageLocation(const Expr &E) {
334   // Evaluated expressions are always assigned the same storage locations to
335   // ensure that the environment stabilizes across loop iterations. Storage
336   // locations for evaluated expressions are stored in the analysis context.
337   if (auto *Loc = DACtx->getStorageLocation(E))
338     return *Loc;
339   auto &Loc = createStorageLocation(E.getType());
340   DACtx->setStorageLocation(E, Loc);
341   return Loc;
342 }
343 
344 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
345   assert(DeclToLoc.find(&D) == DeclToLoc.end());
346   DeclToLoc[&D] = &Loc;
347 }
348 
349 StorageLocation *Environment::getStorageLocation(const ValueDecl &D,
350                                                  SkipPast SP) const {
351   auto It = DeclToLoc.find(&D);
352   return It == DeclToLoc.end() ? nullptr : &skip(*It->second, SP);
353 }
354 
355 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
356   const Expr &CanonE = ignoreCFGOmittedNodes(E);
357   assert(ExprToLoc.find(&CanonE) == ExprToLoc.end());
358   ExprToLoc[&CanonE] = &Loc;
359 }
360 
361 StorageLocation *Environment::getStorageLocation(const Expr &E,
362                                                  SkipPast SP) const {
363   // FIXME: Add a test with parens.
364   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
365   return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP);
366 }
367 
368 StorageLocation *Environment::getThisPointeeStorageLocation() const {
369   return DACtx->getThisPointeeStorageLocation();
370 }
371 
372 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
373   LocToVal[&Loc] = &Val;
374 
375   if (auto *StructVal = dyn_cast<StructValue>(&Val)) {
376     auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc);
377 
378     const QualType Type = AggregateLoc.getType();
379     assert(Type->isStructureOrClassType());
380 
381     for (const FieldDecl *Field : getObjectFields(Type)) {
382       assert(Field != nullptr);
383       StorageLocation &FieldLoc = AggregateLoc.getChild(*Field);
384       MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field);
385       if (auto *FieldVal = StructVal->getChild(*Field))
386         setValue(FieldLoc, *FieldVal);
387     }
388   }
389 
390   auto IT = MemberLocToStruct.find(&Loc);
391   if (IT != MemberLocToStruct.end()) {
392     // `Loc` is the location of a struct member so we need to also update the
393     // value of the member in the corresponding `StructValue`.
394 
395     assert(IT->second.first != nullptr);
396     StructValue &StructVal = *IT->second.first;
397 
398     assert(IT->second.second != nullptr);
399     const ValueDecl &Member = *IT->second.second;
400 
401     StructVal.setChild(Member, Val);
402   }
403 }
404 
405 Value *Environment::getValue(const StorageLocation &Loc) const {
406   auto It = LocToVal.find(&Loc);
407   return It == LocToVal.end() ? nullptr : It->second;
408 }
409 
410 Value *Environment::getValue(const ValueDecl &D, SkipPast SP) const {
411   auto *Loc = getStorageLocation(D, SP);
412   if (Loc == nullptr)
413     return nullptr;
414   return getValue(*Loc);
415 }
416 
417 Value *Environment::getValue(const Expr &E, SkipPast SP) const {
418   auto *Loc = getStorageLocation(E, SP);
419   if (Loc == nullptr)
420     return nullptr;
421   return getValue(*Loc);
422 }
423 
424 Value *Environment::createValue(QualType Type) {
425   llvm::DenseSet<QualType> Visited;
426   int CreatedValuesCount = 0;
427   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
428                                                 CreatedValuesCount);
429   if (CreatedValuesCount > MaxCompositeValueSize) {
430     llvm::errs() << "Attempting to initialize a huge value of type: " << Type
431                  << '\n';
432   }
433   return Val;
434 }
435 
436 Value *Environment::createValueUnlessSelfReferential(
437     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
438     int &CreatedValuesCount) {
439   assert(!Type.isNull());
440 
441   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
442   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
443       Depth > MaxCompositeValueDepth)
444     return nullptr;
445 
446   if (Type->isBooleanType()) {
447     CreatedValuesCount++;
448     return &makeAtomicBoolValue();
449   }
450 
451   if (Type->isIntegerType()) {
452     CreatedValuesCount++;
453     return &takeOwnership(std::make_unique<IntegerValue>());
454   }
455 
456   if (Type->isReferenceType()) {
457     CreatedValuesCount++;
458     QualType PointeeType = Type->castAs<ReferenceType>()->getPointeeType();
459     auto &PointeeLoc = createStorageLocation(PointeeType);
460 
461     if (!Visited.contains(PointeeType.getCanonicalType())) {
462       Visited.insert(PointeeType.getCanonicalType());
463       Value *PointeeVal = createValueUnlessSelfReferential(
464           PointeeType, Visited, Depth, CreatedValuesCount);
465       Visited.erase(PointeeType.getCanonicalType());
466 
467       if (PointeeVal != nullptr)
468         setValue(PointeeLoc, *PointeeVal);
469     }
470 
471     return &takeOwnership(std::make_unique<ReferenceValue>(PointeeLoc));
472   }
473 
474   if (Type->isPointerType()) {
475     CreatedValuesCount++;
476     QualType PointeeType = Type->castAs<PointerType>()->getPointeeType();
477     auto &PointeeLoc = createStorageLocation(PointeeType);
478 
479     if (!Visited.contains(PointeeType.getCanonicalType())) {
480       Visited.insert(PointeeType.getCanonicalType());
481       Value *PointeeVal = createValueUnlessSelfReferential(
482           PointeeType, Visited, Depth, CreatedValuesCount);
483       Visited.erase(PointeeType.getCanonicalType());
484 
485       if (PointeeVal != nullptr)
486         setValue(PointeeLoc, *PointeeVal);
487     }
488 
489     return &takeOwnership(std::make_unique<PointerValue>(PointeeLoc));
490   }
491 
492   if (Type->isStructureOrClassType()) {
493     CreatedValuesCount++;
494     // FIXME: Initialize only fields that are accessed in the context that is
495     // being analyzed.
496     llvm::DenseMap<const ValueDecl *, Value *> FieldValues;
497     for (const FieldDecl *Field : getObjectFields(Type)) {
498       assert(Field != nullptr);
499 
500       QualType FieldType = Field->getType();
501       if (Visited.contains(FieldType.getCanonicalType()))
502         continue;
503 
504       Visited.insert(FieldType.getCanonicalType());
505       if (auto *FieldValue = createValueUnlessSelfReferential(
506               FieldType, Visited, Depth + 1, CreatedValuesCount))
507         FieldValues.insert({Field, FieldValue});
508       Visited.erase(FieldType.getCanonicalType());
509     }
510 
511     return &takeOwnership(
512         std::make_unique<StructValue>(std::move(FieldValues)));
513   }
514 
515   return nullptr;
516 }
517 
518 StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const {
519   switch (SP) {
520   case SkipPast::None:
521     return Loc;
522   case SkipPast::Reference:
523     // References cannot be chained so we only need to skip past one level of
524     // indirection.
525     if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc)))
526       return Val->getPointeeLoc();
527     return Loc;
528   case SkipPast::ReferenceThenPointer:
529     StorageLocation &LocPastRef = skip(Loc, SkipPast::Reference);
530     if (auto *Val = dyn_cast_or_null<PointerValue>(getValue(LocPastRef)))
531       return Val->getPointeeLoc();
532     return LocPastRef;
533   }
534   llvm_unreachable("bad SkipPast kind");
535 }
536 
537 const StorageLocation &Environment::skip(const StorageLocation &Loc,
538                                          SkipPast SP) const {
539   return skip(*const_cast<StorageLocation *>(&Loc), SP);
540 }
541 
542 void Environment::addToFlowCondition(BoolValue &Val) {
543   DACtx->addFlowConditionConstraint(*FlowConditionToken, Val);
544 }
545 
546 bool Environment::flowConditionImplies(BoolValue &Val) const {
547   return DACtx->flowConditionImplies(*FlowConditionToken, Val);
548 }
549 
550 } // namespace dataflow
551 } // namespace clang
552