xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (revision 955a05a2782ee19d6f4bce6bf5d1c4a8f591f0f3)
1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
20 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
21 #include "clang/Analysis/FlowSensitive/Value.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/DenseSet.h"
24 #include "llvm/Support/Casting.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include <cassert>
27 #include <memory>
28 #include <utility>
29 
30 namespace clang {
31 namespace dataflow {
32 
33 // FIXME: convert these to parameters of the analysis or environment. Current
34 // settings have been experimentaly validated, but only for a particular
35 // analysis.
36 static constexpr int MaxCompositeValueDepth = 3;
37 static constexpr int MaxCompositeValueSize = 1000;
38 
39 /// Returns a map consisting of key-value entries that are present in both maps.
40 template <typename K, typename V>
41 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
42                                         const llvm::DenseMap<K, V> &Map2) {
43   llvm::DenseMap<K, V> Result;
44   for (auto &Entry : Map1) {
45     auto It = Map2.find(Entry.first);
46     if (It != Map2.end() && Entry.second == It->second)
47       Result.insert({Entry.first, Entry.second});
48   }
49   return Result;
50 }
51 
52 /// Returns true if and only if `Val1` is equivalent to `Val2`.
53 static bool equivalentValues(QualType Type, Value *Val1,
54                              const Environment &Env1, Value *Val2,
55                              const Environment &Env2,
56                              Environment::ValueModel &Model) {
57   if (Val1 == Val2)
58     return true;
59 
60   if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) {
61     auto *IndVal2 = cast<IndirectionValue>(Val2);
62     assert(IndVal1->getKind() == IndVal2->getKind());
63     if (&IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc())
64       return true;
65   }
66 
67   return Model.compareEquivalent(Type, *Val1, Env1, *Val2, Env2);
68 }
69 
70 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
71 /// respectively, of the same type `Type`. Merging generally produces a single
72 /// value that (soundly) approximates the two inputs, although the actual
73 /// meaning depends on `Model`.
74 static Value *mergeDistinctValues(QualType Type, Value *Val1,
75                                   const Environment &Env1, Value *Val2,
76                                   const Environment &Env2,
77                                   Environment &MergedEnv,
78                                   Environment::ValueModel &Model) {
79   // Join distinct boolean values preserving information about the constraints
80   // in the respective path conditions.
81   //
82   // FIXME: Does not work for backedges, since the two (or more) paths will not
83   // have mutually exclusive conditions.
84   if (auto *Expr1 = dyn_cast<BoolValue>(Val1)) {
85     auto *Expr2 = cast<BoolValue>(Val2);
86     return &Env1.makeOr(Env1.makeAnd(Env1.getFlowConditionToken(), *Expr1),
87                         Env1.makeAnd(Env2.getFlowConditionToken(), *Expr2));
88   }
89 
90   // FIXME: add unit tests that cover this statement.
91   if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) {
92     auto *IndVal2 = cast<IndirectionValue>(Val2);
93     assert(IndVal1->getKind() == IndVal2->getKind());
94     if (&IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc()) {
95       return Val1;
96     }
97   }
98 
99   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
100   // returns false to avoid storing unneeded values in `DACtx`.
101   if (Value *MergedVal = MergedEnv.createValue(Type))
102     if (Model.merge(Type, *Val1, Env1, *Val2, Env2, *MergedVal, MergedEnv))
103       return MergedVal;
104 
105   return nullptr;
106 }
107 
108 /// Initializes a global storage value.
109 static void initGlobalVar(const VarDecl &D, Environment &Env) {
110   if (!D.hasGlobalStorage() ||
111       Env.getStorageLocation(D, SkipPast::None) != nullptr)
112     return;
113 
114   auto &Loc = Env.createStorageLocation(D);
115   Env.setStorageLocation(D, Loc);
116   if (auto *Val = Env.createValue(D.getType()))
117     Env.setValue(Loc, *Val);
118 }
119 
120 /// Initializes a global storage value.
121 static void initGlobalVar(const Decl &D, Environment &Env) {
122   if (auto *V = dyn_cast<VarDecl>(&D))
123     initGlobalVar(*V, Env);
124 }
125 
126 /// Initializes global storage values that are declared or referenced from
127 /// sub-statements of `S`.
128 // FIXME: Add support for resetting globals after function calls to enable
129 // the implementation of sound analyses.
130 static void initGlobalVars(const Stmt &S, Environment &Env) {
131   for (auto *Child : S.children()) {
132     if (Child != nullptr)
133       initGlobalVars(*Child, Env);
134   }
135 
136   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
137     if (DS->isSingleDecl()) {
138       initGlobalVar(*DS->getSingleDecl(), Env);
139     } else {
140       for (auto *D : DS->getDeclGroup())
141         initGlobalVar(*D, Env);
142     }
143   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
144     initGlobalVar(*E->getDecl(), Env);
145   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
146     initGlobalVar(*E->getMemberDecl(), Env);
147   }
148 }
149 
150 static void
151 getFieldsFromClassHierarchy(QualType Type, bool IgnorePrivateFields,
152                             llvm::DenseSet<const FieldDecl *> &Fields) {
153   if (Type->isIncompleteType() || Type->isDependentType() ||
154       !Type->isRecordType())
155     return;
156 
157   for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) {
158     if (IgnorePrivateFields &&
159         (Field->getAccess() == AS_private ||
160          (Field->getAccess() == AS_none && Type->getAsRecordDecl()->isClass())))
161       continue;
162     Fields.insert(Field);
163   }
164   if (auto *CXXRecord = Type->getAsCXXRecordDecl()) {
165     for (const CXXBaseSpecifier &Base : CXXRecord->bases()) {
166       // Ignore private fields (including default access in C++ classes) in
167       // base classes, because they are not visible in derived classes.
168       getFieldsFromClassHierarchy(Base.getType(), /*IgnorePrivateFields=*/true,
169                                   Fields);
170     }
171   }
172 }
173 
174 /// Gets the set of all fields accesible from the type.
175 ///
176 /// FIXME: Does not precisely handle non-virtual diamond inheritance. A single
177 /// field decl will be modeled for all instances of the inherited field.
178 static llvm::DenseSet<const FieldDecl *>
179 getAccessibleObjectFields(QualType Type) {
180   llvm::DenseSet<const FieldDecl *> Fields;
181   // Don't ignore private fields for the class itself, only its super classes.
182   getFieldsFromClassHierarchy(Type, /*IgnorePrivateFields=*/false, Fields);
183   return Fields;
184 }
185 
186 Environment::Environment(DataflowAnalysisContext &DACtx)
187     : DACtx(&DACtx), FlowConditionToken(&DACtx.makeFlowConditionToken()) {}
188 
189 Environment::Environment(const Environment &Other)
190     : DACtx(Other.DACtx), DeclToLoc(Other.DeclToLoc),
191       ExprToLoc(Other.ExprToLoc), LocToVal(Other.LocToVal),
192       MemberLocToStruct(Other.MemberLocToStruct),
193       FlowConditionToken(&DACtx->forkFlowCondition(*Other.FlowConditionToken)) {
194 }
195 
196 Environment &Environment::operator=(const Environment &Other) {
197   Environment Copy(Other);
198   *this = std::move(Copy);
199   return *this;
200 }
201 
202 Environment::Environment(DataflowAnalysisContext &DACtx,
203                          const DeclContext &DeclCtx)
204     : Environment(DACtx) {
205   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
206     assert(FuncDecl->getBody() != nullptr);
207     initGlobalVars(*FuncDecl->getBody(), *this);
208     for (const auto *ParamDecl : FuncDecl->parameters()) {
209       assert(ParamDecl != nullptr);
210       auto &ParamLoc = createStorageLocation(*ParamDecl);
211       setStorageLocation(*ParamDecl, ParamLoc);
212       if (Value *ParamVal = createValue(ParamDecl->getType()))
213         setValue(ParamLoc, *ParamVal);
214     }
215   }
216 
217   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
218     if (!MethodDecl->isStatic()) {
219       QualType ThisPointeeType = MethodDecl->getThisObjectType();
220       // FIXME: Add support for union types.
221       if (!ThisPointeeType->isUnionType()) {
222         auto &ThisPointeeLoc = createStorageLocation(ThisPointeeType);
223         DACtx.setThisPointeeStorageLocation(ThisPointeeLoc);
224         if (Value *ThisPointeeVal = createValue(ThisPointeeType))
225           setValue(ThisPointeeLoc, *ThisPointeeVal);
226       }
227     }
228   }
229 }
230 
231 bool Environment::equivalentTo(const Environment &Other,
232                                Environment::ValueModel &Model) const {
233   assert(DACtx == Other.DACtx);
234 
235   if (DeclToLoc != Other.DeclToLoc)
236     return false;
237 
238   if (ExprToLoc != Other.ExprToLoc)
239     return false;
240 
241   if (MemberLocToStruct != Other.MemberLocToStruct)
242     return false;
243 
244   // Compare the contents for the intersection of their domains.
245   for (auto &Entry : LocToVal) {
246     const StorageLocation *Loc = Entry.first;
247     assert(Loc != nullptr);
248 
249     Value *Val = Entry.second;
250     assert(Val != nullptr);
251 
252     auto It = Other.LocToVal.find(Loc);
253     if (It == Other.LocToVal.end())
254       continue;
255     assert(It->second != nullptr);
256 
257     if (!equivalentValues(Loc->getType(), Val, *this, It->second, Other, Model))
258       return false;
259   }
260 
261   return true;
262 }
263 
264 LatticeJoinEffect Environment::join(const Environment &Other,
265                                     Environment::ValueModel &Model) {
266   assert(DACtx == Other.DACtx);
267 
268   auto Effect = LatticeJoinEffect::Unchanged;
269 
270   Environment JoinedEnv(*DACtx);
271 
272   JoinedEnv.DeclToLoc = intersectDenseMaps(DeclToLoc, Other.DeclToLoc);
273   if (DeclToLoc.size() != JoinedEnv.DeclToLoc.size())
274     Effect = LatticeJoinEffect::Changed;
275 
276   JoinedEnv.ExprToLoc = intersectDenseMaps(ExprToLoc, Other.ExprToLoc);
277   if (ExprToLoc.size() != JoinedEnv.ExprToLoc.size())
278     Effect = LatticeJoinEffect::Changed;
279 
280   JoinedEnv.MemberLocToStruct =
281       intersectDenseMaps(MemberLocToStruct, Other.MemberLocToStruct);
282   if (MemberLocToStruct.size() != JoinedEnv.MemberLocToStruct.size())
283     Effect = LatticeJoinEffect::Changed;
284 
285   // FIXME: set `Effect` as needed.
286   JoinedEnv.FlowConditionToken = &DACtx->joinFlowConditions(
287       *FlowConditionToken, *Other.FlowConditionToken);
288 
289   for (auto &Entry : LocToVal) {
290     const StorageLocation *Loc = Entry.first;
291     assert(Loc != nullptr);
292 
293     Value *Val = Entry.second;
294     assert(Val != nullptr);
295 
296     auto It = Other.LocToVal.find(Loc);
297     if (It == Other.LocToVal.end())
298       continue;
299     assert(It->second != nullptr);
300 
301     if (Val == It->second) {
302       JoinedEnv.LocToVal.insert({Loc, Val});
303       continue;
304     }
305 
306     if (Value *MergedVal = mergeDistinctValues(
307             Loc->getType(), Val, *this, It->second, Other, JoinedEnv, Model))
308       JoinedEnv.LocToVal.insert({Loc, MergedVal});
309   }
310   if (LocToVal.size() != JoinedEnv.LocToVal.size())
311     Effect = LatticeJoinEffect::Changed;
312 
313   *this = std::move(JoinedEnv);
314 
315   return Effect;
316 }
317 
318 StorageLocation &Environment::createStorageLocation(QualType Type) {
319   assert(!Type.isNull());
320   if (Type->isStructureOrClassType() || Type->isUnionType()) {
321     // FIXME: Explore options to avoid eager initialization of fields as some of
322     // them might not be needed for a particular analysis.
323     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
324     for (const FieldDecl *Field : getAccessibleObjectFields(Type)) {
325       FieldLocs.insert({Field, &createStorageLocation(Field->getType())});
326     }
327     return takeOwnership(
328         std::make_unique<AggregateStorageLocation>(Type, std::move(FieldLocs)));
329   }
330   return takeOwnership(std::make_unique<ScalarStorageLocation>(Type));
331 }
332 
333 StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
334   // Evaluated declarations are always assigned the same storage locations to
335   // ensure that the environment stabilizes across loop iterations. Storage
336   // locations for evaluated declarations are stored in the analysis context.
337   if (auto *Loc = DACtx->getStorageLocation(D))
338     return *Loc;
339   auto &Loc = createStorageLocation(D.getType());
340   DACtx->setStorageLocation(D, Loc);
341   return Loc;
342 }
343 
344 StorageLocation &Environment::createStorageLocation(const Expr &E) {
345   // Evaluated expressions are always assigned the same storage locations to
346   // ensure that the environment stabilizes across loop iterations. Storage
347   // locations for evaluated expressions are stored in the analysis context.
348   if (auto *Loc = DACtx->getStorageLocation(E))
349     return *Loc;
350   auto &Loc = createStorageLocation(E.getType());
351   DACtx->setStorageLocation(E, Loc);
352   return Loc;
353 }
354 
355 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
356   assert(DeclToLoc.find(&D) == DeclToLoc.end());
357   DeclToLoc[&D] = &Loc;
358 }
359 
360 StorageLocation *Environment::getStorageLocation(const ValueDecl &D,
361                                                  SkipPast SP) const {
362   auto It = DeclToLoc.find(&D);
363   return It == DeclToLoc.end() ? nullptr : &skip(*It->second, SP);
364 }
365 
366 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
367   assert(ExprToLoc.find(&E) == ExprToLoc.end());
368   ExprToLoc[&E] = &Loc;
369 }
370 
371 StorageLocation *Environment::getStorageLocation(const Expr &E,
372                                                  SkipPast SP) const {
373   // FIXME: Add a test with parens.
374   auto It = ExprToLoc.find(E.IgnoreParens());
375   return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP);
376 }
377 
378 StorageLocation *Environment::getThisPointeeStorageLocation() const {
379   return DACtx->getThisPointeeStorageLocation();
380 }
381 
382 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
383   LocToVal[&Loc] = &Val;
384 
385   if (auto *StructVal = dyn_cast<StructValue>(&Val)) {
386     auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc);
387 
388     const QualType Type = AggregateLoc.getType();
389     assert(Type->isStructureOrClassType());
390 
391     for (const FieldDecl *Field : getAccessibleObjectFields(Type)) {
392       assert(Field != nullptr);
393       StorageLocation &FieldLoc = AggregateLoc.getChild(*Field);
394       MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field);
395       if (auto *FieldVal = StructVal->getChild(*Field))
396         setValue(FieldLoc, *FieldVal);
397     }
398   }
399 
400   auto IT = MemberLocToStruct.find(&Loc);
401   if (IT != MemberLocToStruct.end()) {
402     // `Loc` is the location of a struct member so we need to also update the
403     // value of the member in the corresponding `StructValue`.
404 
405     assert(IT->second.first != nullptr);
406     StructValue &StructVal = *IT->second.first;
407 
408     assert(IT->second.second != nullptr);
409     const ValueDecl &Member = *IT->second.second;
410 
411     StructVal.setChild(Member, Val);
412   }
413 }
414 
415 Value *Environment::getValue(const StorageLocation &Loc) const {
416   auto It = LocToVal.find(&Loc);
417   return It == LocToVal.end() ? nullptr : It->second;
418 }
419 
420 Value *Environment::getValue(const ValueDecl &D, SkipPast SP) const {
421   auto *Loc = getStorageLocation(D, SP);
422   if (Loc == nullptr)
423     return nullptr;
424   return getValue(*Loc);
425 }
426 
427 Value *Environment::getValue(const Expr &E, SkipPast SP) const {
428   auto *Loc = getStorageLocation(E, SP);
429   if (Loc == nullptr)
430     return nullptr;
431   return getValue(*Loc);
432 }
433 
434 Value *Environment::createValue(QualType Type) {
435   llvm::DenseSet<QualType> Visited;
436   int CreatedValuesCount = 0;
437   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
438                                                 CreatedValuesCount);
439   if (CreatedValuesCount > MaxCompositeValueSize) {
440     llvm::errs() << "Attempting to initialize a huge value of type: " << Type
441                  << '\n';
442   }
443   return Val;
444 }
445 
446 Value *Environment::createValueUnlessSelfReferential(
447     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
448     int &CreatedValuesCount) {
449   assert(!Type.isNull());
450 
451   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
452   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
453       Depth > MaxCompositeValueDepth)
454     return nullptr;
455 
456   if (Type->isBooleanType()) {
457     CreatedValuesCount++;
458     return &makeAtomicBoolValue();
459   }
460 
461   if (Type->isIntegerType()) {
462     CreatedValuesCount++;
463     return &takeOwnership(std::make_unique<IntegerValue>());
464   }
465 
466   if (Type->isReferenceType()) {
467     CreatedValuesCount++;
468     QualType PointeeType = Type->castAs<ReferenceType>()->getPointeeType();
469     auto &PointeeLoc = createStorageLocation(PointeeType);
470 
471     if (!Visited.contains(PointeeType.getCanonicalType())) {
472       Visited.insert(PointeeType.getCanonicalType());
473       Value *PointeeVal = createValueUnlessSelfReferential(
474           PointeeType, Visited, Depth, CreatedValuesCount);
475       Visited.erase(PointeeType.getCanonicalType());
476 
477       if (PointeeVal != nullptr)
478         setValue(PointeeLoc, *PointeeVal);
479     }
480 
481     return &takeOwnership(std::make_unique<ReferenceValue>(PointeeLoc));
482   }
483 
484   if (Type->isPointerType()) {
485     CreatedValuesCount++;
486     QualType PointeeType = Type->castAs<PointerType>()->getPointeeType();
487     auto &PointeeLoc = createStorageLocation(PointeeType);
488 
489     if (!Visited.contains(PointeeType.getCanonicalType())) {
490       Visited.insert(PointeeType.getCanonicalType());
491       Value *PointeeVal = createValueUnlessSelfReferential(
492           PointeeType, Visited, Depth, CreatedValuesCount);
493       Visited.erase(PointeeType.getCanonicalType());
494 
495       if (PointeeVal != nullptr)
496         setValue(PointeeLoc, *PointeeVal);
497     }
498 
499     return &takeOwnership(std::make_unique<PointerValue>(PointeeLoc));
500   }
501 
502   if (Type->isStructureOrClassType()) {
503     CreatedValuesCount++;
504     // FIXME: Initialize only fields that are accessed in the context that is
505     // being analyzed.
506     llvm::DenseMap<const ValueDecl *, Value *> FieldValues;
507     for (const FieldDecl *Field : getAccessibleObjectFields(Type)) {
508       assert(Field != nullptr);
509 
510       QualType FieldType = Field->getType();
511       if (Visited.contains(FieldType.getCanonicalType()))
512         continue;
513 
514       Visited.insert(FieldType.getCanonicalType());
515       if (auto *FieldValue = createValueUnlessSelfReferential(
516               FieldType, Visited, Depth + 1, CreatedValuesCount))
517         FieldValues.insert({Field, FieldValue});
518       Visited.erase(FieldType.getCanonicalType());
519     }
520 
521     return &takeOwnership(
522         std::make_unique<StructValue>(std::move(FieldValues)));
523   }
524 
525   return nullptr;
526 }
527 
528 StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const {
529   switch (SP) {
530   case SkipPast::None:
531     return Loc;
532   case SkipPast::Reference:
533     // References cannot be chained so we only need to skip past one level of
534     // indirection.
535     if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc)))
536       return Val->getPointeeLoc();
537     return Loc;
538   case SkipPast::ReferenceThenPointer:
539     StorageLocation &LocPastRef = skip(Loc, SkipPast::Reference);
540     if (auto *Val = dyn_cast_or_null<PointerValue>(getValue(LocPastRef)))
541       return Val->getPointeeLoc();
542     return LocPastRef;
543   }
544   llvm_unreachable("bad SkipPast kind");
545 }
546 
547 const StorageLocation &Environment::skip(const StorageLocation &Loc,
548                                          SkipPast SP) const {
549   return skip(*const_cast<StorageLocation *>(&Loc), SP);
550 }
551 
552 void Environment::addToFlowCondition(BoolValue &Val) {
553   DACtx->addFlowConditionConstraint(*FlowConditionToken, Val);
554 }
555 
556 bool Environment::flowConditionImplies(BoolValue &Val) const {
557   return DACtx->flowConditionImplies(*FlowConditionToken, Val);
558 }
559 
560 } // namespace dataflow
561 } // namespace clang
562