xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (revision 45643cfcc12ea6152fb9516ed24f5adf7469eb4c)
1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/ExprCXX.h"
19 #include "clang/AST/Type.h"
20 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
21 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
22 #include "clang/Analysis/FlowSensitive/Value.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/DenseSet.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include <cassert>
28 #include <memory>
29 #include <utility>
30 
31 namespace clang {
32 namespace dataflow {
33 
34 // FIXME: convert these to parameters of the analysis or environment. Current
35 // settings have been experimentaly validated, but only for a particular
36 // analysis.
37 static constexpr int MaxCompositeValueDepth = 3;
38 static constexpr int MaxCompositeValueSize = 1000;
39 
40 /// Returns a map consisting of key-value entries that are present in both maps.
41 template <typename K, typename V>
42 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
43                                         const llvm::DenseMap<K, V> &Map2) {
44   llvm::DenseMap<K, V> Result;
45   for (auto &Entry : Map1) {
46     auto It = Map2.find(Entry.first);
47     if (It != Map2.end() && Entry.second == It->second)
48       Result.insert({Entry.first, Entry.second});
49   }
50   return Result;
51 }
52 
53 /// Returns true if and only if `Val1` is equivalent to `Val2`.
54 static bool equivalentValues(QualType Type, Value *Val1,
55                              const Environment &Env1, Value *Val2,
56                              const Environment &Env2,
57                              Environment::ValueModel &Model) {
58   if (Val1 == Val2)
59     return true;
60 
61   if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) {
62     auto *IndVal2 = cast<IndirectionValue>(Val2);
63     assert(IndVal1->getKind() == IndVal2->getKind());
64     if (&IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc())
65       return true;
66   }
67 
68   return Model.compareEquivalent(Type, *Val1, Env1, *Val2, Env2);
69 }
70 
71 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
72 /// respectively, of the same type `Type`. Merging generally produces a single
73 /// value that (soundly) approximates the two inputs, although the actual
74 /// meaning depends on `Model`.
75 static Value *mergeDistinctValues(QualType Type, Value *Val1,
76                                   const Environment &Env1, Value *Val2,
77                                   const Environment &Env2,
78                                   Environment &MergedEnv,
79                                   Environment::ValueModel &Model) {
80   // Join distinct boolean values preserving information about the constraints
81   // in the respective path conditions.
82   //
83   // FIXME: Does not work for backedges, since the two (or more) paths will not
84   // have mutually exclusive conditions.
85   if (auto *Expr1 = dyn_cast<BoolValue>(Val1)) {
86     auto *Expr2 = cast<BoolValue>(Val2);
87     return &Env1.makeOr(Env1.makeAnd(Env1.getFlowConditionToken(), *Expr1),
88                         Env1.makeAnd(Env2.getFlowConditionToken(), *Expr2));
89   }
90 
91   // FIXME: add unit tests that cover this statement.
92   if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) {
93     auto *IndVal2 = cast<IndirectionValue>(Val2);
94     assert(IndVal1->getKind() == IndVal2->getKind());
95     if (&IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc()) {
96       return Val1;
97     }
98   }
99 
100   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
101   // returns false to avoid storing unneeded values in `DACtx`.
102   if (Value *MergedVal = MergedEnv.createValue(Type))
103     if (Model.merge(Type, *Val1, Env1, *Val2, Env2, *MergedVal, MergedEnv))
104       return MergedVal;
105 
106   return nullptr;
107 }
108 
109 /// Initializes a global storage value.
110 static void initGlobalVar(const VarDecl &D, Environment &Env) {
111   if (!D.hasGlobalStorage() ||
112       Env.getStorageLocation(D, SkipPast::None) != nullptr)
113     return;
114 
115   auto &Loc = Env.createStorageLocation(D);
116   Env.setStorageLocation(D, Loc);
117   if (auto *Val = Env.createValue(D.getType()))
118     Env.setValue(Loc, *Val);
119 }
120 
121 /// Initializes a global storage value.
122 static void initGlobalVar(const Decl &D, Environment &Env) {
123   if (auto *V = dyn_cast<VarDecl>(&D))
124     initGlobalVar(*V, Env);
125 }
126 
127 /// Initializes global storage values that are declared or referenced from
128 /// sub-statements of `S`.
129 // FIXME: Add support for resetting globals after function calls to enable
130 // the implementation of sound analyses.
131 static void initGlobalVars(const Stmt &S, Environment &Env) {
132   for (auto *Child : S.children()) {
133     if (Child != nullptr)
134       initGlobalVars(*Child, Env);
135   }
136 
137   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
138     if (DS->isSingleDecl()) {
139       initGlobalVar(*DS->getSingleDecl(), Env);
140     } else {
141       for (auto *D : DS->getDeclGroup())
142         initGlobalVar(*D, Env);
143     }
144   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
145     initGlobalVar(*E->getDecl(), Env);
146   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
147     initGlobalVar(*E->getMemberDecl(), Env);
148   }
149 }
150 
151 static void
152 getFieldsFromClassHierarchy(QualType Type, bool IgnorePrivateFields,
153                             llvm::DenseSet<const FieldDecl *> &Fields) {
154   if (Type->isIncompleteType() || Type->isDependentType() ||
155       !Type->isRecordType())
156     return;
157 
158   for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) {
159     if (IgnorePrivateFields &&
160         (Field->getAccess() == AS_private ||
161          (Field->getAccess() == AS_none && Type->getAsRecordDecl()->isClass())))
162       continue;
163     Fields.insert(Field);
164   }
165   if (auto *CXXRecord = Type->getAsCXXRecordDecl()) {
166     for (const CXXBaseSpecifier &Base : CXXRecord->bases()) {
167       // Ignore private fields (including default access in C++ classes) in
168       // base classes, because they are not visible in derived classes.
169       getFieldsFromClassHierarchy(Base.getType(), /*IgnorePrivateFields=*/true,
170                                   Fields);
171     }
172   }
173 }
174 
175 /// Gets the set of all fields accesible from the type.
176 ///
177 /// FIXME: Does not precisely handle non-virtual diamond inheritance. A single
178 /// field decl will be modeled for all instances of the inherited field.
179 static llvm::DenseSet<const FieldDecl *>
180 getAccessibleObjectFields(QualType Type) {
181   llvm::DenseSet<const FieldDecl *> Fields;
182   // Don't ignore private fields for the class itself, only its super classes.
183   getFieldsFromClassHierarchy(Type, /*IgnorePrivateFields=*/false, Fields);
184   return Fields;
185 }
186 
187 Environment::Environment(DataflowAnalysisContext &DACtx)
188     : DACtx(&DACtx), FlowConditionToken(&DACtx.makeFlowConditionToken()) {}
189 
190 Environment::Environment(const Environment &Other)
191     : DACtx(Other.DACtx), DeclToLoc(Other.DeclToLoc),
192       ExprToLoc(Other.ExprToLoc), LocToVal(Other.LocToVal),
193       MemberLocToStruct(Other.MemberLocToStruct),
194       FlowConditionToken(&DACtx->forkFlowCondition(*Other.FlowConditionToken)) {
195 }
196 
197 Environment &Environment::operator=(const Environment &Other) {
198   Environment Copy(Other);
199   *this = std::move(Copy);
200   return *this;
201 }
202 
203 Environment::Environment(DataflowAnalysisContext &DACtx,
204                          const DeclContext &DeclCtx)
205     : Environment(DACtx) {
206   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
207     assert(FuncDecl->getBody() != nullptr);
208     initGlobalVars(*FuncDecl->getBody(), *this);
209     for (const auto *ParamDecl : FuncDecl->parameters()) {
210       assert(ParamDecl != nullptr);
211       auto &ParamLoc = createStorageLocation(*ParamDecl);
212       setStorageLocation(*ParamDecl, ParamLoc);
213       if (Value *ParamVal = createValue(ParamDecl->getType()))
214         setValue(ParamLoc, *ParamVal);
215     }
216   }
217 
218   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
219     if (!MethodDecl->isStatic()) {
220       QualType ThisPointeeType = MethodDecl->getThisObjectType();
221       // FIXME: Add support for union types.
222       if (!ThisPointeeType->isUnionType()) {
223         auto &ThisPointeeLoc = createStorageLocation(ThisPointeeType);
224         DACtx.setThisPointeeStorageLocation(ThisPointeeLoc);
225         if (Value *ThisPointeeVal = createValue(ThisPointeeType))
226           setValue(ThisPointeeLoc, *ThisPointeeVal);
227       }
228     }
229   }
230 }
231 
232 bool Environment::equivalentTo(const Environment &Other,
233                                Environment::ValueModel &Model) const {
234   assert(DACtx == Other.DACtx);
235 
236   if (DeclToLoc != Other.DeclToLoc)
237     return false;
238 
239   if (ExprToLoc != Other.ExprToLoc)
240     return false;
241 
242   if (MemberLocToStruct != Other.MemberLocToStruct)
243     return false;
244 
245   // Compare the contents for the intersection of their domains.
246   for (auto &Entry : LocToVal) {
247     const StorageLocation *Loc = Entry.first;
248     assert(Loc != nullptr);
249 
250     Value *Val = Entry.second;
251     assert(Val != nullptr);
252 
253     auto It = Other.LocToVal.find(Loc);
254     if (It == Other.LocToVal.end())
255       continue;
256     assert(It->second != nullptr);
257 
258     if (!equivalentValues(Loc->getType(), Val, *this, It->second, Other, Model))
259       return false;
260   }
261 
262   return true;
263 }
264 
265 LatticeJoinEffect Environment::join(const Environment &Other,
266                                     Environment::ValueModel &Model) {
267   assert(DACtx == Other.DACtx);
268 
269   auto Effect = LatticeJoinEffect::Unchanged;
270 
271   Environment JoinedEnv(*DACtx);
272 
273   JoinedEnv.DeclToLoc = intersectDenseMaps(DeclToLoc, Other.DeclToLoc);
274   if (DeclToLoc.size() != JoinedEnv.DeclToLoc.size())
275     Effect = LatticeJoinEffect::Changed;
276 
277   JoinedEnv.ExprToLoc = intersectDenseMaps(ExprToLoc, Other.ExprToLoc);
278   if (ExprToLoc.size() != JoinedEnv.ExprToLoc.size())
279     Effect = LatticeJoinEffect::Changed;
280 
281   JoinedEnv.MemberLocToStruct =
282       intersectDenseMaps(MemberLocToStruct, Other.MemberLocToStruct);
283   if (MemberLocToStruct.size() != JoinedEnv.MemberLocToStruct.size())
284     Effect = LatticeJoinEffect::Changed;
285 
286   // FIXME: set `Effect` as needed.
287   JoinedEnv.FlowConditionToken = &DACtx->joinFlowConditions(
288       *FlowConditionToken, *Other.FlowConditionToken);
289 
290   for (auto &Entry : LocToVal) {
291     const StorageLocation *Loc = Entry.first;
292     assert(Loc != nullptr);
293 
294     Value *Val = Entry.second;
295     assert(Val != nullptr);
296 
297     auto It = Other.LocToVal.find(Loc);
298     if (It == Other.LocToVal.end())
299       continue;
300     assert(It->second != nullptr);
301 
302     if (Val == It->second) {
303       JoinedEnv.LocToVal.insert({Loc, Val});
304       continue;
305     }
306 
307     if (Value *MergedVal = mergeDistinctValues(
308             Loc->getType(), Val, *this, It->second, Other, JoinedEnv, Model))
309       JoinedEnv.LocToVal.insert({Loc, MergedVal});
310   }
311   if (LocToVal.size() != JoinedEnv.LocToVal.size())
312     Effect = LatticeJoinEffect::Changed;
313 
314   *this = std::move(JoinedEnv);
315 
316   return Effect;
317 }
318 
319 StorageLocation &Environment::createStorageLocation(QualType Type) {
320   assert(!Type.isNull());
321   if (Type->isStructureOrClassType() || Type->isUnionType()) {
322     // FIXME: Explore options to avoid eager initialization of fields as some of
323     // them might not be needed for a particular analysis.
324     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
325     for (const FieldDecl *Field : getAccessibleObjectFields(Type)) {
326       FieldLocs.insert({Field, &createStorageLocation(Field->getType())});
327     }
328     return takeOwnership(
329         std::make_unique<AggregateStorageLocation>(Type, std::move(FieldLocs)));
330   }
331   return takeOwnership(std::make_unique<ScalarStorageLocation>(Type));
332 }
333 
334 StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
335   // Evaluated declarations are always assigned the same storage locations to
336   // ensure that the environment stabilizes across loop iterations. Storage
337   // locations for evaluated declarations are stored in the analysis context.
338   if (auto *Loc = DACtx->getStorageLocation(D))
339     return *Loc;
340   auto &Loc = createStorageLocation(D.getType());
341   DACtx->setStorageLocation(D, Loc);
342   return Loc;
343 }
344 
345 StorageLocation &Environment::createStorageLocation(const Expr &E) {
346   // Evaluated expressions are always assigned the same storage locations to
347   // ensure that the environment stabilizes across loop iterations. Storage
348   // locations for evaluated expressions are stored in the analysis context.
349   if (auto *Loc = DACtx->getStorageLocation(E))
350     return *Loc;
351   auto &Loc = createStorageLocation(E.getType());
352   DACtx->setStorageLocation(E, Loc);
353   return Loc;
354 }
355 
356 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
357   assert(DeclToLoc.find(&D) == DeclToLoc.end());
358   DeclToLoc[&D] = &Loc;
359 }
360 
361 StorageLocation *Environment::getStorageLocation(const ValueDecl &D,
362                                                  SkipPast SP) const {
363   auto It = DeclToLoc.find(&D);
364   return It == DeclToLoc.end() ? nullptr : &skip(*It->second, SP);
365 }
366 
367 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
368   const Expr &CanonE = ignoreCFGOmittedNodes(E);
369   assert(ExprToLoc.find(&CanonE) == ExprToLoc.end());
370   ExprToLoc[&CanonE] = &Loc;
371 }
372 
373 StorageLocation *Environment::getStorageLocation(const Expr &E,
374                                                  SkipPast SP) const {
375   // FIXME: Add a test with parens.
376   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
377   return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP);
378 }
379 
380 StorageLocation *Environment::getThisPointeeStorageLocation() const {
381   return DACtx->getThisPointeeStorageLocation();
382 }
383 
384 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
385   LocToVal[&Loc] = &Val;
386 
387   if (auto *StructVal = dyn_cast<StructValue>(&Val)) {
388     auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc);
389 
390     const QualType Type = AggregateLoc.getType();
391     assert(Type->isStructureOrClassType());
392 
393     for (const FieldDecl *Field : getAccessibleObjectFields(Type)) {
394       assert(Field != nullptr);
395       StorageLocation &FieldLoc = AggregateLoc.getChild(*Field);
396       MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field);
397       if (auto *FieldVal = StructVal->getChild(*Field))
398         setValue(FieldLoc, *FieldVal);
399     }
400   }
401 
402   auto IT = MemberLocToStruct.find(&Loc);
403   if (IT != MemberLocToStruct.end()) {
404     // `Loc` is the location of a struct member so we need to also update the
405     // value of the member in the corresponding `StructValue`.
406 
407     assert(IT->second.first != nullptr);
408     StructValue &StructVal = *IT->second.first;
409 
410     assert(IT->second.second != nullptr);
411     const ValueDecl &Member = *IT->second.second;
412 
413     StructVal.setChild(Member, Val);
414   }
415 }
416 
417 Value *Environment::getValue(const StorageLocation &Loc) const {
418   auto It = LocToVal.find(&Loc);
419   return It == LocToVal.end() ? nullptr : It->second;
420 }
421 
422 Value *Environment::getValue(const ValueDecl &D, SkipPast SP) const {
423   auto *Loc = getStorageLocation(D, SP);
424   if (Loc == nullptr)
425     return nullptr;
426   return getValue(*Loc);
427 }
428 
429 Value *Environment::getValue(const Expr &E, SkipPast SP) const {
430   auto *Loc = getStorageLocation(E, SP);
431   if (Loc == nullptr)
432     return nullptr;
433   return getValue(*Loc);
434 }
435 
436 Value *Environment::createValue(QualType Type) {
437   llvm::DenseSet<QualType> Visited;
438   int CreatedValuesCount = 0;
439   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
440                                                 CreatedValuesCount);
441   if (CreatedValuesCount > MaxCompositeValueSize) {
442     llvm::errs() << "Attempting to initialize a huge value of type: " << Type
443                  << '\n';
444   }
445   return Val;
446 }
447 
448 Value *Environment::createValueUnlessSelfReferential(
449     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
450     int &CreatedValuesCount) {
451   assert(!Type.isNull());
452 
453   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
454   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
455       Depth > MaxCompositeValueDepth)
456     return nullptr;
457 
458   if (Type->isBooleanType()) {
459     CreatedValuesCount++;
460     return &makeAtomicBoolValue();
461   }
462 
463   if (Type->isIntegerType()) {
464     CreatedValuesCount++;
465     return &takeOwnership(std::make_unique<IntegerValue>());
466   }
467 
468   if (Type->isReferenceType()) {
469     CreatedValuesCount++;
470     QualType PointeeType = Type->castAs<ReferenceType>()->getPointeeType();
471     auto &PointeeLoc = createStorageLocation(PointeeType);
472 
473     if (!Visited.contains(PointeeType.getCanonicalType())) {
474       Visited.insert(PointeeType.getCanonicalType());
475       Value *PointeeVal = createValueUnlessSelfReferential(
476           PointeeType, Visited, Depth, CreatedValuesCount);
477       Visited.erase(PointeeType.getCanonicalType());
478 
479       if (PointeeVal != nullptr)
480         setValue(PointeeLoc, *PointeeVal);
481     }
482 
483     return &takeOwnership(std::make_unique<ReferenceValue>(PointeeLoc));
484   }
485 
486   if (Type->isPointerType()) {
487     CreatedValuesCount++;
488     QualType PointeeType = Type->castAs<PointerType>()->getPointeeType();
489     auto &PointeeLoc = createStorageLocation(PointeeType);
490 
491     if (!Visited.contains(PointeeType.getCanonicalType())) {
492       Visited.insert(PointeeType.getCanonicalType());
493       Value *PointeeVal = createValueUnlessSelfReferential(
494           PointeeType, Visited, Depth, CreatedValuesCount);
495       Visited.erase(PointeeType.getCanonicalType());
496 
497       if (PointeeVal != nullptr)
498         setValue(PointeeLoc, *PointeeVal);
499     }
500 
501     return &takeOwnership(std::make_unique<PointerValue>(PointeeLoc));
502   }
503 
504   if (Type->isStructureOrClassType()) {
505     CreatedValuesCount++;
506     // FIXME: Initialize only fields that are accessed in the context that is
507     // being analyzed.
508     llvm::DenseMap<const ValueDecl *, Value *> FieldValues;
509     for (const FieldDecl *Field : getAccessibleObjectFields(Type)) {
510       assert(Field != nullptr);
511 
512       QualType FieldType = Field->getType();
513       if (Visited.contains(FieldType.getCanonicalType()))
514         continue;
515 
516       Visited.insert(FieldType.getCanonicalType());
517       if (auto *FieldValue = createValueUnlessSelfReferential(
518               FieldType, Visited, Depth + 1, CreatedValuesCount))
519         FieldValues.insert({Field, FieldValue});
520       Visited.erase(FieldType.getCanonicalType());
521     }
522 
523     return &takeOwnership(
524         std::make_unique<StructValue>(std::move(FieldValues)));
525   }
526 
527   return nullptr;
528 }
529 
530 StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const {
531   switch (SP) {
532   case SkipPast::None:
533     return Loc;
534   case SkipPast::Reference:
535     // References cannot be chained so we only need to skip past one level of
536     // indirection.
537     if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc)))
538       return Val->getPointeeLoc();
539     return Loc;
540   case SkipPast::ReferenceThenPointer:
541     StorageLocation &LocPastRef = skip(Loc, SkipPast::Reference);
542     if (auto *Val = dyn_cast_or_null<PointerValue>(getValue(LocPastRef)))
543       return Val->getPointeeLoc();
544     return LocPastRef;
545   }
546   llvm_unreachable("bad SkipPast kind");
547 }
548 
549 const StorageLocation &Environment::skip(const StorageLocation &Loc,
550                                          SkipPast SP) const {
551   return skip(*const_cast<StorageLocation *>(&Loc), SP);
552 }
553 
554 void Environment::addToFlowCondition(BoolValue &Val) {
555   DACtx->addFlowConditionConstraint(*FlowConditionToken, Val);
556 }
557 
558 bool Environment::flowConditionImplies(BoolValue &Val) const {
559   return DACtx->flowConditionImplies(*FlowConditionToken, Val);
560 }
561 
562 } // namespace dataflow
563 } // namespace clang
564