xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (revision 2f93bbb9cd7c20ea1a273cf652d852d4b641f94a)
1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/ExprCXX.h"
19 #include "clang/AST/Type.h"
20 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
21 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
22 #include "clang/Analysis/FlowSensitive/Value.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/DenseSet.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include <cassert>
28 #include <memory>
29 #include <utility>
30 
31 namespace clang {
32 namespace dataflow {
33 
34 // FIXME: convert these to parameters of the analysis or environment. Current
35 // settings have been experimentaly validated, but only for a particular
36 // analysis.
37 static constexpr int MaxCompositeValueDepth = 3;
38 static constexpr int MaxCompositeValueSize = 1000;
39 
40 /// Returns a map consisting of key-value entries that are present in both maps.
41 template <typename K, typename V>
42 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
43                                         const llvm::DenseMap<K, V> &Map2) {
44   llvm::DenseMap<K, V> Result;
45   for (auto &Entry : Map1) {
46     auto It = Map2.find(Entry.first);
47     if (It != Map2.end() && Entry.second == It->second)
48       Result.insert({Entry.first, Entry.second});
49   }
50   return Result;
51 }
52 
53 /// Returns true if and only if `Val1` is equivalent to `Val2`.
54 static bool equivalentValues(QualType Type, Value *Val1,
55                              const Environment &Env1, Value *Val2,
56                              const Environment &Env2,
57                              Environment::ValueModel &Model) {
58   if (Val1 == Val2)
59     return true;
60 
61   if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) {
62     auto *IndVal2 = cast<IndirectionValue>(Val2);
63     assert(IndVal1->getKind() == IndVal2->getKind());
64     if (&IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc())
65       return true;
66   }
67 
68   return Model.compareEquivalent(Type, *Val1, Env1, *Val2, Env2);
69 }
70 
71 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
72 /// respectively, of the same type `Type`. Merging generally produces a single
73 /// value that (soundly) approximates the two inputs, although the actual
74 /// meaning depends on `Model`.
75 static Value *mergeDistinctValues(QualType Type, Value *Val1,
76                                   const Environment &Env1, Value *Val2,
77                                   const Environment &Env2,
78                                   Environment &MergedEnv,
79                                   Environment::ValueModel &Model) {
80   // Join distinct boolean values preserving information about the constraints
81   // in the respective path conditions.
82   //
83   // FIXME: Does not work for backedges, since the two (or more) paths will not
84   // have mutually exclusive conditions.
85   if (auto *Expr1 = dyn_cast<BoolValue>(Val1)) {
86     auto *Expr2 = cast<BoolValue>(Val2);
87     return &Env1.makeOr(Env1.makeAnd(Env1.getFlowConditionToken(), *Expr1),
88                         Env1.makeAnd(Env2.getFlowConditionToken(), *Expr2));
89   }
90 
91   // FIXME: add unit tests that cover this statement.
92   if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) {
93     auto *IndVal2 = cast<IndirectionValue>(Val2);
94     assert(IndVal1->getKind() == IndVal2->getKind());
95     if (&IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc()) {
96       return Val1;
97     }
98   }
99 
100   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
101   // returns false to avoid storing unneeded values in `DACtx`.
102   if (Value *MergedVal = MergedEnv.createValue(Type))
103     if (Model.merge(Type, *Val1, Env1, *Val2, Env2, *MergedVal, MergedEnv))
104       return MergedVal;
105 
106   return nullptr;
107 }
108 
109 /// Initializes a global storage value.
110 static void initGlobalVar(const VarDecl &D, Environment &Env) {
111   if (!D.hasGlobalStorage() ||
112       Env.getStorageLocation(D, SkipPast::None) != nullptr)
113     return;
114 
115   auto &Loc = Env.createStorageLocation(D);
116   Env.setStorageLocation(D, Loc);
117   if (auto *Val = Env.createValue(D.getType()))
118     Env.setValue(Loc, *Val);
119 }
120 
121 /// Initializes a global storage value.
122 static void initGlobalVar(const Decl &D, Environment &Env) {
123   if (auto *V = dyn_cast<VarDecl>(&D))
124     initGlobalVar(*V, Env);
125 }
126 
127 /// Initializes global storage values that are declared or referenced from
128 /// sub-statements of `S`.
129 // FIXME: Add support for resetting globals after function calls to enable
130 // the implementation of sound analyses.
131 static void initGlobalVars(const Stmt &S, Environment &Env) {
132   for (auto *Child : S.children()) {
133     if (Child != nullptr)
134       initGlobalVars(*Child, Env);
135   }
136 
137   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
138     if (DS->isSingleDecl()) {
139       initGlobalVar(*DS->getSingleDecl(), Env);
140     } else {
141       for (auto *D : DS->getDeclGroup())
142         initGlobalVar(*D, Env);
143     }
144   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
145     initGlobalVar(*E->getDecl(), Env);
146   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
147     initGlobalVar(*E->getMemberDecl(), Env);
148   }
149 }
150 
151 static void
152 getFieldsFromClassHierarchy(QualType Type, bool IgnorePrivateFields,
153                             llvm::DenseSet<const FieldDecl *> &Fields) {
154   if (Type->isIncompleteType() || Type->isDependentType() ||
155       !Type->isRecordType())
156     return;
157 
158   for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) {
159     if (IgnorePrivateFields &&
160         (Field->getAccess() == AS_private ||
161          (Field->getAccess() == AS_none && Type->getAsRecordDecl()->isClass())))
162       continue;
163     Fields.insert(Field);
164   }
165   if (auto *CXXRecord = Type->getAsCXXRecordDecl()) {
166     for (const CXXBaseSpecifier &Base : CXXRecord->bases()) {
167       // Ignore private fields (including default access in C++ classes) in
168       // base classes, because they are not visible in derived classes.
169       getFieldsFromClassHierarchy(Base.getType(), /*IgnorePrivateFields=*/true,
170                                   Fields);
171     }
172   }
173 }
174 
175 /// Gets the set of all fields accesible from the type.
176 ///
177 /// FIXME: Does not precisely handle non-virtual diamond inheritance. A single
178 /// field decl will be modeled for all instances of the inherited field.
179 static llvm::DenseSet<const FieldDecl *>
180 getAccessibleObjectFields(QualType Type) {
181   llvm::DenseSet<const FieldDecl *> Fields;
182   // Don't ignore private fields for the class itself, only its super classes.
183   getFieldsFromClassHierarchy(Type, /*IgnorePrivateFields=*/false, Fields);
184   return Fields;
185 }
186 
187 Environment::Environment(DataflowAnalysisContext &DACtx)
188     : DACtx(&DACtx), FlowConditionToken(&DACtx.makeFlowConditionToken()) {}
189 
190 Environment::Environment(const Environment &Other)
191     : DACtx(Other.DACtx), DeclToLoc(Other.DeclToLoc),
192       ExprToLoc(Other.ExprToLoc), LocToVal(Other.LocToVal),
193       MemberLocToStruct(Other.MemberLocToStruct),
194       FlowConditionToken(&DACtx->forkFlowCondition(*Other.FlowConditionToken)) {
195 }
196 
197 Environment &Environment::operator=(const Environment &Other) {
198   Environment Copy(Other);
199   *this = std::move(Copy);
200   return *this;
201 }
202 
203 Environment::Environment(DataflowAnalysisContext &DACtx,
204                          const DeclContext &DeclCtx)
205     : Environment(DACtx) {
206   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
207     assert(FuncDecl->getBody() != nullptr);
208     initGlobalVars(*FuncDecl->getBody(), *this);
209     for (const auto *ParamDecl : FuncDecl->parameters()) {
210       assert(ParamDecl != nullptr);
211       auto &ParamLoc = createStorageLocation(*ParamDecl);
212       setStorageLocation(*ParamDecl, ParamLoc);
213       if (Value *ParamVal = createValue(ParamDecl->getType()))
214         setValue(ParamLoc, *ParamVal);
215     }
216   }
217 
218   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
219     if (!MethodDecl->isStatic()) {
220       QualType ThisPointeeType = MethodDecl->getThisObjectType();
221       // FIXME: Add support for union types.
222       if (!ThisPointeeType->isUnionType()) {
223         auto &ThisPointeeLoc = createStorageLocation(ThisPointeeType);
224         DACtx.setThisPointeeStorageLocation(ThisPointeeLoc);
225         if (Value *ThisPointeeVal = createValue(ThisPointeeType))
226           setValue(ThisPointeeLoc, *ThisPointeeVal);
227       }
228     }
229   }
230 }
231 
232 bool Environment::equivalentTo(const Environment &Other,
233                                Environment::ValueModel &Model) const {
234   assert(DACtx == Other.DACtx);
235 
236   if (DeclToLoc != Other.DeclToLoc)
237     return false;
238 
239   if (ExprToLoc != Other.ExprToLoc)
240     return false;
241 
242   // Compare the contents for the intersection of their domains.
243   for (auto &Entry : LocToVal) {
244     const StorageLocation *Loc = Entry.first;
245     assert(Loc != nullptr);
246 
247     Value *Val = Entry.second;
248     assert(Val != nullptr);
249 
250     auto It = Other.LocToVal.find(Loc);
251     if (It == Other.LocToVal.end())
252       continue;
253     assert(It->second != nullptr);
254 
255     if (!equivalentValues(Loc->getType(), Val, *this, It->second, Other, Model))
256       return false;
257   }
258 
259   return true;
260 }
261 
262 LatticeJoinEffect Environment::join(const Environment &Other,
263                                     Environment::ValueModel &Model) {
264   assert(DACtx == Other.DACtx);
265 
266   auto Effect = LatticeJoinEffect::Unchanged;
267 
268   Environment JoinedEnv(*DACtx);
269 
270   JoinedEnv.DeclToLoc = intersectDenseMaps(DeclToLoc, Other.DeclToLoc);
271   if (DeclToLoc.size() != JoinedEnv.DeclToLoc.size())
272     Effect = LatticeJoinEffect::Changed;
273 
274   JoinedEnv.ExprToLoc = intersectDenseMaps(ExprToLoc, Other.ExprToLoc);
275   if (ExprToLoc.size() != JoinedEnv.ExprToLoc.size())
276     Effect = LatticeJoinEffect::Changed;
277 
278   JoinedEnv.MemberLocToStruct =
279       intersectDenseMaps(MemberLocToStruct, Other.MemberLocToStruct);
280   if (MemberLocToStruct.size() != JoinedEnv.MemberLocToStruct.size())
281     Effect = LatticeJoinEffect::Changed;
282 
283   // FIXME: set `Effect` as needed.
284   JoinedEnv.FlowConditionToken = &DACtx->joinFlowConditions(
285       *FlowConditionToken, *Other.FlowConditionToken);
286 
287   for (auto &Entry : LocToVal) {
288     const StorageLocation *Loc = Entry.first;
289     assert(Loc != nullptr);
290 
291     Value *Val = Entry.second;
292     assert(Val != nullptr);
293 
294     auto It = Other.LocToVal.find(Loc);
295     if (It == Other.LocToVal.end())
296       continue;
297     assert(It->second != nullptr);
298 
299     if (Val == It->second) {
300       JoinedEnv.LocToVal.insert({Loc, Val});
301       continue;
302     }
303 
304     if (Value *MergedVal = mergeDistinctValues(
305             Loc->getType(), Val, *this, It->second, Other, JoinedEnv, Model))
306       JoinedEnv.LocToVal.insert({Loc, MergedVal});
307   }
308   if (LocToVal.size() != JoinedEnv.LocToVal.size())
309     Effect = LatticeJoinEffect::Changed;
310 
311   *this = std::move(JoinedEnv);
312 
313   return Effect;
314 }
315 
316 StorageLocation &Environment::createStorageLocation(QualType Type) {
317   assert(!Type.isNull());
318   if (Type->isStructureOrClassType() || Type->isUnionType()) {
319     // FIXME: Explore options to avoid eager initialization of fields as some of
320     // them might not be needed for a particular analysis.
321     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
322     for (const FieldDecl *Field : getAccessibleObjectFields(Type)) {
323       FieldLocs.insert({Field, &createStorageLocation(Field->getType())});
324     }
325     return takeOwnership(
326         std::make_unique<AggregateStorageLocation>(Type, std::move(FieldLocs)));
327   }
328   return takeOwnership(std::make_unique<ScalarStorageLocation>(Type));
329 }
330 
331 StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
332   // Evaluated declarations are always assigned the same storage locations to
333   // ensure that the environment stabilizes across loop iterations. Storage
334   // locations for evaluated declarations are stored in the analysis context.
335   if (auto *Loc = DACtx->getStorageLocation(D))
336     return *Loc;
337   auto &Loc = createStorageLocation(D.getType());
338   DACtx->setStorageLocation(D, Loc);
339   return Loc;
340 }
341 
342 StorageLocation &Environment::createStorageLocation(const Expr &E) {
343   // Evaluated expressions are always assigned the same storage locations to
344   // ensure that the environment stabilizes across loop iterations. Storage
345   // locations for evaluated expressions are stored in the analysis context.
346   if (auto *Loc = DACtx->getStorageLocation(E))
347     return *Loc;
348   auto &Loc = createStorageLocation(E.getType());
349   DACtx->setStorageLocation(E, Loc);
350   return Loc;
351 }
352 
353 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
354   assert(DeclToLoc.find(&D) == DeclToLoc.end());
355   DeclToLoc[&D] = &Loc;
356 }
357 
358 StorageLocation *Environment::getStorageLocation(const ValueDecl &D,
359                                                  SkipPast SP) const {
360   auto It = DeclToLoc.find(&D);
361   return It == DeclToLoc.end() ? nullptr : &skip(*It->second, SP);
362 }
363 
364 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
365   const Expr &CanonE = ignoreCFGOmittedNodes(E);
366   assert(ExprToLoc.find(&CanonE) == ExprToLoc.end());
367   ExprToLoc[&CanonE] = &Loc;
368 }
369 
370 StorageLocation *Environment::getStorageLocation(const Expr &E,
371                                                  SkipPast SP) const {
372   // FIXME: Add a test with parens.
373   auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
374   return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP);
375 }
376 
377 StorageLocation *Environment::getThisPointeeStorageLocation() const {
378   return DACtx->getThisPointeeStorageLocation();
379 }
380 
381 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
382   LocToVal[&Loc] = &Val;
383 
384   if (auto *StructVal = dyn_cast<StructValue>(&Val)) {
385     auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc);
386 
387     const QualType Type = AggregateLoc.getType();
388     assert(Type->isStructureOrClassType());
389 
390     for (const FieldDecl *Field : getAccessibleObjectFields(Type)) {
391       assert(Field != nullptr);
392       StorageLocation &FieldLoc = AggregateLoc.getChild(*Field);
393       MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field);
394       if (auto *FieldVal = StructVal->getChild(*Field))
395         setValue(FieldLoc, *FieldVal);
396     }
397   }
398 
399   auto IT = MemberLocToStruct.find(&Loc);
400   if (IT != MemberLocToStruct.end()) {
401     // `Loc` is the location of a struct member so we need to also update the
402     // value of the member in the corresponding `StructValue`.
403 
404     assert(IT->second.first != nullptr);
405     StructValue &StructVal = *IT->second.first;
406 
407     assert(IT->second.second != nullptr);
408     const ValueDecl &Member = *IT->second.second;
409 
410     StructVal.setChild(Member, Val);
411   }
412 }
413 
414 Value *Environment::getValue(const StorageLocation &Loc) const {
415   auto It = LocToVal.find(&Loc);
416   return It == LocToVal.end() ? nullptr : It->second;
417 }
418 
419 Value *Environment::getValue(const ValueDecl &D, SkipPast SP) const {
420   auto *Loc = getStorageLocation(D, SP);
421   if (Loc == nullptr)
422     return nullptr;
423   return getValue(*Loc);
424 }
425 
426 Value *Environment::getValue(const Expr &E, SkipPast SP) const {
427   auto *Loc = getStorageLocation(E, SP);
428   if (Loc == nullptr)
429     return nullptr;
430   return getValue(*Loc);
431 }
432 
433 Value *Environment::createValue(QualType Type) {
434   llvm::DenseSet<QualType> Visited;
435   int CreatedValuesCount = 0;
436   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
437                                                 CreatedValuesCount);
438   if (CreatedValuesCount > MaxCompositeValueSize) {
439     llvm::errs() << "Attempting to initialize a huge value of type: " << Type
440                  << '\n';
441   }
442   return Val;
443 }
444 
445 Value *Environment::createValueUnlessSelfReferential(
446     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
447     int &CreatedValuesCount) {
448   assert(!Type.isNull());
449 
450   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
451   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
452       Depth > MaxCompositeValueDepth)
453     return nullptr;
454 
455   if (Type->isBooleanType()) {
456     CreatedValuesCount++;
457     return &makeAtomicBoolValue();
458   }
459 
460   if (Type->isIntegerType()) {
461     CreatedValuesCount++;
462     return &takeOwnership(std::make_unique<IntegerValue>());
463   }
464 
465   if (Type->isReferenceType()) {
466     CreatedValuesCount++;
467     QualType PointeeType = Type->castAs<ReferenceType>()->getPointeeType();
468     auto &PointeeLoc = createStorageLocation(PointeeType);
469 
470     if (!Visited.contains(PointeeType.getCanonicalType())) {
471       Visited.insert(PointeeType.getCanonicalType());
472       Value *PointeeVal = createValueUnlessSelfReferential(
473           PointeeType, Visited, Depth, CreatedValuesCount);
474       Visited.erase(PointeeType.getCanonicalType());
475 
476       if (PointeeVal != nullptr)
477         setValue(PointeeLoc, *PointeeVal);
478     }
479 
480     return &takeOwnership(std::make_unique<ReferenceValue>(PointeeLoc));
481   }
482 
483   if (Type->isPointerType()) {
484     CreatedValuesCount++;
485     QualType PointeeType = Type->castAs<PointerType>()->getPointeeType();
486     auto &PointeeLoc = createStorageLocation(PointeeType);
487 
488     if (!Visited.contains(PointeeType.getCanonicalType())) {
489       Visited.insert(PointeeType.getCanonicalType());
490       Value *PointeeVal = createValueUnlessSelfReferential(
491           PointeeType, Visited, Depth, CreatedValuesCount);
492       Visited.erase(PointeeType.getCanonicalType());
493 
494       if (PointeeVal != nullptr)
495         setValue(PointeeLoc, *PointeeVal);
496     }
497 
498     return &takeOwnership(std::make_unique<PointerValue>(PointeeLoc));
499   }
500 
501   if (Type->isStructureOrClassType()) {
502     CreatedValuesCount++;
503     // FIXME: Initialize only fields that are accessed in the context that is
504     // being analyzed.
505     llvm::DenseMap<const ValueDecl *, Value *> FieldValues;
506     for (const FieldDecl *Field : getAccessibleObjectFields(Type)) {
507       assert(Field != nullptr);
508 
509       QualType FieldType = Field->getType();
510       if (Visited.contains(FieldType.getCanonicalType()))
511         continue;
512 
513       Visited.insert(FieldType.getCanonicalType());
514       if (auto *FieldValue = createValueUnlessSelfReferential(
515               FieldType, Visited, Depth + 1, CreatedValuesCount))
516         FieldValues.insert({Field, FieldValue});
517       Visited.erase(FieldType.getCanonicalType());
518     }
519 
520     return &takeOwnership(
521         std::make_unique<StructValue>(std::move(FieldValues)));
522   }
523 
524   return nullptr;
525 }
526 
527 StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const {
528   switch (SP) {
529   case SkipPast::None:
530     return Loc;
531   case SkipPast::Reference:
532     // References cannot be chained so we only need to skip past one level of
533     // indirection.
534     if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc)))
535       return Val->getPointeeLoc();
536     return Loc;
537   case SkipPast::ReferenceThenPointer:
538     StorageLocation &LocPastRef = skip(Loc, SkipPast::Reference);
539     if (auto *Val = dyn_cast_or_null<PointerValue>(getValue(LocPastRef)))
540       return Val->getPointeeLoc();
541     return LocPastRef;
542   }
543   llvm_unreachable("bad SkipPast kind");
544 }
545 
546 const StorageLocation &Environment::skip(const StorageLocation &Loc,
547                                          SkipPast SP) const {
548   return skip(*const_cast<StorageLocation *>(&Loc), SP);
549 }
550 
551 void Environment::addToFlowCondition(BoolValue &Val) {
552   DACtx->addFlowConditionConstraint(*FlowConditionToken, Val);
553 }
554 
555 bool Environment::flowConditionImplies(BoolValue &Val) const {
556   return DACtx->flowConditionImplies(*FlowConditionToken, Val);
557 }
558 
559 } // namespace dataflow
560 } // namespace clang
561