xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (revision 62b2a47a9f15ed2f1dc4b39c924341c7b9bd7cf8)
1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/ExprCXX.h"
19 #include "clang/AST/Type.h"
20 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
21 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
22 #include "clang/Analysis/FlowSensitive/Value.h"
23 #include "llvm/ADT/DenseMap.h"
24 #include "llvm/ADT/DenseSet.h"
25 #include "llvm/Support/Casting.h"
26 #include "llvm/Support/ErrorHandling.h"
27 #include <cassert>
28 #include <memory>
29 #include <utility>
30 
31 namespace clang {
32 namespace dataflow {
33 
34 // FIXME: convert these to parameters of the analysis or environment. Current
35 // settings have been experimentaly validated, but only for a particular
36 // analysis.
37 static constexpr int MaxCompositeValueDepth = 3;
38 static constexpr int MaxCompositeValueSize = 1000;
39 
40 /// Returns a map consisting of key-value entries that are present in both maps.
41 template <typename K, typename V>
42 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
43                                         const llvm::DenseMap<K, V> &Map2) {
44   llvm::DenseMap<K, V> Result;
45   for (auto &Entry : Map1) {
46     auto It = Map2.find(Entry.first);
47     if (It != Map2.end() && Entry.second == It->second)
48       Result.insert({Entry.first, Entry.second});
49   }
50   return Result;
51 }
52 
53 /// Returns true if and only if `Val1` is equivalent to `Val2`.
54 static bool equivalentValues(QualType Type, Value *Val1,
55                              const Environment &Env1, Value *Val2,
56                              const Environment &Env2,
57                              Environment::ValueModel &Model) {
58   if (Val1 == Val2)
59     return true;
60 
61   if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) {
62     auto *IndVal2 = cast<IndirectionValue>(Val2);
63     assert(IndVal1->getKind() == IndVal2->getKind());
64     if (&IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc())
65       return true;
66   }
67 
68   return Model.compareEquivalent(Type, *Val1, Env1, *Val2, Env2);
69 }
70 
71 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
72 /// respectively, of the same type `Type`. Merging generally produces a single
73 /// value that (soundly) approximates the two inputs, although the actual
74 /// meaning depends on `Model`.
75 static Value *mergeDistinctValues(QualType Type, Value *Val1,
76                                   const Environment &Env1, Value *Val2,
77                                   const Environment &Env2,
78                                   Environment &MergedEnv,
79                                   Environment::ValueModel &Model) {
80   // Join distinct boolean values preserving information about the constraints
81   // in the respective path conditions.
82   //
83   // FIXME: Does not work for backedges, since the two (or more) paths will not
84   // have mutually exclusive conditions.
85   if (auto *Expr1 = dyn_cast<BoolValue>(Val1)) {
86     auto *Expr2 = cast<BoolValue>(Val2);
87     return &Env1.makeOr(Env1.makeAnd(Env1.getFlowConditionToken(), *Expr1),
88                         Env1.makeAnd(Env2.getFlowConditionToken(), *Expr2));
89   }
90 
91   // FIXME: add unit tests that cover this statement.
92   if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) {
93     auto *IndVal2 = cast<IndirectionValue>(Val2);
94     assert(IndVal1->getKind() == IndVal2->getKind());
95     if (&IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc()) {
96       return Val1;
97     }
98   }
99 
100   // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge`
101   // returns false to avoid storing unneeded values in `DACtx`.
102   if (Value *MergedVal = MergedEnv.createValue(Type))
103     if (Model.merge(Type, *Val1, Env1, *Val2, Env2, *MergedVal, MergedEnv))
104       return MergedVal;
105 
106   return nullptr;
107 }
108 
109 /// Initializes a global storage value.
110 static void initGlobalVar(const VarDecl &D, Environment &Env) {
111   if (!D.hasGlobalStorage() ||
112       Env.getStorageLocation(D, SkipPast::None) != nullptr)
113     return;
114 
115   auto &Loc = Env.createStorageLocation(D);
116   Env.setStorageLocation(D, Loc);
117   if (auto *Val = Env.createValue(D.getType()))
118     Env.setValue(Loc, *Val);
119 }
120 
121 /// Initializes a global storage value.
122 static void initGlobalVar(const Decl &D, Environment &Env) {
123   if (auto *V = dyn_cast<VarDecl>(&D))
124     initGlobalVar(*V, Env);
125 }
126 
127 /// Initializes global storage values that are declared or referenced from
128 /// sub-statements of `S`.
129 // FIXME: Add support for resetting globals after function calls to enable
130 // the implementation of sound analyses.
131 static void initGlobalVars(const Stmt &S, Environment &Env) {
132   for (auto *Child : S.children()) {
133     if (Child != nullptr)
134       initGlobalVars(*Child, Env);
135   }
136 
137   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
138     if (DS->isSingleDecl()) {
139       initGlobalVar(*DS->getSingleDecl(), Env);
140     } else {
141       for (auto *D : DS->getDeclGroup())
142         initGlobalVar(*D, Env);
143     }
144   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
145     initGlobalVar(*E->getDecl(), Env);
146   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
147     initGlobalVar(*E->getMemberDecl(), Env);
148   }
149 }
150 
151 static void
152 getFieldsFromClassHierarchy(QualType Type, bool IgnorePrivateFields,
153                             llvm::DenseSet<const FieldDecl *> &Fields) {
154   if (Type->isIncompleteType() || Type->isDependentType() ||
155       !Type->isRecordType())
156     return;
157 
158   for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) {
159     if (IgnorePrivateFields &&
160         (Field->getAccess() == AS_private ||
161          (Field->getAccess() == AS_none && Type->getAsRecordDecl()->isClass())))
162       continue;
163     Fields.insert(Field);
164   }
165   if (auto *CXXRecord = Type->getAsCXXRecordDecl()) {
166     for (const CXXBaseSpecifier &Base : CXXRecord->bases()) {
167       // Ignore private fields (including default access in C++ classes) in
168       // base classes, because they are not visible in derived classes.
169       getFieldsFromClassHierarchy(Base.getType(), /*IgnorePrivateFields=*/true,
170                                   Fields);
171     }
172   }
173 }
174 
175 /// Gets the set of all fields accesible from the type.
176 ///
177 /// FIXME: Does not precisely handle non-virtual diamond inheritance. A single
178 /// field decl will be modeled for all instances of the inherited field.
179 static llvm::DenseSet<const FieldDecl *>
180 getAccessibleObjectFields(QualType Type) {
181   llvm::DenseSet<const FieldDecl *> Fields;
182   // Don't ignore private fields for the class itself, only its super classes.
183   getFieldsFromClassHierarchy(Type, /*IgnorePrivateFields=*/false, Fields);
184   return Fields;
185 }
186 
187 Environment::Environment(DataflowAnalysisContext &DACtx)
188     : DACtx(&DACtx), FlowConditionToken(&DACtx.makeFlowConditionToken()) {}
189 
190 Environment::Environment(const Environment &Other)
191     : DACtx(Other.DACtx), DeclToLoc(Other.DeclToLoc),
192       ExprToLoc(Other.ExprToLoc), LocToVal(Other.LocToVal),
193       MemberLocToStruct(Other.MemberLocToStruct),
194       FlowConditionToken(&DACtx->forkFlowCondition(*Other.FlowConditionToken)) {
195 }
196 
197 Environment &Environment::operator=(const Environment &Other) {
198   Environment Copy(Other);
199   *this = std::move(Copy);
200   return *this;
201 }
202 
203 Environment::Environment(DataflowAnalysisContext &DACtx,
204                          const DeclContext &DeclCtx)
205     : Environment(DACtx) {
206   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
207     assert(FuncDecl->getBody() != nullptr);
208     initGlobalVars(*FuncDecl->getBody(), *this);
209     for (const auto *ParamDecl : FuncDecl->parameters()) {
210       assert(ParamDecl != nullptr);
211       auto &ParamLoc = createStorageLocation(*ParamDecl);
212       setStorageLocation(*ParamDecl, ParamLoc);
213       if (Value *ParamVal = createValue(ParamDecl->getType()))
214         setValue(ParamLoc, *ParamVal);
215     }
216   }
217 
218   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
219     if (!MethodDecl->isStatic()) {
220       QualType ThisPointeeType = MethodDecl->getThisObjectType();
221       // FIXME: Add support for union types.
222       if (!ThisPointeeType->isUnionType()) {
223         auto &ThisPointeeLoc = createStorageLocation(ThisPointeeType);
224         DACtx.setThisPointeeStorageLocation(ThisPointeeLoc);
225         if (Value *ThisPointeeVal = createValue(ThisPointeeType))
226           setValue(ThisPointeeLoc, *ThisPointeeVal);
227       }
228     }
229   }
230 }
231 
232 bool Environment::equivalentTo(const Environment &Other,
233                                Environment::ValueModel &Model) const {
234   assert(DACtx == Other.DACtx);
235 
236   if (DeclToLoc != Other.DeclToLoc)
237     return false;
238 
239   if (ExprToLoc != Other.ExprToLoc)
240     return false;
241 
242   if (MemberLocToStruct != Other.MemberLocToStruct)
243     return false;
244 
245   // Compare the contents for the intersection of their domains.
246   for (auto &Entry : LocToVal) {
247     const StorageLocation *Loc = Entry.first;
248     assert(Loc != nullptr);
249 
250     Value *Val = Entry.second;
251     assert(Val != nullptr);
252 
253     auto It = Other.LocToVal.find(Loc);
254     if (It == Other.LocToVal.end())
255       continue;
256     assert(It->second != nullptr);
257 
258     if (!equivalentValues(Loc->getType(), Val, *this, It->second, Other, Model))
259       return false;
260   }
261 
262   return true;
263 }
264 
265 LatticeJoinEffect Environment::join(const Environment &Other,
266                                     Environment::ValueModel &Model) {
267   assert(DACtx == Other.DACtx);
268 
269   auto Effect = LatticeJoinEffect::Unchanged;
270 
271   Environment JoinedEnv(*DACtx);
272 
273   JoinedEnv.DeclToLoc = intersectDenseMaps(DeclToLoc, Other.DeclToLoc);
274   if (DeclToLoc.size() != JoinedEnv.DeclToLoc.size())
275     Effect = LatticeJoinEffect::Changed;
276 
277   JoinedEnv.ExprToLoc = intersectDenseMaps(ExprToLoc, Other.ExprToLoc);
278   if (ExprToLoc.size() != JoinedEnv.ExprToLoc.size())
279     Effect = LatticeJoinEffect::Changed;
280 
281   JoinedEnv.MemberLocToStruct =
282       intersectDenseMaps(MemberLocToStruct, Other.MemberLocToStruct);
283   if (MemberLocToStruct.size() != JoinedEnv.MemberLocToStruct.size())
284     Effect = LatticeJoinEffect::Changed;
285 
286   // FIXME: set `Effect` as needed.
287   JoinedEnv.FlowConditionToken = &DACtx->joinFlowConditions(
288       *FlowConditionToken, *Other.FlowConditionToken);
289 
290   for (auto &Entry : LocToVal) {
291     const StorageLocation *Loc = Entry.first;
292     assert(Loc != nullptr);
293 
294     Value *Val = Entry.second;
295     assert(Val != nullptr);
296 
297     auto It = Other.LocToVal.find(Loc);
298     if (It == Other.LocToVal.end())
299       continue;
300     assert(It->second != nullptr);
301 
302     if (Val == It->second) {
303       JoinedEnv.LocToVal.insert({Loc, Val});
304       continue;
305     }
306 
307     if (Value *MergedVal = mergeDistinctValues(
308             Loc->getType(), Val, *this, It->second, Other, JoinedEnv, Model))
309       JoinedEnv.LocToVal.insert({Loc, MergedVal});
310   }
311   if (LocToVal.size() != JoinedEnv.LocToVal.size())
312     Effect = LatticeJoinEffect::Changed;
313 
314   *this = std::move(JoinedEnv);
315 
316   return Effect;
317 }
318 
319 StorageLocation &Environment::createStorageLocation(QualType Type) {
320   assert(!Type.isNull());
321   if (Type->isStructureOrClassType() || Type->isUnionType()) {
322     // FIXME: Explore options to avoid eager initialization of fields as some of
323     // them might not be needed for a particular analysis.
324     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
325     for (const FieldDecl *Field : getAccessibleObjectFields(Type)) {
326       FieldLocs.insert({Field, &createStorageLocation(Field->getType())});
327     }
328     return takeOwnership(
329         std::make_unique<AggregateStorageLocation>(Type, std::move(FieldLocs)));
330   }
331   return takeOwnership(std::make_unique<ScalarStorageLocation>(Type));
332 }
333 
334 StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
335   // Evaluated declarations are always assigned the same storage locations to
336   // ensure that the environment stabilizes across loop iterations. Storage
337   // locations for evaluated declarations are stored in the analysis context.
338   if (auto *Loc = DACtx->getStorageLocation(D))
339     return *Loc;
340   auto &Loc = createStorageLocation(D.getType());
341   DACtx->setStorageLocation(D, Loc);
342   return Loc;
343 }
344 
345 StorageLocation &Environment::createStorageLocation(const Expr &E) {
346   assert(!isa<ExprWithCleanups>(&E));
347   // Evaluated expressions are always assigned the same storage locations to
348   // ensure that the environment stabilizes across loop iterations. Storage
349   // locations for evaluated expressions are stored in the analysis context.
350   if (auto *Loc = DACtx->getStorageLocation(E))
351     return *Loc;
352   auto &Loc = createStorageLocation(E.getType());
353   DACtx->setStorageLocation(E, Loc);
354   return Loc;
355 }
356 
357 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
358   assert(DeclToLoc.find(&D) == DeclToLoc.end());
359   DeclToLoc[&D] = &Loc;
360 }
361 
362 StorageLocation *Environment::getStorageLocation(const ValueDecl &D,
363                                                  SkipPast SP) const {
364   auto It = DeclToLoc.find(&D);
365   return It == DeclToLoc.end() ? nullptr : &skip(*It->second, SP);
366 }
367 
368 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
369   assert(!isa<ExprWithCleanups>(&E));
370   assert(ExprToLoc.find(&E) == ExprToLoc.end());
371   ExprToLoc[&E] = &Loc;
372 }
373 
374 StorageLocation *Environment::getStorageLocation(const Expr &E,
375                                                  SkipPast SP) const {
376   assert(!isa<ExprWithCleanups>(&E));
377   // FIXME: Add a test with parens.
378   auto It = ExprToLoc.find(E.IgnoreParens());
379   return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP);
380 }
381 
382 StorageLocation *Environment::getThisPointeeStorageLocation() const {
383   return DACtx->getThisPointeeStorageLocation();
384 }
385 
386 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
387   LocToVal[&Loc] = &Val;
388 
389   if (auto *StructVal = dyn_cast<StructValue>(&Val)) {
390     auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc);
391 
392     const QualType Type = AggregateLoc.getType();
393     assert(Type->isStructureOrClassType());
394 
395     for (const FieldDecl *Field : getAccessibleObjectFields(Type)) {
396       assert(Field != nullptr);
397       StorageLocation &FieldLoc = AggregateLoc.getChild(*Field);
398       MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field);
399       if (auto *FieldVal = StructVal->getChild(*Field))
400         setValue(FieldLoc, *FieldVal);
401     }
402   }
403 
404   auto IT = MemberLocToStruct.find(&Loc);
405   if (IT != MemberLocToStruct.end()) {
406     // `Loc` is the location of a struct member so we need to also update the
407     // value of the member in the corresponding `StructValue`.
408 
409     assert(IT->second.first != nullptr);
410     StructValue &StructVal = *IT->second.first;
411 
412     assert(IT->second.second != nullptr);
413     const ValueDecl &Member = *IT->second.second;
414 
415     StructVal.setChild(Member, Val);
416   }
417 }
418 
419 Value *Environment::getValue(const StorageLocation &Loc) const {
420   auto It = LocToVal.find(&Loc);
421   return It == LocToVal.end() ? nullptr : It->second;
422 }
423 
424 Value *Environment::getValue(const ValueDecl &D, SkipPast SP) const {
425   auto *Loc = getStorageLocation(D, SP);
426   if (Loc == nullptr)
427     return nullptr;
428   return getValue(*Loc);
429 }
430 
431 Value *Environment::getValue(const Expr &E, SkipPast SP) const {
432   auto *Loc = getStorageLocation(E, SP);
433   if (Loc == nullptr)
434     return nullptr;
435   return getValue(*Loc);
436 }
437 
438 Value *Environment::createValue(QualType Type) {
439   llvm::DenseSet<QualType> Visited;
440   int CreatedValuesCount = 0;
441   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
442                                                 CreatedValuesCount);
443   if (CreatedValuesCount > MaxCompositeValueSize) {
444     llvm::errs() << "Attempting to initialize a huge value of type: " << Type
445                  << '\n';
446   }
447   return Val;
448 }
449 
450 Value *Environment::createValueUnlessSelfReferential(
451     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
452     int &CreatedValuesCount) {
453   assert(!Type.isNull());
454 
455   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
456   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
457       Depth > MaxCompositeValueDepth)
458     return nullptr;
459 
460   if (Type->isBooleanType()) {
461     CreatedValuesCount++;
462     return &makeAtomicBoolValue();
463   }
464 
465   if (Type->isIntegerType()) {
466     CreatedValuesCount++;
467     return &takeOwnership(std::make_unique<IntegerValue>());
468   }
469 
470   if (Type->isReferenceType()) {
471     CreatedValuesCount++;
472     QualType PointeeType = Type->castAs<ReferenceType>()->getPointeeType();
473     auto &PointeeLoc = createStorageLocation(PointeeType);
474 
475     if (!Visited.contains(PointeeType.getCanonicalType())) {
476       Visited.insert(PointeeType.getCanonicalType());
477       Value *PointeeVal = createValueUnlessSelfReferential(
478           PointeeType, Visited, Depth, CreatedValuesCount);
479       Visited.erase(PointeeType.getCanonicalType());
480 
481       if (PointeeVal != nullptr)
482         setValue(PointeeLoc, *PointeeVal);
483     }
484 
485     return &takeOwnership(std::make_unique<ReferenceValue>(PointeeLoc));
486   }
487 
488   if (Type->isPointerType()) {
489     CreatedValuesCount++;
490     QualType PointeeType = Type->castAs<PointerType>()->getPointeeType();
491     auto &PointeeLoc = createStorageLocation(PointeeType);
492 
493     if (!Visited.contains(PointeeType.getCanonicalType())) {
494       Visited.insert(PointeeType.getCanonicalType());
495       Value *PointeeVal = createValueUnlessSelfReferential(
496           PointeeType, Visited, Depth, CreatedValuesCount);
497       Visited.erase(PointeeType.getCanonicalType());
498 
499       if (PointeeVal != nullptr)
500         setValue(PointeeLoc, *PointeeVal);
501     }
502 
503     return &takeOwnership(std::make_unique<PointerValue>(PointeeLoc));
504   }
505 
506   if (Type->isStructureOrClassType()) {
507     CreatedValuesCount++;
508     // FIXME: Initialize only fields that are accessed in the context that is
509     // being analyzed.
510     llvm::DenseMap<const ValueDecl *, Value *> FieldValues;
511     for (const FieldDecl *Field : getAccessibleObjectFields(Type)) {
512       assert(Field != nullptr);
513 
514       QualType FieldType = Field->getType();
515       if (Visited.contains(FieldType.getCanonicalType()))
516         continue;
517 
518       Visited.insert(FieldType.getCanonicalType());
519       if (auto *FieldValue = createValueUnlessSelfReferential(
520               FieldType, Visited, Depth + 1, CreatedValuesCount))
521         FieldValues.insert({Field, FieldValue});
522       Visited.erase(FieldType.getCanonicalType());
523     }
524 
525     return &takeOwnership(
526         std::make_unique<StructValue>(std::move(FieldValues)));
527   }
528 
529   return nullptr;
530 }
531 
532 StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const {
533   switch (SP) {
534   case SkipPast::None:
535     return Loc;
536   case SkipPast::Reference:
537     // References cannot be chained so we only need to skip past one level of
538     // indirection.
539     if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc)))
540       return Val->getPointeeLoc();
541     return Loc;
542   case SkipPast::ReferenceThenPointer:
543     StorageLocation &LocPastRef = skip(Loc, SkipPast::Reference);
544     if (auto *Val = dyn_cast_or_null<PointerValue>(getValue(LocPastRef)))
545       return Val->getPointeeLoc();
546     return LocPastRef;
547   }
548   llvm_unreachable("bad SkipPast kind");
549 }
550 
551 const StorageLocation &Environment::skip(const StorageLocation &Loc,
552                                          SkipPast SP) const {
553   return skip(*const_cast<StorageLocation *>(&Loc), SP);
554 }
555 
556 void Environment::addToFlowCondition(BoolValue &Val) {
557   DACtx->addFlowConditionConstraint(*FlowConditionToken, Val);
558 }
559 
560 bool Environment::flowConditionImplies(BoolValue &Val) const {
561   return DACtx->flowConditionImplies(*FlowConditionToken, Val);
562 }
563 
564 } // namespace dataflow
565 } // namespace clang
566