xref: /llvm-project/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp (revision a157d839c52077001f234ce5c8b0cbc05fbb429c)
1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines an Environment class that is used by dataflow analyses
10 //  that run over Control-Flow Graphs (CFGs) to keep track of the state of the
11 //  program at given program points.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
16 #include "clang/AST/Decl.h"
17 #include "clang/AST/DeclCXX.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
20 #include "clang/Analysis/FlowSensitive/StorageLocation.h"
21 #include "clang/Analysis/FlowSensitive/Value.h"
22 #include "llvm/ADT/DenseMap.h"
23 #include "llvm/ADT/DenseSet.h"
24 #include "llvm/Support/ErrorHandling.h"
25 #include <cassert>
26 #include <memory>
27 #include <utility>
28 
29 namespace clang {
30 namespace dataflow {
31 
32 // FIXME: convert these to parameters of the analysis or environment. Current
33 // settings have been experimentaly validated, but only for a particular
34 // analysis.
35 static constexpr int MaxCompositeValueDepth = 3;
36 static constexpr int MaxCompositeValueSize = 1000;
37 
38 /// Returns a map consisting of key-value entries that are present in both maps.
39 template <typename K, typename V>
40 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
41                                         const llvm::DenseMap<K, V> &Map2) {
42   llvm::DenseMap<K, V> Result;
43   for (auto &Entry : Map1) {
44     auto It = Map2.find(Entry.first);
45     if (It != Map2.end() && Entry.second == It->second)
46       Result.insert({Entry.first, Entry.second});
47   }
48   return Result;
49 }
50 
51 /// Returns true if and only if `Val1` is equivalent to `Val2`.
52 static bool equivalentValues(QualType Type, Value *Val1,
53                              const Environment &Env1, Value *Val2,
54                              const Environment &Env2,
55                              Environment::ValueModel &Model) {
56   if (Val1 == Val2)
57     return true;
58 
59   if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) {
60     auto *IndVal2 = cast<IndirectionValue>(Val2);
61     assert(IndVal1->getKind() == IndVal2->getKind());
62     return &IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc();
63   }
64 
65   return Model.compareEquivalent(Type, *Val1, Env1, *Val2, Env2);
66 }
67 
68 /// Initializes a global storage value.
69 static void initGlobalVar(const VarDecl &D, Environment &Env) {
70   if (!D.hasGlobalStorage() ||
71       Env.getStorageLocation(D, SkipPast::None) != nullptr)
72     return;
73 
74   auto &Loc = Env.createStorageLocation(D);
75   Env.setStorageLocation(D, Loc);
76   if (auto *Val = Env.createValue(D.getType()))
77     Env.setValue(Loc, *Val);
78 }
79 
80 /// Initializes a global storage value.
81 static void initGlobalVar(const Decl &D, Environment &Env) {
82   if (auto *V = dyn_cast<VarDecl>(&D))
83     initGlobalVar(*V, Env);
84 }
85 
86 /// Initializes global storage values that are declared or referenced from
87 /// sub-statements of `S`.
88 // FIXME: Add support for resetting globals after function calls to enable
89 // the implementation of sound analyses.
90 static void initGlobalVars(const Stmt &S, Environment &Env) {
91   for (auto *Child : S.children()) {
92     if (Child != nullptr)
93       initGlobalVars(*Child, Env);
94   }
95 
96   if (auto *DS = dyn_cast<DeclStmt>(&S)) {
97     if (DS->isSingleDecl()) {
98       initGlobalVar(*DS->getSingleDecl(), Env);
99     } else {
100       for (auto *D : DS->getDeclGroup())
101         initGlobalVar(*D, Env);
102     }
103   } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
104     initGlobalVar(*E->getDecl(), Env);
105   } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
106     initGlobalVar(*E->getMemberDecl(), Env);
107   }
108 }
109 
110 /// Returns constraints that represent the disjunction of `Constraints1` and
111 /// `Constraints2`.
112 ///
113 /// Requirements:
114 ///
115 ///  The elements of `Constraints1` and `Constraints2` must not be null.
116 llvm::DenseSet<BoolValue *>
117 joinConstraints(DataflowAnalysisContext *Context,
118                 const llvm::DenseSet<BoolValue *> &Constraints1,
119                 const llvm::DenseSet<BoolValue *> &Constraints2) {
120   // `(X ^ Y) v (X ^ Z)` is logically equivalent to `X ^ (Y v Z)`. Therefore, to
121   // avoid unnecessarily expanding the resulting set of constraints, we will add
122   // all common constraints of `Constraints1` and `Constraints2` directly and
123   // add a disjunction of the constraints that are not common.
124 
125   llvm::DenseSet<BoolValue *> JoinedConstraints;
126 
127   if (Constraints1.empty() || Constraints2.empty()) {
128     // Disjunction of empty set and non-empty set is represented as empty set.
129     return JoinedConstraints;
130   }
131 
132   BoolValue *Val1 = nullptr;
133   for (BoolValue *Constraint : Constraints1) {
134     if (Constraints2.contains(Constraint)) {
135       // Add common constraints directly to `JoinedConstraints`.
136       JoinedConstraints.insert(Constraint);
137     } else if (Val1 == nullptr) {
138       Val1 = Constraint;
139     } else {
140       Val1 = &Context->getOrCreateConjunctionValue(*Val1, *Constraint);
141     }
142   }
143 
144   BoolValue *Val2 = nullptr;
145   for (BoolValue *Constraint : Constraints2) {
146     // Common constraints are added to `JoinedConstraints` above.
147     if (Constraints1.contains(Constraint)) {
148       continue;
149     }
150     if (Val2 == nullptr) {
151       Val2 = Constraint;
152     } else {
153       Val2 = &Context->getOrCreateConjunctionValue(*Val2, *Constraint);
154     }
155   }
156 
157   // An empty set of constraints (represented as a null value) is interpreted as
158   // `true` and `true v X` is logically equivalent to `true` so we need to add a
159   // constraint only if both `Val1` and `Val2` are not null.
160   if (Val1 != nullptr && Val2 != nullptr)
161     JoinedConstraints.insert(
162         &Context->getOrCreateDisjunctionValue(*Val1, *Val2));
163 
164   return JoinedConstraints;
165 }
166 
167 Environment::Environment(DataflowAnalysisContext &DACtx,
168                          const DeclContext &DeclCtx)
169     : Environment(DACtx) {
170   if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
171     assert(FuncDecl->getBody() != nullptr);
172     initGlobalVars(*FuncDecl->getBody(), *this);
173     for (const auto *ParamDecl : FuncDecl->parameters()) {
174       assert(ParamDecl != nullptr);
175       auto &ParamLoc = createStorageLocation(*ParamDecl);
176       setStorageLocation(*ParamDecl, ParamLoc);
177       if (Value *ParamVal = createValue(ParamDecl->getType()))
178         setValue(ParamLoc, *ParamVal);
179     }
180   }
181 
182   if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
183     if (!MethodDecl->isStatic()) {
184       QualType ThisPointeeType = MethodDecl->getThisObjectType();
185       // FIXME: Add support for union types.
186       if (!ThisPointeeType->isUnionType()) {
187         auto &ThisPointeeLoc = createStorageLocation(ThisPointeeType);
188         DACtx.setThisPointeeStorageLocation(ThisPointeeLoc);
189         if (Value *ThisPointeeVal = createValue(ThisPointeeType))
190           setValue(ThisPointeeLoc, *ThisPointeeVal);
191       }
192     }
193   }
194 }
195 
196 bool Environment::equivalentTo(const Environment &Other,
197                                Environment::ValueModel &Model) const {
198   assert(DACtx == Other.DACtx);
199 
200   if (DeclToLoc != Other.DeclToLoc)
201     return false;
202 
203   if (ExprToLoc != Other.ExprToLoc)
204     return false;
205 
206   if (MemberLocToStruct != Other.MemberLocToStruct)
207     return false;
208 
209   if (LocToVal.size() != Other.LocToVal.size())
210     return false;
211 
212   for (auto &Entry : LocToVal) {
213     const StorageLocation *Loc = Entry.first;
214     assert(Loc != nullptr);
215 
216     Value *Val = Entry.second;
217     assert(Val != nullptr);
218 
219     auto It = Other.LocToVal.find(Loc);
220     if (It == Other.LocToVal.end())
221       return false;
222     assert(It->second != nullptr);
223 
224     if (!equivalentValues(Loc->getType(), Val, *this, It->second, Other, Model))
225       return false;
226   }
227 
228   return true;
229 }
230 
231 LatticeJoinEffect Environment::join(const Environment &Other,
232                                     Environment::ValueModel &Model) {
233   assert(DACtx == Other.DACtx);
234 
235   auto Effect = LatticeJoinEffect::Unchanged;
236 
237   const unsigned DeclToLocSizeBefore = DeclToLoc.size();
238   DeclToLoc = intersectDenseMaps(DeclToLoc, Other.DeclToLoc);
239   if (DeclToLocSizeBefore != DeclToLoc.size())
240     Effect = LatticeJoinEffect::Changed;
241 
242   const unsigned ExprToLocSizeBefore = ExprToLoc.size();
243   ExprToLoc = intersectDenseMaps(ExprToLoc, Other.ExprToLoc);
244   if (ExprToLocSizeBefore != ExprToLoc.size())
245     Effect = LatticeJoinEffect::Changed;
246 
247   const unsigned MemberLocToStructSizeBefore = MemberLocToStruct.size();
248   MemberLocToStruct =
249       intersectDenseMaps(MemberLocToStruct, Other.MemberLocToStruct);
250   if (MemberLocToStructSizeBefore != MemberLocToStruct.size())
251     Effect = LatticeJoinEffect::Changed;
252 
253   // Move `LocToVal` so that `Environment::ValueModel::merge` can safely assign
254   // values to storage locations while this code iterates over the current
255   // assignments.
256   llvm::DenseMap<const StorageLocation *, Value *> OldLocToVal =
257       std::move(LocToVal);
258   for (auto &Entry : OldLocToVal) {
259     const StorageLocation *Loc = Entry.first;
260     assert(Loc != nullptr);
261 
262     Value *Val = Entry.second;
263     assert(Val != nullptr);
264 
265     auto It = Other.LocToVal.find(Loc);
266     if (It == Other.LocToVal.end())
267       continue;
268     assert(It->second != nullptr);
269 
270     if (equivalentValues(Loc->getType(), Val, *this, It->second, Other,
271                          Model)) {
272       LocToVal.insert({Loc, Val});
273       continue;
274     }
275 
276     // FIXME: Consider destroying `MergedValue` immediately if
277     // `ValueModel::merge` returns false to avoid storing unneeded values in
278     // `DACtx`.
279     if (Value *MergedVal = createValue(Loc->getType()))
280       if (Model.merge(Loc->getType(), *Val, *this, *It->second, Other,
281                       *MergedVal, *this))
282         LocToVal.insert({Loc, MergedVal});
283   }
284   if (OldLocToVal.size() != LocToVal.size())
285     Effect = LatticeJoinEffect::Changed;
286 
287   FlowConditionConstraints = joinConstraints(DACtx, FlowConditionConstraints,
288                                              Other.FlowConditionConstraints);
289 
290   return Effect;
291 }
292 
293 StorageLocation &Environment::createStorageLocation(QualType Type) {
294   assert(!Type.isNull());
295   if (Type->isStructureOrClassType() || Type->isUnionType()) {
296     // FIXME: Explore options to avoid eager initialization of fields as some of
297     // them might not be needed for a particular analysis.
298     llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs;
299     for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) {
300       FieldLocs.insert({Field, &createStorageLocation(Field->getType())});
301     }
302     return takeOwnership(
303         std::make_unique<AggregateStorageLocation>(Type, std::move(FieldLocs)));
304   }
305   return takeOwnership(std::make_unique<ScalarStorageLocation>(Type));
306 }
307 
308 StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
309   // Evaluated declarations are always assigned the same storage locations to
310   // ensure that the environment stabilizes across loop iterations. Storage
311   // locations for evaluated declarations are stored in the analysis context.
312   if (auto *Loc = DACtx->getStorageLocation(D))
313     return *Loc;
314   auto &Loc = createStorageLocation(D.getType());
315   DACtx->setStorageLocation(D, Loc);
316   return Loc;
317 }
318 
319 StorageLocation &Environment::createStorageLocation(const Expr &E) {
320   // Evaluated expressions are always assigned the same storage locations to
321   // ensure that the environment stabilizes across loop iterations. Storage
322   // locations for evaluated expressions are stored in the analysis context.
323   if (auto *Loc = DACtx->getStorageLocation(E))
324     return *Loc;
325   auto &Loc = createStorageLocation(E.getType());
326   DACtx->setStorageLocation(E, Loc);
327   return Loc;
328 }
329 
330 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) {
331   assert(DeclToLoc.find(&D) == DeclToLoc.end());
332   DeclToLoc[&D] = &Loc;
333 }
334 
335 StorageLocation *Environment::getStorageLocation(const ValueDecl &D,
336                                                  SkipPast SP) const {
337   auto It = DeclToLoc.find(&D);
338   return It == DeclToLoc.end() ? nullptr : &skip(*It->second, SP);
339 }
340 
341 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
342   assert(ExprToLoc.find(&E) == ExprToLoc.end());
343   ExprToLoc[&E] = &Loc;
344 }
345 
346 StorageLocation *Environment::getStorageLocation(const Expr &E,
347                                                  SkipPast SP) const {
348   auto It = ExprToLoc.find(&E);
349   return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP);
350 }
351 
352 StorageLocation *Environment::getThisPointeeStorageLocation() const {
353   return DACtx->getThisPointeeStorageLocation();
354 }
355 
356 void Environment::setValue(const StorageLocation &Loc, Value &Val) {
357   LocToVal[&Loc] = &Val;
358 
359   if (auto *StructVal = dyn_cast<StructValue>(&Val)) {
360     auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc);
361 
362     const QualType Type = AggregateLoc.getType();
363     assert(Type->isStructureOrClassType());
364 
365     for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) {
366       assert(Field != nullptr);
367       StorageLocation &FieldLoc = AggregateLoc.getChild(*Field);
368       MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field);
369       if (auto *FieldVal = StructVal->getChild(*Field))
370         setValue(FieldLoc, *FieldVal);
371     }
372   }
373 
374   auto IT = MemberLocToStruct.find(&Loc);
375   if (IT != MemberLocToStruct.end()) {
376     // `Loc` is the location of a struct member so we need to also update the
377     // value of the member in the corresponding `StructValue`.
378 
379     assert(IT->second.first != nullptr);
380     StructValue &StructVal = *IT->second.first;
381 
382     assert(IT->second.second != nullptr);
383     const ValueDecl &Member = *IT->second.second;
384 
385     StructVal.setChild(Member, Val);
386   }
387 }
388 
389 Value *Environment::getValue(const StorageLocation &Loc) const {
390   auto It = LocToVal.find(&Loc);
391   return It == LocToVal.end() ? nullptr : It->second;
392 }
393 
394 Value *Environment::getValue(const ValueDecl &D, SkipPast SP) const {
395   auto *Loc = getStorageLocation(D, SP);
396   if (Loc == nullptr)
397     return nullptr;
398   return getValue(*Loc);
399 }
400 
401 Value *Environment::getValue(const Expr &E, SkipPast SP) const {
402   auto *Loc = getStorageLocation(E, SP);
403   if (Loc == nullptr)
404     return nullptr;
405   return getValue(*Loc);
406 }
407 
408 Value *Environment::createValue(QualType Type) {
409   llvm::DenseSet<QualType> Visited;
410   int CreatedValuesCount = 0;
411   Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
412                                                 CreatedValuesCount);
413   if (CreatedValuesCount > MaxCompositeValueSize) {
414     llvm::errs() << "Attempting to initialize a huge value of type: "
415                  << Type.getAsString() << "\n";
416   }
417   return Val;
418 }
419 
420 Value *Environment::createValueUnlessSelfReferential(
421     QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
422     int &CreatedValuesCount) {
423   assert(!Type.isNull());
424 
425   // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
426   if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
427       Depth > MaxCompositeValueDepth)
428     return nullptr;
429 
430   if (Type->isBooleanType()) {
431     CreatedValuesCount++;
432     return &makeAtomicBoolValue();
433   }
434 
435   if (Type->isIntegerType()) {
436     CreatedValuesCount++;
437     return &takeOwnership(std::make_unique<IntegerValue>());
438   }
439 
440   if (Type->isReferenceType()) {
441     CreatedValuesCount++;
442     QualType PointeeType = Type->castAs<ReferenceType>()->getPointeeType();
443     auto &PointeeLoc = createStorageLocation(PointeeType);
444 
445     if (!Visited.contains(PointeeType.getCanonicalType())) {
446       Visited.insert(PointeeType.getCanonicalType());
447       Value *PointeeVal = createValueUnlessSelfReferential(
448           PointeeType, Visited, Depth, CreatedValuesCount);
449       Visited.erase(PointeeType.getCanonicalType());
450 
451       if (PointeeVal != nullptr)
452         setValue(PointeeLoc, *PointeeVal);
453     }
454 
455     return &takeOwnership(std::make_unique<ReferenceValue>(PointeeLoc));
456   }
457 
458   if (Type->isPointerType()) {
459     CreatedValuesCount++;
460     QualType PointeeType = Type->castAs<PointerType>()->getPointeeType();
461     auto &PointeeLoc = createStorageLocation(PointeeType);
462 
463     if (!Visited.contains(PointeeType.getCanonicalType())) {
464       Visited.insert(PointeeType.getCanonicalType());
465       Value *PointeeVal = createValueUnlessSelfReferential(
466           PointeeType, Visited, Depth, CreatedValuesCount);
467       Visited.erase(PointeeType.getCanonicalType());
468 
469       if (PointeeVal != nullptr)
470         setValue(PointeeLoc, *PointeeVal);
471     }
472 
473     return &takeOwnership(std::make_unique<PointerValue>(PointeeLoc));
474   }
475 
476   if (Type->isStructureOrClassType()) {
477     CreatedValuesCount++;
478     // FIXME: Initialize only fields that are accessed in the context that is
479     // being analyzed.
480     llvm::DenseMap<const ValueDecl *, Value *> FieldValues;
481     for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) {
482       assert(Field != nullptr);
483 
484       QualType FieldType = Field->getType();
485       if (Visited.contains(FieldType.getCanonicalType()))
486         continue;
487 
488       Visited.insert(FieldType.getCanonicalType());
489       if (auto *FieldValue = createValueUnlessSelfReferential(
490               FieldType, Visited, Depth + 1, CreatedValuesCount))
491         FieldValues.insert({Field, FieldValue});
492       Visited.erase(FieldType.getCanonicalType());
493     }
494 
495     return &takeOwnership(
496         std::make_unique<StructValue>(std::move(FieldValues)));
497   }
498 
499   return nullptr;
500 }
501 
502 StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const {
503   switch (SP) {
504   case SkipPast::None:
505     return Loc;
506   case SkipPast::Reference:
507     // References cannot be chained so we only need to skip past one level of
508     // indirection.
509     if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc)))
510       return Val->getPointeeLoc();
511     return Loc;
512   case SkipPast::ReferenceThenPointer:
513     StorageLocation &LocPastRef = skip(Loc, SkipPast::Reference);
514     if (auto *Val = dyn_cast_or_null<PointerValue>(getValue(LocPastRef)))
515       return Val->getPointeeLoc();
516     return LocPastRef;
517   }
518   llvm_unreachable("bad SkipPast kind");
519 }
520 
521 const StorageLocation &Environment::skip(const StorageLocation &Loc,
522                                          SkipPast SP) const {
523   return skip(*const_cast<StorageLocation *>(&Loc), SP);
524 }
525 
526 void Environment::addToFlowCondition(BoolValue &Val) {
527   FlowConditionConstraints.insert(&Val);
528 }
529 
530 bool Environment::flowConditionImplies(BoolValue &Val) const {
531   // Returns true if and only if truth assignment of the flow condition implies
532   // that `Val` is also true. We prove whether or not this property holds by
533   // reducing the problem to satisfiability checking. In other words, we attempt
534   // to show that assuming `Val` is false makes the constraints induced by the
535   // flow condition unsatisfiable.
536   llvm::DenseSet<BoolValue *> Constraints = {
537       &makeNot(Val), &getBoolLiteralValue(true),
538       &makeNot(getBoolLiteralValue(false))};
539   Constraints.insert(FlowConditionConstraints.begin(),
540                      FlowConditionConstraints.end());
541   return DACtx->getSolver().solve(std::move(Constraints)) ==
542          Solver::Result::Unsatisfiable;
543 }
544 
545 } // namespace dataflow
546 } // namespace clang
547