1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an Environment class that is used by dataflow analyses 10 // that run over Control-Flow Graphs (CFGs) to keep track of the state of the 11 // program at given program points. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" 16 #include "clang/AST/Decl.h" 17 #include "clang/AST/DeclCXX.h" 18 #include "clang/AST/Type.h" 19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h" 20 #include "clang/Analysis/FlowSensitive/Value.h" 21 #include "llvm/ADT/DenseMap.h" 22 #include "llvm/ADT/DenseSet.h" 23 #include "llvm/ADT/MapVector.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/Support/ErrorHandling.h" 26 #include <cassert> 27 #include <utility> 28 29 namespace clang { 30 namespace dataflow { 31 32 // FIXME: convert these to parameters of the analysis or environment. Current 33 // settings have been experimentaly validated, but only for a particular 34 // analysis. 35 static constexpr int MaxCompositeValueDepth = 3; 36 static constexpr int MaxCompositeValueSize = 1000; 37 38 /// Returns a map consisting of key-value entries that are present in both maps. 39 template <typename K, typename V> 40 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1, 41 const llvm::DenseMap<K, V> &Map2) { 42 llvm::DenseMap<K, V> Result; 43 for (auto &Entry : Map1) { 44 auto It = Map2.find(Entry.first); 45 if (It != Map2.end() && Entry.second == It->second) 46 Result.insert({Entry.first, Entry.second}); 47 } 48 return Result; 49 } 50 51 // Whether to consider equivalent two values with an unknown relation. 52 // 53 // FIXME: this function is a hack enabling unsoundness to support 54 // convergence. Once we have widening support for the reference/pointer and 55 // struct built-in models, this should be unconditionally `false` (and inlined 56 // as such at its call sites). 57 static bool equateUnknownValues(Value::Kind K) { 58 switch (K) { 59 case Value::Kind::Integer: 60 case Value::Kind::Pointer: 61 case Value::Kind::Record: 62 return true; 63 default: 64 return false; 65 } 66 } 67 68 static bool compareDistinctValues(QualType Type, Value &Val1, 69 const Environment &Env1, Value &Val2, 70 const Environment &Env2, 71 Environment::ValueModel &Model) { 72 // Note: Potentially costly, but, for booleans, we could check whether both 73 // can be proven equivalent in their respective environments. 74 75 // FIXME: move the reference/pointers logic from `areEquivalentValues` to here 76 // and implement separate, join/widen specific handling for 77 // reference/pointers. 78 switch (Model.compare(Type, Val1, Env1, Val2, Env2)) { 79 case ComparisonResult::Same: 80 return true; 81 case ComparisonResult::Different: 82 return false; 83 case ComparisonResult::Unknown: 84 return equateUnknownValues(Val1.getKind()); 85 } 86 llvm_unreachable("All cases covered in switch"); 87 } 88 89 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`, 90 /// respectively, of the same type `Type`. Merging generally produces a single 91 /// value that (soundly) approximates the two inputs, although the actual 92 /// meaning depends on `Model`. 93 static Value *mergeDistinctValues(QualType Type, Value &Val1, 94 const Environment &Env1, Value &Val2, 95 const Environment &Env2, 96 Environment &MergedEnv, 97 Environment::ValueModel &Model) { 98 // Join distinct boolean values preserving information about the constraints 99 // in the respective path conditions. 100 if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) { 101 // FIXME: Checking both values should be unnecessary, since they should have 102 // a consistent shape. However, right now we can end up with BoolValue's in 103 // integer-typed variables due to our incorrect handling of 104 // boolean-to-integer casts (we just propagate the BoolValue to the result 105 // of the cast). So, a join can encounter an integer in one branch but a 106 // bool in the other. 107 // For example: 108 // ``` 109 // std::optional<bool> o; 110 // int x; 111 // if (o.has_value()) 112 // x = o.value(); 113 // ``` 114 auto &Expr1 = cast<BoolValue>(Val1).formula(); 115 auto &Expr2 = cast<BoolValue>(Val2).formula(); 116 auto &A = MergedEnv.arena(); 117 auto &MergedVal = A.makeAtomRef(A.makeAtom()); 118 MergedEnv.addToFlowCondition( 119 A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()), 120 A.makeEquals(MergedVal, Expr1)), 121 A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()), 122 A.makeEquals(MergedVal, Expr2)))); 123 return &A.makeBoolValue(MergedVal); 124 } 125 126 Value *MergedVal = nullptr; 127 if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) { 128 [[maybe_unused]] auto *RecordVal2 = cast<RecordValue>(&Val2); 129 130 // Values to be merged are always associated with the same location in 131 // `LocToVal`. The location stored in `RecordVal` should therefore also 132 // be the same. 133 assert(&RecordVal1->getLoc() == &RecordVal2->getLoc()); 134 135 // `RecordVal1` and `RecordVal2` may have different properties associated 136 // with them. Create a new `RecordValue` without any properties so that we 137 // soundly approximate both values. If a particular analysis needs to merge 138 // properties, it should do so in `DataflowAnalysis::merge()`. 139 MergedVal = &MergedEnv.create<RecordValue>(RecordVal1->getLoc()); 140 } else { 141 MergedVal = MergedEnv.createValue(Type); 142 } 143 144 // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge` 145 // returns false to avoid storing unneeded values in `DACtx`. 146 if (MergedVal) 147 if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv)) 148 return MergedVal; 149 150 return nullptr; 151 } 152 153 // When widening does not change `Current`, return value will equal `&Prev`. 154 static Value &widenDistinctValues(QualType Type, Value &Prev, 155 const Environment &PrevEnv, Value &Current, 156 Environment &CurrentEnv, 157 Environment::ValueModel &Model) { 158 // Boolean-model widening. 159 if (isa<BoolValue>(&Prev)) { 160 assert(isa<BoolValue>(Current)); 161 // Widen to Top, because we know they are different values. If previous was 162 // already Top, re-use that to (implicitly) indicate that no change occured. 163 if (isa<TopBoolValue>(Prev)) 164 return Prev; 165 return CurrentEnv.makeTopBoolValue(); 166 } 167 168 // FIXME: Add other built-in model widening. 169 170 // Custom-model widening. 171 if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv)) 172 return *W; 173 174 return equateUnknownValues(Prev.getKind()) ? Prev : Current; 175 } 176 177 // Returns whether the values in `Map1` and `Map2` compare equal for those 178 // keys that `Map1` and `Map2` have in common. 179 template <typename Key> 180 bool compareKeyToValueMaps(const llvm::MapVector<Key, Value *> &Map1, 181 const llvm::MapVector<Key, Value *> &Map2, 182 const Environment &Env1, const Environment &Env2, 183 Environment::ValueModel &Model) { 184 for (auto &Entry : Map1) { 185 Key K = Entry.first; 186 assert(K != nullptr); 187 188 Value *Val = Entry.second; 189 assert(Val != nullptr); 190 191 auto It = Map2.find(K); 192 if (It == Map2.end()) 193 continue; 194 assert(It->second != nullptr); 195 196 if (!areEquivalentValues(*Val, *It->second) && 197 !compareDistinctValues(K->getType(), *Val, Env1, *It->second, Env2, 198 Model)) 199 return false; 200 } 201 202 return true; 203 } 204 205 // Perform a join on either `LocToVal` or `ExprToVal`. `Key` must be either 206 // `const StorageLocation *` or `const Expr *`. 207 template <typename Key> 208 llvm::MapVector<Key, Value *> 209 joinKeyToValueMap(const llvm::MapVector<Key, Value *> &Map1, 210 const llvm::MapVector<Key, Value *> &Map2, 211 const Environment &Env1, const Environment &Env2, 212 Environment &JoinedEnv, Environment::ValueModel &Model) { 213 llvm::MapVector<Key, Value *> MergedMap; 214 for (auto &Entry : Map1) { 215 Key K = Entry.first; 216 assert(K != nullptr); 217 218 Value *Val = Entry.second; 219 assert(Val != nullptr); 220 221 auto It = Map2.find(K); 222 if (It == Map2.end()) 223 continue; 224 assert(It->second != nullptr); 225 226 if (areEquivalentValues(*Val, *It->second)) { 227 MergedMap.insert({K, Val}); 228 continue; 229 } 230 231 if (Value *MergedVal = mergeDistinctValues( 232 K->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) { 233 MergedMap.insert({K, MergedVal}); 234 } 235 } 236 237 return MergedMap; 238 } 239 240 // Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either 241 // `const StorageLocation *` or `const Expr *`. 242 template <typename Key> 243 llvm::MapVector<Key, Value *> 244 widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap, 245 const llvm::MapVector<Key, Value *> &PrevMap, 246 Environment &CurEnv, const Environment &PrevEnv, 247 Environment::ValueModel &Model, LatticeJoinEffect &Effect) { 248 llvm::MapVector<Key, Value *> WidenedMap; 249 for (auto &Entry : CurMap) { 250 Key K = Entry.first; 251 assert(K != nullptr); 252 253 Value *Val = Entry.second; 254 assert(Val != nullptr); 255 256 auto PrevIt = PrevMap.find(K); 257 if (PrevIt == PrevMap.end()) 258 continue; 259 assert(PrevIt->second != nullptr); 260 261 if (areEquivalentValues(*Val, *PrevIt->second)) { 262 WidenedMap.insert({K, Val}); 263 continue; 264 } 265 266 Value &WidenedVal = widenDistinctValues(K->getType(), *PrevIt->second, 267 PrevEnv, *Val, CurEnv, Model); 268 WidenedMap.insert({K, &WidenedVal}); 269 if (&WidenedVal != PrevIt->second) 270 Effect = LatticeJoinEffect::Changed; 271 } 272 273 return WidenedMap; 274 } 275 276 /// Initializes a global storage value. 277 static void insertIfGlobal(const Decl &D, 278 llvm::DenseSet<const VarDecl *> &Vars) { 279 if (auto *V = dyn_cast<VarDecl>(&D)) 280 if (V->hasGlobalStorage()) 281 Vars.insert(V); 282 } 283 284 static void insertIfFunction(const Decl &D, 285 llvm::DenseSet<const FunctionDecl *> &Funcs) { 286 if (auto *FD = dyn_cast<FunctionDecl>(&D)) 287 Funcs.insert(FD); 288 } 289 290 static void 291 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields, 292 llvm::DenseSet<const VarDecl *> &Vars, 293 llvm::DenseSet<const FunctionDecl *> &Funcs) { 294 insertIfGlobal(D, Vars); 295 insertIfFunction(D, Funcs); 296 if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D)) 297 for (const auto *B : Decomp->bindings()) 298 if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding())) 299 // FIXME: should we be using `E->getFoundDecl()`? 300 if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) 301 Fields.insert(FD); 302 } 303 304 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields, 305 /// global variables and functions that are declared in or referenced from 306 /// sub-statements. 307 static void 308 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, 309 llvm::DenseSet<const VarDecl *> &Vars, 310 llvm::DenseSet<const FunctionDecl *> &Funcs) { 311 for (auto *Child : S.children()) 312 if (Child != nullptr) 313 getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs); 314 if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S)) 315 getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs); 316 317 if (auto *DS = dyn_cast<DeclStmt>(&S)) { 318 if (DS->isSingleDecl()) 319 getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs); 320 else 321 for (auto *D : DS->getDeclGroup()) 322 getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs); 323 } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) { 324 insertIfGlobal(*E->getDecl(), Vars); 325 insertIfFunction(*E->getDecl(), Funcs); 326 } else if (auto *E = dyn_cast<MemberExpr>(&S)) { 327 // FIXME: should we be using `E->getFoundDecl()`? 328 const ValueDecl *VD = E->getMemberDecl(); 329 insertIfGlobal(*VD, Vars); 330 insertIfFunction(*VD, Funcs); 331 if (const auto *FD = dyn_cast<FieldDecl>(VD)) 332 Fields.insert(FD); 333 } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) { 334 if (RecordDecl *RD = InitList->getType()->getAsRecordDecl()) 335 for (const auto *FD : getFieldsForInitListExpr(RD)) 336 Fields.insert(FD); 337 } 338 } 339 340 // FIXME: Add support for resetting globals after function calls to enable 341 // the implementation of sound analyses. 342 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { 343 assert(FuncDecl->getBody() != nullptr); 344 345 FieldSet Fields; 346 llvm::DenseSet<const VarDecl *> Vars; 347 llvm::DenseSet<const FunctionDecl *> Funcs; 348 349 // Look for global variable and field references in the 350 // constructor-initializers. 351 if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) { 352 for (const auto *Init : CtorDecl->inits()) { 353 if (Init->isMemberInitializer()) { 354 Fields.insert(Init->getMember()); 355 } else if (Init->isIndirectMemberInitializer()) { 356 for (const auto *I : Init->getIndirectMember()->chain()) 357 Fields.insert(cast<FieldDecl>(I)); 358 } 359 const Expr *E = Init->getInit(); 360 assert(E != nullptr); 361 getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs); 362 } 363 // Add all fields mentioned in default member initializers. 364 for (const FieldDecl *F : CtorDecl->getParent()->fields()) 365 if (const auto *I = F->getInClassInitializer()) 366 getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs); 367 } 368 getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs); 369 370 // These have to be added before the lines that follow to ensure that 371 // `create*` work correctly for structs. 372 DACtx->addModeledFields(Fields); 373 374 for (const VarDecl *D : Vars) { 375 if (getStorageLocation(*D) != nullptr) 376 continue; 377 378 setStorageLocation(*D, createObject(*D)); 379 } 380 381 for (const FunctionDecl *FD : Funcs) { 382 if (getStorageLocation(*FD) != nullptr) 383 continue; 384 auto &Loc = createStorageLocation(FD->getType()); 385 setStorageLocation(*FD, Loc); 386 } 387 } 388 389 Environment::Environment(DataflowAnalysisContext &DACtx) 390 : DACtx(&DACtx), 391 FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} 392 393 Environment Environment::fork() const { 394 Environment Copy(*this); 395 Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken); 396 return Copy; 397 } 398 399 Environment::Environment(DataflowAnalysisContext &DACtx, 400 const DeclContext &DeclCtx) 401 : Environment(DACtx) { 402 CallStack.push_back(&DeclCtx); 403 404 if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) { 405 assert(FuncDecl->getBody() != nullptr); 406 407 initFieldsGlobalsAndFuncs(FuncDecl); 408 409 for (const auto *ParamDecl : FuncDecl->parameters()) { 410 assert(ParamDecl != nullptr); 411 setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); 412 } 413 } 414 415 if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) { 416 auto *Parent = MethodDecl->getParent(); 417 assert(Parent != nullptr); 418 if (Parent->isLambda()) 419 MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext()); 420 421 // FIXME: Initialize the ThisPointeeLoc of lambdas too. 422 if (MethodDecl && !MethodDecl->isStatic()) { 423 QualType ThisPointeeType = MethodDecl->getThisObjectType(); 424 ThisPointeeLoc = 425 &cast<RecordValue>(createValue(ThisPointeeType))->getLoc(); 426 } 427 } 428 } 429 430 bool Environment::canDescend(unsigned MaxDepth, 431 const DeclContext *Callee) const { 432 return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee); 433 } 434 435 Environment Environment::pushCall(const CallExpr *Call) const { 436 Environment Env(*this); 437 438 if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) { 439 if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) { 440 if (!isa<CXXThisExpr>(Arg)) 441 Env.ThisPointeeLoc = 442 cast<RecordStorageLocation>(getStorageLocation(*Arg)); 443 // Otherwise (when the argument is `this`), retain the current 444 // environment's `ThisPointeeLoc`. 445 } 446 } 447 448 Env.pushCallInternal(Call->getDirectCallee(), 449 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 450 451 return Env; 452 } 453 454 Environment Environment::pushCall(const CXXConstructExpr *Call) const { 455 Environment Env(*this); 456 457 Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call); 458 459 Env.pushCallInternal(Call->getConstructor(), 460 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 461 462 return Env; 463 } 464 465 void Environment::pushCallInternal(const FunctionDecl *FuncDecl, 466 ArrayRef<const Expr *> Args) { 467 // Canonicalize to the definition of the function. This ensures that we're 468 // putting arguments into the same `ParamVarDecl`s` that the callee will later 469 // be retrieving them from. 470 assert(FuncDecl->getDefinition() != nullptr); 471 FuncDecl = FuncDecl->getDefinition(); 472 473 CallStack.push_back(FuncDecl); 474 475 initFieldsGlobalsAndFuncs(FuncDecl); 476 477 const auto *ParamIt = FuncDecl->param_begin(); 478 479 // FIXME: Parameters don't always map to arguments 1:1; examples include 480 // overloaded operators implemented as member functions, and parameter packs. 481 for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) { 482 assert(ParamIt != FuncDecl->param_end()); 483 const VarDecl *Param = *ParamIt; 484 setStorageLocation(*Param, createObject(*Param, Args[ArgIndex])); 485 } 486 } 487 488 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { 489 // We ignore some entries of `CalleeEnv`: 490 // - `DACtx` because is already the same in both 491 // - We don't want the callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or 492 // `ThisPointeeLoc` because they don't apply to us. 493 // - `DeclToLoc`, `ExprToLoc`, and `ExprToVal` capture information from the 494 // callee's local scope, so when popping that scope, we do not propagate 495 // the maps. 496 this->LocToVal = std::move(CalleeEnv.LocToVal); 497 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 498 499 if (Call->isGLValue()) { 500 if (CalleeEnv.ReturnLoc != nullptr) 501 setStorageLocation(*Call, *CalleeEnv.ReturnLoc); 502 } else if (!Call->getType()->isVoidType()) { 503 if (CalleeEnv.ReturnVal != nullptr) 504 setValue(*Call, *CalleeEnv.ReturnVal); 505 } 506 } 507 508 void Environment::popCall(const CXXConstructExpr *Call, 509 const Environment &CalleeEnv) { 510 // See also comment in `popCall(const CallExpr *, const Environment &)` above. 511 this->LocToVal = std::move(CalleeEnv.LocToVal); 512 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 513 514 if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) { 515 setValue(*Call, *Val); 516 } 517 } 518 519 bool Environment::equivalentTo(const Environment &Other, 520 Environment::ValueModel &Model) const { 521 assert(DACtx == Other.DACtx); 522 523 if (ReturnVal != Other.ReturnVal) 524 return false; 525 526 if (ReturnLoc != Other.ReturnLoc) 527 return false; 528 529 if (ThisPointeeLoc != Other.ThisPointeeLoc) 530 return false; 531 532 if (DeclToLoc != Other.DeclToLoc) 533 return false; 534 535 if (ExprToLoc != Other.ExprToLoc) 536 return false; 537 538 if (!compareKeyToValueMaps(ExprToVal, Other.ExprToVal, *this, Other, Model)) 539 return false; 540 541 if (!compareKeyToValueMaps(LocToVal, Other.LocToVal, *this, Other, Model)) 542 return false; 543 544 return true; 545 } 546 547 LatticeJoinEffect Environment::widen(const Environment &PrevEnv, 548 Environment::ValueModel &Model) { 549 assert(DACtx == PrevEnv.DACtx); 550 assert(ReturnVal == PrevEnv.ReturnVal); 551 assert(ReturnLoc == PrevEnv.ReturnLoc); 552 assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); 553 assert(CallStack == PrevEnv.CallStack); 554 555 auto Effect = LatticeJoinEffect::Unchanged; 556 557 // By the API, `PrevEnv` is a previous version of the environment for the same 558 // block, so we have some guarantees about its shape. In particular, it will 559 // be the result of a join or widen operation on previous values for this 560 // block. For `DeclToLoc`, `ExprToVal`, and `ExprToLoc`, join guarantees that 561 // these maps are subsets of the maps in `PrevEnv`. So, as long as we maintain 562 // this property here, we don't need change their current values to widen. 563 assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size()); 564 assert(ExprToVal.size() <= PrevEnv.ExprToVal.size()); 565 assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size()); 566 567 ExprToVal = widenKeyToValueMap(ExprToVal, PrevEnv.ExprToVal, *this, PrevEnv, 568 Model, Effect); 569 570 LocToVal = widenKeyToValueMap(LocToVal, PrevEnv.LocToVal, *this, PrevEnv, 571 Model, Effect); 572 if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() || 573 ExprToLoc.size() != PrevEnv.ExprToLoc.size() || 574 ExprToVal.size() != PrevEnv.ExprToVal.size() || 575 LocToVal.size() != PrevEnv.LocToVal.size()) 576 Effect = LatticeJoinEffect::Changed; 577 578 return Effect; 579 } 580 581 Environment Environment::join(const Environment &EnvA, const Environment &EnvB, 582 Environment::ValueModel &Model) { 583 assert(EnvA.DACtx == EnvB.DACtx); 584 assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); 585 assert(EnvA.CallStack == EnvB.CallStack); 586 587 Environment JoinedEnv(*EnvA.DACtx); 588 589 JoinedEnv.CallStack = EnvA.CallStack; 590 JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; 591 592 if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) { 593 // `ReturnVal` might not always get set -- for example if we have a return 594 // statement of the form `return some_other_func()` and we decide not to 595 // analyze `some_other_func()`. 596 // In this case, we can't say anything about the joined return value -- we 597 // don't simply want to propagate the return value that we do have, because 598 // it might not be the correct one. 599 // This occurs for example in the test `ContextSensitiveMutualRecursion`. 600 JoinedEnv.ReturnVal = nullptr; 601 } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) { 602 JoinedEnv.ReturnVal = EnvA.ReturnVal; 603 } else { 604 assert(!EnvA.CallStack.empty()); 605 // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this 606 // cast. 607 auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back()); 608 assert(Func != nullptr); 609 if (Value *MergedVal = 610 mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA, 611 *EnvB.ReturnVal, EnvB, JoinedEnv, Model)) 612 JoinedEnv.ReturnVal = MergedVal; 613 } 614 615 if (EnvA.ReturnLoc == EnvB.ReturnLoc) 616 JoinedEnv.ReturnLoc = EnvA.ReturnLoc; 617 else 618 JoinedEnv.ReturnLoc = nullptr; 619 620 // FIXME: Once we're able to remove declarations from `DeclToLoc` when their 621 // lifetime ends, add an assertion that there aren't any entries in 622 // `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to 623 // different storage locations. 624 JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc); 625 626 JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc); 627 628 // FIXME: update join to detect backedges and simplify the flow condition 629 // accordingly. 630 JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions( 631 EnvA.FlowConditionToken, EnvB.FlowConditionToken); 632 633 JoinedEnv.ExprToVal = joinKeyToValueMap(EnvA.ExprToVal, EnvB.ExprToVal, EnvA, 634 EnvB, JoinedEnv, Model); 635 636 JoinedEnv.LocToVal = joinKeyToValueMap(EnvA.LocToVal, EnvB.LocToVal, EnvA, 637 EnvB, JoinedEnv, Model); 638 639 return JoinedEnv; 640 } 641 642 StorageLocation &Environment::createStorageLocation(QualType Type) { 643 return DACtx->createStorageLocation(Type); 644 } 645 646 StorageLocation &Environment::createStorageLocation(const VarDecl &D) { 647 // Evaluated declarations are always assigned the same storage locations to 648 // ensure that the environment stabilizes across loop iterations. Storage 649 // locations for evaluated declarations are stored in the analysis context. 650 return DACtx->getStableStorageLocation(D); 651 } 652 653 StorageLocation &Environment::createStorageLocation(const Expr &E) { 654 // Evaluated expressions are always assigned the same storage locations to 655 // ensure that the environment stabilizes across loop iterations. Storage 656 // locations for evaluated expressions are stored in the analysis context. 657 return DACtx->getStableStorageLocation(E); 658 } 659 660 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { 661 assert(!DeclToLoc.contains(&D)); 662 DeclToLoc[&D] = &Loc; 663 } 664 665 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const { 666 auto It = DeclToLoc.find(&D); 667 if (It == DeclToLoc.end()) 668 return nullptr; 669 670 StorageLocation *Loc = It->second; 671 672 return Loc; 673 } 674 675 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) { 676 // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason, 677 // but we still want to be able to associate a `StorageLocation` with them, 678 // so allow these as an exception. 679 assert(E.isGLValue() || 680 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 681 setStorageLocationInternal(E, Loc); 682 } 683 684 StorageLocation *Environment::getStorageLocation(const Expr &E) const { 685 // See comment in `setStorageLocation()`. 686 assert(E.isGLValue() || 687 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 688 return getStorageLocationInternal(E); 689 } 690 691 RecordStorageLocation *Environment::getThisPointeeStorageLocation() const { 692 return ThisPointeeLoc; 693 } 694 695 RecordStorageLocation & 696 Environment::getResultObjectLocation(const Expr &RecordPRValue) { 697 assert(RecordPRValue.getType()->isRecordType()); 698 assert(RecordPRValue.isPRValue()); 699 700 if (StorageLocation *ExistingLoc = getStorageLocationInternal(RecordPRValue)) 701 return *cast<RecordStorageLocation>(ExistingLoc); 702 auto &Loc = cast<RecordStorageLocation>( 703 DACtx->getStableStorageLocation(RecordPRValue)); 704 setStorageLocationInternal(RecordPRValue, Loc); 705 return Loc; 706 } 707 708 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { 709 return DACtx->getOrCreateNullPointerValue(PointeeType); 710 } 711 712 void Environment::setValue(const StorageLocation &Loc, Value &Val) { 713 assert(!isa<RecordValue>(&Val) || &cast<RecordValue>(&Val)->getLoc() == &Loc); 714 715 LocToVal[&Loc] = &Val; 716 } 717 718 void Environment::setValue(const Expr &E, Value &Val) { 719 assert(E.isPRValue()); 720 ExprToVal[&E] = &Val; 721 } 722 723 Value *Environment::getValue(const StorageLocation &Loc) const { 724 return LocToVal.lookup(&Loc); 725 } 726 727 Value *Environment::getValue(const ValueDecl &D) const { 728 auto *Loc = getStorageLocation(D); 729 if (Loc == nullptr) 730 return nullptr; 731 return getValue(*Loc); 732 } 733 734 Value *Environment::getValue(const Expr &E) const { 735 if (E.isPRValue()) { 736 auto It = ExprToVal.find(&ignoreCFGOmittedNodes(E)); 737 return It == ExprToVal.end() ? nullptr : It->second; 738 } 739 740 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 741 if (It == ExprToLoc.end()) 742 return nullptr; 743 return getValue(*It->second); 744 } 745 746 Value *Environment::createValue(QualType Type) { 747 llvm::DenseSet<QualType> Visited; 748 int CreatedValuesCount = 0; 749 Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0, 750 CreatedValuesCount); 751 if (CreatedValuesCount > MaxCompositeValueSize) { 752 llvm::errs() << "Attempting to initialize a huge value of type: " << Type 753 << '\n'; 754 } 755 return Val; 756 } 757 758 void Environment::setStorageLocationInternal(const Expr &E, 759 StorageLocation &Loc) { 760 const Expr &CanonE = ignoreCFGOmittedNodes(E); 761 assert(!ExprToLoc.contains(&CanonE)); 762 ExprToLoc[&CanonE] = &Loc; 763 } 764 765 StorageLocation *Environment::getStorageLocationInternal(const Expr &E) const { 766 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 767 return It == ExprToLoc.end() ? nullptr : &*It->second; 768 } 769 770 Value *Environment::createValueUnlessSelfReferential( 771 QualType Type, llvm::DenseSet<QualType> &Visited, int Depth, 772 int &CreatedValuesCount) { 773 assert(!Type.isNull()); 774 assert(!Type->isReferenceType()); 775 776 // Allow unlimited fields at depth 1; only cap at deeper nesting levels. 777 if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) || 778 Depth > MaxCompositeValueDepth) 779 return nullptr; 780 781 if (Type->isBooleanType()) { 782 CreatedValuesCount++; 783 return &makeAtomicBoolValue(); 784 } 785 786 if (Type->isIntegerType()) { 787 // FIXME: consider instead `return nullptr`, given that we do nothing useful 788 // with integers, and so distinguishing them serves no purpose, but could 789 // prevent convergence. 790 CreatedValuesCount++; 791 return &arena().create<IntegerValue>(); 792 } 793 794 if (Type->isPointerType()) { 795 CreatedValuesCount++; 796 QualType PointeeType = Type->getPointeeType(); 797 StorageLocation &PointeeLoc = 798 createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount); 799 800 return &arena().create<PointerValue>(PointeeLoc); 801 } 802 803 if (Type->isRecordType()) { 804 CreatedValuesCount++; 805 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 806 for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { 807 assert(Field != nullptr); 808 809 QualType FieldType = Field->getType(); 810 811 FieldLocs.insert( 812 {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1, 813 CreatedValuesCount)}); 814 } 815 816 RecordStorageLocation &Loc = 817 arena().create<RecordStorageLocation>(Type, std::move(FieldLocs)); 818 RecordValue &RecordVal = create<RecordValue>(Loc); 819 820 // As we already have a storage location for the `RecordValue`, we can and 821 // should associate them in the environment. 822 setValue(Loc, RecordVal); 823 824 return &RecordVal; 825 } 826 827 return nullptr; 828 } 829 830 StorageLocation & 831 Environment::createLocAndMaybeValue(QualType Ty, 832 llvm::DenseSet<QualType> &Visited, 833 int Depth, int &CreatedValuesCount) { 834 if (!Visited.insert(Ty.getCanonicalType()).second) 835 return createStorageLocation(Ty.getNonReferenceType()); 836 Value *Val = createValueUnlessSelfReferential( 837 Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount); 838 Visited.erase(Ty.getCanonicalType()); 839 840 Ty = Ty.getNonReferenceType(); 841 842 if (Val == nullptr) 843 return createStorageLocation(Ty); 844 845 if (Ty->isRecordType()) 846 return cast<RecordValue>(Val)->getLoc(); 847 848 StorageLocation &Loc = createStorageLocation(Ty); 849 setValue(Loc, *Val); 850 return Loc; 851 } 852 853 StorageLocation &Environment::createObjectInternal(const VarDecl *D, 854 QualType Ty, 855 const Expr *InitExpr) { 856 if (Ty->isReferenceType()) { 857 // Although variables of reference type always need to be initialized, it 858 // can happen that we can't see the initializer, so `InitExpr` may still 859 // be null. 860 if (InitExpr) { 861 if (auto *InitExprLoc = getStorageLocation(*InitExpr)) 862 return *InitExprLoc; 863 } 864 865 // Even though we have an initializer, we might not get an 866 // InitExprLoc, for example if the InitExpr is a CallExpr for which we 867 // don't have a function body. In this case, we just invent a storage 868 // location and value -- it's the best we can do. 869 return createObjectInternal(D, Ty.getNonReferenceType(), nullptr); 870 } 871 872 Value *Val = nullptr; 873 if (InitExpr) 874 // In the (few) cases where an expression is intentionally 875 // "uninterpreted", `InitExpr` is not associated with a value. There are 876 // two ways to handle this situation: propagate the status, so that 877 // uninterpreted initializers result in uninterpreted variables, or 878 // provide a default value. We choose the latter so that later refinements 879 // of the variable can be used for reasoning about the surrounding code. 880 // For this reason, we let this case be handled by the `createValue()` 881 // call below. 882 // 883 // FIXME. If and when we interpret all language cases, change this to 884 // assert that `InitExpr` is interpreted, rather than supplying a 885 // default value (assuming we don't update the environment API to return 886 // references). 887 Val = getValue(*InitExpr); 888 if (!Val) 889 Val = createValue(Ty); 890 891 if (Ty->isRecordType()) 892 return cast<RecordValue>(Val)->getLoc(); 893 894 StorageLocation &Loc = 895 D ? createStorageLocation(*D) : createStorageLocation(Ty); 896 897 if (Val) 898 setValue(Loc, *Val); 899 900 return Loc; 901 } 902 903 void Environment::addToFlowCondition(const Formula &Val) { 904 DACtx->addFlowConditionConstraint(FlowConditionToken, Val); 905 } 906 907 bool Environment::flowConditionImplies(const Formula &Val) const { 908 return DACtx->flowConditionImplies(FlowConditionToken, Val); 909 } 910 911 void Environment::dump(raw_ostream &OS) const { 912 // FIXME: add printing for remaining fields and allow caller to decide what 913 // fields are printed. 914 OS << "DeclToLoc:\n"; 915 for (auto [D, L] : DeclToLoc) 916 OS << " [" << D->getNameAsString() << ", " << L << "]\n"; 917 918 OS << "ExprToLoc:\n"; 919 for (auto [E, L] : ExprToLoc) 920 OS << " [" << E << ", " << L << "]\n"; 921 922 OS << "ExprToVal:\n"; 923 for (auto [E, V] : ExprToVal) 924 OS << " [" << E << ", " << V << ": " << *V << "]\n"; 925 926 OS << "LocToVal:\n"; 927 for (auto [L, V] : LocToVal) { 928 OS << " [" << L << ", " << V << ": " << *V << "]\n"; 929 } 930 931 OS << "FlowConditionToken:\n"; 932 DACtx->dumpFlowCondition(FlowConditionToken, OS); 933 } 934 935 void Environment::dump() const { 936 dump(llvm::dbgs()); 937 } 938 939 RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE, 940 const Environment &Env) { 941 Expr *ImplicitObject = MCE.getImplicitObjectArgument(); 942 if (ImplicitObject == nullptr) 943 return nullptr; 944 if (ImplicitObject->getType()->isPointerType()) { 945 if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*ImplicitObject))) 946 return &cast<RecordStorageLocation>(Val->getPointeeLoc()); 947 return nullptr; 948 } 949 return cast_or_null<RecordStorageLocation>( 950 Env.getStorageLocation(*ImplicitObject)); 951 } 952 953 RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME, 954 const Environment &Env) { 955 Expr *Base = ME.getBase(); 956 if (Base == nullptr) 957 return nullptr; 958 if (ME.isArrow()) { 959 if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Base))) 960 return &cast<RecordStorageLocation>(Val->getPointeeLoc()); 961 return nullptr; 962 } 963 return cast_or_null<RecordStorageLocation>(Env.getStorageLocation(*Base)); 964 } 965 966 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) { 967 // Unnamed bitfields are only used for padding and do not appear in 968 // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s 969 // field list, and we thus need to remove them before mapping inits to 970 // fields to avoid mapping inits to the wrongs fields. 971 std::vector<FieldDecl *> Fields; 972 llvm::copy_if( 973 RD->fields(), std::back_inserter(Fields), 974 [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); }); 975 return Fields; 976 } 977 978 RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) { 979 auto &NewVal = Env.create<RecordValue>(Loc); 980 Env.setValue(Loc, NewVal); 981 return NewVal; 982 } 983 984 RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) { 985 assert(Expr.getType()->isRecordType()); 986 987 if (Expr.isPRValue()) { 988 if (auto *ExistingVal = cast_or_null<RecordValue>(Env.getValue(Expr))) { 989 auto &NewVal = Env.create<RecordValue>(ExistingVal->getLoc()); 990 Env.setValue(Expr, NewVal); 991 return NewVal; 992 } 993 994 auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType())); 995 Env.setValue(Expr, NewVal); 996 return NewVal; 997 } 998 999 if (auto *Loc = 1000 cast_or_null<RecordStorageLocation>(Env.getStorageLocation(Expr))) { 1001 auto &NewVal = Env.create<RecordValue>(*Loc); 1002 Env.setValue(*Loc, NewVal); 1003 return NewVal; 1004 } 1005 1006 auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType())); 1007 Env.setStorageLocation(Expr, NewVal.getLoc()); 1008 return NewVal; 1009 } 1010 1011 } // namespace dataflow 1012 } // namespace clang 1013