1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an Environment class that is used by dataflow analyses 10 // that run over Control-Flow Graphs (CFGs) to keep track of the state of the 11 // program at given program points. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" 16 #include "clang/AST/Decl.h" 17 #include "clang/AST/DeclCXX.h" 18 #include "clang/AST/Type.h" 19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h" 20 #include "clang/Analysis/FlowSensitive/Value.h" 21 #include "llvm/ADT/DenseMap.h" 22 #include "llvm/ADT/DenseSet.h" 23 #include "llvm/ADT/MapVector.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/Support/Casting.h" 26 #include "llvm/Support/ErrorHandling.h" 27 #include <cassert> 28 #include <memory> 29 #include <utility> 30 31 namespace clang { 32 namespace dataflow { 33 34 // FIXME: convert these to parameters of the analysis or environment. Current 35 // settings have been experimentaly validated, but only for a particular 36 // analysis. 37 static constexpr int MaxCompositeValueDepth = 3; 38 static constexpr int MaxCompositeValueSize = 1000; 39 40 /// Returns a map consisting of key-value entries that are present in both maps. 41 template <typename K, typename V> 42 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1, 43 const llvm::DenseMap<K, V> &Map2) { 44 llvm::DenseMap<K, V> Result; 45 for (auto &Entry : Map1) { 46 auto It = Map2.find(Entry.first); 47 if (It != Map2.end() && Entry.second == It->second) 48 Result.insert({Entry.first, Entry.second}); 49 } 50 return Result; 51 } 52 53 static bool compareDistinctValues(QualType Type, Value &Val1, 54 const Environment &Env1, Value &Val2, 55 const Environment &Env2, 56 Environment::ValueModel &Model) { 57 // Note: Potentially costly, but, for booleans, we could check whether both 58 // can be proven equivalent in their respective environments. 59 60 // FIXME: move the reference/pointers logic from `areEquivalentValues` to here 61 // and implement separate, join/widen specific handling for 62 // reference/pointers. 63 switch (Model.compare(Type, Val1, Env1, Val2, Env2)) { 64 case ComparisonResult::Same: 65 return true; 66 case ComparisonResult::Different: 67 return false; 68 case ComparisonResult::Unknown: 69 switch (Val1.getKind()) { 70 case Value::Kind::Integer: 71 case Value::Kind::Pointer: 72 case Value::Kind::Record: 73 // FIXME: this choice intentionally introduces unsoundness to allow 74 // for convergence. Once we have widening support for the 75 // reference/pointer and struct built-in models, this should be 76 // `false`. 77 return true; 78 default: 79 return false; 80 } 81 } 82 llvm_unreachable("All cases covered in switch"); 83 } 84 85 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`, 86 /// respectively, of the same type `Type`. Merging generally produces a single 87 /// value that (soundly) approximates the two inputs, although the actual 88 /// meaning depends on `Model`. 89 static Value *mergeDistinctValues(QualType Type, Value &Val1, 90 const Environment &Env1, Value &Val2, 91 const Environment &Env2, 92 Environment &MergedEnv, 93 Environment::ValueModel &Model) { 94 // Join distinct boolean values preserving information about the constraints 95 // in the respective path conditions. 96 if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) { 97 // FIXME: Checking both values should be unnecessary, since they should have 98 // a consistent shape. However, right now we can end up with BoolValue's in 99 // integer-typed variables due to our incorrect handling of 100 // boolean-to-integer casts (we just propagate the BoolValue to the result 101 // of the cast). So, a join can encounter an integer in one branch but a 102 // bool in the other. 103 // For example: 104 // ``` 105 // std::optional<bool> o; 106 // int x; 107 // if (o.has_value()) 108 // x = o.value(); 109 // ``` 110 auto &Expr1 = cast<BoolValue>(Val1).formula(); 111 auto &Expr2 = cast<BoolValue>(Val2).formula(); 112 auto &A = MergedEnv.arena(); 113 auto &MergedVal = A.makeAtomRef(A.makeAtom()); 114 MergedEnv.addToFlowCondition( 115 A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()), 116 A.makeEquals(MergedVal, Expr1)), 117 A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()), 118 A.makeEquals(MergedVal, Expr2)))); 119 return &A.makeBoolValue(MergedVal); 120 } 121 122 Value *MergedVal = nullptr; 123 if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) { 124 [[maybe_unused]] auto *RecordVal2 = cast<RecordValue>(&Val2); 125 126 // Values to be merged are always associated with the same location in 127 // `LocToVal`. The location stored in `RecordVal` should therefore also 128 // be the same. 129 assert(&RecordVal1->getLoc() == &RecordVal2->getLoc()); 130 131 // `RecordVal1` and `RecordVal2` may have different properties associated 132 // with them. Create a new `RecordValue` without any properties so that we 133 // soundly approximate both values. If a particular analysis needs to merge 134 // properties, it should do so in `DataflowAnalysis::merge()`. 135 MergedVal = &MergedEnv.create<RecordValue>(RecordVal1->getLoc()); 136 } else { 137 MergedVal = MergedEnv.createValue(Type); 138 } 139 140 // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge` 141 // returns false to avoid storing unneeded values in `DACtx`. 142 if (MergedVal) 143 if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv)) 144 return MergedVal; 145 146 return nullptr; 147 } 148 149 // When widening does not change `Current`, return value will equal `&Prev`. 150 static Value &widenDistinctValues(QualType Type, Value &Prev, 151 const Environment &PrevEnv, Value &Current, 152 Environment &CurrentEnv, 153 Environment::ValueModel &Model) { 154 // Boolean-model widening. 155 if (isa<BoolValue>(&Prev)) { 156 assert(isa<BoolValue>(Current)); 157 // Widen to Top, because we know they are different values. If previous was 158 // already Top, re-use that to (implicitly) indicate that no change occured. 159 if (isa<TopBoolValue>(Prev)) 160 return Prev; 161 return CurrentEnv.makeTopBoolValue(); 162 } 163 164 // FIXME: Add other built-in model widening. 165 166 // Custom-model widening. 167 if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv)) 168 return *W; 169 170 // Default of widening is a no-op: leave the current value unchanged. 171 return Current; 172 } 173 174 // Returns whether the values in `Map1` and `Map2` compare equal for those 175 // keys that `Map1` and `Map2` have in common. 176 template <typename Key> 177 bool compareKeyToValueMaps(const llvm::MapVector<Key, Value *> &Map1, 178 const llvm::MapVector<Key, Value *> &Map2, 179 const Environment &Env1, const Environment &Env2, 180 Environment::ValueModel &Model) { 181 for (auto &Entry : Map1) { 182 Key K = Entry.first; 183 assert(K != nullptr); 184 185 Value *Val = Entry.second; 186 assert(Val != nullptr); 187 188 auto It = Map2.find(K); 189 if (It == Map2.end()) 190 continue; 191 assert(It->second != nullptr); 192 193 if (!areEquivalentValues(*Val, *It->second) && 194 !compareDistinctValues(K->getType(), *Val, Env1, *It->second, Env2, 195 Model)) 196 return false; 197 } 198 199 return true; 200 } 201 202 // Perform a join on either `LocToVal` or `ExprToVal`. `Key` must be either 203 // `const StorageLocation *` or `const Expr *`. 204 template <typename Key> 205 llvm::MapVector<Key, Value *> 206 joinKeyToValueMap(const llvm::MapVector<Key, Value *> &Map1, 207 const llvm::MapVector<Key, Value *> &Map2, 208 const Environment &Env1, const Environment &Env2, 209 Environment &JoinedEnv, Environment::ValueModel &Model) { 210 llvm::MapVector<Key, Value *> MergedMap; 211 for (auto &Entry : Map1) { 212 Key K = Entry.first; 213 assert(K != nullptr); 214 215 Value *Val = Entry.second; 216 assert(Val != nullptr); 217 218 auto It = Map2.find(K); 219 if (It == Map2.end()) 220 continue; 221 assert(It->second != nullptr); 222 223 if (areEquivalentValues(*Val, *It->second)) { 224 MergedMap.insert({K, Val}); 225 continue; 226 } 227 228 if (Value *MergedVal = mergeDistinctValues( 229 K->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) { 230 MergedMap.insert({K, MergedVal}); 231 } 232 } 233 234 return MergedMap; 235 } 236 237 // Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either 238 // `const StorageLocation *` or `const Expr *`. 239 template <typename Key> 240 llvm::MapVector<Key, Value *> 241 widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap, 242 const llvm::MapVector<Key, Value *> &PrevMap, 243 Environment &CurEnv, const Environment &PrevEnv, 244 Environment::ValueModel &Model, LatticeJoinEffect &Effect) { 245 llvm::MapVector<Key, Value *> WidenedMap; 246 for (auto &Entry : CurMap) { 247 Key K = Entry.first; 248 assert(K != nullptr); 249 250 Value *Val = Entry.second; 251 assert(Val != nullptr); 252 253 auto PrevIt = PrevMap.find(K); 254 if (PrevIt == PrevMap.end()) 255 continue; 256 assert(PrevIt->second != nullptr); 257 258 if (areEquivalentValues(*Val, *PrevIt->second)) { 259 WidenedMap.insert({K, Val}); 260 continue; 261 } 262 263 Value &WidenedVal = widenDistinctValues(K->getType(), *PrevIt->second, 264 PrevEnv, *Val, CurEnv, Model); 265 WidenedMap.insert({K, &WidenedVal}); 266 if (&WidenedVal != PrevIt->second) 267 Effect = LatticeJoinEffect::Changed; 268 } 269 270 return WidenedMap; 271 } 272 273 /// Initializes a global storage value. 274 static void insertIfGlobal(const Decl &D, 275 llvm::DenseSet<const VarDecl *> &Vars) { 276 if (auto *V = dyn_cast<VarDecl>(&D)) 277 if (V->hasGlobalStorage()) 278 Vars.insert(V); 279 } 280 281 static void insertIfFunction(const Decl &D, 282 llvm::DenseSet<const FunctionDecl *> &Funcs) { 283 if (auto *FD = dyn_cast<FunctionDecl>(&D)) 284 Funcs.insert(FD); 285 } 286 287 static void 288 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields, 289 llvm::DenseSet<const VarDecl *> &Vars, 290 llvm::DenseSet<const FunctionDecl *> &Funcs) { 291 insertIfGlobal(D, Vars); 292 insertIfFunction(D, Funcs); 293 if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D)) 294 for (const auto *B : Decomp->bindings()) 295 if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding())) 296 // FIXME: should we be using `E->getFoundDecl()`? 297 if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) 298 Fields.insert(FD); 299 } 300 301 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields, 302 /// global variables and functions that are declared in or referenced from 303 /// sub-statements. 304 static void 305 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, 306 llvm::DenseSet<const VarDecl *> &Vars, 307 llvm::DenseSet<const FunctionDecl *> &Funcs) { 308 for (auto *Child : S.children()) 309 if (Child != nullptr) 310 getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs); 311 if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S)) 312 getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs); 313 314 if (auto *DS = dyn_cast<DeclStmt>(&S)) { 315 if (DS->isSingleDecl()) 316 getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs); 317 else 318 for (auto *D : DS->getDeclGroup()) 319 getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs); 320 } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) { 321 insertIfGlobal(*E->getDecl(), Vars); 322 insertIfFunction(*E->getDecl(), Funcs); 323 } else if (auto *E = dyn_cast<MemberExpr>(&S)) { 324 // FIXME: should we be using `E->getFoundDecl()`? 325 const ValueDecl *VD = E->getMemberDecl(); 326 insertIfGlobal(*VD, Vars); 327 insertIfFunction(*VD, Funcs); 328 if (const auto *FD = dyn_cast<FieldDecl>(VD)) 329 Fields.insert(FD); 330 } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) { 331 if (RecordDecl *RD = InitList->getType()->getAsRecordDecl()) 332 for (const auto *FD : getFieldsForInitListExpr(RD)) 333 Fields.insert(FD); 334 } 335 } 336 337 // FIXME: Add support for resetting globals after function calls to enable 338 // the implementation of sound analyses. 339 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { 340 assert(FuncDecl->getBody() != nullptr); 341 342 FieldSet Fields; 343 llvm::DenseSet<const VarDecl *> Vars; 344 llvm::DenseSet<const FunctionDecl *> Funcs; 345 346 // Look for global variable and field references in the 347 // constructor-initializers. 348 if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) { 349 for (const auto *Init : CtorDecl->inits()) { 350 if (Init->isMemberInitializer()) { 351 Fields.insert(Init->getMember()); 352 } else if (Init->isIndirectMemberInitializer()) { 353 for (const auto *I : Init->getIndirectMember()->chain()) 354 Fields.insert(cast<FieldDecl>(I)); 355 } 356 const Expr *E = Init->getInit(); 357 assert(E != nullptr); 358 getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs); 359 } 360 // Add all fields mentioned in default member initializers. 361 for (const FieldDecl *F : CtorDecl->getParent()->fields()) 362 if (const auto *I = F->getInClassInitializer()) 363 getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs); 364 } 365 getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs); 366 367 // These have to be added before the lines that follow to ensure that 368 // `create*` work correctly for structs. 369 DACtx->addModeledFields(Fields); 370 371 for (const VarDecl *D : Vars) { 372 if (getStorageLocation(*D) != nullptr) 373 continue; 374 375 setStorageLocation(*D, createObject(*D)); 376 } 377 378 for (const FunctionDecl *FD : Funcs) { 379 if (getStorageLocation(*FD) != nullptr) 380 continue; 381 auto &Loc = createStorageLocation(FD->getType()); 382 setStorageLocation(*FD, Loc); 383 } 384 } 385 386 Environment::Environment(DataflowAnalysisContext &DACtx) 387 : DACtx(&DACtx), 388 FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} 389 390 Environment Environment::fork() const { 391 Environment Copy(*this); 392 Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken); 393 return Copy; 394 } 395 396 Environment::Environment(DataflowAnalysisContext &DACtx, 397 const DeclContext &DeclCtx) 398 : Environment(DACtx) { 399 CallStack.push_back(&DeclCtx); 400 401 if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) { 402 assert(FuncDecl->getBody() != nullptr); 403 404 initFieldsGlobalsAndFuncs(FuncDecl); 405 406 for (const auto *ParamDecl : FuncDecl->parameters()) { 407 assert(ParamDecl != nullptr); 408 setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); 409 } 410 } 411 412 if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) { 413 auto *Parent = MethodDecl->getParent(); 414 assert(Parent != nullptr); 415 if (Parent->isLambda()) 416 MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext()); 417 418 // FIXME: Initialize the ThisPointeeLoc of lambdas too. 419 if (MethodDecl && !MethodDecl->isStatic()) { 420 QualType ThisPointeeType = MethodDecl->getThisObjectType(); 421 ThisPointeeLoc = 422 &cast<RecordValue>(createValue(ThisPointeeType))->getLoc(); 423 } 424 } 425 } 426 427 bool Environment::canDescend(unsigned MaxDepth, 428 const DeclContext *Callee) const { 429 return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee); 430 } 431 432 Environment Environment::pushCall(const CallExpr *Call) const { 433 Environment Env(*this); 434 435 if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) { 436 if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) { 437 if (!isa<CXXThisExpr>(Arg)) 438 Env.ThisPointeeLoc = 439 cast<RecordStorageLocation>(getStorageLocation(*Arg)); 440 // Otherwise (when the argument is `this`), retain the current 441 // environment's `ThisPointeeLoc`. 442 } 443 } 444 445 Env.pushCallInternal(Call->getDirectCallee(), 446 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 447 448 return Env; 449 } 450 451 Environment Environment::pushCall(const CXXConstructExpr *Call) const { 452 Environment Env(*this); 453 454 Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call); 455 456 Env.pushCallInternal(Call->getConstructor(), 457 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 458 459 return Env; 460 } 461 462 void Environment::pushCallInternal(const FunctionDecl *FuncDecl, 463 ArrayRef<const Expr *> Args) { 464 // Canonicalize to the definition of the function. This ensures that we're 465 // putting arguments into the same `ParamVarDecl`s` that the callee will later 466 // be retrieving them from. 467 assert(FuncDecl->getDefinition() != nullptr); 468 FuncDecl = FuncDecl->getDefinition(); 469 470 CallStack.push_back(FuncDecl); 471 472 initFieldsGlobalsAndFuncs(FuncDecl); 473 474 const auto *ParamIt = FuncDecl->param_begin(); 475 476 // FIXME: Parameters don't always map to arguments 1:1; examples include 477 // overloaded operators implemented as member functions, and parameter packs. 478 for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) { 479 assert(ParamIt != FuncDecl->param_end()); 480 const VarDecl *Param = *ParamIt; 481 setStorageLocation(*Param, createObject(*Param, Args[ArgIndex])); 482 } 483 } 484 485 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { 486 // We ignore some entries of `CalleeEnv`: 487 // - `DACtx` because is already the same in both 488 // - We don't want the callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or 489 // `ThisPointeeLoc` because they don't apply to us. 490 // - `DeclToLoc`, `ExprToLoc`, and `ExprToVal` capture information from the 491 // callee's local scope, so when popping that scope, we do not propagate 492 // the maps. 493 this->LocToVal = std::move(CalleeEnv.LocToVal); 494 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 495 496 if (Call->isGLValue()) { 497 if (CalleeEnv.ReturnLoc != nullptr) 498 setStorageLocation(*Call, *CalleeEnv.ReturnLoc); 499 } else if (!Call->getType()->isVoidType()) { 500 if (CalleeEnv.ReturnVal != nullptr) 501 setValue(*Call, *CalleeEnv.ReturnVal); 502 } 503 } 504 505 void Environment::popCall(const CXXConstructExpr *Call, 506 const Environment &CalleeEnv) { 507 // See also comment in `popCall(const CallExpr *, const Environment &)` above. 508 this->LocToVal = std::move(CalleeEnv.LocToVal); 509 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 510 511 if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) { 512 setValue(*Call, *Val); 513 } 514 } 515 516 bool Environment::equivalentTo(const Environment &Other, 517 Environment::ValueModel &Model) const { 518 assert(DACtx == Other.DACtx); 519 520 if (ReturnVal != Other.ReturnVal) 521 return false; 522 523 if (ReturnLoc != Other.ReturnLoc) 524 return false; 525 526 if (ThisPointeeLoc != Other.ThisPointeeLoc) 527 return false; 528 529 if (DeclToLoc != Other.DeclToLoc) 530 return false; 531 532 if (ExprToLoc != Other.ExprToLoc) 533 return false; 534 535 if (!compareKeyToValueMaps(ExprToVal, Other.ExprToVal, *this, Other, Model)) 536 return false; 537 538 if (!compareKeyToValueMaps(LocToVal, Other.LocToVal, *this, Other, Model)) 539 return false; 540 541 return true; 542 } 543 544 LatticeJoinEffect Environment::widen(const Environment &PrevEnv, 545 Environment::ValueModel &Model) { 546 assert(DACtx == PrevEnv.DACtx); 547 assert(ReturnVal == PrevEnv.ReturnVal); 548 assert(ReturnLoc == PrevEnv.ReturnLoc); 549 assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); 550 assert(CallStack == PrevEnv.CallStack); 551 552 auto Effect = LatticeJoinEffect::Unchanged; 553 554 // By the API, `PrevEnv` is a previous version of the environment for the same 555 // block, so we have some guarantees about its shape. In particular, it will 556 // be the result of a join or widen operation on previous values for this 557 // block. For `DeclToLoc`, `ExprToVal`, and `ExprToLoc`, join guarantees that 558 // these maps are subsets of the maps in `PrevEnv`. So, as long as we maintain 559 // this property here, we don't need change their current values to widen. 560 assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size()); 561 assert(ExprToVal.size() <= PrevEnv.ExprToVal.size()); 562 assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size()); 563 564 ExprToVal = widenKeyToValueMap(ExprToVal, PrevEnv.ExprToVal, *this, PrevEnv, 565 Model, Effect); 566 567 LocToVal = widenKeyToValueMap(LocToVal, PrevEnv.LocToVal, *this, PrevEnv, 568 Model, Effect); 569 if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() || 570 ExprToLoc.size() != PrevEnv.ExprToLoc.size() || 571 ExprToVal.size() != PrevEnv.ExprToVal.size() || 572 LocToVal.size() != PrevEnv.LocToVal.size()) 573 Effect = LatticeJoinEffect::Changed; 574 575 return Effect; 576 } 577 578 Environment Environment::join(const Environment &EnvA, const Environment &EnvB, 579 Environment::ValueModel &Model) { 580 assert(EnvA.DACtx == EnvB.DACtx); 581 assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); 582 assert(EnvA.CallStack == EnvB.CallStack); 583 584 Environment JoinedEnv(*EnvA.DACtx); 585 586 JoinedEnv.CallStack = EnvA.CallStack; 587 JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; 588 589 if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) { 590 // `ReturnVal` might not always get set -- for example if we have a return 591 // statement of the form `return some_other_func()` and we decide not to 592 // analyze `some_other_func()`. 593 // In this case, we can't say anything about the joined return value -- we 594 // don't simply want to propagate the return value that we do have, because 595 // it might not be the correct one. 596 // This occurs for example in the test `ContextSensitiveMutualRecursion`. 597 JoinedEnv.ReturnVal = nullptr; 598 } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) { 599 JoinedEnv.ReturnVal = EnvA.ReturnVal; 600 } else { 601 assert(!EnvA.CallStack.empty()); 602 // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this 603 // cast. 604 auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back()); 605 assert(Func != nullptr); 606 if (Value *MergedVal = 607 mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA, 608 *EnvB.ReturnVal, EnvB, JoinedEnv, Model)) 609 JoinedEnv.ReturnVal = MergedVal; 610 } 611 612 if (EnvA.ReturnLoc == EnvB.ReturnLoc) 613 JoinedEnv.ReturnLoc = EnvA.ReturnLoc; 614 else 615 JoinedEnv.ReturnLoc = nullptr; 616 617 // FIXME: Once we're able to remove declarations from `DeclToLoc` when their 618 // lifetime ends, add an assertion that there aren't any entries in 619 // `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to 620 // different storage locations. 621 JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc); 622 623 JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc); 624 625 // FIXME: update join to detect backedges and simplify the flow condition 626 // accordingly. 627 JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions( 628 EnvA.FlowConditionToken, EnvB.FlowConditionToken); 629 630 JoinedEnv.ExprToVal = joinKeyToValueMap(EnvA.ExprToVal, EnvB.ExprToVal, EnvA, 631 EnvB, JoinedEnv, Model); 632 633 JoinedEnv.LocToVal = joinKeyToValueMap(EnvA.LocToVal, EnvB.LocToVal, EnvA, 634 EnvB, JoinedEnv, Model); 635 636 return JoinedEnv; 637 } 638 639 StorageLocation &Environment::createStorageLocation(QualType Type) { 640 return DACtx->createStorageLocation(Type); 641 } 642 643 StorageLocation &Environment::createStorageLocation(const VarDecl &D) { 644 // Evaluated declarations are always assigned the same storage locations to 645 // ensure that the environment stabilizes across loop iterations. Storage 646 // locations for evaluated declarations are stored in the analysis context. 647 return DACtx->getStableStorageLocation(D); 648 } 649 650 StorageLocation &Environment::createStorageLocation(const Expr &E) { 651 // Evaluated expressions are always assigned the same storage locations to 652 // ensure that the environment stabilizes across loop iterations. Storage 653 // locations for evaluated expressions are stored in the analysis context. 654 return DACtx->getStableStorageLocation(E); 655 } 656 657 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { 658 assert(!DeclToLoc.contains(&D)); 659 DeclToLoc[&D] = &Loc; 660 } 661 662 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const { 663 auto It = DeclToLoc.find(&D); 664 if (It == DeclToLoc.end()) 665 return nullptr; 666 667 StorageLocation *Loc = It->second; 668 669 return Loc; 670 } 671 672 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) { 673 // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason, 674 // but we still want to be able to associate a `StorageLocation` with them, 675 // so allow these as an exception. 676 assert(E.isGLValue() || 677 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 678 setStorageLocationInternal(E, Loc); 679 } 680 681 StorageLocation *Environment::getStorageLocation(const Expr &E) const { 682 // See comment in `setStorageLocation()`. 683 assert(E.isGLValue() || 684 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 685 return getStorageLocationInternal(E); 686 } 687 688 RecordStorageLocation *Environment::getThisPointeeStorageLocation() const { 689 return ThisPointeeLoc; 690 } 691 692 RecordStorageLocation & 693 Environment::getResultObjectLocation(const Expr &RecordPRValue) { 694 assert(RecordPRValue.getType()->isRecordType()); 695 assert(RecordPRValue.isPRValue()); 696 697 if (StorageLocation *ExistingLoc = getStorageLocationInternal(RecordPRValue)) 698 return *cast<RecordStorageLocation>(ExistingLoc); 699 auto &Loc = cast<RecordStorageLocation>( 700 DACtx->getStableStorageLocation(RecordPRValue)); 701 setStorageLocationInternal(RecordPRValue, Loc); 702 return Loc; 703 } 704 705 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { 706 return DACtx->getOrCreateNullPointerValue(PointeeType); 707 } 708 709 void Environment::setValue(const StorageLocation &Loc, Value &Val) { 710 assert(!isa<RecordValue>(&Val) || &cast<RecordValue>(&Val)->getLoc() == &Loc); 711 712 LocToVal[&Loc] = &Val; 713 } 714 715 void Environment::setValue(const Expr &E, Value &Val) { 716 assert(E.isPRValue()); 717 ExprToVal[&E] = &Val; 718 } 719 720 Value *Environment::getValue(const StorageLocation &Loc) const { 721 return LocToVal.lookup(&Loc); 722 } 723 724 Value *Environment::getValue(const ValueDecl &D) const { 725 auto *Loc = getStorageLocation(D); 726 if (Loc == nullptr) 727 return nullptr; 728 return getValue(*Loc); 729 } 730 731 Value *Environment::getValue(const Expr &E) const { 732 if (E.isPRValue()) { 733 auto It = ExprToVal.find(&ignoreCFGOmittedNodes(E)); 734 return It == ExprToVal.end() ? nullptr : It->second; 735 } 736 737 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 738 if (It == ExprToLoc.end()) 739 return nullptr; 740 return getValue(*It->second); 741 } 742 743 Value *Environment::createValue(QualType Type) { 744 llvm::DenseSet<QualType> Visited; 745 int CreatedValuesCount = 0; 746 Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0, 747 CreatedValuesCount); 748 if (CreatedValuesCount > MaxCompositeValueSize) { 749 llvm::errs() << "Attempting to initialize a huge value of type: " << Type 750 << '\n'; 751 } 752 return Val; 753 } 754 755 void Environment::setStorageLocationInternal(const Expr &E, 756 StorageLocation &Loc) { 757 const Expr &CanonE = ignoreCFGOmittedNodes(E); 758 assert(!ExprToLoc.contains(&CanonE)); 759 ExprToLoc[&CanonE] = &Loc; 760 } 761 762 StorageLocation *Environment::getStorageLocationInternal(const Expr &E) const { 763 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 764 return It == ExprToLoc.end() ? nullptr : &*It->second; 765 } 766 767 Value *Environment::createValueUnlessSelfReferential( 768 QualType Type, llvm::DenseSet<QualType> &Visited, int Depth, 769 int &CreatedValuesCount) { 770 assert(!Type.isNull()); 771 assert(!Type->isReferenceType()); 772 773 // Allow unlimited fields at depth 1; only cap at deeper nesting levels. 774 if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) || 775 Depth > MaxCompositeValueDepth) 776 return nullptr; 777 778 if (Type->isBooleanType()) { 779 CreatedValuesCount++; 780 return &makeAtomicBoolValue(); 781 } 782 783 if (Type->isIntegerType()) { 784 // FIXME: consider instead `return nullptr`, given that we do nothing useful 785 // with integers, and so distinguishing them serves no purpose, but could 786 // prevent convergence. 787 CreatedValuesCount++; 788 return &arena().create<IntegerValue>(); 789 } 790 791 if (Type->isPointerType()) { 792 CreatedValuesCount++; 793 QualType PointeeType = Type->getPointeeType(); 794 StorageLocation &PointeeLoc = 795 createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount); 796 797 return &arena().create<PointerValue>(PointeeLoc); 798 } 799 800 if (Type->isRecordType()) { 801 CreatedValuesCount++; 802 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 803 for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { 804 assert(Field != nullptr); 805 806 QualType FieldType = Field->getType(); 807 808 FieldLocs.insert( 809 {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1, 810 CreatedValuesCount)}); 811 } 812 813 RecordStorageLocation &Loc = 814 arena().create<RecordStorageLocation>(Type, std::move(FieldLocs)); 815 RecordValue &RecordVal = create<RecordValue>(Loc); 816 817 // As we already have a storage location for the `RecordValue`, we can and 818 // should associate them in the environment. 819 setValue(Loc, RecordVal); 820 821 return &RecordVal; 822 } 823 824 return nullptr; 825 } 826 827 StorageLocation & 828 Environment::createLocAndMaybeValue(QualType Ty, 829 llvm::DenseSet<QualType> &Visited, 830 int Depth, int &CreatedValuesCount) { 831 if (!Visited.insert(Ty.getCanonicalType()).second) 832 return createStorageLocation(Ty.getNonReferenceType()); 833 Value *Val = createValueUnlessSelfReferential( 834 Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount); 835 Visited.erase(Ty.getCanonicalType()); 836 837 Ty = Ty.getNonReferenceType(); 838 839 if (Val == nullptr) 840 return createStorageLocation(Ty); 841 842 if (Ty->isRecordType()) 843 return cast<RecordValue>(Val)->getLoc(); 844 845 StorageLocation &Loc = createStorageLocation(Ty); 846 setValue(Loc, *Val); 847 return Loc; 848 } 849 850 StorageLocation &Environment::createObjectInternal(const VarDecl *D, 851 QualType Ty, 852 const Expr *InitExpr) { 853 if (Ty->isReferenceType()) { 854 // Although variables of reference type always need to be initialized, it 855 // can happen that we can't see the initializer, so `InitExpr` may still 856 // be null. 857 if (InitExpr) { 858 if (auto *InitExprLoc = getStorageLocation(*InitExpr)) 859 return *InitExprLoc; 860 } 861 862 // Even though we have an initializer, we might not get an 863 // InitExprLoc, for example if the InitExpr is a CallExpr for which we 864 // don't have a function body. In this case, we just invent a storage 865 // location and value -- it's the best we can do. 866 return createObjectInternal(D, Ty.getNonReferenceType(), nullptr); 867 } 868 869 Value *Val = nullptr; 870 if (InitExpr) 871 // In the (few) cases where an expression is intentionally 872 // "uninterpreted", `InitExpr` is not associated with a value. There are 873 // two ways to handle this situation: propagate the status, so that 874 // uninterpreted initializers result in uninterpreted variables, or 875 // provide a default value. We choose the latter so that later refinements 876 // of the variable can be used for reasoning about the surrounding code. 877 // For this reason, we let this case be handled by the `createValue()` 878 // call below. 879 // 880 // FIXME. If and when we interpret all language cases, change this to 881 // assert that `InitExpr` is interpreted, rather than supplying a 882 // default value (assuming we don't update the environment API to return 883 // references). 884 Val = getValue(*InitExpr); 885 if (!Val) 886 Val = createValue(Ty); 887 888 if (Ty->isRecordType()) 889 return cast<RecordValue>(Val)->getLoc(); 890 891 StorageLocation &Loc = 892 D ? createStorageLocation(*D) : createStorageLocation(Ty); 893 894 if (Val) 895 setValue(Loc, *Val); 896 897 return Loc; 898 } 899 900 void Environment::addToFlowCondition(const Formula &Val) { 901 DACtx->addFlowConditionConstraint(FlowConditionToken, Val); 902 } 903 904 bool Environment::flowConditionImplies(const Formula &Val) const { 905 return DACtx->flowConditionImplies(FlowConditionToken, Val); 906 } 907 908 void Environment::dump(raw_ostream &OS) const { 909 // FIXME: add printing for remaining fields and allow caller to decide what 910 // fields are printed. 911 OS << "DeclToLoc:\n"; 912 for (auto [D, L] : DeclToLoc) 913 OS << " [" << D->getNameAsString() << ", " << L << "]\n"; 914 915 OS << "ExprToLoc:\n"; 916 for (auto [E, L] : ExprToLoc) 917 OS << " [" << E << ", " << L << "]\n"; 918 919 OS << "ExprToVal:\n"; 920 for (auto [E, V] : ExprToVal) 921 OS << " [" << E << ", " << V << "]\n"; 922 923 OS << "LocToVal:\n"; 924 for (auto [L, V] : LocToVal) { 925 OS << " [" << L << ", " << V << ": " << *V << "]\n"; 926 } 927 928 OS << "FlowConditionToken:\n"; 929 DACtx->dumpFlowCondition(FlowConditionToken, OS); 930 } 931 932 void Environment::dump() const { 933 dump(llvm::dbgs()); 934 } 935 936 RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE, 937 const Environment &Env) { 938 Expr *ImplicitObject = MCE.getImplicitObjectArgument(); 939 if (ImplicitObject == nullptr) 940 return nullptr; 941 if (ImplicitObject->getType()->isPointerType()) { 942 if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*ImplicitObject))) 943 return &cast<RecordStorageLocation>(Val->getPointeeLoc()); 944 return nullptr; 945 } 946 return cast_or_null<RecordStorageLocation>( 947 Env.getStorageLocation(*ImplicitObject)); 948 } 949 950 RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME, 951 const Environment &Env) { 952 Expr *Base = ME.getBase(); 953 if (Base == nullptr) 954 return nullptr; 955 if (ME.isArrow()) { 956 if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Base))) 957 return &cast<RecordStorageLocation>(Val->getPointeeLoc()); 958 return nullptr; 959 } 960 return cast_or_null<RecordStorageLocation>(Env.getStorageLocation(*Base)); 961 } 962 963 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) { 964 // Unnamed bitfields are only used for padding and do not appear in 965 // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s 966 // field list, and we thus need to remove them before mapping inits to 967 // fields to avoid mapping inits to the wrongs fields. 968 std::vector<FieldDecl *> Fields; 969 llvm::copy_if( 970 RD->fields(), std::back_inserter(Fields), 971 [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); }); 972 return Fields; 973 } 974 975 RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) { 976 auto &NewVal = Env.create<RecordValue>(Loc); 977 Env.setValue(Loc, NewVal); 978 return NewVal; 979 } 980 981 RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) { 982 assert(Expr.getType()->isRecordType()); 983 984 if (Expr.isPRValue()) { 985 if (auto *ExistingVal = cast_or_null<RecordValue>(Env.getValue(Expr))) { 986 auto &NewVal = Env.create<RecordValue>(ExistingVal->getLoc()); 987 Env.setValue(Expr, NewVal); 988 return NewVal; 989 } 990 991 auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType())); 992 Env.setValue(Expr, NewVal); 993 return NewVal; 994 } 995 996 if (auto *Loc = 997 cast_or_null<RecordStorageLocation>(Env.getStorageLocation(Expr))) { 998 auto &NewVal = Env.create<RecordValue>(*Loc); 999 Env.setValue(*Loc, NewVal); 1000 return NewVal; 1001 } 1002 1003 auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType())); 1004 Env.setStorageLocation(Expr, NewVal.getLoc()); 1005 return NewVal; 1006 } 1007 1008 } // namespace dataflow 1009 } // namespace clang 1010