1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an Environment class that is used by dataflow analyses 10 // that run over Control-Flow Graphs (CFGs) to keep track of the state of the 11 // program at given program points. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" 16 #include "clang/AST/Decl.h" 17 #include "clang/AST/DeclCXX.h" 18 #include "clang/AST/Type.h" 19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h" 20 #include "clang/Analysis/FlowSensitive/Value.h" 21 #include "llvm/ADT/DenseMap.h" 22 #include "llvm/ADT/DenseSet.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/Support/Casting.h" 25 #include "llvm/Support/ErrorHandling.h" 26 #include <cassert> 27 #include <memory> 28 #include <utility> 29 30 namespace clang { 31 namespace dataflow { 32 33 // FIXME: convert these to parameters of the analysis or environment. Current 34 // settings have been experimentaly validated, but only for a particular 35 // analysis. 36 static constexpr int MaxCompositeValueDepth = 3; 37 static constexpr int MaxCompositeValueSize = 1000; 38 39 /// Returns a map consisting of key-value entries that are present in both maps. 40 template <typename K, typename V> 41 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1, 42 const llvm::DenseMap<K, V> &Map2) { 43 llvm::DenseMap<K, V> Result; 44 for (auto &Entry : Map1) { 45 auto It = Map2.find(Entry.first); 46 if (It != Map2.end() && Entry.second == It->second) 47 Result.insert({Entry.first, Entry.second}); 48 } 49 return Result; 50 } 51 52 static bool compareDistinctValues(QualType Type, Value &Val1, 53 const Environment &Env1, Value &Val2, 54 const Environment &Env2, 55 Environment::ValueModel &Model) { 56 // Note: Potentially costly, but, for booleans, we could check whether both 57 // can be proven equivalent in their respective environments. 58 59 // FIXME: move the reference/pointers logic from `areEquivalentValues` to here 60 // and implement separate, join/widen specific handling for 61 // reference/pointers. 62 switch (Model.compare(Type, Val1, Env1, Val2, Env2)) { 63 case ComparisonResult::Same: 64 return true; 65 case ComparisonResult::Different: 66 return false; 67 case ComparisonResult::Unknown: 68 switch (Val1.getKind()) { 69 case Value::Kind::Integer: 70 case Value::Kind::Reference: 71 case Value::Kind::Pointer: 72 case Value::Kind::Struct: 73 // FIXME: this choice intentionally introduces unsoundness to allow 74 // for convergence. Once we have widening support for the 75 // reference/pointer and struct built-in models, this should be 76 // `false`. 77 return true; 78 default: 79 return false; 80 } 81 } 82 llvm_unreachable("All cases covered in switch"); 83 } 84 85 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`, 86 /// respectively, of the same type `Type`. Merging generally produces a single 87 /// value that (soundly) approximates the two inputs, although the actual 88 /// meaning depends on `Model`. 89 static Value *mergeDistinctValues(QualType Type, Value &Val1, 90 const Environment &Env1, Value &Val2, 91 const Environment &Env2, 92 Environment &MergedEnv, 93 Environment::ValueModel &Model) { 94 // Join distinct boolean values preserving information about the constraints 95 // in the respective path conditions. 96 if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) { 97 // FIXME: Checking both values should be unnecessary, since they should have 98 // a consistent shape. However, right now we can end up with BoolValue's in 99 // integer-typed variables due to our incorrect handling of 100 // boolean-to-integer casts (we just propagate the BoolValue to the result 101 // of the cast). So, a join can encounter an integer in one branch but a 102 // bool in the other. 103 // For example: 104 // ``` 105 // std::optional<bool> o; 106 // int x; 107 // if (o.has_value()) 108 // x = o.value(); 109 // ``` 110 auto &Expr1 = cast<BoolValue>(Val1).formula(); 111 auto &Expr2 = cast<BoolValue>(Val2).formula(); 112 auto &A = MergedEnv.arena(); 113 auto &MergedVal = A.makeAtomRef(A.makeAtom()); 114 MergedEnv.addToFlowCondition( 115 A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()), 116 A.makeEquals(MergedVal, Expr1)), 117 A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()), 118 A.makeEquals(MergedVal, Expr2)))); 119 return &A.makeBoolValue(MergedVal); 120 } 121 122 // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge` 123 // returns false to avoid storing unneeded values in `DACtx`. 124 // FIXME: Creating the value based on the type alone creates misshapen values 125 // for lvalues, since the type does not reflect the need for `ReferenceValue`. 126 if (Value *MergedVal = MergedEnv.createValue(Type)) 127 if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv)) 128 return MergedVal; 129 130 return nullptr; 131 } 132 133 // When widening does not change `Current`, return value will equal `&Prev`. 134 static Value &widenDistinctValues(QualType Type, Value &Prev, 135 const Environment &PrevEnv, Value &Current, 136 Environment &CurrentEnv, 137 Environment::ValueModel &Model) { 138 // Boolean-model widening. 139 if (isa<BoolValue>(&Prev)) { 140 assert(isa<BoolValue>(Current)); 141 // Widen to Top, because we know they are different values. If previous was 142 // already Top, re-use that to (implicitly) indicate that no change occured. 143 if (isa<TopBoolValue>(Prev)) 144 return Prev; 145 return CurrentEnv.makeTopBoolValue(); 146 } 147 148 // FIXME: Add other built-in model widening. 149 150 // Custom-model widening. 151 if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv)) 152 return *W; 153 154 // Default of widening is a no-op: leave the current value unchanged. 155 return Current; 156 } 157 158 /// Initializes a global storage value. 159 static void insertIfGlobal(const Decl &D, 160 llvm::DenseSet<const VarDecl *> &Vars) { 161 if (auto *V = dyn_cast<VarDecl>(&D)) 162 if (V->hasGlobalStorage()) 163 Vars.insert(V); 164 } 165 166 static void insertIfFunction(const Decl &D, 167 llvm::DenseSet<const FunctionDecl *> &Funcs) { 168 if (auto *FD = dyn_cast<FunctionDecl>(&D)) 169 Funcs.insert(FD); 170 } 171 172 static void 173 getFieldsGlobalsAndFuncs(const Decl &D, 174 llvm::DenseSet<const FieldDecl *> &Fields, 175 llvm::DenseSet<const VarDecl *> &Vars, 176 llvm::DenseSet<const FunctionDecl *> &Funcs) { 177 insertIfGlobal(D, Vars); 178 insertIfFunction(D, Funcs); 179 if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D)) 180 for (const auto *B : Decomp->bindings()) 181 if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding())) 182 // FIXME: should we be using `E->getFoundDecl()`? 183 if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) 184 Fields.insert(FD); 185 } 186 187 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields, 188 /// global variables and functions that are declared in or referenced from 189 /// sub-statements. 190 static void 191 getFieldsGlobalsAndFuncs(const Stmt &S, 192 llvm::DenseSet<const FieldDecl *> &Fields, 193 llvm::DenseSet<const VarDecl *> &Vars, 194 llvm::DenseSet<const FunctionDecl *> &Funcs) { 195 for (auto *Child : S.children()) 196 if (Child != nullptr) 197 getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs); 198 if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S)) 199 getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs); 200 201 if (auto *DS = dyn_cast<DeclStmt>(&S)) { 202 if (DS->isSingleDecl()) 203 getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs); 204 else 205 for (auto *D : DS->getDeclGroup()) 206 getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs); 207 } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) { 208 insertIfGlobal(*E->getDecl(), Vars); 209 insertIfFunction(*E->getDecl(), Funcs); 210 } else if (auto *E = dyn_cast<MemberExpr>(&S)) { 211 // FIXME: should we be using `E->getFoundDecl()`? 212 const ValueDecl *VD = E->getMemberDecl(); 213 insertIfGlobal(*VD, Vars); 214 insertIfFunction(*VD, Funcs); 215 if (const auto *FD = dyn_cast<FieldDecl>(VD)) 216 Fields.insert(FD); 217 } 218 } 219 220 // FIXME: Add support for resetting globals after function calls to enable 221 // the implementation of sound analyses. 222 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { 223 assert(FuncDecl->getBody() != nullptr); 224 225 llvm::DenseSet<const FieldDecl *> Fields; 226 llvm::DenseSet<const VarDecl *> Vars; 227 llvm::DenseSet<const FunctionDecl *> Funcs; 228 229 // Look for global variable and field references in the 230 // constructor-initializers. 231 if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) { 232 for (const auto *Init : CtorDecl->inits()) { 233 if (Init->isMemberInitializer()) { 234 Fields.insert(Init->getMember()); 235 } else if (Init->isIndirectMemberInitializer()) { 236 for (const auto *I : Init->getIndirectMember()->chain()) 237 Fields.insert(cast<FieldDecl>(I)); 238 } 239 const Expr *E = Init->getInit(); 240 assert(E != nullptr); 241 getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs); 242 } 243 // Add all fields mentioned in default member initializers. 244 for (const FieldDecl *F : CtorDecl->getParent()->fields()) 245 if (const auto *I = F->getInClassInitializer()) 246 getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs); 247 } 248 getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs); 249 250 // These have to be added before the lines that follow to ensure that 251 // `create*` work correctly for structs. 252 DACtx->addModeledFields(Fields); 253 254 for (const VarDecl *D : Vars) { 255 if (getStorageLocation(*D) != nullptr) 256 continue; 257 auto &Loc = createStorageLocation(D->getType().getNonReferenceType()); 258 setStorageLocation(*D, Loc); 259 if (auto *Val = createValue(D->getType().getNonReferenceType())) 260 setValue(Loc, *Val); 261 } 262 263 for (const FunctionDecl *FD : Funcs) { 264 if (getStorageLocation(*FD) != nullptr) 265 continue; 266 auto &Loc = createStorageLocation(FD->getType()); 267 setStorageLocation(*FD, Loc); 268 } 269 } 270 271 Environment::Environment(DataflowAnalysisContext &DACtx) 272 : DACtx(&DACtx), 273 FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} 274 275 Environment Environment::fork() const { 276 Environment Copy(*this); 277 Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken); 278 return Copy; 279 } 280 281 Environment::Environment(DataflowAnalysisContext &DACtx, 282 const DeclContext &DeclCtx) 283 : Environment(DACtx) { 284 CallStack.push_back(&DeclCtx); 285 286 if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) { 287 assert(FuncDecl->getBody() != nullptr); 288 289 initFieldsGlobalsAndFuncs(FuncDecl); 290 291 for (const auto *ParamDecl : FuncDecl->parameters()) { 292 assert(ParamDecl != nullptr); 293 // References aren't objects, so the reference itself doesn't have a 294 // storage location. Instead, the storage location for a reference refers 295 // directly to an object of the referenced type -- so strip off any 296 // reference from the type. 297 auto &ParamLoc = 298 createStorageLocation(ParamDecl->getType().getNonReferenceType()); 299 setStorageLocation(*ParamDecl, ParamLoc); 300 if (Value *ParamVal = 301 createValue(ParamDecl->getType().getNonReferenceType())) 302 setValue(ParamLoc, *ParamVal); 303 } 304 } 305 306 if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) { 307 auto *Parent = MethodDecl->getParent(); 308 assert(Parent != nullptr); 309 if (Parent->isLambda()) 310 MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext()); 311 312 // FIXME: Initialize the ThisPointeeLoc of lambdas too. 313 if (MethodDecl && !MethodDecl->isStatic()) { 314 QualType ThisPointeeType = MethodDecl->getThisObjectType(); 315 ThisPointeeLoc = &cast<AggregateStorageLocation>( 316 createStorageLocation(ThisPointeeType)); 317 if (Value *ThisPointeeVal = createValue(ThisPointeeType)) 318 setValue(*ThisPointeeLoc, *ThisPointeeVal); 319 } 320 } 321 } 322 323 bool Environment::canDescend(unsigned MaxDepth, 324 const DeclContext *Callee) const { 325 return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee); 326 } 327 328 Environment Environment::pushCall(const CallExpr *Call) const { 329 Environment Env(*this); 330 331 if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) { 332 if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) { 333 if (!isa<CXXThisExpr>(Arg)) 334 Env.ThisPointeeLoc = cast<AggregateStorageLocation>( 335 getStorageLocation(*Arg, SkipPast::Reference)); 336 // Otherwise (when the argument is `this`), retain the current 337 // environment's `ThisPointeeLoc`. 338 } 339 } 340 341 Env.pushCallInternal(Call->getDirectCallee(), 342 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 343 344 return Env; 345 } 346 347 Environment Environment::pushCall(const CXXConstructExpr *Call) const { 348 Environment Env(*this); 349 350 Env.ThisPointeeLoc = &cast<AggregateStorageLocation>( 351 Env.createStorageLocation(Call->getType())); 352 if (Value *Val = Env.createValue(Call->getType())) 353 Env.setValue(*Env.ThisPointeeLoc, *Val); 354 355 Env.pushCallInternal(Call->getConstructor(), 356 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 357 358 return Env; 359 } 360 361 void Environment::pushCallInternal(const FunctionDecl *FuncDecl, 362 ArrayRef<const Expr *> Args) { 363 // Canonicalize to the definition of the function. This ensures that we're 364 // putting arguments into the same `ParamVarDecl`s` that the callee will later 365 // be retrieving them from. 366 assert(FuncDecl->getDefinition() != nullptr); 367 FuncDecl = FuncDecl->getDefinition(); 368 369 CallStack.push_back(FuncDecl); 370 371 initFieldsGlobalsAndFuncs(FuncDecl); 372 373 const auto *ParamIt = FuncDecl->param_begin(); 374 375 // FIXME: Parameters don't always map to arguments 1:1; examples include 376 // overloaded operators implemented as member functions, and parameter packs. 377 for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) { 378 assert(ParamIt != FuncDecl->param_end()); 379 380 const Expr *Arg = Args[ArgIndex]; 381 auto *ArgLoc = getStorageLocation(*Arg, SkipPast::Reference); 382 if (ArgLoc == nullptr) 383 continue; 384 385 const VarDecl *Param = *ParamIt; 386 387 QualType ParamType = Param->getType(); 388 if (ParamType->isReferenceType()) { 389 setStorageLocation(*Param, *ArgLoc); 390 } else { 391 auto &Loc = createStorageLocation(*Param); 392 setStorageLocation(*Param, Loc); 393 394 if (auto *ArgVal = getValue(*ArgLoc)) { 395 setValue(Loc, *ArgVal); 396 } else if (Value *Val = createValue(ParamType)) { 397 setValue(Loc, *Val); 398 } 399 } 400 } 401 } 402 403 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { 404 // We ignore `DACtx` because it's already the same in both. We don't want the 405 // callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or `ThisPointeeLoc`. We don't 406 // bring back `DeclToLoc` and `ExprToLoc` because we want to be able to later 407 // analyze the same callee in a different context, and `setStorageLocation` 408 // requires there to not already be a storage location assigned. Conceptually, 409 // these maps capture information from the local scope, so when popping that 410 // scope, we do not propagate the maps. 411 this->LocToVal = std::move(CalleeEnv.LocToVal); 412 this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct); 413 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 414 415 if (Call->isGLValue()) { 416 if (CalleeEnv.ReturnLoc != nullptr) 417 setStorageLocationStrict(*Call, *CalleeEnv.ReturnLoc); 418 } else if (!Call->getType()->isVoidType()) { 419 if (CalleeEnv.ReturnVal != nullptr) 420 setValueStrict(*Call, *CalleeEnv.ReturnVal); 421 } 422 } 423 424 void Environment::popCall(const CXXConstructExpr *Call, 425 const Environment &CalleeEnv) { 426 // See also comment in `popCall(const CallExpr *, const Environment &)` above. 427 this->LocToVal = std::move(CalleeEnv.LocToVal); 428 this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct); 429 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 430 431 if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) { 432 setValueStrict(*Call, *Val); 433 } 434 } 435 436 bool Environment::equivalentTo(const Environment &Other, 437 Environment::ValueModel &Model) const { 438 assert(DACtx == Other.DACtx); 439 440 if (ReturnVal != Other.ReturnVal) 441 return false; 442 443 if (ReturnLoc != Other.ReturnLoc) 444 return false; 445 446 if (ThisPointeeLoc != Other.ThisPointeeLoc) 447 return false; 448 449 if (DeclToLoc != Other.DeclToLoc) 450 return false; 451 452 if (ExprToLoc != Other.ExprToLoc) 453 return false; 454 455 // Compare the contents for the intersection of their domains. 456 for (auto &Entry : LocToVal) { 457 const StorageLocation *Loc = Entry.first; 458 assert(Loc != nullptr); 459 460 Value *Val = Entry.second; 461 assert(Val != nullptr); 462 463 auto It = Other.LocToVal.find(Loc); 464 if (It == Other.LocToVal.end()) 465 continue; 466 assert(It->second != nullptr); 467 468 if (!areEquivalentValues(*Val, *It->second) && 469 !compareDistinctValues(Loc->getType(), *Val, *this, *It->second, Other, 470 Model)) 471 return false; 472 } 473 474 return true; 475 } 476 477 LatticeJoinEffect Environment::widen(const Environment &PrevEnv, 478 Environment::ValueModel &Model) { 479 assert(DACtx == PrevEnv.DACtx); 480 assert(ReturnVal == PrevEnv.ReturnVal); 481 assert(ReturnLoc == PrevEnv.ReturnLoc); 482 assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); 483 assert(CallStack == PrevEnv.CallStack); 484 485 auto Effect = LatticeJoinEffect::Unchanged; 486 487 // By the API, `PrevEnv` is a previous version of the environment for the same 488 // block, so we have some guarantees about its shape. In particular, it will 489 // be the result of a join or widen operation on previous values for this 490 // block. For `DeclToLoc` and `ExprToLoc`, join guarantees that these maps are 491 // subsets of the maps in `PrevEnv`. So, as long as we maintain this property 492 // here, we don't need change their current values to widen. 493 // 494 // FIXME: `MemberLocToStruct` does not share the above property, because 495 // `join` can cause the map size to increase (when we add fresh data in places 496 // of conflict). Once this issue with join is resolved, re-enable the 497 // assertion below or replace with something that captures the desired 498 // invariant. 499 assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size()); 500 assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size()); 501 // assert(MemberLocToStruct.size() <= PrevEnv.MemberLocToStruct.size()); 502 503 llvm::DenseMap<const StorageLocation *, Value *> WidenedLocToVal; 504 for (auto &Entry : LocToVal) { 505 const StorageLocation *Loc = Entry.first; 506 assert(Loc != nullptr); 507 508 Value *Val = Entry.second; 509 assert(Val != nullptr); 510 511 auto PrevIt = PrevEnv.LocToVal.find(Loc); 512 if (PrevIt == PrevEnv.LocToVal.end()) 513 continue; 514 assert(PrevIt->second != nullptr); 515 516 if (areEquivalentValues(*Val, *PrevIt->second)) { 517 WidenedLocToVal.insert({Loc, Val}); 518 continue; 519 } 520 521 Value &WidenedVal = widenDistinctValues(Loc->getType(), *PrevIt->second, 522 PrevEnv, *Val, *this, Model); 523 WidenedLocToVal.insert({Loc, &WidenedVal}); 524 if (&WidenedVal != PrevIt->second) 525 Effect = LatticeJoinEffect::Changed; 526 } 527 LocToVal = std::move(WidenedLocToVal); 528 // FIXME: update the equivalence calculation for `MemberLocToStruct`, once we 529 // have a systematic way of soundly comparing this map. 530 if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() || 531 ExprToLoc.size() != PrevEnv.ExprToLoc.size() || 532 LocToVal.size() != PrevEnv.LocToVal.size() || 533 MemberLocToStruct.size() != PrevEnv.MemberLocToStruct.size()) 534 Effect = LatticeJoinEffect::Changed; 535 536 return Effect; 537 } 538 539 Environment Environment::join(const Environment &EnvA, const Environment &EnvB, 540 Environment::ValueModel &Model) { 541 assert(EnvA.DACtx == EnvB.DACtx); 542 assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); 543 assert(EnvA.CallStack == EnvB.CallStack); 544 545 Environment JoinedEnv(*EnvA.DACtx); 546 547 JoinedEnv.CallStack = EnvA.CallStack; 548 JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; 549 550 if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) { 551 // `ReturnVal` might not always get set -- for example if we have a return 552 // statement of the form `return some_other_func()` and we decide not to 553 // analyze `some_other_func()`. 554 // In this case, we can't say anything about the joined return value -- we 555 // don't simply want to propagate the return value that we do have, because 556 // it might not be the correct one. 557 // This occurs for example in the test `ContextSensitiveMutualRecursion`. 558 JoinedEnv.ReturnVal = nullptr; 559 } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) { 560 JoinedEnv.ReturnVal = EnvA.ReturnVal; 561 } else { 562 assert(!EnvA.CallStack.empty()); 563 // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this 564 // cast. 565 auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back()); 566 assert(Func != nullptr); 567 if (Value *MergedVal = 568 mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA, 569 *EnvB.ReturnVal, EnvB, JoinedEnv, Model)) 570 JoinedEnv.ReturnVal = MergedVal; 571 } 572 573 if (EnvA.ReturnLoc == EnvB.ReturnLoc) 574 JoinedEnv.ReturnLoc = EnvA.ReturnLoc; 575 else 576 JoinedEnv.ReturnLoc = nullptr; 577 578 // FIXME: Once we're able to remove declarations from `DeclToLoc` when their 579 // lifetime ends, add an assertion that there aren't any entries in 580 // `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to 581 // different storage locations. 582 JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc); 583 584 JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc); 585 586 JoinedEnv.MemberLocToStruct = 587 intersectDenseMaps(EnvA.MemberLocToStruct, EnvB.MemberLocToStruct); 588 589 // FIXME: update join to detect backedges and simplify the flow condition 590 // accordingly. 591 JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions( 592 EnvA.FlowConditionToken, EnvB.FlowConditionToken); 593 594 for (auto &Entry : EnvA.LocToVal) { 595 const StorageLocation *Loc = Entry.first; 596 assert(Loc != nullptr); 597 598 Value *Val = Entry.second; 599 assert(Val != nullptr); 600 601 auto It = EnvB.LocToVal.find(Loc); 602 if (It == EnvB.LocToVal.end()) 603 continue; 604 assert(It->second != nullptr); 605 606 if (areEquivalentValues(*Val, *It->second)) { 607 JoinedEnv.LocToVal.insert({Loc, Val}); 608 continue; 609 } 610 611 if (Value *MergedVal = mergeDistinctValues( 612 Loc->getType(), *Val, EnvA, *It->second, EnvB, JoinedEnv, Model)) { 613 JoinedEnv.LocToVal.insert({Loc, MergedVal}); 614 } 615 } 616 617 return JoinedEnv; 618 } 619 620 StorageLocation &Environment::createStorageLocation(QualType Type) { 621 return DACtx->createStorageLocation(Type); 622 } 623 624 StorageLocation &Environment::createStorageLocation(const VarDecl &D) { 625 // Evaluated declarations are always assigned the same storage locations to 626 // ensure that the environment stabilizes across loop iterations. Storage 627 // locations for evaluated declarations are stored in the analysis context. 628 return DACtx->getStableStorageLocation(D); 629 } 630 631 StorageLocation &Environment::createStorageLocation(const Expr &E) { 632 // Evaluated expressions are always assigned the same storage locations to 633 // ensure that the environment stabilizes across loop iterations. Storage 634 // locations for evaluated expressions are stored in the analysis context. 635 return DACtx->getStableStorageLocation(E); 636 } 637 638 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { 639 assert(!DeclToLoc.contains(&D)); 640 assert(!isa_and_nonnull<ReferenceValue>(getValue(Loc))); 641 DeclToLoc[&D] = &Loc; 642 } 643 644 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const { 645 auto It = DeclToLoc.find(&D); 646 if (It == DeclToLoc.end()) 647 return nullptr; 648 649 StorageLocation *Loc = It->second; 650 651 assert(!isa_and_nonnull<ReferenceValue>(getValue(*Loc))); 652 653 return Loc; 654 } 655 656 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) { 657 const Expr &CanonE = ignoreCFGOmittedNodes(E); 658 assert(!ExprToLoc.contains(&CanonE)); 659 ExprToLoc[&CanonE] = &Loc; 660 } 661 662 void Environment::setStorageLocationStrict(const Expr &E, 663 StorageLocation &Loc) { 664 // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason, 665 // but we still want to be able to associate a `StorageLocation` with them, 666 // so allow these as an exception. 667 assert(E.isGLValue() || 668 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 669 setStorageLocation(E, Loc); 670 } 671 672 StorageLocation *Environment::getStorageLocation(const Expr &E, 673 SkipPast SP) const { 674 // FIXME: Add a test with parens. 675 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 676 return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP); 677 } 678 679 StorageLocation *Environment::getStorageLocationStrict(const Expr &E) const { 680 // See comment in `setStorageLocationStrict()`. 681 assert(E.isGLValue() || 682 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 683 StorageLocation *Loc = getStorageLocation(E, SkipPast::None); 684 685 if (Loc == nullptr) 686 return nullptr; 687 688 if (auto *RefVal = dyn_cast_or_null<ReferenceValue>(getValue(*Loc))) 689 return &RefVal->getReferentLoc(); 690 691 return Loc; 692 } 693 694 AggregateStorageLocation *Environment::getThisPointeeStorageLocation() const { 695 return ThisPointeeLoc; 696 } 697 698 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { 699 return DACtx->getOrCreateNullPointerValue(PointeeType); 700 } 701 702 void Environment::setValue(const StorageLocation &Loc, Value &Val) { 703 LocToVal[&Loc] = &Val; 704 705 if (auto *StructVal = dyn_cast<StructValue>(&Val)) { 706 auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc); 707 708 const QualType Type = AggregateLoc.getType(); 709 assert(Type->isRecordType()); 710 711 for (const FieldDecl *Field : DACtx->getReferencedFields(Type)) { 712 assert(Field != nullptr); 713 StorageLocation &FieldLoc = AggregateLoc.getChild(*Field); 714 MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field); 715 if (auto *FieldVal = StructVal->getChild(*Field)) 716 setValue(FieldLoc, *FieldVal); 717 } 718 } 719 720 auto It = MemberLocToStruct.find(&Loc); 721 if (It != MemberLocToStruct.end()) { 722 // `Loc` is the location of a struct member so we need to also update the 723 // value of the member in the corresponding `StructValue`. 724 725 assert(It->second.first != nullptr); 726 StructValue &StructVal = *It->second.first; 727 728 assert(It->second.second != nullptr); 729 const ValueDecl &Member = *It->second.second; 730 731 StructVal.setChild(Member, Val); 732 } 733 } 734 735 void Environment::clearValue(const StorageLocation &Loc) { 736 LocToVal.erase(&Loc); 737 738 if (auto It = MemberLocToStruct.find(&Loc); It != MemberLocToStruct.end()) { 739 // `Loc` is the location of a struct member so we need to also clear the 740 // member in the corresponding `StructValue`. 741 742 assert(It->second.first != nullptr); 743 StructValue &StructVal = *It->second.first; 744 745 assert(It->second.second != nullptr); 746 const ValueDecl &Member = *It->second.second; 747 748 StructVal.clearChild(Member); 749 } 750 } 751 752 void Environment::setValueStrict(const Expr &E, Value &Val) { 753 assert(E.isPRValue()); 754 assert(!isa<ReferenceValue>(Val)); 755 756 StorageLocation *Loc = getStorageLocation(E, SkipPast::None); 757 if (Loc == nullptr) { 758 Loc = &createStorageLocation(E); 759 setStorageLocation(E, *Loc); 760 } 761 setValue(*Loc, Val); 762 } 763 764 Value *Environment::getValue(const StorageLocation &Loc) const { 765 return LocToVal.lookup(&Loc); 766 } 767 768 Value *Environment::getValue(const ValueDecl &D) const { 769 auto *Loc = getStorageLocation(D); 770 if (Loc == nullptr) 771 return nullptr; 772 return getValue(*Loc); 773 } 774 775 Value *Environment::getValue(const Expr &E, SkipPast SP) const { 776 auto *Loc = getStorageLocation(E, SP); 777 if (Loc == nullptr) 778 return nullptr; 779 return getValue(*Loc); 780 } 781 782 Value *Environment::getValueStrict(const Expr &E) const { 783 assert(E.isPRValue()); 784 Value *Val = getValue(E, SkipPast::None); 785 786 assert(Val == nullptr || !isa<ReferenceValue>(Val)); 787 788 return Val; 789 } 790 791 Value *Environment::createValue(QualType Type) { 792 llvm::DenseSet<QualType> Visited; 793 int CreatedValuesCount = 0; 794 Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0, 795 CreatedValuesCount); 796 if (CreatedValuesCount > MaxCompositeValueSize) { 797 llvm::errs() << "Attempting to initialize a huge value of type: " << Type 798 << '\n'; 799 } 800 return Val; 801 } 802 803 Value *Environment::createValueUnlessSelfReferential( 804 QualType Type, llvm::DenseSet<QualType> &Visited, int Depth, 805 int &CreatedValuesCount) { 806 assert(!Type.isNull()); 807 808 // Allow unlimited fields at depth 1; only cap at deeper nesting levels. 809 if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) || 810 Depth > MaxCompositeValueDepth) 811 return nullptr; 812 813 if (Type->isBooleanType()) { 814 CreatedValuesCount++; 815 return &makeAtomicBoolValue(); 816 } 817 818 if (Type->isIntegerType()) { 819 // FIXME: consider instead `return nullptr`, given that we do nothing useful 820 // with integers, and so distinguishing them serves no purpose, but could 821 // prevent convergence. 822 CreatedValuesCount++; 823 return &arena().create<IntegerValue>(); 824 } 825 826 if (Type->isReferenceType() || Type->isPointerType()) { 827 CreatedValuesCount++; 828 QualType PointeeType = Type->getPointeeType(); 829 auto &PointeeLoc = createStorageLocation(PointeeType); 830 831 if (Visited.insert(PointeeType.getCanonicalType()).second) { 832 Value *PointeeVal = createValueUnlessSelfReferential( 833 PointeeType, Visited, Depth, CreatedValuesCount); 834 Visited.erase(PointeeType.getCanonicalType()); 835 836 if (PointeeVal != nullptr) 837 setValue(PointeeLoc, *PointeeVal); 838 } 839 840 if (Type->isReferenceType()) 841 return &arena().create<ReferenceValue>(PointeeLoc); 842 else 843 return &arena().create<PointerValue>(PointeeLoc); 844 } 845 846 if (Type->isRecordType()) { 847 CreatedValuesCount++; 848 llvm::DenseMap<const ValueDecl *, Value *> FieldValues; 849 for (const FieldDecl *Field : DACtx->getReferencedFields(Type)) { 850 assert(Field != nullptr); 851 852 QualType FieldType = Field->getType(); 853 if (Visited.contains(FieldType.getCanonicalType())) 854 continue; 855 856 Visited.insert(FieldType.getCanonicalType()); 857 if (auto *FieldValue = createValueUnlessSelfReferential( 858 FieldType, Visited, Depth + 1, CreatedValuesCount)) 859 FieldValues.insert({Field, FieldValue}); 860 Visited.erase(FieldType.getCanonicalType()); 861 } 862 863 return &arena().create<StructValue>(std::move(FieldValues)); 864 } 865 866 return nullptr; 867 } 868 869 StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const { 870 switch (SP) { 871 case SkipPast::None: 872 return Loc; 873 case SkipPast::Reference: 874 // References cannot be chained so we only need to skip past one level of 875 // indirection. 876 if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc))) 877 return Val->getReferentLoc(); 878 return Loc; 879 } 880 llvm_unreachable("bad SkipPast kind"); 881 } 882 883 const StorageLocation &Environment::skip(const StorageLocation &Loc, 884 SkipPast SP) const { 885 return skip(*const_cast<StorageLocation *>(&Loc), SP); 886 } 887 888 void Environment::addToFlowCondition(const Formula &Val) { 889 DACtx->addFlowConditionConstraint(FlowConditionToken, Val); 890 } 891 void Environment::addToFlowCondition(BoolValue &Val) { 892 addToFlowCondition(Val.formula()); 893 } 894 895 bool Environment::flowConditionImplies(const Formula &Val) const { 896 return DACtx->flowConditionImplies(FlowConditionToken, Val); 897 } 898 bool Environment::flowConditionImplies(BoolValue &Val) const { 899 return flowConditionImplies(Val.formula()); 900 } 901 902 void Environment::dump(raw_ostream &OS) const { 903 // FIXME: add printing for remaining fields and allow caller to decide what 904 // fields are printed. 905 OS << "DeclToLoc:\n"; 906 for (auto [D, L] : DeclToLoc) 907 OS << " [" << D->getNameAsString() << ", " << L << "]\n"; 908 909 OS << "ExprToLoc:\n"; 910 for (auto [E, L] : ExprToLoc) 911 OS << " [" << E << ", " << L << "]\n"; 912 913 OS << "LocToVal:\n"; 914 for (auto [L, V] : LocToVal) { 915 OS << " [" << L << ", " << V << ": " << *V << "]\n"; 916 } 917 918 OS << "FlowConditionToken:\n"; 919 DACtx->dumpFlowCondition(FlowConditionToken, OS); 920 } 921 922 void Environment::dump() const { 923 dump(llvm::dbgs()); 924 } 925 926 AggregateStorageLocation * 927 getImplicitObjectLocation(const CXXMemberCallExpr &MCE, 928 const Environment &Env) { 929 Expr *ImplicitObject = MCE.getImplicitObjectArgument(); 930 if (ImplicitObject == nullptr) 931 return nullptr; 932 StorageLocation *Loc = 933 Env.getStorageLocation(*ImplicitObject, SkipPast::Reference); 934 if (Loc == nullptr) 935 return nullptr; 936 if (ImplicitObject->getType()->isPointerType()) { 937 if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Loc))) 938 return &cast<AggregateStorageLocation>(Val->getPointeeLoc()); 939 return nullptr; 940 } 941 return cast<AggregateStorageLocation>(Loc); 942 } 943 944 AggregateStorageLocation *getBaseObjectLocation(const MemberExpr &ME, 945 const Environment &Env) { 946 Expr *Base = ME.getBase(); 947 if (Base == nullptr) 948 return nullptr; 949 StorageLocation *Loc = Env.getStorageLocation(*Base, SkipPast::Reference); 950 if (Loc == nullptr) 951 return nullptr; 952 if (ME.isArrow()) { 953 if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Loc))) 954 return &cast<AggregateStorageLocation>(Val->getPointeeLoc()); 955 return nullptr; 956 } 957 return cast<AggregateStorageLocation>(Loc); 958 } 959 960 } // namespace dataflow 961 } // namespace clang 962