1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an Environment class that is used by dataflow analyses 10 // that run over Control-Flow Graphs (CFGs) to keep track of the state of the 11 // program at given program points. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" 16 #include "clang/AST/Decl.h" 17 #include "clang/AST/DeclCXX.h" 18 #include "clang/AST/Type.h" 19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h" 20 #include "clang/Analysis/FlowSensitive/Value.h" 21 #include "llvm/ADT/DenseMap.h" 22 #include "llvm/ADT/DenseSet.h" 23 #include "llvm/ADT/MapVector.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/Support/Casting.h" 26 #include "llvm/Support/ErrorHandling.h" 27 #include <cassert> 28 #include <memory> 29 #include <utility> 30 31 namespace clang { 32 namespace dataflow { 33 34 // FIXME: convert these to parameters of the analysis or environment. Current 35 // settings have been experimentaly validated, but only for a particular 36 // analysis. 37 static constexpr int MaxCompositeValueDepth = 3; 38 static constexpr int MaxCompositeValueSize = 1000; 39 40 /// Returns a map consisting of key-value entries that are present in both maps. 41 template <typename K, typename V> 42 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1, 43 const llvm::DenseMap<K, V> &Map2) { 44 llvm::DenseMap<K, V> Result; 45 for (auto &Entry : Map1) { 46 auto It = Map2.find(Entry.first); 47 if (It != Map2.end() && Entry.second == It->second) 48 Result.insert({Entry.first, Entry.second}); 49 } 50 return Result; 51 } 52 53 static bool compareDistinctValues(QualType Type, Value &Val1, 54 const Environment &Env1, Value &Val2, 55 const Environment &Env2, 56 Environment::ValueModel &Model) { 57 // Note: Potentially costly, but, for booleans, we could check whether both 58 // can be proven equivalent in their respective environments. 59 60 // FIXME: move the reference/pointers logic from `areEquivalentValues` to here 61 // and implement separate, join/widen specific handling for 62 // reference/pointers. 63 switch (Model.compare(Type, Val1, Env1, Val2, Env2)) { 64 case ComparisonResult::Same: 65 return true; 66 case ComparisonResult::Different: 67 return false; 68 case ComparisonResult::Unknown: 69 switch (Val1.getKind()) { 70 case Value::Kind::Integer: 71 case Value::Kind::Pointer: 72 case Value::Kind::Struct: 73 // FIXME: this choice intentionally introduces unsoundness to allow 74 // for convergence. Once we have widening support for the 75 // reference/pointer and struct built-in models, this should be 76 // `false`. 77 return true; 78 default: 79 return false; 80 } 81 } 82 llvm_unreachable("All cases covered in switch"); 83 } 84 85 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`, 86 /// respectively, of the same type `Type`. Merging generally produces a single 87 /// value that (soundly) approximates the two inputs, although the actual 88 /// meaning depends on `Model`. 89 static Value *mergeDistinctValues(QualType Type, Value &Val1, 90 const Environment &Env1, Value &Val2, 91 const Environment &Env2, 92 Environment &MergedEnv, 93 Environment::ValueModel &Model) { 94 // Join distinct boolean values preserving information about the constraints 95 // in the respective path conditions. 96 if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) { 97 // FIXME: Checking both values should be unnecessary, since they should have 98 // a consistent shape. However, right now we can end up with BoolValue's in 99 // integer-typed variables due to our incorrect handling of 100 // boolean-to-integer casts (we just propagate the BoolValue to the result 101 // of the cast). So, a join can encounter an integer in one branch but a 102 // bool in the other. 103 // For example: 104 // ``` 105 // std::optional<bool> o; 106 // int x; 107 // if (o.has_value()) 108 // x = o.value(); 109 // ``` 110 auto &Expr1 = cast<BoolValue>(Val1).formula(); 111 auto &Expr2 = cast<BoolValue>(Val2).formula(); 112 auto &A = MergedEnv.arena(); 113 auto &MergedVal = A.makeAtomRef(A.makeAtom()); 114 MergedEnv.addToFlowCondition( 115 A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()), 116 A.makeEquals(MergedVal, Expr1)), 117 A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()), 118 A.makeEquals(MergedVal, Expr2)))); 119 return &A.makeBoolValue(MergedVal); 120 } 121 122 Value *MergedVal = nullptr; 123 if (auto *StructVal1 = dyn_cast<StructValue>(&Val1)) { 124 [[maybe_unused]] auto *StructVal2 = cast<StructValue>(&Val2); 125 126 // Values to be merged are always associated with the same location in 127 // `LocToVal`. The location stored in `StructVal` should therefore also 128 // be the same. 129 assert(&StructVal1->getAggregateLoc() == &StructVal2->getAggregateLoc()); 130 131 // `StructVal1` and `StructVal2` may have different properties associated 132 // with them. Create a new `StructValue` without any properties so that we 133 // soundly approximate both values. If a particular analysis needs to merge 134 // properties, it should do so in `DataflowAnalysis::merge()`. 135 MergedVal = &MergedEnv.create<StructValue>(StructVal1->getAggregateLoc()); 136 } else { 137 MergedVal = MergedEnv.createValue(Type); 138 } 139 140 // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge` 141 // returns false to avoid storing unneeded values in `DACtx`. 142 if (MergedVal) 143 if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv)) 144 return MergedVal; 145 146 return nullptr; 147 } 148 149 // When widening does not change `Current`, return value will equal `&Prev`. 150 static Value &widenDistinctValues(QualType Type, Value &Prev, 151 const Environment &PrevEnv, Value &Current, 152 Environment &CurrentEnv, 153 Environment::ValueModel &Model) { 154 // Boolean-model widening. 155 if (isa<BoolValue>(&Prev)) { 156 assert(isa<BoolValue>(Current)); 157 // Widen to Top, because we know they are different values. If previous was 158 // already Top, re-use that to (implicitly) indicate that no change occured. 159 if (isa<TopBoolValue>(Prev)) 160 return Prev; 161 return CurrentEnv.makeTopBoolValue(); 162 } 163 164 // FIXME: Add other built-in model widening. 165 166 // Custom-model widening. 167 if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv)) 168 return *W; 169 170 // Default of widening is a no-op: leave the current value unchanged. 171 return Current; 172 } 173 174 /// Initializes a global storage value. 175 static void insertIfGlobal(const Decl &D, 176 llvm::DenseSet<const VarDecl *> &Vars) { 177 if (auto *V = dyn_cast<VarDecl>(&D)) 178 if (V->hasGlobalStorage()) 179 Vars.insert(V); 180 } 181 182 static void insertIfFunction(const Decl &D, 183 llvm::DenseSet<const FunctionDecl *> &Funcs) { 184 if (auto *FD = dyn_cast<FunctionDecl>(&D)) 185 Funcs.insert(FD); 186 } 187 188 static void 189 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields, 190 llvm::DenseSet<const VarDecl *> &Vars, 191 llvm::DenseSet<const FunctionDecl *> &Funcs) { 192 insertIfGlobal(D, Vars); 193 insertIfFunction(D, Funcs); 194 if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D)) 195 for (const auto *B : Decomp->bindings()) 196 if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding())) 197 // FIXME: should we be using `E->getFoundDecl()`? 198 if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) 199 Fields.insert(FD); 200 } 201 202 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields, 203 /// global variables and functions that are declared in or referenced from 204 /// sub-statements. 205 static void 206 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, 207 llvm::DenseSet<const VarDecl *> &Vars, 208 llvm::DenseSet<const FunctionDecl *> &Funcs) { 209 for (auto *Child : S.children()) 210 if (Child != nullptr) 211 getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs); 212 if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S)) 213 getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs); 214 215 if (auto *DS = dyn_cast<DeclStmt>(&S)) { 216 if (DS->isSingleDecl()) 217 getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs); 218 else 219 for (auto *D : DS->getDeclGroup()) 220 getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs); 221 } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) { 222 insertIfGlobal(*E->getDecl(), Vars); 223 insertIfFunction(*E->getDecl(), Funcs); 224 } else if (auto *E = dyn_cast<MemberExpr>(&S)) { 225 // FIXME: should we be using `E->getFoundDecl()`? 226 const ValueDecl *VD = E->getMemberDecl(); 227 insertIfGlobal(*VD, Vars); 228 insertIfFunction(*VD, Funcs); 229 if (const auto *FD = dyn_cast<FieldDecl>(VD)) 230 Fields.insert(FD); 231 } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) { 232 if (RecordDecl *RD = InitList->getType()->getAsRecordDecl()) 233 for (const auto *FD : getFieldsForInitListExpr(RD)) 234 Fields.insert(FD); 235 } 236 } 237 238 // FIXME: Add support for resetting globals after function calls to enable 239 // the implementation of sound analyses. 240 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { 241 assert(FuncDecl->getBody() != nullptr); 242 243 FieldSet Fields; 244 llvm::DenseSet<const VarDecl *> Vars; 245 llvm::DenseSet<const FunctionDecl *> Funcs; 246 247 // Look for global variable and field references in the 248 // constructor-initializers. 249 if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) { 250 for (const auto *Init : CtorDecl->inits()) { 251 if (Init->isMemberInitializer()) { 252 Fields.insert(Init->getMember()); 253 } else if (Init->isIndirectMemberInitializer()) { 254 for (const auto *I : Init->getIndirectMember()->chain()) 255 Fields.insert(cast<FieldDecl>(I)); 256 } 257 const Expr *E = Init->getInit(); 258 assert(E != nullptr); 259 getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs); 260 } 261 // Add all fields mentioned in default member initializers. 262 for (const FieldDecl *F : CtorDecl->getParent()->fields()) 263 if (const auto *I = F->getInClassInitializer()) 264 getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs); 265 } 266 getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs); 267 268 // These have to be added before the lines that follow to ensure that 269 // `create*` work correctly for structs. 270 DACtx->addModeledFields(Fields); 271 272 for (const VarDecl *D : Vars) { 273 if (getStorageLocation(*D) != nullptr) 274 continue; 275 276 setStorageLocation(*D, createObject(*D)); 277 } 278 279 for (const FunctionDecl *FD : Funcs) { 280 if (getStorageLocation(*FD) != nullptr) 281 continue; 282 auto &Loc = createStorageLocation(FD->getType()); 283 setStorageLocation(*FD, Loc); 284 } 285 } 286 287 Environment::Environment(DataflowAnalysisContext &DACtx) 288 : DACtx(&DACtx), 289 FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} 290 291 Environment Environment::fork() const { 292 Environment Copy(*this); 293 Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken); 294 return Copy; 295 } 296 297 Environment::Environment(DataflowAnalysisContext &DACtx, 298 const DeclContext &DeclCtx) 299 : Environment(DACtx) { 300 CallStack.push_back(&DeclCtx); 301 302 if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) { 303 assert(FuncDecl->getBody() != nullptr); 304 305 initFieldsGlobalsAndFuncs(FuncDecl); 306 307 for (const auto *ParamDecl : FuncDecl->parameters()) { 308 assert(ParamDecl != nullptr); 309 setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); 310 } 311 } 312 313 if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) { 314 auto *Parent = MethodDecl->getParent(); 315 assert(Parent != nullptr); 316 if (Parent->isLambda()) 317 MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext()); 318 319 // FIXME: Initialize the ThisPointeeLoc of lambdas too. 320 if (MethodDecl && !MethodDecl->isStatic()) { 321 QualType ThisPointeeType = MethodDecl->getThisObjectType(); 322 ThisPointeeLoc = 323 &cast<StructValue>(createValue(ThisPointeeType))->getAggregateLoc(); 324 } 325 } 326 } 327 328 bool Environment::canDescend(unsigned MaxDepth, 329 const DeclContext *Callee) const { 330 return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee); 331 } 332 333 Environment Environment::pushCall(const CallExpr *Call) const { 334 Environment Env(*this); 335 336 if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) { 337 if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) { 338 if (!isa<CXXThisExpr>(Arg)) 339 Env.ThisPointeeLoc = 340 cast<AggregateStorageLocation>(getStorageLocationStrict(*Arg)); 341 // Otherwise (when the argument is `this`), retain the current 342 // environment's `ThisPointeeLoc`. 343 } 344 } 345 346 Env.pushCallInternal(Call->getDirectCallee(), 347 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 348 349 return Env; 350 } 351 352 Environment Environment::pushCall(const CXXConstructExpr *Call) const { 353 Environment Env(*this); 354 355 Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call); 356 357 Env.pushCallInternal(Call->getConstructor(), 358 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 359 360 return Env; 361 } 362 363 void Environment::pushCallInternal(const FunctionDecl *FuncDecl, 364 ArrayRef<const Expr *> Args) { 365 // Canonicalize to the definition of the function. This ensures that we're 366 // putting arguments into the same `ParamVarDecl`s` that the callee will later 367 // be retrieving them from. 368 assert(FuncDecl->getDefinition() != nullptr); 369 FuncDecl = FuncDecl->getDefinition(); 370 371 CallStack.push_back(FuncDecl); 372 373 initFieldsGlobalsAndFuncs(FuncDecl); 374 375 const auto *ParamIt = FuncDecl->param_begin(); 376 377 // FIXME: Parameters don't always map to arguments 1:1; examples include 378 // overloaded operators implemented as member functions, and parameter packs. 379 for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) { 380 assert(ParamIt != FuncDecl->param_end()); 381 const VarDecl *Param = *ParamIt; 382 setStorageLocation(*Param, createObject(*Param, Args[ArgIndex])); 383 } 384 } 385 386 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { 387 // We ignore `DACtx` because it's already the same in both. We don't want the 388 // callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or `ThisPointeeLoc`. We don't 389 // bring back `DeclToLoc` and `ExprToLoc` because we want to be able to later 390 // analyze the same callee in a different context, and `setStorageLocation` 391 // requires there to not already be a storage location assigned. Conceptually, 392 // these maps capture information from the local scope, so when popping that 393 // scope, we do not propagate the maps. 394 this->LocToVal = std::move(CalleeEnv.LocToVal); 395 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 396 397 if (Call->isGLValue()) { 398 if (CalleeEnv.ReturnLoc != nullptr) 399 setStorageLocationStrict(*Call, *CalleeEnv.ReturnLoc); 400 } else if (!Call->getType()->isVoidType()) { 401 if (CalleeEnv.ReturnVal != nullptr) 402 setValueStrict(*Call, *CalleeEnv.ReturnVal); 403 } 404 } 405 406 void Environment::popCall(const CXXConstructExpr *Call, 407 const Environment &CalleeEnv) { 408 // See also comment in `popCall(const CallExpr *, const Environment &)` above. 409 this->LocToVal = std::move(CalleeEnv.LocToVal); 410 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 411 412 if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) { 413 setValueStrict(*Call, *Val); 414 } 415 } 416 417 bool Environment::equivalentTo(const Environment &Other, 418 Environment::ValueModel &Model) const { 419 assert(DACtx == Other.DACtx); 420 421 if (ReturnVal != Other.ReturnVal) 422 return false; 423 424 if (ReturnLoc != Other.ReturnLoc) 425 return false; 426 427 if (ThisPointeeLoc != Other.ThisPointeeLoc) 428 return false; 429 430 if (DeclToLoc != Other.DeclToLoc) 431 return false; 432 433 if (ExprToLoc != Other.ExprToLoc) 434 return false; 435 436 // Compare the contents for the intersection of their domains. 437 for (auto &Entry : LocToVal) { 438 const StorageLocation *Loc = Entry.first; 439 assert(Loc != nullptr); 440 441 Value *Val = Entry.second; 442 assert(Val != nullptr); 443 444 auto It = Other.LocToVal.find(Loc); 445 if (It == Other.LocToVal.end()) 446 continue; 447 assert(It->second != nullptr); 448 449 if (!areEquivalentValues(*Val, *It->second) && 450 !compareDistinctValues(Loc->getType(), *Val, *this, *It->second, Other, 451 Model)) 452 return false; 453 } 454 455 return true; 456 } 457 458 LatticeJoinEffect Environment::widen(const Environment &PrevEnv, 459 Environment::ValueModel &Model) { 460 assert(DACtx == PrevEnv.DACtx); 461 assert(ReturnVal == PrevEnv.ReturnVal); 462 assert(ReturnLoc == PrevEnv.ReturnLoc); 463 assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); 464 assert(CallStack == PrevEnv.CallStack); 465 466 auto Effect = LatticeJoinEffect::Unchanged; 467 468 // By the API, `PrevEnv` is a previous version of the environment for the same 469 // block, so we have some guarantees about its shape. In particular, it will 470 // be the result of a join or widen operation on previous values for this 471 // block. For `DeclToLoc` and `ExprToLoc`, join guarantees that these maps are 472 // subsets of the maps in `PrevEnv`. So, as long as we maintain this property 473 // here, we don't need change their current values to widen. 474 assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size()); 475 assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size()); 476 477 llvm::MapVector<const StorageLocation *, Value *> WidenedLocToVal; 478 for (auto &Entry : LocToVal) { 479 const StorageLocation *Loc = Entry.first; 480 assert(Loc != nullptr); 481 482 Value *Val = Entry.second; 483 assert(Val != nullptr); 484 485 auto PrevIt = PrevEnv.LocToVal.find(Loc); 486 if (PrevIt == PrevEnv.LocToVal.end()) 487 continue; 488 assert(PrevIt->second != nullptr); 489 490 if (areEquivalentValues(*Val, *PrevIt->second)) { 491 WidenedLocToVal.insert({Loc, Val}); 492 continue; 493 } 494 495 Value &WidenedVal = widenDistinctValues(Loc->getType(), *PrevIt->second, 496 PrevEnv, *Val, *this, Model); 497 WidenedLocToVal.insert({Loc, &WidenedVal}); 498 if (&WidenedVal != PrevIt->second) 499 Effect = LatticeJoinEffect::Changed; 500 } 501 LocToVal = std::move(WidenedLocToVal); 502 if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() || 503 ExprToLoc.size() != PrevEnv.ExprToLoc.size() || 504 LocToVal.size() != PrevEnv.LocToVal.size()) 505 Effect = LatticeJoinEffect::Changed; 506 507 return Effect; 508 } 509 510 Environment Environment::join(const Environment &EnvA, const Environment &EnvB, 511 Environment::ValueModel &Model) { 512 assert(EnvA.DACtx == EnvB.DACtx); 513 assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); 514 assert(EnvA.CallStack == EnvB.CallStack); 515 516 Environment JoinedEnv(*EnvA.DACtx); 517 518 JoinedEnv.CallStack = EnvA.CallStack; 519 JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; 520 521 if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) { 522 // `ReturnVal` might not always get set -- for example if we have a return 523 // statement of the form `return some_other_func()` and we decide not to 524 // analyze `some_other_func()`. 525 // In this case, we can't say anything about the joined return value -- we 526 // don't simply want to propagate the return value that we do have, because 527 // it might not be the correct one. 528 // This occurs for example in the test `ContextSensitiveMutualRecursion`. 529 JoinedEnv.ReturnVal = nullptr; 530 } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) { 531 JoinedEnv.ReturnVal = EnvA.ReturnVal; 532 } else { 533 assert(!EnvA.CallStack.empty()); 534 // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this 535 // cast. 536 auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back()); 537 assert(Func != nullptr); 538 if (Value *MergedVal = 539 mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA, 540 *EnvB.ReturnVal, EnvB, JoinedEnv, Model)) 541 JoinedEnv.ReturnVal = MergedVal; 542 } 543 544 if (EnvA.ReturnLoc == EnvB.ReturnLoc) 545 JoinedEnv.ReturnLoc = EnvA.ReturnLoc; 546 else 547 JoinedEnv.ReturnLoc = nullptr; 548 549 // FIXME: Once we're able to remove declarations from `DeclToLoc` when their 550 // lifetime ends, add an assertion that there aren't any entries in 551 // `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to 552 // different storage locations. 553 JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc); 554 555 JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc); 556 557 // FIXME: update join to detect backedges and simplify the flow condition 558 // accordingly. 559 JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions( 560 EnvA.FlowConditionToken, EnvB.FlowConditionToken); 561 562 for (auto &Entry : EnvA.LocToVal) { 563 const StorageLocation *Loc = Entry.first; 564 assert(Loc != nullptr); 565 566 Value *Val = Entry.second; 567 assert(Val != nullptr); 568 569 auto It = EnvB.LocToVal.find(Loc); 570 if (It == EnvB.LocToVal.end()) 571 continue; 572 assert(It->second != nullptr); 573 574 if (areEquivalentValues(*Val, *It->second)) { 575 JoinedEnv.LocToVal.insert({Loc, Val}); 576 continue; 577 } 578 579 if (Value *MergedVal = mergeDistinctValues( 580 Loc->getType(), *Val, EnvA, *It->second, EnvB, JoinedEnv, Model)) { 581 JoinedEnv.LocToVal.insert({Loc, MergedVal}); 582 } 583 } 584 585 return JoinedEnv; 586 } 587 588 StorageLocation &Environment::createStorageLocation(QualType Type) { 589 return DACtx->createStorageLocation(Type); 590 } 591 592 StorageLocation &Environment::createStorageLocation(const VarDecl &D) { 593 // Evaluated declarations are always assigned the same storage locations to 594 // ensure that the environment stabilizes across loop iterations. Storage 595 // locations for evaluated declarations are stored in the analysis context. 596 return DACtx->getStableStorageLocation(D); 597 } 598 599 StorageLocation &Environment::createStorageLocation(const Expr &E) { 600 // Evaluated expressions are always assigned the same storage locations to 601 // ensure that the environment stabilizes across loop iterations. Storage 602 // locations for evaluated expressions are stored in the analysis context. 603 return DACtx->getStableStorageLocation(E); 604 } 605 606 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { 607 assert(!DeclToLoc.contains(&D)); 608 DeclToLoc[&D] = &Loc; 609 } 610 611 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const { 612 auto It = DeclToLoc.find(&D); 613 if (It == DeclToLoc.end()) 614 return nullptr; 615 616 StorageLocation *Loc = It->second; 617 618 return Loc; 619 } 620 621 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) { 622 const Expr &CanonE = ignoreCFGOmittedNodes(E); 623 assert(!ExprToLoc.contains(&CanonE)); 624 ExprToLoc[&CanonE] = &Loc; 625 } 626 627 void Environment::setStorageLocationStrict(const Expr &E, 628 StorageLocation &Loc) { 629 // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason, 630 // but we still want to be able to associate a `StorageLocation` with them, 631 // so allow these as an exception. 632 assert(E.isGLValue() || 633 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 634 setStorageLocation(E, Loc); 635 } 636 637 StorageLocation *Environment::getStorageLocation(const Expr &E, 638 SkipPast SP) const { 639 // FIXME: Add a test with parens. 640 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 641 return It == ExprToLoc.end() ? nullptr : &*It->second; 642 } 643 644 StorageLocation *Environment::getStorageLocationStrict(const Expr &E) const { 645 // See comment in `setStorageLocationStrict()`. 646 assert(E.isGLValue() || 647 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 648 StorageLocation *Loc = getStorageLocation(E, SkipPast::None); 649 650 if (Loc == nullptr) 651 return nullptr; 652 653 return Loc; 654 } 655 656 AggregateStorageLocation *Environment::getThisPointeeStorageLocation() const { 657 return ThisPointeeLoc; 658 } 659 660 AggregateStorageLocation & 661 Environment::getResultObjectLocation(const Expr &RecordPRValue) { 662 assert(RecordPRValue.getType()->isRecordType()); 663 assert(RecordPRValue.isPRValue()); 664 665 if (StorageLocation *ExistingLoc = 666 getStorageLocation(RecordPRValue, SkipPast::None)) 667 return *cast<AggregateStorageLocation>(ExistingLoc); 668 auto &Loc = cast<AggregateStorageLocation>( 669 DACtx->getStableStorageLocation(RecordPRValue)); 670 setStorageLocation(RecordPRValue, Loc); 671 return Loc; 672 } 673 674 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { 675 return DACtx->getOrCreateNullPointerValue(PointeeType); 676 } 677 678 void Environment::setValue(const StorageLocation &Loc, Value &Val) { 679 assert(!isa<StructValue>(&Val) || 680 &cast<StructValue>(&Val)->getAggregateLoc() == &Loc); 681 682 LocToVal[&Loc] = &Val; 683 } 684 685 void Environment::setValueStrict(const Expr &E, Value &Val) { 686 assert(E.isPRValue()); 687 688 if (auto *StructVal = dyn_cast<StructValue>(&Val)) { 689 if ([[maybe_unused]] auto *ExistingVal = 690 cast_or_null<StructValue>(getValue(E))) 691 assert(&ExistingVal->getAggregateLoc() == &StructVal->getAggregateLoc()); 692 if ([[maybe_unused]] StorageLocation *ExistingLoc = 693 getStorageLocation(E, SkipPast::None)) 694 assert(ExistingLoc == &StructVal->getAggregateLoc()); 695 else 696 setStorageLocation(E, StructVal->getAggregateLoc()); 697 setValue(StructVal->getAggregateLoc(), Val); 698 return; 699 } 700 701 StorageLocation *Loc = getStorageLocation(E, SkipPast::None); 702 if (Loc == nullptr) { 703 Loc = &createStorageLocation(E); 704 setStorageLocation(E, *Loc); 705 } 706 setValue(*Loc, Val); 707 } 708 709 Value *Environment::getValue(const StorageLocation &Loc) const { 710 return LocToVal.lookup(&Loc); 711 } 712 713 Value *Environment::getValue(const ValueDecl &D) const { 714 auto *Loc = getStorageLocation(D); 715 if (Loc == nullptr) 716 return nullptr; 717 return getValue(*Loc); 718 } 719 720 Value *Environment::getValue(const Expr &E, SkipPast SP) const { 721 auto *Loc = getStorageLocation(E, SP); 722 if (Loc == nullptr) 723 return nullptr; 724 return getValue(*Loc); 725 } 726 727 Value *Environment::getValueStrict(const Expr &E) const { 728 assert(E.isPRValue()); 729 return getValue(E); 730 } 731 732 Value *Environment::createValue(QualType Type) { 733 llvm::DenseSet<QualType> Visited; 734 int CreatedValuesCount = 0; 735 Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0, 736 CreatedValuesCount); 737 if (CreatedValuesCount > MaxCompositeValueSize) { 738 llvm::errs() << "Attempting to initialize a huge value of type: " << Type 739 << '\n'; 740 } 741 return Val; 742 } 743 744 Value *Environment::createValueUnlessSelfReferential( 745 QualType Type, llvm::DenseSet<QualType> &Visited, int Depth, 746 int &CreatedValuesCount) { 747 assert(!Type.isNull()); 748 assert(!Type->isReferenceType()); 749 750 // Allow unlimited fields at depth 1; only cap at deeper nesting levels. 751 if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) || 752 Depth > MaxCompositeValueDepth) 753 return nullptr; 754 755 if (Type->isBooleanType()) { 756 CreatedValuesCount++; 757 return &makeAtomicBoolValue(); 758 } 759 760 if (Type->isIntegerType()) { 761 // FIXME: consider instead `return nullptr`, given that we do nothing useful 762 // with integers, and so distinguishing them serves no purpose, but could 763 // prevent convergence. 764 CreatedValuesCount++; 765 return &arena().create<IntegerValue>(); 766 } 767 768 if (Type->isPointerType()) { 769 CreatedValuesCount++; 770 QualType PointeeType = Type->getPointeeType(); 771 StorageLocation &PointeeLoc = 772 createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount); 773 774 return &arena().create<PointerValue>(PointeeLoc); 775 } 776 777 if (Type->isRecordType()) { 778 CreatedValuesCount++; 779 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 780 for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { 781 assert(Field != nullptr); 782 783 QualType FieldType = Field->getType(); 784 785 FieldLocs.insert( 786 {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1, 787 CreatedValuesCount)}); 788 } 789 790 AggregateStorageLocation &Loc = 791 arena().create<AggregateStorageLocation>(Type, std::move(FieldLocs)); 792 StructValue &StructVal = create<StructValue>(Loc); 793 794 // As we already have a storage location for the `StructValue`, we can and 795 // should associate them in the environment. 796 setValue(Loc, StructVal); 797 798 return &StructVal; 799 } 800 801 return nullptr; 802 } 803 804 StorageLocation & 805 Environment::createLocAndMaybeValue(QualType Ty, 806 llvm::DenseSet<QualType> &Visited, 807 int Depth, int &CreatedValuesCount) { 808 if (!Visited.insert(Ty.getCanonicalType()).second) 809 return createStorageLocation(Ty.getNonReferenceType()); 810 Value *Val = createValueUnlessSelfReferential( 811 Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount); 812 Visited.erase(Ty.getCanonicalType()); 813 814 Ty = Ty.getNonReferenceType(); 815 816 if (Val == nullptr) 817 return createStorageLocation(Ty); 818 819 if (Ty->isRecordType()) 820 return cast<StructValue>(Val)->getAggregateLoc(); 821 822 StorageLocation &Loc = createStorageLocation(Ty); 823 setValue(Loc, *Val); 824 return Loc; 825 } 826 827 StorageLocation &Environment::createObjectInternal(const VarDecl *D, 828 QualType Ty, 829 const Expr *InitExpr) { 830 if (Ty->isReferenceType()) { 831 // Although variables of reference type always need to be initialized, it 832 // can happen that we can't see the initializer, so `InitExpr` may still 833 // be null. 834 if (InitExpr) { 835 if (auto *InitExprLoc = getStorageLocationStrict(*InitExpr)) 836 return *InitExprLoc; 837 } 838 839 // Even though we have an initializer, we might not get an 840 // InitExprLoc, for example if the InitExpr is a CallExpr for which we 841 // don't have a function body. In this case, we just invent a storage 842 // location and value -- it's the best we can do. 843 return createObjectInternal(D, Ty.getNonReferenceType(), nullptr); 844 } 845 846 Value *Val = nullptr; 847 if (InitExpr) 848 // In the (few) cases where an expression is intentionally 849 // "uninterpreted", `InitExpr` is not associated with a value. There are 850 // two ways to handle this situation: propagate the status, so that 851 // uninterpreted initializers result in uninterpreted variables, or 852 // provide a default value. We choose the latter so that later refinements 853 // of the variable can be used for reasoning about the surrounding code. 854 // For this reason, we let this case be handled by the `createValue()` 855 // call below. 856 // 857 // FIXME. If and when we interpret all language cases, change this to 858 // assert that `InitExpr` is interpreted, rather than supplying a 859 // default value (assuming we don't update the environment API to return 860 // references). 861 Val = getValue(*InitExpr); 862 if (!Val) 863 Val = createValue(Ty); 864 865 if (Ty->isRecordType()) 866 return cast<StructValue>(Val)->getAggregateLoc(); 867 868 StorageLocation &Loc = 869 D ? createStorageLocation(*D) : createStorageLocation(Ty); 870 871 if (Val) 872 setValue(Loc, *Val); 873 874 return Loc; 875 } 876 877 void Environment::addToFlowCondition(const Formula &Val) { 878 DACtx->addFlowConditionConstraint(FlowConditionToken, Val); 879 } 880 881 bool Environment::flowConditionImplies(const Formula &Val) const { 882 return DACtx->flowConditionImplies(FlowConditionToken, Val); 883 } 884 885 void Environment::dump(raw_ostream &OS) const { 886 // FIXME: add printing for remaining fields and allow caller to decide what 887 // fields are printed. 888 OS << "DeclToLoc:\n"; 889 for (auto [D, L] : DeclToLoc) 890 OS << " [" << D->getNameAsString() << ", " << L << "]\n"; 891 892 OS << "ExprToLoc:\n"; 893 for (auto [E, L] : ExprToLoc) 894 OS << " [" << E << ", " << L << "]\n"; 895 896 OS << "LocToVal:\n"; 897 for (auto [L, V] : LocToVal) { 898 OS << " [" << L << ", " << V << ": " << *V << "]\n"; 899 } 900 901 OS << "FlowConditionToken:\n"; 902 DACtx->dumpFlowCondition(FlowConditionToken, OS); 903 } 904 905 void Environment::dump() const { 906 dump(llvm::dbgs()); 907 } 908 909 AggregateStorageLocation * 910 getImplicitObjectLocation(const CXXMemberCallExpr &MCE, 911 const Environment &Env) { 912 Expr *ImplicitObject = MCE.getImplicitObjectArgument(); 913 if (ImplicitObject == nullptr) 914 return nullptr; 915 if (ImplicitObject->getType()->isPointerType()) { 916 if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*ImplicitObject))) 917 return &cast<AggregateStorageLocation>(Val->getPointeeLoc()); 918 return nullptr; 919 } 920 return cast_or_null<AggregateStorageLocation>( 921 Env.getStorageLocationStrict(*ImplicitObject)); 922 } 923 924 AggregateStorageLocation *getBaseObjectLocation(const MemberExpr &ME, 925 const Environment &Env) { 926 Expr *Base = ME.getBase(); 927 if (Base == nullptr) 928 return nullptr; 929 if (ME.isArrow()) { 930 if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Base))) 931 return &cast<AggregateStorageLocation>(Val->getPointeeLoc()); 932 return nullptr; 933 } 934 return cast_or_null<AggregateStorageLocation>( 935 Env.getStorageLocationStrict(*Base)); 936 } 937 938 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) { 939 // Unnamed bitfields are only used for padding and do not appear in 940 // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s 941 // field list, and we thus need to remove them before mapping inits to 942 // fields to avoid mapping inits to the wrongs fields. 943 std::vector<FieldDecl *> Fields; 944 llvm::copy_if( 945 RD->fields(), std::back_inserter(Fields), 946 [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); }); 947 return Fields; 948 } 949 950 StructValue &refreshStructValue(AggregateStorageLocation &Loc, 951 Environment &Env) { 952 auto &NewVal = Env.create<StructValue>(Loc); 953 Env.setValue(Loc, NewVal); 954 return NewVal; 955 } 956 957 StructValue &refreshStructValue(const Expr &Expr, Environment &Env) { 958 assert(Expr.getType()->isRecordType()); 959 960 if (Expr.isPRValue()) { 961 if (auto *ExistingVal = cast_or_null<StructValue>(Env.getValue(Expr))) { 962 auto &NewVal = Env.create<StructValue>(ExistingVal->getAggregateLoc()); 963 Env.setValueStrict(Expr, NewVal); 964 return NewVal; 965 } 966 967 auto &NewVal = *cast<StructValue>(Env.createValue(Expr.getType())); 968 Env.setValueStrict(Expr, NewVal); 969 return NewVal; 970 } 971 972 if (auto *Loc = cast_or_null<AggregateStorageLocation>( 973 Env.getStorageLocationStrict(Expr))) { 974 auto &NewVal = Env.create<StructValue>(*Loc); 975 Env.setValue(*Loc, NewVal); 976 return NewVal; 977 } 978 979 auto &NewVal = *cast<StructValue>(Env.createValue(Expr.getType())); 980 Env.setStorageLocationStrict(Expr, NewVal.getAggregateLoc()); 981 return NewVal; 982 } 983 984 } // namespace dataflow 985 } // namespace clang 986