1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an Environment class that is used by dataflow analyses 10 // that run over Control-Flow Graphs (CFGs) to keep track of the state of the 11 // program at given program points. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" 16 #include "clang/AST/Decl.h" 17 #include "clang/AST/DeclCXX.h" 18 #include "clang/AST/Type.h" 19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h" 20 #include "clang/Analysis/FlowSensitive/Value.h" 21 #include "llvm/ADT/DenseMap.h" 22 #include "llvm/ADT/DenseSet.h" 23 #include "llvm/ADT/MapVector.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/Support/Casting.h" 26 #include "llvm/Support/ErrorHandling.h" 27 #include <cassert> 28 #include <memory> 29 #include <utility> 30 31 namespace clang { 32 namespace dataflow { 33 34 // FIXME: convert these to parameters of the analysis or environment. Current 35 // settings have been experimentaly validated, but only for a particular 36 // analysis. 37 static constexpr int MaxCompositeValueDepth = 3; 38 static constexpr int MaxCompositeValueSize = 1000; 39 40 /// Returns a map consisting of key-value entries that are present in both maps. 41 template <typename K, typename V> 42 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1, 43 const llvm::DenseMap<K, V> &Map2) { 44 llvm::DenseMap<K, V> Result; 45 for (auto &Entry : Map1) { 46 auto It = Map2.find(Entry.first); 47 if (It != Map2.end() && Entry.second == It->second) 48 Result.insert({Entry.first, Entry.second}); 49 } 50 return Result; 51 } 52 53 static bool compareDistinctValues(QualType Type, Value &Val1, 54 const Environment &Env1, Value &Val2, 55 const Environment &Env2, 56 Environment::ValueModel &Model) { 57 // Note: Potentially costly, but, for booleans, we could check whether both 58 // can be proven equivalent in their respective environments. 59 60 // FIXME: move the reference/pointers logic from `areEquivalentValues` to here 61 // and implement separate, join/widen specific handling for 62 // reference/pointers. 63 switch (Model.compare(Type, Val1, Env1, Val2, Env2)) { 64 case ComparisonResult::Same: 65 return true; 66 case ComparisonResult::Different: 67 return false; 68 case ComparisonResult::Unknown: 69 switch (Val1.getKind()) { 70 case Value::Kind::Integer: 71 case Value::Kind::Reference: 72 case Value::Kind::Pointer: 73 case Value::Kind::Struct: 74 // FIXME: this choice intentionally introduces unsoundness to allow 75 // for convergence. Once we have widening support for the 76 // reference/pointer and struct built-in models, this should be 77 // `false`. 78 return true; 79 default: 80 return false; 81 } 82 } 83 llvm_unreachable("All cases covered in switch"); 84 } 85 86 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`, 87 /// respectively, of the same type `Type`. Merging generally produces a single 88 /// value that (soundly) approximates the two inputs, although the actual 89 /// meaning depends on `Model`. 90 static Value *mergeDistinctValues(QualType Type, Value &Val1, 91 const Environment &Env1, Value &Val2, 92 const Environment &Env2, 93 Environment &MergedEnv, 94 Environment::ValueModel &Model) { 95 // Join distinct boolean values preserving information about the constraints 96 // in the respective path conditions. 97 if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) { 98 // FIXME: Checking both values should be unnecessary, since they should have 99 // a consistent shape. However, right now we can end up with BoolValue's in 100 // integer-typed variables due to our incorrect handling of 101 // boolean-to-integer casts (we just propagate the BoolValue to the result 102 // of the cast). So, a join can encounter an integer in one branch but a 103 // bool in the other. 104 // For example: 105 // ``` 106 // std::optional<bool> o; 107 // int x; 108 // if (o.has_value()) 109 // x = o.value(); 110 // ``` 111 auto &Expr1 = cast<BoolValue>(Val1).formula(); 112 auto &Expr2 = cast<BoolValue>(Val2).formula(); 113 auto &A = MergedEnv.arena(); 114 auto &MergedVal = A.makeAtomRef(A.makeAtom()); 115 MergedEnv.addToFlowCondition( 116 A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()), 117 A.makeEquals(MergedVal, Expr1)), 118 A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()), 119 A.makeEquals(MergedVal, Expr2)))); 120 return &A.makeBoolValue(MergedVal); 121 } 122 123 // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge` 124 // returns false to avoid storing unneeded values in `DACtx`. 125 // FIXME: Creating the value based on the type alone creates misshapen values 126 // for lvalues, since the type does not reflect the need for `ReferenceValue`. 127 // This issue will be resolved when `ReferenceValue` is eliminated as part 128 // of the ongoing migration to strict handling of value categories (see 129 // https://discourse.llvm.org/t/70086 for details). 130 if (Value *MergedVal = MergedEnv.createValue(Type)) 131 if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv)) 132 return MergedVal; 133 134 return nullptr; 135 } 136 137 // When widening does not change `Current`, return value will equal `&Prev`. 138 static Value &widenDistinctValues(QualType Type, Value &Prev, 139 const Environment &PrevEnv, Value &Current, 140 Environment &CurrentEnv, 141 Environment::ValueModel &Model) { 142 // Boolean-model widening. 143 if (isa<BoolValue>(&Prev)) { 144 assert(isa<BoolValue>(Current)); 145 // Widen to Top, because we know they are different values. If previous was 146 // already Top, re-use that to (implicitly) indicate that no change occured. 147 if (isa<TopBoolValue>(Prev)) 148 return Prev; 149 return CurrentEnv.makeTopBoolValue(); 150 } 151 152 // FIXME: Add other built-in model widening. 153 154 // Custom-model widening. 155 if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv)) 156 return *W; 157 158 // Default of widening is a no-op: leave the current value unchanged. 159 return Current; 160 } 161 162 /// Initializes a global storage value. 163 static void insertIfGlobal(const Decl &D, 164 llvm::DenseSet<const VarDecl *> &Vars) { 165 if (auto *V = dyn_cast<VarDecl>(&D)) 166 if (V->hasGlobalStorage()) 167 Vars.insert(V); 168 } 169 170 static void insertIfFunction(const Decl &D, 171 llvm::DenseSet<const FunctionDecl *> &Funcs) { 172 if (auto *FD = dyn_cast<FunctionDecl>(&D)) 173 Funcs.insert(FD); 174 } 175 176 static void 177 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields, 178 llvm::DenseSet<const VarDecl *> &Vars, 179 llvm::DenseSet<const FunctionDecl *> &Funcs) { 180 insertIfGlobal(D, Vars); 181 insertIfFunction(D, Funcs); 182 if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D)) 183 for (const auto *B : Decomp->bindings()) 184 if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding())) 185 // FIXME: should we be using `E->getFoundDecl()`? 186 if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) 187 Fields.insert(FD); 188 } 189 190 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields, 191 /// global variables and functions that are declared in or referenced from 192 /// sub-statements. 193 static void 194 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, 195 llvm::DenseSet<const VarDecl *> &Vars, 196 llvm::DenseSet<const FunctionDecl *> &Funcs) { 197 for (auto *Child : S.children()) 198 if (Child != nullptr) 199 getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs); 200 if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S)) 201 getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs); 202 203 if (auto *DS = dyn_cast<DeclStmt>(&S)) { 204 if (DS->isSingleDecl()) 205 getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs); 206 else 207 for (auto *D : DS->getDeclGroup()) 208 getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs); 209 } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) { 210 insertIfGlobal(*E->getDecl(), Vars); 211 insertIfFunction(*E->getDecl(), Funcs); 212 } else if (auto *E = dyn_cast<MemberExpr>(&S)) { 213 // FIXME: should we be using `E->getFoundDecl()`? 214 const ValueDecl *VD = E->getMemberDecl(); 215 insertIfGlobal(*VD, Vars); 216 insertIfFunction(*VD, Funcs); 217 if (const auto *FD = dyn_cast<FieldDecl>(VD)) 218 Fields.insert(FD); 219 } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) { 220 if (RecordDecl *RD = InitList->getType()->getAsRecordDecl()) 221 for (const auto *FD : getFieldsForInitListExpr(RD)) 222 Fields.insert(FD); 223 } 224 } 225 226 // FIXME: Add support for resetting globals after function calls to enable 227 // the implementation of sound analyses. 228 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { 229 assert(FuncDecl->getBody() != nullptr); 230 231 FieldSet Fields; 232 llvm::DenseSet<const VarDecl *> Vars; 233 llvm::DenseSet<const FunctionDecl *> Funcs; 234 235 // Look for global variable and field references in the 236 // constructor-initializers. 237 if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) { 238 for (const auto *Init : CtorDecl->inits()) { 239 if (Init->isMemberInitializer()) { 240 Fields.insert(Init->getMember()); 241 } else if (Init->isIndirectMemberInitializer()) { 242 for (const auto *I : Init->getIndirectMember()->chain()) 243 Fields.insert(cast<FieldDecl>(I)); 244 } 245 const Expr *E = Init->getInit(); 246 assert(E != nullptr); 247 getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs); 248 } 249 // Add all fields mentioned in default member initializers. 250 for (const FieldDecl *F : CtorDecl->getParent()->fields()) 251 if (const auto *I = F->getInClassInitializer()) 252 getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs); 253 } 254 getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs); 255 256 // These have to be added before the lines that follow to ensure that 257 // `create*` work correctly for structs. 258 DACtx->addModeledFields(Fields); 259 260 for (const VarDecl *D : Vars) { 261 if (getStorageLocation(*D) != nullptr) 262 continue; 263 264 setStorageLocation(*D, createObject(*D)); 265 } 266 267 for (const FunctionDecl *FD : Funcs) { 268 if (getStorageLocation(*FD) != nullptr) 269 continue; 270 auto &Loc = createStorageLocation(FD->getType()); 271 setStorageLocation(*FD, Loc); 272 } 273 } 274 275 Environment::Environment(DataflowAnalysisContext &DACtx) 276 : DACtx(&DACtx), 277 FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} 278 279 Environment Environment::fork() const { 280 Environment Copy(*this); 281 Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken); 282 return Copy; 283 } 284 285 Environment::Environment(DataflowAnalysisContext &DACtx, 286 const DeclContext &DeclCtx) 287 : Environment(DACtx) { 288 CallStack.push_back(&DeclCtx); 289 290 if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) { 291 assert(FuncDecl->getBody() != nullptr); 292 293 initFieldsGlobalsAndFuncs(FuncDecl); 294 295 for (const auto *ParamDecl : FuncDecl->parameters()) { 296 assert(ParamDecl != nullptr); 297 setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); 298 } 299 } 300 301 if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) { 302 auto *Parent = MethodDecl->getParent(); 303 assert(Parent != nullptr); 304 if (Parent->isLambda()) 305 MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext()); 306 307 // FIXME: Initialize the ThisPointeeLoc of lambdas too. 308 if (MethodDecl && !MethodDecl->isStatic()) { 309 QualType ThisPointeeType = MethodDecl->getThisObjectType(); 310 ThisPointeeLoc = &cast<AggregateStorageLocation>( 311 createStorageLocation(ThisPointeeType)); 312 if (Value *ThisPointeeVal = createValue(ThisPointeeType)) 313 setValue(*ThisPointeeLoc, *ThisPointeeVal); 314 } 315 } 316 } 317 318 bool Environment::canDescend(unsigned MaxDepth, 319 const DeclContext *Callee) const { 320 return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee); 321 } 322 323 Environment Environment::pushCall(const CallExpr *Call) const { 324 Environment Env(*this); 325 326 if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) { 327 if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) { 328 if (!isa<CXXThisExpr>(Arg)) 329 Env.ThisPointeeLoc = cast<AggregateStorageLocation>( 330 getStorageLocation(*Arg, SkipPast::Reference)); 331 // Otherwise (when the argument is `this`), retain the current 332 // environment's `ThisPointeeLoc`. 333 } 334 } 335 336 Env.pushCallInternal(Call->getDirectCallee(), 337 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 338 339 return Env; 340 } 341 342 Environment Environment::pushCall(const CXXConstructExpr *Call) const { 343 Environment Env(*this); 344 345 Env.ThisPointeeLoc = &cast<AggregateStorageLocation>( 346 Env.createStorageLocation(Call->getType())); 347 if (Value *Val = Env.createValue(Call->getType())) 348 Env.setValue(*Env.ThisPointeeLoc, *Val); 349 350 Env.pushCallInternal(Call->getConstructor(), 351 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 352 353 return Env; 354 } 355 356 void Environment::pushCallInternal(const FunctionDecl *FuncDecl, 357 ArrayRef<const Expr *> Args) { 358 // Canonicalize to the definition of the function. This ensures that we're 359 // putting arguments into the same `ParamVarDecl`s` that the callee will later 360 // be retrieving them from. 361 assert(FuncDecl->getDefinition() != nullptr); 362 FuncDecl = FuncDecl->getDefinition(); 363 364 CallStack.push_back(FuncDecl); 365 366 initFieldsGlobalsAndFuncs(FuncDecl); 367 368 const auto *ParamIt = FuncDecl->param_begin(); 369 370 // FIXME: Parameters don't always map to arguments 1:1; examples include 371 // overloaded operators implemented as member functions, and parameter packs. 372 for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) { 373 assert(ParamIt != FuncDecl->param_end()); 374 const VarDecl *Param = *ParamIt; 375 setStorageLocation(*Param, createObject(*Param, Args[ArgIndex])); 376 } 377 } 378 379 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { 380 // We ignore `DACtx` because it's already the same in both. We don't want the 381 // callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or `ThisPointeeLoc`. We don't 382 // bring back `DeclToLoc` and `ExprToLoc` because we want to be able to later 383 // analyze the same callee in a different context, and `setStorageLocation` 384 // requires there to not already be a storage location assigned. Conceptually, 385 // these maps capture information from the local scope, so when popping that 386 // scope, we do not propagate the maps. 387 this->LocToVal = std::move(CalleeEnv.LocToVal); 388 this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct); 389 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 390 391 if (Call->isGLValue()) { 392 if (CalleeEnv.ReturnLoc != nullptr) 393 setStorageLocationStrict(*Call, *CalleeEnv.ReturnLoc); 394 } else if (!Call->getType()->isVoidType()) { 395 if (CalleeEnv.ReturnVal != nullptr) 396 setValueStrict(*Call, *CalleeEnv.ReturnVal); 397 } 398 } 399 400 void Environment::popCall(const CXXConstructExpr *Call, 401 const Environment &CalleeEnv) { 402 // See also comment in `popCall(const CallExpr *, const Environment &)` above. 403 this->LocToVal = std::move(CalleeEnv.LocToVal); 404 this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct); 405 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 406 407 if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) { 408 setValueStrict(*Call, *Val); 409 } 410 } 411 412 bool Environment::equivalentTo(const Environment &Other, 413 Environment::ValueModel &Model) const { 414 assert(DACtx == Other.DACtx); 415 416 if (ReturnVal != Other.ReturnVal) 417 return false; 418 419 if (ReturnLoc != Other.ReturnLoc) 420 return false; 421 422 if (ThisPointeeLoc != Other.ThisPointeeLoc) 423 return false; 424 425 if (DeclToLoc != Other.DeclToLoc) 426 return false; 427 428 if (ExprToLoc != Other.ExprToLoc) 429 return false; 430 431 // Compare the contents for the intersection of their domains. 432 for (auto &Entry : LocToVal) { 433 const StorageLocation *Loc = Entry.first; 434 assert(Loc != nullptr); 435 436 Value *Val = Entry.second; 437 assert(Val != nullptr); 438 439 auto It = Other.LocToVal.find(Loc); 440 if (It == Other.LocToVal.end()) 441 continue; 442 assert(It->second != nullptr); 443 444 if (!areEquivalentValues(*Val, *It->second) && 445 !compareDistinctValues(Loc->getType(), *Val, *this, *It->second, Other, 446 Model)) 447 return false; 448 } 449 450 return true; 451 } 452 453 LatticeJoinEffect Environment::widen(const Environment &PrevEnv, 454 Environment::ValueModel &Model) { 455 assert(DACtx == PrevEnv.DACtx); 456 assert(ReturnVal == PrevEnv.ReturnVal); 457 assert(ReturnLoc == PrevEnv.ReturnLoc); 458 assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); 459 assert(CallStack == PrevEnv.CallStack); 460 461 auto Effect = LatticeJoinEffect::Unchanged; 462 463 // By the API, `PrevEnv` is a previous version of the environment for the same 464 // block, so we have some guarantees about its shape. In particular, it will 465 // be the result of a join or widen operation on previous values for this 466 // block. For `DeclToLoc` and `ExprToLoc`, join guarantees that these maps are 467 // subsets of the maps in `PrevEnv`. So, as long as we maintain this property 468 // here, we don't need change their current values to widen. 469 // 470 // FIXME: `MemberLocToStruct` does not share the above property, because 471 // `join` can cause the map size to increase (when we add fresh data in places 472 // of conflict). Once this issue with join is resolved, re-enable the 473 // assertion below or replace with something that captures the desired 474 // invariant. 475 assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size()); 476 assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size()); 477 // assert(MemberLocToStruct.size() <= PrevEnv.MemberLocToStruct.size()); 478 479 llvm::MapVector<const StorageLocation *, Value *> WidenedLocToVal; 480 for (auto &Entry : LocToVal) { 481 const StorageLocation *Loc = Entry.first; 482 assert(Loc != nullptr); 483 484 Value *Val = Entry.second; 485 assert(Val != nullptr); 486 487 auto PrevIt = PrevEnv.LocToVal.find(Loc); 488 if (PrevIt == PrevEnv.LocToVal.end()) 489 continue; 490 assert(PrevIt->second != nullptr); 491 492 if (areEquivalentValues(*Val, *PrevIt->second)) { 493 WidenedLocToVal.insert({Loc, Val}); 494 continue; 495 } 496 497 Value &WidenedVal = widenDistinctValues(Loc->getType(), *PrevIt->second, 498 PrevEnv, *Val, *this, Model); 499 WidenedLocToVal.insert({Loc, &WidenedVal}); 500 if (&WidenedVal != PrevIt->second) 501 Effect = LatticeJoinEffect::Changed; 502 } 503 LocToVal = std::move(WidenedLocToVal); 504 // FIXME: update the equivalence calculation for `MemberLocToStruct`, once we 505 // have a systematic way of soundly comparing this map. 506 if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() || 507 ExprToLoc.size() != PrevEnv.ExprToLoc.size() || 508 LocToVal.size() != PrevEnv.LocToVal.size() || 509 MemberLocToStruct.size() != PrevEnv.MemberLocToStruct.size()) 510 Effect = LatticeJoinEffect::Changed; 511 512 return Effect; 513 } 514 515 Environment Environment::join(const Environment &EnvA, const Environment &EnvB, 516 Environment::ValueModel &Model) { 517 assert(EnvA.DACtx == EnvB.DACtx); 518 assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); 519 assert(EnvA.CallStack == EnvB.CallStack); 520 521 Environment JoinedEnv(*EnvA.DACtx); 522 523 JoinedEnv.CallStack = EnvA.CallStack; 524 JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; 525 526 if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) { 527 // `ReturnVal` might not always get set -- for example if we have a return 528 // statement of the form `return some_other_func()` and we decide not to 529 // analyze `some_other_func()`. 530 // In this case, we can't say anything about the joined return value -- we 531 // don't simply want to propagate the return value that we do have, because 532 // it might not be the correct one. 533 // This occurs for example in the test `ContextSensitiveMutualRecursion`. 534 JoinedEnv.ReturnVal = nullptr; 535 } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) { 536 JoinedEnv.ReturnVal = EnvA.ReturnVal; 537 } else { 538 assert(!EnvA.CallStack.empty()); 539 // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this 540 // cast. 541 auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back()); 542 assert(Func != nullptr); 543 if (Value *MergedVal = 544 mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA, 545 *EnvB.ReturnVal, EnvB, JoinedEnv, Model)) 546 JoinedEnv.ReturnVal = MergedVal; 547 } 548 549 if (EnvA.ReturnLoc == EnvB.ReturnLoc) 550 JoinedEnv.ReturnLoc = EnvA.ReturnLoc; 551 else 552 JoinedEnv.ReturnLoc = nullptr; 553 554 // FIXME: Once we're able to remove declarations from `DeclToLoc` when their 555 // lifetime ends, add an assertion that there aren't any entries in 556 // `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to 557 // different storage locations. 558 JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc); 559 560 JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc); 561 562 JoinedEnv.MemberLocToStruct = 563 intersectDenseMaps(EnvA.MemberLocToStruct, EnvB.MemberLocToStruct); 564 565 // FIXME: update join to detect backedges and simplify the flow condition 566 // accordingly. 567 JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions( 568 EnvA.FlowConditionToken, EnvB.FlowConditionToken); 569 570 for (auto &Entry : EnvA.LocToVal) { 571 const StorageLocation *Loc = Entry.first; 572 assert(Loc != nullptr); 573 574 Value *Val = Entry.second; 575 assert(Val != nullptr); 576 577 auto It = EnvB.LocToVal.find(Loc); 578 if (It == EnvB.LocToVal.end()) 579 continue; 580 assert(It->second != nullptr); 581 582 if (areEquivalentValues(*Val, *It->second)) { 583 JoinedEnv.LocToVal.insert({Loc, Val}); 584 continue; 585 } 586 587 if (Value *MergedVal = mergeDistinctValues( 588 Loc->getType(), *Val, EnvA, *It->second, EnvB, JoinedEnv, Model)) { 589 JoinedEnv.LocToVal.insert({Loc, MergedVal}); 590 } 591 } 592 593 return JoinedEnv; 594 } 595 596 StorageLocation &Environment::createStorageLocation(QualType Type) { 597 return DACtx->createStorageLocation(Type); 598 } 599 600 StorageLocation &Environment::createStorageLocation(const VarDecl &D) { 601 // Evaluated declarations are always assigned the same storage locations to 602 // ensure that the environment stabilizes across loop iterations. Storage 603 // locations for evaluated declarations are stored in the analysis context. 604 return DACtx->getStableStorageLocation(D); 605 } 606 607 StorageLocation &Environment::createStorageLocation(const Expr &E) { 608 // Evaluated expressions are always assigned the same storage locations to 609 // ensure that the environment stabilizes across loop iterations. Storage 610 // locations for evaluated expressions are stored in the analysis context. 611 return DACtx->getStableStorageLocation(E); 612 } 613 614 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { 615 assert(!DeclToLoc.contains(&D)); 616 assert(!isa_and_nonnull<ReferenceValue>(getValue(Loc))); 617 DeclToLoc[&D] = &Loc; 618 } 619 620 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const { 621 auto It = DeclToLoc.find(&D); 622 if (It == DeclToLoc.end()) 623 return nullptr; 624 625 StorageLocation *Loc = It->second; 626 627 assert(!isa_and_nonnull<ReferenceValue>(getValue(*Loc))); 628 629 return Loc; 630 } 631 632 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) { 633 const Expr &CanonE = ignoreCFGOmittedNodes(E); 634 assert(!ExprToLoc.contains(&CanonE)); 635 ExprToLoc[&CanonE] = &Loc; 636 } 637 638 void Environment::setStorageLocationStrict(const Expr &E, 639 StorageLocation &Loc) { 640 // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason, 641 // but we still want to be able to associate a `StorageLocation` with them, 642 // so allow these as an exception. 643 assert(E.isGLValue() || 644 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 645 setStorageLocation(E, Loc); 646 } 647 648 StorageLocation *Environment::getStorageLocation(const Expr &E, 649 SkipPast SP) const { 650 // FIXME: Add a test with parens. 651 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 652 return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP); 653 } 654 655 StorageLocation *Environment::getStorageLocationStrict(const Expr &E) const { 656 // See comment in `setStorageLocationStrict()`. 657 assert(E.isGLValue() || 658 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 659 StorageLocation *Loc = getStorageLocation(E, SkipPast::None); 660 661 if (Loc == nullptr) 662 return nullptr; 663 664 if (auto *RefVal = dyn_cast_or_null<ReferenceValue>(getValue(*Loc))) 665 return &RefVal->getReferentLoc(); 666 667 return Loc; 668 } 669 670 AggregateStorageLocation *Environment::getThisPointeeStorageLocation() const { 671 return ThisPointeeLoc; 672 } 673 674 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { 675 return DACtx->getOrCreateNullPointerValue(PointeeType); 676 } 677 678 void Environment::setValue(const StorageLocation &Loc, Value &Val) { 679 LocToVal[&Loc] = &Val; 680 681 if (auto *StructVal = dyn_cast<StructValue>(&Val)) { 682 auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc); 683 684 const QualType Type = AggregateLoc.getType(); 685 assert(Type->isRecordType()); 686 687 for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { 688 assert(Field != nullptr); 689 StorageLocation &FieldLoc = AggregateLoc.getChild(*Field); 690 MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field); 691 if (auto *FieldVal = StructVal->getChild(*Field)) 692 setValue(FieldLoc, *FieldVal); 693 } 694 } 695 696 auto It = MemberLocToStruct.find(&Loc); 697 if (It != MemberLocToStruct.end()) { 698 // `Loc` is the location of a struct member so we need to also update the 699 // value of the member in the corresponding `StructValue`. 700 701 assert(It->second.first != nullptr); 702 StructValue &StructVal = *It->second.first; 703 704 assert(It->second.second != nullptr); 705 const ValueDecl &Member = *It->second.second; 706 707 StructVal.setChild(Member, Val); 708 } 709 } 710 711 void Environment::clearValue(const StorageLocation &Loc) { 712 LocToVal.erase(&Loc); 713 714 if (auto It = MemberLocToStruct.find(&Loc); It != MemberLocToStruct.end()) { 715 // `Loc` is the location of a struct member so we need to also clear the 716 // member in the corresponding `StructValue`. 717 718 assert(It->second.first != nullptr); 719 StructValue &StructVal = *It->second.first; 720 721 assert(It->second.second != nullptr); 722 const ValueDecl &Member = *It->second.second; 723 724 StructVal.clearChild(Member); 725 } 726 } 727 728 void Environment::setValueStrict(const Expr &E, Value &Val) { 729 assert(E.isPRValue()); 730 assert(!isa<ReferenceValue>(Val)); 731 732 StorageLocation *Loc = getStorageLocation(E, SkipPast::None); 733 if (Loc == nullptr) { 734 Loc = &createStorageLocation(E); 735 setStorageLocation(E, *Loc); 736 } 737 setValue(*Loc, Val); 738 } 739 740 Value *Environment::getValue(const StorageLocation &Loc) const { 741 return LocToVal.lookup(&Loc); 742 } 743 744 Value *Environment::getValue(const ValueDecl &D) const { 745 auto *Loc = getStorageLocation(D); 746 if (Loc == nullptr) 747 return nullptr; 748 return getValue(*Loc); 749 } 750 751 Value *Environment::getValue(const Expr &E, SkipPast SP) const { 752 auto *Loc = getStorageLocation(E, SP); 753 if (Loc == nullptr) 754 return nullptr; 755 return getValue(*Loc); 756 } 757 758 Value *Environment::getValueStrict(const Expr &E) const { 759 assert(E.isPRValue()); 760 Value *Val = getValue(E, SkipPast::None); 761 762 assert(Val == nullptr || !isa<ReferenceValue>(Val)); 763 764 return Val; 765 } 766 767 Value *Environment::createValue(QualType Type) { 768 llvm::DenseSet<QualType> Visited; 769 int CreatedValuesCount = 0; 770 Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0, 771 CreatedValuesCount); 772 if (CreatedValuesCount > MaxCompositeValueSize) { 773 llvm::errs() << "Attempting to initialize a huge value of type: " << Type 774 << '\n'; 775 } 776 return Val; 777 } 778 779 Value *Environment::createValueUnlessSelfReferential( 780 QualType Type, llvm::DenseSet<QualType> &Visited, int Depth, 781 int &CreatedValuesCount) { 782 assert(!Type.isNull()); 783 784 // Allow unlimited fields at depth 1; only cap at deeper nesting levels. 785 if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) || 786 Depth > MaxCompositeValueDepth) 787 return nullptr; 788 789 if (Type->isBooleanType()) { 790 CreatedValuesCount++; 791 return &makeAtomicBoolValue(); 792 } 793 794 if (Type->isIntegerType()) { 795 // FIXME: consider instead `return nullptr`, given that we do nothing useful 796 // with integers, and so distinguishing them serves no purpose, but could 797 // prevent convergence. 798 CreatedValuesCount++; 799 return &arena().create<IntegerValue>(); 800 } 801 802 if (Type->isReferenceType() || Type->isPointerType()) { 803 CreatedValuesCount++; 804 QualType PointeeType = Type->getPointeeType(); 805 auto &PointeeLoc = createStorageLocation(PointeeType); 806 807 if (Visited.insert(PointeeType.getCanonicalType()).second) { 808 Value *PointeeVal = createValueUnlessSelfReferential( 809 PointeeType, Visited, Depth, CreatedValuesCount); 810 Visited.erase(PointeeType.getCanonicalType()); 811 812 if (PointeeVal != nullptr) 813 setValue(PointeeLoc, *PointeeVal); 814 } 815 816 if (Type->isReferenceType()) 817 return &arena().create<ReferenceValue>(PointeeLoc); 818 else 819 return &arena().create<PointerValue>(PointeeLoc); 820 } 821 822 if (Type->isRecordType()) { 823 CreatedValuesCount++; 824 llvm::DenseMap<const ValueDecl *, Value *> FieldValues; 825 for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { 826 assert(Field != nullptr); 827 828 QualType FieldType = Field->getType(); 829 if (Visited.contains(FieldType.getCanonicalType())) 830 continue; 831 832 Visited.insert(FieldType.getCanonicalType()); 833 if (auto *FieldValue = createValueUnlessSelfReferential( 834 FieldType, Visited, Depth + 1, CreatedValuesCount)) 835 FieldValues.insert({Field, FieldValue}); 836 Visited.erase(FieldType.getCanonicalType()); 837 } 838 839 return &arena().create<StructValue>(std::move(FieldValues)); 840 } 841 842 return nullptr; 843 } 844 845 StorageLocation &Environment::createObjectInternal(const VarDecl *D, 846 QualType Ty, 847 const Expr *InitExpr) { 848 if (Ty->isReferenceType()) { 849 // Although variables of reference type always need to be initialized, it 850 // can happen that we can't see the initializer, so `InitExpr` may still 851 // be null. 852 if (InitExpr) { 853 if (auto *InitExprLoc = 854 getStorageLocation(*InitExpr, SkipPast::Reference)) 855 return *InitExprLoc; 856 } 857 858 // Even though we have an initializer, we might not get an 859 // InitExprLoc, for example if the InitExpr is a CallExpr for which we 860 // don't have a function body. In this case, we just invent a storage 861 // location and value -- it's the best we can do. 862 return createObjectInternal(D, Ty.getNonReferenceType(), nullptr); 863 } 864 865 Value *Val = nullptr; 866 if (InitExpr) 867 // In the (few) cases where an expression is intentionally 868 // "uninterpreted", `InitExpr` is not associated with a value. There are 869 // two ways to handle this situation: propagate the status, so that 870 // uninterpreted initializers result in uninterpreted variables, or 871 // provide a default value. We choose the latter so that later refinements 872 // of the variable can be used for reasoning about the surrounding code. 873 // For this reason, we let this case be handled by the `createValue()` 874 // call below. 875 // 876 // FIXME. If and when we interpret all language cases, change this to 877 // assert that `InitExpr` is interpreted, rather than supplying a 878 // default value (assuming we don't update the environment API to return 879 // references). 880 Val = getValueStrict(*InitExpr); 881 if (!Val) 882 Val = createValue(Ty); 883 884 StorageLocation &Loc = 885 D ? createStorageLocation(*D) : createStorageLocation(Ty); 886 887 if (Val) 888 setValue(Loc, *Val); 889 890 return Loc; 891 } 892 893 StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const { 894 switch (SP) { 895 case SkipPast::None: 896 return Loc; 897 case SkipPast::Reference: 898 // References cannot be chained so we only need to skip past one level of 899 // indirection. 900 if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc))) 901 return Val->getReferentLoc(); 902 return Loc; 903 } 904 llvm_unreachable("bad SkipPast kind"); 905 } 906 907 const StorageLocation &Environment::skip(const StorageLocation &Loc, 908 SkipPast SP) const { 909 return skip(*const_cast<StorageLocation *>(&Loc), SP); 910 } 911 912 void Environment::addToFlowCondition(const Formula &Val) { 913 DACtx->addFlowConditionConstraint(FlowConditionToken, Val); 914 } 915 916 bool Environment::flowConditionImplies(const Formula &Val) const { 917 return DACtx->flowConditionImplies(FlowConditionToken, Val); 918 } 919 920 void Environment::dump(raw_ostream &OS) const { 921 // FIXME: add printing for remaining fields and allow caller to decide what 922 // fields are printed. 923 OS << "DeclToLoc:\n"; 924 for (auto [D, L] : DeclToLoc) 925 OS << " [" << D->getNameAsString() << ", " << L << "]\n"; 926 927 OS << "ExprToLoc:\n"; 928 for (auto [E, L] : ExprToLoc) 929 OS << " [" << E << ", " << L << "]\n"; 930 931 OS << "LocToVal:\n"; 932 for (auto [L, V] : LocToVal) { 933 OS << " [" << L << ", " << V << ": " << *V << "]\n"; 934 } 935 936 OS << "FlowConditionToken:\n"; 937 DACtx->dumpFlowCondition(FlowConditionToken, OS); 938 } 939 940 void Environment::dump() const { 941 dump(llvm::dbgs()); 942 } 943 944 AggregateStorageLocation * 945 getImplicitObjectLocation(const CXXMemberCallExpr &MCE, 946 const Environment &Env) { 947 Expr *ImplicitObject = MCE.getImplicitObjectArgument(); 948 if (ImplicitObject == nullptr) 949 return nullptr; 950 StorageLocation *Loc = 951 Env.getStorageLocation(*ImplicitObject, SkipPast::Reference); 952 if (Loc == nullptr) 953 return nullptr; 954 if (ImplicitObject->getType()->isPointerType()) { 955 if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Loc))) 956 return &cast<AggregateStorageLocation>(Val->getPointeeLoc()); 957 return nullptr; 958 } 959 return cast<AggregateStorageLocation>(Loc); 960 } 961 962 AggregateStorageLocation *getBaseObjectLocation(const MemberExpr &ME, 963 const Environment &Env) { 964 Expr *Base = ME.getBase(); 965 if (Base == nullptr) 966 return nullptr; 967 StorageLocation *Loc = Env.getStorageLocation(*Base, SkipPast::Reference); 968 if (Loc == nullptr) 969 return nullptr; 970 if (ME.isArrow()) { 971 if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Loc))) 972 return &cast<AggregateStorageLocation>(Val->getPointeeLoc()); 973 return nullptr; 974 } 975 return cast<AggregateStorageLocation>(Loc); 976 } 977 978 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) { 979 // Unnamed bitfields are only used for padding and do not appear in 980 // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s 981 // field list, and we thus need to remove them before mapping inits to 982 // fields to avoid mapping inits to the wrongs fields. 983 std::vector<FieldDecl *> Fields; 984 llvm::copy_if( 985 RD->fields(), std::back_inserter(Fields), 986 [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); }); 987 return Fields; 988 } 989 990 StructValue &refreshStructValue(AggregateStorageLocation &Loc, 991 Environment &Env) { 992 auto &NewVal = *cast<StructValue>(Env.createValue(Loc.getType())); 993 Env.setValue(Loc, NewVal); 994 return NewVal; 995 } 996 997 StructValue &refreshStructValue(const Expr &Expr, Environment &Env) { 998 assert(Expr.getType()->isRecordType()); 999 1000 auto &NewVal = *cast<StructValue>(Env.createValue(Expr.getType())); 1001 1002 if (Expr.isPRValue()) { 1003 Env.setValueStrict(Expr, NewVal); 1004 } else { 1005 StorageLocation *Loc = Env.getStorageLocationStrict(Expr); 1006 if (Loc == nullptr) { 1007 Loc = &Env.createStorageLocation(Expr); 1008 } 1009 Env.setValue(*Loc, NewVal); 1010 } 1011 1012 return NewVal; 1013 } 1014 1015 } // namespace dataflow 1016 } // namespace clang 1017