1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an Environment class that is used by dataflow analyses 10 // that run over Control-Flow Graphs (CFGs) to keep track of the state of the 11 // program at given program points. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" 16 #include "clang/AST/Decl.h" 17 #include "clang/AST/DeclCXX.h" 18 #include "clang/AST/Type.h" 19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h" 20 #include "clang/Analysis/FlowSensitive/Value.h" 21 #include "llvm/ADT/DenseMap.h" 22 #include "llvm/ADT/DenseSet.h" 23 #include "llvm/ADT/MapVector.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/Support/ErrorHandling.h" 26 #include <cassert> 27 #include <utility> 28 29 namespace clang { 30 namespace dataflow { 31 32 // FIXME: convert these to parameters of the analysis or environment. Current 33 // settings have been experimentally validated, but only for a particular 34 // analysis. 35 static constexpr int MaxCompositeValueDepth = 3; 36 static constexpr int MaxCompositeValueSize = 1000; 37 38 /// Returns a map consisting of key-value entries that are present in both maps. 
39 template <typename K, typename V> 40 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1, 41 const llvm::DenseMap<K, V> &Map2) { 42 llvm::DenseMap<K, V> Result; 43 for (auto &Entry : Map1) { 44 auto It = Map2.find(Entry.first); 45 if (It != Map2.end() && Entry.second == It->second) 46 Result.insert({Entry.first, Entry.second}); 47 } 48 return Result; 49 } 50 51 // Whether to consider equivalent two values with an unknown relation. 52 // 53 // FIXME: this function is a hack enabling unsoundness to support 54 // convergence. Once we have widening support for the reference/pointer and 55 // struct built-in models, this should be unconditionally `false` (and inlined 56 // as such at its call sites). 57 static bool equateUnknownValues(Value::Kind K) { 58 switch (K) { 59 case Value::Kind::Integer: 60 case Value::Kind::Pointer: 61 case Value::Kind::Record: 62 return true; 63 default: 64 return false; 65 } 66 } 67 68 static bool compareDistinctValues(QualType Type, Value &Val1, 69 const Environment &Env1, Value &Val2, 70 const Environment &Env2, 71 Environment::ValueModel &Model) { 72 // Note: Potentially costly, but, for booleans, we could check whether both 73 // can be proven equivalent in their respective environments. 74 75 // FIXME: move the reference/pointers logic from `areEquivalentValues` to here 76 // and implement separate, join/widen specific handling for 77 // reference/pointers. 78 switch (Model.compare(Type, Val1, Env1, Val2, Env2)) { 79 case ComparisonResult::Same: 80 return true; 81 case ComparisonResult::Different: 82 return false; 83 case ComparisonResult::Unknown: 84 return equateUnknownValues(Val1.getKind()); 85 } 86 llvm_unreachable("All cases covered in switch"); 87 } 88 89 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`, 90 /// respectively, of the same type `Type`. 
Merging generally produces a single 91 /// value that (soundly) approximates the two inputs, although the actual 92 /// meaning depends on `Model`. 93 static Value *mergeDistinctValues(QualType Type, Value &Val1, 94 const Environment &Env1, Value &Val2, 95 const Environment &Env2, 96 Environment &MergedEnv, 97 Environment::ValueModel &Model) { 98 // Join distinct boolean values preserving information about the constraints 99 // in the respective path conditions. 100 if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) { 101 // FIXME: Checking both values should be unnecessary, since they should have 102 // a consistent shape. However, right now we can end up with BoolValue's in 103 // integer-typed variables due to our incorrect handling of 104 // boolean-to-integer casts (we just propagate the BoolValue to the result 105 // of the cast). So, a join can encounter an integer in one branch but a 106 // bool in the other. 107 // For example: 108 // ``` 109 // std::optional<bool> o; 110 // int x; 111 // if (o.has_value()) 112 // x = o.value(); 113 // ``` 114 auto &Expr1 = cast<BoolValue>(Val1).formula(); 115 auto &Expr2 = cast<BoolValue>(Val2).formula(); 116 auto &A = MergedEnv.arena(); 117 auto &MergedVal = A.makeAtomRef(A.makeAtom()); 118 MergedEnv.addToFlowCondition( 119 A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()), 120 A.makeEquals(MergedVal, Expr1)), 121 A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()), 122 A.makeEquals(MergedVal, Expr2)))); 123 return &A.makeBoolValue(MergedVal); 124 } 125 126 Value *MergedVal = nullptr; 127 if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) { 128 auto *RecordVal2 = cast<RecordValue>(&Val2); 129 130 if (&RecordVal1->getLoc() == &RecordVal2->getLoc()) 131 // `RecordVal1` and `RecordVal2` may have different properties associated 132 // with them. Create a new `RecordValue` with the same location but 133 // without any properties so that we soundly approximate both values. 
If a 134 // particular analysis needs to merge properties, it should do so in 135 // `DataflowAnalysis::merge()`. 136 MergedVal = &MergedEnv.create<RecordValue>(RecordVal1->getLoc()); 137 else 138 // If the locations for the two records are different, need to create a 139 // completely new value. 140 MergedVal = MergedEnv.createValue(Type); 141 } else { 142 MergedVal = MergedEnv.createValue(Type); 143 } 144 145 // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge` 146 // returns false to avoid storing unneeded values in `DACtx`. 147 if (MergedVal) 148 if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv)) 149 return MergedVal; 150 151 return nullptr; 152 } 153 154 // When widening does not change `Current`, return value will equal `&Prev`. 155 static Value &widenDistinctValues(QualType Type, Value &Prev, 156 const Environment &PrevEnv, Value &Current, 157 Environment &CurrentEnv, 158 Environment::ValueModel &Model) { 159 // Boolean-model widening. 160 if (isa<BoolValue>(&Prev)) { 161 assert(isa<BoolValue>(Current)); 162 // Widen to Top, because we know they are different values. If previous was 163 // already Top, re-use that to (implicitly) indicate that no change occured. 164 if (isa<TopBoolValue>(Prev)) 165 return Prev; 166 return CurrentEnv.makeTopBoolValue(); 167 } 168 169 // FIXME: Add other built-in model widening. 170 171 // Custom-model widening. 172 if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv)) 173 return *W; 174 175 return equateUnknownValues(Prev.getKind()) ? Prev : Current; 176 } 177 178 // Returns whether the values in `Map1` and `Map2` compare equal for those 179 // keys that `Map1` and `Map2` have in common. 
180 template <typename Key> 181 bool compareKeyToValueMaps(const llvm::MapVector<Key, Value *> &Map1, 182 const llvm::MapVector<Key, Value *> &Map2, 183 const Environment &Env1, const Environment &Env2, 184 Environment::ValueModel &Model) { 185 for (auto &Entry : Map1) { 186 Key K = Entry.first; 187 assert(K != nullptr); 188 189 Value *Val = Entry.second; 190 assert(Val != nullptr); 191 192 auto It = Map2.find(K); 193 if (It == Map2.end()) 194 continue; 195 assert(It->second != nullptr); 196 197 if (!areEquivalentValues(*Val, *It->second) && 198 !compareDistinctValues(K->getType(), *Val, Env1, *It->second, Env2, 199 Model)) 200 return false; 201 } 202 203 return true; 204 } 205 206 // Perform a join on either `LocToVal` or `ExprToVal`. `Key` must be either 207 // `const StorageLocation *` or `const Expr *`. 208 template <typename Key> 209 llvm::MapVector<Key, Value *> 210 joinKeyToValueMap(const llvm::MapVector<Key, Value *> &Map1, 211 const llvm::MapVector<Key, Value *> &Map2, 212 const Environment &Env1, const Environment &Env2, 213 Environment &JoinedEnv, Environment::ValueModel &Model) { 214 llvm::MapVector<Key, Value *> MergedMap; 215 for (auto &Entry : Map1) { 216 Key K = Entry.first; 217 assert(K != nullptr); 218 219 Value *Val = Entry.second; 220 assert(Val != nullptr); 221 222 auto It = Map2.find(K); 223 if (It == Map2.end()) 224 continue; 225 assert(It->second != nullptr); 226 227 if (areEquivalentValues(*Val, *It->second)) { 228 MergedMap.insert({K, Val}); 229 continue; 230 } 231 232 if (Value *MergedVal = mergeDistinctValues( 233 K->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) { 234 MergedMap.insert({K, MergedVal}); 235 } 236 } 237 238 return MergedMap; 239 } 240 241 // Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either 242 // `const StorageLocation *` or `const Expr *`. 
243 template <typename Key> 244 llvm::MapVector<Key, Value *> 245 widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap, 246 const llvm::MapVector<Key, Value *> &PrevMap, 247 Environment &CurEnv, const Environment &PrevEnv, 248 Environment::ValueModel &Model, LatticeJoinEffect &Effect) { 249 llvm::MapVector<Key, Value *> WidenedMap; 250 for (auto &Entry : CurMap) { 251 Key K = Entry.first; 252 assert(K != nullptr); 253 254 Value *Val = Entry.second; 255 assert(Val != nullptr); 256 257 auto PrevIt = PrevMap.find(K); 258 if (PrevIt == PrevMap.end()) 259 continue; 260 assert(PrevIt->second != nullptr); 261 262 if (areEquivalentValues(*Val, *PrevIt->second)) { 263 WidenedMap.insert({K, Val}); 264 continue; 265 } 266 267 Value &WidenedVal = widenDistinctValues(K->getType(), *PrevIt->second, 268 PrevEnv, *Val, CurEnv, Model); 269 WidenedMap.insert({K, &WidenedVal}); 270 if (&WidenedVal != PrevIt->second) 271 Effect = LatticeJoinEffect::Changed; 272 } 273 274 return WidenedMap; 275 } 276 277 /// Initializes a global storage value. 
278 static void insertIfGlobal(const Decl &D, 279 llvm::DenseSet<const VarDecl *> &Vars) { 280 if (auto *V = dyn_cast<VarDecl>(&D)) 281 if (V->hasGlobalStorage()) 282 Vars.insert(V); 283 } 284 285 static void insertIfFunction(const Decl &D, 286 llvm::DenseSet<const FunctionDecl *> &Funcs) { 287 if (auto *FD = dyn_cast<FunctionDecl>(&D)) 288 Funcs.insert(FD); 289 } 290 291 static MemberExpr *getMemberForAccessor(const CXXMemberCallExpr &C) { 292 if (!C.getMethodDecl()) 293 return nullptr; 294 auto *Body = dyn_cast_or_null<CompoundStmt>(C.getMethodDecl()->getBody()); 295 if (!Body || Body->size() != 1) 296 return nullptr; 297 if (auto *RS = dyn_cast<ReturnStmt>(*Body->body_begin())) 298 if (auto *Return = RS->getRetValue()) 299 return dyn_cast<MemberExpr>(Return->IgnoreParenImpCasts()); 300 return nullptr; 301 } 302 303 static void 304 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields, 305 llvm::DenseSet<const VarDecl *> &Vars, 306 llvm::DenseSet<const FunctionDecl *> &Funcs) { 307 insertIfGlobal(D, Vars); 308 insertIfFunction(D, Funcs); 309 if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D)) 310 for (const auto *B : Decomp->bindings()) 311 if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding())) 312 // FIXME: should we be using `E->getFoundDecl()`? 313 if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) 314 Fields.insert(FD); 315 } 316 317 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields, 318 /// global variables and functions that are declared in or referenced from 319 /// sub-statements. 
320 static void 321 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, 322 llvm::DenseSet<const VarDecl *> &Vars, 323 llvm::DenseSet<const FunctionDecl *> &Funcs) { 324 for (auto *Child : S.children()) 325 if (Child != nullptr) 326 getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs); 327 if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S)) 328 getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs); 329 330 if (auto *DS = dyn_cast<DeclStmt>(&S)) { 331 if (DS->isSingleDecl()) 332 getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs); 333 else 334 for (auto *D : DS->getDeclGroup()) 335 getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs); 336 } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) { 337 insertIfGlobal(*E->getDecl(), Vars); 338 insertIfFunction(*E->getDecl(), Funcs); 339 } else if (const auto *C = dyn_cast<CXXMemberCallExpr>(&S)) { 340 // If this is a method that returns a member variable but does nothing else, 341 // model the field of the return value. 342 if (MemberExpr *E = getMemberForAccessor(*C)) 343 if (const auto *FD = dyn_cast<FieldDecl>(E->getMemberDecl())) 344 Fields.insert(FD); 345 } else if (auto *E = dyn_cast<MemberExpr>(&S)) { 346 // FIXME: should we be using `E->getFoundDecl()`? 347 const ValueDecl *VD = E->getMemberDecl(); 348 insertIfGlobal(*VD, Vars); 349 insertIfFunction(*VD, Funcs); 350 if (const auto *FD = dyn_cast<FieldDecl>(VD)) 351 Fields.insert(FD); 352 } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) { 353 if (RecordDecl *RD = InitList->getType()->getAsRecordDecl()) 354 for (const auto *FD : getFieldsForInitListExpr(RD)) 355 Fields.insert(FD); 356 } 357 } 358 359 // FIXME: Add support for resetting globals after function calls to enable 360 // the implementation of sound analyses. 
361 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { 362 assert(FuncDecl->getBody() != nullptr); 363 364 FieldSet Fields; 365 llvm::DenseSet<const VarDecl *> Vars; 366 llvm::DenseSet<const FunctionDecl *> Funcs; 367 368 // Look for global variable and field references in the 369 // constructor-initializers. 370 if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) { 371 for (const auto *Init : CtorDecl->inits()) { 372 if (Init->isMemberInitializer()) { 373 Fields.insert(Init->getMember()); 374 } else if (Init->isIndirectMemberInitializer()) { 375 for (const auto *I : Init->getIndirectMember()->chain()) 376 Fields.insert(cast<FieldDecl>(I)); 377 } 378 const Expr *E = Init->getInit(); 379 assert(E != nullptr); 380 getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs); 381 } 382 // Add all fields mentioned in default member initializers. 383 for (const FieldDecl *F : CtorDecl->getParent()->fields()) 384 if (const auto *I = F->getInClassInitializer()) 385 getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs); 386 } 387 getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs); 388 389 // These have to be added before the lines that follow to ensure that 390 // `create*` work correctly for structs. 
391 DACtx->addModeledFields(Fields); 392 393 for (const VarDecl *D : Vars) { 394 if (getStorageLocation(*D) != nullptr) 395 continue; 396 397 setStorageLocation(*D, createObject(*D)); 398 } 399 400 for (const FunctionDecl *FD : Funcs) { 401 if (getStorageLocation(*FD) != nullptr) 402 continue; 403 auto &Loc = createStorageLocation(FD->getType()); 404 setStorageLocation(*FD, Loc); 405 } 406 } 407 408 Environment::Environment(DataflowAnalysisContext &DACtx) 409 : DACtx(&DACtx), 410 FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} 411 412 Environment Environment::fork() const { 413 Environment Copy(*this); 414 Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken); 415 return Copy; 416 } 417 418 Environment::Environment(DataflowAnalysisContext &DACtx, 419 const DeclContext &DeclCtx) 420 : Environment(DACtx) { 421 CallStack.push_back(&DeclCtx); 422 423 if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) { 424 assert(FuncDecl->getBody() != nullptr); 425 426 initFieldsGlobalsAndFuncs(FuncDecl); 427 428 for (const auto *ParamDecl : FuncDecl->parameters()) { 429 assert(ParamDecl != nullptr); 430 setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); 431 } 432 } 433 434 if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) { 435 auto *Parent = MethodDecl->getParent(); 436 assert(Parent != nullptr); 437 if (Parent->isLambda()) 438 MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext()); 439 440 // FIXME: Initialize the ThisPointeeLoc of lambdas too. 
441 if (MethodDecl && !MethodDecl->isStatic()) { 442 QualType ThisPointeeType = MethodDecl->getThisObjectType(); 443 ThisPointeeLoc = 444 &cast<RecordValue>(createValue(ThisPointeeType))->getLoc(); 445 } 446 } 447 } 448 449 bool Environment::canDescend(unsigned MaxDepth, 450 const DeclContext *Callee) const { 451 return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee); 452 } 453 454 Environment Environment::pushCall(const CallExpr *Call) const { 455 Environment Env(*this); 456 457 if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) { 458 if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) { 459 if (!isa<CXXThisExpr>(Arg)) 460 Env.ThisPointeeLoc = 461 cast<RecordStorageLocation>(getStorageLocation(*Arg)); 462 // Otherwise (when the argument is `this`), retain the current 463 // environment's `ThisPointeeLoc`. 464 } 465 } 466 467 Env.pushCallInternal(Call->getDirectCallee(), 468 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 469 470 return Env; 471 } 472 473 Environment Environment::pushCall(const CXXConstructExpr *Call) const { 474 Environment Env(*this); 475 476 Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call); 477 478 Env.pushCallInternal(Call->getConstructor(), 479 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 480 481 return Env; 482 } 483 484 void Environment::pushCallInternal(const FunctionDecl *FuncDecl, 485 ArrayRef<const Expr *> Args) { 486 // Canonicalize to the definition of the function. This ensures that we're 487 // putting arguments into the same `ParamVarDecl`s` that the callee will later 488 // be retrieving them from. 
489 assert(FuncDecl->getDefinition() != nullptr); 490 FuncDecl = FuncDecl->getDefinition(); 491 492 CallStack.push_back(FuncDecl); 493 494 initFieldsGlobalsAndFuncs(FuncDecl); 495 496 const auto *ParamIt = FuncDecl->param_begin(); 497 498 // FIXME: Parameters don't always map to arguments 1:1; examples include 499 // overloaded operators implemented as member functions, and parameter packs. 500 for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) { 501 assert(ParamIt != FuncDecl->param_end()); 502 const VarDecl *Param = *ParamIt; 503 setStorageLocation(*Param, createObject(*Param, Args[ArgIndex])); 504 } 505 } 506 507 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { 508 // We ignore some entries of `CalleeEnv`: 509 // - `DACtx` because is already the same in both 510 // - We don't want the callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or 511 // `ThisPointeeLoc` because they don't apply to us. 512 // - `DeclToLoc`, `ExprToLoc`, and `ExprToVal` capture information from the 513 // callee's local scope, so when popping that scope, we do not propagate 514 // the maps. 515 this->LocToVal = std::move(CalleeEnv.LocToVal); 516 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 517 518 if (Call->isGLValue()) { 519 if (CalleeEnv.ReturnLoc != nullptr) 520 setStorageLocation(*Call, *CalleeEnv.ReturnLoc); 521 } else if (!Call->getType()->isVoidType()) { 522 if (CalleeEnv.ReturnVal != nullptr) 523 setValue(*Call, *CalleeEnv.ReturnVal); 524 } 525 } 526 527 void Environment::popCall(const CXXConstructExpr *Call, 528 const Environment &CalleeEnv) { 529 // See also comment in `popCall(const CallExpr *, const Environment &)` above. 
530 this->LocToVal = std::move(CalleeEnv.LocToVal); 531 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 532 533 if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) { 534 setValue(*Call, *Val); 535 } 536 } 537 538 bool Environment::equivalentTo(const Environment &Other, 539 Environment::ValueModel &Model) const { 540 assert(DACtx == Other.DACtx); 541 542 if (ReturnVal != Other.ReturnVal) 543 return false; 544 545 if (ReturnLoc != Other.ReturnLoc) 546 return false; 547 548 if (ThisPointeeLoc != Other.ThisPointeeLoc) 549 return false; 550 551 if (DeclToLoc != Other.DeclToLoc) 552 return false; 553 554 if (ExprToLoc != Other.ExprToLoc) 555 return false; 556 557 if (!compareKeyToValueMaps(ExprToVal, Other.ExprToVal, *this, Other, Model)) 558 return false; 559 560 if (!compareKeyToValueMaps(LocToVal, Other.LocToVal, *this, Other, Model)) 561 return false; 562 563 return true; 564 } 565 566 LatticeJoinEffect Environment::widen(const Environment &PrevEnv, 567 Environment::ValueModel &Model) { 568 assert(DACtx == PrevEnv.DACtx); 569 assert(ReturnVal == PrevEnv.ReturnVal); 570 assert(ReturnLoc == PrevEnv.ReturnLoc); 571 assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); 572 assert(CallStack == PrevEnv.CallStack); 573 574 auto Effect = LatticeJoinEffect::Unchanged; 575 576 // By the API, `PrevEnv` is a previous version of the environment for the same 577 // block, so we have some guarantees about its shape. In particular, it will 578 // be the result of a join or widen operation on previous values for this 579 // block. For `DeclToLoc`, `ExprToVal`, and `ExprToLoc`, join guarantees that 580 // these maps are subsets of the maps in `PrevEnv`. So, as long as we maintain 581 // this property here, we don't need change their current values to widen. 
582 assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size()); 583 assert(ExprToVal.size() <= PrevEnv.ExprToVal.size()); 584 assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size()); 585 586 ExprToVal = widenKeyToValueMap(ExprToVal, PrevEnv.ExprToVal, *this, PrevEnv, 587 Model, Effect); 588 589 LocToVal = widenKeyToValueMap(LocToVal, PrevEnv.LocToVal, *this, PrevEnv, 590 Model, Effect); 591 if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() || 592 ExprToLoc.size() != PrevEnv.ExprToLoc.size() || 593 ExprToVal.size() != PrevEnv.ExprToVal.size() || 594 LocToVal.size() != PrevEnv.LocToVal.size()) 595 Effect = LatticeJoinEffect::Changed; 596 597 return Effect; 598 } 599 600 Environment Environment::join(const Environment &EnvA, const Environment &EnvB, 601 Environment::ValueModel &Model) { 602 assert(EnvA.DACtx == EnvB.DACtx); 603 assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); 604 assert(EnvA.CallStack == EnvB.CallStack); 605 606 Environment JoinedEnv(*EnvA.DACtx); 607 608 JoinedEnv.CallStack = EnvA.CallStack; 609 JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; 610 611 if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) { 612 // `ReturnVal` might not always get set -- for example if we have a return 613 // statement of the form `return some_other_func()` and we decide not to 614 // analyze `some_other_func()`. 615 // In this case, we can't say anything about the joined return value -- we 616 // don't simply want to propagate the return value that we do have, because 617 // it might not be the correct one. 618 // This occurs for example in the test `ContextSensitiveMutualRecursion`. 619 JoinedEnv.ReturnVal = nullptr; 620 } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) { 621 JoinedEnv.ReturnVal = EnvA.ReturnVal; 622 } else { 623 assert(!EnvA.CallStack.empty()); 624 // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this 625 // cast. 
626 auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back()); 627 assert(Func != nullptr); 628 if (Value *MergedVal = 629 mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA, 630 *EnvB.ReturnVal, EnvB, JoinedEnv, Model)) 631 JoinedEnv.ReturnVal = MergedVal; 632 } 633 634 if (EnvA.ReturnLoc == EnvB.ReturnLoc) 635 JoinedEnv.ReturnLoc = EnvA.ReturnLoc; 636 else 637 JoinedEnv.ReturnLoc = nullptr; 638 639 // FIXME: Once we're able to remove declarations from `DeclToLoc` when their 640 // lifetime ends, add an assertion that there aren't any entries in 641 // `DeclToLoc` and `Other.DeclToLoc` that map the same declaration to 642 // different storage locations. 643 JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc); 644 645 JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc); 646 647 // FIXME: update join to detect backedges and simplify the flow condition 648 // accordingly. 649 JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions( 650 EnvA.FlowConditionToken, EnvB.FlowConditionToken); 651 652 JoinedEnv.ExprToVal = joinKeyToValueMap(EnvA.ExprToVal, EnvB.ExprToVal, EnvA, 653 EnvB, JoinedEnv, Model); 654 655 JoinedEnv.LocToVal = joinKeyToValueMap(EnvA.LocToVal, EnvB.LocToVal, EnvA, 656 EnvB, JoinedEnv, Model); 657 658 return JoinedEnv; 659 } 660 661 StorageLocation &Environment::createStorageLocation(QualType Type) { 662 return DACtx->createStorageLocation(Type); 663 } 664 665 StorageLocation &Environment::createStorageLocation(const VarDecl &D) { 666 // Evaluated declarations are always assigned the same storage locations to 667 // ensure that the environment stabilizes across loop iterations. Storage 668 // locations for evaluated declarations are stored in the analysis context. 
669 return DACtx->getStableStorageLocation(D); 670 } 671 672 StorageLocation &Environment::createStorageLocation(const Expr &E) { 673 // Evaluated expressions are always assigned the same storage locations to 674 // ensure that the environment stabilizes across loop iterations. Storage 675 // locations for evaluated expressions are stored in the analysis context. 676 return DACtx->getStableStorageLocation(E); 677 } 678 679 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { 680 assert(!DeclToLoc.contains(&D)); 681 DeclToLoc[&D] = &Loc; 682 } 683 684 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const { 685 auto It = DeclToLoc.find(&D); 686 if (It == DeclToLoc.end()) 687 return nullptr; 688 689 StorageLocation *Loc = It->second; 690 691 return Loc; 692 } 693 694 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) { 695 // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason, 696 // but we still want to be able to associate a `StorageLocation` with them, 697 // so allow these as an exception. 698 assert(E.isGLValue() || 699 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 700 setStorageLocationInternal(E, Loc); 701 } 702 703 StorageLocation *Environment::getStorageLocation(const Expr &E) const { 704 // See comment in `setStorageLocation()`. 
705 assert(E.isGLValue() || 706 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 707 return getStorageLocationInternal(E); 708 } 709 710 RecordStorageLocation *Environment::getThisPointeeStorageLocation() const { 711 return ThisPointeeLoc; 712 } 713 714 RecordStorageLocation & 715 Environment::getResultObjectLocation(const Expr &RecordPRValue) { 716 assert(RecordPRValue.getType()->isRecordType()); 717 assert(RecordPRValue.isPRValue()); 718 719 if (StorageLocation *ExistingLoc = getStorageLocationInternal(RecordPRValue)) 720 return *cast<RecordStorageLocation>(ExistingLoc); 721 auto &Loc = cast<RecordStorageLocation>( 722 DACtx->getStableStorageLocation(RecordPRValue)); 723 setStorageLocationInternal(RecordPRValue, Loc); 724 return Loc; 725 } 726 727 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { 728 return DACtx->getOrCreateNullPointerValue(PointeeType); 729 } 730 731 void Environment::setValue(const StorageLocation &Loc, Value &Val) { 732 assert(!isa<RecordValue>(&Val) || &cast<RecordValue>(&Val)->getLoc() == &Loc); 733 734 LocToVal[&Loc] = &Val; 735 } 736 737 void Environment::setValue(const Expr &E, Value &Val) { 738 assert(E.isPRValue()); 739 ExprToVal[&E] = &Val; 740 } 741 742 Value *Environment::getValue(const StorageLocation &Loc) const { 743 return LocToVal.lookup(&Loc); 744 } 745 746 Value *Environment::getValue(const ValueDecl &D) const { 747 auto *Loc = getStorageLocation(D); 748 if (Loc == nullptr) 749 return nullptr; 750 return getValue(*Loc); 751 } 752 753 Value *Environment::getValue(const Expr &E) const { 754 if (E.isPRValue()) { 755 auto It = ExprToVal.find(&ignoreCFGOmittedNodes(E)); 756 return It == ExprToVal.end() ? 
nullptr : It->second; 757 } 758 759 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 760 if (It == ExprToLoc.end()) 761 return nullptr; 762 return getValue(*It->second); 763 } 764 765 Value *Environment::createValue(QualType Type) { 766 llvm::DenseSet<QualType> Visited; 767 int CreatedValuesCount = 0; 768 Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0, 769 CreatedValuesCount); 770 if (CreatedValuesCount > MaxCompositeValueSize) { 771 llvm::errs() << "Attempting to initialize a huge value of type: " << Type 772 << '\n'; 773 } 774 return Val; 775 } 776 777 void Environment::setStorageLocationInternal(const Expr &E, 778 StorageLocation &Loc) { 779 const Expr &CanonE = ignoreCFGOmittedNodes(E); 780 assert(!ExprToLoc.contains(&CanonE)); 781 ExprToLoc[&CanonE] = &Loc; 782 } 783 784 StorageLocation *Environment::getStorageLocationInternal(const Expr &E) const { 785 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 786 return It == ExprToLoc.end() ? nullptr : &*It->second; 787 } 788 789 Value *Environment::createValueUnlessSelfReferential( 790 QualType Type, llvm::DenseSet<QualType> &Visited, int Depth, 791 int &CreatedValuesCount) { 792 assert(!Type.isNull()); 793 assert(!Type->isReferenceType()); 794 795 // Allow unlimited fields at depth 1; only cap at deeper nesting levels. 796 if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) || 797 Depth > MaxCompositeValueDepth) 798 return nullptr; 799 800 if (Type->isBooleanType()) { 801 CreatedValuesCount++; 802 return &makeAtomicBoolValue(); 803 } 804 805 if (Type->isIntegerType()) { 806 // FIXME: consider instead `return nullptr`, given that we do nothing useful 807 // with integers, and so distinguishing them serves no purpose, but could 808 // prevent convergence. 
809 CreatedValuesCount++; 810 return &arena().create<IntegerValue>(); 811 } 812 813 if (Type->isPointerType()) { 814 CreatedValuesCount++; 815 QualType PointeeType = Type->getPointeeType(); 816 StorageLocation &PointeeLoc = 817 createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount); 818 819 return &arena().create<PointerValue>(PointeeLoc); 820 } 821 822 if (Type->isRecordType()) { 823 CreatedValuesCount++; 824 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 825 for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { 826 assert(Field != nullptr); 827 828 QualType FieldType = Field->getType(); 829 830 FieldLocs.insert( 831 {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1, 832 CreatedValuesCount)}); 833 } 834 835 RecordStorageLocation &Loc = 836 arena().create<RecordStorageLocation>(Type, std::move(FieldLocs)); 837 RecordValue &RecordVal = create<RecordValue>(Loc); 838 839 // As we already have a storage location for the `RecordValue`, we can and 840 // should associate them in the environment. 
841 setValue(Loc, RecordVal); 842 843 return &RecordVal; 844 } 845 846 return nullptr; 847 } 848 849 StorageLocation & 850 Environment::createLocAndMaybeValue(QualType Ty, 851 llvm::DenseSet<QualType> &Visited, 852 int Depth, int &CreatedValuesCount) { 853 if (!Visited.insert(Ty.getCanonicalType()).second) 854 return createStorageLocation(Ty.getNonReferenceType()); 855 Value *Val = createValueUnlessSelfReferential( 856 Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount); 857 Visited.erase(Ty.getCanonicalType()); 858 859 Ty = Ty.getNonReferenceType(); 860 861 if (Val == nullptr) 862 return createStorageLocation(Ty); 863 864 if (Ty->isRecordType()) 865 return cast<RecordValue>(Val)->getLoc(); 866 867 StorageLocation &Loc = createStorageLocation(Ty); 868 setValue(Loc, *Val); 869 return Loc; 870 } 871 872 StorageLocation &Environment::createObjectInternal(const VarDecl *D, 873 QualType Ty, 874 const Expr *InitExpr) { 875 if (Ty->isReferenceType()) { 876 // Although variables of reference type always need to be initialized, it 877 // can happen that we can't see the initializer, so `InitExpr` may still 878 // be null. 879 if (InitExpr) { 880 if (auto *InitExprLoc = getStorageLocation(*InitExpr)) 881 return *InitExprLoc; 882 } 883 884 // Even though we have an initializer, we might not get an 885 // InitExprLoc, for example if the InitExpr is a CallExpr for which we 886 // don't have a function body. In this case, we just invent a storage 887 // location and value -- it's the best we can do. 888 return createObjectInternal(D, Ty.getNonReferenceType(), nullptr); 889 } 890 891 Value *Val = nullptr; 892 if (InitExpr) 893 // In the (few) cases where an expression is intentionally 894 // "uninterpreted", `InitExpr` is not associated with a value. There are 895 // two ways to handle this situation: propagate the status, so that 896 // uninterpreted initializers result in uninterpreted variables, or 897 // provide a default value. 
We choose the latter so that later refinements 898 // of the variable can be used for reasoning about the surrounding code. 899 // For this reason, we let this case be handled by the `createValue()` 900 // call below. 901 // 902 // FIXME. If and when we interpret all language cases, change this to 903 // assert that `InitExpr` is interpreted, rather than supplying a 904 // default value (assuming we don't update the environment API to return 905 // references). 906 Val = getValue(*InitExpr); 907 if (!Val) 908 Val = createValue(Ty); 909 910 if (Ty->isRecordType()) 911 return cast<RecordValue>(Val)->getLoc(); 912 913 StorageLocation &Loc = 914 D ? createStorageLocation(*D) : createStorageLocation(Ty); 915 916 if (Val) 917 setValue(Loc, *Val); 918 919 return Loc; 920 } 921 922 void Environment::addToFlowCondition(const Formula &Val) { 923 DACtx->addFlowConditionConstraint(FlowConditionToken, Val); 924 } 925 926 bool Environment::flowConditionImplies(const Formula &Val) const { 927 return DACtx->flowConditionImplies(FlowConditionToken, Val); 928 } 929 930 void Environment::dump(raw_ostream &OS) const { 931 // FIXME: add printing for remaining fields and allow caller to decide what 932 // fields are printed. 
933 OS << "DeclToLoc:\n"; 934 for (auto [D, L] : DeclToLoc) 935 OS << " [" << D->getNameAsString() << ", " << L << "]\n"; 936 937 OS << "ExprToLoc:\n"; 938 for (auto [E, L] : ExprToLoc) 939 OS << " [" << E << ", " << L << "]\n"; 940 941 OS << "ExprToVal:\n"; 942 for (auto [E, V] : ExprToVal) 943 OS << " [" << E << ", " << V << ": " << *V << "]\n"; 944 945 OS << "LocToVal:\n"; 946 for (auto [L, V] : LocToVal) { 947 OS << " [" << L << ", " << V << ": " << *V << "]\n"; 948 } 949 950 OS << "FlowConditionToken:\n"; 951 DACtx->dumpFlowCondition(FlowConditionToken, OS); 952 } 953 954 void Environment::dump() const { 955 dump(llvm::dbgs()); 956 } 957 958 RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE, 959 const Environment &Env) { 960 Expr *ImplicitObject = MCE.getImplicitObjectArgument(); 961 if (ImplicitObject == nullptr) 962 return nullptr; 963 if (ImplicitObject->getType()->isPointerType()) { 964 if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*ImplicitObject))) 965 return &cast<RecordStorageLocation>(Val->getPointeeLoc()); 966 return nullptr; 967 } 968 return cast_or_null<RecordStorageLocation>( 969 Env.getStorageLocation(*ImplicitObject)); 970 } 971 972 RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME, 973 const Environment &Env) { 974 Expr *Base = ME.getBase(); 975 if (Base == nullptr) 976 return nullptr; 977 if (ME.isArrow()) { 978 if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Base))) 979 return &cast<RecordStorageLocation>(Val->getPointeeLoc()); 980 return nullptr; 981 } 982 return cast_or_null<RecordStorageLocation>(Env.getStorageLocation(*Base)); 983 } 984 985 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) { 986 // Unnamed bitfields are only used for padding and do not appear in 987 // `InitListExpr`'s inits. 
However, those fields do appear in `RecordDecl`'s 988 // field list, and we thus need to remove them before mapping inits to 989 // fields to avoid mapping inits to the wrongs fields. 990 std::vector<FieldDecl *> Fields; 991 llvm::copy_if( 992 RD->fields(), std::back_inserter(Fields), 993 [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); }); 994 return Fields; 995 } 996 997 RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) { 998 auto &NewVal = Env.create<RecordValue>(Loc); 999 Env.setValue(Loc, NewVal); 1000 return NewVal; 1001 } 1002 1003 RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) { 1004 assert(Expr.getType()->isRecordType()); 1005 1006 if (Expr.isPRValue()) { 1007 if (auto *ExistingVal = cast_or_null<RecordValue>(Env.getValue(Expr))) { 1008 auto &NewVal = Env.create<RecordValue>(ExistingVal->getLoc()); 1009 Env.setValue(Expr, NewVal); 1010 return NewVal; 1011 } 1012 1013 auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType())); 1014 Env.setValue(Expr, NewVal); 1015 return NewVal; 1016 } 1017 1018 if (auto *Loc = 1019 cast_or_null<RecordStorageLocation>(Env.getStorageLocation(Expr))) { 1020 auto &NewVal = Env.create<RecordValue>(*Loc); 1021 Env.setValue(*Loc, NewVal); 1022 return NewVal; 1023 } 1024 1025 auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType())); 1026 Env.setStorageLocation(Expr, NewVal.getLoc()); 1027 return NewVal; 1028 } 1029 1030 } // namespace dataflow 1031 } // namespace clang 1032