1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an Environment class that is used by dataflow analyses 10 // that run over Control-Flow Graphs (CFGs) to keep track of the state of the 11 // program at given program points. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" 16 #include "clang/AST/Decl.h" 17 #include "clang/AST/DeclCXX.h" 18 #include "clang/AST/Type.h" 19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h" 20 #include "clang/Analysis/FlowSensitive/Value.h" 21 #include "llvm/ADT/DenseMap.h" 22 #include "llvm/ADT/DenseSet.h" 23 #include "llvm/ADT/MapVector.h" 24 #include "llvm/ADT/STLExtras.h" 25 #include "llvm/Support/ErrorHandling.h" 26 #include <cassert> 27 #include <utility> 28 29 namespace clang { 30 namespace dataflow { 31 32 // FIXME: convert these to parameters of the analysis or environment. Current 33 // settings have been experimentaly validated, but only for a particular 34 // analysis. 35 static constexpr int MaxCompositeValueDepth = 3; 36 static constexpr int MaxCompositeValueSize = 1000; 37 38 /// Returns whether all declarations that `DeclToLoc1` and `DeclToLoc2` have in 39 /// common map to the same storage location in both maps. 40 bool declToLocConsistent( 41 const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc1, 42 const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc2) { 43 for (auto &Entry : DeclToLoc1) { 44 auto It = DeclToLoc2.find(Entry.first); 45 if (It != DeclToLoc2.end() && Entry.second != It->second) 46 return false; 47 } 48 49 return true; 50 } 51 52 /// Returns a map consisting of key-value entries that are present in both maps. 53 template <typename K, typename V> 54 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1, 55 const llvm::DenseMap<K, V> &Map2) { 56 llvm::DenseMap<K, V> Result; 57 for (auto &Entry : Map1) { 58 auto It = Map2.find(Entry.first); 59 if (It != Map2.end() && Entry.second == It->second) 60 Result.insert({Entry.first, Entry.second}); 61 } 62 return Result; 63 } 64 65 // Whether to consider equivalent two values with an unknown relation. 66 // 67 // FIXME: this function is a hack enabling unsoundness to support 68 // convergence. Once we have widening support for the reference/pointer and 69 // struct built-in models, this should be unconditionally `false` (and inlined 70 // as such at its call sites). 71 static bool equateUnknownValues(Value::Kind K) { 72 switch (K) { 73 case Value::Kind::Integer: 74 case Value::Kind::Pointer: 75 case Value::Kind::Record: 76 return true; 77 default: 78 return false; 79 } 80 } 81 82 static bool compareDistinctValues(QualType Type, Value &Val1, 83 const Environment &Env1, Value &Val2, 84 const Environment &Env2, 85 Environment::ValueModel &Model) { 86 // Note: Potentially costly, but, for booleans, we could check whether both 87 // can be proven equivalent in their respective environments. 88 89 // FIXME: move the reference/pointers logic from `areEquivalentValues` to here 90 // and implement separate, join/widen specific handling for 91 // reference/pointers. 92 switch (Model.compare(Type, Val1, Env1, Val2, Env2)) { 93 case ComparisonResult::Same: 94 return true; 95 case ComparisonResult::Different: 96 return false; 97 case ComparisonResult::Unknown: 98 return equateUnknownValues(Val1.getKind()); 99 } 100 llvm_unreachable("All cases covered in switch"); 101 } 102 103 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`, 104 /// respectively, of the same type `Type`. Merging generally produces a single 105 /// value that (soundly) approximates the two inputs, although the actual 106 /// meaning depends on `Model`. 107 static Value *mergeDistinctValues(QualType Type, Value &Val1, 108 const Environment &Env1, Value &Val2, 109 const Environment &Env2, 110 Environment &MergedEnv, 111 Environment::ValueModel &Model) { 112 // Join distinct boolean values preserving information about the constraints 113 // in the respective path conditions. 114 if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) { 115 // FIXME: Checking both values should be unnecessary, since they should have 116 // a consistent shape. However, right now we can end up with BoolValue's in 117 // integer-typed variables due to our incorrect handling of 118 // boolean-to-integer casts (we just propagate the BoolValue to the result 119 // of the cast). So, a join can encounter an integer in one branch but a 120 // bool in the other. 121 // For example: 122 // ``` 123 // std::optional<bool> o; 124 // int x; 125 // if (o.has_value()) 126 // x = o.value(); 127 // ``` 128 auto &Expr1 = cast<BoolValue>(Val1).formula(); 129 auto &Expr2 = cast<BoolValue>(Val2).formula(); 130 auto &A = MergedEnv.arena(); 131 auto &MergedVal = A.makeAtomRef(A.makeAtom()); 132 MergedEnv.addToFlowCondition( 133 A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()), 134 A.makeEquals(MergedVal, Expr1)), 135 A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()), 136 A.makeEquals(MergedVal, Expr2)))); 137 return &A.makeBoolValue(MergedVal); 138 } 139 140 Value *MergedVal = nullptr; 141 if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) { 142 auto *RecordVal2 = cast<RecordValue>(&Val2); 143 144 if (&RecordVal1->getLoc() == &RecordVal2->getLoc()) 145 // `RecordVal1` and `RecordVal2` may have different properties associated 146 // with them. Create a new `RecordValue` with the same location but 147 // without any properties so that we soundly approximate both values. If a 148 // particular analysis needs to merge properties, it should do so in 149 // `DataflowAnalysis::merge()`. 150 MergedVal = &MergedEnv.create<RecordValue>(RecordVal1->getLoc()); 151 else 152 // If the locations for the two records are different, need to create a 153 // completely new value. 154 MergedVal = MergedEnv.createValue(Type); 155 } else { 156 MergedVal = MergedEnv.createValue(Type); 157 } 158 159 // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge` 160 // returns false to avoid storing unneeded values in `DACtx`. 161 if (MergedVal) 162 if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv)) 163 return MergedVal; 164 165 return nullptr; 166 } 167 168 // When widening does not change `Current`, return value will equal `&Prev`. 169 static Value &widenDistinctValues(QualType Type, Value &Prev, 170 const Environment &PrevEnv, Value &Current, 171 Environment &CurrentEnv, 172 Environment::ValueModel &Model) { 173 // Boolean-model widening. 174 if (isa<BoolValue>(&Prev)) { 175 assert(isa<BoolValue>(Current)); 176 // Widen to Top, because we know they are different values. If previous was 177 // already Top, re-use that to (implicitly) indicate that no change occured. 178 if (isa<TopBoolValue>(Prev)) 179 return Prev; 180 return CurrentEnv.makeTopBoolValue(); 181 } 182 183 // FIXME: Add other built-in model widening. 184 185 // Custom-model widening. 186 if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv)) 187 return *W; 188 189 return equateUnknownValues(Prev.getKind()) ? Prev : Current; 190 } 191 192 // Returns whether the values in `Map1` and `Map2` compare equal for those 193 // keys that `Map1` and `Map2` have in common. 194 template <typename Key> 195 bool compareKeyToValueMaps(const llvm::MapVector<Key, Value *> &Map1, 196 const llvm::MapVector<Key, Value *> &Map2, 197 const Environment &Env1, const Environment &Env2, 198 Environment::ValueModel &Model) { 199 for (auto &Entry : Map1) { 200 Key K = Entry.first; 201 assert(K != nullptr); 202 203 Value *Val = Entry.second; 204 assert(Val != nullptr); 205 206 auto It = Map2.find(K); 207 if (It == Map2.end()) 208 continue; 209 assert(It->second != nullptr); 210 211 if (!areEquivalentValues(*Val, *It->second) && 212 !compareDistinctValues(K->getType(), *Val, Env1, *It->second, Env2, 213 Model)) 214 return false; 215 } 216 217 return true; 218 } 219 220 // Perform a join on either `LocToVal` or `ExprToVal`. `Key` must be either 221 // `const StorageLocation *` or `const Expr *`. 222 template <typename Key> 223 llvm::MapVector<Key, Value *> 224 joinKeyToValueMap(const llvm::MapVector<Key, Value *> &Map1, 225 const llvm::MapVector<Key, Value *> &Map2, 226 const Environment &Env1, const Environment &Env2, 227 Environment &JoinedEnv, Environment::ValueModel &Model) { 228 llvm::MapVector<Key, Value *> MergedMap; 229 for (auto &Entry : Map1) { 230 Key K = Entry.first; 231 assert(K != nullptr); 232 233 Value *Val = Entry.second; 234 assert(Val != nullptr); 235 236 auto It = Map2.find(K); 237 if (It == Map2.end()) 238 continue; 239 assert(It->second != nullptr); 240 241 if (areEquivalentValues(*Val, *It->second)) { 242 MergedMap.insert({K, Val}); 243 continue; 244 } 245 246 if (Value *MergedVal = mergeDistinctValues( 247 K->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) { 248 MergedMap.insert({K, MergedVal}); 249 } 250 } 251 252 return MergedMap; 253 } 254 255 // Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either 256 // `const StorageLocation *` or `const Expr *`. 257 template <typename Key> 258 llvm::MapVector<Key, Value *> 259 widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap, 260 const llvm::MapVector<Key, Value *> &PrevMap, 261 Environment &CurEnv, const Environment &PrevEnv, 262 Environment::ValueModel &Model, LatticeJoinEffect &Effect) { 263 llvm::MapVector<Key, Value *> WidenedMap; 264 for (auto &Entry : CurMap) { 265 Key K = Entry.first; 266 assert(K != nullptr); 267 268 Value *Val = Entry.second; 269 assert(Val != nullptr); 270 271 auto PrevIt = PrevMap.find(K); 272 if (PrevIt == PrevMap.end()) 273 continue; 274 assert(PrevIt->second != nullptr); 275 276 if (areEquivalentValues(*Val, *PrevIt->second)) { 277 WidenedMap.insert({K, Val}); 278 continue; 279 } 280 281 Value &WidenedVal = widenDistinctValues(K->getType(), *PrevIt->second, 282 PrevEnv, *Val, CurEnv, Model); 283 WidenedMap.insert({K, &WidenedVal}); 284 if (&WidenedVal != PrevIt->second) 285 Effect = LatticeJoinEffect::Changed; 286 } 287 288 return WidenedMap; 289 } 290 291 /// Initializes a global storage value. 292 static void insertIfGlobal(const Decl &D, 293 llvm::DenseSet<const VarDecl *> &Vars) { 294 if (auto *V = dyn_cast<VarDecl>(&D)) 295 if (V->hasGlobalStorage()) 296 Vars.insert(V); 297 } 298 299 static void insertIfFunction(const Decl &D, 300 llvm::DenseSet<const FunctionDecl *> &Funcs) { 301 if (auto *FD = dyn_cast<FunctionDecl>(&D)) 302 Funcs.insert(FD); 303 } 304 305 static MemberExpr *getMemberForAccessor(const CXXMemberCallExpr &C) { 306 if (!C.getMethodDecl()) 307 return nullptr; 308 auto *Body = dyn_cast_or_null<CompoundStmt>(C.getMethodDecl()->getBody()); 309 if (!Body || Body->size() != 1) 310 return nullptr; 311 if (auto *RS = dyn_cast<ReturnStmt>(*Body->body_begin())) 312 if (auto *Return = RS->getRetValue()) 313 return dyn_cast<MemberExpr>(Return->IgnoreParenImpCasts()); 314 return nullptr; 315 } 316 317 static void 318 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields, 319 llvm::DenseSet<const VarDecl *> &Vars, 320 llvm::DenseSet<const FunctionDecl *> &Funcs) { 321 insertIfGlobal(D, Vars); 322 insertIfFunction(D, Funcs); 323 if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D)) 324 for (const auto *B : Decomp->bindings()) 325 if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding())) 326 // FIXME: should we be using `E->getFoundDecl()`? 327 if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) 328 Fields.insert(FD); 329 } 330 331 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields, 332 /// global variables and functions that are declared in or referenced from 333 /// sub-statements. 334 static void 335 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, 336 llvm::DenseSet<const VarDecl *> &Vars, 337 llvm::DenseSet<const FunctionDecl *> &Funcs) { 338 for (auto *Child : S.children()) 339 if (Child != nullptr) 340 getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs); 341 if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S)) 342 getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs); 343 344 if (auto *DS = dyn_cast<DeclStmt>(&S)) { 345 if (DS->isSingleDecl()) 346 getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs); 347 else 348 for (auto *D : DS->getDeclGroup()) 349 getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs); 350 } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) { 351 insertIfGlobal(*E->getDecl(), Vars); 352 insertIfFunction(*E->getDecl(), Funcs); 353 } else if (const auto *C = dyn_cast<CXXMemberCallExpr>(&S)) { 354 // If this is a method that returns a member variable but does nothing else, 355 // model the field of the return value. 356 if (MemberExpr *E = getMemberForAccessor(*C)) 357 if (const auto *FD = dyn_cast<FieldDecl>(E->getMemberDecl())) 358 Fields.insert(FD); 359 } else if (auto *E = dyn_cast<MemberExpr>(&S)) { 360 // FIXME: should we be using `E->getFoundDecl()`? 361 const ValueDecl *VD = E->getMemberDecl(); 362 insertIfGlobal(*VD, Vars); 363 insertIfFunction(*VD, Funcs); 364 if (const auto *FD = dyn_cast<FieldDecl>(VD)) 365 Fields.insert(FD); 366 } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) { 367 if (RecordDecl *RD = InitList->getType()->getAsRecordDecl()) 368 for (const auto *FD : getFieldsForInitListExpr(RD)) 369 Fields.insert(FD); 370 } 371 } 372 373 // FIXME: Add support for resetting globals after function calls to enable 374 // the implementation of sound analyses. 375 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { 376 assert(FuncDecl->getBody() != nullptr); 377 378 FieldSet Fields; 379 llvm::DenseSet<const VarDecl *> Vars; 380 llvm::DenseSet<const FunctionDecl *> Funcs; 381 382 // Look for global variable and field references in the 383 // constructor-initializers. 384 if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) { 385 for (const auto *Init : CtorDecl->inits()) { 386 if (Init->isMemberInitializer()) { 387 Fields.insert(Init->getMember()); 388 } else if (Init->isIndirectMemberInitializer()) { 389 for (const auto *I : Init->getIndirectMember()->chain()) 390 Fields.insert(cast<FieldDecl>(I)); 391 } 392 const Expr *E = Init->getInit(); 393 assert(E != nullptr); 394 getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs); 395 } 396 // Add all fields mentioned in default member initializers. 397 for (const FieldDecl *F : CtorDecl->getParent()->fields()) 398 if (const auto *I = F->getInClassInitializer()) 399 getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs); 400 } 401 getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs); 402 403 // These have to be added before the lines that follow to ensure that 404 // `create*` work correctly for structs. 405 DACtx->addModeledFields(Fields); 406 407 for (const VarDecl *D : Vars) { 408 if (getStorageLocation(*D) != nullptr) 409 continue; 410 411 setStorageLocation(*D, createObject(*D)); 412 } 413 414 for (const FunctionDecl *FD : Funcs) { 415 if (getStorageLocation(*FD) != nullptr) 416 continue; 417 auto &Loc = createStorageLocation(FD->getType()); 418 setStorageLocation(*FD, Loc); 419 } 420 } 421 422 Environment::Environment(DataflowAnalysisContext &DACtx) 423 : DACtx(&DACtx), 424 FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} 425 426 Environment Environment::fork() const { 427 Environment Copy(*this); 428 Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken); 429 return Copy; 430 } 431 432 Environment::Environment(DataflowAnalysisContext &DACtx, 433 const DeclContext &DeclCtx) 434 : Environment(DACtx) { 435 CallStack.push_back(&DeclCtx); 436 437 if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) { 438 assert(FuncDecl->getBody() != nullptr); 439 440 initFieldsGlobalsAndFuncs(FuncDecl); 441 442 for (const auto *ParamDecl : FuncDecl->parameters()) { 443 assert(ParamDecl != nullptr); 444 setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); 445 } 446 } 447 448 if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) { 449 auto *Parent = MethodDecl->getParent(); 450 assert(Parent != nullptr); 451 if (Parent->isLambda()) 452 MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext()); 453 454 // FIXME: Initialize the ThisPointeeLoc of lambdas too. 455 if (MethodDecl && MethodDecl->isImplicitObjectMemberFunction()) { 456 QualType ThisPointeeType = MethodDecl->getFunctionObjectParameterType(); 457 ThisPointeeLoc = 458 &cast<RecordValue>(createValue(ThisPointeeType))->getLoc(); 459 } 460 } 461 } 462 463 bool Environment::canDescend(unsigned MaxDepth, 464 const DeclContext *Callee) const { 465 return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee); 466 } 467 468 Environment Environment::pushCall(const CallExpr *Call) const { 469 Environment Env(*this); 470 471 if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) { 472 if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) { 473 if (!isa<CXXThisExpr>(Arg)) 474 Env.ThisPointeeLoc = 475 cast<RecordStorageLocation>(getStorageLocation(*Arg)); 476 // Otherwise (when the argument is `this`), retain the current 477 // environment's `ThisPointeeLoc`. 478 } 479 } 480 481 Env.pushCallInternal(Call->getDirectCallee(), 482 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 483 484 return Env; 485 } 486 487 Environment Environment::pushCall(const CXXConstructExpr *Call) const { 488 Environment Env(*this); 489 490 Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call); 491 492 Env.pushCallInternal(Call->getConstructor(), 493 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 494 495 return Env; 496 } 497 498 void Environment::pushCallInternal(const FunctionDecl *FuncDecl, 499 ArrayRef<const Expr *> Args) { 500 // Canonicalize to the definition of the function. This ensures that we're 501 // putting arguments into the same `ParamVarDecl`s` that the callee will later 502 // be retrieving them from. 503 assert(FuncDecl->getDefinition() != nullptr); 504 FuncDecl = FuncDecl->getDefinition(); 505 506 CallStack.push_back(FuncDecl); 507 508 initFieldsGlobalsAndFuncs(FuncDecl); 509 510 const auto *ParamIt = FuncDecl->param_begin(); 511 512 // FIXME: Parameters don't always map to arguments 1:1; examples include 513 // overloaded operators implemented as member functions, and parameter packs. 514 for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) { 515 assert(ParamIt != FuncDecl->param_end()); 516 const VarDecl *Param = *ParamIt; 517 setStorageLocation(*Param, createObject(*Param, Args[ArgIndex])); 518 } 519 } 520 521 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { 522 // We ignore some entries of `CalleeEnv`: 523 // - `DACtx` because is already the same in both 524 // - We don't want the callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or 525 // `ThisPointeeLoc` because they don't apply to us. 526 // - `DeclToLoc`, `ExprToLoc`, and `ExprToVal` capture information from the 527 // callee's local scope, so when popping that scope, we do not propagate 528 // the maps. 529 this->LocToVal = std::move(CalleeEnv.LocToVal); 530 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 531 532 if (Call->isGLValue()) { 533 if (CalleeEnv.ReturnLoc != nullptr) 534 setStorageLocation(*Call, *CalleeEnv.ReturnLoc); 535 } else if (!Call->getType()->isVoidType()) { 536 if (CalleeEnv.ReturnVal != nullptr) 537 setValue(*Call, *CalleeEnv.ReturnVal); 538 } 539 } 540 541 void Environment::popCall(const CXXConstructExpr *Call, 542 const Environment &CalleeEnv) { 543 // See also comment in `popCall(const CallExpr *, const Environment &)` above. 544 this->LocToVal = std::move(CalleeEnv.LocToVal); 545 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 546 547 if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) { 548 setValue(*Call, *Val); 549 } 550 } 551 552 bool Environment::equivalentTo(const Environment &Other, 553 Environment::ValueModel &Model) const { 554 assert(DACtx == Other.DACtx); 555 556 if (ReturnVal != Other.ReturnVal) 557 return false; 558 559 if (ReturnLoc != Other.ReturnLoc) 560 return false; 561 562 if (ThisPointeeLoc != Other.ThisPointeeLoc) 563 return false; 564 565 if (DeclToLoc != Other.DeclToLoc) 566 return false; 567 568 if (ExprToLoc != Other.ExprToLoc) 569 return false; 570 571 if (!compareKeyToValueMaps(ExprToVal, Other.ExprToVal, *this, Other, Model)) 572 return false; 573 574 if (!compareKeyToValueMaps(LocToVal, Other.LocToVal, *this, Other, Model)) 575 return false; 576 577 return true; 578 } 579 580 LatticeJoinEffect Environment::widen(const Environment &PrevEnv, 581 Environment::ValueModel &Model) { 582 assert(DACtx == PrevEnv.DACtx); 583 assert(ReturnVal == PrevEnv.ReturnVal); 584 assert(ReturnLoc == PrevEnv.ReturnLoc); 585 assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); 586 assert(CallStack == PrevEnv.CallStack); 587 588 auto Effect = LatticeJoinEffect::Unchanged; 589 590 // By the API, `PrevEnv` is a previous version of the environment for the same 591 // block, so we have some guarantees about its shape. In particular, it will 592 // be the result of a join or widen operation on previous values for this 593 // block. For `DeclToLoc`, `ExprToVal`, and `ExprToLoc`, join guarantees that 594 // these maps are subsets of the maps in `PrevEnv`. So, as long as we maintain 595 // this property here, we don't need change their current values to widen. 596 assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size()); 597 assert(ExprToVal.size() <= PrevEnv.ExprToVal.size()); 598 assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size()); 599 600 ExprToVal = widenKeyToValueMap(ExprToVal, PrevEnv.ExprToVal, *this, PrevEnv, 601 Model, Effect); 602 603 LocToVal = widenKeyToValueMap(LocToVal, PrevEnv.LocToVal, *this, PrevEnv, 604 Model, Effect); 605 if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() || 606 ExprToLoc.size() != PrevEnv.ExprToLoc.size() || 607 ExprToVal.size() != PrevEnv.ExprToVal.size() || 608 LocToVal.size() != PrevEnv.LocToVal.size()) 609 Effect = LatticeJoinEffect::Changed; 610 611 return Effect; 612 } 613 614 Environment Environment::join(const Environment &EnvA, const Environment &EnvB, 615 Environment::ValueModel &Model) { 616 assert(EnvA.DACtx == EnvB.DACtx); 617 assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); 618 assert(EnvA.CallStack == EnvB.CallStack); 619 620 Environment JoinedEnv(*EnvA.DACtx); 621 622 JoinedEnv.CallStack = EnvA.CallStack; 623 JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; 624 625 if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) { 626 // `ReturnVal` might not always get set -- for example if we have a return 627 // statement of the form `return some_other_func()` and we decide not to 628 // analyze `some_other_func()`. 629 // In this case, we can't say anything about the joined return value -- we 630 // don't simply want to propagate the return value that we do have, because 631 // it might not be the correct one. 632 // This occurs for example in the test `ContextSensitiveMutualRecursion`. 633 JoinedEnv.ReturnVal = nullptr; 634 } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) { 635 JoinedEnv.ReturnVal = EnvA.ReturnVal; 636 } else { 637 assert(!EnvA.CallStack.empty()); 638 // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this 639 // cast. 640 auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back()); 641 assert(Func != nullptr); 642 if (Value *MergedVal = 643 mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA, 644 *EnvB.ReturnVal, EnvB, JoinedEnv, Model)) 645 JoinedEnv.ReturnVal = MergedVal; 646 } 647 648 if (EnvA.ReturnLoc == EnvB.ReturnLoc) 649 JoinedEnv.ReturnLoc = EnvA.ReturnLoc; 650 else 651 JoinedEnv.ReturnLoc = nullptr; 652 653 assert(declToLocConsistent(EnvA.DeclToLoc, EnvB.DeclToLoc)); 654 JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc); 655 656 JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc); 657 658 // FIXME: update join to detect backedges and simplify the flow condition 659 // accordingly. 660 JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions( 661 EnvA.FlowConditionToken, EnvB.FlowConditionToken); 662 663 JoinedEnv.ExprToVal = joinKeyToValueMap(EnvA.ExprToVal, EnvB.ExprToVal, EnvA, 664 EnvB, JoinedEnv, Model); 665 666 JoinedEnv.LocToVal = joinKeyToValueMap(EnvA.LocToVal, EnvB.LocToVal, EnvA, 667 EnvB, JoinedEnv, Model); 668 669 return JoinedEnv; 670 } 671 672 StorageLocation &Environment::createStorageLocation(QualType Type) { 673 return DACtx->createStorageLocation(Type); 674 } 675 676 StorageLocation &Environment::createStorageLocation(const VarDecl &D) { 677 // Evaluated declarations are always assigned the same storage locations to 678 // ensure that the environment stabilizes across loop iterations. Storage 679 // locations for evaluated declarations are stored in the analysis context. 680 return DACtx->getStableStorageLocation(D); 681 } 682 683 StorageLocation &Environment::createStorageLocation(const Expr &E) { 684 // Evaluated expressions are always assigned the same storage locations to 685 // ensure that the environment stabilizes across loop iterations. Storage 686 // locations for evaluated expressions are stored in the analysis context. 687 return DACtx->getStableStorageLocation(E); 688 } 689 690 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { 691 assert(!DeclToLoc.contains(&D)); 692 DeclToLoc[&D] = &Loc; 693 } 694 695 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const { 696 auto It = DeclToLoc.find(&D); 697 if (It == DeclToLoc.end()) 698 return nullptr; 699 700 StorageLocation *Loc = It->second; 701 702 return Loc; 703 } 704 705 void Environment::removeDecl(const ValueDecl &D) { 706 assert(DeclToLoc.contains(&D)); 707 DeclToLoc.erase(&D); 708 } 709 710 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) { 711 // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason, 712 // but we still want to be able to associate a `StorageLocation` with them, 713 // so allow these as an exception. 714 assert(E.isGLValue() || 715 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 716 setStorageLocationInternal(E, Loc); 717 } 718 719 StorageLocation *Environment::getStorageLocation(const Expr &E) const { 720 // See comment in `setStorageLocation()`. 721 assert(E.isGLValue() || 722 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 723 return getStorageLocationInternal(E); 724 } 725 726 RecordStorageLocation *Environment::getThisPointeeStorageLocation() const { 727 return ThisPointeeLoc; 728 } 729 730 RecordStorageLocation & 731 Environment::getResultObjectLocation(const Expr &RecordPRValue) { 732 assert(RecordPRValue.getType()->isRecordType()); 733 assert(RecordPRValue.isPRValue()); 734 735 if (StorageLocation *ExistingLoc = getStorageLocationInternal(RecordPRValue)) 736 return *cast<RecordStorageLocation>(ExistingLoc); 737 auto &Loc = cast<RecordStorageLocation>( 738 DACtx->getStableStorageLocation(RecordPRValue)); 739 setStorageLocationInternal(RecordPRValue, Loc); 740 return Loc; 741 } 742 743 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { 744 return DACtx->getOrCreateNullPointerValue(PointeeType); 745 } 746 747 void Environment::setValue(const StorageLocation &Loc, Value &Val) { 748 assert(!isa<RecordValue>(&Val) || &cast<RecordValue>(&Val)->getLoc() == &Loc); 749 750 LocToVal[&Loc] = &Val; 751 } 752 753 void Environment::setValue(const Expr &E, Value &Val) { 754 assert(E.isPRValue()); 755 ExprToVal[&E] = &Val; 756 } 757 758 Value *Environment::getValue(const StorageLocation &Loc) const { 759 return LocToVal.lookup(&Loc); 760 } 761 762 Value *Environment::getValue(const ValueDecl &D) const { 763 auto *Loc = getStorageLocation(D); 764 if (Loc == nullptr) 765 return nullptr; 766 return getValue(*Loc); 767 } 768 769 Value *Environment::getValue(const Expr &E) const { 770 if (E.isPRValue()) { 771 auto It = ExprToVal.find(&ignoreCFGOmittedNodes(E)); 772 return It == ExprToVal.end() ? nullptr : It->second; 773 } 774 775 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 776 if (It == ExprToLoc.end()) 777 return nullptr; 778 return getValue(*It->second); 779 } 780 781 Value *Environment::createValue(QualType Type) { 782 llvm::DenseSet<QualType> Visited; 783 int CreatedValuesCount = 0; 784 Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0, 785 CreatedValuesCount); 786 if (CreatedValuesCount > MaxCompositeValueSize) { 787 llvm::errs() << "Attempting to initialize a huge value of type: " << Type 788 << '\n'; 789 } 790 return Val; 791 } 792 793 void Environment::setStorageLocationInternal(const Expr &E, 794 StorageLocation &Loc) { 795 const Expr &CanonE = ignoreCFGOmittedNodes(E); 796 assert(!ExprToLoc.contains(&CanonE)); 797 ExprToLoc[&CanonE] = &Loc; 798 } 799 800 StorageLocation *Environment::getStorageLocationInternal(const Expr &E) const { 801 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 802 return It == ExprToLoc.end() ? nullptr : &*It->second; 803 } 804 805 Value *Environment::createValueUnlessSelfReferential( 806 QualType Type, llvm::DenseSet<QualType> &Visited, int Depth, 807 int &CreatedValuesCount) { 808 assert(!Type.isNull()); 809 assert(!Type->isReferenceType()); 810 811 // Allow unlimited fields at depth 1; only cap at deeper nesting levels. 812 if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) || 813 Depth > MaxCompositeValueDepth) 814 return nullptr; 815 816 if (Type->isBooleanType()) { 817 CreatedValuesCount++; 818 return &makeAtomicBoolValue(); 819 } 820 821 if (Type->isIntegerType()) { 822 // FIXME: consider instead `return nullptr`, given that we do nothing useful 823 // with integers, and so distinguishing them serves no purpose, but could 824 // prevent convergence. 825 CreatedValuesCount++; 826 return &arena().create<IntegerValue>(); 827 } 828 829 if (Type->isPointerType()) { 830 CreatedValuesCount++; 831 QualType PointeeType = Type->getPointeeType(); 832 StorageLocation &PointeeLoc = 833 createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount); 834 835 return &arena().create<PointerValue>(PointeeLoc); 836 } 837 838 if (Type->isRecordType()) { 839 CreatedValuesCount++; 840 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 841 for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { 842 assert(Field != nullptr); 843 844 QualType FieldType = Field->getType(); 845 846 FieldLocs.insert( 847 {Field, &createLocAndMaybeValue(FieldType, Visited, Depth + 1, 848 CreatedValuesCount)}); 849 } 850 851 RecordStorageLocation &Loc = 852 arena().create<RecordStorageLocation>(Type, std::move(FieldLocs)); 853 RecordValue &RecordVal = create<RecordValue>(Loc); 854 855 // As we already have a storage location for the `RecordValue`, we can and 856 // should associate them in the environment. 857 setValue(Loc, RecordVal); 858 859 return &RecordVal; 860 } 861 862 return nullptr; 863 } 864 865 StorageLocation & 866 Environment::createLocAndMaybeValue(QualType Ty, 867 llvm::DenseSet<QualType> &Visited, 868 int Depth, int &CreatedValuesCount) { 869 if (!Visited.insert(Ty.getCanonicalType()).second) 870 return createStorageLocation(Ty.getNonReferenceType()); 871 Value *Val = createValueUnlessSelfReferential( 872 Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount); 873 Visited.erase(Ty.getCanonicalType()); 874 875 Ty = Ty.getNonReferenceType(); 876 877 if (Val == nullptr) 878 return createStorageLocation(Ty); 879 880 if (Ty->isRecordType()) 881 return cast<RecordValue>(Val)->getLoc(); 882 883 StorageLocation &Loc = createStorageLocation(Ty); 884 setValue(Loc, *Val); 885 return Loc; 886 } 887 888 StorageLocation &Environment::createObjectInternal(const VarDecl *D, 889 QualType Ty, 890 const Expr *InitExpr) { 891 if (Ty->isReferenceType()) { 892 // Although variables of reference type always need to be initialized, it 893 // can happen that we can't see the initializer, so `InitExpr` may still 894 // be null. 895 if (InitExpr) { 896 if (auto *InitExprLoc = getStorageLocation(*InitExpr)) 897 return *InitExprLoc; 898 } 899 900 // Even though we have an initializer, we might not get an 901 // InitExprLoc, for example if the InitExpr is a CallExpr for which we 902 // don't have a function body. In this case, we just invent a storage 903 // location and value -- it's the best we can do. 904 return createObjectInternal(D, Ty.getNonReferenceType(), nullptr); 905 } 906 907 Value *Val = nullptr; 908 if (InitExpr) 909 // In the (few) cases where an expression is intentionally 910 // "uninterpreted", `InitExpr` is not associated with a value. There are 911 // two ways to handle this situation: propagate the status, so that 912 // uninterpreted initializers result in uninterpreted variables, or 913 // provide a default value. We choose the latter so that later refinements 914 // of the variable can be used for reasoning about the surrounding code. 915 // For this reason, we let this case be handled by the `createValue()` 916 // call below. 917 // 918 // FIXME. If and when we interpret all language cases, change this to 919 // assert that `InitExpr` is interpreted, rather than supplying a 920 // default value (assuming we don't update the environment API to return 921 // references). 922 Val = getValue(*InitExpr); 923 if (!Val) 924 Val = createValue(Ty); 925 926 if (Ty->isRecordType()) 927 return cast<RecordValue>(Val)->getLoc(); 928 929 StorageLocation &Loc = 930 D ? createStorageLocation(*D) : createStorageLocation(Ty); 931 932 if (Val) 933 setValue(Loc, *Val); 934 935 return Loc; 936 } 937 938 void Environment::addToFlowCondition(const Formula &Val) { 939 DACtx->addFlowConditionConstraint(FlowConditionToken, Val); 940 } 941 942 bool Environment::flowConditionImplies(const Formula &Val) const { 943 return DACtx->flowConditionImplies(FlowConditionToken, Val); 944 } 945 946 void Environment::dump(raw_ostream &OS) const { 947 // FIXME: add printing for remaining fields and allow caller to decide what 948 // fields are printed. 949 OS << "DeclToLoc:\n"; 950 for (auto [D, L] : DeclToLoc) 951 OS << " [" << D->getNameAsString() << ", " << L << "]\n"; 952 953 OS << "ExprToLoc:\n"; 954 for (auto [E, L] : ExprToLoc) 955 OS << " [" << E << ", " << L << "]\n"; 956 957 OS << "ExprToVal:\n"; 958 for (auto [E, V] : ExprToVal) 959 OS << " [" << E << ", " << V << ": " << *V << "]\n"; 960 961 OS << "LocToVal:\n"; 962 for (auto [L, V] : LocToVal) { 963 OS << " [" << L << ", " << V << ": " << *V << "]\n"; 964 } 965 966 OS << "FlowConditionToken:\n"; 967 DACtx->dumpFlowCondition(FlowConditionToken, OS); 968 } 969 970 void Environment::dump() const { 971 dump(llvm::dbgs()); 972 } 973 974 RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE, 975 const Environment &Env) { 976 Expr *ImplicitObject = MCE.getImplicitObjectArgument(); 977 if (ImplicitObject == nullptr) 978 return nullptr; 979 if (ImplicitObject->getType()->isPointerType()) { 980 if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*ImplicitObject))) 981 return &cast<RecordStorageLocation>(Val->getPointeeLoc()); 982 return nullptr; 983 } 984 return cast_or_null<RecordStorageLocation>( 985 Env.getStorageLocation(*ImplicitObject)); 986 } 987 988 RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME, 989 const Environment &Env) { 990 Expr *Base = ME.getBase(); 991 if (Base == nullptr) 992 return nullptr; 993 if (ME.isArrow()) { 994 if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Base))) 995 return &cast<RecordStorageLocation>(Val->getPointeeLoc()); 996 return nullptr; 997 } 998 return cast_or_null<RecordStorageLocation>(Env.getStorageLocation(*Base)); 999 } 1000 1001 std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) { 1002 // Unnamed bitfields are only used for padding and do not appear in 1003 // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s 1004 // field list, and we thus need to remove them before mapping inits to 1005 // fields to avoid mapping inits to the wrongs fields. 1006 std::vector<FieldDecl *> Fields; 1007 llvm::copy_if( 1008 RD->fields(), std::back_inserter(Fields), 1009 [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); }); 1010 return Fields; 1011 } 1012 1013 RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) { 1014 auto &NewVal = Env.create<RecordValue>(Loc); 1015 Env.setValue(Loc, NewVal); 1016 return NewVal; 1017 } 1018 1019 RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) { 1020 assert(Expr.getType()->isRecordType()); 1021 1022 if (Expr.isPRValue()) { 1023 if (auto *ExistingVal = cast_or_null<RecordValue>(Env.getValue(Expr))) { 1024 auto &NewVal = Env.create<RecordValue>(ExistingVal->getLoc()); 1025 Env.setValue(Expr, NewVal); 1026 return NewVal; 1027 } 1028 1029 auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType())); 1030 Env.setValue(Expr, NewVal); 1031 return NewVal; 1032 } 1033 1034 if (auto *Loc = 1035 cast_or_null<RecordStorageLocation>(Env.getStorageLocation(Expr))) { 1036 auto &NewVal = Env.create<RecordValue>(*Loc); 1037 Env.setValue(*Loc, NewVal); 1038 return NewVal; 1039 } 1040 1041 auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType())); 1042 Env.setStorageLocation(Expr, NewVal.getLoc()); 1043 return NewVal; 1044 } 1045 1046 } // namespace dataflow 1047 } // namespace clang 1048