//===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines an Environment class that is used by dataflow analyses
// that run over Control-Flow Graphs (CFGs) to keep track of the state of the
// program at given program points.
//
//===----------------------------------------------------------------------===//

#include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclCXX.h"
#include "clang/AST/Type.h"
#include "clang/Analysis/FlowSensitive/DataflowLattice.h"
#include "clang/Analysis/FlowSensitive/Value.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <memory>
#include <utility>

namespace clang {
namespace dataflow {

// FIXME: convert these to parameters of the analysis or environment. Current
// settings have been experimentally validated, but only for a particular
// analysis.
static constexpr int MaxCompositeValueDepth = 3;
static constexpr int MaxCompositeValueSize = 1000;

/// Returns a map consisting of key-value entries that are present in both
/// maps.
template <typename K, typename V>
llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
                                        const llvm::DenseMap<K, V> &Map2) {
  llvm::DenseMap<K, V> Result;
  for (auto &Entry : Map1) {
    auto It = Map2.find(Entry.first);
    if (It != Map2.end() && Entry.second == It->second)
      Result.insert({Entry.first, Entry.second});
  }
  return Result;
}

static bool compareDistinctValues(QualType Type, Value &Val1,
                                  const Environment &Env1, Value &Val2,
                                  const Environment &Env2,
                                  Environment::ValueModel &Model) {
  // Note: Potentially costly, but, for booleans, we could check whether both
  // can be proven equivalent in their respective environments.

  // FIXME: move the reference/pointers logic from `areEquivalentValues` to
  // here and implement separate, join/widen specific handling for
  // reference/pointers.
  switch (Model.compare(Type, Val1, Env1, Val2, Env2)) {
  case ComparisonResult::Same:
    return true;
  case ComparisonResult::Different:
    return false;
  case ComparisonResult::Unknown:
    switch (Val1.getKind()) {
    case Value::Kind::Integer:
    case Value::Kind::Reference:
    case Value::Kind::Pointer:
    case Value::Kind::Struct:
      // FIXME: this choice intentionally introduces unsoundness to allow
      // for convergence. Once we have widening support for the
      // reference/pointer and struct built-in models, this should be
      // `false`.
      return true;
    default:
      return false;
    }
  }
  llvm_unreachable("All cases covered in switch");
}

/// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`,
/// respectively, of the same type `Type`. Merging generally produces a single
/// value that (soundly) approximates the two inputs, although the actual
/// meaning depends on `Model`.
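/// Returns the merged value if merging succeeded, or null if the values could
/// not be merged.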
static Value *mergeDistinctValues(QualType Type, Value &Val1,
                                  const Environment &Env1, Value &Val2,
                                  const Environment &Env2,
                                  Environment &MergedEnv,
                                  Environment::ValueModel &Model) {
  // Join distinct boolean values preserving information about the constraints
  // in the respective path conditions.
  if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) {
    // FIXME: Checking both values should be unnecessary, since they should
    // have a consistent shape. However, right now we can end up with
    // `BoolValue`s in integer-typed variables due to our incorrect handling of
    // boolean-to-integer casts (we just propagate the `BoolValue` to the
    // result of the cast). So, a join can encounter an integer in one branch
    // but a bool in the other.
    // For example:
    // ```
    // std::optional<bool> o;
    // int x;
    // if (o.has_value())
    //   x = o.value();
    // ```
    auto &Expr1 = cast<BoolValue>(Val1).formula();
    auto &Expr2 = cast<BoolValue>(Val2).formula();
    auto &A = MergedEnv.arena();
    auto &MergedVal = A.makeAtomRef(A.makeAtom());
    MergedEnv.addToFlowCondition(
        A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()),
                           A.makeEquals(MergedVal, Expr1)),
                 A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()),
                           A.makeEquals(MergedVal, Expr2))));
    return &A.makeBoolValue(MergedVal);
  }

  // FIXME: Consider destroying `MergedVal` immediately if `ValueModel::merge`
  // returns false to avoid storing unneeded values in `DACtx`.
  // FIXME: Creating the value based on the type alone creates misshapen values
  // for lvalues, since the type does not reflect the need for `ReferenceValue`.
  // This issue will be resolved when `ReferenceValue` is eliminated as part
  // of the ongoing migration to strict handling of value categories (see
  // https://discourse.llvm.org/t/70086 for details).
  if (Value *MergedVal = MergedEnv.createValue(Type))
    if (Model.merge(Type, Val1, Env1, Val2, Env2, *MergedVal, MergedEnv))
      return MergedVal;

  return nullptr;
}

// When widening does not change `Current`, the return value will equal
// `&Prev`.
static Value &widenDistinctValues(QualType Type, Value &Prev,
                                  const Environment &PrevEnv, Value &Current,
                                  Environment &CurrentEnv,
                                  Environment::ValueModel &Model) {
  // Boolean-model widening.
  if (isa<BoolValue>(&Prev)) {
    assert(isa<BoolValue>(Current));
    // Widen to Top, because we know they are different values. If the previous
    // value was already Top, re-use it to (implicitly) indicate that no change
    // occurred.
    if (isa<TopBoolValue>(Prev))
      return Prev;
    return CurrentEnv.makeTopBoolValue();
  }

  // FIXME: Add other built-in model widening.

  // Custom-model widening.
  if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv))
    return *W;

  // Default of widening is a no-op: leave the current value unchanged.
  return Current;
}

/// If `D` is a variable declaration with global storage, inserts it into
/// `Vars`.
static void insertIfGlobal(const Decl &D,
                           llvm::DenseSet<const VarDecl *> &Vars) {
  if (auto *V = dyn_cast<VarDecl>(&D))
    if (V->hasGlobalStorage())
      Vars.insert(V);
}

static void insertIfFunction(const Decl &D,
                             llvm::DenseSet<const FunctionDecl *> &Funcs) {
  if (auto *FD = dyn_cast<FunctionDecl>(&D))
    Funcs.insert(FD);
}

static void
getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields,
                         llvm::DenseSet<const VarDecl *> &Vars,
                         llvm::DenseSet<const FunctionDecl *> &Funcs) {
  insertIfGlobal(D, Vars);
  insertIfFunction(D, Funcs);
  if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D))
    for (const auto *B : Decomp->bindings())
      if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding()))
        // FIXME: should we be using `E->getFoundDecl()`?
        if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl()))
          Fields.insert(FD);
}

/// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields,
/// global variables and functions that are declared in or referenced from
/// sub-statements.
static void
getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields,
                         llvm::DenseSet<const VarDecl *> &Vars,
                         llvm::DenseSet<const FunctionDecl *> &Funcs) {
  for (auto *Child : S.children())
    if (Child != nullptr)
      getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs);
  if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S))
    getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs);

  if (auto *DS = dyn_cast<DeclStmt>(&S)) {
    if (DS->isSingleDecl())
      getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs);
    else
      for (auto *D : DS->getDeclGroup())
        getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs);
  } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) {
    insertIfGlobal(*E->getDecl(), Vars);
    insertIfFunction(*E->getDecl(), Funcs);
  } else if (auto *E = dyn_cast<MemberExpr>(&S)) {
    // FIXME: should we be using `E->getFoundDecl()`?
    const ValueDecl *VD = E->getMemberDecl();
    insertIfGlobal(*VD, Vars);
    insertIfFunction(*VD, Funcs);
    if (const auto *FD = dyn_cast<FieldDecl>(VD))
      Fields.insert(FD);
  } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) {
    if (RecordDecl *RD = InitList->getType()->getAsRecordDecl())
      for (const auto *FD : getFieldsForInitListExpr(RD))
        Fields.insert(FD);
  }
}

// FIXME: Add support for resetting globals after function calls to enable
// the implementation of sound analyses.
void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) {
  assert(FuncDecl->getBody() != nullptr);

  FieldSet Fields;
  llvm::DenseSet<const VarDecl *> Vars;
  llvm::DenseSet<const FunctionDecl *> Funcs;

  // Look for global variable and field references in the
  // constructor-initializers.
  if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) {
    for (const auto *Init : CtorDecl->inits()) {
      if (Init->isMemberInitializer()) {
        Fields.insert(Init->getMember());
      } else if (Init->isIndirectMemberInitializer()) {
        for (const auto *I : Init->getIndirectMember()->chain())
          Fields.insert(cast<FieldDecl>(I));
      }
      const Expr *E = Init->getInit();
      assert(E != nullptr);
      getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs);
    }
    // Add all fields mentioned in default member initializers.
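    // (A default member initializer is used whenever a constructor does not
    // explicitly initialize the corresponding field, so such fields need to
    // be modeled as well.)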
    for (const FieldDecl *F : CtorDecl->getParent()->fields())
      if (const auto *I = F->getInClassInitializer())
        getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs);
  }
  getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs);

  // These have to be added before the lines that follow to ensure that the
  // `create*` functions work correctly for structs.
  DACtx->addModeledFields(Fields);

  for (const VarDecl *D : Vars) {
    if (getStorageLocation(*D) != nullptr)
      continue;
    auto &Loc = createStorageLocation(D->getType().getNonReferenceType());
    setStorageLocation(*D, Loc);
    if (auto *Val = createValue(D->getType().getNonReferenceType()))
      setValue(Loc, *Val);
  }

  for (const FunctionDecl *FD : Funcs) {
    if (getStorageLocation(*FD) != nullptr)
      continue;
    auto &Loc = createStorageLocation(FD->getType());
    setStorageLocation(*FD, Loc);
  }
}

Environment::Environment(DataflowAnalysisContext &DACtx)
    : DACtx(&DACtx),
      FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {}

Environment Environment::fork() const {
  Environment Copy(*this);
  Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken);
  return Copy;
}

Environment::Environment(DataflowAnalysisContext &DACtx,
                         const DeclContext &DeclCtx)
    : Environment(DACtx) {
  CallStack.push_back(&DeclCtx);

  if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) {
    assert(FuncDecl->getBody() != nullptr);

    initFieldsGlobalsAndFuncs(FuncDecl);

    for (const auto *ParamDecl : FuncDecl->parameters()) {
      assert(ParamDecl != nullptr);
      // References aren't objects, so the reference itself doesn't have a
      // storage location. Instead, the storage location for a reference refers
      // directly to an object of the referenced type -- so strip off any
      // reference from the type.
      auto &ParamLoc =
          createStorageLocation(ParamDecl->getType().getNonReferenceType());
      setStorageLocation(*ParamDecl, ParamLoc);
      if (Value *ParamVal =
              createValue(ParamDecl->getType().getNonReferenceType()))
        setValue(ParamLoc, *ParamVal);
    }
  }

  if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) {
    auto *Parent = MethodDecl->getParent();
    assert(Parent != nullptr);
    if (Parent->isLambda())
      MethodDecl = dyn_cast<CXXMethodDecl>(Parent->getDeclContext());

    // FIXME: Initialize the ThisPointeeLoc of lambdas too.
    if (MethodDecl && !MethodDecl->isStatic()) {
      QualType ThisPointeeType = MethodDecl->getThisObjectType();
      ThisPointeeLoc = &cast<AggregateStorageLocation>(
          createStorageLocation(ThisPointeeType));
      if (Value *ThisPointeeVal = createValue(ThisPointeeType))
        setValue(*ThisPointeeLoc, *ThisPointeeVal);
    }
  }
}

bool Environment::canDescend(unsigned MaxDepth,
                             const DeclContext *Callee) const {
  return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee);
}

Environment Environment::pushCall(const CallExpr *Call) const {
  Environment Env(*this);

  if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) {
    if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) {
      if (!isa<CXXThisExpr>(Arg))
        Env.ThisPointeeLoc = cast<AggregateStorageLocation>(
            getStorageLocation(*Arg, SkipPast::Reference));
      // Otherwise (when the argument is `this`), retain the current
      // environment's `ThisPointeeLoc`.
    }
  }

  Env.pushCallInternal(Call->getDirectCallee(),
                       llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));

  return Env;
}

Environment Environment::pushCall(const CXXConstructExpr *Call) const {
  Environment Env(*this);

  Env.ThisPointeeLoc = &cast<AggregateStorageLocation>(
      Env.createStorageLocation(Call->getType()));
  if (Value *Val = Env.createValue(Call->getType()))
    Env.setValue(*Env.ThisPointeeLoc, *Val);

  Env.pushCallInternal(Call->getConstructor(),
                       llvm::ArrayRef(Call->getArgs(), Call->getNumArgs()));

  return Env;
}

void Environment::pushCallInternal(const FunctionDecl *FuncDecl,
                                   ArrayRef<const Expr *> Args) {
  // Canonicalize to the definition of the function. This ensures that we're
  // putting arguments into the same `ParamVarDecl`s that the callee will later
  // be retrieving them from.
  assert(FuncDecl->getDefinition() != nullptr);
  FuncDecl = FuncDecl->getDefinition();

  CallStack.push_back(FuncDecl);

  initFieldsGlobalsAndFuncs(FuncDecl);

  const auto *ParamIt = FuncDecl->param_begin();

  // FIXME: Parameters don't always map to arguments 1:1; examples include
  // overloaded operators implemented as member functions, and parameter packs.
  for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) {
    assert(ParamIt != FuncDecl->param_end());

    const Expr *Arg = Args[ArgIndex];
    auto *ArgLoc = getStorageLocation(*Arg, SkipPast::Reference);
    if (ArgLoc == nullptr)
      continue;

    const VarDecl *Param = *ParamIt;

    QualType ParamType = Param->getType();
    if (ParamType->isReferenceType()) {
      setStorageLocation(*Param, *ArgLoc);
    } else {
      auto &Loc = createStorageLocation(*Param);
      setStorageLocation(*Param, Loc);

      if (auto *ArgVal = getValue(*ArgLoc)) {
        setValue(Loc, *ArgVal);
      } else if (Value *Val = createValue(ParamType)) {
        setValue(Loc, *Val);
      }
    }
  }
}

void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) {
  // We ignore `DACtx` because it's already the same in both. We don't want the
  // callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or `ThisPointeeLoc`. We don't
  // bring back `DeclToLoc` and `ExprToLoc` because we want to be able to later
  // analyze the same callee in a different context, and `setStorageLocation`
  // requires there to not already be a storage location assigned.
  // Conceptually, these maps capture information from the local scope, so when
  // popping that scope, we do not propagate the maps.
  this->LocToVal = std::move(CalleeEnv.LocToVal);
  this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct);
  this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);

  if (Call->isGLValue()) {
    if (CalleeEnv.ReturnLoc != nullptr)
      setStorageLocationStrict(*Call, *CalleeEnv.ReturnLoc);
  } else if (!Call->getType()->isVoidType()) {
    if (CalleeEnv.ReturnVal != nullptr)
      setValueStrict(*Call, *CalleeEnv.ReturnVal);
  }
}

void Environment::popCall(const CXXConstructExpr *Call,
                          const Environment &CalleeEnv) {
  // See also comment in `popCall(const CallExpr *, const Environment &)`
  // above.
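  // Only the value stored at the callee's `ThisPointeeLoc` (the object under
  // construction) is propagated back, to the `CXXConstructExpr` itself below;
  // this environment's `ThisPointeeLoc` is left untouched.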
  this->LocToVal = std::move(CalleeEnv.LocToVal);
  this->MemberLocToStruct = std::move(CalleeEnv.MemberLocToStruct);
  this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken);

  if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) {
    setValueStrict(*Call, *Val);
  }
}

bool Environment::equivalentTo(const Environment &Other,
                               Environment::ValueModel &Model) const {
  assert(DACtx == Other.DACtx);

  if (ReturnVal != Other.ReturnVal)
    return false;

  if (ReturnLoc != Other.ReturnLoc)
    return false;

  if (ThisPointeeLoc != Other.ThisPointeeLoc)
    return false;

  if (DeclToLoc != Other.DeclToLoc)
    return false;

  if (ExprToLoc != Other.ExprToLoc)
    return false;

  // Compare the contents for the intersection of their domains.
  for (auto &Entry : LocToVal) {
    const StorageLocation *Loc = Entry.first;
    assert(Loc != nullptr);

    Value *Val = Entry.second;
    assert(Val != nullptr);

    auto It = Other.LocToVal.find(Loc);
    if (It == Other.LocToVal.end())
      continue;
    assert(It->second != nullptr);

    if (!areEquivalentValues(*Val, *It->second) &&
        !compareDistinctValues(Loc->getType(), *Val, *this, *It->second, Other,
                               Model))
      return false;
  }

  return true;
}

LatticeJoinEffect Environment::widen(const Environment &PrevEnv,
                                     Environment::ValueModel &Model) {
  assert(DACtx == PrevEnv.DACtx);
  assert(ReturnVal == PrevEnv.ReturnVal);
  assert(ReturnLoc == PrevEnv.ReturnLoc);
  assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc);
  assert(CallStack == PrevEnv.CallStack);

  auto Effect = LatticeJoinEffect::Unchanged;

  // By the API, `PrevEnv` is a previous version of the environment for the
  // same block, so we have some guarantees about its shape. In particular, it
  // will be the result of a join or widen operation on previous values for
  // this block. For `DeclToLoc` and `ExprToLoc`, join guarantees that these
  // maps are subsets of the maps in `PrevEnv`. So, as long as we maintain this
  // property here, we don't need to change their current values to widen.
  //
  // FIXME: `MemberLocToStruct` does not share the above property, because
  // `join` can cause the map size to increase (when we add fresh data in
  // places of conflict). Once this issue with join is resolved, re-enable the
  // assertion below or replace with something that captures the desired
  // invariant.
  assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size());
  assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size());
  // assert(MemberLocToStruct.size() <= PrevEnv.MemberLocToStruct.size());

  llvm::DenseMap<const StorageLocation *, Value *> WidenedLocToVal;
  for (auto &Entry : LocToVal) {
    const StorageLocation *Loc = Entry.first;
    assert(Loc != nullptr);

    Value *Val = Entry.second;
    assert(Val != nullptr);

    auto PrevIt = PrevEnv.LocToVal.find(Loc);
    if (PrevIt == PrevEnv.LocToVal.end())
      continue;
    assert(PrevIt->second != nullptr);

    if (areEquivalentValues(*Val, *PrevIt->second)) {
      WidenedLocToVal.insert({Loc, Val});
      continue;
    }

    Value &WidenedVal = widenDistinctValues(Loc->getType(), *PrevIt->second,
                                            PrevEnv, *Val, *this, Model);
    WidenedLocToVal.insert({Loc, &WidenedVal});
    if (&WidenedVal != PrevIt->second)
      Effect = LatticeJoinEffect::Changed;
  }
  LocToVal = std::move(WidenedLocToVal);
  // FIXME: update the equivalence calculation for `MemberLocToStruct`, once we
  // have a systematic way of soundly comparing this map.
  if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() ||
      ExprToLoc.size() != PrevEnv.ExprToLoc.size() ||
      LocToVal.size() != PrevEnv.LocToVal.size() ||
      MemberLocToStruct.size() != PrevEnv.MemberLocToStruct.size())
    Effect = LatticeJoinEffect::Changed;

  return Effect;
}

Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
                              Environment::ValueModel &Model) {
  assert(EnvA.DACtx == EnvB.DACtx);
  assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc);
  assert(EnvA.CallStack == EnvB.CallStack);

  Environment JoinedEnv(*EnvA.DACtx);

  JoinedEnv.CallStack = EnvA.CallStack;
  JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc;

  if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) {
    // `ReturnVal` might not always get set -- for example if we have a return
    // statement of the form `return some_other_func()` and we decide not to
    // analyze `some_other_func()`.
    // In this case, we can't say anything about the joined return value -- we
    // don't simply want to propagate the return value that we do have, because
    // it might not be the correct one.
    // This occurs for example in the test `ContextSensitiveMutualRecursion`.
    JoinedEnv.ReturnVal = nullptr;
  } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) {
    JoinedEnv.ReturnVal = EnvA.ReturnVal;
  } else {
    assert(!EnvA.CallStack.empty());
    // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this
    // cast.
    auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back());
    assert(Func != nullptr);
    if (Value *MergedVal =
            mergeDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA,
                                *EnvB.ReturnVal, EnvB, JoinedEnv, Model))
      JoinedEnv.ReturnVal = MergedVal;
  }

  if (EnvA.ReturnLoc == EnvB.ReturnLoc)
    JoinedEnv.ReturnLoc = EnvA.ReturnLoc;
  else
    JoinedEnv.ReturnLoc = nullptr;

  // FIXME: Once we're able to remove declarations from `DeclToLoc` when their
  // lifetime ends, add an assertion that there aren't any entries in
  // `EnvA.DeclToLoc` and `EnvB.DeclToLoc` that map the same declaration to
  // different storage locations.
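  // An entry is kept in the joined maps below only if it is present in both
  // environments with the same value (see `intersectDenseMaps` above).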
  JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc);

  JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc);

  JoinedEnv.MemberLocToStruct =
      intersectDenseMaps(EnvA.MemberLocToStruct, EnvB.MemberLocToStruct);

  // FIXME: update join to detect backedges and simplify the flow condition
  // accordingly.
  JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions(
      EnvA.FlowConditionToken, EnvB.FlowConditionToken);

  for (auto &Entry : EnvA.LocToVal) {
    const StorageLocation *Loc = Entry.first;
    assert(Loc != nullptr);

    Value *Val = Entry.second;
    assert(Val != nullptr);

    auto It = EnvB.LocToVal.find(Loc);
    if (It == EnvB.LocToVal.end())
      continue;
    assert(It->second != nullptr);

    if (areEquivalentValues(*Val, *It->second)) {
      JoinedEnv.LocToVal.insert({Loc, Val});
      continue;
    }

    if (Value *MergedVal = mergeDistinctValues(
            Loc->getType(), *Val, EnvA, *It->second, EnvB, JoinedEnv, Model)) {
      JoinedEnv.LocToVal.insert({Loc, MergedVal});
    }
  }

  return JoinedEnv;
}

StorageLocation &Environment::createStorageLocation(QualType Type) {
  return DACtx->createStorageLocation(Type);
}

StorageLocation &Environment::createStorageLocation(const VarDecl &D) {
  // Evaluated declarations are always assigned the same storage locations to
  // ensure that the environment stabilizes across loop iterations. Storage
  // locations for evaluated declarations are stored in the analysis context.
  return DACtx->getStableStorageLocation(D);
}

StorageLocation &Environment::createStorageLocation(const Expr &E) {
  // Evaluated expressions are always assigned the same storage locations to
  // ensure that the environment stabilizes across loop iterations. Storage
  // locations for evaluated expressions are stored in the analysis context.
  return DACtx->getStableStorageLocation(E);
}

void Environment::setStorageLocation(const ValueDecl &D,
                                     StorageLocation &Loc) {
  assert(!DeclToLoc.contains(&D));
  assert(!isa_and_nonnull<ReferenceValue>(getValue(Loc)));
  DeclToLoc[&D] = &Loc;
}

StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const {
  auto It = DeclToLoc.find(&D);
  if (It == DeclToLoc.end())
    return nullptr;

  StorageLocation *Loc = It->second;

  assert(!isa_and_nonnull<ReferenceValue>(getValue(*Loc)));

  return Loc;
}

void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) {
  const Expr &CanonE = ignoreCFGOmittedNodes(E);
  assert(!ExprToLoc.contains(&CanonE));
  ExprToLoc[&CanonE] = &Loc;
}

void Environment::setStorageLocationStrict(const Expr &E,
                                           StorageLocation &Loc) {
  // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason,
  // but we still want to be able to associate a `StorageLocation` with them,
  // so allow these as an exception.
  assert(E.isGLValue() ||
         E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
  setStorageLocation(E, Loc);
}

StorageLocation *Environment::getStorageLocation(const Expr &E,
                                                 SkipPast SP) const {
  // FIXME: Add a test with parens.
  auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E));
  return It == ExprToLoc.end() ? nullptr
                               : &skip(*It->second, SP);
}

StorageLocation *Environment::getStorageLocationStrict(const Expr &E) const {
  // See comment in `setStorageLocationStrict()`.
  assert(E.isGLValue() ||
         E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn));
  StorageLocation *Loc = getStorageLocation(E, SkipPast::None);

  if (Loc == nullptr)
    return nullptr;

  if (auto *RefVal = dyn_cast_or_null<ReferenceValue>(getValue(*Loc)))
    return &RefVal->getReferentLoc();

  return Loc;
}

AggregateStorageLocation *Environment::getThisPointeeStorageLocation() const {
  return ThisPointeeLoc;
}

PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) {
  return DACtx->getOrCreateNullPointerValue(PointeeType);
}

void Environment::setValue(const StorageLocation &Loc, Value &Val) {
  LocToVal[&Loc] = &Val;

  if (auto *StructVal = dyn_cast<StructValue>(&Val)) {
    auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc);

    const QualType Type = AggregateLoc.getType();
    assert(Type->isRecordType());

    for (const FieldDecl *Field : DACtx->getModeledFields(Type)) {
      assert(Field != nullptr);
      StorageLocation &FieldLoc = AggregateLoc.getChild(*Field);
      MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field);
      if (auto *FieldVal = StructVal->getChild(*Field))
        setValue(FieldLoc, *FieldVal);
    }
  }

  auto It = MemberLocToStruct.find(&Loc);
  if (It != MemberLocToStruct.end()) {
    // `Loc` is the location of a struct member so we need to also update the
    // value of the member in the corresponding `StructValue`.

    assert(It->second.first != nullptr);
    StructValue &StructVal = *It->second.first;

    assert(It->second.second != nullptr);
    const ValueDecl &Member = *It->second.second;

    StructVal.setChild(Member, Val);
  }
}

void Environment::clearValue(const StorageLocation &Loc) {
  LocToVal.erase(&Loc);

  if (auto It = MemberLocToStruct.find(&Loc); It != MemberLocToStruct.end()) {
    // `Loc` is the location of a struct member so we need to also clear the
    // member in the corresponding `StructValue`.
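    // (This mirrors the corresponding update performed in `setValue` above.)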

    assert(It->second.first != nullptr);
    StructValue &StructVal = *It->second.first;

    assert(It->second.second != nullptr);
    const ValueDecl &Member = *It->second.second;

    StructVal.clearChild(Member);
  }
}

void Environment::setValueStrict(const Expr &E, Value &Val) {
  assert(E.isPRValue());
  assert(!isa<ReferenceValue>(Val));

  StorageLocation *Loc = getStorageLocation(E, SkipPast::None);
  if (Loc == nullptr) {
    Loc = &createStorageLocation(E);
    setStorageLocation(E, *Loc);
  }
  setValue(*Loc, Val);
}

Value *Environment::getValue(const StorageLocation &Loc) const {
  return LocToVal.lookup(&Loc);
}

Value *Environment::getValue(const ValueDecl &D) const {
  auto *Loc = getStorageLocation(D);
  if (Loc == nullptr)
    return nullptr;
  return getValue(*Loc);
}

Value *Environment::getValue(const Expr &E, SkipPast SP) const {
  auto *Loc = getStorageLocation(E, SP);
  if (Loc == nullptr)
    return nullptr;
  return getValue(*Loc);
}

Value *Environment::getValueStrict(const Expr &E) const {
  assert(E.isPRValue());
  Value *Val = getValue(E, SkipPast::None);

  assert(Val == nullptr || !isa<ReferenceValue>(Val));

  return Val;
}

Value *Environment::createValue(QualType Type) {
  llvm::DenseSet<QualType> Visited;
  int CreatedValuesCount = 0;
  Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0,
                                                CreatedValuesCount);
  if (CreatedValuesCount > MaxCompositeValueSize) {
    llvm::errs() << "Attempting to initialize a huge value of type: " << Type
                 << '\n';
  }
  return Val;
}

Value *Environment::createValueUnlessSelfReferential(
    QualType Type, llvm::DenseSet<QualType> &Visited, int Depth,
    int &CreatedValuesCount) {
  assert(!Type.isNull());

  // Allow unlimited fields at depth 1; only cap at deeper nesting levels.
  if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) ||
      Depth > MaxCompositeValueDepth)
    return nullptr;

  if (Type->isBooleanType()) {
    CreatedValuesCount++;
    return &makeAtomicBoolValue();
  }

  if (Type->isIntegerType()) {
    // FIXME: consider instead `return nullptr`, given that we do nothing
    // useful with integers, and so distinguishing them serves no purpose, but
    // could prevent convergence.
    CreatedValuesCount++;
    return &arena().create<IntegerValue>();
  }

  if (Type->isReferenceType() || Type->isPointerType()) {
    CreatedValuesCount++;
    QualType PointeeType = Type->getPointeeType();
    auto &PointeeLoc = createStorageLocation(PointeeType);

    if (Visited.insert(PointeeType.getCanonicalType()).second) {
      Value *PointeeVal = createValueUnlessSelfReferential(
          PointeeType, Visited, Depth, CreatedValuesCount);
      Visited.erase(PointeeType.getCanonicalType());

      if (PointeeVal != nullptr)
        setValue(PointeeLoc, *PointeeVal);
    }

    if (Type->isReferenceType())
      return &arena().create<ReferenceValue>(PointeeLoc);
    else
      return &arena().create<PointerValue>(PointeeLoc);
  }

  if (Type->isRecordType()) {
    CreatedValuesCount++;
    llvm::DenseMap<const ValueDecl *, Value *> FieldValues;
    for (const FieldDecl *Field : DACtx->getModeledFields(Type)) {
      assert(Field != nullptr);

      QualType FieldType = Field->getType();
      if (Visited.contains(FieldType.getCanonicalType()))
        continue;

      Visited.insert(FieldType.getCanonicalType());
      if (auto *FieldValue = createValueUnlessSelfReferential(
              FieldType, Visited, Depth + 1, CreatedValuesCount))
        FieldValues.insert({Field, FieldValue});
      Visited.erase(FieldType.getCanonicalType());
    }

    return &arena().create<StructValue>(std::move(FieldValues));
  }

  return nullptr;
}

StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const {
  switch (SP) {
  case SkipPast::None:
    return Loc;
  case SkipPast::Reference:
    // References cannot be chained so we only need to skip past one level of
    // indirection.
    if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc)))
      return Val->getReferentLoc();
    return Loc;
  }
  llvm_unreachable("bad SkipPast kind");
}

const StorageLocation &Environment::skip(const StorageLocation &Loc,
                                         SkipPast SP) const {
  return skip(*const_cast<StorageLocation *>(&Loc), SP);
}

void Environment::addToFlowCondition(const Formula &Val) {
  DACtx->addFlowConditionConstraint(FlowConditionToken, Val);
}
void Environment::addToFlowCondition(BoolValue &Val) {
  addToFlowCondition(Val.formula());
}

bool Environment::flowConditionImplies(const Formula &Val) const {
  return DACtx->flowConditionImplies(FlowConditionToken, Val);
}
bool Environment::flowConditionImplies(BoolValue &Val) const {
  return flowConditionImplies(Val.formula());
}

void Environment::dump(raw_ostream &OS) const {
  // FIXME: add printing for remaining fields and allow caller to decide what
  // fields are printed.
  OS << "DeclToLoc:\n";
  for (auto [D, L] : DeclToLoc)
    OS << " [" << D->getNameAsString() << ", " << L << "]\n";

  OS << "ExprToLoc:\n";
  for (auto [E, L] : ExprToLoc)
    OS << " [" << E << ", " << L << "]\n";

  OS << "LocToVal:\n";
  for (auto [L, V] : LocToVal) {
    OS << " [" << L << ", " << V << ": " << *V << "]\n";
  }

  OS << "FlowConditionToken:\n";
  DACtx->dumpFlowCondition(FlowConditionToken, OS);
}

void Environment::dump() const {
  dump(llvm::dbgs());
}

AggregateStorageLocation *
getImplicitObjectLocation(const CXXMemberCallExpr &MCE,
                          const Environment &Env) {
  Expr *ImplicitObject = MCE.getImplicitObjectArgument();
  if (ImplicitObject == nullptr)
    return nullptr;
  StorageLocation *Loc =
      Env.getStorageLocation(*ImplicitObject, SkipPast::Reference);
  if (Loc == nullptr)
    return nullptr;
  if (ImplicitObject->getType()->isPointerType()) {
    if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Loc)))
      return &cast<AggregateStorageLocation>(Val->getPointeeLoc());
    return nullptr;
  }
  return cast<AggregateStorageLocation>(Loc);
}

AggregateStorageLocation *getBaseObjectLocation(const MemberExpr &ME,
                                                const Environment &Env) {
  Expr *Base = ME.getBase();
  if (Base == nullptr)
    return nullptr;
  StorageLocation *Loc = Env.getStorageLocation(*Base, SkipPast::Reference);
  if (Loc == nullptr)
    return nullptr;
  if (ME.isArrow()) {
    if (auto *Val = cast_or_null<PointerValue>(Env.getValue(*Loc)))
      return &cast<AggregateStorageLocation>(Val->getPointeeLoc());
    return nullptr;
  }
  return cast<AggregateStorageLocation>(Loc);
}

std::vector<FieldDecl *> getFieldsForInitListExpr(const RecordDecl *RD) {
  // Unnamed bitfields are only used for padding and do not appear in
  // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s
  // field list, and we thus need to remove them before mapping inits to
  // fields to avoid mapping inits to the wrong fields.
  std::vector<FieldDecl *> Fields;
  llvm::copy_if(
      RD->fields(), std::back_inserter(Fields),
      [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); });
  return Fields;
}

} // namespace dataflow
} // namespace clang