1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an Environment class that is used by dataflow analyses 10 // that run over Control-Flow Graphs (CFGs) to keep track of the state of the 11 // program at given program points. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" 16 #include "clang/AST/Decl.h" 17 #include "clang/AST/DeclCXX.h" 18 #include "clang/AST/RecursiveASTVisitor.h" 19 #include "clang/AST/Type.h" 20 #include "clang/Analysis/FlowSensitive/DataflowLattice.h" 21 #include "clang/Analysis/FlowSensitive/Value.h" 22 #include "llvm/ADT/DenseMap.h" 23 #include "llvm/ADT/DenseSet.h" 24 #include "llvm/ADT/MapVector.h" 25 #include "llvm/ADT/STLExtras.h" 26 #include "llvm/Support/ErrorHandling.h" 27 #include <cassert> 28 #include <utility> 29 30 #define DEBUG_TYPE "dataflow" 31 32 namespace clang { 33 namespace dataflow { 34 35 // FIXME: convert these to parameters of the analysis or environment. Current 36 // settings have been experimentaly validated, but only for a particular 37 // analysis. 38 static constexpr int MaxCompositeValueDepth = 3; 39 static constexpr int MaxCompositeValueSize = 1000; 40 41 /// Returns a map consisting of key-value entries that are present in both maps. 42 static llvm::DenseMap<const ValueDecl *, StorageLocation *> intersectDeclToLoc( 43 const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc1, 44 const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc2) { 45 llvm::DenseMap<const ValueDecl *, StorageLocation *> Result; 46 for (auto &Entry : DeclToLoc1) { 47 auto It = DeclToLoc2.find(Entry.first); 48 if (It != DeclToLoc2.end() && Entry.second == It->second) 49 Result.insert({Entry.first, Entry.second}); 50 } 51 return Result; 52 } 53 54 // Performs a join on either `ExprToLoc` or `ExprToVal`. 55 // The maps must be consistent in the sense that any entries for the same 56 // expression must map to the same location / value. This is the case if we are 57 // performing a join for control flow within a full-expression (which is the 58 // only case when this function should be used). 59 template <typename MapT> MapT joinExprMaps(const MapT &Map1, const MapT &Map2) { 60 MapT Result = Map1; 61 62 for (const auto &Entry : Map2) { 63 [[maybe_unused]] auto [It, Inserted] = Result.insert(Entry); 64 // If there was an existing entry, its value should be the same as for the 65 // entry we were trying to insert. 66 assert(It->second == Entry.second); 67 } 68 69 return Result; 70 } 71 72 // Whether to consider equivalent two values with an unknown relation. 73 // 74 // FIXME: this function is a hack enabling unsoundness to support 75 // convergence. Once we have widening support for the reference/pointer and 76 // struct built-in models, this should be unconditionally `false` (and inlined 77 // as such at its call sites). 78 static bool equateUnknownValues(Value::Kind K) { 79 switch (K) { 80 case Value::Kind::Integer: 81 case Value::Kind::Pointer: 82 case Value::Kind::Record: 83 return true; 84 default: 85 return false; 86 } 87 } 88 89 static bool compareDistinctValues(QualType Type, Value &Val1, 90 const Environment &Env1, Value &Val2, 91 const Environment &Env2, 92 Environment::ValueModel &Model) { 93 // Note: Potentially costly, but, for booleans, we could check whether both 94 // can be proven equivalent in their respective environments. 95 96 // FIXME: move the reference/pointers logic from `areEquivalentValues` to here 97 // and implement separate, join/widen specific handling for 98 // reference/pointers. 99 switch (Model.compare(Type, Val1, Env1, Val2, Env2)) { 100 case ComparisonResult::Same: 101 return true; 102 case ComparisonResult::Different: 103 return false; 104 case ComparisonResult::Unknown: 105 return equateUnknownValues(Val1.getKind()); 106 } 107 llvm_unreachable("All cases covered in switch"); 108 } 109 110 /// Attempts to join distinct values `Val1` and `Val2` in `Env1` and `Env2`, 111 /// respectively, of the same type `Type`. Joining generally produces a single 112 /// value that (soundly) approximates the two inputs, although the actual 113 /// meaning depends on `Model`. 114 static Value *joinDistinctValues(QualType Type, Value &Val1, 115 const Environment &Env1, Value &Val2, 116 const Environment &Env2, 117 Environment &JoinedEnv, 118 Environment::ValueModel &Model) { 119 // Join distinct boolean values preserving information about the constraints 120 // in the respective path conditions. 121 if (isa<BoolValue>(&Val1) && isa<BoolValue>(&Val2)) { 122 // FIXME: Checking both values should be unnecessary, since they should have 123 // a consistent shape. However, right now we can end up with BoolValue's in 124 // integer-typed variables due to our incorrect handling of 125 // boolean-to-integer casts (we just propagate the BoolValue to the result 126 // of the cast). So, a join can encounter an integer in one branch but a 127 // bool in the other. 128 // For example: 129 // ``` 130 // std::optional<bool> o; 131 // int x; 132 // if (o.has_value()) 133 // x = o.value(); 134 // ``` 135 auto &Expr1 = cast<BoolValue>(Val1).formula(); 136 auto &Expr2 = cast<BoolValue>(Val2).formula(); 137 auto &A = JoinedEnv.arena(); 138 auto &JoinedVal = A.makeAtomRef(A.makeAtom()); 139 JoinedEnv.assume( 140 A.makeOr(A.makeAnd(A.makeAtomRef(Env1.getFlowConditionToken()), 141 A.makeEquals(JoinedVal, Expr1)), 142 A.makeAnd(A.makeAtomRef(Env2.getFlowConditionToken()), 143 A.makeEquals(JoinedVal, Expr2)))); 144 return &A.makeBoolValue(JoinedVal); 145 } 146 147 Value *JoinedVal = nullptr; 148 if (auto *RecordVal1 = dyn_cast<RecordValue>(&Val1)) { 149 auto *RecordVal2 = cast<RecordValue>(&Val2); 150 151 if (&RecordVal1->getLoc() == &RecordVal2->getLoc()) 152 // `RecordVal1` and `RecordVal2` may have different properties associated 153 // with them. Create a new `RecordValue` with the same location but 154 // without any properties so that we soundly approximate both values. If a 155 // particular analysis needs to join properties, it should do so in 156 // `DataflowAnalysis::join()`. 157 JoinedVal = &JoinedEnv.create<RecordValue>(RecordVal1->getLoc()); 158 else 159 // If the locations for the two records are different, need to create a 160 // completely new value. 161 JoinedVal = JoinedEnv.createValue(Type); 162 } else { 163 JoinedVal = JoinedEnv.createValue(Type); 164 } 165 166 if (JoinedVal) 167 Model.join(Type, Val1, Env1, Val2, Env2, *JoinedVal, JoinedEnv); 168 169 return JoinedVal; 170 } 171 172 static WidenResult widenDistinctValues(QualType Type, Value &Prev, 173 const Environment &PrevEnv, 174 Value &Current, Environment &CurrentEnv, 175 Environment::ValueModel &Model) { 176 // Boolean-model widening. 177 if (auto *PrevBool = dyn_cast<BoolValue>(&Prev)) { 178 if (isa<TopBoolValue>(Prev)) 179 // Safe to return `Prev` here, because Top is never dependent on the 180 // environment. 181 return {&Prev, LatticeEffect::Unchanged}; 182 183 // We may need to widen to Top, but before we do so, check whether both 184 // values are implied to be either true or false in the current environment. 185 // In that case, we can simply return a literal instead. 186 auto &CurBool = cast<BoolValue>(Current); 187 bool TruePrev = PrevEnv.proves(PrevBool->formula()); 188 bool TrueCur = CurrentEnv.proves(CurBool.formula()); 189 if (TruePrev && TrueCur) 190 return {&CurrentEnv.getBoolLiteralValue(true), LatticeEffect::Unchanged}; 191 if (!TruePrev && !TrueCur && 192 PrevEnv.proves(PrevEnv.arena().makeNot(PrevBool->formula())) && 193 CurrentEnv.proves(CurrentEnv.arena().makeNot(CurBool.formula()))) 194 return {&CurrentEnv.getBoolLiteralValue(false), LatticeEffect::Unchanged}; 195 196 return {&CurrentEnv.makeTopBoolValue(), LatticeEffect::Changed}; 197 } 198 199 // FIXME: Add other built-in model widening. 200 201 // Custom-model widening. 202 if (auto Result = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv)) 203 return *Result; 204 205 return {&Current, equateUnknownValues(Prev.getKind()) 206 ? LatticeEffect::Unchanged 207 : LatticeEffect::Changed}; 208 } 209 210 // Returns whether the values in `Map1` and `Map2` compare equal for those 211 // keys that `Map1` and `Map2` have in common. 212 template <typename Key> 213 bool compareKeyToValueMaps(const llvm::MapVector<Key, Value *> &Map1, 214 const llvm::MapVector<Key, Value *> &Map2, 215 const Environment &Env1, const Environment &Env2, 216 Environment::ValueModel &Model) { 217 for (auto &Entry : Map1) { 218 Key K = Entry.first; 219 assert(K != nullptr); 220 221 Value *Val = Entry.second; 222 assert(Val != nullptr); 223 224 auto It = Map2.find(K); 225 if (It == Map2.end()) 226 continue; 227 assert(It->second != nullptr); 228 229 if (!areEquivalentValues(*Val, *It->second) && 230 !compareDistinctValues(K->getType(), *Val, Env1, *It->second, Env2, 231 Model)) 232 return false; 233 } 234 235 return true; 236 } 237 238 // Perform a join on two `LocToVal` maps. 239 static llvm::MapVector<const StorageLocation *, Value *> 240 joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal, 241 const llvm::MapVector<const StorageLocation *, Value *> &LocToVal2, 242 const Environment &Env1, const Environment &Env2, 243 Environment &JoinedEnv, Environment::ValueModel &Model) { 244 llvm::MapVector<const StorageLocation *, Value *> Result; 245 for (auto &Entry : LocToVal) { 246 const StorageLocation *Loc = Entry.first; 247 assert(Loc != nullptr); 248 249 Value *Val = Entry.second; 250 assert(Val != nullptr); 251 252 auto It = LocToVal2.find(Loc); 253 if (It == LocToVal2.end()) 254 continue; 255 assert(It->second != nullptr); 256 257 if (areEquivalentValues(*Val, *It->second)) { 258 Result.insert({Loc, Val}); 259 continue; 260 } 261 262 if (Value *JoinedVal = joinDistinctValues( 263 Loc->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) { 264 Result.insert({Loc, JoinedVal}); 265 } 266 } 267 268 return Result; 269 } 270 271 // Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either 272 // `const StorageLocation *` or `const Expr *`. 273 template <typename Key> 274 llvm::MapVector<Key, Value *> 275 widenKeyToValueMap(const llvm::MapVector<Key, Value *> &CurMap, 276 const llvm::MapVector<Key, Value *> &PrevMap, 277 Environment &CurEnv, const Environment &PrevEnv, 278 Environment::ValueModel &Model, LatticeEffect &Effect) { 279 llvm::MapVector<Key, Value *> WidenedMap; 280 for (auto &Entry : CurMap) { 281 Key K = Entry.first; 282 assert(K != nullptr); 283 284 Value *Val = Entry.second; 285 assert(Val != nullptr); 286 287 auto PrevIt = PrevMap.find(K); 288 if (PrevIt == PrevMap.end()) 289 continue; 290 assert(PrevIt->second != nullptr); 291 292 if (areEquivalentValues(*Val, *PrevIt->second)) { 293 WidenedMap.insert({K, Val}); 294 continue; 295 } 296 297 auto [WidenedVal, ValEffect] = widenDistinctValues( 298 K->getType(), *PrevIt->second, PrevEnv, *Val, CurEnv, Model); 299 WidenedMap.insert({K, WidenedVal}); 300 if (ValEffect == LatticeEffect::Changed) 301 Effect = LatticeEffect::Changed; 302 } 303 304 return WidenedMap; 305 } 306 307 /// Initializes a global storage value. 308 static void insertIfGlobal(const Decl &D, 309 llvm::DenseSet<const VarDecl *> &Vars) { 310 if (auto *V = dyn_cast<VarDecl>(&D)) 311 if (V->hasGlobalStorage()) 312 Vars.insert(V); 313 } 314 315 static void insertIfFunction(const Decl &D, 316 llvm::DenseSet<const FunctionDecl *> &Funcs) { 317 if (auto *FD = dyn_cast<FunctionDecl>(&D)) 318 Funcs.insert(FD); 319 } 320 321 static MemberExpr *getMemberForAccessor(const CXXMemberCallExpr &C) { 322 // Use getCalleeDecl instead of getMethodDecl in order to handle 323 // pointer-to-member calls. 324 const auto *MethodDecl = dyn_cast_or_null<CXXMethodDecl>(C.getCalleeDecl()); 325 if (!MethodDecl) 326 return nullptr; 327 auto *Body = dyn_cast_or_null<CompoundStmt>(MethodDecl->getBody()); 328 if (!Body || Body->size() != 1) 329 return nullptr; 330 if (auto *RS = dyn_cast<ReturnStmt>(*Body->body_begin())) 331 if (auto *Return = RS->getRetValue()) 332 return dyn_cast<MemberExpr>(Return->IgnoreParenImpCasts()); 333 return nullptr; 334 } 335 336 static void 337 getFieldsGlobalsAndFuncs(const Decl &D, FieldSet &Fields, 338 llvm::DenseSet<const VarDecl *> &Vars, 339 llvm::DenseSet<const FunctionDecl *> &Funcs) { 340 insertIfGlobal(D, Vars); 341 insertIfFunction(D, Funcs); 342 if (const auto *Decomp = dyn_cast<DecompositionDecl>(&D)) 343 for (const auto *B : Decomp->bindings()) 344 if (auto *ME = dyn_cast_or_null<MemberExpr>(B->getBinding())) 345 // FIXME: should we be using `E->getFoundDecl()`? 346 if (const auto *FD = dyn_cast<FieldDecl>(ME->getMemberDecl())) 347 Fields.insert(FD); 348 } 349 350 /// Traverses `S` and inserts into `Fields`, `Vars` and `Funcs` any fields, 351 /// global variables and functions that are declared in or referenced from 352 /// sub-statements. 353 static void 354 getFieldsGlobalsAndFuncs(const Stmt &S, FieldSet &Fields, 355 llvm::DenseSet<const VarDecl *> &Vars, 356 llvm::DenseSet<const FunctionDecl *> &Funcs) { 357 for (auto *Child : S.children()) 358 if (Child != nullptr) 359 getFieldsGlobalsAndFuncs(*Child, Fields, Vars, Funcs); 360 if (const auto *DefaultArg = dyn_cast<CXXDefaultArgExpr>(&S)) 361 getFieldsGlobalsAndFuncs(*DefaultArg->getExpr(), Fields, Vars, Funcs); 362 if (const auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(&S)) 363 getFieldsGlobalsAndFuncs(*DefaultInit->getExpr(), Fields, Vars, Funcs); 364 365 if (auto *DS = dyn_cast<DeclStmt>(&S)) { 366 if (DS->isSingleDecl()) 367 getFieldsGlobalsAndFuncs(*DS->getSingleDecl(), Fields, Vars, Funcs); 368 else 369 for (auto *D : DS->getDeclGroup()) 370 getFieldsGlobalsAndFuncs(*D, Fields, Vars, Funcs); 371 } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) { 372 insertIfGlobal(*E->getDecl(), Vars); 373 insertIfFunction(*E->getDecl(), Funcs); 374 } else if (const auto *C = dyn_cast<CXXMemberCallExpr>(&S)) { 375 // If this is a method that returns a member variable but does nothing else, 376 // model the field of the return value. 377 if (MemberExpr *E = getMemberForAccessor(*C)) 378 if (const auto *FD = dyn_cast<FieldDecl>(E->getMemberDecl())) 379 Fields.insert(FD); 380 } else if (auto *E = dyn_cast<MemberExpr>(&S)) { 381 // FIXME: should we be using `E->getFoundDecl()`? 382 const ValueDecl *VD = E->getMemberDecl(); 383 insertIfGlobal(*VD, Vars); 384 insertIfFunction(*VD, Funcs); 385 if (const auto *FD = dyn_cast<FieldDecl>(VD)) 386 Fields.insert(FD); 387 } else if (auto *InitList = dyn_cast<InitListExpr>(&S)) { 388 if (InitList->getType()->isRecordType()) 389 for (const auto *FD : getFieldsForInitListExpr(InitList)) 390 Fields.insert(FD); 391 } 392 } 393 394 namespace { 395 396 // Visitor that builds a map from record prvalues to result objects. 397 // This traverses the body of the function to be analyzed; for each result 398 // object that it encounters, it propagates the storage location of the result 399 // object to all record prvalues that can initialize it. 400 class ResultObjectVisitor : public RecursiveASTVisitor<ResultObjectVisitor> { 401 public: 402 // `ResultObjectMap` will be filled with a map from record prvalues to result 403 // object. If the function being analyzed returns a record by value, 404 // `LocForRecordReturnVal` is the location to which this record should be 405 // written; otherwise, it is null. 406 explicit ResultObjectVisitor( 407 llvm::DenseMap<const Expr *, RecordStorageLocation *> &ResultObjectMap, 408 RecordStorageLocation *LocForRecordReturnVal, 409 DataflowAnalysisContext &DACtx) 410 : ResultObjectMap(ResultObjectMap), 411 LocForRecordReturnVal(LocForRecordReturnVal), DACtx(DACtx) {} 412 413 bool shouldVisitImplicitCode() { return true; } 414 415 bool shouldVisitLambdaBody() const { return false; } 416 417 // Traverse all member and base initializers of `Ctor`. This function is not 418 // called by `RecursiveASTVisitor`; it should be called manually if we are 419 // analyzing a constructor. `ThisPointeeLoc` is the storage location that 420 // `this` points to. 421 void TraverseConstructorInits(const CXXConstructorDecl *Ctor, 422 RecordStorageLocation *ThisPointeeLoc) { 423 assert(ThisPointeeLoc != nullptr); 424 for (const CXXCtorInitializer *Init : Ctor->inits()) { 425 Expr *InitExpr = Init->getInit(); 426 if (FieldDecl *Field = Init->getMember(); 427 Field != nullptr && Field->getType()->isRecordType()) { 428 PropagateResultObject(InitExpr, cast<RecordStorageLocation>( 429 ThisPointeeLoc->getChild(*Field))); 430 } else if (Init->getBaseClass()) { 431 PropagateResultObject(InitExpr, ThisPointeeLoc); 432 } 433 434 // Ensure that any result objects within `InitExpr` (e.g. temporaries) 435 // are also propagated to the prvalues that initialize them. 436 TraverseStmt(InitExpr); 437 438 // If this is a `CXXDefaultInitExpr`, also propagate any result objects 439 // within the default expression. 440 if (auto *DefaultInit = dyn_cast<CXXDefaultInitExpr>(InitExpr)) 441 TraverseStmt(DefaultInit->getExpr()); 442 } 443 } 444 445 bool TraverseBindingDecl(BindingDecl *BD) { 446 // `RecursiveASTVisitor` doesn't traverse holding variables for 447 // `BindingDecl`s by itself, so we need to tell it to. 448 if (VarDecl *HoldingVar = BD->getHoldingVar()) 449 TraverseDecl(HoldingVar); 450 return RecursiveASTVisitor<ResultObjectVisitor>::TraverseBindingDecl(BD); 451 } 452 453 bool VisitVarDecl(VarDecl *VD) { 454 if (VD->getType()->isRecordType() && VD->hasInit()) 455 PropagateResultObject( 456 VD->getInit(), 457 &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*VD))); 458 return true; 459 } 460 461 bool VisitMaterializeTemporaryExpr(MaterializeTemporaryExpr *MTE) { 462 if (MTE->getType()->isRecordType()) 463 PropagateResultObject( 464 MTE->getSubExpr(), 465 &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*MTE))); 466 return true; 467 } 468 469 bool VisitReturnStmt(ReturnStmt *Return) { 470 Expr *RetValue = Return->getRetValue(); 471 if (RetValue != nullptr && RetValue->getType()->isRecordType() && 472 RetValue->isPRValue()) 473 PropagateResultObject(RetValue, LocForRecordReturnVal); 474 return true; 475 } 476 477 bool VisitExpr(Expr *E) { 478 // Clang's AST can have record-type prvalues without a result object -- for 479 // example as full-expressions contained in a compound statement or as 480 // arguments of call expressions. We notice this if we get here and a 481 // storage location has not yet been associated with `E`. In this case, 482 // treat this as if it was a `MaterializeTemporaryExpr`. 483 if (E->isPRValue() && E->getType()->isRecordType() && 484 !ResultObjectMap.contains(E)) 485 PropagateResultObject( 486 E, &cast<RecordStorageLocation>(DACtx.getStableStorageLocation(*E))); 487 return true; 488 } 489 490 // Assigns `Loc` as the result object location of `E`, then propagates the 491 // location to all lower-level prvalues that initialize the same object as 492 // `E` (or one of its base classes or member variables). 493 void PropagateResultObject(Expr *E, RecordStorageLocation *Loc) { 494 if (!E->isPRValue() || !E->getType()->isRecordType()) { 495 assert(false); 496 // Ensure we don't propagate the result object if we hit this in a 497 // release build. 498 return; 499 } 500 501 ResultObjectMap[E] = Loc; 502 503 // The following AST node kinds are "original initializers": They are the 504 // lowest-level AST node that initializes a given object, and nothing 505 // below them can initialize the same object (or part of it). 506 if (isa<CXXConstructExpr>(E) || isa<CallExpr>(E) || isa<LambdaExpr>(E) || 507 isa<CXXDefaultArgExpr>(E) || isa<CXXDefaultInitExpr>(E) || 508 isa<CXXStdInitializerListExpr>(E)) { 509 return; 510 } 511 512 if (auto *InitList = dyn_cast<InitListExpr>(E)) { 513 if (!InitList->isSemanticForm()) 514 return; 515 if (InitList->isTransparent()) { 516 PropagateResultObject(InitList->getInit(0), Loc); 517 return; 518 } 519 520 RecordInitListHelper InitListHelper(InitList); 521 522 for (auto [Base, Init] : InitListHelper.base_inits()) { 523 assert(Base->getType().getCanonicalType() == 524 Init->getType().getCanonicalType()); 525 526 // Storage location for the base class is the same as that of the 527 // derived class because we "flatten" the object hierarchy and put all 528 // fields in `RecordStorageLocation` of the derived class. 529 PropagateResultObject(Init, Loc); 530 } 531 532 for (auto [Field, Init] : InitListHelper.field_inits()) { 533 // Fields of non-record type are handled in 534 // `TransferVisitor::VisitInitListExpr()`. 535 if (!Field->getType()->isRecordType()) 536 continue; 537 PropagateResultObject( 538 Init, cast<RecordStorageLocation>(Loc->getChild(*Field))); 539 } 540 return; 541 } 542 543 if (auto *Op = dyn_cast<BinaryOperator>(E); Op && Op->isCommaOp()) { 544 PropagateResultObject(Op->getRHS(), Loc); 545 return; 546 } 547 548 if (auto *Cond = dyn_cast<AbstractConditionalOperator>(E)) { 549 PropagateResultObject(Cond->getTrueExpr(), Loc); 550 PropagateResultObject(Cond->getFalseExpr(), Loc); 551 return; 552 } 553 554 // All other expression nodes that propagate a record prvalue should have 555 // exactly one child. 556 SmallVector<Stmt *, 1> Children(E->child_begin(), E->child_end()); 557 LLVM_DEBUG({ 558 if (Children.size() != 1) 559 E->dump(); 560 }); 561 assert(Children.size() == 1); 562 for (Stmt *S : Children) 563 PropagateResultObject(cast<Expr>(S), Loc); 564 } 565 566 private: 567 llvm::DenseMap<const Expr *, RecordStorageLocation *> &ResultObjectMap; 568 RecordStorageLocation *LocForRecordReturnVal; 569 DataflowAnalysisContext &DACtx; 570 }; 571 572 } // namespace 573 574 Environment::Environment(DataflowAnalysisContext &DACtx) 575 : DACtx(&DACtx), 576 FlowConditionToken(DACtx.arena().makeFlowConditionToken()) {} 577 578 Environment::Environment(DataflowAnalysisContext &DACtx, 579 const DeclContext &DeclCtx) 580 : Environment(DACtx) { 581 CallStack.push_back(&DeclCtx); 582 } 583 584 void Environment::initialize() { 585 const DeclContext *DeclCtx = getDeclCtx(); 586 if (DeclCtx == nullptr) 587 return; 588 589 const auto *FuncDecl = dyn_cast<FunctionDecl>(DeclCtx); 590 if (FuncDecl == nullptr) 591 return; 592 593 assert(FuncDecl->doesThisDeclarationHaveABody()); 594 595 initFieldsGlobalsAndFuncs(FuncDecl); 596 597 for (const auto *ParamDecl : FuncDecl->parameters()) { 598 assert(ParamDecl != nullptr); 599 setStorageLocation(*ParamDecl, createObject(*ParamDecl, nullptr)); 600 } 601 602 if (FuncDecl->getReturnType()->isRecordType()) 603 LocForRecordReturnVal = &cast<RecordStorageLocation>( 604 createStorageLocation(FuncDecl->getReturnType())); 605 606 if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(DeclCtx)) { 607 auto *Parent = MethodDecl->getParent(); 608 assert(Parent != nullptr); 609 610 if (Parent->isLambda()) { 611 for (const auto &Capture : Parent->captures()) { 612 if (Capture.capturesVariable()) { 613 const auto *VarDecl = Capture.getCapturedVar(); 614 assert(VarDecl != nullptr); 615 setStorageLocation(*VarDecl, createObject(*VarDecl, nullptr)); 616 } else if (Capture.capturesThis()) { 617 const auto *SurroundingMethodDecl = 618 cast<CXXMethodDecl>(DeclCtx->getNonClosureAncestor()); 619 QualType ThisPointeeType = 620 SurroundingMethodDecl->getFunctionObjectParameterType(); 621 setThisPointeeStorageLocation( 622 cast<RecordStorageLocation>(createObject(ThisPointeeType))); 623 } 624 } 625 } else if (MethodDecl->isImplicitObjectMemberFunction()) { 626 QualType ThisPointeeType = MethodDecl->getFunctionObjectParameterType(); 627 auto &ThisLoc = 628 cast<RecordStorageLocation>(createStorageLocation(ThisPointeeType)); 629 setThisPointeeStorageLocation(ThisLoc); 630 refreshRecordValue(ThisLoc, *this); 631 // Initialize fields of `*this` with values, but only if we're not 632 // analyzing a constructor; after all, it's the constructor's job to do 633 // this (and we want to be able to test that). 634 if (!isa<CXXConstructorDecl>(MethodDecl)) 635 initializeFieldsWithValues(ThisLoc); 636 } 637 } 638 639 // We do this below the handling of `CXXMethodDecl` above so that we can 640 // be sure that the storage location for `this` has been set. 641 ResultObjectMap = std::make_shared<PrValueToResultObject>( 642 buildResultObjectMap(DACtx, FuncDecl, getThisPointeeStorageLocation(), 643 LocForRecordReturnVal)); 644 } 645 646 // FIXME: Add support for resetting globals after function calls to enable 647 // the implementation of sound analyses. 648 void Environment::initFieldsGlobalsAndFuncs(const FunctionDecl *FuncDecl) { 649 assert(FuncDecl->doesThisDeclarationHaveABody()); 650 651 FieldSet Fields; 652 llvm::DenseSet<const VarDecl *> Vars; 653 llvm::DenseSet<const FunctionDecl *> Funcs; 654 655 // Look for global variable and field references in the 656 // constructor-initializers. 657 if (const auto *CtorDecl = dyn_cast<CXXConstructorDecl>(FuncDecl)) { 658 for (const auto *Init : CtorDecl->inits()) { 659 if (Init->isMemberInitializer()) { 660 Fields.insert(Init->getMember()); 661 } else if (Init->isIndirectMemberInitializer()) { 662 for (const auto *I : Init->getIndirectMember()->chain()) 663 Fields.insert(cast<FieldDecl>(I)); 664 } 665 const Expr *E = Init->getInit(); 666 assert(E != nullptr); 667 getFieldsGlobalsAndFuncs(*E, Fields, Vars, Funcs); 668 } 669 // Add all fields mentioned in default member initializers. 670 for (const FieldDecl *F : CtorDecl->getParent()->fields()) 671 if (const auto *I = F->getInClassInitializer()) 672 getFieldsGlobalsAndFuncs(*I, Fields, Vars, Funcs); 673 } 674 getFieldsGlobalsAndFuncs(*FuncDecl->getBody(), Fields, Vars, Funcs); 675 676 // These have to be added before the lines that follow to ensure that 677 // `create*` work correctly for structs. 678 DACtx->addModeledFields(Fields); 679 680 for (const VarDecl *D : Vars) { 681 if (getStorageLocation(*D) != nullptr) 682 continue; 683 684 // We don't run transfer functions on the initializers of global variables, 685 // so they won't be associated with a value or storage location. We 686 // therefore intentionally don't pass an initializer to `createObject()`; 687 // in particular, this ensures that `createObject()` will initialize the 688 // fields of record-type variables with values. 689 setStorageLocation(*D, createObject(*D, nullptr)); 690 } 691 692 for (const FunctionDecl *FD : Funcs) { 693 if (getStorageLocation(*FD) != nullptr) 694 continue; 695 auto &Loc = createStorageLocation(*FD); 696 setStorageLocation(*FD, Loc); 697 } 698 } 699 700 Environment Environment::fork() const { 701 Environment Copy(*this); 702 Copy.FlowConditionToken = DACtx->forkFlowCondition(FlowConditionToken); 703 return Copy; 704 } 705 706 bool Environment::canDescend(unsigned MaxDepth, 707 const DeclContext *Callee) const { 708 return CallStack.size() <= MaxDepth && !llvm::is_contained(CallStack, Callee); 709 } 710 711 Environment Environment::pushCall(const CallExpr *Call) const { 712 Environment Env(*this); 713 714 if (const auto *MethodCall = dyn_cast<CXXMemberCallExpr>(Call)) { 715 if (const Expr *Arg = MethodCall->getImplicitObjectArgument()) { 716 if (!isa<CXXThisExpr>(Arg)) 717 Env.ThisPointeeLoc = 718 cast<RecordStorageLocation>(getStorageLocation(*Arg)); 719 // Otherwise (when the argument is `this`), retain the current 720 // environment's `ThisPointeeLoc`. 721 } 722 } 723 724 if (Call->getType()->isRecordType() && Call->isPRValue()) 725 Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call); 726 727 Env.pushCallInternal(Call->getDirectCallee(), 728 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 729 730 return Env; 731 } 732 733 Environment Environment::pushCall(const CXXConstructExpr *Call) const { 734 Environment Env(*this); 735 736 Env.ThisPointeeLoc = &Env.getResultObjectLocation(*Call); 737 Env.LocForRecordReturnVal = &Env.getResultObjectLocation(*Call); 738 739 Env.pushCallInternal(Call->getConstructor(), 740 llvm::ArrayRef(Call->getArgs(), Call->getNumArgs())); 741 742 return Env; 743 } 744 745 void Environment::pushCallInternal(const FunctionDecl *FuncDecl, 746 ArrayRef<const Expr *> Args) { 747 // Canonicalize to the definition of the function. This ensures that we're 748 // putting arguments into the same `ParamVarDecl`s` that the callee will later 749 // be retrieving them from. 750 assert(FuncDecl->getDefinition() != nullptr); 751 FuncDecl = FuncDecl->getDefinition(); 752 753 CallStack.push_back(FuncDecl); 754 755 initFieldsGlobalsAndFuncs(FuncDecl); 756 757 const auto *ParamIt = FuncDecl->param_begin(); 758 759 // FIXME: Parameters don't always map to arguments 1:1; examples include 760 // overloaded operators implemented as member functions, and parameter packs. 761 for (unsigned ArgIndex = 0; ArgIndex < Args.size(); ++ParamIt, ++ArgIndex) { 762 assert(ParamIt != FuncDecl->param_end()); 763 const VarDecl *Param = *ParamIt; 764 setStorageLocation(*Param, createObject(*Param, Args[ArgIndex])); 765 } 766 767 ResultObjectMap = std::make_shared<PrValueToResultObject>( 768 buildResultObjectMap(DACtx, FuncDecl, getThisPointeeStorageLocation(), 769 LocForRecordReturnVal)); 770 } 771 772 void Environment::popCall(const CallExpr *Call, const Environment &CalleeEnv) { 773 // We ignore some entries of `CalleeEnv`: 774 // - `DACtx` because is already the same in both 775 // - We don't want the callee's `DeclCtx`, `ReturnVal`, `ReturnLoc` or 776 // `ThisPointeeLoc` because they don't apply to us. 777 // - `DeclToLoc`, `ExprToLoc`, and `ExprToVal` capture information from the 778 // callee's local scope, so when popping that scope, we do not propagate 779 // the maps. 780 this->LocToVal = std::move(CalleeEnv.LocToVal); 781 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 782 783 if (Call->isGLValue()) { 784 if (CalleeEnv.ReturnLoc != nullptr) 785 setStorageLocation(*Call, *CalleeEnv.ReturnLoc); 786 } else if (!Call->getType()->isVoidType()) { 787 if (CalleeEnv.ReturnVal != nullptr) 788 setValue(*Call, *CalleeEnv.ReturnVal); 789 } 790 } 791 792 void Environment::popCall(const CXXConstructExpr *Call, 793 const Environment &CalleeEnv) { 794 // See also comment in `popCall(const CallExpr *, const Environment &)` above. 795 this->LocToVal = std::move(CalleeEnv.LocToVal); 796 this->FlowConditionToken = std::move(CalleeEnv.FlowConditionToken); 797 798 if (Value *Val = CalleeEnv.getValue(*CalleeEnv.ThisPointeeLoc)) { 799 setValue(*Call, *Val); 800 } 801 } 802 803 bool Environment::equivalentTo(const Environment &Other, 804 Environment::ValueModel &Model) const { 805 assert(DACtx == Other.DACtx); 806 807 if (ReturnVal != Other.ReturnVal) 808 return false; 809 810 if (ReturnLoc != Other.ReturnLoc) 811 return false; 812 813 if (LocForRecordReturnVal != Other.LocForRecordReturnVal) 814 return false; 815 816 if (ThisPointeeLoc != Other.ThisPointeeLoc) 817 return false; 818 819 if (DeclToLoc != Other.DeclToLoc) 820 return false; 821 822 if (ExprToLoc != Other.ExprToLoc) 823 return false; 824 825 if (!compareKeyToValueMaps(ExprToVal, Other.ExprToVal, *this, Other, Model)) 826 return false; 827 828 if (!compareKeyToValueMaps(LocToVal, Other.LocToVal, *this, Other, Model)) 829 return false; 830 831 return true; 832 } 833 834 LatticeEffect Environment::widen(const Environment &PrevEnv, 835 Environment::ValueModel &Model) { 836 assert(DACtx == PrevEnv.DACtx); 837 assert(ReturnVal == PrevEnv.ReturnVal); 838 assert(ReturnLoc == PrevEnv.ReturnLoc); 839 assert(LocForRecordReturnVal == PrevEnv.LocForRecordReturnVal); 840 assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); 841 assert(CallStack == PrevEnv.CallStack); 842 assert(ResultObjectMap == PrevEnv.ResultObjectMap); 843 844 auto Effect = LatticeEffect::Unchanged; 845 846 // By the API, `PrevEnv` is a previous version of the environment for the same 847 // block, so we have some guarantees about its shape. In particular, it will 848 // be the result of a join or widen operation on previous values for this 849 // block. For `DeclToLoc`, `ExprToVal`, and `ExprToLoc`, join guarantees that 850 // these maps are subsets of the maps in `PrevEnv`. So, as long as we maintain 851 // this property here, we don't need change their current values to widen. 852 assert(DeclToLoc.size() <= PrevEnv.DeclToLoc.size()); 853 assert(ExprToVal.size() <= PrevEnv.ExprToVal.size()); 854 assert(ExprToLoc.size() <= PrevEnv.ExprToLoc.size()); 855 856 ExprToVal = widenKeyToValueMap(ExprToVal, PrevEnv.ExprToVal, *this, PrevEnv, 857 Model, Effect); 858 859 LocToVal = widenKeyToValueMap(LocToVal, PrevEnv.LocToVal, *this, PrevEnv, 860 Model, Effect); 861 if (DeclToLoc.size() != PrevEnv.DeclToLoc.size() || 862 ExprToLoc.size() != PrevEnv.ExprToLoc.size() || 863 ExprToVal.size() != PrevEnv.ExprToVal.size() || 864 LocToVal.size() != PrevEnv.LocToVal.size()) 865 Effect = LatticeEffect::Changed; 866 867 return Effect; 868 } 869 870 Environment Environment::join(const Environment &EnvA, const Environment &EnvB, 871 Environment::ValueModel &Model, 872 ExprJoinBehavior ExprBehavior) { 873 assert(EnvA.DACtx == EnvB.DACtx); 874 assert(EnvA.LocForRecordReturnVal == EnvB.LocForRecordReturnVal); 875 assert(EnvA.ThisPointeeLoc == EnvB.ThisPointeeLoc); 876 assert(EnvA.CallStack == EnvB.CallStack); 877 assert(EnvA.ResultObjectMap == EnvB.ResultObjectMap); 878 879 Environment JoinedEnv(*EnvA.DACtx); 880 881 JoinedEnv.CallStack = EnvA.CallStack; 882 JoinedEnv.ResultObjectMap = EnvA.ResultObjectMap; 883 JoinedEnv.LocForRecordReturnVal = EnvA.LocForRecordReturnVal; 884 JoinedEnv.ThisPointeeLoc = EnvA.ThisPointeeLoc; 885 886 if (EnvA.ReturnVal == nullptr || EnvB.ReturnVal == nullptr) { 887 // `ReturnVal` might not always get set -- for example if we have a return 888 // statement of the form `return some_other_func()` and we decide not to 889 // analyze `some_other_func()`. 890 // In this case, we can't say anything about the joined return value -- we 891 // don't simply want to propagate the return value that we do have, because 892 // it might not be the correct one. 893 // This occurs for example in the test `ContextSensitiveMutualRecursion`. 894 JoinedEnv.ReturnVal = nullptr; 895 } else if (areEquivalentValues(*EnvA.ReturnVal, *EnvB.ReturnVal)) { 896 JoinedEnv.ReturnVal = EnvA.ReturnVal; 897 } else { 898 assert(!EnvA.CallStack.empty()); 899 // FIXME: Make `CallStack` a vector of `FunctionDecl` so we don't need this 900 // cast. 901 auto *Func = dyn_cast<FunctionDecl>(EnvA.CallStack.back()); 902 assert(Func != nullptr); 903 if (Value *JoinedVal = 904 joinDistinctValues(Func->getReturnType(), *EnvA.ReturnVal, EnvA, 905 *EnvB.ReturnVal, EnvB, JoinedEnv, Model)) 906 JoinedEnv.ReturnVal = JoinedVal; 907 } 908 909 if (EnvA.ReturnLoc == EnvB.ReturnLoc) 910 JoinedEnv.ReturnLoc = EnvA.ReturnLoc; 911 else 912 JoinedEnv.ReturnLoc = nullptr; 913 914 JoinedEnv.DeclToLoc = intersectDeclToLoc(EnvA.DeclToLoc, EnvB.DeclToLoc); 915 916 // FIXME: update join to detect backedges and simplify the flow condition 917 // accordingly. 918 JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions( 919 EnvA.FlowConditionToken, EnvB.FlowConditionToken); 920 921 JoinedEnv.LocToVal = 922 joinLocToVal(EnvA.LocToVal, EnvB.LocToVal, EnvA, EnvB, JoinedEnv, Model); 923 924 if (ExprBehavior == KeepExprState) { 925 JoinedEnv.ExprToVal = joinExprMaps(EnvA.ExprToVal, EnvB.ExprToVal); 926 JoinedEnv.ExprToLoc = joinExprMaps(EnvA.ExprToLoc, EnvB.ExprToLoc); 927 } 928 929 return JoinedEnv; 930 } 931 932 StorageLocation &Environment::createStorageLocation(QualType Type) { 933 return DACtx->createStorageLocation(Type); 934 } 935 936 StorageLocation &Environment::createStorageLocation(const ValueDecl &D) { 937 // Evaluated declarations are always assigned the same storage locations to 938 // ensure that the environment stabilizes across loop iterations. Storage 939 // locations for evaluated declarations are stored in the analysis context. 940 return DACtx->getStableStorageLocation(D); 941 } 942 943 StorageLocation &Environment::createStorageLocation(const Expr &E) { 944 // Evaluated expressions are always assigned the same storage locations to 945 // ensure that the environment stabilizes across loop iterations. Storage 946 // locations for evaluated expressions are stored in the analysis context. 947 return DACtx->getStableStorageLocation(E); 948 } 949 950 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { 951 assert(!DeclToLoc.contains(&D)); 952 // The only kinds of declarations that may have a "variable" storage location 953 // are declarations of reference type and `BindingDecl`. For all other 954 // declaration, the storage location should be the stable storage location 955 // returned by `createStorageLocation()`. 956 assert(D.getType()->isReferenceType() || isa<BindingDecl>(D) || 957 &Loc == &createStorageLocation(D)); 958 DeclToLoc[&D] = &Loc; 959 } 960 961 StorageLocation *Environment::getStorageLocation(const ValueDecl &D) const { 962 auto It = DeclToLoc.find(&D); 963 if (It == DeclToLoc.end()) 964 return nullptr; 965 966 StorageLocation *Loc = It->second; 967 968 return Loc; 969 } 970 971 void Environment::removeDecl(const ValueDecl &D) { DeclToLoc.erase(&D); } 972 973 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) { 974 // `DeclRefExpr`s to builtin function types aren't glvalues, for some reason, 975 // but we still want to be able to associate a `StorageLocation` with them, 976 // so allow these as an exception. 977 assert(E.isGLValue() || 978 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 979 const Expr &CanonE = ignoreCFGOmittedNodes(E); 980 assert(!ExprToLoc.contains(&CanonE)); 981 ExprToLoc[&CanonE] = &Loc; 982 } 983 984 StorageLocation *Environment::getStorageLocation(const Expr &E) const { 985 // See comment in `setStorageLocation()`. 986 assert(E.isGLValue() || 987 E.getType()->isSpecificBuiltinType(BuiltinType::BuiltinFn)); 988 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 989 return It == ExprToLoc.end() ? nullptr : &*It->second; 990 } 991 992 // Returns whether a prvalue of record type is the one that originally 993 // constructs the object (i.e. it doesn't propagate it from one of its 994 // children). 995 static bool isOriginalRecordConstructor(const Expr &RecordPRValue) { 996 if (auto *Init = dyn_cast<InitListExpr>(&RecordPRValue)) 997 return !Init->isSemanticForm() || !Init->isTransparent(); 998 return isa<CXXConstructExpr>(RecordPRValue) || isa<CallExpr>(RecordPRValue) || 999 isa<LambdaExpr>(RecordPRValue) || 1000 isa<CXXDefaultArgExpr>(RecordPRValue) || 1001 isa<CXXDefaultInitExpr>(RecordPRValue) || 1002 // The framework currently does not propagate the objects created in 1003 // the two branches of a `ConditionalOperator` because there is no way 1004 // to reconcile their storage locations, which are different. We 1005 // therefore claim that the `ConditionalOperator` is the expression 1006 // that originally constructs the object. 1007 // Ultimately, this will be fixed by propagating locations down from 1008 // the result object, rather than up from the original constructor as 1009 // we do now (see also the FIXME in the documentation for 1010 // `getResultObjectLocation()`). 1011 isa<ConditionalOperator>(RecordPRValue); 1012 } 1013 1014 RecordStorageLocation & 1015 Environment::getResultObjectLocation(const Expr &RecordPRValue) const { 1016 assert(RecordPRValue.getType()->isRecordType()); 1017 assert(RecordPRValue.isPRValue()); 1018 1019 assert(ResultObjectMap != nullptr); 1020 RecordStorageLocation *Loc = ResultObjectMap->lookup(&RecordPRValue); 1021 assert(Loc != nullptr); 1022 // In release builds, use the "stable" storage location if the map lookup 1023 // failed. 1024 if (Loc == nullptr) 1025 return cast<RecordStorageLocation>( 1026 DACtx->getStableStorageLocation(RecordPRValue)); 1027 return *Loc; 1028 } 1029 1030 PointerValue &Environment::getOrCreateNullPointerValue(QualType PointeeType) { 1031 return DACtx->getOrCreateNullPointerValue(PointeeType); 1032 } 1033 1034 void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, 1035 QualType Type) { 1036 llvm::DenseSet<QualType> Visited; 1037 int CreatedValuesCount = 0; 1038 initializeFieldsWithValues(Loc, Type, Visited, 0, CreatedValuesCount); 1039 if (CreatedValuesCount > MaxCompositeValueSize) { 1040 llvm::errs() << "Attempting to initialize a huge value of type: " << Type 1041 << '\n'; 1042 } 1043 } 1044 1045 void Environment::setValue(const StorageLocation &Loc, Value &Val) { 1046 assert(!isa<RecordValue>(&Val) || &cast<RecordValue>(&Val)->getLoc() == &Loc); 1047 1048 LocToVal[&Loc] = &Val; 1049 } 1050 1051 void Environment::setValue(const Expr &E, Value &Val) { 1052 const Expr &CanonE = ignoreCFGOmittedNodes(E); 1053 1054 if (auto *RecordVal = dyn_cast<RecordValue>(&Val)) { 1055 assert(&RecordVal->getLoc() == &getResultObjectLocation(CanonE)); 1056 (void)RecordVal; 1057 } 1058 1059 assert(CanonE.isPRValue()); 1060 ExprToVal[&CanonE] = &Val; 1061 } 1062 1063 Value *Environment::getValue(const StorageLocation &Loc) const { 1064 return LocToVal.lookup(&Loc); 1065 } 1066 1067 Value *Environment::getValue(const ValueDecl &D) const { 1068 auto *Loc = getStorageLocation(D); 1069 if (Loc == nullptr) 1070 return nullptr; 1071 return getValue(*Loc); 1072 } 1073 1074 Value *Environment::getValue(const Expr &E) const { 1075 if (E.isPRValue()) { 1076 auto It = ExprToVal.find(&ignoreCFGOmittedNodes(E)); 1077 return It == ExprToVal.end() ? nullptr : It->second; 1078 } 1079 1080 auto It = ExprToLoc.find(&ignoreCFGOmittedNodes(E)); 1081 if (It == ExprToLoc.end()) 1082 return nullptr; 1083 return getValue(*It->second); 1084 } 1085 1086 Value *Environment::createValue(QualType Type) { 1087 llvm::DenseSet<QualType> Visited; 1088 int CreatedValuesCount = 0; 1089 Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0, 1090 CreatedValuesCount); 1091 if (CreatedValuesCount > MaxCompositeValueSize) { 1092 llvm::errs() << "Attempting to initialize a huge value of type: " << Type 1093 << '\n'; 1094 } 1095 return Val; 1096 } 1097 1098 Value *Environment::createValueUnlessSelfReferential( 1099 QualType Type, llvm::DenseSet<QualType> &Visited, int Depth, 1100 int &CreatedValuesCount) { 1101 assert(!Type.isNull()); 1102 assert(!Type->isReferenceType()); 1103 1104 // Allow unlimited fields at depth 1; only cap at deeper nesting levels. 1105 if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) || 1106 Depth > MaxCompositeValueDepth) 1107 return nullptr; 1108 1109 if (Type->isBooleanType()) { 1110 CreatedValuesCount++; 1111 return &makeAtomicBoolValue(); 1112 } 1113 1114 if (Type->isIntegerType()) { 1115 // FIXME: consider instead `return nullptr`, given that we do nothing useful 1116 // with integers, and so distinguishing them serves no purpose, but could 1117 // prevent convergence. 1118 CreatedValuesCount++; 1119 return &arena().create<IntegerValue>(); 1120 } 1121 1122 if (Type->isPointerType()) { 1123 CreatedValuesCount++; 1124 QualType PointeeType = Type->getPointeeType(); 1125 StorageLocation &PointeeLoc = 1126 createLocAndMaybeValue(PointeeType, Visited, Depth, CreatedValuesCount); 1127 1128 return &arena().create<PointerValue>(PointeeLoc); 1129 } 1130 1131 if (Type->isRecordType()) { 1132 CreatedValuesCount++; 1133 auto &Loc = cast<RecordStorageLocation>(createStorageLocation(Type)); 1134 initializeFieldsWithValues(Loc, Loc.getType(), Visited, Depth, 1135 CreatedValuesCount); 1136 1137 return &refreshRecordValue(Loc, *this); 1138 } 1139 1140 return nullptr; 1141 } 1142 1143 StorageLocation & 1144 Environment::createLocAndMaybeValue(QualType Ty, 1145 llvm::DenseSet<QualType> &Visited, 1146 int Depth, int &CreatedValuesCount) { 1147 if (!Visited.insert(Ty.getCanonicalType()).second) 1148 return createStorageLocation(Ty.getNonReferenceType()); 1149 Value *Val = createValueUnlessSelfReferential( 1150 Ty.getNonReferenceType(), Visited, Depth, CreatedValuesCount); 1151 Visited.erase(Ty.getCanonicalType()); 1152 1153 Ty = Ty.getNonReferenceType(); 1154 1155 if (Val == nullptr) 1156 return createStorageLocation(Ty); 1157 1158 if (Ty->isRecordType()) 1159 return cast<RecordValue>(Val)->getLoc(); 1160 1161 StorageLocation &Loc = createStorageLocation(Ty); 1162 setValue(Loc, *Val); 1163 return Loc; 1164 } 1165 1166 void Environment::initializeFieldsWithValues(RecordStorageLocation &Loc, 1167 QualType Type, 1168 llvm::DenseSet<QualType> &Visited, 1169 int Depth, 1170 int &CreatedValuesCount) { 1171 auto initField = [&](QualType FieldType, StorageLocation &FieldLoc) { 1172 if (FieldType->isRecordType()) { 1173 auto &FieldRecordLoc = cast<RecordStorageLocation>(FieldLoc); 1174 setValue(FieldRecordLoc, create<RecordValue>(FieldRecordLoc)); 1175 initializeFieldsWithValues(FieldRecordLoc, FieldRecordLoc.getType(), 1176 Visited, Depth + 1, CreatedValuesCount); 1177 } else { 1178 if (!Visited.insert(FieldType.getCanonicalType()).second) 1179 return; 1180 if (Value *Val = createValueUnlessSelfReferential( 1181 FieldType, Visited, Depth + 1, CreatedValuesCount)) 1182 setValue(FieldLoc, *Val); 1183 Visited.erase(FieldType.getCanonicalType()); 1184 } 1185 }; 1186 1187 for (const FieldDecl *Field : DACtx->getModeledFields(Type)) { 1188 assert(Field != nullptr); 1189 QualType FieldType = Field->getType(); 1190 1191 if (FieldType->isReferenceType()) { 1192 Loc.setChild(*Field, 1193 &createLocAndMaybeValue(FieldType, Visited, Depth + 1, 1194 CreatedValuesCount)); 1195 } else { 1196 StorageLocation *FieldLoc = Loc.getChild(*Field); 1197 assert(FieldLoc != nullptr); 1198 initField(FieldType, *FieldLoc); 1199 } 1200 } 1201 for (const auto &[FieldName, FieldType] : DACtx->getSyntheticFields(Type)) { 1202 // Synthetic fields cannot have reference type, so we don't need to deal 1203 // with this case. 1204 assert(!FieldType->isReferenceType()); 1205 initField(FieldType, Loc.getSyntheticField(FieldName)); 1206 } 1207 } 1208 1209 StorageLocation &Environment::createObjectInternal(const ValueDecl *D, 1210 QualType Ty, 1211 const Expr *InitExpr) { 1212 if (Ty->isReferenceType()) { 1213 // Although variables of reference type always need to be initialized, it 1214 // can happen that we can't see the initializer, so `InitExpr` may still 1215 // be null. 1216 if (InitExpr) { 1217 if (auto *InitExprLoc = getStorageLocation(*InitExpr)) 1218 return *InitExprLoc; 1219 } 1220 1221 // Even though we have an initializer, we might not get an 1222 // InitExprLoc, for example if the InitExpr is a CallExpr for which we 1223 // don't have a function body. In this case, we just invent a storage 1224 // location and value -- it's the best we can do. 1225 return createObjectInternal(D, Ty.getNonReferenceType(), nullptr); 1226 } 1227 1228 StorageLocation &Loc = 1229 D ? createStorageLocation(*D) : createStorageLocation(Ty); 1230 1231 if (Ty->isRecordType()) { 1232 auto &RecordLoc = cast<RecordStorageLocation>(Loc); 1233 if (!InitExpr) 1234 initializeFieldsWithValues(RecordLoc); 1235 refreshRecordValue(RecordLoc, *this); 1236 } else { 1237 Value *Val = nullptr; 1238 if (InitExpr) 1239 // In the (few) cases where an expression is intentionally 1240 // "uninterpreted", `InitExpr` is not associated with a value. There are 1241 // two ways to handle this situation: propagate the status, so that 1242 // uninterpreted initializers result in uninterpreted variables, or 1243 // provide a default value. We choose the latter so that later refinements 1244 // of the variable can be used for reasoning about the surrounding code. 1245 // For this reason, we let this case be handled by the `createValue()` 1246 // call below. 1247 // 1248 // FIXME. If and when we interpret all language cases, change this to 1249 // assert that `InitExpr` is interpreted, rather than supplying a 1250 // default value (assuming we don't update the environment API to return 1251 // references). 1252 Val = getValue(*InitExpr); 1253 if (!Val) 1254 Val = createValue(Ty); 1255 if (Val) 1256 setValue(Loc, *Val); 1257 } 1258 1259 return Loc; 1260 } 1261 1262 void Environment::assume(const Formula &F) { 1263 DACtx->addFlowConditionConstraint(FlowConditionToken, F); 1264 } 1265 1266 bool Environment::proves(const Formula &F) const { 1267 return DACtx->flowConditionImplies(FlowConditionToken, F); 1268 } 1269 1270 bool Environment::allows(const Formula &F) const { 1271 return DACtx->flowConditionAllows(FlowConditionToken, F); 1272 } 1273 1274 void Environment::dump(raw_ostream &OS) const { 1275 llvm::DenseMap<const StorageLocation *, std::string> LocToName; 1276 if (LocForRecordReturnVal != nullptr) 1277 LocToName[LocForRecordReturnVal] = "(returned record)"; 1278 if (ThisPointeeLoc != nullptr) 1279 LocToName[ThisPointeeLoc] = "this"; 1280 1281 OS << "DeclToLoc:\n"; 1282 for (auto [D, L] : DeclToLoc) { 1283 auto Iter = LocToName.insert({L, D->getNameAsString()}).first; 1284 OS << " [" << Iter->second << ", " << L << "]\n"; 1285 } 1286 OS << "ExprToLoc:\n"; 1287 for (auto [E, L] : ExprToLoc) 1288 OS << " [" << E << ", " << L << "]\n"; 1289 1290 OS << "ExprToVal:\n"; 1291 for (auto [E, V] : ExprToVal) 1292 OS << " [" << E << ", " << V << ": " << *V << "]\n"; 1293 1294 OS << "LocToVal:\n"; 1295 for (auto [L, V] : LocToVal) { 1296 OS << " [" << L; 1297 if (auto Iter = LocToName.find(L); Iter != LocToName.end()) 1298 OS << " (" << Iter->second << ")"; 1299 OS << ", " << V << ": " << *V << "]\n"; 1300 } 1301 1302 if (const FunctionDecl *Func = getCurrentFunc()) { 1303 if (Func->getReturnType()->isReferenceType()) { 1304 OS << "ReturnLoc: " << ReturnLoc; 1305 if (auto Iter = LocToName.find(ReturnLoc); Iter != LocToName.end()) 1306 OS << " (" << Iter->second << ")"; 1307 OS << "\n"; 1308 } else if (Func->getReturnType()->isRecordType() || 1309 isa<CXXConstructorDecl>(Func)) { 1310 OS << "LocForRecordReturnVal: " << LocForRecordReturnVal << "\n"; 1311 } else if (!Func->getReturnType()->isVoidType()) { 1312 if (ReturnVal == nullptr) 1313 OS << "ReturnVal: nullptr\n"; 1314 else 1315 OS << "ReturnVal: " << *ReturnVal << "\n"; 1316 } 1317 1318 if (isa<CXXMethodDecl>(Func)) { 1319 OS << "ThisPointeeLoc: " << ThisPointeeLoc << "\n"; 1320 } 1321 } 1322 1323 OS << "\n"; 1324 DACtx->dumpFlowCondition(FlowConditionToken, OS); 1325 } 1326 1327 void Environment::dump() const { 1328 dump(llvm::dbgs()); 1329 } 1330 1331 Environment::PrValueToResultObject Environment::buildResultObjectMap( 1332 DataflowAnalysisContext *DACtx, const FunctionDecl *FuncDecl, 1333 RecordStorageLocation *ThisPointeeLoc, 1334 RecordStorageLocation *LocForRecordReturnVal) { 1335 assert(FuncDecl->doesThisDeclarationHaveABody()); 1336 1337 PrValueToResultObject Map; 1338 1339 ResultObjectVisitor Visitor(Map, LocForRecordReturnVal, *DACtx); 1340 if (const auto *Ctor = dyn_cast<CXXConstructorDecl>(FuncDecl)) 1341 Visitor.TraverseConstructorInits(Ctor, ThisPointeeLoc); 1342 Visitor.TraverseStmt(FuncDecl->getBody()); 1343 1344 return Map; 1345 } 1346 1347 RecordStorageLocation *getImplicitObjectLocation(const CXXMemberCallExpr &MCE, 1348 const Environment &Env) { 1349 Expr *ImplicitObject = MCE.getImplicitObjectArgument(); 1350 if (ImplicitObject == nullptr) 1351 return nullptr; 1352 if (ImplicitObject->getType()->isPointerType()) { 1353 if (auto *Val = Env.get<PointerValue>(*ImplicitObject)) 1354 return &cast<RecordStorageLocation>(Val->getPointeeLoc()); 1355 return nullptr; 1356 } 1357 return cast_or_null<RecordStorageLocation>( 1358 Env.getStorageLocation(*ImplicitObject)); 1359 } 1360 1361 RecordStorageLocation *getBaseObjectLocation(const MemberExpr &ME, 1362 const Environment &Env) { 1363 Expr *Base = ME.getBase(); 1364 if (Base == nullptr) 1365 return nullptr; 1366 if (ME.isArrow()) { 1367 if (auto *Val = Env.get<PointerValue>(*Base)) 1368 return &cast<RecordStorageLocation>(Val->getPointeeLoc()); 1369 return nullptr; 1370 } 1371 return Env.get<RecordStorageLocation>(*Base); 1372 } 1373 1374 std::vector<const FieldDecl *> 1375 getFieldsForInitListExpr(const InitListExpr *InitList) { 1376 const RecordDecl *RD = InitList->getType()->getAsRecordDecl(); 1377 assert(RD != nullptr); 1378 1379 std::vector<const FieldDecl *> Fields; 1380 1381 if (InitList->getType()->isUnionType()) { 1382 Fields.push_back(InitList->getInitializedFieldInUnion()); 1383 return Fields; 1384 } 1385 1386 // Unnamed bitfields are only used for padding and do not appear in 1387 // `InitListExpr`'s inits. However, those fields do appear in `RecordDecl`'s 1388 // field list, and we thus need to remove them before mapping inits to 1389 // fields to avoid mapping inits to the wrongs fields. 1390 llvm::copy_if( 1391 RD->fields(), std::back_inserter(Fields), 1392 [](const FieldDecl *Field) { return !Field->isUnnamedBitfield(); }); 1393 return Fields; 1394 } 1395 1396 RecordInitListHelper::RecordInitListHelper(const InitListExpr *InitList) { 1397 auto *RD = InitList->getType()->getAsCXXRecordDecl(); 1398 assert(RD != nullptr); 1399 1400 std::vector<const FieldDecl *> Fields = getFieldsForInitListExpr(InitList); 1401 ArrayRef<Expr *> Inits = InitList->inits(); 1402 1403 // Unions initialized with an empty initializer list need special treatment. 1404 // For structs/classes initialized with an empty initializer list, Clang 1405 // puts `ImplicitValueInitExpr`s in `InitListExpr::inits()`, but for unions, 1406 // it doesn't do this -- so we create an `ImplicitValueInitExpr` ourselves. 1407 SmallVector<Expr *> InitsForUnion; 1408 if (InitList->getType()->isUnionType() && Inits.empty()) { 1409 assert(Fields.size() == 1); 1410 ImplicitValueInitForUnion.emplace(Fields.front()->getType()); 1411 InitsForUnion.push_back(&*ImplicitValueInitForUnion); 1412 Inits = InitsForUnion; 1413 } 1414 1415 size_t InitIdx = 0; 1416 1417 assert(Fields.size() + RD->getNumBases() == Inits.size()); 1418 for (const CXXBaseSpecifier &Base : RD->bases()) { 1419 assert(InitIdx < Inits.size()); 1420 Expr *Init = Inits[InitIdx++]; 1421 BaseInits.emplace_back(&Base, Init); 1422 } 1423 1424 assert(Fields.size() == Inits.size() - InitIdx); 1425 for (const FieldDecl *Field : Fields) { 1426 assert(InitIdx < Inits.size()); 1427 Expr *Init = Inits[InitIdx++]; 1428 FieldInits.emplace_back(Field, Init); 1429 } 1430 } 1431 1432 RecordValue &refreshRecordValue(RecordStorageLocation &Loc, Environment &Env) { 1433 auto &NewVal = Env.create<RecordValue>(Loc); 1434 Env.setValue(Loc, NewVal); 1435 return NewVal; 1436 } 1437 1438 RecordValue &refreshRecordValue(const Expr &Expr, Environment &Env) { 1439 assert(Expr.getType()->isRecordType()); 1440 1441 if (Expr.isPRValue()) 1442 refreshRecordValue(Env.getResultObjectLocation(Expr), Env); 1443 1444 if (auto *Loc = Env.get<RecordStorageLocation>(Expr)) 1445 refreshRecordValue(*Loc, Env); 1446 1447 auto &NewVal = *cast<RecordValue>(Env.createValue(Expr.getType())); 1448 Env.setStorageLocation(Expr, NewVal.getLoc()); 1449 return NewVal; 1450 } 1451 1452 } // namespace dataflow 1453 } // namespace clang 1454