1 //===-- DataflowEnvironment.cpp ---------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines an Environment class that is used by dataflow analyses 10 // that run over Control-Flow Graphs (CFGs) to keep track of the state of the 11 // program at given program points. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" 16 #include "clang/AST/Decl.h" 17 #include "clang/AST/DeclCXX.h" 18 #include "clang/AST/Type.h" 19 #include "clang/Analysis/FlowSensitive/DataflowLattice.h" 20 #include "clang/Analysis/FlowSensitive/StorageLocation.h" 21 #include "clang/Analysis/FlowSensitive/Value.h" 22 #include "llvm/ADT/DenseMap.h" 23 #include "llvm/ADT/DenseSet.h" 24 #include "llvm/Support/Casting.h" 25 #include "llvm/Support/ErrorHandling.h" 26 #include <cassert> 27 #include <memory> 28 #include <utility> 29 30 namespace clang { 31 namespace dataflow { 32 33 // FIXME: convert these to parameters of the analysis or environment. Current 34 // settings have been experimentaly validated, but only for a particular 35 // analysis. 36 static constexpr int MaxCompositeValueDepth = 3; 37 static constexpr int MaxCompositeValueSize = 1000; 38 39 /// Returns a map consisting of key-value entries that are present in both maps. 40 template <typename K, typename V> 41 llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1, 42 const llvm::DenseMap<K, V> &Map2) { 43 llvm::DenseMap<K, V> Result; 44 for (auto &Entry : Map1) { 45 auto It = Map2.find(Entry.first); 46 if (It != Map2.end() && Entry.second == It->second) 47 Result.insert({Entry.first, Entry.second}); 48 } 49 return Result; 50 } 51 52 /// Returns true if and only if `Val1` is equivalent to `Val2`. 53 static bool equivalentValues(QualType Type, Value *Val1, 54 const Environment &Env1, Value *Val2, 55 const Environment &Env2, 56 Environment::ValueModel &Model) { 57 if (Val1 == Val2) 58 return true; 59 60 if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) { 61 auto *IndVal2 = cast<IndirectionValue>(Val2); 62 assert(IndVal1->getKind() == IndVal2->getKind()); 63 if (&IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc()) 64 return true; 65 } 66 67 return Model.compareEquivalent(Type, *Val1, Env1, *Val2, Env2); 68 } 69 70 /// Attempts to merge distinct values `Val1` and `Val2` in `Env1` and `Env2`, 71 /// respectively, of the same type `Type`. Merging generally produces a single 72 /// value that (soundly) approximates the two inputs, although the actual 73 /// meaning depends on `Model`. 74 static Value *mergeDistinctValues(QualType Type, Value *Val1, 75 const Environment &Env1, Value *Val2, 76 const Environment &Env2, 77 Environment &MergedEnv, 78 Environment::ValueModel &Model) { 79 // Join distinct boolean values preserving information about the constraints 80 // in the respective path conditions. 81 // 82 // FIXME: Does not work for backedges, since the two (or more) paths will not 83 // have mutually exclusive conditions. 84 if (auto *Expr1 = dyn_cast<BoolValue>(Val1)) { 85 auto *Expr2 = cast<BoolValue>(Val2); 86 return &Env1.makeOr(Env1.makeAnd(Env1.getFlowConditionToken(), *Expr1), 87 Env1.makeAnd(Env2.getFlowConditionToken(), *Expr2)); 88 } 89 90 // FIXME: add unit tests that cover this statement. 91 if (auto *IndVal1 = dyn_cast<IndirectionValue>(Val1)) { 92 auto *IndVal2 = cast<IndirectionValue>(Val2); 93 assert(IndVal1->getKind() == IndVal2->getKind()); 94 if (&IndVal1->getPointeeLoc() == &IndVal2->getPointeeLoc()) { 95 return Val1; 96 } 97 } 98 99 // FIXME: Consider destroying `MergedValue` immediately if `ValueModel::merge` 100 // returns false to avoid storing unneeded values in `DACtx`. 101 if (Value *MergedVal = MergedEnv.createValue(Type)) 102 if (Model.merge(Type, *Val1, Env1, *Val2, Env2, *MergedVal, MergedEnv)) 103 return MergedVal; 104 105 return nullptr; 106 } 107 108 /// Initializes a global storage value. 109 static void initGlobalVar(const VarDecl &D, Environment &Env) { 110 if (!D.hasGlobalStorage() || 111 Env.getStorageLocation(D, SkipPast::None) != nullptr) 112 return; 113 114 auto &Loc = Env.createStorageLocation(D); 115 Env.setStorageLocation(D, Loc); 116 if (auto *Val = Env.createValue(D.getType())) 117 Env.setValue(Loc, *Val); 118 } 119 120 /// Initializes a global storage value. 121 static void initGlobalVar(const Decl &D, Environment &Env) { 122 if (auto *V = dyn_cast<VarDecl>(&D)) 123 initGlobalVar(*V, Env); 124 } 125 126 /// Initializes global storage values that are declared or referenced from 127 /// sub-statements of `S`. 128 // FIXME: Add support for resetting globals after function calls to enable 129 // the implementation of sound analyses. 130 static void initGlobalVars(const Stmt &S, Environment &Env) { 131 for (auto *Child : S.children()) { 132 if (Child != nullptr) 133 initGlobalVars(*Child, Env); 134 } 135 136 if (auto *DS = dyn_cast<DeclStmt>(&S)) { 137 if (DS->isSingleDecl()) { 138 initGlobalVar(*DS->getSingleDecl(), Env); 139 } else { 140 for (auto *D : DS->getDeclGroup()) 141 initGlobalVar(*D, Env); 142 } 143 } else if (auto *E = dyn_cast<DeclRefExpr>(&S)) { 144 initGlobalVar(*E->getDecl(), Env); 145 } else if (auto *E = dyn_cast<MemberExpr>(&S)) { 146 initGlobalVar(*E->getMemberDecl(), Env); 147 } 148 } 149 150 static void 151 getFieldsFromClassHierarchy(QualType Type, bool IgnorePrivateFields, 152 llvm::DenseSet<const FieldDecl *> &Fields) { 153 if (Type->isIncompleteType() || Type->isDependentType() || 154 !Type->isRecordType()) 155 return; 156 157 for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) { 158 if (IgnorePrivateFields && 159 (Field->getAccess() == AS_private || 160 (Field->getAccess() == AS_none && Type->getAsRecordDecl()->isClass()))) 161 continue; 162 Fields.insert(Field); 163 } 164 if (auto *CXXRecord = Type->getAsCXXRecordDecl()) { 165 for (const CXXBaseSpecifier &Base : CXXRecord->bases()) { 166 // Ignore private fields (including default access in C++ classes) in 167 // base classes, because they are not visible in derived classes. 168 getFieldsFromClassHierarchy(Base.getType(), /*IgnorePrivateFields=*/true, 169 Fields); 170 } 171 } 172 } 173 174 /// Gets the set of all fields accesible from the type. 175 /// 176 /// FIXME: Does not precisely handle non-virtual diamond inheritance. A single 177 /// field decl will be modeled for all instances of the inherited field. 178 static llvm::DenseSet<const FieldDecl *> 179 getAccessibleObjectFields(QualType Type) { 180 llvm::DenseSet<const FieldDecl *> Fields; 181 // Don't ignore private fields for the class itself, only its super classes. 182 getFieldsFromClassHierarchy(Type, /*IgnorePrivateFields=*/false, Fields); 183 return Fields; 184 } 185 186 Environment::Environment(DataflowAnalysisContext &DACtx) 187 : DACtx(&DACtx), FlowConditionToken(&DACtx.makeFlowConditionToken()) {} 188 189 Environment::Environment(const Environment &Other) 190 : DACtx(Other.DACtx), DeclToLoc(Other.DeclToLoc), 191 ExprToLoc(Other.ExprToLoc), LocToVal(Other.LocToVal), 192 MemberLocToStruct(Other.MemberLocToStruct), 193 FlowConditionToken(&DACtx->forkFlowCondition(*Other.FlowConditionToken)) { 194 } 195 196 Environment &Environment::operator=(const Environment &Other) { 197 Environment Copy(Other); 198 *this = std::move(Copy); 199 return *this; 200 } 201 202 Environment::Environment(DataflowAnalysisContext &DACtx, 203 const DeclContext &DeclCtx) 204 : Environment(DACtx) { 205 if (const auto *FuncDecl = dyn_cast<FunctionDecl>(&DeclCtx)) { 206 assert(FuncDecl->getBody() != nullptr); 207 initGlobalVars(*FuncDecl->getBody(), *this); 208 for (const auto *ParamDecl : FuncDecl->parameters()) { 209 assert(ParamDecl != nullptr); 210 auto &ParamLoc = createStorageLocation(*ParamDecl); 211 setStorageLocation(*ParamDecl, ParamLoc); 212 if (Value *ParamVal = createValue(ParamDecl->getType())) 213 setValue(ParamLoc, *ParamVal); 214 } 215 } 216 217 if (const auto *MethodDecl = dyn_cast<CXXMethodDecl>(&DeclCtx)) { 218 if (!MethodDecl->isStatic()) { 219 QualType ThisPointeeType = MethodDecl->getThisObjectType(); 220 // FIXME: Add support for union types. 221 if (!ThisPointeeType->isUnionType()) { 222 auto &ThisPointeeLoc = createStorageLocation(ThisPointeeType); 223 DACtx.setThisPointeeStorageLocation(ThisPointeeLoc); 224 if (Value *ThisPointeeVal = createValue(ThisPointeeType)) 225 setValue(ThisPointeeLoc, *ThisPointeeVal); 226 } 227 } 228 } 229 } 230 231 bool Environment::equivalentTo(const Environment &Other, 232 Environment::ValueModel &Model) const { 233 assert(DACtx == Other.DACtx); 234 235 if (DeclToLoc != Other.DeclToLoc) 236 return false; 237 238 if (ExprToLoc != Other.ExprToLoc) 239 return false; 240 241 if (MemberLocToStruct != Other.MemberLocToStruct) 242 return false; 243 244 // Compare the contents for the intersection of their domains. 245 for (auto &Entry : LocToVal) { 246 const StorageLocation *Loc = Entry.first; 247 assert(Loc != nullptr); 248 249 Value *Val = Entry.second; 250 assert(Val != nullptr); 251 252 auto It = Other.LocToVal.find(Loc); 253 if (It == Other.LocToVal.end()) 254 continue; 255 assert(It->second != nullptr); 256 257 if (!equivalentValues(Loc->getType(), Val, *this, It->second, Other, Model)) 258 return false; 259 } 260 261 return true; 262 } 263 264 LatticeJoinEffect Environment::join(const Environment &Other, 265 Environment::ValueModel &Model) { 266 assert(DACtx == Other.DACtx); 267 268 auto Effect = LatticeJoinEffect::Unchanged; 269 270 Environment JoinedEnv(*DACtx); 271 272 JoinedEnv.DeclToLoc = intersectDenseMaps(DeclToLoc, Other.DeclToLoc); 273 if (DeclToLoc.size() != JoinedEnv.DeclToLoc.size()) 274 Effect = LatticeJoinEffect::Changed; 275 276 JoinedEnv.ExprToLoc = intersectDenseMaps(ExprToLoc, Other.ExprToLoc); 277 if (ExprToLoc.size() != JoinedEnv.ExprToLoc.size()) 278 Effect = LatticeJoinEffect::Changed; 279 280 JoinedEnv.MemberLocToStruct = 281 intersectDenseMaps(MemberLocToStruct, Other.MemberLocToStruct); 282 if (MemberLocToStruct.size() != JoinedEnv.MemberLocToStruct.size()) 283 Effect = LatticeJoinEffect::Changed; 284 285 // FIXME: set `Effect` as needed. 286 JoinedEnv.FlowConditionToken = &DACtx->joinFlowConditions( 287 *FlowConditionToken, *Other.FlowConditionToken); 288 289 for (auto &Entry : LocToVal) { 290 const StorageLocation *Loc = Entry.first; 291 assert(Loc != nullptr); 292 293 Value *Val = Entry.second; 294 assert(Val != nullptr); 295 296 auto It = Other.LocToVal.find(Loc); 297 if (It == Other.LocToVal.end()) 298 continue; 299 assert(It->second != nullptr); 300 301 if (Val == It->second) { 302 JoinedEnv.LocToVal.insert({Loc, Val}); 303 continue; 304 } 305 306 if (Value *MergedVal = mergeDistinctValues( 307 Loc->getType(), Val, *this, It->second, Other, JoinedEnv, Model)) 308 JoinedEnv.LocToVal.insert({Loc, MergedVal}); 309 } 310 if (LocToVal.size() != JoinedEnv.LocToVal.size()) 311 Effect = LatticeJoinEffect::Changed; 312 313 *this = std::move(JoinedEnv); 314 315 return Effect; 316 } 317 318 StorageLocation &Environment::createStorageLocation(QualType Type) { 319 assert(!Type.isNull()); 320 if (Type->isStructureOrClassType() || Type->isUnionType()) { 321 // FIXME: Explore options to avoid eager initialization of fields as some of 322 // them might not be needed for a particular analysis. 323 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 324 for (const FieldDecl *Field : getAccessibleObjectFields(Type)) { 325 FieldLocs.insert({Field, &createStorageLocation(Field->getType())}); 326 } 327 return takeOwnership( 328 std::make_unique<AggregateStorageLocation>(Type, std::move(FieldLocs))); 329 } 330 return takeOwnership(std::make_unique<ScalarStorageLocation>(Type)); 331 } 332 333 StorageLocation &Environment::createStorageLocation(const VarDecl &D) { 334 // Evaluated declarations are always assigned the same storage locations to 335 // ensure that the environment stabilizes across loop iterations. Storage 336 // locations for evaluated declarations are stored in the analysis context. 337 if (auto *Loc = DACtx->getStorageLocation(D)) 338 return *Loc; 339 auto &Loc = createStorageLocation(D.getType()); 340 DACtx->setStorageLocation(D, Loc); 341 return Loc; 342 } 343 344 StorageLocation &Environment::createStorageLocation(const Expr &E) { 345 // Evaluated expressions are always assigned the same storage locations to 346 // ensure that the environment stabilizes across loop iterations. Storage 347 // locations for evaluated expressions are stored in the analysis context. 348 if (auto *Loc = DACtx->getStorageLocation(E)) 349 return *Loc; 350 auto &Loc = createStorageLocation(E.getType()); 351 DACtx->setStorageLocation(E, Loc); 352 return Loc; 353 } 354 355 void Environment::setStorageLocation(const ValueDecl &D, StorageLocation &Loc) { 356 assert(DeclToLoc.find(&D) == DeclToLoc.end()); 357 DeclToLoc[&D] = &Loc; 358 } 359 360 StorageLocation *Environment::getStorageLocation(const ValueDecl &D, 361 SkipPast SP) const { 362 auto It = DeclToLoc.find(&D); 363 return It == DeclToLoc.end() ? nullptr : &skip(*It->second, SP); 364 } 365 366 void Environment::setStorageLocation(const Expr &E, StorageLocation &Loc) { 367 assert(ExprToLoc.find(&E) == ExprToLoc.end()); 368 ExprToLoc[&E] = &Loc; 369 } 370 371 StorageLocation *Environment::getStorageLocation(const Expr &E, 372 SkipPast SP) const { 373 // FIXME: Add a test with parens. 374 auto It = ExprToLoc.find(E.IgnoreParens()); 375 return It == ExprToLoc.end() ? nullptr : &skip(*It->second, SP); 376 } 377 378 StorageLocation *Environment::getThisPointeeStorageLocation() const { 379 return DACtx->getThisPointeeStorageLocation(); 380 } 381 382 void Environment::setValue(const StorageLocation &Loc, Value &Val) { 383 LocToVal[&Loc] = &Val; 384 385 if (auto *StructVal = dyn_cast<StructValue>(&Val)) { 386 auto &AggregateLoc = *cast<AggregateStorageLocation>(&Loc); 387 388 const QualType Type = AggregateLoc.getType(); 389 assert(Type->isStructureOrClassType()); 390 391 for (const FieldDecl *Field : getAccessibleObjectFields(Type)) { 392 assert(Field != nullptr); 393 StorageLocation &FieldLoc = AggregateLoc.getChild(*Field); 394 MemberLocToStruct[&FieldLoc] = std::make_pair(StructVal, Field); 395 if (auto *FieldVal = StructVal->getChild(*Field)) 396 setValue(FieldLoc, *FieldVal); 397 } 398 } 399 400 auto IT = MemberLocToStruct.find(&Loc); 401 if (IT != MemberLocToStruct.end()) { 402 // `Loc` is the location of a struct member so we need to also update the 403 // value of the member in the corresponding `StructValue`. 404 405 assert(IT->second.first != nullptr); 406 StructValue &StructVal = *IT->second.first; 407 408 assert(IT->second.second != nullptr); 409 const ValueDecl &Member = *IT->second.second; 410 411 StructVal.setChild(Member, Val); 412 } 413 } 414 415 Value *Environment::getValue(const StorageLocation &Loc) const { 416 auto It = LocToVal.find(&Loc); 417 return It == LocToVal.end() ? nullptr : It->second; 418 } 419 420 Value *Environment::getValue(const ValueDecl &D, SkipPast SP) const { 421 auto *Loc = getStorageLocation(D, SP); 422 if (Loc == nullptr) 423 return nullptr; 424 return getValue(*Loc); 425 } 426 427 Value *Environment::getValue(const Expr &E, SkipPast SP) const { 428 auto *Loc = getStorageLocation(E, SP); 429 if (Loc == nullptr) 430 return nullptr; 431 return getValue(*Loc); 432 } 433 434 Value *Environment::createValue(QualType Type) { 435 llvm::DenseSet<QualType> Visited; 436 int CreatedValuesCount = 0; 437 Value *Val = createValueUnlessSelfReferential(Type, Visited, /*Depth=*/0, 438 CreatedValuesCount); 439 if (CreatedValuesCount > MaxCompositeValueSize) { 440 llvm::errs() << "Attempting to initialize a huge value of type: " << Type 441 << '\n'; 442 } 443 return Val; 444 } 445 446 Value *Environment::createValueUnlessSelfReferential( 447 QualType Type, llvm::DenseSet<QualType> &Visited, int Depth, 448 int &CreatedValuesCount) { 449 assert(!Type.isNull()); 450 451 // Allow unlimited fields at depth 1; only cap at deeper nesting levels. 452 if ((Depth > 1 && CreatedValuesCount > MaxCompositeValueSize) || 453 Depth > MaxCompositeValueDepth) 454 return nullptr; 455 456 if (Type->isBooleanType()) { 457 CreatedValuesCount++; 458 return &makeAtomicBoolValue(); 459 } 460 461 if (Type->isIntegerType()) { 462 CreatedValuesCount++; 463 return &takeOwnership(std::make_unique<IntegerValue>()); 464 } 465 466 if (Type->isReferenceType()) { 467 CreatedValuesCount++; 468 QualType PointeeType = Type->castAs<ReferenceType>()->getPointeeType(); 469 auto &PointeeLoc = createStorageLocation(PointeeType); 470 471 if (!Visited.contains(PointeeType.getCanonicalType())) { 472 Visited.insert(PointeeType.getCanonicalType()); 473 Value *PointeeVal = createValueUnlessSelfReferential( 474 PointeeType, Visited, Depth, CreatedValuesCount); 475 Visited.erase(PointeeType.getCanonicalType()); 476 477 if (PointeeVal != nullptr) 478 setValue(PointeeLoc, *PointeeVal); 479 } 480 481 return &takeOwnership(std::make_unique<ReferenceValue>(PointeeLoc)); 482 } 483 484 if (Type->isPointerType()) { 485 CreatedValuesCount++; 486 QualType PointeeType = Type->castAs<PointerType>()->getPointeeType(); 487 auto &PointeeLoc = createStorageLocation(PointeeType); 488 489 if (!Visited.contains(PointeeType.getCanonicalType())) { 490 Visited.insert(PointeeType.getCanonicalType()); 491 Value *PointeeVal = createValueUnlessSelfReferential( 492 PointeeType, Visited, Depth, CreatedValuesCount); 493 Visited.erase(PointeeType.getCanonicalType()); 494 495 if (PointeeVal != nullptr) 496 setValue(PointeeLoc, *PointeeVal); 497 } 498 499 return &takeOwnership(std::make_unique<PointerValue>(PointeeLoc)); 500 } 501 502 if (Type->isStructureOrClassType()) { 503 CreatedValuesCount++; 504 // FIXME: Initialize only fields that are accessed in the context that is 505 // being analyzed. 506 llvm::DenseMap<const ValueDecl *, Value *> FieldValues; 507 for (const FieldDecl *Field : getAccessibleObjectFields(Type)) { 508 assert(Field != nullptr); 509 510 QualType FieldType = Field->getType(); 511 if (Visited.contains(FieldType.getCanonicalType())) 512 continue; 513 514 Visited.insert(FieldType.getCanonicalType()); 515 if (auto *FieldValue = createValueUnlessSelfReferential( 516 FieldType, Visited, Depth + 1, CreatedValuesCount)) 517 FieldValues.insert({Field, FieldValue}); 518 Visited.erase(FieldType.getCanonicalType()); 519 } 520 521 return &takeOwnership( 522 std::make_unique<StructValue>(std::move(FieldValues))); 523 } 524 525 return nullptr; 526 } 527 528 StorageLocation &Environment::skip(StorageLocation &Loc, SkipPast SP) const { 529 switch (SP) { 530 case SkipPast::None: 531 return Loc; 532 case SkipPast::Reference: 533 // References cannot be chained so we only need to skip past one level of 534 // indirection. 535 if (auto *Val = dyn_cast_or_null<ReferenceValue>(getValue(Loc))) 536 return Val->getPointeeLoc(); 537 return Loc; 538 case SkipPast::ReferenceThenPointer: 539 StorageLocation &LocPastRef = skip(Loc, SkipPast::Reference); 540 if (auto *Val = dyn_cast_or_null<PointerValue>(getValue(LocPastRef))) 541 return Val->getPointeeLoc(); 542 return LocPastRef; 543 } 544 llvm_unreachable("bad SkipPast kind"); 545 } 546 547 const StorageLocation &Environment::skip(const StorageLocation &Loc, 548 SkipPast SP) const { 549 return skip(*const_cast<StorageLocation *>(&Loc), SP); 550 } 551 552 void Environment::addToFlowCondition(BoolValue &Val) { 553 DACtx->addFlowConditionConstraint(*FlowConditionToken, Val); 554 } 555 556 bool Environment::flowConditionImplies(BoolValue &Val) const { 557 return DACtx->flowConditionImplies(*FlowConditionToken, Val); 558 } 559 560 } // namespace dataflow 561 } // namespace clang 562