10b57cec5SDimitry Andric //===-- CodeGenTBAA.cpp - TBAA information for LLVM CodeGen ---------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This is the code that manages TBAA information and defines the TBAA policy 100b57cec5SDimitry Andric // for the optimizer to use. Relevant standards text includes: 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric // C99 6.5p7 130b57cec5SDimitry Andric // C++ [basic.lval] (p10 in n3126, p15 in some earlier versions) 140b57cec5SDimitry Andric // 150b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 160b57cec5SDimitry Andric 170b57cec5SDimitry Andric #include "CodeGenTBAA.h" 180fca6ea1SDimitry Andric #include "ABIInfoImpl.h" 19*5deeebd8SDimitry Andric #include "CGCXXABI.h" 200fca6ea1SDimitry Andric #include "CGRecordLayout.h" 210fca6ea1SDimitry Andric #include "CodeGenTypes.h" 220b57cec5SDimitry Andric #include "clang/AST/ASTContext.h" 230b57cec5SDimitry Andric #include "clang/AST/Attr.h" 240b57cec5SDimitry Andric #include "clang/AST/Mangle.h" 250b57cec5SDimitry Andric #include "clang/AST/RecordLayout.h" 260b57cec5SDimitry Andric #include "clang/Basic/CodeGenOptions.h" 270fca6ea1SDimitry Andric #include "clang/Basic/TargetInfo.h" 280b57cec5SDimitry Andric #include "llvm/ADT/SmallSet.h" 290b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 300b57cec5SDimitry Andric #include "llvm/IR/LLVMContext.h" 310b57cec5SDimitry Andric #include "llvm/IR/Metadata.h" 320b57cec5SDimitry Andric #include "llvm/IR/Module.h" 330b57cec5SDimitry Andric #include "llvm/IR/Type.h" 340fca6ea1SDimitry Andric #include "llvm/Support/Debug.h" 350b57cec5SDimitry Andric using namespace clang; 360b57cec5SDimitry Andric using namespace CodeGen; 370b57cec5SDimitry Andric 380fca6ea1SDimitry Andric CodeGenTBAA::CodeGenTBAA(ASTContext &Ctx, CodeGenTypes &CGTypes, 390fca6ea1SDimitry Andric llvm::Module &M, const CodeGenOptions &CGO, 40*5deeebd8SDimitry Andric const LangOptions &Features) 410fca6ea1SDimitry Andric : Context(Ctx), CGTypes(CGTypes), Module(M), CodeGenOpts(CGO), 42*5deeebd8SDimitry Andric Features(Features), MDHelper(M.getContext()), Root(nullptr), 43*5deeebd8SDimitry Andric Char(nullptr) {} 440b57cec5SDimitry Andric 450b57cec5SDimitry Andric CodeGenTBAA::~CodeGenTBAA() { 460b57cec5SDimitry Andric } 470b57cec5SDimitry Andric 480b57cec5SDimitry Andric llvm::MDNode *CodeGenTBAA::getRoot() { 490b57cec5SDimitry Andric // Define the root of the tree. This identifies the tree, so that 500b57cec5SDimitry Andric // if our LLVM IR is linked with LLVM IR from a different front-end 510b57cec5SDimitry Andric // (or a different version of this front-end), their TBAA trees will 520b57cec5SDimitry Andric // remain distinct, and the optimizer will treat them conservatively. 530b57cec5SDimitry Andric if (!Root) { 540b57cec5SDimitry Andric if (Features.CPlusPlus) 550b57cec5SDimitry Andric Root = MDHelper.createTBAARoot("Simple C++ TBAA"); 560b57cec5SDimitry Andric else 570b57cec5SDimitry Andric Root = MDHelper.createTBAARoot("Simple C/C++ TBAA"); 580b57cec5SDimitry Andric } 590b57cec5SDimitry Andric 600b57cec5SDimitry Andric return Root; 610b57cec5SDimitry Andric } 620b57cec5SDimitry Andric 630b57cec5SDimitry Andric llvm::MDNode *CodeGenTBAA::createScalarTypeNode(StringRef Name, 640b57cec5SDimitry Andric llvm::MDNode *Parent, 650b57cec5SDimitry Andric uint64_t Size) { 660b57cec5SDimitry Andric if (CodeGenOpts.NewStructPathTBAA) { 670b57cec5SDimitry Andric llvm::Metadata *Id = MDHelper.createString(Name); 680b57cec5SDimitry Andric return MDHelper.createTBAATypeNode(Parent, Size, Id); 690b57cec5SDimitry Andric } 700b57cec5SDimitry Andric return MDHelper.createTBAAScalarTypeNode(Name, Parent); 710b57cec5SDimitry Andric } 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric llvm::MDNode *CodeGenTBAA::getChar() { 740b57cec5SDimitry Andric // Define the root of the tree for user-accessible memory. C and C++ 750b57cec5SDimitry Andric // give special powers to char and certain similar types. However, 760b57cec5SDimitry Andric // these special powers only cover user-accessible memory, and doesn't 770b57cec5SDimitry Andric // include things like vtables. 780b57cec5SDimitry Andric if (!Char) 790b57cec5SDimitry Andric Char = createScalarTypeNode("omnipotent char", getRoot(), /* Size= */ 1); 800b57cec5SDimitry Andric 810b57cec5SDimitry Andric return Char; 820b57cec5SDimitry Andric } 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric static bool TypeHasMayAlias(QualType QTy) { 850b57cec5SDimitry Andric // Tagged types have declarations, and therefore may have attributes. 86480093f4SDimitry Andric if (auto *TD = QTy->getAsTagDecl()) 87480093f4SDimitry Andric if (TD->hasAttr<MayAliasAttr>()) 880b57cec5SDimitry Andric return true; 890b57cec5SDimitry Andric 90480093f4SDimitry Andric // Also look for may_alias as a declaration attribute on a typedef. 91480093f4SDimitry Andric // FIXME: We should follow GCC and model may_alias as a type attribute 92480093f4SDimitry Andric // rather than as a declaration attribute. 93480093f4SDimitry Andric while (auto *TT = QTy->getAs<TypedefType>()) { 94480093f4SDimitry Andric if (TT->getDecl()->hasAttr<MayAliasAttr>()) 95480093f4SDimitry Andric return true; 96480093f4SDimitry Andric QTy = TT->desugar(); 97480093f4SDimitry Andric } 980b57cec5SDimitry Andric return false; 990b57cec5SDimitry Andric } 1000b57cec5SDimitry Andric 1010b57cec5SDimitry Andric /// Check if the given type is a valid base type to be used in access tags. 1020b57cec5SDimitry Andric static bool isValidBaseType(QualType QTy) { 1030b57cec5SDimitry Andric if (const RecordType *TTy = QTy->getAs<RecordType>()) { 1040b57cec5SDimitry Andric const RecordDecl *RD = TTy->getDecl()->getDefinition(); 1050b57cec5SDimitry Andric // Incomplete types are not valid base access types. 1060b57cec5SDimitry Andric if (!RD) 1070b57cec5SDimitry Andric return false; 1080b57cec5SDimitry Andric if (RD->hasFlexibleArrayMember()) 1090b57cec5SDimitry Andric return false; 1100b57cec5SDimitry Andric // RD can be struct, union, class, interface or enum. 1110b57cec5SDimitry Andric // For now, we only handle struct and class. 1120b57cec5SDimitry Andric if (RD->isStruct() || RD->isClass()) 1130b57cec5SDimitry Andric return true; 1140b57cec5SDimitry Andric } 1150b57cec5SDimitry Andric return false; 1160b57cec5SDimitry Andric } 1170b57cec5SDimitry Andric 1180b57cec5SDimitry Andric llvm::MDNode *CodeGenTBAA::getTypeInfoHelper(const Type *Ty) { 1190b57cec5SDimitry Andric uint64_t Size = Context.getTypeSizeInChars(Ty).getQuantity(); 1200b57cec5SDimitry Andric 1210b57cec5SDimitry Andric // Handle builtin types. 1220b57cec5SDimitry Andric if (const BuiltinType *BTy = dyn_cast<BuiltinType>(Ty)) { 1230b57cec5SDimitry Andric switch (BTy->getKind()) { 1240b57cec5SDimitry Andric // Character types are special and can alias anything. 1250b57cec5SDimitry Andric // In C++, this technically only includes "char" and "unsigned char", 1260b57cec5SDimitry Andric // and not "signed char". In C, it includes all three. For now, 1270b57cec5SDimitry Andric // the risk of exploiting this detail in C++ seems likely to outweigh 1280b57cec5SDimitry Andric // the benefit. 1290b57cec5SDimitry Andric case BuiltinType::Char_U: 1300b57cec5SDimitry Andric case BuiltinType::Char_S: 1310b57cec5SDimitry Andric case BuiltinType::UChar: 1320b57cec5SDimitry Andric case BuiltinType::SChar: 1330b57cec5SDimitry Andric return getChar(); 1340b57cec5SDimitry Andric 1350b57cec5SDimitry Andric // Unsigned types can alias their corresponding signed types. 1360b57cec5SDimitry Andric case BuiltinType::UShort: 1370b57cec5SDimitry Andric return getTypeInfo(Context.ShortTy); 1380b57cec5SDimitry Andric case BuiltinType::UInt: 1390b57cec5SDimitry Andric return getTypeInfo(Context.IntTy); 1400b57cec5SDimitry Andric case BuiltinType::ULong: 1410b57cec5SDimitry Andric return getTypeInfo(Context.LongTy); 1420b57cec5SDimitry Andric case BuiltinType::ULongLong: 1430b57cec5SDimitry Andric return getTypeInfo(Context.LongLongTy); 1440b57cec5SDimitry Andric case BuiltinType::UInt128: 1450b57cec5SDimitry Andric return getTypeInfo(Context.Int128Ty); 1460b57cec5SDimitry Andric 1475ffd83dbSDimitry Andric case BuiltinType::UShortFract: 1485ffd83dbSDimitry Andric return getTypeInfo(Context.ShortFractTy); 1495ffd83dbSDimitry Andric case BuiltinType::UFract: 1505ffd83dbSDimitry Andric return getTypeInfo(Context.FractTy); 1515ffd83dbSDimitry Andric case BuiltinType::ULongFract: 1525ffd83dbSDimitry Andric return getTypeInfo(Context.LongFractTy); 1535ffd83dbSDimitry Andric 1545ffd83dbSDimitry Andric case BuiltinType::SatUShortFract: 1555ffd83dbSDimitry Andric return getTypeInfo(Context.SatShortFractTy); 1565ffd83dbSDimitry Andric case BuiltinType::SatUFract: 1575ffd83dbSDimitry Andric return getTypeInfo(Context.SatFractTy); 1585ffd83dbSDimitry Andric case BuiltinType::SatULongFract: 1595ffd83dbSDimitry Andric return getTypeInfo(Context.SatLongFractTy); 1605ffd83dbSDimitry Andric 1615ffd83dbSDimitry Andric case BuiltinType::UShortAccum: 1625ffd83dbSDimitry Andric return getTypeInfo(Context.ShortAccumTy); 1635ffd83dbSDimitry Andric case BuiltinType::UAccum: 1645ffd83dbSDimitry Andric return getTypeInfo(Context.AccumTy); 1655ffd83dbSDimitry Andric case BuiltinType::ULongAccum: 1665ffd83dbSDimitry Andric return getTypeInfo(Context.LongAccumTy); 1675ffd83dbSDimitry Andric 1685ffd83dbSDimitry Andric case BuiltinType::SatUShortAccum: 1695ffd83dbSDimitry Andric return getTypeInfo(Context.SatShortAccumTy); 1705ffd83dbSDimitry Andric case BuiltinType::SatUAccum: 1715ffd83dbSDimitry Andric return getTypeInfo(Context.SatAccumTy); 1725ffd83dbSDimitry Andric case BuiltinType::SatULongAccum: 1735ffd83dbSDimitry Andric return getTypeInfo(Context.SatLongAccumTy); 1745ffd83dbSDimitry Andric 1750b57cec5SDimitry Andric // Treat all other builtin types as distinct types. This includes 1760b57cec5SDimitry Andric // treating wchar_t, char16_t, and char32_t as distinct from their 1770b57cec5SDimitry Andric // "underlying types". 1780b57cec5SDimitry Andric default: 1790b57cec5SDimitry Andric return createScalarTypeNode(BTy->getName(Features), getChar(), Size); 1800b57cec5SDimitry Andric } 1810b57cec5SDimitry Andric } 1820b57cec5SDimitry Andric 1830b57cec5SDimitry Andric // C++1z [basic.lval]p10: "If a program attempts to access the stored value of 1840b57cec5SDimitry Andric // an object through a glvalue of other than one of the following types the 1850b57cec5SDimitry Andric // behavior is undefined: [...] a char, unsigned char, or std::byte type." 1860b57cec5SDimitry Andric if (Ty->isStdByteType()) 1870b57cec5SDimitry Andric return getChar(); 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric // Handle pointers and references. 1900fca6ea1SDimitry Andric // 1910fca6ea1SDimitry Andric // C has a very strict rule for pointer aliasing. C23 6.7.6.1p2: 1920fca6ea1SDimitry Andric // For two pointer types to be compatible, both shall be identically 1930fca6ea1SDimitry Andric // qualified and both shall be pointers to compatible types. 1940fca6ea1SDimitry Andric // 1950fca6ea1SDimitry Andric // This rule is impractically strict; we want to at least ignore CVR 1960fca6ea1SDimitry Andric // qualifiers. Distinguishing by CVR qualifiers would make it UB to 1970fca6ea1SDimitry Andric // e.g. cast a `char **` to `const char * const *` and dereference it, 1980fca6ea1SDimitry Andric // which is too common and useful to invalidate. C++'s similar types 1990fca6ea1SDimitry Andric // rule permits qualifier differences in these nested positions; in fact, 2000fca6ea1SDimitry Andric // C++ even allows that cast as an implicit conversion. 2010fca6ea1SDimitry Andric // 2020fca6ea1SDimitry Andric // Other qualifiers could theoretically be distinguished, especially if 2030fca6ea1SDimitry Andric // they involve a significant representation difference. We don't 2040fca6ea1SDimitry Andric // currently do so, however. 2050fca6ea1SDimitry Andric // 2060fca6ea1SDimitry Andric // Computing the pointee type string recursively is implicitly more 2070fca6ea1SDimitry Andric // forgiving than the standards require. Effectively, we are turning 2080fca6ea1SDimitry Andric // the question "are these types compatible/similar" into "are 2090fca6ea1SDimitry Andric // accesses to these types allowed to alias". In both C and C++, 2100fca6ea1SDimitry Andric // the latter question has special carve-outs for signedness 2110fca6ea1SDimitry Andric // mismatches that only apply at the top level. As a result, we are 2120fca6ea1SDimitry Andric // allowing e.g. `int *` l-values to access `unsigned *` objects. 2130fca6ea1SDimitry Andric if (Ty->isPointerType() || Ty->isReferenceType()) { 2140fca6ea1SDimitry Andric llvm::MDNode *AnyPtr = createScalarTypeNode("any pointer", getChar(), Size); 2150fca6ea1SDimitry Andric if (!CodeGenOpts.PointerTBAA) 2160fca6ea1SDimitry Andric return AnyPtr; 2170fca6ea1SDimitry Andric // Compute the depth of the pointer and generate a tag of the form "p<depth> 2180fca6ea1SDimitry Andric // <base type tag>". 2190fca6ea1SDimitry Andric unsigned PtrDepth = 0; 2200fca6ea1SDimitry Andric do { 2210fca6ea1SDimitry Andric PtrDepth++; 2220fca6ea1SDimitry Andric Ty = Ty->getPointeeType().getTypePtr(); 2230fca6ea1SDimitry Andric } while (Ty->isPointerType()); 2240b57cec5SDimitry Andric // TODO: Implement C++'s type "similarity" and consider dis-"similar" 2250fca6ea1SDimitry Andric // pointers distinct for non-builtin types. 2260fca6ea1SDimitry Andric if (isa<BuiltinType>(Ty)) { 2270fca6ea1SDimitry Andric llvm::MDNode *ScalarMD = getTypeInfoHelper(Ty); 2280fca6ea1SDimitry Andric StringRef Name = 2290fca6ea1SDimitry Andric cast<llvm::MDString>( 2300fca6ea1SDimitry Andric ScalarMD->getOperand(CodeGenOpts.NewStructPathTBAA ? 2 : 0)) 2310fca6ea1SDimitry Andric ->getString(); 2320fca6ea1SDimitry Andric SmallString<256> OutName("p"); 2330fca6ea1SDimitry Andric OutName += std::to_string(PtrDepth); 2340fca6ea1SDimitry Andric OutName += " "; 2350fca6ea1SDimitry Andric OutName += Name; 2360fca6ea1SDimitry Andric return createScalarTypeNode(OutName, AnyPtr, Size); 2370fca6ea1SDimitry Andric } 2380fca6ea1SDimitry Andric return AnyPtr; 2390fca6ea1SDimitry Andric } 2400b57cec5SDimitry Andric 2410b57cec5SDimitry Andric // Accesses to arrays are accesses to objects of their element types. 2420b57cec5SDimitry Andric if (CodeGenOpts.NewStructPathTBAA && Ty->isArrayType()) 2430b57cec5SDimitry Andric return getTypeInfo(cast<ArrayType>(Ty)->getElementType()); 2440b57cec5SDimitry Andric 2450b57cec5SDimitry Andric // Enum types are distinct types. In C++ they have "underlying types", 2460b57cec5SDimitry Andric // however they aren't related for TBAA. 2470b57cec5SDimitry Andric if (const EnumType *ETy = dyn_cast<EnumType>(Ty)) { 2485f757f3fSDimitry Andric if (!Features.CPlusPlus) 2495f757f3fSDimitry Andric return getTypeInfo(ETy->getDecl()->getIntegerType()); 2505f757f3fSDimitry Andric 2510b57cec5SDimitry Andric // In C++ mode, types have linkage, so we can rely on the ODR and 2520b57cec5SDimitry Andric // on their mangled names, if they're external. 2530b57cec5SDimitry Andric // TODO: Is there a way to get a program-wide unique name for a 2540b57cec5SDimitry Andric // decl with local linkage or no linkage? 2555f757f3fSDimitry Andric if (!ETy->getDecl()->isExternallyVisible()) 2560b57cec5SDimitry Andric return getChar(); 2570b57cec5SDimitry Andric 2580b57cec5SDimitry Andric SmallString<256> OutName; 2590b57cec5SDimitry Andric llvm::raw_svector_ostream Out(OutName); 260*5deeebd8SDimitry Andric CGTypes.getCXXABI().getMangleContext().mangleCanonicalTypeName( 261*5deeebd8SDimitry Andric QualType(ETy, 0), Out); 2620b57cec5SDimitry Andric return createScalarTypeNode(OutName, getChar(), Size); 2630b57cec5SDimitry Andric } 2640b57cec5SDimitry Andric 2650eae32dcSDimitry Andric if (const auto *EIT = dyn_cast<BitIntType>(Ty)) { 2665ffd83dbSDimitry Andric SmallString<256> OutName; 2675ffd83dbSDimitry Andric llvm::raw_svector_ostream Out(OutName); 2685ffd83dbSDimitry Andric // Don't specify signed/unsigned since integer types can alias despite sign 2695ffd83dbSDimitry Andric // differences. 2700eae32dcSDimitry Andric Out << "_BitInt(" << EIT->getNumBits() << ')'; 2715ffd83dbSDimitry Andric return createScalarTypeNode(OutName, getChar(), Size); 2725ffd83dbSDimitry Andric } 2735ffd83dbSDimitry Andric 2740b57cec5SDimitry Andric // For now, handle any other kind of type conservatively. 2750b57cec5SDimitry Andric return getChar(); 2760b57cec5SDimitry Andric } 2770b57cec5SDimitry Andric 2780b57cec5SDimitry Andric llvm::MDNode *CodeGenTBAA::getTypeInfo(QualType QTy) { 2790b57cec5SDimitry Andric // At -O0 or relaxed aliasing, TBAA is not emitted for regular types. 2800b57cec5SDimitry Andric if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing) 2810b57cec5SDimitry Andric return nullptr; 2820b57cec5SDimitry Andric 2830b57cec5SDimitry Andric // If the type has the may_alias attribute (even on a typedef), it is 2840b57cec5SDimitry Andric // effectively in the general char alias class. 2850b57cec5SDimitry Andric if (TypeHasMayAlias(QTy)) 2860b57cec5SDimitry Andric return getChar(); 2870b57cec5SDimitry Andric 2880b57cec5SDimitry Andric // We need this function to not fall back to returning the "omnipotent char" 2890b57cec5SDimitry Andric // type node for aggregate and union types. Otherwise, any dereference of an 2900b57cec5SDimitry Andric // aggregate will result into the may-alias access descriptor, meaning all 2910b57cec5SDimitry Andric // subsequent accesses to direct and indirect members of that aggregate will 2920b57cec5SDimitry Andric // be considered may-alias too. 2930fca6ea1SDimitry Andric // TODO: Combine getTypeInfo() and getValidBaseTypeInfo() into a single 2940fca6ea1SDimitry Andric // function. 2950b57cec5SDimitry Andric if (isValidBaseType(QTy)) 2960fca6ea1SDimitry Andric return getValidBaseTypeInfo(QTy); 2970b57cec5SDimitry Andric 2980b57cec5SDimitry Andric const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); 2990b57cec5SDimitry Andric if (llvm::MDNode *N = MetadataCache[Ty]) 3000b57cec5SDimitry Andric return N; 3010b57cec5SDimitry Andric 3020b57cec5SDimitry Andric // Note that the following helper call is allowed to add new nodes to the 3030b57cec5SDimitry Andric // cache, which invalidates all its previously obtained iterators. So we 3040b57cec5SDimitry Andric // first generate the node for the type and then add that node to the cache. 3050b57cec5SDimitry Andric llvm::MDNode *TypeNode = getTypeInfoHelper(Ty); 3060b57cec5SDimitry Andric return MetadataCache[Ty] = TypeNode; 3070b57cec5SDimitry Andric } 3080b57cec5SDimitry Andric 3090b57cec5SDimitry Andric TBAAAccessInfo CodeGenTBAA::getAccessInfo(QualType AccessType) { 3100b57cec5SDimitry Andric // Pointee values may have incomplete types, but they shall never be 3110b57cec5SDimitry Andric // dereferenced. 3120b57cec5SDimitry Andric if (AccessType->isIncompleteType()) 3130b57cec5SDimitry Andric return TBAAAccessInfo::getIncompleteInfo(); 3140b57cec5SDimitry Andric 3150b57cec5SDimitry Andric if (TypeHasMayAlias(AccessType)) 3160b57cec5SDimitry Andric return TBAAAccessInfo::getMayAliasInfo(); 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric uint64_t Size = Context.getTypeSizeInChars(AccessType).getQuantity(); 3190b57cec5SDimitry Andric return TBAAAccessInfo(getTypeInfo(AccessType), Size); 3200b57cec5SDimitry Andric } 3210b57cec5SDimitry Andric 3220b57cec5SDimitry Andric TBAAAccessInfo CodeGenTBAA::getVTablePtrAccessInfo(llvm::Type *VTablePtrType) { 3230b57cec5SDimitry Andric llvm::DataLayout DL(&Module); 3240b57cec5SDimitry Andric unsigned Size = DL.getPointerTypeSize(VTablePtrType); 3250b57cec5SDimitry Andric return TBAAAccessInfo(createScalarTypeNode("vtable pointer", getRoot(), Size), 3260b57cec5SDimitry Andric Size); 3270b57cec5SDimitry Andric } 3280b57cec5SDimitry Andric 3290b57cec5SDimitry Andric bool 3300b57cec5SDimitry Andric CodeGenTBAA::CollectFields(uint64_t BaseOffset, 3310b57cec5SDimitry Andric QualType QTy, 3320b57cec5SDimitry Andric SmallVectorImpl<llvm::MDBuilder::TBAAStructField> & 3330b57cec5SDimitry Andric Fields, 3340b57cec5SDimitry Andric bool MayAlias) { 3350b57cec5SDimitry Andric /* Things not handled yet include: C++ base classes, bitfields, */ 3360b57cec5SDimitry Andric 3370b57cec5SDimitry Andric if (const RecordType *TTy = QTy->getAs<RecordType>()) { 3380fca6ea1SDimitry Andric if (TTy->isUnionType()) { 3390fca6ea1SDimitry Andric uint64_t Size = Context.getTypeSizeInChars(QTy).getQuantity(); 3400fca6ea1SDimitry Andric llvm::MDNode *TBAAType = getChar(); 3410fca6ea1SDimitry Andric llvm::MDNode *TBAATag = getAccessTagInfo(TBAAAccessInfo(TBAAType, Size)); 3420fca6ea1SDimitry Andric Fields.push_back( 3430fca6ea1SDimitry Andric llvm::MDBuilder::TBAAStructField(BaseOffset, Size, TBAATag)); 3440fca6ea1SDimitry Andric return true; 3450fca6ea1SDimitry Andric } 3460b57cec5SDimitry Andric const RecordDecl *RD = TTy->getDecl()->getDefinition(); 3470b57cec5SDimitry Andric if (RD->hasFlexibleArrayMember()) 3480b57cec5SDimitry Andric return false; 3490b57cec5SDimitry Andric 3500b57cec5SDimitry Andric // TODO: Handle C++ base classes. 3510b57cec5SDimitry Andric if (const CXXRecordDecl *Decl = dyn_cast<CXXRecordDecl>(RD)) 3520b57cec5SDimitry Andric if (Decl->bases_begin() != Decl->bases_end()) 3530b57cec5SDimitry Andric return false; 3540b57cec5SDimitry Andric 3550b57cec5SDimitry Andric const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); 3560fca6ea1SDimitry Andric const CGRecordLayout &CGRL = CGTypes.getCGRecordLayout(RD); 3570b57cec5SDimitry Andric 3580b57cec5SDimitry Andric unsigned idx = 0; 3590fca6ea1SDimitry Andric for (RecordDecl::field_iterator i = RD->field_begin(), e = RD->field_end(); 3600fca6ea1SDimitry Andric i != e; ++i, ++idx) { 3610fca6ea1SDimitry Andric if (isEmptyFieldForLayout(Context, *i)) 3620b57cec5SDimitry Andric continue; 3630fca6ea1SDimitry Andric 3640fca6ea1SDimitry Andric uint64_t Offset = 3650fca6ea1SDimitry Andric BaseOffset + Layout.getFieldOffset(idx) / Context.getCharWidth(); 3660fca6ea1SDimitry Andric 3670fca6ea1SDimitry Andric // Create a single field for consecutive named bitfields using char as 3680fca6ea1SDimitry Andric // base type. 3690fca6ea1SDimitry Andric if ((*i)->isBitField()) { 3700fca6ea1SDimitry Andric const CGBitFieldInfo &Info = CGRL.getBitFieldInfo(*i); 3710fca6ea1SDimitry Andric // For big endian targets the first bitfield in the consecutive run is 3720fca6ea1SDimitry Andric // at the most-significant end; see CGRecordLowering::setBitFieldInfo 3730fca6ea1SDimitry Andric // for more information. 3740fca6ea1SDimitry Andric bool IsBE = Context.getTargetInfo().isBigEndian(); 3750fca6ea1SDimitry Andric bool IsFirst = IsBE ? Info.StorageSize - (Info.Offset + Info.Size) == 0 3760fca6ea1SDimitry Andric : Info.Offset == 0; 3770fca6ea1SDimitry Andric if (!IsFirst) 3780fca6ea1SDimitry Andric continue; 3790fca6ea1SDimitry Andric unsigned CurrentBitFieldSize = Info.StorageSize; 3800fca6ea1SDimitry Andric uint64_t Size = 3810fca6ea1SDimitry Andric llvm::divideCeil(CurrentBitFieldSize, Context.getCharWidth()); 3820fca6ea1SDimitry Andric llvm::MDNode *TBAAType = getChar(); 3830fca6ea1SDimitry Andric llvm::MDNode *TBAATag = 3840fca6ea1SDimitry Andric getAccessTagInfo(TBAAAccessInfo(TBAAType, Size)); 3850fca6ea1SDimitry Andric Fields.push_back( 3860fca6ea1SDimitry Andric llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag)); 3870fca6ea1SDimitry Andric continue; 3880fca6ea1SDimitry Andric } 3890fca6ea1SDimitry Andric 3900b57cec5SDimitry Andric QualType FieldQTy = i->getType(); 3910b57cec5SDimitry Andric if (!CollectFields(Offset, FieldQTy, Fields, 3920b57cec5SDimitry Andric MayAlias || TypeHasMayAlias(FieldQTy))) 3930b57cec5SDimitry Andric return false; 3940b57cec5SDimitry Andric } 3950b57cec5SDimitry Andric return true; 3960b57cec5SDimitry Andric } 3970b57cec5SDimitry Andric 3980b57cec5SDimitry Andric /* Otherwise, treat whatever it is as a field. */ 3990b57cec5SDimitry Andric uint64_t Offset = BaseOffset; 4000b57cec5SDimitry Andric uint64_t Size = Context.getTypeSizeInChars(QTy).getQuantity(); 4010b57cec5SDimitry Andric llvm::MDNode *TBAAType = MayAlias ? getChar() : getTypeInfo(QTy); 4020b57cec5SDimitry Andric llvm::MDNode *TBAATag = getAccessTagInfo(TBAAAccessInfo(TBAAType, Size)); 4030b57cec5SDimitry Andric Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size, TBAATag)); 4040b57cec5SDimitry Andric return true; 4050b57cec5SDimitry Andric } 4060b57cec5SDimitry Andric 4070b57cec5SDimitry Andric llvm::MDNode * 4080b57cec5SDimitry Andric CodeGenTBAA::getTBAAStructInfo(QualType QTy) { 4090fca6ea1SDimitry Andric if (CodeGenOpts.OptimizationLevel == 0 || CodeGenOpts.RelaxedAliasing) 4100fca6ea1SDimitry Andric return nullptr; 4110fca6ea1SDimitry Andric 4120b57cec5SDimitry Andric const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); 4130b57cec5SDimitry Andric 4140b57cec5SDimitry Andric if (llvm::MDNode *N = StructMetadataCache[Ty]) 4150b57cec5SDimitry Andric return N; 4160b57cec5SDimitry Andric 4170b57cec5SDimitry Andric SmallVector<llvm::MDBuilder::TBAAStructField, 4> Fields; 4180b57cec5SDimitry Andric if (CollectFields(0, QTy, Fields, TypeHasMayAlias(QTy))) 4190b57cec5SDimitry Andric return MDHelper.createTBAAStructNode(Fields); 4200b57cec5SDimitry Andric 4210b57cec5SDimitry Andric // For now, handle any other kind of type conservatively. 4220b57cec5SDimitry Andric return StructMetadataCache[Ty] = nullptr; 4230b57cec5SDimitry Andric } 4240b57cec5SDimitry Andric 4250b57cec5SDimitry Andric llvm::MDNode *CodeGenTBAA::getBaseTypeInfoHelper(const Type *Ty) { 4260b57cec5SDimitry Andric if (auto *TTy = dyn_cast<RecordType>(Ty)) { 4270b57cec5SDimitry Andric const RecordDecl *RD = TTy->getDecl()->getDefinition(); 4280b57cec5SDimitry Andric const ASTRecordLayout &Layout = Context.getASTRecordLayout(RD); 429753f127fSDimitry Andric using TBAAStructField = llvm::MDBuilder::TBAAStructField; 430753f127fSDimitry Andric SmallVector<TBAAStructField, 4> Fields; 431753f127fSDimitry Andric if (const CXXRecordDecl *CXXRD = dyn_cast<CXXRecordDecl>(RD)) { 432bdd1243dSDimitry Andric // Handle C++ base classes. Non-virtual bases can treated a kind of 433753f127fSDimitry Andric // field. Virtual bases are more complex and omitted, but avoid an 434753f127fSDimitry Andric // incomplete view for NewStructPathTBAA. 435753f127fSDimitry Andric if (CodeGenOpts.NewStructPathTBAA && CXXRD->getNumVBases() != 0) 4365f757f3fSDimitry Andric return nullptr; 437753f127fSDimitry Andric for (const CXXBaseSpecifier &B : CXXRD->bases()) { 438753f127fSDimitry Andric if (B.isVirtual()) 439753f127fSDimitry Andric continue; 440753f127fSDimitry Andric QualType BaseQTy = B.getType(); 441753f127fSDimitry Andric const CXXRecordDecl *BaseRD = BaseQTy->getAsCXXRecordDecl(); 442753f127fSDimitry Andric if (BaseRD->isEmpty()) 443753f127fSDimitry Andric continue; 444753f127fSDimitry Andric llvm::MDNode *TypeNode = isValidBaseType(BaseQTy) 4450fca6ea1SDimitry Andric ? getValidBaseTypeInfo(BaseQTy) 446753f127fSDimitry Andric : getTypeInfo(BaseQTy); 447753f127fSDimitry Andric if (!TypeNode) 4485f757f3fSDimitry Andric return nullptr; 449753f127fSDimitry Andric uint64_t Offset = Layout.getBaseClassOffset(BaseRD).getQuantity(); 450753f127fSDimitry Andric uint64_t Size = 451753f127fSDimitry Andric Context.getASTRecordLayout(BaseRD).getDataSize().getQuantity(); 452753f127fSDimitry Andric Fields.push_back( 453753f127fSDimitry Andric llvm::MDBuilder::TBAAStructField(Offset, Size, TypeNode)); 454753f127fSDimitry Andric } 455753f127fSDimitry Andric // The order in which base class subobjects are allocated is unspecified, 456753f127fSDimitry Andric // so may differ from declaration order. In particular, Itanium ABI will 457753f127fSDimitry Andric // allocate a primary base first. 458753f127fSDimitry Andric // Since we exclude empty subobjects, the objects are not overlapping and 459753f127fSDimitry Andric // their offsets are unique. 460753f127fSDimitry Andric llvm::sort(Fields, 461753f127fSDimitry Andric [](const TBAAStructField &A, const TBAAStructField &B) { 462753f127fSDimitry Andric return A.Offset < B.Offset; 463753f127fSDimitry Andric }); 464753f127fSDimitry Andric } 4650b57cec5SDimitry Andric for (FieldDecl *Field : RD->fields()) { 4660fca6ea1SDimitry Andric if (Field->isZeroSize(Context) || Field->isUnnamedBitField()) 4670b57cec5SDimitry Andric continue; 4680b57cec5SDimitry Andric QualType FieldQTy = Field->getType(); 4690fca6ea1SDimitry Andric llvm::MDNode *TypeNode = isValidBaseType(FieldQTy) 4700fca6ea1SDimitry Andric ? getValidBaseTypeInfo(FieldQTy) 4710fca6ea1SDimitry Andric : getTypeInfo(FieldQTy); 4720b57cec5SDimitry Andric if (!TypeNode) 4735f757f3fSDimitry Andric return nullptr; 4740b57cec5SDimitry Andric 4750b57cec5SDimitry Andric uint64_t BitOffset = Layout.getFieldOffset(Field->getFieldIndex()); 4760b57cec5SDimitry Andric uint64_t Offset = Context.toCharUnitsFromBits(BitOffset).getQuantity(); 4770b57cec5SDimitry Andric uint64_t Size = Context.getTypeSizeInChars(FieldQTy).getQuantity(); 4780b57cec5SDimitry Andric Fields.push_back(llvm::MDBuilder::TBAAStructField(Offset, Size, 4790b57cec5SDimitry Andric TypeNode)); 4800b57cec5SDimitry Andric } 4810b57cec5SDimitry Andric 4820b57cec5SDimitry Andric SmallString<256> OutName; 4830b57cec5SDimitry Andric if (Features.CPlusPlus) { 4840b57cec5SDimitry Andric // Don't use the mangler for C code. 4850b57cec5SDimitry Andric llvm::raw_svector_ostream Out(OutName); 486*5deeebd8SDimitry Andric CGTypes.getCXXABI().getMangleContext().mangleCanonicalTypeName( 487*5deeebd8SDimitry Andric QualType(Ty, 0), Out); 4880b57cec5SDimitry Andric } else { 4890b57cec5SDimitry Andric OutName = RD->getName(); 4900b57cec5SDimitry Andric } 4910b57cec5SDimitry Andric 4920b57cec5SDimitry Andric if (CodeGenOpts.NewStructPathTBAA) { 4930b57cec5SDimitry Andric llvm::MDNode *Parent = getChar(); 4940b57cec5SDimitry Andric uint64_t Size = Context.getTypeSizeInChars(Ty).getQuantity(); 4950b57cec5SDimitry Andric llvm::Metadata *Id = MDHelper.createString(OutName); 4960b57cec5SDimitry Andric return MDHelper.createTBAATypeNode(Parent, Size, Id, Fields); 4970b57cec5SDimitry Andric } 4980b57cec5SDimitry Andric 4990b57cec5SDimitry Andric // Create the struct type node with a vector of pairs (offset, type). 5000b57cec5SDimitry Andric SmallVector<std::pair<llvm::MDNode*, uint64_t>, 4> OffsetsAndTypes; 5010b57cec5SDimitry Andric for (const auto &Field : Fields) 5020b57cec5SDimitry Andric OffsetsAndTypes.push_back(std::make_pair(Field.Type, Field.Offset)); 5030b57cec5SDimitry Andric return MDHelper.createTBAAStructTypeNode(OutName, OffsetsAndTypes); 5040b57cec5SDimitry Andric } 5050b57cec5SDimitry Andric 5060b57cec5SDimitry Andric return nullptr; 5070b57cec5SDimitry Andric } 5080b57cec5SDimitry Andric 5090fca6ea1SDimitry Andric llvm::MDNode *CodeGenTBAA::getValidBaseTypeInfo(QualType QTy) { 5100fca6ea1SDimitry Andric assert(isValidBaseType(QTy) && "Must be a valid base type"); 5110b57cec5SDimitry Andric 5120b57cec5SDimitry Andric const Type *Ty = Context.getCanonicalType(QTy).getTypePtr(); 5130b57cec5SDimitry Andric 5145f757f3fSDimitry Andric // nullptr is a valid value in the cache, so use find rather than [] 5155f757f3fSDimitry Andric auto I = BaseTypeMetadataCache.find(Ty); 5165f757f3fSDimitry Andric if (I != BaseTypeMetadataCache.end()) 5175f757f3fSDimitry Andric return I->second; 5185f757f3fSDimitry Andric 5195f757f3fSDimitry Andric // First calculate the metadata, before recomputing the insertion point, as 5205f757f3fSDimitry Andric // the helper can recursively call us. 5210b57cec5SDimitry Andric llvm::MDNode *TypeNode = getBaseTypeInfoHelper(Ty); 5225f757f3fSDimitry Andric LLVM_ATTRIBUTE_UNUSED auto inserted = 5235f757f3fSDimitry Andric BaseTypeMetadataCache.insert({Ty, TypeNode}); 5245f757f3fSDimitry Andric assert(inserted.second && "BaseType metadata was already inserted"); 5255f757f3fSDimitry Andric 5265f757f3fSDimitry Andric return TypeNode; 5270b57cec5SDimitry Andric } 5280b57cec5SDimitry Andric 5290fca6ea1SDimitry Andric llvm::MDNode *CodeGenTBAA::getBaseTypeInfo(QualType QTy) { 5300fca6ea1SDimitry Andric return isValidBaseType(QTy) ? getValidBaseTypeInfo(QTy) : nullptr; 5310fca6ea1SDimitry Andric } 5320fca6ea1SDimitry Andric 5330b57cec5SDimitry Andric llvm::MDNode *CodeGenTBAA::getAccessTagInfo(TBAAAccessInfo Info) { 5340b57cec5SDimitry Andric assert(!Info.isIncomplete() && "Access to an object of an incomplete type!"); 5350b57cec5SDimitry Andric 5360b57cec5SDimitry Andric if (Info.isMayAlias()) 5370b57cec5SDimitry Andric Info = TBAAAccessInfo(getChar(), Info.Size); 5380b57cec5SDimitry Andric 5390b57cec5SDimitry Andric if (!Info.AccessType) 5400b57cec5SDimitry Andric return nullptr; 5410b57cec5SDimitry Andric 5420b57cec5SDimitry Andric if (!CodeGenOpts.StructPathTBAA) 5430b57cec5SDimitry Andric Info = TBAAAccessInfo(Info.AccessType, Info.Size); 5440b57cec5SDimitry Andric 5450b57cec5SDimitry Andric llvm::MDNode *&N = AccessTagMetadataCache[Info]; 5460b57cec5SDimitry Andric if (N) 5470b57cec5SDimitry Andric return N; 5480b57cec5SDimitry Andric 5490b57cec5SDimitry Andric if (!Info.BaseType) { 5500b57cec5SDimitry Andric Info.BaseType = Info.AccessType; 5510b57cec5SDimitry Andric assert(!Info.Offset && "Nonzero offset for an access with no base type!"); 5520b57cec5SDimitry Andric } 5530b57cec5SDimitry Andric if (CodeGenOpts.NewStructPathTBAA) { 5540b57cec5SDimitry Andric return N = MDHelper.createTBAAAccessTag(Info.BaseType, Info.AccessType, 5550b57cec5SDimitry Andric Info.Offset, Info.Size); 5560b57cec5SDimitry Andric } 5570b57cec5SDimitry Andric return N = MDHelper.createTBAAStructTagNode(Info.BaseType, Info.AccessType, 5580b57cec5SDimitry Andric Info.Offset); 5590b57cec5SDimitry Andric } 5600b57cec5SDimitry Andric 5610b57cec5SDimitry Andric TBAAAccessInfo CodeGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo SourceInfo, 5620b57cec5SDimitry Andric TBAAAccessInfo TargetInfo) { 5630b57cec5SDimitry Andric if (SourceInfo.isMayAlias() || TargetInfo.isMayAlias()) 5640b57cec5SDimitry Andric return TBAAAccessInfo::getMayAliasInfo(); 5650b57cec5SDimitry Andric return TargetInfo; 5660b57cec5SDimitry Andric } 5670b57cec5SDimitry Andric 5680b57cec5SDimitry Andric TBAAAccessInfo 5690b57cec5SDimitry Andric CodeGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo InfoA, 5700b57cec5SDimitry Andric TBAAAccessInfo InfoB) { 5710b57cec5SDimitry Andric if (InfoA == InfoB) 5720b57cec5SDimitry Andric return InfoA; 5730b57cec5SDimitry Andric 5740b57cec5SDimitry Andric if (!InfoA || !InfoB) 5750b57cec5SDimitry Andric return TBAAAccessInfo(); 5760b57cec5SDimitry Andric 5770b57cec5SDimitry Andric if (InfoA.isMayAlias() || InfoB.isMayAlias()) 5780b57cec5SDimitry Andric return TBAAAccessInfo::getMayAliasInfo(); 5790b57cec5SDimitry Andric 5800b57cec5SDimitry Andric // TODO: Implement the rest of the logic here. For example, two accesses 5810b57cec5SDimitry Andric // with same final access types result in an access to an object of that final 5820b57cec5SDimitry Andric // access type regardless of their base types. 5830b57cec5SDimitry Andric return TBAAAccessInfo::getMayAliasInfo(); 5840b57cec5SDimitry Andric } 5850b57cec5SDimitry Andric 5860b57cec5SDimitry Andric TBAAAccessInfo 5870b57cec5SDimitry Andric CodeGenTBAA::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo DestInfo, 5880b57cec5SDimitry Andric TBAAAccessInfo SrcInfo) { 5890b57cec5SDimitry Andric if (DestInfo == SrcInfo) 5900b57cec5SDimitry Andric return DestInfo; 5910b57cec5SDimitry Andric 5920b57cec5SDimitry Andric if (!DestInfo || !SrcInfo) 5930b57cec5SDimitry Andric return TBAAAccessInfo(); 5940b57cec5SDimitry Andric 5950b57cec5SDimitry Andric if (DestInfo.isMayAlias() || SrcInfo.isMayAlias()) 5960b57cec5SDimitry Andric return TBAAAccessInfo::getMayAliasInfo(); 5970b57cec5SDimitry Andric 5980b57cec5SDimitry Andric // TODO: Implement the rest of the logic here. For example, two accesses 5990b57cec5SDimitry Andric // with same final access types result in an access to an object of that final 6000b57cec5SDimitry Andric // access type regardless of their base types. 6010b57cec5SDimitry Andric return TBAAAccessInfo::getMayAliasInfo(); 6020b57cec5SDimitry Andric } 603