1c3574ef7Sserge-sans-paille //===--- ConfusableIdentifierCheck.cpp - 2c3574ef7Sserge-sans-paille // clang-tidy--------------------------===// 3c3574ef7Sserge-sans-paille // 4c3574ef7Sserge-sans-paille // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 5c3574ef7Sserge-sans-paille // See https://llvm.org/LICENSE.txt for license information. 6c3574ef7Sserge-sans-paille // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7c3574ef7Sserge-sans-paille // 8c3574ef7Sserge-sans-paille //===----------------------------------------------------------------------===// 9c3574ef7Sserge-sans-paille 10c3574ef7Sserge-sans-paille #include "ConfusableIdentifierCheck.h" 11c3574ef7Sserge-sans-paille 12c3574ef7Sserge-sans-paille #include "clang/Frontend/CompilerInstance.h" 13c3574ef7Sserge-sans-paille #include "clang/Lex/Preprocessor.h" 148fdedcd1SPiotr Zegar #include "llvm/ADT/SmallString.h" 15c3574ef7Sserge-sans-paille #include "llvm/Support/ConvertUTF.h" 16c3574ef7Sserge-sans-paille 17c3574ef7Sserge-sans-paille namespace { 18c3574ef7Sserge-sans-paille // Preprocessed version of 19c3574ef7Sserge-sans-paille // https://www.unicode.org/Public/security/latest/confusables.txt 20c3574ef7Sserge-sans-paille // 21c3574ef7Sserge-sans-paille // This contains a sorted array of { UTF32 codepoint; UTF32 values[N];} 22c3574ef7Sserge-sans-paille #include "Confusables.inc" 23c3574ef7Sserge-sans-paille } // namespace 24c3574ef7Sserge-sans-paille 257d2ea6c4SCarlos Galvez namespace clang::tidy::misc { 26c3574ef7Sserge-sans-paille 27c3574ef7Sserge-sans-paille ConfusableIdentifierCheck::ConfusableIdentifierCheck(StringRef Name, 28c3574ef7Sserge-sans-paille ClangTidyContext *Context) 29c3574ef7Sserge-sans-paille : ClangTidyCheck(Name, Context) {} 30c3574ef7Sserge-sans-paille 31c3574ef7Sserge-sans-paille ConfusableIdentifierCheck::~ConfusableIdentifierCheck() = default; 32c3574ef7Sserge-sans-paille 33c3574ef7Sserge-sans-paille // Build a skeleton out of the Original identifier, inspired by the algorithm 34c3574ef7Sserge-sans-paille // described in http://www.unicode.org/reports/tr39/#def-skeleton 35c3574ef7Sserge-sans-paille // 36c3574ef7Sserge-sans-paille // FIXME: TR39 mandates: 37c3574ef7Sserge-sans-paille // 38c3574ef7Sserge-sans-paille // For an input string X, define skeleton(X) to be the following transformation 39c3574ef7Sserge-sans-paille // on the string: 40c3574ef7Sserge-sans-paille // 41c3574ef7Sserge-sans-paille // 1. Convert X to NFD format, as described in [UAX15]. 42c3574ef7Sserge-sans-paille // 2. Concatenate the prototypes for each character in X according to the 43c3574ef7Sserge-sans-paille // specified data, producing a string of exemplar characters. 44c3574ef7Sserge-sans-paille // 3. Reapply NFD. 45c3574ef7Sserge-sans-paille // 46c3574ef7Sserge-sans-paille // We're skipping 1. and 3. for the sake of simplicity, but this can lead to 47c3574ef7Sserge-sans-paille // false positive. 48c3574ef7Sserge-sans-paille 498fdedcd1SPiotr Zegar static llvm::SmallString<64U> skeleton(StringRef Name) { 50c3574ef7Sserge-sans-paille using namespace llvm; 518fdedcd1SPiotr Zegar SmallString<64U> Skeleton; 528fdedcd1SPiotr Zegar Skeleton.reserve(1U + Name.size()); 53c3574ef7Sserge-sans-paille 548fdedcd1SPiotr Zegar const char *Curr = Name.data(); 558fdedcd1SPiotr Zegar const char *End = Curr + Name.size(); 56c3574ef7Sserge-sans-paille while (Curr < End) { 57c3574ef7Sserge-sans-paille 58c3574ef7Sserge-sans-paille const char *Prev = Curr; 59cbdc3e1bSPiotr Zegar UTF32 CodePoint = 0; 60c3574ef7Sserge-sans-paille ConversionResult Result = convertUTF8Sequence( 61c3574ef7Sserge-sans-paille reinterpret_cast<const UTF8 **>(&Curr), 62c3574ef7Sserge-sans-paille reinterpret_cast<const UTF8 *>(End), &CodePoint, strictConversion); 63c3574ef7Sserge-sans-paille if (Result != conversionOK) { 64c3574ef7Sserge-sans-paille errs() << "Unicode conversion issue\n"; 65c3574ef7Sserge-sans-paille break; 66c3574ef7Sserge-sans-paille } 67c3574ef7Sserge-sans-paille 68c3574ef7Sserge-sans-paille StringRef Key(Prev, Curr - Prev); 6989f14332SKazu Hirata auto Where = llvm::lower_bound(ConfusableEntries, CodePoint, 70c3574ef7Sserge-sans-paille [](decltype(ConfusableEntries[0]) x, 71c3574ef7Sserge-sans-paille UTF32 y) { return x.codepoint < y; }); 72c3574ef7Sserge-sans-paille if (Where == std::end(ConfusableEntries) || CodePoint != Where->codepoint) { 73c3574ef7Sserge-sans-paille Skeleton.append(Prev, Curr); 74c3574ef7Sserge-sans-paille } else { 75c3574ef7Sserge-sans-paille UTF8 Buffer[32]; 76c3574ef7Sserge-sans-paille UTF8 *BufferStart = std::begin(Buffer); 77c3574ef7Sserge-sans-paille UTF8 *IBuffer = BufferStart; 78c3574ef7Sserge-sans-paille const UTF32 *ValuesStart = std::begin(Where->values); 79e125e6c4SKazu Hirata const UTF32 *ValuesEnd = llvm::find(Where->values, '\0'); 80c3574ef7Sserge-sans-paille if (ConvertUTF32toUTF8(&ValuesStart, ValuesEnd, &IBuffer, 81c3574ef7Sserge-sans-paille std::end(Buffer), 82c3574ef7Sserge-sans-paille strictConversion) != conversionOK) { 83c3574ef7Sserge-sans-paille errs() << "Unicode conversion issue\n"; 84c3574ef7Sserge-sans-paille break; 85c3574ef7Sserge-sans-paille } 86c3574ef7Sserge-sans-paille Skeleton.append((char *)BufferStart, (char *)IBuffer); 87c3574ef7Sserge-sans-paille } 88c3574ef7Sserge-sans-paille } 89c3574ef7Sserge-sans-paille return Skeleton; 90c3574ef7Sserge-sans-paille } 91c3574ef7Sserge-sans-paille 922a84c635SPiotr Zegar static bool mayShadowImpl(const DeclContext *DC0, const DeclContext *DC1) { 932a84c635SPiotr Zegar return DC0 && DC0 == DC1; 942a84c635SPiotr Zegar } 952a84c635SPiotr Zegar 967a550212Sserge-sans-paille static bool mayShadowImpl(const NamedDecl *ND0, const NamedDecl *ND1) { 972a84c635SPiotr Zegar return isa<TemplateTypeParmDecl>(ND0) || isa<TemplateTypeParmDecl>(ND1); 982a84c635SPiotr Zegar } 997a550212Sserge-sans-paille 1002a84c635SPiotr Zegar static bool isMemberOf(const ConfusableIdentifierCheck::ContextInfo *DC0, 1012a84c635SPiotr Zegar const ConfusableIdentifierCheck::ContextInfo *DC1) { 1022a84c635SPiotr Zegar return llvm::is_contained(DC1->Bases, DC0->PrimaryContext); 1037a550212Sserge-sans-paille } 1047a550212Sserge-sans-paille 1052a84c635SPiotr Zegar static bool enclosesContext(const ConfusableIdentifierCheck::ContextInfo *DC0, 1062a84c635SPiotr Zegar const ConfusableIdentifierCheck::ContextInfo *DC1) { 1072a84c635SPiotr Zegar if (DC0->PrimaryContext == DC1->PrimaryContext) 1082a84c635SPiotr Zegar return true; 1092a84c635SPiotr Zegar 1102a84c635SPiotr Zegar return llvm::is_contained(DC0->PrimaryContexts, DC1->PrimaryContext) || 1112a84c635SPiotr Zegar llvm::is_contained(DC1->PrimaryContexts, DC0->PrimaryContext); 1122a84c635SPiotr Zegar } 1132a84c635SPiotr Zegar 1142a84c635SPiotr Zegar static bool mayShadow(const NamedDecl *ND0, 1152a84c635SPiotr Zegar const ConfusableIdentifierCheck::ContextInfo *DC0, 1162a84c635SPiotr Zegar const NamedDecl *ND1, 1172a84c635SPiotr Zegar const ConfusableIdentifierCheck::ContextInfo *DC1) { 1188fdedcd1SPiotr Zegar 1198fdedcd1SPiotr Zegar if (!DC0->Bases.empty() && !DC1->Bases.empty()) { 1208fdedcd1SPiotr Zegar // if any of the declaration is a non-private member of the other 1218fdedcd1SPiotr Zegar // declaration, it's shadowed by the former 1228fdedcd1SPiotr Zegar 1238fdedcd1SPiotr Zegar if (ND1->getAccess() != AS_private && isMemberOf(DC1, DC0)) 1242a84c635SPiotr Zegar return true; 1252a84c635SPiotr Zegar 1268fdedcd1SPiotr Zegar if (ND0->getAccess() != AS_private && isMemberOf(DC0, DC1)) 1278fdedcd1SPiotr Zegar return true; 1288fdedcd1SPiotr Zegar } 1298fdedcd1SPiotr Zegar 1308fdedcd1SPiotr Zegar if (!mayShadowImpl(DC0->NonTransparentContext, DC1->NonTransparentContext) && 1318fdedcd1SPiotr Zegar !mayShadowImpl(ND0, ND1)) 1328fdedcd1SPiotr Zegar return false; 1338fdedcd1SPiotr Zegar 1348fdedcd1SPiotr Zegar return enclosesContext(DC0, DC1); 1352a84c635SPiotr Zegar } 1362a84c635SPiotr Zegar 1372a84c635SPiotr Zegar const ConfusableIdentifierCheck::ContextInfo * 1382a84c635SPiotr Zegar ConfusableIdentifierCheck::getContextInfo(const DeclContext *DC) { 1392a84c635SPiotr Zegar const DeclContext *PrimaryContext = DC->getPrimaryContext(); 140*571354e2SKazu Hirata auto [It, Inserted] = ContextInfos.try_emplace(PrimaryContext); 141*571354e2SKazu Hirata if (!Inserted) 1422a84c635SPiotr Zegar return &It->second; 1432a84c635SPiotr Zegar 144*571354e2SKazu Hirata ContextInfo &Info = It->second; 1452a84c635SPiotr Zegar Info.PrimaryContext = PrimaryContext; 1462a84c635SPiotr Zegar Info.NonTransparentContext = PrimaryContext; 1472a84c635SPiotr Zegar 1482a84c635SPiotr Zegar while (Info.NonTransparentContext->isTransparentContext()) { 1492a84c635SPiotr Zegar Info.NonTransparentContext = Info.NonTransparentContext->getParent(); 1502a84c635SPiotr Zegar if (!Info.NonTransparentContext) 1512a84c635SPiotr Zegar break; 1522a84c635SPiotr Zegar } 1532a84c635SPiotr Zegar 1542a84c635SPiotr Zegar if (Info.NonTransparentContext) 1552a84c635SPiotr Zegar Info.NonTransparentContext = 1562a84c635SPiotr Zegar Info.NonTransparentContext->getPrimaryContext(); 1572a84c635SPiotr Zegar 1582a84c635SPiotr Zegar while (DC) { 1592a84c635SPiotr Zegar if (!isa<LinkageSpecDecl>(DC) && !isa<ExportDecl>(DC)) 1602a84c635SPiotr Zegar Info.PrimaryContexts.push_back(DC->getPrimaryContext()); 1612a84c635SPiotr Zegar DC = DC->getParent(); 1622a84c635SPiotr Zegar } 1632a84c635SPiotr Zegar 164fc2a9ad1SPiotr Zegar if (const auto *RD = dyn_cast<CXXRecordDecl>(PrimaryContext)) { 1652a84c635SPiotr Zegar RD = RD->getDefinition(); 1662a84c635SPiotr Zegar if (RD) { 1672a84c635SPiotr Zegar Info.Bases.push_back(RD); 1682a84c635SPiotr Zegar RD->forallBases([&](const CXXRecordDecl *Base) { 1692a84c635SPiotr Zegar Info.Bases.push_back(Base); 1707a550212Sserge-sans-paille return false; 1712a84c635SPiotr Zegar }); 1722a84c635SPiotr Zegar } 1737a550212Sserge-sans-paille } 1747a550212Sserge-sans-paille 1752a84c635SPiotr Zegar return &Info; 1767a550212Sserge-sans-paille } 1777a550212Sserge-sans-paille 178c3574ef7Sserge-sans-paille void ConfusableIdentifierCheck::check( 179c3574ef7Sserge-sans-paille const ast_matchers::MatchFinder::MatchResult &Result) { 1808fdedcd1SPiotr Zegar const auto *ND = Result.Nodes.getNodeAs<NamedDecl>("nameddecl"); 1818fdedcd1SPiotr Zegar if (!ND) 1828fdedcd1SPiotr Zegar return; 1838fdedcd1SPiotr Zegar 1848fdedcd1SPiotr Zegar IdentifierInfo *NDII = ND->getIdentifier(); 1858fdedcd1SPiotr Zegar if (!NDII) 1868fdedcd1SPiotr Zegar return; 1878fdedcd1SPiotr Zegar 1887a550212Sserge-sans-paille StringRef NDName = NDII->getName(); 1898fdedcd1SPiotr Zegar if (NDName.empty()) 1908fdedcd1SPiotr Zegar return; 1918fdedcd1SPiotr Zegar 1928fdedcd1SPiotr Zegar const ContextInfo *Info = getContextInfo(ND->getDeclContext()); 1938fdedcd1SPiotr Zegar 1942a84c635SPiotr Zegar llvm::SmallVector<Entry> &Mapped = Mapper[skeleton(NDName)]; 1952a84c635SPiotr Zegar for (const Entry &E : Mapped) { 1968fdedcd1SPiotr Zegar if (!mayShadow(ND, Info, E.Declaration, E.Info)) 1978fdedcd1SPiotr Zegar continue; 1988fdedcd1SPiotr Zegar 1992a84c635SPiotr Zegar const IdentifierInfo *ONDII = E.Declaration->getIdentifier(); 2007a550212Sserge-sans-paille StringRef ONDName = ONDII->getName(); 2018fdedcd1SPiotr Zegar if (ONDName == NDName) 2028fdedcd1SPiotr Zegar continue; 2038fdedcd1SPiotr Zegar 2048fdedcd1SPiotr Zegar diag(ND->getLocation(), "%0 is confusable with %1") << ND << E.Declaration; 2052a84c635SPiotr Zegar diag(E.Declaration->getLocation(), "other declaration found here", 206c3574ef7Sserge-sans-paille DiagnosticIDs::Note); 207c3574ef7Sserge-sans-paille } 2088fdedcd1SPiotr Zegar 2092a84c635SPiotr Zegar Mapped.push_back({ND, Info}); 210c3574ef7Sserge-sans-paille } 2118fdedcd1SPiotr Zegar 2128fdedcd1SPiotr Zegar void ConfusableIdentifierCheck::onEndOfTranslationUnit() { 2138fdedcd1SPiotr Zegar Mapper.clear(); 2148fdedcd1SPiotr Zegar ContextInfos.clear(); 215c3574ef7Sserge-sans-paille } 216c3574ef7Sserge-sans-paille 217c3574ef7Sserge-sans-paille void ConfusableIdentifierCheck::registerMatchers( 218c3574ef7Sserge-sans-paille ast_matchers::MatchFinder *Finder) { 219c3574ef7Sserge-sans-paille Finder->addMatcher(ast_matchers::namedDecl().bind("nameddecl"), this); 220c3574ef7Sserge-sans-paille } 221c3574ef7Sserge-sans-paille 2227d2ea6c4SCarlos Galvez } // namespace clang::tidy::misc 223