xref: /llvm-project/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.cpp (revision 571354e25130b213146c26d05524fcd215fbd061)
1c3574ef7Sserge-sans-paille //===--- ConfusableIdentifierCheck.cpp -
2c3574ef7Sserge-sans-paille // clang-tidy--------------------------===//
3c3574ef7Sserge-sans-paille //
4c3574ef7Sserge-sans-paille // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5c3574ef7Sserge-sans-paille // See https://llvm.org/LICENSE.txt for license information.
6c3574ef7Sserge-sans-paille // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7c3574ef7Sserge-sans-paille //
8c3574ef7Sserge-sans-paille //===----------------------------------------------------------------------===//
9c3574ef7Sserge-sans-paille 
10c3574ef7Sserge-sans-paille #include "ConfusableIdentifierCheck.h"
11c3574ef7Sserge-sans-paille 
12c3574ef7Sserge-sans-paille #include "clang/Frontend/CompilerInstance.h"
13c3574ef7Sserge-sans-paille #include "clang/Lex/Preprocessor.h"
148fdedcd1SPiotr Zegar #include "llvm/ADT/SmallString.h"
15c3574ef7Sserge-sans-paille #include "llvm/Support/ConvertUTF.h"
16c3574ef7Sserge-sans-paille 
17c3574ef7Sserge-sans-paille namespace {
18c3574ef7Sserge-sans-paille // Preprocessed version of
19c3574ef7Sserge-sans-paille // https://www.unicode.org/Public/security/latest/confusables.txt
20c3574ef7Sserge-sans-paille //
21c3574ef7Sserge-sans-paille // This contains a sorted array of { UTF32 codepoint; UTF32 values[N];}
22c3574ef7Sserge-sans-paille #include "Confusables.inc"
23c3574ef7Sserge-sans-paille } // namespace
24c3574ef7Sserge-sans-paille 
257d2ea6c4SCarlos Galvez namespace clang::tidy::misc {
26c3574ef7Sserge-sans-paille 
27c3574ef7Sserge-sans-paille ConfusableIdentifierCheck::ConfusableIdentifierCheck(StringRef Name,
28c3574ef7Sserge-sans-paille                                                      ClangTidyContext *Context)
29c3574ef7Sserge-sans-paille     : ClangTidyCheck(Name, Context) {}
30c3574ef7Sserge-sans-paille 
31c3574ef7Sserge-sans-paille ConfusableIdentifierCheck::~ConfusableIdentifierCheck() = default;
32c3574ef7Sserge-sans-paille 
// Build a skeleton out of the Original identifier, inspired by the algorithm
// described in http://www.unicode.org/reports/tr39/#def-skeleton
//
// FIXME: TR39 mandates:
//
// For an input string X, define skeleton(X) to be the following transformation
// on the string:
//
// 1. Convert X to NFD format, as described in [UAX15].
// 2. Concatenate the prototypes for each character in X according to the
// specified data, producing a string of exemplar characters.
// 3. Reapply NFD.
//
// We're skipping 1. and 3. for the sake of simplicity, but this can lead to
// false positives.
48c3574ef7Sserge-sans-paille 
498fdedcd1SPiotr Zegar static llvm::SmallString<64U> skeleton(StringRef Name) {
50c3574ef7Sserge-sans-paille   using namespace llvm;
518fdedcd1SPiotr Zegar   SmallString<64U> Skeleton;
528fdedcd1SPiotr Zegar   Skeleton.reserve(1U + Name.size());
53c3574ef7Sserge-sans-paille 
548fdedcd1SPiotr Zegar   const char *Curr = Name.data();
558fdedcd1SPiotr Zegar   const char *End = Curr + Name.size();
56c3574ef7Sserge-sans-paille   while (Curr < End) {
57c3574ef7Sserge-sans-paille 
58c3574ef7Sserge-sans-paille     const char *Prev = Curr;
59cbdc3e1bSPiotr Zegar     UTF32 CodePoint = 0;
60c3574ef7Sserge-sans-paille     ConversionResult Result = convertUTF8Sequence(
61c3574ef7Sserge-sans-paille         reinterpret_cast<const UTF8 **>(&Curr),
62c3574ef7Sserge-sans-paille         reinterpret_cast<const UTF8 *>(End), &CodePoint, strictConversion);
63c3574ef7Sserge-sans-paille     if (Result != conversionOK) {
64c3574ef7Sserge-sans-paille       errs() << "Unicode conversion issue\n";
65c3574ef7Sserge-sans-paille       break;
66c3574ef7Sserge-sans-paille     }
67c3574ef7Sserge-sans-paille 
68c3574ef7Sserge-sans-paille     StringRef Key(Prev, Curr - Prev);
6989f14332SKazu Hirata     auto Where = llvm::lower_bound(ConfusableEntries, CodePoint,
70c3574ef7Sserge-sans-paille                                    [](decltype(ConfusableEntries[0]) x,
71c3574ef7Sserge-sans-paille                                       UTF32 y) { return x.codepoint < y; });
72c3574ef7Sserge-sans-paille     if (Where == std::end(ConfusableEntries) || CodePoint != Where->codepoint) {
73c3574ef7Sserge-sans-paille       Skeleton.append(Prev, Curr);
74c3574ef7Sserge-sans-paille     } else {
75c3574ef7Sserge-sans-paille       UTF8 Buffer[32];
76c3574ef7Sserge-sans-paille       UTF8 *BufferStart = std::begin(Buffer);
77c3574ef7Sserge-sans-paille       UTF8 *IBuffer = BufferStart;
78c3574ef7Sserge-sans-paille       const UTF32 *ValuesStart = std::begin(Where->values);
79e125e6c4SKazu Hirata       const UTF32 *ValuesEnd = llvm::find(Where->values, '\0');
80c3574ef7Sserge-sans-paille       if (ConvertUTF32toUTF8(&ValuesStart, ValuesEnd, &IBuffer,
81c3574ef7Sserge-sans-paille                              std::end(Buffer),
82c3574ef7Sserge-sans-paille                              strictConversion) != conversionOK) {
83c3574ef7Sserge-sans-paille         errs() << "Unicode conversion issue\n";
84c3574ef7Sserge-sans-paille         break;
85c3574ef7Sserge-sans-paille       }
86c3574ef7Sserge-sans-paille       Skeleton.append((char *)BufferStart, (char *)IBuffer);
87c3574ef7Sserge-sans-paille     }
88c3574ef7Sserge-sans-paille   }
89c3574ef7Sserge-sans-paille   return Skeleton;
90c3574ef7Sserge-sans-paille }
91c3574ef7Sserge-sans-paille 
922a84c635SPiotr Zegar static bool mayShadowImpl(const DeclContext *DC0, const DeclContext *DC1) {
932a84c635SPiotr Zegar   return DC0 && DC0 == DC1;
942a84c635SPiotr Zegar }
952a84c635SPiotr Zegar 
967a550212Sserge-sans-paille static bool mayShadowImpl(const NamedDecl *ND0, const NamedDecl *ND1) {
972a84c635SPiotr Zegar   return isa<TemplateTypeParmDecl>(ND0) || isa<TemplateTypeParmDecl>(ND1);
982a84c635SPiotr Zegar }
997a550212Sserge-sans-paille 
1002a84c635SPiotr Zegar static bool isMemberOf(const ConfusableIdentifierCheck::ContextInfo *DC0,
1012a84c635SPiotr Zegar                        const ConfusableIdentifierCheck::ContextInfo *DC1) {
1022a84c635SPiotr Zegar   return llvm::is_contained(DC1->Bases, DC0->PrimaryContext);
1037a550212Sserge-sans-paille }
1047a550212Sserge-sans-paille 
1052a84c635SPiotr Zegar static bool enclosesContext(const ConfusableIdentifierCheck::ContextInfo *DC0,
1062a84c635SPiotr Zegar                             const ConfusableIdentifierCheck::ContextInfo *DC1) {
1072a84c635SPiotr Zegar   if (DC0->PrimaryContext == DC1->PrimaryContext)
1082a84c635SPiotr Zegar     return true;
1092a84c635SPiotr Zegar 
1102a84c635SPiotr Zegar   return llvm::is_contained(DC0->PrimaryContexts, DC1->PrimaryContext) ||
1112a84c635SPiotr Zegar          llvm::is_contained(DC1->PrimaryContexts, DC0->PrimaryContext);
1122a84c635SPiotr Zegar }
1132a84c635SPiotr Zegar 
1142a84c635SPiotr Zegar static bool mayShadow(const NamedDecl *ND0,
1152a84c635SPiotr Zegar                       const ConfusableIdentifierCheck::ContextInfo *DC0,
1162a84c635SPiotr Zegar                       const NamedDecl *ND1,
1172a84c635SPiotr Zegar                       const ConfusableIdentifierCheck::ContextInfo *DC1) {
1188fdedcd1SPiotr Zegar 
1198fdedcd1SPiotr Zegar   if (!DC0->Bases.empty() && !DC1->Bases.empty()) {
1208fdedcd1SPiotr Zegar     // if any of the declaration is a non-private member of the other
1218fdedcd1SPiotr Zegar     // declaration, it's shadowed by the former
1228fdedcd1SPiotr Zegar 
1238fdedcd1SPiotr Zegar     if (ND1->getAccess() != AS_private && isMemberOf(DC1, DC0))
1242a84c635SPiotr Zegar       return true;
1252a84c635SPiotr Zegar 
1268fdedcd1SPiotr Zegar     if (ND0->getAccess() != AS_private && isMemberOf(DC0, DC1))
1278fdedcd1SPiotr Zegar       return true;
1288fdedcd1SPiotr Zegar   }
1298fdedcd1SPiotr Zegar 
1308fdedcd1SPiotr Zegar   if (!mayShadowImpl(DC0->NonTransparentContext, DC1->NonTransparentContext) &&
1318fdedcd1SPiotr Zegar       !mayShadowImpl(ND0, ND1))
1328fdedcd1SPiotr Zegar     return false;
1338fdedcd1SPiotr Zegar 
1348fdedcd1SPiotr Zegar   return enclosesContext(DC0, DC1);
1352a84c635SPiotr Zegar }
1362a84c635SPiotr Zegar 
1372a84c635SPiotr Zegar const ConfusableIdentifierCheck::ContextInfo *
1382a84c635SPiotr Zegar ConfusableIdentifierCheck::getContextInfo(const DeclContext *DC) {
1392a84c635SPiotr Zegar   const DeclContext *PrimaryContext = DC->getPrimaryContext();
140*571354e2SKazu Hirata   auto [It, Inserted] = ContextInfos.try_emplace(PrimaryContext);
141*571354e2SKazu Hirata   if (!Inserted)
1422a84c635SPiotr Zegar     return &It->second;
1432a84c635SPiotr Zegar 
144*571354e2SKazu Hirata   ContextInfo &Info = It->second;
1452a84c635SPiotr Zegar   Info.PrimaryContext = PrimaryContext;
1462a84c635SPiotr Zegar   Info.NonTransparentContext = PrimaryContext;
1472a84c635SPiotr Zegar 
1482a84c635SPiotr Zegar   while (Info.NonTransparentContext->isTransparentContext()) {
1492a84c635SPiotr Zegar     Info.NonTransparentContext = Info.NonTransparentContext->getParent();
1502a84c635SPiotr Zegar     if (!Info.NonTransparentContext)
1512a84c635SPiotr Zegar       break;
1522a84c635SPiotr Zegar   }
1532a84c635SPiotr Zegar 
1542a84c635SPiotr Zegar   if (Info.NonTransparentContext)
1552a84c635SPiotr Zegar     Info.NonTransparentContext =
1562a84c635SPiotr Zegar         Info.NonTransparentContext->getPrimaryContext();
1572a84c635SPiotr Zegar 
1582a84c635SPiotr Zegar   while (DC) {
1592a84c635SPiotr Zegar     if (!isa<LinkageSpecDecl>(DC) && !isa<ExportDecl>(DC))
1602a84c635SPiotr Zegar       Info.PrimaryContexts.push_back(DC->getPrimaryContext());
1612a84c635SPiotr Zegar     DC = DC->getParent();
1622a84c635SPiotr Zegar   }
1632a84c635SPiotr Zegar 
164fc2a9ad1SPiotr Zegar   if (const auto *RD = dyn_cast<CXXRecordDecl>(PrimaryContext)) {
1652a84c635SPiotr Zegar     RD = RD->getDefinition();
1662a84c635SPiotr Zegar     if (RD) {
1672a84c635SPiotr Zegar       Info.Bases.push_back(RD);
1682a84c635SPiotr Zegar       RD->forallBases([&](const CXXRecordDecl *Base) {
1692a84c635SPiotr Zegar         Info.Bases.push_back(Base);
1707a550212Sserge-sans-paille         return false;
1712a84c635SPiotr Zegar       });
1722a84c635SPiotr Zegar     }
1737a550212Sserge-sans-paille   }
1747a550212Sserge-sans-paille 
1752a84c635SPiotr Zegar   return &Info;
1767a550212Sserge-sans-paille }
1777a550212Sserge-sans-paille 
178c3574ef7Sserge-sans-paille void ConfusableIdentifierCheck::check(
179c3574ef7Sserge-sans-paille     const ast_matchers::MatchFinder::MatchResult &Result) {
1808fdedcd1SPiotr Zegar   const auto *ND = Result.Nodes.getNodeAs<NamedDecl>("nameddecl");
1818fdedcd1SPiotr Zegar   if (!ND)
1828fdedcd1SPiotr Zegar     return;
1838fdedcd1SPiotr Zegar 
1848fdedcd1SPiotr Zegar   IdentifierInfo *NDII = ND->getIdentifier();
1858fdedcd1SPiotr Zegar   if (!NDII)
1868fdedcd1SPiotr Zegar     return;
1878fdedcd1SPiotr Zegar 
1887a550212Sserge-sans-paille   StringRef NDName = NDII->getName();
1898fdedcd1SPiotr Zegar   if (NDName.empty())
1908fdedcd1SPiotr Zegar     return;
1918fdedcd1SPiotr Zegar 
1928fdedcd1SPiotr Zegar   const ContextInfo *Info = getContextInfo(ND->getDeclContext());
1938fdedcd1SPiotr Zegar 
1942a84c635SPiotr Zegar   llvm::SmallVector<Entry> &Mapped = Mapper[skeleton(NDName)];
1952a84c635SPiotr Zegar   for (const Entry &E : Mapped) {
1968fdedcd1SPiotr Zegar     if (!mayShadow(ND, Info, E.Declaration, E.Info))
1978fdedcd1SPiotr Zegar       continue;
1988fdedcd1SPiotr Zegar 
1992a84c635SPiotr Zegar     const IdentifierInfo *ONDII = E.Declaration->getIdentifier();
2007a550212Sserge-sans-paille     StringRef ONDName = ONDII->getName();
2018fdedcd1SPiotr Zegar     if (ONDName == NDName)
2028fdedcd1SPiotr Zegar       continue;
2038fdedcd1SPiotr Zegar 
2048fdedcd1SPiotr Zegar     diag(ND->getLocation(), "%0 is confusable with %1") << ND << E.Declaration;
2052a84c635SPiotr Zegar     diag(E.Declaration->getLocation(), "other declaration found here",
206c3574ef7Sserge-sans-paille          DiagnosticIDs::Note);
207c3574ef7Sserge-sans-paille   }
2088fdedcd1SPiotr Zegar 
2092a84c635SPiotr Zegar   Mapped.push_back({ND, Info});
210c3574ef7Sserge-sans-paille }
2118fdedcd1SPiotr Zegar 
2128fdedcd1SPiotr Zegar void ConfusableIdentifierCheck::onEndOfTranslationUnit() {
2138fdedcd1SPiotr Zegar   Mapper.clear();
2148fdedcd1SPiotr Zegar   ContextInfos.clear();
215c3574ef7Sserge-sans-paille }
216c3574ef7Sserge-sans-paille 
217c3574ef7Sserge-sans-paille void ConfusableIdentifierCheck::registerMatchers(
218c3574ef7Sserge-sans-paille     ast_matchers::MatchFinder *Finder) {
219c3574ef7Sserge-sans-paille   Finder->addMatcher(ast_matchers::namedDecl().bind("nameddecl"), this);
220c3574ef7Sserge-sans-paille }
221c3574ef7Sserge-sans-paille 
2227d2ea6c4SCarlos Galvez } // namespace clang::tidy::misc
223