xref: /llvm-project/clang-tools-extra/clang-tidy/misc/ConfusableIdentifierCheck.cpp (revision 571354e25130b213146c26d05524fcd215fbd061)
1 //===--- ConfusableIdentifierCheck.cpp -
2 // clang-tidy--------------------------===//
3 //
4 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
5 // See https://llvm.org/LICENSE.txt for license information.
6 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "ConfusableIdentifierCheck.h"
11 
12 #include "clang/Frontend/CompilerInstance.h"
13 #include "clang/Lex/Preprocessor.h"
14 #include "llvm/ADT/SmallString.h"
15 #include "llvm/Support/ConvertUTF.h"
16 
17 namespace {
18 // Preprocessed version of
19 // https://www.unicode.org/Public/security/latest/confusables.txt
20 //
21 // This contains a sorted array of { UTF32 codepoint; UTF32 values[N];}
22 #include "Confusables.inc"
23 } // namespace
24 
25 namespace clang::tidy::misc {
26 
// Constructs the check by forwarding Name and Context unchanged to the
// ClangTidyCheck base class; no other state is initialized here.
ConfusableIdentifierCheck::ConfusableIdentifierCheck(StringRef Name,
                                                     ClangTidyContext *Context)
    : ClangTidyCheck(Name, Context) {}
30 
// Defaulted destructor, defined out of line in this translation unit.
ConfusableIdentifierCheck::~ConfusableIdentifierCheck() = default;
32 
33 // Build a skeleton out of the Original identifier, inspired by the algorithm
34 // described in http://www.unicode.org/reports/tr39/#def-skeleton
35 //
36 // FIXME: TR39 mandates:
37 //
38 // For an input string X, define skeleton(X) to be the following transformation
39 // on the string:
40 //
41 // 1. Convert X to NFD format, as described in [UAX15].
42 // 2. Concatenate the prototypes for each character in X according to the
43 // specified data, producing a string of exemplar characters.
44 // 3. Reapply NFD.
45 //
// We're skipping steps 1. and 3. for the sake of simplicity, but this can lead
// to false positives.
48 
49 static llvm::SmallString<64U> skeleton(StringRef Name) {
50   using namespace llvm;
51   SmallString<64U> Skeleton;
52   Skeleton.reserve(1U + Name.size());
53 
54   const char *Curr = Name.data();
55   const char *End = Curr + Name.size();
56   while (Curr < End) {
57 
58     const char *Prev = Curr;
59     UTF32 CodePoint = 0;
60     ConversionResult Result = convertUTF8Sequence(
61         reinterpret_cast<const UTF8 **>(&Curr),
62         reinterpret_cast<const UTF8 *>(End), &CodePoint, strictConversion);
63     if (Result != conversionOK) {
64       errs() << "Unicode conversion issue\n";
65       break;
66     }
67 
68     StringRef Key(Prev, Curr - Prev);
69     auto Where = llvm::lower_bound(ConfusableEntries, CodePoint,
70                                    [](decltype(ConfusableEntries[0]) x,
71                                       UTF32 y) { return x.codepoint < y; });
72     if (Where == std::end(ConfusableEntries) || CodePoint != Where->codepoint) {
73       Skeleton.append(Prev, Curr);
74     } else {
75       UTF8 Buffer[32];
76       UTF8 *BufferStart = std::begin(Buffer);
77       UTF8 *IBuffer = BufferStart;
78       const UTF32 *ValuesStart = std::begin(Where->values);
79       const UTF32 *ValuesEnd = llvm::find(Where->values, '\0');
80       if (ConvertUTF32toUTF8(&ValuesStart, ValuesEnd, &IBuffer,
81                              std::end(Buffer),
82                              strictConversion) != conversionOK) {
83         errs() << "Unicode conversion issue\n";
84         break;
85       }
86       Skeleton.append((char *)BufferStart, (char *)IBuffer);
87     }
88   }
89   return Skeleton;
90 }
91 
92 static bool mayShadowImpl(const DeclContext *DC0, const DeclContext *DC1) {
93   return DC0 && DC0 == DC1;
94 }
95 
96 static bool mayShadowImpl(const NamedDecl *ND0, const NamedDecl *ND1) {
97   return isa<TemplateTypeParmDecl>(ND0) || isa<TemplateTypeParmDecl>(ND1);
98 }
99 
100 static bool isMemberOf(const ConfusableIdentifierCheck::ContextInfo *DC0,
101                        const ConfusableIdentifierCheck::ContextInfo *DC1) {
102   return llvm::is_contained(DC1->Bases, DC0->PrimaryContext);
103 }
104 
105 static bool enclosesContext(const ConfusableIdentifierCheck::ContextInfo *DC0,
106                             const ConfusableIdentifierCheck::ContextInfo *DC1) {
107   if (DC0->PrimaryContext == DC1->PrimaryContext)
108     return true;
109 
110   return llvm::is_contained(DC0->PrimaryContexts, DC1->PrimaryContext) ||
111          llvm::is_contained(DC1->PrimaryContexts, DC0->PrimaryContext);
112 }
113 
114 static bool mayShadow(const NamedDecl *ND0,
115                       const ConfusableIdentifierCheck::ContextInfo *DC0,
116                       const NamedDecl *ND1,
117                       const ConfusableIdentifierCheck::ContextInfo *DC1) {
118 
119   if (!DC0->Bases.empty() && !DC1->Bases.empty()) {
120     // if any of the declaration is a non-private member of the other
121     // declaration, it's shadowed by the former
122 
123     if (ND1->getAccess() != AS_private && isMemberOf(DC1, DC0))
124       return true;
125 
126     if (ND0->getAccess() != AS_private && isMemberOf(DC0, DC1))
127       return true;
128   }
129 
130   if (!mayShadowImpl(DC0->NonTransparentContext, DC1->NonTransparentContext) &&
131       !mayShadowImpl(ND0, ND1))
132     return false;
133 
134   return enclosesContext(DC0, DC1);
135 }
136 
137 const ConfusableIdentifierCheck::ContextInfo *
138 ConfusableIdentifierCheck::getContextInfo(const DeclContext *DC) {
139   const DeclContext *PrimaryContext = DC->getPrimaryContext();
140   auto [It, Inserted] = ContextInfos.try_emplace(PrimaryContext);
141   if (!Inserted)
142     return &It->second;
143 
144   ContextInfo &Info = It->second;
145   Info.PrimaryContext = PrimaryContext;
146   Info.NonTransparentContext = PrimaryContext;
147 
148   while (Info.NonTransparentContext->isTransparentContext()) {
149     Info.NonTransparentContext = Info.NonTransparentContext->getParent();
150     if (!Info.NonTransparentContext)
151       break;
152   }
153 
154   if (Info.NonTransparentContext)
155     Info.NonTransparentContext =
156         Info.NonTransparentContext->getPrimaryContext();
157 
158   while (DC) {
159     if (!isa<LinkageSpecDecl>(DC) && !isa<ExportDecl>(DC))
160       Info.PrimaryContexts.push_back(DC->getPrimaryContext());
161     DC = DC->getParent();
162   }
163 
164   if (const auto *RD = dyn_cast<CXXRecordDecl>(PrimaryContext)) {
165     RD = RD->getDefinition();
166     if (RD) {
167       Info.Bases.push_back(RD);
168       RD->forallBases([&](const CXXRecordDecl *Base) {
169         Info.Bases.push_back(Base);
170         return false;
171       });
172     }
173   }
174 
175   return &Info;
176 }
177 
178 void ConfusableIdentifierCheck::check(
179     const ast_matchers::MatchFinder::MatchResult &Result) {
180   const auto *ND = Result.Nodes.getNodeAs<NamedDecl>("nameddecl");
181   if (!ND)
182     return;
183 
184   IdentifierInfo *NDII = ND->getIdentifier();
185   if (!NDII)
186     return;
187 
188   StringRef NDName = NDII->getName();
189   if (NDName.empty())
190     return;
191 
192   const ContextInfo *Info = getContextInfo(ND->getDeclContext());
193 
194   llvm::SmallVector<Entry> &Mapped = Mapper[skeleton(NDName)];
195   for (const Entry &E : Mapped) {
196     if (!mayShadow(ND, Info, E.Declaration, E.Info))
197       continue;
198 
199     const IdentifierInfo *ONDII = E.Declaration->getIdentifier();
200     StringRef ONDName = ONDII->getName();
201     if (ONDName == NDName)
202       continue;
203 
204     diag(ND->getLocation(), "%0 is confusable with %1") << ND << E.Declaration;
205     diag(E.Declaration->getLocation(), "other declaration found here",
206          DiagnosticIDs::Note);
207   }
208 
209   Mapped.push_back({ND, Info});
210 }
211 
212 void ConfusableIdentifierCheck::onEndOfTranslationUnit() {
213   Mapper.clear();
214   ContextInfos.clear();
215 }
216 
217 void ConfusableIdentifierCheck::registerMatchers(
218     ast_matchers::MatchFinder *Finder) {
219   Finder->addMatcher(ast_matchers::namedDecl().bind("nameddecl"), this);
220 }
221 
222 } // namespace clang::tidy::misc
223