//===--- ConfusableIdentifierCheck.cpp -
// clang-tidy--------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ConfusableIdentifierCheck.h"

#include "clang/Frontend/CompilerInstance.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/ConvertUTF.h"

namespace {
// Preprocessed version of
// https://www.unicode.org/Public/security/latest/confusables.txt
//
// This contains a sorted array of { UTF32 codepoint; UTF32 values[N];}
//
// The include sits inside the anonymous namespace so that the table it defines
// stays local to this translation unit.
#include "Confusables.inc"
} // namespace

namespace clang::tidy::misc {

// Constructs the check; Name and Context are forwarded unchanged to the
// ClangTidyCheck base class.
ConfusableIdentifierCheck::ConfusableIdentifierCheck(StringRef Name,
                                                     ClangTidyContext *Context)
    : ClangTidyCheck(Name, Context) {}

// Defaulted here, out of line, rather than in the header.
ConfusableIdentifierCheck::~ConfusableIdentifierCheck() = default;

// Build a skeleton out of the Original identifier, inspired by the algorithm
// described in http://www.unicode.org/reports/tr39/#def-skeleton
//
// FIXME: TR39 mandates:
//
// For an input string X, define skeleton(X) to be the following transformation
// on the string:
//
// 1. Convert X to NFD format, as described in [UAX15].
// 2. Concatenate the prototypes for each character in X according to the
//    specified data, producing a string of exemplar characters.
// 3. Reapply NFD.
//
// We're skipping 1. and 3. for the sake of simplicity, but this can lead to
// false positives.
48 49 static llvm::SmallString<64U> skeleton(StringRef Name) { 50 using namespace llvm; 51 SmallString<64U> Skeleton; 52 Skeleton.reserve(1U + Name.size()); 53 54 const char *Curr = Name.data(); 55 const char *End = Curr + Name.size(); 56 while (Curr < End) { 57 58 const char *Prev = Curr; 59 UTF32 CodePoint = 0; 60 ConversionResult Result = convertUTF8Sequence( 61 reinterpret_cast<const UTF8 **>(&Curr), 62 reinterpret_cast<const UTF8 *>(End), &CodePoint, strictConversion); 63 if (Result != conversionOK) { 64 errs() << "Unicode conversion issue\n"; 65 break; 66 } 67 68 StringRef Key(Prev, Curr - Prev); 69 auto Where = llvm::lower_bound(ConfusableEntries, CodePoint, 70 [](decltype(ConfusableEntries[0]) x, 71 UTF32 y) { return x.codepoint < y; }); 72 if (Where == std::end(ConfusableEntries) || CodePoint != Where->codepoint) { 73 Skeleton.append(Prev, Curr); 74 } else { 75 UTF8 Buffer[32]; 76 UTF8 *BufferStart = std::begin(Buffer); 77 UTF8 *IBuffer = BufferStart; 78 const UTF32 *ValuesStart = std::begin(Where->values); 79 const UTF32 *ValuesEnd = llvm::find(Where->values, '\0'); 80 if (ConvertUTF32toUTF8(&ValuesStart, ValuesEnd, &IBuffer, 81 std::end(Buffer), 82 strictConversion) != conversionOK) { 83 errs() << "Unicode conversion issue\n"; 84 break; 85 } 86 Skeleton.append((char *)BufferStart, (char *)IBuffer); 87 } 88 } 89 return Skeleton; 90 } 91 92 static bool mayShadowImpl(const DeclContext *DC0, const DeclContext *DC1) { 93 return DC0 && DC0 == DC1; 94 } 95 96 static bool mayShadowImpl(const NamedDecl *ND0, const NamedDecl *ND1) { 97 return isa<TemplateTypeParmDecl>(ND0) || isa<TemplateTypeParmDecl>(ND1); 98 } 99 100 static bool isMemberOf(const ConfusableIdentifierCheck::ContextInfo *DC0, 101 const ConfusableIdentifierCheck::ContextInfo *DC1) { 102 return llvm::is_contained(DC1->Bases, DC0->PrimaryContext); 103 } 104 105 static bool enclosesContext(const ConfusableIdentifierCheck::ContextInfo *DC0, 106 const ConfusableIdentifierCheck::ContextInfo *DC1) { 107 if 
(DC0->PrimaryContext == DC1->PrimaryContext) 108 return true; 109 110 return llvm::is_contained(DC0->PrimaryContexts, DC1->PrimaryContext) || 111 llvm::is_contained(DC1->PrimaryContexts, DC0->PrimaryContext); 112 } 113 114 static bool mayShadow(const NamedDecl *ND0, 115 const ConfusableIdentifierCheck::ContextInfo *DC0, 116 const NamedDecl *ND1, 117 const ConfusableIdentifierCheck::ContextInfo *DC1) { 118 119 if (!DC0->Bases.empty() && !DC1->Bases.empty()) { 120 // if any of the declaration is a non-private member of the other 121 // declaration, it's shadowed by the former 122 123 if (ND1->getAccess() != AS_private && isMemberOf(DC1, DC0)) 124 return true; 125 126 if (ND0->getAccess() != AS_private && isMemberOf(DC0, DC1)) 127 return true; 128 } 129 130 if (!mayShadowImpl(DC0->NonTransparentContext, DC1->NonTransparentContext) && 131 !mayShadowImpl(ND0, ND1)) 132 return false; 133 134 return enclosesContext(DC0, DC1); 135 } 136 137 const ConfusableIdentifierCheck::ContextInfo * 138 ConfusableIdentifierCheck::getContextInfo(const DeclContext *DC) { 139 const DeclContext *PrimaryContext = DC->getPrimaryContext(); 140 auto [It, Inserted] = ContextInfos.try_emplace(PrimaryContext); 141 if (!Inserted) 142 return &It->second; 143 144 ContextInfo &Info = It->second; 145 Info.PrimaryContext = PrimaryContext; 146 Info.NonTransparentContext = PrimaryContext; 147 148 while (Info.NonTransparentContext->isTransparentContext()) { 149 Info.NonTransparentContext = Info.NonTransparentContext->getParent(); 150 if (!Info.NonTransparentContext) 151 break; 152 } 153 154 if (Info.NonTransparentContext) 155 Info.NonTransparentContext = 156 Info.NonTransparentContext->getPrimaryContext(); 157 158 while (DC) { 159 if (!isa<LinkageSpecDecl>(DC) && !isa<ExportDecl>(DC)) 160 Info.PrimaryContexts.push_back(DC->getPrimaryContext()); 161 DC = DC->getParent(); 162 } 163 164 if (const auto *RD = dyn_cast<CXXRecordDecl>(PrimaryContext)) { 165 RD = RD->getDefinition(); 166 if (RD) { 167 
Info.Bases.push_back(RD); 168 RD->forallBases([&](const CXXRecordDecl *Base) { 169 Info.Bases.push_back(Base); 170 return false; 171 }); 172 } 173 } 174 175 return &Info; 176 } 177 178 void ConfusableIdentifierCheck::check( 179 const ast_matchers::MatchFinder::MatchResult &Result) { 180 const auto *ND = Result.Nodes.getNodeAs<NamedDecl>("nameddecl"); 181 if (!ND) 182 return; 183 184 IdentifierInfo *NDII = ND->getIdentifier(); 185 if (!NDII) 186 return; 187 188 StringRef NDName = NDII->getName(); 189 if (NDName.empty()) 190 return; 191 192 const ContextInfo *Info = getContextInfo(ND->getDeclContext()); 193 194 llvm::SmallVector<Entry> &Mapped = Mapper[skeleton(NDName)]; 195 for (const Entry &E : Mapped) { 196 if (!mayShadow(ND, Info, E.Declaration, E.Info)) 197 continue; 198 199 const IdentifierInfo *ONDII = E.Declaration->getIdentifier(); 200 StringRef ONDName = ONDII->getName(); 201 if (ONDName == NDName) 202 continue; 203 204 diag(ND->getLocation(), "%0 is confusable with %1") << ND << E.Declaration; 205 diag(E.Declaration->getLocation(), "other declaration found here", 206 DiagnosticIDs::Note); 207 } 208 209 Mapped.push_back({ND, Info}); 210 } 211 212 void ConfusableIdentifierCheck::onEndOfTranslationUnit() { 213 Mapper.clear(); 214 ContextInfos.clear(); 215 } 216 217 void ConfusableIdentifierCheck::registerMatchers( 218 ast_matchers::MatchFinder *Finder) { 219 Finder->addMatcher(ast_matchers::namedDecl().bind("nameddecl"), this); 220 } 221 222 } // namespace clang::tidy::misc 223