//===--- MisleadingBidirectional.cpp - clang-tidy -------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
835cca45bSserge-sans-paille
935cca45bSserge-sans-paille #include "MisleadingBidirectional.h"
1035cca45bSserge-sans-paille
1135cca45bSserge-sans-paille #include "clang/Frontend/CompilerInstance.h"
1235cca45bSserge-sans-paille #include "clang/Lex/Preprocessor.h"
1335cca45bSserge-sans-paille #include "llvm/Support/ConvertUTF.h"
14c589730aSKrzysztof Parzyszek #include <optional>
1535cca45bSserge-sans-paille
1635cca45bSserge-sans-paille using namespace clang;
1735cca45bSserge-sans-paille using namespace clang::tidy::misc;
1835cca45bSserge-sans-paille
containsMisleadingBidi(StringRef Buffer,bool HonorLineBreaks=true)1935cca45bSserge-sans-paille static bool containsMisleadingBidi(StringRef Buffer,
2035cca45bSserge-sans-paille bool HonorLineBreaks = true) {
2135cca45bSserge-sans-paille const char *CurPtr = Buffer.begin();
2235cca45bSserge-sans-paille
2335cca45bSserge-sans-paille enum BidiChar {
2435cca45bSserge-sans-paille PS = 0x2029,
2535cca45bSserge-sans-paille RLO = 0x202E,
2635cca45bSserge-sans-paille RLE = 0x202B,
2735cca45bSserge-sans-paille LRO = 0x202D,
2835cca45bSserge-sans-paille LRE = 0x202A,
2935cca45bSserge-sans-paille PDF = 0x202C,
3035cca45bSserge-sans-paille RLI = 0x2067,
3135cca45bSserge-sans-paille LRI = 0x2066,
3235cca45bSserge-sans-paille FSI = 0x2068,
3335cca45bSserge-sans-paille PDI = 0x2069
3435cca45bSserge-sans-paille };
3535cca45bSserge-sans-paille
3635cca45bSserge-sans-paille SmallVector<BidiChar> BidiContexts;
3735cca45bSserge-sans-paille
3835cca45bSserge-sans-paille // Scan each character while maintaining a stack of opened bidi context.
3935cca45bSserge-sans-paille // RLO/RLE/LRO/LRE all are closed by PDF while RLI LRI and FSI are closed by
4035cca45bSserge-sans-paille // PDI. New lines reset the context count. Extra PDF / PDI are ignored.
4135cca45bSserge-sans-paille //
4235cca45bSserge-sans-paille // Warn if we end up with an unclosed context.
4335cca45bSserge-sans-paille while (CurPtr < Buffer.end()) {
4435cca45bSserge-sans-paille unsigned char C = *CurPtr;
4535cca45bSserge-sans-paille if (isASCII(C)) {
4635cca45bSserge-sans-paille ++CurPtr;
4735cca45bSserge-sans-paille bool IsParagrapSep =
4835cca45bSserge-sans-paille (C == 0xA || C == 0xD || (0x1C <= C && C <= 0x1E) || C == 0x85);
4935cca45bSserge-sans-paille bool IsSegmentSep = (C == 0x9 || C == 0xB || C == 0x1F);
5035cca45bSserge-sans-paille if (IsParagrapSep || IsSegmentSep)
5135cca45bSserge-sans-paille BidiContexts.clear();
5235cca45bSserge-sans-paille continue;
5335cca45bSserge-sans-paille }
54*cbdc3e1bSPiotr Zegar llvm::UTF32 CodePoint = 0;
5535cca45bSserge-sans-paille llvm::ConversionResult Result = llvm::convertUTF8Sequence(
5635cca45bSserge-sans-paille (const llvm::UTF8 **)&CurPtr, (const llvm::UTF8 *)Buffer.end(),
5735cca45bSserge-sans-paille &CodePoint, llvm::strictConversion);
5835cca45bSserge-sans-paille
5935cca45bSserge-sans-paille // If conversion fails, utf-8 is designed so that we can just try next char.
6035cca45bSserge-sans-paille if (Result != llvm::conversionOK) {
6135cca45bSserge-sans-paille ++CurPtr;
6235cca45bSserge-sans-paille continue;
6335cca45bSserge-sans-paille }
6435cca45bSserge-sans-paille
6535cca45bSserge-sans-paille // Open a PDF context.
6635cca45bSserge-sans-paille if (CodePoint == RLO || CodePoint == RLE || CodePoint == LRO ||
6735cca45bSserge-sans-paille CodePoint == LRE)
6835cca45bSserge-sans-paille BidiContexts.push_back(PDF);
6935cca45bSserge-sans-paille // Close PDF Context.
7035cca45bSserge-sans-paille else if (CodePoint == PDF) {
7135cca45bSserge-sans-paille if (!BidiContexts.empty() && BidiContexts.back() == PDF)
7235cca45bSserge-sans-paille BidiContexts.pop_back();
7335cca45bSserge-sans-paille }
7435cca45bSserge-sans-paille // Open a PDI Context.
7535cca45bSserge-sans-paille else if (CodePoint == RLI || CodePoint == LRI || CodePoint == FSI)
7635cca45bSserge-sans-paille BidiContexts.push_back(PDI);
7735cca45bSserge-sans-paille // Close a PDI Context.
7835cca45bSserge-sans-paille else if (CodePoint == PDI) {
79e125e6c4SKazu Hirata auto R = llvm::find(llvm::reverse(BidiContexts), PDI);
8035cca45bSserge-sans-paille if (R != BidiContexts.rend())
8135cca45bSserge-sans-paille BidiContexts.resize(BidiContexts.rend() - R - 1);
8235cca45bSserge-sans-paille }
8335cca45bSserge-sans-paille // Line break or equivalent
8435cca45bSserge-sans-paille else if (CodePoint == PS)
8535cca45bSserge-sans-paille BidiContexts.clear();
8635cca45bSserge-sans-paille }
8735cca45bSserge-sans-paille return !BidiContexts.empty();
8835cca45bSserge-sans-paille }
8935cca45bSserge-sans-paille
9035cca45bSserge-sans-paille class MisleadingBidirectionalCheck::MisleadingBidirectionalHandler
9135cca45bSserge-sans-paille : public CommentHandler {
9235cca45bSserge-sans-paille public:
MisleadingBidirectionalHandler(MisleadingBidirectionalCheck & Check)93c0d0b123SCarlos Galvez MisleadingBidirectionalHandler(MisleadingBidirectionalCheck &Check)
9435cca45bSserge-sans-paille : Check(Check) {}
9535cca45bSserge-sans-paille
HandleComment(Preprocessor & PP,SourceRange Range)9635cca45bSserge-sans-paille bool HandleComment(Preprocessor &PP, SourceRange Range) override {
9735cca45bSserge-sans-paille // FIXME: check that we are in a /* */ comment
9835cca45bSserge-sans-paille StringRef Text =
9935cca45bSserge-sans-paille Lexer::getSourceText(CharSourceRange::getCharRange(Range),
10035cca45bSserge-sans-paille PP.getSourceManager(), PP.getLangOpts());
10135cca45bSserge-sans-paille
10235cca45bSserge-sans-paille if (containsMisleadingBidi(Text, true))
10335cca45bSserge-sans-paille Check.diag(
10435cca45bSserge-sans-paille Range.getBegin(),
10535cca45bSserge-sans-paille "comment contains misleading bidirectional Unicode characters");
10635cca45bSserge-sans-paille return false;
10735cca45bSserge-sans-paille }
10835cca45bSserge-sans-paille
10935cca45bSserge-sans-paille private:
11035cca45bSserge-sans-paille MisleadingBidirectionalCheck &Check;
11135cca45bSserge-sans-paille };
11235cca45bSserge-sans-paille
MisleadingBidirectionalCheck(StringRef Name,ClangTidyContext * Context)11335cca45bSserge-sans-paille MisleadingBidirectionalCheck::MisleadingBidirectionalCheck(
11435cca45bSserge-sans-paille StringRef Name, ClangTidyContext *Context)
11535cca45bSserge-sans-paille : ClangTidyCheck(Name, Context),
116c0d0b123SCarlos Galvez Handler(std::make_unique<MisleadingBidirectionalHandler>(*this)) {}
11735cca45bSserge-sans-paille
11835cca45bSserge-sans-paille MisleadingBidirectionalCheck::~MisleadingBidirectionalCheck() = default;
11935cca45bSserge-sans-paille
registerPPCallbacks(const SourceManager & SM,Preprocessor * PP,Preprocessor * ModuleExpanderPP)12035cca45bSserge-sans-paille void MisleadingBidirectionalCheck::registerPPCallbacks(
12135cca45bSserge-sans-paille const SourceManager &SM, Preprocessor *PP, Preprocessor *ModuleExpanderPP) {
12235cca45bSserge-sans-paille PP->addCommentHandler(Handler.get());
12335cca45bSserge-sans-paille }
12435cca45bSserge-sans-paille
check(const ast_matchers::MatchFinder::MatchResult & Result)12535cca45bSserge-sans-paille void MisleadingBidirectionalCheck::check(
12635cca45bSserge-sans-paille const ast_matchers::MatchFinder::MatchResult &Result) {
12735cca45bSserge-sans-paille if (const auto *SL = Result.Nodes.getNodeAs<StringLiteral>("strlit")) {
12835cca45bSserge-sans-paille StringRef Literal = SL->getBytes();
12935cca45bSserge-sans-paille if (containsMisleadingBidi(Literal, false))
13035cca45bSserge-sans-paille diag(SL->getBeginLoc(), "string literal contains misleading "
13135cca45bSserge-sans-paille "bidirectional Unicode characters");
13235cca45bSserge-sans-paille }
13335cca45bSserge-sans-paille }
13435cca45bSserge-sans-paille
registerMatchers(ast_matchers::MatchFinder * Finder)13535cca45bSserge-sans-paille void MisleadingBidirectionalCheck::registerMatchers(
13635cca45bSserge-sans-paille ast_matchers::MatchFinder *Finder) {
13735cca45bSserge-sans-paille Finder->addMatcher(ast_matchers::stringLiteral().bind("strlit"), this);
13835cca45bSserge-sans-paille }
139