xref: /llvm-project/clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp (revision 7d2ea6c422d3f5712b7253407005e1a465a76946)
1 //===-- clang-tools-extra/clang-tidy/NoLintDirectiveHandler.cpp -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 ///  \file This file implements the NoLintDirectiveHandler class, which is used
10 ///  to locate NOLINT comments in the file being analyzed, to decide whether a
11 ///  diagnostic should be suppressed.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #include "NoLintDirectiveHandler.h"
16 #include "GlobList.h"
17 #include "clang/Basic/LLVM.h"
18 #include "clang/Basic/SourceLocation.h"
19 #include "clang/Basic/SourceManager.h"
20 #include "clang/Tooling/Core/Diagnostic.h"
21 #include "llvm/ADT/ArrayRef.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/ADT/StringMap.h"
26 #include "llvm/ADT/StringSwitch.h"
27 #include <cassert>
28 #include <cstddef>
29 #include <iterator>
30 #include <optional>
31 #include <string>
32 #include <tuple>
33 #include <type_traits>
34 #include <utility>
35 
36 namespace clang::tidy {
37 
38 //===----------------------------------------------------------------------===//
39 // NoLintType
40 //===----------------------------------------------------------------------===//
41 
// The flavor of a suppression comment: the plain form, the next-line form, or
// one of the two markers that open and close a suppressed block.
enum class NoLintType {
  NoLint,
  NoLintNextLine,
  NoLintBegin,
  NoLintEnd,
};
44 
45 // Convert a string like "NOLINTNEXTLINE" to its enum `Type::NoLintNextLine`.
46 // Return `std::nullopt` if the string is unrecognized.
strToNoLintType(StringRef Str)47 static std::optional<NoLintType> strToNoLintType(StringRef Str) {
48   auto Type = llvm::StringSwitch<std::optional<NoLintType>>(Str)
49                   .Case("NOLINT", NoLintType::NoLint)
50                   .Case("NOLINTNEXTLINE", NoLintType::NoLintNextLine)
51                   .Case("NOLINTBEGIN", NoLintType::NoLintBegin)
52                   .Case("NOLINTEND", NoLintType::NoLintEnd)
53                   .Default(std::nullopt);
54   return Type;
55 }
56 
57 //===----------------------------------------------------------------------===//
58 // NoLintToken
59 //===----------------------------------------------------------------------===//
60 
61 // Whitespace within a NOLINT's check list shall be ignored.
62 // "NOLINT( check1, check2 )" is equivalent to "NOLINT(check1,check2)".
63 // Return the check list with all extraneous whitespace removed.
trimWhitespace(StringRef Checks)64 static std::string trimWhitespace(StringRef Checks) {
65   SmallVector<StringRef> Split;
66   Checks.split(Split, ',');
67   for (StringRef &Check : Split)
68     Check = Check.trim();
69   return llvm::join(Split, ",");
70 }
71 
72 namespace {
73 
74 // Record the presence of a NOLINT comment - its type, location, checks -
75 // as parsed from the file's character contents.
76 class NoLintToken {
77 public:
78   // \param Checks:
79   // - If unspecified (i.e. `None`) then ALL checks are suppressed - equivalent
80   //   to NOLINT(*).
81   // - An empty string means nothing is suppressed - equivalent to NOLINT().
82   // - Negative globs ignored (which would effectively disable the suppression).
NoLintToken(NoLintType Type,size_t Pos,const std::optional<std::string> & Checks)83   NoLintToken(NoLintType Type, size_t Pos,
84               const std::optional<std::string> &Checks)
85       : Type(Type), Pos(Pos), ChecksGlob(std::make_unique<CachedGlobList>(
86                                   Checks.value_or("*"),
87                                   /*KeepNegativeGlobs=*/false)) {
88     if (Checks)
89       this->Checks = trimWhitespace(*Checks);
90   }
91 
92   // The type - one of NOLINT[NEXTLINE/BEGIN/END].
93   NoLintType Type;
94 
95   // The location of the first character, "N", in "NOLINT".
96   size_t Pos;
97 
98   // If this NOLINT specifies checks, return the checks.
checks() const99   std::optional<std::string> checks() const { return Checks; }
100 
101   // Whether this NOLINT applies to the provided check.
suppresses(StringRef Check) const102   bool suppresses(StringRef Check) const { return ChecksGlob->contains(Check); }
103 
104 private:
105   std::optional<std::string> Checks;
106   std::unique_ptr<CachedGlobList> ChecksGlob;
107 };
108 
109 } // namespace
110 
111 // Consume the entire buffer and return all `NoLintToken`s that were found.
getNoLints(StringRef Buffer)112 static SmallVector<NoLintToken> getNoLints(StringRef Buffer) {
113   static constexpr llvm::StringLiteral NOLINT = "NOLINT";
114   SmallVector<NoLintToken> NoLints;
115 
116   size_t Pos = 0;
117   while (Pos < Buffer.size()) {
118     // Find NOLINT:
119     const size_t NoLintPos = Buffer.find(NOLINT, Pos);
120     if (NoLintPos == StringRef::npos)
121       break; // Buffer exhausted
122 
123     // Read [A-Z] characters immediately after "NOLINT", e.g. the "NEXTLINE" in
124     // "NOLINTNEXTLINE".
125     Pos = NoLintPos + NOLINT.size();
126     while (Pos < Buffer.size() && llvm::isAlpha(Buffer[Pos]))
127       ++Pos;
128 
129     // Is this a recognized NOLINT type?
130     const std::optional<NoLintType> NoLintType =
131         strToNoLintType(Buffer.slice(NoLintPos, Pos));
132     if (!NoLintType)
133       continue;
134 
135     // Get checks, if specified.
136     std::optional<std::string> Checks;
137     if (Pos < Buffer.size() && Buffer[Pos] == '(') {
138       size_t ClosingBracket = Buffer.find_first_of("\n)", ++Pos);
139       if (ClosingBracket != StringRef::npos && Buffer[ClosingBracket] == ')') {
140         Checks = Buffer.slice(Pos, ClosingBracket).str();
141         Pos = ClosingBracket + 1;
142       }
143     }
144 
145     NoLints.emplace_back(*NoLintType, NoLintPos, Checks);
146   }
147 
148   return NoLints;
149 }
150 
151 //===----------------------------------------------------------------------===//
152 // NoLintBlockToken
153 //===----------------------------------------------------------------------===//
154 
155 namespace {
156 
157 // Represents a source range within a pair of NOLINT(BEGIN/END) comments.
158 class NoLintBlockToken {
159 public:
NoLintBlockToken(NoLintToken Begin,const NoLintToken & End)160   NoLintBlockToken(NoLintToken Begin, const NoLintToken &End)
161       : Begin(std::move(Begin)), EndPos(End.Pos) {
162     assert(this->Begin.Type == NoLintType::NoLintBegin);
163     assert(End.Type == NoLintType::NoLintEnd);
164     assert(this->Begin.Pos < End.Pos);
165     assert(this->Begin.checks() == End.checks());
166   }
167 
168   // Whether the provided diagnostic is within and is suppressible by this block
169   // of NOLINT(BEGIN/END) comments.
suppresses(size_t DiagPos,StringRef DiagName) const170   bool suppresses(size_t DiagPos, StringRef DiagName) const {
171     return (Begin.Pos < DiagPos) && (DiagPos < EndPos) &&
172            Begin.suppresses(DiagName);
173   }
174 
175 private:
176   NoLintToken Begin;
177   size_t EndPos;
178 };
179 
180 } // namespace
181 
182 // Match NOLINTBEGINs with their corresponding NOLINTENDs and move them into
183 // `NoLintBlockToken`s. If any BEGINs or ENDs are left over, they are moved to
184 // `UnmatchedTokens`.
185 static SmallVector<NoLintBlockToken>
formNoLintBlocks(SmallVector<NoLintToken> NoLints,SmallVectorImpl<NoLintToken> & UnmatchedTokens)186 formNoLintBlocks(SmallVector<NoLintToken> NoLints,
187                  SmallVectorImpl<NoLintToken> &UnmatchedTokens) {
188   SmallVector<NoLintBlockToken> CompletedBlocks;
189   SmallVector<NoLintToken> Stack;
190 
191   // Nested blocks must be fully contained within their parent block. What this
192   // means is that when you have a series of nested BEGIN tokens, the END tokens
193   // shall appear in the reverse order, starting with the closing of the
194   // inner-most block first, then the next level up, and so on. This is
195   // essentially a last-in-first-out/stack system.
196   for (NoLintToken &NoLint : NoLints) {
197     if (NoLint.Type == NoLintType::NoLintBegin)
198       // A new block is being started. Add it to the stack.
199       Stack.emplace_back(std::move(NoLint));
200     else if (NoLint.Type == NoLintType::NoLintEnd) {
201       if (!Stack.empty() && Stack.back().checks() == NoLint.checks())
202         // The previous block is being closed. Pop one element off the stack.
203         CompletedBlocks.emplace_back(Stack.pop_back_val(), NoLint);
204       else
205         // Trying to close the wrong block.
206         UnmatchedTokens.emplace_back(std::move(NoLint));
207     }
208   }
209 
210   llvm::move(Stack, std::back_inserter(UnmatchedTokens));
211   return CompletedBlocks;
212 }
213 
214 //===----------------------------------------------------------------------===//
215 // NoLintDirectiveHandler::Impl
216 //===----------------------------------------------------------------------===//
217 
218 class NoLintDirectiveHandler::Impl {
219 public:
220   bool shouldSuppress(DiagnosticsEngine::Level DiagLevel,
221                       const Diagnostic &Diag, StringRef DiagName,
222                       SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
223                       bool AllowIO, bool EnableNoLintBlocks);
224 
225 private:
226   bool diagHasNoLintInMacro(const Diagnostic &Diag, StringRef DiagName,
227                             SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
228                             bool AllowIO, bool EnableNoLintBlocks);
229 
230   bool diagHasNoLint(StringRef DiagName, SourceLocation DiagLoc,
231                      const SourceManager &SrcMgr,
232                      SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
233                      bool AllowIO, bool EnableNoLintBlocks);
234 
235   void generateCache(const SourceManager &SrcMgr, StringRef FileName,
236                      FileID File, StringRef Buffer,
237                      SmallVectorImpl<tooling::Diagnostic> &NoLintErrors);
238 
239   llvm::StringMap<SmallVector<NoLintBlockToken>> Cache;
240 };
241 
shouldSuppress(DiagnosticsEngine::Level DiagLevel,const Diagnostic & Diag,StringRef DiagName,SmallVectorImpl<tooling::Diagnostic> & NoLintErrors,bool AllowIO,bool EnableNoLintBlocks)242 bool NoLintDirectiveHandler::Impl::shouldSuppress(
243     DiagnosticsEngine::Level DiagLevel, const Diagnostic &Diag,
244     StringRef DiagName, SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
245     bool AllowIO, bool EnableNoLintBlocks) {
246   if (DiagLevel >= DiagnosticsEngine::Error)
247     return false;
248   return diagHasNoLintInMacro(Diag, DiagName, NoLintErrors, AllowIO,
249                               EnableNoLintBlocks);
250 }
251 
252 // Look at the macro's spelling location for a NOLINT. If none is found, keep
253 // looking up the call stack.
diagHasNoLintInMacro(const Diagnostic & Diag,StringRef DiagName,SmallVectorImpl<tooling::Diagnostic> & NoLintErrors,bool AllowIO,bool EnableNoLintBlocks)254 bool NoLintDirectiveHandler::Impl::diagHasNoLintInMacro(
255     const Diagnostic &Diag, StringRef DiagName,
256     SmallVectorImpl<tooling::Diagnostic> &NoLintErrors, bool AllowIO,
257     bool EnableNoLintBlocks) {
258   SourceLocation DiagLoc = Diag.getLocation();
259   if (DiagLoc.isInvalid())
260     return false;
261   const SourceManager &SrcMgr = Diag.getSourceManager();
262   while (true) {
263     if (diagHasNoLint(DiagName, DiagLoc, SrcMgr, NoLintErrors, AllowIO,
264                       EnableNoLintBlocks))
265       return true;
266     if (!DiagLoc.isMacroID())
267       return false;
268     DiagLoc = SrcMgr.getImmediateExpansionRange(DiagLoc).getBegin();
269   }
270   return false;
271 }
272 
273 // Look behind and ahead for '\n' characters. These mark the start and end of
274 // this line.
getLineStartAndEnd(StringRef Buffer,size_t From)275 static std::pair<size_t, size_t> getLineStartAndEnd(StringRef Buffer,
276                                                     size_t From) {
277   size_t StartPos = Buffer.find_last_of('\n', From) + 1;
278   size_t EndPos = std::min(Buffer.find('\n', From), Buffer.size());
279   return std::make_pair(StartPos, EndPos);
280 }
281 
282 // Whether the line has a NOLINT of type = `Type` that can suppress the
283 // diagnostic `DiagName`.
lineHasNoLint(StringRef Buffer,std::pair<size_t,size_t> LineStartAndEnd,NoLintType Type,StringRef DiagName)284 static bool lineHasNoLint(StringRef Buffer,
285                           std::pair<size_t, size_t> LineStartAndEnd,
286                           NoLintType Type, StringRef DiagName) {
287   // Get all NOLINTs on the line.
288   Buffer = Buffer.slice(LineStartAndEnd.first, LineStartAndEnd.second);
289   SmallVector<NoLintToken> NoLints = getNoLints(Buffer);
290 
291   // Do any of these NOLINTs match the desired type and diag name?
292   return llvm::any_of(NoLints, [&](const NoLintToken &NoLint) {
293     return NoLint.Type == Type && NoLint.suppresses(DiagName);
294   });
295 }
296 
297 // Whether the provided diagnostic is located within and is suppressible by a
298 // block of NOLINT(BEGIN/END) comments.
withinNoLintBlock(ArrayRef<NoLintBlockToken> NoLintBlocks,size_t DiagPos,StringRef DiagName)299 static bool withinNoLintBlock(ArrayRef<NoLintBlockToken> NoLintBlocks,
300                               size_t DiagPos, StringRef DiagName) {
301   return llvm::any_of(NoLintBlocks, [&](const NoLintBlockToken &NoLintBlock) {
302     return NoLintBlock.suppresses(DiagPos, DiagName);
303   });
304 }
305 
306 // Get the file contents as a string.
getBuffer(const SourceManager & SrcMgr,FileID File,bool AllowIO)307 static std::optional<StringRef> getBuffer(const SourceManager &SrcMgr,
308                                           FileID File, bool AllowIO) {
309   return AllowIO ? SrcMgr.getBufferDataOrNone(File)
310                  : SrcMgr.getBufferDataIfLoaded(File);
311 }
312 
313 // We will check for NOLINTs and NOLINTNEXTLINEs first. Checking for these is
314 // not so expensive (just need to parse the current and previous lines). Only if
315 // that fails do we look for NOLINT(BEGIN/END) blocks (which requires reading
316 // the entire file).
diagHasNoLint(StringRef DiagName,SourceLocation DiagLoc,const SourceManager & SrcMgr,SmallVectorImpl<tooling::Diagnostic> & NoLintErrors,bool AllowIO,bool EnableNoLintBlocks)317 bool NoLintDirectiveHandler::Impl::diagHasNoLint(
318     StringRef DiagName, SourceLocation DiagLoc, const SourceManager &SrcMgr,
319     SmallVectorImpl<tooling::Diagnostic> &NoLintErrors, bool AllowIO,
320     bool EnableNoLintBlocks) {
321   // Translate the diagnostic's SourceLocation to a raw file + offset pair.
322   FileID File;
323   unsigned int Pos = 0;
324   std::tie(File, Pos) = SrcMgr.getDecomposedSpellingLoc(DiagLoc);
325 
326   // We will only see NOLINTs in user-authored sources. No point reading the
327   // file if it is a <built-in>.
328   std::optional<StringRef> FileName = SrcMgr.getNonBuiltinFilenameForID(File);
329   if (!FileName)
330     return false;
331 
332   // Get file contents.
333   std::optional<StringRef> Buffer = getBuffer(SrcMgr, File, AllowIO);
334   if (!Buffer)
335     return false;
336 
337   // Check if there's a NOLINT on this line.
338   auto ThisLine = getLineStartAndEnd(*Buffer, Pos);
339   if (lineHasNoLint(*Buffer, ThisLine, NoLintType::NoLint, DiagName))
340     return true;
341 
342   // Check if there's a NOLINTNEXTLINE on the previous line.
343   if (ThisLine.first > 0) {
344     auto PrevLine = getLineStartAndEnd(*Buffer, ThisLine.first - 1);
345     if (lineHasNoLint(*Buffer, PrevLine, NoLintType::NoLintNextLine, DiagName))
346       return true;
347   }
348 
349   // Check if this line is within a NOLINT(BEGIN/END) block.
350   if (!EnableNoLintBlocks)
351     return false;
352 
353   // Do we have cached NOLINT block locations for this file?
354   if (Cache.count(*FileName) == 0)
355     // Warning: heavy operation - need to read entire file.
356     generateCache(SrcMgr, *FileName, File, *Buffer, NoLintErrors);
357 
358   return withinNoLintBlock(Cache[*FileName], Pos, DiagName);
359 }
360 
361 // Construct a [clang-tidy-nolint] diagnostic to do with the unmatched
362 // NOLINT(BEGIN/END) pair.
makeNoLintError(const SourceManager & SrcMgr,FileID File,const NoLintToken & NoLint)363 static tooling::Diagnostic makeNoLintError(const SourceManager &SrcMgr,
364                                            FileID File,
365                                            const NoLintToken &NoLint) {
366   tooling::Diagnostic Error;
367   Error.DiagLevel = tooling::Diagnostic::Error;
368   Error.DiagnosticName = "clang-tidy-nolint";
369   StringRef Message =
370       (NoLint.Type == NoLintType::NoLintBegin)
371           ? ("unmatched 'NOLINTBEGIN' comment without a subsequent 'NOLINT"
372              "END' comment")
373           : ("unmatched 'NOLINTEND' comment without a previous 'NOLINT"
374              "BEGIN' comment");
375   SourceLocation Loc = SrcMgr.getComposedLoc(File, NoLint.Pos);
376   Error.Message = tooling::DiagnosticMessage(Message, SrcMgr, Loc);
377   return Error;
378 }
379 
380 // Find all NOLINT(BEGIN/END) blocks in a file and store in the cache.
generateCache(const SourceManager & SrcMgr,StringRef FileName,FileID File,StringRef Buffer,SmallVectorImpl<tooling::Diagnostic> & NoLintErrors)381 void NoLintDirectiveHandler::Impl::generateCache(
382     const SourceManager &SrcMgr, StringRef FileName, FileID File,
383     StringRef Buffer, SmallVectorImpl<tooling::Diagnostic> &NoLintErrors) {
384   // Read entire file to get all NOLINTs.
385   SmallVector<NoLintToken> NoLints = getNoLints(Buffer);
386 
387   // Match each BEGIN with its corresponding END.
388   SmallVector<NoLintToken> UnmatchedTokens;
389   Cache[FileName] = formNoLintBlocks(std::move(NoLints), UnmatchedTokens);
390 
391   // Raise error for any BEGIN/END left over.
392   for (const NoLintToken &NoLint : UnmatchedTokens)
393     NoLintErrors.emplace_back(makeNoLintError(SrcMgr, File, NoLint));
394 }
395 
396 //===----------------------------------------------------------------------===//
397 // NoLintDirectiveHandler
398 //===----------------------------------------------------------------------===//
399 
NoLintDirectiveHandler()400 NoLintDirectiveHandler::NoLintDirectiveHandler()
401     : PImpl(std::make_unique<Impl>()) {}
402 
403 NoLintDirectiveHandler::~NoLintDirectiveHandler() = default;
404 
shouldSuppress(DiagnosticsEngine::Level DiagLevel,const Diagnostic & Diag,StringRef DiagName,SmallVectorImpl<tooling::Diagnostic> & NoLintErrors,bool AllowIO,bool EnableNoLintBlocks)405 bool NoLintDirectiveHandler::shouldSuppress(
406     DiagnosticsEngine::Level DiagLevel, const Diagnostic &Diag,
407     StringRef DiagName, SmallVectorImpl<tooling::Diagnostic> &NoLintErrors,
408     bool AllowIO, bool EnableNoLintBlocks) {
409   return PImpl->shouldSuppress(DiagLevel, Diag, DiagName, NoLintErrors, AllowIO,
410                                EnableNoLintBlocks);
411 }
412 
413 } // namespace clang::tidy
414