xref: /freebsd-src/contrib/llvm-project/clang/lib/Format/TokenAnalyzer.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===--- TokenAnalyzer.cpp - Analyze Token Streams --------------*- C++ -*-===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric ///
90b57cec5SDimitry Andric /// \file
100b57cec5SDimitry Andric /// This file implements an abstract TokenAnalyzer and associated helper
110b57cec5SDimitry Andric /// classes. TokenAnalyzer can be extended to generate replacements based on
120b57cec5SDimitry Andric /// an annotated and pre-processed token stream.
130b57cec5SDimitry Andric ///
140b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
150b57cec5SDimitry Andric 
160b57cec5SDimitry Andric #include "TokenAnalyzer.h"
170b57cec5SDimitry Andric #include "AffectedRangeManager.h"
180b57cec5SDimitry Andric #include "Encoding.h"
190b57cec5SDimitry Andric #include "FormatToken.h"
200b57cec5SDimitry Andric #include "FormatTokenLexer.h"
210b57cec5SDimitry Andric #include "TokenAnnotator.h"
220b57cec5SDimitry Andric #include "UnwrappedLineParser.h"
230b57cec5SDimitry Andric #include "clang/Basic/Diagnostic.h"
240b57cec5SDimitry Andric #include "clang/Basic/DiagnosticOptions.h"
250b57cec5SDimitry Andric #include "clang/Basic/FileManager.h"
260b57cec5SDimitry Andric #include "clang/Basic/SourceManager.h"
270b57cec5SDimitry Andric #include "clang/Format/Format.h"
280b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
29349cc55cSDimitry Andric #include "llvm/ADT/SmallVector.h"
300b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
31349cc55cSDimitry Andric #include <type_traits>
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric #define DEBUG_TYPE "format-formatter"
340b57cec5SDimitry Andric 
350b57cec5SDimitry Andric namespace clang {
360b57cec5SDimitry Andric namespace format {
370b57cec5SDimitry Andric 
38349cc55cSDimitry Andric // FIXME: Instead of printing the diagnostic we should store it and have a
39349cc55cSDimitry Andric // better way to return errors through the format APIs.
40349cc55cSDimitry Andric class FatalDiagnosticConsumer : public DiagnosticConsumer {
41349cc55cSDimitry Andric public:
42349cc55cSDimitry Andric   void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel,
43349cc55cSDimitry Andric                         const Diagnostic &Info) override {
44349cc55cSDimitry Andric     if (DiagLevel == DiagnosticsEngine::Fatal) {
45349cc55cSDimitry Andric       Fatal = true;
46349cc55cSDimitry Andric       llvm::SmallVector<char, 128> Message;
47349cc55cSDimitry Andric       Info.FormatDiagnostic(Message);
48349cc55cSDimitry Andric       llvm::errs() << Message << "\n";
49349cc55cSDimitry Andric     }
50349cc55cSDimitry Andric   }
51349cc55cSDimitry Andric 
52349cc55cSDimitry Andric   bool fatalError() const { return Fatal; }
53349cc55cSDimitry Andric 
54349cc55cSDimitry Andric private:
55349cc55cSDimitry Andric   bool Fatal = false;
56349cc55cSDimitry Andric };
57349cc55cSDimitry Andric 
58349cc55cSDimitry Andric std::unique_ptr<Environment>
59349cc55cSDimitry Andric Environment::make(StringRef Code, StringRef FileName,
60349cc55cSDimitry Andric                   ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn,
61349cc55cSDimitry Andric                   unsigned NextStartColumn, unsigned LastStartColumn) {
62349cc55cSDimitry Andric   auto Env = std::make_unique<Environment>(Code, FileName, FirstStartColumn,
63349cc55cSDimitry Andric                                            NextStartColumn, LastStartColumn);
64349cc55cSDimitry Andric   FatalDiagnosticConsumer Diags;
65349cc55cSDimitry Andric   Env->SM.getDiagnostics().setClient(&Diags, /*ShouldOwnClient=*/false);
66349cc55cSDimitry Andric   SourceLocation StartOfFile = Env->SM.getLocForStartOfFile(Env->ID);
67349cc55cSDimitry Andric   for (const tooling::Range &Range : Ranges) {
68349cc55cSDimitry Andric     SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset());
69349cc55cSDimitry Andric     SourceLocation End = Start.getLocWithOffset(Range.getLength());
70349cc55cSDimitry Andric     Env->CharRanges.push_back(CharSourceRange::getCharRange(Start, End));
71349cc55cSDimitry Andric   }
72349cc55cSDimitry Andric   // Validate that we can get the buffer data without a fatal error.
73349cc55cSDimitry Andric   Env->SM.getBufferData(Env->ID);
744824e7fdSDimitry Andric   if (Diags.fatalError())
754824e7fdSDimitry Andric     return nullptr;
76349cc55cSDimitry Andric   return Env;
77349cc55cSDimitry Andric }
78349cc55cSDimitry Andric 
790b57cec5SDimitry Andric Environment::Environment(StringRef Code, StringRef FileName,
800b57cec5SDimitry Andric                          unsigned FirstStartColumn, unsigned NextStartColumn,
810b57cec5SDimitry Andric                          unsigned LastStartColumn)
820b57cec5SDimitry Andric     : VirtualSM(new SourceManagerForFile(FileName, Code)), SM(VirtualSM->get()),
830b57cec5SDimitry Andric       ID(VirtualSM->get().getMainFileID()), FirstStartColumn(FirstStartColumn),
844824e7fdSDimitry Andric       NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) {}
850b57cec5SDimitry Andric 
860b57cec5SDimitry Andric TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style)
87*0fca6ea1SDimitry Andric     : Style(Style), LangOpts(getFormattingLangOpts(Style)), Env(Env),
880b57cec5SDimitry Andric       AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()),
890b57cec5SDimitry Andric       UnwrappedLines(1),
900b57cec5SDimitry Andric       Encoding(encoding::detectEncoding(
910b57cec5SDimitry Andric           Env.getSourceManager().getBufferData(Env.getFileID()))) {
920b57cec5SDimitry Andric   LLVM_DEBUG(
930b57cec5SDimitry Andric       llvm::dbgs() << "File encoding: "
940b57cec5SDimitry Andric                    << (Encoding == encoding::Encoding_UTF8 ? "UTF8" : "unknown")
950b57cec5SDimitry Andric                    << "\n");
960b57cec5SDimitry Andric   LLVM_DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language)
970b57cec5SDimitry Andric                           << "\n");
980b57cec5SDimitry Andric }
990b57cec5SDimitry Andric 
100bdd1243dSDimitry Andric std::pair<tooling::Replacements, unsigned>
101bdd1243dSDimitry Andric TokenAnalyzer::process(bool SkipAnnotation) {
1020b57cec5SDimitry Andric   tooling::Replacements Result;
1035ffd83dbSDimitry Andric   llvm::SpecificBumpPtrAllocator<FormatToken> Allocator;
104*0fca6ea1SDimitry Andric   IdentifierTable IdentTable(LangOpts);
1055ffd83dbSDimitry Andric   FormatTokenLexer Lex(Env.getSourceManager(), Env.getFileID(),
1065ffd83dbSDimitry Andric                        Env.getFirstStartColumn(), Style, Encoding, Allocator,
1075ffd83dbSDimitry Andric                        IdentTable);
1085ffd83dbSDimitry Andric   ArrayRef<FormatToken *> Toks(Lex.lex());
1095ffd83dbSDimitry Andric   SmallVector<FormatToken *, 10> Tokens(Toks.begin(), Toks.end());
11006c3fb27SDimitry Andric   UnwrappedLineParser Parser(Env.getSourceManager(), Style, Lex.getKeywords(),
11106c3fb27SDimitry Andric                              Env.getFirstStartColumn(), Tokens, *this,
11206c3fb27SDimitry Andric                              Allocator, IdentTable);
1130b57cec5SDimitry Andric   Parser.parse();
11481ad6265SDimitry Andric   assert(UnwrappedLines.back().empty());
1150b57cec5SDimitry Andric   unsigned Penalty = 0;
1160b57cec5SDimitry Andric   for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) {
1171fd87a68SDimitry Andric     const auto &Lines = UnwrappedLines[Run];
1180b57cec5SDimitry Andric     LLVM_DEBUG(llvm::dbgs() << "Run " << Run << "...\n");
1190b57cec5SDimitry Andric     SmallVector<AnnotatedLine *, 16> AnnotatedLines;
12081ad6265SDimitry Andric     AnnotatedLines.reserve(Lines.size());
1210b57cec5SDimitry Andric 
1225ffd83dbSDimitry Andric     TokenAnnotator Annotator(Style, Lex.getKeywords());
1231fd87a68SDimitry Andric     for (const UnwrappedLine &Line : Lines) {
1241fd87a68SDimitry Andric       AnnotatedLines.push_back(new AnnotatedLine(Line));
125bdd1243dSDimitry Andric       if (!SkipAnnotation)
1260b57cec5SDimitry Andric         Annotator.annotate(*AnnotatedLines.back());
1270b57cec5SDimitry Andric     }
1280b57cec5SDimitry Andric 
1290b57cec5SDimitry Andric     std::pair<tooling::Replacements, unsigned> RunResult =
1305ffd83dbSDimitry Andric         analyze(Annotator, AnnotatedLines, Lex);
1310b57cec5SDimitry Andric 
1320b57cec5SDimitry Andric     LLVM_DEBUG({
1330b57cec5SDimitry Andric       llvm::dbgs() << "Replacements for run " << Run << ":\n";
13404eeddc0SDimitry Andric       for (const tooling::Replacement &Fix : RunResult.first)
13504eeddc0SDimitry Andric         llvm::dbgs() << Fix.toString() << "\n";
1360b57cec5SDimitry Andric     });
1371fd87a68SDimitry Andric     for (AnnotatedLine *Line : AnnotatedLines)
1381fd87a68SDimitry Andric       delete Line;
1390b57cec5SDimitry Andric 
1400b57cec5SDimitry Andric     Penalty += RunResult.second;
1410b57cec5SDimitry Andric     for (const auto &R : RunResult.first) {
1420b57cec5SDimitry Andric       auto Err = Result.add(R);
1430b57cec5SDimitry Andric       // FIXME: better error handling here. For now, simply return an empty
1440b57cec5SDimitry Andric       // Replacements to indicate failure.
1450b57cec5SDimitry Andric       if (Err) {
1460b57cec5SDimitry Andric         llvm::errs() << llvm::toString(std::move(Err)) << "\n";
1470b57cec5SDimitry Andric         return {tooling::Replacements(), 0};
1480b57cec5SDimitry Andric       }
1490b57cec5SDimitry Andric     }
1500b57cec5SDimitry Andric   }
1510b57cec5SDimitry Andric   return {Result, Penalty};
1520b57cec5SDimitry Andric }
1530b57cec5SDimitry Andric 
1540b57cec5SDimitry Andric void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) {
1550b57cec5SDimitry Andric   assert(!UnwrappedLines.empty());
1560b57cec5SDimitry Andric   UnwrappedLines.back().push_back(TheLine);
1570b57cec5SDimitry Andric }
1580b57cec5SDimitry Andric 
1590b57cec5SDimitry Andric void TokenAnalyzer::finishRun() {
1600b57cec5SDimitry Andric   UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>());
1610b57cec5SDimitry Andric }
1620b57cec5SDimitry Andric 
1630b57cec5SDimitry Andric } // end namespace format
1640b57cec5SDimitry Andric } // end namespace clang
165