//===--- TokenAnalyzer.cpp - Analyze Token Streams --------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file implements an abstract TokenAnalyzer and associated helper
/// classes. TokenAnalyzer can be extended to generate replacements based on
/// an annotated and pre-processed token stream.
///
//===----------------------------------------------------------------------===//

#include "TokenAnalyzer.h"
#include "AffectedRangeManager.h"
#include "Encoding.h"
#include "FormatToken.h"
#include "FormatTokenLexer.h"
#include "TokenAnnotator.h"
#include "UnwrappedLineParser.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticOptions.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Format/Format.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Debug.h"
#include <type_traits>

330b57cec5SDimitry Andric #define DEBUG_TYPE "format-formatter" 340b57cec5SDimitry Andric 350b57cec5SDimitry Andric namespace clang { 360b57cec5SDimitry Andric namespace format { 370b57cec5SDimitry Andric 38349cc55cSDimitry Andric // FIXME: Instead of printing the diagnostic we should store it and have a 39349cc55cSDimitry Andric // better way to return errors through the format APIs. 40349cc55cSDimitry Andric class FatalDiagnosticConsumer : public DiagnosticConsumer { 41349cc55cSDimitry Andric public: 42349cc55cSDimitry Andric void HandleDiagnostic(DiagnosticsEngine::Level DiagLevel, 43349cc55cSDimitry Andric const Diagnostic &Info) override { 44349cc55cSDimitry Andric if (DiagLevel == DiagnosticsEngine::Fatal) { 45349cc55cSDimitry Andric Fatal = true; 46349cc55cSDimitry Andric llvm::SmallVector<char, 128> Message; 47349cc55cSDimitry Andric Info.FormatDiagnostic(Message); 48349cc55cSDimitry Andric llvm::errs() << Message << "\n"; 49349cc55cSDimitry Andric } 50349cc55cSDimitry Andric } 51349cc55cSDimitry Andric 52349cc55cSDimitry Andric bool fatalError() const { return Fatal; } 53349cc55cSDimitry Andric 54349cc55cSDimitry Andric private: 55349cc55cSDimitry Andric bool Fatal = false; 56349cc55cSDimitry Andric }; 57349cc55cSDimitry Andric 58349cc55cSDimitry Andric std::unique_ptr<Environment> 59349cc55cSDimitry Andric Environment::make(StringRef Code, StringRef FileName, 60349cc55cSDimitry Andric ArrayRef<tooling::Range> Ranges, unsigned FirstStartColumn, 61349cc55cSDimitry Andric unsigned NextStartColumn, unsigned LastStartColumn) { 62349cc55cSDimitry Andric auto Env = std::make_unique<Environment>(Code, FileName, FirstStartColumn, 63349cc55cSDimitry Andric NextStartColumn, LastStartColumn); 64349cc55cSDimitry Andric FatalDiagnosticConsumer Diags; 65349cc55cSDimitry Andric Env->SM.getDiagnostics().setClient(&Diags, /*ShouldOwnClient=*/false); 66349cc55cSDimitry Andric SourceLocation StartOfFile = Env->SM.getLocForStartOfFile(Env->ID); 67349cc55cSDimitry Andric for 
(const tooling::Range &Range : Ranges) { 68349cc55cSDimitry Andric SourceLocation Start = StartOfFile.getLocWithOffset(Range.getOffset()); 69349cc55cSDimitry Andric SourceLocation End = Start.getLocWithOffset(Range.getLength()); 70349cc55cSDimitry Andric Env->CharRanges.push_back(CharSourceRange::getCharRange(Start, End)); 71349cc55cSDimitry Andric } 72349cc55cSDimitry Andric // Validate that we can get the buffer data without a fatal error. 73349cc55cSDimitry Andric Env->SM.getBufferData(Env->ID); 744824e7fdSDimitry Andric if (Diags.fatalError()) 754824e7fdSDimitry Andric return nullptr; 76349cc55cSDimitry Andric return Env; 77349cc55cSDimitry Andric } 78349cc55cSDimitry Andric 790b57cec5SDimitry Andric Environment::Environment(StringRef Code, StringRef FileName, 800b57cec5SDimitry Andric unsigned FirstStartColumn, unsigned NextStartColumn, 810b57cec5SDimitry Andric unsigned LastStartColumn) 820b57cec5SDimitry Andric : VirtualSM(new SourceManagerForFile(FileName, Code)), SM(VirtualSM->get()), 830b57cec5SDimitry Andric ID(VirtualSM->get().getMainFileID()), FirstStartColumn(FirstStartColumn), 844824e7fdSDimitry Andric NextStartColumn(NextStartColumn), LastStartColumn(LastStartColumn) {} 850b57cec5SDimitry Andric 860b57cec5SDimitry Andric TokenAnalyzer::TokenAnalyzer(const Environment &Env, const FormatStyle &Style) 87*0fca6ea1SDimitry Andric : Style(Style), LangOpts(getFormattingLangOpts(Style)), Env(Env), 880b57cec5SDimitry Andric AffectedRangeMgr(Env.getSourceManager(), Env.getCharRanges()), 890b57cec5SDimitry Andric UnwrappedLines(1), 900b57cec5SDimitry Andric Encoding(encoding::detectEncoding( 910b57cec5SDimitry Andric Env.getSourceManager().getBufferData(Env.getFileID()))) { 920b57cec5SDimitry Andric LLVM_DEBUG( 930b57cec5SDimitry Andric llvm::dbgs() << "File encoding: " 940b57cec5SDimitry Andric << (Encoding == encoding::Encoding_UTF8 ? 
"UTF8" : "unknown") 950b57cec5SDimitry Andric << "\n"); 960b57cec5SDimitry Andric LLVM_DEBUG(llvm::dbgs() << "Language: " << getLanguageName(Style.Language) 970b57cec5SDimitry Andric << "\n"); 980b57cec5SDimitry Andric } 990b57cec5SDimitry Andric 100bdd1243dSDimitry Andric std::pair<tooling::Replacements, unsigned> 101bdd1243dSDimitry Andric TokenAnalyzer::process(bool SkipAnnotation) { 1020b57cec5SDimitry Andric tooling::Replacements Result; 1035ffd83dbSDimitry Andric llvm::SpecificBumpPtrAllocator<FormatToken> Allocator; 104*0fca6ea1SDimitry Andric IdentifierTable IdentTable(LangOpts); 1055ffd83dbSDimitry Andric FormatTokenLexer Lex(Env.getSourceManager(), Env.getFileID(), 1065ffd83dbSDimitry Andric Env.getFirstStartColumn(), Style, Encoding, Allocator, 1075ffd83dbSDimitry Andric IdentTable); 1085ffd83dbSDimitry Andric ArrayRef<FormatToken *> Toks(Lex.lex()); 1095ffd83dbSDimitry Andric SmallVector<FormatToken *, 10> Tokens(Toks.begin(), Toks.end()); 11006c3fb27SDimitry Andric UnwrappedLineParser Parser(Env.getSourceManager(), Style, Lex.getKeywords(), 11106c3fb27SDimitry Andric Env.getFirstStartColumn(), Tokens, *this, 11206c3fb27SDimitry Andric Allocator, IdentTable); 1130b57cec5SDimitry Andric Parser.parse(); 11481ad6265SDimitry Andric assert(UnwrappedLines.back().empty()); 1150b57cec5SDimitry Andric unsigned Penalty = 0; 1160b57cec5SDimitry Andric for (unsigned Run = 0, RunE = UnwrappedLines.size(); Run + 1 != RunE; ++Run) { 1171fd87a68SDimitry Andric const auto &Lines = UnwrappedLines[Run]; 1180b57cec5SDimitry Andric LLVM_DEBUG(llvm::dbgs() << "Run " << Run << "...\n"); 1190b57cec5SDimitry Andric SmallVector<AnnotatedLine *, 16> AnnotatedLines; 12081ad6265SDimitry Andric AnnotatedLines.reserve(Lines.size()); 1210b57cec5SDimitry Andric 1225ffd83dbSDimitry Andric TokenAnnotator Annotator(Style, Lex.getKeywords()); 1231fd87a68SDimitry Andric for (const UnwrappedLine &Line : Lines) { 1241fd87a68SDimitry Andric AnnotatedLines.push_back(new AnnotatedLine(Line)); 
125bdd1243dSDimitry Andric if (!SkipAnnotation) 1260b57cec5SDimitry Andric Annotator.annotate(*AnnotatedLines.back()); 1270b57cec5SDimitry Andric } 1280b57cec5SDimitry Andric 1290b57cec5SDimitry Andric std::pair<tooling::Replacements, unsigned> RunResult = 1305ffd83dbSDimitry Andric analyze(Annotator, AnnotatedLines, Lex); 1310b57cec5SDimitry Andric 1320b57cec5SDimitry Andric LLVM_DEBUG({ 1330b57cec5SDimitry Andric llvm::dbgs() << "Replacements for run " << Run << ":\n"; 13404eeddc0SDimitry Andric for (const tooling::Replacement &Fix : RunResult.first) 13504eeddc0SDimitry Andric llvm::dbgs() << Fix.toString() << "\n"; 1360b57cec5SDimitry Andric }); 1371fd87a68SDimitry Andric for (AnnotatedLine *Line : AnnotatedLines) 1381fd87a68SDimitry Andric delete Line; 1390b57cec5SDimitry Andric 1400b57cec5SDimitry Andric Penalty += RunResult.second; 1410b57cec5SDimitry Andric for (const auto &R : RunResult.first) { 1420b57cec5SDimitry Andric auto Err = Result.add(R); 1430b57cec5SDimitry Andric // FIXME: better error handling here. For now, simply return an empty 1440b57cec5SDimitry Andric // Replacements to indicate failure. 
1450b57cec5SDimitry Andric if (Err) { 1460b57cec5SDimitry Andric llvm::errs() << llvm::toString(std::move(Err)) << "\n"; 1470b57cec5SDimitry Andric return {tooling::Replacements(), 0}; 1480b57cec5SDimitry Andric } 1490b57cec5SDimitry Andric } 1500b57cec5SDimitry Andric } 1510b57cec5SDimitry Andric return {Result, Penalty}; 1520b57cec5SDimitry Andric } 1530b57cec5SDimitry Andric 1540b57cec5SDimitry Andric void TokenAnalyzer::consumeUnwrappedLine(const UnwrappedLine &TheLine) { 1550b57cec5SDimitry Andric assert(!UnwrappedLines.empty()); 1560b57cec5SDimitry Andric UnwrappedLines.back().push_back(TheLine); 1570b57cec5SDimitry Andric } 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric void TokenAnalyzer::finishRun() { 1600b57cec5SDimitry Andric UnwrappedLines.push_back(SmallVector<UnwrappedLine, 16>()); 1610b57cec5SDimitry Andric } 1620b57cec5SDimitry Andric 1630b57cec5SDimitry Andric } // end namespace format 1640b57cec5SDimitry Andric } // end namespace clang 165