xref: /llvm-project/clang-tools-extra/clangd/support/Token.cpp (revision ed8f78827895050442f544edef2933a60d4a7935)
1*ed8f7882SAaron Ballman //===--- Token.cpp - Tokens and token streams in the pseudoparser ---------===//
2*ed8f7882SAaron Ballman //
3*ed8f7882SAaron Ballman // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*ed8f7882SAaron Ballman // See https://llvm.org/LICENSE.txt for license information.
5*ed8f7882SAaron Ballman // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*ed8f7882SAaron Ballman //
7*ed8f7882SAaron Ballman //===----------------------------------------------------------------------===//
8*ed8f7882SAaron Ballman 
9*ed8f7882SAaron Ballman #include "Token.h"
10*ed8f7882SAaron Ballman #include "clang/Basic/LangOptions.h"
11*ed8f7882SAaron Ballman #include "llvm/ADT/StringExtras.h"
12*ed8f7882SAaron Ballman #include "llvm/Support/Format.h"
13*ed8f7882SAaron Ballman #include "llvm/Support/FormatVariadic.h"
14*ed8f7882SAaron Ballman 
15*ed8f7882SAaron Ballman namespace clang {
16*ed8f7882SAaron Ballman namespace clangd {
17*ed8f7882SAaron Ballman 
18*ed8f7882SAaron Ballman llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token &T) {
19*ed8f7882SAaron Ballman   OS << llvm::formatv("{0} {1}:{2} ", clang::tok::getTokenName(T.Kind), T.Line,
20*ed8f7882SAaron Ballman                       T.Indent);
21*ed8f7882SAaron Ballman   OS << '"';
22*ed8f7882SAaron Ballman   llvm::printEscapedString(T.text(), OS);
23*ed8f7882SAaron Ballman   OS << '"';
24*ed8f7882SAaron Ballman   if (T.Flags)
25*ed8f7882SAaron Ballman     OS << llvm::format(" flags=%x", T.Flags);
26*ed8f7882SAaron Ballman   return OS;
27*ed8f7882SAaron Ballman }
28*ed8f7882SAaron Ballman 
29*ed8f7882SAaron Ballman llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const TokenStream &TS) {
30*ed8f7882SAaron Ballman   OS << "Index               Kind    Line  Text\n";
31*ed8f7882SAaron Ballman   for (const auto &T : TS.tokens()) {
32*ed8f7882SAaron Ballman     OS << llvm::format("%5d:  %16s %4d:%-2d  ", TS.index(T),
33*ed8f7882SAaron Ballman                        clang::tok::getTokenName(T.Kind), T.Line, T.Indent);
34*ed8f7882SAaron Ballman     OS << '"';
35*ed8f7882SAaron Ballman     llvm::printEscapedString(T.text(), OS);
36*ed8f7882SAaron Ballman     OS << '"';
37*ed8f7882SAaron Ballman     if (T.Flags)
38*ed8f7882SAaron Ballman       OS << llvm::format("  flags=%x", T.Flags);
39*ed8f7882SAaron Ballman     OS << '\n';
40*ed8f7882SAaron Ballman   }
41*ed8f7882SAaron Ballman   return OS;
42*ed8f7882SAaron Ballman }
43*ed8f7882SAaron Ballman 
44*ed8f7882SAaron Ballman llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Token::Range &R) {
45*ed8f7882SAaron Ballman   OS << llvm::formatv("[{0},{1})", R.Begin, R.End);
46*ed8f7882SAaron Ballman   return OS;
47*ed8f7882SAaron Ballman }
48*ed8f7882SAaron Ballman 
49*ed8f7882SAaron Ballman TokenStream::TokenStream(std::shared_ptr<void> Payload)
50*ed8f7882SAaron Ballman     : Payload(std::move(Payload)) {
51*ed8f7882SAaron Ballman   Storage.emplace_back();
52*ed8f7882SAaron Ballman   Storage.back().Kind = clang::tok::eof;
53*ed8f7882SAaron Ballman }
54*ed8f7882SAaron Ballman 
55*ed8f7882SAaron Ballman void TokenStream::finalize() {
56*ed8f7882SAaron Ballman   assert(!isFinalized());
57*ed8f7882SAaron Ballman   unsigned LastLine = Storage.back().Line;
58*ed8f7882SAaron Ballman   Storage.emplace_back();
59*ed8f7882SAaron Ballman   Storage.back().Kind = tok::eof;
60*ed8f7882SAaron Ballman   Storage.back().Line = LastLine + 1;
61*ed8f7882SAaron Ballman 
62*ed8f7882SAaron Ballman   Tokens = Storage;
63*ed8f7882SAaron Ballman   Tokens = Tokens.drop_front().drop_back();
64*ed8f7882SAaron Ballman }
65*ed8f7882SAaron Ballman 
66*ed8f7882SAaron Ballman bool TokenStream::isFinalized() const {
67*ed8f7882SAaron Ballman   assert(!Storage.empty() && Storage.front().Kind == tok::eof);
68*ed8f7882SAaron Ballman   if (Storage.size() == 1)
69*ed8f7882SAaron Ballman     return false;
70*ed8f7882SAaron Ballman   return Storage.back().Kind == tok::eof;
71*ed8f7882SAaron Ballman }
72*ed8f7882SAaron Ballman 
73*ed8f7882SAaron Ballman void TokenStream::print(llvm::raw_ostream &OS) const {
74*ed8f7882SAaron Ballman   bool FirstToken = true;
75*ed8f7882SAaron Ballman   unsigned LastLine = -1;
76*ed8f7882SAaron Ballman   StringRef LastText;
77*ed8f7882SAaron Ballman   for (const auto &T : tokens()) {
78*ed8f7882SAaron Ballman     StringRef Text = T.text();
79*ed8f7882SAaron Ballman     if (FirstToken) {
80*ed8f7882SAaron Ballman       FirstToken = false;
81*ed8f7882SAaron Ballman     } else if (T.Line == LastLine) {
82*ed8f7882SAaron Ballman       if (LastText.data() + LastText.size() != Text.data())
83*ed8f7882SAaron Ballman         OS << ' ';
84*ed8f7882SAaron Ballman     } else {
85*ed8f7882SAaron Ballman       OS << '\n';
86*ed8f7882SAaron Ballman       OS.indent(T.Indent);
87*ed8f7882SAaron Ballman     }
88*ed8f7882SAaron Ballman     OS << Text;
89*ed8f7882SAaron Ballman     LastLine = T.Line;
90*ed8f7882SAaron Ballman     LastText = Text;
91*ed8f7882SAaron Ballman   }
92*ed8f7882SAaron Ballman   if (!FirstToken)
93*ed8f7882SAaron Ballman     OS << '\n';
94*ed8f7882SAaron Ballman }
95*ed8f7882SAaron Ballman 
96*ed8f7882SAaron Ballman clang::LangOptions genericLangOpts(clang::Language Lang,
97*ed8f7882SAaron Ballman                                    clang::LangStandard::Kind Standard) {
98*ed8f7882SAaron Ballman   clang::LangOptions Opts;
99*ed8f7882SAaron Ballman   std::vector<std::string> UnusedIncludes;
100*ed8f7882SAaron Ballman   LangOptions::setLangDefaults(Opts, Lang, llvm::Triple(), UnusedIncludes,
101*ed8f7882SAaron Ballman                                Standard);
102*ed8f7882SAaron Ballman 
103*ed8f7882SAaron Ballman   // Some options are "on by default", but e.g. at the driver level.
104*ed8f7882SAaron Ballman   if (Opts.CPlusPlus)
105*ed8f7882SAaron Ballman     Opts.CXXOperatorNames = true;
106*ed8f7882SAaron Ballman   if (Opts.CPlusPlus20)
107*ed8f7882SAaron Ballman     Opts.Coroutines = true;
108*ed8f7882SAaron Ballman 
109*ed8f7882SAaron Ballman   // Some options are off by default, but define keywords we want to tolerate.
110*ed8f7882SAaron Ballman   if (Opts.CPlusPlus)
111*ed8f7882SAaron Ballman     Opts.MicrosoftExt = true;  // kw__try, kw__finally
112*ed8f7882SAaron Ballman   Opts.DeclSpecKeyword = true; // __declspec
113*ed8f7882SAaron Ballman   Opts.WChar = true;
114*ed8f7882SAaron Ballman 
115*ed8f7882SAaron Ballman   return Opts;
116*ed8f7882SAaron Ballman }
117*ed8f7882SAaron Ballman 
118*ed8f7882SAaron Ballman TokenStream stripComments(const TokenStream &Input) {
119*ed8f7882SAaron Ballman   TokenStream Out(Input.getPayload());
120*ed8f7882SAaron Ballman   for (const Token &T : Input.tokens()) {
121*ed8f7882SAaron Ballman     if (T.Kind == tok::comment)
122*ed8f7882SAaron Ballman       continue;
123*ed8f7882SAaron Ballman     Out.push(T);
124*ed8f7882SAaron Ballman   }
125*ed8f7882SAaron Ballman   Out.finalize();
126*ed8f7882SAaron Ballman   return Out;
127*ed8f7882SAaron Ballman }
128*ed8f7882SAaron Ballman 
129*ed8f7882SAaron Ballman } // namespace clangd
130*ed8f7882SAaron Ballman } // namespace clang
131