//===-- ClangHighlighter.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8566afa0aSRaphael Isemann
9566afa0aSRaphael Isemann #include "ClangHighlighter.h"
10566afa0aSRaphael Isemann
119764b65cSJonas Devlieghere #include "lldb/Host/FileSystem.h"
12566afa0aSRaphael Isemann #include "lldb/Target/Language.h"
13566afa0aSRaphael Isemann #include "lldb/Utility/AnsiTerminal.h"
14566afa0aSRaphael Isemann #include "lldb/Utility/StreamString.h"
15566afa0aSRaphael Isemann
16e08464fbSReid Kleckner #include "clang/Basic/FileManager.h"
17566afa0aSRaphael Isemann #include "clang/Basic/SourceManager.h"
18566afa0aSRaphael Isemann #include "clang/Lex/Lexer.h"
19566afa0aSRaphael Isemann #include "llvm/ADT/StringSet.h"
20566afa0aSRaphael Isemann #include "llvm/Support/MemoryBuffer.h"
21f190ce62SKazu Hirata #include <optional>
22566afa0aSRaphael Isemann
23566afa0aSRaphael Isemann using namespace lldb_private;
24566afa0aSRaphael Isemann
isKeyword(llvm::StringRef token) const25566afa0aSRaphael Isemann bool ClangHighlighter::isKeyword(llvm::StringRef token) const {
26*65a2d6d6SKazu Hirata return keywords.contains(token);
27566afa0aSRaphael Isemann }
28566afa0aSRaphael Isemann
ClangHighlighter()29566afa0aSRaphael Isemann ClangHighlighter::ClangHighlighter() {
30566afa0aSRaphael Isemann #define KEYWORD(X, N) keywords.insert(#X);
31566afa0aSRaphael Isemann #include "clang/Basic/TokenKinds.def"
32566afa0aSRaphael Isemann }
33566afa0aSRaphael Isemann
34566afa0aSRaphael Isemann /// Determines which style should be applied to the given token.
35566afa0aSRaphael Isemann /// \param highlighter
36566afa0aSRaphael Isemann /// The current highlighter that should use the style.
37566afa0aSRaphael Isemann /// \param token
38566afa0aSRaphael Isemann /// The current token.
39566afa0aSRaphael Isemann /// \param tok_str
40566afa0aSRaphael Isemann /// The string in the source code the token represents.
41566afa0aSRaphael Isemann /// \param options
42566afa0aSRaphael Isemann /// The style we use for coloring the source code.
43566afa0aSRaphael Isemann /// \param in_pp_directive
44566afa0aSRaphael Isemann /// If we are currently in a preprocessor directive. NOTE: This is
45566afa0aSRaphael Isemann /// passed by reference and will be updated if the current token starts
46566afa0aSRaphael Isemann /// or ends a preprocessor directive.
47566afa0aSRaphael Isemann /// \return
48566afa0aSRaphael Isemann /// The ColorStyle that should be applied to the token.
49566afa0aSRaphael Isemann static HighlightStyle::ColorStyle
determineClangStyle(const ClangHighlighter & highlighter,const clang::Token & token,llvm::StringRef tok_str,const HighlightStyle & options,bool & in_pp_directive)50566afa0aSRaphael Isemann determineClangStyle(const ClangHighlighter &highlighter,
51566afa0aSRaphael Isemann const clang::Token &token, llvm::StringRef tok_str,
52566afa0aSRaphael Isemann const HighlightStyle &options, bool &in_pp_directive) {
53566afa0aSRaphael Isemann using namespace clang;
54566afa0aSRaphael Isemann
55566afa0aSRaphael Isemann if (token.is(tok::comment)) {
56566afa0aSRaphael Isemann // If we were in a preprocessor directive before, we now left it.
57566afa0aSRaphael Isemann in_pp_directive = false;
58566afa0aSRaphael Isemann return options.comment;
59566afa0aSRaphael Isemann } else if (in_pp_directive || token.getKind() == tok::hash) {
60566afa0aSRaphael Isemann // Let's assume that the rest of the line is a PP directive.
61566afa0aSRaphael Isemann in_pp_directive = true;
62566afa0aSRaphael Isemann // Preprocessor directives are hard to match, so we have to hack this in.
63566afa0aSRaphael Isemann return options.pp_directive;
64566afa0aSRaphael Isemann } else if (tok::isStringLiteral(token.getKind()))
65566afa0aSRaphael Isemann return options.string_literal;
66566afa0aSRaphael Isemann else if (tok::isLiteral(token.getKind()))
67566afa0aSRaphael Isemann return options.scalar_literal;
68566afa0aSRaphael Isemann else if (highlighter.isKeyword(tok_str))
69566afa0aSRaphael Isemann return options.keyword;
70566afa0aSRaphael Isemann else
71566afa0aSRaphael Isemann switch (token.getKind()) {
72566afa0aSRaphael Isemann case tok::raw_identifier:
73566afa0aSRaphael Isemann case tok::identifier:
74566afa0aSRaphael Isemann return options.identifier;
75566afa0aSRaphael Isemann case tok::l_brace:
76566afa0aSRaphael Isemann case tok::r_brace:
77566afa0aSRaphael Isemann return options.braces;
78566afa0aSRaphael Isemann case tok::l_square:
79566afa0aSRaphael Isemann case tok::r_square:
80566afa0aSRaphael Isemann return options.square_brackets;
81566afa0aSRaphael Isemann case tok::l_paren:
82566afa0aSRaphael Isemann case tok::r_paren:
83566afa0aSRaphael Isemann return options.parentheses;
84566afa0aSRaphael Isemann case tok::comma:
85566afa0aSRaphael Isemann return options.comma;
86566afa0aSRaphael Isemann case tok::coloncolon:
87566afa0aSRaphael Isemann case tok::colon:
88566afa0aSRaphael Isemann return options.colon;
89566afa0aSRaphael Isemann
90566afa0aSRaphael Isemann case tok::amp:
91566afa0aSRaphael Isemann case tok::ampamp:
92566afa0aSRaphael Isemann case tok::ampequal:
93566afa0aSRaphael Isemann case tok::star:
94566afa0aSRaphael Isemann case tok::starequal:
95566afa0aSRaphael Isemann case tok::plus:
96566afa0aSRaphael Isemann case tok::plusplus:
97566afa0aSRaphael Isemann case tok::plusequal:
98566afa0aSRaphael Isemann case tok::minus:
99566afa0aSRaphael Isemann case tok::arrow:
100566afa0aSRaphael Isemann case tok::minusminus:
101566afa0aSRaphael Isemann case tok::minusequal:
102566afa0aSRaphael Isemann case tok::tilde:
103566afa0aSRaphael Isemann case tok::exclaim:
104566afa0aSRaphael Isemann case tok::exclaimequal:
105566afa0aSRaphael Isemann case tok::slash:
106566afa0aSRaphael Isemann case tok::slashequal:
107566afa0aSRaphael Isemann case tok::percent:
108566afa0aSRaphael Isemann case tok::percentequal:
109566afa0aSRaphael Isemann case tok::less:
110566afa0aSRaphael Isemann case tok::lessless:
111566afa0aSRaphael Isemann case tok::lessequal:
112566afa0aSRaphael Isemann case tok::lesslessequal:
113566afa0aSRaphael Isemann case tok::spaceship:
114566afa0aSRaphael Isemann case tok::greater:
115566afa0aSRaphael Isemann case tok::greatergreater:
116566afa0aSRaphael Isemann case tok::greaterequal:
117566afa0aSRaphael Isemann case tok::greatergreaterequal:
118566afa0aSRaphael Isemann case tok::caret:
119566afa0aSRaphael Isemann case tok::caretequal:
120566afa0aSRaphael Isemann case tok::pipe:
121566afa0aSRaphael Isemann case tok::pipepipe:
122566afa0aSRaphael Isemann case tok::pipeequal:
123566afa0aSRaphael Isemann case tok::question:
124566afa0aSRaphael Isemann case tok::equal:
125566afa0aSRaphael Isemann case tok::equalequal:
126566afa0aSRaphael Isemann return options.operators;
127566afa0aSRaphael Isemann default:
128566afa0aSRaphael Isemann break;
129566afa0aSRaphael Isemann }
130566afa0aSRaphael Isemann return HighlightStyle::ColorStyle();
131566afa0aSRaphael Isemann }
132566afa0aSRaphael Isemann
Highlight(const HighlightStyle & options,llvm::StringRef line,std::optional<size_t> cursor_pos,llvm::StringRef previous_lines,Stream & result) const1332d437f6bSRaphael Isemann void ClangHighlighter::Highlight(const HighlightStyle &options,
134566afa0aSRaphael Isemann llvm::StringRef line,
1352fe83274SKazu Hirata std::optional<size_t> cursor_pos,
136566afa0aSRaphael Isemann llvm::StringRef previous_lines,
137566afa0aSRaphael Isemann Stream &result) const {
138566afa0aSRaphael Isemann using namespace clang;
139566afa0aSRaphael Isemann
140566afa0aSRaphael Isemann FileSystemOptions file_opts;
1419764b65cSJonas Devlieghere FileManager file_mgr(file_opts,
1429764b65cSJonas Devlieghere FileSystem::Instance().GetVirtualFileSystem());
143566afa0aSRaphael Isemann
1448715ffdfSRaphael Isemann // The line might end in a backslash which would cause Clang to drop the
1458715ffdfSRaphael Isemann // backslash and the terminating new line. This makes sense when parsing C++,
1468715ffdfSRaphael Isemann // but when highlighting we care about preserving the backslash/newline. To
1478715ffdfSRaphael Isemann // not lose this information we remove the new line here so that Clang knows
1488715ffdfSRaphael Isemann // this is just a single line we are highlighting. We add back the newline
1498715ffdfSRaphael Isemann // after tokenizing.
1508715ffdfSRaphael Isemann llvm::StringRef line_ending = "";
1518715ffdfSRaphael Isemann // There are a few legal line endings Clang recognizes and we need to
1528715ffdfSRaphael Isemann // temporarily remove from the string.
1538715ffdfSRaphael Isemann if (line.consume_back("\r\n"))
1548715ffdfSRaphael Isemann line_ending = "\r\n";
1558715ffdfSRaphael Isemann else if (line.consume_back("\n"))
1568715ffdfSRaphael Isemann line_ending = "\n";
1578715ffdfSRaphael Isemann else if (line.consume_back("\r"))
1588715ffdfSRaphael Isemann line_ending = "\r";
1598715ffdfSRaphael Isemann
160566afa0aSRaphael Isemann unsigned line_number = previous_lines.count('\n') + 1U;
161566afa0aSRaphael Isemann
162566afa0aSRaphael Isemann // Let's build the actual source code Clang needs and setup some utility
163566afa0aSRaphael Isemann // objects.
164566afa0aSRaphael Isemann std::string full_source = previous_lines.str() + line.str();
165566afa0aSRaphael Isemann llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs());
166566afa0aSRaphael Isemann llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts(
167566afa0aSRaphael Isemann new DiagnosticOptions());
168566afa0aSRaphael Isemann DiagnosticsEngine diags(diag_ids, diags_opts);
169566afa0aSRaphael Isemann clang::SourceManager SM(diags, file_mgr);
170566afa0aSRaphael Isemann auto buf = llvm::MemoryBuffer::getMemBuffer(full_source);
171566afa0aSRaphael Isemann
172b333d6e1SDuncan P. N. Exon Smith FileID FID = SM.createFileID(buf->getMemBufferRef());
173566afa0aSRaphael Isemann
174566afa0aSRaphael Isemann // Let's just enable the latest ObjC and C++ which should get most tokens
175566afa0aSRaphael Isemann // right.
176566afa0aSRaphael Isemann LangOptions Opts;
177fa98390bSErik Pilkington Opts.ObjC = true;
178fa98390bSErik Pilkington // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too
179566afa0aSRaphael Isemann Opts.CPlusPlus17 = true;
180566afa0aSRaphael Isemann Opts.LineComment = true;
181566afa0aSRaphael Isemann
182f96e16bcSDuncan P. N. Exon Smith Lexer lex(FID, buf->getMemBufferRef(), SM, Opts);
183566afa0aSRaphael Isemann // The lexer should keep whitespace around.
184566afa0aSRaphael Isemann lex.SetKeepWhitespaceMode(true);
185566afa0aSRaphael Isemann
186566afa0aSRaphael Isemann // Keeps track if we have entered a PP directive.
187566afa0aSRaphael Isemann bool in_pp_directive = false;
188566afa0aSRaphael Isemann
189566afa0aSRaphael Isemann // True once we actually lexed the user provided line.
190566afa0aSRaphael Isemann bool found_user_line = false;
191566afa0aSRaphael Isemann
19220786326SRaphael Isemann // True if we already highlighted the token under the cursor, false otherwise.
19320786326SRaphael Isemann bool highlighted_cursor = false;
194566afa0aSRaphael Isemann Token token;
195566afa0aSRaphael Isemann bool exit = false;
196566afa0aSRaphael Isemann while (!exit) {
197566afa0aSRaphael Isemann // Returns true if this is the last token we get from the lexer.
198566afa0aSRaphael Isemann exit = lex.LexFromRawLexer(token);
199566afa0aSRaphael Isemann
200566afa0aSRaphael Isemann bool invalid = false;
201566afa0aSRaphael Isemann unsigned current_line_number =
202566afa0aSRaphael Isemann SM.getSpellingLineNumber(token.getLocation(), &invalid);
203566afa0aSRaphael Isemann if (current_line_number != line_number)
204566afa0aSRaphael Isemann continue;
205566afa0aSRaphael Isemann found_user_line = true;
206566afa0aSRaphael Isemann
207566afa0aSRaphael Isemann // We don't need to print any tokens without a spelling line number.
208566afa0aSRaphael Isemann if (invalid)
209566afa0aSRaphael Isemann continue;
210566afa0aSRaphael Isemann
211566afa0aSRaphael Isemann // Same as above but with the column number.
212566afa0aSRaphael Isemann invalid = false;
213566afa0aSRaphael Isemann unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid);
214566afa0aSRaphael Isemann if (invalid)
215566afa0aSRaphael Isemann continue;
216566afa0aSRaphael Isemann // Column numbers start at 1, but indexes in our string start at 0.
217566afa0aSRaphael Isemann --start;
218566afa0aSRaphael Isemann
219566afa0aSRaphael Isemann // Annotations don't have a length, so let's skip them.
220566afa0aSRaphael Isemann if (token.isAnnotation())
221566afa0aSRaphael Isemann continue;
222566afa0aSRaphael Isemann
223566afa0aSRaphael Isemann // Extract the token string from our source code.
224566afa0aSRaphael Isemann llvm::StringRef tok_str = line.substr(start, token.getLength());
225566afa0aSRaphael Isemann
226566afa0aSRaphael Isemann // If the token is just an empty string, we can skip all the work below.
227566afa0aSRaphael Isemann if (tok_str.empty())
228566afa0aSRaphael Isemann continue;
229566afa0aSRaphael Isemann
23020786326SRaphael Isemann // If the cursor is inside this token, we have to apply the 'selected'
23120786326SRaphael Isemann // highlight style before applying the actual token color.
23220786326SRaphael Isemann llvm::StringRef to_print = tok_str;
23320786326SRaphael Isemann StreamString storage;
23420786326SRaphael Isemann auto end = start + token.getLength();
23520786326SRaphael Isemann if (cursor_pos && end > *cursor_pos && !highlighted_cursor) {
23620786326SRaphael Isemann highlighted_cursor = true;
23720786326SRaphael Isemann options.selected.Apply(storage, tok_str);
23820786326SRaphael Isemann to_print = storage.GetString();
23920786326SRaphael Isemann }
24020786326SRaphael Isemann
241566afa0aSRaphael Isemann // See how we are supposed to highlight this token.
242566afa0aSRaphael Isemann HighlightStyle::ColorStyle color =
243566afa0aSRaphael Isemann determineClangStyle(*this, token, tok_str, options, in_pp_directive);
244566afa0aSRaphael Isemann
24520786326SRaphael Isemann color.Apply(result, to_print);
246566afa0aSRaphael Isemann }
247566afa0aSRaphael Isemann
2488715ffdfSRaphael Isemann // Add the line ending we trimmed before tokenizing.
2498715ffdfSRaphael Isemann result << line_ending;
2508715ffdfSRaphael Isemann
251566afa0aSRaphael Isemann // If we went over the whole file but couldn't find our own file, then
252566afa0aSRaphael Isemann // somehow our setup was wrong. When we're in release mode we just give the
253566afa0aSRaphael Isemann // user the normal line and pretend we don't know how to highlight it. In
254566afa0aSRaphael Isemann // debug mode we bail out with an assert as this should never happen.
255566afa0aSRaphael Isemann if (!found_user_line) {
256566afa0aSRaphael Isemann result << line;
257566afa0aSRaphael Isemann assert(false && "We couldn't find the user line in the input file?");
258566afa0aSRaphael Isemann }
259566afa0aSRaphael Isemann }
260