1dda28197Spatrick //===-- ClangHighlighter.cpp ----------------------------------------------===//
2061da546Spatrick //
3061da546Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4061da546Spatrick // See https://llvm.org/LICENSE.txt for license information.
5061da546Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6061da546Spatrick //
7061da546Spatrick //===----------------------------------------------------------------------===//
8061da546Spatrick
9061da546Spatrick #include "ClangHighlighter.h"
10061da546Spatrick
11061da546Spatrick #include "lldb/Host/FileSystem.h"
12061da546Spatrick #include "lldb/Target/Language.h"
13061da546Spatrick #include "lldb/Utility/AnsiTerminal.h"
14061da546Spatrick #include "lldb/Utility/StreamString.h"
15061da546Spatrick
16dda28197Spatrick #include "clang/Basic/FileManager.h"
17061da546Spatrick #include "clang/Basic/SourceManager.h"
18061da546Spatrick #include "clang/Lex/Lexer.h"
19061da546Spatrick #include "llvm/ADT/StringSet.h"
20061da546Spatrick #include "llvm/Support/MemoryBuffer.h"
21*f6aab3d8Srobert #include <optional>
22061da546Spatrick
23061da546Spatrick using namespace lldb_private;
24061da546Spatrick
isKeyword(llvm::StringRef token) const25061da546Spatrick bool ClangHighlighter::isKeyword(llvm::StringRef token) const {
26061da546Spatrick return keywords.find(token) != keywords.end();
27061da546Spatrick }
28061da546Spatrick
ClangHighlighter()29061da546Spatrick ClangHighlighter::ClangHighlighter() {
30061da546Spatrick #define KEYWORD(X, N) keywords.insert(#X);
31061da546Spatrick #include "clang/Basic/TokenKinds.def"
32061da546Spatrick }
33061da546Spatrick
34061da546Spatrick /// Determines which style should be applied to the given token.
35061da546Spatrick /// \param highlighter
36061da546Spatrick /// The current highlighter that should use the style.
37061da546Spatrick /// \param token
38061da546Spatrick /// The current token.
39061da546Spatrick /// \param tok_str
40061da546Spatrick /// The string in the source code the token represents.
41061da546Spatrick /// \param options
42061da546Spatrick /// The style we use for coloring the source code.
43061da546Spatrick /// \param in_pp_directive
44061da546Spatrick /// If we are currently in a preprocessor directive. NOTE: This is
45061da546Spatrick /// passed by reference and will be updated if the current token starts
46061da546Spatrick /// or ends a preprocessor directive.
47061da546Spatrick /// \return
48061da546Spatrick /// The ColorStyle that should be applied to the token.
49061da546Spatrick static HighlightStyle::ColorStyle
determineClangStyle(const ClangHighlighter & highlighter,const clang::Token & token,llvm::StringRef tok_str,const HighlightStyle & options,bool & in_pp_directive)50061da546Spatrick determineClangStyle(const ClangHighlighter &highlighter,
51061da546Spatrick const clang::Token &token, llvm::StringRef tok_str,
52061da546Spatrick const HighlightStyle &options, bool &in_pp_directive) {
53061da546Spatrick using namespace clang;
54061da546Spatrick
55061da546Spatrick if (token.is(tok::comment)) {
56061da546Spatrick // If we were in a preprocessor directive before, we now left it.
57061da546Spatrick in_pp_directive = false;
58061da546Spatrick return options.comment;
59061da546Spatrick } else if (in_pp_directive || token.getKind() == tok::hash) {
60061da546Spatrick // Let's assume that the rest of the line is a PP directive.
61061da546Spatrick in_pp_directive = true;
62061da546Spatrick // Preprocessor directives are hard to match, so we have to hack this in.
63061da546Spatrick return options.pp_directive;
64061da546Spatrick } else if (tok::isStringLiteral(token.getKind()))
65061da546Spatrick return options.string_literal;
66061da546Spatrick else if (tok::isLiteral(token.getKind()))
67061da546Spatrick return options.scalar_literal;
68061da546Spatrick else if (highlighter.isKeyword(tok_str))
69061da546Spatrick return options.keyword;
70061da546Spatrick else
71061da546Spatrick switch (token.getKind()) {
72061da546Spatrick case tok::raw_identifier:
73061da546Spatrick case tok::identifier:
74061da546Spatrick return options.identifier;
75061da546Spatrick case tok::l_brace:
76061da546Spatrick case tok::r_brace:
77061da546Spatrick return options.braces;
78061da546Spatrick case tok::l_square:
79061da546Spatrick case tok::r_square:
80061da546Spatrick return options.square_brackets;
81061da546Spatrick case tok::l_paren:
82061da546Spatrick case tok::r_paren:
83061da546Spatrick return options.parentheses;
84061da546Spatrick case tok::comma:
85061da546Spatrick return options.comma;
86061da546Spatrick case tok::coloncolon:
87061da546Spatrick case tok::colon:
88061da546Spatrick return options.colon;
89061da546Spatrick
90061da546Spatrick case tok::amp:
91061da546Spatrick case tok::ampamp:
92061da546Spatrick case tok::ampequal:
93061da546Spatrick case tok::star:
94061da546Spatrick case tok::starequal:
95061da546Spatrick case tok::plus:
96061da546Spatrick case tok::plusplus:
97061da546Spatrick case tok::plusequal:
98061da546Spatrick case tok::minus:
99061da546Spatrick case tok::arrow:
100061da546Spatrick case tok::minusminus:
101061da546Spatrick case tok::minusequal:
102061da546Spatrick case tok::tilde:
103061da546Spatrick case tok::exclaim:
104061da546Spatrick case tok::exclaimequal:
105061da546Spatrick case tok::slash:
106061da546Spatrick case tok::slashequal:
107061da546Spatrick case tok::percent:
108061da546Spatrick case tok::percentequal:
109061da546Spatrick case tok::less:
110061da546Spatrick case tok::lessless:
111061da546Spatrick case tok::lessequal:
112061da546Spatrick case tok::lesslessequal:
113061da546Spatrick case tok::spaceship:
114061da546Spatrick case tok::greater:
115061da546Spatrick case tok::greatergreater:
116061da546Spatrick case tok::greaterequal:
117061da546Spatrick case tok::greatergreaterequal:
118061da546Spatrick case tok::caret:
119061da546Spatrick case tok::caretequal:
120061da546Spatrick case tok::pipe:
121061da546Spatrick case tok::pipepipe:
122061da546Spatrick case tok::pipeequal:
123061da546Spatrick case tok::question:
124061da546Spatrick case tok::equal:
125061da546Spatrick case tok::equalequal:
126061da546Spatrick return options.operators;
127061da546Spatrick default:
128061da546Spatrick break;
129061da546Spatrick }
130061da546Spatrick return HighlightStyle::ColorStyle();
131061da546Spatrick }
132061da546Spatrick
Highlight(const HighlightStyle & options,llvm::StringRef line,std::optional<size_t> cursor_pos,llvm::StringRef previous_lines,Stream & result) const133061da546Spatrick void ClangHighlighter::Highlight(const HighlightStyle &options,
134061da546Spatrick llvm::StringRef line,
135*f6aab3d8Srobert std::optional<size_t> cursor_pos,
136061da546Spatrick llvm::StringRef previous_lines,
137061da546Spatrick Stream &result) const {
138061da546Spatrick using namespace clang;
139061da546Spatrick
140061da546Spatrick FileSystemOptions file_opts;
141061da546Spatrick FileManager file_mgr(file_opts,
142061da546Spatrick FileSystem::Instance().GetVirtualFileSystem());
143061da546Spatrick
144061da546Spatrick // The line might end in a backslash which would cause Clang to drop the
145061da546Spatrick // backslash and the terminating new line. This makes sense when parsing C++,
146061da546Spatrick // but when highlighting we care about preserving the backslash/newline. To
147061da546Spatrick // not lose this information we remove the new line here so that Clang knows
148061da546Spatrick // this is just a single line we are highlighting. We add back the newline
149061da546Spatrick // after tokenizing.
150061da546Spatrick llvm::StringRef line_ending = "";
151061da546Spatrick // There are a few legal line endings Clang recognizes and we need to
152061da546Spatrick // temporarily remove from the string.
153061da546Spatrick if (line.consume_back("\r\n"))
154061da546Spatrick line_ending = "\r\n";
155061da546Spatrick else if (line.consume_back("\n"))
156061da546Spatrick line_ending = "\n";
157061da546Spatrick else if (line.consume_back("\r"))
158061da546Spatrick line_ending = "\r";
159061da546Spatrick
160061da546Spatrick unsigned line_number = previous_lines.count('\n') + 1U;
161061da546Spatrick
162061da546Spatrick // Let's build the actual source code Clang needs and setup some utility
163061da546Spatrick // objects.
164061da546Spatrick std::string full_source = previous_lines.str() + line.str();
165061da546Spatrick llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs());
166061da546Spatrick llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts(
167061da546Spatrick new DiagnosticOptions());
168061da546Spatrick DiagnosticsEngine diags(diag_ids, diags_opts);
169061da546Spatrick clang::SourceManager SM(diags, file_mgr);
170061da546Spatrick auto buf = llvm::MemoryBuffer::getMemBuffer(full_source);
171061da546Spatrick
172be691f3bSpatrick FileID FID = SM.createFileID(buf->getMemBufferRef());
173061da546Spatrick
174061da546Spatrick // Let's just enable the latest ObjC and C++ which should get most tokens
175061da546Spatrick // right.
176061da546Spatrick LangOptions Opts;
177061da546Spatrick Opts.ObjC = true;
178061da546Spatrick // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too
179061da546Spatrick Opts.CPlusPlus17 = true;
180061da546Spatrick Opts.LineComment = true;
181061da546Spatrick
182be691f3bSpatrick Lexer lex(FID, buf->getMemBufferRef(), SM, Opts);
183061da546Spatrick // The lexer should keep whitespace around.
184061da546Spatrick lex.SetKeepWhitespaceMode(true);
185061da546Spatrick
186061da546Spatrick // Keeps track if we have entered a PP directive.
187061da546Spatrick bool in_pp_directive = false;
188061da546Spatrick
189061da546Spatrick // True once we actually lexed the user provided line.
190061da546Spatrick bool found_user_line = false;
191061da546Spatrick
192061da546Spatrick // True if we already highlighted the token under the cursor, false otherwise.
193061da546Spatrick bool highlighted_cursor = false;
194061da546Spatrick Token token;
195061da546Spatrick bool exit = false;
196061da546Spatrick while (!exit) {
197061da546Spatrick // Returns true if this is the last token we get from the lexer.
198061da546Spatrick exit = lex.LexFromRawLexer(token);
199061da546Spatrick
200061da546Spatrick bool invalid = false;
201061da546Spatrick unsigned current_line_number =
202061da546Spatrick SM.getSpellingLineNumber(token.getLocation(), &invalid);
203061da546Spatrick if (current_line_number != line_number)
204061da546Spatrick continue;
205061da546Spatrick found_user_line = true;
206061da546Spatrick
207061da546Spatrick // We don't need to print any tokens without a spelling line number.
208061da546Spatrick if (invalid)
209061da546Spatrick continue;
210061da546Spatrick
211061da546Spatrick // Same as above but with the column number.
212061da546Spatrick invalid = false;
213061da546Spatrick unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid);
214061da546Spatrick if (invalid)
215061da546Spatrick continue;
216061da546Spatrick // Column numbers start at 1, but indexes in our string start at 0.
217061da546Spatrick --start;
218061da546Spatrick
219061da546Spatrick // Annotations don't have a length, so let's skip them.
220061da546Spatrick if (token.isAnnotation())
221061da546Spatrick continue;
222061da546Spatrick
223061da546Spatrick // Extract the token string from our source code.
224061da546Spatrick llvm::StringRef tok_str = line.substr(start, token.getLength());
225061da546Spatrick
226061da546Spatrick // If the token is just an empty string, we can skip all the work below.
227061da546Spatrick if (tok_str.empty())
228061da546Spatrick continue;
229061da546Spatrick
230061da546Spatrick // If the cursor is inside this token, we have to apply the 'selected'
231061da546Spatrick // highlight style before applying the actual token color.
232061da546Spatrick llvm::StringRef to_print = tok_str;
233061da546Spatrick StreamString storage;
234061da546Spatrick auto end = start + token.getLength();
235061da546Spatrick if (cursor_pos && end > *cursor_pos && !highlighted_cursor) {
236061da546Spatrick highlighted_cursor = true;
237061da546Spatrick options.selected.Apply(storage, tok_str);
238061da546Spatrick to_print = storage.GetString();
239061da546Spatrick }
240061da546Spatrick
241061da546Spatrick // See how we are supposed to highlight this token.
242061da546Spatrick HighlightStyle::ColorStyle color =
243061da546Spatrick determineClangStyle(*this, token, tok_str, options, in_pp_directive);
244061da546Spatrick
245061da546Spatrick color.Apply(result, to_print);
246061da546Spatrick }
247061da546Spatrick
248061da546Spatrick // Add the line ending we trimmed before tokenizing.
249061da546Spatrick result << line_ending;
250061da546Spatrick
251061da546Spatrick // If we went over the whole file but couldn't find our own file, then
252061da546Spatrick // somehow our setup was wrong. When we're in release mode we just give the
253061da546Spatrick // user the normal line and pretend we don't know how to highlight it. In
254061da546Spatrick // debug mode we bail out with an assert as this should never happen.
255061da546Spatrick if (!found_user_line) {
256061da546Spatrick result << line;
257061da546Spatrick assert(false && "We couldn't find the user line in the input file?");
258061da546Spatrick }
259061da546Spatrick }
260