xref: /openbsd-src/gnu/llvm/lldb/source/Plugins/Language/ClangCommon/ClangHighlighter.cpp (revision f6aab3d83b51b91c24247ad2c2573574de475a82)
1dda28197Spatrick //===-- ClangHighlighter.cpp ----------------------------------------------===//
2061da546Spatrick //
3061da546Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4061da546Spatrick // See https://llvm.org/LICENSE.txt for license information.
5061da546Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6061da546Spatrick //
7061da546Spatrick //===----------------------------------------------------------------------===//
8061da546Spatrick 
9061da546Spatrick #include "ClangHighlighter.h"
10061da546Spatrick 
11061da546Spatrick #include "lldb/Host/FileSystem.h"
12061da546Spatrick #include "lldb/Target/Language.h"
13061da546Spatrick #include "lldb/Utility/AnsiTerminal.h"
14061da546Spatrick #include "lldb/Utility/StreamString.h"
15061da546Spatrick 
16dda28197Spatrick #include "clang/Basic/FileManager.h"
17061da546Spatrick #include "clang/Basic/SourceManager.h"
18061da546Spatrick #include "clang/Lex/Lexer.h"
19061da546Spatrick #include "llvm/ADT/StringSet.h"
20061da546Spatrick #include "llvm/Support/MemoryBuffer.h"
21*f6aab3d8Srobert #include <optional>
22061da546Spatrick 
23061da546Spatrick using namespace lldb_private;
24061da546Spatrick 
isKeyword(llvm::StringRef token) const25061da546Spatrick bool ClangHighlighter::isKeyword(llvm::StringRef token) const {
26061da546Spatrick   return keywords.find(token) != keywords.end();
27061da546Spatrick }
28061da546Spatrick 
ClangHighlighter()29061da546Spatrick ClangHighlighter::ClangHighlighter() {
30061da546Spatrick #define KEYWORD(X, N) keywords.insert(#X);
31061da546Spatrick #include "clang/Basic/TokenKinds.def"
32061da546Spatrick }
33061da546Spatrick 
34061da546Spatrick /// Determines which style should be applied to the given token.
35061da546Spatrick /// \param highlighter
36061da546Spatrick ///     The current highlighter that should use the style.
37061da546Spatrick /// \param token
38061da546Spatrick ///     The current token.
39061da546Spatrick /// \param tok_str
40061da546Spatrick ///     The string in the source code the token represents.
41061da546Spatrick /// \param options
42061da546Spatrick ///     The style we use for coloring the source code.
43061da546Spatrick /// \param in_pp_directive
44061da546Spatrick ///     If we are currently in a preprocessor directive. NOTE: This is
45061da546Spatrick ///     passed by reference and will be updated if the current token starts
46061da546Spatrick ///     or ends a preprocessor directive.
47061da546Spatrick /// \return
48061da546Spatrick ///     The ColorStyle that should be applied to the token.
49061da546Spatrick static HighlightStyle::ColorStyle
determineClangStyle(const ClangHighlighter & highlighter,const clang::Token & token,llvm::StringRef tok_str,const HighlightStyle & options,bool & in_pp_directive)50061da546Spatrick determineClangStyle(const ClangHighlighter &highlighter,
51061da546Spatrick                     const clang::Token &token, llvm::StringRef tok_str,
52061da546Spatrick                     const HighlightStyle &options, bool &in_pp_directive) {
53061da546Spatrick   using namespace clang;
54061da546Spatrick 
55061da546Spatrick   if (token.is(tok::comment)) {
56061da546Spatrick     // If we were in a preprocessor directive before, we now left it.
57061da546Spatrick     in_pp_directive = false;
58061da546Spatrick     return options.comment;
59061da546Spatrick   } else if (in_pp_directive || token.getKind() == tok::hash) {
60061da546Spatrick     // Let's assume that the rest of the line is a PP directive.
61061da546Spatrick     in_pp_directive = true;
62061da546Spatrick     // Preprocessor directives are hard to match, so we have to hack this in.
63061da546Spatrick     return options.pp_directive;
64061da546Spatrick   } else if (tok::isStringLiteral(token.getKind()))
65061da546Spatrick     return options.string_literal;
66061da546Spatrick   else if (tok::isLiteral(token.getKind()))
67061da546Spatrick     return options.scalar_literal;
68061da546Spatrick   else if (highlighter.isKeyword(tok_str))
69061da546Spatrick     return options.keyword;
70061da546Spatrick   else
71061da546Spatrick     switch (token.getKind()) {
72061da546Spatrick     case tok::raw_identifier:
73061da546Spatrick     case tok::identifier:
74061da546Spatrick       return options.identifier;
75061da546Spatrick     case tok::l_brace:
76061da546Spatrick     case tok::r_brace:
77061da546Spatrick       return options.braces;
78061da546Spatrick     case tok::l_square:
79061da546Spatrick     case tok::r_square:
80061da546Spatrick       return options.square_brackets;
81061da546Spatrick     case tok::l_paren:
82061da546Spatrick     case tok::r_paren:
83061da546Spatrick       return options.parentheses;
84061da546Spatrick     case tok::comma:
85061da546Spatrick       return options.comma;
86061da546Spatrick     case tok::coloncolon:
87061da546Spatrick     case tok::colon:
88061da546Spatrick       return options.colon;
89061da546Spatrick 
90061da546Spatrick     case tok::amp:
91061da546Spatrick     case tok::ampamp:
92061da546Spatrick     case tok::ampequal:
93061da546Spatrick     case tok::star:
94061da546Spatrick     case tok::starequal:
95061da546Spatrick     case tok::plus:
96061da546Spatrick     case tok::plusplus:
97061da546Spatrick     case tok::plusequal:
98061da546Spatrick     case tok::minus:
99061da546Spatrick     case tok::arrow:
100061da546Spatrick     case tok::minusminus:
101061da546Spatrick     case tok::minusequal:
102061da546Spatrick     case tok::tilde:
103061da546Spatrick     case tok::exclaim:
104061da546Spatrick     case tok::exclaimequal:
105061da546Spatrick     case tok::slash:
106061da546Spatrick     case tok::slashequal:
107061da546Spatrick     case tok::percent:
108061da546Spatrick     case tok::percentequal:
109061da546Spatrick     case tok::less:
110061da546Spatrick     case tok::lessless:
111061da546Spatrick     case tok::lessequal:
112061da546Spatrick     case tok::lesslessequal:
113061da546Spatrick     case tok::spaceship:
114061da546Spatrick     case tok::greater:
115061da546Spatrick     case tok::greatergreater:
116061da546Spatrick     case tok::greaterequal:
117061da546Spatrick     case tok::greatergreaterequal:
118061da546Spatrick     case tok::caret:
119061da546Spatrick     case tok::caretequal:
120061da546Spatrick     case tok::pipe:
121061da546Spatrick     case tok::pipepipe:
122061da546Spatrick     case tok::pipeequal:
123061da546Spatrick     case tok::question:
124061da546Spatrick     case tok::equal:
125061da546Spatrick     case tok::equalequal:
126061da546Spatrick       return options.operators;
127061da546Spatrick     default:
128061da546Spatrick       break;
129061da546Spatrick     }
130061da546Spatrick   return HighlightStyle::ColorStyle();
131061da546Spatrick }
132061da546Spatrick 
Highlight(const HighlightStyle & options,llvm::StringRef line,std::optional<size_t> cursor_pos,llvm::StringRef previous_lines,Stream & result) const133061da546Spatrick void ClangHighlighter::Highlight(const HighlightStyle &options,
134061da546Spatrick                                  llvm::StringRef line,
135*f6aab3d8Srobert                                  std::optional<size_t> cursor_pos,
136061da546Spatrick                                  llvm::StringRef previous_lines,
137061da546Spatrick                                  Stream &result) const {
138061da546Spatrick   using namespace clang;
139061da546Spatrick 
140061da546Spatrick   FileSystemOptions file_opts;
141061da546Spatrick   FileManager file_mgr(file_opts,
142061da546Spatrick                        FileSystem::Instance().GetVirtualFileSystem());
143061da546Spatrick 
144061da546Spatrick   // The line might end in a backslash which would cause Clang to drop the
145061da546Spatrick   // backslash and the terminating new line. This makes sense when parsing C++,
146061da546Spatrick   // but when highlighting we care about preserving the backslash/newline. To
147061da546Spatrick   // not lose this information we remove the new line here so that Clang knows
148061da546Spatrick   // this is just a single line we are highlighting. We add back the newline
149061da546Spatrick   // after tokenizing.
150061da546Spatrick   llvm::StringRef line_ending = "";
151061da546Spatrick   // There are a few legal line endings Clang recognizes and we need to
152061da546Spatrick   // temporarily remove from the string.
153061da546Spatrick   if (line.consume_back("\r\n"))
154061da546Spatrick     line_ending = "\r\n";
155061da546Spatrick   else if (line.consume_back("\n"))
156061da546Spatrick     line_ending = "\n";
157061da546Spatrick   else if (line.consume_back("\r"))
158061da546Spatrick     line_ending = "\r";
159061da546Spatrick 
160061da546Spatrick   unsigned line_number = previous_lines.count('\n') + 1U;
161061da546Spatrick 
162061da546Spatrick   // Let's build the actual source code Clang needs and setup some utility
163061da546Spatrick   // objects.
164061da546Spatrick   std::string full_source = previous_lines.str() + line.str();
165061da546Spatrick   llvm::IntrusiveRefCntPtr<DiagnosticIDs> diag_ids(new DiagnosticIDs());
166061da546Spatrick   llvm::IntrusiveRefCntPtr<DiagnosticOptions> diags_opts(
167061da546Spatrick       new DiagnosticOptions());
168061da546Spatrick   DiagnosticsEngine diags(diag_ids, diags_opts);
169061da546Spatrick   clang::SourceManager SM(diags, file_mgr);
170061da546Spatrick   auto buf = llvm::MemoryBuffer::getMemBuffer(full_source);
171061da546Spatrick 
172be691f3bSpatrick   FileID FID = SM.createFileID(buf->getMemBufferRef());
173061da546Spatrick 
174061da546Spatrick   // Let's just enable the latest ObjC and C++ which should get most tokens
175061da546Spatrick   // right.
176061da546Spatrick   LangOptions Opts;
177061da546Spatrick   Opts.ObjC = true;
178061da546Spatrick   // FIXME: This should probably set CPlusPlus, CPlusPlus11, ... too
179061da546Spatrick   Opts.CPlusPlus17 = true;
180061da546Spatrick   Opts.LineComment = true;
181061da546Spatrick 
182be691f3bSpatrick   Lexer lex(FID, buf->getMemBufferRef(), SM, Opts);
183061da546Spatrick   // The lexer should keep whitespace around.
184061da546Spatrick   lex.SetKeepWhitespaceMode(true);
185061da546Spatrick 
186061da546Spatrick   // Keeps track if we have entered a PP directive.
187061da546Spatrick   bool in_pp_directive = false;
188061da546Spatrick 
189061da546Spatrick   // True once we actually lexed the user provided line.
190061da546Spatrick   bool found_user_line = false;
191061da546Spatrick 
192061da546Spatrick   // True if we already highlighted the token under the cursor, false otherwise.
193061da546Spatrick   bool highlighted_cursor = false;
194061da546Spatrick   Token token;
195061da546Spatrick   bool exit = false;
196061da546Spatrick   while (!exit) {
197061da546Spatrick     // Returns true if this is the last token we get from the lexer.
198061da546Spatrick     exit = lex.LexFromRawLexer(token);
199061da546Spatrick 
200061da546Spatrick     bool invalid = false;
201061da546Spatrick     unsigned current_line_number =
202061da546Spatrick         SM.getSpellingLineNumber(token.getLocation(), &invalid);
203061da546Spatrick     if (current_line_number != line_number)
204061da546Spatrick       continue;
205061da546Spatrick     found_user_line = true;
206061da546Spatrick 
207061da546Spatrick     // We don't need to print any tokens without a spelling line number.
208061da546Spatrick     if (invalid)
209061da546Spatrick       continue;
210061da546Spatrick 
211061da546Spatrick     // Same as above but with the column number.
212061da546Spatrick     invalid = false;
213061da546Spatrick     unsigned start = SM.getSpellingColumnNumber(token.getLocation(), &invalid);
214061da546Spatrick     if (invalid)
215061da546Spatrick       continue;
216061da546Spatrick     // Column numbers start at 1, but indexes in our string start at 0.
217061da546Spatrick     --start;
218061da546Spatrick 
219061da546Spatrick     // Annotations don't have a length, so let's skip them.
220061da546Spatrick     if (token.isAnnotation())
221061da546Spatrick       continue;
222061da546Spatrick 
223061da546Spatrick     // Extract the token string from our source code.
224061da546Spatrick     llvm::StringRef tok_str = line.substr(start, token.getLength());
225061da546Spatrick 
226061da546Spatrick     // If the token is just an empty string, we can skip all the work below.
227061da546Spatrick     if (tok_str.empty())
228061da546Spatrick       continue;
229061da546Spatrick 
230061da546Spatrick     // If the cursor is inside this token, we have to apply the 'selected'
231061da546Spatrick     // highlight style before applying the actual token color.
232061da546Spatrick     llvm::StringRef to_print = tok_str;
233061da546Spatrick     StreamString storage;
234061da546Spatrick     auto end = start + token.getLength();
235061da546Spatrick     if (cursor_pos && end > *cursor_pos && !highlighted_cursor) {
236061da546Spatrick       highlighted_cursor = true;
237061da546Spatrick       options.selected.Apply(storage, tok_str);
238061da546Spatrick       to_print = storage.GetString();
239061da546Spatrick     }
240061da546Spatrick 
241061da546Spatrick     // See how we are supposed to highlight this token.
242061da546Spatrick     HighlightStyle::ColorStyle color =
243061da546Spatrick         determineClangStyle(*this, token, tok_str, options, in_pp_directive);
244061da546Spatrick 
245061da546Spatrick     color.Apply(result, to_print);
246061da546Spatrick   }
247061da546Spatrick 
248061da546Spatrick   // Add the line ending we trimmed before tokenizing.
249061da546Spatrick   result << line_ending;
250061da546Spatrick 
251061da546Spatrick   // If we went over the whole file but couldn't find our own file, then
252061da546Spatrick   // somehow our setup was wrong. When we're in release mode we just give the
253061da546Spatrick   // user the normal line and pretend we don't know how to highlight it. In
254061da546Spatrick   // debug mode we bail out with an assert as this should never happen.
255061da546Spatrick   if (!found_user_line) {
256061da546Spatrick     result << line;
257061da546Spatrick     assert(false && "We couldn't find the user line in the input file?");
258061da546Spatrick   }
259061da546Spatrick }
260