xref: /llvm-project/clang/unittests/AST/CommentLexer.cpp (revision 1b3a78d1d534550b2f85a81b2e9ac6d7a94a478e)
1 //===- unittests/AST/CommentLexer.cpp ------ Comment lexer tests ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "clang/AST/CommentLexer.h"
10 #include "clang/AST/CommentCommandTraits.h"
11 #include "clang/Basic/CommentOptions.h"
12 #include "clang/Basic/Diagnostic.h"
13 #include "clang/Basic/DiagnosticOptions.h"
14 #include "clang/Basic/FileManager.h"
15 #include "clang/Basic/SourceManager.h"
16 #include "llvm/ADT/STLExtras.h"
17 #include "gtest/gtest.h"
18 #include <vector>
19 
20 using namespace llvm;
21 using namespace clang;
22 
23 namespace clang {
24 namespace comments {
25 
26 namespace {
27 class CommentLexerTest : public ::testing::Test {
28 protected:
CommentLexerTest()29   CommentLexerTest()
30     : FileMgr(FileMgrOpts),
31       DiagID(new DiagnosticIDs()),
32       Diags(DiagID, new DiagnosticOptions, new IgnoringDiagConsumer()),
33       SourceMgr(Diags, FileMgr),
34       Traits(Allocator, CommentOptions()) {
35   }
36 
37   FileSystemOptions FileMgrOpts;
38   FileManager FileMgr;
39   IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
40   DiagnosticsEngine Diags;
41   SourceManager SourceMgr;
42   llvm::BumpPtrAllocator Allocator;
43   CommandTraits Traits;
44 
45   void lexString(const char *Source, std::vector<Token> &Toks);
46 
getCommandName(const Token & Tok)47   StringRef getCommandName(const Token &Tok) {
48     return Traits.getCommandInfo(Tok.getCommandID())->Name;
49   }
50 
getVerbatimBlockName(const Token & Tok)51   StringRef getVerbatimBlockName(const Token &Tok) {
52     return Traits.getCommandInfo(Tok.getVerbatimBlockID())->Name;
53   }
54 
getVerbatimLineName(const Token & Tok)55   StringRef getVerbatimLineName(const Token &Tok) {
56     return Traits.getCommandInfo(Tok.getVerbatimLineID())->Name;
57   }
58 };
59 
lexString(const char * Source,std::vector<Token> & Toks)60 void CommentLexerTest::lexString(const char *Source,
61                                  std::vector<Token> &Toks) {
62   std::unique_ptr<MemoryBuffer> Buf = MemoryBuffer::getMemBuffer(Source);
63   FileID File = SourceMgr.createFileID(std::move(Buf));
64   SourceLocation Begin = SourceMgr.getLocForStartOfFile(File);
65 
66   Lexer L(Allocator, Diags, Traits, Begin, Source, Source + strlen(Source));
67 
68   while (1) {
69     Token Tok;
70     L.lex(Tok);
71     if (Tok.is(tok::eof))
72       break;
73     Toks.push_back(Tok);
74   }
75 }
76 
77 } // unnamed namespace
78 
79 // Empty source range should be handled.
TEST_F(CommentLexerTest, Basic1) {
  // Lexing an empty string is expected to yield no tokens at all.
  std::vector<Token> Tokens;
  lexString("", Tokens);

  ASSERT_TRUE(Tokens.empty());
}
88 
89 // Empty comments should be handled.
TEST_F(CommentLexerTest, Basic2) {
  // Each empty line comment is expected to lex to a single newline token.
  const char *Inputs[] = {"//", "///", "//!", "///<", "//!<"};

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(1U, Tokens.size());
    ASSERT_EQ(tok::newline, Tokens[0].getKind());
  }
}
104 
105 // Empty comments should be handled.
TEST_F(CommentLexerTest, Basic3) {
  // Each empty block comment is expected to lex to two newline tokens.
  const char *Inputs[] = {"/**/", "/***/", "/*!*/", "/**<*/", "/*!<*/"};

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(2U, Tokens.size());
    ASSERT_EQ(tok::newline, Tokens[0].getKind());
    ASSERT_EQ(tok::newline, Tokens[1].getKind());
  }
}
121 
122 // Single comment with plain text.
TEST_F(CommentLexerTest, Basic4) {
  // Line comments with and without a trailing line ending (LF, CRLF, CR) are
  // all expected to produce the same text token followed by one newline.
  const char *Inputs[] = {
      "// Meow",   "/// Meow",    "//! Meow",
      "// Meow\n", "// Meow\r\n", "//! Meow\r",
  };

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(2U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" Meow"), Tokens[0].getText());

    ASSERT_EQ(tok::newline, Tokens[1].getKind());
  }
}
142 
143 // Single comment with plain text.
TEST_F(CommentLexerTest, Basic5) {
  // A one-line block comment is expected to produce its text followed by two
  // newline tokens.
  const char *Inputs[] = {"/* Meow*/", "/** Meow*/", "/*! Meow*/"};

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(3U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" Meow"), Tokens[0].getText());

    ASSERT_EQ(tok::newline, Tokens[1].getKind());
    ASSERT_EQ(tok::newline, Tokens[2].getKind());
  }
}
163 
164 // Test newline escaping.
TEST_F(CommentLexerTest, Basic6) {
  // The same three-line input with LF, CRLF and CR line endings. The "?" "?/"
  // literals are split so that the source file itself contains no trigraph.
  const char *Inputs[] = {
      "// Aaa\\\n"   " Bbb\\ \n"   " Ccc?" "?/\n",
      "// Aaa\\\r\n" " Bbb\\ \r\n" " Ccc?" "?/\r\n",
      "// Aaa\\\r"   " Bbb\\ \r"   " Ccc?" "?/\r"
  };

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(10U, Tokens.size());

    // First line: text, then the backslash as its own text token.
    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" Aaa"), Tokens[0].getText());
    ASSERT_EQ(tok::text, Tokens[1].getKind());
    ASSERT_EQ(StringRef("\\"), Tokens[1].getText());
    ASSERT_EQ(tok::newline, Tokens[2].getKind());

    // Second line: backslash followed by a space before the line ending.
    ASSERT_EQ(tok::text, Tokens[3].getKind());
    ASSERT_EQ(StringRef(" Bbb"), Tokens[3].getText());
    ASSERT_EQ(tok::text, Tokens[4].getKind());
    ASSERT_EQ(StringRef("\\"), Tokens[4].getText());
    ASSERT_EQ(tok::text, Tokens[5].getKind());
    ASSERT_EQ(StringRef(" "), Tokens[5].getText());
    ASSERT_EQ(tok::newline, Tokens[6].getKind());

    // Third line: the trigraph spelling stays inside a single text token.
    ASSERT_EQ(tok::text, Tokens[7].getKind());
    ASSERT_EQ(StringRef(" Ccc?" "?/"), Tokens[7].getText());
    ASSERT_EQ(tok::newline, Tokens[8].getKind());

    ASSERT_EQ(tok::newline, Tokens[9].getKind());
  }
}
200 
201 // Check that we skip C-style aligned stars correctly.
TEST_F(CommentLexerTest, Basic7) {
  const char *Comment =
      "/* Aaa\n"
      " * Bbb\r\n"
      "\t* Ccc\n"
      "  ! Ddd\n"
      "  * Eee\n"
      "  ** Fff\n"
      " */";

  std::vector<Token> Tokens;
  lexString(Comment, Tokens);

  ASSERT_EQ(15U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" Aaa"), Tokens[0].getText());
  ASSERT_EQ(tok::newline, Tokens[1].getKind());

  // Leading whitespace and a single decorative star are expected to be
  // dropped from the text.
  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" Bbb"), Tokens[2].getText());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());

  ASSERT_EQ(tok::text, Tokens[4].getKind());
  ASSERT_EQ(StringRef(" Ccc"), Tokens[4].getText());
  ASSERT_EQ(tok::newline, Tokens[5].getKind());

  // A '!' is not a decoration, so the whole line is expected to be kept.
  ASSERT_EQ(tok::text, Tokens[6].getKind());
  ASSERT_EQ(StringRef("  ! Ddd"), Tokens[6].getText());
  ASSERT_EQ(tok::newline, Tokens[7].getKind());

  ASSERT_EQ(tok::text, Tokens[8].getKind());
  ASSERT_EQ(StringRef(" Eee"), Tokens[8].getText());
  ASSERT_EQ(tok::newline, Tokens[9].getKind());

  // Only the first of two stars is expected to be skipped.
  ASSERT_EQ(tok::text, Tokens[10].getKind());
  ASSERT_EQ(StringRef("* Fff"), Tokens[10].getText());
  ASSERT_EQ(tok::newline, Tokens[11].getKind());

  ASSERT_EQ(tok::text, Tokens[12].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[12].getText());

  ASSERT_EQ(tok::newline, Tokens[13].getKind());
  ASSERT_EQ(tok::newline, Tokens[14].getKind());
}
247 
248 // A command marker followed by comment end.
TEST_F(CommentLexerTest, DoxygenCommand1) {
  // A lone '@' at the end of a line comment is expected to come out as plain
  // text followed by a newline.
  const char *Inputs[] = {"//@", "///@", "//!@"};

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(2U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef("@"), Tokens[0].getText());

    ASSERT_EQ(tok::newline, Tokens[1].getKind());
  }
}
264 
265 // A command marker followed by comment end.
TEST_F(CommentLexerTest, DoxygenCommand2) {
  // A lone '@' right before the end of a block comment is expected to come
  // out as plain text followed by two newlines.
  const char *Inputs[] = {"/*@*/", "/**@*/", "/*!@*/"};

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(3U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef("@"), Tokens[0].getText());

    ASSERT_EQ(tok::newline, Tokens[1].getKind());
    ASSERT_EQ(tok::newline, Tokens[2].getKind());
  }
}
282 
283 // A command marker followed by comment end.
TEST_F(CommentLexerTest, DoxygenCommand3) {
  // A lone backslash right before the end of a block comment is expected to
  // come out as plain text followed by two newlines.
  const char *Inputs[] = {"/*\\*/", "/**\\*/"};

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(3U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef("\\"), Tokens[0].getText());

    ASSERT_EQ(tok::newline, Tokens[1].getKind());
    ASSERT_EQ(tok::newline, Tokens[2].getKind());
  }
}
300 
301 // Doxygen escape sequences.
TEST_F(CommentLexerTest, DoxygenCommand4) {
  // The same escape sequences spelled with a backslash marker and with an
  // '@' marker; both inputs are checked against the same expected texts.
  const char *Sources[] = {
    "/// \\\\ \\@ \\& \\$ \\# \\< \\> \\% \\\" \\. \\::",
    "/// @\\ @@ @& @$ @# @< @> @% @\" @. @::"
  };
  // Expected text per token position. Only positions holding a text token are
  // compared; the trailing "" entry exists to make the table length match the
  // token count (presumably it lines up with the final newline token).
  const char *Text[] = {
    " ",
    "\\", " ", "@", " ", "&", " ", "$",  " ", "#", " ",
    "<",  " ", ">", " ", "%", " ", "\"", " ", ".", " ",
    "::", ""
  };

  for (size_t SrcIdx = 0, NumSources = std::size(Sources);
       SrcIdx != NumSources; ++SrcIdx) {
    std::vector<Token> Toks;

    lexString(Sources[SrcIdx], Toks);

    ASSERT_EQ(std::size(Text), Toks.size()) << "source index " << SrcIdx;

    // Distinct bound name: the original reused `e` here, shadowing the outer
    // loop's bound.
    for (size_t TokIdx = 0, NumToks = Toks.size(); TokIdx != NumToks;
         ++TokIdx) {
      if (Toks[TokIdx].is(tok::text)) {
        ASSERT_EQ(StringRef(Text[TokIdx]), Toks[TokIdx].getText())
          << "source index " << SrcIdx << ", token index " << TokIdx;
      }
    }
  }
}
329 
330 // A command marker followed by a non-letter that is not a part of an escape
331 // sequence.
TEST_F(CommentLexerTest, DoxygenCommand5) {
  std::vector<Token> Tokens;
  lexString("/// \\^ \\0", Tokens);

  ASSERT_EQ(6U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  // The marker and the character after it are expected to end up in separate
  // text tokens.
  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("\\"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("^ "), Tokens[2].getText());

  ASSERT_EQ(tok::text, Tokens[3].getKind());
  ASSERT_EQ(StringRef("\\"), Tokens[3].getText());

  ASSERT_EQ(tok::text, Tokens[4].getKind());
  ASSERT_EQ(StringRef("0"), Tokens[4].getText());

  ASSERT_EQ(tok::newline, Tokens[5].getKind());
}
357 
// A known command followed by text on the same line.
TEST_F(CommentLexerTest, DoxygenCommand6) {
  std::vector<Token> Tokens;
  lexString("/// \\brief Aaa.", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[1].getKind());
  ASSERT_EQ(StringRef("brief"), getCommandName(Tokens[1]));

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" Aaa."), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
377 
// Backslash commands that are adjacent, or separated by a space or a tab.
TEST_F(CommentLexerTest, DoxygenCommand7) {
  std::vector<Token> Tokens;
  lexString("/// \\em\\em \\em\t\\em\n", Tokens);

  ASSERT_EQ(8U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  // Two commands with nothing between them.
  ASSERT_EQ(tok::backslash_command, Tokens[1].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[1]));

  ASSERT_EQ(tok::backslash_command, Tokens[2].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[2]));

  ASSERT_EQ(tok::text, Tokens[3].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[3].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[4].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[4]));

  ASSERT_EQ(tok::text, Tokens[5].getKind());
  ASSERT_EQ(StringRef("\t"), Tokens[5].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[6].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[6]));

  ASSERT_EQ(tok::newline, Tokens[7].getKind());
}
409 
// '@' commands that are adjacent, or separated by a space or a tab.
TEST_F(CommentLexerTest, DoxygenCommand8) {
  std::vector<Token> Tokens;
  lexString("/// @em@em @em\t@em\n", Tokens);

  ASSERT_EQ(8U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  // Two commands with nothing between them.
  ASSERT_EQ(tok::at_command, Tokens[1].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[1]));

  ASSERT_EQ(tok::at_command, Tokens[2].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[2]));

  ASSERT_EQ(tok::text, Tokens[3].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[3].getText());

  ASSERT_EQ(tok::at_command, Tokens[4].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[4]));

  ASSERT_EQ(tok::text, Tokens[5].getKind());
  ASSERT_EQ(StringRef("\t"), Tokens[5].getText());

  ASSERT_EQ(tok::at_command, Tokens[6].getKind());
  ASSERT_EQ(StringRef("em"), getCommandName(Tokens[6]));

  ASSERT_EQ(tok::newline, Tokens[7].getKind());
}
441 
// Command names that are not registered are expected to come out as
// unknown_command tokens carrying the spelled name.
TEST_F(CommentLexerTest, DoxygenCommand9) {
  std::vector<Token> Tokens;
  lexString("/// \\aaa\\bbb \\ccc\t\\ddd\n", Tokens);

  ASSERT_EQ(8U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::unknown_command, Tokens[1].getKind());
  ASSERT_EQ(StringRef("aaa"), Tokens[1].getUnknownCommandName());

  ASSERT_EQ(tok::unknown_command, Tokens[2].getKind());
  ASSERT_EQ(StringRef("bbb"), Tokens[2].getUnknownCommandName());

  ASSERT_EQ(tok::text, Tokens[3].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[3].getText());

  ASSERT_EQ(tok::unknown_command, Tokens[4].getKind());
  ASSERT_EQ(StringRef("ccc"), Tokens[4].getUnknownCommandName());

  ASSERT_EQ(tok::text, Tokens[5].getKind());
  ASSERT_EQ(StringRef("\t"), Tokens[5].getText());

  ASSERT_EQ(tok::unknown_command, Tokens[6].getKind());
  ASSERT_EQ(StringRef("ddd"), Tokens[6].getUnknownCommandName());

  ASSERT_EQ(tok::newline, Tokens[7].getKind());
}
473 
// A single-letter command directly followed by the end of the line.
TEST_F(CommentLexerTest, DoxygenCommand10) {
  std::vector<Token> Tokens;
  lexString("// \\c\n", Tokens);

  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[1].getKind());
  ASSERT_EQ(StringRef("c"), getCommandName(Tokens[1]));

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
490 
// A block command registered at runtime is expected to be recognized with
// both the backslash and the '@' marker.
TEST_F(CommentLexerTest, RegisterCustomBlockCommand) {
  // Registration has to happen before the comment is lexed.
  Traits.registerBlockCommand(StringRef("NewBlockCommand"));

  const char *Comment =
      "/// \\NewBlockCommand Aaa.\n"
      "/// @NewBlockCommand Aaa.\n";

  std::vector<Token> Tokens;
  lexString(Comment, Tokens);

  ASSERT_EQ(8U, Tokens.size());

  // First line: backslash spelling.
  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[1].getKind());
  ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Tokens[1]));

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" Aaa."), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());

  // Second line: '@' spelling.
  ASSERT_EQ(tok::text, Tokens[4].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[4].getText());

  ASSERT_EQ(tok::at_command, Tokens[5].getKind());
  ASSERT_EQ(StringRef("NewBlockCommand"), getCommandName(Tokens[5]));

  ASSERT_EQ(tok::text, Tokens[6].getKind());
  ASSERT_EQ(StringRef(" Aaa."), Tokens[6].getText());

  ASSERT_EQ(tok::newline, Tokens[7].getKind());
}
526 
// Several block commands registered at runtime, one used per line.
TEST_F(CommentLexerTest, RegisterMultipleBlockCommands) {
  // Registration order is kept as Foo, Bar, Blech.
  Traits.registerBlockCommand(StringRef("Foo"));
  Traits.registerBlockCommand(StringRef("Bar"));
  Traits.registerBlockCommand(StringRef("Blech"));

  const char *Comment =
      "/// \\Foo\n"
      "/// \\Bar Baz\n"
      "/// \\Blech quux=corge\n";

  std::vector<Token> Tokens;
  lexString(Comment, Tokens);

  ASSERT_EQ(11U, Tokens.size());

  // Line 1: command with nothing after it.
  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[1].getKind());
  ASSERT_EQ(StringRef("Foo"), getCommandName(Tokens[1]));

  ASSERT_EQ(tok::newline, Tokens[2].getKind());

  // Line 2: command followed by a word.
  ASSERT_EQ(tok::text, Tokens[3].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[3].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[4].getKind());
  ASSERT_EQ(StringRef("Bar"), getCommandName(Tokens[4]));

  ASSERT_EQ(tok::text, Tokens[5].getKind());
  ASSERT_EQ(StringRef(" Baz"), Tokens[5].getText());

  ASSERT_EQ(tok::newline, Tokens[6].getKind());

  // Line 3: command followed by text containing '='.
  ASSERT_EQ(tok::text, Tokens[7].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[7].getText());

  ASSERT_EQ(tok::backslash_command, Tokens[8].getKind());
  ASSERT_EQ(StringRef("Blech"), getCommandName(Tokens[8]));

  ASSERT_EQ(tok::text, Tokens[9].getKind());
  ASSERT_EQ(StringRef(" quux=corge"), Tokens[9].getText());

  ASSERT_EQ(tok::newline, Tokens[10].getKind());
}
573 
574 // Empty verbatim block.
TEST_F(CommentLexerTest, VerbatimBlock1) {
  // Begin and end commands with nothing between them, once in line comments
  // and once in a block comment; both are expected to give the same tokens.
  const char *Inputs[] = {
      "/// \\verbatim\\endverbatim\n//",
      "/** \\verbatim\\endverbatim*/"
  };

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(5U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" "), Tokens[0].getText());

    ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
    ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

    ASSERT_EQ(tok::verbatim_block_end, Tokens[2].getKind());
    ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Tokens[2]));

    ASSERT_EQ(tok::newline, Tokens[3].getKind());
    ASSERT_EQ(tok::newline, Tokens[4].getKind());
  }
}
601 
602 // Empty verbatim block without an end command.
TEST_F(CommentLexerTest, VerbatimBlock2) {
  // Line-comment variant: the input ends right after the begin command.
  std::vector<Token> Tokens;
  lexString("/// \\verbatim", Tokens);

  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
  ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
620 
621 // Empty verbatim block without an end command.
TEST_F(CommentLexerTest, VerbatimBlock3) {
  // Block-comment variant: the comment closes right after the begin command.
  std::vector<Token> Tokens;
  lexString("/** \\verbatim*/", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
  ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
640 
641 // Single-line verbatim block.
TEST_F(CommentLexerTest, VerbatimBlock4) {
  // Begin command, content and end command all on one line, preceded by
  // ordinary text.
  const char *Inputs[] = {
      "/// Meow \\verbatim aaa \\endverbatim\n//",
      "/** Meow \\verbatim aaa \\endverbatim*/"
  };

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(6U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" Meow "), Tokens[0].getText());

    ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
    ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

    ASSERT_EQ(tok::verbatim_block_line, Tokens[2].getKind());
    ASSERT_EQ(StringRef(" aaa "), Tokens[2].getVerbatimBlockText());

    ASSERT_EQ(tok::verbatim_block_end, Tokens[3].getKind());
    ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Tokens[3]));

    ASSERT_EQ(tok::newline, Tokens[4].getKind());
    ASSERT_EQ(tok::newline, Tokens[5].getKind());
  }
}
671 
672 // Single-line verbatim block without an end command.
TEST_F(CommentLexerTest, VerbatimBlock5) {
  // Same shape as the single-line case, but the end command is missing; the
  // content is still expected to come out as one verbatim block line.
  const char *Inputs[] = {
      "/// Meow \\verbatim aaa \n//",
      "/** Meow \\verbatim aaa */"
  };

  for (const char *Input : Inputs) {
    std::vector<Token> Tokens;
    lexString(Input, Tokens);

    ASSERT_EQ(5U, Tokens.size());

    ASSERT_EQ(tok::text, Tokens[0].getKind());
    ASSERT_EQ(StringRef(" Meow "), Tokens[0].getText());

    ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
    ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

    ASSERT_EQ(tok::verbatim_block_line, Tokens[2].getKind());
    ASSERT_EQ(StringRef(" aaa "), Tokens[2].getVerbatimBlockText());

    ASSERT_EQ(tok::newline, Tokens[3].getKind());
    ASSERT_EQ(tok::newline, Tokens[4].getKind());
  }
}
699 
// Multi-line verbatim block spread over consecutive line comments.
TEST_F(CommentLexerTest, VerbatimBlock6) {
  const char *Comment =
      "// \\verbatim\n"
      "// Aaa\n"
      "//\n"
      "// Bbb\n"
      "// \\endverbatim\n";

  std::vector<Token> Tokens;
  lexString(Comment, Tokens);

  ASSERT_EQ(10U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
  ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

  ASSERT_EQ(tok::newline, Tokens[2].getKind());

  ASSERT_EQ(tok::verbatim_block_line, Tokens[3].getKind());
  ASSERT_EQ(StringRef(" Aaa"), Tokens[3].getVerbatimBlockText());

  ASSERT_EQ(tok::newline, Tokens[4].getKind());

  // The empty "//" line is expected to contribute only a newline token.
  ASSERT_EQ(tok::newline, Tokens[5].getKind());

  ASSERT_EQ(tok::verbatim_block_line, Tokens[6].getKind());
  ASSERT_EQ(StringRef(" Bbb"), Tokens[6].getVerbatimBlockText());

  ASSERT_EQ(tok::newline, Tokens[7].getKind());

  ASSERT_EQ(tok::verbatim_block_end, Tokens[8].getKind());
  ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Tokens[8]));

  ASSERT_EQ(tok::newline, Tokens[9].getKind());
}
739 
// Multi-line verbatim block inside a star-decorated block comment.
TEST_F(CommentLexerTest, VerbatimBlock7) {
  const char *Comment =
      "/* \\verbatim\n"
      " * Aaa\n"
      " *\n"
      " * Bbb\n"
      " * \\endverbatim\n"
      " */";

  std::vector<Token> Tokens;
  lexString(Comment, Tokens);

  ASSERT_EQ(10U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
  ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

  ASSERT_EQ(tok::verbatim_block_line, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" Aaa"), Tokens[2].getVerbatimBlockText());

  // The line holding only " *" is expected to give an empty verbatim line.
  ASSERT_EQ(tok::verbatim_block_line, Tokens[3].getKind());
  ASSERT_EQ(StringRef(""), Tokens[3].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_line, Tokens[4].getKind());
  ASSERT_EQ(StringRef(" Bbb"), Tokens[4].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_end, Tokens[5].getKind());
  ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Tokens[5]));

  ASSERT_EQ(tok::newline, Tokens[6].getKind());

  ASSERT_EQ(tok::text, Tokens[7].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[7].getText());

  ASSERT_EQ(tok::newline, Tokens[8].getKind());
  ASSERT_EQ(tok::newline, Tokens[9].getKind());
}
781 
782 // Complex test for verbatim blocks.
TEST_F(CommentLexerTest, VerbatimBlock8) {
  // Two verbatim blocks in one comment, with mixed line endings, escape-like
  // sequences inside the block and a truncated end command ("endverbati").
  const char *Comment =
      "/* Meow \\verbatim aaa\\$\\@\n"
      "bbb \\endverbati\r"
      "ccc\r\n"
      "ddd \\endverbatim Blah \\verbatim eee\n"
      "\\endverbatim BlahBlah*/";

  std::vector<Token> Tokens;
  lexString(Comment, Tokens);

  ASSERT_EQ(14U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" Meow "), Tokens[0].getText());

  // First block.
  ASSERT_EQ(tok::verbatim_block_begin, Tokens[1].getKind());
  ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[1]));

  // Escape-like sequences are expected to be kept as written.
  ASSERT_EQ(tok::verbatim_block_line, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" aaa\\$\\@"), Tokens[2].getVerbatimBlockText());

  // The truncated end command is expected to stay part of the block text.
  ASSERT_EQ(tok::verbatim_block_line, Tokens[3].getKind());
  ASSERT_EQ(StringRef("bbb \\endverbati"), Tokens[3].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_line, Tokens[4].getKind());
  ASSERT_EQ(StringRef("ccc"), Tokens[4].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_line, Tokens[5].getKind());
  ASSERT_EQ(StringRef("ddd "), Tokens[5].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_end, Tokens[6].getKind());
  ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Tokens[6]));

  ASSERT_EQ(tok::text, Tokens[7].getKind());
  ASSERT_EQ(StringRef(" Blah "), Tokens[7].getText());

  // Second block.
  ASSERT_EQ(tok::verbatim_block_begin, Tokens[8].getKind());
  ASSERT_EQ(StringRef("verbatim"), getVerbatimBlockName(Tokens[8]));

  ASSERT_EQ(tok::verbatim_block_line, Tokens[9].getKind());
  ASSERT_EQ(StringRef(" eee"), Tokens[9].getVerbatimBlockText());

  ASSERT_EQ(tok::verbatim_block_end, Tokens[10].getKind());
  ASSERT_EQ(StringRef("endverbatim"), getVerbatimBlockName(Tokens[10]));

  ASSERT_EQ(tok::text, Tokens[11].getKind());
  ASSERT_EQ(StringRef(" BlahBlah"), Tokens[11].getText());

  ASSERT_EQ(tok::newline, Tokens[12].getKind());
  ASSERT_EQ(tok::newline, Tokens[13].getKind());
}
835 
836 // LaTeX verbatim blocks.
TEST_F(CommentLexerTest,VerbatimBlock9)837 TEST_F(CommentLexerTest, VerbatimBlock9) {
838   const char *Source =
839     "/// \\f$ Aaa \\f$ \\f[ Bbb \\f] \\f{ Ccc \\f} \\f( Ddd \\f)";
840   std::vector<Token> Toks;
841 
842   lexString(Source, Toks);
843 
844   ASSERT_EQ(17U, Toks.size());
845 
846   ASSERT_EQ(tok::text,                 Toks[0].getKind());
847   ASSERT_EQ(StringRef(" "),            Toks[0].getText());
848 
849   ASSERT_EQ(tok::verbatim_block_begin, Toks[1].getKind());
850   ASSERT_EQ(StringRef("f$"),           getVerbatimBlockName(Toks[1]));
851 
852   ASSERT_EQ(tok::verbatim_block_line,  Toks[2].getKind());
853   ASSERT_EQ(StringRef(" Aaa "),        Toks[2].getVerbatimBlockText());
854 
855   ASSERT_EQ(tok::verbatim_block_end,   Toks[3].getKind());
856   ASSERT_EQ(StringRef("f$"),           getVerbatimBlockName(Toks[3]));
857 
858   ASSERT_EQ(tok::text,                 Toks[4].getKind());
859   ASSERT_EQ(StringRef(" "),            Toks[4].getText());
860 
861   ASSERT_EQ(tok::verbatim_block_begin, Toks[5].getKind());
862   ASSERT_EQ(StringRef("f["),           getVerbatimBlockName(Toks[5]));
863 
864   ASSERT_EQ(tok::verbatim_block_line,  Toks[6].getKind());
865   ASSERT_EQ(StringRef(" Bbb "),        Toks[6].getVerbatimBlockText());
866 
867   ASSERT_EQ(tok::verbatim_block_end,   Toks[7].getKind());
868   ASSERT_EQ(StringRef("f]"),           getVerbatimBlockName(Toks[7]));
869 
870   ASSERT_EQ(tok::text,                 Toks[8].getKind());
871   ASSERT_EQ(StringRef(" "),            Toks[8].getText());
872 
873   ASSERT_EQ(tok::verbatim_block_begin, Toks[9].getKind());
874   ASSERT_EQ(StringRef("f{"),           getVerbatimBlockName(Toks[9]));
875 
876   ASSERT_EQ(tok::verbatim_block_line,  Toks[10].getKind());
877   ASSERT_EQ(StringRef(" Ccc "),        Toks[10].getVerbatimBlockText());
878 
879   ASSERT_EQ(tok::verbatim_block_end,   Toks[11].getKind());
880   ASSERT_EQ(StringRef("f}"),           getVerbatimBlockName(Toks[11]));
881 
882   ASSERT_EQ(tok::text,                 Toks[12].getKind());
883   ASSERT_EQ(StringRef(" "),            Toks[12].getText());
884 
885   ASSERT_EQ(tok::verbatim_block_begin, Toks[13].getKind());
886   ASSERT_EQ(StringRef("f("),           getVerbatimBlockName(Toks[13]));
887 
888   ASSERT_EQ(tok::verbatim_block_line,  Toks[14].getKind());
889   ASSERT_EQ(StringRef(" Ddd "),        Toks[14].getVerbatimBlockText());
890 
891   ASSERT_EQ(tok::verbatim_block_end,   Toks[15].getKind());
892   ASSERT_EQ(StringRef("f)"),           getVerbatimBlockName(Toks[15]));
893 
894   ASSERT_EQ(tok::newline,              Toks[16].getKind());
895 }
896 
897 // Empty verbatim line.
TEST_F(CommentLexerTest,VerbatimLine1)898 TEST_F(CommentLexerTest, VerbatimLine1) {
899   const char *Sources[] = {
900     "/// \\fn\n//",
901     "/** \\fn*/"
902   };
903 
904   for (size_t i = 0, e = std::size(Sources); i != e; i++) {
905     std::vector<Token> Toks;
906 
907     lexString(Sources[i], Toks);
908 
909     ASSERT_EQ(4U, Toks.size());
910 
911     ASSERT_EQ(tok::text,               Toks[0].getKind());
912     ASSERT_EQ(StringRef(" "),          Toks[0].getText());
913 
914     ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
915     ASSERT_EQ(StringRef("fn"),         getVerbatimLineName(Toks[1]));
916 
917     ASSERT_EQ(tok::newline,            Toks[2].getKind());
918     ASSERT_EQ(tok::newline,            Toks[3].getKind());
919   }
920 }
921 
922 // Verbatim line with Doxygen escape sequences, which should not be expanded.
TEST_F(CommentLexerTest,VerbatimLine2)923 TEST_F(CommentLexerTest, VerbatimLine2) {
924   const char *Sources[] = {
925     "/// \\fn void *foo(const char *zzz = \"\\$\");\n//",
926     "/** \\fn void *foo(const char *zzz = \"\\$\");*/"
927   };
928 
929   for (size_t i = 0, e = std::size(Sources); i != e; i++) {
930     std::vector<Token> Toks;
931 
932     lexString(Sources[i], Toks);
933 
934     ASSERT_EQ(5U, Toks.size());
935 
936     ASSERT_EQ(tok::text,               Toks[0].getKind());
937     ASSERT_EQ(StringRef(" "),          Toks[0].getText());
938 
939     ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
940     ASSERT_EQ(StringRef("fn"),         getVerbatimLineName(Toks[1]));
941 
942     ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind());
943     ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
944                                        Toks[2].getVerbatimLineText());
945 
946     ASSERT_EQ(tok::newline,            Toks[3].getKind());
947     ASSERT_EQ(tok::newline,            Toks[4].getKind());
948   }
949 }
950 
951 // Verbatim line should not eat anything from next source line.
TEST_F(CommentLexerTest,VerbatimLine3)952 TEST_F(CommentLexerTest, VerbatimLine3) {
953   const char *Source =
954     "/** \\fn void *foo(const char *zzz = \"\\$\");\n"
955     " * Meow\n"
956     " */";
957 
958   std::vector<Token> Toks;
959 
960   lexString(Source, Toks);
961 
962   ASSERT_EQ(9U, Toks.size());
963 
964   ASSERT_EQ(tok::text,               Toks[0].getKind());
965   ASSERT_EQ(StringRef(" "),          Toks[0].getText());
966 
967   ASSERT_EQ(tok::verbatim_line_name, Toks[1].getKind());
968   ASSERT_EQ(StringRef("fn"),         getVerbatimLineName(Toks[1]));
969 
970   ASSERT_EQ(tok::verbatim_line_text, Toks[2].getKind());
971   ASSERT_EQ(StringRef(" void *foo(const char *zzz = \"\\$\");"),
972                                      Toks[2].getVerbatimLineText());
973   ASSERT_EQ(tok::newline,            Toks[3].getKind());
974 
975   ASSERT_EQ(tok::text,               Toks[4].getKind());
976   ASSERT_EQ(StringRef(" Meow"),      Toks[4].getText());
977   ASSERT_EQ(tok::newline,            Toks[5].getKind());
978 
979   ASSERT_EQ(tok::text,               Toks[6].getKind());
980   ASSERT_EQ(StringRef(" "),          Toks[6].getText());
981 
982   ASSERT_EQ(tok::newline,            Toks[7].getKind());
983   ASSERT_EQ(tok::newline,            Toks[8].getKind());
984 }
985 
// A lone '<' at the end of the comment is expected to be lexed as a text
// token, not as the start of an HTML tag.
TEST_F(CommentLexerTest, HTML1) {
  std::vector<Token> Tokens;
  lexString("// <", Tokens);

  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("<"), Tokens[1].getText());

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
1004 
// '<' followed by a digit ("a<2") is expected to yield three text tokens:
// the text before it, the '<' itself, and the text after it.
TEST_F(CommentLexerTest, HTML2) {
  std::vector<Token> Tokens;
  lexString("// a<2", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" a"), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("<"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("2"), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1026 
// '<' separated from the tag name by a space ("< img") is expected to be
// lexed as text, with " img" following as a separate text token.
TEST_F(CommentLexerTest, HTML3) {
  std::vector<Token> Tokens;
  lexString("// < img", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("<"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" img"), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1048 
// "<img" with or without a trailing space is expected to produce an
// html_start_tag token named "img" and nothing else before the newline.
TEST_F(CommentLexerTest, HTML4) {
  const char *Sources[] = {
    "// <img",
    "// <img "
  };

  for (const char *Source : Sources) {
    // Attach the input to any failure so the offending case is identifiable.
    SCOPED_TRACE(Source);

    std::vector<Token> Toks;

    lexString(Source, Toks);

    ASSERT_EQ(3U, Toks.size());

    ASSERT_EQ(tok::text,           Toks[0].getKind());
    ASSERT_EQ(StringRef(" "),      Toks[0].getText());

    ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
    ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());

    ASSERT_EQ(tok::newline,        Toks[2].getKind());
  }
}
1071 
// After the start tag "<img", the "42" is expected to come back as a plain
// text token.
TEST_F(CommentLexerTest, HTML5) {
  std::vector<Token> Tokens;
  lexString("// <img 42", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
  ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("42"), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1093 
// A complete start tag "<img>" followed by text: expects html_start_tag,
// html_greater, and then the trailing " Meow" as text.
TEST_F(CommentLexerTest, HTML6) {
  std::vector<Token> Tokens;
  lexString("// <img> Meow", Tokens);

  ASSERT_EQ(5U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
  ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());

  ASSERT_EQ(tok::html_greater, Tokens[2].getKind());

  ASSERT_EQ(tok::text, Tokens[3].getKind());
  ASSERT_EQ(StringRef(" Meow"), Tokens[3].getText());

  ASSERT_EQ(tok::newline, Tokens[4].getKind());
}
1116 
// '=' directly after the tag name ("<img=") is expected to be lexed as a text
// token rather than as html_equals.
TEST_F(CommentLexerTest, HTML7) {
  std::vector<Token> Tokens;
  lexString("// <img=", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
  ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("="), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1137 
// An attribute with '=' but no value ("<img src=>"): expects html_ident,
// html_equals and html_greater, followed by the trailing text.
TEST_F(CommentLexerTest, HTML8) {
  std::vector<Token> Tokens;
  lexString("// <img src=> Meow", Tokens);

  ASSERT_EQ(7U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
  ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());

  ASSERT_EQ(tok::html_ident, Tokens[2].getKind());
  ASSERT_EQ(StringRef("src"), Tokens[2].getHTMLIdent());

  ASSERT_EQ(tok::html_equals, Tokens[3].getKind());

  ASSERT_EQ(tok::html_greater, Tokens[4].getKind());

  ASSERT_EQ(tok::text, Tokens[5].getKind());
  ASSERT_EQ(StringRef(" Meow"), Tokens[5].getText());

  ASSERT_EQ(tok::newline, Tokens[6].getKind());
}
1165 
// A start tag followed by a bare attribute name, with or without a trailing
// space: expects html_start_tag "img" and html_ident "src".
TEST_F(CommentLexerTest, HTML9) {
  const char *Sources[] = {
    "// <img src",
    "// <img src "
  };

  for (const char *Source : Sources) {
    // Attach the input to any failure so the offending case is identifiable.
    SCOPED_TRACE(Source);

    std::vector<Token> Toks;

    lexString(Source, Toks);

    ASSERT_EQ(4U, Toks.size());

    ASSERT_EQ(tok::text,           Toks[0].getKind());
    ASSERT_EQ(StringRef(" "),      Toks[0].getText());

    ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
    ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());

    ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
    ASSERT_EQ(StringRef("src"),    Toks[2].getHTMLIdent());

    ASSERT_EQ(tok::newline,        Toks[3].getKind());
  }
}
1191 
// An attribute name followed by '=', with or without a space before the '=':
// expects html_ident "src" and then html_equals.
TEST_F(CommentLexerTest, HTML10) {
  const char *Sources[] = {
    "// <img src=",
    "// <img src ="
  };

  for (const char *Source : Sources) {
    // Attach the input to any failure so the offending case is identifiable.
    SCOPED_TRACE(Source);

    std::vector<Token> Toks;

    lexString(Source, Toks);

    ASSERT_EQ(5U, Toks.size());

    ASSERT_EQ(tok::text,           Toks[0].getKind());
    ASSERT_EQ(StringRef(" "),      Toks[0].getText());

    ASSERT_EQ(tok::html_start_tag, Toks[1].getKind());
    ASSERT_EQ(StringRef("img"),    Toks[1].getHTMLTagStartName());

    ASSERT_EQ(tok::html_ident,     Toks[2].getKind());
    ASSERT_EQ(StringRef("src"),    Toks[2].getHTMLIdent());

    ASSERT_EQ(tok::html_equals,    Toks[3].getKind());

    ASSERT_EQ(tok::newline,        Toks[4].getKind());
  }
}
1219 
// An attribute value that is only an opening quote (double or single, with
// or without spaces around '='): expects an html_quoted_string token whose
// contents are empty.
TEST_F(CommentLexerTest, HTML11) {
  const char *Sources[] = {
    "// <img src=\"",
    "// <img src = \"",
    "// <img src=\'",
    "// <img src = \'"
  };

  for (const char *Source : Sources) {
    // Attach the input to any failure so the offending case is identifiable.
    SCOPED_TRACE(Source);

    std::vector<Token> Toks;

    lexString(Source, Toks);

    ASSERT_EQ(6U, Toks.size());

    ASSERT_EQ(tok::text,               Toks[0].getKind());
    ASSERT_EQ(StringRef(" "),          Toks[0].getText());

    ASSERT_EQ(tok::html_start_tag,     Toks[1].getKind());
    ASSERT_EQ(StringRef("img"),        Toks[1].getHTMLTagStartName());

    ASSERT_EQ(tok::html_ident,         Toks[2].getKind());
    ASSERT_EQ(StringRef("src"),        Toks[2].getHTMLIdent());

    ASSERT_EQ(tok::html_equals,        Toks[3].getKind());

    ASSERT_EQ(tok::html_quoted_string, Toks[4].getKind());
    ASSERT_EQ(StringRef(""),           Toks[4].getHTMLQuotedString());

    ASSERT_EQ(tok::newline,            Toks[5].getKind());
  }
}
1252 
// A '@' where an attribute value would go ("<img src=@") is expected to be
// lexed as a text token after html_equals.
TEST_F(CommentLexerTest, HTML12) {
  std::vector<Token> Tokens;
  lexString("// <img src=@", Tokens);

  ASSERT_EQ(6U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::html_start_tag, Tokens[1].getKind());
  ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagStartName());

  ASSERT_EQ(tok::html_ident, Tokens[2].getKind());
  ASSERT_EQ(StringRef("src"), Tokens[2].getHTMLIdent());

  ASSERT_EQ(tok::html_equals, Tokens[3].getKind());

  ASSERT_EQ(tok::text, Tokens[4].getKind());
  ASSERT_EQ(StringRef("@"), Tokens[4].getText());

  ASSERT_EQ(tok::newline, Tokens[5].getKind());
}
1278 
// Quoted attribute values containing backslash-escaped quotes, both
// terminated and unterminated, in double and single quotes.  In every case
// the quoted string is expected to be  val\"\'val  with the backslashes kept.
TEST_F(CommentLexerTest, HTML13) {
  const char *Sources[] = {
    "// <img src=\"val\\\"\\'val",
    "// <img src=\"val\\\"\\'val\"",
    "// <img src=\'val\\\"\\'val",
    "// <img src=\'val\\\"\\'val\'"
  };

  for (const char *Source : Sources) {
    // Attach the input to any failure so the offending case is identifiable.
    SCOPED_TRACE(Source);

    std::vector<Token> Toks;

    lexString(Source, Toks);

    ASSERT_EQ(6U, Toks.size());

    ASSERT_EQ(tok::text,                  Toks[0].getKind());
    ASSERT_EQ(StringRef(" "),             Toks[0].getText());

    ASSERT_EQ(tok::html_start_tag,        Toks[1].getKind());
    ASSERT_EQ(StringRef("img"),           Toks[1].getHTMLTagStartName());

    ASSERT_EQ(tok::html_ident,            Toks[2].getKind());
    ASSERT_EQ(StringRef("src"),           Toks[2].getHTMLIdent());

    ASSERT_EQ(tok::html_equals,           Toks[3].getKind());

    ASSERT_EQ(tok::html_quoted_string,    Toks[4].getKind());
    ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());

    ASSERT_EQ(tok::newline,               Toks[5].getKind());
  }
}
1311 
// A terminated quoted attribute value followed by '>': expects the quoted
// string (with its backslash escapes kept) and then html_greater.
TEST_F(CommentLexerTest, HTML14) {
  const char *Sources[] = {
    "// <img src=\"val\\\"\\'val\">",
    "// <img src=\'val\\\"\\'val\'>"
  };

  for (const char *Source : Sources) {
    // Attach the input to any failure so the offending case is identifiable.
    SCOPED_TRACE(Source);

    std::vector<Token> Toks;

    lexString(Source, Toks);

    ASSERT_EQ(7U, Toks.size());

    ASSERT_EQ(tok::text,                  Toks[0].getKind());
    ASSERT_EQ(StringRef(" "),             Toks[0].getText());

    ASSERT_EQ(tok::html_start_tag,        Toks[1].getKind());
    ASSERT_EQ(StringRef("img"),           Toks[1].getHTMLTagStartName());

    ASSERT_EQ(tok::html_ident,            Toks[2].getKind());
    ASSERT_EQ(StringRef("src"),           Toks[2].getHTMLIdent());

    ASSERT_EQ(tok::html_equals,           Toks[3].getKind());

    ASSERT_EQ(tok::html_quoted_string,    Toks[4].getKind());
    ASSERT_EQ(StringRef("val\\\"\\'val"), Toks[4].getHTMLQuotedString());

    ASSERT_EQ(tok::html_greater,          Toks[5].getKind());

    ASSERT_EQ(tok::newline,               Toks[6].getKind());
  }
}
1344 
// A self-closing tag, with or without a space before "/>": expects
// html_start_tag "img" followed by html_slash_greater.
TEST_F(CommentLexerTest, HTML15) {
  const char *Sources[] = {
    "// <img/>",
    "// <img />"
  };

  for (const char *Source : Sources) {
    // Attach the input to any failure so the offending case is identifiable.
    SCOPED_TRACE(Source);

    std::vector<Token> Toks;

    lexString(Source, Toks);

    ASSERT_EQ(4U, Toks.size());

    ASSERT_EQ(tok::text,               Toks[0].getKind());
    ASSERT_EQ(StringRef(" "),          Toks[0].getText());

    ASSERT_EQ(tok::html_start_tag,     Toks[1].getKind());
    ASSERT_EQ(StringRef("img"),        Toks[1].getHTMLTagStartName());

    ASSERT_EQ(tok::html_slash_greater, Toks[2].getKind());

    ASSERT_EQ(tok::newline,            Toks[3].getKind());
  }
}
1369 
// A '/' inside a start tag that is not followed by '>': the '/' is expected
// to be lexed as text, and so is the " Aaa" after it.
TEST_F(CommentLexerTest, HTML16) {
  const char *Sources[] = {
    "// <img/ Aaa",
    "// <img / Aaa"
  };

  for (const char *Source : Sources) {
    // Attach the input to any failure so the offending case is identifiable.
    SCOPED_TRACE(Source);

    std::vector<Token> Toks;

    lexString(Source, Toks);

    ASSERT_EQ(5U, Toks.size());

    ASSERT_EQ(tok::text,               Toks[0].getKind());
    ASSERT_EQ(StringRef(" "),          Toks[0].getText());

    ASSERT_EQ(tok::html_start_tag,     Toks[1].getKind());
    ASSERT_EQ(StringRef("img"),        Toks[1].getHTMLTagStartName());

    ASSERT_EQ(tok::text,               Toks[2].getKind());
    ASSERT_EQ(StringRef("/"),          Toks[2].getText());

    ASSERT_EQ(tok::text,               Toks[3].getKind());
    ASSERT_EQ(StringRef(" Aaa"),       Toks[3].getText());

    ASSERT_EQ(tok::newline,            Toks[4].getKind());
  }
}
1398 
// "</" with no tag name after it is expected to be lexed as a single text
// token.
TEST_F(CommentLexerTest, HTML17) {
  std::vector<Token> Tokens;
  lexString("// </", Tokens);

  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("</"), Tokens[1].getText());

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
1416 
// "</" followed by '@': expects "</" as one text token and "@" as another.
TEST_F(CommentLexerTest, HTML18) {
  std::vector<Token> Tokens;
  lexString("// </@", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("</"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("@"), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1437 
// "</img" without a closing '>' is expected to produce an html_end_tag token
// named "img".
TEST_F(CommentLexerTest, HTML19) {
  std::vector<Token> Tokens;
  lexString("// </img", Tokens);

  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::html_end_tag, Tokens[1].getKind());
  ASSERT_EQ(StringRef("img"), Tokens[1].getHTMLTagEndName());

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
1455 
// "<tag>" is expected to be lexed as two text tokens, "<tag" and ">", rather
// than as HTML tag tokens.
TEST_F(CommentLexerTest, NotAKnownHTMLTag1) {
  std::vector<Token> Tokens;
  lexString("// <tag>", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("<tag"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(">"), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1476 
// "</tag>" is expected to be lexed as two text tokens, "</tag" and ">",
// rather than as an HTML end tag.
TEST_F(CommentLexerTest, NotAKnownHTMLTag2) {
  std::vector<Token> Tokens;
  lexString("// </tag>", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("</tag"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(">"), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1497 
// A lone '&' is expected to come back as a text token containing "&".
TEST_F(CommentLexerTest, HTMLCharacterReferences1) {
  std::vector<Token> Tokens;
  lexString("// &", Tokens);

  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&"), Tokens[1].getText());

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
1515 
// '&' followed by '!': expects "&" and "!" as two separate text tokens.
TEST_F(CommentLexerTest, HTMLCharacterReferences2) {
  std::vector<Token> Tokens;
  lexString("// &!", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("!"), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1536 
// "&amp" without the terminating ';' is expected to stay as the literal text
// "&amp".
TEST_F(CommentLexerTest, HTMLCharacterReferences3) {
  std::vector<Token> Tokens;
  lexString("// &amp", Tokens);

  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&amp"), Tokens[1].getText());

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
1554 
// "&amp" followed by '!' instead of ';': expects the literal text "&amp" and
// then "!" as a separate text token.
TEST_F(CommentLexerTest, HTMLCharacterReferences4) {
  std::vector<Token> Tokens;
  lexString("// &amp!", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&amp"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("!"), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1575 
// "&#" with no digits is expected to stay as the literal text "&#".
TEST_F(CommentLexerTest, HTMLCharacterReferences5) {
  std::vector<Token> Tokens;
  lexString("// &#", Tokens);

  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&#"), Tokens[1].getText());

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
1593 
// "&#" followed by a letter: expects the literal text "&#" and then "a" as a
// separate text token.
TEST_F(CommentLexerTest, HTMLCharacterReferences6) {
  std::vector<Token> Tokens;
  lexString("// &#a", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&#"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("a"), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1614 
// A decimal reference without the terminating ';' ("&#42") is expected to
// stay as the literal text "&#42".
TEST_F(CommentLexerTest, HTMLCharacterReferences7) {
  std::vector<Token> Tokens;
  lexString("// &#42", Tokens);

  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&#42"), Tokens[1].getText());

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
1632 
// "&#42" followed by a letter instead of ';': expects the literal text "&#42"
// and then "a" as a separate text token.
TEST_F(CommentLexerTest, HTMLCharacterReferences8) {
  std::vector<Token> Tokens;
  lexString("// &#42a", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&#42"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("a"), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1653 
// "&#x" with no hex digits is expected to stay as the literal text "&#x".
TEST_F(CommentLexerTest, HTMLCharacterReferences9) {
  std::vector<Token> Tokens;
  lexString("// &#x", Tokens);

  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&#x"), Tokens[1].getText());

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
1671 
// "&#x" followed by a non-hex letter: expects the literal text "&#x" and then
// "z" as a separate text token.
TEST_F(CommentLexerTest, HTMLCharacterReferences10) {
  std::vector<Token> Tokens;
  lexString("// &#xz", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&#x"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("z"), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1692 
// A hex reference without the terminating ';' ("&#xab") is expected to stay
// as the literal text "&#xab".
TEST_F(CommentLexerTest, HTMLCharacterReferences11) {
  std::vector<Token> Tokens;
  lexString("// &#xab", Tokens);

  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&#xab"), Tokens[1].getText());

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
1710 
// "&#xaB" followed by a non-hex letter instead of ';': expects the literal
// text "&#xaB" and then "z" as a separate text token.
TEST_F(CommentLexerTest, HTMLCharacterReferences12) {
  std::vector<Token> Tokens;
  lexString("// &#xaBz", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&#xaB"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("z"), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1731 
// A complete named reference "&amp;" is expected to come back as a text token
// whose text is "&".
TEST_F(CommentLexerTest, HTMLCharacterReferences13) {
  std::vector<Token> Tokens;
  lexString("// &amp;", Tokens);

  ASSERT_EQ(3U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&"), Tokens[1].getText());

  ASSERT_EQ(tok::newline, Tokens[2].getKind());
}
1749 
// Two adjacent named references "&amp;&lt;" are expected to come back as two
// separate text tokens, "&" and "<".
TEST_F(CommentLexerTest, HTMLCharacterReferences14) {
  std::vector<Token> Tokens;
  lexString("// &amp;&lt;", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef("<"), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1770 
// A named reference followed by ordinary text: expects "&" for the reference
// and " meow" as a separate text token.
TEST_F(CommentLexerTest, HTMLCharacterReferences15) {
  std::vector<Token> Tokens;
  lexString("// &amp; meow", Tokens);

  ASSERT_EQ(4U, Tokens.size());

  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" "), Tokens[0].getText());

  ASSERT_EQ(tok::text, Tokens[1].getKind());
  ASSERT_EQ(StringRef("&"), Tokens[1].getText());

  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" meow"), Tokens[2].getText());

  ASSERT_EQ(tok::newline, Tokens[3].getKind());
}
1791 
// Numeric references written in decimal and in hex (lower- and upper-case
// 'x', lower- and upper-case digits) are all expected to come back as a text
// token whose text is "=".
TEST_F(CommentLexerTest, HTMLCharacterReferences16) {
  const char *Sources[] = {
    "// &#61;",
    "// &#x3d;",
    "// &#X3d;",
    "// &#X3D;"
  };

  for (const char *Source : Sources) {
    // Attach the input to any failure so the offending case is identifiable.
    SCOPED_TRACE(Source);

    std::vector<Token> Toks;

    lexString(Source, Toks);

    ASSERT_EQ(3U, Toks.size());

    ASSERT_EQ(tok::text,          Toks[0].getKind());
    ASSERT_EQ(StringRef(" "),     Toks[0].getText());

    ASSERT_EQ(tok::text,          Toks[1].getKind());
    ASSERT_EQ(StringRef("="),     Toks[1].getText());

    ASSERT_EQ(tok::newline,       Toks[2].getKind());
  }
}
1816 
// Several comments of different kinds ("//", "///", "/* */", "/** */") lexed
// from one buffer: expects the text of each line in order, separated by
// newline tokens.
TEST_F(CommentLexerTest, MultipleComments) {
  const char *Comments = "// Aaa\n"
                         "/// Bbb\n"
                         "/* Ccc\n"
                         " * Ddd*/\n"
                         "/** Eee*/";

  std::vector<Token> Tokens;
  lexString(Comments, Tokens);

  ASSERT_EQ(12U, Tokens.size());

  // "// Aaa"
  ASSERT_EQ(tok::text, Tokens[0].getKind());
  ASSERT_EQ(StringRef(" Aaa"), Tokens[0].getText());
  ASSERT_EQ(tok::newline, Tokens[1].getKind());

  // "/// Bbb"
  ASSERT_EQ(tok::text, Tokens[2].getKind());
  ASSERT_EQ(StringRef(" Bbb"), Tokens[2].getText());
  ASSERT_EQ(tok::newline, Tokens[3].getKind());

  // "/* Ccc"
  ASSERT_EQ(tok::text, Tokens[4].getKind());
  ASSERT_EQ(StringRef(" Ccc"), Tokens[4].getText());
  ASSERT_EQ(tok::newline, Tokens[5].getKind());

  // " * Ddd*/" -- two newline tokens are expected after it.
  ASSERT_EQ(tok::text, Tokens[6].getKind());
  ASSERT_EQ(StringRef(" Ddd"), Tokens[6].getText());
  ASSERT_EQ(tok::newline, Tokens[7].getKind());
  ASSERT_EQ(tok::newline, Tokens[8].getKind());

  // "/** Eee*/" -- again followed by two newline tokens.
  ASSERT_EQ(tok::text, Tokens[9].getKind());
  ASSERT_EQ(StringRef(" Eee"), Tokens[9].getText());

  ASSERT_EQ(tok::newline, Tokens[10].getKind());
  ASSERT_EQ(tok::newline, Tokens[11].getKind());
}
1854 
1855 } // end namespace comments
1856 } // end namespace clang
1857 
1858