xref: /llvm-project/clang/unittests/Tooling/Syntax/TokensTest.cpp (revision 5f1adf0433c6007f8be885b832c852da67e8524c)
1e7230ea7SIlya Biryukov //===- TokensTest.cpp -----------------------------------------------------===//
2e7230ea7SIlya Biryukov //
3e7230ea7SIlya Biryukov // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e7230ea7SIlya Biryukov // See https://llvm.org/LICENSE.txt for license information.
5e7230ea7SIlya Biryukov // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e7230ea7SIlya Biryukov //
7e7230ea7SIlya Biryukov //===----------------------------------------------------------------------===//
8e7230ea7SIlya Biryukov 
9e7230ea7SIlya Biryukov #include "clang/Tooling/Syntax/Tokens.h"
10e7230ea7SIlya Biryukov #include "clang/AST/ASTConsumer.h"
11e7230ea7SIlya Biryukov #include "clang/AST/Expr.h"
12e7230ea7SIlya Biryukov #include "clang/Basic/Diagnostic.h"
13e7230ea7SIlya Biryukov #include "clang/Basic/DiagnosticIDs.h"
14e7230ea7SIlya Biryukov #include "clang/Basic/DiagnosticOptions.h"
15e7230ea7SIlya Biryukov #include "clang/Basic/FileManager.h"
16e7230ea7SIlya Biryukov #include "clang/Basic/FileSystemOptions.h"
17e7230ea7SIlya Biryukov #include "clang/Basic/LLVM.h"
18e7230ea7SIlya Biryukov #include "clang/Basic/LangOptions.h"
19e7230ea7SIlya Biryukov #include "clang/Basic/SourceLocation.h"
20e7230ea7SIlya Biryukov #include "clang/Basic/SourceManager.h"
21e7230ea7SIlya Biryukov #include "clang/Basic/TokenKinds.def"
22e7230ea7SIlya Biryukov #include "clang/Basic/TokenKinds.h"
23e7230ea7SIlya Biryukov #include "clang/Frontend/CompilerInstance.h"
24e7230ea7SIlya Biryukov #include "clang/Frontend/FrontendAction.h"
25e7230ea7SIlya Biryukov #include "clang/Frontend/Utils.h"
26e7230ea7SIlya Biryukov #include "clang/Lex/Lexer.h"
27e7230ea7SIlya Biryukov #include "clang/Lex/PreprocessorOptions.h"
28e7230ea7SIlya Biryukov #include "clang/Lex/Token.h"
29e7230ea7SIlya Biryukov #include "clang/Tooling/Tooling.h"
30e7230ea7SIlya Biryukov #include "llvm/ADT/ArrayRef.h"
31e7230ea7SIlya Biryukov #include "llvm/ADT/IntrusiveRefCntPtr.h"
32e7230ea7SIlya Biryukov #include "llvm/ADT/STLExtras.h"
33e7230ea7SIlya Biryukov #include "llvm/ADT/StringRef.h"
34e7230ea7SIlya Biryukov #include "llvm/Support/FormatVariadic.h"
35e7230ea7SIlya Biryukov #include "llvm/Support/MemoryBuffer.h"
36e7230ea7SIlya Biryukov #include "llvm/Support/VirtualFileSystem.h"
37e7230ea7SIlya Biryukov #include "llvm/Support/raw_os_ostream.h"
38e7230ea7SIlya Biryukov #include "llvm/Support/raw_ostream.h"
393432f4bfSJordan Rupprecht #include "llvm/Testing/Annotations/Annotations.h"
40e7230ea7SIlya Biryukov #include "llvm/Testing/Support/SupportHelpers.h"
41e7230ea7SIlya Biryukov #include <cassert>
42e7230ea7SIlya Biryukov #include <cstdlib>
43e7230ea7SIlya Biryukov #include <gmock/gmock.h>
44e7230ea7SIlya Biryukov #include <gtest/gtest.h>
45e7230ea7SIlya Biryukov #include <memory>
46a1580d7bSKazu Hirata #include <optional>
47e7230ea7SIlya Biryukov #include <ostream>
48e7230ea7SIlya Biryukov #include <string>
49e7230ea7SIlya Biryukov 
50e7230ea7SIlya Biryukov using namespace clang;
51e7230ea7SIlya Biryukov using namespace clang::syntax;
52e7230ea7SIlya Biryukov 
53e7230ea7SIlya Biryukov using llvm::ValueIs;
54f0ab336eSSam McCall using ::testing::_;
55e7230ea7SIlya Biryukov using ::testing::AllOf;
56e7230ea7SIlya Biryukov using ::testing::Contains;
57e7230ea7SIlya Biryukov using ::testing::ElementsAre;
585aed309aSIlya Biryukov using ::testing::Field;
591bf055c9SMarcel Hlopko using ::testing::IsEmpty;
60e7230ea7SIlya Biryukov using ::testing::Matcher;
61e7230ea7SIlya Biryukov using ::testing::Not;
62cd9b2e18SKadir Cetinkaya using ::testing::Pointee;
63e7230ea7SIlya Biryukov using ::testing::StartsWith;
64e7230ea7SIlya Biryukov 
65e7230ea7SIlya Biryukov namespace {
66e7230ea7SIlya Biryukov // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
67e7230ea7SIlya Biryukov // argument.
68e7230ea7SIlya Biryukov MATCHER_P(SameRange, A, "") {
69e7230ea7SIlya Biryukov   return A.begin() == arg.begin() && A.end() == arg.end();
70e7230ea7SIlya Biryukov }
715aed309aSIlya Biryukov 
725aed309aSIlya Biryukov Matcher<TokenBuffer::Expansion>
IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,Matcher<llvm::ArrayRef<syntax::Token>> Expanded)735aed309aSIlya Biryukov IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
745aed309aSIlya Biryukov             Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
755aed309aSIlya Biryukov   return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled),
765aed309aSIlya Biryukov                Field(&TokenBuffer::Expansion::Expanded, Expanded));
775aed309aSIlya Biryukov }
78e7230ea7SIlya Biryukov // Matchers for syntax::Token.
79e7230ea7SIlya Biryukov MATCHER_P(Kind, K, "") { return arg.kind() == K; }
80e7230ea7SIlya Biryukov MATCHER_P2(HasText, Text, SourceMgr, "") {
81e7230ea7SIlya Biryukov   return arg.text(*SourceMgr) == Text;
82e7230ea7SIlya Biryukov }
83e7230ea7SIlya Biryukov /// Checks the start and end location of a token are equal to SourceRng.
84e7230ea7SIlya Biryukov MATCHER_P(RangeIs, SourceRng, "") {
85e7230ea7SIlya Biryukov   return arg.location() == SourceRng.first &&
86e7230ea7SIlya Biryukov          arg.endLocation() == SourceRng.second;
87e7230ea7SIlya Biryukov }
88e7230ea7SIlya Biryukov 
89e7230ea7SIlya Biryukov class TokenCollectorTest : public ::testing::Test {
90e7230ea7SIlya Biryukov public:
91e7230ea7SIlya Biryukov   /// Run the clang frontend, collect the preprocessed tokens from the frontend
92e7230ea7SIlya Biryukov   /// invocation and store them in this->Buffer.
93e7230ea7SIlya Biryukov   /// This also clears SourceManager before running the compiler.
recordTokens(llvm::StringRef Code)94e7230ea7SIlya Biryukov   void recordTokens(llvm::StringRef Code) {
95e7230ea7SIlya Biryukov     class RecordTokens : public ASTFrontendAction {
96e7230ea7SIlya Biryukov     public:
97e7230ea7SIlya Biryukov       explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
98e7230ea7SIlya Biryukov 
99e7230ea7SIlya Biryukov       bool BeginSourceFileAction(CompilerInstance &CI) override {
100e7230ea7SIlya Biryukov         assert(!Collector && "expected only a single call to BeginSourceFile");
101e7230ea7SIlya Biryukov         Collector.emplace(CI.getPreprocessor());
102e7230ea7SIlya Biryukov         return true;
103e7230ea7SIlya Biryukov       }
104e7230ea7SIlya Biryukov       void EndSourceFileAction() override {
105e7230ea7SIlya Biryukov         assert(Collector && "BeginSourceFileAction was never called");
106e7230ea7SIlya Biryukov         Result = std::move(*Collector).consume();
107aa979084SUtkarsh Saxena         Result.indexExpandedTokens();
108e7230ea7SIlya Biryukov       }
109e7230ea7SIlya Biryukov 
110e7230ea7SIlya Biryukov       std::unique_ptr<ASTConsumer>
111e7230ea7SIlya Biryukov       CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
1122b3d49b6SJonas Devlieghere         return std::make_unique<ASTConsumer>();
113e7230ea7SIlya Biryukov       }
114e7230ea7SIlya Biryukov 
115e7230ea7SIlya Biryukov     private:
116e7230ea7SIlya Biryukov       TokenBuffer &Result;
1176ad0788cSKazu Hirata       std::optional<TokenCollector> Collector;
118e7230ea7SIlya Biryukov     };
119e7230ea7SIlya Biryukov 
120e7230ea7SIlya Biryukov     constexpr const char *FileName = "./input.cpp";
121e7230ea7SIlya Biryukov     FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
122e7230ea7SIlya Biryukov     // Prepare to run a compiler.
123e7230ea7SIlya Biryukov     if (!Diags->getClient())
124e7230ea7SIlya Biryukov       Diags->setClient(new IgnoringDiagConsumer);
125e7230ea7SIlya Biryukov     std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
126e7230ea7SIlya Biryukov                                       FileName};
127499d0b96SSam McCall     CreateInvocationOptions CIOpts;
128499d0b96SSam McCall     CIOpts.Diags = Diags;
129499d0b96SSam McCall     CIOpts.VFS = FS;
130499d0b96SSam McCall     auto CI = createInvocation(Args, std::move(CIOpts));
131e7230ea7SIlya Biryukov     assert(CI);
132e7230ea7SIlya Biryukov     CI->getFrontendOpts().DisableFree = false;
133e7230ea7SIlya Biryukov     CI->getPreprocessorOpts().addRemappedFile(
134e7230ea7SIlya Biryukov         FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
135e7230ea7SIlya Biryukov     CompilerInstance Compiler;
136e7230ea7SIlya Biryukov     Compiler.setInvocation(std::move(CI));
137e7230ea7SIlya Biryukov     Compiler.setDiagnostics(Diags.get());
138e7230ea7SIlya Biryukov     Compiler.setFileManager(FileMgr.get());
139e7230ea7SIlya Biryukov     Compiler.setSourceManager(SourceMgr.get());
140e7230ea7SIlya Biryukov 
141e7230ea7SIlya Biryukov     this->Buffer = TokenBuffer(*SourceMgr);
142e7230ea7SIlya Biryukov     RecordTokens Recorder(this->Buffer);
143e7230ea7SIlya Biryukov     ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
144e7230ea7SIlya Biryukov         << "failed to run the frontend";
145e7230ea7SIlya Biryukov   }
146e7230ea7SIlya Biryukov 
147e7230ea7SIlya Biryukov   /// Record the tokens and return a test dump of the resulting buffer.
collectAndDump(llvm::StringRef Code)148e7230ea7SIlya Biryukov   std::string collectAndDump(llvm::StringRef Code) {
149e7230ea7SIlya Biryukov     recordTokens(Code);
150e7230ea7SIlya Biryukov     return Buffer.dumpForTests();
151e7230ea7SIlya Biryukov   }
152e7230ea7SIlya Biryukov 
153e7230ea7SIlya Biryukov   // Adds a file to the test VFS.
addFile(llvm::StringRef Path,llvm::StringRef Contents)154e7230ea7SIlya Biryukov   void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
155e7230ea7SIlya Biryukov     if (!FS->addFile(Path, time_t(),
156e7230ea7SIlya Biryukov                      llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
157e7230ea7SIlya Biryukov       ADD_FAILURE() << "could not add a file to VFS: " << Path;
158e7230ea7SIlya Biryukov     }
159e7230ea7SIlya Biryukov   }
160e7230ea7SIlya Biryukov 
1618c2cf499SKadir Cetinkaya   /// Add a new file, run syntax::tokenize() on the range if any, run it on the
1628c2cf499SKadir Cetinkaya   /// whole file otherwise and return the results.
tokenize(llvm::StringRef Text)163e7230ea7SIlya Biryukov   std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
1648c2cf499SKadir Cetinkaya     llvm::Annotations Annot(Text);
1658c2cf499SKadir Cetinkaya     auto FID = SourceMgr->createFileID(
1668c2cf499SKadir Cetinkaya         llvm::MemoryBuffer::getMemBufferCopy(Annot.code()));
167e7230ea7SIlya Biryukov     // FIXME: pass proper LangOptions.
1688c2cf499SKadir Cetinkaya     if (Annot.ranges().empty())
1698c2cf499SKadir Cetinkaya       return syntax::tokenize(FID, *SourceMgr, LangOptions());
170e7230ea7SIlya Biryukov     return syntax::tokenize(
1718c2cf499SKadir Cetinkaya         syntax::FileRange(FID, Annot.range().Begin, Annot.range().End),
172e7230ea7SIlya Biryukov         *SourceMgr, LangOptions());
173e7230ea7SIlya Biryukov   }
174e7230ea7SIlya Biryukov 
175e7230ea7SIlya Biryukov   // Specialized versions of matchers that hide the SourceManager from clients.
HasText(std::string Text) const176e7230ea7SIlya Biryukov   Matcher<syntax::Token> HasText(std::string Text) const {
177e7230ea7SIlya Biryukov     return ::HasText(Text, SourceMgr.get());
178e7230ea7SIlya Biryukov   }
RangeIs(llvm::Annotations::Range R) const179e7230ea7SIlya Biryukov   Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
180e7230ea7SIlya Biryukov     std::pair<SourceLocation, SourceLocation> Ls;
181e7230ea7SIlya Biryukov     Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
182e7230ea7SIlya Biryukov                    .getLocWithOffset(R.Begin);
183e7230ea7SIlya Biryukov     Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
184e7230ea7SIlya Biryukov                     .getLocWithOffset(R.End);
185e7230ea7SIlya Biryukov     return ::RangeIs(Ls);
186e7230ea7SIlya Biryukov   }
187e7230ea7SIlya Biryukov 
188e7230ea7SIlya Biryukov   /// Finds a subrange in O(n * m).
189e7230ea7SIlya Biryukov   template <class T, class U, class Eq>
findSubrange(llvm::ArrayRef<U> Subrange,llvm::ArrayRef<T> Range,Eq F)190e7230ea7SIlya Biryukov   llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
191e7230ea7SIlya Biryukov                                  llvm::ArrayRef<T> Range, Eq F) {
1921bf055c9SMarcel Hlopko     assert(Subrange.size() >= 1);
1931bf055c9SMarcel Hlopko     if (Range.size() < Subrange.size())
194a3c248dbSserge-sans-paille       return llvm::ArrayRef(Range.end(), Range.end());
1951bf055c9SMarcel Hlopko     for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size();
1961bf055c9SMarcel Hlopko          Begin <= Last; ++Begin) {
197e7230ea7SIlya Biryukov       auto It = Begin;
1981bf055c9SMarcel Hlopko       for (auto ItSub = Subrange.begin(); ItSub != Subrange.end();
1991bf055c9SMarcel Hlopko            ++ItSub, ++It) {
200e7230ea7SIlya Biryukov         if (!F(*ItSub, *It))
201e7230ea7SIlya Biryukov           goto continue_outer;
202e7230ea7SIlya Biryukov       }
203a3c248dbSserge-sans-paille       return llvm::ArrayRef(Begin, It);
204e7230ea7SIlya Biryukov     continue_outer:;
205e7230ea7SIlya Biryukov     }
206a3c248dbSserge-sans-paille     return llvm::ArrayRef(Range.end(), Range.end());
207e7230ea7SIlya Biryukov   }
208e7230ea7SIlya Biryukov 
209e7230ea7SIlya Biryukov   /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
210e7230ea7SIlya Biryukov   /// The match should be unique. \p Query is a whitespace-separated list of
211e7230ea7SIlya Biryukov   /// tokens to search for.
212e7230ea7SIlya Biryukov   llvm::ArrayRef<syntax::Token>
findTokenRange(llvm::StringRef Query,llvm::ArrayRef<syntax::Token> Tokens)213e7230ea7SIlya Biryukov   findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
214e7230ea7SIlya Biryukov     llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
215e7230ea7SIlya Biryukov     Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
216e7230ea7SIlya Biryukov     if (QueryTokens.empty()) {
217e7230ea7SIlya Biryukov       ADD_FAILURE() << "will not look for an empty list of tokens";
218e7230ea7SIlya Biryukov       std::abort();
219e7230ea7SIlya Biryukov     }
220e7230ea7SIlya Biryukov     // An equality test for search.
221e7230ea7SIlya Biryukov     auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
222e7230ea7SIlya Biryukov       return Q == T.text(*SourceMgr);
223e7230ea7SIlya Biryukov     };
224e7230ea7SIlya Biryukov     // Find a match.
225a3c248dbSserge-sans-paille     auto Found = findSubrange(llvm::ArrayRef(QueryTokens), Tokens, TextMatches);
226e7230ea7SIlya Biryukov     if (Found.begin() == Tokens.end()) {
227e7230ea7SIlya Biryukov       ADD_FAILURE() << "could not find the subrange for " << Query;
228e7230ea7SIlya Biryukov       std::abort();
229e7230ea7SIlya Biryukov     }
230e7230ea7SIlya Biryukov     // Check that the match is unique.
231a3c248dbSserge-sans-paille     if (findSubrange(llvm::ArrayRef(QueryTokens),
232a3c248dbSserge-sans-paille                      llvm::ArrayRef(Found.end(), Tokens.end()), TextMatches)
233e7230ea7SIlya Biryukov             .begin() != Tokens.end()) {
234e7230ea7SIlya Biryukov       ADD_FAILURE() << "match is not unique for " << Query;
235e7230ea7SIlya Biryukov       std::abort();
236e7230ea7SIlya Biryukov     }
237e7230ea7SIlya Biryukov     return Found;
238e7230ea7SIlya Biryukov   };
239e7230ea7SIlya Biryukov 
240e7230ea7SIlya Biryukov   // Specialized versions of findTokenRange for expanded and spelled tokens.
findExpanded(llvm::StringRef Query)241e7230ea7SIlya Biryukov   llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
242e7230ea7SIlya Biryukov     return findTokenRange(Query, Buffer.expandedTokens());
243e7230ea7SIlya Biryukov   }
findSpelled(llvm::StringRef Query,FileID File=FileID ())244e7230ea7SIlya Biryukov   llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
245e7230ea7SIlya Biryukov                                             FileID File = FileID()) {
246e7230ea7SIlya Biryukov     if (!File.isValid())
247e7230ea7SIlya Biryukov       File = SourceMgr->getMainFileID();
248e7230ea7SIlya Biryukov     return findTokenRange(Query, Buffer.spelledTokens(File));
249e7230ea7SIlya Biryukov   }
250e7230ea7SIlya Biryukov 
251e7230ea7SIlya Biryukov   // Data fields.
252e7230ea7SIlya Biryukov   llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
253e7230ea7SIlya Biryukov       new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
254e7230ea7SIlya Biryukov   IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
255e7230ea7SIlya Biryukov       new llvm::vfs::InMemoryFileSystem;
256e7230ea7SIlya Biryukov   llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
257e7230ea7SIlya Biryukov       new FileManager(FileSystemOptions(), FS);
258e7230ea7SIlya Biryukov   llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
259e7230ea7SIlya Biryukov       new SourceManager(*Diags, *FileMgr);
260e7230ea7SIlya Biryukov   /// Contains last result of calling recordTokens().
261e7230ea7SIlya Biryukov   TokenBuffer Buffer = TokenBuffer(*SourceMgr);
262e7230ea7SIlya Biryukov };
263e7230ea7SIlya Biryukov 
TEST_F(TokenCollectorTest, RawMode) {
  // Matches an identifier token spelled as Name.
  const auto Identifier = [this](const char *Name) {
    return AllOf(HasText(Name), Kind(tok::identifier));
  };

  // Without annotations the whole buffer is tokenized.
  EXPECT_THAT(tokenize("int main() {}"),
              ElementsAre(Kind(tok::kw_int), Identifier("main"),
                          Kind(tok::l_paren), Kind(tok::r_paren),
                          Kind(tok::l_brace), Kind(tok::r_brace)));
  // Comments are ignored for now.
  EXPECT_THAT(
      tokenize("/* foo */int a; // more comments"),
      ElementsAre(Kind(tok::kw_int), Identifier("a"), Kind(tok::semi)));
  // With an annotated range only the tokens inside the range are produced.
  EXPECT_THAT(tokenize("int [[main() {]]}"),
              ElementsAre(Identifier("main"), Kind(tok::l_paren),
                          Kind(tok::r_paren), Kind(tok::l_brace)));
  // Trailing whitespace inside the range does not add tokens.
  EXPECT_THAT(tokenize("int [[main() {   ]]}"),
              ElementsAre(Identifier("main"), Kind(tok::l_paren),
                          Kind(tok::r_paren), Kind(tok::l_brace)));
  // First token is partially parsed, last token is fully included even though
  // only a part of it is contained in the range.
  EXPECT_THAT(tokenize("int m[[ain() {ret]]urn 0;}"),
              ElementsAre(Identifier("ain"), Kind(tok::l_paren),
                          Kind(tok::r_paren), Kind(tok::l_brace),
                          Kind(tok::kw_return)));
}
290e7230ea7SIlya Biryukov 
// Checks the dump of the collected token buffer for a few small inputs,
// including whitespace-only differences, pragmas, an empty file and an
// erroneous '#define'.
TEST_F(TokenCollectorTest, Basic) {
  std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
      {"int main() {}",
       R"(expanded tokens:
  int main ( ) { }
file './input.cpp'
  spelled tokens:
    int main ( ) { }
  no mappings.
)"},
      // All kinds of whitespace are ignored.
      {"\t\n  int\t\n  main\t\n  (\t\n  )\t\n{\t\n  }\t\n",
       R"(expanded tokens:
  int main ( ) { }
file './input.cpp'
  spelled tokens:
    int main ( ) { }
  no mappings.
)"},
      // Annotation tokens are ignored.
      {R"cpp(
        #pragma GCC visibility push (public)
        #pragma GCC visibility pop
      )cpp",
       R"(expanded tokens:
  <empty>
file './input.cpp'
  spelled tokens:
    # pragma GCC visibility push ( public ) # pragma GCC visibility pop
  mappings:
    ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
)"},
      // Empty files should not crash.
      {R"cpp()cpp", R"(expanded tokens:
  <empty>
file './input.cpp'
  spelled tokens:
    <empty>
  no mappings.
)"},
      // Should not crash on errors inside '#define' directives. Error is that
      // stringification (#B) does not refer to a macro parameter.
      {
          R"cpp(
a
#define MACRO() A #B
)cpp",
          R"(expanded tokens:
  a
file './input.cpp'
  spelled tokens:
    a # define MACRO ( ) A # B
  mappings:
    ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1)
)"}};
  for (auto &Test : TestCases) {
    // Run the frontend once per case and reuse the dump for both the
    // comparison and the failure message.
    std::string Dump = collectAndDump(Test.first);
    EXPECT_EQ(Dump, Test.second) << Dump;
  }
}
350e7230ea7SIlya Biryukov 
// Checks that expanded and spelled tokens carry the source ranges annotated in
// the input, and that looking up the start of each range yields that token.
TEST_F(TokenCollectorTest, Locations) {
  // Check locations of the tokens.
  llvm::Annotations Code(R"cpp(
    $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
  )cpp");
  recordTokens(Code.code());
  // Check expanded tokens.
  EXPECT_THAT(
      Buffer.expandedTokens(),
      ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
                  AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
                  AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
                  AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
                  AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
                  Kind(tok::eof)));
  // Check spelled tokens.
  EXPECT_THAT(
      Buffer.spelledTokens(SourceMgr->getMainFileID()),
      ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
                  AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
                  AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
                  AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
                  AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));

  // The location at the start of every annotated range must resolve to the
  // spelled token covering that range. All ranges in Code are named, so they
  // are looked up by name: the unnamed Code.ranges() holds none of them and
  // iterating it would make this check vacuous.
  auto StartLoc = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID());
  for (const char *Name : {"r1", "r2", "r3", "r4", "r5"}) {
    const llvm::Annotations::Range R = Code.range(Name);
    EXPECT_THAT(
        Buffer.spelledTokenContaining(StartLoc.getLocWithOffset(R.Begin)),
        Pointee(RangeIs(R)))
        << "for range " << Name;
  }
}
382e7230ea7SIlya Biryukov 
// A location pointing into the middle of a token must still resolve to the
// spelled token that covers it.
TEST_F(TokenCollectorTest, LocationInMiddleOfSpelledToken) {
  // The range marks the whole identifier, the point lies inside it.
  llvm::Annotations Code(R"cpp(
    int foo = [[baa^aar]];
  )cpp");
  recordTokens(Code.code());

  // Check spelled tokens.
  const FileID MainFile = SourceMgr->getMainFileID();
  const SourceLocation FileStart = SourceMgr->getLocForStartOfFile(MainFile);
  const SourceLocation PointLoc = FileStart.getLocWithOffset(Code.point());
  EXPECT_THAT(Buffer.spelledTokenContaining(PointLoc),
              Pointee(RangeIs(Code.range())));
}
394*5f1adf04SUtkarsh Saxena 
// Preprocessor directives show up among the spelled tokens only; the dump
// maps all of them to an empty range of expanded tokens.
TEST_F(TokenCollectorTest, MacroDirectives) {
  // Macro directives are not stored anywhere at the moment.
  const std::string Source = R"cpp(
    #define FOO a
    #include "unresolved_file.h"
    #undef FOO
    #ifdef X
    #else
    #endif
    #ifndef Y
    #endif
    #if 1
    #elif 2
    #else
    #endif
    #pragma once
    #pragma something lalala

    int a;
  )cpp";
  const std::string ExpectedDump =
      "expanded tokens:\n"
      "  int a ;\n"
      "file './input.cpp'\n"
      "  spelled tokens:\n"
      "    # define FOO a # include \"unresolved_file.h\" # undef FOO "
      "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
      "# endif # pragma once # pragma something lalala int a ;\n"
      "  mappings:\n"
      "    ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
  EXPECT_EQ(collectAndDump(Source), ExpectedDump);
}
427e7230ea7SIlya Biryukov 
// Checks the token buffer dump (expanded tokens, spelled tokens and the
// mappings between them) for a series of macro expansions.
TEST_F(TokenCollectorTest, MacroReplacements) {
  // One input program and the dump expected for it.
  struct Case {
    std::string Input;
    std::string Expected;
  };
  const Case TestCases[] = {
      // A simple object-like macro.
      {R"cpp(
    #define INT int const
    INT a;
  )cpp",
       R"(expanded tokens:
  int const a ;
file './input.cpp'
  spelled tokens:
    # define INT int const INT a ;
  mappings:
    ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
    ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
)"},
      // A simple function-like macro.
      {R"cpp(
    #define INT(a) const int
    INT(10+10) a;
  )cpp",
       R"(expanded tokens:
  const int a ;
file './input.cpp'
  spelled tokens:
    # define INT ( a ) const int INT ( 10 + 10 ) a ;
  mappings:
    ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
    ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
)"},
      // Recursive macro replacements.
      {R"cpp(
    #define ID(X) X
    #define INT int const
    ID(ID(INT)) a;
  )cpp",
       R"(expanded tokens:
  int const a ;
file './input.cpp'
  spelled tokens:
    # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
  mappings:
    ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
    ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
)"},
      // A little more complicated recursive macro replacements.
      {R"cpp(
    #define ADD(X, Y) X+Y
    #define MULT(X, Y) X*Y

    int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
  )cpp",
       "expanded tokens:\n"
       "  int a = 1 * 2 + 3 * 4 + 5 ;\n"
       "file './input.cpp'\n"
       "  spelled tokens:\n"
       "    # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
       "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
       "  mappings:\n"
       "    ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
       "    ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
      // Empty macro replacement.
      // FIXME: the #define directives should not be glued together.
      {R"cpp(
    #define EMPTY
    #define EMPTY_FUNC(X)
    EMPTY
    EMPTY_FUNC(1+2+3)
    )cpp",
       R"(expanded tokens:
  <empty>
file './input.cpp'
  spelled tokens:
    # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
  mappings:
    ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
    ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
    ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
)"},
      // File ends with a macro replacement.
      {R"cpp(
    #define FOO 10+10;
    int a = FOO
    )cpp",
       R"(expanded tokens:
  int a = 10 + 10 ;
file './input.cpp'
  spelled tokens:
    # define FOO 10 + 10 ; int a = FOO
  mappings:
    ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
    ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
)"},
      // An object-like macro whose replacement ends with the name of a
      // function-like macro; the arguments follow the macro use.
      {R"cpp(
         #define NUM 42
         #define ID(a) a
         #define M 1 + ID
         M(NUM)
       )cpp",
       R"(expanded tokens:
  1 + 42
file './input.cpp'
  spelled tokens:
    # define NUM 42 # define ID ( a ) a # define M 1 + ID M ( NUM )
  mappings:
    ['#'_0, 'M'_17) => ['1'_0, '1'_0)
    ['M'_17, '<eof>'_21) => ['1'_0, '<eof>'_3)
)"},
  };

  // Collect once per case; the dump is also attached to the failure message.
  for (const Case &Current : TestCases) {
    const std::string Dump = collectAndDump(Current.Input);
    EXPECT_EQ(Current.Expected, Dump) << Dump;
  }
}
543e7230ea7SIlya Biryukov 
// Checks that tokens which do not map one-to-one onto contiguous source text
// (results of `##` concatenation, tokens split across lines by line
// continuations) are still recorded with the expected kind and text.
TEST_F(TokenCollectorTest, SpecialTokens) {
  // Tokens coming from concatenations.
  recordTokens(R"cpp(
    #define CONCAT(a, b) a ## b
    int a = CONCAT(1, 2);
  )cpp");
  EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
              Contains(HasText("12")));
  // Multi-line tokens with backslash-newline continuations in the middle: the
  // recorded text keeps the continuations, the kind is still `int`.
  recordTokens("i\\\nn\\\nt");
  EXPECT_THAT(Buffer.expandedTokens(),
              ElementsAre(AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt")),
                          Kind(tok::eof)));
  // FIXME: test tokens with digraphs and UCN identifiers.
}
559e7230ea7SIlya Biryukov 
// Checks that `>>` is recorded as a single `greatergreater` token both when it
// closes two template argument lists (`$br`) and when it is a shift operator
// (`$op`), and that each token covers exactly the annotated source range.
TEST_F(TokenCollectorTest, LateBoundTokens) {
  // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
  // but we choose to record them as a single token (for now).
  llvm::Annotations Code(R"cpp(
    template <class T>
    struct foo { int a; };
    int bar = foo<foo<int$br[[>>]]().a;
    int baz = 10 $op[[>>]] 2;
  )cpp");
  recordTokens(Code.code());
  EXPECT_THAT(std::vector<syntax::Token>(Buffer.expandedTokens()),
              AllOf(Contains(AllOf(Kind(tok::greatergreater),
                                   RangeIs(Code.range("br")))),
                    Contains(AllOf(Kind(tok::greatergreater),
                                   RangeIs(Code.range("op"))))));
}
576e7230ea7SIlya Biryukov 
// Checks that the expanded token stream lists each token of the input exactly
// once, in source order, even for inline method bodies and default member
// initializers (see the comment embedded in the test input).
TEST_F(TokenCollectorTest, DelayedParsing) {
  llvm::StringLiteral Code = R"cpp(
    struct Foo {
      int method() {
        // Parser will visit method bodies and initializers multiple times, but
        // TokenBuffer should only record the first walk over the tokens;
        return 100;
      }
      int a = 10;

      struct Subclass {
        void foo() {
          Foo().method();
        }
      };
    };
  )cpp";
  // Only the "expanded tokens" section of the dump is pinned; whatever follows
  // it is not checked here.
  std::string ExpectedTokens =
      "expanded tokens:\n"
      "  struct Foo { int method ( ) { return 100 ; } int a = 10 ; struct "
      "Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
  EXPECT_THAT(collectAndDump(Code), StartsWith(ExpectedTokens));
}
600e7230ea7SIlya Biryukov 
// Checks the full dump for a translation unit spanning three files: the main
// file includes foo.h, which in turn includes bar.h. The dump is expected to
// contain one shared expanded token stream followed by per-file spelled tokens
// and mappings.
TEST_F(TokenCollectorTest, MultiFile) {
  addFile("./foo.h", R"cpp(
    #define ADD(X, Y) X+Y
    int a = 100;
    #include "bar.h"
  )cpp");
  addFile("./bar.h", R"cpp(
    int b = ADD(1, 2);
    #define MULT(X, Y) X*Y
  )cpp");
  llvm::StringLiteral Code = R"cpp(
    #include "foo.h"
    int c = ADD(1, MULT(2,3));
  )cpp";

  std::string Expected = R"(expanded tokens:
  int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
file './input.cpp'
  spelled tokens:
    # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
  mappings:
    ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
    ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
file './foo.h'
  spelled tokens:
    # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
  mappings:
    ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
    ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
file './bar.h'
  spelled tokens:
    int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
  mappings:
    ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
    ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
)";

  // Collect once and reuse the result in the failure message, so the message
  // shows exactly the dump that was compared.
  const std::string Actual = collectAndDump(Code);
  EXPECT_EQ(Expected, Actual)
      << "input: " << Code << "\nresults: " << Actual;
}
641e7230ea7SIlya Biryukov 
642e7230ea7SIlya Biryukov class TokenBufferTest : public TokenCollectorTest {};
643e7230ea7SIlya Biryukov 
TEST_F(TokenBufferTest,SpelledByExpanded)644e7230ea7SIlya Biryukov TEST_F(TokenBufferTest, SpelledByExpanded) {
645e7230ea7SIlya Biryukov   recordTokens(R"cpp(
646e7230ea7SIlya Biryukov     a1 a2 a3 b1 b2
647e7230ea7SIlya Biryukov   )cpp");
648e7230ea7SIlya Biryukov 
649c79345fbSZarko Todorovski   // Expanded and spelled tokens are stored separately.
650e7230ea7SIlya Biryukov   EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
651e7230ea7SIlya Biryukov   // Searching for subranges of expanded tokens should give the corresponding
652e7230ea7SIlya Biryukov   // spelled ones.
653e7230ea7SIlya Biryukov   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 b1 b2")),
654e7230ea7SIlya Biryukov               ValueIs(SameRange(findSpelled("a1 a2 a3 b1 b2"))));
655e7230ea7SIlya Biryukov   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
656e7230ea7SIlya Biryukov               ValueIs(SameRange(findSpelled("a1 a2 a3"))));
657e7230ea7SIlya Biryukov   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
658e7230ea7SIlya Biryukov               ValueIs(SameRange(findSpelled("b1 b2"))));
659e7230ea7SIlya Biryukov 
660e7230ea7SIlya Biryukov   // Test search on simple macro expansions.
661e7230ea7SIlya Biryukov   recordTokens(R"cpp(
662e7230ea7SIlya Biryukov     #define A a1 a2 a3
663e7230ea7SIlya Biryukov     #define B b1 b2
664e7230ea7SIlya Biryukov 
665e7230ea7SIlya Biryukov     A split B
666e7230ea7SIlya Biryukov   )cpp");
6679619c2ccSKadir Cetinkaya   // Ranges going across expansion boundaries.
668e7230ea7SIlya Biryukov   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
669e7230ea7SIlya Biryukov               ValueIs(SameRange(findSpelled("A split B"))));
670e7230ea7SIlya Biryukov   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
671e7230ea7SIlya Biryukov               ValueIs(SameRange(findSpelled("A split").drop_back())));
672e7230ea7SIlya Biryukov   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
673e7230ea7SIlya Biryukov               ValueIs(SameRange(findSpelled("split B").drop_front())));
674e7230ea7SIlya Biryukov   // Ranges not fully covering macro invocations should fail.
675a41fbb1fSKazu Hirata   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), std::nullopt);
676a41fbb1fSKazu Hirata   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("b2")), std::nullopt);
677e7230ea7SIlya Biryukov   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a2 a3 split b1 b2")),
678a41fbb1fSKazu Hirata             std::nullopt);
679e7230ea7SIlya Biryukov 
680e7230ea7SIlya Biryukov   // Recursive macro invocations.
681e7230ea7SIlya Biryukov   recordTokens(R"cpp(
682e7230ea7SIlya Biryukov     #define ID(x) x
683e7230ea7SIlya Biryukov     #define B b1 b2
684e7230ea7SIlya Biryukov 
685e7230ea7SIlya Biryukov     ID(ID(ID(a1) a2 a3)) split ID(B)
686e7230ea7SIlya Biryukov   )cpp");
687e7230ea7SIlya Biryukov 
688e7230ea7SIlya Biryukov   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("b1 b2")),
6899619c2ccSKadir Cetinkaya               ValueIs(SameRange(findSpelled("( B").drop_front())));
690e7230ea7SIlya Biryukov   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split b1 b2")),
691e7230ea7SIlya Biryukov               ValueIs(SameRange(findSpelled(
692e7230ea7SIlya Biryukov                   "ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"))));
6939619c2ccSKadir Cetinkaya   // Mixed ranges with expanded and spelled tokens.
6949619c2ccSKadir Cetinkaya   EXPECT_THAT(
6959619c2ccSKadir Cetinkaya       Buffer.spelledForExpanded(findExpanded("a1 a2 a3 split")),
6969619c2ccSKadir Cetinkaya       ValueIs(SameRange(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split"))));
6979619c2ccSKadir Cetinkaya   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("split b1 b2")),
6989619c2ccSKadir Cetinkaya               ValueIs(SameRange(findSpelled("split ID ( B )"))));
6999619c2ccSKadir Cetinkaya   // Macro arguments
7009619c2ccSKadir Cetinkaya   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1")),
7019619c2ccSKadir Cetinkaya               ValueIs(SameRange(findSpelled("a1"))));
7029619c2ccSKadir Cetinkaya   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2")),
7039619c2ccSKadir Cetinkaya               ValueIs(SameRange(findSpelled("a2"))));
7049619c2ccSKadir Cetinkaya   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a3")),
7059619c2ccSKadir Cetinkaya               ValueIs(SameRange(findSpelled("a3"))));
7069619c2ccSKadir Cetinkaya   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2")),
7079619c2ccSKadir Cetinkaya               ValueIs(SameRange(findSpelled("ID ( a1 ) a2"))));
7089619c2ccSKadir Cetinkaya   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
7099619c2ccSKadir Cetinkaya               ValueIs(SameRange(findSpelled("ID ( a1 ) a2 a3"))));
710e7230ea7SIlya Biryukov 
711e7230ea7SIlya Biryukov   // Empty macro expansions.
712e7230ea7SIlya Biryukov   recordTokens(R"cpp(
713e7230ea7SIlya Biryukov     #define EMPTY
714e7230ea7SIlya Biryukov     #define ID(X) X
715e7230ea7SIlya Biryukov 
716e7230ea7SIlya Biryukov     EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
717e7230ea7SIlya Biryukov     EMPTY EMPTY ID(4 5 6) split2
718e7230ea7SIlya Biryukov     ID(7 8 9) EMPTY EMPTY
719e7230ea7SIlya Biryukov   )cpp");
720e7230ea7SIlya Biryukov   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1 2 3")),
7219619c2ccSKadir Cetinkaya               ValueIs(SameRange(findSpelled("1 2 3"))));
722e7230ea7SIlya Biryukov   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("4 5 6")),
7239619c2ccSKadir Cetinkaya               ValueIs(SameRange(findSpelled("4 5 6"))));
724e7230ea7SIlya Biryukov   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("7 8 9")),
7259619c2ccSKadir Cetinkaya               ValueIs(SameRange(findSpelled("7 8 9"))));
726e7230ea7SIlya Biryukov 
727e7230ea7SIlya Biryukov   // Empty mappings coming from various directives.
728e7230ea7SIlya Biryukov   recordTokens(R"cpp(
729e7230ea7SIlya Biryukov     #define ID(X) X
730e7230ea7SIlya Biryukov     ID(1)
731e7230ea7SIlya Biryukov     #pragma lalala
732e7230ea7SIlya Biryukov     not_mapped
733e7230ea7SIlya Biryukov   )cpp");
734e7230ea7SIlya Biryukov   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("not_mapped")),
735e7230ea7SIlya Biryukov               ValueIs(SameRange(findSpelled("not_mapped"))));
7369619c2ccSKadir Cetinkaya 
7379619c2ccSKadir Cetinkaya   // Multiple macro arguments
7389619c2ccSKadir Cetinkaya   recordTokens(R"cpp(
7399619c2ccSKadir Cetinkaya     #define ID(X) X
7409619c2ccSKadir Cetinkaya     #define ID2(X, Y) X Y
7419619c2ccSKadir Cetinkaya 
7429619c2ccSKadir Cetinkaya     ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7)
7439619c2ccSKadir Cetinkaya   )cpp");
7449619c2ccSKadir Cetinkaya   // Should fail, spans multiple arguments.
745a41fbb1fSKazu Hirata   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2")), std::nullopt);
7469619c2ccSKadir Cetinkaya   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a2 a3")),
7479619c2ccSKadir Cetinkaya               ValueIs(SameRange(findSpelled("ID ( a2 ) a3"))));
7489619c2ccSKadir Cetinkaya   EXPECT_THAT(
7499619c2ccSKadir Cetinkaya       Buffer.spelledForExpanded(findExpanded("a1 a2 a3")),
7509619c2ccSKadir Cetinkaya       ValueIs(SameRange(findSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )"))));
7519619c2ccSKadir Cetinkaya   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a5 a6")),
7529619c2ccSKadir Cetinkaya               ValueIs(SameRange(findSpelled("a5 a6"))));
7539619c2ccSKadir Cetinkaya   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("a4 a5 a6 a7")),
7549619c2ccSKadir Cetinkaya               ValueIs(SameRange(findSpelled("ID2 ( a4 , a5 a6 a7 )"))));
7559619c2ccSKadir Cetinkaya   // Should fail, spans multiple invocations.
756a41fbb1fSKazu Hirata   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("a1 a2 a3 a4")),
757a41fbb1fSKazu Hirata             std::nullopt);
75867268ee1SSam McCall 
75967268ee1SSam McCall   // https://github.com/clangd/clangd/issues/1289
76067268ee1SSam McCall   recordTokens(R"cpp(
76167268ee1SSam McCall     #define FOO(X) foo(X)
76267268ee1SSam McCall     #define INDIRECT FOO(y)
76367268ee1SSam McCall     INDIRECT // expands to foo(y)
76467268ee1SSam McCall   )cpp");
765a41fbb1fSKazu Hirata   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("y")), std::nullopt);
76667268ee1SSam McCall 
76767268ee1SSam McCall   recordTokens(R"cpp(
76867268ee1SSam McCall     #define FOO(X) a X b
76967268ee1SSam McCall     FOO(y)
77067268ee1SSam McCall   )cpp");
77167268ee1SSam McCall   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("y")),
77267268ee1SSam McCall               ValueIs(SameRange(findSpelled("y"))));
77367268ee1SSam McCall 
77467268ee1SSam McCall   recordTokens(R"cpp(
77567268ee1SSam McCall     #define ID(X) X
77667268ee1SSam McCall     #define BAR ID(1)
77767268ee1SSam McCall     BAR
77867268ee1SSam McCall   )cpp");
77967268ee1SSam McCall   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("1")),
78067268ee1SSam McCall               ValueIs(SameRange(findSpelled(") BAR").drop_front())));
78167268ee1SSam McCall 
78267268ee1SSam McCall   // Critical cases for mapping of Prev/Next in spelledForExpandedSlow.
78367268ee1SSam McCall   recordTokens(R"cpp(
78467268ee1SSam McCall     #define ID(X) X
7859841daf2SKadir Cetinkaya     ID(prev good)
7869841daf2SKadir Cetinkaya     ID(prev ID(good2))
78767268ee1SSam McCall     #define LARGE ID(prev ID(bad))
78867268ee1SSam McCall     LARGE
78967268ee1SSam McCall   )cpp");
79067268ee1SSam McCall   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
79167268ee1SSam McCall               ValueIs(SameRange(findSpelled("good"))));
7929841daf2SKadir Cetinkaya   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good2")),
7939841daf2SKadir Cetinkaya               ValueIs(SameRange(findSpelled("good2"))));
794a41fbb1fSKazu Hirata   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt);
79567268ee1SSam McCall 
79667268ee1SSam McCall   recordTokens(R"cpp(
79767268ee1SSam McCall     #define PREV prev
79867268ee1SSam McCall     #define ID(X) X
79967268ee1SSam McCall     PREV ID(good)
80067268ee1SSam McCall     #define LARGE PREV ID(bad)
80167268ee1SSam McCall     LARGE
80267268ee1SSam McCall   )cpp");
80367268ee1SSam McCall   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
80467268ee1SSam McCall               ValueIs(SameRange(findSpelled("good"))));
805a41fbb1fSKazu Hirata   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt);
80667268ee1SSam McCall 
80767268ee1SSam McCall   recordTokens(R"cpp(
80867268ee1SSam McCall     #define ID(X) X
80967268ee1SSam McCall     #define ID2(X, Y) X Y
8109841daf2SKadir Cetinkaya     ID2(prev, good)
8119841daf2SKadir Cetinkaya     ID2(prev, ID(good2))
81267268ee1SSam McCall     #define LARGE ID2(prev, bad)
81367268ee1SSam McCall     LARGE
81467268ee1SSam McCall   )cpp");
81567268ee1SSam McCall   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
81667268ee1SSam McCall               ValueIs(SameRange(findSpelled("good"))));
8179841daf2SKadir Cetinkaya   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good2")),
8189841daf2SKadir Cetinkaya               ValueIs(SameRange(findSpelled("good2"))));
819a41fbb1fSKazu Hirata   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("bad")), std::nullopt);
8209841daf2SKadir Cetinkaya 
8219841daf2SKadir Cetinkaya   // Prev from macro body.
8229841daf2SKadir Cetinkaya   recordTokens(R"cpp(
8239841daf2SKadir Cetinkaya     #define ID(X) X
8249841daf2SKadir Cetinkaya     #define ID2(X, Y) X prev ID(Y)
8259841daf2SKadir Cetinkaya     ID2(not_prev, good)
8269841daf2SKadir Cetinkaya   )cpp");
8279841daf2SKadir Cetinkaya   EXPECT_THAT(Buffer.spelledForExpanded(findExpanded("good")),
8289841daf2SKadir Cetinkaya               ValueIs(SameRange(findSpelled("good"))));
8299841daf2SKadir Cetinkaya   EXPECT_EQ(Buffer.spelledForExpanded(findExpanded("prev good")), std::nullopt);
830e7230ea7SIlya Biryukov }
831e7230ea7SIlya Biryukov 
// Checks that spelledForExpanded accepts an expanded range that includes the
// trailing `eof` token and maps it to all spelled tokens of the main file.
TEST_F(TokenBufferTest, NoCrashForEofToken) {
  recordTokens(R"cpp(
    int main() {
  )cpp");
  // Preconditions for the check below: there is at least one expanded token
  // and the last one is `eof`.
  ASSERT_FALSE(Buffer.expandedTokens().empty());
  ASSERT_EQ(Buffer.expandedTokens().back().kind(), tok::eof);
  // Expanded range including `eof` is handled gracefully (`eof` is ignored).
  EXPECT_THAT(
      Buffer.spelledForExpanded(Buffer.expandedTokens()),
      ValueIs(SameRange(Buffer.spelledTokens(SourceMgr->getMainFileID()))));
}
8439d1dada5SNathan Ridge 
// Exercises TokenBuffer::expandedTokens(SourceRange): looking up the expanded
// tokens that fall within a source range.
TEST_F(TokenBufferTest, ExpandedTokensForRange) {
  recordTokens(R"cpp(
    #define SIGN(X) X##_washere
    A SIGN(B) C SIGN(D) E SIGN(F) G
  )cpp");

  // The range runs from the location of `C` to the location of the
  // `F_washere` token produced by the last SIGN(...) invocation.
  SourceRange R(findExpanded("C").front().location(),
                findExpanded("F_washere").front().location());
  // Both endpoint tokens and everything between them should be returned.
  EXPECT_THAT(Buffer.expandedTokens(R),
              SameRange(findExpanded("C D_washere E F_washere")));
  // A default-constructed range yields no tokens.
  EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty());
}
857c9c714c7SSam McCall 
TEST_F(TokenBufferTest,ExpansionsOverlapping)858f0ab336eSSam McCall TEST_F(TokenBufferTest, ExpansionsOverlapping) {
8595aed309aSIlya Biryukov   // Object-like macro expansions.
8605aed309aSIlya Biryukov   recordTokens(R"cpp(
8615aed309aSIlya Biryukov     #define FOO 3+4
8625aed309aSIlya Biryukov     int a = FOO 1;
8635aed309aSIlya Biryukov     int b = FOO 2;
8645aed309aSIlya Biryukov   )cpp");
8655aed309aSIlya Biryukov 
866f0ab336eSSam McCall   llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1");
8675aed309aSIlya Biryukov   EXPECT_THAT(
8685aed309aSIlya Biryukov       Buffer.expansionStartingAt(Foo1.data()),
869f0ab336eSSam McCall       ValueIs(IsExpansion(SameRange(Foo1.drop_back()),
870f0ab336eSSam McCall                           SameRange(findExpanded("3 + 4 1").drop_back()))));
871f0ab336eSSam McCall   EXPECT_THAT(
872f0ab336eSSam McCall       Buffer.expansionsOverlapping(Foo1),
873f0ab336eSSam McCall       ElementsAre(IsExpansion(SameRange(Foo1.drop_back()),
8745aed309aSIlya Biryukov                               SameRange(findExpanded("3 + 4 1").drop_back()))));
8755aed309aSIlya Biryukov 
876f0ab336eSSam McCall   llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2");
8775aed309aSIlya Biryukov   EXPECT_THAT(
8785aed309aSIlya Biryukov       Buffer.expansionStartingAt(Foo2.data()),
879f0ab336eSSam McCall       ValueIs(IsExpansion(SameRange(Foo2.drop_back()),
8805aed309aSIlya Biryukov                           SameRange(findExpanded("3 + 4 2").drop_back()))));
881a3c248dbSserge-sans-paille   EXPECT_THAT(
882a3c248dbSserge-sans-paille       Buffer.expansionsOverlapping(llvm::ArrayRef(Foo1.begin(), Foo2.end())),
883f0ab336eSSam McCall       ElementsAre(IsExpansion(SameRange(Foo1.drop_back()), _),
884f0ab336eSSam McCall                   IsExpansion(SameRange(Foo2.drop_back()), _)));
8855aed309aSIlya Biryukov 
8865aed309aSIlya Biryukov   // Function-like macro expansions.
8875aed309aSIlya Biryukov   recordTokens(R"cpp(
8885aed309aSIlya Biryukov     #define ID(X) X
8895aed309aSIlya Biryukov     int a = ID(1+2+3);
8905aed309aSIlya Biryukov     int b = ID(ID(2+3+4));
8915aed309aSIlya Biryukov   )cpp");
8925aed309aSIlya Biryukov 
8935aed309aSIlya Biryukov   llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )");
8945aed309aSIlya Biryukov   EXPECT_THAT(Buffer.expansionStartingAt(&ID1.front()),
8955aed309aSIlya Biryukov               ValueIs(IsExpansion(SameRange(ID1),
8965aed309aSIlya Biryukov                                   SameRange(findExpanded("1 + 2 + 3")))));
8975aed309aSIlya Biryukov   // Only the first spelled token should be found.
8985aed309aSIlya Biryukov   for (const auto &T : ID1.drop_front())
899a41fbb1fSKazu Hirata     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
9005aed309aSIlya Biryukov 
9015aed309aSIlya Biryukov   llvm::ArrayRef<syntax::Token> ID2 = findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
9025aed309aSIlya Biryukov   EXPECT_THAT(Buffer.expansionStartingAt(&ID2.front()),
9035aed309aSIlya Biryukov               ValueIs(IsExpansion(SameRange(ID2),
9045aed309aSIlya Biryukov                                   SameRange(findExpanded("2 + 3 + 4")))));
9055aed309aSIlya Biryukov   // Only the first spelled token should be found.
9065aed309aSIlya Biryukov   for (const auto &T : ID2.drop_front())
907a41fbb1fSKazu Hirata     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
9085aed309aSIlya Biryukov 
909a3c248dbSserge-sans-paille   EXPECT_THAT(Buffer.expansionsOverlapping(llvm::ArrayRef(
910f0ab336eSSam McCall                   findSpelled("1 + 2").data(), findSpelled("4").data())),
911f0ab336eSSam McCall               ElementsAre(IsExpansion(SameRange(ID1), _),
912f0ab336eSSam McCall                           IsExpansion(SameRange(ID2), _)));
913f0ab336eSSam McCall 
9145aed309aSIlya Biryukov   // PP directives.
9155aed309aSIlya Biryukov   recordTokens(R"cpp(
9165aed309aSIlya Biryukov #define FOO 1
9175aed309aSIlya Biryukov int a = FOO;
9185aed309aSIlya Biryukov #pragma once
9195aed309aSIlya Biryukov int b = 1;
9205aed309aSIlya Biryukov   )cpp");
9215aed309aSIlya Biryukov 
9225aed309aSIlya Biryukov   llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1");
9235aed309aSIlya Biryukov   EXPECT_THAT(
9245aed309aSIlya Biryukov       Buffer.expansionStartingAt(&DefineFoo.front()),
9255aed309aSIlya Biryukov       ValueIs(IsExpansion(SameRange(DefineFoo),
9265aed309aSIlya Biryukov                           SameRange(findExpanded("int a").take_front(0)))));
9275aed309aSIlya Biryukov   // Only the first spelled token should be found.
9285aed309aSIlya Biryukov   for (const auto &T : DefineFoo.drop_front())
929a41fbb1fSKazu Hirata     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
9305aed309aSIlya Biryukov 
9315aed309aSIlya Biryukov   llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once");
9325aed309aSIlya Biryukov   EXPECT_THAT(
9335aed309aSIlya Biryukov       Buffer.expansionStartingAt(&PragmaOnce.front()),
9345aed309aSIlya Biryukov       ValueIs(IsExpansion(SameRange(PragmaOnce),
9355aed309aSIlya Biryukov                           SameRange(findExpanded("int b").take_front(0)))));
9365aed309aSIlya Biryukov   // Only the first spelled token should be found.
9375aed309aSIlya Biryukov   for (const auto &T : PragmaOnce.drop_front())
938a41fbb1fSKazu Hirata     EXPECT_EQ(Buffer.expansionStartingAt(&T), std::nullopt);
939f0ab336eSSam McCall 
940f0ab336eSSam McCall   EXPECT_THAT(
941f0ab336eSSam McCall       Buffer.expansionsOverlapping(findSpelled("FOO ; # pragma")),
942f0ab336eSSam McCall       ElementsAre(IsExpansion(SameRange(findSpelled("FOO ;").drop_back()), _),
943f0ab336eSSam McCall                   IsExpansion(SameRange(PragmaOnce), _)));
9445aed309aSIlya Biryukov }
9455aed309aSIlya Biryukov 
// Checks conversion of tokens to FileRange: a single token (`$i`) and a pair
// of first/last tokens (`$all`) must produce the annotated offsets in the main
// file.
TEST_F(TokenBufferTest, TokensToFileRange) {
  addFile("./foo.h", "token_from_header");
  llvm::Annotations Code(R"cpp(
    #define FOO token_from_expansion
    #include "./foo.h"
    $all[[$i[[int]] a = FOO;]]
  )cpp");
  recordTokens(Code.code());

  auto &SM = *SourceMgr;

  // Two simple examples.
  auto Int = findExpanded("int").front();
  auto Semi = findExpanded(";").front();
  EXPECT_EQ(Int.range(SM), FileRange(SM.getMainFileID(), Code.range("i").Begin,
                                     Code.range("i").End));
  EXPECT_EQ(syntax::Token::range(SM, Int, Semi),
            FileRange(SM.getMainFileID(), Code.range("all").Begin,
                      Code.range("all").End));
  // We don't test assertion failures because death tests are slow.
}
967e7230ea7SIlya Biryukov 
// Checks TokenBuffer::macroExpansions for the main file: the file ranges of
// the returned tokens must equal, in order, the `$macro` annotations, which
// mark the macro names at the three invocations written outside of #define
// directives.
TEST_F(TokenBufferTest, MacroExpansions) {
  llvm::Annotations Code(R"cpp(
    #define FOO B
    #define FOO2 BA
    #define CALL(X) int X
    #define G CALL(FOO2)
    int B;
    $macro[[FOO]];
    $macro[[CALL]](A);
    $macro[[G]];
  )cpp");
  recordTokens(Code.code());
  auto &SM = *SourceMgr;
  auto Expansions = Buffer.macroExpansions(SM.getMainFileID());
  // Expected ranges, taken from the annotations.
  std::vector<FileRange> ExpectedMacroRanges;
  for (auto Range : Code.ranges("macro"))
    ExpectedMacroRanges.push_back(
        FileRange(SM.getMainFileID(), Range.Begin, Range.End));
  // Actual ranges, one per reported expansion.
  std::vector<FileRange> ActualMacroRanges;
  for (auto Expansion : Expansions)
    ActualMacroRanges.push_back(Expansion->range(SM));
  EXPECT_EQ(ExpectedMacroRanges, ActualMacroRanges);
}
9913f8da5d0SSam McCall 
// Exercises spelledTokensTouching and spelledIdentifierTouching at each of the
// eight `^` points in `int ab=1;`: at token starts, inside tokens, at token
// ends and on the boundary between two adjacent tokens.
TEST_F(TokenBufferTest, Touching) {
  llvm::Annotations Code("^i^nt^ ^a^b^=^1;^");
  recordTokens(Code.code());
  // The checks below index points 0..7; verify the count first so that a
  // mismatch fails here rather than indexing out of range.
  ASSERT_EQ(Code.points().size(), 8u);

  // Spelled tokens touching the Index-th annotated point.
  auto Touching = [&](int Index) {
    SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
                                                   Code.points()[Index]);
    return spelledTokensTouching(Loc, Buffer);
  };
  // Text of the identifier touching the Index-th annotated point, or "" when
  // spelledIdentifierTouching returns null.
  auto Identifier = [&](int Index) {
    SourceLocation Loc = SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
                                                   Code.points()[Index]);
    const syntax::Token *Tok = spelledIdentifierTouching(Loc, Buffer);
    return Tok ? Tok->text(*SourceMgr) : "";
  };

  // Points 0-2: start, middle and end of the keyword `int` (not an
  // identifier).
  EXPECT_THAT(Touching(0), SameRange(findSpelled("int")));
  EXPECT_EQ(Identifier(0), "");
  EXPECT_THAT(Touching(1), SameRange(findSpelled("int")));
  EXPECT_EQ(Identifier(1), "");
  EXPECT_THAT(Touching(2), SameRange(findSpelled("int")));
  EXPECT_EQ(Identifier(2), "");

  // Points 3-4: start and middle of the identifier `ab`.
  EXPECT_THAT(Touching(3), SameRange(findSpelled("ab")));
  EXPECT_EQ(Identifier(3), "ab");
  EXPECT_THAT(Touching(4), SameRange(findSpelled("ab")));
  EXPECT_EQ(Identifier(4), "ab");

  // Point 5: boundary between `ab` and `=`; both tokens touch it.
  EXPECT_THAT(Touching(5), SameRange(findSpelled("ab =")));
  EXPECT_EQ(Identifier(5), "ab");

  // Point 6: boundary between `=` and `1`; neither is an identifier.
  EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1")));
  EXPECT_EQ(Identifier(6), "");

  // Point 7: end of the input, right after `;`.
  EXPECT_THAT(Touching(7), SameRange(findSpelled(";")));
  EXPECT_EQ(Identifier(7), "");
}
10313f8da5d0SSam McCall 
TEST_F(TokenBufferTest,ExpandedBySpelled)10321bf055c9SMarcel Hlopko TEST_F(TokenBufferTest, ExpandedBySpelled) {
10331bf055c9SMarcel Hlopko   recordTokens(R"cpp(
10341bf055c9SMarcel Hlopko     a1 a2 a3 b1 b2
10351bf055c9SMarcel Hlopko   )cpp");
1036c79345fbSZarko Todorovski   // Expanded and spelled tokens are stored separately.
10371bf055c9SMarcel Hlopko   EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
10381bf055c9SMarcel Hlopko   // Searching for subranges of expanded tokens should give the corresponding
10391bf055c9SMarcel Hlopko   // spelled ones.
10401bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")),
10411bf055c9SMarcel Hlopko               ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2"))));
10421bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")),
10431bf055c9SMarcel Hlopko               ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
10441bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")),
10451bf055c9SMarcel Hlopko               ElementsAre(SameRange(findExpanded("b1 b2"))));
10461bf055c9SMarcel Hlopko 
10471bf055c9SMarcel Hlopko   // Test search on simple macro expansions.
10481bf055c9SMarcel Hlopko   recordTokens(R"cpp(
10491bf055c9SMarcel Hlopko     #define A a1 a2 a3
10501bf055c9SMarcel Hlopko     #define B b1 b2
10511bf055c9SMarcel Hlopko 
10521bf055c9SMarcel Hlopko     A split B
10531bf055c9SMarcel Hlopko   )cpp");
10541bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")),
10551bf055c9SMarcel Hlopko               ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
10561bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()),
10571bf055c9SMarcel Hlopko               ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
10581bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()),
10591bf055c9SMarcel Hlopko               ElementsAre(SameRange(findExpanded("b1 b2"))));
10601bf055c9SMarcel Hlopko 
10611bf055c9SMarcel Hlopko   // Ranges not fully covering macro expansions should fail.
10621bf055c9SMarcel Hlopko   recordTokens(R"cpp(
10631bf055c9SMarcel Hlopko     #define ID(x) x
10641bf055c9SMarcel Hlopko 
10651bf055c9SMarcel Hlopko     ID(a)
10661bf055c9SMarcel Hlopko   )cpp");
10671bf055c9SMarcel Hlopko   // Spelled don't cover entire mapping (missing ID token) -> empty result
10681bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("( a )")), IsEmpty());
10691bf055c9SMarcel Hlopko   // Spelled don't cover entire mapping (missing ) token) -> empty result
10701bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a")), IsEmpty());
10711bf055c9SMarcel Hlopko 
10721bf055c9SMarcel Hlopko   // Recursive macro invocations.
10731bf055c9SMarcel Hlopko   recordTokens(R"cpp(
10741bf055c9SMarcel Hlopko     #define ID(x) x
10751bf055c9SMarcel Hlopko     #define B b1 b2
10761bf055c9SMarcel Hlopko 
10771bf055c9SMarcel Hlopko     ID(ID(ID(a1) a2 a3)) split ID(B)
10781bf055c9SMarcel Hlopko   )cpp");
10791bf055c9SMarcel Hlopko 
10801bf055c9SMarcel Hlopko   EXPECT_THAT(
10811bf055c9SMarcel Hlopko       Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")),
10821bf055c9SMarcel Hlopko       ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
10831bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")),
10841bf055c9SMarcel Hlopko               ElementsAre(SameRange(findExpanded("b1 b2"))));
10851bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(
10861bf055c9SMarcel Hlopko                   findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")),
10871bf055c9SMarcel Hlopko               ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
10881bf055c9SMarcel Hlopko   // FIXME: these should succeed, but we do not support macro arguments yet.
10891bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty());
10901bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")),
10911bf055c9SMarcel Hlopko               IsEmpty());
10921bf055c9SMarcel Hlopko 
10931bf055c9SMarcel Hlopko   // Empty macro expansions.
10941bf055c9SMarcel Hlopko   recordTokens(R"cpp(
10951bf055c9SMarcel Hlopko     #define EMPTY
10961bf055c9SMarcel Hlopko     #define ID(X) X
10971bf055c9SMarcel Hlopko 
10981bf055c9SMarcel Hlopko     EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
10991bf055c9SMarcel Hlopko     EMPTY EMPTY ID(4 5 6) split2
11001bf055c9SMarcel Hlopko     ID(7 8 9) EMPTY EMPTY
11011bf055c9SMarcel Hlopko   )cpp");
11021bf055c9SMarcel Hlopko   // Covered by empty expansions on one of both of the sides.
11031bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")),
11041bf055c9SMarcel Hlopko               ElementsAre(SameRange(findExpanded("1 2 3"))));
11051bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")),
11061bf055c9SMarcel Hlopko               ElementsAre(SameRange(findExpanded("4 5 6"))));
11071bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")),
11081bf055c9SMarcel Hlopko               ElementsAre(SameRange(findExpanded("7 8 9"))));
11091bf055c9SMarcel Hlopko   // Including the empty macro expansions on the side.
11101bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")),
11111bf055c9SMarcel Hlopko               ElementsAre(SameRange(findExpanded("1 2 3"))));
11121bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")),
11131bf055c9SMarcel Hlopko               ElementsAre(SameRange(findExpanded("1 2 3"))));
11141bf055c9SMarcel Hlopko   EXPECT_THAT(
11151bf055c9SMarcel Hlopko       Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")),
11161bf055c9SMarcel Hlopko       ElementsAre(SameRange(findExpanded("1 2 3"))));
11171bf055c9SMarcel Hlopko 
11181bf055c9SMarcel Hlopko   // Empty mappings coming from various directives.
11191bf055c9SMarcel Hlopko   recordTokens(R"cpp(
11201bf055c9SMarcel Hlopko     #define ID(X) X
11211bf055c9SMarcel Hlopko     ID(1)
11221bf055c9SMarcel Hlopko     #pragma lalala
11231bf055c9SMarcel Hlopko     not_mapped
11241bf055c9SMarcel Hlopko   )cpp");
11251bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")),
11261bf055c9SMarcel Hlopko               IsEmpty());
11271bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")),
11281bf055c9SMarcel Hlopko               IsEmpty());
11291bf055c9SMarcel Hlopko 
11301bf055c9SMarcel Hlopko   // Empty macro expansion.
11311bf055c9SMarcel Hlopko   recordTokens(R"cpp(
11321bf055c9SMarcel Hlopko     #define EMPTY
11331bf055c9SMarcel Hlopko     EMPTY int a = 100;
11341bf055c9SMarcel Hlopko   )cpp");
11351bf055c9SMarcel Hlopko   EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()),
11361bf055c9SMarcel Hlopko               IsEmpty());
11371bf055c9SMarcel Hlopko }
11381bf055c9SMarcel Hlopko 
// Records tokens for a function whose body contains a `#pragma unroll`
// directive. The test has no expectations on the recorded tokens; it only
// exercises recordTokens() on this input.
// NOTE(review): presumably a regression test that token collection does not
// crash or assert on pragmas inside a function body -- confirm against the
// commit that introduced it.
TEST_F(TokenCollectorTest, Pragmas) {
  recordTokens(R"cpp(
    void foo() {
      #pragma unroll 4
      for(int i=0;i<4;++i);
    }
  )cpp");
}
1148e7230ea7SIlya Biryukov } // namespace
1149