//===- TokensTest.cpp -----------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
8e7230ea7SIlya Biryukov
9e7230ea7SIlya Biryukov #include "clang/Tooling/Syntax/Tokens.h"
10e7230ea7SIlya Biryukov #include "clang/AST/ASTConsumer.h"
11e7230ea7SIlya Biryukov #include "clang/AST/Expr.h"
12e7230ea7SIlya Biryukov #include "clang/Basic/Diagnostic.h"
13e7230ea7SIlya Biryukov #include "clang/Basic/DiagnosticIDs.h"
14e7230ea7SIlya Biryukov #include "clang/Basic/DiagnosticOptions.h"
15e7230ea7SIlya Biryukov #include "clang/Basic/FileManager.h"
16e7230ea7SIlya Biryukov #include "clang/Basic/FileSystemOptions.h"
17e7230ea7SIlya Biryukov #include "clang/Basic/LLVM.h"
18e7230ea7SIlya Biryukov #include "clang/Basic/LangOptions.h"
19e7230ea7SIlya Biryukov #include "clang/Basic/SourceLocation.h"
20e7230ea7SIlya Biryukov #include "clang/Basic/SourceManager.h"
21e7230ea7SIlya Biryukov #include "clang/Basic/TokenKinds.def"
22e7230ea7SIlya Biryukov #include "clang/Basic/TokenKinds.h"
23e7230ea7SIlya Biryukov #include "clang/Frontend/CompilerInstance.h"
24e7230ea7SIlya Biryukov #include "clang/Frontend/FrontendAction.h"
25e7230ea7SIlya Biryukov #include "clang/Frontend/Utils.h"
26e7230ea7SIlya Biryukov #include "clang/Lex/Lexer.h"
27e7230ea7SIlya Biryukov #include "clang/Lex/PreprocessorOptions.h"
28e7230ea7SIlya Biryukov #include "clang/Lex/Token.h"
29e7230ea7SIlya Biryukov #include "clang/Tooling/Tooling.h"
30e7230ea7SIlya Biryukov #include "llvm/ADT/ArrayRef.h"
31e7230ea7SIlya Biryukov #include "llvm/ADT/IntrusiveRefCntPtr.h"
32e7230ea7SIlya Biryukov #include "llvm/ADT/STLExtras.h"
33e7230ea7SIlya Biryukov #include "llvm/ADT/StringRef.h"
34e7230ea7SIlya Biryukov #include "llvm/Support/FormatVariadic.h"
35e7230ea7SIlya Biryukov #include "llvm/Support/MemoryBuffer.h"
36e7230ea7SIlya Biryukov #include "llvm/Support/VirtualFileSystem.h"
37e7230ea7SIlya Biryukov #include "llvm/Support/raw_os_ostream.h"
38e7230ea7SIlya Biryukov #include "llvm/Support/raw_ostream.h"
393432f4bfSJordan Rupprecht #include "llvm/Testing/Annotations/Annotations.h"
40e7230ea7SIlya Biryukov #include "llvm/Testing/Support/SupportHelpers.h"
41e7230ea7SIlya Biryukov #include <cassert>
42e7230ea7SIlya Biryukov #include <cstdlib>
43e7230ea7SIlya Biryukov #include <gmock/gmock.h>
44e7230ea7SIlya Biryukov #include <gtest/gtest.h>
45e7230ea7SIlya Biryukov #include <memory>
46a1580d7bSKazu Hirata #include <optional>
47e7230ea7SIlya Biryukov #include <ostream>
48e7230ea7SIlya Biryukov #include <string>
49e7230ea7SIlya Biryukov
50e7230ea7SIlya Biryukov using namespace clang;
51e7230ea7SIlya Biryukov using namespace clang::syntax;
52e7230ea7SIlya Biryukov
53e7230ea7SIlya Biryukov using llvm::ValueIs;
54f0ab336eSSam McCall using ::testing::_;
55e7230ea7SIlya Biryukov using ::testing::AllOf;
56e7230ea7SIlya Biryukov using ::testing::Contains;
57e7230ea7SIlya Biryukov using ::testing::ElementsAre;
585aed309aSIlya Biryukov using ::testing::Field;
591bf055c9SMarcel Hlopko using ::testing::IsEmpty;
60e7230ea7SIlya Biryukov using ::testing::Matcher;
61e7230ea7SIlya Biryukov using ::testing::Not;
62cd9b2e18SKadir Cetinkaya using ::testing::Pointee;
63e7230ea7SIlya Biryukov using ::testing::StartsWith;
64e7230ea7SIlya Biryukov
65e7230ea7SIlya Biryukov namespace {
66e7230ea7SIlya Biryukov // Checks the passed ArrayRef<T> has the same begin() and end() iterators as the
67e7230ea7SIlya Biryukov // argument.
68e7230ea7SIlya Biryukov MATCHER_P(SameRange, A, "") {
69e7230ea7SIlya Biryukov return A.begin() == arg.begin() && A.end() == arg.end();
70e7230ea7SIlya Biryukov }
715aed309aSIlya Biryukov
725aed309aSIlya Biryukov Matcher<TokenBuffer::Expansion>
IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,Matcher<llvm::ArrayRef<syntax::Token>> Expanded)735aed309aSIlya Biryukov IsExpansion(Matcher<llvm::ArrayRef<syntax::Token>> Spelled,
745aed309aSIlya Biryukov Matcher<llvm::ArrayRef<syntax::Token>> Expanded) {
755aed309aSIlya Biryukov return AllOf(Field(&TokenBuffer::Expansion::Spelled, Spelled),
765aed309aSIlya Biryukov Field(&TokenBuffer::Expansion::Expanded, Expanded));
775aed309aSIlya Biryukov }
78e7230ea7SIlya Biryukov // Matchers for syntax::Token.
79e7230ea7SIlya Biryukov MATCHER_P(Kind, K, "") { return arg.kind() == K; }
80e7230ea7SIlya Biryukov MATCHER_P2(HasText, Text, SourceMgr, "") {
81e7230ea7SIlya Biryukov return arg.text(*SourceMgr) == Text;
82e7230ea7SIlya Biryukov }
83e7230ea7SIlya Biryukov /// Checks the start and end location of a token are equal to SourceRng.
84e7230ea7SIlya Biryukov MATCHER_P(RangeIs, SourceRng, "") {
85e7230ea7SIlya Biryukov return arg.location() == SourceRng.first &&
86e7230ea7SIlya Biryukov arg.endLocation() == SourceRng.second;
87e7230ea7SIlya Biryukov }
88e7230ea7SIlya Biryukov
89e7230ea7SIlya Biryukov class TokenCollectorTest : public ::testing::Test {
90e7230ea7SIlya Biryukov public:
91e7230ea7SIlya Biryukov /// Run the clang frontend, collect the preprocessed tokens from the frontend
92e7230ea7SIlya Biryukov /// invocation and store them in this->Buffer.
93e7230ea7SIlya Biryukov /// This also clears SourceManager before running the compiler.
recordTokens(llvm::StringRef Code)94e7230ea7SIlya Biryukov void recordTokens(llvm::StringRef Code) {
95e7230ea7SIlya Biryukov class RecordTokens : public ASTFrontendAction {
96e7230ea7SIlya Biryukov public:
97e7230ea7SIlya Biryukov explicit RecordTokens(TokenBuffer &Result) : Result(Result) {}
98e7230ea7SIlya Biryukov
99e7230ea7SIlya Biryukov bool BeginSourceFileAction(CompilerInstance &CI) override {
100e7230ea7SIlya Biryukov assert(!Collector && "expected only a single call to BeginSourceFile");
101e7230ea7SIlya Biryukov Collector.emplace(CI.getPreprocessor());
102e7230ea7SIlya Biryukov return true;
103e7230ea7SIlya Biryukov }
104e7230ea7SIlya Biryukov void EndSourceFileAction() override {
105e7230ea7SIlya Biryukov assert(Collector && "BeginSourceFileAction was never called");
106e7230ea7SIlya Biryukov Result = std::move(*Collector).consume();
107aa979084SUtkarsh Saxena Result.indexExpandedTokens();
108e7230ea7SIlya Biryukov }
109e7230ea7SIlya Biryukov
110e7230ea7SIlya Biryukov std::unique_ptr<ASTConsumer>
111e7230ea7SIlya Biryukov CreateASTConsumer(CompilerInstance &CI, StringRef InFile) override {
1122b3d49b6SJonas Devlieghere return std::make_unique<ASTConsumer>();
113e7230ea7SIlya Biryukov }
114e7230ea7SIlya Biryukov
115e7230ea7SIlya Biryukov private:
116e7230ea7SIlya Biryukov TokenBuffer &Result;
1176ad0788cSKazu Hirata std::optional<TokenCollector> Collector;
118e7230ea7SIlya Biryukov };
119e7230ea7SIlya Biryukov
120e7230ea7SIlya Biryukov constexpr const char *FileName = "./input.cpp";
121e7230ea7SIlya Biryukov FS->addFile(FileName, time_t(), llvm::MemoryBuffer::getMemBufferCopy(""));
122e7230ea7SIlya Biryukov // Prepare to run a compiler.
123e7230ea7SIlya Biryukov if (!Diags->getClient())
124e7230ea7SIlya Biryukov Diags->setClient(new IgnoringDiagConsumer);
125e7230ea7SIlya Biryukov std::vector<const char *> Args = {"tok-test", "-std=c++03", "-fsyntax-only",
126e7230ea7SIlya Biryukov FileName};
127499d0b96SSam McCall CreateInvocationOptions CIOpts;
128499d0b96SSam McCall CIOpts.Diags = Diags;
129499d0b96SSam McCall CIOpts.VFS = FS;
130499d0b96SSam McCall auto CI = createInvocation(Args, std::move(CIOpts));
131e7230ea7SIlya Biryukov assert(CI);
132e7230ea7SIlya Biryukov CI->getFrontendOpts().DisableFree = false;
133e7230ea7SIlya Biryukov CI->getPreprocessorOpts().addRemappedFile(
134e7230ea7SIlya Biryukov FileName, llvm::MemoryBuffer::getMemBufferCopy(Code).release());
135e7230ea7SIlya Biryukov CompilerInstance Compiler;
136e7230ea7SIlya Biryukov Compiler.setInvocation(std::move(CI));
137e7230ea7SIlya Biryukov Compiler.setDiagnostics(Diags.get());
138e7230ea7SIlya Biryukov Compiler.setFileManager(FileMgr.get());
139e7230ea7SIlya Biryukov Compiler.setSourceManager(SourceMgr.get());
140e7230ea7SIlya Biryukov
141e7230ea7SIlya Biryukov this->Buffer = TokenBuffer(*SourceMgr);
142e7230ea7SIlya Biryukov RecordTokens Recorder(this->Buffer);
143e7230ea7SIlya Biryukov ASSERT_TRUE(Compiler.ExecuteAction(Recorder))
144e7230ea7SIlya Biryukov << "failed to run the frontend";
145e7230ea7SIlya Biryukov }
146e7230ea7SIlya Biryukov
147e7230ea7SIlya Biryukov /// Record the tokens and return a test dump of the resulting buffer.
collectAndDump(llvm::StringRef Code)148e7230ea7SIlya Biryukov std::string collectAndDump(llvm::StringRef Code) {
149e7230ea7SIlya Biryukov recordTokens(Code);
150e7230ea7SIlya Biryukov return Buffer.dumpForTests();
151e7230ea7SIlya Biryukov }
152e7230ea7SIlya Biryukov
153e7230ea7SIlya Biryukov // Adds a file to the test VFS.
addFile(llvm::StringRef Path,llvm::StringRef Contents)154e7230ea7SIlya Biryukov void addFile(llvm::StringRef Path, llvm::StringRef Contents) {
155e7230ea7SIlya Biryukov if (!FS->addFile(Path, time_t(),
156e7230ea7SIlya Biryukov llvm::MemoryBuffer::getMemBufferCopy(Contents))) {
157e7230ea7SIlya Biryukov ADD_FAILURE() << "could not add a file to VFS: " << Path;
158e7230ea7SIlya Biryukov }
159e7230ea7SIlya Biryukov }
160e7230ea7SIlya Biryukov
1618c2cf499SKadir Cetinkaya /// Add a new file, run syntax::tokenize() on the range if any, run it on the
1628c2cf499SKadir Cetinkaya /// whole file otherwise and return the results.
tokenize(llvm::StringRef Text)163e7230ea7SIlya Biryukov std::vector<syntax::Token> tokenize(llvm::StringRef Text) {
1648c2cf499SKadir Cetinkaya llvm::Annotations Annot(Text);
1658c2cf499SKadir Cetinkaya auto FID = SourceMgr->createFileID(
1668c2cf499SKadir Cetinkaya llvm::MemoryBuffer::getMemBufferCopy(Annot.code()));
167e7230ea7SIlya Biryukov // FIXME: pass proper LangOptions.
1688c2cf499SKadir Cetinkaya if (Annot.ranges().empty())
1698c2cf499SKadir Cetinkaya return syntax::tokenize(FID, *SourceMgr, LangOptions());
170e7230ea7SIlya Biryukov return syntax::tokenize(
1718c2cf499SKadir Cetinkaya syntax::FileRange(FID, Annot.range().Begin, Annot.range().End),
172e7230ea7SIlya Biryukov *SourceMgr, LangOptions());
173e7230ea7SIlya Biryukov }
174e7230ea7SIlya Biryukov
175e7230ea7SIlya Biryukov // Specialized versions of matchers that hide the SourceManager from clients.
HasText(std::string Text) const176e7230ea7SIlya Biryukov Matcher<syntax::Token> HasText(std::string Text) const {
177e7230ea7SIlya Biryukov return ::HasText(Text, SourceMgr.get());
178e7230ea7SIlya Biryukov }
RangeIs(llvm::Annotations::Range R) const179e7230ea7SIlya Biryukov Matcher<syntax::Token> RangeIs(llvm::Annotations::Range R) const {
180e7230ea7SIlya Biryukov std::pair<SourceLocation, SourceLocation> Ls;
181e7230ea7SIlya Biryukov Ls.first = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
182e7230ea7SIlya Biryukov .getLocWithOffset(R.Begin);
183e7230ea7SIlya Biryukov Ls.second = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID())
184e7230ea7SIlya Biryukov .getLocWithOffset(R.End);
185e7230ea7SIlya Biryukov return ::RangeIs(Ls);
186e7230ea7SIlya Biryukov }
187e7230ea7SIlya Biryukov
188e7230ea7SIlya Biryukov /// Finds a subrange in O(n * m).
189e7230ea7SIlya Biryukov template <class T, class U, class Eq>
findSubrange(llvm::ArrayRef<U> Subrange,llvm::ArrayRef<T> Range,Eq F)190e7230ea7SIlya Biryukov llvm::ArrayRef<T> findSubrange(llvm::ArrayRef<U> Subrange,
191e7230ea7SIlya Biryukov llvm::ArrayRef<T> Range, Eq F) {
1921bf055c9SMarcel Hlopko assert(Subrange.size() >= 1);
1931bf055c9SMarcel Hlopko if (Range.size() < Subrange.size())
194a3c248dbSserge-sans-paille return llvm::ArrayRef(Range.end(), Range.end());
1951bf055c9SMarcel Hlopko for (auto Begin = Range.begin(), Last = Range.end() - Subrange.size();
1961bf055c9SMarcel Hlopko Begin <= Last; ++Begin) {
197e7230ea7SIlya Biryukov auto It = Begin;
1981bf055c9SMarcel Hlopko for (auto ItSub = Subrange.begin(); ItSub != Subrange.end();
1991bf055c9SMarcel Hlopko ++ItSub, ++It) {
200e7230ea7SIlya Biryukov if (!F(*ItSub, *It))
201e7230ea7SIlya Biryukov goto continue_outer;
202e7230ea7SIlya Biryukov }
203a3c248dbSserge-sans-paille return llvm::ArrayRef(Begin, It);
204e7230ea7SIlya Biryukov continue_outer:;
205e7230ea7SIlya Biryukov }
206a3c248dbSserge-sans-paille return llvm::ArrayRef(Range.end(), Range.end());
207e7230ea7SIlya Biryukov }
208e7230ea7SIlya Biryukov
209e7230ea7SIlya Biryukov /// Finds a subrange in \p Tokens that match the tokens specified in \p Query.
210e7230ea7SIlya Biryukov /// The match should be unique. \p Query is a whitespace-separated list of
211e7230ea7SIlya Biryukov /// tokens to search for.
212e7230ea7SIlya Biryukov llvm::ArrayRef<syntax::Token>
findTokenRange(llvm::StringRef Query,llvm::ArrayRef<syntax::Token> Tokens)213e7230ea7SIlya Biryukov findTokenRange(llvm::StringRef Query, llvm::ArrayRef<syntax::Token> Tokens) {
214e7230ea7SIlya Biryukov llvm::SmallVector<llvm::StringRef, 8> QueryTokens;
215e7230ea7SIlya Biryukov Query.split(QueryTokens, ' ', /*MaxSplit=*/-1, /*KeepEmpty=*/false);
216e7230ea7SIlya Biryukov if (QueryTokens.empty()) {
217e7230ea7SIlya Biryukov ADD_FAILURE() << "will not look for an empty list of tokens";
218e7230ea7SIlya Biryukov std::abort();
219e7230ea7SIlya Biryukov }
220e7230ea7SIlya Biryukov // An equality test for search.
221e7230ea7SIlya Biryukov auto TextMatches = [this](llvm::StringRef Q, const syntax::Token &T) {
222e7230ea7SIlya Biryukov return Q == T.text(*SourceMgr);
223e7230ea7SIlya Biryukov };
224e7230ea7SIlya Biryukov // Find a match.
225a3c248dbSserge-sans-paille auto Found = findSubrange(llvm::ArrayRef(QueryTokens), Tokens, TextMatches);
226e7230ea7SIlya Biryukov if (Found.begin() == Tokens.end()) {
227e7230ea7SIlya Biryukov ADD_FAILURE() << "could not find the subrange for " << Query;
228e7230ea7SIlya Biryukov std::abort();
229e7230ea7SIlya Biryukov }
230e7230ea7SIlya Biryukov // Check that the match is unique.
231a3c248dbSserge-sans-paille if (findSubrange(llvm::ArrayRef(QueryTokens),
232a3c248dbSserge-sans-paille llvm::ArrayRef(Found.end(), Tokens.end()), TextMatches)
233e7230ea7SIlya Biryukov .begin() != Tokens.end()) {
234e7230ea7SIlya Biryukov ADD_FAILURE() << "match is not unique for " << Query;
235e7230ea7SIlya Biryukov std::abort();
236e7230ea7SIlya Biryukov }
237e7230ea7SIlya Biryukov return Found;
238e7230ea7SIlya Biryukov };
239e7230ea7SIlya Biryukov
240e7230ea7SIlya Biryukov // Specialized versions of findTokenRange for expanded and spelled tokens.
findExpanded(llvm::StringRef Query)241e7230ea7SIlya Biryukov llvm::ArrayRef<syntax::Token> findExpanded(llvm::StringRef Query) {
242e7230ea7SIlya Biryukov return findTokenRange(Query, Buffer.expandedTokens());
243e7230ea7SIlya Biryukov }
findSpelled(llvm::StringRef Query,FileID File=FileID ())244e7230ea7SIlya Biryukov llvm::ArrayRef<syntax::Token> findSpelled(llvm::StringRef Query,
245e7230ea7SIlya Biryukov FileID File = FileID()) {
246e7230ea7SIlya Biryukov if (!File.isValid())
247e7230ea7SIlya Biryukov File = SourceMgr->getMainFileID();
248e7230ea7SIlya Biryukov return findTokenRange(Query, Buffer.spelledTokens(File));
249e7230ea7SIlya Biryukov }
250e7230ea7SIlya Biryukov
251e7230ea7SIlya Biryukov // Data fields.
252e7230ea7SIlya Biryukov llvm::IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
253e7230ea7SIlya Biryukov new DiagnosticsEngine(new DiagnosticIDs, new DiagnosticOptions);
254e7230ea7SIlya Biryukov IntrusiveRefCntPtr<llvm::vfs::InMemoryFileSystem> FS =
255e7230ea7SIlya Biryukov new llvm::vfs::InMemoryFileSystem;
256e7230ea7SIlya Biryukov llvm::IntrusiveRefCntPtr<FileManager> FileMgr =
257e7230ea7SIlya Biryukov new FileManager(FileSystemOptions(), FS);
258e7230ea7SIlya Biryukov llvm::IntrusiveRefCntPtr<SourceManager> SourceMgr =
259e7230ea7SIlya Biryukov new SourceManager(*Diags, *FileMgr);
260e7230ea7SIlya Biryukov /// Contains last result of calling recordTokens().
261e7230ea7SIlya Biryukov TokenBuffer Buffer = TokenBuffer(*SourceMgr);
262e7230ea7SIlya Biryukov };
263e7230ea7SIlya Biryukov
/// Checks syntax::tokenize() both on a whole file and on an annotated
/// [[range]] within a file.
TEST_F(TokenCollectorTest, RawMode) {
  // Matcher for the identifier `main`, shared by several expectations.
  const auto MainIdentifier = AllOf(HasText("main"), Kind(tok::identifier));

  // Whole-file tokenization.
  const std::vector<syntax::Token> WholeFile = tokenize("int main() {}");
  EXPECT_THAT(WholeFile,
              ElementsAre(Kind(tok::kw_int), MainIdentifier,
                          Kind(tok::l_paren), Kind(tok::r_paren),
                          Kind(tok::l_brace), Kind(tok::r_brace)));

  // Comments are ignored for now.
  const std::vector<syntax::Token> WithComments =
      tokenize("/* foo */int a; // more comments");
  EXPECT_THAT(WithComments,
              ElementsAre(Kind(tok::kw_int),
                          AllOf(HasText("a"), Kind(tok::identifier)),
                          Kind(tok::semi)));

  // Only the tokens inside the annotated range are produced, with or without
  // trailing whitespace inside the range.
  const std::vector<syntax::Token> TightRange = tokenize("int [[main() {]]}");
  EXPECT_THAT(TightRange,
              ElementsAre(MainIdentifier, Kind(tok::l_paren),
                          Kind(tok::r_paren), Kind(tok::l_brace)));
  const std::vector<syntax::Token> PaddedRange =
      tokenize("int [[main() { ]]}");
  EXPECT_THAT(PaddedRange,
              ElementsAre(MainIdentifier, Kind(tok::l_paren),
                          Kind(tok::r_paren), Kind(tok::l_brace)));

  // First token is partially parsed, last token is fully included even though
  // only a part of it is contained in the range.
  const std::vector<syntax::Token> SplitTokens =
      tokenize("int m[[ain() {ret]]urn 0;}");
  EXPECT_THAT(SplitTokens,
              ElementsAre(AllOf(HasText("ain"), Kind(tok::identifier)),
                          Kind(tok::l_paren), Kind(tok::r_paren),
                          Kind(tok::l_brace), Kind(tok::kw_return)));
}
290e7230ea7SIlya Biryukov
// Table-driven check of the token collector: each case pairs an input program
// with the exact dump that collectAndDump() is expected to return for it. The
// dumps are compared as strings, so whitespace inside the expected raw strings
// is significant.
TEST_F(TokenCollectorTest,Basic)291e7230ea7SIlya Biryukov TEST_F(TokenCollectorTest, Basic) {
292e7230ea7SIlya Biryukov std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
293e7230ea7SIlya Biryukov {"int main() {}",
294e7230ea7SIlya Biryukov R"(expanded tokens:
295e7230ea7SIlya Biryukov int main ( ) { }
296e7230ea7SIlya Biryukov file './input.cpp'
297e7230ea7SIlya Biryukov spelled tokens:
298e7230ea7SIlya Biryukov int main ( ) { }
299e7230ea7SIlya Biryukov no mappings.
300e7230ea7SIlya Biryukov )"},
301e7230ea7SIlya Biryukov // All kinds of whitespace are ignored.
302e7230ea7SIlya Biryukov {"\t\n int\t\n main\t\n (\t\n )\t\n{\t\n }\t\n",
303e7230ea7SIlya Biryukov R"(expanded tokens:
304e7230ea7SIlya Biryukov int main ( ) { }
305e7230ea7SIlya Biryukov file './input.cpp'
306e7230ea7SIlya Biryukov spelled tokens:
307e7230ea7SIlya Biryukov int main ( ) { }
308e7230ea7SIlya Biryukov no mappings.
309e7230ea7SIlya Biryukov )"},
310e7230ea7SIlya Biryukov // Annotation tokens are ignored.
311e7230ea7SIlya Biryukov {R"cpp(
312e7230ea7SIlya Biryukov #pragma GCC visibility push (public)
313e7230ea7SIlya Biryukov #pragma GCC visibility pop
314e7230ea7SIlya Biryukov )cpp",
315e7230ea7SIlya Biryukov R"(expanded tokens:
316e7230ea7SIlya Biryukov <empty>
317e7230ea7SIlya Biryukov file './input.cpp'
318e7230ea7SIlya Biryukov spelled tokens:
319e7230ea7SIlya Biryukov # pragma GCC visibility push ( public ) # pragma GCC visibility pop
320e7230ea7SIlya Biryukov mappings:
321e7230ea7SIlya Biryukov ['#'_0, '<eof>'_13) => ['<eof>'_0, '<eof>'_0)
32226c066d6SIlya Biryukov )"},
32326c066d6SIlya Biryukov // Empty files should not crash.
32426c066d6SIlya Biryukov {R"cpp()cpp", R"(expanded tokens:
32526c066d6SIlya Biryukov <empty>
32626c066d6SIlya Biryukov file './input.cpp'
32726c066d6SIlya Biryukov spelled tokens:
32826c066d6SIlya Biryukov <empty>
32926c066d6SIlya Biryukov no mappings.
330b455fc42SIlya Biryukov )"},
331b455fc42SIlya Biryukov // Should not crash on errors inside '#define' directives. Error is that
332b455fc42SIlya Biryukov // stringification (#B) does not refer to a macro parameter.
333b455fc42SIlya Biryukov {
334b455fc42SIlya Biryukov R"cpp(
335b455fc42SIlya Biryukov a
336b455fc42SIlya Biryukov #define MACRO() A #B
337b455fc42SIlya Biryukov )cpp",
338b455fc42SIlya Biryukov R"(expanded tokens:
339b455fc42SIlya Biryukov a
340b455fc42SIlya Biryukov file './input.cpp'
341b455fc42SIlya Biryukov spelled tokens:
342b455fc42SIlya Biryukov a # define MACRO ( ) A # B
343b455fc42SIlya Biryukov mappings:
344b455fc42SIlya Biryukov ['#'_1, '<eof>'_9) => ['<eof>'_1, '<eof>'_1)
345e7230ea7SIlya Biryukov )"}};
// Compare the dump of every input against its expected text; the second
// collectAndDump() call only supplies the message attached to the expectation.
346e7230ea7SIlya Biryukov for (auto &Test : TestCases)
347e7230ea7SIlya Biryukov EXPECT_EQ(collectAndDump(Test.first), Test.second)
348e7230ea7SIlya Biryukov << collectAndDump(Test.first);
349e7230ea7SIlya Biryukov }
350e7230ea7SIlya Biryukov
/// Checks that expanded and spelled tokens carry the source locations marked
/// by the named ranges in the annotated input, and that
/// spelledTokenContaining() finds each token from its start location.
TEST_F(TokenCollectorTest, Locations) {
  // Check locations of the tokens.
  llvm::Annotations Code(R"cpp(
    $r1[[int]] $r2[[a]] $r3[[=]] $r4[["foo bar baz"]] $r5[[;]]
  )cpp");
  recordTokens(Code.code());
  // Check expanded tokens.
  EXPECT_THAT(
      Buffer.expandedTokens(),
      ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
                  AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
                  AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
                  AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
                  AllOf(Kind(tok::semi), RangeIs(Code.range("r5"))),
                  Kind(tok::eof)));
  // Check spelled tokens.
  EXPECT_THAT(
      Buffer.spelledTokens(SourceMgr->getMainFileID()),
      ElementsAre(AllOf(Kind(tok::kw_int), RangeIs(Code.range("r1"))),
                  AllOf(Kind(tok::identifier), RangeIs(Code.range("r2"))),
                  AllOf(Kind(tok::equal), RangeIs(Code.range("r3"))),
                  AllOf(Kind(tok::string_literal), RangeIs(Code.range("r4"))),
                  AllOf(Kind(tok::semi), RangeIs(Code.range("r5")))));

  auto StartLoc = SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID());
  // Every range in this input is named, and Annotations::ranges() without an
  // argument only returns the unnamed ones, so iterating it would check
  // nothing. Look the ranges up by name instead.
  for (const char *Name : {"r1", "r2", "r3", "r4", "r5"}) {
    const auto R = Code.range(Name);
    EXPECT_THAT(
        Buffer.spelledTokenContaining(StartLoc.getLocWithOffset(R.Begin)),
        Pointee(RangeIs(R)))
        << "range " << Name;
  }
}
382e7230ea7SIlya Biryukov
/// Checks that spelledTokenContaining() returns the enclosing token when the
/// queried location (the `^` point) lies in the middle of that token.
TEST_F(TokenCollectorTest, LocationInMiddleOfSpelledToken) {
  llvm::Annotations Code(R"cpp(
    int foo = [[baa^aar]];
  )cpp");
  recordTokens(Code.code());
  // Check spelled tokens.
  const SourceLocation FileStart =
      SourceMgr->getLocForStartOfFile(SourceMgr->getMainFileID());
  const SourceLocation InsideToken = FileStart.getLocWithOffset(Code.point());
  const auto ExpectedToken = RangeIs(Code.range());
  EXPECT_THAT(Buffer.spelledTokenContaining(InsideToken),
              Pointee(ExpectedToken));
}
394*5f1adf04SUtkarsh Saxena
/// Checks the dump produced for a file consisting of preprocessor directives
/// followed by a single declaration.
TEST_F(TokenCollectorTest, MacroDirectives) {
  // Macro directives are not stored anywhere at the moment.
  const std::string Source = R"cpp(
    #define FOO a
    #include "unresolved_file.h"
    #undef FOO
    #ifdef X
    #else
    #endif
    #ifndef Y
    #endif
    #if 1
    #elif 2
    #else
    #endif
    #pragma once
    #pragma something lalala

    int a;
  )cpp";
  // The directives appear only among the spelled tokens and are covered by a
  // single mapping onto an empty range of expanded tokens.
  const std::string ExpectedDump =
      "expanded tokens:\n"
      " int a ;\n"
      "file './input.cpp'\n"
      " spelled tokens:\n"
      " # define FOO a # include \"unresolved_file.h\" # undef FOO "
      "# ifdef X # else # endif # ifndef Y # endif # if 1 # elif 2 # else "
      "# endif # pragma once # pragma something lalala int a ;\n"
      " mappings:\n"
      " ['#'_0, 'int'_39) => ['int'_0, 'int'_0)\n";
  const std::string ActualDump = collectAndDump(Source);
  EXPECT_EQ(ActualDump, ExpectedDump);
}
427e7230ea7SIlya Biryukov
// Table-driven check of how macro expansions are recorded: each case pairs an
// input program with the exact dump that collectAndDump() is expected to
// return, including the spelled-to-expanded mappings. The dumps are compared
// as strings, so whitespace inside the expected text is significant.
TEST_F(TokenCollectorTest,MacroReplacements)428e7230ea7SIlya Biryukov TEST_F(TokenCollectorTest, MacroReplacements) {
429e7230ea7SIlya Biryukov std::pair</*Input*/ std::string, /*Expected*/ std::string> TestCases[] = {
430e7230ea7SIlya Biryukov // A simple object-like macro.
431e7230ea7SIlya Biryukov {R"cpp(
432e7230ea7SIlya Biryukov #define INT int const
433e7230ea7SIlya Biryukov INT a;
434e7230ea7SIlya Biryukov )cpp",
435e7230ea7SIlya Biryukov R"(expanded tokens:
436e7230ea7SIlya Biryukov int const a ;
437e7230ea7SIlya Biryukov file './input.cpp'
438e7230ea7SIlya Biryukov spelled tokens:
439e7230ea7SIlya Biryukov # define INT int const INT a ;
440e7230ea7SIlya Biryukov mappings:
441e7230ea7SIlya Biryukov ['#'_0, 'INT'_5) => ['int'_0, 'int'_0)
442e7230ea7SIlya Biryukov ['INT'_5, 'a'_6) => ['int'_0, 'a'_2)
443e7230ea7SIlya Biryukov )"},
444e7230ea7SIlya Biryukov // A simple function-like macro.
445e7230ea7SIlya Biryukov {R"cpp(
446e7230ea7SIlya Biryukov #define INT(a) const int
447e7230ea7SIlya Biryukov INT(10+10) a;
448e7230ea7SIlya Biryukov )cpp",
449e7230ea7SIlya Biryukov R"(expanded tokens:
450e7230ea7SIlya Biryukov const int a ;
451e7230ea7SIlya Biryukov file './input.cpp'
452e7230ea7SIlya Biryukov spelled tokens:
453e7230ea7SIlya Biryukov # define INT ( a ) const int INT ( 10 + 10 ) a ;
454e7230ea7SIlya Biryukov mappings:
455e7230ea7SIlya Biryukov ['#'_0, 'INT'_8) => ['const'_0, 'const'_0)
456e7230ea7SIlya Biryukov ['INT'_8, 'a'_14) => ['const'_0, 'a'_2)
457e7230ea7SIlya Biryukov )"},
458e7230ea7SIlya Biryukov // Recursive macro replacements.
459e7230ea7SIlya Biryukov {R"cpp(
460e7230ea7SIlya Biryukov #define ID(X) X
461e7230ea7SIlya Biryukov #define INT int const
462e7230ea7SIlya Biryukov ID(ID(INT)) a;
463e7230ea7SIlya Biryukov )cpp",
464e7230ea7SIlya Biryukov R"(expanded tokens:
465e7230ea7SIlya Biryukov int const a ;
466e7230ea7SIlya Biryukov file './input.cpp'
467e7230ea7SIlya Biryukov spelled tokens:
468e7230ea7SIlya Biryukov # define ID ( X ) X # define INT int const ID ( ID ( INT ) ) a ;
469e7230ea7SIlya Biryukov mappings:
470e7230ea7SIlya Biryukov ['#'_0, 'ID'_12) => ['int'_0, 'int'_0)
471e7230ea7SIlya Biryukov ['ID'_12, 'a'_19) => ['int'_0, 'a'_2)
472e7230ea7SIlya Biryukov )"},
473e7230ea7SIlya Biryukov // A little more complicated recursive macro replacements.
474e7230ea7SIlya Biryukov {R"cpp(
475e7230ea7SIlya Biryukov #define ADD(X, Y) X+Y
476e7230ea7SIlya Biryukov #define MULT(X, Y) X*Y
477e7230ea7SIlya Biryukov
478e7230ea7SIlya Biryukov int a = ADD(MULT(1,2), MULT(3,ADD(4,5)));
479e7230ea7SIlya Biryukov )cpp",
480e7230ea7SIlya Biryukov "expanded tokens:\n"
481e7230ea7SIlya Biryukov " int a = 1 * 2 + 3 * 4 + 5 ;\n"
482e7230ea7SIlya Biryukov "file './input.cpp'\n"
483e7230ea7SIlya Biryukov " spelled tokens:\n"
484e7230ea7SIlya Biryukov " # define ADD ( X , Y ) X + Y # define MULT ( X , Y ) X * Y int "
485e7230ea7SIlya Biryukov "a = ADD ( MULT ( 1 , 2 ) , MULT ( 3 , ADD ( 4 , 5 ) ) ) ;\n"
486e7230ea7SIlya Biryukov " mappings:\n"
487e7230ea7SIlya Biryukov " ['#'_0, 'int'_22) => ['int'_0, 'int'_0)\n"
488e7230ea7SIlya Biryukov " ['ADD'_25, ';'_46) => ['1'_3, ';'_12)\n"},
489e7230ea7SIlya Biryukov // Empty macro replacement.
4905e69f27eSIlya Biryukov // FIXME: the #define directives should not be glued together.
491e7230ea7SIlya Biryukov {R"cpp(
492e7230ea7SIlya Biryukov #define EMPTY
493e7230ea7SIlya Biryukov #define EMPTY_FUNC(X)
494e7230ea7SIlya Biryukov EMPTY
495e7230ea7SIlya Biryukov EMPTY_FUNC(1+2+3)
496e7230ea7SIlya Biryukov )cpp",
497e7230ea7SIlya Biryukov R"(expanded tokens:
498e7230ea7SIlya Biryukov <empty>
499e7230ea7SIlya Biryukov file './input.cpp'
500e7230ea7SIlya Biryukov spelled tokens:
501e7230ea7SIlya Biryukov # define EMPTY # define EMPTY_FUNC ( X ) EMPTY EMPTY_FUNC ( 1 + 2 + 3 )
502e7230ea7SIlya Biryukov mappings:
5035e69f27eSIlya Biryukov ['#'_0, 'EMPTY'_9) => ['<eof>'_0, '<eof>'_0)
5045e69f27eSIlya Biryukov ['EMPTY'_9, 'EMPTY_FUNC'_10) => ['<eof>'_0, '<eof>'_0)
5055e69f27eSIlya Biryukov ['EMPTY_FUNC'_10, '<eof>'_18) => ['<eof>'_0, '<eof>'_0)
506e7230ea7SIlya Biryukov )"},
507e7230ea7SIlya Biryukov // File ends with a macro replacement.
508e7230ea7SIlya Biryukov {R"cpp(
509e7230ea7SIlya Biryukov #define FOO 10+10;
510e7230ea7SIlya Biryukov int a = FOO
511e7230ea7SIlya Biryukov )cpp",
512e7230ea7SIlya Biryukov R"(expanded tokens:
513e7230ea7SIlya Biryukov int a = 10 + 10 ;
514e7230ea7SIlya Biryukov file './input.cpp'
515e7230ea7SIlya Biryukov spelled tokens:
516e7230ea7SIlya Biryukov # define FOO 10 + 10 ; int a = FOO
517e7230ea7SIlya Biryukov mappings:
518e7230ea7SIlya Biryukov ['#'_0, 'int'_7) => ['int'_0, 'int'_0)
519e7230ea7SIlya Biryukov ['FOO'_10, '<eof>'_11) => ['10'_3, '<eof>'_7)
520d66afd6dSSam McCall )"},
// An object-like macro (M) whose replacement ends with the name of a
// function-like macro (ID); the arguments for ID follow the use of M.
521d66afd6dSSam McCall {R"cpp(
522d66afd6dSSam McCall #define NUM 42
523d66afd6dSSam McCall #define ID(a) a
524d66afd6dSSam McCall #define M 1 + ID
525d66afd6dSSam McCall M(NUM)
526d66afd6dSSam McCall )cpp",
527d66afd6dSSam McCall R"(expanded tokens:
528d66afd6dSSam McCall 1 + 42
529d66afd6dSSam McCall file './input.cpp'
530d66afd6dSSam McCall spelled tokens:
531d66afd6dSSam McCall # define NUM 42 # define ID ( a ) a # define M 1 + ID M ( NUM )
532d66afd6dSSam McCall mappings:
533d66afd6dSSam McCall ['#'_0, 'M'_17) => ['1'_0, '1'_0)
534d66afd6dSSam McCall ['M'_17, '<eof>'_21) => ['1'_0, '<eof>'_3)
535d66afd6dSSam McCall )"},
536d66afd6dSSam McCall };
537e7230ea7SIlya Biryukov
// Dump each input once and reuse the text as the failure message.
538d66afd6dSSam McCall for (auto &Test : TestCases) {
539d66afd6dSSam McCall std::string Dump = collectAndDump(Test.first);
540d66afd6dSSam McCall EXPECT_EQ(Test.second, Dump) << Dump;
541d66afd6dSSam McCall }
542e7230ea7SIlya Biryukov }
543e7230ea7SIlya Biryukov
/// Checks tokens that do not map onto a plain run of source characters:
/// results of `##` concatenation and tokens split by line continuations.
TEST_F(TokenCollectorTest, SpecialTokens) {
  // Tokens coming from concatenations.
  recordTokens(R"cpp(
    #define CONCAT(a, b) a ## b
    int a = CONCAT(1, 2);
  )cpp");
  const std::vector<syntax::Token> Concatenated(Buffer.expandedTokens());
  EXPECT_THAT(Concatenated, Contains(HasText("12")));

  // Multi-line tokens with slashes at the end.
  recordTokens("i\\\nn\\\nt");
  const auto SplitIntKeyword =
      AllOf(Kind(tok::kw_int), HasText("i\\\nn\\\nt"));
  EXPECT_THAT(Buffer.expandedTokens(),
              ElementsAre(SplitIntKeyword, Kind(tok::eof)));
  // FIXME: test tokens with digraphs and UCN identifiers.
}
559e7230ea7SIlya Biryukov
/// Both the template-closing `>>` and the shift operator `>>` must be recorded
/// as a single `greatergreater` token covering the annotated range.
TEST_F(TokenCollectorTest, LateBoundTokens) {
  // The parser eventually breaks the first '>>' into two tokens ('>' and '>'),
  // but we choose to record them as a single token (for now).
  llvm::Annotations Code(R"cpp(
template <class T>
struct foo { int a; };
int bar = foo<foo<int$br[[>>]]().a;
int baz = 10 $op[[>>]] 2;
)cpp");
  recordTokens(Code.code());

  // Matches a `>>` token that spans exactly the annotated range `Name`.
  auto ShiftTokenAt = [&](const char *Name) {
    return AllOf(Kind(tok::greatergreater), RangeIs(Code.range(Name)));
  };
  const std::vector<syntax::Token> Expanded(Buffer.expandedTokens());
  EXPECT_THAT(Expanded, AllOf(Contains(ShiftTokenAt("br")),
                              Contains(ShiftTokenAt("op"))));
}
576e7230ea7SIlya Biryukov
/// The dump of a class with inline method bodies and a member initializer must
/// start with each token listed exactly once, in source order.
TEST_F(TokenCollectorTest, DelayedParsing) {
  llvm::StringLiteral Code = R"cpp(
struct Foo {
int method() {
// Parser will visit method bodies and initializers multiple times, but
// TokenBuffer should only record the first walk over the tokens;
return 100;
}
int a = 10;

struct Subclass {
void foo() {
Foo().method();
}
};
};
)cpp";
  // Only the head of the dump (the expanded-token line) is compared; whatever
  // follows it is ignored by StartsWith().
  const std::string ExpectedPrefix =
      "expanded tokens:\n"
      " struct Foo { int method ( ) { return 100 ; } int a = 10 ; "
      "struct Subclass { void foo ( ) { Foo ( ) . method ( ) ; } } ; } ;\n";
  const std::string Dump = collectAndDump(Code);
  EXPECT_THAT(Dump, StartsWith(ExpectedPrefix));
}
600e7230ea7SIlya Biryukov
// Checks the dump produced when the main file includes a header that in turn
// includes a second header, with macros defined in one file and used in
// another. The expected dump lists the expanded tokens once, followed by one
// "file" section (spelled tokens + mappings) for each of the three files.
TEST_F(TokenCollectorTest,MultiFile)601e7230ea7SIlya Biryukov TEST_F(TokenCollectorTest, MultiFile) {
// foo.h: defines ADD, declares `a`, then includes bar.h.
602e7230ea7SIlya Biryukov addFile("./foo.h", R"cpp(
603e7230ea7SIlya Biryukov #define ADD(X, Y) X+Y
604e7230ea7SIlya Biryukov int a = 100;
605e7230ea7SIlya Biryukov #include "bar.h"
606e7230ea7SIlya Biryukov )cpp");
// bar.h: uses ADD (defined in foo.h ahead of the #include) and defines MULT.
607e7230ea7SIlya Biryukov addFile("./bar.h", R"cpp(
608e7230ea7SIlya Biryukov int b = ADD(1, 2);
609e7230ea7SIlya Biryukov #define MULT(X, Y) X*Y
610e7230ea7SIlya Biryukov )cpp");
// Main file: includes foo.h and nests a MULT invocation inside an ADD argument.
611e7230ea7SIlya Biryukov llvm::StringLiteral Code = R"cpp(
612e7230ea7SIlya Biryukov #include "foo.h"
613e7230ea7SIlya Biryukov int c = ADD(1, MULT(2,3));
614e7230ea7SIlya Biryukov )cpp";
615e7230ea7SIlya Biryukov
// Expected dump. It is compared with EXPECT_EQ, so it must match the dumper's
// output exactly, whitespace included.
616e7230ea7SIlya Biryukov std::string Expected = R"(expanded tokens:
617e7230ea7SIlya Biryukov int a = 100 ; int b = 1 + 2 ; int c = 1 + 2 * 3 ;
618e7230ea7SIlya Biryukov file './input.cpp'
619e7230ea7SIlya Biryukov spelled tokens:
620e7230ea7SIlya Biryukov # include "foo.h" int c = ADD ( 1 , MULT ( 2 , 3 ) ) ;
621e7230ea7SIlya Biryukov mappings:
622e7230ea7SIlya Biryukov ['#'_0, 'int'_3) => ['int'_12, 'int'_12)
623e7230ea7SIlya Biryukov ['ADD'_6, ';'_17) => ['1'_15, ';'_20)
624e7230ea7SIlya Biryukov file './foo.h'
625e7230ea7SIlya Biryukov spelled tokens:
626e7230ea7SIlya Biryukov # define ADD ( X , Y ) X + Y int a = 100 ; # include "bar.h"
627e7230ea7SIlya Biryukov mappings:
628e7230ea7SIlya Biryukov ['#'_0, 'int'_11) => ['int'_0, 'int'_0)
629e7230ea7SIlya Biryukov ['#'_16, '<eof>'_19) => ['int'_5, 'int'_5)
630e7230ea7SIlya Biryukov file './bar.h'
631e7230ea7SIlya Biryukov spelled tokens:
632e7230ea7SIlya Biryukov int b = ADD ( 1 , 2 ) ; # define MULT ( X , Y ) X * Y
633e7230ea7SIlya Biryukov mappings:
634e7230ea7SIlya Biryukov ['ADD'_3, ';'_9) => ['1'_8, ';'_11)
635e7230ea7SIlya Biryukov ['#'_10, '<eof>'_21) => ['int'_12, 'int'_12)
636e7230ea7SIlya Biryukov )";
637e7230ea7SIlya Biryukov
// NOTE(review): the streamed message calls collectAndDump(Code) a second time
// instead of reusing the value that was compared; the earlier table-driven
// test in this file stores the dump in a local first. Consider doing the same.
638e7230ea7SIlya Biryukov EXPECT_EQ(Expected, collectAndDump(Code))
639e7230ea7SIlya Biryukov << "input: " << Code << "\nresults: " << collectAndDump(Code);
640e7230ea7SIlya Biryukov }
641e7230ea7SIlya Biryukov
// Fixture used by the TokenBuffer tests that follow. It adds no members of its
// own; everything it offers is inherited from TokenCollectorTest.
642e7230ea7SIlya Biryukov class TokenBufferTest : public TokenCollectorTest {};
643e7230ea7SIlya Biryukov
/// Exercises Buffer.spelledForExpanded(): given a range of expanded tokens it
/// should return the spelled tokens that produced it, or std::nullopt when the
/// range does not correspond to a spelled range.
TEST_F(TokenBufferTest, SpelledByExpanded) {
  // Result of mapping the expanded tokens matching `Query` back to spelled
  // tokens in the most recently recorded buffer.
  auto SpelledFor = [&](const char *Query) {
    return Buffer.spelledForExpanded(findExpanded(Query));
  };
  // Matcher: the mapping succeeded and equals the spelled tokens matching
  // `Query`.
  auto IsSpelled = [&](const char *Query) {
    return ValueIs(SameRange(findSpelled(Query)));
  };

  recordTokens(R"cpp(
a1 a2 a3 b1 b2
)cpp");

  // Expanded and spelled tokens are stored separately.
  EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
  // Searching for subranges of expanded tokens should give the corresponding
  // spelled ones.
  for (const char *Query : {"a1 a2 a3 b1 b2", "a1 a2 a3", "b1 b2"})
    EXPECT_THAT(SpelledFor(Query), IsSpelled(Query));

  // Test search on simple macro expansions.
  recordTokens(R"cpp(
#define A a1 a2 a3
#define B b1 b2

A split B
)cpp");
  // Ranges going across expansion boundaries.
  EXPECT_THAT(SpelledFor("a1 a2 a3 split b1 b2"), IsSpelled("A split B"));
  EXPECT_THAT(SpelledFor("a1 a2 a3"),
              ValueIs(SameRange(findSpelled("A split").drop_back())));
  EXPECT_THAT(SpelledFor("b1 b2"),
              ValueIs(SameRange(findSpelled("split B").drop_front())));
  // Ranges not fully covering macro invocations should fail.
  for (const char *Query : {"a1 a2", "b2", "a2 a3 split b1 b2"})
    EXPECT_EQ(SpelledFor(Query), std::nullopt);

  // Recursive macro invocations.
  recordTokens(R"cpp(
#define ID(x) x
#define B b1 b2

ID(ID(ID(a1) a2 a3)) split ID(B)
)cpp");

  EXPECT_THAT(SpelledFor("b1 b2"),
              ValueIs(SameRange(findSpelled("( B").drop_front())));
  EXPECT_THAT(SpelledFor("a1 a2 a3 split b1 b2"),
              IsSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )"));
  // Mixed ranges with expanded and spelled tokens.
  EXPECT_THAT(SpelledFor("a1 a2 a3 split"),
              IsSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split"));
  EXPECT_THAT(SpelledFor("split b1 b2"), IsSpelled("split ID ( B )"));
  // Macro arguments
  for (const char *Query : {"a1", "a2", "a3"})
    EXPECT_THAT(SpelledFor(Query), IsSpelled(Query));
  EXPECT_THAT(SpelledFor("a1 a2"), IsSpelled("ID ( a1 ) a2"));
  EXPECT_THAT(SpelledFor("a1 a2 a3"), IsSpelled("ID ( a1 ) a2 a3"));

  // Empty macro expansions.
  recordTokens(R"cpp(
#define EMPTY
#define ID(X) X

EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
EMPTY EMPTY ID(4 5 6) split2
ID(7 8 9) EMPTY EMPTY
)cpp");
  for (const char *Query : {"1 2 3", "4 5 6", "7 8 9"})
    EXPECT_THAT(SpelledFor(Query), IsSpelled(Query));

  // Empty mappings coming from various directives.
  recordTokens(R"cpp(
#define ID(X) X
ID(1)
#pragma lalala
not_mapped
)cpp");
  EXPECT_THAT(SpelledFor("not_mapped"), IsSpelled("not_mapped"));

  // Multiple macro arguments
  recordTokens(R"cpp(
#define ID(X) X
#define ID2(X, Y) X Y

ID2(ID(a1), ID(a2) a3) ID2(a4, a5 a6 a7)
)cpp");
  // Should fail, spans multiple arguments.
  EXPECT_EQ(SpelledFor("a1 a2"), std::nullopt);
  EXPECT_THAT(SpelledFor("a2 a3"), IsSpelled("ID ( a2 ) a3"));
  EXPECT_THAT(SpelledFor("a1 a2 a3"),
              IsSpelled("ID2 ( ID ( a1 ) , ID ( a2 ) a3 )"));
  EXPECT_THAT(SpelledFor("a5 a6"), IsSpelled("a5 a6"));
  EXPECT_THAT(SpelledFor("a4 a5 a6 a7"), IsSpelled("ID2 ( a4 , a5 a6 a7 )"));
  // Should fail, spans multiple invocations.
  EXPECT_EQ(SpelledFor("a1 a2 a3 a4"), std::nullopt);

  // https://github.com/clangd/clangd/issues/1289
  recordTokens(R"cpp(
#define FOO(X) foo(X)
#define INDIRECT FOO(y)
INDIRECT // expands to foo(y)
)cpp");
  EXPECT_EQ(SpelledFor("y"), std::nullopt);

  recordTokens(R"cpp(
#define FOO(X) a X b
FOO(y)
)cpp");
  EXPECT_THAT(SpelledFor("y"), IsSpelled("y"));

  recordTokens(R"cpp(
#define ID(X) X
#define BAR ID(1)
BAR
)cpp");
  EXPECT_THAT(SpelledFor("1"),
              ValueIs(SameRange(findSpelled(") BAR").drop_front())));

  // Critical cases for mapping of Prev/Next in spelledForExpandedSlow.
  recordTokens(R"cpp(
#define ID(X) X
ID(prev good)
ID(prev ID(good2))
#define LARGE ID(prev ID(bad))
LARGE
)cpp");
  for (const char *Query : {"good", "good2"})
    EXPECT_THAT(SpelledFor(Query), IsSpelled(Query));
  EXPECT_EQ(SpelledFor("bad"), std::nullopt);

  recordTokens(R"cpp(
#define PREV prev
#define ID(X) X
PREV ID(good)
#define LARGE PREV ID(bad)
LARGE
)cpp");
  EXPECT_THAT(SpelledFor("good"), IsSpelled("good"));
  EXPECT_EQ(SpelledFor("bad"), std::nullopt);

  recordTokens(R"cpp(
#define ID(X) X
#define ID2(X, Y) X Y
ID2(prev, good)
ID2(prev, ID(good2))
#define LARGE ID2(prev, bad)
LARGE
)cpp");
  for (const char *Query : {"good", "good2"})
    EXPECT_THAT(SpelledFor(Query), IsSpelled(Query));
  EXPECT_EQ(SpelledFor("bad"), std::nullopt);

  // Prev from macro body.
  recordTokens(R"cpp(
#define ID(X) X
#define ID2(X, Y) X prev ID(Y)
ID2(not_prev, good)
)cpp");
  EXPECT_THAT(SpelledFor("good"), IsSpelled("good"));
  EXPECT_EQ(SpelledFor("prev good"), std::nullopt);
}
831e7230ea7SIlya Biryukov
/// Passing the whole expanded stream, trailing `eof` included, to
/// spelledForExpanded() must yield all spelled tokens of the main file.
TEST_F(TokenBufferTest, NoCrashForEofToken) {
  recordTokens(R"cpp(
int main() {
)cpp");
  const llvm::ArrayRef<syntax::Token> Expanded = Buffer.expandedTokens();
  // Preconditions for the check below: the stream is non-empty and ends in eof.
  ASSERT_FALSE(Expanded.empty());
  ASSERT_EQ(Expanded.back().kind(), tok::eof);
  // Expanded range including `eof` is handled gracefully (`eof` is ignored).
  const auto MainFileSpelled =
      Buffer.spelledTokens(SourceMgr->getMainFileID());
  EXPECT_THAT(Buffer.spelledForExpanded(Expanded),
              ValueIs(SameRange(MainFileSpelled)));
}
8439d1dada5SNathan Ridge
/// Buffer.expandedTokens(SourceRange) should return the expanded tokens lying
/// between the two endpoint locations, and nothing for a default range.
TEST_F(TokenBufferTest, ExpandedTokensForRange) {
  recordTokens(R"cpp(
#define SIGN(X) X##_washere
A SIGN(B) C SIGN(D) E SIGN(F) G
)cpp");

  const SourceLocation First = findExpanded("C").front().location();
  const SourceLocation Last = findExpanded("F_washere").front().location();
  // The range ends at the location of `F_washere`, and that token is expected
  // to be part of the result.
  EXPECT_THAT(Buffer.expandedTokens(SourceRange(First, Last)),
              SameRange(findExpanded("C D_washere E F_washere")));
  // A default-constructed range selects no tokens.
  EXPECT_THAT(Buffer.expandedTokens(SourceRange()), testing::IsEmpty());
}
857c9c714c7SSam McCall
/// Exercises Buffer.expansionStartingAt() and Buffer.expansionsOverlapping()
/// on object-like macros, function-like macros and preprocessor directives.
TEST_F(TokenBufferTest, ExpansionsOverlapping) {
  // An expansion is reported only for its first spelled token: every later
  // token of `Spelled` must yield std::nullopt.
  auto ExpectNoExpansionPastFirst = [&](llvm::ArrayRef<syntax::Token> Spelled) {
    for (const syntax::Token &Tok : Spelled.drop_front())
      EXPECT_EQ(Buffer.expansionStartingAt(&Tok), std::nullopt);
  };

  // Object-like macro expansions.
  recordTokens(R"cpp(
#define FOO 3+4
int a = FOO 1;
int b = FOO 2;
)cpp");

  const llvm::ArrayRef<syntax::Token> Foo1 = findSpelled("FOO 1");
  const llvm::ArrayRef<syntax::Token> Foo1Name = Foo1.drop_back();
  const llvm::ArrayRef<syntax::Token> Foo1Body =
      findExpanded("3 + 4 1").drop_back();
  EXPECT_THAT(
      Buffer.expansionStartingAt(Foo1.data()),
      ValueIs(IsExpansion(SameRange(Foo1Name), SameRange(Foo1Body))));
  EXPECT_THAT(
      Buffer.expansionsOverlapping(Foo1),
      ElementsAre(IsExpansion(SameRange(Foo1Name), SameRange(Foo1Body))));

  const llvm::ArrayRef<syntax::Token> Foo2 = findSpelled("FOO 2");
  const llvm::ArrayRef<syntax::Token> Foo2Name = Foo2.drop_back();
  const llvm::ArrayRef<syntax::Token> Foo2Body =
      findExpanded("3 + 4 2").drop_back();
  EXPECT_THAT(
      Buffer.expansionStartingAt(Foo2.data()),
      ValueIs(IsExpansion(SameRange(Foo2Name), SameRange(Foo2Body))));
  // A spelled range stretching from the first FOO to past the second one
  // overlaps both expansions, reported in source order.
  EXPECT_THAT(
      Buffer.expansionsOverlapping(llvm::ArrayRef(Foo1.begin(), Foo2.end())),
      ElementsAre(IsExpansion(SameRange(Foo1Name), _),
                  IsExpansion(SameRange(Foo2Name), _)));

  // Function-like macro expansions.
  recordTokens(R"cpp(
#define ID(X) X
int a = ID(1+2+3);
int b = ID(ID(2+3+4));
)cpp");

  const llvm::ArrayRef<syntax::Token> ID1 = findSpelled("ID ( 1 + 2 + 3 )");
  EXPECT_THAT(
      Buffer.expansionStartingAt(&ID1.front()),
      ValueIs(IsExpansion(SameRange(ID1), SameRange(findExpanded("1 + 2 + 3")))));
  // Only the first spelled token should be found.
  ExpectNoExpansionPastFirst(ID1);

  const llvm::ArrayRef<syntax::Token> ID2 =
      findSpelled("ID ( ID ( 2 + 3 + 4 ) )");
  EXPECT_THAT(
      Buffer.expansionStartingAt(&ID2.front()),
      ValueIs(IsExpansion(SameRange(ID2), SameRange(findExpanded("2 + 3 + 4")))));
  // Only the first spelled token should be found.
  ExpectNoExpansionPastFirst(ID2);

  // A range that begins inside the first invocation's arguments and ends
  // inside the second's overlaps both invocations.
  EXPECT_THAT(Buffer.expansionsOverlapping(llvm::ArrayRef(
                  findSpelled("1 + 2").data(), findSpelled("4").data())),
              ElementsAre(IsExpansion(SameRange(ID1), _),
                          IsExpansion(SameRange(ID2), _)));

  // PP directives.
  recordTokens(R"cpp(
#define FOO 1
int a = FOO;
#pragma once
int b = 1;
)cpp");

  // A directive is expected to map to an empty expanded range located at the
  // token that follows it (`int` in both cases below).
  const llvm::ArrayRef<syntax::Token> DefineFoo = findSpelled("# define FOO 1");
  const llvm::ArrayRef<syntax::Token> BeforeIntA =
      findExpanded("int a").take_front(0);
  EXPECT_THAT(
      Buffer.expansionStartingAt(&DefineFoo.front()),
      ValueIs(IsExpansion(SameRange(DefineFoo), SameRange(BeforeIntA))));
  // Only the first spelled token should be found.
  ExpectNoExpansionPastFirst(DefineFoo);

  const llvm::ArrayRef<syntax::Token> PragmaOnce = findSpelled("# pragma once");
  const llvm::ArrayRef<syntax::Token> BeforeIntB =
      findExpanded("int b").take_front(0);
  EXPECT_THAT(
      Buffer.expansionStartingAt(&PragmaOnce.front()),
      ValueIs(IsExpansion(SameRange(PragmaOnce), SameRange(BeforeIntB))));
  // Only the first spelled token should be found.
  ExpectNoExpansionPastFirst(PragmaOnce);

  // A range covering the macro use and only part of the pragma still reports
  // the complete pragma mapping.
  const llvm::ArrayRef<syntax::Token> FooUse =
      findSpelled("FOO ;").drop_back();
  EXPECT_THAT(Buffer.expansionsOverlapping(findSpelled("FOO ; # pragma")),
              ElementsAre(IsExpansion(SameRange(FooUse), _),
                          IsExpansion(SameRange(PragmaOnce), _)));
}
9455aed309aSIlya Biryukov
/// Checks the conversion of a single token, and of a pair of tokens, into a
/// FileRange in the main file, against offsets taken from the annotations.
TEST_F(TokenBufferTest, TokensToFileRange) {
  addFile("./foo.h", "token_from_header");
  llvm::Annotations Code(R"cpp(
#define FOO token_from_expansion
#include "./foo.h"
$all[[$i[[int]] a = FOO;]]
)cpp");
  recordTokens(Code.code());

  auto &SM = *SourceMgr;
  // Main-file range spanning the annotation called `Name`.
  auto AnnotatedRange = [&](const char *Name) {
    return FileRange(SM.getMainFileID(), Code.range(Name).Begin,
                     Code.range(Name).End);
  };

  // Two simple examples.
  syntax::Token IntTok = findExpanded("int").front();
  syntax::Token SemiTok = findExpanded(";").front();
  EXPECT_EQ(IntTok.range(SM), AnnotatedRange("i"));
  EXPECT_EQ(syntax::Token::range(SM, IntTok, SemiTok), AnnotatedRange("all"));
  // We don't test assertion failures because death tests are slow.
}
967e7230ea7SIlya Biryukov
/// Buffer.macroExpansions() for the main file should return tokens whose file
/// ranges are exactly the `$macro` annotations, in the same order.
TEST_F(TokenBufferTest, MacroExpansions) {
  llvm::Annotations Code(R"cpp(
#define FOO B
#define FOO2 BA
#define CALL(X) int X
#define G CALL(FOO2)
int B;
$macro[[FOO]];
$macro[[CALL]](A);
$macro[[G]];
)cpp");
  recordTokens(Code.code());
  auto &SM = *SourceMgr;
  const auto MainFile = SM.getMainFileID();

  // Ranges reported by the buffer.
  std::vector<FileRange> Actual;
  for (const auto &MacroTok : Buffer.macroExpansions(MainFile))
    Actual.push_back(MacroTok->range(SM));

  // Ranges marked in the test input.
  std::vector<FileRange> Expected;
  for (const auto &Marked : Code.ranges("macro"))
    Expected.push_back(FileRange(MainFile, Marked.Begin, Marked.End));

  EXPECT_EQ(Expected, Actual);
}
9913f8da5d0SSam McCall
/// Checks spelledTokensTouching() and spelledIdentifierTouching() at each of
/// the eight `^` points marked in the input `int ab=1;`.
TEST_F(TokenBufferTest, Touching) {
  llvm::Annotations Code("^i^nt^ ^a^b^=^1;^");
  recordTokens(Code.code());
  // Validate the annotation count up front: the helpers below index into
  // Code.points() with constants 0..7, so a mismatch must abort the test
  // before any of those accesses happen.
  ASSERT_EQ(Code.points().size(), 8u);

  // Source location of the Index-th `^` point in the main file.
  auto LocAt = [&](int Index) {
    return SourceMgr->getComposedLoc(SourceMgr->getMainFileID(),
                                     Code.points()[Index]);
  };
  // Spelled tokens touching the Index-th point.
  auto Touching = [&](int Index) {
    return spelledTokensTouching(LocAt(Index), Buffer);
  };
  // Text of the identifier touching the Index-th point, or "" if none.
  auto Identifier = [&](int Index) {
    const syntax::Token *Tok = spelledIdentifierTouching(LocAt(Index), Buffer);
    return Tok ? Tok->text(*SourceMgr) : "";
  };

  // Points before, inside and right after `int`: a keyword, not an identifier.
  EXPECT_THAT(Touching(0), SameRange(findSpelled("int")));
  EXPECT_EQ(Identifier(0), "");
  EXPECT_THAT(Touching(1), SameRange(findSpelled("int")));
  EXPECT_EQ(Identifier(1), "");
  EXPECT_THAT(Touching(2), SameRange(findSpelled("int")));
  EXPECT_EQ(Identifier(2), "");

  // Points before and inside `ab`.
  EXPECT_THAT(Touching(3), SameRange(findSpelled("ab")));
  EXPECT_EQ(Identifier(3), "ab");
  EXPECT_THAT(Touching(4), SameRange(findSpelled("ab")));
  EXPECT_EQ(Identifier(4), "ab");

  // Point between `ab` and `=` touches both tokens; the identifier is `ab`.
  EXPECT_THAT(Touching(5), SameRange(findSpelled("ab =")));
  EXPECT_EQ(Identifier(5), "ab");

  // Point between `=` and `1` touches both; neither is an identifier.
  EXPECT_THAT(Touching(6), SameRange(findSpelled("= 1")));
  EXPECT_EQ(Identifier(6), "");

  // Point at the very end touches only `;`.
  EXPECT_THAT(Touching(7), SameRange(findSpelled(";")));
  EXPECT_EQ(Identifier(7), "");
}
10313f8da5d0SSam McCall
TEST_F(TokenBufferTest,ExpandedBySpelled)10321bf055c9SMarcel Hlopko TEST_F(TokenBufferTest, ExpandedBySpelled) {
10331bf055c9SMarcel Hlopko recordTokens(R"cpp(
10341bf055c9SMarcel Hlopko a1 a2 a3 b1 b2
10351bf055c9SMarcel Hlopko )cpp");
1036c79345fbSZarko Todorovski // Expanded and spelled tokens are stored separately.
10371bf055c9SMarcel Hlopko EXPECT_THAT(findExpanded("a1 a2"), Not(SameRange(findSpelled("a1 a2"))));
10381bf055c9SMarcel Hlopko // Searching for subranges of expanded tokens should give the corresponding
10391bf055c9SMarcel Hlopko // spelled ones.
10401bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3 b1 b2")),
10411bf055c9SMarcel Hlopko ElementsAre(SameRange(findExpanded("a1 a2 a3 b1 b2"))));
10421bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1 a2 a3")),
10431bf055c9SMarcel Hlopko ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
10441bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("b1 b2")),
10451bf055c9SMarcel Hlopko ElementsAre(SameRange(findExpanded("b1 b2"))));
10461bf055c9SMarcel Hlopko
10471bf055c9SMarcel Hlopko // Test search on simple macro expansions.
10481bf055c9SMarcel Hlopko recordTokens(R"cpp(
10491bf055c9SMarcel Hlopko #define A a1 a2 a3
10501bf055c9SMarcel Hlopko #define B b1 b2
10511bf055c9SMarcel Hlopko
10521bf055c9SMarcel Hlopko A split B
10531bf055c9SMarcel Hlopko )cpp");
10541bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split B")),
10551bf055c9SMarcel Hlopko ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
10561bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("A split").drop_back()),
10571bf055c9SMarcel Hlopko ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
10581bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("split B").drop_front()),
10591bf055c9SMarcel Hlopko ElementsAre(SameRange(findExpanded("b1 b2"))));
10601bf055c9SMarcel Hlopko
10611bf055c9SMarcel Hlopko // Ranges not fully covering macro expansions should fail.
10621bf055c9SMarcel Hlopko recordTokens(R"cpp(
10631bf055c9SMarcel Hlopko #define ID(x) x
10641bf055c9SMarcel Hlopko
10651bf055c9SMarcel Hlopko ID(a)
10661bf055c9SMarcel Hlopko )cpp");
10671bf055c9SMarcel Hlopko // Spelled don't cover entire mapping (missing ID token) -> empty result
10681bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("( a )")), IsEmpty());
10691bf055c9SMarcel Hlopko // Spelled don't cover entire mapping (missing ) token) -> empty result
10701bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a")), IsEmpty());
10711bf055c9SMarcel Hlopko
10721bf055c9SMarcel Hlopko // Recursive macro invocations.
10731bf055c9SMarcel Hlopko recordTokens(R"cpp(
10741bf055c9SMarcel Hlopko #define ID(x) x
10751bf055c9SMarcel Hlopko #define B b1 b2
10761bf055c9SMarcel Hlopko
10771bf055c9SMarcel Hlopko ID(ID(ID(a1) a2 a3)) split ID(B)
10781bf055c9SMarcel Hlopko )cpp");
10791bf055c9SMarcel Hlopko
10801bf055c9SMarcel Hlopko EXPECT_THAT(
10811bf055c9SMarcel Hlopko Buffer.expandedForSpelled(findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) )")),
10821bf055c9SMarcel Hlopko ElementsAre(SameRange(findExpanded("a1 a2 a3"))));
10831bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( B )")),
10841bf055c9SMarcel Hlopko ElementsAre(SameRange(findExpanded("b1 b2"))));
10851bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(
10861bf055c9SMarcel Hlopko findSpelled("ID ( ID ( ID ( a1 ) a2 a3 ) ) split ID ( B )")),
10871bf055c9SMarcel Hlopko ElementsAre(SameRange(findExpanded("a1 a2 a3 split b1 b2"))));
10881bf055c9SMarcel Hlopko // FIXME: these should succeed, but we do not support macro arguments yet.
10891bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("a1")), IsEmpty());
10901bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( a1 ) a2")),
10911bf055c9SMarcel Hlopko IsEmpty());
10921bf055c9SMarcel Hlopko
10931bf055c9SMarcel Hlopko // Empty macro expansions.
10941bf055c9SMarcel Hlopko recordTokens(R"cpp(
10951bf055c9SMarcel Hlopko #define EMPTY
10961bf055c9SMarcel Hlopko #define ID(X) X
10971bf055c9SMarcel Hlopko
10981bf055c9SMarcel Hlopko EMPTY EMPTY ID(1 2 3) EMPTY EMPTY split1
10991bf055c9SMarcel Hlopko EMPTY EMPTY ID(4 5 6) split2
11001bf055c9SMarcel Hlopko ID(7 8 9) EMPTY EMPTY
11011bf055c9SMarcel Hlopko )cpp");
11021bf055c9SMarcel Hlopko // Covered by empty expansions on one of both of the sides.
11031bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 )")),
11041bf055c9SMarcel Hlopko ElementsAre(SameRange(findExpanded("1 2 3"))));
11051bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 4 5 6 )")),
11061bf055c9SMarcel Hlopko ElementsAre(SameRange(findExpanded("4 5 6"))));
11071bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 7 8 9 )")),
11081bf055c9SMarcel Hlopko ElementsAre(SameRange(findExpanded("7 8 9"))));
11091bf055c9SMarcel Hlopko // Including the empty macro expansions on the side.
11101bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 )")),
11111bf055c9SMarcel Hlopko ElementsAre(SameRange(findExpanded("1 2 3"))));
11121bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("ID ( 1 2 3 ) EMPTY")),
11131bf055c9SMarcel Hlopko ElementsAre(SameRange(findExpanded("1 2 3"))));
11141bf055c9SMarcel Hlopko EXPECT_THAT(
11151bf055c9SMarcel Hlopko Buffer.expandedForSpelled(findSpelled("EMPTY ID ( 1 2 3 ) EMPTY")),
11161bf055c9SMarcel Hlopko ElementsAre(SameRange(findExpanded("1 2 3"))));
11171bf055c9SMarcel Hlopko
11181bf055c9SMarcel Hlopko // Empty mappings coming from various directives.
11191bf055c9SMarcel Hlopko recordTokens(R"cpp(
11201bf055c9SMarcel Hlopko #define ID(X) X
11211bf055c9SMarcel Hlopko ID(1)
11221bf055c9SMarcel Hlopko #pragma lalala
11231bf055c9SMarcel Hlopko not_mapped
11241bf055c9SMarcel Hlopko )cpp");
11251bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# define ID ( X ) X")),
11261bf055c9SMarcel Hlopko IsEmpty());
11271bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("# pragma lalala")),
11281bf055c9SMarcel Hlopko IsEmpty());
11291bf055c9SMarcel Hlopko
11301bf055c9SMarcel Hlopko // Empty macro expansion.
11311bf055c9SMarcel Hlopko recordTokens(R"cpp(
11321bf055c9SMarcel Hlopko #define EMPTY
11331bf055c9SMarcel Hlopko EMPTY int a = 100;
11341bf055c9SMarcel Hlopko )cpp");
11351bf055c9SMarcel Hlopko EXPECT_THAT(Buffer.expandedForSpelled(findSpelled("EMPTY int").drop_back()),
11361bf055c9SMarcel Hlopko IsEmpty());
11371bf055c9SMarcel Hlopko }
11381bf055c9SMarcel Hlopko
// Records tokens for a translation unit that has a `#pragma` directive inside
// a function body. The test makes no explicit expectations on the resulting
// buffer; presumably it only guards against token collection crashing or
// asserting on pragma-introduced tokens (NOTE(review): confirm against the
// commit that added it).
TEST_F(TokenCollectorTest, Pragmas) {
  // A loop-hint pragma nested in a function, followed by the loop it applies
  // to.
  recordTokens(R"cpp(
    void foo() {
      #pragma unroll 4
      for(int i=0;i<4;++i);
    }
  )cpp");
}
1148e7230ea7SIlya Biryukov } // namespace
1149