xref: /llvm-project/clang-tools-extra/clangd/Selection.cpp (revision 9ef2ac3ad1bd5aa9e589f63047e8abeac11ad1b2)
1 //===--- Selection.cpp ----------------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "Selection.h"
10 #include "AST.h"
11 #include "support/Logger.h"
12 #include "support/Trace.h"
13 #include "clang/AST/ASTConcept.h"
14 #include "clang/AST/ASTTypeTraits.h"
15 #include "clang/AST/Decl.h"
16 #include "clang/AST/DeclCXX.h"
17 #include "clang/AST/Expr.h"
18 #include "clang/AST/ExprCXX.h"
19 #include "clang/AST/PrettyPrinter.h"
20 #include "clang/AST/RecursiveASTVisitor.h"
21 #include "clang/AST/TypeLoc.h"
22 #include "clang/Basic/OperatorKinds.h"
23 #include "clang/Basic/SourceLocation.h"
24 #include "clang/Basic/SourceManager.h"
25 #include "clang/Basic/TokenKinds.h"
26 #include "clang/Lex/Lexer.h"
27 #include "clang/Tooling/Syntax/Tokens.h"
28 #include "llvm/ADT/BitVector.h"
29 #include "llvm/ADT/STLExtras.h"
30 #include "llvm/ADT/StringExtras.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include <algorithm>
34 #include <optional>
35 #include <set>
36 #include <string>
37 
38 namespace clang {
39 namespace clangd {
40 namespace {
41 using Node = SelectionTree::Node;
42 
43 // Measure the fraction of selections that were enabled by recovery AST.
recordMetrics(const SelectionTree & S,const LangOptions & Lang)44 void recordMetrics(const SelectionTree &S, const LangOptions &Lang) {
45   if (!trace::enabled())
46     return;
47   const char *LanguageLabel = Lang.CPlusPlus ? "C++" : Lang.ObjC ? "ObjC" : "C";
48   static constexpr trace::Metric SelectionUsedRecovery(
49       "selection_recovery", trace::Metric::Distribution, "language");
50   static constexpr trace::Metric RecoveryType(
51       "selection_recovery_type", trace::Metric::Distribution, "language");
52   const auto *Common = S.commonAncestor();
53   for (const auto *N = Common; N; N = N->Parent) {
54     if (const auto *RE = N->ASTNode.get<RecoveryExpr>()) {
55       SelectionUsedRecovery.record(1, LanguageLabel); // used recovery ast.
56       RecoveryType.record(RE->isTypeDependent() ? 0 : 1, LanguageLabel);
57       return;
58     }
59   }
60   if (Common)
61     SelectionUsedRecovery.record(0, LanguageLabel); // unused.
62 }
63 
64 // Return the range covering a node and all its children.
getSourceRange(const DynTypedNode & N)65 SourceRange getSourceRange(const DynTypedNode &N) {
66   // MemberExprs to implicitly access anonymous fields should not claim any
67   // tokens for themselves. Given:
68   //   struct A { struct { int b; }; };
69   // The clang AST reports the following nodes for an access to b:
70   //   A().b;
71   //   [----] MemberExpr, base = A().<anonymous>, member = b
72   //   [----] MemberExpr: base = A(), member = <anonymous>
73   //   [-]    CXXConstructExpr
74   // For our purposes, we don't want the second MemberExpr to own any tokens,
75   // so we reduce its range to match the CXXConstructExpr.
76   // (It's not clear that changing the clang AST would be correct in general).
77   if (const auto *ME = N.get<MemberExpr>()) {
78     if (!ME->getMemberDecl()->getDeclName())
79       return ME->getBase()
80                  ? getSourceRange(DynTypedNode::create(*ME->getBase()))
81                  : SourceRange();
82   }
83   return N.getSourceRange();
84 }
85 
86 // An IntervalSet maintains a set of disjoint subranges of an array.
87 //
88 // Initially, it contains the entire array.
89 //           [-----------------------------------------------------------]
90 //
91 // When a range is erased(), it will typically split the array in two.
92 //  Claim:                     [--------------------]
93 //  after:   [----------------]                      [-------------------]
94 //
95 // erase() returns the segments actually erased. Given the state above:
96 //  Claim:          [---------------------------------------]
97 //  Out:            [---------]                      [------]
98 //  After:   [-----]                                         [-----------]
99 //
100 // It is used to track (expanded) tokens not yet associated with an AST node.
101 // On traversing an AST node, its token range is erased from the unclaimed set.
102 // The tokens actually removed are associated with that node, and hit-tested
103 // against the selection to determine whether the node is selected.
104 template <typename T> class IntervalSet {
105 public:
IntervalSet(llvm::ArrayRef<T> Range)106   IntervalSet(llvm::ArrayRef<T> Range) { UnclaimedRanges.insert(Range); }
107 
108   // Removes the elements of Claim from the set, modifying or removing ranges
109   // that overlap it.
110   // Returns the continuous subranges of Claim that were actually removed.
erase(llvm::ArrayRef<T> Claim)111   llvm::SmallVector<llvm::ArrayRef<T>> erase(llvm::ArrayRef<T> Claim) {
112     llvm::SmallVector<llvm::ArrayRef<T>> Out;
113     if (Claim.empty())
114       return Out;
115 
116     // General case:
117     // Claim:                   [-----------------]
118     // UnclaimedRanges: [-A-] [-B-] [-C-] [-D-] [-E-] [-F-] [-G-]
119     // Overlap:               ^first                  ^second
120     // Ranges C and D are fully included. Ranges B and E must be trimmed.
121     auto Overlap = std::make_pair(
122         UnclaimedRanges.lower_bound({Claim.begin(), Claim.begin()}), // C
123         UnclaimedRanges.lower_bound({Claim.end(), Claim.end()}));    // F
124     // Rewind to cover B.
125     if (Overlap.first != UnclaimedRanges.begin()) {
126       --Overlap.first;
127       // ...unless B isn't selected at all.
128       if (Overlap.first->end() <= Claim.begin())
129         ++Overlap.first;
130     }
131     if (Overlap.first == Overlap.second)
132       return Out;
133 
134     // First, copy all overlapping ranges into the output.
135     auto OutFirst = Out.insert(Out.end(), Overlap.first, Overlap.second);
136     // If any of the overlapping ranges were sliced by the claim, split them:
137     //  - restrict the returned range to the claimed part
138     //  - save the unclaimed part so it can be reinserted
139     llvm::ArrayRef<T> RemainingHead, RemainingTail;
140     if (Claim.begin() > OutFirst->begin()) {
141       RemainingHead = {OutFirst->begin(), Claim.begin()};
142       *OutFirst = {Claim.begin(), OutFirst->end()};
143     }
144     if (Claim.end() < Out.back().end()) {
145       RemainingTail = {Claim.end(), Out.back().end()};
146       Out.back() = {Out.back().begin(), Claim.end()};
147     }
148 
149     // Erase all the overlapping ranges (invalidating all iterators).
150     UnclaimedRanges.erase(Overlap.first, Overlap.second);
151     // Reinsert ranges that were merely trimmed.
152     if (!RemainingHead.empty())
153       UnclaimedRanges.insert(RemainingHead);
154     if (!RemainingTail.empty())
155       UnclaimedRanges.insert(RemainingTail);
156 
157     return Out;
158   }
159 
160 private:
161   using TokenRange = llvm::ArrayRef<T>;
162   struct RangeLess {
operator ()clang::clangd::__anone6e496d40111::IntervalSet::RangeLess163     bool operator()(llvm::ArrayRef<T> L, llvm::ArrayRef<T> R) const {
164       return L.begin() < R.begin();
165     }
166   };
167 
168   // Disjoint sorted unclaimed ranges of expanded tokens.
169   std::set<llvm::ArrayRef<T>, RangeLess> UnclaimedRanges;
170 };
171 
172 // Sentinel value for the selectedness of a node where we've seen no tokens yet.
173 // This resolves to Unselected if no tokens are ever seen.
174 // But Unselected + Complete -> Partial, while NoTokens + Complete --> Complete.
175 // This value is never exposed publicly.
176 constexpr SelectionTree::Selection NoTokens =
177     static_cast<SelectionTree::Selection>(
178         static_cast<unsigned char>(SelectionTree::Complete + 1));
179 
180 // Nodes start with NoTokens, and then use this function to aggregate the
181 // selectedness as more tokens are found.
update(SelectionTree::Selection & Result,SelectionTree::Selection New)182 void update(SelectionTree::Selection &Result, SelectionTree::Selection New) {
183   if (New == NoTokens)
184     return;
185   if (Result == NoTokens)
186     Result = New;
187   else if (Result != New)
188     // Can only be completely selected (or unselected) if all tokens are.
189     Result = SelectionTree::Partial;
190 }
191 
192 // As well as comments, don't count semicolons as real tokens.
193 // They're not properly claimed as expr-statement is missing from the AST.
shouldIgnore(const syntax::Token & Tok)194 bool shouldIgnore(const syntax::Token &Tok) {
195   switch (Tok.kind()) {
196     // Even "attached" comments are not considered part of a node's range.
197     case tok::comment:
198     // The AST doesn't directly store locations for terminating semicolons.
199     case tok::semi:
200     // We don't have locations for cvr-qualifiers: see QualifiedTypeLoc.
201     case tok::kw_const:
202     case tok::kw_volatile:
203     case tok::kw_restrict:
204       return true;
205     default:
206       return false;
207   }
208 }
209 
210 // Determine whether 'Target' is the first expansion of the macro
211 // argument whose top-level spelling location is 'SpellingLoc'.
isFirstExpansion(FileID Target,SourceLocation SpellingLoc,const SourceManager & SM)212 bool isFirstExpansion(FileID Target, SourceLocation SpellingLoc,
213                       const SourceManager &SM) {
214   SourceLocation Prev = SpellingLoc;
215   while (true) {
216     // If the arg is expanded multiple times, getMacroArgExpandedLocation()
217     // returns the first expansion.
218     SourceLocation Next = SM.getMacroArgExpandedLocation(Prev);
219     // So if we reach the target, target is the first-expansion of the
220     // first-expansion ...
221     if (SM.getFileID(Next) == Target)
222       return true;
223 
224     // Otherwise, if the FileID stops changing, we've reached the innermost
225     // macro expansion, and Target was on a different branch.
226     if (SM.getFileID(Next) == SM.getFileID(Prev))
227       return false;
228 
229     Prev = Next;
230   }
231   return false;
232 }
233 
234 // SelectionTester can determine whether a range of tokens from the PP-expanded
235 // stream (corresponding to an AST node) is considered selected.
236 //
237 // When the tokens result from macro expansions, the appropriate tokens in the
238 // main file are examined (macro invocation or args). Similarly for #includes.
239 // However, only the first expansion of a given spelled token is considered
240 // selected.
241 //
242 // It tests each token in the range (not just the endpoints) as contiguous
243 // expanded tokens may not have contiguous spellings (with macros).
244 //
245 // Non-token text, and tokens not modeled in the AST (comments, semicolons)
246 // are ignored when determining selectedness.
247 class SelectionTester {
248 public:
249   // The selection is offsets [SelBegin, SelEnd) in SelFile.
SelectionTester(const syntax::TokenBuffer & Buf,FileID SelFile,unsigned SelBegin,unsigned SelEnd,const SourceManager & SM)250   SelectionTester(const syntax::TokenBuffer &Buf, FileID SelFile,
251                   unsigned SelBegin, unsigned SelEnd, const SourceManager &SM)
252       : SelFile(SelFile), SelFileBounds(SM.getLocForStartOfFile(SelFile),
253                                         SM.getLocForEndOfFile(SelFile)),
254         SM(SM) {
255     // Find all tokens (partially) selected in the file.
256     auto AllSpelledTokens = Buf.spelledTokens(SelFile);
257     const syntax::Token *SelFirst =
258         llvm::partition_point(AllSpelledTokens, [&](const syntax::Token &Tok) {
259           return SM.getFileOffset(Tok.endLocation()) <= SelBegin;
260         });
261     const syntax::Token *SelLimit = std::partition_point(
262         SelFirst, AllSpelledTokens.end(), [&](const syntax::Token &Tok) {
263           return SM.getFileOffset(Tok.location()) < SelEnd;
264         });
265     auto Sel = llvm::ArrayRef(SelFirst, SelLimit);
266     // Find which of these are preprocessed to nothing and should be ignored.
267     llvm::BitVector PPIgnored(Sel.size(), false);
268     for (const syntax::TokenBuffer::Expansion &X :
269          Buf.expansionsOverlapping(Sel)) {
270       if (X.Expanded.empty()) {
271         for (const syntax::Token &Tok : X.Spelled) {
272           if (&Tok >= SelFirst && &Tok < SelLimit)
273             PPIgnored[&Tok - SelFirst] = true;
274         }
275       }
276     }
277     // Precompute selectedness and offset for selected spelled tokens.
278     for (unsigned I = 0; I < Sel.size(); ++I) {
279       if (shouldIgnore(Sel[I]) || PPIgnored[I])
280         continue;
281       SelectedSpelled.emplace_back();
282       Tok &S = SelectedSpelled.back();
283       S.Offset = SM.getFileOffset(Sel[I].location());
284       if (S.Offset >= SelBegin && S.Offset + Sel[I].length() <= SelEnd)
285         S.Selected = SelectionTree::Complete;
286       else
287         S.Selected = SelectionTree::Partial;
288     }
289     MaybeSelectedExpanded = computeMaybeSelectedExpandedTokens(Buf);
290   }
291 
292   // Test whether a consecutive range of tokens is selected.
293   // The tokens are taken from the expanded token stream.
294   SelectionTree::Selection
test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const295   test(llvm::ArrayRef<syntax::Token> ExpandedTokens) const {
296     if (ExpandedTokens.empty())
297       return NoTokens;
298     if (SelectedSpelled.empty())
299       return SelectionTree::Unselected;
300     // Cheap (pointer) check whether any of the tokens could touch selection.
301     // In most cases, the node's overall source range touches ExpandedTokens,
302     // or we would have failed mayHit(). However now we're only considering
303     // the *unclaimed* spans of expanded tokens.
304     // This is a significant performance improvement when a lot of nodes
305     // surround the selection, including when generated by macros.
306     if (MaybeSelectedExpanded.empty() ||
307         &ExpandedTokens.front() > &MaybeSelectedExpanded.back() ||
308         &ExpandedTokens.back() < &MaybeSelectedExpanded.front()) {
309       return SelectionTree::Unselected;
310     }
311 
312     // The eof token is used as a sentinel.
313     // In general, source range from an AST node should not claim the eof token,
314     // but it could occur for unmatched-bracket cases.
315     // FIXME: fix it in TokenBuffer, expandedTokens(SourceRange) should not
316     // return the eof token.
317     if (ExpandedTokens.back().kind() == tok::eof)
318       ExpandedTokens = ExpandedTokens.drop_back();
319 
320     SelectionTree::Selection Result = NoTokens;
321     while (!ExpandedTokens.empty()) {
322       // Take consecutive tokens from the same context together for efficiency.
323       SourceLocation Start = ExpandedTokens.front().location();
324       FileID FID = SM.getFileID(Start);
325       // Comparing SourceLocations against bounds is cheaper than getFileID().
326       SourceLocation Limit = SM.getComposedLoc(FID, SM.getFileIDSize(FID));
327       auto Batch = ExpandedTokens.take_while([&](const syntax::Token &T) {
328         return T.location() >= Start && T.location() < Limit;
329       });
330       assert(!Batch.empty());
331       ExpandedTokens = ExpandedTokens.drop_front(Batch.size());
332 
333       update(Result, testChunk(FID, Batch));
334     }
335     return Result;
336   }
337 
338   // Cheap check whether any of the tokens in R might be selected.
339   // If it returns false, test() will return NoTokens or Unselected.
340   // If it returns true, test() may return any value.
mayHit(SourceRange R) const341   bool mayHit(SourceRange R) const {
342     if (SelectedSpelled.empty() || MaybeSelectedExpanded.empty())
343       return false;
344     // If the node starts after the selection ends, it is not selected.
345     // Tokens a macro location might claim are >= its expansion start.
346     // So if the expansion start > last selected token, we can prune it.
347     // (This is particularly helpful for GTest's TEST macro).
348     if (auto B = offsetInSelFile(getExpansionStart(R.getBegin())))
349       if (*B > SelectedSpelled.back().Offset)
350         return false;
351     // If the node ends before the selection begins, it is not selected.
352     SourceLocation EndLoc = R.getEnd();
353     while (EndLoc.isMacroID())
354       EndLoc = SM.getImmediateExpansionRange(EndLoc).getEnd();
355     // In the rare case that the expansion range is a char range, EndLoc is
356     // ~one token too far to the right. We may fail to prune, that's OK.
357     if (auto E = offsetInSelFile(EndLoc))
358       if (*E < SelectedSpelled.front().Offset)
359         return false;
360     return true;
361   }
362 
363 private:
364   // Plausible expanded tokens that might be affected by the selection.
365   // This is an overestimate, it may contain tokens that are not selected.
366   // The point is to allow cheap pruning in test()
367   llvm::ArrayRef<syntax::Token>
computeMaybeSelectedExpandedTokens(const syntax::TokenBuffer & Toks)368   computeMaybeSelectedExpandedTokens(const syntax::TokenBuffer &Toks) {
369     if (SelectedSpelled.empty())
370       return {};
371 
372     auto LastAffectedToken = [&](SourceLocation Loc) {
373       auto Offset = offsetInSelFile(Loc);
374       while (Loc.isValid() && !Offset) {
375         Loc = Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).getEnd()
376                               : SM.getIncludeLoc(SM.getFileID(Loc));
377         Offset = offsetInSelFile(Loc);
378       }
379       return Offset;
380     };
381     auto FirstAffectedToken = [&](SourceLocation Loc) {
382       auto Offset = offsetInSelFile(Loc);
383       while (Loc.isValid() && !Offset) {
384         Loc = Loc.isMacroID() ? SM.getImmediateExpansionRange(Loc).getBegin()
385                               : SM.getIncludeLoc(SM.getFileID(Loc));
386         Offset = offsetInSelFile(Loc);
387       }
388       return Offset;
389     };
390 
391     const syntax::Token *Start = llvm::partition_point(
392         Toks.expandedTokens(),
393         [&, First = SelectedSpelled.front().Offset](const syntax::Token &Tok) {
394           if (Tok.kind() == tok::eof)
395             return false;
396           // Implausible if upperbound(Tok) < First.
397           if (auto Offset = LastAffectedToken(Tok.location()))
398             return *Offset < First;
399           // A prefix of the expanded tokens may be from an implicit
400           // inclusion (e.g. preamble patch, or command-line -include).
401           return true;
402         });
403 
404     bool EndInvalid = false;
405     const syntax::Token *End = std::partition_point(
406         Start, Toks.expandedTokens().end(),
407         [&, Last = SelectedSpelled.back().Offset](const syntax::Token &Tok) {
408           if (Tok.kind() == tok::eof)
409             return false;
410           // Plausible if lowerbound(Tok) <= Last.
411           if (auto Offset = FirstAffectedToken(Tok.location()))
412             return *Offset <= Last;
413           // Shouldn't happen: once we've seen tokens traceable to the main
414           // file, there shouldn't be any more implicit inclusions.
415           assert(false && "Expanded token could not be resolved to main file!");
416           EndInvalid = true;
417           return true; // conservatively assume this token can overlap
418         });
419     if (EndInvalid)
420       End = Toks.expandedTokens().end();
421 
422     return llvm::ArrayRef(Start, End);
423   }
424 
425   // Hit-test a consecutive range of tokens from a single file ID.
426   SelectionTree::Selection
testChunk(FileID FID,llvm::ArrayRef<syntax::Token> Batch) const427   testChunk(FileID FID, llvm::ArrayRef<syntax::Token> Batch) const {
428     assert(!Batch.empty());
429     SourceLocation StartLoc = Batch.front().location();
430     // There are several possible categories of FileID depending on how the
431     // preprocessor was used to generate these tokens:
432     //   main file, #included file, macro args, macro bodies.
433     // We need to identify the main-file tokens that represent Batch, and
434     // determine whether we want to exclusively claim them. Regular tokens
435     // represent one AST construct, but a macro invocation can represent many.
436 
437     // Handle tokens written directly in the main file.
438     if (FID == SelFile) {
439       return testTokenRange(*offsetInSelFile(Batch.front().location()),
440                             *offsetInSelFile(Batch.back().location()));
441     }
442 
443     // Handle tokens in another file #included into the main file.
444     // Check if the #include is selected, but don't claim it exclusively.
445     if (StartLoc.isFileID()) {
446       for (SourceLocation Loc = Batch.front().location(); Loc.isValid();
447            Loc = SM.getIncludeLoc(SM.getFileID(Loc))) {
448         if (auto Offset = offsetInSelFile(Loc))
449           // FIXME: use whole #include directive, not just the filename string.
450           return testToken(*Offset);
451       }
452       return NoTokens;
453     }
454 
455     assert(StartLoc.isMacroID());
456     // Handle tokens that were passed as a macro argument.
457     SourceLocation ArgStart = SM.getTopMacroCallerLoc(StartLoc);
458     if (auto ArgOffset = offsetInSelFile(ArgStart)) {
459       if (isFirstExpansion(FID, ArgStart, SM)) {
460         SourceLocation ArgEnd =
461             SM.getTopMacroCallerLoc(Batch.back().location());
462         return testTokenRange(*ArgOffset, *offsetInSelFile(ArgEnd));
463       } else { // NOLINT(llvm-else-after-return)
464         /* fall through and treat as part of the macro body */
465       }
466     }
467 
468     // Handle tokens produced by non-argument macro expansion.
469     // Check if the macro name is selected, don't claim it exclusively.
470     if (auto ExpansionOffset = offsetInSelFile(getExpansionStart(StartLoc)))
471       // FIXME: also check ( and ) for function-like macros?
472       return testToken(*ExpansionOffset);
473     return NoTokens;
474   }
475 
476   // Is the closed token range [Begin, End] selected?
testTokenRange(unsigned Begin,unsigned End) const477   SelectionTree::Selection testTokenRange(unsigned Begin, unsigned End) const {
478     assert(Begin <= End);
479     // Outside the selection entirely?
480     if (End < SelectedSpelled.front().Offset ||
481         Begin > SelectedSpelled.back().Offset)
482       return SelectionTree::Unselected;
483 
484     // Compute range of tokens.
485     auto B = llvm::partition_point(
486         SelectedSpelled, [&](const Tok &T) { return T.Offset < Begin; });
487     auto E = std::partition_point(B, SelectedSpelled.end(), [&](const Tok &T) {
488       return T.Offset <= End;
489     });
490 
491     // Aggregate selectedness of tokens in range.
492     bool ExtendsOutsideSelection = Begin < SelectedSpelled.front().Offset ||
493                                    End > SelectedSpelled.back().Offset;
494     SelectionTree::Selection Result =
495         ExtendsOutsideSelection ? SelectionTree::Unselected : NoTokens;
496     for (auto It = B; It != E; ++It)
497       update(Result, It->Selected);
498     return Result;
499   }
500 
501   // Is the token at `Offset` selected?
testToken(unsigned Offset) const502   SelectionTree::Selection testToken(unsigned Offset) const {
503     // Outside the selection entirely?
504     if (Offset < SelectedSpelled.front().Offset ||
505         Offset > SelectedSpelled.back().Offset)
506       return SelectionTree::Unselected;
507     // Find the token, if it exists.
508     auto It = llvm::partition_point(
509         SelectedSpelled, [&](const Tok &T) { return T.Offset < Offset; });
510     if (It != SelectedSpelled.end() && It->Offset == Offset)
511       return It->Selected;
512     return NoTokens;
513   }
514 
515   // Decomposes Loc and returns the offset if the file ID is SelFile.
offsetInSelFile(SourceLocation Loc) const516   std::optional<unsigned> offsetInSelFile(SourceLocation Loc) const {
517     // Decoding Loc with SM.getDecomposedLoc is relatively expensive.
518     // But SourceLocations for a file are numerically contiguous, so we
519     // can use cheap integer operations instead.
520     if (Loc < SelFileBounds.getBegin() || Loc >= SelFileBounds.getEnd())
521       return std::nullopt;
522     // FIXME: subtracting getRawEncoding() is dubious, move this logic into SM.
523     return Loc.getRawEncoding() - SelFileBounds.getBegin().getRawEncoding();
524   }
525 
getExpansionStart(SourceLocation Loc) const526   SourceLocation getExpansionStart(SourceLocation Loc) const {
527     while (Loc.isMacroID())
528       Loc = SM.getImmediateExpansionRange(Loc).getBegin();
529     return Loc;
530   }
531 
532   struct Tok {
533     unsigned Offset;
534     SelectionTree::Selection Selected;
535   };
536   std::vector<Tok> SelectedSpelled;
537   llvm::ArrayRef<syntax::Token> MaybeSelectedExpanded;
538   FileID SelFile;
539   SourceRange SelFileBounds;
540   const SourceManager &SM;
541 };
542 
543 // Show the type of a node for debugging.
printNodeKind(llvm::raw_ostream & OS,const DynTypedNode & N)544 void printNodeKind(llvm::raw_ostream &OS, const DynTypedNode &N) {
545   if (const TypeLoc *TL = N.get<TypeLoc>()) {
546     // TypeLoc is a hierarchy, but has only a single ASTNodeKind.
547     // Synthesize the name from the Type subclass (except for QualifiedTypeLoc).
548     if (TL->getTypeLocClass() == TypeLoc::Qualified)
549       OS << "QualifiedTypeLoc";
550     else
551       OS << TL->getType()->getTypeClassName() << "TypeLoc";
552   } else {
553     OS << N.getNodeKind().asStringRef();
554   }
555 }
556 
557 #ifndef NDEBUG
printNodeToString(const DynTypedNode & N,const PrintingPolicy & PP)558 std::string printNodeToString(const DynTypedNode &N, const PrintingPolicy &PP) {
559   std::string S;
560   llvm::raw_string_ostream OS(S);
561   printNodeKind(OS, N);
562   return std::move(OS.str());
563 }
564 #endif
565 
isImplicit(const Stmt * S)566 bool isImplicit(const Stmt *S) {
567   // Some Stmts are implicit and shouldn't be traversed, but there's no
568   // "implicit" attribute on Stmt/Expr.
569   // Unwrap implicit casts first if present (other nodes too?).
570   if (auto *ICE = llvm::dyn_cast<ImplicitCastExpr>(S))
571     S = ICE->getSubExprAsWritten();
572   // Implicit this in a MemberExpr is not filtered out by RecursiveASTVisitor.
573   // It would be nice if RAV handled this (!shouldTraverseImplicitCode()).
574   if (auto *CTI = llvm::dyn_cast<CXXThisExpr>(S))
575     if (CTI->isImplicit())
576       return true;
577   // Make sure implicit access of anonymous structs don't end up owning tokens.
578   if (auto *ME = llvm::dyn_cast<MemberExpr>(S)) {
579     if (auto *FD = llvm::dyn_cast<FieldDecl>(ME->getMemberDecl()))
580       if (FD->isAnonymousStructOrUnion())
581         // If Base is an implicit CXXThis, then the whole MemberExpr has no
582         // tokens. If it's a normal e.g. DeclRef, we treat the MemberExpr like
583         // an implicit cast.
584         return isImplicit(ME->getBase());
585   }
586   // Refs to operator() and [] are (almost?) always implicit as part of calls.
587   if (auto *DRE = llvm::dyn_cast<DeclRefExpr>(S)) {
588     if (auto *FD = llvm::dyn_cast<FunctionDecl>(DRE->getDecl())) {
589       switch (FD->getOverloadedOperator()) {
590       case OO_Call:
591       case OO_Subscript:
592         return true;
593       default:
594         break;
595       }
596     }
597   }
598   return false;
599 }
600 
601 // We find the selection by visiting written nodes in the AST, looking for nodes
602 // that intersect with the selected character range.
603 //
604 // While traversing, we maintain a parent stack. As nodes pop off the stack,
605 // we decide whether to keep them or not. To be kept, they must either be
606 // selected or contain some nodes that are.
607 //
608 // For simple cases (not inside macros) we prune subtrees that don't intersect.
609 class SelectionVisitor : public RecursiveASTVisitor<SelectionVisitor> {
610 public:
611   // Runs the visitor to gather selected nodes and their ancestors.
612   // If there is any selection, the root (TUDecl) is the first node.
collect(ASTContext & AST,const syntax::TokenBuffer & Tokens,const PrintingPolicy & PP,unsigned Begin,unsigned End,FileID File)613   static std::deque<Node> collect(ASTContext &AST,
614                                   const syntax::TokenBuffer &Tokens,
615                                   const PrintingPolicy &PP, unsigned Begin,
616                                   unsigned End, FileID File) {
617     SelectionVisitor V(AST, Tokens, PP, Begin, End, File);
618     V.TraverseAST(AST);
619     assert(V.Stack.size() == 1 && "Unpaired push/pop?");
620     assert(V.Stack.top() == &V.Nodes.front());
621     return std::move(V.Nodes);
622   }
623 
624   // We traverse all "well-behaved" nodes the same way:
625   //  - push the node onto the stack
626   //  - traverse its children recursively
627   //  - pop it from the stack
628   //  - hit testing: is intersection(node, selection) - union(children) empty?
629   //  - attach it to the tree if it or any children hit the selection
630   //
631   // Two categories of nodes are not "well-behaved":
632   //  - those without source range information, we don't record those
633   //  - those that can't be stored in DynTypedNode.
TraverseDecl(Decl * X)634   bool TraverseDecl(Decl *X) {
635     if (llvm::isa_and_nonnull<TranslationUnitDecl>(X))
636       return Base::TraverseDecl(X); // Already pushed by constructor.
637     // Base::TraverseDecl will suppress children, but not this node itself.
638     if (X && X->isImplicit()) {
639       // Most implicit nodes have only implicit children and can be skipped.
640       // However there are exceptions (`void foo(Concept auto x)`), and
641       // the base implementation knows how to find them.
642       return Base::TraverseDecl(X);
643     }
644     return traverseNode(X, [&] { return Base::TraverseDecl(X); });
645   }
TraverseTypeLoc(TypeLoc X)646   bool TraverseTypeLoc(TypeLoc X) {
647     return traverseNode(&X, [&] { return Base::TraverseTypeLoc(X); });
648   }
TraverseTemplateArgumentLoc(const TemplateArgumentLoc & X)649   bool TraverseTemplateArgumentLoc(const TemplateArgumentLoc &X) {
650     return traverseNode(&X,
651                         [&] { return Base::TraverseTemplateArgumentLoc(X); });
652   }
TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X)653   bool TraverseNestedNameSpecifierLoc(NestedNameSpecifierLoc X) {
654     return traverseNode(
655         &X, [&] { return Base::TraverseNestedNameSpecifierLoc(X); });
656   }
TraverseConstructorInitializer(CXXCtorInitializer * X)657   bool TraverseConstructorInitializer(CXXCtorInitializer *X) {
658     return traverseNode(
659         X, [&] { return Base::TraverseConstructorInitializer(X); });
660   }
TraverseCXXBaseSpecifier(const CXXBaseSpecifier & X)661   bool TraverseCXXBaseSpecifier(const CXXBaseSpecifier &X) {
662     return traverseNode(&X, [&] { return Base::TraverseCXXBaseSpecifier(X); });
663   }
TraverseAttr(Attr * X)664   bool TraverseAttr(Attr *X) {
665     return traverseNode(X, [&] { return Base::TraverseAttr(X); });
666   }
TraverseConceptReference(ConceptReference * X)667   bool TraverseConceptReference(ConceptReference *X) {
668     return traverseNode(X, [&] { return Base::TraverseConceptReference(X); });
669   }
670   // Stmt is the same, but this form allows the data recursion optimization.
dataTraverseStmtPre(Stmt * X)671   bool dataTraverseStmtPre(Stmt *X) {
672     if (!X || isImplicit(X))
673       return false;
674     auto N = DynTypedNode::create(*X);
675     if (canSafelySkipNode(N))
676       return false;
677     push(std::move(N));
678     if (shouldSkipChildren(X)) {
679       pop();
680       return false;
681     }
682     return true;
683   }
dataTraverseStmtPost(Stmt * X)684   bool dataTraverseStmtPost(Stmt *X) {
685     pop();
686     return true;
687   }
688   // QualifiedTypeLoc is handled strangely in RecursiveASTVisitor: the derived
689   // TraverseTypeLoc is not called for the inner UnqualTypeLoc.
690   // This means we'd never see 'int' in 'const int'! Work around that here.
691   // (The reason for the behavior is to avoid traversing the nested Type twice,
692   // but we ignore TraverseType anyway).
TraverseQualifiedTypeLoc(QualifiedTypeLoc QX)693   bool TraverseQualifiedTypeLoc(QualifiedTypeLoc QX) {
694     return traverseNode<TypeLoc>(
695         &QX, [&] { return TraverseTypeLoc(QX.getUnqualifiedLoc()); });
696   }
TraverseObjCProtocolLoc(ObjCProtocolLoc PL)697   bool TraverseObjCProtocolLoc(ObjCProtocolLoc PL) {
698     return traverseNode(&PL, [&] { return Base::TraverseObjCProtocolLoc(PL); });
699   }
700   // Uninteresting parts of the AST that don't have locations within them.
TraverseNestedNameSpecifier(NestedNameSpecifier *)701   bool TraverseNestedNameSpecifier(NestedNameSpecifier *) { return true; }
TraverseType(QualType)702   bool TraverseType(QualType) { return true; }
703 
704   // The DeclStmt for the loop variable claims to cover the whole range
705   // inside the parens, this causes the range-init expression to not be hit.
706   // Traverse the loop VarDecl instead, which has the right source range.
TraverseCXXForRangeStmt(CXXForRangeStmt * S)707   bool TraverseCXXForRangeStmt(CXXForRangeStmt *S) {
708     return traverseNode(S, [&] {
709       return TraverseStmt(S->getInit()) && TraverseDecl(S->getLoopVariable()) &&
710              TraverseStmt(S->getRangeInit()) && TraverseStmt(S->getBody());
711     });
712   }
713   // OpaqueValueExpr blocks traversal, we must explicitly traverse it.
TraverseOpaqueValueExpr(OpaqueValueExpr * E)714   bool TraverseOpaqueValueExpr(OpaqueValueExpr *E) {
715     return traverseNode(E, [&] { return TraverseStmt(E->getSourceExpr()); });
716   }
717   // We only want to traverse the *syntactic form* to understand the selection.
TraversePseudoObjectExpr(PseudoObjectExpr * E)718   bool TraversePseudoObjectExpr(PseudoObjectExpr *E) {
719     return traverseNode(E, [&] { return TraverseStmt(E->getSyntacticForm()); });
720   }
TraverseTypeConstraint(const TypeConstraint * C)721   bool TraverseTypeConstraint(const TypeConstraint *C) {
722     if (auto *E = C->getImmediatelyDeclaredConstraint()) {
723       // Technically this expression is 'implicit' and not traversed by the RAV.
724       // However, the range is correct, so we visit expression to avoid adding
725       // an extra kind to 'DynTypeNode' that hold 'TypeConstraint'.
726       return TraverseStmt(E);
727     }
728     return Base::TraverseTypeConstraint(C);
729   }
730 
731   // Override child traversal for certain node types.
732   using RecursiveASTVisitor::getStmtChildren;
733   // PredefinedExpr like __func__ has a StringLiteral child for its value.
734   // It's not written, so don't traverse it.
getStmtChildren(PredefinedExpr *)735   Stmt::child_range getStmtChildren(PredefinedExpr *) {
736     return {StmtIterator{}, StmtIterator{}};
737   }
738 
739 private:
740   using Base = RecursiveASTVisitor<SelectionVisitor>;
741 
SelectionVisitor(ASTContext & AST,const syntax::TokenBuffer & Tokens,const PrintingPolicy & PP,unsigned SelBegin,unsigned SelEnd,FileID SelFile)742   SelectionVisitor(ASTContext &AST, const syntax::TokenBuffer &Tokens,
743                    const PrintingPolicy &PP, unsigned SelBegin, unsigned SelEnd,
744                    FileID SelFile)
745       : SM(AST.getSourceManager()), LangOpts(AST.getLangOpts()),
746 #ifndef NDEBUG
747         PrintPolicy(PP),
748 #endif
749         TokenBuf(Tokens), SelChecker(Tokens, SelFile, SelBegin, SelEnd, SM),
750         UnclaimedExpandedTokens(Tokens.expandedTokens()) {
751     // Ensure we have a node for the TU decl, regardless of traversal scope.
752     Nodes.emplace_back();
753     Nodes.back().ASTNode = DynTypedNode::create(*AST.getTranslationUnitDecl());
754     Nodes.back().Parent = nullptr;
755     Nodes.back().Selected = SelectionTree::Unselected;
756     Stack.push(&Nodes.back());
757   }
758 
759   // Generic case of TraverseFoo. Func should be the call to Base::TraverseFoo.
760   // Node is always a pointer so the generic code can handle any null checks.
761   template <typename T, typename Func>
traverseNode(T * Node,const Func & Body)762   bool traverseNode(T *Node, const Func &Body) {
763     if (Node == nullptr)
764       return true;
765     auto N = DynTypedNode::create(*Node);
766     if (canSafelySkipNode(N))
767       return true;
768     push(DynTypedNode::create(*Node));
769     bool Ret = Body();
770     pop();
771     return Ret;
772   }
773 
774   // HIT TESTING
775   //
776   // We do rough hit testing on the way down the tree to avoid traversing
777   // subtrees that don't touch the selection (canSafelySkipNode), but
778   // fine-grained hit-testing is mostly done on the way back up (in pop()).
779   // This means children get to claim parts of the selection first, and parents
780   // are only selected if they own tokens that no child owned.
781   //
782   // Nodes *usually* nest nicely: a child's getSourceRange() lies within the
783   // parent's, and a node (transitively) owns all tokens in its range.
784   //
785   // Exception 1: when declarators nest, *inner* declarator is the *outer* type.
786   //              e.g. void foo[5](int) is an array of functions.
787   // To handle this case, declarators are careful to only claim the tokens they
788   // own, rather than claim a range and rely on claim ordering.
789   //
790   // Exception 2: siblings both claim the same node.
791   //              e.g. `int x, y;` produces two sibling VarDecls.
792   //                    ~~~~~ x
793   //                    ~~~~~~~~ y
794   // Here the first ("leftmost") sibling claims the tokens it wants, and the
795   // other sibling gets what's left. So selecting "int" only includes the left
796   // VarDecl in the selection tree.
797 
798   // An optimization for a common case: nodes outside macro expansions that
799   // don't intersect the selection may be recursively skipped.
canSafelySkipNode(const DynTypedNode & N)800   bool canSafelySkipNode(const DynTypedNode &N) {
801     SourceRange S = getSourceRange(N);
802     if (auto *TL = N.get<TypeLoc>()) {
803       // FIXME: TypeLoc::getBeginLoc()/getEndLoc() are pretty fragile
804       // heuristics. We should consider only pruning critical TypeLoc nodes, to
805       // be more robust.
806 
807       // AttributedTypeLoc may point to the attribute's range, NOT the modified
808       // type's range.
809       if (auto AT = TL->getAs<AttributedTypeLoc>())
810         S = AT.getModifiedLoc().getSourceRange();
811     }
812     // SourceRange often doesn't manage to accurately cover attributes.
813     // Fortunately, attributes are rare.
814     if (llvm::any_of(getAttributes(N),
815                      [](const Attr *A) { return !A->isImplicit(); }))
816       return false;
817     if (!SelChecker.mayHit(S)) {
818       dlog("{2}skip: {0} {1}", printNodeToString(N, PrintPolicy),
819            S.printToString(SM), indent());
820       return true;
821     }
822     return false;
823   }
824 
825   // There are certain nodes we want to treat as leaves in the SelectionTree,
826   // although they do have children.
shouldSkipChildren(const Stmt * X) const827   bool shouldSkipChildren(const Stmt *X) const {
828     // UserDefinedLiteral (e.g. 12_i) has two children (12 and _i).
829     // Unfortunately TokenBuffer sees 12_i as one token and can't split it.
830     // So we treat UserDefinedLiteral as a leaf node, owning the token.
831     return llvm::isa<UserDefinedLiteral>(X);
832   }
833 
834   // Pushes a node onto the ancestor stack. Pairs with pop().
835   // Performs early hit detection for some nodes (on the earlySourceRange).
push(DynTypedNode Node)836   void push(DynTypedNode Node) {
837     SourceRange Early = earlySourceRange(Node);
838     dlog("{2}push: {0} {1}", printNodeToString(Node, PrintPolicy),
839          Node.getSourceRange().printToString(SM), indent());
840     Nodes.emplace_back();
841     Nodes.back().ASTNode = std::move(Node);
842     Nodes.back().Parent = Stack.top();
843     Nodes.back().Selected = NoTokens;
844     Stack.push(&Nodes.back());
845     claimRange(Early, Nodes.back().Selected);
846   }
847 
848   // Pops a node off the ancestor stack, and finalizes it. Pairs with push().
849   // Performs primary hit detection.
pop()850   void pop() {
851     Node &N = *Stack.top();
852     dlog("{1}pop: {0}", printNodeToString(N.ASTNode, PrintPolicy), indent(-1));
853     claimTokensFor(N.ASTNode, N.Selected);
854     if (N.Selected == NoTokens)
855       N.Selected = SelectionTree::Unselected;
856     if (N.Selected || !N.Children.empty()) {
857       // Attach to the tree.
858       N.Parent->Children.push_back(&N);
859     } else {
860       // Neither N any children are selected, it doesn't belong in the tree.
861       assert(&N == &Nodes.back());
862       Nodes.pop_back();
863     }
864     Stack.pop();
865   }
866 
867   // Returns the range of tokens that this node will claim directly, and
868   // is not available to the node's children.
869   // Usually empty, but sometimes children cover tokens but shouldn't own them.
earlySourceRange(const DynTypedNode & N)870   SourceRange earlySourceRange(const DynTypedNode &N) {
871     if (const Decl *VD = N.get<VarDecl>()) {
872       // We want the name in the var-decl to be claimed by the decl itself and
873       // not by any children. Ususally, we don't need this, because source
874       // ranges of children are not overlapped with their parent's.
875       // An exception is lambda captured var decl, where AutoTypeLoc is
876       // overlapped with the name loc.
877       //    auto fun = [bar = foo]() { ... }
878       //                ~~~~~~~~~   VarDecl
879       //                ~~~         |- AutoTypeLoc
880       return VD->getLocation();
881     }
882 
883     // When referring to a destructor ~Foo(), attribute Foo to the destructor
884     // rather than the TypeLoc nested inside it.
885     // We still traverse the TypeLoc, because it may contain other targeted
886     // things like the T in ~Foo<T>().
887     if (const auto *CDD = N.get<CXXDestructorDecl>())
888       return CDD->getNameInfo().getNamedTypeInfo()->getTypeLoc().getBeginLoc();
889     if (const auto *ME = N.get<MemberExpr>()) {
890       auto NameInfo = ME->getMemberNameInfo();
891       if (NameInfo.getName().getNameKind() ==
892           DeclarationName::CXXDestructorName)
893         return NameInfo.getNamedTypeInfo()->getTypeLoc().getBeginLoc();
894     }
895 
896     return SourceRange();
897   }
898 
899   // Claim tokens for N, after processing its children.
900   // By default this claims all unclaimed tokens in getSourceRange().
901   // We override this if we want to claim fewer tokens (e.g. there are gaps).
claimTokensFor(const DynTypedNode & N,SelectionTree::Selection & Result)902   void claimTokensFor(const DynTypedNode &N, SelectionTree::Selection &Result) {
903     // CXXConstructExpr often shows implicit construction, like `string s;`.
904     // Don't associate any tokens with it unless there's some syntax like {}.
905     // This prevents it from claiming 's', its primary location.
906     if (const auto *CCE = N.get<CXXConstructExpr>()) {
907       claimRange(CCE->getParenOrBraceRange(), Result);
908       return;
909     }
910     // ExprWithCleanups is always implicit. It often wraps CXXConstructExpr.
911     // Prevent it claiming 's' in the case above.
912     if (N.get<ExprWithCleanups>())
913       return;
914 
915     // Declarators nest "inside out", with parent types inside child ones.
916     // Instead of claiming the whole range (clobbering parent tokens), carefully
917     // claim the tokens owned by this node and non-declarator children.
918     // (We could manipulate traversal order instead, but this is easier).
919     //
920     // Non-declarator types nest normally, and are handled like other nodes.
921     //
922     // Example:
923     //   Vec<R<int>(*[2])(A<char>)> is a Vec of arrays of pointers to functions,
924     //                              which accept A<char> and return R<int>.
925     // The TypeLoc hierarchy:
926     //   Vec<R<int>(*[2])(A<char>)> m;
927     //   Vec<#####################>      TemplateSpecialization Vec
928     //       --------[2]----------       `-Array
929     //       -------*-------------         `-Pointer
930     //       ------(----)---------           `-Paren
931     //       ------------(#######)             `-Function
932     //       R<###>                              |-TemplateSpecialization R
933     //         int                               | `-Builtin int
934     //                    A<####>                `-TemplateSpecialization A
935     //                      char                   `-Builtin char
936     //
937     // In each row
938     //   --- represents unclaimed parts of the SourceRange.
939     //   ### represents parts that children already claimed.
940     if (const auto *TL = N.get<TypeLoc>()) {
941       if (auto PTL = TL->getAs<ParenTypeLoc>()) {
942         claimRange(PTL.getLParenLoc(), Result);
943         claimRange(PTL.getRParenLoc(), Result);
944         return;
945       }
946       if (auto ATL = TL->getAs<ArrayTypeLoc>()) {
947         claimRange(ATL.getBracketsRange(), Result);
948         return;
949       }
950       if (auto PTL = TL->getAs<PointerTypeLoc>()) {
951         claimRange(PTL.getStarLoc(), Result);
952         return;
953       }
954       if (auto FTL = TL->getAs<FunctionTypeLoc>()) {
955         claimRange(SourceRange(FTL.getLParenLoc(), FTL.getEndLoc()), Result);
956         return;
957       }
958     }
959     claimRange(getSourceRange(N), Result);
960   }
961 
962   // Perform hit-testing of a complete Node against the selection.
963   // This runs for every node in the AST, and must be fast in common cases.
964   // This is usually called from pop(), so we can take children into account.
965   // The existing state of Result is relevant.
claimRange(SourceRange S,SelectionTree::Selection & Result)966   void claimRange(SourceRange S, SelectionTree::Selection &Result) {
967     for (const auto &ClaimedRange :
968          UnclaimedExpandedTokens.erase(TokenBuf.expandedTokens(S)))
969       update(Result, SelChecker.test(ClaimedRange));
970 
971     if (Result && Result != NoTokens)
972       dlog("{1}hit selection: {0}", S.printToString(SM), indent());
973   }
974 
indent(int Offset=0)975   std::string indent(int Offset = 0) {
976     // Cast for signed arithmetic.
977     int Amount = int(Stack.size()) + Offset;
978     assert(Amount >= 0);
979     return std::string(Amount, ' ');
980   }
981 
982   SourceManager &SM;
983   const LangOptions &LangOpts;
984 #ifndef NDEBUG
985   const PrintingPolicy &PrintPolicy;
986 #endif
987   const syntax::TokenBuffer &TokenBuf;
988   std::stack<Node *> Stack;
989   SelectionTester SelChecker;
990   IntervalSet<syntax::Token> UnclaimedExpandedTokens;
991   std::deque<Node> Nodes; // Stable pointers as we add more nodes.
992 };
993 
994 } // namespace
995 
abbreviatedString(DynTypedNode N,const PrintingPolicy & PP)996 llvm::SmallString<256> abbreviatedString(DynTypedNode N,
997                                          const PrintingPolicy &PP) {
998   llvm::SmallString<256> Result;
999   {
1000     llvm::raw_svector_ostream OS(Result);
1001     N.print(OS, PP);
1002   }
1003   auto Pos = Result.find('\n');
1004   if (Pos != llvm::StringRef::npos) {
1005     bool MoreText = !llvm::all_of(Result.str().drop_front(Pos), llvm::isSpace);
1006     Result.resize(Pos);
1007     if (MoreText)
1008       Result.append(" …");
1009   }
1010   return Result;
1011 }
1012 
print(llvm::raw_ostream & OS,const SelectionTree::Node & N,int Indent) const1013 void SelectionTree::print(llvm::raw_ostream &OS, const SelectionTree::Node &N,
1014                           int Indent) const {
1015   if (N.Selected)
1016     OS.indent(Indent - 1) << (N.Selected == SelectionTree::Complete ? '*'
1017                                                                     : '.');
1018   else
1019     OS.indent(Indent);
1020   printNodeKind(OS, N.ASTNode);
1021   OS << ' ' << abbreviatedString(N.ASTNode, PrintPolicy) << "\n";
1022   for (const Node *Child : N.Children)
1023     print(OS, *Child, Indent + 2);
1024 }
1025 
kind() const1026 std::string SelectionTree::Node::kind() const {
1027   std::string S;
1028   llvm::raw_string_ostream OS(S);
1029   printNodeKind(OS, ASTNode);
1030   return std::move(OS.str());
1031 }
1032 
1033 // Decide which selections emulate a "point" query in between characters.
1034 // If it's ambiguous (the neighboring characters are selectable tokens), returns
1035 // both possibilities in preference order.
1036 // Always returns at least one range - if no tokens touched, and empty range.
1037 static llvm::SmallVector<std::pair<unsigned, unsigned>, 2>
pointBounds(unsigned Offset,const syntax::TokenBuffer & Tokens)1038 pointBounds(unsigned Offset, const syntax::TokenBuffer &Tokens) {
1039   const auto &SM = Tokens.sourceManager();
1040   SourceLocation Loc = SM.getComposedLoc(SM.getMainFileID(), Offset);
1041   llvm::SmallVector<std::pair<unsigned, unsigned>, 2> Result;
1042   // Prefer right token over left.
1043   for (const syntax::Token &Tok :
1044        llvm::reverse(spelledTokensTouching(Loc, Tokens))) {
1045     if (shouldIgnore(Tok))
1046       continue;
1047     unsigned Offset = Tokens.sourceManager().getFileOffset(Tok.location());
1048     Result.emplace_back(Offset, Offset + Tok.length());
1049   }
1050   if (Result.empty())
1051     Result.emplace_back(Offset, Offset);
1052   return Result;
1053 }
1054 
createEach(ASTContext & AST,const syntax::TokenBuffer & Tokens,unsigned Begin,unsigned End,llvm::function_ref<bool (SelectionTree)> Func)1055 bool SelectionTree::createEach(ASTContext &AST,
1056                                const syntax::TokenBuffer &Tokens,
1057                                unsigned Begin, unsigned End,
1058                                llvm::function_ref<bool(SelectionTree)> Func) {
1059   if (Begin != End)
1060     return Func(SelectionTree(AST, Tokens, Begin, End));
1061   for (std::pair<unsigned, unsigned> Bounds : pointBounds(Begin, Tokens))
1062     if (Func(SelectionTree(AST, Tokens, Bounds.first, Bounds.second)))
1063       return true;
1064   return false;
1065 }
1066 
createRight(ASTContext & AST,const syntax::TokenBuffer & Tokens,unsigned int Begin,unsigned int End)1067 SelectionTree SelectionTree::createRight(ASTContext &AST,
1068                                          const syntax::TokenBuffer &Tokens,
1069                                          unsigned int Begin, unsigned int End) {
1070   std::optional<SelectionTree> Result;
1071   createEach(AST, Tokens, Begin, End, [&](SelectionTree T) {
1072     Result = std::move(T);
1073     return true;
1074   });
1075   return std::move(*Result);
1076 }
1077 
SelectionTree(ASTContext & AST,const syntax::TokenBuffer & Tokens,unsigned Begin,unsigned End)1078 SelectionTree::SelectionTree(ASTContext &AST, const syntax::TokenBuffer &Tokens,
1079                              unsigned Begin, unsigned End)
1080     : PrintPolicy(AST.getLangOpts()) {
1081   // No fundamental reason the selection needs to be in the main file,
1082   // but that's all clangd has needed so far.
1083   const SourceManager &SM = AST.getSourceManager();
1084   FileID FID = SM.getMainFileID();
1085   PrintPolicy.TerseOutput = true;
1086   PrintPolicy.IncludeNewlines = false;
1087 
1088   dlog("Computing selection for {0}",
1089        SourceRange(SM.getComposedLoc(FID, Begin), SM.getComposedLoc(FID, End))
1090            .printToString(SM));
1091   Nodes = SelectionVisitor::collect(AST, Tokens, PrintPolicy, Begin, End, FID);
1092   Root = Nodes.empty() ? nullptr : &Nodes.front();
1093   recordMetrics(*this, AST.getLangOpts());
1094   dlog("Built selection tree\n{0}", *this);
1095 }
1096 
commonAncestor() const1097 const Node *SelectionTree::commonAncestor() const {
1098   const Node *Ancestor = Root;
1099   while (Ancestor->Children.size() == 1 && !Ancestor->Selected)
1100     Ancestor = Ancestor->Children.front();
1101   // Returning nullptr here is a bit unprincipled, but it makes the API safer:
1102   // the TranslationUnitDecl contains all of the preamble, so traversing it is a
1103   // performance cliff. Callers can check for null and use root() if they want.
1104   return Ancestor != Root ? Ancestor : nullptr;
1105 }
1106 
getDeclContext() const1107 const DeclContext &SelectionTree::Node::getDeclContext() const {
1108   for (const Node *CurrentNode = this; CurrentNode != nullptr;
1109        CurrentNode = CurrentNode->Parent) {
1110     if (const Decl *Current = CurrentNode->ASTNode.get<Decl>()) {
1111       if (CurrentNode != this)
1112         if (auto *DC = dyn_cast<DeclContext>(Current))
1113           return *DC;
1114       return *Current->getLexicalDeclContext();
1115     }
1116     if (const auto *LE = CurrentNode->ASTNode.get<LambdaExpr>())
1117       if (CurrentNode != this)
1118         return *LE->getCallOperator();
1119   }
1120   llvm_unreachable("A tree must always be rooted at TranslationUnitDecl.");
1121 }
1122 
ignoreImplicit() const1123 const SelectionTree::Node &SelectionTree::Node::ignoreImplicit() const {
1124   if (Children.size() == 1 &&
1125       getSourceRange(Children.front()->ASTNode) == getSourceRange(ASTNode))
1126     return Children.front()->ignoreImplicit();
1127   return *this;
1128 }
1129 
outerImplicit() const1130 const SelectionTree::Node &SelectionTree::Node::outerImplicit() const {
1131   if (Parent && getSourceRange(Parent->ASTNode) == getSourceRange(ASTNode))
1132     return Parent->outerImplicit();
1133   return *this;
1134 }
1135 
1136 } // namespace clangd
1137 } // namespace clang
1138