xref: /llvm-project/clang-tools-extra/clangd/support/Bracket.cpp (revision ed8f78827895050442f544edef2933a60d4a7935)
1 //===--- Bracket.cpp - Analyze bracket structure --------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The basic phases of our bracket matching are:
10 //
11 // 1) A simple "greedy" match looks for well-nested subsequences.
12 //
13 //    We can't fully trust the results of this, consider:
14 //      while (1) {   // A
15 //        if (true) { // B
16 //          break;
17 //      }             // C
18 //    Greedy matching will match B=C, when we should at least consider A=C.
19 //    However for the correct parts of the file, the greedy match gives the
20 //    right answer. It produces useful candidates for phase 2.
21 //
22 //    simplePairBrackets handles this step.
23 //
24 // 2) Try to identify places where formatting indicates that the greedy match
25 //    was correct. This is similar to how a human would scan a large file.
26 //
27 //    For example:
28 //      int foo() {      // X
29 //        // indented
30 //        while (1) {
31 //          // valid code
32 //        }
33 //        return bar(42);
34 //      }                // Y
35 //    We can "verify" that X..Y looks like a braced block, and the greedy match
36 //    tells us that substring is perfectly nested.
37 //    We trust the pairings of those brackets and don't examine them further.
38 //    However in the first example above, we do not trust B=C because the brace
39 //    indentation is suspect.
40 //
41 //    FIXME: implement this step.
42 //
43 // 3) Run full best-match optimization on remaining brackets.
44 //
45 //    Conceptually, this considers all possible matchings and optimizes cost:
46 //      - there is a cost for failing to match a bracket
47 //      - there is a variable cost for matching two brackets.
48 //        (For example if brace indentation doesn't match).
49 //
50 //    In the first example we have three alternatives, and they are ranked:
51 //      1) A=C, skip B
52 //      2) B=C, skip A
53 //      3) skip A, skip B, skip C
54 //    The cost for skipping a bracket is high, so option 3 is worst.
55 //    B=C costs more than A=C, because the indentation doesn't match.
56 //
57 //    It would be correct to run this step alone, but it would be too slow.
58 //    The implementation is dynamic programming in N^3 space and N^2 time.
59 //    Having earlier steps filter out most brackets is key to performance.
60 //
61 //    FIXME: implement this step.
62 //
63 //===----------------------------------------------------------------------===//
64 
65 #include "Bracket.h"
66 
67 namespace clang {
68 namespace clangd {
69 namespace {
70 
71 struct Bracket {
72   using Index = unsigned;
73   constexpr static Index None = -1;
74 
75   enum BracketKind : char { Paren, Brace, Square } Kind;
76   enum Direction : bool { Open, Close } Dir;
77   unsigned Line;
78   unsigned Indent;
79   Token::Index Tok;
80   Bracket::Index Pair = None;
81 };
82 
83 // Find brackets in the stream and convert to Bracket struct.
84 std::vector<Bracket> findBrackets(const TokenStream &Stream) {
85   std::vector<Bracket> Brackets;
86   auto Add = [&](const Token &Tok, Bracket::BracketKind K,
87                  Bracket::Direction D) {
88     Brackets.push_back(
89         {K, D, Tok.Line, Tok.Indent, Stream.index(Tok), Bracket::None});
90   };
91   for (const auto &Tok : Stream.tokens()) {
92     switch (Tok.Kind) {
93     case clang::tok::l_paren:
94       Add(Tok, Bracket::Paren, Bracket::Open);
95       break;
96     case clang::tok::r_paren:
97       Add(Tok, Bracket::Paren, Bracket::Close);
98       break;
99     case clang::tok::l_brace:
100       Add(Tok, Bracket::Brace, Bracket::Open);
101       break;
102     case clang::tok::r_brace:
103       Add(Tok, Bracket::Brace, Bracket::Close);
104       break;
105     case clang::tok::l_square:
106       Add(Tok, Bracket::Square, Bracket::Open);
107       break;
108     case clang::tok::r_square:
109       Add(Tok, Bracket::Square, Bracket::Close);
110       break;
111     default:
112       break;
113     }
114   }
115   return Brackets;
116 }
117 
118 // Write the bracket pairings from Brackets back to Tokens.
119 void applyPairings(ArrayRef<Bracket> Brackets, TokenStream &Tokens) {
120   for (const auto &B : Brackets)
121     Tokens.tokens()[B.Tok].Pair =
122         (B.Pair == Bracket::None) ? 0 : (int32_t)Brackets[B.Pair].Tok - B.Tok;
123 }
124 
125 // Find perfect pairings (ignoring whitespace) via greedy algorithm.
126 // This means two brackets are paired if they match and the brackets between
127 // them nest perfectly, with no skipped or crossed brackets.
128 void simplePairBrackets(MutableArrayRef<Bracket> Brackets) {
129   std::vector<unsigned> Stack;
130   for (unsigned I = 0; I < Brackets.size(); ++I) {
131     if (Brackets[I].Dir == Bracket::Open) {
132       Stack.push_back(I);
133     } else if (!Stack.empty() &&
134                Brackets[Stack.back()].Kind == Brackets[I].Kind) {
135       Brackets[Stack.back()].Pair = I;
136       Brackets[I].Pair = Stack.back();
137       Stack.pop_back();
138     } else {
139       // Unpaired closer, no brackets on stack are part of a perfect sequence.
140       Stack.clear();
141     }
142   }
143   // Any remaining brackets on the stack stay unpaired.
144 }
145 
146 } // namespace
147 
148 void pairBrackets(TokenStream &Stream) {
149   auto Brackets = findBrackets(Stream);
150   simplePairBrackets(Brackets);
151   applyPairings(Brackets, Stream);
152 }
153 
154 } // namespace clangd
155 } // namespace clang
156