xref: /llvm-project/clang-tools-extra/clangd/support/Bracket.cpp (revision ed8f78827895050442f544edef2933a60d4a7935)
1*ed8f7882SAaron Ballman //===--- Bracket.cpp - Analyze bracket structure --------------------------===//
2*ed8f7882SAaron Ballman //
3*ed8f7882SAaron Ballman // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*ed8f7882SAaron Ballman // See https://llvm.org/LICENSE.txt for license information.
5*ed8f7882SAaron Ballman // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*ed8f7882SAaron Ballman //
7*ed8f7882SAaron Ballman //===----------------------------------------------------------------------===//
8*ed8f7882SAaron Ballman //
9*ed8f7882SAaron Ballman // The basic phases of our bracket matching are:
10*ed8f7882SAaron Ballman //
11*ed8f7882SAaron Ballman // 1) A simple "greedy" match looks for well-nested subsequences.
12*ed8f7882SAaron Ballman //
13*ed8f7882SAaron Ballman //    We can't fully trust the results of this, consider:
14*ed8f7882SAaron Ballman //      while (1) {   // A
15*ed8f7882SAaron Ballman //        if (true) { // B
16*ed8f7882SAaron Ballman //          break;
17*ed8f7882SAaron Ballman //      }             // C
18*ed8f7882SAaron Ballman //    Greedy matching will match B=C, when we should at least consider A=C.
19*ed8f7882SAaron Ballman //    However for the correct parts of the file, the greedy match gives the
20*ed8f7882SAaron Ballman //    right answer. It produces useful candidates for phase 2.
21*ed8f7882SAaron Ballman //
22*ed8f7882SAaron Ballman //    simplePairBrackets handles this step.
23*ed8f7882SAaron Ballman //
24*ed8f7882SAaron Ballman // 2) Try to identify places where formatting indicates that the greedy match
25*ed8f7882SAaron Ballman //    was correct. This is similar to how a human would scan a large file.
26*ed8f7882SAaron Ballman //
27*ed8f7882SAaron Ballman //    For example:
28*ed8f7882SAaron Ballman //      int foo() {      // X
29*ed8f7882SAaron Ballman //        // indented
30*ed8f7882SAaron Ballman //        while (1) {
31*ed8f7882SAaron Ballman //          // valid code
32*ed8f7882SAaron Ballman //        }
33*ed8f7882SAaron Ballman //        return bar(42);
34*ed8f7882SAaron Ballman //      }                // Y
35*ed8f7882SAaron Ballman //    We can "verify" that X..Y looks like a braced block, and the greedy match
36*ed8f7882SAaron Ballman //    tells us that substring is perfectly nested.
37*ed8f7882SAaron Ballman //    We trust the pairings of those brackets and don't examine them further.
38*ed8f7882SAaron Ballman //    However in the first example above, we do not trust B=C because the brace
39*ed8f7882SAaron Ballman //    indentation is suspect.
40*ed8f7882SAaron Ballman //
41*ed8f7882SAaron Ballman //    FIXME: implement this step.
42*ed8f7882SAaron Ballman //
43*ed8f7882SAaron Ballman // 3) Run full best-match optimization on remaining brackets.
44*ed8f7882SAaron Ballman //
45*ed8f7882SAaron Ballman //    Conceptually, this considers all possible matchings and optimizes cost:
46*ed8f7882SAaron Ballman //      - there is a cost for failing to match a bracket
47*ed8f7882SAaron Ballman //      - there is a variable cost for matching two brackets.
48*ed8f7882SAaron Ballman //        (For example if brace indentation doesn't match).
49*ed8f7882SAaron Ballman //
50*ed8f7882SAaron Ballman //    In the first example we have three alternatives, and they are ranked:
51*ed8f7882SAaron Ballman //      1) A=C, skip B
52*ed8f7882SAaron Ballman //      2) B=C, skip A
53*ed8f7882SAaron Ballman //      3) skip A, skip B, skip C
54*ed8f7882SAaron Ballman //    The cost for skipping a bracket is high, so option 3 is worst.
55*ed8f7882SAaron Ballman //    B=C costs more than A=C, because the indentation doesn't match.
56*ed8f7882SAaron Ballman //
57*ed8f7882SAaron Ballman //    It would be correct to run this step alone, but it would be too slow.
58*ed8f7882SAaron Ballman //    The implementation is dynamic programming in N^3 space and N^2 time.
59*ed8f7882SAaron Ballman //    Having earlier steps filter out most brackets is key to performance.
60*ed8f7882SAaron Ballman //
61*ed8f7882SAaron Ballman //    FIXME: implement this step.
62*ed8f7882SAaron Ballman //
63*ed8f7882SAaron Ballman //===----------------------------------------------------------------------===//
64*ed8f7882SAaron Ballman 
65*ed8f7882SAaron Ballman #include "Bracket.h"
66*ed8f7882SAaron Ballman 
67*ed8f7882SAaron Ballman namespace clang {
68*ed8f7882SAaron Ballman namespace clangd {
69*ed8f7882SAaron Ballman namespace {
70*ed8f7882SAaron Ballman 
71*ed8f7882SAaron Ballman struct Bracket {
72*ed8f7882SAaron Ballman   using Index = unsigned;
73*ed8f7882SAaron Ballman   constexpr static Index None = -1;
74*ed8f7882SAaron Ballman 
75*ed8f7882SAaron Ballman   enum BracketKind : char { Paren, Brace, Square } Kind;
76*ed8f7882SAaron Ballman   enum Direction : bool { Open, Close } Dir;
77*ed8f7882SAaron Ballman   unsigned Line;
78*ed8f7882SAaron Ballman   unsigned Indent;
79*ed8f7882SAaron Ballman   Token::Index Tok;
80*ed8f7882SAaron Ballman   Bracket::Index Pair = None;
81*ed8f7882SAaron Ballman };
82*ed8f7882SAaron Ballman 
83*ed8f7882SAaron Ballman // Find brackets in the stream and convert to Bracket struct.
84*ed8f7882SAaron Ballman std::vector<Bracket> findBrackets(const TokenStream &Stream) {
85*ed8f7882SAaron Ballman   std::vector<Bracket> Brackets;
86*ed8f7882SAaron Ballman   auto Add = [&](const Token &Tok, Bracket::BracketKind K,
87*ed8f7882SAaron Ballman                  Bracket::Direction D) {
88*ed8f7882SAaron Ballman     Brackets.push_back(
89*ed8f7882SAaron Ballman         {K, D, Tok.Line, Tok.Indent, Stream.index(Tok), Bracket::None});
90*ed8f7882SAaron Ballman   };
91*ed8f7882SAaron Ballman   for (const auto &Tok : Stream.tokens()) {
92*ed8f7882SAaron Ballman     switch (Tok.Kind) {
93*ed8f7882SAaron Ballman     case clang::tok::l_paren:
94*ed8f7882SAaron Ballman       Add(Tok, Bracket::Paren, Bracket::Open);
95*ed8f7882SAaron Ballman       break;
96*ed8f7882SAaron Ballman     case clang::tok::r_paren:
97*ed8f7882SAaron Ballman       Add(Tok, Bracket::Paren, Bracket::Close);
98*ed8f7882SAaron Ballman       break;
99*ed8f7882SAaron Ballman     case clang::tok::l_brace:
100*ed8f7882SAaron Ballman       Add(Tok, Bracket::Brace, Bracket::Open);
101*ed8f7882SAaron Ballman       break;
102*ed8f7882SAaron Ballman     case clang::tok::r_brace:
103*ed8f7882SAaron Ballman       Add(Tok, Bracket::Brace, Bracket::Close);
104*ed8f7882SAaron Ballman       break;
105*ed8f7882SAaron Ballman     case clang::tok::l_square:
106*ed8f7882SAaron Ballman       Add(Tok, Bracket::Square, Bracket::Open);
107*ed8f7882SAaron Ballman       break;
108*ed8f7882SAaron Ballman     case clang::tok::r_square:
109*ed8f7882SAaron Ballman       Add(Tok, Bracket::Square, Bracket::Close);
110*ed8f7882SAaron Ballman       break;
111*ed8f7882SAaron Ballman     default:
112*ed8f7882SAaron Ballman       break;
113*ed8f7882SAaron Ballman     }
114*ed8f7882SAaron Ballman   }
115*ed8f7882SAaron Ballman   return Brackets;
116*ed8f7882SAaron Ballman }
117*ed8f7882SAaron Ballman 
118*ed8f7882SAaron Ballman // Write the bracket pairings from Brackets back to Tokens.
119*ed8f7882SAaron Ballman void applyPairings(ArrayRef<Bracket> Brackets, TokenStream &Tokens) {
120*ed8f7882SAaron Ballman   for (const auto &B : Brackets)
121*ed8f7882SAaron Ballman     Tokens.tokens()[B.Tok].Pair =
122*ed8f7882SAaron Ballman         (B.Pair == Bracket::None) ? 0 : (int32_t)Brackets[B.Pair].Tok - B.Tok;
123*ed8f7882SAaron Ballman }
124*ed8f7882SAaron Ballman 
125*ed8f7882SAaron Ballman // Find perfect pairings (ignoring whitespace) via greedy algorithm.
126*ed8f7882SAaron Ballman // This means two brackets are paired if they match and the brackets between
127*ed8f7882SAaron Ballman // them nest perfectly, with no skipped or crossed brackets.
128*ed8f7882SAaron Ballman void simplePairBrackets(MutableArrayRef<Bracket> Brackets) {
129*ed8f7882SAaron Ballman   std::vector<unsigned> Stack;
130*ed8f7882SAaron Ballman   for (unsigned I = 0; I < Brackets.size(); ++I) {
131*ed8f7882SAaron Ballman     if (Brackets[I].Dir == Bracket::Open) {
132*ed8f7882SAaron Ballman       Stack.push_back(I);
133*ed8f7882SAaron Ballman     } else if (!Stack.empty() &&
134*ed8f7882SAaron Ballman                Brackets[Stack.back()].Kind == Brackets[I].Kind) {
135*ed8f7882SAaron Ballman       Brackets[Stack.back()].Pair = I;
136*ed8f7882SAaron Ballman       Brackets[I].Pair = Stack.back();
137*ed8f7882SAaron Ballman       Stack.pop_back();
138*ed8f7882SAaron Ballman     } else {
139*ed8f7882SAaron Ballman       // Unpaired closer, no brackets on stack are part of a perfect sequence.
140*ed8f7882SAaron Ballman       Stack.clear();
141*ed8f7882SAaron Ballman     }
142*ed8f7882SAaron Ballman   }
143*ed8f7882SAaron Ballman   // Any remaining brackets on the stack stay unpaired.
144*ed8f7882SAaron Ballman }
145*ed8f7882SAaron Ballman 
146*ed8f7882SAaron Ballman } // namespace
147*ed8f7882SAaron Ballman 
148*ed8f7882SAaron Ballman void pairBrackets(TokenStream &Stream) {
149*ed8f7882SAaron Ballman   auto Brackets = findBrackets(Stream);
150*ed8f7882SAaron Ballman   simplePairBrackets(Brackets);
151*ed8f7882SAaron Ballman   applyPairings(Brackets, Stream);
152*ed8f7882SAaron Ballman }
153*ed8f7882SAaron Ballman 
154*ed8f7882SAaron Ballman } // namespace clangd
155*ed8f7882SAaron Ballman } // namespace clang
156