1*ed8f7882SAaron Ballman //===--- Bracket.cpp - Analyze bracket structure --------------------------===// 2*ed8f7882SAaron Ballman // 3*ed8f7882SAaron Ballman // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*ed8f7882SAaron Ballman // See https://llvm.org/LICENSE.txt for license information. 5*ed8f7882SAaron Ballman // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*ed8f7882SAaron Ballman // 7*ed8f7882SAaron Ballman //===----------------------------------------------------------------------===// 8*ed8f7882SAaron Ballman // 9*ed8f7882SAaron Ballman // The basic phases of our bracket matching are: 10*ed8f7882SAaron Ballman // 11*ed8f7882SAaron Ballman // 1) A simple "greedy" match looks for well-nested subsequences. 12*ed8f7882SAaron Ballman // 13*ed8f7882SAaron Ballman // We can't fully trust the results of this, consider: 14*ed8f7882SAaron Ballman // while (1) { // A 15*ed8f7882SAaron Ballman // if (true) { // B 16*ed8f7882SAaron Ballman // break; 17*ed8f7882SAaron Ballman // } // C 18*ed8f7882SAaron Ballman // Greedy matching will match B=C, when we should at least consider A=C. 19*ed8f7882SAaron Ballman // However for the correct parts of the file, the greedy match gives the 20*ed8f7882SAaron Ballman // right answer. It produces useful candidates for phase 2. 21*ed8f7882SAaron Ballman // 22*ed8f7882SAaron Ballman // simplePairBrackets handles this step. 23*ed8f7882SAaron Ballman // 24*ed8f7882SAaron Ballman // 2) Try to identify places where formatting indicates that the greedy match 25*ed8f7882SAaron Ballman // was correct. This is similar to how a human would scan a large file. 26*ed8f7882SAaron Ballman // 27*ed8f7882SAaron Ballman // For example: 28*ed8f7882SAaron Ballman // int foo() { // X 29*ed8f7882SAaron Ballman // // indented 30*ed8f7882SAaron Ballman // while (1) { 31*ed8f7882SAaron Ballman // // valid code 32*ed8f7882SAaron Ballman // } 33*ed8f7882SAaron Ballman // return bar(42); 34*ed8f7882SAaron Ballman // } // Y 35*ed8f7882SAaron Ballman // We can "verify" that X..Y looks like a braced block, and the greedy match 36*ed8f7882SAaron Ballman // tells us that substring is perfectly nested. 37*ed8f7882SAaron Ballman // We trust the pairings of those brackets and don't examine them further. 38*ed8f7882SAaron Ballman // However in the first example above, we do not trust B=C because the brace 39*ed8f7882SAaron Ballman // indentation is suspect. 40*ed8f7882SAaron Ballman // 41*ed8f7882SAaron Ballman // FIXME: implement this step. 42*ed8f7882SAaron Ballman // 43*ed8f7882SAaron Ballman // 3) Run full best-match optimization on remaining brackets. 44*ed8f7882SAaron Ballman // 45*ed8f7882SAaron Ballman // Conceptually, this considers all possible matchings and optimizes cost: 46*ed8f7882SAaron Ballman // - there is a cost for failing to match a bracket 47*ed8f7882SAaron Ballman // - there is a variable cost for matching two brackets. 48*ed8f7882SAaron Ballman // (For example if brace indentation doesn't match). 49*ed8f7882SAaron Ballman // 50*ed8f7882SAaron Ballman // In the first example we have three alternatives, and they are ranked: 51*ed8f7882SAaron Ballman // 1) A=C, skip B 52*ed8f7882SAaron Ballman // 2) B=C, skip A 53*ed8f7882SAaron Ballman // 3) skip A, skip B, skip C 54*ed8f7882SAaron Ballman // The cost for skipping a bracket is high, so option 3 is worst. 55*ed8f7882SAaron Ballman // B=C costs more than A=C, because the indentation doesn't match. 56*ed8f7882SAaron Ballman // 57*ed8f7882SAaron Ballman // It would be correct to run this step alone, but it would be too slow. 58*ed8f7882SAaron Ballman // The implementation is dynamic programming in N^3 space and N^2 time. 59*ed8f7882SAaron Ballman // Having earlier steps filter out most brackets is key to performance. 60*ed8f7882SAaron Ballman // 61*ed8f7882SAaron Ballman // FIXME: implement this step. 62*ed8f7882SAaron Ballman // 63*ed8f7882SAaron Ballman //===----------------------------------------------------------------------===// 64*ed8f7882SAaron Ballman 65*ed8f7882SAaron Ballman #include "Bracket.h" 66*ed8f7882SAaron Ballman 67*ed8f7882SAaron Ballman namespace clang { 68*ed8f7882SAaron Ballman namespace clangd { 69*ed8f7882SAaron Ballman namespace { 70*ed8f7882SAaron Ballman 71*ed8f7882SAaron Ballman struct Bracket { 72*ed8f7882SAaron Ballman using Index = unsigned; 73*ed8f7882SAaron Ballman constexpr static Index None = -1; 74*ed8f7882SAaron Ballman 75*ed8f7882SAaron Ballman enum BracketKind : char { Paren, Brace, Square } Kind; 76*ed8f7882SAaron Ballman enum Direction : bool { Open, Close } Dir; 77*ed8f7882SAaron Ballman unsigned Line; 78*ed8f7882SAaron Ballman unsigned Indent; 79*ed8f7882SAaron Ballman Token::Index Tok; 80*ed8f7882SAaron Ballman Bracket::Index Pair = None; 81*ed8f7882SAaron Ballman }; 82*ed8f7882SAaron Ballman 83*ed8f7882SAaron Ballman // Find brackets in the stream and convert to Bracket struct. 84*ed8f7882SAaron Ballman std::vector<Bracket> findBrackets(const TokenStream &Stream) { 85*ed8f7882SAaron Ballman std::vector<Bracket> Brackets; 86*ed8f7882SAaron Ballman auto Add = [&](const Token &Tok, Bracket::BracketKind K, 87*ed8f7882SAaron Ballman Bracket::Direction D) { 88*ed8f7882SAaron Ballman Brackets.push_back( 89*ed8f7882SAaron Ballman {K, D, Tok.Line, Tok.Indent, Stream.index(Tok), Bracket::None}); 90*ed8f7882SAaron Ballman }; 91*ed8f7882SAaron Ballman for (const auto &Tok : Stream.tokens()) { 92*ed8f7882SAaron Ballman switch (Tok.Kind) { 93*ed8f7882SAaron Ballman case clang::tok::l_paren: 94*ed8f7882SAaron Ballman Add(Tok, Bracket::Paren, Bracket::Open); 95*ed8f7882SAaron Ballman break; 96*ed8f7882SAaron Ballman case clang::tok::r_paren: 97*ed8f7882SAaron Ballman Add(Tok, Bracket::Paren, Bracket::Close); 98*ed8f7882SAaron Ballman break; 99*ed8f7882SAaron Ballman case clang::tok::l_brace: 100*ed8f7882SAaron Ballman Add(Tok, Bracket::Brace, Bracket::Open); 101*ed8f7882SAaron Ballman break; 102*ed8f7882SAaron Ballman case clang::tok::r_brace: 103*ed8f7882SAaron Ballman Add(Tok, Bracket::Brace, Bracket::Close); 104*ed8f7882SAaron Ballman break; 105*ed8f7882SAaron Ballman case clang::tok::l_square: 106*ed8f7882SAaron Ballman Add(Tok, Bracket::Square, Bracket::Open); 107*ed8f7882SAaron Ballman break; 108*ed8f7882SAaron Ballman case clang::tok::r_square: 109*ed8f7882SAaron Ballman Add(Tok, Bracket::Square, Bracket::Close); 110*ed8f7882SAaron Ballman break; 111*ed8f7882SAaron Ballman default: 112*ed8f7882SAaron Ballman break; 113*ed8f7882SAaron Ballman } 114*ed8f7882SAaron Ballman } 115*ed8f7882SAaron Ballman return Brackets; 116*ed8f7882SAaron Ballman } 117*ed8f7882SAaron Ballman 118*ed8f7882SAaron Ballman // Write the bracket pairings from Brackets back to Tokens. 119*ed8f7882SAaron Ballman void applyPairings(ArrayRef<Bracket> Brackets, TokenStream &Tokens) { 120*ed8f7882SAaron Ballman for (const auto &B : Brackets) 121*ed8f7882SAaron Ballman Tokens.tokens()[B.Tok].Pair = 122*ed8f7882SAaron Ballman (B.Pair == Bracket::None) ? 0 : (int32_t)Brackets[B.Pair].Tok - B.Tok; 123*ed8f7882SAaron Ballman } 124*ed8f7882SAaron Ballman 125*ed8f7882SAaron Ballman // Find perfect pairings (ignoring whitespace) via greedy algorithm. 126*ed8f7882SAaron Ballman // This means two brackets are paired if they match and the brackets between 127*ed8f7882SAaron Ballman // them nest perfectly, with no skipped or crossed brackets. 128*ed8f7882SAaron Ballman void simplePairBrackets(MutableArrayRef<Bracket> Brackets) { 129*ed8f7882SAaron Ballman std::vector<unsigned> Stack; 130*ed8f7882SAaron Ballman for (unsigned I = 0; I < Brackets.size(); ++I) { 131*ed8f7882SAaron Ballman if (Brackets[I].Dir == Bracket::Open) { 132*ed8f7882SAaron Ballman Stack.push_back(I); 133*ed8f7882SAaron Ballman } else if (!Stack.empty() && 134*ed8f7882SAaron Ballman Brackets[Stack.back()].Kind == Brackets[I].Kind) { 135*ed8f7882SAaron Ballman Brackets[Stack.back()].Pair = I; 136*ed8f7882SAaron Ballman Brackets[I].Pair = Stack.back(); 137*ed8f7882SAaron Ballman Stack.pop_back(); 138*ed8f7882SAaron Ballman } else { 139*ed8f7882SAaron Ballman // Unpaired closer, no brackets on stack are part of a perfect sequence. 140*ed8f7882SAaron Ballman Stack.clear(); 141*ed8f7882SAaron Ballman } 142*ed8f7882SAaron Ballman } 143*ed8f7882SAaron Ballman // Any remaining brackets on the stack stay unpaired. 144*ed8f7882SAaron Ballman } 145*ed8f7882SAaron Ballman 146*ed8f7882SAaron Ballman } // namespace 147*ed8f7882SAaron Ballman 148*ed8f7882SAaron Ballman void pairBrackets(TokenStream &Stream) { 149*ed8f7882SAaron Ballman auto Brackets = findBrackets(Stream); 150*ed8f7882SAaron Ballman simplePairBrackets(Brackets); 151*ed8f7882SAaron Ballman applyPairings(Brackets, Stream); 152*ed8f7882SAaron Ballman } 153*ed8f7882SAaron Ballman 154*ed8f7882SAaron Ballman } // namespace clangd 155*ed8f7882SAaron Ballman } // namespace clang 156