1 //===--- Bracket.cpp - Analyze bracket structure --------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // The basic phases of our bracket matching are: 10 // 11 // 1) A simple "greedy" match looks for well-nested subsequences. 12 // 13 // We can't fully trust the results of this, consider: 14 // while (1) { // A 15 // if (true) { // B 16 // break; 17 // } // C 18 // Greedy matching will match B=C, when we should at least consider A=C. 19 // However for the correct parts of the file, the greedy match gives the 20 // right answer. It produces useful candidates for phase 2. 21 // 22 // simplePairBrackets handles this step. 23 // 24 // 2) Try to identify places where formatting indicates that the greedy match 25 // was correct. This is similar to how a human would scan a large file. 26 // 27 // For example: 28 // int foo() { // X 29 // // indented 30 // while (1) { 31 // // valid code 32 // } 33 // return bar(42); 34 // } // Y 35 // We can "verify" that X..Y looks like a braced block, and the greedy match 36 // tells us that substring is perfectly nested. 37 // We trust the pairings of those brackets and don't examine them further. 38 // However in the first example above, we do not trust B=C because the brace 39 // indentation is suspect. 40 // 41 // FIXME: implement this step. 42 // 43 // 3) Run full best-match optimization on remaining brackets. 44 // 45 // Conceptually, this considers all possible matchings and optimizes cost: 46 // - there is a cost for failing to match a bracket 47 // - there is a variable cost for matching two brackets. 48 // (For example if brace indentation doesn't match). 49 // 50 // In the first example we have three alternatives, and they are ranked: 51 // 1) A=C, skip B 52 // 2) B=C, skip A 53 // 3) skip A, skip B, skip C 54 // The cost for skipping a bracket is high, so option 3 is worst. 55 // B=C costs more than A=C, because the indentation doesn't match. 56 // 57 // It would be correct to run this step alone, but it would be too slow. 58 // The implementation is dynamic programming in N^3 space and N^2 time. 59 // Having earlier steps filter out most brackets is key to performance. 60 // 61 // FIXME: implement this step. 62 // 63 //===----------------------------------------------------------------------===// 64 65 #include "Bracket.h" 66 67 namespace clang { 68 namespace clangd { 69 namespace { 70 71 struct Bracket { 72 using Index = unsigned; 73 constexpr static Index None = -1; 74 75 enum BracketKind : char { Paren, Brace, Square } Kind; 76 enum Direction : bool { Open, Close } Dir; 77 unsigned Line; 78 unsigned Indent; 79 Token::Index Tok; 80 Bracket::Index Pair = None; 81 }; 82 83 // Find brackets in the stream and convert to Bracket struct. 84 std::vector<Bracket> findBrackets(const TokenStream &Stream) { 85 std::vector<Bracket> Brackets; 86 auto Add = [&](const Token &Tok, Bracket::BracketKind K, 87 Bracket::Direction D) { 88 Brackets.push_back( 89 {K, D, Tok.Line, Tok.Indent, Stream.index(Tok), Bracket::None}); 90 }; 91 for (const auto &Tok : Stream.tokens()) { 92 switch (Tok.Kind) { 93 case clang::tok::l_paren: 94 Add(Tok, Bracket::Paren, Bracket::Open); 95 break; 96 case clang::tok::r_paren: 97 Add(Tok, Bracket::Paren, Bracket::Close); 98 break; 99 case clang::tok::l_brace: 100 Add(Tok, Bracket::Brace, Bracket::Open); 101 break; 102 case clang::tok::r_brace: 103 Add(Tok, Bracket::Brace, Bracket::Close); 104 break; 105 case clang::tok::l_square: 106 Add(Tok, Bracket::Square, Bracket::Open); 107 break; 108 case clang::tok::r_square: 109 Add(Tok, Bracket::Square, Bracket::Close); 110 break; 111 default: 112 break; 113 } 114 } 115 return Brackets; 116 } 117 118 // Write the bracket pairings from Brackets back to Tokens. 119 void applyPairings(ArrayRef<Bracket> Brackets, TokenStream &Tokens) { 120 for (const auto &B : Brackets) 121 Tokens.tokens()[B.Tok].Pair = 122 (B.Pair == Bracket::None) ? 0 : (int32_t)Brackets[B.Pair].Tok - B.Tok; 123 } 124 125 // Find perfect pairings (ignoring whitespace) via greedy algorithm. 126 // This means two brackets are paired if they match and the brackets between 127 // them nest perfectly, with no skipped or crossed brackets. 128 void simplePairBrackets(MutableArrayRef<Bracket> Brackets) { 129 std::vector<unsigned> Stack; 130 for (unsigned I = 0; I < Brackets.size(); ++I) { 131 if (Brackets[I].Dir == Bracket::Open) { 132 Stack.push_back(I); 133 } else if (!Stack.empty() && 134 Brackets[Stack.back()].Kind == Brackets[I].Kind) { 135 Brackets[Stack.back()].Pair = I; 136 Brackets[I].Pair = Stack.back(); 137 Stack.pop_back(); 138 } else { 139 // Unpaired closer, no brackets on stack are part of a perfect sequence. 140 Stack.clear(); 141 } 142 } 143 // Any remaining brackets on the stack stay unpaired. 144 } 145 146 } // namespace 147 148 void pairBrackets(TokenStream &Stream) { 149 auto Brackets = findBrackets(Stream); 150 simplePairBrackets(Brackets); 151 applyPairings(Brackets, Stream); 152 } 153 154 } // namespace clangd 155 } // namespace clang 156