1*e5dd7070Spatrick //===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===// 2*e5dd7070Spatrick // 3*e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information. 5*e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*e5dd7070Spatrick // 7*e5dd7070Spatrick //===----------------------------------------------------------------------===// 8*e5dd7070Spatrick /// 9*e5dd7070Spatrick /// \file 10*e5dd7070Spatrick /// This file contains the declaration of the UnwrappedLineParser, 11*e5dd7070Spatrick /// which turns a stream of tokens into UnwrappedLines. 12*e5dd7070Spatrick /// 13*e5dd7070Spatrick //===----------------------------------------------------------------------===// 14*e5dd7070Spatrick 15*e5dd7070Spatrick #ifndef LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H 16*e5dd7070Spatrick #define LLVM_CLANG_LIB_FORMAT_UNWRAPPEDLINEPARSER_H 17*e5dd7070Spatrick 18*e5dd7070Spatrick #include "FormatToken.h" 19*e5dd7070Spatrick #include "clang/Basic/IdentifierTable.h" 20*e5dd7070Spatrick #include "clang/Format/Format.h" 21*e5dd7070Spatrick #include "llvm/Support/Regex.h" 22*e5dd7070Spatrick #include <list> 23*e5dd7070Spatrick #include <stack> 24*e5dd7070Spatrick 25*e5dd7070Spatrick namespace clang { 26*e5dd7070Spatrick namespace format { 27*e5dd7070Spatrick 28*e5dd7070Spatrick struct UnwrappedLineNode; 29*e5dd7070Spatrick 30*e5dd7070Spatrick /// An unwrapped line is a sequence of \c Token, that we would like to 31*e5dd7070Spatrick /// put on a single line if there was no column limit. 32*e5dd7070Spatrick /// 33*e5dd7070Spatrick /// This is used as a main interface between the \c UnwrappedLineParser and the 34*e5dd7070Spatrick /// \c UnwrappedLineFormatter. The key property is that changing the formatting 35*e5dd7070Spatrick /// within an unwrapped line does not affect any other unwrapped lines. 36*e5dd7070Spatrick struct UnwrappedLine { 37*e5dd7070Spatrick UnwrappedLine(); 38*e5dd7070Spatrick 39*e5dd7070Spatrick // FIXME: Don't use std::list here. 40*e5dd7070Spatrick /// The \c Tokens comprising this \c UnwrappedLine. 41*e5dd7070Spatrick std::list<UnwrappedLineNode> Tokens; 42*e5dd7070Spatrick 43*e5dd7070Spatrick /// The indent level of the \c UnwrappedLine. 44*e5dd7070Spatrick unsigned Level; 45*e5dd7070Spatrick 46*e5dd7070Spatrick /// Whether this \c UnwrappedLine is part of a preprocessor directive. 47*e5dd7070Spatrick bool InPPDirective; 48*e5dd7070Spatrick 49*e5dd7070Spatrick bool MustBeDeclaration; 50*e5dd7070Spatrick 51*e5dd7070Spatrick /// If this \c UnwrappedLine closes a block in a sequence of lines, 52*e5dd7070Spatrick /// \c MatchingOpeningBlockLineIndex stores the index of the corresponding 53*e5dd7070Spatrick /// opening line. Otherwise, \c MatchingOpeningBlockLineIndex must be 54*e5dd7070Spatrick /// \c kInvalidIndex. 55*e5dd7070Spatrick size_t MatchingOpeningBlockLineIndex = kInvalidIndex; 56*e5dd7070Spatrick 57*e5dd7070Spatrick /// If this \c UnwrappedLine opens a block, stores the index of the 58*e5dd7070Spatrick /// line with the corresponding closing brace. 59*e5dd7070Spatrick size_t MatchingClosingBlockLineIndex = kInvalidIndex; 60*e5dd7070Spatrick 61*e5dd7070Spatrick static const size_t kInvalidIndex = -1; 62*e5dd7070Spatrick 63*e5dd7070Spatrick unsigned FirstStartColumn = 0; 64*e5dd7070Spatrick }; 65*e5dd7070Spatrick 66*e5dd7070Spatrick class UnwrappedLineConsumer { 67*e5dd7070Spatrick public: 68*e5dd7070Spatrick virtual ~UnwrappedLineConsumer() {} 69*e5dd7070Spatrick virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0; 70*e5dd7070Spatrick virtual void finishRun() = 0; 71*e5dd7070Spatrick }; 72*e5dd7070Spatrick 73*e5dd7070Spatrick class FormatTokenSource; 74*e5dd7070Spatrick 75*e5dd7070Spatrick class UnwrappedLineParser { 76*e5dd7070Spatrick public: 77*e5dd7070Spatrick UnwrappedLineParser(const FormatStyle &Style, 78*e5dd7070Spatrick const AdditionalKeywords &Keywords, 79*e5dd7070Spatrick unsigned FirstStartColumn, ArrayRef<FormatToken *> Tokens, 80*e5dd7070Spatrick UnwrappedLineConsumer &Callback); 81*e5dd7070Spatrick 82*e5dd7070Spatrick void parse(); 83*e5dd7070Spatrick 84*e5dd7070Spatrick private: 85*e5dd7070Spatrick void reset(); 86*e5dd7070Spatrick void parseFile(); 87*e5dd7070Spatrick void parseLevel(bool HasOpeningBrace); 88*e5dd7070Spatrick void parseBlock(bool MustBeDeclaration, bool AddLevel = true, 89*e5dd7070Spatrick bool MunchSemi = true); 90*e5dd7070Spatrick void parseChildBlock(); 91*e5dd7070Spatrick void parsePPDirective(); 92*e5dd7070Spatrick void parsePPDefine(); 93*e5dd7070Spatrick void parsePPIf(bool IfDef); 94*e5dd7070Spatrick void parsePPElIf(); 95*e5dd7070Spatrick void parsePPElse(); 96*e5dd7070Spatrick void parsePPEndIf(); 97*e5dd7070Spatrick void parsePPUnknown(); 98*e5dd7070Spatrick void readTokenWithJavaScriptASI(); 99*e5dd7070Spatrick void parseStructuralElement(); 100*e5dd7070Spatrick bool tryToParseBracedList(); 101*e5dd7070Spatrick bool parseBracedList(bool ContinueOnSemicolons = false, 102*e5dd7070Spatrick tok::TokenKind ClosingBraceKind = tok::r_brace); 103*e5dd7070Spatrick void parseParens(); 104*e5dd7070Spatrick void parseSquare(bool LambdaIntroducer = false); 105*e5dd7070Spatrick void parseIfThenElse(); 106*e5dd7070Spatrick void parseTryCatch(); 107*e5dd7070Spatrick void parseForOrWhileLoop(); 108*e5dd7070Spatrick void parseDoWhile(); 109*e5dd7070Spatrick void parseLabel(bool LeftAlignLabel = false); 110*e5dd7070Spatrick void parseCaseLabel(); 111*e5dd7070Spatrick void parseSwitch(); 112*e5dd7070Spatrick void parseNamespace(); 113*e5dd7070Spatrick void parseNew(); 114*e5dd7070Spatrick void parseAccessSpecifier(); 115*e5dd7070Spatrick bool parseEnum(); 116*e5dd7070Spatrick void parseJavaEnumBody(); 117*e5dd7070Spatrick // Parses a record (aka class) as a top level element. If ParseAsExpr is true, 118*e5dd7070Spatrick // parses the record as a child block, i.e. if the class declaration is an 119*e5dd7070Spatrick // expression. 120*e5dd7070Spatrick void parseRecord(bool ParseAsExpr = false); 121*e5dd7070Spatrick void parseObjCMethod(); 122*e5dd7070Spatrick void parseObjCProtocolList(); 123*e5dd7070Spatrick void parseObjCUntilAtEnd(); 124*e5dd7070Spatrick void parseObjCInterfaceOrImplementation(); 125*e5dd7070Spatrick bool parseObjCProtocol(); 126*e5dd7070Spatrick void parseJavaScriptEs6ImportExport(); 127*e5dd7070Spatrick void parseStatementMacro(); 128*e5dd7070Spatrick bool tryToParseLambda(); 129*e5dd7070Spatrick bool tryToParseLambdaIntroducer(); 130*e5dd7070Spatrick void tryToParseJSFunction(); 131*e5dd7070Spatrick void addUnwrappedLine(); 132*e5dd7070Spatrick bool eof() const; 133*e5dd7070Spatrick // LevelDifference is the difference of levels after and before the current 134*e5dd7070Spatrick // token. For example: 135*e5dd7070Spatrick // - if the token is '{' and opens a block, LevelDifference is 1. 136*e5dd7070Spatrick // - if the token is '}' and closes a block, LevelDifference is -1. 137*e5dd7070Spatrick void nextToken(int LevelDifference = 0); 138*e5dd7070Spatrick void readToken(int LevelDifference = 0); 139*e5dd7070Spatrick 140*e5dd7070Spatrick // Decides which comment tokens should be added to the current line and which 141*e5dd7070Spatrick // should be added as comments before the next token. 142*e5dd7070Spatrick // 143*e5dd7070Spatrick // Comments specifies the sequence of comment tokens to analyze. They get 144*e5dd7070Spatrick // either pushed to the current line or added to the comments before the next 145*e5dd7070Spatrick // token. 146*e5dd7070Spatrick // 147*e5dd7070Spatrick // NextTok specifies the next token. A null pointer NextTok is supported, and 148*e5dd7070Spatrick // signifies either the absence of a next token, or that the next token 149*e5dd7070Spatrick // shouldn't be taken into accunt for the analysis. 150*e5dd7070Spatrick void distributeComments(const SmallVectorImpl<FormatToken *> &Comments, 151*e5dd7070Spatrick const FormatToken *NextTok); 152*e5dd7070Spatrick 153*e5dd7070Spatrick // Adds the comment preceding the next token to unwrapped lines. 154*e5dd7070Spatrick void flushComments(bool NewlineBeforeNext); 155*e5dd7070Spatrick void pushToken(FormatToken *Tok); 156*e5dd7070Spatrick void calculateBraceTypes(bool ExpectClassBody = false); 157*e5dd7070Spatrick 158*e5dd7070Spatrick // Marks a conditional compilation edge (for example, an '#if', '#ifdef', 159*e5dd7070Spatrick // '#else' or merge conflict marker). If 'Unreachable' is true, assumes 160*e5dd7070Spatrick // this branch either cannot be taken (for example '#if false'), or should 161*e5dd7070Spatrick // not be taken in this round. 162*e5dd7070Spatrick void conditionalCompilationCondition(bool Unreachable); 163*e5dd7070Spatrick void conditionalCompilationStart(bool Unreachable); 164*e5dd7070Spatrick void conditionalCompilationAlternative(); 165*e5dd7070Spatrick void conditionalCompilationEnd(); 166*e5dd7070Spatrick 167*e5dd7070Spatrick bool isOnNewLine(const FormatToken &FormatTok); 168*e5dd7070Spatrick 169*e5dd7070Spatrick // Compute hash of the current preprocessor branch. 170*e5dd7070Spatrick // This is used to identify the different branches, and thus track if block 171*e5dd7070Spatrick // open and close in the same branch. 172*e5dd7070Spatrick size_t computePPHash() const; 173*e5dd7070Spatrick 174*e5dd7070Spatrick // FIXME: We are constantly running into bugs where Line.Level is incorrectly 175*e5dd7070Spatrick // subtracted from beyond 0. Introduce a method to subtract from Line.Level 176*e5dd7070Spatrick // and use that everywhere in the Parser. 177*e5dd7070Spatrick std::unique_ptr<UnwrappedLine> Line; 178*e5dd7070Spatrick 179*e5dd7070Spatrick // Comments are sorted into unwrapped lines by whether they are in the same 180*e5dd7070Spatrick // line as the previous token, or not. If not, they belong to the next token. 181*e5dd7070Spatrick // Since the next token might already be in a new unwrapped line, we need to 182*e5dd7070Spatrick // store the comments belonging to that token. 183*e5dd7070Spatrick SmallVector<FormatToken *, 1> CommentsBeforeNextToken; 184*e5dd7070Spatrick FormatToken *FormatTok; 185*e5dd7070Spatrick bool MustBreakBeforeNextToken; 186*e5dd7070Spatrick 187*e5dd7070Spatrick // The parsed lines. Only added to through \c CurrentLines. 188*e5dd7070Spatrick SmallVector<UnwrappedLine, 8> Lines; 189*e5dd7070Spatrick 190*e5dd7070Spatrick // Preprocessor directives are parsed out-of-order from other unwrapped lines. 191*e5dd7070Spatrick // Thus, we need to keep a list of preprocessor directives to be reported 192*e5dd7070Spatrick // after an unwrapped line that has been started was finished. 193*e5dd7070Spatrick SmallVector<UnwrappedLine, 4> PreprocessorDirectives; 194*e5dd7070Spatrick 195*e5dd7070Spatrick // New unwrapped lines are added via CurrentLines. 196*e5dd7070Spatrick // Usually points to \c &Lines. While parsing a preprocessor directive when 197*e5dd7070Spatrick // there is an unfinished previous unwrapped line, will point to 198*e5dd7070Spatrick // \c &PreprocessorDirectives. 199*e5dd7070Spatrick SmallVectorImpl<UnwrappedLine> *CurrentLines; 200*e5dd7070Spatrick 201*e5dd7070Spatrick // We store for each line whether it must be a declaration depending on 202*e5dd7070Spatrick // whether we are in a compound statement or not. 203*e5dd7070Spatrick std::vector<bool> DeclarationScopeStack; 204*e5dd7070Spatrick 205*e5dd7070Spatrick const FormatStyle &Style; 206*e5dd7070Spatrick const AdditionalKeywords &Keywords; 207*e5dd7070Spatrick 208*e5dd7070Spatrick llvm::Regex CommentPragmasRegex; 209*e5dd7070Spatrick 210*e5dd7070Spatrick FormatTokenSource *Tokens; 211*e5dd7070Spatrick UnwrappedLineConsumer &Callback; 212*e5dd7070Spatrick 213*e5dd7070Spatrick // FIXME: This is a temporary measure until we have reworked the ownership 214*e5dd7070Spatrick // of the format tokens. The goal is to have the actual tokens created and 215*e5dd7070Spatrick // owned outside of and handed into the UnwrappedLineParser. 216*e5dd7070Spatrick ArrayRef<FormatToken *> AllTokens; 217*e5dd7070Spatrick 218*e5dd7070Spatrick // Represents preprocessor branch type, so we can find matching 219*e5dd7070Spatrick // #if/#else/#endif directives. 220*e5dd7070Spatrick enum PPBranchKind { 221*e5dd7070Spatrick PP_Conditional, // Any #if, #ifdef, #ifndef, #elif, block outside #if 0 222*e5dd7070Spatrick PP_Unreachable // #if 0 or a conditional preprocessor block inside #if 0 223*e5dd7070Spatrick }; 224*e5dd7070Spatrick 225*e5dd7070Spatrick struct PPBranch { 226*e5dd7070Spatrick PPBranch(PPBranchKind Kind, size_t Line) : Kind(Kind), Line(Line) {} 227*e5dd7070Spatrick PPBranchKind Kind; 228*e5dd7070Spatrick size_t Line; 229*e5dd7070Spatrick }; 230*e5dd7070Spatrick 231*e5dd7070Spatrick // Keeps a stack of currently active preprocessor branching directives. 232*e5dd7070Spatrick SmallVector<PPBranch, 16> PPStack; 233*e5dd7070Spatrick 234*e5dd7070Spatrick // The \c UnwrappedLineParser re-parses the code for each combination 235*e5dd7070Spatrick // of preprocessor branches that can be taken. 236*e5dd7070Spatrick // To that end, we take the same branch (#if, #else, or one of the #elif 237*e5dd7070Spatrick // branches) for each nesting level of preprocessor branches. 238*e5dd7070Spatrick // \c PPBranchLevel stores the current nesting level of preprocessor 239*e5dd7070Spatrick // branches during one pass over the code. 240*e5dd7070Spatrick int PPBranchLevel; 241*e5dd7070Spatrick 242*e5dd7070Spatrick // Contains the current branch (#if, #else or one of the #elif branches) 243*e5dd7070Spatrick // for each nesting level. 244*e5dd7070Spatrick SmallVector<int, 8> PPLevelBranchIndex; 245*e5dd7070Spatrick 246*e5dd7070Spatrick // Contains the maximum number of branches at each nesting level. 247*e5dd7070Spatrick SmallVector<int, 8> PPLevelBranchCount; 248*e5dd7070Spatrick 249*e5dd7070Spatrick // Contains the number of branches per nesting level we are currently 250*e5dd7070Spatrick // in while parsing a preprocessor branch sequence. 251*e5dd7070Spatrick // This is used to update PPLevelBranchCount at the end of a branch 252*e5dd7070Spatrick // sequence. 253*e5dd7070Spatrick std::stack<int> PPChainBranchIndex; 254*e5dd7070Spatrick 255*e5dd7070Spatrick // Include guard search state. Used to fixup preprocessor indent levels 256*e5dd7070Spatrick // so that include guards do not participate in indentation. 257*e5dd7070Spatrick enum IncludeGuardState { 258*e5dd7070Spatrick IG_Inited, // Search started, looking for #ifndef. 259*e5dd7070Spatrick IG_IfNdefed, // #ifndef found, IncludeGuardToken points to condition. 260*e5dd7070Spatrick IG_Defined, // Matching #define found, checking other requirements. 261*e5dd7070Spatrick IG_Found, // All requirements met, need to fix indents. 262*e5dd7070Spatrick IG_Rejected, // Search failed or never started. 263*e5dd7070Spatrick }; 264*e5dd7070Spatrick 265*e5dd7070Spatrick // Current state of include guard search. 266*e5dd7070Spatrick IncludeGuardState IncludeGuard; 267*e5dd7070Spatrick 268*e5dd7070Spatrick // Points to the #ifndef condition for a potential include guard. Null unless 269*e5dd7070Spatrick // IncludeGuardState == IG_IfNdefed. 270*e5dd7070Spatrick FormatToken *IncludeGuardToken; 271*e5dd7070Spatrick 272*e5dd7070Spatrick // Contains the first start column where the source begins. This is zero for 273*e5dd7070Spatrick // normal source code and may be nonzero when formatting a code fragment that 274*e5dd7070Spatrick // does not start at the beginning of the file. 275*e5dd7070Spatrick unsigned FirstStartColumn; 276*e5dd7070Spatrick 277*e5dd7070Spatrick friend class ScopedLineState; 278*e5dd7070Spatrick friend class CompoundStatementIndenter; 279*e5dd7070Spatrick }; 280*e5dd7070Spatrick 281*e5dd7070Spatrick struct UnwrappedLineNode { 282*e5dd7070Spatrick UnwrappedLineNode() : Tok(nullptr) {} 283*e5dd7070Spatrick UnwrappedLineNode(FormatToken *Tok) : Tok(Tok) {} 284*e5dd7070Spatrick 285*e5dd7070Spatrick FormatToken *Tok; 286*e5dd7070Spatrick SmallVector<UnwrappedLine, 0> Children; 287*e5dd7070Spatrick }; 288*e5dd7070Spatrick 289*e5dd7070Spatrick inline UnwrappedLine::UnwrappedLine() 290*e5dd7070Spatrick : Level(0), InPPDirective(false), MustBeDeclaration(false), 291*e5dd7070Spatrick MatchingOpeningBlockLineIndex(kInvalidIndex) {} 292*e5dd7070Spatrick 293*e5dd7070Spatrick } // end namespace format 294*e5dd7070Spatrick } // end namespace clang 295*e5dd7070Spatrick 296*e5dd7070Spatrick #endif 297