1 //===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 /// 10 /// \file 11 /// \brief This file contains the declaration of the FormatToken, a wrapper 12 /// around Token with additional information related to formatting. 13 /// 14 //===----------------------------------------------------------------------===// 15 16 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H 17 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H 18 19 #include "clang/Basic/IdentifierTable.h" 20 #include "clang/Basic/OperatorPrecedence.h" 21 #include "clang/Format/Format.h" 22 #include "clang/Lex/Lexer.h" 23 #include <memory> 24 25 namespace clang { 26 namespace format { 27 28 enum TokenType { 29 TT_ArrayInitializerLSquare, 30 TT_ArraySubscriptLSquare, 31 TT_AttributeParen, 32 TT_BinaryOperator, 33 TT_BitFieldColon, 34 TT_BlockComment, 35 TT_CastRParen, 36 TT_ConditionalExpr, 37 TT_ConflictAlternative, 38 TT_ConflictEnd, 39 TT_ConflictStart, 40 TT_CtorInitializerColon, 41 TT_CtorInitializerComma, 42 TT_DesignatedInitializerPeriod, 43 TT_DictLiteral, 44 TT_FunctionDeclarationName, 45 TT_FunctionLBrace, 46 TT_FunctionTypeLParen, 47 TT_ImplicitStringLiteral, 48 TT_InheritanceColon, 49 TT_InlineASMColon, 50 TT_JavaAnnotation, 51 TT_LambdaArrow, 52 TT_LambdaLSquare, 53 TT_LeadingJavaAnnotation, 54 TT_LineComment, 55 TT_ObjCBlockLBrace, 56 TT_ObjCBlockLParen, 57 TT_ObjCDecl, 58 TT_ObjCForIn, 59 TT_ObjCMethodExpr, 60 TT_ObjCMethodSpecifier, 61 TT_ObjCProperty, 62 TT_OverloadedOperator, 63 TT_OverloadedOperatorLParen, 64 TT_PointerOrReference, 65 TT_PureVirtualSpecifier, 66 TT_RangeBasedForLoopColon, 67 TT_RegexLiteral, 68 TT_SelectorName, 69 TT_StartOfName, 70 TT_TemplateCloser, 71 TT_TemplateOpener, 72 TT_TrailingAnnotation, 73 TT_TrailingReturnArrow, 74 TT_TrailingUnaryOperator, 75 TT_UnaryOperator, 76 TT_Unknown 77 }; 78 79 // Represents what type of block a set of braces open. 80 enum BraceBlockKind { 81 BK_Unknown, 82 BK_Block, 83 BK_BracedInit 84 }; 85 86 // The packing kind of a function's parameters. 87 enum ParameterPackingKind { 88 PPK_BinPacked, 89 PPK_OnePerLine, 90 PPK_Inconclusive 91 }; 92 93 enum FormatDecision { 94 FD_Unformatted, 95 FD_Continue, 96 FD_Break 97 }; 98 99 class TokenRole; 100 class AnnotatedLine; 101 102 /// \brief A wrapper around a \c Token storing information about the 103 /// whitespace characters preceding it. 104 struct FormatToken { FormatTokenFormatToken105 FormatToken() 106 : NewlinesBefore(0), HasUnescapedNewline(false), LastNewlineOffset(0), 107 ColumnWidth(0), LastLineColumnWidth(0), IsMultiline(false), 108 IsFirst(false), MustBreakBefore(false), IsUnterminatedLiteral(false), 109 BlockKind(BK_Unknown), Type(TT_Unknown), SpacesRequiredBefore(0), 110 CanBreakBefore(false), ClosesTemplateDeclaration(false), 111 ParameterCount(0), BlockParameterCount(0), 112 PackingKind(PPK_Inconclusive), TotalLength(0), UnbreakableTailLength(0), 113 BindingStrength(0), NestingLevel(0), SplitPenalty(0), 114 LongestObjCSelectorName(0), FakeRParens(0), 115 StartsBinaryExpression(false), EndsBinaryExpression(false), 116 OperatorIndex(0), LastOperator(false), 117 PartOfMultiVariableDeclStmt(false), IsForEachMacro(false), 118 MatchingParen(nullptr), Previous(nullptr), Next(nullptr), 119 Decision(FD_Unformatted), Finalized(false) {} 120 121 /// \brief The \c Token. 122 Token Tok; 123 124 /// \brief The number of newlines immediately before the \c Token. 125 /// 126 /// This can be used to determine what the user wrote in the original code 127 /// and thereby e.g. leave an empty line between two function definitions. 128 unsigned NewlinesBefore; 129 130 /// \brief Whether there is at least one unescaped newline before the \c 131 /// Token. 132 bool HasUnescapedNewline; 133 134 /// \brief The range of the whitespace immediately preceding the \c Token. 135 SourceRange WhitespaceRange; 136 137 /// \brief The offset just past the last '\n' in this token's leading 138 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. 139 unsigned LastNewlineOffset; 140 141 /// \brief The width of the non-whitespace parts of the token (or its first 142 /// line for multi-line tokens) in columns. 143 /// We need this to correctly measure number of columns a token spans. 144 unsigned ColumnWidth; 145 146 /// \brief Contains the width in columns of the last line of a multi-line 147 /// token. 148 unsigned LastLineColumnWidth; 149 150 /// \brief Whether the token text contains newlines (escaped or not). 151 bool IsMultiline; 152 153 /// \brief Indicates that this is the first token. 154 bool IsFirst; 155 156 /// \brief Whether there must be a line break before this token. 157 /// 158 /// This happens for example when a preprocessor directive ended directly 159 /// before the token. 160 bool MustBreakBefore; 161 162 /// \brief Returns actual token start location without leading escaped 163 /// newlines and whitespace. 164 /// 165 /// This can be different to Tok.getLocation(), which includes leading escaped 166 /// newlines. getStartOfNonWhitespaceFormatToken167 SourceLocation getStartOfNonWhitespace() const { 168 return WhitespaceRange.getEnd(); 169 } 170 171 /// \brief The raw text of the token. 172 /// 173 /// Contains the raw token text without leading whitespace and without leading 174 /// escaped newlines. 175 StringRef TokenText; 176 177 /// \brief Set to \c true if this token is an unterminated literal. 178 bool IsUnterminatedLiteral; 179 180 /// \brief Contains the kind of block if this token is a brace. 181 BraceBlockKind BlockKind; 182 183 TokenType Type; 184 185 /// \brief The number of spaces that should be inserted before this token. 186 unsigned SpacesRequiredBefore; 187 188 /// \brief \c true if it is allowed to break before this token. 189 bool CanBreakBefore; 190 191 bool ClosesTemplateDeclaration; 192 193 /// \brief Number of parameters, if this is "(", "[" or "<". 194 /// 195 /// This is initialized to 1 as we don't need to distinguish functions with 196 /// 0 parameters from functions with 1 parameter. Thus, we can simply count 197 /// the number of commas. 198 unsigned ParameterCount; 199 200 /// \brief Number of parameters that are nested blocks, 201 /// if this is "(", "[" or "<". 202 unsigned BlockParameterCount; 203 204 /// \brief A token can have a special role that can carry extra information 205 /// about the token's formatting. 206 std::unique_ptr<TokenRole> Role; 207 208 /// \brief If this is an opening parenthesis, how are the parameters packed? 209 ParameterPackingKind PackingKind; 210 211 /// \brief The total length of the unwrapped line up to and including this 212 /// token. 213 unsigned TotalLength; 214 215 /// \brief The original 0-based column of this token, including expanded tabs. 216 /// The configured TabWidth is used as tab width. 217 unsigned OriginalColumn; 218 219 /// \brief The length of following tokens until the next natural split point, 220 /// or the next token that can be broken. 221 unsigned UnbreakableTailLength; 222 223 // FIXME: Come up with a 'cleaner' concept. 224 /// \brief The binding strength of a token. This is a combined value of 225 /// operator precedence, parenthesis nesting, etc. 226 unsigned BindingStrength; 227 228 /// \brief The nesting level of this token, i.e. the number of surrounding (), 229 /// [], {} or <>. 230 unsigned NestingLevel; 231 232 /// \brief Penalty for inserting a line break before this token. 233 unsigned SplitPenalty; 234 235 /// \brief If this is the first ObjC selector name in an ObjC method 236 /// definition or call, this contains the length of the longest name. 237 /// 238 /// This being set to 0 means that the selectors should not be colon-aligned, 239 /// e.g. because several of them are block-type. 240 unsigned LongestObjCSelectorName; 241 242 /// \brief Stores the number of required fake parentheses and the 243 /// corresponding operator precedence. 244 /// 245 /// If multiple fake parentheses start at a token, this vector stores them in 246 /// reverse order, i.e. inner fake parenthesis first. 247 SmallVector<prec::Level, 4> FakeLParens; 248 /// \brief Insert this many fake ) after this token for correct indentation. 249 unsigned FakeRParens; 250 251 /// \brief \c true if this token starts a binary expression, i.e. has at least 252 /// one fake l_paren with a precedence greater than prec::Unknown. 253 bool StartsBinaryExpression; 254 /// \brief \c true if this token ends a binary expression. 255 bool EndsBinaryExpression; 256 257 /// \brief Is this is an operator (or "."/"->") in a sequence of operators 258 /// with the same precedence, contains the 0-based operator index. 259 unsigned OperatorIndex; 260 261 /// \brief Is this the last operator (or "."/"->") in a sequence of operators 262 /// with the same precedence? 263 bool LastOperator; 264 265 /// \brief Is this token part of a \c DeclStmt defining multiple variables? 266 /// 267 /// Only set if \c Type == \c TT_StartOfName. 268 bool PartOfMultiVariableDeclStmt; 269 270 /// \brief Is this a foreach macro? 271 bool IsForEachMacro; 272 isFormatToken273 bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } isFormatToken274 bool is(TokenType TT) const { return Type == TT; } isFormatToken275 bool is(const IdentifierInfo *II) const { 276 return II && II == Tok.getIdentifierInfo(); 277 } isOneOfFormatToken278 template <typename A, typename B> bool isOneOf(A K1, B K2) const { 279 return is(K1) || is(K2); 280 } 281 template <typename A, typename B, typename C> isOneOfFormatToken282 bool isOneOf(A K1, B K2, C K3) const { 283 return is(K1) || is(K2) || is(K3); 284 } 285 template <typename A, typename B, typename C, typename D> isOneOfFormatToken286 bool isOneOf(A K1, B K2, C K3, D K4) const { 287 return is(K1) || is(K2) || is(K3) || is(K4); 288 } 289 template <typename A, typename B, typename C, typename D, typename E> isOneOfFormatToken290 bool isOneOf(A K1, B K2, C K3, D K4, E K5) const { 291 return is(K1) || is(K2) || is(K3) || is(K4) || is(K5); 292 } 293 template <typename T> 294 bool isOneOf(T K1, T K2, T K3, T K4, T K5, T K6, T K7 = tok::NUM_TOKENS, 295 T K8 = tok::NUM_TOKENS, T K9 = tok::NUM_TOKENS, 296 T K10 = tok::NUM_TOKENS, T K11 = tok::NUM_TOKENS, 297 T K12 = tok::NUM_TOKENS) const { 298 return is(K1) || is(K2) || is(K3) || is(K4) || is(K5) || is(K6) || is(K7) || 299 is(K8) || is(K9) || is(K10) || is(K11) || is(K12); 300 } 301 isNotFormatToken302 template <typename T> bool isNot(T Kind) const { return !is(Kind); } 303 isStringLiteralFormatToken304 bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } 305 isObjCAtKeywordFormatToken306 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 307 return Tok.isObjCAtKeyword(Kind); 308 } 309 310 bool isAccessSpecifier(bool ColonRequired = true) const { 311 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) && 312 (!ColonRequired || (Next && Next->is(tok::colon))); 313 } 314 315 /// \brief Determine whether the token is a simple-type-specifier. 316 bool isSimpleTypeSpecifier() const; 317 isObjCAccessSpecifierFormatToken318 bool isObjCAccessSpecifier() const { 319 return is(tok::at) && Next && (Next->isObjCAtKeyword(tok::objc_public) || 320 Next->isObjCAtKeyword(tok::objc_protected) || 321 Next->isObjCAtKeyword(tok::objc_package) || 322 Next->isObjCAtKeyword(tok::objc_private)); 323 } 324 325 /// \brief Returns whether \p Tok is ([{ or a template opening <. opensScopeFormatToken326 bool opensScope() const { 327 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square, 328 TT_TemplateOpener); 329 } 330 /// \brief Returns whether \p Tok is )]} or a template closing >. closesScopeFormatToken331 bool closesScope() const { 332 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square, 333 TT_TemplateCloser); 334 } 335 336 /// \brief Returns \c true if this is a "." or "->" accessing a member. isMemberAccessFormatToken337 bool isMemberAccess() const { 338 return isOneOf(tok::arrow, tok::period, tok::arrowstar) && 339 !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow); 340 } 341 isUnaryOperatorFormatToken342 bool isUnaryOperator() const { 343 switch (Tok.getKind()) { 344 case tok::plus: 345 case tok::plusplus: 346 case tok::minus: 347 case tok::minusminus: 348 case tok::exclaim: 349 case tok::tilde: 350 case tok::kw_sizeof: 351 case tok::kw_alignof: 352 return true; 353 default: 354 return false; 355 } 356 } 357 isBinaryOperatorFormatToken358 bool isBinaryOperator() const { 359 // Comma is a binary operator, but does not behave as such wrt. formatting. 360 return getPrecedence() > prec::Comma; 361 } 362 isTrailingCommentFormatToken363 bool isTrailingComment() const { 364 return is(tok::comment) && 365 (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0); 366 } 367 368 /// \brief Returns \c true if this is a keyword that can be used 369 /// like a function call (e.g. sizeof, typeid, ...). isFunctionLikeKeywordFormatToken370 bool isFunctionLikeKeyword() const { 371 switch (Tok.getKind()) { 372 case tok::kw_throw: 373 case tok::kw_typeid: 374 case tok::kw_return: 375 case tok::kw_sizeof: 376 case tok::kw_alignof: 377 case tok::kw_alignas: 378 case tok::kw_decltype: 379 case tok::kw_noexcept: 380 case tok::kw_static_assert: 381 case tok::kw___attribute: 382 return true; 383 default: 384 return false; 385 } 386 } 387 getPrecedenceFormatToken388 prec::Level getPrecedence() const { 389 return getBinOpPrecedence(Tok.getKind(), true, true); 390 } 391 392 /// \brief Returns the previous token ignoring comments. getPreviousNonCommentFormatToken393 FormatToken *getPreviousNonComment() const { 394 FormatToken *Tok = Previous; 395 while (Tok && Tok->is(tok::comment)) 396 Tok = Tok->Previous; 397 return Tok; 398 } 399 400 /// \brief Returns the next token ignoring comments. getNextNonCommentFormatToken401 const FormatToken *getNextNonComment() const { 402 const FormatToken *Tok = Next; 403 while (Tok && Tok->is(tok::comment)) 404 Tok = Tok->Next; 405 return Tok; 406 } 407 408 /// \brief Returns \c true if this tokens starts a block-type list, i.e. a 409 /// list that should be indented with a block indent. opensBlockTypeListFormatToken410 bool opensBlockTypeList(const FormatStyle &Style) const { 411 return is(TT_ArrayInitializerLSquare) || 412 (is(tok::l_brace) && 413 (BlockKind == BK_Block || is(TT_DictLiteral) || 414 (!Style.Cpp11BracedListStyle && NestingLevel == 0))); 415 } 416 417 /// \brief Same as opensBlockTypeList, but for the closing token. closesBlockTypeListFormatToken418 bool closesBlockTypeList(const FormatStyle &Style) const { 419 return MatchingParen && MatchingParen->opensBlockTypeList(Style); 420 } 421 422 FormatToken *MatchingParen; 423 424 FormatToken *Previous; 425 FormatToken *Next; 426 427 SmallVector<AnnotatedLine *, 1> Children; 428 429 /// \brief Stores the formatting decision for the token once it was made. 430 FormatDecision Decision; 431 432 /// \brief If \c true, this token has been fully formatted (indented and 433 /// potentially re-formatted inside), and we do not allow further formatting 434 /// changes. 435 bool Finalized; 436 437 private: 438 // Disallow copying. 439 FormatToken(const FormatToken &) LLVM_DELETED_FUNCTION; 440 void operator=(const FormatToken &) LLVM_DELETED_FUNCTION; 441 }; 442 443 class ContinuationIndenter; 444 struct LineState; 445 446 class TokenRole { 447 public: TokenRole(const FormatStyle & Style)448 TokenRole(const FormatStyle &Style) : Style(Style) {} 449 virtual ~TokenRole(); 450 451 /// \brief After the \c TokenAnnotator has finished annotating all the tokens, 452 /// this function precomputes required information for formatting. 453 virtual void precomputeFormattingInfos(const FormatToken *Token); 454 455 /// \brief Apply the special formatting that the given role demands. 456 /// 457 /// Assumes that the token having this role is already formatted. 458 /// 459 /// Continues formatting from \p State leaving indentation to \p Indenter and 460 /// returns the total penalty that this formatting incurs. formatFromToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)461 virtual unsigned formatFromToken(LineState &State, 462 ContinuationIndenter *Indenter, 463 bool DryRun) { 464 return 0; 465 } 466 467 /// \brief Same as \c formatFromToken, but assumes that the first token has 468 /// already been set thereby deciding on the first line break. formatAfterToken(LineState & State,ContinuationIndenter * Indenter,bool DryRun)469 virtual unsigned formatAfterToken(LineState &State, 470 ContinuationIndenter *Indenter, 471 bool DryRun) { 472 return 0; 473 } 474 475 /// \brief Notifies the \c Role that a comma was found. CommaFound(const FormatToken * Token)476 virtual void CommaFound(const FormatToken *Token) {} 477 478 protected: 479 const FormatStyle &Style; 480 }; 481 482 class CommaSeparatedList : public TokenRole { 483 public: CommaSeparatedList(const FormatStyle & Style)484 CommaSeparatedList(const FormatStyle &Style) 485 : TokenRole(Style), HasNestedBracedList(false) {} 486 487 void precomputeFormattingInfos(const FormatToken *Token) override; 488 489 unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, 490 bool DryRun) override; 491 492 unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, 493 bool DryRun) override; 494 495 /// \brief Adds \p Token as the next comma to the \c CommaSeparated list. CommaFound(const FormatToken * Token)496 void CommaFound(const FormatToken *Token) override { 497 Commas.push_back(Token); 498 } 499 500 private: 501 /// \brief A struct that holds information on how to format a given list with 502 /// a specific number of columns. 503 struct ColumnFormat { 504 /// \brief The number of columns to use. 505 unsigned Columns; 506 507 /// \brief The total width in characters. 508 unsigned TotalWidth; 509 510 /// \brief The number of lines required for this format. 511 unsigned LineCount; 512 513 /// \brief The size of each column in characters. 514 SmallVector<unsigned, 8> ColumnSizes; 515 }; 516 517 /// \brief Calculate which \c ColumnFormat fits best into 518 /// \p RemainingCharacters. 519 const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const; 520 521 /// \brief The ordered \c FormatTokens making up the commas of this list. 522 SmallVector<const FormatToken *, 8> Commas; 523 524 /// \brief The length of each of the list's items in characters including the 525 /// trailing comma. 526 SmallVector<unsigned, 8> ItemLengths; 527 528 /// \brief Precomputed formats that can be used for this list. 529 SmallVector<ColumnFormat, 4> Formats; 530 531 bool HasNestedBracedList; 532 }; 533 534 /// \brief Encapsulates keywords that are context sensitive or for languages not 535 /// properly supported by Clang's lexer. 536 struct AdditionalKeywords { AdditionalKeywordsAdditionalKeywords537 AdditionalKeywords(IdentifierTable &IdentTable) { 538 kw_in = &IdentTable.get("in"); 539 kw_CF_ENUM = &IdentTable.get("CF_ENUM"); 540 kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS"); 541 kw_NS_ENUM = &IdentTable.get("NS_ENUM"); 542 kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS"); 543 544 kw_finally = &IdentTable.get("finally"); 545 kw_function = &IdentTable.get("function"); 546 kw_var = &IdentTable.get("var"); 547 548 kw_abstract = &IdentTable.get("abstract"); 549 kw_extends = &IdentTable.get("extends"); 550 kw_final = &IdentTable.get("final"); 551 kw_implements = &IdentTable.get("implements"); 552 kw_instanceof = &IdentTable.get("instanceof"); 553 kw_interface = &IdentTable.get("interface"); 554 kw_native = &IdentTable.get("native"); 555 kw_package = &IdentTable.get("package"); 556 kw_synchronized = &IdentTable.get("synchronized"); 557 kw_throws = &IdentTable.get("throws"); 558 559 kw_option = &IdentTable.get("option"); 560 kw_optional = &IdentTable.get("optional"); 561 kw_repeated = &IdentTable.get("repeated"); 562 kw_required = &IdentTable.get("required"); 563 kw_returns = &IdentTable.get("returns"); 564 } 565 566 // ObjC context sensitive keywords. 567 IdentifierInfo *kw_in; 568 IdentifierInfo *kw_CF_ENUM; 569 IdentifierInfo *kw_CF_OPTIONS; 570 IdentifierInfo *kw_NS_ENUM; 571 IdentifierInfo *kw_NS_OPTIONS; 572 573 // JavaScript keywords. 574 IdentifierInfo *kw_finally; 575 IdentifierInfo *kw_function; 576 IdentifierInfo *kw_var; 577 578 // Java keywords. 579 IdentifierInfo *kw_abstract; 580 IdentifierInfo *kw_extends; 581 IdentifierInfo *kw_final; 582 IdentifierInfo *kw_implements; 583 IdentifierInfo *kw_instanceof; 584 IdentifierInfo *kw_interface; 585 IdentifierInfo *kw_native; 586 IdentifierInfo *kw_package; 587 IdentifierInfo *kw_synchronized; 588 IdentifierInfo *kw_throws; 589 590 // Proto keywords. 591 IdentifierInfo *kw_option; 592 IdentifierInfo *kw_optional; 593 IdentifierInfo *kw_repeated; 594 IdentifierInfo *kw_required; 595 IdentifierInfo *kw_returns; 596 }; 597 598 } // namespace format 599 } // namespace clang 600 601 #endif 602