//===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file contains the declaration of the FormatToken, a wrapper
/// around Token with additional information related to formatting.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
#define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H

#include "clang/Basic/IdentifierTable.h"
#include "clang/Basic/OperatorPrecedence.h"
#include "clang/Format/Format.h"
#include "clang/Lex/Lexer.h"
#include <unordered_set>

namespace clang {
namespace format {

// X-macro list of all token types. A user defines TYPE(X) before expanding
// LIST_TOKEN_TYPES and undefines it afterwards; the TokenType enum below
// expands every entry to an enumerator named TT_X. The order of the entries
// determines the enumerator values.
#define LIST_TOKEN_TYPES \
  TYPE(AfterPPDirective) \
  TYPE(ArrayInitializerLSquare) \
  TYPE(ArraySubscriptLSquare) \
  TYPE(AttributeColon) \
  TYPE(AttributeLParen) \
  TYPE(AttributeMacro) \
  TYPE(AttributeRParen) \
  TYPE(AttributeSquare) \
  TYPE(BinaryOperator) \
  TYPE(BitFieldColon) \
  TYPE(BlockComment) \
  /* l_brace of a block that is not the body of a (e.g. loop) statement. */ \
  TYPE(BlockLBrace) \
  TYPE(BracedListLBrace) \
  TYPE(CaseLabelArrow) \
  /* The colon at the end of a case label. */ \
  TYPE(CaseLabelColon) \
  TYPE(CastRParen) \
  TYPE(ClassLBrace) \
  TYPE(ClassRBrace) \
  TYPE(CompoundRequirementLBrace) \
  /* ternary ?: expression */ \
  TYPE(ConditionalExpr) \
  /* the condition in an if statement */ \
  TYPE(ConditionLParen) \
  TYPE(ConflictAlternative) \
  TYPE(ConflictEnd) \
  TYPE(ConflictStart) \
  /* l_brace of if/for/while */ \
  TYPE(ControlStatementLBrace) \
  TYPE(ControlStatementRBrace) \
  TYPE(CppCastLParen) \
  TYPE(CSharpGenericTypeConstraint) \
  TYPE(CSharpGenericTypeConstraintColon) \
  TYPE(CSharpGenericTypeConstraintComma) \
  TYPE(CSharpNamedArgumentColon) \
  TYPE(CSharpNullable) \
  TYPE(CSharpNullConditionalLSquare) \
  TYPE(CSharpStringLiteral) \
  TYPE(CtorInitializerColon) \
  TYPE(CtorInitializerComma) \
  TYPE(CtorDtorDeclName) \
  TYPE(DesignatedInitializerLSquare) \
  TYPE(DesignatedInitializerPeriod) \
  TYPE(DictLiteral) \
  TYPE(DoWhile) \
  TYPE(ElseLBrace) \
  TYPE(ElseRBrace) \
  TYPE(EnumLBrace) \
  TYPE(EnumRBrace) \
  TYPE(FatArrow) \
  TYPE(ForEachMacro) \
  TYPE(FunctionAnnotationRParen) \
  TYPE(FunctionDeclarationName) \
  TYPE(FunctionDeclarationLParen) \
  TYPE(FunctionLBrace) \
  TYPE(FunctionLikeOrFreestandingMacro) \
  TYPE(FunctionTypeLParen) \
  /* The colons as part of a C11 _Generic selection */ \
  TYPE(GenericSelectionColon) \
  /* The colon at the end of a goto label. */ \
  TYPE(GotoLabelColon) \
  TYPE(IfMacro) \
  TYPE(ImplicitStringLiteral) \
  TYPE(InheritanceColon) \
  TYPE(InheritanceComma) \
  TYPE(InlineASMBrace) \
  TYPE(InlineASMColon) \
  TYPE(InlineASMSymbolicNameLSquare) \
  TYPE(JavaAnnotation) \
  TYPE(JsAndAndEqual) \
  TYPE(JsComputedPropertyName) \
  TYPE(JsExponentiation) \
  TYPE(JsExponentiationEqual) \
  TYPE(JsPipePipeEqual) \
  TYPE(JsPrivateIdentifier) \
  TYPE(JsTypeColon) \
  TYPE(JsTypeOperator) \
  TYPE(JsTypeOptionalQuestion) \
  TYPE(LambdaArrow) \
  TYPE(LambdaDefinitionLParen) \
  TYPE(LambdaLBrace) \
  TYPE(LambdaLSquare) \
  TYPE(LeadingJavaAnnotation) \
  TYPE(LineComment) \
  TYPE(MacroBlockBegin) \
  TYPE(MacroBlockEnd) \
  TYPE(ModulePartitionColon) \
  TYPE(NamespaceLBrace) \
  TYPE(NamespaceMacro) \
  TYPE(NamespaceRBrace) \
  TYPE(NonNullAssertion) \
  TYPE(NullCoalescingEqual) \
  TYPE(NullCoalescingOperator) \
  TYPE(NullPropagatingOperator) \
  TYPE(ObjCBlockLBrace) \
  TYPE(ObjCBlockLParen) \
  TYPE(ObjCDecl) \
  TYPE(ObjCForIn) \
  TYPE(ObjCMethodExpr) \
  TYPE(ObjCMethodSpecifier) \
  TYPE(ObjCProperty) \
  TYPE(ObjCStringLiteral) \
  TYPE(OverloadedOperator) \
  TYPE(OverloadedOperatorLParen) \
  TYPE(PointerOrReference) \
  TYPE(ProtoExtensionLSquare) \
  TYPE(PureVirtualSpecifier) \
  TYPE(RangeBasedForLoopColon) \
  TYPE(RecordLBrace) \
  TYPE(RecordRBrace) \
  TYPE(RegexLiteral) \
  TYPE(RequiresClause) \
  TYPE(RequiresClauseInARequiresExpression) \
  TYPE(RequiresExpression) \
  TYPE(RequiresExpressionLBrace) \
  TYPE(RequiresExpressionLParen) \
  TYPE(SelectorName) \
  TYPE(StartOfName) \
  TYPE(StatementAttributeLikeMacro) \
  TYPE(StatementMacro) \
  /* A string that is part of a string concatenation. For C#, JavaScript, and \
   * Java, it is used for marking whether a string needs parentheses around it \
   * if it is to be split into parts joined by `+`. For Verilog, whether \
   * braces need to be added to split it. Not used for other languages. */ \
  TYPE(StringInConcatenation) \
  TYPE(StructLBrace) \
  TYPE(StructRBrace) \
  TYPE(StructuredBindingLSquare) \
  TYPE(SwitchExpressionLabel) \
  TYPE(SwitchExpressionLBrace) \
  TYPE(TableGenBangOperator) \
  TYPE(TableGenCondOperator) \
  TYPE(TableGenCondOperatorColon) \
  TYPE(TableGenCondOperatorComma) \
  TYPE(TableGenDAGArgCloser) \
  TYPE(TableGenDAGArgListColon) \
  TYPE(TableGenDAGArgListColonToAlign) \
  TYPE(TableGenDAGArgListComma) \
  TYPE(TableGenDAGArgListCommaToBreak) \
  TYPE(TableGenDAGArgOpener) \
  TYPE(TableGenDAGArgOpenerToBreak) \
  TYPE(TableGenDAGArgOperatorID) \
  TYPE(TableGenDAGArgOperatorToBreak) \
  TYPE(TableGenListCloser) \
  TYPE(TableGenListOpener) \
  TYPE(TableGenMultiLineString) \
  TYPE(TableGenTrailingPasteOperator) \
  TYPE(TableGenValueSuffix) \
  TYPE(TemplateCloser) \
  TYPE(TemplateOpener) \
  TYPE(TemplateString) \
  TYPE(TrailingAnnotation) \
  TYPE(TrailingReturnArrow) \
  TYPE(TrailingUnaryOperator) \
  TYPE(TypeDeclarationParen) \
  TYPE(TemplateName) \
  TYPE(TypeName) \
  TYPE(TypenameMacro) \
  TYPE(UnaryOperator) \
  TYPE(UnionLBrace) \
  TYPE(UnionRBrace) \
  TYPE(UntouchableMacroFunc) \
  TYPE(VariableTemplate) \
  /* Like in 'assign x = 0, y = 1;' . */ \
  TYPE(VerilogAssignComma) \
  /* like in begin : block */ \
  TYPE(VerilogBlockLabelColon) \
  /* The square bracket for the dimension part of the type name. \
   * In 'logic [1:0] x[1:0]', only the first '['. This way we can have space \
   * before the first bracket but not the second. */ \
  TYPE(VerilogDimensionedTypeName) \
  /* list of port connections or parameters in a module instantiation */ \
  TYPE(VerilogInstancePortComma) \
  TYPE(VerilogInstancePortLParen) \
  /* A parenthesized list within which line breaks are inserted by the \
   * formatter, for example the list of ports in a module header. */ \
  TYPE(VerilogMultiLineListLParen) \
  /* for the base in a number literal, not including the quote */ \
  TYPE(VerilogNumberBase) \
  /* like `(strong1, pull0)` */ \
  TYPE(VerilogStrength) \
  /* Things inside the table in user-defined primitives. */ \
  TYPE(VerilogTableItem) \
  /* those that separate ports of different types */ \
  TYPE(VerilogTypeComma) \
  TYPE(Unknown)

/// Determines the semantic type of a syntactic token, e.g. whether "<" is a
/// template opener or binary operator.
///
/// One enumerator TT_X is generated per TYPE(X) entry of LIST_TOKEN_TYPES.
/// NUM_TOKEN_TYPES follows the last generated enumerator, so its value equals
/// the number of entries in the list. The underlying type is uint8_t.
enum TokenType : uint8_t {
#define TYPE(X) TT_##X,
  LIST_TOKEN_TYPES
#undef TYPE
  NUM_TOKEN_TYPES
};

/// Determines the name of a token type.
const char *getTokenTypeName(TokenType Type);

// Represents what type of block a set of braces opens.
enum BraceBlockKind { BK_Unknown, BK_Block, BK_BracedInit };

// The packing kind of a function's parameters.
enum ParameterPackingKind { PPK_BinPacked, PPK_OnePerLine, PPK_Inconclusive };

// The formatting decision stored for a token (see FormatToken::getDecision).
enum FormatDecision { FD_Unformatted, FD_Continue, FD_Break };

/// Roles a token can take in a configured macro expansion.
enum MacroRole {
  /// The token was expanded from a macro argument when formatting the expanded
  /// token sequence.
  MR_ExpandedArg,
  /// The token is part of a macro argument that was previously formatted as
  /// expansion when formatting the unexpanded macro call.
  MR_UnexpandedArg,
  /// The token was expanded from a macro definition, and is not visible as part
  /// of the macro call.
  MR_Hidden,
};

struct FormatToken;

/// Contains information on the token's role in a macro expansion.
///
/// Given the following definitions:
/// A(X) = [ X ]
/// B(X) = < X >
/// C(X) = X
///
/// Consider the macro call:
/// A({B(C(C(x)))}) -> [{<x>}]
///
/// In this case, the tokens of the unexpanded macro call will have the
/// following relevant entries in their macro context (note that formatting
/// the unexpanded macro call happens *after* formatting the expanded macro
/// call):
///                   A( { B( C( C(x) ) ) } )
/// Role:             NN U NN NN NNUN N N U N  (N=None, U=UnexpandedArg)
///
///                   [  { <       x    > } ]
/// Role:             H  E H       E    H E H  (H=Hidden, E=ExpandedArg)
/// ExpandedFrom[0]:  A  A A       A    A A A
/// ExpandedFrom[1]:       B       B    B
/// ExpandedFrom[2]:               C
/// ExpandedFrom[3]:               C
/// StartOfExpansion: 1  0 1       2    0 0 0
/// EndOfExpansion:   0  0 0       2    1 0 1
struct MacroExpansion {
  /// Creates a macro context with the given \p Role. \c ExpandedFrom starts
  /// out empty and both expansion counters start at 0.
  MacroExpansion(MacroRole Role) : Role(Role) {}

  /// The token's role in the macro expansion.
  /// When formatting an expanded macro, all tokens that are part of macro
  /// arguments will be MR_ExpandedArg, while all tokens that are not visible in
  /// the macro call will be MR_Hidden.
  /// When formatting an unexpanded macro call, all tokens that are part of
  /// macro arguments will be MR_UnexpandedArg.
  MacroRole Role;

  /// The stack of macro call identifier tokens this token was expanded from.
  llvm::SmallVector<FormatToken *, 1> ExpandedFrom;

  /// The number of expansions of which this macro is the first entry.
  unsigned StartOfExpansion = 0;

  /// The number of currently open expansions in \c ExpandedFrom this macro is
  /// the last token in.
  unsigned EndOfExpansion = 0;
};

class TokenRole;
class AnnotatedLine;

/// A wrapper around a \c Token storing information about the
/// whitespace characters preceding it.
303 struct FormatToken { 304 FormatToken() 305 : HasUnescapedNewline(false), IsMultiline(false), IsFirst(false), 306 MustBreakBefore(false), MustBreakBeforeFinalized(false), 307 IsUnterminatedLiteral(false), CanBreakBefore(false), 308 ClosesTemplateDeclaration(false), StartsBinaryExpression(false), 309 EndsBinaryExpression(false), PartOfMultiVariableDeclStmt(false), 310 ContinuesLineCommentSection(false), Finalized(false), 311 ClosesRequiresClause(false), EndsCppAttributeGroup(false), 312 BlockKind(BK_Unknown), Decision(FD_Unformatted), 313 PackingKind(PPK_Inconclusive), TypeIsFinalized(false), 314 Type(TT_Unknown) {} 315 316 /// The \c Token. 317 Token Tok; 318 319 /// The raw text of the token. 320 /// 321 /// Contains the raw token text without leading whitespace and without leading 322 /// escaped newlines. 323 StringRef TokenText; 324 325 /// A token can have a special role that can carry extra information 326 /// about the token's formatting. 327 /// FIXME: Make FormatToken for parsing and AnnotatedToken two different 328 /// classes and make this a unique_ptr in the AnnotatedToken class. 329 std::shared_ptr<TokenRole> Role; 330 331 /// The range of the whitespace immediately preceding the \c Token. 332 SourceRange WhitespaceRange; 333 334 /// Whether there is at least one unescaped newline before the \c 335 /// Token. 336 unsigned HasUnescapedNewline : 1; 337 338 /// Whether the token text contains newlines (escaped or not). 339 unsigned IsMultiline : 1; 340 341 /// Indicates that this is the first token of the file. 342 unsigned IsFirst : 1; 343 344 /// Whether there must be a line break before this token. 345 /// 346 /// This happens for example when a preprocessor directive ended directly 347 /// before the token. 348 unsigned MustBreakBefore : 1; 349 350 /// Whether MustBreakBefore is finalized during parsing and must not 351 /// be reset between runs. 
352 unsigned MustBreakBeforeFinalized : 1; 353 354 /// Set to \c true if this token is an unterminated literal. 355 unsigned IsUnterminatedLiteral : 1; 356 357 /// \c true if it is allowed to break before this token. 358 unsigned CanBreakBefore : 1; 359 360 /// \c true if this is the ">" of "template<..>". 361 unsigned ClosesTemplateDeclaration : 1; 362 363 /// \c true if this token starts a binary expression, i.e. has at least 364 /// one fake l_paren with a precedence greater than prec::Unknown. 365 unsigned StartsBinaryExpression : 1; 366 /// \c true if this token ends a binary expression. 367 unsigned EndsBinaryExpression : 1; 368 369 /// Is this token part of a \c DeclStmt defining multiple variables? 370 /// 371 /// Only set if \c Type == \c TT_StartOfName. 372 unsigned PartOfMultiVariableDeclStmt : 1; 373 374 /// Does this line comment continue a line comment section? 375 /// 376 /// Only set to true if \c Type == \c TT_LineComment. 377 unsigned ContinuesLineCommentSection : 1; 378 379 /// If \c true, this token has been fully formatted (indented and 380 /// potentially re-formatted inside), and we do not allow further formatting 381 /// changes. 382 unsigned Finalized : 1; 383 384 /// \c true if this is the last token within requires clause. 385 unsigned ClosesRequiresClause : 1; 386 387 /// \c true if this token ends a group of C++ attributes. 388 unsigned EndsCppAttributeGroup : 1; 389 390 private: 391 /// Contains the kind of block if this token is a brace. 392 unsigned BlockKind : 2; 393 394 public: 395 BraceBlockKind getBlockKind() const { 396 return static_cast<BraceBlockKind>(BlockKind); 397 } 398 void setBlockKind(BraceBlockKind BBK) { 399 BlockKind = BBK; 400 assert(getBlockKind() == BBK && "BraceBlockKind overflow!"); 401 } 402 403 private: 404 /// Stores the formatting decision for the token once it was made. 
405 unsigned Decision : 2; 406 407 public: 408 FormatDecision getDecision() const { 409 return static_cast<FormatDecision>(Decision); 410 } 411 void setDecision(FormatDecision D) { 412 Decision = D; 413 assert(getDecision() == D && "FormatDecision overflow!"); 414 } 415 416 private: 417 /// If this is an opening parenthesis, how are the parameters packed? 418 unsigned PackingKind : 2; 419 420 public: 421 ParameterPackingKind getPackingKind() const { 422 return static_cast<ParameterPackingKind>(PackingKind); 423 } 424 void setPackingKind(ParameterPackingKind K) { 425 PackingKind = K; 426 assert(getPackingKind() == K && "ParameterPackingKind overflow!"); 427 } 428 429 private: 430 unsigned TypeIsFinalized : 1; 431 TokenType Type; 432 433 public: 434 /// Returns the token's type, e.g. whether "<" is a template opener or 435 /// binary operator. 436 TokenType getType() const { return Type; } 437 void setType(TokenType T) { 438 // If this token is a macro argument while formatting an unexpanded macro 439 // call, we do not change its type any more - the type was deduced from 440 // formatting the expanded macro stream already. 441 if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg) 442 return; 443 assert((!TypeIsFinalized || T == Type) && 444 "Please use overwriteFixedType to change a fixed type."); 445 Type = T; 446 } 447 /// Sets the type and also the finalized flag. This prevents the type to be 448 /// reset in TokenAnnotator::resetTokenMetadata(). If the type needs to be set 449 /// to another one please use overwriteFixedType, or even better remove the 450 /// need to reassign the type. 
451 void setFinalizedType(TokenType T) { 452 if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg) 453 return; 454 Type = T; 455 TypeIsFinalized = true; 456 } 457 void overwriteFixedType(TokenType T) { 458 if (MacroCtx && MacroCtx->Role == MR_UnexpandedArg) 459 return; 460 TypeIsFinalized = false; 461 setType(T); 462 } 463 bool isTypeFinalized() const { return TypeIsFinalized; } 464 465 /// Used to set an operator precedence explicitly. 466 prec::Level ForcedPrecedence = prec::Unknown; 467 468 /// The number of newlines immediately before the \c Token. 469 /// 470 /// This can be used to determine what the user wrote in the original code 471 /// and thereby e.g. leave an empty line between two function definitions. 472 unsigned NewlinesBefore = 0; 473 474 /// The number of newlines immediately before the \c Token after formatting. 475 /// 476 /// This is used to avoid overlapping whitespace replacements when \c Newlines 477 /// is recomputed for a finalized preprocessor branching directive. 478 int Newlines = -1; 479 480 /// The offset just past the last '\n' in this token's leading 481 /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'. 482 unsigned LastNewlineOffset = 0; 483 484 /// The width of the non-whitespace parts of the token (or its first 485 /// line for multi-line tokens) in columns. 486 /// We need this to correctly measure number of columns a token spans. 487 unsigned ColumnWidth = 0; 488 489 /// Contains the width in columns of the last line of a multi-line 490 /// token. 491 unsigned LastLineColumnWidth = 0; 492 493 /// The number of spaces that should be inserted before this token. 494 unsigned SpacesRequiredBefore = 0; 495 496 /// Number of parameters, if this is "(", "[" or "<". 497 unsigned ParameterCount = 0; 498 499 /// Number of parameters that are nested blocks, 500 /// if this is "(", "[" or "<". 
501 unsigned BlockParameterCount = 0; 502 503 /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of 504 /// the surrounding bracket. 505 tok::TokenKind ParentBracket = tok::unknown; 506 507 /// The total length of the unwrapped line up to and including this 508 /// token. 509 unsigned TotalLength = 0; 510 511 /// The original 0-based column of this token, including expanded tabs. 512 /// The configured TabWidth is used as tab width. 513 unsigned OriginalColumn = 0; 514 515 /// The length of following tokens until the next natural split point, 516 /// or the next token that can be broken. 517 unsigned UnbreakableTailLength = 0; 518 519 // FIXME: Come up with a 'cleaner' concept. 520 /// The binding strength of a token. This is a combined value of 521 /// operator precedence, parenthesis nesting, etc. 522 unsigned BindingStrength = 0; 523 524 /// The nesting level of this token, i.e. the number of surrounding (), 525 /// [], {} or <>. 526 unsigned NestingLevel = 0; 527 528 /// The indent level of this token. Copied from the surrounding line. 529 unsigned IndentLevel = 0; 530 531 /// Penalty for inserting a line break before this token. 532 unsigned SplitPenalty = 0; 533 534 /// If this is the first ObjC selector name in an ObjC method 535 /// definition or call, this contains the length of the longest name. 536 /// 537 /// This being set to 0 means that the selectors should not be colon-aligned, 538 /// e.g. because several of them are block-type. 539 unsigned LongestObjCSelectorName = 0; 540 541 /// If this is the first ObjC selector name in an ObjC method 542 /// definition or call, this contains the number of parts that the whole 543 /// selector consist of. 544 unsigned ObjCSelectorNameParts = 0; 545 546 /// The 0-based index of the parameter/argument. For ObjC it is set 547 /// for the selector name token. 548 /// For now calculated only for ObjC. 
549 unsigned ParameterIndex = 0; 550 551 /// Stores the number of required fake parentheses and the 552 /// corresponding operator precedence. 553 /// 554 /// If multiple fake parentheses start at a token, this vector stores them in 555 /// reverse order, i.e. inner fake parenthesis first. 556 SmallVector<prec::Level, 4> FakeLParens; 557 /// Insert this many fake ) after this token for correct indentation. 558 unsigned FakeRParens = 0; 559 560 /// If this is an operator (or "."/"->") in a sequence of operators 561 /// with the same precedence, contains the 0-based operator index. 562 unsigned OperatorIndex = 0; 563 564 /// If this is an operator (or "."/"->") in a sequence of operators 565 /// with the same precedence, points to the next operator. 566 FormatToken *NextOperator = nullptr; 567 568 /// If this is a bracket, this points to the matching one. 569 FormatToken *MatchingParen = nullptr; 570 571 /// The previous token in the unwrapped line. 572 FormatToken *Previous = nullptr; 573 574 /// The next token in the unwrapped line. 575 FormatToken *Next = nullptr; 576 577 /// The first token in set of column elements. 578 bool StartsColumn = false; 579 580 /// This notes the start of the line of an array initializer. 581 bool ArrayInitializerLineStart = false; 582 583 /// This starts an array initializer. 584 bool IsArrayInitializer = false; 585 586 /// Is optional and can be removed. 587 bool Optional = false; 588 589 /// Might be function declaration open/closing paren. 590 bool MightBeFunctionDeclParen = false; 591 592 /// Has "\n\f\n" or "\n\f\r\n" before TokenText. 593 bool HasFormFeedBefore = false; 594 595 /// Number of optional braces to be inserted after this token: 596 /// -1: a single left brace 597 /// 0: no braces 598 /// >0: number of right braces 599 int8_t BraceCount = 0; 600 601 /// If this token starts a block, this contains all the unwrapped lines 602 /// in it. 
603 SmallVector<AnnotatedLine *, 1> Children; 604 605 // Contains all attributes related to how this token takes part 606 // in a configured macro expansion. 607 std::optional<MacroExpansion> MacroCtx; 608 609 /// When macro expansion introduces nodes with children, those are marked as 610 /// \c MacroParent. 611 /// FIXME: The formatting code currently hard-codes the assumption that 612 /// child nodes are introduced by blocks following an opening brace. 613 /// This is deeply baked into the code and disentangling this will require 614 /// signficant refactorings. \c MacroParent allows us to special-case the 615 /// cases in which we treat parents as block-openers for now. 616 bool MacroParent = false; 617 618 bool is(tok::TokenKind Kind) const { return Tok.is(Kind); } 619 bool is(TokenType TT) const { return getType() == TT; } 620 bool is(const IdentifierInfo *II) const { 621 return II && II == Tok.getIdentifierInfo(); 622 } 623 bool is(tok::PPKeywordKind Kind) const { 624 return Tok.getIdentifierInfo() && 625 Tok.getIdentifierInfo()->getPPKeywordID() == Kind; 626 } 627 bool is(BraceBlockKind BBK) const { return getBlockKind() == BBK; } 628 bool is(ParameterPackingKind PPK) const { return getPackingKind() == PPK; } 629 630 template <typename A, typename B> bool isOneOf(A K1, B K2) const { 631 return is(K1) || is(K2); 632 } 633 template <typename A, typename B, typename... Ts> 634 bool isOneOf(A K1, B K2, Ts... 
Ks) const { 635 return is(K1) || isOneOf(K2, Ks...); 636 } 637 template <typename T> bool isNot(T Kind) const { return !is(Kind); } 638 639 bool isIf(bool AllowConstexprMacro = true) const { 640 return is(tok::kw_if) || endsSequence(tok::kw_constexpr, tok::kw_if) || 641 (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro); 642 } 643 644 bool closesScopeAfterBlock() const { 645 if (getBlockKind() == BK_Block) 646 return true; 647 if (closesScope()) 648 return Previous->closesScopeAfterBlock(); 649 return false; 650 } 651 652 /// \c true if this token starts a sequence with the given tokens in order, 653 /// following the ``Next`` pointers, ignoring comments. 654 template <typename A, typename... Ts> 655 bool startsSequence(A K1, Ts... Tokens) const { 656 return startsSequenceInternal(K1, Tokens...); 657 } 658 659 /// \c true if this token ends a sequence with the given tokens in order, 660 /// following the ``Previous`` pointers, ignoring comments. 661 /// For example, given tokens [T1, T2, T3], the function returns true if 662 /// 3 tokens ending at this (ignoring comments) are [T3, T2, T1]. In other 663 /// words, the tokens passed to this function need to the reverse of the 664 /// order the tokens appear in code. 665 template <typename A, typename... Ts> 666 bool endsSequence(A K1, Ts... 
Tokens) const { 667 return endsSequenceInternal(K1, Tokens...); 668 } 669 670 bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); } 671 672 bool isAttribute() const { 673 return isOneOf(tok::kw___attribute, tok::kw___declspec, TT_AttributeMacro); 674 } 675 676 bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const { 677 return Tok.isObjCAtKeyword(Kind); 678 } 679 680 bool isAccessSpecifierKeyword() const { 681 return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private); 682 } 683 684 bool isAccessSpecifier(bool ColonRequired = true) const { 685 if (!isAccessSpecifierKeyword()) 686 return false; 687 if (!ColonRequired) 688 return true; 689 const auto *NextNonComment = getNextNonComment(); 690 return NextNonComment && NextNonComment->is(tok::colon); 691 } 692 693 bool canBePointerOrReferenceQualifier() const { 694 return isOneOf(tok::kw_const, tok::kw_restrict, tok::kw_volatile, 695 tok::kw__Nonnull, tok::kw__Nullable, 696 tok::kw__Null_unspecified, tok::kw___ptr32, tok::kw___ptr64, 697 tok::kw___funcref) || 698 isAttribute(); 699 } 700 701 [[nodiscard]] bool isTypeName(const LangOptions &LangOpts) const; 702 [[nodiscard]] bool isTypeOrIdentifier(const LangOptions &LangOpts) const; 703 704 bool isObjCAccessSpecifier() const { 705 return is(tok::at) && Next && 706 (Next->isObjCAtKeyword(tok::objc_public) || 707 Next->isObjCAtKeyword(tok::objc_protected) || 708 Next->isObjCAtKeyword(tok::objc_package) || 709 Next->isObjCAtKeyword(tok::objc_private)); 710 } 711 712 /// Returns whether \p Tok is ([{ or an opening < of a template or in 713 /// protos. 714 bool opensScope() const { 715 if (is(TT_TemplateString) && TokenText.ends_with("${")) 716 return true; 717 if (is(TT_DictLiteral) && is(tok::less)) 718 return true; 719 return isOneOf(tok::l_paren, tok::l_brace, tok::l_square, 720 TT_TemplateOpener); 721 } 722 /// Returns whether \p Tok is )]} or a closing > of a template or in 723 /// protos. 
724 bool closesScope() const { 725 if (is(TT_TemplateString) && TokenText.starts_with("}")) 726 return true; 727 if (is(TT_DictLiteral) && is(tok::greater)) 728 return true; 729 return isOneOf(tok::r_paren, tok::r_brace, tok::r_square, 730 TT_TemplateCloser); 731 } 732 733 /// Returns \c true if this is a "." or "->" accessing a member. 734 bool isMemberAccess() const { 735 return isOneOf(tok::arrow, tok::period, tok::arrowstar) && 736 !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow, 737 TT_LambdaArrow, TT_LeadingJavaAnnotation); 738 } 739 740 bool isPointerOrReference() const { 741 return isOneOf(tok::star, tok::amp, tok::ampamp); 742 } 743 744 bool isCppAlternativeOperatorKeyword() const { 745 assert(!TokenText.empty()); 746 if (!isalpha(TokenText[0])) 747 return false; 748 749 switch (Tok.getKind()) { 750 case tok::ampamp: 751 case tok::ampequal: 752 case tok::amp: 753 case tok::pipe: 754 case tok::tilde: 755 case tok::exclaim: 756 case tok::exclaimequal: 757 case tok::pipepipe: 758 case tok::pipeequal: 759 case tok::caret: 760 case tok::caretequal: 761 return true; 762 default: 763 return false; 764 } 765 } 766 767 bool isUnaryOperator() const { 768 switch (Tok.getKind()) { 769 case tok::plus: 770 case tok::plusplus: 771 case tok::minus: 772 case tok::minusminus: 773 case tok::exclaim: 774 case tok::tilde: 775 case tok::kw_sizeof: 776 case tok::kw_alignof: 777 return true; 778 default: 779 return false; 780 } 781 } 782 783 bool isBinaryOperator() const { 784 // Comma is a binary operator, but does not behave as such wrt. formatting. 785 return getPrecedence() > prec::Comma; 786 } 787 788 bool isTrailingComment() const { 789 return is(tok::comment) && 790 (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0); 791 } 792 793 /// Returns \c true if this is a keyword that can be used 794 /// like a function call (e.g. sizeof, typeid, ...). 
795 bool isFunctionLikeKeyword() const { 796 if (isAttribute()) 797 return true; 798 799 return isOneOf(tok::kw_throw, tok::kw_typeid, tok::kw_return, 800 tok::kw_sizeof, tok::kw_alignof, tok::kw_alignas, 801 tok::kw_decltype, tok::kw_noexcept, tok::kw_static_assert, 802 tok::kw__Atomic, 803 #define TRANSFORM_TYPE_TRAIT_DEF(_, Trait) tok::kw___##Trait, 804 #include "clang/Basic/TransformTypeTraits.def" 805 tok::kw_requires); 806 } 807 808 /// Returns \c true if this is a string literal that's like a label, 809 /// e.g. ends with "=" or ":". 810 bool isLabelString() const { 811 if (isNot(tok::string_literal)) 812 return false; 813 StringRef Content = TokenText; 814 if (Content.starts_with("\"") || Content.starts_with("'")) 815 Content = Content.drop_front(1); 816 if (Content.ends_with("\"") || Content.ends_with("'")) 817 Content = Content.drop_back(1); 818 Content = Content.trim(); 819 return Content.size() > 1 && 820 (Content.back() == ':' || Content.back() == '='); 821 } 822 823 /// Returns actual token start location without leading escaped 824 /// newlines and whitespace. 825 /// 826 /// This can be different to Tok.getLocation(), which includes leading escaped 827 /// newlines. 828 SourceLocation getStartOfNonWhitespace() const { 829 return WhitespaceRange.getEnd(); 830 } 831 832 /// Returns \c true if the range of whitespace immediately preceding the \c 833 /// Token is not empty. 834 bool hasWhitespaceBefore() const { 835 return WhitespaceRange.getBegin() != WhitespaceRange.getEnd(); 836 } 837 838 prec::Level getPrecedence() const { 839 if (ForcedPrecedence != prec::Unknown) 840 return ForcedPrecedence; 841 return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true, 842 /*CPlusPlus11=*/true); 843 } 844 845 /// Returns the previous token ignoring comments. 
846 [[nodiscard]] FormatToken *getPreviousNonComment() const { 847 FormatToken *Tok = Previous; 848 while (Tok && Tok->is(tok::comment)) 849 Tok = Tok->Previous; 850 return Tok; 851 } 852 853 /// Returns the next token ignoring comments. 854 [[nodiscard]] FormatToken *getNextNonComment() const { 855 FormatToken *Tok = Next; 856 while (Tok && Tok->is(tok::comment)) 857 Tok = Tok->Next; 858 return Tok; 859 } 860 861 /// Returns \c true if this token ends a block indented initializer list. 862 [[nodiscard]] bool isBlockIndentedInitRBrace(const FormatStyle &Style) const; 863 864 /// Returns \c true if this tokens starts a block-type list, i.e. a 865 /// list that should be indented with a block indent. 866 [[nodiscard]] bool opensBlockOrBlockTypeList(const FormatStyle &Style) const; 867 868 /// Returns whether the token is the left square bracket of a C++ 869 /// structured binding declaration. 870 bool isCppStructuredBinding(bool IsCpp) const { 871 if (!IsCpp || isNot(tok::l_square)) 872 return false; 873 const FormatToken *T = this; 874 do { 875 T = T->getPreviousNonComment(); 876 } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp, 877 tok::ampamp)); 878 return T && T->is(tok::kw_auto); 879 } 880 881 /// Same as opensBlockOrBlockTypeList, but for the closing token. 882 bool closesBlockOrBlockTypeList(const FormatStyle &Style) const { 883 if (is(TT_TemplateString) && closesScope()) 884 return true; 885 return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style); 886 } 887 888 /// Return the actual namespace token, if this token starts a namespace 889 /// block. 890 const FormatToken *getNamespaceToken() const { 891 const FormatToken *NamespaceTok = this; 892 if (is(tok::comment)) 893 NamespaceTok = NamespaceTok->getNextNonComment(); 894 // Detect "(inline|export)? namespace" in the beginning of a line. 
895 if (NamespaceTok && NamespaceTok->isOneOf(tok::kw_inline, tok::kw_export)) 896 NamespaceTok = NamespaceTok->getNextNonComment(); 897 return NamespaceTok && 898 NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro) 899 ? NamespaceTok 900 : nullptr; 901 } 902 903 void copyFrom(const FormatToken &Tok) { *this = Tok; } 904 905 private: 906 // Only allow copying via the explicit copyFrom method. 907 FormatToken(const FormatToken &) = delete; 908 FormatToken &operator=(const FormatToken &) = default; 909 910 template <typename A, typename... Ts> 911 bool startsSequenceInternal(A K1, Ts... Tokens) const { 912 if (is(tok::comment) && Next) 913 return Next->startsSequenceInternal(K1, Tokens...); 914 return is(K1) && Next && Next->startsSequenceInternal(Tokens...); 915 } 916 917 template <typename A> bool startsSequenceInternal(A K1) const { 918 if (is(tok::comment) && Next) 919 return Next->startsSequenceInternal(K1); 920 return is(K1); 921 } 922 923 template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const { 924 if (is(tok::comment) && Previous) 925 return Previous->endsSequenceInternal(K1); 926 return is(K1); 927 } 928 929 template <typename A, typename... Ts> 930 bool endsSequenceInternal(A K1, Ts... Tokens) const { 931 if (is(tok::comment) && Previous) 932 return Previous->endsSequenceInternal(K1, Tokens...); 933 return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...); 934 } 935 }; 936 937 class ContinuationIndenter; 938 struct LineState; 939 940 class TokenRole { 941 public: 942 TokenRole(const FormatStyle &Style) : Style(Style) {} 943 virtual ~TokenRole(); 944 945 /// After the \c TokenAnnotator has finished annotating all the tokens, 946 /// this function precomputes required information for formatting. 947 virtual void precomputeFormattingInfos(const FormatToken *Token); 948 949 /// Apply the special formatting that the given role demands. 950 /// 951 /// Assumes that the token having this role is already formatted. 
952 /// 953 /// Continues formatting from \p State leaving indentation to \p Indenter and 954 /// returns the total penalty that this formatting incurs. 955 virtual unsigned formatFromToken(LineState &State, 956 ContinuationIndenter *Indenter, 957 bool DryRun) { 958 return 0; 959 } 960 961 /// Same as \c formatFromToken, but assumes that the first token has 962 /// already been set thereby deciding on the first line break. 963 virtual unsigned formatAfterToken(LineState &State, 964 ContinuationIndenter *Indenter, 965 bool DryRun) { 966 return 0; 967 } 968 969 /// Notifies the \c Role that a comma was found. 970 virtual void CommaFound(const FormatToken *Token) {} 971 972 virtual const FormatToken *lastComma() { return nullptr; } 973 974 protected: 975 const FormatStyle &Style; 976 }; 977 978 class CommaSeparatedList : public TokenRole { 979 public: 980 CommaSeparatedList(const FormatStyle &Style) 981 : TokenRole(Style), HasNestedBracedList(false) {} 982 983 void precomputeFormattingInfos(const FormatToken *Token) override; 984 985 unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, 986 bool DryRun) override; 987 988 unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, 989 bool DryRun) override; 990 991 /// Adds \p Token as the next comma to the \c CommaSeparated list. 992 void CommaFound(const FormatToken *Token) override { 993 Commas.push_back(Token); 994 } 995 996 const FormatToken *lastComma() override { 997 if (Commas.empty()) 998 return nullptr; 999 return Commas.back(); 1000 } 1001 1002 private: 1003 /// A struct that holds information on how to format a given list with 1004 /// a specific number of columns. 1005 struct ColumnFormat { 1006 /// The number of columns to use. 1007 unsigned Columns; 1008 1009 /// The total width in characters. 1010 unsigned TotalWidth; 1011 1012 /// The number of lines required for this format. 1013 unsigned LineCount; 1014 1015 /// The size of each column in characters. 
1016 SmallVector<unsigned, 8> ColumnSizes; 1017 }; 1018 1019 /// Calculate which \c ColumnFormat fits best into 1020 /// \p RemainingCharacters. 1021 const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const; 1022 1023 /// The ordered \c FormatTokens making up the commas of this list. 1024 SmallVector<const FormatToken *, 8> Commas; 1025 1026 /// The length of each of the list's items in characters including the 1027 /// trailing comma. 1028 SmallVector<unsigned, 8> ItemLengths; 1029 1030 /// Precomputed formats that can be used for this list. 1031 SmallVector<ColumnFormat, 4> Formats; 1032 1033 bool HasNestedBracedList; 1034 }; 1035 1036 /// Encapsulates keywords that are context sensitive or for languages not 1037 /// properly supported by Clang's lexer. 1038 struct AdditionalKeywords { 1039 AdditionalKeywords(IdentifierTable &IdentTable) { 1040 kw_final = &IdentTable.get("final"); 1041 kw_override = &IdentTable.get("override"); 1042 kw_in = &IdentTable.get("in"); 1043 kw_of = &IdentTable.get("of"); 1044 kw_CF_CLOSED_ENUM = &IdentTable.get("CF_CLOSED_ENUM"); 1045 kw_CF_ENUM = &IdentTable.get("CF_ENUM"); 1046 kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS"); 1047 kw_NS_CLOSED_ENUM = &IdentTable.get("NS_CLOSED_ENUM"); 1048 kw_NS_ENUM = &IdentTable.get("NS_ENUM"); 1049 kw_NS_ERROR_ENUM = &IdentTable.get("NS_ERROR_ENUM"); 1050 kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS"); 1051 1052 kw_as = &IdentTable.get("as"); 1053 kw_async = &IdentTable.get("async"); 1054 kw_await = &IdentTable.get("await"); 1055 kw_declare = &IdentTable.get("declare"); 1056 kw_finally = &IdentTable.get("finally"); 1057 kw_from = &IdentTable.get("from"); 1058 kw_function = &IdentTable.get("function"); 1059 kw_get = &IdentTable.get("get"); 1060 kw_import = &IdentTable.get("import"); 1061 kw_infer = &IdentTable.get("infer"); 1062 kw_is = &IdentTable.get("is"); 1063 kw_let = &IdentTable.get("let"); 1064 kw_module = &IdentTable.get("module"); 1065 kw_readonly = &IdentTable.get("readonly"); 
1066 kw_set = &IdentTable.get("set"); 1067 kw_type = &IdentTable.get("type"); 1068 kw_typeof = &IdentTable.get("typeof"); 1069 kw_var = &IdentTable.get("var"); 1070 kw_yield = &IdentTable.get("yield"); 1071 1072 kw_abstract = &IdentTable.get("abstract"); 1073 kw_assert = &IdentTable.get("assert"); 1074 kw_extends = &IdentTable.get("extends"); 1075 kw_implements = &IdentTable.get("implements"); 1076 kw_instanceof = &IdentTable.get("instanceof"); 1077 kw_interface = &IdentTable.get("interface"); 1078 kw_native = &IdentTable.get("native"); 1079 kw_package = &IdentTable.get("package"); 1080 kw_synchronized = &IdentTable.get("synchronized"); 1081 kw_throws = &IdentTable.get("throws"); 1082 kw___except = &IdentTable.get("__except"); 1083 kw___has_include = &IdentTable.get("__has_include"); 1084 kw___has_include_next = &IdentTable.get("__has_include_next"); 1085 1086 kw_mark = &IdentTable.get("mark"); 1087 kw_region = &IdentTable.get("region"); 1088 1089 kw_extend = &IdentTable.get("extend"); 1090 kw_option = &IdentTable.get("option"); 1091 kw_optional = &IdentTable.get("optional"); 1092 kw_repeated = &IdentTable.get("repeated"); 1093 kw_required = &IdentTable.get("required"); 1094 kw_returns = &IdentTable.get("returns"); 1095 1096 kw_signals = &IdentTable.get("signals"); 1097 kw_qsignals = &IdentTable.get("Q_SIGNALS"); 1098 kw_slots = &IdentTable.get("slots"); 1099 kw_qslots = &IdentTable.get("Q_SLOTS"); 1100 1101 // For internal clang-format use. 
1102 kw_internal_ident_after_define = 1103 &IdentTable.get("__CLANG_FORMAT_INTERNAL_IDENT_AFTER_DEFINE__"); 1104 1105 // C# keywords 1106 kw_dollar = &IdentTable.get("dollar"); 1107 kw_base = &IdentTable.get("base"); 1108 kw_byte = &IdentTable.get("byte"); 1109 kw_checked = &IdentTable.get("checked"); 1110 kw_decimal = &IdentTable.get("decimal"); 1111 kw_delegate = &IdentTable.get("delegate"); 1112 kw_event = &IdentTable.get("event"); 1113 kw_fixed = &IdentTable.get("fixed"); 1114 kw_foreach = &IdentTable.get("foreach"); 1115 kw_init = &IdentTable.get("init"); 1116 kw_implicit = &IdentTable.get("implicit"); 1117 kw_internal = &IdentTable.get("internal"); 1118 kw_lock = &IdentTable.get("lock"); 1119 kw_null = &IdentTable.get("null"); 1120 kw_object = &IdentTable.get("object"); 1121 kw_out = &IdentTable.get("out"); 1122 kw_params = &IdentTable.get("params"); 1123 kw_ref = &IdentTable.get("ref"); 1124 kw_string = &IdentTable.get("string"); 1125 kw_stackalloc = &IdentTable.get("stackalloc"); 1126 kw_sbyte = &IdentTable.get("sbyte"); 1127 kw_sealed = &IdentTable.get("sealed"); 1128 kw_uint = &IdentTable.get("uint"); 1129 kw_ulong = &IdentTable.get("ulong"); 1130 kw_unchecked = &IdentTable.get("unchecked"); 1131 kw_unsafe = &IdentTable.get("unsafe"); 1132 kw_ushort = &IdentTable.get("ushort"); 1133 kw_when = &IdentTable.get("when"); 1134 kw_where = &IdentTable.get("where"); 1135 1136 // Verilog keywords 1137 kw_always = &IdentTable.get("always"); 1138 kw_always_comb = &IdentTable.get("always_comb"); 1139 kw_always_ff = &IdentTable.get("always_ff"); 1140 kw_always_latch = &IdentTable.get("always_latch"); 1141 kw_assign = &IdentTable.get("assign"); 1142 kw_assume = &IdentTable.get("assume"); 1143 kw_automatic = &IdentTable.get("automatic"); 1144 kw_before = &IdentTable.get("before"); 1145 kw_begin = &IdentTable.get("begin"); 1146 kw_begin_keywords = &IdentTable.get("begin_keywords"); 1147 kw_bins = &IdentTable.get("bins"); 1148 kw_binsof = &IdentTable.get("binsof"); 1149 
kw_casex = &IdentTable.get("casex"); 1150 kw_casez = &IdentTable.get("casez"); 1151 kw_celldefine = &IdentTable.get("celldefine"); 1152 kw_checker = &IdentTable.get("checker"); 1153 kw_clocking = &IdentTable.get("clocking"); 1154 kw_constraint = &IdentTable.get("constraint"); 1155 kw_cover = &IdentTable.get("cover"); 1156 kw_covergroup = &IdentTable.get("covergroup"); 1157 kw_coverpoint = &IdentTable.get("coverpoint"); 1158 kw_default_decay_time = &IdentTable.get("default_decay_time"); 1159 kw_default_nettype = &IdentTable.get("default_nettype"); 1160 kw_default_trireg_strength = &IdentTable.get("default_trireg_strength"); 1161 kw_delay_mode_distributed = &IdentTable.get("delay_mode_distributed"); 1162 kw_delay_mode_path = &IdentTable.get("delay_mode_path"); 1163 kw_delay_mode_unit = &IdentTable.get("delay_mode_unit"); 1164 kw_delay_mode_zero = &IdentTable.get("delay_mode_zero"); 1165 kw_disable = &IdentTable.get("disable"); 1166 kw_dist = &IdentTable.get("dist"); 1167 kw_edge = &IdentTable.get("edge"); 1168 kw_elsif = &IdentTable.get("elsif"); 1169 kw_end = &IdentTable.get("end"); 1170 kw_end_keywords = &IdentTable.get("end_keywords"); 1171 kw_endcase = &IdentTable.get("endcase"); 1172 kw_endcelldefine = &IdentTable.get("endcelldefine"); 1173 kw_endchecker = &IdentTable.get("endchecker"); 1174 kw_endclass = &IdentTable.get("endclass"); 1175 kw_endclocking = &IdentTable.get("endclocking"); 1176 kw_endfunction = &IdentTable.get("endfunction"); 1177 kw_endgenerate = &IdentTable.get("endgenerate"); 1178 kw_endgroup = &IdentTable.get("endgroup"); 1179 kw_endinterface = &IdentTable.get("endinterface"); 1180 kw_endmodule = &IdentTable.get("endmodule"); 1181 kw_endpackage = &IdentTable.get("endpackage"); 1182 kw_endprimitive = &IdentTable.get("endprimitive"); 1183 kw_endprogram = &IdentTable.get("endprogram"); 1184 kw_endproperty = &IdentTable.get("endproperty"); 1185 kw_endsequence = &IdentTable.get("endsequence"); 1186 kw_endspecify = &IdentTable.get("endspecify"); 1187 
kw_endtable = &IdentTable.get("endtable"); 1188 kw_endtask = &IdentTable.get("endtask"); 1189 kw_forever = &IdentTable.get("forever"); 1190 kw_fork = &IdentTable.get("fork"); 1191 kw_generate = &IdentTable.get("generate"); 1192 kw_highz0 = &IdentTable.get("highz0"); 1193 kw_highz1 = &IdentTable.get("highz1"); 1194 kw_iff = &IdentTable.get("iff"); 1195 kw_ifnone = &IdentTable.get("ifnone"); 1196 kw_ignore_bins = &IdentTable.get("ignore_bins"); 1197 kw_illegal_bins = &IdentTable.get("illegal_bins"); 1198 kw_initial = &IdentTable.get("initial"); 1199 kw_inout = &IdentTable.get("inout"); 1200 kw_input = &IdentTable.get("input"); 1201 kw_inside = &IdentTable.get("inside"); 1202 kw_interconnect = &IdentTable.get("interconnect"); 1203 kw_intersect = &IdentTable.get("intersect"); 1204 kw_join = &IdentTable.get("join"); 1205 kw_join_any = &IdentTable.get("join_any"); 1206 kw_join_none = &IdentTable.get("join_none"); 1207 kw_large = &IdentTable.get("large"); 1208 kw_local = &IdentTable.get("local"); 1209 kw_localparam = &IdentTable.get("localparam"); 1210 kw_macromodule = &IdentTable.get("macromodule"); 1211 kw_matches = &IdentTable.get("matches"); 1212 kw_medium = &IdentTable.get("medium"); 1213 kw_negedge = &IdentTable.get("negedge"); 1214 kw_nounconnected_drive = &IdentTable.get("nounconnected_drive"); 1215 kw_output = &IdentTable.get("output"); 1216 kw_packed = &IdentTable.get("packed"); 1217 kw_parameter = &IdentTable.get("parameter"); 1218 kw_posedge = &IdentTable.get("posedge"); 1219 kw_primitive = &IdentTable.get("primitive"); 1220 kw_priority = &IdentTable.get("priority"); 1221 kw_program = &IdentTable.get("program"); 1222 kw_property = &IdentTable.get("property"); 1223 kw_pull0 = &IdentTable.get("pull0"); 1224 kw_pull1 = &IdentTable.get("pull1"); 1225 kw_pure = &IdentTable.get("pure"); 1226 kw_rand = &IdentTable.get("rand"); 1227 kw_randc = &IdentTable.get("randc"); 1228 kw_randcase = &IdentTable.get("randcase"); 1229 kw_randsequence = 
&IdentTable.get("randsequence"); 1230 kw_repeat = &IdentTable.get("repeat"); 1231 kw_resetall = &IdentTable.get("resetall"); 1232 kw_sample = &IdentTable.get("sample"); 1233 kw_scalared = &IdentTable.get("scalared"); 1234 kw_sequence = &IdentTable.get("sequence"); 1235 kw_small = &IdentTable.get("small"); 1236 kw_soft = &IdentTable.get("soft"); 1237 kw_solve = &IdentTable.get("solve"); 1238 kw_specify = &IdentTable.get("specify"); 1239 kw_specparam = &IdentTable.get("specparam"); 1240 kw_strong0 = &IdentTable.get("strong0"); 1241 kw_strong1 = &IdentTable.get("strong1"); 1242 kw_supply0 = &IdentTable.get("supply0"); 1243 kw_supply1 = &IdentTable.get("supply1"); 1244 kw_table = &IdentTable.get("table"); 1245 kw_tagged = &IdentTable.get("tagged"); 1246 kw_task = &IdentTable.get("task"); 1247 kw_timescale = &IdentTable.get("timescale"); 1248 kw_tri = &IdentTable.get("tri"); 1249 kw_tri0 = &IdentTable.get("tri0"); 1250 kw_tri1 = &IdentTable.get("tri1"); 1251 kw_triand = &IdentTable.get("triand"); 1252 kw_trior = &IdentTable.get("trior"); 1253 kw_trireg = &IdentTable.get("trireg"); 1254 kw_unconnected_drive = &IdentTable.get("unconnected_drive"); 1255 kw_undefineall = &IdentTable.get("undefineall"); 1256 kw_unique = &IdentTable.get("unique"); 1257 kw_unique0 = &IdentTable.get("unique0"); 1258 kw_uwire = &IdentTable.get("uwire"); 1259 kw_vectored = &IdentTable.get("vectored"); 1260 kw_wand = &IdentTable.get("wand"); 1261 kw_weak0 = &IdentTable.get("weak0"); 1262 kw_weak1 = &IdentTable.get("weak1"); 1263 kw_wildcard = &IdentTable.get("wildcard"); 1264 kw_wire = &IdentTable.get("wire"); 1265 kw_with = &IdentTable.get("with"); 1266 kw_wor = &IdentTable.get("wor"); 1267 1268 // Symbols that are treated as keywords. 
1269 kw_verilogHash = &IdentTable.get("#"); 1270 kw_verilogHashHash = &IdentTable.get("##"); 1271 kw_apostrophe = &IdentTable.get("\'"); 1272 1273 // TableGen keywords 1274 kw_bit = &IdentTable.get("bit"); 1275 kw_bits = &IdentTable.get("bits"); 1276 kw_code = &IdentTable.get("code"); 1277 kw_dag = &IdentTable.get("dag"); 1278 kw_def = &IdentTable.get("def"); 1279 kw_defm = &IdentTable.get("defm"); 1280 kw_defset = &IdentTable.get("defset"); 1281 kw_defvar = &IdentTable.get("defvar"); 1282 kw_dump = &IdentTable.get("dump"); 1283 kw_include = &IdentTable.get("include"); 1284 kw_list = &IdentTable.get("list"); 1285 kw_multiclass = &IdentTable.get("multiclass"); 1286 kw_then = &IdentTable.get("then"); 1287 1288 // Keep this at the end of the constructor to make sure everything here 1289 // is 1290 // already initialized. 1291 JsExtraKeywords = std::unordered_set<IdentifierInfo *>( 1292 {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, 1293 kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_override, 1294 kw_readonly, kw_set, kw_type, kw_typeof, kw_var, kw_yield, 1295 // Keywords from the Java section. 1296 kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); 1297 1298 CSharpExtraKeywords = std::unordered_set<IdentifierInfo *>( 1299 {kw_base, kw_byte, kw_checked, kw_decimal, kw_delegate, kw_event, 1300 kw_fixed, kw_foreach, kw_implicit, kw_in, kw_init, kw_interface, 1301 kw_internal, kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, 1302 kw_params, kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, 1303 kw_sealed, kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort, 1304 kw_when, kw_where, 1305 // Keywords from the JavaScript section. 1306 kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from, 1307 kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly, 1308 kw_set, kw_type, kw_typeof, kw_var, kw_yield, 1309 // Keywords from the Java section. 
1310 kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface}); 1311 1312 // Some keywords are not included here because they don't need special 1313 // treatment like `showcancelled` or they should be treated as identifiers 1314 // like `int` and `logic`. 1315 VerilogExtraKeywords = std::unordered_set<IdentifierInfo *>( 1316 {kw_always, kw_always_comb, 1317 kw_always_ff, kw_always_latch, 1318 kw_assert, kw_assign, 1319 kw_assume, kw_automatic, 1320 kw_before, kw_begin, 1321 kw_bins, kw_binsof, 1322 kw_casex, kw_casez, 1323 kw_celldefine, kw_checker, 1324 kw_clocking, kw_constraint, 1325 kw_cover, kw_covergroup, 1326 kw_coverpoint, kw_disable, 1327 kw_dist, kw_edge, 1328 kw_end, kw_endcase, 1329 kw_endchecker, kw_endclass, 1330 kw_endclocking, kw_endfunction, 1331 kw_endgenerate, kw_endgroup, 1332 kw_endinterface, kw_endmodule, 1333 kw_endpackage, kw_endprimitive, 1334 kw_endprogram, kw_endproperty, 1335 kw_endsequence, kw_endspecify, 1336 kw_endtable, kw_endtask, 1337 kw_extends, kw_final, 1338 kw_foreach, kw_forever, 1339 kw_fork, kw_function, 1340 kw_generate, kw_highz0, 1341 kw_highz1, kw_iff, 1342 kw_ifnone, kw_ignore_bins, 1343 kw_illegal_bins, kw_implements, 1344 kw_import, kw_initial, 1345 kw_inout, kw_input, 1346 kw_inside, kw_interconnect, 1347 kw_interface, kw_intersect, 1348 kw_join, kw_join_any, 1349 kw_join_none, kw_large, 1350 kw_let, kw_local, 1351 kw_localparam, kw_macromodule, 1352 kw_matches, kw_medium, 1353 kw_negedge, kw_output, 1354 kw_package, kw_packed, 1355 kw_parameter, kw_posedge, 1356 kw_primitive, kw_priority, 1357 kw_program, kw_property, 1358 kw_pull0, kw_pull1, 1359 kw_pure, kw_rand, 1360 kw_randc, kw_randcase, 1361 kw_randsequence, kw_ref, 1362 kw_repeat, kw_sample, 1363 kw_scalared, kw_sequence, 1364 kw_small, kw_soft, 1365 kw_solve, kw_specify, 1366 kw_specparam, kw_strong0, 1367 kw_strong1, kw_supply0, 1368 kw_supply1, kw_table, 1369 kw_tagged, kw_task, 1370 kw_tri, kw_tri0, 1371 kw_tri1, kw_triand, 1372 kw_trior, 
kw_trireg, 1373 kw_unique, kw_unique0, 1374 kw_uwire, kw_var, 1375 kw_vectored, kw_wand, 1376 kw_weak0, kw_weak1, 1377 kw_wildcard, kw_wire, 1378 kw_with, kw_wor, 1379 kw_verilogHash, kw_verilogHashHash}); 1380 1381 TableGenExtraKeywords = std::unordered_set<IdentifierInfo *>({ 1382 kw_assert, 1383 kw_bit, 1384 kw_bits, 1385 kw_code, 1386 kw_dag, 1387 kw_def, 1388 kw_defm, 1389 kw_defset, 1390 kw_defvar, 1391 kw_dump, 1392 kw_foreach, 1393 kw_in, 1394 kw_include, 1395 kw_let, 1396 kw_list, 1397 kw_multiclass, 1398 kw_string, 1399 kw_then, 1400 }); 1401 } 1402 1403 // Context sensitive keywords. 1404 IdentifierInfo *kw_final; 1405 IdentifierInfo *kw_override; 1406 IdentifierInfo *kw_in; 1407 IdentifierInfo *kw_of; 1408 IdentifierInfo *kw_CF_CLOSED_ENUM; 1409 IdentifierInfo *kw_CF_ENUM; 1410 IdentifierInfo *kw_CF_OPTIONS; 1411 IdentifierInfo *kw_NS_CLOSED_ENUM; 1412 IdentifierInfo *kw_NS_ENUM; 1413 IdentifierInfo *kw_NS_ERROR_ENUM; 1414 IdentifierInfo *kw_NS_OPTIONS; 1415 IdentifierInfo *kw___except; 1416 IdentifierInfo *kw___has_include; 1417 IdentifierInfo *kw___has_include_next; 1418 1419 // JavaScript keywords. 1420 IdentifierInfo *kw_as; 1421 IdentifierInfo *kw_async; 1422 IdentifierInfo *kw_await; 1423 IdentifierInfo *kw_declare; 1424 IdentifierInfo *kw_finally; 1425 IdentifierInfo *kw_from; 1426 IdentifierInfo *kw_function; 1427 IdentifierInfo *kw_get; 1428 IdentifierInfo *kw_import; 1429 IdentifierInfo *kw_infer; 1430 IdentifierInfo *kw_is; 1431 IdentifierInfo *kw_let; 1432 IdentifierInfo *kw_module; 1433 IdentifierInfo *kw_readonly; 1434 IdentifierInfo *kw_set; 1435 IdentifierInfo *kw_type; 1436 IdentifierInfo *kw_typeof; 1437 IdentifierInfo *kw_var; 1438 IdentifierInfo *kw_yield; 1439 1440 // Java keywords. 
IdentifierInfo *kw_abstract;
IdentifierInfo *kw_assert;
IdentifierInfo *kw_extends;
IdentifierInfo *kw_implements;
IdentifierInfo *kw_instanceof;
IdentifierInfo *kw_interface;
IdentifierInfo *kw_native;
IdentifierInfo *kw_package;
IdentifierInfo *kw_synchronized;
IdentifierInfo *kw_throws;

// Pragma keywords.
IdentifierInfo *kw_mark;
IdentifierInfo *kw_region;

// Proto keywords.
IdentifierInfo *kw_extend;
IdentifierInfo *kw_option;
IdentifierInfo *kw_optional;
IdentifierInfo *kw_repeated;
IdentifierInfo *kw_required;
IdentifierInfo *kw_returns;

// Qt keywords. kw_qsignals and kw_qslots hold the "Q_SIGNALS" and "Q_SLOTS"
// spellings (see the constructor).
IdentifierInfo *kw_signals;
IdentifierInfo *kw_qsignals;
IdentifierInfo *kw_slots;
IdentifierInfo *kw_qslots;

// For internal use by clang-format.
IdentifierInfo *kw_internal_ident_after_define;

// C# keywords.
IdentifierInfo *kw_dollar;
IdentifierInfo *kw_base;
IdentifierInfo *kw_byte;
IdentifierInfo *kw_checked;
IdentifierInfo *kw_decimal;
IdentifierInfo *kw_delegate;
IdentifierInfo *kw_event;
IdentifierInfo *kw_fixed;
IdentifierInfo *kw_foreach;
IdentifierInfo *kw_implicit;
IdentifierInfo *kw_init;
IdentifierInfo *kw_internal;

IdentifierInfo *kw_lock;
IdentifierInfo *kw_null;
IdentifierInfo *kw_object;
IdentifierInfo *kw_out;

IdentifierInfo *kw_params;

IdentifierInfo *kw_ref;
IdentifierInfo *kw_string;
IdentifierInfo *kw_stackalloc;
IdentifierInfo *kw_sbyte;
IdentifierInfo *kw_sealed;
IdentifierInfo *kw_uint;
IdentifierInfo *kw_ulong;
IdentifierInfo *kw_unchecked;
IdentifierInfo *kw_unsafe;
IdentifierInfo *kw_ushort;
IdentifierInfo *kw_when;
IdentifierInfo *kw_where;

// Verilog keywords. Not every entry is a member of VerilogExtraKeywords:
// directive names such as begin_keywords, celldefine, the default_* and
// delay_mode_* names, elsif, end_keywords and endcelldefine are matched by
// isVerilogPPDirective instead.
IdentifierInfo *kw_always;
IdentifierInfo *kw_always_comb;
IdentifierInfo *kw_always_ff;
IdentifierInfo *kw_always_latch;
IdentifierInfo *kw_assign;
IdentifierInfo *kw_assume;
IdentifierInfo *kw_automatic;
IdentifierInfo *kw_before;
IdentifierInfo *kw_begin;
IdentifierInfo *kw_begin_keywords;
IdentifierInfo *kw_bins;
IdentifierInfo *kw_binsof;
IdentifierInfo *kw_casex;
IdentifierInfo *kw_casez;
IdentifierInfo *kw_celldefine;
IdentifierInfo *kw_checker;
IdentifierInfo *kw_clocking;
IdentifierInfo *kw_constraint;
IdentifierInfo *kw_cover;
IdentifierInfo *kw_covergroup;
IdentifierInfo *kw_coverpoint;
IdentifierInfo *kw_default_decay_time;
IdentifierInfo *kw_default_nettype;
IdentifierInfo *kw_default_trireg_strength;
IdentifierInfo *kw_delay_mode_distributed;
IdentifierInfo *kw_delay_mode_path;
IdentifierInfo *kw_delay_mode_unit;
IdentifierInfo *kw_delay_mode_zero;
IdentifierInfo *kw_disable;
IdentifierInfo *kw_dist;
IdentifierInfo *kw_elsif;
IdentifierInfo *kw_edge;
IdentifierInfo *kw_end;
IdentifierInfo *kw_end_keywords;
IdentifierInfo *kw_endcase;
IdentifierInfo *kw_endcelldefine;
IdentifierInfo *kw_endchecker;
IdentifierInfo *kw_endclass;
IdentifierInfo *kw_endclocking;
IdentifierInfo *kw_endfunction;
IdentifierInfo *kw_endgenerate;
IdentifierInfo *kw_endgroup;
IdentifierInfo *kw_endinterface;
IdentifierInfo *kw_endmodule;
IdentifierInfo *kw_endpackage;
IdentifierInfo *kw_endprimitive;
IdentifierInfo *kw_endprogram;
IdentifierInfo *kw_endproperty;
IdentifierInfo *kw_endsequence;
IdentifierInfo *kw_endspecify;
IdentifierInfo *kw_endtable;
IdentifierInfo *kw_endtask;
IdentifierInfo *kw_forever;
IdentifierInfo *kw_fork;
IdentifierInfo *kw_generate;
IdentifierInfo *kw_highz0;
IdentifierInfo *kw_highz1;
IdentifierInfo *kw_iff;
IdentifierInfo *kw_ifnone;
IdentifierInfo *kw_ignore_bins;
IdentifierInfo *kw_illegal_bins;
IdentifierInfo *kw_initial;
IdentifierInfo *kw_inout;
IdentifierInfo *kw_input;
IdentifierInfo *kw_inside;
IdentifierInfo *kw_interconnect;
IdentifierInfo *kw_intersect;
IdentifierInfo *kw_join;
IdentifierInfo *kw_join_any;
IdentifierInfo *kw_join_none;
IdentifierInfo *kw_large;
IdentifierInfo *kw_local;
IdentifierInfo *kw_localparam;
IdentifierInfo *kw_macromodule;
IdentifierInfo *kw_matches;
IdentifierInfo *kw_medium;
IdentifierInfo *kw_negedge;
IdentifierInfo *kw_nounconnected_drive; // Directive name; see isVerilogPPDirective.
IdentifierInfo *kw_output;
IdentifierInfo *kw_packed;
IdentifierInfo *kw_parameter;
IdentifierInfo *kw_posedge;
IdentifierInfo *kw_primitive;
IdentifierInfo *kw_priority;
IdentifierInfo *kw_program;
IdentifierInfo *kw_property;
IdentifierInfo *kw_pull0;
IdentifierInfo *kw_pull1;
IdentifierInfo *kw_pure;
IdentifierInfo *kw_rand;
IdentifierInfo *kw_randc;
IdentifierInfo *kw_randcase;
IdentifierInfo *kw_randsequence;
IdentifierInfo *kw_repeat;
IdentifierInfo *kw_resetall; // Directive name; see isVerilogPPDirective.
IdentifierInfo *kw_sample;
IdentifierInfo *kw_scalared;
IdentifierInfo *kw_sequence;
IdentifierInfo *kw_small;
IdentifierInfo *kw_soft;
IdentifierInfo *kw_solve;
IdentifierInfo *kw_specify;
IdentifierInfo *kw_specparam;
IdentifierInfo *kw_strong0;
IdentifierInfo *kw_strong1;
IdentifierInfo *kw_supply0;
IdentifierInfo *kw_supply1;
IdentifierInfo *kw_table;
IdentifierInfo *kw_tagged;
IdentifierInfo *kw_task;
IdentifierInfo *kw_timescale; // Directive name; see isVerilogPPDirective.
IdentifierInfo *kw_tri0;
IdentifierInfo *kw_tri1;
IdentifierInfo *kw_tri;
IdentifierInfo *kw_triand;
IdentifierInfo *kw_trior;
IdentifierInfo *kw_trireg;
IdentifierInfo *kw_unconnected_drive; // Directive name; see isVerilogPPDirective.
IdentifierInfo *kw_undefineall;       // Directive name; see isVerilogPPDirective.
IdentifierInfo *kw_unique; 1628 IdentifierInfo *kw_unique0; 1629 IdentifierInfo *kw_uwire; 1630 IdentifierInfo *kw_vectored; 1631 IdentifierInfo *kw_wand; 1632 IdentifierInfo *kw_weak0; 1633 IdentifierInfo *kw_weak1; 1634 IdentifierInfo *kw_wildcard; 1635 IdentifierInfo *kw_wire; 1636 IdentifierInfo *kw_with; 1637 IdentifierInfo *kw_wor; 1638 1639 // Workaround for hashes and backticks in Verilog. 1640 IdentifierInfo *kw_verilogHash; 1641 IdentifierInfo *kw_verilogHashHash; 1642 1643 // Symbols in Verilog that don't exist in C++. 1644 IdentifierInfo *kw_apostrophe; 1645 1646 // TableGen keywords 1647 IdentifierInfo *kw_bit; 1648 IdentifierInfo *kw_bits; 1649 IdentifierInfo *kw_code; 1650 IdentifierInfo *kw_dag; 1651 IdentifierInfo *kw_def; 1652 IdentifierInfo *kw_defm; 1653 IdentifierInfo *kw_defset; 1654 IdentifierInfo *kw_defvar; 1655 IdentifierInfo *kw_dump; 1656 IdentifierInfo *kw_include; 1657 IdentifierInfo *kw_list; 1658 IdentifierInfo *kw_multiclass; 1659 IdentifierInfo *kw_then; 1660 1661 /// Returns \c true if \p Tok is a keyword or an identifier. 1662 bool isWordLike(const FormatToken &Tok, bool IsVerilog = true) const { 1663 // getIdentifierinfo returns non-null for keywords as well as identifiers. 1664 return Tok.Tok.getIdentifierInfo() && 1665 (!IsVerilog || !isVerilogKeywordSymbol(Tok)); 1666 } 1667 1668 /// Returns \c true if \p Tok is a true JavaScript identifier, returns 1669 /// \c false if it is a keyword or a pseudo keyword. 1670 /// If \c AcceptIdentifierName is true, returns true not only for keywords, 1671 // but also for IdentifierName tokens (aka pseudo-keywords), such as 1672 // ``yield``. 
1673 bool isJavaScriptIdentifier(const FormatToken &Tok, 1674 bool AcceptIdentifierName = true) const { 1675 // Based on the list of JavaScript & TypeScript keywords here: 1676 // https://github.com/microsoft/TypeScript/blob/main/src/compiler/scanner.ts#L74 1677 if (Tok.isAccessSpecifierKeyword()) 1678 return false; 1679 switch (Tok.Tok.getKind()) { 1680 case tok::kw_break: 1681 case tok::kw_case: 1682 case tok::kw_catch: 1683 case tok::kw_class: 1684 case tok::kw_continue: 1685 case tok::kw_const: 1686 case tok::kw_default: 1687 case tok::kw_delete: 1688 case tok::kw_do: 1689 case tok::kw_else: 1690 case tok::kw_enum: 1691 case tok::kw_export: 1692 case tok::kw_false: 1693 case tok::kw_for: 1694 case tok::kw_if: 1695 case tok::kw_import: 1696 case tok::kw_module: 1697 case tok::kw_new: 1698 case tok::kw_return: 1699 case tok::kw_static: 1700 case tok::kw_switch: 1701 case tok::kw_this: 1702 case tok::kw_throw: 1703 case tok::kw_true: 1704 case tok::kw_try: 1705 case tok::kw_typeof: 1706 case tok::kw_void: 1707 case tok::kw_while: 1708 // These are JS keywords that are lexed by LLVM/clang as keywords. 1709 return false; 1710 case tok::identifier: { 1711 // For identifiers, make sure they are true identifiers, excluding the 1712 // JavaScript pseudo-keywords (not lexed by LLVM/clang as keywords). 1713 bool IsPseudoKeyword = 1714 JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) != 1715 JsExtraKeywords.end(); 1716 return AcceptIdentifierName || !IsPseudoKeyword; 1717 } 1718 default: 1719 // Other keywords are handled in the switch below, to avoid problems due 1720 // to duplicate case labels when using the #include trick. 1721 break; 1722 } 1723 1724 switch (Tok.Tok.getKind()) { 1725 // Handle C++ keywords not included above: these are all JS identifiers. 
1726 #define KEYWORD(X, Y) case tok::kw_##X: 1727 #include "clang/Basic/TokenKinds.def" 1728 // #undef KEYWORD is not needed -- it's #undef-ed at the end of 1729 // TokenKinds.def 1730 return true; 1731 default: 1732 // All other tokens (punctuation etc) are not JS identifiers. 1733 return false; 1734 } 1735 } 1736 1737 /// Returns \c true if \p Tok is a C# keyword, returns 1738 /// \c false if it is a anything else. 1739 bool isCSharpKeyword(const FormatToken &Tok) const { 1740 if (Tok.isAccessSpecifierKeyword()) 1741 return true; 1742 switch (Tok.Tok.getKind()) { 1743 case tok::kw_bool: 1744 case tok::kw_break: 1745 case tok::kw_case: 1746 case tok::kw_catch: 1747 case tok::kw_char: 1748 case tok::kw_class: 1749 case tok::kw_const: 1750 case tok::kw_continue: 1751 case tok::kw_default: 1752 case tok::kw_do: 1753 case tok::kw_double: 1754 case tok::kw_else: 1755 case tok::kw_enum: 1756 case tok::kw_explicit: 1757 case tok::kw_extern: 1758 case tok::kw_false: 1759 case tok::kw_float: 1760 case tok::kw_for: 1761 case tok::kw_goto: 1762 case tok::kw_if: 1763 case tok::kw_int: 1764 case tok::kw_long: 1765 case tok::kw_namespace: 1766 case tok::kw_new: 1767 case tok::kw_operator: 1768 case tok::kw_return: 1769 case tok::kw_short: 1770 case tok::kw_sizeof: 1771 case tok::kw_static: 1772 case tok::kw_struct: 1773 case tok::kw_switch: 1774 case tok::kw_this: 1775 case tok::kw_throw: 1776 case tok::kw_true: 1777 case tok::kw_try: 1778 case tok::kw_typeof: 1779 case tok::kw_using: 1780 case tok::kw_virtual: 1781 case tok::kw_void: 1782 case tok::kw_volatile: 1783 case tok::kw_while: 1784 return true; 1785 default: 1786 return Tok.is(tok::identifier) && 1787 CSharpExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == 1788 CSharpExtraKeywords.end(); 1789 } 1790 } 1791 1792 bool isVerilogKeywordSymbol(const FormatToken &Tok) const { 1793 return Tok.isOneOf(kw_verilogHash, kw_verilogHashHash, kw_apostrophe); 1794 } 1795 1796 bool isVerilogWordOperator(const FormatToken &Tok) const 
{ 1797 return Tok.isOneOf(kw_before, kw_intersect, kw_dist, kw_iff, kw_inside, 1798 kw_with); 1799 } 1800 1801 bool isVerilogIdentifier(const FormatToken &Tok) const { 1802 switch (Tok.Tok.getKind()) { 1803 case tok::kw_case: 1804 case tok::kw_class: 1805 case tok::kw_const: 1806 case tok::kw_continue: 1807 case tok::kw_default: 1808 case tok::kw_do: 1809 case tok::kw_extern: 1810 case tok::kw_else: 1811 case tok::kw_enum: 1812 case tok::kw_for: 1813 case tok::kw_if: 1814 case tok::kw_restrict: 1815 case tok::kw_signed: 1816 case tok::kw_static: 1817 case tok::kw_struct: 1818 case tok::kw_typedef: 1819 case tok::kw_union: 1820 case tok::kw_unsigned: 1821 case tok::kw_virtual: 1822 case tok::kw_while: 1823 return false; 1824 case tok::identifier: 1825 return isWordLike(Tok) && 1826 VerilogExtraKeywords.find(Tok.Tok.getIdentifierInfo()) == 1827 VerilogExtraKeywords.end(); 1828 default: 1829 // getIdentifierInfo returns non-null for both identifiers and keywords. 1830 return Tok.Tok.getIdentifierInfo(); 1831 } 1832 } 1833 1834 /// Returns whether \p Tok is a Verilog preprocessor directive. This is 1835 /// needed because macro expansions start with a backtick as well and they 1836 /// need to be treated differently. 
1837 bool isVerilogPPDirective(const FormatToken &Tok) const { 1838 auto Info = Tok.Tok.getIdentifierInfo(); 1839 if (!Info) 1840 return false; 1841 switch (Info->getPPKeywordID()) { 1842 case tok::pp_define: 1843 case tok::pp_else: 1844 case tok::pp_endif: 1845 case tok::pp_ifdef: 1846 case tok::pp_ifndef: 1847 case tok::pp_include: 1848 case tok::pp_line: 1849 case tok::pp_pragma: 1850 case tok::pp_undef: 1851 return true; 1852 default: 1853 return Tok.isOneOf(kw_begin_keywords, kw_celldefine, 1854 kw_default_decay_time, kw_default_nettype, 1855 kw_default_trireg_strength, kw_delay_mode_distributed, 1856 kw_delay_mode_path, kw_delay_mode_unit, 1857 kw_delay_mode_zero, kw_elsif, kw_end_keywords, 1858 kw_endcelldefine, kw_nounconnected_drive, kw_resetall, 1859 kw_timescale, kw_unconnected_drive, kw_undefineall); 1860 } 1861 } 1862 1863 /// Returns whether \p Tok is a Verilog keyword that opens a block. 1864 bool isVerilogBegin(const FormatToken &Tok) const { 1865 // `table` is not included since it needs to be treated specially. 1866 return !Tok.endsSequence(kw_fork, kw_disable) && 1867 Tok.isOneOf(kw_begin, kw_fork, kw_generate, kw_specify); 1868 } 1869 1870 /// Returns whether \p Tok is a Verilog keyword that closes a block. 1871 bool isVerilogEnd(const FormatToken &Tok) const { 1872 return !Tok.endsSequence(kw_join, kw_rand) && 1873 Tok.isOneOf(TT_MacroBlockEnd, kw_end, kw_endcase, kw_endclass, 1874 kw_endclocking, kw_endchecker, kw_endfunction, 1875 kw_endgenerate, kw_endgroup, kw_endinterface, 1876 kw_endmodule, kw_endpackage, kw_endprimitive, 1877 kw_endprogram, kw_endproperty, kw_endsequence, 1878 kw_endspecify, kw_endtable, kw_endtask, kw_join, 1879 kw_join_any, kw_join_none); 1880 } 1881 1882 /// Returns whether \p Tok is a Verilog keyword that opens a module, etc. 
1883 bool isVerilogHierarchy(const FormatToken &Tok) const { 1884 if (Tok.endsSequence(kw_function, kw_with)) 1885 return false; 1886 if (Tok.is(kw_property)) { 1887 const FormatToken *Prev = Tok.getPreviousNonComment(); 1888 return !(Prev && 1889 Prev->isOneOf(tok::kw_restrict, kw_assert, kw_assume, kw_cover)); 1890 } 1891 return Tok.isOneOf(tok::kw_case, tok::kw_class, kw_function, kw_module, 1892 kw_interface, kw_package, kw_casex, kw_casez, kw_checker, 1893 kw_clocking, kw_covergroup, kw_macromodule, kw_primitive, 1894 kw_program, kw_property, kw_randcase, kw_randsequence, 1895 kw_task); 1896 } 1897 1898 bool isVerilogEndOfLabel(const FormatToken &Tok) const { 1899 const FormatToken *Next = Tok.getNextNonComment(); 1900 // In Verilog the colon in a default label is optional. 1901 return Tok.is(TT_CaseLabelColon) || 1902 (Tok.is(tok::kw_default) && 1903 !(Next && Next->isOneOf(tok::colon, tok::semi, kw_clocking, kw_iff, 1904 kw_input, kw_output, kw_sequence))); 1905 } 1906 1907 /// Returns whether \p Tok is a Verilog keyword that starts a 1908 /// structured procedure like 'always'. 
1909 bool isVerilogStructuredProcedure(const FormatToken &Tok) const { 1910 return Tok.isOneOf(kw_always, kw_always_comb, kw_always_ff, kw_always_latch, 1911 kw_final, kw_forever, kw_initial); 1912 } 1913 1914 bool isVerilogQualifier(const FormatToken &Tok) const { 1915 switch (Tok.Tok.getKind()) { 1916 case tok::kw_extern: 1917 case tok::kw_signed: 1918 case tok::kw_static: 1919 case tok::kw_unsigned: 1920 case tok::kw_virtual: 1921 return true; 1922 case tok::identifier: 1923 return Tok.isOneOf( 1924 kw_let, kw_var, kw_ref, kw_automatic, kw_bins, kw_coverpoint, 1925 kw_ignore_bins, kw_illegal_bins, kw_inout, kw_input, kw_interconnect, 1926 kw_local, kw_localparam, kw_output, kw_parameter, kw_pure, kw_rand, 1927 kw_randc, kw_scalared, kw_specparam, kw_tri, kw_tri0, kw_tri1, 1928 kw_triand, kw_trior, kw_trireg, kw_uwire, kw_vectored, kw_wand, 1929 kw_wildcard, kw_wire, kw_wor); 1930 default: 1931 return false; 1932 } 1933 } 1934 1935 bool isTableGenDefinition(const FormatToken &Tok) const { 1936 return Tok.isOneOf(kw_def, kw_defm, kw_defset, kw_defvar, kw_multiclass, 1937 kw_let, tok::kw_class); 1938 } 1939 1940 bool isTableGenKeyword(const FormatToken &Tok) const { 1941 switch (Tok.Tok.getKind()) { 1942 case tok::kw_class: 1943 case tok::kw_else: 1944 case tok::kw_false: 1945 case tok::kw_if: 1946 case tok::kw_int: 1947 case tok::kw_true: 1948 return true; 1949 default: 1950 return Tok.is(tok::identifier) && 1951 TableGenExtraKeywords.find(Tok.Tok.getIdentifierInfo()) != 1952 TableGenExtraKeywords.end(); 1953 } 1954 } 1955 1956 private: 1957 /// The JavaScript keywords beyond the C++ keyword set. 1958 std::unordered_set<IdentifierInfo *> JsExtraKeywords; 1959 1960 /// The C# keywords beyond the C++ keyword set 1961 std::unordered_set<IdentifierInfo *> CSharpExtraKeywords; 1962 1963 /// The Verilog keywords beyond the C++ keyword set. 1964 std::unordered_set<IdentifierInfo *> VerilogExtraKeywords; 1965 1966 /// The TableGen keywords beyond the C++ keyword set. 
1967 std::unordered_set<IdentifierInfo *> TableGenExtraKeywords; 1968 }; 1969 1970 inline bool isLineComment(const FormatToken &FormatTok) { 1971 return FormatTok.is(tok::comment) && !FormatTok.TokenText.starts_with("/*"); 1972 } 1973 1974 // Checks if \p FormatTok is a line comment that continues the line comment 1975 // \p Previous. The original column of \p MinColumnToken is used to determine 1976 // whether \p FormatTok is indented enough to the right to continue \p Previous. 1977 inline bool continuesLineComment(const FormatToken &FormatTok, 1978 const FormatToken *Previous, 1979 const FormatToken *MinColumnToken) { 1980 if (!Previous || !MinColumnToken) 1981 return false; 1982 unsigned MinContinueColumn = 1983 MinColumnToken->OriginalColumn + (isLineComment(*MinColumnToken) ? 0 : 1); 1984 return isLineComment(FormatTok) && FormatTok.NewlinesBefore == 1 && 1985 isLineComment(*Previous) && 1986 FormatTok.OriginalColumn >= MinContinueColumn; 1987 } 1988 1989 // Returns \c true if \c Current starts a new parameter. 1990 bool startsNextParameter(const FormatToken &Current, const FormatStyle &Style); 1991 1992 } // namespace format 1993 } // namespace clang 1994 1995 #endif 1996