1 //===--- TokenAnnotator.h - Format C++ code ---------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements a token annotator, i.e. creates 11 /// \c AnnotatedTokens out of \c FormatTokens with required extra information. 12 /// 13 //===----------------------------------------------------------------------===// 14 15 #ifndef LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H 16 #define LLVM_CLANG_LIB_FORMAT_TOKENANNOTATOR_H 17 18 #include "UnwrappedLineParser.h" 19 20 namespace clang { 21 namespace format { 22 23 enum LineType { 24 LT_Invalid, 25 // Contains public/private/protected followed by TT_InheritanceColon. 26 LT_AccessModifier, 27 LT_ImportStatement, 28 LT_ObjCDecl, // An @interface, @implementation, or @protocol line. 29 LT_ObjCMethodDecl, 30 LT_ObjCProperty, // An @property line. 31 LT_Other, 32 LT_PreprocessorDirective, 33 LT_VirtualFunctionDecl, 34 LT_ArrayOfStructInitializer, 35 LT_CommentAbovePPDirective, 36 LT_RequiresExpression, 37 LT_SimpleRequirement, 38 }; 39 40 enum ScopeType { 41 // Contained in class declaration/definition. 42 ST_Class, 43 // Contained in compound requirement. 44 ST_CompoundRequirement, 45 // Contained in other blocks (function, lambda, loop, if/else, child, etc). 46 ST_Other, 47 }; 48 49 class AnnotatedLine { 50 public: 51 AnnotatedLine(const UnwrappedLine &Line) 52 : First(Line.Tokens.front().Tok), Type(LT_Other), Level(Line.Level), 53 PPLevel(Line.PPLevel), 54 MatchingOpeningBlockLineIndex(Line.MatchingOpeningBlockLineIndex), 55 MatchingClosingBlockLineIndex(Line.MatchingClosingBlockLineIndex), 56 InPPDirective(Line.InPPDirective), 57 InPragmaDirective(Line.InPragmaDirective), 58 InMacroBody(Line.InMacroBody), 59 MustBeDeclaration(Line.MustBeDeclaration), MightBeFunctionDecl(false), 60 IsMultiVariableDeclStmt(false), Affected(false), 61 LeadingEmptyLinesAffected(false), ChildrenAffected(false), 62 ReturnTypeWrapped(false), IsContinuation(Line.IsContinuation), 63 FirstStartColumn(Line.FirstStartColumn) { 64 assert(!Line.Tokens.empty()); 65 66 // Calculate Next and Previous for all tokens. Note that we must overwrite 67 // Next and Previous for every token, as previous formatting runs might have 68 // left them in a different state. 69 First->Previous = nullptr; 70 FormatToken *Current = First; 71 addChildren(Line.Tokens.front(), Current); 72 for (const UnwrappedLineNode &Node : llvm::drop_begin(Line.Tokens)) { 73 if (Node.Tok->MacroParent) 74 ContainsMacroCall = true; 75 Current->Next = Node.Tok; 76 Node.Tok->Previous = Current; 77 Current = Current->Next; 78 addChildren(Node, Current); 79 // FIXME: if we add children, previous will point to the token before 80 // the children; changing this requires significant changes across 81 // clang-format. 82 } 83 Last = Current; 84 Last->Next = nullptr; 85 } 86 87 void addChildren(const UnwrappedLineNode &Node, FormatToken *Current) { 88 Current->Children.clear(); 89 for (const auto &Child : Node.Children) { 90 Children.push_back(new AnnotatedLine(Child)); 91 if (Children.back()->ContainsMacroCall) 92 ContainsMacroCall = true; 93 Current->Children.push_back(Children.back()); 94 } 95 } 96 97 size_t size() const { 98 size_t Size = 1; 99 for (const auto *Child : Children) 100 Size += Child->size(); 101 return Size; 102 } 103 104 ~AnnotatedLine() { 105 for (AnnotatedLine *Child : Children) 106 delete Child; 107 FormatToken *Current = First; 108 while (Current) { 109 Current->Children.clear(); 110 Current->Role.reset(); 111 Current = Current->Next; 112 } 113 } 114 115 bool isComment() const { 116 return First && First->is(tok::comment) && !First->getNextNonComment(); 117 } 118 119 /// \c true if this line starts with the given tokens in order, ignoring 120 /// comments. 121 template <typename... Ts> bool startsWith(Ts... Tokens) const { 122 return First && First->startsSequence(Tokens...); 123 } 124 125 /// \c true if this line ends with the given tokens in reversed order, 126 /// ignoring comments. 127 /// For example, given tokens [T1, T2, T3, ...], the function returns true if 128 /// this line is like "... T3 T2 T1". 129 template <typename... Ts> bool endsWith(Ts... Tokens) const { 130 return Last && Last->endsSequence(Tokens...); 131 } 132 133 /// \c true if this line looks like a function definition instead of a 134 /// function declaration. Asserts MightBeFunctionDecl. 135 bool mightBeFunctionDefinition() const { 136 assert(MightBeFunctionDecl); 137 // Try to determine if the end of a stream of tokens is either the 138 // Definition or the Declaration for a function. It does this by looking for 139 // the ';' in foo(); and using that it ends with a ; to know this is the 140 // Definition, however the line could end with 141 // foo(); /* comment */ 142 // or 143 // foo(); // comment 144 // or 145 // foo() // comment 146 // endsWith() ignores the comment. 147 return !endsWith(tok::semi); 148 } 149 150 /// \c true if this line starts a namespace definition. 151 bool startsWithNamespace() const { 152 return startsWith(tok::kw_namespace) || startsWith(TT_NamespaceMacro) || 153 startsWith(tok::kw_inline, tok::kw_namespace) || 154 startsWith(tok::kw_export, tok::kw_namespace); 155 } 156 157 /// \c true if this line starts a C++ export block. 158 bool startsWithExportBlock() const { 159 return startsWith(tok::kw_export, tok::l_brace); 160 } 161 162 FormatToken *getFirstNonComment() const { 163 assert(First); 164 return First->is(tok::comment) ? First->getNextNonComment() : First; 165 } 166 167 FormatToken *getLastNonComment() const { 168 assert(Last); 169 return Last->is(tok::comment) ? Last->getPreviousNonComment() : Last; 170 } 171 172 FormatToken *First; 173 FormatToken *Last; 174 175 SmallVector<AnnotatedLine *, 0> Children; 176 177 LineType Type; 178 unsigned Level; 179 unsigned PPLevel; 180 size_t MatchingOpeningBlockLineIndex; 181 size_t MatchingClosingBlockLineIndex; 182 bool InPPDirective; 183 bool InPragmaDirective; 184 bool InMacroBody; 185 bool MustBeDeclaration; 186 bool MightBeFunctionDecl; 187 bool IsMultiVariableDeclStmt; 188 189 /// \c True if this line contains a macro call for which an expansion exists. 190 bool ContainsMacroCall = false; 191 192 /// \c True if calculateFormattingInformation() has been called on this line. 193 bool Computed = false; 194 195 /// \c True if this line should be formatted, i.e. intersects directly or 196 /// indirectly with one of the input ranges. 197 bool Affected; 198 199 /// \c True if the leading empty lines of this line intersect with one of the 200 /// input ranges. 201 bool LeadingEmptyLinesAffected; 202 203 /// \c True if one of this line's children intersects with an input range. 204 bool ChildrenAffected; 205 206 /// \c True if breaking after last attribute group in function return type. 207 bool ReturnTypeWrapped; 208 209 /// \c True if this line should be indented by ContinuationIndent in addition 210 /// to the normal indention level. 211 bool IsContinuation; 212 213 unsigned FirstStartColumn; 214 215 private: 216 // Disallow copying. 217 AnnotatedLine(const AnnotatedLine &) = delete; 218 void operator=(const AnnotatedLine &) = delete; 219 }; 220 221 /// Determines extra information about the tokens comprising an 222 /// \c UnwrappedLine. 223 class TokenAnnotator { 224 public: 225 TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) 226 : Style(Style), IsCpp(Style.isCpp()), 227 LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) { 228 assert(IsCpp == LangOpts.CXXOperatorNames); 229 } 230 231 /// Adapts the indent levels of comment lines to the indent of the 232 /// subsequent line. 233 // FIXME: Can/should this be done in the UnwrappedLineParser? 234 void setCommentLineLevels(SmallVectorImpl<AnnotatedLine *> &Lines) const; 235 236 void annotate(AnnotatedLine &Line); 237 void calculateFormattingInformation(AnnotatedLine &Line) const; 238 239 private: 240 /// Calculate the penalty for splitting before \c Tok. 241 unsigned splitPenalty(const AnnotatedLine &Line, const FormatToken &Tok, 242 bool InFunctionDecl) const; 243 244 bool spaceRequiredBeforeParens(const FormatToken &Right) const; 245 246 bool spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Left, 247 const FormatToken &Right) const; 248 249 bool spaceRequiredBefore(const AnnotatedLine &Line, 250 const FormatToken &Right) const; 251 252 bool mustBreakBefore(const AnnotatedLine &Line, 253 const FormatToken &Right) const; 254 255 bool canBreakBefore(const AnnotatedLine &Line, 256 const FormatToken &Right) const; 257 258 bool mustBreakForReturnType(const AnnotatedLine &Line) const; 259 260 void printDebugInfo(const AnnotatedLine &Line) const; 261 262 void calculateUnbreakableTailLengths(AnnotatedLine &Line) const; 263 264 void calculateArrayInitializerColumnList(AnnotatedLine &Line) const; 265 266 FormatToken *calculateInitializerColumnList(AnnotatedLine &Line, 267 FormatToken *CurrentToken, 268 unsigned Depth) const; 269 FormatStyle::PointerAlignmentStyle 270 getTokenReferenceAlignment(const FormatToken &PointerOrReference) const; 271 272 FormatStyle::PointerAlignmentStyle getTokenPointerOrReferenceAlignment( 273 const FormatToken &PointerOrReference) const; 274 275 const FormatStyle &Style; 276 277 bool IsCpp; 278 LangOptions LangOpts; 279 280 const AdditionalKeywords &Keywords; 281 282 SmallVector<ScopeType> Scopes, MacroBodyScopes; 283 }; 284 285 } // end namespace format 286 } // end namespace clang 287 288 #endif 289