xref: /llvm-project/clang-tools-extra/clang-doc/Representation.h (revision 5ef2456a438578b0783241a2744efc62d47e5ab6)
1 ///===-- Representation.h - ClangDoc Representation -------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the internal representations of different declaration
10 // types for the clang-doc tool.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_REPRESENTATION_H
15 #define LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_REPRESENTATION_H
16 
17 #include "clang/AST/Type.h"
18 #include "clang/Basic/Specifiers.h"
19 #include "clang/Tooling/StandaloneExecution.h"
20 #include "llvm/ADT/APSInt.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/ADT/StringExtras.h"
23 #include <array>
24 #include <optional>
25 #include <string>
26 
27 namespace clang {
28 namespace doc {
29 
// SHA1'd hash of a USR, stored as an array of 20 bytes.
using SymbolID = std::array<uint8_t, 20>;

// Forward declarations of types that are defined later in this file.
struct BaseRecordInfo;
struct EnumInfo;
struct FunctionInfo;
struct Info;
struct TypedefInfo;
38 
// Identifies which kind of declaration an Info or a Reference describes.
enum class InfoType {
  IT_default,   // Used by the Info and Reference constructors when no kind is
                // supplied.
  IT_namespace, // A namespace.
  IT_record,    // A record.
  IT_function,  // A function (passed by the FunctionInfo constructor).
  IT_enum,      // An enumeration (passed by the EnumInfo constructors).
  IT_typedef    // A typedef or using declaration (passed by the TypedefInfo
                // constructor).
};
47 
48 // A representation of a parsed comment.
49 struct CommentInfo {
50   CommentInfo() = default;
51   CommentInfo(CommentInfo &Other) = delete;
52   CommentInfo(CommentInfo &&Other) = default;
53   CommentInfo &operator=(CommentInfo &&Other) = default;
54 
55   bool operator==(const CommentInfo &Other) const;
56 
57   // This operator is used to sort a vector of CommentInfos.
58   // No specific order (attributes more important than others) is required. Any
59   // sort is enough, the order is only needed to call std::unique after sorting
60   // the vector.
61   bool operator<(const CommentInfo &Other) const;
62 
63   SmallString<16>
64       Kind; // Kind of comment (FullComment, ParagraphComment, TextComment,
65             // InlineCommandComment, HTMLStartTagComment, HTMLEndTagComment,
66             // BlockCommandComment, ParamCommandComment,
67             // TParamCommandComment, VerbatimBlockComment,
68             // VerbatimBlockLineComment, VerbatimLineComment).
69   SmallString<64> Text;      // Text of the comment.
70   SmallString<16> Name;      // Name of the comment (for Verbatim and HTML).
71   SmallString<8> Direction;  // Parameter direction (for (T)ParamCommand).
72   SmallString<16> ParamName; // Parameter name (for (T)ParamCommand).
73   SmallString<16> CloseName; // Closing tag name (for VerbatimBlock).
74   bool SelfClosing = false;  // Indicates if tag is self-closing (for HTML).
75   bool Explicit = false; // Indicates if the direction of a param is explicit
76                          // (for (T)ParamCommand).
77   llvm::SmallVector<SmallString<16>, 4>
78       AttrKeys; // List of attribute keys (for HTML).
79   llvm::SmallVector<SmallString<16>, 4>
80       AttrValues; // List of attribute values for each key (for HTML).
81   llvm::SmallVector<SmallString<16>, 4>
82       Args; // List of arguments to commands (for InlineCommand).
83   std::vector<std::unique_ptr<CommentInfo>>
84       Children; // List of child comments for this CommentInfo.
85 };
86 
87 struct Reference {
88   // This variant (that takes no qualified name parameter) uses the Name as the
89   // QualName (very useful in unit tests to reduce verbosity). This can't use an
90   // empty string to indicate the default because we need to accept the empty
91   // string as a valid input for the global namespace (it will have
92   // "GlobalNamespace" as the name, but an empty QualName).
93   Reference(SymbolID USR = SymbolID(), StringRef Name = StringRef(),
94             InfoType IT = InfoType::IT_default)
95       : USR(USR), Name(Name), QualName(Name), RefType(IT) {}
96   Reference(SymbolID USR, StringRef Name, InfoType IT, StringRef QualName,
97             StringRef Path = StringRef())
98       : USR(USR), Name(Name), QualName(QualName), RefType(IT), Path(Path) {}
99 
100   bool operator==(const Reference &Other) const {
101     return std::tie(USR, Name, QualName, RefType) ==
102            std::tie(Other.USR, Other.Name, QualName, Other.RefType);
103   }
104 
105   bool mergeable(const Reference &Other);
106   void merge(Reference &&I);
107   bool operator<(const Reference &Other) const { return Name < Other.Name; }
108 
109   /// Returns the path for this Reference relative to CurrentPath.
110   llvm::SmallString<64> getRelativeFilePath(const StringRef &CurrentPath) const;
111 
112   /// Returns the basename that should be used for this Reference.
113   llvm::SmallString<16> getFileBaseName() const;
114 
115   SymbolID USR = SymbolID(); // Unique identifier for referenced decl
116 
117   // Name of type (possibly unresolved). Not including namespaces or template
118   // parameters (so for a std::vector<int> this would be "vector"). See also
119   // QualName.
120   SmallString<16> Name;
121 
122   // Full qualified name of this type, including namespaces and template
123   // parameter (for example this could be "std::vector<int>"). Contrast to
124   // Name.
125   SmallString<16> QualName;
126 
127   InfoType RefType = InfoType::IT_default; // Indicates the type of this
128                                            // Reference (namespace, record,
129                                            // function, enum, default).
130   // Path of directory where the clang-doc generated file will be saved
131   // (possibly unresolved)
132   llvm::SmallString<128> Path;
133 };
134 
135 // Holds the children of a record or namespace.
136 struct ScopeChildren {
137   // Namespaces and Records are references because they will be properly
138   // documented in their own info, while the entirety of Functions and Enums are
139   // included here because they should not have separate documentation from
140   // their scope.
141   //
142   // Namespaces are not syntactically valid as children of records, but making
143   // this general for all possible container types reduces code complexity.
144   std::vector<Reference> Namespaces;
145   std::vector<Reference> Records;
146   std::vector<FunctionInfo> Functions;
147   std::vector<EnumInfo> Enums;
148   std::vector<TypedefInfo> Typedefs;
149 
150   void sort();
151 };
152 
153 // A base struct for TypeInfos
154 struct TypeInfo {
155   TypeInfo() = default;
156   TypeInfo(const Reference &R) : Type(R) {}
157 
158   // Convenience constructor for when there is no symbol ID or info type
159   // (normally used for built-in types in tests).
160   TypeInfo(StringRef Name, StringRef Path = StringRef())
161       : Type(SymbolID(), Name, InfoType::IT_default, Name, Path) {}
162 
163   bool operator==(const TypeInfo &Other) const { return Type == Other.Type; }
164 
165   Reference Type; // Referenced type in this info.
166 };
167 
168 // Represents one template parameter.
169 //
170 // This is a very simple serialization of the text of the source code of the
171 // template parameter. It is saved in a struct so there is a place to add the
172 // name and default values in the future if needed.
173 struct TemplateParamInfo {
174   TemplateParamInfo() = default;
175   explicit TemplateParamInfo(StringRef Contents) : Contents(Contents) {}
176 
177   // The literal contents of the code for that specifies this template parameter
178   // for this declaration. Typical values will be "class T" and
179   // "typename T = int".
180   SmallString<16> Contents;
181 };
182 
// Describes a template specialization: which declaration is specialized and
// with which parameters.
struct TemplateSpecializationInfo {
  // Indicates the declaration that this specializes.
  SymbolID SpecializationOf;

  // Template parameters applying to the specialized record/function.
  std::vector<TemplateParamInfo> Params;
};
190 
// Records the template information for a struct or function that is a template
// or an explicit template specialization.
struct TemplateInfo {
  // Template parameters of the declaration.
  // May be empty for non-partial specializations.
  std::vector<TemplateParamInfo> Params;

  // Set when this is a specialization of another record/function.
  std::optional<TemplateSpecializationInfo> Specialization;
};
200 
201 // Info for field types.
202 struct FieldTypeInfo : public TypeInfo {
203   FieldTypeInfo() = default;
204   FieldTypeInfo(const TypeInfo &TI, StringRef Name = StringRef(),
205                 StringRef DefaultValue = StringRef())
206       : TypeInfo(TI), Name(Name), DefaultValue(DefaultValue) {}
207 
208   bool operator==(const FieldTypeInfo &Other) const {
209     return std::tie(Type, Name, DefaultValue) ==
210            std::tie(Other.Type, Other.Name, Other.DefaultValue);
211   }
212 
213   SmallString<16> Name; // Name associated with this info.
214 
215   // When used for function parameters, contains the string representing the
216   // expression of the default value, if any.
217   SmallString<16> DefaultValue;
218 };
219 
220 // Info for member types.
221 struct MemberTypeInfo : public FieldTypeInfo {
222   MemberTypeInfo() = default;
223   MemberTypeInfo(const TypeInfo &TI, StringRef Name, AccessSpecifier Access)
224       : FieldTypeInfo(TI, Name), Access(Access) {}
225 
226   bool operator==(const MemberTypeInfo &Other) const {
227     return std::tie(Type, Name, Access, Description) ==
228            std::tie(Other.Type, Other.Name, Other.Access, Other.Description);
229   }
230 
231   // Access level associated with this info (public, protected, private, none).
232   // AS_public is set as default because the bitcode writer requires the enum
233   // with value 0 to be used as the default.
234   // (AS_public = 0, AS_protected = 1, AS_private = 2, AS_none = 3)
235   AccessSpecifier Access = AccessSpecifier::AS_public;
236 
237   std::vector<CommentInfo> Description; // Comment description of this field.
238 };
239 
240 struct Location {
241   Location(int LineNumber = 0, StringRef Filename = StringRef(),
242            bool IsFileInRootDir = false)
243       : LineNumber(LineNumber), Filename(Filename),
244         IsFileInRootDir(IsFileInRootDir) {}
245 
246   bool operator==(const Location &Other) const {
247     return std::tie(LineNumber, Filename) ==
248            std::tie(Other.LineNumber, Other.Filename);
249   }
250 
251   bool operator!=(const Location &Other) const {
252     return std::tie(LineNumber, Filename) !=
253            std::tie(Other.LineNumber, Other.Filename);
254   }
255 
256   // This operator is used to sort a vector of Locations.
257   // No specific order (attributes more important than others) is required. Any
258   // sort is enough, the order is only needed to call std::unique after sorting
259   // the vector.
260   bool operator<(const Location &Other) const {
261     return std::tie(LineNumber, Filename) <
262            std::tie(Other.LineNumber, Other.Filename);
263   }
264 
265   int LineNumber = 0;           // Line number of this Location.
266   SmallString<32> Filename;     // File for this Location.
267   bool IsFileInRootDir = false; // Indicates if file is inside root directory
268 };
269 
270 /// A base struct for Infos.
271 struct Info {
272   Info(InfoType IT = InfoType::IT_default, SymbolID USR = SymbolID(),
273        StringRef Name = StringRef(), StringRef Path = StringRef())
274       : USR(USR), IT(IT), Name(Name), Path(Path) {}
275 
276   Info(const Info &Other) = delete;
277   Info(Info &&Other) = default;
278 
279   virtual ~Info() = default;
280 
281   Info &operator=(Info &&Other) = default;
282 
283   SymbolID USR =
284       SymbolID(); // Unique identifier for the decl described by this Info.
285   InfoType IT = InfoType::IT_default; // InfoType of this particular Info.
286   SmallString<16> Name;               // Unqualified name of the decl.
287   llvm::SmallVector<Reference, 4>
288       Namespace; // List of parent namespaces for this decl.
289   std::vector<CommentInfo> Description; // Comment description of this decl.
290   llvm::SmallString<128> Path;          // Path of directory where the clang-doc
291                                         // generated file will be saved
292 
293   void mergeBase(Info &&I);
294   bool mergeable(const Info &Other);
295 
296   llvm::SmallString<16> extractName() const;
297 
298   /// Returns the file path for this Info relative to CurrentPath.
299   llvm::SmallString<64> getRelativeFilePath(const StringRef &CurrentPath) const;
300 
301   /// Returns the basename that should be used for this Info.
302   llvm::SmallString<16> getFileBaseName() const;
303 };
304 
305 // Info for namespaces.
306 struct NamespaceInfo : public Info {
307   NamespaceInfo(SymbolID USR = SymbolID(), StringRef Name = StringRef(),
308                 StringRef Path = StringRef());
309 
310   void merge(NamespaceInfo &&I);
311 
312   ScopeChildren Children;
313 };
314 
315 // Info for symbols.
316 struct SymbolInfo : public Info {
317   SymbolInfo(InfoType IT, SymbolID USR = SymbolID(),
318              StringRef Name = StringRef(), StringRef Path = StringRef())
319       : Info(IT, USR, Name, Path) {}
320 
321   void merge(SymbolInfo &&I);
322 
323   std::optional<Location> DefLoc;     // Location where this decl is defined.
324   llvm::SmallVector<Location, 2> Loc; // Locations where this decl is declared.
325 
326   bool operator<(const SymbolInfo &Other) const {
327     // Sort by declaration location since we want the doc to be
328     // generated in the order of the source code.
329     // If the declaration location is the same, or not present
330     // we sort by defined location otherwise fallback to the extracted name
331     if (Loc.size() > 0 && Other.Loc.size() > 0 && Loc[0] != Other.Loc[0])
332       return Loc[0] < Other.Loc[0];
333 
334     if (DefLoc && Other.DefLoc && *DefLoc != *Other.DefLoc)
335       return *DefLoc < *Other.DefLoc;
336 
337     return extractName() < Other.extractName();
338   }
339 };
340 
341 // TODO: Expand to allow for documenting templating and default args.
342 // Info for functions.
343 struct FunctionInfo : public SymbolInfo {
344   FunctionInfo(SymbolID USR = SymbolID())
345       : SymbolInfo(InfoType::IT_function, USR) {}
346 
347   void merge(FunctionInfo &&I);
348 
349   bool IsMethod = false; // Indicates whether this function is a class method.
350   Reference Parent;      // Reference to the parent class decl for this method.
351   TypeInfo ReturnType;   // Info about the return type of this function.
352   llvm::SmallVector<FieldTypeInfo, 4> Params; // List of parameters.
353   // Access level for this method (public, private, protected, none).
354   // AS_public is set as default because the bitcode writer requires the enum
355   // with value 0 to be used as the default.
356   // (AS_public = 0, AS_protected = 1, AS_private = 2, AS_none = 3)
357   AccessSpecifier Access = AccessSpecifier::AS_public;
358 
359   // Full qualified name of this function, including namespaces and template
360   // specializations.
361   SmallString<16> FullName;
362 
363   // When present, this function is a template or specialization.
364   std::optional<TemplateInfo> Template;
365 };
366 
367 // TODO: Expand to allow for documenting templating, inheritance access,
368 // friend classes
369 // Info for types.
370 struct RecordInfo : public SymbolInfo {
371   RecordInfo(SymbolID USR = SymbolID(), StringRef Name = StringRef(),
372              StringRef Path = StringRef());
373 
374   void merge(RecordInfo &&I);
375 
376   // Type of this record (struct, class, union, interface).
377   TagTypeKind TagType = TagTypeKind::Struct;
378 
379   // Full qualified name of this record, including namespaces and template
380   // specializations.
381   SmallString<16> FullName;
382 
383   // When present, this record is a template or specialization.
384   std::optional<TemplateInfo> Template;
385 
386   // Indicates if the record was declared using a typedef. Things like anonymous
387   // structs in a typedef:
388   //   typedef struct { ... } foo_t;
389   // are converted into records with the typedef as the Name + this flag set.
390   bool IsTypeDef = false;
391 
392   llvm::SmallVector<MemberTypeInfo, 4>
393       Members;                             // List of info about record members.
394   llvm::SmallVector<Reference, 4> Parents; // List of base/parent records
395                                            // (does not include virtual
396                                            // parents).
397   llvm::SmallVector<Reference, 4>
398       VirtualParents; // List of virtual base/parent records.
399 
400   std::vector<BaseRecordInfo>
401       Bases; // List of base/parent records; this includes inherited methods and
402              // attributes
403 
404   ScopeChildren Children;
405 };
406 
407 // Info for typedef and using statements.
408 struct TypedefInfo : public SymbolInfo {
409   TypedefInfo(SymbolID USR = SymbolID())
410       : SymbolInfo(InfoType::IT_typedef, USR) {}
411 
412   void merge(TypedefInfo &&I);
413 
414   TypeInfo Underlying;
415 
416   // Inidicates if this is a new C++ "using"-style typedef:
417   //   using MyVector = std::vector<int>
418   // False means it's a C-style typedef:
419   //   typedef std::vector<int> MyVector;
420   bool IsUsing = false;
421 };
422 
423 struct BaseRecordInfo : public RecordInfo {
424   BaseRecordInfo();
425   BaseRecordInfo(SymbolID USR, StringRef Name, StringRef Path, bool IsVirtual,
426                  AccessSpecifier Access, bool IsParent);
427 
428   // Indicates if base corresponds to a virtual inheritance
429   bool IsVirtual = false;
430   // Access level associated with this inherited info (public, protected,
431   // private).
432   AccessSpecifier Access = AccessSpecifier::AS_public;
433   bool IsParent = false; // Indicates if this base is a direct parent
434 };
435 
436 // Information for a single possible value of an enumeration.
437 struct EnumValueInfo {
438   explicit EnumValueInfo(StringRef Name = StringRef(),
439                          StringRef Value = StringRef("0"),
440                          StringRef ValueExpr = StringRef())
441       : Name(Name), Value(Value), ValueExpr(ValueExpr) {}
442 
443   bool operator==(const EnumValueInfo &Other) const {
444     return std::tie(Name, Value, ValueExpr) ==
445            std::tie(Other.Name, Other.Value, Other.ValueExpr);
446   }
447 
448   SmallString<16> Name;
449 
450   // The computed value of the enumeration constant. This could be the result of
451   // evaluating the ValueExpr, or it could be automatically generated according
452   // to C rules.
453   SmallString<16> Value;
454 
455   // Stores the user-supplied initialization expression for this enumeration
456   // constant. This will be empty for implicit enumeration values.
457   SmallString<16> ValueExpr;
458 
459   std::vector<CommentInfo> Description; /// Comment description of this field.
460 };
461 
462 // TODO: Expand to allow for documenting templating.
463 // Info for types.
464 struct EnumInfo : public SymbolInfo {
465   EnumInfo() : SymbolInfo(InfoType::IT_enum) {}
466   EnumInfo(SymbolID USR) : SymbolInfo(InfoType::IT_enum, USR) {}
467 
468   void merge(EnumInfo &&I);
469 
470   // Indicates whether this enum is scoped (e.g. enum class).
471   bool Scoped = false;
472 
473   // Set to nonempty to the type when this is an explicitly typed enum. For
474   //   enum Foo : short { ... };
475   // this will be "short".
476   std::optional<TypeInfo> BaseType;
477 
478   llvm::SmallVector<EnumValueInfo, 4> Members; // List of enum members.
479 };
480 
481 struct Index : public Reference {
482   Index() = default;
483   Index(StringRef Name) : Reference(SymbolID(), Name) {}
484   Index(StringRef Name, StringRef JumpToSection)
485       : Reference(SymbolID(), Name), JumpToSection(JumpToSection) {}
486   Index(SymbolID USR, StringRef Name, InfoType IT, StringRef Path)
487       : Reference(USR, Name, IT, Name, Path) {}
488   // This is used to look for a USR in a vector of Indexes using std::find
489   bool operator==(const SymbolID &Other) const { return USR == Other; }
490   bool operator<(const Index &Other) const;
491 
492   std::optional<SmallString<16>> JumpToSection;
493   std::vector<Index> Children;
494 
495   void sort();
496 };
497 
498 // TODO: Add functionality to include separate markdown pages.
499 
// A standalone function to call to merge a vector of infos into one.
// This assumes that all infos in the vector are of the same type, and will fail
// if they are different.
// The merged Info is returned wrapped in llvm::Expected, so callers have to
// check the result for an error before using it.
llvm::Expected<std::unique_ptr<Info>>
mergeInfos(std::vector<std::unique_ptr<Info>> &Values);
505 
506 struct ClangDocContext {
507   ClangDocContext() = default;
508   ClangDocContext(tooling::ExecutionContext *ECtx, StringRef ProjectName,
509                   bool PublicOnly, StringRef OutDirectory, StringRef SourceRoot,
510                   StringRef RepositoryUrl,
511                   std::vector<std::string> UserStylesheets);
512   tooling::ExecutionContext *ECtx;
513   std::string ProjectName; // Name of project clang-doc is documenting.
514   bool PublicOnly; // Indicates if only public declarations are documented.
515   std::string OutDirectory; // Directory for outputting generated files.
516   std::string SourceRoot;   // Directory where processed files are stored. Links
517                             // to definition locations will only be generated if
518                             // the file is in this dir.
519   // URL of repository that hosts code used for links to definition locations.
520   std::optional<std::string> RepositoryUrl;
521   // Path of CSS stylesheets that will be copied to OutDirectory and used to
522   // style all HTML files.
523   std::vector<std::string> UserStylesheets;
524   // JavaScript files that will be imported in allHTML file.
525   std::vector<std::string> JsScripts;
526   Index Idx;
527 };
528 
529 } // namespace doc
530 } // namespace clang
531 
532 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_DOC_REPRESENTATION_H
533