xref: /llvm-project/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp (revision d1d34bafef56b732b461e12032eaf030e609f55a)
1 //===- ExtractAPI/Serialization/SymbolGraphSerializer.cpp -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the SymbolGraphSerializer.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h"
15 #include "clang/Basic/Version.h"
16 #include "clang/ExtractAPI/API.h"
17 #include "llvm/Support/JSON.h"
18 #include "llvm/Support/Path.h"
19 #include "llvm/Support/VersionTuple.h"
20 
21 using namespace clang;
22 using namespace clang::extractapi;
23 using namespace llvm;
24 using namespace llvm::json;
25 
26 namespace {
27 
28 /// Helper function to inject a JSON object \p Obj into another object \p Paren
29 /// at position \p Key.
30 void serializeObject(Object &Paren, StringRef Key, Optional<Object> Obj) {
31   if (Obj)
32     Paren[Key] = std::move(Obj.getValue());
33 }
34 
35 /// Helper function to inject a JSON array \p Array into object \p Paren at
36 /// position \p Key.
37 void serializeArray(Object &Paren, StringRef Key, Optional<Array> Array) {
38   if (Array)
39     Paren[Key] = std::move(Array.getValue());
40 }
41 
42 /// Serialize a \c VersionTuple \p V with the Symbol Graph semantic version
43 /// format.
44 ///
45 /// A semantic version object contains three numeric fields, representing the
46 /// \c major, \c minor, and \c patch parts of the version tuple.
47 /// For example version tuple 1.0.3 is serialized as:
48 /// \code
49 ///   {
50 ///     "major" : 1,
51 ///     "minor" : 0,
52 ///     "patch" : 3
53 ///   }
54 /// \endcode
55 ///
56 /// \returns \c None if the version \p V is empty, or an \c Object containing
57 /// the semantic version representation of \p V.
58 Optional<Object> serializeSemanticVersion(const VersionTuple &V) {
59   if (V.empty())
60     return None;
61 
62   Object Version;
63   Version["major"] = V.getMajor();
64   Version["minor"] = V.getMinor().getValueOr(0);
65   Version["patch"] = V.getSubminor().getValueOr(0);
66   return Version;
67 }
68 
69 /// Serialize the OS information in the Symbol Graph platform property.
70 ///
71 /// The OS information in Symbol Graph contains the \c name of the OS, and an
72 /// optional \c minimumVersion semantic version field.
73 Object serializeOperatingSystem(const Triple &T) {
74   Object OS;
75   OS["name"] = T.getOSTypeName(T.getOS());
76   serializeObject(OS, "minimumVersion",
77                   serializeSemanticVersion(T.getMinimumSupportedOSVersion()));
78   return OS;
79 }
80 
81 /// Serialize the platform information in the Symbol Graph module section.
82 ///
83 /// The platform object describes a target platform triple in corresponding
84 /// three fields: \c architecture, \c vendor, and \c operatingSystem.
85 Object serializePlatform(const Triple &T) {
86   Object Platform;
87   Platform["architecture"] = T.getArchName();
88   Platform["vendor"] = T.getVendorName();
89   Platform["operatingSystem"] = serializeOperatingSystem(T);
90   return Platform;
91 }
92 
93 /// Serialize a source location in file.
94 ///
95 /// \param Loc The presumed location to serialize.
96 /// \param IncludeFileURI If true, include the file path of \p Loc as a URI.
97 /// Defaults to false.
98 Object serializeSourcePosition(const PresumedLoc &Loc,
99                                bool IncludeFileURI = false) {
100   assert(Loc.isValid() && "invalid source position");
101 
102   Object SourcePosition;
103   SourcePosition["line"] = Loc.getLine();
104   SourcePosition["character"] = Loc.getColumn();
105 
106   if (IncludeFileURI) {
107     std::string FileURI = "file://";
108     // Normalize file path to use forward slashes for the URI.
109     FileURI += sys::path::convert_to_slash(Loc.getFilename());
110     SourcePosition["uri"] = FileURI;
111   }
112 
113   return SourcePosition;
114 }
115 
116 /// Serialize a source range with begin and end locations.
117 Object serializeSourceRange(const PresumedLoc &BeginLoc,
118                             const PresumedLoc &EndLoc) {
119   Object SourceRange;
120   serializeObject(SourceRange, "start", serializeSourcePosition(BeginLoc));
121   serializeObject(SourceRange, "end", serializeSourcePosition(EndLoc));
122   return SourceRange;
123 }
124 
125 /// Serialize the availability attributes of a symbol.
126 ///
127 /// Availability information contains the introduced, deprecated, and obsoleted
128 /// versions of the symbol as semantic versions, if not default.
129 /// Availability information also contains flags to indicate if the symbol is
130 /// unconditionally unavailable or deprecated,
131 /// i.e. \c __attribute__((unavailable)) and \c __attribute__((deprecated)).
132 ///
133 /// \returns \c None if the symbol has default availability attributes, or
134 /// an \c Object containing the formatted availability information.
135 Optional<Object> serializeAvailability(const AvailabilityInfo &Avail) {
136   if (Avail.isDefault())
137     return None;
138 
139   Object Availbility;
140   serializeObject(Availbility, "introducedVersion",
141                   serializeSemanticVersion(Avail.Introduced));
142   serializeObject(Availbility, "deprecatedVersion",
143                   serializeSemanticVersion(Avail.Deprecated));
144   serializeObject(Availbility, "obsoletedVersion",
145                   serializeSemanticVersion(Avail.Obsoleted));
146   if (Avail.isUnavailable())
147     Availbility["isUnconditionallyUnavailable"] = true;
148   if (Avail.isUnconditionallyDeprecated())
149     Availbility["isUnconditionallyDeprecated"] = true;
150 
151   return Availbility;
152 }
153 
154 /// Get the language name string for interface language references.
155 StringRef getLanguageName(Language Lang) {
156   switch (Lang) {
157   case Language::C:
158     return "c";
159   case Language::ObjC:
160     return "objective-c";
161 
162   // Unsupported language currently
163   case Language::CXX:
164   case Language::ObjCXX:
165   case Language::OpenCL:
166   case Language::OpenCLCXX:
167   case Language::CUDA:
168   case Language::RenderScript:
169   case Language::HIP:
170   case Language::HLSL:
171 
172   // Languages that the frontend cannot parse and compile
173   case Language::Unknown:
174   case Language::Asm:
175   case Language::LLVM_IR:
176     llvm_unreachable("Unsupported language kind");
177   }
178 
179   llvm_unreachable("Unhandled language kind");
180 }
181 
182 /// Serialize the identifier object as specified by the Symbol Graph format.
183 ///
184 /// The identifier property of a symbol contains the USR for precise and unique
185 /// references, and the interface language name.
186 Object serializeIdentifier(const APIRecord &Record, Language Lang) {
187   Object Identifier;
188   Identifier["precise"] = Record.USR;
189   Identifier["interfaceLanguage"] = getLanguageName(Lang);
190 
191   return Identifier;
192 }
193 
194 /// Serialize the documentation comments attached to a symbol, as specified by
195 /// the Symbol Graph format.
196 ///
197 /// The Symbol Graph \c docComment object contains an array of lines. Each line
198 /// represents one line of striped documentation comment, with source range
199 /// information.
200 /// e.g.
201 /// \code
202 ///   /// This is a documentation comment
203 ///       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'  First line.
204 ///   ///     with multiple lines.
205 ///       ^~~~~~~~~~~~~~~~~~~~~~~'         Second line.
206 /// \endcode
207 ///
208 /// \returns \c None if \p Comment is empty, or an \c Object containing the
209 /// formatted lines.
210 Optional<Object> serializeDocComment(const DocComment &Comment) {
211   if (Comment.empty())
212     return None;
213 
214   Object DocComment;
215   Array LinesArray;
216   for (const auto &CommentLine : Comment) {
217     Object Line;
218     Line["text"] = CommentLine.Text;
219     serializeObject(Line, "range",
220                     serializeSourceRange(CommentLine.Begin, CommentLine.End));
221     LinesArray.emplace_back(std::move(Line));
222   }
223   serializeArray(DocComment, "lines", LinesArray);
224 
225   return DocComment;
226 }
227 
228 /// Serialize the declaration fragments of a symbol.
229 ///
230 /// The Symbol Graph declaration fragments is an array of tagged important
231 /// parts of a symbol's declaration. The fragments sequence can be joined to
232 /// form spans of declaration text, with attached information useful for
233 /// purposes like syntax-highlighting etc. For example:
234 /// \code
235 ///   const int pi; -> "declarationFragments" : [
236 ///                      {
237 ///                        "kind" : "keyword",
238 ///                        "spelling" : "const"
239 ///                      },
240 ///                      {
241 ///                        "kind" : "text",
242 ///                        "spelling" : " "
243 ///                      },
244 ///                      {
245 ///                        "kind" : "typeIdentifier",
246 ///                        "preciseIdentifier" : "c:I",
247 ///                        "spelling" : "int"
248 ///                      },
249 ///                      {
250 ///                        "kind" : "text",
251 ///                        "spelling" : " "
252 ///                      },
253 ///                      {
254 ///                        "kind" : "identifier",
255 ///                        "spelling" : "pi"
256 ///                      }
257 ///                    ]
258 /// \endcode
259 ///
260 /// \returns \c None if \p DF is empty, or an \c Array containing the formatted
261 /// declaration fragments array.
262 Optional<Array> serializeDeclarationFragments(const DeclarationFragments &DF) {
263   if (DF.getFragments().empty())
264     return None;
265 
266   Array Fragments;
267   for (const auto &F : DF.getFragments()) {
268     Object Fragment;
269     Fragment["spelling"] = F.Spelling;
270     Fragment["kind"] = DeclarationFragments::getFragmentKindString(F.Kind);
271     if (!F.PreciseIdentifier.empty())
272       Fragment["preciseIdentifier"] = F.PreciseIdentifier;
273     Fragments.emplace_back(std::move(Fragment));
274   }
275 
276   return Fragments;
277 }
278 
279 /// Serialize the function signature field of a function, as specified by the
280 /// Symbol Graph format.
281 ///
282 /// The Symbol Graph function signature property contains two arrays.
283 ///   - The \c returns array is the declaration fragments of the return type;
284 ///   - The \c parameters array contains names and declaration fragments of the
285 ///     parameters.
286 ///
287 /// \returns \c None if \p FS is empty, or an \c Object containing the
288 /// formatted function signature.
289 Optional<Object> serializeFunctionSignature(const FunctionSignature &FS) {
290   if (FS.empty())
291     return None;
292 
293   Object Signature;
294   serializeArray(Signature, "returns",
295                  serializeDeclarationFragments(FS.getReturnType()));
296 
297   Array Parameters;
298   for (const auto &P : FS.getParameters()) {
299     Object Parameter;
300     Parameter["name"] = P.Name;
301     serializeArray(Parameter, "declarationFragments",
302                    serializeDeclarationFragments(P.Fragments));
303     Parameters.emplace_back(std::move(Parameter));
304   }
305 
306   if (!Parameters.empty())
307     Signature["parameters"] = std::move(Parameters);
308 
309   return Signature;
310 }
311 
312 /// Serialize the \c names field of a symbol as specified by the Symbol Graph
313 /// format.
314 ///
315 /// The Symbol Graph names field contains multiple representations of a symbol
316 /// that can be used for different applications:
317 ///   - \c title : The simple declared name of the symbol;
318 ///   - \c subHeading : An array of declaration fragments that provides tags,
319 ///     and potentially more tokens (for example the \c +/- symbol for
320 ///     Objective-C methods). Can be used as sub-headings for documentation.
321 Object serializeNames(const APIRecord &Record) {
322   Object Names;
323   Names["title"] = Record.Name;
324   serializeArray(Names, "subHeading",
325                  serializeDeclarationFragments(Record.SubHeading));
326 
327   return Names;
328 }
329 
330 /// Serialize the symbol kind information.
331 ///
332 /// The Symbol Graph symbol kind property contains a shorthand \c identifier
333 /// which is prefixed by the source language name, useful for tooling to parse
334 /// the kind, and a \c displayName for rendering human-readable names.
335 Object serializeSymbolKind(const APIRecord &Record, Language Lang) {
336   auto AddLangPrefix = [&Lang](StringRef S) -> std::string {
337     return (getLanguageName(Lang) + "." + S).str();
338   };
339 
340   Object Kind;
341   switch (Record.getKind()) {
342   case APIRecord::RK_Global: {
343     auto *GR = dyn_cast<GlobalRecord>(&Record);
344     switch (GR->GlobalKind) {
345     case GVKind::Function:
346       Kind["identifier"] = AddLangPrefix("func");
347       Kind["displayName"] = "Function";
348       break;
349     case GVKind::Variable:
350       Kind["identifier"] = AddLangPrefix("var");
351       Kind["displayName"] = "Global Variable";
352       break;
353     case GVKind::Unknown:
354       // Unknown global kind
355       break;
356     }
357     break;
358   }
359   case APIRecord::RK_EnumConstant:
360     Kind["identifier"] = AddLangPrefix("enum.case");
361     Kind["displayName"] = "Enumeration Case";
362     break;
363   case APIRecord::RK_Enum:
364     Kind["identifier"] = AddLangPrefix("enum");
365     Kind["displayName"] = "Enumeration";
366     break;
367   case APIRecord::RK_StructField:
368     Kind["identifier"] = AddLangPrefix("property");
369     Kind["displayName"] = "Instance Property";
370     break;
371   case APIRecord::RK_Struct:
372     Kind["identifier"] = AddLangPrefix("struct");
373     Kind["displayName"] = "Structure";
374     break;
375   case APIRecord::RK_ObjCIvar:
376     Kind["identifier"] = AddLangPrefix("ivar");
377     Kind["displayName"] = "Instance Variable";
378     break;
379   case APIRecord::RK_ObjCMethod:
380     if (dyn_cast<ObjCMethodRecord>(&Record)->IsInstanceMethod) {
381       Kind["identifier"] = AddLangPrefix("method");
382       Kind["displayName"] = "Instance Method";
383     } else {
384       Kind["identifier"] = AddLangPrefix("type.method");
385       Kind["displayName"] = "Type Method";
386     }
387     break;
388   case APIRecord::RK_ObjCProperty:
389     Kind["identifier"] = AddLangPrefix("property");
390     Kind["displayName"] = "Instance Property";
391     break;
392   case APIRecord::RK_ObjCInterface:
393     Kind["identifier"] = AddLangPrefix("class");
394     Kind["displayName"] = "Class";
395     break;
396   case APIRecord::RK_ObjCProtocol:
397     Kind["identifier"] = AddLangPrefix("protocol");
398     Kind["displayName"] = "Protocol";
399     break;
400   }
401 
402   return Kind;
403 }
404 
405 } // namespace
406 
407 void SymbolGraphSerializer::anchor() {}
408 
409 /// Defines the format version emitted by SymbolGraphSerializer.
410 const VersionTuple SymbolGraphSerializer::FormatVersion{0, 5, 3};
411 
412 Object SymbolGraphSerializer::serializeMetadata() const {
413   Object Metadata;
414   serializeObject(Metadata, "formatVersion",
415                   serializeSemanticVersion(FormatVersion));
416   Metadata["generator"] = clang::getClangFullVersion();
417   return Metadata;
418 }
419 
420 Object SymbolGraphSerializer::serializeModule() const {
421   Object Module;
422   // The user is expected to always pass `--product-name=` on the command line
423   // to populate this field.
424   Module["name"] = ProductName;
425   serializeObject(Module, "platform", serializePlatform(API.getTarget()));
426   return Module;
427 }
428 
429 bool SymbolGraphSerializer::shouldSkip(const APIRecord &Record) const {
430   // Skip unconditionally unavailable symbols
431   if (Record.Availability.isUnconditionallyUnavailable())
432     return true;
433 
434   return false;
435 }
436 
437 Optional<Object>
438 SymbolGraphSerializer::serializeAPIRecord(const APIRecord &Record) const {
439   if (shouldSkip(Record))
440     return None;
441 
442   Object Obj;
443   serializeObject(Obj, "identifier",
444                   serializeIdentifier(Record, API.getLanguage()));
445   serializeObject(Obj, "kind", serializeSymbolKind(Record, API.getLanguage()));
446   serializeObject(Obj, "names", serializeNames(Record));
447   serializeObject(
448       Obj, "location",
449       serializeSourcePosition(Record.Location, /*IncludeFileURI=*/true));
450   serializeObject(Obj, "availbility",
451                   serializeAvailability(Record.Availability));
452   serializeObject(Obj, "docComment", serializeDocComment(Record.Comment));
453   serializeArray(Obj, "declarationFragments",
454                  serializeDeclarationFragments(Record.Declaration));
455 
456   return Obj;
457 }
458 
459 StringRef SymbolGraphSerializer::getRelationshipString(RelationshipKind Kind) {
460   switch (Kind) {
461   case RelationshipKind::MemberOf:
462     return "memberOf";
463   case RelationshipKind::InheritsFrom:
464     return "inheritsFrom";
465   case RelationshipKind::ConformsTo:
466     return "conformsTo";
467   }
468   llvm_unreachable("Unhandled relationship kind");
469 }
470 
471 void SymbolGraphSerializer::serializeRelationship(RelationshipKind Kind,
472                                                   SymbolReference Source,
473                                                   SymbolReference Target) {
474   Object Relationship;
475   Relationship["source"] = Source.USR;
476   Relationship["target"] = Target.USR;
477   Relationship["kind"] = getRelationshipString(Kind);
478 
479   Relationships.emplace_back(std::move(Relationship));
480 }
481 
482 void SymbolGraphSerializer::serializeGlobalRecord(const GlobalRecord &Record) {
483   auto Obj = serializeAPIRecord(Record);
484   if (!Obj)
485     return;
486 
487   if (Record.GlobalKind == GVKind::Function)
488     serializeObject(*Obj, "parameters",
489                     serializeFunctionSignature(Record.Signature));
490 
491   Symbols.emplace_back(std::move(*Obj));
492 }
493 
494 void SymbolGraphSerializer::serializeEnumRecord(const EnumRecord &Record) {
495   auto Enum = serializeAPIRecord(Record);
496   if (!Enum)
497     return;
498 
499   Symbols.emplace_back(std::move(*Enum));
500 
501   for (const auto &Constant : Record.Constants) {
502     auto EnumConstant = serializeAPIRecord(*Constant);
503     if (!EnumConstant)
504       continue;
505 
506     Symbols.emplace_back(std::move(*EnumConstant));
507     serializeRelationship(RelationshipKind::MemberOf, *Constant, Record);
508   }
509 }
510 
511 void SymbolGraphSerializer::serializeStructRecord(const StructRecord &Record) {
512   auto Struct = serializeAPIRecord(Record);
513   if (!Struct)
514     return;
515 
516   Symbols.emplace_back(std::move(*Struct));
517 
518   for (const auto &Field : Record.Fields) {
519     auto StructField = serializeAPIRecord(*Field);
520     if (!StructField)
521       continue;
522 
523     Symbols.emplace_back(std::move(*StructField));
524     serializeRelationship(RelationshipKind::MemberOf, *Field, Record);
525   }
526 }
527 
528 void SymbolGraphSerializer::serializeObjCContainerRecord(
529     const ObjCContainerRecord &Record) {
530   auto ObjCContainer = serializeAPIRecord(Record);
531   if (!ObjCContainer)
532     return;
533 
534   Symbols.emplace_back(std::move(*ObjCContainer));
535 
536   // Record instance variables and that the instance variables are members of
537   // the container.
538   for (const auto &Ivar : Record.Ivars) {
539     auto ObjCIvar = serializeAPIRecord(*Ivar);
540     if (!ObjCIvar)
541       continue;
542 
543     Symbols.emplace_back(std::move(*ObjCIvar));
544     serializeRelationship(RelationshipKind::MemberOf, *Ivar, Record);
545   }
546 
547   // Record methods and that the methods are members of the container.
548   for (const auto &Method : Record.Methods) {
549     auto ObjCMethod = serializeAPIRecord(*Method);
550     if (!ObjCMethod)
551       continue;
552 
553     Symbols.emplace_back(std::move(*ObjCMethod));
554     serializeRelationship(RelationshipKind::MemberOf, *Method, Record);
555   }
556 
557   // Record properties and that the properties are members of the container.
558   for (const auto &Property : Record.Properties) {
559     auto ObjCProperty = serializeAPIRecord(*Property);
560     if (!ObjCProperty)
561       continue;
562 
563     Symbols.emplace_back(std::move(*ObjCProperty));
564     serializeRelationship(RelationshipKind::MemberOf, *Property, Record);
565   }
566 
567   for (const auto &Protocol : Record.Protocols)
568     // Record that Record conforms to Protocol.
569     serializeRelationship(RelationshipKind::ConformsTo, Record, Protocol);
570 
571   if (auto *ObjCInterface = dyn_cast<ObjCInterfaceRecord>(&Record))
572     if (!ObjCInterface->SuperClass.empty())
573       // If Record is an Objective-C interface record and it has a super class,
574       // record that Record is inherited from SuperClass.
575       serializeRelationship(RelationshipKind::InheritsFrom, Record,
576                             ObjCInterface->SuperClass);
577 }
578 
579 Object SymbolGraphSerializer::serialize() {
580   Object Root;
581   serializeObject(Root, "metadata", serializeMetadata());
582   serializeObject(Root, "module", serializeModule());
583 
584   // Serialize global records in the API set.
585   for (const auto &Global : API.getGlobals())
586     serializeGlobalRecord(*Global.second);
587 
588   // Serialize enum records in the API set.
589   for (const auto &Enum : API.getEnums())
590     serializeEnumRecord(*Enum.second);
591 
592   // Serialize struct records in the API set.
593   for (const auto &Struct : API.getStructs())
594     serializeStructRecord(*Struct.second);
595 
596   // Serialize Objective-C interface records in the API set.
597   for (const auto &ObjCInterface : API.getObjCInterfaces())
598     serializeObjCContainerRecord(*ObjCInterface.second);
599 
600   // Serialize Objective-C protocol records in the API set.
601   for (const auto &ObjCProtocol : API.getObjCProtocols())
602     serializeObjCContainerRecord(*ObjCProtocol.second);
603 
604   Root["symbols"] = std::move(Symbols);
605   Root["relationhips"] = std::move(Relationships);
606 
607   return Root;
608 }
609 
610 void SymbolGraphSerializer::serialize(raw_ostream &os) {
611   Object root = serialize();
612   if (Options.Compact)
613     os << formatv("{0}", Value(std::move(root))) << "\n";
614   else
615     os << formatv("{0:2}", Value(std::move(root))) << "\n";
616 }
617