xref: /llvm-project/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp (revision 89f6b26f1beb2c1344f5cfeb34e405128544c76b)
1 //===- ExtractAPI/Serialization/SymbolGraphSerializer.cpp -------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file implements the SymbolGraphSerializer.
11 ///
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h"
15 #include "clang/Basic/Version.h"
16 #include "clang/ExtractAPI/API.h"
17 #include "llvm/Support/JSON.h"
18 #include "llvm/Support/Path.h"
19 #include "llvm/Support/VersionTuple.h"
20 
21 using namespace clang;
22 using namespace clang::extractapi;
23 using namespace llvm;
24 using namespace llvm::json;
25 
26 namespace {
27 
28 /// Helper function to inject a JSON object \p Obj into another object \p Paren
29 /// at position \p Key.
30 void serializeObject(Object &Paren, StringRef Key, Optional<Object> Obj) {
31   if (Obj)
32     Paren[Key] = std::move(Obj.getValue());
33 }
34 
35 /// Helper function to inject a JSON array \p Array into object \p Paren at
36 /// position \p Key.
37 void serializeArray(Object &Paren, StringRef Key, Optional<Array> Array) {
38   if (Array)
39     Paren[Key] = std::move(Array.getValue());
40 }
41 
42 /// Serialize a \c VersionTuple \p V with the Symbol Graph semantic version
43 /// format.
44 ///
45 /// A semantic version object contains three numeric fields, representing the
46 /// \c major, \c minor, and \c patch parts of the version tuple.
47 /// For example version tuple 1.0.3 is serialized as:
48 /// \code
49 ///   {
50 ///     "major" : 1,
51 ///     "minor" : 0,
52 ///     "patch" : 3
53 ///   }
54 /// \endcode
55 ///
56 /// \returns \c None if the version \p V is empty, or an \c Object containing
57 /// the semantic version representation of \p V.
58 Optional<Object> serializeSemanticVersion(const VersionTuple &V) {
59   if (V.empty())
60     return None;
61 
62   Object Version;
63   Version["major"] = V.getMajor();
64   Version["minor"] = V.getMinor().getValueOr(0);
65   Version["patch"] = V.getSubminor().getValueOr(0);
66   return Version;
67 }
68 
69 /// Serialize the OS information in the Symbol Graph platform property.
70 ///
71 /// The OS information in Symbol Graph contains the \c name of the OS, and an
72 /// optional \c minimumVersion semantic version field.
73 Object serializeOperatingSystem(const Triple &T) {
74   Object OS;
75   OS["name"] = T.getOSTypeName(T.getOS());
76   serializeObject(OS, "minimumVersion",
77                   serializeSemanticVersion(T.getMinimumSupportedOSVersion()));
78   return OS;
79 }
80 
81 /// Serialize the platform information in the Symbol Graph module section.
82 ///
83 /// The platform object describes a target platform triple in corresponding
84 /// three fields: \c architecture, \c vendor, and \c operatingSystem.
85 Object serializePlatform(const Triple &T) {
86   Object Platform;
87   Platform["architecture"] = T.getArchName();
88   Platform["vendor"] = T.getVendorName();
89   Platform["operatingSystem"] = serializeOperatingSystem(T);
90   return Platform;
91 }
92 
93 /// Serialize a source location in file.
94 ///
95 /// \param Loc The presumed location to serialize.
96 /// \param IncludeFileURI If true, include the file path of \p Loc as a URI.
97 /// Defaults to false.
98 Object serializeSourcePosition(const PresumedLoc &Loc,
99                                bool IncludeFileURI = false) {
100   assert(Loc.isValid() && "invalid source position");
101 
102   Object SourcePosition;
103   SourcePosition["line"] = Loc.getLine();
104   SourcePosition["character"] = Loc.getColumn();
105 
106   if (IncludeFileURI) {
107     std::string FileURI = "file://";
108     // Normalize file path to use forward slashes for the URI.
109     FileURI += sys::path::convert_to_slash(Loc.getFilename());
110     SourcePosition["uri"] = FileURI;
111   }
112 
113   return SourcePosition;
114 }
115 
116 /// Serialize a source range with begin and end locations.
117 Object serializeSourceRange(const PresumedLoc &BeginLoc,
118                             const PresumedLoc &EndLoc) {
119   Object SourceRange;
120   serializeObject(SourceRange, "start", serializeSourcePosition(BeginLoc));
121   serializeObject(SourceRange, "end", serializeSourcePosition(EndLoc));
122   return SourceRange;
123 }
124 
125 /// Serialize the availability attributes of a symbol.
126 ///
127 /// Availability information contains the introduced, deprecated, and obsoleted
128 /// versions of the symbol as semantic versions, if not default.
129 /// Availability information also contains flags to indicate if the symbol is
130 /// unconditionally unavailable or deprecated,
131 /// i.e. \c __attribute__((unavailable)) and \c __attribute__((deprecated)).
132 ///
133 /// \returns \c None if the symbol has default availability attributes, or
134 /// an \c Object containing the formatted availability information.
135 Optional<Object> serializeAvailability(const AvailabilityInfo &Avail) {
136   if (Avail.isDefault())
137     return None;
138 
139   Object Availbility;
140   serializeObject(Availbility, "introducedVersion",
141                   serializeSemanticVersion(Avail.Introduced));
142   serializeObject(Availbility, "deprecatedVersion",
143                   serializeSemanticVersion(Avail.Deprecated));
144   serializeObject(Availbility, "obsoletedVersion",
145                   serializeSemanticVersion(Avail.Obsoleted));
146   if (Avail.isUnavailable())
147     Availbility["isUnconditionallyUnavailable"] = true;
148   if (Avail.isUnconditionallyDeprecated())
149     Availbility["isUnconditionallyDeprecated"] = true;
150 
151   return Availbility;
152 }
153 
154 /// Get the short language name string for interface language references.
155 StringRef getLanguageName(const LangOptions &LangOpts) {
156   auto Language =
157       LangStandard::getLangStandardForKind(LangOpts.LangStd).getLanguage();
158   switch (Language) {
159   case Language::C:
160     return "c";
161   case Language::ObjC:
162     return "objc";
163 
164   // Unsupported language currently
165   case Language::CXX:
166   case Language::ObjCXX:
167   case Language::OpenCL:
168   case Language::OpenCLCXX:
169   case Language::CUDA:
170   case Language::RenderScript:
171   case Language::HIP:
172 
173   // Languages that the frontend cannot parse and compile
174   case Language::Unknown:
175   case Language::Asm:
176   case Language::LLVM_IR:
177     llvm_unreachable("Unsupported language kind");
178   }
179 
180   llvm_unreachable("Unhandled language kind");
181 }
182 
183 /// Serialize the identifier object as specified by the Symbol Graph format.
184 ///
185 /// The identifier property of a symbol contains the USR for precise and unique
186 /// references, and the interface language name.
187 Object serializeIdentifier(const APIRecord &Record,
188                            const LangOptions &LangOpts) {
189   Object Identifier;
190   Identifier["precise"] = Record.USR;
191   Identifier["interfaceLanguage"] = getLanguageName(LangOpts);
192 
193   return Identifier;
194 }
195 
196 /// Serialize the documentation comments attached to a symbol, as specified by
197 /// the Symbol Graph format.
198 ///
199 /// The Symbol Graph \c docComment object contains an array of lines. Each line
200 /// represents one line of striped documentation comment, with source range
201 /// information.
202 /// e.g.
203 /// \code
204 ///   /// This is a documentation comment
205 ///       ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~'  First line.
206 ///   ///     with multiple lines.
207 ///       ^~~~~~~~~~~~~~~~~~~~~~~'         Second line.
208 /// \endcode
209 ///
210 /// \returns \c None if \p Comment is empty, or an \c Object containing the
211 /// formatted lines.
212 Optional<Object> serializeDocComment(const DocComment &Comment) {
213   if (Comment.empty())
214     return None;
215 
216   Object DocComment;
217   Array LinesArray;
218   for (const auto &CommentLine : Comment) {
219     Object Line;
220     Line["text"] = CommentLine.Text;
221     serializeObject(Line, "range",
222                     serializeSourceRange(CommentLine.Begin, CommentLine.End));
223     LinesArray.emplace_back(std::move(Line));
224   }
225   serializeArray(DocComment, "lines", LinesArray);
226 
227   return DocComment;
228 }
229 
230 /// Serialize the declaration fragments of a symbol.
231 ///
232 /// The Symbol Graph declaration fragments is an array of tagged important
233 /// parts of a symbol's declaration. The fragments sequence can be joined to
234 /// form spans of declaration text, with attached information useful for
235 /// purposes like syntax-highlighting etc. For example:
236 /// \code
237 ///   const int pi; -> "declarationFragments" : [
238 ///                      {
239 ///                        "kind" : "keyword",
240 ///                        "spelling" : "const"
241 ///                      },
242 ///                      {
243 ///                        "kind" : "text",
244 ///                        "spelling" : " "
245 ///                      },
246 ///                      {
247 ///                        "kind" : "typeIdentifier",
248 ///                        "preciseIdentifier" : "c:I",
249 ///                        "spelling" : "int"
250 ///                      },
251 ///                      {
252 ///                        "kind" : "text",
253 ///                        "spelling" : " "
254 ///                      },
255 ///                      {
256 ///                        "kind" : "identifier",
257 ///                        "spelling" : "pi"
258 ///                      }
259 ///                    ]
260 /// \endcode
261 ///
262 /// \returns \c None if \p DF is empty, or an \c Array containing the formatted
263 /// declaration fragments array.
264 Optional<Array> serializeDeclarationFragments(const DeclarationFragments &DF) {
265   if (DF.getFragments().empty())
266     return None;
267 
268   Array Fragments;
269   for (const auto &F : DF.getFragments()) {
270     Object Fragment;
271     Fragment["spelling"] = F.Spelling;
272     Fragment["kind"] = DeclarationFragments::getFragmentKindString(F.Kind);
273     if (!F.PreciseIdentifier.empty())
274       Fragment["preciseIdentifier"] = F.PreciseIdentifier;
275     Fragments.emplace_back(std::move(Fragment));
276   }
277 
278   return Fragments;
279 }
280 
281 /// Serialize the function signature field of a function, as specified by the
282 /// Symbol Graph format.
283 ///
284 /// The Symbol Graph function signature property contains two arrays.
285 ///   - The \c returns array is the declaration fragments of the return type;
286 ///   - The \c parameters array contains names and declaration fragments of the
287 ///     parameters.
288 ///
289 /// \returns \c None if \p FS is empty, or an \c Object containing the
290 /// formatted function signature.
291 Optional<Object> serializeFunctionSignature(const FunctionSignature &FS) {
292   if (FS.empty())
293     return None;
294 
295   Object Signature;
296   serializeArray(Signature, "returns",
297                  serializeDeclarationFragments(FS.getReturnType()));
298 
299   Array Parameters;
300   for (const auto &P : FS.getParameters()) {
301     Object Parameter;
302     Parameter["name"] = P.Name;
303     serializeArray(Parameter, "declarationFragments",
304                    serializeDeclarationFragments(P.Fragments));
305     Parameters.emplace_back(std::move(Parameter));
306   }
307 
308   if (!Parameters.empty())
309     Signature["parameters"] = std::move(Parameters);
310 
311   return Signature;
312 }
313 
314 /// Serialize the \c names field of a symbol as specified by the Symbol Graph
315 /// format.
316 ///
317 /// The Symbol Graph names field contains multiple representations of a symbol
318 /// that can be used for different applications:
319 ///   - \c title : The simple declared name of the symbol;
320 ///   - \c subHeading : An array of declaration fragments that provides tags,
321 ///     and potentially more tokens (for example the \c +/- symbol for
322 ///     Objective-C methods). Can be used as sub-headings for documentation.
323 Object serializeNames(const APIRecord &Record) {
324   Object Names;
325   Names["title"] = Record.Name;
326   serializeArray(Names, "subHeading",
327                  serializeDeclarationFragments(Record.SubHeading));
328 
329   return Names;
330 }
331 
332 /// Serialize the symbol kind information.
333 ///
334 /// The Symbol Graph symbol kind property contains a shorthand \c identifier
335 /// which is prefixed by the source language name, useful for tooling to parse
336 /// the kind, and a \c displayName for rendering human-readable names.
337 Object serializeSymbolKind(const APIRecord &Record,
338                            const LangOptions &LangOpts) {
339   Object Kind;
340   switch (Record.getKind()) {
341   case APIRecord::RK_Global:
342     auto *GR = dyn_cast<GlobalRecord>(&Record);
343     switch (GR->GlobalKind) {
344     case GVKind::Function:
345       Kind["identifier"] = (getLanguageName(LangOpts) + ".func").str();
346       Kind["displayName"] = "Function";
347       break;
348     case GVKind::Variable:
349       Kind["identifier"] = (getLanguageName(LangOpts) + ".var").str();
350       Kind["displayName"] = "Global Variable";
351       break;
352     case GVKind::Unknown:
353       // Unknown global kind
354       break;
355     }
356     break;
357   }
358 
359   return Kind;
360 }
361 
362 } // namespace
363 
364 void SymbolGraphSerializer::anchor() {}
365 
366 /// Defines the format version emitted by SymbolGraphSerializer.
367 const VersionTuple SymbolGraphSerializer::FormatVersion{0, 5, 3};
368 
369 Object SymbolGraphSerializer::serializeMetadata() const {
370   Object Metadata;
371   serializeObject(Metadata, "formatVersion",
372                   serializeSemanticVersion(FormatVersion));
373   Metadata["generator"] = clang::getClangFullVersion();
374   return Metadata;
375 }
376 
377 Object SymbolGraphSerializer::serializeModule() const {
378   Object Module;
379   // FIXME: We might not be building a module, some Clang-based languages might
380   // not have a "module" concept. Figure out a way to provide a name to
381   // describe the API set.
382   Module["name"] = "";
383   serializeObject(Module, "platform", serializePlatform(API.getTarget()));
384   return Module;
385 }
386 
387 bool SymbolGraphSerializer::shouldSkip(const APIRecord &Record) const {
388   // Skip unconditionally unavailable symbols
389   if (Record.Availability.isUnconditionallyUnavailable())
390     return true;
391 
392   return false;
393 }
394 
395 Optional<Object>
396 SymbolGraphSerializer::serializeAPIRecord(const APIRecord &Record) const {
397   if (shouldSkip(Record))
398     return None;
399 
400   Object Obj;
401   serializeObject(Obj, "identifier",
402                   serializeIdentifier(Record, API.getLangOpts()));
403   serializeObject(Obj, "kind", serializeSymbolKind(Record, API.getLangOpts()));
404   serializeObject(Obj, "names", serializeNames(Record));
405   serializeObject(
406       Obj, "location",
407       serializeSourcePosition(Record.Location, /*IncludeFileURI=*/true));
408   serializeObject(Obj, "availbility",
409                   serializeAvailability(Record.Availability));
410   serializeObject(Obj, "docComment", serializeDocComment(Record.Comment));
411   serializeArray(Obj, "declarationFragments",
412                  serializeDeclarationFragments(Record.Declaration));
413 
414   return Obj;
415 }
416 
417 void SymbolGraphSerializer::serializeGlobalRecord(const GlobalRecord &Record) {
418   auto Obj = serializeAPIRecord(Record);
419   if (!Obj)
420     return;
421 
422   if (Record.GlobalKind == GVKind::Function)
423     serializeObject(*Obj, "parameters",
424                     serializeFunctionSignature(Record.Signature));
425 
426   Symbols.emplace_back(std::move(*Obj));
427 }
428 
429 Object SymbolGraphSerializer::serialize() {
430   Object Root;
431   serializeObject(Root, "metadata", serializeMetadata());
432   serializeObject(Root, "module", serializeModule());
433 
434   // Serialize global records in the API set.
435   for (const auto &Global : API.getGlobals())
436     serializeGlobalRecord(*Global.second);
437 
438   Root["symbols"] = std::move(Symbols);
439   Root["relationhips"] = std::move(Relationships);
440 
441   return Root;
442 }
443 
444 void SymbolGraphSerializer::serialize(raw_ostream &os) {
445   Object root = serialize();
446   if (Options.Compact)
447     os << formatv("{0}", Value(std::move(root))) << "\n";
448   else
449     os << formatv("{0:2}", Value(std::move(root))) << "\n";
450 }
451