1 //===- ExtractAPI/Serialization/SymbolGraphSerializer.cpp -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements the SymbolGraphSerializer. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h" 15 #include "clang/Basic/Version.h" 16 #include "clang/ExtractAPI/API.h" 17 #include "llvm/Support/JSON.h" 18 #include "llvm/Support/Path.h" 19 #include "llvm/Support/VersionTuple.h" 20 21 using namespace clang; 22 using namespace clang::extractapi; 23 using namespace llvm; 24 using namespace llvm::json; 25 26 namespace { 27 28 /// Helper function to inject a JSON object \p Obj into another object \p Paren 29 /// at position \p Key. 30 void serializeObject(Object &Paren, StringRef Key, Optional<Object> Obj) { 31 if (Obj) 32 Paren[Key] = std::move(Obj.getValue()); 33 } 34 35 /// Helper function to inject a JSON array \p Array into object \p Paren at 36 /// position \p Key. 37 void serializeArray(Object &Paren, StringRef Key, Optional<Array> Array) { 38 if (Array) 39 Paren[Key] = std::move(Array.getValue()); 40 } 41 42 /// Serialize a \c VersionTuple \p V with the Symbol Graph semantic version 43 /// format. 44 /// 45 /// A semantic version object contains three numeric fields, representing the 46 /// \c major, \c minor, and \c patch parts of the version tuple. 47 /// For example version tuple 1.0.3 is serialized as: 48 /// \code 49 /// { 50 /// "major" : 1, 51 /// "minor" : 0, 52 /// "patch" : 3 53 /// } 54 /// \endcode 55 /// 56 /// \returns \c None if the version \p V is empty, or an \c Object containing 57 /// the semantic version representation of \p V. 58 Optional<Object> serializeSemanticVersion(const VersionTuple &V) { 59 if (V.empty()) 60 return None; 61 62 Object Version; 63 Version["major"] = V.getMajor(); 64 Version["minor"] = V.getMinor().getValueOr(0); 65 Version["patch"] = V.getSubminor().getValueOr(0); 66 return Version; 67 } 68 69 /// Serialize the OS information in the Symbol Graph platform property. 70 /// 71 /// The OS information in Symbol Graph contains the \c name of the OS, and an 72 /// optional \c minimumVersion semantic version field. 73 Object serializeOperatingSystem(const Triple &T) { 74 Object OS; 75 OS["name"] = T.getOSTypeName(T.getOS()); 76 serializeObject(OS, "minimumVersion", 77 serializeSemanticVersion(T.getMinimumSupportedOSVersion())); 78 return OS; 79 } 80 81 /// Serialize the platform information in the Symbol Graph module section. 82 /// 83 /// The platform object describes a target platform triple in corresponding 84 /// three fields: \c architecture, \c vendor, and \c operatingSystem. 85 Object serializePlatform(const Triple &T) { 86 Object Platform; 87 Platform["architecture"] = T.getArchName(); 88 Platform["vendor"] = T.getVendorName(); 89 Platform["operatingSystem"] = serializeOperatingSystem(T); 90 return Platform; 91 } 92 93 /// Serialize a source location in file. 94 /// 95 /// \param Loc The presumed location to serialize. 96 /// \param IncludeFileURI If true, include the file path of \p Loc as a URI. 97 /// Defaults to false. 98 Object serializeSourcePosition(const PresumedLoc &Loc, 99 bool IncludeFileURI = false) { 100 assert(Loc.isValid() && "invalid source position"); 101 102 Object SourcePosition; 103 SourcePosition["line"] = Loc.getLine(); 104 SourcePosition["character"] = Loc.getColumn(); 105 106 if (IncludeFileURI) { 107 std::string FileURI = "file://"; 108 // Normalize file path to use forward slashes for the URI. 109 FileURI += sys::path::convert_to_slash(Loc.getFilename()); 110 SourcePosition["uri"] = FileURI; 111 } 112 113 return SourcePosition; 114 } 115 116 /// Serialize a source range with begin and end locations. 117 Object serializeSourceRange(const PresumedLoc &BeginLoc, 118 const PresumedLoc &EndLoc) { 119 Object SourceRange; 120 serializeObject(SourceRange, "start", serializeSourcePosition(BeginLoc)); 121 serializeObject(SourceRange, "end", serializeSourcePosition(EndLoc)); 122 return SourceRange; 123 } 124 125 /// Serialize the availability attributes of a symbol. 126 /// 127 /// Availability information contains the introduced, deprecated, and obsoleted 128 /// versions of the symbol as semantic versions, if not default. 129 /// Availability information also contains flags to indicate if the symbol is 130 /// unconditionally unavailable or deprecated, 131 /// i.e. \c __attribute__((unavailable)) and \c __attribute__((deprecated)). 132 /// 133 /// \returns \c None if the symbol has default availability attributes, or 134 /// an \c Object containing the formatted availability information. 135 Optional<Object> serializeAvailability(const AvailabilityInfo &Avail) { 136 if (Avail.isDefault()) 137 return None; 138 139 Object Availbility; 140 serializeObject(Availbility, "introducedVersion", 141 serializeSemanticVersion(Avail.Introduced)); 142 serializeObject(Availbility, "deprecatedVersion", 143 serializeSemanticVersion(Avail.Deprecated)); 144 serializeObject(Availbility, "obsoletedVersion", 145 serializeSemanticVersion(Avail.Obsoleted)); 146 if (Avail.isUnavailable()) 147 Availbility["isUnconditionallyUnavailable"] = true; 148 if (Avail.isUnconditionallyDeprecated()) 149 Availbility["isUnconditionallyDeprecated"] = true; 150 151 return Availbility; 152 } 153 154 /// Get the short language name string for interface language references. 155 StringRef getLanguageName(const LangOptions &LangOpts) { 156 auto Language = 157 LangStandard::getLangStandardForKind(LangOpts.LangStd).getLanguage(); 158 switch (Language) { 159 case Language::C: 160 return "c"; 161 case Language::ObjC: 162 return "objective-c"; 163 164 // Unsupported language currently 165 case Language::CXX: 166 case Language::ObjCXX: 167 case Language::OpenCL: 168 case Language::OpenCLCXX: 169 case Language::CUDA: 170 case Language::RenderScript: 171 case Language::HIP: 172 173 // Languages that the frontend cannot parse and compile 174 case Language::Unknown: 175 case Language::Asm: 176 case Language::LLVM_IR: 177 llvm_unreachable("Unsupported language kind"); 178 } 179 180 llvm_unreachable("Unhandled language kind"); 181 } 182 183 /// Serialize the identifier object as specified by the Symbol Graph format. 184 /// 185 /// The identifier property of a symbol contains the USR for precise and unique 186 /// references, and the interface language name. 187 Object serializeIdentifier(const APIRecord &Record, 188 const LangOptions &LangOpts) { 189 Object Identifier; 190 Identifier["precise"] = Record.USR; 191 Identifier["interfaceLanguage"] = getLanguageName(LangOpts); 192 193 return Identifier; 194 } 195 196 /// Serialize the documentation comments attached to a symbol, as specified by 197 /// the Symbol Graph format. 198 /// 199 /// The Symbol Graph \c docComment object contains an array of lines. Each line 200 /// represents one line of striped documentation comment, with source range 201 /// information. 202 /// e.g. 203 /// \code 204 /// /// This is a documentation comment 205 /// ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' First line. 206 /// /// with multiple lines. 207 /// ^~~~~~~~~~~~~~~~~~~~~~~' Second line. 208 /// \endcode 209 /// 210 /// \returns \c None if \p Comment is empty, or an \c Object containing the 211 /// formatted lines. 212 Optional<Object> serializeDocComment(const DocComment &Comment) { 213 if (Comment.empty()) 214 return None; 215 216 Object DocComment; 217 Array LinesArray; 218 for (const auto &CommentLine : Comment) { 219 Object Line; 220 Line["text"] = CommentLine.Text; 221 serializeObject(Line, "range", 222 serializeSourceRange(CommentLine.Begin, CommentLine.End)); 223 LinesArray.emplace_back(std::move(Line)); 224 } 225 serializeArray(DocComment, "lines", LinesArray); 226 227 return DocComment; 228 } 229 230 /// Serialize the declaration fragments of a symbol. 231 /// 232 /// The Symbol Graph declaration fragments is an array of tagged important 233 /// parts of a symbol's declaration. The fragments sequence can be joined to 234 /// form spans of declaration text, with attached information useful for 235 /// purposes like syntax-highlighting etc. For example: 236 /// \code 237 /// const int pi; -> "declarationFragments" : [ 238 /// { 239 /// "kind" : "keyword", 240 /// "spelling" : "const" 241 /// }, 242 /// { 243 /// "kind" : "text", 244 /// "spelling" : " " 245 /// }, 246 /// { 247 /// "kind" : "typeIdentifier", 248 /// "preciseIdentifier" : "c:I", 249 /// "spelling" : "int" 250 /// }, 251 /// { 252 /// "kind" : "text", 253 /// "spelling" : " " 254 /// }, 255 /// { 256 /// "kind" : "identifier", 257 /// "spelling" : "pi" 258 /// } 259 /// ] 260 /// \endcode 261 /// 262 /// \returns \c None if \p DF is empty, or an \c Array containing the formatted 263 /// declaration fragments array. 264 Optional<Array> serializeDeclarationFragments(const DeclarationFragments &DF) { 265 if (DF.getFragments().empty()) 266 return None; 267 268 Array Fragments; 269 for (const auto &F : DF.getFragments()) { 270 Object Fragment; 271 Fragment["spelling"] = F.Spelling; 272 Fragment["kind"] = DeclarationFragments::getFragmentKindString(F.Kind); 273 if (!F.PreciseIdentifier.empty()) 274 Fragment["preciseIdentifier"] = F.PreciseIdentifier; 275 Fragments.emplace_back(std::move(Fragment)); 276 } 277 278 return Fragments; 279 } 280 281 /// Serialize the function signature field of a function, as specified by the 282 /// Symbol Graph format. 283 /// 284 /// The Symbol Graph function signature property contains two arrays. 285 /// - The \c returns array is the declaration fragments of the return type; 286 /// - The \c parameters array contains names and declaration fragments of the 287 /// parameters. 288 /// 289 /// \returns \c None if \p FS is empty, or an \c Object containing the 290 /// formatted function signature. 291 Optional<Object> serializeFunctionSignature(const FunctionSignature &FS) { 292 if (FS.empty()) 293 return None; 294 295 Object Signature; 296 serializeArray(Signature, "returns", 297 serializeDeclarationFragments(FS.getReturnType())); 298 299 Array Parameters; 300 for (const auto &P : FS.getParameters()) { 301 Object Parameter; 302 Parameter["name"] = P.Name; 303 serializeArray(Parameter, "declarationFragments", 304 serializeDeclarationFragments(P.Fragments)); 305 Parameters.emplace_back(std::move(Parameter)); 306 } 307 308 if (!Parameters.empty()) 309 Signature["parameters"] = std::move(Parameters); 310 311 return Signature; 312 } 313 314 /// Serialize the \c names field of a symbol as specified by the Symbol Graph 315 /// format. 316 /// 317 /// The Symbol Graph names field contains multiple representations of a symbol 318 /// that can be used for different applications: 319 /// - \c title : The simple declared name of the symbol; 320 /// - \c subHeading : An array of declaration fragments that provides tags, 321 /// and potentially more tokens (for example the \c +/- symbol for 322 /// Objective-C methods). Can be used as sub-headings for documentation. 323 Object serializeNames(const APIRecord &Record) { 324 Object Names; 325 Names["title"] = Record.Name; 326 serializeArray(Names, "subHeading", 327 serializeDeclarationFragments(Record.SubHeading)); 328 329 return Names; 330 } 331 332 /// Serialize the symbol kind information. 333 /// 334 /// The Symbol Graph symbol kind property contains a shorthand \c identifier 335 /// which is prefixed by the source language name, useful for tooling to parse 336 /// the kind, and a \c displayName for rendering human-readable names. 337 Object serializeSymbolKind(const APIRecord &Record, 338 const LangOptions &LangOpts) { 339 auto AddLangPrefix = [&LangOpts](StringRef S) -> std::string { 340 return (getLanguageName(LangOpts) + "." + S).str(); 341 }; 342 343 Object Kind; 344 switch (Record.getKind()) { 345 case APIRecord::RK_Global: { 346 auto *GR = dyn_cast<GlobalRecord>(&Record); 347 switch (GR->GlobalKind) { 348 case GVKind::Function: 349 Kind["identifier"] = AddLangPrefix("func"); 350 Kind["displayName"] = "Function"; 351 break; 352 case GVKind::Variable: 353 Kind["identifier"] = AddLangPrefix("var"); 354 Kind["displayName"] = "Global Variable"; 355 break; 356 case GVKind::Unknown: 357 // Unknown global kind 358 break; 359 } 360 break; 361 } 362 case APIRecord::RK_EnumConstant: 363 Kind["identifier"] = AddLangPrefix("enum.case"); 364 Kind["displayName"] = "Enumeration Case"; 365 break; 366 case APIRecord::RK_Enum: 367 Kind["identifier"] = AddLangPrefix("enum"); 368 Kind["displayName"] = "Enumeration"; 369 break; 370 case APIRecord::RK_StructField: 371 Kind["identifier"] = AddLangPrefix("property"); 372 Kind["displayName"] = "Instance Property"; 373 break; 374 case APIRecord::RK_Struct: 375 Kind["identifier"] = AddLangPrefix("struct"); 376 Kind["displayName"] = "Structure"; 377 break; 378 } 379 380 return Kind; 381 } 382 383 } // namespace 384 385 void SymbolGraphSerializer::anchor() {} 386 387 /// Defines the format version emitted by SymbolGraphSerializer. 388 const VersionTuple SymbolGraphSerializer::FormatVersion{0, 5, 3}; 389 390 Object SymbolGraphSerializer::serializeMetadata() const { 391 Object Metadata; 392 serializeObject(Metadata, "formatVersion", 393 serializeSemanticVersion(FormatVersion)); 394 Metadata["generator"] = clang::getClangFullVersion(); 395 return Metadata; 396 } 397 398 Object SymbolGraphSerializer::serializeModule() const { 399 Object Module; 400 // The user is expected to always pass `--product-name=` on the command line 401 // to populate this field. 402 Module["name"] = ProductName; 403 serializeObject(Module, "platform", serializePlatform(API.getTarget())); 404 return Module; 405 } 406 407 bool SymbolGraphSerializer::shouldSkip(const APIRecord &Record) const { 408 // Skip unconditionally unavailable symbols 409 if (Record.Availability.isUnconditionallyUnavailable()) 410 return true; 411 412 return false; 413 } 414 415 Optional<Object> 416 SymbolGraphSerializer::serializeAPIRecord(const APIRecord &Record) const { 417 if (shouldSkip(Record)) 418 return None; 419 420 Object Obj; 421 serializeObject(Obj, "identifier", 422 serializeIdentifier(Record, API.getLangOpts())); 423 serializeObject(Obj, "kind", serializeSymbolKind(Record, API.getLangOpts())); 424 serializeObject(Obj, "names", serializeNames(Record)); 425 serializeObject( 426 Obj, "location", 427 serializeSourcePosition(Record.Location, /*IncludeFileURI=*/true)); 428 serializeObject(Obj, "availbility", 429 serializeAvailability(Record.Availability)); 430 serializeObject(Obj, "docComment", serializeDocComment(Record.Comment)); 431 serializeArray(Obj, "declarationFragments", 432 serializeDeclarationFragments(Record.Declaration)); 433 434 return Obj; 435 } 436 437 StringRef SymbolGraphSerializer::getRelationshipString(RelationshipKind Kind) { 438 switch (Kind) { 439 case RelationshipKind::MemberOf: 440 return "memberOf"; 441 } 442 llvm_unreachable("Unhandled relationship kind"); 443 } 444 445 void SymbolGraphSerializer::serializeRelationship(RelationshipKind Kind, 446 const APIRecord &Source, 447 const APIRecord &Target) { 448 Object Relationship; 449 Relationship["source"] = Source.USR; 450 Relationship["target"] = Target.USR; 451 Relationship["kind"] = getRelationshipString(Kind); 452 453 Relationships.emplace_back(std::move(Relationship)); 454 } 455 456 void SymbolGraphSerializer::serializeGlobalRecord(const GlobalRecord &Record) { 457 auto Obj = serializeAPIRecord(Record); 458 if (!Obj) 459 return; 460 461 if (Record.GlobalKind == GVKind::Function) 462 serializeObject(*Obj, "parameters", 463 serializeFunctionSignature(Record.Signature)); 464 465 Symbols.emplace_back(std::move(*Obj)); 466 } 467 468 void SymbolGraphSerializer::serializeEnumRecord(const EnumRecord &Record) { 469 auto Enum = serializeAPIRecord(Record); 470 if (!Enum) 471 return; 472 473 Symbols.emplace_back(std::move(*Enum)); 474 475 for (const auto &Constant : Record.Constants) { 476 auto EnumConstant = serializeAPIRecord(*Constant); 477 if (!EnumConstant) 478 continue; 479 480 Symbols.emplace_back(std::move(*EnumConstant)); 481 serializeRelationship(RelationshipKind::MemberOf, *Constant, Record); 482 } 483 } 484 485 void SymbolGraphSerializer::serializeStructRecord(const StructRecord &Record) { 486 auto Struct = serializeAPIRecord(Record); 487 if (!Struct) 488 return; 489 490 Symbols.emplace_back(std::move(*Struct)); 491 492 for (const auto &Field : Record.Fields) { 493 auto StructField = serializeAPIRecord(*Field); 494 if (!StructField) 495 continue; 496 497 Symbols.emplace_back(std::move(*StructField)); 498 serializeRelationship(RelationshipKind::MemberOf, *Field, Record); 499 } 500 } 501 502 Object SymbolGraphSerializer::serialize() { 503 Object Root; 504 serializeObject(Root, "metadata", serializeMetadata()); 505 serializeObject(Root, "module", serializeModule()); 506 507 // Serialize global records in the API set. 508 for (const auto &Global : API.getGlobals()) 509 serializeGlobalRecord(*Global.second); 510 511 // Serialize enum records in the API set. 512 for (const auto &Enum : API.getEnums()) 513 serializeEnumRecord(*Enum.second); 514 515 // Serialize struct records in the API set. 516 for (const auto &Struct : API.getStructs()) 517 serializeStructRecord(*Struct.second); 518 519 Root["symbols"] = std::move(Symbols); 520 Root["relationhips"] = std::move(Relationships); 521 522 return Root; 523 } 524 525 void SymbolGraphSerializer::serialize(raw_ostream &os) { 526 Object root = serialize(); 527 if (Options.Compact) 528 os << formatv("{0}", Value(std::move(root))) << "\n"; 529 else 530 os << formatv("{0:2}", Value(std::move(root))) << "\n"; 531 } 532