1 //===- ExtractAPI/Serialization/SymbolGraphSerializer.cpp -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements the SymbolGraphSerializer. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h" 15 #include "clang/Basic/Version.h" 16 #include "clang/ExtractAPI/API.h" 17 #include "llvm/Support/JSON.h" 18 #include "llvm/Support/Path.h" 19 #include "llvm/Support/VersionTuple.h" 20 21 using namespace clang; 22 using namespace clang::extractapi; 23 using namespace llvm; 24 using namespace llvm::json; 25 26 namespace { 27 28 /// Helper function to inject a JSON object \p Obj into another object \p Paren 29 /// at position \p Key. 30 void serializeObject(Object &Paren, StringRef Key, Optional<Object> Obj) { 31 if (Obj) 32 Paren[Key] = std::move(Obj.getValue()); 33 } 34 35 /// Helper function to inject a JSON array \p Array into object \p Paren at 36 /// position \p Key. 37 void serializeArray(Object &Paren, StringRef Key, Optional<Array> Array) { 38 if (Array) 39 Paren[Key] = std::move(Array.getValue()); 40 } 41 42 /// Serialize a \c VersionTuple \p V with the Symbol Graph semantic version 43 /// format. 44 /// 45 /// A semantic version object contains three numeric fields, representing the 46 /// \c major, \c minor, and \c patch parts of the version tuple. 47 /// For example version tuple 1.0.3 is serialized as: 48 /// \code 49 /// { 50 /// "major" : 1, 51 /// "minor" : 0, 52 /// "patch" : 3 53 /// } 54 /// \endcode 55 /// 56 /// \returns \c None if the version \p V is empty, or an \c Object containing 57 /// the semantic version representation of \p V. 58 Optional<Object> serializeSemanticVersion(const VersionTuple &V) { 59 if (V.empty()) 60 return None; 61 62 Object Version; 63 Version["major"] = V.getMajor(); 64 Version["minor"] = V.getMinor().getValueOr(0); 65 Version["patch"] = V.getSubminor().getValueOr(0); 66 return Version; 67 } 68 69 /// Serialize the OS information in the Symbol Graph platform property. 70 /// 71 /// The OS information in Symbol Graph contains the \c name of the OS, and an 72 /// optional \c minimumVersion semantic version field. 73 Object serializeOperatingSystem(const Triple &T) { 74 Object OS; 75 OS["name"] = T.getOSTypeName(T.getOS()); 76 serializeObject(OS, "minimumVersion", 77 serializeSemanticVersion(T.getMinimumSupportedOSVersion())); 78 return OS; 79 } 80 81 /// Serialize the platform information in the Symbol Graph module section. 82 /// 83 /// The platform object describes a target platform triple in corresponding 84 /// three fields: \c architecture, \c vendor, and \c operatingSystem. 85 Object serializePlatform(const Triple &T) { 86 Object Platform; 87 Platform["architecture"] = T.getArchName(); 88 Platform["vendor"] = T.getVendorName(); 89 Platform["operatingSystem"] = serializeOperatingSystem(T); 90 return Platform; 91 } 92 93 /// Serialize a source location in file. 94 /// 95 /// \param Loc The presumed location to serialize. 96 /// \param IncludeFileURI If true, include the file path of \p Loc as a URI. 97 /// Defaults to false. 98 Object serializeSourcePosition(const PresumedLoc &Loc, 99 bool IncludeFileURI = false) { 100 assert(Loc.isValid() && "invalid source position"); 101 102 Object SourcePosition; 103 SourcePosition["line"] = Loc.getLine(); 104 SourcePosition["character"] = Loc.getColumn(); 105 106 if (IncludeFileURI) { 107 std::string FileURI = "file://"; 108 // Normalize file path to use forward slashes for the URI. 109 FileURI += sys::path::convert_to_slash(Loc.getFilename()); 110 SourcePosition["uri"] = FileURI; 111 } 112 113 return SourcePosition; 114 } 115 116 /// Serialize a source range with begin and end locations. 117 Object serializeSourceRange(const PresumedLoc &BeginLoc, 118 const PresumedLoc &EndLoc) { 119 Object SourceRange; 120 serializeObject(SourceRange, "start", serializeSourcePosition(BeginLoc)); 121 serializeObject(SourceRange, "end", serializeSourcePosition(EndLoc)); 122 return SourceRange; 123 } 124 125 /// Serialize the availability attributes of a symbol. 126 /// 127 /// Availability information contains the introduced, deprecated, and obsoleted 128 /// versions of the symbol as semantic versions, if not default. 129 /// Availability information also contains flags to indicate if the symbol is 130 /// unconditionally unavailable or deprecated, 131 /// i.e. \c __attribute__((unavailable)) and \c __attribute__((deprecated)). 132 /// 133 /// \returns \c None if the symbol has default availability attributes, or 134 /// an \c Object containing the formatted availability information. 135 Optional<Object> serializeAvailability(const AvailabilityInfo &Avail) { 136 if (Avail.isDefault()) 137 return None; 138 139 Object Availbility; 140 serializeObject(Availbility, "introducedVersion", 141 serializeSemanticVersion(Avail.Introduced)); 142 serializeObject(Availbility, "deprecatedVersion", 143 serializeSemanticVersion(Avail.Deprecated)); 144 serializeObject(Availbility, "obsoletedVersion", 145 serializeSemanticVersion(Avail.Obsoleted)); 146 if (Avail.isUnavailable()) 147 Availbility["isUnconditionallyUnavailable"] = true; 148 if (Avail.isUnconditionallyDeprecated()) 149 Availbility["isUnconditionallyDeprecated"] = true; 150 151 return Availbility; 152 } 153 154 /// Get the language name string for interface language references. 155 StringRef getLanguageName(Language Lang) { 156 switch (Lang) { 157 case Language::C: 158 return "c"; 159 case Language::ObjC: 160 return "objective-c"; 161 162 // Unsupported language currently 163 case Language::CXX: 164 case Language::ObjCXX: 165 case Language::OpenCL: 166 case Language::OpenCLCXX: 167 case Language::CUDA: 168 case Language::RenderScript: 169 case Language::HIP: 170 case Language::HLSL: 171 172 // Languages that the frontend cannot parse and compile 173 case Language::Unknown: 174 case Language::Asm: 175 case Language::LLVM_IR: 176 llvm_unreachable("Unsupported language kind"); 177 } 178 179 llvm_unreachable("Unhandled language kind"); 180 } 181 182 /// Serialize the identifier object as specified by the Symbol Graph format. 183 /// 184 /// The identifier property of a symbol contains the USR for precise and unique 185 /// references, and the interface language name. 186 Object serializeIdentifier(const APIRecord &Record, Language Lang) { 187 Object Identifier; 188 Identifier["precise"] = Record.USR; 189 Identifier["interfaceLanguage"] = getLanguageName(Lang); 190 191 return Identifier; 192 } 193 194 /// Serialize the documentation comments attached to a symbol, as specified by 195 /// the Symbol Graph format. 196 /// 197 /// The Symbol Graph \c docComment object contains an array of lines. Each line 198 /// represents one line of striped documentation comment, with source range 199 /// information. 200 /// e.g. 201 /// \code 202 /// /// This is a documentation comment 203 /// ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' First line. 204 /// /// with multiple lines. 205 /// ^~~~~~~~~~~~~~~~~~~~~~~' Second line. 206 /// \endcode 207 /// 208 /// \returns \c None if \p Comment is empty, or an \c Object containing the 209 /// formatted lines. 210 Optional<Object> serializeDocComment(const DocComment &Comment) { 211 if (Comment.empty()) 212 return None; 213 214 Object DocComment; 215 Array LinesArray; 216 for (const auto &CommentLine : Comment) { 217 Object Line; 218 Line["text"] = CommentLine.Text; 219 serializeObject(Line, "range", 220 serializeSourceRange(CommentLine.Begin, CommentLine.End)); 221 LinesArray.emplace_back(std::move(Line)); 222 } 223 serializeArray(DocComment, "lines", LinesArray); 224 225 return DocComment; 226 } 227 228 /// Serialize the declaration fragments of a symbol. 229 /// 230 /// The Symbol Graph declaration fragments is an array of tagged important 231 /// parts of a symbol's declaration. The fragments sequence can be joined to 232 /// form spans of declaration text, with attached information useful for 233 /// purposes like syntax-highlighting etc. For example: 234 /// \code 235 /// const int pi; -> "declarationFragments" : [ 236 /// { 237 /// "kind" : "keyword", 238 /// "spelling" : "const" 239 /// }, 240 /// { 241 /// "kind" : "text", 242 /// "spelling" : " " 243 /// }, 244 /// { 245 /// "kind" : "typeIdentifier", 246 /// "preciseIdentifier" : "c:I", 247 /// "spelling" : "int" 248 /// }, 249 /// { 250 /// "kind" : "text", 251 /// "spelling" : " " 252 /// }, 253 /// { 254 /// "kind" : "identifier", 255 /// "spelling" : "pi" 256 /// } 257 /// ] 258 /// \endcode 259 /// 260 /// \returns \c None if \p DF is empty, or an \c Array containing the formatted 261 /// declaration fragments array. 262 Optional<Array> serializeDeclarationFragments(const DeclarationFragments &DF) { 263 if (DF.getFragments().empty()) 264 return None; 265 266 Array Fragments; 267 for (const auto &F : DF.getFragments()) { 268 Object Fragment; 269 Fragment["spelling"] = F.Spelling; 270 Fragment["kind"] = DeclarationFragments::getFragmentKindString(F.Kind); 271 if (!F.PreciseIdentifier.empty()) 272 Fragment["preciseIdentifier"] = F.PreciseIdentifier; 273 Fragments.emplace_back(std::move(Fragment)); 274 } 275 276 return Fragments; 277 } 278 279 /// Serialize the function signature field of a function, as specified by the 280 /// Symbol Graph format. 281 /// 282 /// The Symbol Graph function signature property contains two arrays. 283 /// - The \c returns array is the declaration fragments of the return type; 284 /// - The \c parameters array contains names and declaration fragments of the 285 /// parameters. 286 /// 287 /// \returns \c None if \p FS is empty, or an \c Object containing the 288 /// formatted function signature. 289 Optional<Object> serializeFunctionSignature(const FunctionSignature &FS) { 290 if (FS.empty()) 291 return None; 292 293 Object Signature; 294 serializeArray(Signature, "returns", 295 serializeDeclarationFragments(FS.getReturnType())); 296 297 Array Parameters; 298 for (const auto &P : FS.getParameters()) { 299 Object Parameter; 300 Parameter["name"] = P.Name; 301 serializeArray(Parameter, "declarationFragments", 302 serializeDeclarationFragments(P.Fragments)); 303 Parameters.emplace_back(std::move(Parameter)); 304 } 305 306 if (!Parameters.empty()) 307 Signature["parameters"] = std::move(Parameters); 308 309 return Signature; 310 } 311 312 /// Serialize the \c names field of a symbol as specified by the Symbol Graph 313 /// format. 314 /// 315 /// The Symbol Graph names field contains multiple representations of a symbol 316 /// that can be used for different applications: 317 /// - \c title : The simple declared name of the symbol; 318 /// - \c subHeading : An array of declaration fragments that provides tags, 319 /// and potentially more tokens (for example the \c +/- symbol for 320 /// Objective-C methods). Can be used as sub-headings for documentation. 321 Object serializeNames(const APIRecord &Record) { 322 Object Names; 323 Names["title"] = Record.Name; 324 serializeArray(Names, "subHeading", 325 serializeDeclarationFragments(Record.SubHeading)); 326 327 return Names; 328 } 329 330 /// Serialize the symbol kind information. 331 /// 332 /// The Symbol Graph symbol kind property contains a shorthand \c identifier 333 /// which is prefixed by the source language name, useful for tooling to parse 334 /// the kind, and a \c displayName for rendering human-readable names. 335 Object serializeSymbolKind(const APIRecord &Record, Language Lang) { 336 auto AddLangPrefix = [&Lang](StringRef S) -> std::string { 337 return (getLanguageName(Lang) + "." + S).str(); 338 }; 339 340 Object Kind; 341 switch (Record.getKind()) { 342 case APIRecord::RK_Global: { 343 auto *GR = dyn_cast<GlobalRecord>(&Record); 344 switch (GR->GlobalKind) { 345 case GVKind::Function: 346 Kind["identifier"] = AddLangPrefix("func"); 347 Kind["displayName"] = "Function"; 348 break; 349 case GVKind::Variable: 350 Kind["identifier"] = AddLangPrefix("var"); 351 Kind["displayName"] = "Global Variable"; 352 break; 353 case GVKind::Unknown: 354 // Unknown global kind 355 break; 356 } 357 break; 358 } 359 case APIRecord::RK_EnumConstant: 360 Kind["identifier"] = AddLangPrefix("enum.case"); 361 Kind["displayName"] = "Enumeration Case"; 362 break; 363 case APIRecord::RK_Enum: 364 Kind["identifier"] = AddLangPrefix("enum"); 365 Kind["displayName"] = "Enumeration"; 366 break; 367 case APIRecord::RK_StructField: 368 Kind["identifier"] = AddLangPrefix("property"); 369 Kind["displayName"] = "Instance Property"; 370 break; 371 case APIRecord::RK_Struct: 372 Kind["identifier"] = AddLangPrefix("struct"); 373 Kind["displayName"] = "Structure"; 374 break; 375 case APIRecord::RK_ObjCIvar: 376 Kind["identifier"] = AddLangPrefix("ivar"); 377 Kind["displayName"] = "Instance Variable"; 378 break; 379 case APIRecord::RK_ObjCMethod: 380 if (dyn_cast<ObjCMethodRecord>(&Record)->IsInstanceMethod) { 381 Kind["identifier"] = AddLangPrefix("method"); 382 Kind["displayName"] = "Instance Method"; 383 } else { 384 Kind["identifier"] = AddLangPrefix("type.method"); 385 Kind["displayName"] = "Type Method"; 386 } 387 break; 388 case APIRecord::RK_ObjCProperty: 389 Kind["identifier"] = AddLangPrefix("property"); 390 Kind["displayName"] = "Instance Property"; 391 break; 392 case APIRecord::RK_ObjCInterface: 393 Kind["identifier"] = AddLangPrefix("class"); 394 Kind["displayName"] = "Class"; 395 break; 396 case APIRecord::RK_ObjCProtocol: 397 Kind["identifier"] = AddLangPrefix("protocol"); 398 Kind["displayName"] = "Protocol"; 399 break; 400 } 401 402 return Kind; 403 } 404 405 } // namespace 406 407 void SymbolGraphSerializer::anchor() {} 408 409 /// Defines the format version emitted by SymbolGraphSerializer. 410 const VersionTuple SymbolGraphSerializer::FormatVersion{0, 5, 3}; 411 412 Object SymbolGraphSerializer::serializeMetadata() const { 413 Object Metadata; 414 serializeObject(Metadata, "formatVersion", 415 serializeSemanticVersion(FormatVersion)); 416 Metadata["generator"] = clang::getClangFullVersion(); 417 return Metadata; 418 } 419 420 Object SymbolGraphSerializer::serializeModule() const { 421 Object Module; 422 // The user is expected to always pass `--product-name=` on the command line 423 // to populate this field. 424 Module["name"] = ProductName; 425 serializeObject(Module, "platform", serializePlatform(API.getTarget())); 426 return Module; 427 } 428 429 bool SymbolGraphSerializer::shouldSkip(const APIRecord &Record) const { 430 // Skip unconditionally unavailable symbols 431 if (Record.Availability.isUnconditionallyUnavailable()) 432 return true; 433 434 return false; 435 } 436 437 Optional<Object> 438 SymbolGraphSerializer::serializeAPIRecord(const APIRecord &Record) const { 439 if (shouldSkip(Record)) 440 return None; 441 442 Object Obj; 443 serializeObject(Obj, "identifier", 444 serializeIdentifier(Record, API.getLanguage())); 445 serializeObject(Obj, "kind", serializeSymbolKind(Record, API.getLanguage())); 446 serializeObject(Obj, "names", serializeNames(Record)); 447 serializeObject( 448 Obj, "location", 449 serializeSourcePosition(Record.Location, /*IncludeFileURI=*/true)); 450 serializeObject(Obj, "availbility", 451 serializeAvailability(Record.Availability)); 452 serializeObject(Obj, "docComment", serializeDocComment(Record.Comment)); 453 serializeArray(Obj, "declarationFragments", 454 serializeDeclarationFragments(Record.Declaration)); 455 456 return Obj; 457 } 458 459 StringRef SymbolGraphSerializer::getRelationshipString(RelationshipKind Kind) { 460 switch (Kind) { 461 case RelationshipKind::MemberOf: 462 return "memberOf"; 463 case RelationshipKind::InheritsFrom: 464 return "inheritsFrom"; 465 case RelationshipKind::ConformsTo: 466 return "conformsTo"; 467 } 468 llvm_unreachable("Unhandled relationship kind"); 469 } 470 471 void SymbolGraphSerializer::serializeRelationship(RelationshipKind Kind, 472 SymbolReference Source, 473 SymbolReference Target) { 474 Object Relationship; 475 Relationship["source"] = Source.USR; 476 Relationship["target"] = Target.USR; 477 Relationship["kind"] = getRelationshipString(Kind); 478 479 Relationships.emplace_back(std::move(Relationship)); 480 } 481 482 void SymbolGraphSerializer::serializeGlobalRecord(const GlobalRecord &Record) { 483 auto Obj = serializeAPIRecord(Record); 484 if (!Obj) 485 return; 486 487 if (Record.GlobalKind == GVKind::Function) 488 serializeObject(*Obj, "parameters", 489 serializeFunctionSignature(Record.Signature)); 490 491 Symbols.emplace_back(std::move(*Obj)); 492 } 493 494 void SymbolGraphSerializer::serializeEnumRecord(const EnumRecord &Record) { 495 auto Enum = serializeAPIRecord(Record); 496 if (!Enum) 497 return; 498 499 Symbols.emplace_back(std::move(*Enum)); 500 501 for (const auto &Constant : Record.Constants) { 502 auto EnumConstant = serializeAPIRecord(*Constant); 503 if (!EnumConstant) 504 continue; 505 506 Symbols.emplace_back(std::move(*EnumConstant)); 507 serializeRelationship(RelationshipKind::MemberOf, *Constant, Record); 508 } 509 } 510 511 void SymbolGraphSerializer::serializeStructRecord(const StructRecord &Record) { 512 auto Struct = serializeAPIRecord(Record); 513 if (!Struct) 514 return; 515 516 Symbols.emplace_back(std::move(*Struct)); 517 518 for (const auto &Field : Record.Fields) { 519 auto StructField = serializeAPIRecord(*Field); 520 if (!StructField) 521 continue; 522 523 Symbols.emplace_back(std::move(*StructField)); 524 serializeRelationship(RelationshipKind::MemberOf, *Field, Record); 525 } 526 } 527 528 void SymbolGraphSerializer::serializeObjCContainerRecord( 529 const ObjCContainerRecord &Record) { 530 auto ObjCContainer = serializeAPIRecord(Record); 531 if (!ObjCContainer) 532 return; 533 534 Symbols.emplace_back(std::move(*ObjCContainer)); 535 536 // Record instance variables and that the instance variables are members of 537 // the container. 538 for (const auto &Ivar : Record.Ivars) { 539 auto ObjCIvar = serializeAPIRecord(*Ivar); 540 if (!ObjCIvar) 541 continue; 542 543 Symbols.emplace_back(std::move(*ObjCIvar)); 544 serializeRelationship(RelationshipKind::MemberOf, *Ivar, Record); 545 } 546 547 // Record methods and that the methods are members of the container. 548 for (const auto &Method : Record.Methods) { 549 auto ObjCMethod = serializeAPIRecord(*Method); 550 if (!ObjCMethod) 551 continue; 552 553 Symbols.emplace_back(std::move(*ObjCMethod)); 554 serializeRelationship(RelationshipKind::MemberOf, *Method, Record); 555 } 556 557 // Record properties and that the properties are members of the container. 558 for (const auto &Property : Record.Properties) { 559 auto ObjCProperty = serializeAPIRecord(*Property); 560 if (!ObjCProperty) 561 continue; 562 563 Symbols.emplace_back(std::move(*ObjCProperty)); 564 serializeRelationship(RelationshipKind::MemberOf, *Property, Record); 565 } 566 567 for (const auto &Protocol : Record.Protocols) 568 // Record that Record conforms to Protocol. 569 serializeRelationship(RelationshipKind::ConformsTo, Record, Protocol); 570 571 if (auto *ObjCInterface = dyn_cast<ObjCInterfaceRecord>(&Record)) 572 if (!ObjCInterface->SuperClass.empty()) 573 // If Record is an Objective-C interface record and it has a super class, 574 // record that Record is inherited from SuperClass. 575 serializeRelationship(RelationshipKind::InheritsFrom, Record, 576 ObjCInterface->SuperClass); 577 } 578 579 Object SymbolGraphSerializer::serialize() { 580 Object Root; 581 serializeObject(Root, "metadata", serializeMetadata()); 582 serializeObject(Root, "module", serializeModule()); 583 584 // Serialize global records in the API set. 585 for (const auto &Global : API.getGlobals()) 586 serializeGlobalRecord(*Global.second); 587 588 // Serialize enum records in the API set. 589 for (const auto &Enum : API.getEnums()) 590 serializeEnumRecord(*Enum.second); 591 592 // Serialize struct records in the API set. 593 for (const auto &Struct : API.getStructs()) 594 serializeStructRecord(*Struct.second); 595 596 // Serialize Objective-C interface records in the API set. 597 for (const auto &ObjCInterface : API.getObjCInterfaces()) 598 serializeObjCContainerRecord(*ObjCInterface.second); 599 600 // Serialize Objective-C protocol records in the API set. 601 for (const auto &ObjCProtocol : API.getObjCProtocols()) 602 serializeObjCContainerRecord(*ObjCProtocol.second); 603 604 Root["symbols"] = std::move(Symbols); 605 Root["relationhips"] = std::move(Relationships); 606 607 return Root; 608 } 609 610 void SymbolGraphSerializer::serialize(raw_ostream &os) { 611 Object root = serialize(); 612 if (Options.Compact) 613 os << formatv("{0}", Value(std::move(root))) << "\n"; 614 else 615 os << formatv("{0:2}", Value(std::move(root))) << "\n"; 616 } 617