1 //===- ExtractAPI/Serialization/SymbolGraphSerializer.cpp -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements the SymbolGraphSerializer. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h" 15 #include "clang/Basic/Version.h" 16 #include "clang/ExtractAPI/API.h" 17 #include "llvm/Support/JSON.h" 18 #include "llvm/Support/Path.h" 19 #include "llvm/Support/VersionTuple.h" 20 21 using namespace clang; 22 using namespace clang::extractapi; 23 using namespace llvm; 24 using namespace llvm::json; 25 26 namespace { 27 28 /// Helper function to inject a JSON object \p Obj into another object \p Paren 29 /// at position \p Key. 30 void serializeObject(Object &Paren, StringRef Key, Optional<Object> Obj) { 31 if (Obj) 32 Paren[Key] = std::move(Obj.getValue()); 33 } 34 35 /// Helper function to inject a JSON array \p Array into object \p Paren at 36 /// position \p Key. 37 void serializeArray(Object &Paren, StringRef Key, Optional<Array> Array) { 38 if (Array) 39 Paren[Key] = std::move(Array.getValue()); 40 } 41 42 /// Serialize a \c VersionTuple \p V with the Symbol Graph semantic version 43 /// format. 44 /// 45 /// A semantic version object contains three numeric fields, representing the 46 /// \c major, \c minor, and \c patch parts of the version tuple. 47 /// For example version tuple 1.0.3 is serialized as: 48 /// \code 49 /// { 50 /// "major" : 1, 51 /// "minor" : 0, 52 /// "patch" : 3 53 /// } 54 /// \endcode 55 /// 56 /// \returns \c None if the version \p V is empty, or an \c Object containing 57 /// the semantic version representation of \p V. 58 Optional<Object> serializeSemanticVersion(const VersionTuple &V) { 59 if (V.empty()) 60 return None; 61 62 Object Version; 63 Version["major"] = V.getMajor(); 64 Version["minor"] = V.getMinor().getValueOr(0); 65 Version["patch"] = V.getSubminor().getValueOr(0); 66 return Version; 67 } 68 69 /// Serialize the OS information in the Symbol Graph platform property. 70 /// 71 /// The OS information in Symbol Graph contains the \c name of the OS, and an 72 /// optional \c minimumVersion semantic version field. 73 Object serializeOperatingSystem(const Triple &T) { 74 Object OS; 75 OS["name"] = T.getOSTypeName(T.getOS()); 76 serializeObject(OS, "minimumVersion", 77 serializeSemanticVersion(T.getMinimumSupportedOSVersion())); 78 return OS; 79 } 80 81 /// Serialize the platform information in the Symbol Graph module section. 82 /// 83 /// The platform object describes a target platform triple in corresponding 84 /// three fields: \c architecture, \c vendor, and \c operatingSystem. 85 Object serializePlatform(const Triple &T) { 86 Object Platform; 87 Platform["architecture"] = T.getArchName(); 88 Platform["vendor"] = T.getVendorName(); 89 Platform["operatingSystem"] = serializeOperatingSystem(T); 90 return Platform; 91 } 92 93 /// Serialize a source location in file. 94 /// 95 /// \param Loc The presumed location to serialize. 96 /// \param IncludeFileURI If true, include the file path of \p Loc as a URI. 97 /// Defaults to false. 98 Object serializeSourcePosition(const PresumedLoc &Loc, 99 bool IncludeFileURI = false) { 100 assert(Loc.isValid() && "invalid source position"); 101 102 Object SourcePosition; 103 SourcePosition["line"] = Loc.getLine(); 104 SourcePosition["character"] = Loc.getColumn(); 105 106 if (IncludeFileURI) { 107 std::string FileURI = "file://"; 108 // Normalize file path to use forward slashes for the URI. 109 FileURI += sys::path::convert_to_slash(Loc.getFilename()); 110 SourcePosition["uri"] = FileURI; 111 } 112 113 return SourcePosition; 114 } 115 116 /// Serialize a source range with begin and end locations. 117 Object serializeSourceRange(const PresumedLoc &BeginLoc, 118 const PresumedLoc &EndLoc) { 119 Object SourceRange; 120 serializeObject(SourceRange, "start", serializeSourcePosition(BeginLoc)); 121 serializeObject(SourceRange, "end", serializeSourcePosition(EndLoc)); 122 return SourceRange; 123 } 124 125 /// Serialize the availability attributes of a symbol. 126 /// 127 /// Availability information contains the introduced, deprecated, and obsoleted 128 /// versions of the symbol as semantic versions, if not default. 129 /// Availability information also contains flags to indicate if the symbol is 130 /// unconditionally unavailable or deprecated, 131 /// i.e. \c __attribute__((unavailable)) and \c __attribute__((deprecated)). 132 /// 133 /// \returns \c None if the symbol has default availability attributes, or 134 /// an \c Object containing the formatted availability information. 135 Optional<Object> serializeAvailability(const AvailabilityInfo &Avail) { 136 if (Avail.isDefault()) 137 return None; 138 139 Object Availbility; 140 serializeObject(Availbility, "introducedVersion", 141 serializeSemanticVersion(Avail.Introduced)); 142 serializeObject(Availbility, "deprecatedVersion", 143 serializeSemanticVersion(Avail.Deprecated)); 144 serializeObject(Availbility, "obsoletedVersion", 145 serializeSemanticVersion(Avail.Obsoleted)); 146 if (Avail.isUnavailable()) 147 Availbility["isUnconditionallyUnavailable"] = true; 148 if (Avail.isUnconditionallyDeprecated()) 149 Availbility["isUnconditionallyDeprecated"] = true; 150 151 return Availbility; 152 } 153 154 /// Get the short language name string for interface language references. 155 StringRef getLanguageName(const LangOptions &LangOpts) { 156 auto LanguageKind = 157 LangStandard::getLangStandardForKind(LangOpts.LangStd).getLanguage(); 158 switch (LanguageKind) { 159 case Language::C: 160 return "c"; 161 case Language::ObjC: 162 return "objective-c"; 163 164 // Unsupported language currently 165 case Language::CXX: 166 case Language::ObjCXX: 167 case Language::OpenCL: 168 case Language::OpenCLCXX: 169 case Language::CUDA: 170 case Language::RenderScript: 171 case Language::HIP: 172 case Language::HLSL: 173 174 // Languages that the frontend cannot parse and compile 175 case Language::Unknown: 176 case Language::Asm: 177 case Language::LLVM_IR: 178 llvm_unreachable("Unsupported language kind"); 179 } 180 181 llvm_unreachable("Unhandled language kind"); 182 } 183 184 /// Serialize the identifier object as specified by the Symbol Graph format. 185 /// 186 /// The identifier property of a symbol contains the USR for precise and unique 187 /// references, and the interface language name. 188 Object serializeIdentifier(const APIRecord &Record, 189 const LangOptions &LangOpts) { 190 Object Identifier; 191 Identifier["precise"] = Record.USR; 192 Identifier["interfaceLanguage"] = getLanguageName(LangOpts); 193 194 return Identifier; 195 } 196 197 /// Serialize the documentation comments attached to a symbol, as specified by 198 /// the Symbol Graph format. 199 /// 200 /// The Symbol Graph \c docComment object contains an array of lines. Each line 201 /// represents one line of striped documentation comment, with source range 202 /// information. 203 /// e.g. 204 /// \code 205 /// /// This is a documentation comment 206 /// ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' First line. 207 /// /// with multiple lines. 208 /// ^~~~~~~~~~~~~~~~~~~~~~~' Second line. 209 /// \endcode 210 /// 211 /// \returns \c None if \p Comment is empty, or an \c Object containing the 212 /// formatted lines. 213 Optional<Object> serializeDocComment(const DocComment &Comment) { 214 if (Comment.empty()) 215 return None; 216 217 Object DocComment; 218 Array LinesArray; 219 for (const auto &CommentLine : Comment) { 220 Object Line; 221 Line["text"] = CommentLine.Text; 222 serializeObject(Line, "range", 223 serializeSourceRange(CommentLine.Begin, CommentLine.End)); 224 LinesArray.emplace_back(std::move(Line)); 225 } 226 serializeArray(DocComment, "lines", LinesArray); 227 228 return DocComment; 229 } 230 231 /// Serialize the declaration fragments of a symbol. 232 /// 233 /// The Symbol Graph declaration fragments is an array of tagged important 234 /// parts of a symbol's declaration. The fragments sequence can be joined to 235 /// form spans of declaration text, with attached information useful for 236 /// purposes like syntax-highlighting etc. For example: 237 /// \code 238 /// const int pi; -> "declarationFragments" : [ 239 /// { 240 /// "kind" : "keyword", 241 /// "spelling" : "const" 242 /// }, 243 /// { 244 /// "kind" : "text", 245 /// "spelling" : " " 246 /// }, 247 /// { 248 /// "kind" : "typeIdentifier", 249 /// "preciseIdentifier" : "c:I", 250 /// "spelling" : "int" 251 /// }, 252 /// { 253 /// "kind" : "text", 254 /// "spelling" : " " 255 /// }, 256 /// { 257 /// "kind" : "identifier", 258 /// "spelling" : "pi" 259 /// } 260 /// ] 261 /// \endcode 262 /// 263 /// \returns \c None if \p DF is empty, or an \c Array containing the formatted 264 /// declaration fragments array. 265 Optional<Array> serializeDeclarationFragments(const DeclarationFragments &DF) { 266 if (DF.getFragments().empty()) 267 return None; 268 269 Array Fragments; 270 for (const auto &F : DF.getFragments()) { 271 Object Fragment; 272 Fragment["spelling"] = F.Spelling; 273 Fragment["kind"] = DeclarationFragments::getFragmentKindString(F.Kind); 274 if (!F.PreciseIdentifier.empty()) 275 Fragment["preciseIdentifier"] = F.PreciseIdentifier; 276 Fragments.emplace_back(std::move(Fragment)); 277 } 278 279 return Fragments; 280 } 281 282 /// Serialize the function signature field of a function, as specified by the 283 /// Symbol Graph format. 284 /// 285 /// The Symbol Graph function signature property contains two arrays. 286 /// - The \c returns array is the declaration fragments of the return type; 287 /// - The \c parameters array contains names and declaration fragments of the 288 /// parameters. 289 /// 290 /// \returns \c None if \p FS is empty, or an \c Object containing the 291 /// formatted function signature. 292 Optional<Object> serializeFunctionSignature(const FunctionSignature &FS) { 293 if (FS.empty()) 294 return None; 295 296 Object Signature; 297 serializeArray(Signature, "returns", 298 serializeDeclarationFragments(FS.getReturnType())); 299 300 Array Parameters; 301 for (const auto &P : FS.getParameters()) { 302 Object Parameter; 303 Parameter["name"] = P.Name; 304 serializeArray(Parameter, "declarationFragments", 305 serializeDeclarationFragments(P.Fragments)); 306 Parameters.emplace_back(std::move(Parameter)); 307 } 308 309 if (!Parameters.empty()) 310 Signature["parameters"] = std::move(Parameters); 311 312 return Signature; 313 } 314 315 /// Serialize the \c names field of a symbol as specified by the Symbol Graph 316 /// format. 317 /// 318 /// The Symbol Graph names field contains multiple representations of a symbol 319 /// that can be used for different applications: 320 /// - \c title : The simple declared name of the symbol; 321 /// - \c subHeading : An array of declaration fragments that provides tags, 322 /// and potentially more tokens (for example the \c +/- symbol for 323 /// Objective-C methods). Can be used as sub-headings for documentation. 324 Object serializeNames(const APIRecord &Record) { 325 Object Names; 326 Names["title"] = Record.Name; 327 serializeArray(Names, "subHeading", 328 serializeDeclarationFragments(Record.SubHeading)); 329 330 return Names; 331 } 332 333 /// Serialize the symbol kind information. 334 /// 335 /// The Symbol Graph symbol kind property contains a shorthand \c identifier 336 /// which is prefixed by the source language name, useful for tooling to parse 337 /// the kind, and a \c displayName for rendering human-readable names. 338 Object serializeSymbolKind(const APIRecord &Record, 339 const LangOptions &LangOpts) { 340 auto AddLangPrefix = [&LangOpts](StringRef S) -> std::string { 341 return (getLanguageName(LangOpts) + "." + S).str(); 342 }; 343 344 Object Kind; 345 switch (Record.getKind()) { 346 case APIRecord::RK_Global: { 347 auto *GR = dyn_cast<GlobalRecord>(&Record); 348 switch (GR->GlobalKind) { 349 case GVKind::Function: 350 Kind["identifier"] = AddLangPrefix("func"); 351 Kind["displayName"] = "Function"; 352 break; 353 case GVKind::Variable: 354 Kind["identifier"] = AddLangPrefix("var"); 355 Kind["displayName"] = "Global Variable"; 356 break; 357 case GVKind::Unknown: 358 // Unknown global kind 359 break; 360 } 361 break; 362 } 363 case APIRecord::RK_EnumConstant: 364 Kind["identifier"] = AddLangPrefix("enum.case"); 365 Kind["displayName"] = "Enumeration Case"; 366 break; 367 case APIRecord::RK_Enum: 368 Kind["identifier"] = AddLangPrefix("enum"); 369 Kind["displayName"] = "Enumeration"; 370 break; 371 case APIRecord::RK_StructField: 372 Kind["identifier"] = AddLangPrefix("property"); 373 Kind["displayName"] = "Instance Property"; 374 break; 375 case APIRecord::RK_Struct: 376 Kind["identifier"] = AddLangPrefix("struct"); 377 Kind["displayName"] = "Structure"; 378 break; 379 } 380 381 return Kind; 382 } 383 384 } // namespace 385 386 void SymbolGraphSerializer::anchor() {} 387 388 /// Defines the format version emitted by SymbolGraphSerializer. 389 const VersionTuple SymbolGraphSerializer::FormatVersion{0, 5, 3}; 390 391 Object SymbolGraphSerializer::serializeMetadata() const { 392 Object Metadata; 393 serializeObject(Metadata, "formatVersion", 394 serializeSemanticVersion(FormatVersion)); 395 Metadata["generator"] = clang::getClangFullVersion(); 396 return Metadata; 397 } 398 399 Object SymbolGraphSerializer::serializeModule() const { 400 Object Module; 401 // The user is expected to always pass `--product-name=` on the command line 402 // to populate this field. 403 Module["name"] = ProductName; 404 serializeObject(Module, "platform", serializePlatform(API.getTarget())); 405 return Module; 406 } 407 408 bool SymbolGraphSerializer::shouldSkip(const APIRecord &Record) const { 409 // Skip unconditionally unavailable symbols 410 if (Record.Availability.isUnconditionallyUnavailable()) 411 return true; 412 413 return false; 414 } 415 416 Optional<Object> 417 SymbolGraphSerializer::serializeAPIRecord(const APIRecord &Record) const { 418 if (shouldSkip(Record)) 419 return None; 420 421 Object Obj; 422 serializeObject(Obj, "identifier", 423 serializeIdentifier(Record, API.getLangOpts())); 424 serializeObject(Obj, "kind", serializeSymbolKind(Record, API.getLangOpts())); 425 serializeObject(Obj, "names", serializeNames(Record)); 426 serializeObject( 427 Obj, "location", 428 serializeSourcePosition(Record.Location, /*IncludeFileURI=*/true)); 429 serializeObject(Obj, "availbility", 430 serializeAvailability(Record.Availability)); 431 serializeObject(Obj, "docComment", serializeDocComment(Record.Comment)); 432 serializeArray(Obj, "declarationFragments", 433 serializeDeclarationFragments(Record.Declaration)); 434 435 return Obj; 436 } 437 438 StringRef SymbolGraphSerializer::getRelationshipString(RelationshipKind Kind) { 439 switch (Kind) { 440 case RelationshipKind::MemberOf: 441 return "memberOf"; 442 } 443 llvm_unreachable("Unhandled relationship kind"); 444 } 445 446 void SymbolGraphSerializer::serializeRelationship(RelationshipKind Kind, 447 const APIRecord &Source, 448 const APIRecord &Target) { 449 Object Relationship; 450 Relationship["source"] = Source.USR; 451 Relationship["target"] = Target.USR; 452 Relationship["kind"] = getRelationshipString(Kind); 453 454 Relationships.emplace_back(std::move(Relationship)); 455 } 456 457 void SymbolGraphSerializer::serializeGlobalRecord(const GlobalRecord &Record) { 458 auto Obj = serializeAPIRecord(Record); 459 if (!Obj) 460 return; 461 462 if (Record.GlobalKind == GVKind::Function) 463 serializeObject(*Obj, "parameters", 464 serializeFunctionSignature(Record.Signature)); 465 466 Symbols.emplace_back(std::move(*Obj)); 467 } 468 469 void SymbolGraphSerializer::serializeEnumRecord(const EnumRecord &Record) { 470 auto Enum = serializeAPIRecord(Record); 471 if (!Enum) 472 return; 473 474 Symbols.emplace_back(std::move(*Enum)); 475 476 for (const auto &Constant : Record.Constants) { 477 auto EnumConstant = serializeAPIRecord(*Constant); 478 if (!EnumConstant) 479 continue; 480 481 Symbols.emplace_back(std::move(*EnumConstant)); 482 serializeRelationship(RelationshipKind::MemberOf, *Constant, Record); 483 } 484 } 485 486 void SymbolGraphSerializer::serializeStructRecord(const StructRecord &Record) { 487 auto Struct = serializeAPIRecord(Record); 488 if (!Struct) 489 return; 490 491 Symbols.emplace_back(std::move(*Struct)); 492 493 for (const auto &Field : Record.Fields) { 494 auto StructField = serializeAPIRecord(*Field); 495 if (!StructField) 496 continue; 497 498 Symbols.emplace_back(std::move(*StructField)); 499 serializeRelationship(RelationshipKind::MemberOf, *Field, Record); 500 } 501 } 502 503 Object SymbolGraphSerializer::serialize() { 504 Object Root; 505 serializeObject(Root, "metadata", serializeMetadata()); 506 serializeObject(Root, "module", serializeModule()); 507 508 // Serialize global records in the API set. 509 for (const auto &Global : API.getGlobals()) 510 serializeGlobalRecord(*Global.second); 511 512 // Serialize enum records in the API set. 513 for (const auto &Enum : API.getEnums()) 514 serializeEnumRecord(*Enum.second); 515 516 // Serialize struct records in the API set. 517 for (const auto &Struct : API.getStructs()) 518 serializeStructRecord(*Struct.second); 519 520 Root["symbols"] = std::move(Symbols); 521 Root["relationhips"] = std::move(Relationships); 522 523 return Root; 524 } 525 526 void SymbolGraphSerializer::serialize(raw_ostream &os) { 527 Object root = serialize(); 528 if (Options.Compact) 529 os << formatv("{0}", Value(std::move(root))) << "\n"; 530 else 531 os << formatv("{0:2}", Value(std::move(root))) << "\n"; 532 } 533