1 //===- ExtractAPI/Serialization/SymbolGraphSerializer.cpp -------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// \file 10 /// This file implements the SymbolGraphSerializer. 11 /// 12 //===----------------------------------------------------------------------===// 13 14 #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h" 15 #include "clang/Basic/Version.h" 16 #include "clang/ExtractAPI/API.h" 17 #include "llvm/Support/JSON.h" 18 #include "llvm/Support/Path.h" 19 #include "llvm/Support/VersionTuple.h" 20 21 using namespace clang; 22 using namespace clang::extractapi; 23 using namespace llvm; 24 using namespace llvm::json; 25 26 namespace { 27 28 /// Helper function to inject a JSON object \p Obj into another object \p Paren 29 /// at position \p Key. 30 void serializeObject(Object &Paren, StringRef Key, Optional<Object> Obj) { 31 if (Obj) 32 Paren[Key] = std::move(Obj.getValue()); 33 } 34 35 /// Helper function to inject a JSON array \p Array into object \p Paren at 36 /// position \p Key. 37 void serializeArray(Object &Paren, StringRef Key, Optional<Array> Array) { 38 if (Array) 39 Paren[Key] = std::move(Array.getValue()); 40 } 41 42 /// Serialize a \c VersionTuple \p V with the Symbol Graph semantic version 43 /// format. 44 /// 45 /// A semantic version object contains three numeric fields, representing the 46 /// \c major, \c minor, and \c patch parts of the version tuple. 47 /// For example version tuple 1.0.3 is serialized as: 48 /// \code 49 /// { 50 /// "major" : 1, 51 /// "minor" : 0, 52 /// "patch" : 3 53 /// } 54 /// \endcode 55 /// 56 /// \returns \c None if the version \p V is empty, or an \c Object containing 57 /// the semantic version representation of \p V. 58 Optional<Object> serializeSemanticVersion(const VersionTuple &V) { 59 if (V.empty()) 60 return None; 61 62 Object Version; 63 Version["major"] = V.getMajor(); 64 Version["minor"] = V.getMinor().getValueOr(0); 65 Version["patch"] = V.getSubminor().getValueOr(0); 66 return Version; 67 } 68 69 /// Serialize the OS information in the Symbol Graph platform property. 70 /// 71 /// The OS information in Symbol Graph contains the \c name of the OS, and an 72 /// optional \c minimumVersion semantic version field. 73 Object serializeOperatingSystem(const Triple &T) { 74 Object OS; 75 OS["name"] = T.getOSTypeName(T.getOS()); 76 serializeObject(OS, "minimumVersion", 77 serializeSemanticVersion(T.getMinimumSupportedOSVersion())); 78 return OS; 79 } 80 81 /// Serialize the platform information in the Symbol Graph module section. 82 /// 83 /// The platform object describes a target platform triple in corresponding 84 /// three fields: \c architecture, \c vendor, and \c operatingSystem. 85 Object serializePlatform(const Triple &T) { 86 Object Platform; 87 Platform["architecture"] = T.getArchName(); 88 Platform["vendor"] = T.getVendorName(); 89 Platform["operatingSystem"] = serializeOperatingSystem(T); 90 return Platform; 91 } 92 93 /// Serialize a source location in file. 94 /// 95 /// \param Loc The presumed location to serialize. 96 /// \param IncludeFileURI If true, include the file path of \p Loc as a URI. 97 /// Defaults to false. 98 Object serializeSourcePosition(const PresumedLoc &Loc, 99 bool IncludeFileURI = false) { 100 assert(Loc.isValid() && "invalid source position"); 101 102 Object SourcePosition; 103 SourcePosition["line"] = Loc.getLine(); 104 SourcePosition["character"] = Loc.getColumn(); 105 106 if (IncludeFileURI) { 107 std::string FileURI = "file://"; 108 // Normalize file path to use forward slashes for the URI. 109 FileURI += sys::path::convert_to_slash(Loc.getFilename()); 110 SourcePosition["uri"] = FileURI; 111 } 112 113 return SourcePosition; 114 } 115 116 /// Serialize a source range with begin and end locations. 117 Object serializeSourceRange(const PresumedLoc &BeginLoc, 118 const PresumedLoc &EndLoc) { 119 Object SourceRange; 120 serializeObject(SourceRange, "start", serializeSourcePosition(BeginLoc)); 121 serializeObject(SourceRange, "end", serializeSourcePosition(EndLoc)); 122 return SourceRange; 123 } 124 125 /// Serialize the availability attributes of a symbol. 126 /// 127 /// Availability information contains the introduced, deprecated, and obsoleted 128 /// versions of the symbol as semantic versions, if not default. 129 /// Availability information also contains flags to indicate if the symbol is 130 /// unconditionally unavailable or deprecated, 131 /// i.e. \c __attribute__((unavailable)) and \c __attribute__((deprecated)). 132 /// 133 /// \returns \c None if the symbol has default availability attributes, or 134 /// an \c Object containing the formatted availability information. 135 Optional<Object> serializeAvailability(const AvailabilityInfo &Avail) { 136 if (Avail.isDefault()) 137 return None; 138 139 Object Availbility; 140 serializeObject(Availbility, "introducedVersion", 141 serializeSemanticVersion(Avail.Introduced)); 142 serializeObject(Availbility, "deprecatedVersion", 143 serializeSemanticVersion(Avail.Deprecated)); 144 serializeObject(Availbility, "obsoletedVersion", 145 serializeSemanticVersion(Avail.Obsoleted)); 146 if (Avail.isUnavailable()) 147 Availbility["isUnconditionallyUnavailable"] = true; 148 if (Avail.isUnconditionallyDeprecated()) 149 Availbility["isUnconditionallyDeprecated"] = true; 150 151 return Availbility; 152 } 153 154 /// Get the language name string for interface language references. 155 StringRef getLanguageName(Language Lang) { 156 switch (Lang) { 157 case Language::C: 158 return "c"; 159 case Language::ObjC: 160 return "objective-c"; 161 162 // Unsupported language currently 163 case Language::CXX: 164 case Language::ObjCXX: 165 case Language::OpenCL: 166 case Language::OpenCLCXX: 167 case Language::CUDA: 168 case Language::RenderScript: 169 case Language::HIP: 170 case Language::HLSL: 171 172 // Languages that the frontend cannot parse and compile 173 case Language::Unknown: 174 case Language::Asm: 175 case Language::LLVM_IR: 176 llvm_unreachable("Unsupported language kind"); 177 } 178 179 llvm_unreachable("Unhandled language kind"); 180 } 181 182 /// Serialize the identifier object as specified by the Symbol Graph format. 183 /// 184 /// The identifier property of a symbol contains the USR for precise and unique 185 /// references, and the interface language name. 186 Object serializeIdentifier(const APIRecord &Record, Language Lang) { 187 Object Identifier; 188 Identifier["precise"] = Record.USR; 189 Identifier["interfaceLanguage"] = getLanguageName(Lang); 190 191 return Identifier; 192 } 193 194 /// Serialize the documentation comments attached to a symbol, as specified by 195 /// the Symbol Graph format. 196 /// 197 /// The Symbol Graph \c docComment object contains an array of lines. Each line 198 /// represents one line of striped documentation comment, with source range 199 /// information. 200 /// e.g. 201 /// \code 202 /// /// This is a documentation comment 203 /// ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~' First line. 204 /// /// with multiple lines. 205 /// ^~~~~~~~~~~~~~~~~~~~~~~' Second line. 206 /// \endcode 207 /// 208 /// \returns \c None if \p Comment is empty, or an \c Object containing the 209 /// formatted lines. 210 Optional<Object> serializeDocComment(const DocComment &Comment) { 211 if (Comment.empty()) 212 return None; 213 214 Object DocComment; 215 Array LinesArray; 216 for (const auto &CommentLine : Comment) { 217 Object Line; 218 Line["text"] = CommentLine.Text; 219 serializeObject(Line, "range", 220 serializeSourceRange(CommentLine.Begin, CommentLine.End)); 221 LinesArray.emplace_back(std::move(Line)); 222 } 223 serializeArray(DocComment, "lines", LinesArray); 224 225 return DocComment; 226 } 227 228 /// Serialize the declaration fragments of a symbol. 229 /// 230 /// The Symbol Graph declaration fragments is an array of tagged important 231 /// parts of a symbol's declaration. The fragments sequence can be joined to 232 /// form spans of declaration text, with attached information useful for 233 /// purposes like syntax-highlighting etc. For example: 234 /// \code 235 /// const int pi; -> "declarationFragments" : [ 236 /// { 237 /// "kind" : "keyword", 238 /// "spelling" : "const" 239 /// }, 240 /// { 241 /// "kind" : "text", 242 /// "spelling" : " " 243 /// }, 244 /// { 245 /// "kind" : "typeIdentifier", 246 /// "preciseIdentifier" : "c:I", 247 /// "spelling" : "int" 248 /// }, 249 /// { 250 /// "kind" : "text", 251 /// "spelling" : " " 252 /// }, 253 /// { 254 /// "kind" : "identifier", 255 /// "spelling" : "pi" 256 /// } 257 /// ] 258 /// \endcode 259 /// 260 /// \returns \c None if \p DF is empty, or an \c Array containing the formatted 261 /// declaration fragments array. 262 Optional<Array> serializeDeclarationFragments(const DeclarationFragments &DF) { 263 if (DF.getFragments().empty()) 264 return None; 265 266 Array Fragments; 267 for (const auto &F : DF.getFragments()) { 268 Object Fragment; 269 Fragment["spelling"] = F.Spelling; 270 Fragment["kind"] = DeclarationFragments::getFragmentKindString(F.Kind); 271 if (!F.PreciseIdentifier.empty()) 272 Fragment["preciseIdentifier"] = F.PreciseIdentifier; 273 Fragments.emplace_back(std::move(Fragment)); 274 } 275 276 return Fragments; 277 } 278 279 /// Serialize the function signature field of a function, as specified by the 280 /// Symbol Graph format. 281 /// 282 /// The Symbol Graph function signature property contains two arrays. 283 /// - The \c returns array is the declaration fragments of the return type; 284 /// - The \c parameters array contains names and declaration fragments of the 285 /// parameters. 286 /// 287 /// \returns \c None if \p FS is empty, or an \c Object containing the 288 /// formatted function signature. 289 Optional<Object> serializeFunctionSignature(const FunctionSignature &FS) { 290 if (FS.empty()) 291 return None; 292 293 Object Signature; 294 serializeArray(Signature, "returns", 295 serializeDeclarationFragments(FS.getReturnType())); 296 297 Array Parameters; 298 for (const auto &P : FS.getParameters()) { 299 Object Parameter; 300 Parameter["name"] = P.Name; 301 serializeArray(Parameter, "declarationFragments", 302 serializeDeclarationFragments(P.Fragments)); 303 Parameters.emplace_back(std::move(Parameter)); 304 } 305 306 if (!Parameters.empty()) 307 Signature["parameters"] = std::move(Parameters); 308 309 return Signature; 310 } 311 312 /// Serialize the \c names field of a symbol as specified by the Symbol Graph 313 /// format. 314 /// 315 /// The Symbol Graph names field contains multiple representations of a symbol 316 /// that can be used for different applications: 317 /// - \c title : The simple declared name of the symbol; 318 /// - \c subHeading : An array of declaration fragments that provides tags, 319 /// and potentially more tokens (for example the \c +/- symbol for 320 /// Objective-C methods). Can be used as sub-headings for documentation. 321 Object serializeNames(const APIRecord &Record) { 322 Object Names; 323 Names["title"] = Record.Name; 324 serializeArray(Names, "subHeading", 325 serializeDeclarationFragments(Record.SubHeading)); 326 327 return Names; 328 } 329 330 /// Serialize the symbol kind information. 331 /// 332 /// The Symbol Graph symbol kind property contains a shorthand \c identifier 333 /// which is prefixed by the source language name, useful for tooling to parse 334 /// the kind, and a \c displayName for rendering human-readable names. 335 Object serializeSymbolKind(const APIRecord &Record, Language Lang) { 336 auto AddLangPrefix = [&Lang](StringRef S) -> std::string { 337 return (getLanguageName(Lang) + "." + S).str(); 338 }; 339 340 Object Kind; 341 switch (Record.getKind()) { 342 case APIRecord::RK_Global: { 343 auto *GR = dyn_cast<GlobalRecord>(&Record); 344 switch (GR->GlobalKind) { 345 case GVKind::Function: 346 Kind["identifier"] = AddLangPrefix("func"); 347 Kind["displayName"] = "Function"; 348 break; 349 case GVKind::Variable: 350 Kind["identifier"] = AddLangPrefix("var"); 351 Kind["displayName"] = "Global Variable"; 352 break; 353 case GVKind::Unknown: 354 // Unknown global kind 355 break; 356 } 357 break; 358 } 359 case APIRecord::RK_EnumConstant: 360 Kind["identifier"] = AddLangPrefix("enum.case"); 361 Kind["displayName"] = "Enumeration Case"; 362 break; 363 case APIRecord::RK_Enum: 364 Kind["identifier"] = AddLangPrefix("enum"); 365 Kind["displayName"] = "Enumeration"; 366 break; 367 case APIRecord::RK_StructField: 368 Kind["identifier"] = AddLangPrefix("property"); 369 Kind["displayName"] = "Instance Property"; 370 break; 371 case APIRecord::RK_Struct: 372 Kind["identifier"] = AddLangPrefix("struct"); 373 Kind["displayName"] = "Structure"; 374 break; 375 } 376 377 return Kind; 378 } 379 380 } // namespace 381 382 void SymbolGraphSerializer::anchor() {} 383 384 /// Defines the format version emitted by SymbolGraphSerializer. 385 const VersionTuple SymbolGraphSerializer::FormatVersion{0, 5, 3}; 386 387 Object SymbolGraphSerializer::serializeMetadata() const { 388 Object Metadata; 389 serializeObject(Metadata, "formatVersion", 390 serializeSemanticVersion(FormatVersion)); 391 Metadata["generator"] = clang::getClangFullVersion(); 392 return Metadata; 393 } 394 395 Object SymbolGraphSerializer::serializeModule() const { 396 Object Module; 397 // The user is expected to always pass `--product-name=` on the command line 398 // to populate this field. 399 Module["name"] = ProductName; 400 serializeObject(Module, "platform", serializePlatform(API.getTarget())); 401 return Module; 402 } 403 404 bool SymbolGraphSerializer::shouldSkip(const APIRecord &Record) const { 405 // Skip unconditionally unavailable symbols 406 if (Record.Availability.isUnconditionallyUnavailable()) 407 return true; 408 409 return false; 410 } 411 412 Optional<Object> 413 SymbolGraphSerializer::serializeAPIRecord(const APIRecord &Record) const { 414 if (shouldSkip(Record)) 415 return None; 416 417 Object Obj; 418 serializeObject(Obj, "identifier", 419 serializeIdentifier(Record, API.getLanguage())); 420 serializeObject(Obj, "kind", serializeSymbolKind(Record, API.getLanguage())); 421 serializeObject(Obj, "names", serializeNames(Record)); 422 serializeObject( 423 Obj, "location", 424 serializeSourcePosition(Record.Location, /*IncludeFileURI=*/true)); 425 serializeObject(Obj, "availbility", 426 serializeAvailability(Record.Availability)); 427 serializeObject(Obj, "docComment", serializeDocComment(Record.Comment)); 428 serializeArray(Obj, "declarationFragments", 429 serializeDeclarationFragments(Record.Declaration)); 430 431 return Obj; 432 } 433 434 StringRef SymbolGraphSerializer::getRelationshipString(RelationshipKind Kind) { 435 switch (Kind) { 436 case RelationshipKind::MemberOf: 437 return "memberOf"; 438 } 439 llvm_unreachable("Unhandled relationship kind"); 440 } 441 442 void SymbolGraphSerializer::serializeRelationship(RelationshipKind Kind, 443 const APIRecord &Source, 444 const APIRecord &Target) { 445 Object Relationship; 446 Relationship["source"] = Source.USR; 447 Relationship["target"] = Target.USR; 448 Relationship["kind"] = getRelationshipString(Kind); 449 450 Relationships.emplace_back(std::move(Relationship)); 451 } 452 453 void SymbolGraphSerializer::serializeGlobalRecord(const GlobalRecord &Record) { 454 auto Obj = serializeAPIRecord(Record); 455 if (!Obj) 456 return; 457 458 if (Record.GlobalKind == GVKind::Function) 459 serializeObject(*Obj, "parameters", 460 serializeFunctionSignature(Record.Signature)); 461 462 Symbols.emplace_back(std::move(*Obj)); 463 } 464 465 void SymbolGraphSerializer::serializeEnumRecord(const EnumRecord &Record) { 466 auto Enum = serializeAPIRecord(Record); 467 if (!Enum) 468 return; 469 470 Symbols.emplace_back(std::move(*Enum)); 471 472 for (const auto &Constant : Record.Constants) { 473 auto EnumConstant = serializeAPIRecord(*Constant); 474 if (!EnumConstant) 475 continue; 476 477 Symbols.emplace_back(std::move(*EnumConstant)); 478 serializeRelationship(RelationshipKind::MemberOf, *Constant, Record); 479 } 480 } 481 482 void SymbolGraphSerializer::serializeStructRecord(const StructRecord &Record) { 483 auto Struct = serializeAPIRecord(Record); 484 if (!Struct) 485 return; 486 487 Symbols.emplace_back(std::move(*Struct)); 488 489 for (const auto &Field : Record.Fields) { 490 auto StructField = serializeAPIRecord(*Field); 491 if (!StructField) 492 continue; 493 494 Symbols.emplace_back(std::move(*StructField)); 495 serializeRelationship(RelationshipKind::MemberOf, *Field, Record); 496 } 497 } 498 499 Object SymbolGraphSerializer::serialize() { 500 Object Root; 501 serializeObject(Root, "metadata", serializeMetadata()); 502 serializeObject(Root, "module", serializeModule()); 503 504 // Serialize global records in the API set. 505 for (const auto &Global : API.getGlobals()) 506 serializeGlobalRecord(*Global.second); 507 508 // Serialize enum records in the API set. 509 for (const auto &Enum : API.getEnums()) 510 serializeEnumRecord(*Enum.second); 511 512 // Serialize struct records in the API set. 513 for (const auto &Struct : API.getStructs()) 514 serializeStructRecord(*Struct.second); 515 516 Root["symbols"] = std::move(Symbols); 517 Root["relationhips"] = std::move(Relationships); 518 519 return Root; 520 } 521 522 void SymbolGraphSerializer::serialize(raw_ostream &os) { 523 Object root = serialize(); 524 if (Options.Compact) 525 os << formatv("{0}", Value(std::move(root))) << "\n"; 526 else 527 os << formatv("{0:2}", Value(std::move(root))) << "\n"; 528 } 529