1 //===- BitcodeAnalyzer.cpp - Internal BitcodeAnalyzer implementation ------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Bitcode/BitcodeAnalyzer.h" 10 #include "llvm/Bitcode/BitcodeReader.h" 11 #include "llvm/Bitcode/LLVMBitCodes.h" 12 #include "llvm/Bitstream/BitCodes.h" 13 #include "llvm/Bitstream/BitstreamReader.h" 14 #include "llvm/Support/Format.h" 15 #include "llvm/Support/SHA1.h" 16 17 using namespace llvm; 18 19 static Error reportError(StringRef Message) { 20 return createStringError(std::errc::illegal_byte_sequence, Message.data()); 21 } 22 23 /// Return a symbolic block name if known, otherwise return null. 24 static Optional<const char *> GetBlockName(unsigned BlockID, 25 const BitstreamBlockInfo &BlockInfo, 26 CurStreamTypeType CurStreamType) { 27 // Standard blocks for all bitcode files. 28 if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) { 29 if (BlockID == bitc::BLOCKINFO_BLOCK_ID) 30 return "BLOCKINFO_BLOCK"; 31 return std::nullopt; 32 } 33 34 // Check to see if we have a blockinfo record for this block, with a name. 35 if (const BitstreamBlockInfo::BlockInfo *Info = 36 BlockInfo.getBlockInfo(BlockID)) { 37 if (!Info->Name.empty()) 38 return Info->Name.c_str(); 39 } 40 41 if (CurStreamType != LLVMIRBitstream) 42 return std::nullopt; 43 44 switch (BlockID) { 45 default: 46 return std::nullopt; 47 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: 48 return "OPERAND_BUNDLE_TAGS_BLOCK"; 49 case bitc::MODULE_BLOCK_ID: 50 return "MODULE_BLOCK"; 51 case bitc::PARAMATTR_BLOCK_ID: 52 return "PARAMATTR_BLOCK"; 53 case bitc::PARAMATTR_GROUP_BLOCK_ID: 54 return "PARAMATTR_GROUP_BLOCK_ID"; 55 case bitc::TYPE_BLOCK_ID_NEW: 56 return "TYPE_BLOCK_ID"; 57 case bitc::CONSTANTS_BLOCK_ID: 58 return "CONSTANTS_BLOCK"; 59 case bitc::FUNCTION_BLOCK_ID: 60 return "FUNCTION_BLOCK"; 61 case bitc::IDENTIFICATION_BLOCK_ID: 62 return "IDENTIFICATION_BLOCK_ID"; 63 case bitc::VALUE_SYMTAB_BLOCK_ID: 64 return "VALUE_SYMTAB"; 65 case bitc::METADATA_BLOCK_ID: 66 return "METADATA_BLOCK"; 67 case bitc::METADATA_KIND_BLOCK_ID: 68 return "METADATA_KIND_BLOCK"; 69 case bitc::METADATA_ATTACHMENT_ID: 70 return "METADATA_ATTACHMENT_BLOCK"; 71 case bitc::USELIST_BLOCK_ID: 72 return "USELIST_BLOCK_ID"; 73 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: 74 return "GLOBALVAL_SUMMARY_BLOCK"; 75 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID: 76 return "FULL_LTO_GLOBALVAL_SUMMARY_BLOCK"; 77 case bitc::MODULE_STRTAB_BLOCK_ID: 78 return "MODULE_STRTAB_BLOCK"; 79 case bitc::STRTAB_BLOCK_ID: 80 return "STRTAB_BLOCK"; 81 case bitc::SYMTAB_BLOCK_ID: 82 return "SYMTAB_BLOCK"; 83 } 84 } 85 86 /// Return a symbolic code name if known, otherwise return null. 87 static Optional<const char *> GetCodeName(unsigned CodeID, unsigned BlockID, 88 const BitstreamBlockInfo &BlockInfo, 89 CurStreamTypeType CurStreamType) { 90 // Standard blocks for all bitcode files. 91 if (BlockID < bitc::FIRST_APPLICATION_BLOCKID) { 92 if (BlockID == bitc::BLOCKINFO_BLOCK_ID) { 93 switch (CodeID) { 94 default: 95 return std::nullopt; 96 case bitc::BLOCKINFO_CODE_SETBID: 97 return "SETBID"; 98 case bitc::BLOCKINFO_CODE_BLOCKNAME: 99 return "BLOCKNAME"; 100 case bitc::BLOCKINFO_CODE_SETRECORDNAME: 101 return "SETRECORDNAME"; 102 } 103 } 104 return std::nullopt; 105 } 106 107 // Check to see if we have a blockinfo record for this record, with a name. 108 if (const BitstreamBlockInfo::BlockInfo *Info = 109 BlockInfo.getBlockInfo(BlockID)) { 110 for (const std::pair<unsigned, std::string> &RN : Info->RecordNames) 111 if (RN.first == CodeID) 112 return RN.second.c_str(); 113 } 114 115 if (CurStreamType != LLVMIRBitstream) 116 return std::nullopt; 117 118 #define STRINGIFY_CODE(PREFIX, CODE) \ 119 case bitc::PREFIX##_##CODE: \ 120 return #CODE; 121 switch (BlockID) { 122 default: 123 return std::nullopt; 124 case bitc::MODULE_BLOCK_ID: 125 switch (CodeID) { 126 default: 127 return std::nullopt; 128 STRINGIFY_CODE(MODULE_CODE, VERSION) 129 STRINGIFY_CODE(MODULE_CODE, TRIPLE) 130 STRINGIFY_CODE(MODULE_CODE, DATALAYOUT) 131 STRINGIFY_CODE(MODULE_CODE, ASM) 132 STRINGIFY_CODE(MODULE_CODE, SECTIONNAME) 133 STRINGIFY_CODE(MODULE_CODE, DEPLIB) // Deprecated, present in old bitcode 134 STRINGIFY_CODE(MODULE_CODE, GLOBALVAR) 135 STRINGIFY_CODE(MODULE_CODE, FUNCTION) 136 STRINGIFY_CODE(MODULE_CODE, ALIAS) 137 STRINGIFY_CODE(MODULE_CODE, GCNAME) 138 STRINGIFY_CODE(MODULE_CODE, COMDAT) 139 STRINGIFY_CODE(MODULE_CODE, VSTOFFSET) 140 STRINGIFY_CODE(MODULE_CODE, METADATA_VALUES_UNUSED) 141 STRINGIFY_CODE(MODULE_CODE, SOURCE_FILENAME) 142 STRINGIFY_CODE(MODULE_CODE, HASH) 143 } 144 case bitc::IDENTIFICATION_BLOCK_ID: 145 switch (CodeID) { 146 default: 147 return std::nullopt; 148 STRINGIFY_CODE(IDENTIFICATION_CODE, STRING) 149 STRINGIFY_CODE(IDENTIFICATION_CODE, EPOCH) 150 } 151 case bitc::PARAMATTR_BLOCK_ID: 152 switch (CodeID) { 153 default: 154 return std::nullopt; 155 // FIXME: Should these be different? 156 case bitc::PARAMATTR_CODE_ENTRY_OLD: 157 return "ENTRY"; 158 case bitc::PARAMATTR_CODE_ENTRY: 159 return "ENTRY"; 160 } 161 case bitc::PARAMATTR_GROUP_BLOCK_ID: 162 switch (CodeID) { 163 default: 164 return std::nullopt; 165 case bitc::PARAMATTR_GRP_CODE_ENTRY: 166 return "ENTRY"; 167 } 168 case bitc::TYPE_BLOCK_ID_NEW: 169 switch (CodeID) { 170 default: 171 return std::nullopt; 172 STRINGIFY_CODE(TYPE_CODE, NUMENTRY) 173 STRINGIFY_CODE(TYPE_CODE, VOID) 174 STRINGIFY_CODE(TYPE_CODE, FLOAT) 175 STRINGIFY_CODE(TYPE_CODE, DOUBLE) 176 STRINGIFY_CODE(TYPE_CODE, LABEL) 177 STRINGIFY_CODE(TYPE_CODE, OPAQUE) 178 STRINGIFY_CODE(TYPE_CODE, INTEGER) 179 STRINGIFY_CODE(TYPE_CODE, POINTER) 180 STRINGIFY_CODE(TYPE_CODE, HALF) 181 STRINGIFY_CODE(TYPE_CODE, ARRAY) 182 STRINGIFY_CODE(TYPE_CODE, VECTOR) 183 STRINGIFY_CODE(TYPE_CODE, X86_FP80) 184 STRINGIFY_CODE(TYPE_CODE, FP128) 185 STRINGIFY_CODE(TYPE_CODE, PPC_FP128) 186 STRINGIFY_CODE(TYPE_CODE, METADATA) 187 STRINGIFY_CODE(TYPE_CODE, X86_MMX) 188 STRINGIFY_CODE(TYPE_CODE, STRUCT_ANON) 189 STRINGIFY_CODE(TYPE_CODE, STRUCT_NAME) 190 STRINGIFY_CODE(TYPE_CODE, STRUCT_NAMED) 191 STRINGIFY_CODE(TYPE_CODE, FUNCTION) 192 STRINGIFY_CODE(TYPE_CODE, TOKEN) 193 STRINGIFY_CODE(TYPE_CODE, BFLOAT) 194 } 195 196 case bitc::CONSTANTS_BLOCK_ID: 197 switch (CodeID) { 198 default: 199 return std::nullopt; 200 STRINGIFY_CODE(CST_CODE, SETTYPE) 201 STRINGIFY_CODE(CST_CODE, NULL) 202 STRINGIFY_CODE(CST_CODE, UNDEF) 203 STRINGIFY_CODE(CST_CODE, INTEGER) 204 STRINGIFY_CODE(CST_CODE, WIDE_INTEGER) 205 STRINGIFY_CODE(CST_CODE, FLOAT) 206 STRINGIFY_CODE(CST_CODE, AGGREGATE) 207 STRINGIFY_CODE(CST_CODE, STRING) 208 STRINGIFY_CODE(CST_CODE, CSTRING) 209 STRINGIFY_CODE(CST_CODE, CE_BINOP) 210 STRINGIFY_CODE(CST_CODE, CE_CAST) 211 STRINGIFY_CODE(CST_CODE, CE_GEP) 212 STRINGIFY_CODE(CST_CODE, CE_INBOUNDS_GEP) 213 STRINGIFY_CODE(CST_CODE, CE_SELECT) 214 STRINGIFY_CODE(CST_CODE, CE_EXTRACTELT) 215 STRINGIFY_CODE(CST_CODE, CE_INSERTELT) 216 STRINGIFY_CODE(CST_CODE, CE_SHUFFLEVEC) 217 STRINGIFY_CODE(CST_CODE, CE_CMP) 218 STRINGIFY_CODE(CST_CODE, INLINEASM) 219 STRINGIFY_CODE(CST_CODE, CE_SHUFVEC_EX) 220 STRINGIFY_CODE(CST_CODE, CE_UNOP) 221 STRINGIFY_CODE(CST_CODE, DSO_LOCAL_EQUIVALENT) 222 STRINGIFY_CODE(CST_CODE, NO_CFI_VALUE) 223 case bitc::CST_CODE_BLOCKADDRESS: 224 return "CST_CODE_BLOCKADDRESS"; 225 STRINGIFY_CODE(CST_CODE, DATA) 226 } 227 case bitc::FUNCTION_BLOCK_ID: 228 switch (CodeID) { 229 default: 230 return std::nullopt; 231 STRINGIFY_CODE(FUNC_CODE, DECLAREBLOCKS) 232 STRINGIFY_CODE(FUNC_CODE, INST_BINOP) 233 STRINGIFY_CODE(FUNC_CODE, INST_CAST) 234 STRINGIFY_CODE(FUNC_CODE, INST_GEP_OLD) 235 STRINGIFY_CODE(FUNC_CODE, INST_INBOUNDS_GEP_OLD) 236 STRINGIFY_CODE(FUNC_CODE, INST_SELECT) 237 STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTELT) 238 STRINGIFY_CODE(FUNC_CODE, INST_INSERTELT) 239 STRINGIFY_CODE(FUNC_CODE, INST_SHUFFLEVEC) 240 STRINGIFY_CODE(FUNC_CODE, INST_CMP) 241 STRINGIFY_CODE(FUNC_CODE, INST_RET) 242 STRINGIFY_CODE(FUNC_CODE, INST_BR) 243 STRINGIFY_CODE(FUNC_CODE, INST_SWITCH) 244 STRINGIFY_CODE(FUNC_CODE, INST_INVOKE) 245 STRINGIFY_CODE(FUNC_CODE, INST_UNOP) 246 STRINGIFY_CODE(FUNC_CODE, INST_UNREACHABLE) 247 STRINGIFY_CODE(FUNC_CODE, INST_CLEANUPRET) 248 STRINGIFY_CODE(FUNC_CODE, INST_CATCHRET) 249 STRINGIFY_CODE(FUNC_CODE, INST_CATCHPAD) 250 STRINGIFY_CODE(FUNC_CODE, INST_PHI) 251 STRINGIFY_CODE(FUNC_CODE, INST_ALLOCA) 252 STRINGIFY_CODE(FUNC_CODE, INST_LOAD) 253 STRINGIFY_CODE(FUNC_CODE, INST_VAARG) 254 STRINGIFY_CODE(FUNC_CODE, INST_STORE) 255 STRINGIFY_CODE(FUNC_CODE, INST_EXTRACTVAL) 256 STRINGIFY_CODE(FUNC_CODE, INST_INSERTVAL) 257 STRINGIFY_CODE(FUNC_CODE, INST_CMP2) 258 STRINGIFY_CODE(FUNC_CODE, INST_VSELECT) 259 STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC_AGAIN) 260 STRINGIFY_CODE(FUNC_CODE, INST_CALL) 261 STRINGIFY_CODE(FUNC_CODE, DEBUG_LOC) 262 STRINGIFY_CODE(FUNC_CODE, INST_GEP) 263 STRINGIFY_CODE(FUNC_CODE, OPERAND_BUNDLE) 264 STRINGIFY_CODE(FUNC_CODE, INST_FENCE) 265 STRINGIFY_CODE(FUNC_CODE, INST_ATOMICRMW) 266 STRINGIFY_CODE(FUNC_CODE, INST_LOADATOMIC) 267 STRINGIFY_CODE(FUNC_CODE, INST_STOREATOMIC) 268 STRINGIFY_CODE(FUNC_CODE, INST_CMPXCHG) 269 STRINGIFY_CODE(FUNC_CODE, INST_CALLBR) 270 STRINGIFY_CODE(FUNC_CODE, BLOCKADDR_USERS) 271 } 272 case bitc::VALUE_SYMTAB_BLOCK_ID: 273 switch (CodeID) { 274 default: 275 return std::nullopt; 276 STRINGIFY_CODE(VST_CODE, ENTRY) 277 STRINGIFY_CODE(VST_CODE, BBENTRY) 278 STRINGIFY_CODE(VST_CODE, FNENTRY) 279 STRINGIFY_CODE(VST_CODE, COMBINED_ENTRY) 280 } 281 case bitc::MODULE_STRTAB_BLOCK_ID: 282 switch (CodeID) { 283 default: 284 return std::nullopt; 285 STRINGIFY_CODE(MST_CODE, ENTRY) 286 STRINGIFY_CODE(MST_CODE, HASH) 287 } 288 case bitc::GLOBALVAL_SUMMARY_BLOCK_ID: 289 case bitc::FULL_LTO_GLOBALVAL_SUMMARY_BLOCK_ID: 290 switch (CodeID) { 291 default: 292 return std::nullopt; 293 STRINGIFY_CODE(FS, PERMODULE) 294 STRINGIFY_CODE(FS, PERMODULE_PROFILE) 295 STRINGIFY_CODE(FS, PERMODULE_RELBF) 296 STRINGIFY_CODE(FS, PERMODULE_GLOBALVAR_INIT_REFS) 297 STRINGIFY_CODE(FS, PERMODULE_VTABLE_GLOBALVAR_INIT_REFS) 298 STRINGIFY_CODE(FS, COMBINED) 299 STRINGIFY_CODE(FS, COMBINED_PROFILE) 300 STRINGIFY_CODE(FS, COMBINED_GLOBALVAR_INIT_REFS) 301 STRINGIFY_CODE(FS, ALIAS) 302 STRINGIFY_CODE(FS, COMBINED_ALIAS) 303 STRINGIFY_CODE(FS, COMBINED_ORIGINAL_NAME) 304 STRINGIFY_CODE(FS, VERSION) 305 STRINGIFY_CODE(FS, FLAGS) 306 STRINGIFY_CODE(FS, TYPE_TESTS) 307 STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_VCALLS) 308 STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_VCALLS) 309 STRINGIFY_CODE(FS, TYPE_TEST_ASSUME_CONST_VCALL) 310 STRINGIFY_CODE(FS, TYPE_CHECKED_LOAD_CONST_VCALL) 311 STRINGIFY_CODE(FS, VALUE_GUID) 312 STRINGIFY_CODE(FS, CFI_FUNCTION_DEFS) 313 STRINGIFY_CODE(FS, CFI_FUNCTION_DECLS) 314 STRINGIFY_CODE(FS, TYPE_ID) 315 STRINGIFY_CODE(FS, TYPE_ID_METADATA) 316 STRINGIFY_CODE(FS, BLOCK_COUNT) 317 STRINGIFY_CODE(FS, PARAM_ACCESS) 318 STRINGIFY_CODE(FS, PERMODULE_CALLSITE_INFO) 319 STRINGIFY_CODE(FS, PERMODULE_ALLOC_INFO) 320 STRINGIFY_CODE(FS, COMBINED_CALLSITE_INFO) 321 STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO) 322 STRINGIFY_CODE(FS, STACK_IDS) 323 } 324 case bitc::METADATA_ATTACHMENT_ID: 325 switch (CodeID) { 326 default: 327 return std::nullopt; 328 STRINGIFY_CODE(METADATA, ATTACHMENT) 329 } 330 case bitc::METADATA_BLOCK_ID: 331 switch (CodeID) { 332 default: 333 return std::nullopt; 334 STRINGIFY_CODE(METADATA, STRING_OLD) 335 STRINGIFY_CODE(METADATA, VALUE) 336 STRINGIFY_CODE(METADATA, NODE) 337 STRINGIFY_CODE(METADATA, NAME) 338 STRINGIFY_CODE(METADATA, DISTINCT_NODE) 339 STRINGIFY_CODE(METADATA, KIND) // Older bitcode has it in a MODULE_BLOCK 340 STRINGIFY_CODE(METADATA, LOCATION) 341 STRINGIFY_CODE(METADATA, OLD_NODE) 342 STRINGIFY_CODE(METADATA, OLD_FN_NODE) 343 STRINGIFY_CODE(METADATA, NAMED_NODE) 344 STRINGIFY_CODE(METADATA, GENERIC_DEBUG) 345 STRINGIFY_CODE(METADATA, SUBRANGE) 346 STRINGIFY_CODE(METADATA, ENUMERATOR) 347 STRINGIFY_CODE(METADATA, BASIC_TYPE) 348 STRINGIFY_CODE(METADATA, FILE) 349 STRINGIFY_CODE(METADATA, DERIVED_TYPE) 350 STRINGIFY_CODE(METADATA, COMPOSITE_TYPE) 351 STRINGIFY_CODE(METADATA, SUBROUTINE_TYPE) 352 STRINGIFY_CODE(METADATA, COMPILE_UNIT) 353 STRINGIFY_CODE(METADATA, SUBPROGRAM) 354 STRINGIFY_CODE(METADATA, LEXICAL_BLOCK) 355 STRINGIFY_CODE(METADATA, LEXICAL_BLOCK_FILE) 356 STRINGIFY_CODE(METADATA, NAMESPACE) 357 STRINGIFY_CODE(METADATA, TEMPLATE_TYPE) 358 STRINGIFY_CODE(METADATA, TEMPLATE_VALUE) 359 STRINGIFY_CODE(METADATA, GLOBAL_VAR) 360 STRINGIFY_CODE(METADATA, LOCAL_VAR) 361 STRINGIFY_CODE(METADATA, EXPRESSION) 362 STRINGIFY_CODE(METADATA, OBJC_PROPERTY) 363 STRINGIFY_CODE(METADATA, IMPORTED_ENTITY) 364 STRINGIFY_CODE(METADATA, MODULE) 365 STRINGIFY_CODE(METADATA, MACRO) 366 STRINGIFY_CODE(METADATA, MACRO_FILE) 367 STRINGIFY_CODE(METADATA, STRINGS) 368 STRINGIFY_CODE(METADATA, GLOBAL_DECL_ATTACHMENT) 369 STRINGIFY_CODE(METADATA, GLOBAL_VAR_EXPR) 370 STRINGIFY_CODE(METADATA, INDEX_OFFSET) 371 STRINGIFY_CODE(METADATA, INDEX) 372 STRINGIFY_CODE(METADATA, ARG_LIST) 373 } 374 case bitc::METADATA_KIND_BLOCK_ID: 375 switch (CodeID) { 376 default: 377 return std::nullopt; 378 STRINGIFY_CODE(METADATA, KIND) 379 } 380 case bitc::USELIST_BLOCK_ID: 381 switch (CodeID) { 382 default: 383 return std::nullopt; 384 case bitc::USELIST_CODE_DEFAULT: 385 return "USELIST_CODE_DEFAULT"; 386 case bitc::USELIST_CODE_BB: 387 return "USELIST_CODE_BB"; 388 } 389 390 case bitc::OPERAND_BUNDLE_TAGS_BLOCK_ID: 391 switch (CodeID) { 392 default: 393 return std::nullopt; 394 case bitc::OPERAND_BUNDLE_TAG: 395 return "OPERAND_BUNDLE_TAG"; 396 } 397 case bitc::STRTAB_BLOCK_ID: 398 switch (CodeID) { 399 default: 400 return std::nullopt; 401 case bitc::STRTAB_BLOB: 402 return "BLOB"; 403 } 404 case bitc::SYMTAB_BLOCK_ID: 405 switch (CodeID) { 406 default: 407 return std::nullopt; 408 case bitc::SYMTAB_BLOB: 409 return "BLOB"; 410 } 411 } 412 #undef STRINGIFY_CODE 413 } 414 415 static void printSize(raw_ostream &OS, double Bits) { 416 OS << format("%.2f/%.2fB/%luW", Bits, Bits / 8, (unsigned long)(Bits / 32)); 417 } 418 static void printSize(raw_ostream &OS, uint64_t Bits) { 419 OS << format("%lub/%.2fB/%luW", (unsigned long)Bits, (double)Bits / 8, 420 (unsigned long)(Bits / 32)); 421 } 422 423 static Expected<CurStreamTypeType> ReadSignature(BitstreamCursor &Stream) { 424 auto tryRead = [&Stream](char &Dest, size_t size) -> Error { 425 if (Expected<SimpleBitstreamCursor::word_t> MaybeWord = Stream.Read(size)) 426 Dest = MaybeWord.get(); 427 else 428 return MaybeWord.takeError(); 429 return Error::success(); 430 }; 431 432 char Signature[6]; 433 if (Error Err = tryRead(Signature[0], 8)) 434 return std::move(Err); 435 if (Error Err = tryRead(Signature[1], 8)) 436 return std::move(Err); 437 438 // Autodetect the file contents, if it is one we know. 439 if (Signature[0] == 'C' && Signature[1] == 'P') { 440 if (Error Err = tryRead(Signature[2], 8)) 441 return std::move(Err); 442 if (Error Err = tryRead(Signature[3], 8)) 443 return std::move(Err); 444 if (Signature[2] == 'C' && Signature[3] == 'H') 445 return ClangSerializedASTBitstream; 446 } else if (Signature[0] == 'D' && Signature[1] == 'I') { 447 if (Error Err = tryRead(Signature[2], 8)) 448 return std::move(Err); 449 if (Error Err = tryRead(Signature[3], 8)) 450 return std::move(Err); 451 if (Signature[2] == 'A' && Signature[3] == 'G') 452 return ClangSerializedDiagnosticsBitstream; 453 } else if (Signature[0] == 'R' && Signature[1] == 'M') { 454 if (Error Err = tryRead(Signature[2], 8)) 455 return std::move(Err); 456 if (Error Err = tryRead(Signature[3], 8)) 457 return std::move(Err); 458 if (Signature[2] == 'R' && Signature[3] == 'K') 459 return LLVMBitstreamRemarks; 460 } else { 461 if (Error Err = tryRead(Signature[2], 4)) 462 return std::move(Err); 463 if (Error Err = tryRead(Signature[3], 4)) 464 return std::move(Err); 465 if (Error Err = tryRead(Signature[4], 4)) 466 return std::move(Err); 467 if (Error Err = tryRead(Signature[5], 4)) 468 return std::move(Err); 469 if (Signature[0] == 'B' && Signature[1] == 'C' && Signature[2] == 0x0 && 470 Signature[3] == 0xC && Signature[4] == 0xE && Signature[5] == 0xD) 471 return LLVMIRBitstream; 472 } 473 return UnknownBitstream; 474 } 475 476 static Expected<CurStreamTypeType> analyzeHeader(Optional<BCDumpOptions> O, 477 BitstreamCursor &Stream) { 478 ArrayRef<uint8_t> Bytes = Stream.getBitcodeBytes(); 479 const unsigned char *BufPtr = (const unsigned char *)Bytes.data(); 480 const unsigned char *EndBufPtr = BufPtr + Bytes.size(); 481 482 // If we have a wrapper header, parse it and ignore the non-bc file 483 // contents. The magic number is 0x0B17C0DE stored in little endian. 484 if (isBitcodeWrapper(BufPtr, EndBufPtr)) { 485 if (Bytes.size() < BWH_HeaderSize) 486 return reportError("Invalid bitcode wrapper header"); 487 488 if (O) { 489 unsigned Magic = support::endian::read32le(&BufPtr[BWH_MagicField]); 490 unsigned Version = support::endian::read32le(&BufPtr[BWH_VersionField]); 491 unsigned Offset = support::endian::read32le(&BufPtr[BWH_OffsetField]); 492 unsigned Size = support::endian::read32le(&BufPtr[BWH_SizeField]); 493 unsigned CPUType = support::endian::read32le(&BufPtr[BWH_CPUTypeField]); 494 495 O->OS << "<BITCODE_WRAPPER_HEADER" 496 << " Magic=" << format_hex(Magic, 10) 497 << " Version=" << format_hex(Version, 10) 498 << " Offset=" << format_hex(Offset, 10) 499 << " Size=" << format_hex(Size, 10) 500 << " CPUType=" << format_hex(CPUType, 10) << "/>\n"; 501 } 502 503 if (SkipBitcodeWrapperHeader(BufPtr, EndBufPtr, true)) 504 return reportError("Invalid bitcode wrapper header"); 505 } 506 507 // Use the cursor modified by skipping the wrapper header. 508 Stream = BitstreamCursor(ArrayRef<uint8_t>(BufPtr, EndBufPtr)); 509 510 return ReadSignature(Stream); 511 } 512 513 static bool canDecodeBlob(unsigned Code, unsigned BlockID) { 514 return BlockID == bitc::METADATA_BLOCK_ID && Code == bitc::METADATA_STRINGS; 515 } 516 517 Error BitcodeAnalyzer::decodeMetadataStringsBlob(StringRef Indent, 518 ArrayRef<uint64_t> Record, 519 StringRef Blob, 520 raw_ostream &OS) { 521 if (Blob.empty()) 522 return reportError("Cannot decode empty blob."); 523 524 if (Record.size() != 2) 525 return reportError( 526 "Decoding metadata strings blob needs two record entries."); 527 528 unsigned NumStrings = Record[0]; 529 unsigned StringsOffset = Record[1]; 530 OS << " num-strings = " << NumStrings << " {\n"; 531 532 StringRef Lengths = Blob.slice(0, StringsOffset); 533 SimpleBitstreamCursor R(Lengths); 534 StringRef Strings = Blob.drop_front(StringsOffset); 535 do { 536 if (R.AtEndOfStream()) 537 return reportError("bad length"); 538 539 uint32_t Size; 540 if (Error E = R.ReadVBR(6).moveInto(Size)) 541 return E; 542 if (Strings.size() < Size) 543 return reportError("truncated chars"); 544 545 OS << Indent << " '"; 546 OS.write_escaped(Strings.slice(0, Size), /*hex=*/true); 547 OS << "'\n"; 548 Strings = Strings.drop_front(Size); 549 } while (--NumStrings); 550 551 OS << Indent << " }"; 552 return Error::success(); 553 } 554 555 BitcodeAnalyzer::BitcodeAnalyzer(StringRef Buffer, 556 Optional<StringRef> BlockInfoBuffer) 557 : Stream(Buffer) { 558 if (BlockInfoBuffer) 559 BlockInfoStream.emplace(*BlockInfoBuffer); 560 } 561 562 Error BitcodeAnalyzer::analyze(Optional<BCDumpOptions> O, 563 Optional<StringRef> CheckHash) { 564 if (Error E = analyzeHeader(O, Stream).moveInto(CurStreamType)) 565 return E; 566 567 Stream.setBlockInfo(&BlockInfo); 568 569 // Read block info from BlockInfoStream, if specified. 570 // The block info must be a top-level block. 571 if (BlockInfoStream) { 572 BitstreamCursor BlockInfoCursor(*BlockInfoStream); 573 if (Error E = analyzeHeader(O, BlockInfoCursor).takeError()) 574 return E; 575 576 while (!BlockInfoCursor.AtEndOfStream()) { 577 Expected<unsigned> MaybeCode = BlockInfoCursor.ReadCode(); 578 if (!MaybeCode) 579 return MaybeCode.takeError(); 580 if (MaybeCode.get() != bitc::ENTER_SUBBLOCK) 581 return reportError("Invalid record at top-level in block info file"); 582 583 Expected<unsigned> MaybeBlockID = BlockInfoCursor.ReadSubBlockID(); 584 if (!MaybeBlockID) 585 return MaybeBlockID.takeError(); 586 if (MaybeBlockID.get() == bitc::BLOCKINFO_BLOCK_ID) { 587 Optional<BitstreamBlockInfo> NewBlockInfo; 588 if (Error E = 589 BlockInfoCursor.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true) 590 .moveInto(NewBlockInfo)) 591 return E; 592 if (!NewBlockInfo) 593 return reportError("Malformed BlockInfoBlock in block info file"); 594 BlockInfo = std::move(*NewBlockInfo); 595 break; 596 } 597 598 if (Error Err = BlockInfoCursor.SkipBlock()) 599 return Err; 600 } 601 } 602 603 // Parse the top-level structure. We only allow blocks at the top-level. 604 while (!Stream.AtEndOfStream()) { 605 Expected<unsigned> MaybeCode = Stream.ReadCode(); 606 if (!MaybeCode) 607 return MaybeCode.takeError(); 608 if (MaybeCode.get() != bitc::ENTER_SUBBLOCK) 609 return reportError("Invalid record at top-level"); 610 611 Expected<unsigned> MaybeBlockID = Stream.ReadSubBlockID(); 612 if (!MaybeBlockID) 613 return MaybeBlockID.takeError(); 614 615 if (Error E = parseBlock(MaybeBlockID.get(), 0, O, CheckHash)) 616 return E; 617 ++NumTopBlocks; 618 } 619 620 return Error::success(); 621 } 622 623 void BitcodeAnalyzer::printStats(BCDumpOptions O, 624 Optional<StringRef> Filename) { 625 uint64_t BufferSizeBits = Stream.getBitcodeBytes().size() * CHAR_BIT; 626 // Print a summary of the read file. 627 O.OS << "Summary "; 628 if (Filename) 629 O.OS << "of " << Filename->data() << ":\n"; 630 O.OS << " Total size: "; 631 printSize(O.OS, BufferSizeBits); 632 O.OS << "\n"; 633 O.OS << " Stream type: "; 634 switch (CurStreamType) { 635 case UnknownBitstream: 636 O.OS << "unknown\n"; 637 break; 638 case LLVMIRBitstream: 639 O.OS << "LLVM IR\n"; 640 break; 641 case ClangSerializedASTBitstream: 642 O.OS << "Clang Serialized AST\n"; 643 break; 644 case ClangSerializedDiagnosticsBitstream: 645 O.OS << "Clang Serialized Diagnostics\n"; 646 break; 647 case LLVMBitstreamRemarks: 648 O.OS << "LLVM Remarks\n"; 649 break; 650 } 651 O.OS << " # Toplevel Blocks: " << NumTopBlocks << "\n"; 652 O.OS << "\n"; 653 654 // Emit per-block stats. 655 O.OS << "Per-block Summary:\n"; 656 for (const auto &Stat : BlockIDStats) { 657 O.OS << " Block ID #" << Stat.first; 658 if (Optional<const char *> BlockName = 659 GetBlockName(Stat.first, BlockInfo, CurStreamType)) 660 O.OS << " (" << *BlockName << ")"; 661 O.OS << ":\n"; 662 663 const PerBlockIDStats &Stats = Stat.second; 664 O.OS << " Num Instances: " << Stats.NumInstances << "\n"; 665 O.OS << " Total Size: "; 666 printSize(O.OS, Stats.NumBits); 667 O.OS << "\n"; 668 double pct = (Stats.NumBits * 100.0) / BufferSizeBits; 669 O.OS << " Percent of file: " << format("%2.4f%%", pct) << "\n"; 670 if (Stats.NumInstances > 1) { 671 O.OS << " Average Size: "; 672 printSize(O.OS, Stats.NumBits / (double)Stats.NumInstances); 673 O.OS << "\n"; 674 O.OS << " Tot/Avg SubBlocks: " << Stats.NumSubBlocks << "/" 675 << Stats.NumSubBlocks / (double)Stats.NumInstances << "\n"; 676 O.OS << " Tot/Avg Abbrevs: " << Stats.NumAbbrevs << "/" 677 << Stats.NumAbbrevs / (double)Stats.NumInstances << "\n"; 678 O.OS << " Tot/Avg Records: " << Stats.NumRecords << "/" 679 << Stats.NumRecords / (double)Stats.NumInstances << "\n"; 680 } else { 681 O.OS << " Num SubBlocks: " << Stats.NumSubBlocks << "\n"; 682 O.OS << " Num Abbrevs: " << Stats.NumAbbrevs << "\n"; 683 O.OS << " Num Records: " << Stats.NumRecords << "\n"; 684 } 685 if (Stats.NumRecords) { 686 double pct = (Stats.NumAbbreviatedRecords * 100.0) / Stats.NumRecords; 687 O.OS << " Percent Abbrevs: " << format("%2.4f%%", pct) << "\n"; 688 } 689 O.OS << "\n"; 690 691 // Print a histogram of the codes we see. 692 if (O.Histogram && !Stats.CodeFreq.empty()) { 693 std::vector<std::pair<unsigned, unsigned>> FreqPairs; // <freq,code> 694 for (unsigned i = 0, e = Stats.CodeFreq.size(); i != e; ++i) 695 if (unsigned Freq = Stats.CodeFreq[i].NumInstances) 696 FreqPairs.push_back(std::make_pair(Freq, i)); 697 llvm::stable_sort(FreqPairs); 698 std::reverse(FreqPairs.begin(), FreqPairs.end()); 699 700 O.OS << "\tRecord Histogram:\n"; 701 O.OS << "\t\t Count # Bits b/Rec % Abv Record Kind\n"; 702 for (const auto &FreqPair : FreqPairs) { 703 const PerRecordStats &RecStats = Stats.CodeFreq[FreqPair.second]; 704 705 O.OS << format("\t\t%7d %9lu", RecStats.NumInstances, 706 (unsigned long)RecStats.TotalBits); 707 708 if (RecStats.NumInstances > 1) 709 O.OS << format(" %9.1f", 710 (double)RecStats.TotalBits / RecStats.NumInstances); 711 else 712 O.OS << " "; 713 714 if (RecStats.NumAbbrev) 715 O.OS << format(" %7.2f", (double)RecStats.NumAbbrev / 716 RecStats.NumInstances * 100); 717 else 718 O.OS << " "; 719 720 O.OS << " "; 721 if (Optional<const char *> CodeName = GetCodeName( 722 FreqPair.second, Stat.first, BlockInfo, CurStreamType)) 723 O.OS << *CodeName << "\n"; 724 else 725 O.OS << "UnknownCode" << FreqPair.second << "\n"; 726 } 727 O.OS << "\n"; 728 } 729 } 730 } 731 732 Error BitcodeAnalyzer::parseBlock(unsigned BlockID, unsigned IndentLevel, 733 Optional<BCDumpOptions> O, 734 Optional<StringRef> CheckHash) { 735 std::string Indent(IndentLevel * 2, ' '); 736 uint64_t BlockBitStart = Stream.GetCurrentBitNo(); 737 738 // Get the statistics for this BlockID. 739 PerBlockIDStats &BlockStats = BlockIDStats[BlockID]; 740 741 BlockStats.NumInstances++; 742 743 // BLOCKINFO is a special part of the stream. 744 bool DumpRecords = O.has_value(); 745 if (BlockID == bitc::BLOCKINFO_BLOCK_ID) { 746 if (O && !O->DumpBlockinfo) 747 O->OS << Indent << "<BLOCKINFO_BLOCK/>\n"; 748 Optional<BitstreamBlockInfo> NewBlockInfo; 749 if (Error E = Stream.ReadBlockInfoBlock(/*ReadBlockInfoNames=*/true) 750 .moveInto(NewBlockInfo)) 751 return E; 752 if (!NewBlockInfo) 753 return reportError("Malformed BlockInfoBlock"); 754 BlockInfo = std::move(*NewBlockInfo); 755 if (Error Err = Stream.JumpToBit(BlockBitStart)) 756 return Err; 757 // It's not really interesting to dump the contents of the blockinfo 758 // block, so only do it if the user explicitly requests it. 759 DumpRecords = O && O->DumpBlockinfo; 760 } 761 762 unsigned NumWords = 0; 763 if (Error Err = Stream.EnterSubBlock(BlockID, &NumWords)) 764 return Err; 765 766 // Keep it for later, when we see a MODULE_HASH record 767 uint64_t BlockEntryPos = Stream.getCurrentByteNo(); 768 769 Optional<const char *> BlockName; 770 if (DumpRecords) { 771 O->OS << Indent << "<"; 772 if ((BlockName = GetBlockName(BlockID, BlockInfo, CurStreamType))) 773 O->OS << *BlockName; 774 else 775 O->OS << "UnknownBlock" << BlockID; 776 777 if (!O->Symbolic && BlockName) 778 O->OS << " BlockID=" << BlockID; 779 780 O->OS << " NumWords=" << NumWords 781 << " BlockCodeSize=" << Stream.getAbbrevIDWidth() << ">\n"; 782 } 783 784 SmallVector<uint64_t, 64> Record; 785 786 // Keep the offset to the metadata index if seen. 787 uint64_t MetadataIndexOffset = 0; 788 789 // Read all the records for this block. 790 while (true) { 791 if (Stream.AtEndOfStream()) 792 return reportError("Premature end of bitstream"); 793 794 uint64_t RecordStartBit = Stream.GetCurrentBitNo(); 795 796 BitstreamEntry Entry; 797 if (Error E = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs) 798 .moveInto(Entry)) 799 return E; 800 801 switch (Entry.Kind) { 802 case BitstreamEntry::Error: 803 return reportError("malformed bitcode file"); 804 case BitstreamEntry::EndBlock: { 805 uint64_t BlockBitEnd = Stream.GetCurrentBitNo(); 806 BlockStats.NumBits += BlockBitEnd - BlockBitStart; 807 if (DumpRecords) { 808 O->OS << Indent << "</"; 809 if (BlockName) 810 O->OS << *BlockName << ">\n"; 811 else 812 O->OS << "UnknownBlock" << BlockID << ">\n"; 813 } 814 return Error::success(); 815 } 816 817 case BitstreamEntry::SubBlock: { 818 uint64_t SubBlockBitStart = Stream.GetCurrentBitNo(); 819 if (Error E = parseBlock(Entry.ID, IndentLevel + 1, O, CheckHash)) 820 return E; 821 ++BlockStats.NumSubBlocks; 822 uint64_t SubBlockBitEnd = Stream.GetCurrentBitNo(); 823 824 // Don't include subblock sizes in the size of this block. 825 BlockBitStart += SubBlockBitEnd - SubBlockBitStart; 826 continue; 827 } 828 case BitstreamEntry::Record: 829 // The interesting case. 830 break; 831 } 832 833 if (Entry.ID == bitc::DEFINE_ABBREV) { 834 if (Error Err = Stream.ReadAbbrevRecord()) 835 return Err; 836 ++BlockStats.NumAbbrevs; 837 continue; 838 } 839 840 Record.clear(); 841 842 ++BlockStats.NumRecords; 843 844 StringRef Blob; 845 uint64_t CurrentRecordPos = Stream.GetCurrentBitNo(); 846 unsigned Code; 847 if (Error E = Stream.readRecord(Entry.ID, Record, &Blob).moveInto(Code)) 848 return E; 849 850 // Increment the # occurrences of this code. 851 if (BlockStats.CodeFreq.size() <= Code) 852 BlockStats.CodeFreq.resize(Code + 1); 853 BlockStats.CodeFreq[Code].NumInstances++; 854 BlockStats.CodeFreq[Code].TotalBits += 855 Stream.GetCurrentBitNo() - RecordStartBit; 856 if (Entry.ID != bitc::UNABBREV_RECORD) { 857 BlockStats.CodeFreq[Code].NumAbbrev++; 858 ++BlockStats.NumAbbreviatedRecords; 859 } 860 861 if (DumpRecords) { 862 O->OS << Indent << " <"; 863 Optional<const char *> CodeName = 864 GetCodeName(Code, BlockID, BlockInfo, CurStreamType); 865 if (CodeName) 866 O->OS << *CodeName; 867 else 868 O->OS << "UnknownCode" << Code; 869 if (!O->Symbolic && CodeName) 870 O->OS << " codeid=" << Code; 871 const BitCodeAbbrev *Abbv = nullptr; 872 if (Entry.ID != bitc::UNABBREV_RECORD) { 873 Expected<const BitCodeAbbrev *> MaybeAbbv = Stream.getAbbrev(Entry.ID); 874 if (!MaybeAbbv) 875 return MaybeAbbv.takeError(); 876 Abbv = MaybeAbbv.get(); 877 O->OS << " abbrevid=" << Entry.ID; 878 } 879 880 for (unsigned i = 0, e = Record.size(); i != e; ++i) 881 O->OS << " op" << i << "=" << (int64_t)Record[i]; 882 883 // If we found a metadata index, let's verify that we had an offset 884 // before and validate its forward reference offset was correct! 885 if (BlockID == bitc::METADATA_BLOCK_ID) { 886 if (Code == bitc::METADATA_INDEX_OFFSET) { 887 if (Record.size() != 2) 888 O->OS << "(Invalid record)"; 889 else { 890 auto Offset = Record[0] + (Record[1] << 32); 891 MetadataIndexOffset = Stream.GetCurrentBitNo() + Offset; 892 } 893 } 894 if (Code == bitc::METADATA_INDEX) { 895 O->OS << " (offset "; 896 if (MetadataIndexOffset == RecordStartBit) 897 O->OS << "match)"; 898 else 899 O->OS << "mismatch: " << MetadataIndexOffset << " vs " 900 << RecordStartBit << ")"; 901 } 902 } 903 904 // If we found a module hash, let's verify that it matches! 905 if (BlockID == bitc::MODULE_BLOCK_ID && Code == bitc::MODULE_CODE_HASH && 906 CheckHash) { 907 if (Record.size() != 5) 908 O->OS << " (invalid)"; 909 else { 910 // Recompute the hash and compare it to the one in the bitcode 911 SHA1 Hasher; 912 std::array<uint8_t, 20> Hash; 913 Hasher.update(*CheckHash); 914 { 915 int BlockSize = (CurrentRecordPos / 8) - BlockEntryPos; 916 auto Ptr = Stream.getPointerToByte(BlockEntryPos, BlockSize); 917 Hasher.update(ArrayRef<uint8_t>(Ptr, BlockSize)); 918 Hash = Hasher.result(); 919 } 920 std::array<uint8_t, 20> RecordedHash; 921 int Pos = 0; 922 for (auto &Val : Record) { 923 assert(!(Val >> 32) && "Unexpected high bits set"); 924 support::endian::write32be(&RecordedHash[Pos], Val); 925 Pos += 4; 926 } 927 if (Hash == RecordedHash) 928 O->OS << " (match)"; 929 else 930 O->OS << " (!mismatch!)"; 931 } 932 } 933 934 O->OS << "/>"; 935 936 if (Abbv) { 937 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { 938 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 939 if (!Op.isEncoding() || Op.getEncoding() != BitCodeAbbrevOp::Array) 940 continue; 941 assert(i + 2 == e && "Array op not second to last"); 942 std::string Str; 943 bool ArrayIsPrintable = true; 944 for (unsigned j = i - 1, je = Record.size(); j != je; ++j) { 945 if (!isPrint(static_cast<unsigned char>(Record[j]))) { 946 ArrayIsPrintable = false; 947 break; 948 } 949 Str += (char)Record[j]; 950 } 951 if (ArrayIsPrintable) 952 O->OS << " record string = '" << Str << "'"; 953 break; 954 } 955 } 956 957 if (Blob.data()) { 958 if (canDecodeBlob(Code, BlockID)) { 959 if (Error E = decodeMetadataStringsBlob(Indent, Record, Blob, O->OS)) 960 return E; 961 } else { 962 O->OS << " blob data = "; 963 if (O->ShowBinaryBlobs) { 964 O->OS << "'"; 965 O->OS.write_escaped(Blob, /*hex=*/true) << "'"; 966 } else { 967 bool BlobIsPrintable = true; 968 for (char C : Blob) 969 if (!isPrint(static_cast<unsigned char>(C))) { 970 BlobIsPrintable = false; 971 break; 972 } 973 974 if (BlobIsPrintable) 975 O->OS << "'" << Blob << "'"; 976 else 977 O->OS << "unprintable, " << Blob.size() << " bytes."; 978 } 979 } 980 } 981 982 O->OS << "\n"; 983 } 984 985 // Make sure that we can skip the current record. 986 if (Error Err = Stream.JumpToBit(CurrentRecordPos)) 987 return Err; 988 if (Expected<unsigned> Skipped = Stream.skipRecord(Entry.ID)) 989 ; // Do nothing. 990 else 991 return Skipped.takeError(); 992 } 993 } 994 995