1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Bitstream/BitstreamReader.h" 10 #include "llvm/ADT/StringRef.h" 11 #include <cassert> 12 #include <string> 13 14 using namespace llvm; 15 16 //===----------------------------------------------------------------------===// 17 // BitstreamCursor implementation 18 //===----------------------------------------------------------------------===// 19 20 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block. 21 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { 22 // Save the current block's state on BlockScope. 23 BlockScope.push_back(Block(CurCodeSize)); 24 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 25 26 // Add the abbrevs specific to this block to the CurAbbrevs list. 27 if (BlockInfo) { 28 if (const BitstreamBlockInfo::BlockInfo *Info = 29 BlockInfo->getBlockInfo(BlockID)) { 30 CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(), 31 Info->Abbrevs.end()); 32 } 33 } 34 35 // Get the codesize of this block. 36 Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth); 37 if (!MaybeVBR) 38 return MaybeVBR.takeError(); 39 CurCodeSize = MaybeVBR.get(); 40 41 if (CurCodeSize > MaxChunkSize) 42 return llvm::createStringError( 43 std::errc::illegal_byte_sequence, 44 "can't read more than %zu at a time, trying to read %u", +MaxChunkSize, 45 CurCodeSize); 46 47 SkipToFourByteBoundary(); 48 Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth); 49 if (!MaybeNum) 50 return MaybeNum.takeError(); 51 word_t NumWords = MaybeNum.get(); 52 if (NumWordsP) 53 *NumWordsP = NumWords; 54 55 if (CurCodeSize == 0) 56 return llvm::createStringError( 57 std::errc::illegal_byte_sequence, 58 "can't enter sub-block: current code size is 0"); 59 if (AtEndOfStream()) 60 return llvm::createStringError( 61 std::errc::illegal_byte_sequence, 62 "can't enter sub block: already at end of stream"); 63 64 return Error::success(); 65 } 66 67 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor, 68 const BitCodeAbbrevOp &Op) { 69 assert(!Op.isLiteral() && "Not to be used with literals!"); 70 71 // Decode the value as we are commanded. 72 switch (Op.getEncoding()) { 73 case BitCodeAbbrevOp::Array: 74 case BitCodeAbbrevOp::Blob: 75 llvm_unreachable("Should not reach here"); 76 case BitCodeAbbrevOp::Fixed: 77 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 78 return Cursor.Read((unsigned)Op.getEncodingData()); 79 case BitCodeAbbrevOp::VBR: 80 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 81 return Cursor.ReadVBR64((unsigned)Op.getEncodingData()); 82 case BitCodeAbbrevOp::Char6: 83 if (Expected<unsigned> Res = Cursor.Read(6)) 84 return BitCodeAbbrevOp::DecodeChar6(Res.get()); 85 else 86 return Res.takeError(); 87 } 88 llvm_unreachable("invalid abbreviation encoding"); 89 } 90 91 /// skipRecord - Read the current record and discard it. 92 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) { 93 // Skip unabbreviated records by reading past their entries. 94 if (AbbrevID == bitc::UNABBREV_RECORD) { 95 Expected<uint32_t> MaybeCode = ReadVBR(6); 96 if (!MaybeCode) 97 return MaybeCode.takeError(); 98 unsigned Code = MaybeCode.get(); 99 Expected<uint32_t> MaybeVBR = ReadVBR(6); 100 if (!MaybeVBR) 101 return MaybeVBR.get(); 102 unsigned NumElts = MaybeVBR.get(); 103 for (unsigned i = 0; i != NumElts; ++i) 104 if (Expected<uint64_t> Res = ReadVBR64(6)) 105 ; // Skip! 106 else 107 return Res.takeError(); 108 return Code; 109 } 110 111 const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); 112 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); 113 unsigned Code; 114 if (CodeOp.isLiteral()) 115 Code = CodeOp.getLiteralValue(); 116 else { 117 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || 118 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) 119 return llvm::createStringError( 120 std::errc::illegal_byte_sequence, 121 "Abbreviation starts with an Array or a Blob"); 122 Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp); 123 if (!MaybeCode) 124 return MaybeCode.takeError(); 125 Code = MaybeCode.get(); 126 } 127 128 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) { 129 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 130 if (Op.isLiteral()) 131 continue; 132 133 if (Op.getEncoding() != BitCodeAbbrevOp::Array && 134 Op.getEncoding() != BitCodeAbbrevOp::Blob) { 135 if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op)) 136 continue; 137 else 138 return MaybeField.takeError(); 139 } 140 141 if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 142 // Array case. Read the number of elements as a vbr6. 143 Expected<uint32_t> MaybeNum = ReadVBR(6); 144 if (!MaybeNum) 145 return MaybeNum.takeError(); 146 unsigned NumElts = MaybeNum.get(); 147 148 // Get the element encoding. 149 assert(i+2 == e && "array op not second to last?"); 150 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 151 152 // Read all the elements. 153 // Decode the value as we are commanded. 154 switch (EltEnc.getEncoding()) { 155 default: 156 report_fatal_error("Array element type can't be an Array or a Blob"); 157 case BitCodeAbbrevOp::Fixed: 158 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); 159 if (Error Err = 160 JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) * 161 EltEnc.getEncodingData())) 162 return std::move(Err); 163 break; 164 case BitCodeAbbrevOp::VBR: 165 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); 166 for (; NumElts; --NumElts) 167 if (Expected<uint64_t> Res = 168 ReadVBR64((unsigned)EltEnc.getEncodingData())) 169 ; // Skip! 170 else 171 return Res.takeError(); 172 break; 173 case BitCodeAbbrevOp::Char6: 174 if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6)) 175 return std::move(Err); 176 break; 177 } 178 continue; 179 } 180 181 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); 182 // Blob case. Read the number of bytes as a vbr6. 183 Expected<uint32_t> MaybeNum = ReadVBR(6); 184 if (!MaybeNum) 185 return MaybeNum.takeError(); 186 unsigned NumElts = MaybeNum.get(); 187 SkipToFourByteBoundary(); // 32-bit alignment 188 189 // Figure out where the end of this blob will be including tail padding. 190 const size_t NewEnd = 191 GetCurrentBitNo() + ((static_cast<uint64_t>(NumElts) + 3) & ~3) * 8; 192 193 // If this would read off the end of the bitcode file, just set the 194 // record to empty and return. 195 if (!canSkipToPos(NewEnd/8)) { 196 skipToEnd(); 197 break; 198 } 199 200 // Skip over the blob. 201 if (Error Err = JumpToBit(NewEnd)) 202 return std::move(Err); 203 } 204 return Code; 205 } 206 207 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID, 208 SmallVectorImpl<uint64_t> &Vals, 209 StringRef *Blob) { 210 if (AbbrevID == bitc::UNABBREV_RECORD) { 211 Expected<uint32_t> MaybeCode = ReadVBR(6); 212 if (!MaybeCode) 213 return MaybeCode.takeError(); 214 uint32_t Code = MaybeCode.get(); 215 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 216 if (!MaybeNumElts) 217 return MaybeNumElts.takeError(); 218 uint32_t NumElts = MaybeNumElts.get(); 219 Vals.reserve(Vals.size() + NumElts); 220 221 for (unsigned i = 0; i != NumElts; ++i) 222 if (Expected<uint64_t> MaybeVal = ReadVBR64(6)) 223 Vals.push_back(MaybeVal.get()); 224 else 225 return MaybeVal.takeError(); 226 return Code; 227 } 228 229 const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); 230 231 // Read the record code first. 232 assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?"); 233 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); 234 unsigned Code; 235 if (CodeOp.isLiteral()) 236 Code = CodeOp.getLiteralValue(); 237 else { 238 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || 239 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) 240 report_fatal_error("Abbreviation starts with an Array or a Blob"); 241 if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp)) 242 Code = MaybeCode.get(); 243 else 244 return MaybeCode.takeError(); 245 } 246 247 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { 248 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 249 if (Op.isLiteral()) { 250 Vals.push_back(Op.getLiteralValue()); 251 continue; 252 } 253 254 if (Op.getEncoding() != BitCodeAbbrevOp::Array && 255 Op.getEncoding() != BitCodeAbbrevOp::Blob) { 256 if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op)) 257 Vals.push_back(MaybeVal.get()); 258 else 259 return MaybeVal.takeError(); 260 continue; 261 } 262 263 if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 264 // Array case. Read the number of elements as a vbr6. 265 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 266 if (!MaybeNumElts) 267 return MaybeNumElts.takeError(); 268 uint32_t NumElts = MaybeNumElts.get(); 269 Vals.reserve(Vals.size() + NumElts); 270 271 // Get the element encoding. 272 if (i + 2 != e) 273 report_fatal_error("Array op not second to last"); 274 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 275 if (!EltEnc.isEncoding()) 276 report_fatal_error( 277 "Array element type has to be an encoding of a type"); 278 279 // Read all the elements. 280 switch (EltEnc.getEncoding()) { 281 default: 282 report_fatal_error("Array element type can't be an Array or a Blob"); 283 case BitCodeAbbrevOp::Fixed: 284 for (; NumElts; --NumElts) 285 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = 286 Read((unsigned)EltEnc.getEncodingData())) 287 Vals.push_back(MaybeVal.get()); 288 else 289 return MaybeVal.takeError(); 290 break; 291 case BitCodeAbbrevOp::VBR: 292 for (; NumElts; --NumElts) 293 if (Expected<uint64_t> MaybeVal = 294 ReadVBR64((unsigned)EltEnc.getEncodingData())) 295 Vals.push_back(MaybeVal.get()); 296 else 297 return MaybeVal.takeError(); 298 break; 299 case BitCodeAbbrevOp::Char6: 300 for (; NumElts; --NumElts) 301 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6)) 302 Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get())); 303 else 304 return MaybeVal.takeError(); 305 } 306 continue; 307 } 308 309 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); 310 // Blob case. Read the number of bytes as a vbr6. 311 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 312 if (!MaybeNumElts) 313 return MaybeNumElts.takeError(); 314 uint32_t NumElts = MaybeNumElts.get(); 315 SkipToFourByteBoundary(); // 32-bit alignment 316 317 // Figure out where the end of this blob will be including tail padding. 318 size_t CurBitPos = GetCurrentBitNo(); 319 const size_t NewEnd = 320 CurBitPos + ((static_cast<uint64_t>(NumElts) + 3) & ~3) * 8; 321 322 // If this would read off the end of the bitcode file, just set the 323 // record to empty and return. 324 if (!canSkipToPos(NewEnd/8)) { 325 Vals.append(NumElts, 0); 326 skipToEnd(); 327 break; 328 } 329 330 // Otherwise, inform the streamer that we need these bytes in memory. Skip 331 // over tail padding first, in case jumping to NewEnd invalidates the Blob 332 // pointer. 333 if (Error Err = JumpToBit(NewEnd)) 334 return std::move(Err); 335 const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts); 336 337 // If we can return a reference to the data, do so to avoid copying it. 338 if (Blob) { 339 *Blob = StringRef(Ptr, NumElts); 340 } else { 341 // Otherwise, unpack into Vals with zero extension. 342 auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr); 343 Vals.append(UPtr, UPtr + NumElts); 344 } 345 } 346 347 return Code; 348 } 349 350 Error BitstreamCursor::ReadAbbrevRecord() { 351 auto Abbv = std::make_shared<BitCodeAbbrev>(); 352 Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5); 353 if (!MaybeNumOpInfo) 354 return MaybeNumOpInfo.takeError(); 355 unsigned NumOpInfo = MaybeNumOpInfo.get(); 356 for (unsigned i = 0; i != NumOpInfo; ++i) { 357 Expected<word_t> MaybeIsLiteral = Read(1); 358 if (!MaybeIsLiteral) 359 return MaybeIsLiteral.takeError(); 360 bool IsLiteral = MaybeIsLiteral.get(); 361 if (IsLiteral) { 362 Expected<uint64_t> MaybeOp = ReadVBR64(8); 363 if (!MaybeOp) 364 return MaybeOp.takeError(); 365 Abbv->Add(BitCodeAbbrevOp(MaybeOp.get())); 366 continue; 367 } 368 369 Expected<word_t> MaybeEncoding = Read(3); 370 if (!MaybeEncoding) 371 return MaybeEncoding.takeError(); 372 BitCodeAbbrevOp::Encoding E = 373 (BitCodeAbbrevOp::Encoding)MaybeEncoding.get(); 374 if (BitCodeAbbrevOp::hasEncodingData(E)) { 375 Expected<uint64_t> MaybeData = ReadVBR64(5); 376 if (!MaybeData) 377 return MaybeData.takeError(); 378 uint64_t Data = MaybeData.get(); 379 380 // As a special case, handle fixed(0) (i.e., a fixed field with zero bits) 381 // and vbr(0) as a literal zero. This is decoded the same way, and avoids 382 // a slow path in Read() to have to handle reading zero bits. 383 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && 384 Data == 0) { 385 Abbv->Add(BitCodeAbbrevOp(0)); 386 continue; 387 } 388 389 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && 390 Data > MaxChunkSize) 391 report_fatal_error( 392 "Fixed or VBR abbrev record with size > MaxChunkData"); 393 394 Abbv->Add(BitCodeAbbrevOp(E, Data)); 395 } else 396 Abbv->Add(BitCodeAbbrevOp(E)); 397 } 398 399 if (Abbv->getNumOperandInfos() == 0) 400 report_fatal_error("Abbrev record with no operands"); 401 CurAbbrevs.push_back(std::move(Abbv)); 402 403 return Error::success(); 404 } 405 406 Expected<Optional<BitstreamBlockInfo>> 407 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) { 408 if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) 409 return std::move(Err); 410 411 BitstreamBlockInfo NewBlockInfo; 412 413 SmallVector<uint64_t, 64> Record; 414 BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr; 415 416 // Read all the records for this module. 417 while (true) { 418 Expected<BitstreamEntry> MaybeEntry = 419 advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs); 420 if (!MaybeEntry) 421 return MaybeEntry.takeError(); 422 BitstreamEntry Entry = MaybeEntry.get(); 423 424 switch (Entry.Kind) { 425 case llvm::BitstreamEntry::SubBlock: // Handled for us already. 426 case llvm::BitstreamEntry::Error: 427 return None; 428 case llvm::BitstreamEntry::EndBlock: 429 return std::move(NewBlockInfo); 430 case llvm::BitstreamEntry::Record: 431 // The interesting case. 432 break; 433 } 434 435 // Read abbrev records, associate them with CurBID. 436 if (Entry.ID == bitc::DEFINE_ABBREV) { 437 if (!CurBlockInfo) return None; 438 if (Error Err = ReadAbbrevRecord()) 439 return std::move(Err); 440 441 // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the 442 // appropriate BlockInfo. 443 CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back())); 444 CurAbbrevs.pop_back(); 445 continue; 446 } 447 448 // Read a record. 449 Record.clear(); 450 Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record); 451 if (!MaybeBlockInfo) 452 return MaybeBlockInfo.takeError(); 453 switch (MaybeBlockInfo.get()) { 454 default: 455 break; // Default behavior, ignore unknown content. 456 case bitc::BLOCKINFO_CODE_SETBID: 457 if (Record.size() < 1) 458 return None; 459 CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]); 460 break; 461 case bitc::BLOCKINFO_CODE_BLOCKNAME: { 462 if (!CurBlockInfo) 463 return None; 464 if (!ReadBlockInfoNames) 465 break; // Ignore name. 466 CurBlockInfo->Name = std::string(Record.begin(), Record.end()); 467 break; 468 } 469 case bitc::BLOCKINFO_CODE_SETRECORDNAME: { 470 if (!CurBlockInfo) return None; 471 if (!ReadBlockInfoNames) 472 break; // Ignore name. 473 CurBlockInfo->RecordNames.emplace_back( 474 (unsigned)Record[0], std::string(Record.begin() + 1, Record.end())); 475 break; 476 } 477 } 478 } 479 } 480