1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Bitstream/BitstreamReader.h" 10 #include "llvm/ADT/StringRef.h" 11 #include <cassert> 12 #include <string> 13 14 using namespace llvm; 15 16 //===----------------------------------------------------------------------===// 17 // BitstreamCursor implementation 18 //===----------------------------------------------------------------------===// 19 // 20 static Error error(const char *Message) { 21 return createStringError(std::errc::illegal_byte_sequence, Message); 22 } 23 24 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block. 25 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { 26 // Save the current block's state on BlockScope. 27 BlockScope.push_back(Block(CurCodeSize)); 28 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 29 30 // Add the abbrevs specific to this block to the CurAbbrevs list. 31 if (BlockInfo) { 32 if (const BitstreamBlockInfo::BlockInfo *Info = 33 BlockInfo->getBlockInfo(BlockID)) { 34 llvm::append_range(CurAbbrevs, Info->Abbrevs); 35 } 36 } 37 38 // Get the codesize of this block. 39 Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth); 40 if (!MaybeVBR) 41 return MaybeVBR.takeError(); 42 CurCodeSize = MaybeVBR.get(); 43 44 if (CurCodeSize > MaxChunkSize) 45 return llvm::createStringError( 46 std::errc::illegal_byte_sequence, 47 "can't read more than %zu at a time, trying to read %u", +MaxChunkSize, 48 CurCodeSize); 49 50 SkipToFourByteBoundary(); 51 Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth); 52 if (!MaybeNum) 53 return MaybeNum.takeError(); 54 word_t NumWords = MaybeNum.get(); 55 if (NumWordsP) 56 *NumWordsP = NumWords; 57 58 if (CurCodeSize == 0) 59 return llvm::createStringError( 60 std::errc::illegal_byte_sequence, 61 "can't enter sub-block: current code size is 0"); 62 if (AtEndOfStream()) 63 return llvm::createStringError( 64 std::errc::illegal_byte_sequence, 65 "can't enter sub block: already at end of stream"); 66 67 return Error::success(); 68 } 69 70 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor, 71 const BitCodeAbbrevOp &Op) { 72 assert(!Op.isLiteral() && "Not to be used with literals!"); 73 74 // Decode the value as we are commanded. 75 switch (Op.getEncoding()) { 76 case BitCodeAbbrevOp::Array: 77 case BitCodeAbbrevOp::Blob: 78 llvm_unreachable("Should not reach here"); 79 case BitCodeAbbrevOp::Fixed: 80 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 81 return Cursor.Read((unsigned)Op.getEncodingData()); 82 case BitCodeAbbrevOp::VBR: 83 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 84 return Cursor.ReadVBR64((unsigned)Op.getEncodingData()); 85 case BitCodeAbbrevOp::Char6: 86 if (Expected<unsigned> Res = Cursor.Read(6)) 87 return BitCodeAbbrevOp::DecodeChar6(Res.get()); 88 else 89 return Res.takeError(); 90 } 91 llvm_unreachable("invalid abbreviation encoding"); 92 } 93 94 /// skipRecord - Read the current record and discard it. 95 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) { 96 // Skip unabbreviated records by reading past their entries. 97 if (AbbrevID == bitc::UNABBREV_RECORD) { 98 Expected<uint32_t> MaybeCode = ReadVBR(6); 99 if (!MaybeCode) 100 return MaybeCode.takeError(); 101 unsigned Code = MaybeCode.get(); 102 Expected<uint32_t> MaybeVBR = ReadVBR(6); 103 if (!MaybeVBR) 104 return MaybeVBR.takeError(); 105 unsigned NumElts = MaybeVBR.get(); 106 for (unsigned i = 0; i != NumElts; ++i) 107 if (Expected<uint64_t> Res = ReadVBR64(6)) 108 ; // Skip! 109 else 110 return Res.takeError(); 111 return Code; 112 } 113 114 Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID); 115 if (!MaybeAbbv) 116 return MaybeAbbv.takeError(); 117 118 const BitCodeAbbrev *Abbv = MaybeAbbv.get(); 119 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); 120 unsigned Code; 121 if (CodeOp.isLiteral()) 122 Code = CodeOp.getLiteralValue(); 123 else { 124 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || 125 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) 126 return llvm::createStringError( 127 std::errc::illegal_byte_sequence, 128 "Abbreviation starts with an Array or a Blob"); 129 Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp); 130 if (!MaybeCode) 131 return MaybeCode.takeError(); 132 Code = MaybeCode.get(); 133 } 134 135 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) { 136 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 137 if (Op.isLiteral()) 138 continue; 139 140 if (Op.getEncoding() != BitCodeAbbrevOp::Array && 141 Op.getEncoding() != BitCodeAbbrevOp::Blob) { 142 if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op)) 143 continue; 144 else 145 return MaybeField.takeError(); 146 } 147 148 if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 149 // Array case. Read the number of elements as a vbr6. 150 Expected<uint32_t> MaybeNum = ReadVBR(6); 151 if (!MaybeNum) 152 return MaybeNum.takeError(); 153 unsigned NumElts = MaybeNum.get(); 154 155 // Get the element encoding. 156 assert(i+2 == e && "array op not second to last?"); 157 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 158 159 // Read all the elements. 160 // Decode the value as we are commanded. 161 switch (EltEnc.getEncoding()) { 162 default: 163 return error("Array element type can't be an Array or a Blob"); 164 case BitCodeAbbrevOp::Fixed: 165 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); 166 if (Error Err = 167 JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) * 168 EltEnc.getEncodingData())) 169 return std::move(Err); 170 break; 171 case BitCodeAbbrevOp::VBR: 172 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); 173 for (; NumElts; --NumElts) 174 if (Expected<uint64_t> Res = 175 ReadVBR64((unsigned)EltEnc.getEncodingData())) 176 ; // Skip! 177 else 178 return Res.takeError(); 179 break; 180 case BitCodeAbbrevOp::Char6: 181 if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6)) 182 return std::move(Err); 183 break; 184 } 185 continue; 186 } 187 188 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); 189 // Blob case. Read the number of bytes as a vbr6. 190 Expected<uint32_t> MaybeNum = ReadVBR(6); 191 if (!MaybeNum) 192 return MaybeNum.takeError(); 193 unsigned NumElts = MaybeNum.get(); 194 SkipToFourByteBoundary(); // 32-bit alignment 195 196 // Figure out where the end of this blob will be including tail padding. 197 const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8; 198 199 // If this would read off the end of the bitcode file, just set the 200 // record to empty and return. 201 if (!canSkipToPos(NewEnd/8)) { 202 skipToEnd(); 203 break; 204 } 205 206 // Skip over the blob. 207 if (Error Err = JumpToBit(NewEnd)) 208 return std::move(Err); 209 } 210 return Code; 211 } 212 213 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID, 214 SmallVectorImpl<uint64_t> &Vals, 215 StringRef *Blob) { 216 if (AbbrevID == bitc::UNABBREV_RECORD) { 217 Expected<uint32_t> MaybeCode = ReadVBR(6); 218 if (!MaybeCode) 219 return MaybeCode.takeError(); 220 uint32_t Code = MaybeCode.get(); 221 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 222 if (!MaybeNumElts) 223 return MaybeNumElts.takeError(); 224 uint32_t NumElts = MaybeNumElts.get(); 225 Vals.reserve(Vals.size() + NumElts); 226 227 for (unsigned i = 0; i != NumElts; ++i) 228 if (Expected<uint64_t> MaybeVal = ReadVBR64(6)) 229 Vals.push_back(MaybeVal.get()); 230 else 231 return MaybeVal.takeError(); 232 return Code; 233 } 234 235 Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID); 236 if (!MaybeAbbv) 237 return MaybeAbbv.takeError(); 238 const BitCodeAbbrev *Abbv = MaybeAbbv.get(); 239 240 // Read the record code first. 241 assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?"); 242 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); 243 unsigned Code; 244 if (CodeOp.isLiteral()) 245 Code = CodeOp.getLiteralValue(); 246 else { 247 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || 248 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) 249 return error("Abbreviation starts with an Array or a Blob"); 250 if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp)) 251 Code = MaybeCode.get(); 252 else 253 return MaybeCode.takeError(); 254 } 255 256 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { 257 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 258 if (Op.isLiteral()) { 259 Vals.push_back(Op.getLiteralValue()); 260 continue; 261 } 262 263 if (Op.getEncoding() != BitCodeAbbrevOp::Array && 264 Op.getEncoding() != BitCodeAbbrevOp::Blob) { 265 if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op)) 266 Vals.push_back(MaybeVal.get()); 267 else 268 return MaybeVal.takeError(); 269 continue; 270 } 271 272 if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 273 // Array case. Read the number of elements as a vbr6. 274 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 275 if (!MaybeNumElts) 276 return MaybeNumElts.takeError(); 277 uint32_t NumElts = MaybeNumElts.get(); 278 Vals.reserve(Vals.size() + NumElts); 279 280 // Get the element encoding. 281 if (i + 2 != e) 282 return error("Array op not second to last"); 283 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 284 if (!EltEnc.isEncoding()) 285 return error( 286 "Array element type has to be an encoding of a type"); 287 288 // Read all the elements. 289 switch (EltEnc.getEncoding()) { 290 default: 291 return error("Array element type can't be an Array or a Blob"); 292 case BitCodeAbbrevOp::Fixed: 293 for (; NumElts; --NumElts) 294 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = 295 Read((unsigned)EltEnc.getEncodingData())) 296 Vals.push_back(MaybeVal.get()); 297 else 298 return MaybeVal.takeError(); 299 break; 300 case BitCodeAbbrevOp::VBR: 301 for (; NumElts; --NumElts) 302 if (Expected<uint64_t> MaybeVal = 303 ReadVBR64((unsigned)EltEnc.getEncodingData())) 304 Vals.push_back(MaybeVal.get()); 305 else 306 return MaybeVal.takeError(); 307 break; 308 case BitCodeAbbrevOp::Char6: 309 for (; NumElts; --NumElts) 310 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6)) 311 Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get())); 312 else 313 return MaybeVal.takeError(); 314 } 315 continue; 316 } 317 318 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); 319 // Blob case. Read the number of bytes as a vbr6. 320 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 321 if (!MaybeNumElts) 322 return MaybeNumElts.takeError(); 323 uint32_t NumElts = MaybeNumElts.get(); 324 SkipToFourByteBoundary(); // 32-bit alignment 325 326 // Figure out where the end of this blob will be including tail padding. 327 size_t CurBitPos = GetCurrentBitNo(); 328 const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8; 329 330 // If this would read off the end of the bitcode file, just set the 331 // record to empty and return. 332 if (!canSkipToPos(NewEnd/8)) { 333 Vals.append(NumElts, 0); 334 skipToEnd(); 335 break; 336 } 337 338 // Otherwise, inform the streamer that we need these bytes in memory. Skip 339 // over tail padding first, in case jumping to NewEnd invalidates the Blob 340 // pointer. 341 if (Error Err = JumpToBit(NewEnd)) 342 return std::move(Err); 343 const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts); 344 345 // If we can return a reference to the data, do so to avoid copying it. 346 if (Blob) { 347 *Blob = StringRef(Ptr, NumElts); 348 } else { 349 // Otherwise, unpack into Vals with zero extension. 350 auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr); 351 Vals.append(UPtr, UPtr + NumElts); 352 } 353 } 354 355 return Code; 356 } 357 358 Error BitstreamCursor::ReadAbbrevRecord() { 359 auto Abbv = std::make_shared<BitCodeAbbrev>(); 360 Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5); 361 if (!MaybeNumOpInfo) 362 return MaybeNumOpInfo.takeError(); 363 unsigned NumOpInfo = MaybeNumOpInfo.get(); 364 for (unsigned i = 0; i != NumOpInfo; ++i) { 365 Expected<word_t> MaybeIsLiteral = Read(1); 366 if (!MaybeIsLiteral) 367 return MaybeIsLiteral.takeError(); 368 bool IsLiteral = MaybeIsLiteral.get(); 369 if (IsLiteral) { 370 Expected<uint64_t> MaybeOp = ReadVBR64(8); 371 if (!MaybeOp) 372 return MaybeOp.takeError(); 373 Abbv->Add(BitCodeAbbrevOp(MaybeOp.get())); 374 continue; 375 } 376 377 Expected<word_t> MaybeEncoding = Read(3); 378 if (!MaybeEncoding) 379 return MaybeEncoding.takeError(); 380 if (!BitCodeAbbrevOp::isValidEncoding(MaybeEncoding.get())) 381 return error("Invalid encoding"); 382 383 BitCodeAbbrevOp::Encoding E = 384 (BitCodeAbbrevOp::Encoding)MaybeEncoding.get(); 385 if (BitCodeAbbrevOp::hasEncodingData(E)) { 386 Expected<uint64_t> MaybeData = ReadVBR64(5); 387 if (!MaybeData) 388 return MaybeData.takeError(); 389 uint64_t Data = MaybeData.get(); 390 391 // As a special case, handle fixed(0) (i.e., a fixed field with zero bits) 392 // and vbr(0) as a literal zero. This is decoded the same way, and avoids 393 // a slow path in Read() to have to handle reading zero bits. 394 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && 395 Data == 0) { 396 Abbv->Add(BitCodeAbbrevOp(0)); 397 continue; 398 } 399 400 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && 401 Data > MaxChunkSize) 402 return error("Fixed or VBR abbrev record with size > MaxChunkData"); 403 404 Abbv->Add(BitCodeAbbrevOp(E, Data)); 405 } else 406 Abbv->Add(BitCodeAbbrevOp(E)); 407 } 408 409 if (Abbv->getNumOperandInfos() == 0) 410 return error("Abbrev record with no operands"); 411 CurAbbrevs.push_back(std::move(Abbv)); 412 413 return Error::success(); 414 } 415 416 Expected<Optional<BitstreamBlockInfo>> 417 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) { 418 if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) 419 return std::move(Err); 420 421 BitstreamBlockInfo NewBlockInfo; 422 423 SmallVector<uint64_t, 64> Record; 424 BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr; 425 426 // Read all the records for this module. 427 while (true) { 428 Expected<BitstreamEntry> MaybeEntry = 429 advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs); 430 if (!MaybeEntry) 431 return MaybeEntry.takeError(); 432 BitstreamEntry Entry = MaybeEntry.get(); 433 434 switch (Entry.Kind) { 435 case llvm::BitstreamEntry::SubBlock: // Handled for us already. 436 case llvm::BitstreamEntry::Error: 437 return None; 438 case llvm::BitstreamEntry::EndBlock: 439 return std::move(NewBlockInfo); 440 case llvm::BitstreamEntry::Record: 441 // The interesting case. 442 break; 443 } 444 445 // Read abbrev records, associate them with CurBID. 446 if (Entry.ID == bitc::DEFINE_ABBREV) { 447 if (!CurBlockInfo) return None; 448 if (Error Err = ReadAbbrevRecord()) 449 return std::move(Err); 450 451 // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the 452 // appropriate BlockInfo. 453 CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back())); 454 CurAbbrevs.pop_back(); 455 continue; 456 } 457 458 // Read a record. 459 Record.clear(); 460 Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record); 461 if (!MaybeBlockInfo) 462 return MaybeBlockInfo.takeError(); 463 switch (MaybeBlockInfo.get()) { 464 default: 465 break; // Default behavior, ignore unknown content. 466 case bitc::BLOCKINFO_CODE_SETBID: 467 if (Record.size() < 1) 468 return None; 469 CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]); 470 break; 471 case bitc::BLOCKINFO_CODE_BLOCKNAME: { 472 if (!CurBlockInfo) 473 return None; 474 if (!ReadBlockInfoNames) 475 break; // Ignore name. 476 CurBlockInfo->Name = std::string(Record.begin(), Record.end()); 477 break; 478 } 479 case bitc::BLOCKINFO_CODE_SETRECORDNAME: { 480 if (!CurBlockInfo) return None; 481 if (!ReadBlockInfoNames) 482 break; // Ignore name. 483 CurBlockInfo->RecordNames.emplace_back( 484 (unsigned)Record[0], std::string(Record.begin() + 1, Record.end())); 485 break; 486 } 487 } 488 } 489 } 490