1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "llvm/Bitstream/BitstreamReader.h" 10 #include "llvm/ADT/StringRef.h" 11 #include <cassert> 12 #include <string> 13 14 using namespace llvm; 15 16 //===----------------------------------------------------------------------===// 17 // BitstreamCursor implementation 18 //===----------------------------------------------------------------------===// 19 // 20 static Error error(const char *Message) { 21 return createStringError(std::errc::illegal_byte_sequence, Message); 22 } 23 24 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block. 25 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { 26 // Save the current block's state on BlockScope. 27 BlockScope.push_back(Block(CurCodeSize)); 28 BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); 29 30 // Add the abbrevs specific to this block to the CurAbbrevs list. 31 if (BlockInfo) { 32 if (const BitstreamBlockInfo::BlockInfo *Info = 33 BlockInfo->getBlockInfo(BlockID)) { 34 llvm::append_range(CurAbbrevs, Info->Abbrevs); 35 } 36 } 37 38 // Get the codesize of this block. 39 Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth); 40 if (!MaybeVBR) 41 return MaybeVBR.takeError(); 42 CurCodeSize = MaybeVBR.get(); 43 44 if (CurCodeSize > MaxChunkSize) 45 return llvm::createStringError( 46 std::errc::illegal_byte_sequence, 47 "can't read more than %zu at a time, trying to read %u", +MaxChunkSize, 48 CurCodeSize); 49 50 SkipToFourByteBoundary(); 51 Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth); 52 if (!MaybeNum) 53 return MaybeNum.takeError(); 54 word_t NumWords = MaybeNum.get(); 55 if (NumWordsP) 56 *NumWordsP = NumWords; 57 58 if (CurCodeSize == 0) 59 return llvm::createStringError( 60 std::errc::illegal_byte_sequence, 61 "can't enter sub-block: current code size is 0"); 62 if (AtEndOfStream()) 63 return llvm::createStringError( 64 std::errc::illegal_byte_sequence, 65 "can't enter sub block: already at end of stream"); 66 67 return Error::success(); 68 } 69 70 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor, 71 const BitCodeAbbrevOp &Op) { 72 assert(!Op.isLiteral() && "Not to be used with literals!"); 73 74 // Decode the value as we are commanded. 75 switch (Op.getEncoding()) { 76 case BitCodeAbbrevOp::Array: 77 case BitCodeAbbrevOp::Blob: 78 llvm_unreachable("Should not reach here"); 79 case BitCodeAbbrevOp::Fixed: 80 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 81 return Cursor.Read((unsigned)Op.getEncodingData()); 82 case BitCodeAbbrevOp::VBR: 83 assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize); 84 return Cursor.ReadVBR64((unsigned)Op.getEncodingData()); 85 case BitCodeAbbrevOp::Char6: 86 if (Expected<unsigned> Res = Cursor.Read(6)) 87 return BitCodeAbbrevOp::DecodeChar6(Res.get()); 88 else 89 return Res.takeError(); 90 } 91 llvm_unreachable("invalid abbreviation encoding"); 92 } 93 94 /// skipRecord - Read the current record and discard it. 95 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) { 96 // Skip unabbreviated records by reading past their entries. 97 if (AbbrevID == bitc::UNABBREV_RECORD) { 98 Expected<uint32_t> MaybeCode = ReadVBR(6); 99 if (!MaybeCode) 100 return MaybeCode.takeError(); 101 unsigned Code = MaybeCode.get(); 102 Expected<uint32_t> MaybeVBR = ReadVBR(6); 103 if (!MaybeVBR) 104 return MaybeVBR.get(); 105 unsigned NumElts = MaybeVBR.get(); 106 for (unsigned i = 0; i != NumElts; ++i) 107 if (Expected<uint64_t> Res = ReadVBR64(6)) 108 ; // Skip! 109 else 110 return Res.takeError(); 111 return Code; 112 } 113 114 const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); 115 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); 116 unsigned Code; 117 if (CodeOp.isLiteral()) 118 Code = CodeOp.getLiteralValue(); 119 else { 120 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || 121 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) 122 return llvm::createStringError( 123 std::errc::illegal_byte_sequence, 124 "Abbreviation starts with an Array or a Blob"); 125 Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp); 126 if (!MaybeCode) 127 return MaybeCode.takeError(); 128 Code = MaybeCode.get(); 129 } 130 131 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) { 132 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 133 if (Op.isLiteral()) 134 continue; 135 136 if (Op.getEncoding() != BitCodeAbbrevOp::Array && 137 Op.getEncoding() != BitCodeAbbrevOp::Blob) { 138 if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op)) 139 continue; 140 else 141 return MaybeField.takeError(); 142 } 143 144 if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 145 // Array case. Read the number of elements as a vbr6. 146 Expected<uint32_t> MaybeNum = ReadVBR(6); 147 if (!MaybeNum) 148 return MaybeNum.takeError(); 149 unsigned NumElts = MaybeNum.get(); 150 151 // Get the element encoding. 152 assert(i+2 == e && "array op not second to last?"); 153 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 154 155 // Read all the elements. 156 // Decode the value as we are commanded. 157 switch (EltEnc.getEncoding()) { 158 default: 159 return error("Array element type can't be an Array or a Blob"); 160 case BitCodeAbbrevOp::Fixed: 161 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); 162 if (Error Err = 163 JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) * 164 EltEnc.getEncodingData())) 165 return std::move(Err); 166 break; 167 case BitCodeAbbrevOp::VBR: 168 assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize); 169 for (; NumElts; --NumElts) 170 if (Expected<uint64_t> Res = 171 ReadVBR64((unsigned)EltEnc.getEncodingData())) 172 ; // Skip! 173 else 174 return Res.takeError(); 175 break; 176 case BitCodeAbbrevOp::Char6: 177 if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6)) 178 return std::move(Err); 179 break; 180 } 181 continue; 182 } 183 184 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); 185 // Blob case. Read the number of bytes as a vbr6. 186 Expected<uint32_t> MaybeNum = ReadVBR(6); 187 if (!MaybeNum) 188 return MaybeNum.takeError(); 189 unsigned NumElts = MaybeNum.get(); 190 SkipToFourByteBoundary(); // 32-bit alignment 191 192 // Figure out where the end of this blob will be including tail padding. 193 const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8; 194 195 // If this would read off the end of the bitcode file, just set the 196 // record to empty and return. 197 if (!canSkipToPos(NewEnd/8)) { 198 skipToEnd(); 199 break; 200 } 201 202 // Skip over the blob. 203 if (Error Err = JumpToBit(NewEnd)) 204 return std::move(Err); 205 } 206 return Code; 207 } 208 209 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID, 210 SmallVectorImpl<uint64_t> &Vals, 211 StringRef *Blob) { 212 if (AbbrevID == bitc::UNABBREV_RECORD) { 213 Expected<uint32_t> MaybeCode = ReadVBR(6); 214 if (!MaybeCode) 215 return MaybeCode.takeError(); 216 uint32_t Code = MaybeCode.get(); 217 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 218 if (!MaybeNumElts) 219 return MaybeNumElts.takeError(); 220 uint32_t NumElts = MaybeNumElts.get(); 221 Vals.reserve(Vals.size() + NumElts); 222 223 for (unsigned i = 0; i != NumElts; ++i) 224 if (Expected<uint64_t> MaybeVal = ReadVBR64(6)) 225 Vals.push_back(MaybeVal.get()); 226 else 227 return MaybeVal.takeError(); 228 return Code; 229 } 230 231 const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); 232 233 // Read the record code first. 234 assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?"); 235 const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); 236 unsigned Code; 237 if (CodeOp.isLiteral()) 238 Code = CodeOp.getLiteralValue(); 239 else { 240 if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array || 241 CodeOp.getEncoding() == BitCodeAbbrevOp::Blob) 242 return error("Abbreviation starts with an Array or a Blob"); 243 if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp)) 244 Code = MaybeCode.get(); 245 else 246 return MaybeCode.takeError(); 247 } 248 249 for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { 250 const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); 251 if (Op.isLiteral()) { 252 Vals.push_back(Op.getLiteralValue()); 253 continue; 254 } 255 256 if (Op.getEncoding() != BitCodeAbbrevOp::Array && 257 Op.getEncoding() != BitCodeAbbrevOp::Blob) { 258 if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op)) 259 Vals.push_back(MaybeVal.get()); 260 else 261 return MaybeVal.takeError(); 262 continue; 263 } 264 265 if (Op.getEncoding() == BitCodeAbbrevOp::Array) { 266 // Array case. Read the number of elements as a vbr6. 267 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 268 if (!MaybeNumElts) 269 return MaybeNumElts.takeError(); 270 uint32_t NumElts = MaybeNumElts.get(); 271 Vals.reserve(Vals.size() + NumElts); 272 273 // Get the element encoding. 274 if (i + 2 != e) 275 return error("Array op not second to last"); 276 const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); 277 if (!EltEnc.isEncoding()) 278 return error( 279 "Array element type has to be an encoding of a type"); 280 281 // Read all the elements. 282 switch (EltEnc.getEncoding()) { 283 default: 284 return error("Array element type can't be an Array or a Blob"); 285 case BitCodeAbbrevOp::Fixed: 286 for (; NumElts; --NumElts) 287 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = 288 Read((unsigned)EltEnc.getEncodingData())) 289 Vals.push_back(MaybeVal.get()); 290 else 291 return MaybeVal.takeError(); 292 break; 293 case BitCodeAbbrevOp::VBR: 294 for (; NumElts; --NumElts) 295 if (Expected<uint64_t> MaybeVal = 296 ReadVBR64((unsigned)EltEnc.getEncodingData())) 297 Vals.push_back(MaybeVal.get()); 298 else 299 return MaybeVal.takeError(); 300 break; 301 case BitCodeAbbrevOp::Char6: 302 for (; NumElts; --NumElts) 303 if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6)) 304 Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get())); 305 else 306 return MaybeVal.takeError(); 307 } 308 continue; 309 } 310 311 assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); 312 // Blob case. Read the number of bytes as a vbr6. 313 Expected<uint32_t> MaybeNumElts = ReadVBR(6); 314 if (!MaybeNumElts) 315 return MaybeNumElts.takeError(); 316 uint32_t NumElts = MaybeNumElts.get(); 317 SkipToFourByteBoundary(); // 32-bit alignment 318 319 // Figure out where the end of this blob will be including tail padding. 320 size_t CurBitPos = GetCurrentBitNo(); 321 const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8; 322 323 // If this would read off the end of the bitcode file, just set the 324 // record to empty and return. 325 if (!canSkipToPos(NewEnd/8)) { 326 Vals.append(NumElts, 0); 327 skipToEnd(); 328 break; 329 } 330 331 // Otherwise, inform the streamer that we need these bytes in memory. Skip 332 // over tail padding first, in case jumping to NewEnd invalidates the Blob 333 // pointer. 334 if (Error Err = JumpToBit(NewEnd)) 335 return std::move(Err); 336 const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts); 337 338 // If we can return a reference to the data, do so to avoid copying it. 339 if (Blob) { 340 *Blob = StringRef(Ptr, NumElts); 341 } else { 342 // Otherwise, unpack into Vals with zero extension. 343 auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr); 344 Vals.append(UPtr, UPtr + NumElts); 345 } 346 } 347 348 return Code; 349 } 350 351 Error BitstreamCursor::ReadAbbrevRecord() { 352 auto Abbv = std::make_shared<BitCodeAbbrev>(); 353 Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5); 354 if (!MaybeNumOpInfo) 355 return MaybeNumOpInfo.takeError(); 356 unsigned NumOpInfo = MaybeNumOpInfo.get(); 357 for (unsigned i = 0; i != NumOpInfo; ++i) { 358 Expected<word_t> MaybeIsLiteral = Read(1); 359 if (!MaybeIsLiteral) 360 return MaybeIsLiteral.takeError(); 361 bool IsLiteral = MaybeIsLiteral.get(); 362 if (IsLiteral) { 363 Expected<uint64_t> MaybeOp = ReadVBR64(8); 364 if (!MaybeOp) 365 return MaybeOp.takeError(); 366 Abbv->Add(BitCodeAbbrevOp(MaybeOp.get())); 367 continue; 368 } 369 370 Expected<word_t> MaybeEncoding = Read(3); 371 if (!MaybeEncoding) 372 return MaybeEncoding.takeError(); 373 if (!BitCodeAbbrevOp::isValidEncoding(MaybeEncoding.get())) 374 return error("Invalid encoding"); 375 376 BitCodeAbbrevOp::Encoding E = 377 (BitCodeAbbrevOp::Encoding)MaybeEncoding.get(); 378 if (BitCodeAbbrevOp::hasEncodingData(E)) { 379 Expected<uint64_t> MaybeData = ReadVBR64(5); 380 if (!MaybeData) 381 return MaybeData.takeError(); 382 uint64_t Data = MaybeData.get(); 383 384 // As a special case, handle fixed(0) (i.e., a fixed field with zero bits) 385 // and vbr(0) as a literal zero. This is decoded the same way, and avoids 386 // a slow path in Read() to have to handle reading zero bits. 387 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && 388 Data == 0) { 389 Abbv->Add(BitCodeAbbrevOp(0)); 390 continue; 391 } 392 393 if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && 394 Data > MaxChunkSize) 395 return error("Fixed or VBR abbrev record with size > MaxChunkData"); 396 397 Abbv->Add(BitCodeAbbrevOp(E, Data)); 398 } else 399 Abbv->Add(BitCodeAbbrevOp(E)); 400 } 401 402 if (Abbv->getNumOperandInfos() == 0) 403 return error("Abbrev record with no operands"); 404 CurAbbrevs.push_back(std::move(Abbv)); 405 406 return Error::success(); 407 } 408 409 Expected<Optional<BitstreamBlockInfo>> 410 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) { 411 if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) 412 return std::move(Err); 413 414 BitstreamBlockInfo NewBlockInfo; 415 416 SmallVector<uint64_t, 64> Record; 417 BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr; 418 419 // Read all the records for this module. 420 while (true) { 421 Expected<BitstreamEntry> MaybeEntry = 422 advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs); 423 if (!MaybeEntry) 424 return MaybeEntry.takeError(); 425 BitstreamEntry Entry = MaybeEntry.get(); 426 427 switch (Entry.Kind) { 428 case llvm::BitstreamEntry::SubBlock: // Handled for us already. 429 case llvm::BitstreamEntry::Error: 430 return None; 431 case llvm::BitstreamEntry::EndBlock: 432 return std::move(NewBlockInfo); 433 case llvm::BitstreamEntry::Record: 434 // The interesting case. 435 break; 436 } 437 438 // Read abbrev records, associate them with CurBID. 439 if (Entry.ID == bitc::DEFINE_ABBREV) { 440 if (!CurBlockInfo) return None; 441 if (Error Err = ReadAbbrevRecord()) 442 return std::move(Err); 443 444 // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the 445 // appropriate BlockInfo. 446 CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back())); 447 CurAbbrevs.pop_back(); 448 continue; 449 } 450 451 // Read a record. 452 Record.clear(); 453 Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record); 454 if (!MaybeBlockInfo) 455 return MaybeBlockInfo.takeError(); 456 switch (MaybeBlockInfo.get()) { 457 default: 458 break; // Default behavior, ignore unknown content. 459 case bitc::BLOCKINFO_CODE_SETBID: 460 if (Record.size() < 1) 461 return None; 462 CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]); 463 break; 464 case bitc::BLOCKINFO_CODE_BLOCKNAME: { 465 if (!CurBlockInfo) 466 return None; 467 if (!ReadBlockInfoNames) 468 break; // Ignore name. 469 CurBlockInfo->Name = std::string(Record.begin(), Record.end()); 470 break; 471 } 472 case bitc::BLOCKINFO_CODE_SETRECORDNAME: { 473 if (!CurBlockInfo) return None; 474 if (!ReadBlockInfoNames) 475 break; // Ignore name. 476 CurBlockInfo->RecordNames.emplace_back( 477 (unsigned)Record[0], std::string(Record.begin() + 1, Record.end())); 478 break; 479 } 480 } 481 } 482 } 483