xref: /llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp (revision 8df63211a65693c7cc760e361adf20edd450fafa)
1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Bitstream/BitstreamReader.h"
10 #include "llvm/ADT/StringRef.h"
11 #include <cassert>
12 #include <optional>
13 #include <string>
14 
15 using namespace llvm;
16 
17 //===----------------------------------------------------------------------===//
18 //  BitstreamCursor implementation
19 //===----------------------------------------------------------------------===//
20 //
21 static Error error(const char *Message) {
22   return createStringError(std::errc::illegal_byte_sequence, Message);
23 }
24 
25 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
26 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
27   // Save the current block's state on BlockScope.
28   BlockScope.push_back(Block(CurCodeSize));
29   BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
30 
31   // Add the abbrevs specific to this block to the CurAbbrevs list.
32   if (BlockInfo) {
33     if (const BitstreamBlockInfo::BlockInfo *Info =
34             BlockInfo->getBlockInfo(BlockID)) {
35       llvm::append_range(CurAbbrevs, Info->Abbrevs);
36     }
37   }
38 
39   // Get the codesize of this block.
40   Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
41   if (!MaybeVBR)
42     return MaybeVBR.takeError();
43   CurCodeSize = MaybeVBR.get();
44 
45   if (CurCodeSize > MaxChunkSize)
46     return llvm::createStringError(
47         std::errc::illegal_byte_sequence,
48         "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
49         CurCodeSize);
50 
51   SkipToFourByteBoundary();
52   Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
53   if (!MaybeNum)
54     return MaybeNum.takeError();
55   word_t NumWords = MaybeNum.get();
56   if (NumWordsP)
57     *NumWordsP = NumWords;
58 
59   if (CurCodeSize == 0)
60     return llvm::createStringError(
61         std::errc::illegal_byte_sequence,
62         "can't enter sub-block: current code size is 0");
63   if (AtEndOfStream())
64     return llvm::createStringError(
65         std::errc::illegal_byte_sequence,
66         "can't enter sub block: already at end of stream");
67 
68   return Error::success();
69 }
70 
71 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
72                                                const BitCodeAbbrevOp &Op) {
73   assert(!Op.isLiteral() && "Not to be used with literals!");
74 
75   // Decode the value as we are commanded.
76   switch (Op.getEncoding()) {
77   case BitCodeAbbrevOp::Array:
78   case BitCodeAbbrevOp::Blob:
79     llvm_unreachable("Should not reach here");
80   case BitCodeAbbrevOp::Fixed:
81     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
82     return Cursor.Read((unsigned)Op.getEncodingData());
83   case BitCodeAbbrevOp::VBR:
84     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
85     return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
86   case BitCodeAbbrevOp::Char6:
87     if (Expected<unsigned> Res = Cursor.Read(6))
88       return BitCodeAbbrevOp::DecodeChar6(Res.get());
89     else
90       return Res.takeError();
91   }
92   llvm_unreachable("invalid abbreviation encoding");
93 }
94 
95 /// skipRecord - Read the current record and discard it.
96 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
97   // Skip unabbreviated records by reading past their entries.
98   if (AbbrevID == bitc::UNABBREV_RECORD) {
99     Expected<uint32_t> MaybeCode = ReadVBR(6);
100     if (!MaybeCode)
101       return MaybeCode.takeError();
102     unsigned Code = MaybeCode.get();
103     Expected<uint32_t> MaybeVBR = ReadVBR(6);
104     if (!MaybeVBR)
105       return MaybeVBR.takeError();
106     unsigned NumElts = MaybeVBR.get();
107     for (unsigned i = 0; i != NumElts; ++i)
108       if (Expected<uint64_t> Res = ReadVBR64(6))
109         ; // Skip!
110       else
111         return Res.takeError();
112     return Code;
113   }
114 
115   Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
116   if (!MaybeAbbv)
117     return MaybeAbbv.takeError();
118 
119   const BitCodeAbbrev *Abbv = MaybeAbbv.get();
120   const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
121   unsigned Code;
122   if (CodeOp.isLiteral())
123     Code = CodeOp.getLiteralValue();
124   else {
125     if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
126         CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
127       return llvm::createStringError(
128           std::errc::illegal_byte_sequence,
129           "Abbreviation starts with an Array or a Blob");
130     Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
131     if (!MaybeCode)
132       return MaybeCode.takeError();
133     Code = MaybeCode.get();
134   }
135 
136   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
137     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
138     if (Op.isLiteral())
139       continue;
140 
141     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
142         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
143       if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op))
144         continue;
145       else
146         return MaybeField.takeError();
147     }
148 
149     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
150       // Array case.  Read the number of elements as a vbr6.
151       Expected<uint32_t> MaybeNum = ReadVBR(6);
152       if (!MaybeNum)
153         return MaybeNum.takeError();
154       unsigned NumElts = MaybeNum.get();
155 
156       // Get the element encoding.
157       assert(i+2 == e && "array op not second to last?");
158       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
159 
160       // Read all the elements.
161       // Decode the value as we are commanded.
162       switch (EltEnc.getEncoding()) {
163       default:
164         return error("Array element type can't be an Array or a Blob");
165       case BitCodeAbbrevOp::Fixed:
166         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
167         if (Error Err =
168                 JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) *
169                                                   EltEnc.getEncodingData()))
170           return Err;
171         break;
172       case BitCodeAbbrevOp::VBR:
173         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
174         for (; NumElts; --NumElts)
175           if (Expected<uint64_t> Res =
176                   ReadVBR64((unsigned)EltEnc.getEncodingData()))
177             ; // Skip!
178           else
179             return Res.takeError();
180         break;
181       case BitCodeAbbrevOp::Char6:
182         if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
183           return Err;
184         break;
185       }
186       continue;
187     }
188 
189     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
190     // Blob case.  Read the number of bytes as a vbr6.
191     Expected<uint32_t> MaybeNum = ReadVBR(6);
192     if (!MaybeNum)
193       return MaybeNum.takeError();
194     unsigned NumElts = MaybeNum.get();
195     SkipToFourByteBoundary();  // 32-bit alignment
196 
197     // Figure out where the end of this blob will be including tail padding.
198     const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8;
199 
200     // If this would read off the end of the bitcode file, just set the
201     // record to empty and return.
202     if (!canSkipToPos(NewEnd/8)) {
203       skipToEnd();
204       break;
205     }
206 
207     // Skip over the blob.
208     if (Error Err = JumpToBit(NewEnd))
209       return Err;
210   }
211   return Code;
212 }
213 
214 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
215                                                SmallVectorImpl<uint64_t> &Vals,
216                                                StringRef *Blob) {
217   if (AbbrevID == bitc::UNABBREV_RECORD) {
218     Expected<uint32_t> MaybeCode = ReadVBR(6);
219     if (!MaybeCode)
220       return MaybeCode.takeError();
221     uint32_t Code = MaybeCode.get();
222     Expected<uint32_t> MaybeNumElts = ReadVBR(6);
223     if (!MaybeNumElts)
224       return error(
225           ("Failed to read size: " + toString(MaybeNumElts.takeError()))
226               .c_str());
227     uint32_t NumElts = MaybeNumElts.get();
228     if (!isSizePlausible(NumElts))
229       return error("Size is not plausible");
230     Vals.reserve(Vals.size() + NumElts);
231 
232     for (unsigned i = 0; i != NumElts; ++i)
233       if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
234         Vals.push_back(MaybeVal.get());
235       else
236         return MaybeVal.takeError();
237     return Code;
238   }
239 
240   Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
241   if (!MaybeAbbv)
242     return MaybeAbbv.takeError();
243   const BitCodeAbbrev *Abbv = MaybeAbbv.get();
244 
245   // Read the record code first.
246   assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
247   const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
248   unsigned Code;
249   if (CodeOp.isLiteral())
250     Code = CodeOp.getLiteralValue();
251   else {
252     if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
253         CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
254       return error("Abbreviation starts with an Array or a Blob");
255     if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
256       Code = MaybeCode.get();
257     else
258       return MaybeCode.takeError();
259   }
260 
261   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
262     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
263     if (Op.isLiteral()) {
264       Vals.push_back(Op.getLiteralValue());
265       continue;
266     }
267 
268     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
269         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
270       if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
271         Vals.push_back(MaybeVal.get());
272       else
273         return MaybeVal.takeError();
274       continue;
275     }
276 
277     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
278       // Array case.  Read the number of elements as a vbr6.
279       Expected<uint32_t> MaybeNumElts = ReadVBR(6);
280       if (!MaybeNumElts)
281         return error(
282             ("Failed to read size: " + toString(MaybeNumElts.takeError()))
283                 .c_str());
284       uint32_t NumElts = MaybeNumElts.get();
285       if (!isSizePlausible(NumElts))
286         return error("Size is not plausible");
287       Vals.reserve(Vals.size() + NumElts);
288 
289       // Get the element encoding.
290       if (i + 2 != e)
291         return error("Array op not second to last");
292       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
293       if (!EltEnc.isEncoding())
294         return error(
295             "Array element type has to be an encoding of a type");
296 
297       // Read all the elements.
298       switch (EltEnc.getEncoding()) {
299       default:
300         return error("Array element type can't be an Array or a Blob");
301       case BitCodeAbbrevOp::Fixed:
302         for (; NumElts; --NumElts)
303           if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
304                   Read((unsigned)EltEnc.getEncodingData()))
305             Vals.push_back(MaybeVal.get());
306           else
307             return MaybeVal.takeError();
308         break;
309       case BitCodeAbbrevOp::VBR:
310         for (; NumElts; --NumElts)
311           if (Expected<uint64_t> MaybeVal =
312                   ReadVBR64((unsigned)EltEnc.getEncodingData()))
313             Vals.push_back(MaybeVal.get());
314           else
315             return MaybeVal.takeError();
316         break;
317       case BitCodeAbbrevOp::Char6:
318         for (; NumElts; --NumElts)
319           if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
320             Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
321           else
322             return MaybeVal.takeError();
323       }
324       continue;
325     }
326 
327     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
328     // Blob case.  Read the number of bytes as a vbr6.
329     Expected<uint32_t> MaybeNumElts = ReadVBR(6);
330     if (!MaybeNumElts)
331       return MaybeNumElts.takeError();
332     uint32_t NumElts = MaybeNumElts.get();
333     SkipToFourByteBoundary();  // 32-bit alignment
334 
335     // Figure out where the end of this blob will be including tail padding.
336     size_t CurBitPos = GetCurrentBitNo();
337     const size_t NewEnd =
338         CurBitPos + static_cast<uint64_t>(alignTo(NumElts, 4)) * 8;
339 
340     // Make sure the bitstream is large enough to contain the blob.
341     if (!canSkipToPos(NewEnd/8))
342       return error("Blob ends too soon");
343 
344     // Otherwise, inform the streamer that we need these bytes in memory.  Skip
345     // over tail padding first, in case jumping to NewEnd invalidates the Blob
346     // pointer.
347     if (Error Err = JumpToBit(NewEnd))
348       return Err;
349     const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
350 
351     // If we can return a reference to the data, do so to avoid copying it.
352     if (Blob) {
353       *Blob = StringRef(Ptr, NumElts);
354     } else {
355       // Otherwise, unpack into Vals with zero extension.
356       auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr);
357       Vals.append(UPtr, UPtr + NumElts);
358     }
359   }
360 
361   return Code;
362 }
363 
364 Error BitstreamCursor::ReadAbbrevRecord() {
365   auto Abbv = std::make_shared<BitCodeAbbrev>();
366   Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
367   if (!MaybeNumOpInfo)
368     return MaybeNumOpInfo.takeError();
369   unsigned NumOpInfo = MaybeNumOpInfo.get();
370   for (unsigned i = 0; i != NumOpInfo; ++i) {
371     Expected<word_t> MaybeIsLiteral = Read(1);
372     if (!MaybeIsLiteral)
373       return MaybeIsLiteral.takeError();
374     bool IsLiteral = MaybeIsLiteral.get();
375     if (IsLiteral) {
376       Expected<uint64_t> MaybeOp = ReadVBR64(8);
377       if (!MaybeOp)
378         return MaybeOp.takeError();
379       Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
380       continue;
381     }
382 
383     Expected<word_t> MaybeEncoding = Read(3);
384     if (!MaybeEncoding)
385       return MaybeEncoding.takeError();
386     if (!BitCodeAbbrevOp::isValidEncoding(MaybeEncoding.get()))
387       return error("Invalid encoding");
388 
389     BitCodeAbbrevOp::Encoding E =
390         (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
391     if (BitCodeAbbrevOp::hasEncodingData(E)) {
392       Expected<uint64_t> MaybeData = ReadVBR64(5);
393       if (!MaybeData)
394         return MaybeData.takeError();
395       uint64_t Data = MaybeData.get();
396 
397       // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
398       // and vbr(0) as a literal zero.  This is decoded the same way, and avoids
399       // a slow path in Read() to have to handle reading zero bits.
400       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
401           Data == 0) {
402         Abbv->Add(BitCodeAbbrevOp(0));
403         continue;
404       }
405 
406       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
407           Data > MaxChunkSize)
408         return error("Fixed or VBR abbrev record with size > MaxChunkData");
409 
410       Abbv->Add(BitCodeAbbrevOp(E, Data));
411     } else
412       Abbv->Add(BitCodeAbbrevOp(E));
413   }
414 
415   if (Abbv->getNumOperandInfos() == 0)
416     return error("Abbrev record with no operands");
417   CurAbbrevs.push_back(std::move(Abbv));
418 
419   return Error::success();
420 }
421 
422 Expected<std::optional<BitstreamBlockInfo>>
423 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
424   if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
425     return Err;
426 
427   BitstreamBlockInfo NewBlockInfo;
428 
429   SmallVector<uint64_t, 64> Record;
430   BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
431 
432   // Read all the records for this module.
433   while (true) {
434     Expected<BitstreamEntry> MaybeEntry =
435         advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
436     if (!MaybeEntry)
437       return MaybeEntry.takeError();
438     BitstreamEntry Entry = MaybeEntry.get();
439 
440     switch (Entry.Kind) {
441     case llvm::BitstreamEntry::SubBlock: // Handled for us already.
442     case llvm::BitstreamEntry::Error:
443       return std::nullopt;
444     case llvm::BitstreamEntry::EndBlock:
445       return std::move(NewBlockInfo);
446     case llvm::BitstreamEntry::Record:
447       // The interesting case.
448       break;
449     }
450 
451     // Read abbrev records, associate them with CurBID.
452     if (Entry.ID == bitc::DEFINE_ABBREV) {
453       if (!CurBlockInfo)
454         return std::nullopt;
455       if (Error Err = ReadAbbrevRecord())
456         return Err;
457 
458       // ReadAbbrevRecord installs the abbrev in CurAbbrevs.  Move it to the
459       // appropriate BlockInfo.
460       CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
461       CurAbbrevs.pop_back();
462       continue;
463     }
464 
465     // Read a record.
466     Record.clear();
467     Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
468     if (!MaybeBlockInfo)
469       return MaybeBlockInfo.takeError();
470     switch (MaybeBlockInfo.get()) {
471     default:
472       break; // Default behavior, ignore unknown content.
473     case bitc::BLOCKINFO_CODE_SETBID:
474       if (Record.size() < 1)
475         return std::nullopt;
476       CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
477       break;
478     case bitc::BLOCKINFO_CODE_BLOCKNAME: {
479       if (!CurBlockInfo)
480         return std::nullopt;
481       if (!ReadBlockInfoNames)
482         break; // Ignore name.
483       CurBlockInfo->Name = std::string(Record.begin(), Record.end());
484       break;
485     }
486       case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
487       if (!CurBlockInfo)
488         return std::nullopt;
489       if (!ReadBlockInfoNames)
490         break; // Ignore name.
491       CurBlockInfo->RecordNames.emplace_back(
492           (unsigned)Record[0], std::string(Record.begin() + 1, Record.end()));
493       break;
494       }
495       }
496   }
497 }
498