xref: /llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp (revision 3c86642edd28f1ce970882edaba8dce468ec7401)
1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Bitstream/BitstreamReader.h"
10 #include "llvm/ADT/StringRef.h"
11 #include <cassert>
12 #include <string>
13 
14 using namespace llvm;
15 
16 //===----------------------------------------------------------------------===//
17 //  BitstreamCursor implementation
18 //===----------------------------------------------------------------------===//
19 //
20 static Error error(const char *Message) {
21   return createStringError(std::errc::illegal_byte_sequence, Message);
22 }
23 
24 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
25 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
26   // Save the current block's state on BlockScope.
27   BlockScope.push_back(Block(CurCodeSize));
28   BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
29 
30   // Add the abbrevs specific to this block to the CurAbbrevs list.
31   if (BlockInfo) {
32     if (const BitstreamBlockInfo::BlockInfo *Info =
33             BlockInfo->getBlockInfo(BlockID)) {
34       llvm::append_range(CurAbbrevs, Info->Abbrevs);
35     }
36   }
37 
38   // Get the codesize of this block.
39   Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
40   if (!MaybeVBR)
41     return MaybeVBR.takeError();
42   CurCodeSize = MaybeVBR.get();
43 
44   if (CurCodeSize > MaxChunkSize)
45     return llvm::createStringError(
46         std::errc::illegal_byte_sequence,
47         "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
48         CurCodeSize);
49 
50   SkipToFourByteBoundary();
51   Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
52   if (!MaybeNum)
53     return MaybeNum.takeError();
54   word_t NumWords = MaybeNum.get();
55   if (NumWordsP)
56     *NumWordsP = NumWords;
57 
58   if (CurCodeSize == 0)
59     return llvm::createStringError(
60         std::errc::illegal_byte_sequence,
61         "can't enter sub-block: current code size is 0");
62   if (AtEndOfStream())
63     return llvm::createStringError(
64         std::errc::illegal_byte_sequence,
65         "can't enter sub block: already at end of stream");
66 
67   return Error::success();
68 }
69 
70 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
71                                                const BitCodeAbbrevOp &Op) {
72   assert(!Op.isLiteral() && "Not to be used with literals!");
73 
74   // Decode the value as we are commanded.
75   switch (Op.getEncoding()) {
76   case BitCodeAbbrevOp::Array:
77   case BitCodeAbbrevOp::Blob:
78     llvm_unreachable("Should not reach here");
79   case BitCodeAbbrevOp::Fixed:
80     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
81     return Cursor.Read((unsigned)Op.getEncodingData());
82   case BitCodeAbbrevOp::VBR:
83     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
84     return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
85   case BitCodeAbbrevOp::Char6:
86     if (Expected<unsigned> Res = Cursor.Read(6))
87       return BitCodeAbbrevOp::DecodeChar6(Res.get());
88     else
89       return Res.takeError();
90   }
91   llvm_unreachable("invalid abbreviation encoding");
92 }
93 
94 /// skipRecord - Read the current record and discard it.
95 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
96   // Skip unabbreviated records by reading past their entries.
97   if (AbbrevID == bitc::UNABBREV_RECORD) {
98     Expected<uint32_t> MaybeCode = ReadVBR(6);
99     if (!MaybeCode)
100       return MaybeCode.takeError();
101     unsigned Code = MaybeCode.get();
102     Expected<uint32_t> MaybeVBR = ReadVBR(6);
103     if (!MaybeVBR)
104       return MaybeVBR.takeError();
105     unsigned NumElts = MaybeVBR.get();
106     for (unsigned i = 0; i != NumElts; ++i)
107       if (Expected<uint64_t> Res = ReadVBR64(6))
108         ; // Skip!
109       else
110         return Res.takeError();
111     return Code;
112   }
113 
114   Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
115   if (!MaybeAbbv)
116     return MaybeAbbv.takeError();
117 
118   const BitCodeAbbrev *Abbv = MaybeAbbv.get();
119   const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
120   unsigned Code;
121   if (CodeOp.isLiteral())
122     Code = CodeOp.getLiteralValue();
123   else {
124     if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
125         CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
126       return llvm::createStringError(
127           std::errc::illegal_byte_sequence,
128           "Abbreviation starts with an Array or a Blob");
129     Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
130     if (!MaybeCode)
131       return MaybeCode.takeError();
132     Code = MaybeCode.get();
133   }
134 
135   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
136     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
137     if (Op.isLiteral())
138       continue;
139 
140     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
141         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
142       if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op))
143         continue;
144       else
145         return MaybeField.takeError();
146     }
147 
148     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
149       // Array case.  Read the number of elements as a vbr6.
150       Expected<uint32_t> MaybeNum = ReadVBR(6);
151       if (!MaybeNum)
152         return MaybeNum.takeError();
153       unsigned NumElts = MaybeNum.get();
154 
155       // Get the element encoding.
156       assert(i+2 == e && "array op not second to last?");
157       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
158 
159       // Read all the elements.
160       // Decode the value as we are commanded.
161       switch (EltEnc.getEncoding()) {
162       default:
163         return error("Array element type can't be an Array or a Blob");
164       case BitCodeAbbrevOp::Fixed:
165         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
166         if (Error Err =
167                 JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) *
168                                                   EltEnc.getEncodingData()))
169           return std::move(Err);
170         break;
171       case BitCodeAbbrevOp::VBR:
172         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
173         for (; NumElts; --NumElts)
174           if (Expected<uint64_t> Res =
175                   ReadVBR64((unsigned)EltEnc.getEncodingData()))
176             ; // Skip!
177           else
178             return Res.takeError();
179         break;
180       case BitCodeAbbrevOp::Char6:
181         if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
182           return std::move(Err);
183         break;
184       }
185       continue;
186     }
187 
188     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
189     // Blob case.  Read the number of bytes as a vbr6.
190     Expected<uint32_t> MaybeNum = ReadVBR(6);
191     if (!MaybeNum)
192       return MaybeNum.takeError();
193     unsigned NumElts = MaybeNum.get();
194     SkipToFourByteBoundary();  // 32-bit alignment
195 
196     // Figure out where the end of this blob will be including tail padding.
197     const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8;
198 
199     // If this would read off the end of the bitcode file, just set the
200     // record to empty and return.
201     if (!canSkipToPos(NewEnd/8)) {
202       skipToEnd();
203       break;
204     }
205 
206     // Skip over the blob.
207     if (Error Err = JumpToBit(NewEnd))
208       return std::move(Err);
209   }
210   return Code;
211 }
212 
213 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
214                                                SmallVectorImpl<uint64_t> &Vals,
215                                                StringRef *Blob) {
216   if (AbbrevID == bitc::UNABBREV_RECORD) {
217     Expected<uint32_t> MaybeCode = ReadVBR(6);
218     if (!MaybeCode)
219       return MaybeCode.takeError();
220     uint32_t Code = MaybeCode.get();
221     Expected<uint32_t> MaybeNumElts = ReadVBR(6);
222     if (!MaybeNumElts)
223       return MaybeNumElts.takeError();
224     uint32_t NumElts = MaybeNumElts.get();
225     if (!isSizePlausible(NumElts))
226       return error("Size is not plausible");
227     Vals.reserve(Vals.size() + NumElts);
228 
229     for (unsigned i = 0; i != NumElts; ++i)
230       if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
231         Vals.push_back(MaybeVal.get());
232       else
233         return MaybeVal.takeError();
234     return Code;
235   }
236 
237   Expected<const BitCodeAbbrev *> MaybeAbbv = getAbbrev(AbbrevID);
238   if (!MaybeAbbv)
239     return MaybeAbbv.takeError();
240   const BitCodeAbbrev *Abbv = MaybeAbbv.get();
241 
242   // Read the record code first.
243   assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
244   const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
245   unsigned Code;
246   if (CodeOp.isLiteral())
247     Code = CodeOp.getLiteralValue();
248   else {
249     if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
250         CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
251       return error("Abbreviation starts with an Array or a Blob");
252     if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
253       Code = MaybeCode.get();
254     else
255       return MaybeCode.takeError();
256   }
257 
258   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
259     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
260     if (Op.isLiteral()) {
261       Vals.push_back(Op.getLiteralValue());
262       continue;
263     }
264 
265     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
266         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
267       if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
268         Vals.push_back(MaybeVal.get());
269       else
270         return MaybeVal.takeError();
271       continue;
272     }
273 
274     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
275       // Array case.  Read the number of elements as a vbr6.
276       Expected<uint32_t> MaybeNumElts = ReadVBR(6);
277       if (!MaybeNumElts)
278         return MaybeNumElts.takeError();
279       uint32_t NumElts = MaybeNumElts.get();
280       if (!isSizePlausible(NumElts))
281         return error("Size is not plausible");
282       Vals.reserve(Vals.size() + NumElts);
283 
284       // Get the element encoding.
285       if (i + 2 != e)
286         return error("Array op not second to last");
287       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
288       if (!EltEnc.isEncoding())
289         return error(
290             "Array element type has to be an encoding of a type");
291 
292       // Read all the elements.
293       switch (EltEnc.getEncoding()) {
294       default:
295         return error("Array element type can't be an Array or a Blob");
296       case BitCodeAbbrevOp::Fixed:
297         for (; NumElts; --NumElts)
298           if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
299                   Read((unsigned)EltEnc.getEncodingData()))
300             Vals.push_back(MaybeVal.get());
301           else
302             return MaybeVal.takeError();
303         break;
304       case BitCodeAbbrevOp::VBR:
305         for (; NumElts; --NumElts)
306           if (Expected<uint64_t> MaybeVal =
307                   ReadVBR64((unsigned)EltEnc.getEncodingData()))
308             Vals.push_back(MaybeVal.get());
309           else
310             return MaybeVal.takeError();
311         break;
312       case BitCodeAbbrevOp::Char6:
313         for (; NumElts; --NumElts)
314           if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
315             Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
316           else
317             return MaybeVal.takeError();
318       }
319       continue;
320     }
321 
322     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
323     // Blob case.  Read the number of bytes as a vbr6.
324     Expected<uint32_t> MaybeNumElts = ReadVBR(6);
325     if (!MaybeNumElts)
326       return MaybeNumElts.takeError();
327     uint32_t NumElts = MaybeNumElts.get();
328     SkipToFourByteBoundary();  // 32-bit alignment
329 
330     // Figure out where the end of this blob will be including tail padding.
331     size_t CurBitPos = GetCurrentBitNo();
332     const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8;
333 
334     // If this would read off the end of the bitcode file, just set the
335     // record to empty and return.
336     if (!canSkipToPos(NewEnd/8)) {
337       Vals.append(NumElts, 0);
338       skipToEnd();
339       break;
340     }
341 
342     // Otherwise, inform the streamer that we need these bytes in memory.  Skip
343     // over tail padding first, in case jumping to NewEnd invalidates the Blob
344     // pointer.
345     if (Error Err = JumpToBit(NewEnd))
346       return std::move(Err);
347     const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
348 
349     // If we can return a reference to the data, do so to avoid copying it.
350     if (Blob) {
351       *Blob = StringRef(Ptr, NumElts);
352     } else {
353       // Otherwise, unpack into Vals with zero extension.
354       auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr);
355       Vals.append(UPtr, UPtr + NumElts);
356     }
357   }
358 
359   return Code;
360 }
361 
362 Error BitstreamCursor::ReadAbbrevRecord() {
363   auto Abbv = std::make_shared<BitCodeAbbrev>();
364   Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
365   if (!MaybeNumOpInfo)
366     return MaybeNumOpInfo.takeError();
367   unsigned NumOpInfo = MaybeNumOpInfo.get();
368   for (unsigned i = 0; i != NumOpInfo; ++i) {
369     Expected<word_t> MaybeIsLiteral = Read(1);
370     if (!MaybeIsLiteral)
371       return MaybeIsLiteral.takeError();
372     bool IsLiteral = MaybeIsLiteral.get();
373     if (IsLiteral) {
374       Expected<uint64_t> MaybeOp = ReadVBR64(8);
375       if (!MaybeOp)
376         return MaybeOp.takeError();
377       Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
378       continue;
379     }
380 
381     Expected<word_t> MaybeEncoding = Read(3);
382     if (!MaybeEncoding)
383       return MaybeEncoding.takeError();
384     if (!BitCodeAbbrevOp::isValidEncoding(MaybeEncoding.get()))
385       return error("Invalid encoding");
386 
387     BitCodeAbbrevOp::Encoding E =
388         (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
389     if (BitCodeAbbrevOp::hasEncodingData(E)) {
390       Expected<uint64_t> MaybeData = ReadVBR64(5);
391       if (!MaybeData)
392         return MaybeData.takeError();
393       uint64_t Data = MaybeData.get();
394 
395       // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
396       // and vbr(0) as a literal zero.  This is decoded the same way, and avoids
397       // a slow path in Read() to have to handle reading zero bits.
398       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
399           Data == 0) {
400         Abbv->Add(BitCodeAbbrevOp(0));
401         continue;
402       }
403 
404       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
405           Data > MaxChunkSize)
406         return error("Fixed or VBR abbrev record with size > MaxChunkData");
407 
408       Abbv->Add(BitCodeAbbrevOp(E, Data));
409     } else
410       Abbv->Add(BitCodeAbbrevOp(E));
411   }
412 
413   if (Abbv->getNumOperandInfos() == 0)
414     return error("Abbrev record with no operands");
415   CurAbbrevs.push_back(std::move(Abbv));
416 
417   return Error::success();
418 }
419 
420 Expected<Optional<BitstreamBlockInfo>>
421 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
422   if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
423     return std::move(Err);
424 
425   BitstreamBlockInfo NewBlockInfo;
426 
427   SmallVector<uint64_t, 64> Record;
428   BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
429 
430   // Read all the records for this module.
431   while (true) {
432     Expected<BitstreamEntry> MaybeEntry =
433         advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
434     if (!MaybeEntry)
435       return MaybeEntry.takeError();
436     BitstreamEntry Entry = MaybeEntry.get();
437 
438     switch (Entry.Kind) {
439     case llvm::BitstreamEntry::SubBlock: // Handled for us already.
440     case llvm::BitstreamEntry::Error:
441       return None;
442     case llvm::BitstreamEntry::EndBlock:
443       return std::move(NewBlockInfo);
444     case llvm::BitstreamEntry::Record:
445       // The interesting case.
446       break;
447     }
448 
449     // Read abbrev records, associate them with CurBID.
450     if (Entry.ID == bitc::DEFINE_ABBREV) {
451       if (!CurBlockInfo) return None;
452       if (Error Err = ReadAbbrevRecord())
453         return std::move(Err);
454 
455       // ReadAbbrevRecord installs the abbrev in CurAbbrevs.  Move it to the
456       // appropriate BlockInfo.
457       CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
458       CurAbbrevs.pop_back();
459       continue;
460     }
461 
462     // Read a record.
463     Record.clear();
464     Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
465     if (!MaybeBlockInfo)
466       return MaybeBlockInfo.takeError();
467     switch (MaybeBlockInfo.get()) {
468     default:
469       break; // Default behavior, ignore unknown content.
470     case bitc::BLOCKINFO_CODE_SETBID:
471       if (Record.size() < 1)
472         return None;
473       CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
474       break;
475     case bitc::BLOCKINFO_CODE_BLOCKNAME: {
476       if (!CurBlockInfo)
477         return None;
478       if (!ReadBlockInfoNames)
479         break; // Ignore name.
480       CurBlockInfo->Name = std::string(Record.begin(), Record.end());
481       break;
482     }
483       case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
484         if (!CurBlockInfo) return None;
485         if (!ReadBlockInfoNames)
486           break; // Ignore name.
487         CurBlockInfo->RecordNames.emplace_back(
488             (unsigned)Record[0], std::string(Record.begin() + 1, Record.end()));
489         break;
490       }
491       }
492   }
493 }
494