xref: /llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp (revision 47849870278ce05cde03d41f03fd3a1e65ee22a6)
1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Bitstream/BitstreamReader.h"
10 #include "llvm/ADT/StringRef.h"
11 #include <cassert>
12 #include <string>
13 
14 using namespace llvm;
15 
16 //===----------------------------------------------------------------------===//
17 //  BitstreamCursor implementation
18 //===----------------------------------------------------------------------===//
19 
20 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
21 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
22   // Save the current block's state on BlockScope.
23   BlockScope.push_back(Block(CurCodeSize));
24   BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
25 
26   // Add the abbrevs specific to this block to the CurAbbrevs list.
27   if (BlockInfo) {
28     if (const BitstreamBlockInfo::BlockInfo *Info =
29             BlockInfo->getBlockInfo(BlockID)) {
30       CurAbbrevs.insert(CurAbbrevs.end(), Info->Abbrevs.begin(),
31                         Info->Abbrevs.end());
32     }
33   }
34 
35   // Get the codesize of this block.
36   Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
37   if (!MaybeVBR)
38     return MaybeVBR.takeError();
39   CurCodeSize = MaybeVBR.get();
40 
41   if (CurCodeSize > MaxChunkSize)
42     return llvm::createStringError(
43         std::errc::illegal_byte_sequence,
44         "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
45         CurCodeSize);
46 
47   SkipToFourByteBoundary();
48   Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
49   if (!MaybeNum)
50     return MaybeNum.takeError();
51   word_t NumWords = MaybeNum.get();
52   if (NumWordsP)
53     *NumWordsP = NumWords;
54 
55   if (CurCodeSize == 0)
56     return llvm::createStringError(
57         std::errc::illegal_byte_sequence,
58         "can't enter sub-block: current code size is 0");
59   if (AtEndOfStream())
60     return llvm::createStringError(
61         std::errc::illegal_byte_sequence,
62         "can't enter sub block: already at end of stream");
63 
64   return Error::success();
65 }
66 
67 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
68                                                const BitCodeAbbrevOp &Op) {
69   assert(!Op.isLiteral() && "Not to be used with literals!");
70 
71   // Decode the value as we are commanded.
72   switch (Op.getEncoding()) {
73   case BitCodeAbbrevOp::Array:
74   case BitCodeAbbrevOp::Blob:
75     llvm_unreachable("Should not reach here");
76   case BitCodeAbbrevOp::Fixed:
77     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
78     return Cursor.Read((unsigned)Op.getEncodingData());
79   case BitCodeAbbrevOp::VBR:
80     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
81     return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
82   case BitCodeAbbrevOp::Char6:
83     if (Expected<unsigned> Res = Cursor.Read(6))
84       return BitCodeAbbrevOp::DecodeChar6(Res.get());
85     else
86       return Res.takeError();
87   }
88   llvm_unreachable("invalid abbreviation encoding");
89 }
90 
91 /// skipRecord - Read the current record and discard it.
92 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
93   // Skip unabbreviated records by reading past their entries.
94   if (AbbrevID == bitc::UNABBREV_RECORD) {
95     Expected<uint32_t> MaybeCode = ReadVBR(6);
96     if (!MaybeCode)
97       return MaybeCode.takeError();
98     unsigned Code = MaybeCode.get();
99     Expected<uint32_t> MaybeVBR = ReadVBR(6);
100     if (!MaybeVBR)
101       return MaybeVBR.get();
102     unsigned NumElts = MaybeVBR.get();
103     for (unsigned i = 0; i != NumElts; ++i)
104       if (Expected<uint64_t> Res = ReadVBR64(6))
105         ; // Skip!
106       else
107         return Res.takeError();
108     return Code;
109   }
110 
111   const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
112   const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
113   unsigned Code;
114   if (CodeOp.isLiteral())
115     Code = CodeOp.getLiteralValue();
116   else {
117     if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
118         CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
119       return llvm::createStringError(
120           std::errc::illegal_byte_sequence,
121           "Abbreviation starts with an Array or a Blob");
122     Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
123     if (!MaybeCode)
124       return MaybeCode.takeError();
125     Code = MaybeCode.get();
126   }
127 
128   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
129     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
130     if (Op.isLiteral())
131       continue;
132 
133     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
134         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
135       if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op))
136         continue;
137       else
138         return MaybeField.takeError();
139     }
140 
141     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
142       // Array case.  Read the number of elements as a vbr6.
143       Expected<uint32_t> MaybeNum = ReadVBR(6);
144       if (!MaybeNum)
145         return MaybeNum.takeError();
146       unsigned NumElts = MaybeNum.get();
147 
148       // Get the element encoding.
149       assert(i+2 == e && "array op not second to last?");
150       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
151 
152       // Read all the elements.
153       // Decode the value as we are commanded.
154       switch (EltEnc.getEncoding()) {
155       default:
156         report_fatal_error("Array element type can't be an Array or a Blob");
157       case BitCodeAbbrevOp::Fixed:
158         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
159         if (Error Err =
160                 JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) *
161                                                   EltEnc.getEncodingData()))
162           return std::move(Err);
163         break;
164       case BitCodeAbbrevOp::VBR:
165         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
166         for (; NumElts; --NumElts)
167           if (Expected<uint64_t> Res =
168                   ReadVBR64((unsigned)EltEnc.getEncodingData()))
169             ; // Skip!
170           else
171             return Res.takeError();
172         break;
173       case BitCodeAbbrevOp::Char6:
174         if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
175           return std::move(Err);
176         break;
177       }
178       continue;
179     }
180 
181     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
182     // Blob case.  Read the number of bytes as a vbr6.
183     Expected<uint32_t> MaybeNum = ReadVBR(6);
184     if (!MaybeNum)
185       return MaybeNum.takeError();
186     unsigned NumElts = MaybeNum.get();
187     SkipToFourByteBoundary();  // 32-bit alignment
188 
189     // Figure out where the end of this blob will be including tail padding.
190     const size_t NewEnd =
191         GetCurrentBitNo() + ((static_cast<uint64_t>(NumElts) + 3) & ~3) * 8;
192 
193     // If this would read off the end of the bitcode file, just set the
194     // record to empty and return.
195     if (!canSkipToPos(NewEnd/8)) {
196       skipToEnd();
197       break;
198     }
199 
200     // Skip over the blob.
201     if (Error Err = JumpToBit(NewEnd))
202       return std::move(Err);
203   }
204   return Code;
205 }
206 
207 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
208                                                SmallVectorImpl<uint64_t> &Vals,
209                                                StringRef *Blob) {
210   if (AbbrevID == bitc::UNABBREV_RECORD) {
211     Expected<uint32_t> MaybeCode = ReadVBR(6);
212     if (!MaybeCode)
213       return MaybeCode.takeError();
214     uint32_t Code = MaybeCode.get();
215     Expected<uint32_t> MaybeNumElts = ReadVBR(6);
216     if (!MaybeNumElts)
217       return MaybeNumElts.takeError();
218     uint32_t NumElts = MaybeNumElts.get();
219     Vals.reserve(Vals.size() + NumElts);
220 
221     for (unsigned i = 0; i != NumElts; ++i)
222       if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
223         Vals.push_back(MaybeVal.get());
224       else
225         return MaybeVal.takeError();
226     return Code;
227   }
228 
229   const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
230 
231   // Read the record code first.
232   assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
233   const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
234   unsigned Code;
235   if (CodeOp.isLiteral())
236     Code = CodeOp.getLiteralValue();
237   else {
238     if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
239         CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
240       report_fatal_error("Abbreviation starts with an Array or a Blob");
241     if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
242       Code = MaybeCode.get();
243     else
244       return MaybeCode.takeError();
245   }
246 
247   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
248     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
249     if (Op.isLiteral()) {
250       Vals.push_back(Op.getLiteralValue());
251       continue;
252     }
253 
254     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
255         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
256       if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
257         Vals.push_back(MaybeVal.get());
258       else
259         return MaybeVal.takeError();
260       continue;
261     }
262 
263     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
264       // Array case.  Read the number of elements as a vbr6.
265       Expected<uint32_t> MaybeNumElts = ReadVBR(6);
266       if (!MaybeNumElts)
267         return MaybeNumElts.takeError();
268       uint32_t NumElts = MaybeNumElts.get();
269       Vals.reserve(Vals.size() + NumElts);
270 
271       // Get the element encoding.
272       if (i + 2 != e)
273         report_fatal_error("Array op not second to last");
274       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
275       if (!EltEnc.isEncoding())
276         report_fatal_error(
277             "Array element type has to be an encoding of a type");
278 
279       // Read all the elements.
280       switch (EltEnc.getEncoding()) {
281       default:
282         report_fatal_error("Array element type can't be an Array or a Blob");
283       case BitCodeAbbrevOp::Fixed:
284         for (; NumElts; --NumElts)
285           if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
286                   Read((unsigned)EltEnc.getEncodingData()))
287             Vals.push_back(MaybeVal.get());
288           else
289             return MaybeVal.takeError();
290         break;
291       case BitCodeAbbrevOp::VBR:
292         for (; NumElts; --NumElts)
293           if (Expected<uint64_t> MaybeVal =
294                   ReadVBR64((unsigned)EltEnc.getEncodingData()))
295             Vals.push_back(MaybeVal.get());
296           else
297             return MaybeVal.takeError();
298         break;
299       case BitCodeAbbrevOp::Char6:
300         for (; NumElts; --NumElts)
301           if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
302             Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
303           else
304             return MaybeVal.takeError();
305       }
306       continue;
307     }
308 
309     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
310     // Blob case.  Read the number of bytes as a vbr6.
311     Expected<uint32_t> MaybeNumElts = ReadVBR(6);
312     if (!MaybeNumElts)
313       return MaybeNumElts.takeError();
314     uint32_t NumElts = MaybeNumElts.get();
315     SkipToFourByteBoundary();  // 32-bit alignment
316 
317     // Figure out where the end of this blob will be including tail padding.
318     size_t CurBitPos = GetCurrentBitNo();
319     const size_t NewEnd =
320         CurBitPos + ((static_cast<uint64_t>(NumElts) + 3) & ~3) * 8;
321 
322     // If this would read off the end of the bitcode file, just set the
323     // record to empty and return.
324     if (!canSkipToPos(NewEnd/8)) {
325       Vals.append(NumElts, 0);
326       skipToEnd();
327       break;
328     }
329 
330     // Otherwise, inform the streamer that we need these bytes in memory.  Skip
331     // over tail padding first, in case jumping to NewEnd invalidates the Blob
332     // pointer.
333     if (Error Err = JumpToBit(NewEnd))
334       return std::move(Err);
335     const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
336 
337     // If we can return a reference to the data, do so to avoid copying it.
338     if (Blob) {
339       *Blob = StringRef(Ptr, NumElts);
340     } else {
341       // Otherwise, unpack into Vals with zero extension.
342       auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr);
343       Vals.append(UPtr, UPtr + NumElts);
344     }
345   }
346 
347   return Code;
348 }
349 
350 Error BitstreamCursor::ReadAbbrevRecord() {
351   auto Abbv = std::make_shared<BitCodeAbbrev>();
352   Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
353   if (!MaybeNumOpInfo)
354     return MaybeNumOpInfo.takeError();
355   unsigned NumOpInfo = MaybeNumOpInfo.get();
356   for (unsigned i = 0; i != NumOpInfo; ++i) {
357     Expected<word_t> MaybeIsLiteral = Read(1);
358     if (!MaybeIsLiteral)
359       return MaybeIsLiteral.takeError();
360     bool IsLiteral = MaybeIsLiteral.get();
361     if (IsLiteral) {
362       Expected<uint64_t> MaybeOp = ReadVBR64(8);
363       if (!MaybeOp)
364         return MaybeOp.takeError();
365       Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
366       continue;
367     }
368 
369     Expected<word_t> MaybeEncoding = Read(3);
370     if (!MaybeEncoding)
371       return MaybeEncoding.takeError();
372     BitCodeAbbrevOp::Encoding E =
373         (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
374     if (BitCodeAbbrevOp::hasEncodingData(E)) {
375       Expected<uint64_t> MaybeData = ReadVBR64(5);
376       if (!MaybeData)
377         return MaybeData.takeError();
378       uint64_t Data = MaybeData.get();
379 
380       // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
381       // and vbr(0) as a literal zero.  This is decoded the same way, and avoids
382       // a slow path in Read() to have to handle reading zero bits.
383       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
384           Data == 0) {
385         Abbv->Add(BitCodeAbbrevOp(0));
386         continue;
387       }
388 
389       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
390           Data > MaxChunkSize)
391         report_fatal_error(
392             "Fixed or VBR abbrev record with size > MaxChunkData");
393 
394       Abbv->Add(BitCodeAbbrevOp(E, Data));
395     } else
396       Abbv->Add(BitCodeAbbrevOp(E));
397   }
398 
399   if (Abbv->getNumOperandInfos() == 0)
400     report_fatal_error("Abbrev record with no operands");
401   CurAbbrevs.push_back(std::move(Abbv));
402 
403   return Error::success();
404 }
405 
406 Expected<Optional<BitstreamBlockInfo>>
407 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
408   if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
409     return std::move(Err);
410 
411   BitstreamBlockInfo NewBlockInfo;
412 
413   SmallVector<uint64_t, 64> Record;
414   BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
415 
416   // Read all the records for this module.
417   while (true) {
418     Expected<BitstreamEntry> MaybeEntry =
419         advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
420     if (!MaybeEntry)
421       return MaybeEntry.takeError();
422     BitstreamEntry Entry = MaybeEntry.get();
423 
424     switch (Entry.Kind) {
425     case llvm::BitstreamEntry::SubBlock: // Handled for us already.
426     case llvm::BitstreamEntry::Error:
427       return None;
428     case llvm::BitstreamEntry::EndBlock:
429       return std::move(NewBlockInfo);
430     case llvm::BitstreamEntry::Record:
431       // The interesting case.
432       break;
433     }
434 
435     // Read abbrev records, associate them with CurBID.
436     if (Entry.ID == bitc::DEFINE_ABBREV) {
437       if (!CurBlockInfo) return None;
438       if (Error Err = ReadAbbrevRecord())
439         return std::move(Err);
440 
441       // ReadAbbrevRecord installs the abbrev in CurAbbrevs.  Move it to the
442       // appropriate BlockInfo.
443       CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
444       CurAbbrevs.pop_back();
445       continue;
446     }
447 
448     // Read a record.
449     Record.clear();
450     Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
451     if (!MaybeBlockInfo)
452       return MaybeBlockInfo.takeError();
453     switch (MaybeBlockInfo.get()) {
454     default:
455       break; // Default behavior, ignore unknown content.
456     case bitc::BLOCKINFO_CODE_SETBID:
457       if (Record.size() < 1)
458         return None;
459       CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
460       break;
461     case bitc::BLOCKINFO_CODE_BLOCKNAME: {
462       if (!CurBlockInfo)
463         return None;
464       if (!ReadBlockInfoNames)
465         break; // Ignore name.
466       CurBlockInfo->Name = std::string(Record.begin(), Record.end());
467       break;
468     }
469       case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
470         if (!CurBlockInfo) return None;
471         if (!ReadBlockInfoNames)
472           break; // Ignore name.
473         CurBlockInfo->RecordNames.emplace_back(
474             (unsigned)Record[0], std::string(Record.begin() + 1, Record.end()));
475         break;
476       }
477       }
478   }
479 }
480