xref: /llvm-project/llvm/lib/Bitstream/Reader/BitstreamReader.cpp (revision c00ef03d73773d015d4a19f9d0bdcb38dd1bf5e0)
1 //===- BitstreamReader.cpp - BitstreamReader implementation ---------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "llvm/Bitstream/BitstreamReader.h"
10 #include "llvm/ADT/StringRef.h"
11 #include <cassert>
12 #include <string>
13 
14 using namespace llvm;
15 
16 //===----------------------------------------------------------------------===//
17 //  BitstreamCursor implementation
18 //===----------------------------------------------------------------------===//
19 //
20 static Error error(const char *Message) {
21   return createStringError(std::errc::illegal_byte_sequence, Message);
22 }
23 
24 /// Having read the ENTER_SUBBLOCK abbrevid, enter the block.
25 Error BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) {
26   // Save the current block's state on BlockScope.
27   BlockScope.push_back(Block(CurCodeSize));
28   BlockScope.back().PrevAbbrevs.swap(CurAbbrevs);
29 
30   // Add the abbrevs specific to this block to the CurAbbrevs list.
31   if (BlockInfo) {
32     if (const BitstreamBlockInfo::BlockInfo *Info =
33             BlockInfo->getBlockInfo(BlockID)) {
34       llvm::append_range(CurAbbrevs, Info->Abbrevs);
35     }
36   }
37 
38   // Get the codesize of this block.
39   Expected<uint32_t> MaybeVBR = ReadVBR(bitc::CodeLenWidth);
40   if (!MaybeVBR)
41     return MaybeVBR.takeError();
42   CurCodeSize = MaybeVBR.get();
43 
44   if (CurCodeSize > MaxChunkSize)
45     return llvm::createStringError(
46         std::errc::illegal_byte_sequence,
47         "can't read more than %zu at a time, trying to read %u", +MaxChunkSize,
48         CurCodeSize);
49 
50   SkipToFourByteBoundary();
51   Expected<word_t> MaybeNum = Read(bitc::BlockSizeWidth);
52   if (!MaybeNum)
53     return MaybeNum.takeError();
54   word_t NumWords = MaybeNum.get();
55   if (NumWordsP)
56     *NumWordsP = NumWords;
57 
58   if (CurCodeSize == 0)
59     return llvm::createStringError(
60         std::errc::illegal_byte_sequence,
61         "can't enter sub-block: current code size is 0");
62   if (AtEndOfStream())
63     return llvm::createStringError(
64         std::errc::illegal_byte_sequence,
65         "can't enter sub block: already at end of stream");
66 
67   return Error::success();
68 }
69 
70 static Expected<uint64_t> readAbbreviatedField(BitstreamCursor &Cursor,
71                                                const BitCodeAbbrevOp &Op) {
72   assert(!Op.isLiteral() && "Not to be used with literals!");
73 
74   // Decode the value as we are commanded.
75   switch (Op.getEncoding()) {
76   case BitCodeAbbrevOp::Array:
77   case BitCodeAbbrevOp::Blob:
78     llvm_unreachable("Should not reach here");
79   case BitCodeAbbrevOp::Fixed:
80     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
81     return Cursor.Read((unsigned)Op.getEncodingData());
82   case BitCodeAbbrevOp::VBR:
83     assert((unsigned)Op.getEncodingData() <= Cursor.MaxChunkSize);
84     return Cursor.ReadVBR64((unsigned)Op.getEncodingData());
85   case BitCodeAbbrevOp::Char6:
86     if (Expected<unsigned> Res = Cursor.Read(6))
87       return BitCodeAbbrevOp::DecodeChar6(Res.get());
88     else
89       return Res.takeError();
90   }
91   llvm_unreachable("invalid abbreviation encoding");
92 }
93 
94 /// skipRecord - Read the current record and discard it.
95 Expected<unsigned> BitstreamCursor::skipRecord(unsigned AbbrevID) {
96   // Skip unabbreviated records by reading past their entries.
97   if (AbbrevID == bitc::UNABBREV_RECORD) {
98     Expected<uint32_t> MaybeCode = ReadVBR(6);
99     if (!MaybeCode)
100       return MaybeCode.takeError();
101     unsigned Code = MaybeCode.get();
102     Expected<uint32_t> MaybeVBR = ReadVBR(6);
103     if (!MaybeVBR)
104       return MaybeVBR.get();
105     unsigned NumElts = MaybeVBR.get();
106     for (unsigned i = 0; i != NumElts; ++i)
107       if (Expected<uint64_t> Res = ReadVBR64(6))
108         ; // Skip!
109       else
110         return Res.takeError();
111     return Code;
112   }
113 
114   const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
115   const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
116   unsigned Code;
117   if (CodeOp.isLiteral())
118     Code = CodeOp.getLiteralValue();
119   else {
120     if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
121         CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
122       return llvm::createStringError(
123           std::errc::illegal_byte_sequence,
124           "Abbreviation starts with an Array or a Blob");
125     Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp);
126     if (!MaybeCode)
127       return MaybeCode.takeError();
128     Code = MaybeCode.get();
129   }
130 
131   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i < e; ++i) {
132     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
133     if (Op.isLiteral())
134       continue;
135 
136     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
137         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
138       if (Expected<uint64_t> MaybeField = readAbbreviatedField(*this, Op))
139         continue;
140       else
141         return MaybeField.takeError();
142     }
143 
144     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
145       // Array case.  Read the number of elements as a vbr6.
146       Expected<uint32_t> MaybeNum = ReadVBR(6);
147       if (!MaybeNum)
148         return MaybeNum.takeError();
149       unsigned NumElts = MaybeNum.get();
150 
151       // Get the element encoding.
152       assert(i+2 == e && "array op not second to last?");
153       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
154 
155       // Read all the elements.
156       // Decode the value as we are commanded.
157       switch (EltEnc.getEncoding()) {
158       default:
159         return error("Array element type can't be an Array or a Blob");
160       case BitCodeAbbrevOp::Fixed:
161         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
162         if (Error Err =
163                 JumpToBit(GetCurrentBitNo() + static_cast<uint64_t>(NumElts) *
164                                                   EltEnc.getEncodingData()))
165           return std::move(Err);
166         break;
167       case BitCodeAbbrevOp::VBR:
168         assert((unsigned)EltEnc.getEncodingData() <= MaxChunkSize);
169         for (; NumElts; --NumElts)
170           if (Expected<uint64_t> Res =
171                   ReadVBR64((unsigned)EltEnc.getEncodingData()))
172             ; // Skip!
173           else
174             return Res.takeError();
175         break;
176       case BitCodeAbbrevOp::Char6:
177         if (Error Err = JumpToBit(GetCurrentBitNo() + NumElts * 6))
178           return std::move(Err);
179         break;
180       }
181       continue;
182     }
183 
184     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
185     // Blob case.  Read the number of bytes as a vbr6.
186     Expected<uint32_t> MaybeNum = ReadVBR(6);
187     if (!MaybeNum)
188       return MaybeNum.takeError();
189     unsigned NumElts = MaybeNum.get();
190     SkipToFourByteBoundary();  // 32-bit alignment
191 
192     // Figure out where the end of this blob will be including tail padding.
193     const size_t NewEnd = GetCurrentBitNo() + alignTo(NumElts, 4) * 8;
194 
195     // If this would read off the end of the bitcode file, just set the
196     // record to empty and return.
197     if (!canSkipToPos(NewEnd/8)) {
198       skipToEnd();
199       break;
200     }
201 
202     // Skip over the blob.
203     if (Error Err = JumpToBit(NewEnd))
204       return std::move(Err);
205   }
206   return Code;
207 }
208 
209 Expected<unsigned> BitstreamCursor::readRecord(unsigned AbbrevID,
210                                                SmallVectorImpl<uint64_t> &Vals,
211                                                StringRef *Blob) {
212   if (AbbrevID == bitc::UNABBREV_RECORD) {
213     Expected<uint32_t> MaybeCode = ReadVBR(6);
214     if (!MaybeCode)
215       return MaybeCode.takeError();
216     uint32_t Code = MaybeCode.get();
217     Expected<uint32_t> MaybeNumElts = ReadVBR(6);
218     if (!MaybeNumElts)
219       return MaybeNumElts.takeError();
220     uint32_t NumElts = MaybeNumElts.get();
221     Vals.reserve(Vals.size() + NumElts);
222 
223     for (unsigned i = 0; i != NumElts; ++i)
224       if (Expected<uint64_t> MaybeVal = ReadVBR64(6))
225         Vals.push_back(MaybeVal.get());
226       else
227         return MaybeVal.takeError();
228     return Code;
229   }
230 
231   const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID);
232 
233   // Read the record code first.
234   assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?");
235   const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0);
236   unsigned Code;
237   if (CodeOp.isLiteral())
238     Code = CodeOp.getLiteralValue();
239   else {
240     if (CodeOp.getEncoding() == BitCodeAbbrevOp::Array ||
241         CodeOp.getEncoding() == BitCodeAbbrevOp::Blob)
242       return error("Abbreviation starts with an Array or a Blob");
243     if (Expected<uint64_t> MaybeCode = readAbbreviatedField(*this, CodeOp))
244       Code = MaybeCode.get();
245     else
246       return MaybeCode.takeError();
247   }
248 
249   for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) {
250     const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i);
251     if (Op.isLiteral()) {
252       Vals.push_back(Op.getLiteralValue());
253       continue;
254     }
255 
256     if (Op.getEncoding() != BitCodeAbbrevOp::Array &&
257         Op.getEncoding() != BitCodeAbbrevOp::Blob) {
258       if (Expected<uint64_t> MaybeVal = readAbbreviatedField(*this, Op))
259         Vals.push_back(MaybeVal.get());
260       else
261         return MaybeVal.takeError();
262       continue;
263     }
264 
265     if (Op.getEncoding() == BitCodeAbbrevOp::Array) {
266       // Array case.  Read the number of elements as a vbr6.
267       Expected<uint32_t> MaybeNumElts = ReadVBR(6);
268       if (!MaybeNumElts)
269         return MaybeNumElts.takeError();
270       uint32_t NumElts = MaybeNumElts.get();
271       Vals.reserve(Vals.size() + NumElts);
272 
273       // Get the element encoding.
274       if (i + 2 != e)
275         return error("Array op not second to last");
276       const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i);
277       if (!EltEnc.isEncoding())
278         return error(
279             "Array element type has to be an encoding of a type");
280 
281       // Read all the elements.
282       switch (EltEnc.getEncoding()) {
283       default:
284         return error("Array element type can't be an Array or a Blob");
285       case BitCodeAbbrevOp::Fixed:
286         for (; NumElts; --NumElts)
287           if (Expected<SimpleBitstreamCursor::word_t> MaybeVal =
288                   Read((unsigned)EltEnc.getEncodingData()))
289             Vals.push_back(MaybeVal.get());
290           else
291             return MaybeVal.takeError();
292         break;
293       case BitCodeAbbrevOp::VBR:
294         for (; NumElts; --NumElts)
295           if (Expected<uint64_t> MaybeVal =
296                   ReadVBR64((unsigned)EltEnc.getEncodingData()))
297             Vals.push_back(MaybeVal.get());
298           else
299             return MaybeVal.takeError();
300         break;
301       case BitCodeAbbrevOp::Char6:
302         for (; NumElts; --NumElts)
303           if (Expected<SimpleBitstreamCursor::word_t> MaybeVal = Read(6))
304             Vals.push_back(BitCodeAbbrevOp::DecodeChar6(MaybeVal.get()));
305           else
306             return MaybeVal.takeError();
307       }
308       continue;
309     }
310 
311     assert(Op.getEncoding() == BitCodeAbbrevOp::Blob);
312     // Blob case.  Read the number of bytes as a vbr6.
313     Expected<uint32_t> MaybeNumElts = ReadVBR(6);
314     if (!MaybeNumElts)
315       return MaybeNumElts.takeError();
316     uint32_t NumElts = MaybeNumElts.get();
317     SkipToFourByteBoundary();  // 32-bit alignment
318 
319     // Figure out where the end of this blob will be including tail padding.
320     size_t CurBitPos = GetCurrentBitNo();
321     const size_t NewEnd = CurBitPos + alignTo(NumElts, 4) * 8;
322 
323     // If this would read off the end of the bitcode file, just set the
324     // record to empty and return.
325     if (!canSkipToPos(NewEnd/8)) {
326       Vals.append(NumElts, 0);
327       skipToEnd();
328       break;
329     }
330 
331     // Otherwise, inform the streamer that we need these bytes in memory.  Skip
332     // over tail padding first, in case jumping to NewEnd invalidates the Blob
333     // pointer.
334     if (Error Err = JumpToBit(NewEnd))
335       return std::move(Err);
336     const char *Ptr = (const char *)getPointerToBit(CurBitPos, NumElts);
337 
338     // If we can return a reference to the data, do so to avoid copying it.
339     if (Blob) {
340       *Blob = StringRef(Ptr, NumElts);
341     } else {
342       // Otherwise, unpack into Vals with zero extension.
343       auto *UPtr = reinterpret_cast<const unsigned char *>(Ptr);
344       Vals.append(UPtr, UPtr + NumElts);
345     }
346   }
347 
348   return Code;
349 }
350 
351 Error BitstreamCursor::ReadAbbrevRecord() {
352   auto Abbv = std::make_shared<BitCodeAbbrev>();
353   Expected<uint32_t> MaybeNumOpInfo = ReadVBR(5);
354   if (!MaybeNumOpInfo)
355     return MaybeNumOpInfo.takeError();
356   unsigned NumOpInfo = MaybeNumOpInfo.get();
357   for (unsigned i = 0; i != NumOpInfo; ++i) {
358     Expected<word_t> MaybeIsLiteral = Read(1);
359     if (!MaybeIsLiteral)
360       return MaybeIsLiteral.takeError();
361     bool IsLiteral = MaybeIsLiteral.get();
362     if (IsLiteral) {
363       Expected<uint64_t> MaybeOp = ReadVBR64(8);
364       if (!MaybeOp)
365         return MaybeOp.takeError();
366       Abbv->Add(BitCodeAbbrevOp(MaybeOp.get()));
367       continue;
368     }
369 
370     Expected<word_t> MaybeEncoding = Read(3);
371     if (!MaybeEncoding)
372       return MaybeEncoding.takeError();
373     if (!BitCodeAbbrevOp::isValidEncoding(MaybeEncoding.get()))
374       return error("Invalid encoding");
375 
376     BitCodeAbbrevOp::Encoding E =
377         (BitCodeAbbrevOp::Encoding)MaybeEncoding.get();
378     if (BitCodeAbbrevOp::hasEncodingData(E)) {
379       Expected<uint64_t> MaybeData = ReadVBR64(5);
380       if (!MaybeData)
381         return MaybeData.takeError();
382       uint64_t Data = MaybeData.get();
383 
384       // As a special case, handle fixed(0) (i.e., a fixed field with zero bits)
385       // and vbr(0) as a literal zero.  This is decoded the same way, and avoids
386       // a slow path in Read() to have to handle reading zero bits.
387       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
388           Data == 0) {
389         Abbv->Add(BitCodeAbbrevOp(0));
390         continue;
391       }
392 
393       if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) &&
394           Data > MaxChunkSize)
395         return error("Fixed or VBR abbrev record with size > MaxChunkData");
396 
397       Abbv->Add(BitCodeAbbrevOp(E, Data));
398     } else
399       Abbv->Add(BitCodeAbbrevOp(E));
400   }
401 
402   if (Abbv->getNumOperandInfos() == 0)
403     return error("Abbrev record with no operands");
404   CurAbbrevs.push_back(std::move(Abbv));
405 
406   return Error::success();
407 }
408 
409 Expected<Optional<BitstreamBlockInfo>>
410 BitstreamCursor::ReadBlockInfoBlock(bool ReadBlockInfoNames) {
411   if (llvm::Error Err = EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID))
412     return std::move(Err);
413 
414   BitstreamBlockInfo NewBlockInfo;
415 
416   SmallVector<uint64_t, 64> Record;
417   BitstreamBlockInfo::BlockInfo *CurBlockInfo = nullptr;
418 
419   // Read all the records for this module.
420   while (true) {
421     Expected<BitstreamEntry> MaybeEntry =
422         advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs);
423     if (!MaybeEntry)
424       return MaybeEntry.takeError();
425     BitstreamEntry Entry = MaybeEntry.get();
426 
427     switch (Entry.Kind) {
428     case llvm::BitstreamEntry::SubBlock: // Handled for us already.
429     case llvm::BitstreamEntry::Error:
430       return None;
431     case llvm::BitstreamEntry::EndBlock:
432       return std::move(NewBlockInfo);
433     case llvm::BitstreamEntry::Record:
434       // The interesting case.
435       break;
436     }
437 
438     // Read abbrev records, associate them with CurBID.
439     if (Entry.ID == bitc::DEFINE_ABBREV) {
440       if (!CurBlockInfo) return None;
441       if (Error Err = ReadAbbrevRecord())
442         return std::move(Err);
443 
444       // ReadAbbrevRecord installs the abbrev in CurAbbrevs.  Move it to the
445       // appropriate BlockInfo.
446       CurBlockInfo->Abbrevs.push_back(std::move(CurAbbrevs.back()));
447       CurAbbrevs.pop_back();
448       continue;
449     }
450 
451     // Read a record.
452     Record.clear();
453     Expected<unsigned> MaybeBlockInfo = readRecord(Entry.ID, Record);
454     if (!MaybeBlockInfo)
455       return MaybeBlockInfo.takeError();
456     switch (MaybeBlockInfo.get()) {
457     default:
458       break; // Default behavior, ignore unknown content.
459     case bitc::BLOCKINFO_CODE_SETBID:
460       if (Record.size() < 1)
461         return None;
462       CurBlockInfo = &NewBlockInfo.getOrCreateBlockInfo((unsigned)Record[0]);
463       break;
464     case bitc::BLOCKINFO_CODE_BLOCKNAME: {
465       if (!CurBlockInfo)
466         return None;
467       if (!ReadBlockInfoNames)
468         break; // Ignore name.
469       CurBlockInfo->Name = std::string(Record.begin(), Record.end());
470       break;
471     }
472       case bitc::BLOCKINFO_CODE_SETRECORDNAME: {
473         if (!CurBlockInfo) return None;
474         if (!ReadBlockInfoNames)
475           break; // Ignore name.
476         CurBlockInfo->RecordNames.emplace_back(
477             (unsigned)Record[0], std::string(Record.begin() + 1, Record.end()));
478         break;
479       }
480       }
481   }
482 }
483