xref: /llvm-project/llvm/lib/CGData/CodeGenDataReader.cpp (revision dc85d5263ed5e416cb4ddf405611472f4ef12fd3)
1 //===- CodeGenDataReader.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading codegen data.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CGData/CodeGenDataReader.h"
14 #include "llvm/CGData/OutlinedHashTreeRecord.h"
15 #include "llvm/Object/ObjectFile.h"
16 #include "llvm/Support/MemoryBuffer.h"
17 
18 #define DEBUG_TYPE "cg-data-reader"
19 
20 using namespace llvm;
21 
22 namespace llvm {
23 
24 static Expected<std::unique_ptr<MemoryBuffer>>
25 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
26   auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
27                                            : FS.getBufferForFile(Filename);
28   if (std::error_code EC = BufferOrErr.getError())
29     return errorCodeToError(EC);
30   return std::move(BufferOrErr.get());
31 }
32 
33 Error CodeGenDataReader::mergeFromObjectFile(
34     const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
35     stable_hash *CombinedHash) {
36   Triple TT = Obj->makeTriple();
37   auto CGOutLineName =
38       getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
39 
40   for (auto &Section : Obj->sections()) {
41     Expected<StringRef> NameOrErr = Section.getName();
42     if (!NameOrErr)
43       return NameOrErr.takeError();
44     Expected<StringRef> ContentsOrErr = Section.getContents();
45     if (!ContentsOrErr)
46       return ContentsOrErr.takeError();
47     auto *Data = reinterpret_cast<const unsigned char *>(ContentsOrErr->data());
48     auto *EndData = Data + ContentsOrErr->size();
49 
50     if (*NameOrErr == CGOutLineName) {
51       if (CombinedHash)
52         *CombinedHash =
53             stable_hash_combine(*CombinedHash, xxh3_64bits(*ContentsOrErr));
54       // In case dealing with an executable that has concatenated cgdata,
55       // we want to merge them into a single cgdata.
56       // Although it's not a typical workflow, we support this scenario.
57       while (Data != EndData) {
58         OutlinedHashTreeRecord LocalOutlineRecord;
59         LocalOutlineRecord.deserialize(Data);
60         GlobalOutlineRecord.merge(LocalOutlineRecord);
61       }
62     }
63     // TODO: Add support for other cgdata sections.
64   }
65 
66   return Error::success();
67 }
68 
69 Error IndexedCodeGenDataReader::read() {
70   using namespace support;
71 
72   // The smallest header with the version 1 is 24 bytes
73   const unsigned MinHeaderSize = 24;
74   if (DataBuffer->getBufferSize() < MinHeaderSize)
75     return error(cgdata_error::bad_header);
76 
77   auto *Start =
78       reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
79   auto *End =
80       reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd());
81   if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header))
82     return E;
83 
84   if (hasOutlinedHashTree()) {
85     const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset;
86     if (Ptr >= End)
87       return error(cgdata_error::eof);
88     HashTreeRecord.deserialize(Ptr);
89   }
90 
91   return success();
92 }
93 
94 Expected<std::unique_ptr<CodeGenDataReader>>
95 CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) {
96   // Set up the buffer to read.
97   auto BufferOrError = setupMemoryBuffer(Path, FS);
98   if (Error E = BufferOrError.takeError())
99     return std::move(E);
100   return CodeGenDataReader::create(std::move(BufferOrError.get()));
101 }
102 
103 Expected<std::unique_ptr<CodeGenDataReader>>
104 CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
105   if (Buffer->getBufferSize() == 0)
106     return make_error<CGDataError>(cgdata_error::empty_cgdata);
107 
108   std::unique_ptr<CodeGenDataReader> Reader;
109   // Create the reader.
110   if (IndexedCodeGenDataReader::hasFormat(*Buffer))
111     Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer));
112   else if (TextCodeGenDataReader::hasFormat(*Buffer))
113     Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer));
114   else
115     return make_error<CGDataError>(cgdata_error::malformed);
116 
117   // Initialize the reader and return the result.
118   if (Error E = Reader->read())
119     return std::move(E);
120 
121   return std::move(Reader);
122 }
123 
124 bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) {
125   using namespace support;
126   if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic))
127     return false;
128 
129   uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
130       DataBuffer.getBufferStart());
131   // Verify that it's magical.
132   return Magic == IndexedCGData::Magic;
133 }
134 
135 bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) {
136   // Verify that this really looks like plain ASCII text by checking a
137   // 'reasonable' number of characters (up to the magic size).
138   StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t));
139   return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); });
140 }
141 Error TextCodeGenDataReader::read() {
142   using namespace support;
143 
144   // Parse the custom header line by line.
145   for (; !Line.is_at_eof(); ++Line) {
146     // Skip empty or whitespace-only lines
147     if (Line->trim().empty())
148       continue;
149 
150     if (!Line->starts_with(":"))
151       break;
152     StringRef Str = Line->drop_front().rtrim();
153     if (Str.equals_insensitive("outlined_hash_tree"))
154       DataKind |= CGDataKind::FunctionOutlinedHashTree;
155     else
156       return error(cgdata_error::bad_header);
157   }
158 
159   // We treat an empty header (that is a comment # only) as a valid header.
160   if (Line.is_at_eof()) {
161     if (DataKind == CGDataKind::Unknown)
162       return Error::success();
163     return error(cgdata_error::bad_header);
164   }
165 
166   // The YAML docs follow after the header.
167   const char *Pos = Line->data();
168   size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
169                 reinterpret_cast<size_t>(Pos);
170   yaml::Input YOS(StringRef(Pos, Size));
171   if (hasOutlinedHashTree())
172     HashTreeRecord.deserializeYAML(YOS);
173 
174   // TODO: Add more yaml cgdata in order
175 
176   return Error::success();
177 }
178 } // end namespace llvm
179