xref: /llvm-project/llvm/lib/CGData/CodeGenDataReader.cpp (revision ffcf3c8688f57acaf6a404a1238673c9d197ba9a)
1 //===- CodeGenDataReader.cpp ----------------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains support for reading codegen data.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/CGData/CodeGenDataReader.h"
14 #include "llvm/CGData/OutlinedHashTreeRecord.h"
15 #include "llvm/Object/ObjectFile.h"
16 #include "llvm/Support/MemoryBuffer.h"
17 
18 #define DEBUG_TYPE "cg-data-reader"
19 
20 using namespace llvm;
21 
22 namespace llvm {
23 
24 static Expected<std::unique_ptr<MemoryBuffer>>
25 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
26   auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
27                                            : FS.getBufferForFile(Filename);
28   if (std::error_code EC = BufferOrErr.getError())
29     return errorCodeToError(EC);
30   return std::move(BufferOrErr.get());
31 }
32 
33 Error CodeGenDataReader::mergeFromObjectFile(
34     const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
35     StableFunctionMapRecord &GlobalFunctionMapRecord,
36     stable_hash *CombinedHash) {
37   Triple TT = Obj->makeTriple();
38   auto CGOutlineName =
39       getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
40   auto CGMergeName =
41       getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false);
42 
43   auto processSectionContents = [&](const StringRef &Name,
44                                     const StringRef &Contents) {
45     if (Name != CGOutlineName && Name != CGMergeName)
46       return;
47     if (CombinedHash)
48       *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents));
49     auto *Data = reinterpret_cast<const unsigned char *>(Contents.data());
50     auto *EndData = Data + Contents.size();
51     // In case dealing with an executable that has concatenated cgdata,
52     // we want to merge them into a single cgdata.
53     // Although it's not a typical workflow, we support this scenario
54     // by looping over all data in the sections.
55     if (Name == CGOutlineName) {
56       while (Data != EndData) {
57         OutlinedHashTreeRecord LocalOutlineRecord;
58         LocalOutlineRecord.deserialize(Data);
59         GlobalOutlineRecord.merge(LocalOutlineRecord);
60       }
61     } else if (Name == CGMergeName) {
62       while (Data != EndData) {
63         StableFunctionMapRecord LocalFunctionMapRecord;
64         LocalFunctionMapRecord.deserialize(Data);
65         GlobalFunctionMapRecord.merge(LocalFunctionMapRecord);
66       }
67     }
68   };
69 
70   for (auto &Section : Obj->sections()) {
71     Expected<StringRef> NameOrErr = Section.getName();
72     if (!NameOrErr)
73       return NameOrErr.takeError();
74     Expected<StringRef> ContentsOrErr = Section.getContents();
75     if (!ContentsOrErr)
76       return ContentsOrErr.takeError();
77     processSectionContents(*NameOrErr, *ContentsOrErr);
78   }
79 
80   return Error::success();
81 }
82 
83 Error IndexedCodeGenDataReader::read() {
84   using namespace support;
85 
86   // The smallest header with the version 1 is 24 bytes.
87   // Do not update this value even with the new version of the header.
88   const unsigned MinHeaderSize = 24;
89   if (DataBuffer->getBufferSize() < MinHeaderSize)
90     return error(cgdata_error::bad_header);
91 
92   auto *Start =
93       reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
94   auto *End =
95       reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd());
96   if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header))
97     return E;
98 
99   if (hasOutlinedHashTree()) {
100     const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset;
101     if (Ptr >= End)
102       return error(cgdata_error::eof);
103     HashTreeRecord.deserialize(Ptr);
104   }
105   if (hasStableFunctionMap()) {
106     const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
107     if (Ptr >= End)
108       return error(cgdata_error::eof);
109     FunctionMapRecord.deserialize(Ptr);
110   }
111 
112   return success();
113 }
114 
115 Expected<std::unique_ptr<CodeGenDataReader>>
116 CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) {
117   // Set up the buffer to read.
118   auto BufferOrError = setupMemoryBuffer(Path, FS);
119   if (Error E = BufferOrError.takeError())
120     return std::move(E);
121   return CodeGenDataReader::create(std::move(BufferOrError.get()));
122 }
123 
124 Expected<std::unique_ptr<CodeGenDataReader>>
125 CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
126   if (Buffer->getBufferSize() == 0)
127     return make_error<CGDataError>(cgdata_error::empty_cgdata);
128 
129   std::unique_ptr<CodeGenDataReader> Reader;
130   // Create the reader.
131   if (IndexedCodeGenDataReader::hasFormat(*Buffer))
132     Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer));
133   else if (TextCodeGenDataReader::hasFormat(*Buffer))
134     Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer));
135   else
136     return make_error<CGDataError>(cgdata_error::malformed);
137 
138   // Initialize the reader and return the result.
139   if (Error E = Reader->read())
140     return std::move(E);
141 
142   return std::move(Reader);
143 }
144 
145 bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) {
146   using namespace support;
147   if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic))
148     return false;
149 
150   uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
151       DataBuffer.getBufferStart());
152   // Verify that it's magical.
153   return Magic == IndexedCGData::Magic;
154 }
155 
156 bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) {
157   // Verify that this really looks like plain ASCII text by checking a
158   // 'reasonable' number of characters (up to the magic size).
159   StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t));
160   return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); });
161 }
162 Error TextCodeGenDataReader::read() {
163   using namespace support;
164 
165   // Parse the custom header line by line.
166   for (; !Line.is_at_eof(); ++Line) {
167     // Skip empty or whitespace-only lines
168     if (Line->trim().empty())
169       continue;
170 
171     if (!Line->starts_with(":"))
172       break;
173     StringRef Str = Line->drop_front().rtrim();
174     if (Str.equals_insensitive("outlined_hash_tree"))
175       DataKind |= CGDataKind::FunctionOutlinedHashTree;
176     else if (Str.equals_insensitive("stable_function_map"))
177       DataKind |= CGDataKind::StableFunctionMergingMap;
178     else
179       return error(cgdata_error::bad_header);
180   }
181 
182   // We treat an empty header (that is a comment # only) as a valid header.
183   if (Line.is_at_eof()) {
184     if (DataKind == CGDataKind::Unknown)
185       return Error::success();
186     return error(cgdata_error::bad_header);
187   }
188 
189   // The YAML docs follow after the header.
190   const char *Pos = Line->data();
191   size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
192                 reinterpret_cast<size_t>(Pos);
193   yaml::Input YOS(StringRef(Pos, Size));
194   if (hasOutlinedHashTree())
195     HashTreeRecord.deserializeYAML(YOS);
196   if (hasStableFunctionMap())
197     FunctionMapRecord.deserializeYAML(YOS);
198 
199   return Error::success();
200 }
201 } // end namespace llvm
202