xref: /llvm-project/llvm/lib/CGData/CodeGenDataReader.cpp (revision ffcf3c8688f57acaf6a404a1238673c9d197ba9a)
19bb55568SKyungwoo Lee //===- CodeGenDataReader.cpp ----------------------------------------------===//
29bb55568SKyungwoo Lee //
39bb55568SKyungwoo Lee // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
49bb55568SKyungwoo Lee // See https://llvm.org/LICENSE.txt for license information.
59bb55568SKyungwoo Lee // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
69bb55568SKyungwoo Lee //
79bb55568SKyungwoo Lee //===----------------------------------------------------------------------===//
89bb55568SKyungwoo Lee //
99bb55568SKyungwoo Lee // This file contains support for reading codegen data.
109bb55568SKyungwoo Lee //
119bb55568SKyungwoo Lee //===----------------------------------------------------------------------===//
129bb55568SKyungwoo Lee 
139bb55568SKyungwoo Lee #include "llvm/CGData/CodeGenDataReader.h"
149bb55568SKyungwoo Lee #include "llvm/CGData/OutlinedHashTreeRecord.h"
159bb55568SKyungwoo Lee #include "llvm/Object/ObjectFile.h"
169bb55568SKyungwoo Lee #include "llvm/Support/MemoryBuffer.h"
179bb55568SKyungwoo Lee 
189bb55568SKyungwoo Lee #define DEBUG_TYPE "cg-data-reader"
199bb55568SKyungwoo Lee 
209bb55568SKyungwoo Lee using namespace llvm;
219bb55568SKyungwoo Lee 
229bb55568SKyungwoo Lee namespace llvm {
239bb55568SKyungwoo Lee 
249bb55568SKyungwoo Lee static Expected<std::unique_ptr<MemoryBuffer>>
259bb55568SKyungwoo Lee setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) {
269bb55568SKyungwoo Lee   auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN()
279bb55568SKyungwoo Lee                                            : FS.getBufferForFile(Filename);
289bb55568SKyungwoo Lee   if (std::error_code EC = BufferOrErr.getError())
299bb55568SKyungwoo Lee     return errorCodeToError(EC);
309bb55568SKyungwoo Lee   return std::move(BufferOrErr.get());
319bb55568SKyungwoo Lee }
329bb55568SKyungwoo Lee 
339bb55568SKyungwoo Lee Error CodeGenDataReader::mergeFromObjectFile(
34dc85d526SKyungwoo Lee     const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord,
35*ffcf3c86SKyungwoo Lee     StableFunctionMapRecord &GlobalFunctionMapRecord,
36dc85d526SKyungwoo Lee     stable_hash *CombinedHash) {
379bb55568SKyungwoo Lee   Triple TT = Obj->makeTriple();
38*ffcf3c86SKyungwoo Lee   auto CGOutlineName =
399bb55568SKyungwoo Lee       getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false);
40*ffcf3c86SKyungwoo Lee   auto CGMergeName =
41*ffcf3c86SKyungwoo Lee       getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false);
42*ffcf3c86SKyungwoo Lee 
43*ffcf3c86SKyungwoo Lee   auto processSectionContents = [&](const StringRef &Name,
44*ffcf3c86SKyungwoo Lee                                     const StringRef &Contents) {
45*ffcf3c86SKyungwoo Lee     if (Name != CGOutlineName && Name != CGMergeName)
46*ffcf3c86SKyungwoo Lee       return;
47*ffcf3c86SKyungwoo Lee     if (CombinedHash)
48*ffcf3c86SKyungwoo Lee       *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents));
49*ffcf3c86SKyungwoo Lee     auto *Data = reinterpret_cast<const unsigned char *>(Contents.data());
50*ffcf3c86SKyungwoo Lee     auto *EndData = Data + Contents.size();
51*ffcf3c86SKyungwoo Lee     // In case dealing with an executable that has concatenated cgdata,
52*ffcf3c86SKyungwoo Lee     // we want to merge them into a single cgdata.
53*ffcf3c86SKyungwoo Lee     // Although it's not a typical workflow, we support this scenario
54*ffcf3c86SKyungwoo Lee     // by looping over all data in the sections.
55*ffcf3c86SKyungwoo Lee     if (Name == CGOutlineName) {
56*ffcf3c86SKyungwoo Lee       while (Data != EndData) {
57*ffcf3c86SKyungwoo Lee         OutlinedHashTreeRecord LocalOutlineRecord;
58*ffcf3c86SKyungwoo Lee         LocalOutlineRecord.deserialize(Data);
59*ffcf3c86SKyungwoo Lee         GlobalOutlineRecord.merge(LocalOutlineRecord);
60*ffcf3c86SKyungwoo Lee       }
61*ffcf3c86SKyungwoo Lee     } else if (Name == CGMergeName) {
62*ffcf3c86SKyungwoo Lee       while (Data != EndData) {
63*ffcf3c86SKyungwoo Lee         StableFunctionMapRecord LocalFunctionMapRecord;
64*ffcf3c86SKyungwoo Lee         LocalFunctionMapRecord.deserialize(Data);
65*ffcf3c86SKyungwoo Lee         GlobalFunctionMapRecord.merge(LocalFunctionMapRecord);
66*ffcf3c86SKyungwoo Lee       }
67*ffcf3c86SKyungwoo Lee     }
68*ffcf3c86SKyungwoo Lee   };
699bb55568SKyungwoo Lee 
709bb55568SKyungwoo Lee   for (auto &Section : Obj->sections()) {
719bb55568SKyungwoo Lee     Expected<StringRef> NameOrErr = Section.getName();
729bb55568SKyungwoo Lee     if (!NameOrErr)
739bb55568SKyungwoo Lee       return NameOrErr.takeError();
749bb55568SKyungwoo Lee     Expected<StringRef> ContentsOrErr = Section.getContents();
759bb55568SKyungwoo Lee     if (!ContentsOrErr)
769bb55568SKyungwoo Lee       return ContentsOrErr.takeError();
77*ffcf3c86SKyungwoo Lee     processSectionContents(*NameOrErr, *ContentsOrErr);
789bb55568SKyungwoo Lee   }
799bb55568SKyungwoo Lee 
809bb55568SKyungwoo Lee   return Error::success();
819bb55568SKyungwoo Lee }
829bb55568SKyungwoo Lee 
839bb55568SKyungwoo Lee Error IndexedCodeGenDataReader::read() {
849bb55568SKyungwoo Lee   using namespace support;
859bb55568SKyungwoo Lee 
86*ffcf3c86SKyungwoo Lee   // The smallest header with the version 1 is 24 bytes.
87*ffcf3c86SKyungwoo Lee   // Do not update this value even with the new version of the header.
889bb55568SKyungwoo Lee   const unsigned MinHeaderSize = 24;
899bb55568SKyungwoo Lee   if (DataBuffer->getBufferSize() < MinHeaderSize)
909bb55568SKyungwoo Lee     return error(cgdata_error::bad_header);
919bb55568SKyungwoo Lee 
929bb55568SKyungwoo Lee   auto *Start =
939bb55568SKyungwoo Lee       reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart());
949bb55568SKyungwoo Lee   auto *End =
959bb55568SKyungwoo Lee       reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd());
969bb55568SKyungwoo Lee   if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header))
979bb55568SKyungwoo Lee     return E;
989bb55568SKyungwoo Lee 
999bb55568SKyungwoo Lee   if (hasOutlinedHashTree()) {
1009bb55568SKyungwoo Lee     const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset;
1019bb55568SKyungwoo Lee     if (Ptr >= End)
1029bb55568SKyungwoo Lee       return error(cgdata_error::eof);
1039bb55568SKyungwoo Lee     HashTreeRecord.deserialize(Ptr);
1049bb55568SKyungwoo Lee   }
105*ffcf3c86SKyungwoo Lee   if (hasStableFunctionMap()) {
106*ffcf3c86SKyungwoo Lee     const unsigned char *Ptr = Start + Header.StableFunctionMapOffset;
107*ffcf3c86SKyungwoo Lee     if (Ptr >= End)
108*ffcf3c86SKyungwoo Lee       return error(cgdata_error::eof);
109*ffcf3c86SKyungwoo Lee     FunctionMapRecord.deserialize(Ptr);
110*ffcf3c86SKyungwoo Lee   }
1119bb55568SKyungwoo Lee 
1129bb55568SKyungwoo Lee   return success();
1139bb55568SKyungwoo Lee }
1149bb55568SKyungwoo Lee 
1159bb55568SKyungwoo Lee Expected<std::unique_ptr<CodeGenDataReader>>
1169bb55568SKyungwoo Lee CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) {
1179bb55568SKyungwoo Lee   // Set up the buffer to read.
1189bb55568SKyungwoo Lee   auto BufferOrError = setupMemoryBuffer(Path, FS);
1199bb55568SKyungwoo Lee   if (Error E = BufferOrError.takeError())
1209bb55568SKyungwoo Lee     return std::move(E);
1219bb55568SKyungwoo Lee   return CodeGenDataReader::create(std::move(BufferOrError.get()));
1229bb55568SKyungwoo Lee }
1239bb55568SKyungwoo Lee 
1249bb55568SKyungwoo Lee Expected<std::unique_ptr<CodeGenDataReader>>
1259bb55568SKyungwoo Lee CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) {
1269bb55568SKyungwoo Lee   if (Buffer->getBufferSize() == 0)
1279bb55568SKyungwoo Lee     return make_error<CGDataError>(cgdata_error::empty_cgdata);
1289bb55568SKyungwoo Lee 
1299bb55568SKyungwoo Lee   std::unique_ptr<CodeGenDataReader> Reader;
1309bb55568SKyungwoo Lee   // Create the reader.
1319bb55568SKyungwoo Lee   if (IndexedCodeGenDataReader::hasFormat(*Buffer))
1329bb55568SKyungwoo Lee     Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer));
1339bb55568SKyungwoo Lee   else if (TextCodeGenDataReader::hasFormat(*Buffer))
1349bb55568SKyungwoo Lee     Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer));
1359bb55568SKyungwoo Lee   else
1369bb55568SKyungwoo Lee     return make_error<CGDataError>(cgdata_error::malformed);
1379bb55568SKyungwoo Lee 
1389bb55568SKyungwoo Lee   // Initialize the reader and return the result.
1399bb55568SKyungwoo Lee   if (Error E = Reader->read())
1409bb55568SKyungwoo Lee     return std::move(E);
1419bb55568SKyungwoo Lee 
1429bb55568SKyungwoo Lee   return std::move(Reader);
1439bb55568SKyungwoo Lee }
1449bb55568SKyungwoo Lee 
1459bb55568SKyungwoo Lee bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) {
1469bb55568SKyungwoo Lee   using namespace support;
1479bb55568SKyungwoo Lee   if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic))
1489bb55568SKyungwoo Lee     return false;
1499bb55568SKyungwoo Lee 
1509bb55568SKyungwoo Lee   uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>(
1519bb55568SKyungwoo Lee       DataBuffer.getBufferStart());
1529bb55568SKyungwoo Lee   // Verify that it's magical.
1539bb55568SKyungwoo Lee   return Magic == IndexedCGData::Magic;
1549bb55568SKyungwoo Lee }
1559bb55568SKyungwoo Lee 
1569bb55568SKyungwoo Lee bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) {
1579bb55568SKyungwoo Lee   // Verify that this really looks like plain ASCII text by checking a
1589bb55568SKyungwoo Lee   // 'reasonable' number of characters (up to the magic size).
1599bb55568SKyungwoo Lee   StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t));
1609bb55568SKyungwoo Lee   return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); });
1619bb55568SKyungwoo Lee }
1629bb55568SKyungwoo Lee Error TextCodeGenDataReader::read() {
1639bb55568SKyungwoo Lee   using namespace support;
1649bb55568SKyungwoo Lee 
1659bb55568SKyungwoo Lee   // Parse the custom header line by line.
1669bb55568SKyungwoo Lee   for (; !Line.is_at_eof(); ++Line) {
1679bb55568SKyungwoo Lee     // Skip empty or whitespace-only lines
1689bb55568SKyungwoo Lee     if (Line->trim().empty())
1699bb55568SKyungwoo Lee       continue;
1709bb55568SKyungwoo Lee 
1719bb55568SKyungwoo Lee     if (!Line->starts_with(":"))
1729bb55568SKyungwoo Lee       break;
1739bb55568SKyungwoo Lee     StringRef Str = Line->drop_front().rtrim();
1749bb55568SKyungwoo Lee     if (Str.equals_insensitive("outlined_hash_tree"))
1759bb55568SKyungwoo Lee       DataKind |= CGDataKind::FunctionOutlinedHashTree;
176*ffcf3c86SKyungwoo Lee     else if (Str.equals_insensitive("stable_function_map"))
177*ffcf3c86SKyungwoo Lee       DataKind |= CGDataKind::StableFunctionMergingMap;
1789bb55568SKyungwoo Lee     else
1799bb55568SKyungwoo Lee       return error(cgdata_error::bad_header);
1809bb55568SKyungwoo Lee   }
1819bb55568SKyungwoo Lee 
1829bb55568SKyungwoo Lee   // We treat an empty header (that is a comment # only) as a valid header.
1839bb55568SKyungwoo Lee   if (Line.is_at_eof()) {
1849bb55568SKyungwoo Lee     if (DataKind == CGDataKind::Unknown)
1859bb55568SKyungwoo Lee       return Error::success();
1869bb55568SKyungwoo Lee     return error(cgdata_error::bad_header);
1879bb55568SKyungwoo Lee   }
1889bb55568SKyungwoo Lee 
1899bb55568SKyungwoo Lee   // The YAML docs follow after the header.
1909bb55568SKyungwoo Lee   const char *Pos = Line->data();
1919bb55568SKyungwoo Lee   size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) -
1929bb55568SKyungwoo Lee                 reinterpret_cast<size_t>(Pos);
1939bb55568SKyungwoo Lee   yaml::Input YOS(StringRef(Pos, Size));
1949bb55568SKyungwoo Lee   if (hasOutlinedHashTree())
1959bb55568SKyungwoo Lee     HashTreeRecord.deserializeYAML(YOS);
196*ffcf3c86SKyungwoo Lee   if (hasStableFunctionMap())
197*ffcf3c86SKyungwoo Lee     FunctionMapRecord.deserializeYAML(YOS);
1989bb55568SKyungwoo Lee 
1999bb55568SKyungwoo Lee   return Error::success();
2009bb55568SKyungwoo Lee }
2019bb55568SKyungwoo Lee } // end namespace llvm
202