19bb55568SKyungwoo Lee //===- CodeGenDataReader.cpp ----------------------------------------------===// 29bb55568SKyungwoo Lee // 39bb55568SKyungwoo Lee // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 49bb55568SKyungwoo Lee // See https://llvm.org/LICENSE.txt for license information. 59bb55568SKyungwoo Lee // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 69bb55568SKyungwoo Lee // 79bb55568SKyungwoo Lee //===----------------------------------------------------------------------===// 89bb55568SKyungwoo Lee // 99bb55568SKyungwoo Lee // This file contains support for reading codegen data. 109bb55568SKyungwoo Lee // 119bb55568SKyungwoo Lee //===----------------------------------------------------------------------===// 129bb55568SKyungwoo Lee 139bb55568SKyungwoo Lee #include "llvm/CGData/CodeGenDataReader.h" 149bb55568SKyungwoo Lee #include "llvm/CGData/OutlinedHashTreeRecord.h" 159bb55568SKyungwoo Lee #include "llvm/Object/ObjectFile.h" 169bb55568SKyungwoo Lee #include "llvm/Support/MemoryBuffer.h" 179bb55568SKyungwoo Lee 189bb55568SKyungwoo Lee #define DEBUG_TYPE "cg-data-reader" 199bb55568SKyungwoo Lee 209bb55568SKyungwoo Lee using namespace llvm; 219bb55568SKyungwoo Lee 229bb55568SKyungwoo Lee namespace llvm { 239bb55568SKyungwoo Lee 249bb55568SKyungwoo Lee static Expected<std::unique_ptr<MemoryBuffer>> 259bb55568SKyungwoo Lee setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { 269bb55568SKyungwoo Lee auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN() 279bb55568SKyungwoo Lee : FS.getBufferForFile(Filename); 289bb55568SKyungwoo Lee if (std::error_code EC = BufferOrErr.getError()) 299bb55568SKyungwoo Lee return errorCodeToError(EC); 309bb55568SKyungwoo Lee return std::move(BufferOrErr.get()); 319bb55568SKyungwoo Lee } 329bb55568SKyungwoo Lee 339bb55568SKyungwoo Lee Error CodeGenDataReader::mergeFromObjectFile( 34dc85d526SKyungwoo Lee const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord, 35*ffcf3c86SKyungwoo Lee StableFunctionMapRecord &GlobalFunctionMapRecord, 36dc85d526SKyungwoo Lee stable_hash *CombinedHash) { 379bb55568SKyungwoo Lee Triple TT = Obj->makeTriple(); 38*ffcf3c86SKyungwoo Lee auto CGOutlineName = 399bb55568SKyungwoo Lee getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false); 40*ffcf3c86SKyungwoo Lee auto CGMergeName = 41*ffcf3c86SKyungwoo Lee getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false); 42*ffcf3c86SKyungwoo Lee 43*ffcf3c86SKyungwoo Lee auto processSectionContents = [&](const StringRef &Name, 44*ffcf3c86SKyungwoo Lee const StringRef &Contents) { 45*ffcf3c86SKyungwoo Lee if (Name != CGOutlineName && Name != CGMergeName) 46*ffcf3c86SKyungwoo Lee return; 47*ffcf3c86SKyungwoo Lee if (CombinedHash) 48*ffcf3c86SKyungwoo Lee *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents)); 49*ffcf3c86SKyungwoo Lee auto *Data = reinterpret_cast<const unsigned char *>(Contents.data()); 50*ffcf3c86SKyungwoo Lee auto *EndData = Data + Contents.size(); 51*ffcf3c86SKyungwoo Lee // In case dealing with an executable that has concatenated cgdata, 52*ffcf3c86SKyungwoo Lee // we want to merge them into a single cgdata. 53*ffcf3c86SKyungwoo Lee // Although it's not a typical workflow, we support this scenario 54*ffcf3c86SKyungwoo Lee // by looping over all data in the sections. 55*ffcf3c86SKyungwoo Lee if (Name == CGOutlineName) { 56*ffcf3c86SKyungwoo Lee while (Data != EndData) { 57*ffcf3c86SKyungwoo Lee OutlinedHashTreeRecord LocalOutlineRecord; 58*ffcf3c86SKyungwoo Lee LocalOutlineRecord.deserialize(Data); 59*ffcf3c86SKyungwoo Lee GlobalOutlineRecord.merge(LocalOutlineRecord); 60*ffcf3c86SKyungwoo Lee } 61*ffcf3c86SKyungwoo Lee } else if (Name == CGMergeName) { 62*ffcf3c86SKyungwoo Lee while (Data != EndData) { 63*ffcf3c86SKyungwoo Lee StableFunctionMapRecord LocalFunctionMapRecord; 64*ffcf3c86SKyungwoo Lee LocalFunctionMapRecord.deserialize(Data); 65*ffcf3c86SKyungwoo Lee GlobalFunctionMapRecord.merge(LocalFunctionMapRecord); 66*ffcf3c86SKyungwoo Lee } 67*ffcf3c86SKyungwoo Lee } 68*ffcf3c86SKyungwoo Lee }; 699bb55568SKyungwoo Lee 709bb55568SKyungwoo Lee for (auto &Section : Obj->sections()) { 719bb55568SKyungwoo Lee Expected<StringRef> NameOrErr = Section.getName(); 729bb55568SKyungwoo Lee if (!NameOrErr) 739bb55568SKyungwoo Lee return NameOrErr.takeError(); 749bb55568SKyungwoo Lee Expected<StringRef> ContentsOrErr = Section.getContents(); 759bb55568SKyungwoo Lee if (!ContentsOrErr) 769bb55568SKyungwoo Lee return ContentsOrErr.takeError(); 77*ffcf3c86SKyungwoo Lee processSectionContents(*NameOrErr, *ContentsOrErr); 789bb55568SKyungwoo Lee } 799bb55568SKyungwoo Lee 809bb55568SKyungwoo Lee return Error::success(); 819bb55568SKyungwoo Lee } 829bb55568SKyungwoo Lee 839bb55568SKyungwoo Lee Error IndexedCodeGenDataReader::read() { 849bb55568SKyungwoo Lee using namespace support; 859bb55568SKyungwoo Lee 86*ffcf3c86SKyungwoo Lee // The smallest header with the version 1 is 24 bytes. 87*ffcf3c86SKyungwoo Lee // Do not update this value even with the new version of the header. 889bb55568SKyungwoo Lee const unsigned MinHeaderSize = 24; 899bb55568SKyungwoo Lee if (DataBuffer->getBufferSize() < MinHeaderSize) 909bb55568SKyungwoo Lee return error(cgdata_error::bad_header); 919bb55568SKyungwoo Lee 929bb55568SKyungwoo Lee auto *Start = 939bb55568SKyungwoo Lee reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart()); 949bb55568SKyungwoo Lee auto *End = 959bb55568SKyungwoo Lee reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd()); 969bb55568SKyungwoo Lee if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header)) 979bb55568SKyungwoo Lee return E; 989bb55568SKyungwoo Lee 999bb55568SKyungwoo Lee if (hasOutlinedHashTree()) { 1009bb55568SKyungwoo Lee const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset; 1019bb55568SKyungwoo Lee if (Ptr >= End) 1029bb55568SKyungwoo Lee return error(cgdata_error::eof); 1039bb55568SKyungwoo Lee HashTreeRecord.deserialize(Ptr); 1049bb55568SKyungwoo Lee } 105*ffcf3c86SKyungwoo Lee if (hasStableFunctionMap()) { 106*ffcf3c86SKyungwoo Lee const unsigned char *Ptr = Start + Header.StableFunctionMapOffset; 107*ffcf3c86SKyungwoo Lee if (Ptr >= End) 108*ffcf3c86SKyungwoo Lee return error(cgdata_error::eof); 109*ffcf3c86SKyungwoo Lee FunctionMapRecord.deserialize(Ptr); 110*ffcf3c86SKyungwoo Lee } 1119bb55568SKyungwoo Lee 1129bb55568SKyungwoo Lee return success(); 1139bb55568SKyungwoo Lee } 1149bb55568SKyungwoo Lee 1159bb55568SKyungwoo Lee Expected<std::unique_ptr<CodeGenDataReader>> 1169bb55568SKyungwoo Lee CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) { 1179bb55568SKyungwoo Lee // Set up the buffer to read. 1189bb55568SKyungwoo Lee auto BufferOrError = setupMemoryBuffer(Path, FS); 1199bb55568SKyungwoo Lee if (Error E = BufferOrError.takeError()) 1209bb55568SKyungwoo Lee return std::move(E); 1219bb55568SKyungwoo Lee return CodeGenDataReader::create(std::move(BufferOrError.get())); 1229bb55568SKyungwoo Lee } 1239bb55568SKyungwoo Lee 1249bb55568SKyungwoo Lee Expected<std::unique_ptr<CodeGenDataReader>> 1259bb55568SKyungwoo Lee CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) { 1269bb55568SKyungwoo Lee if (Buffer->getBufferSize() == 0) 1279bb55568SKyungwoo Lee return make_error<CGDataError>(cgdata_error::empty_cgdata); 1289bb55568SKyungwoo Lee 1299bb55568SKyungwoo Lee std::unique_ptr<CodeGenDataReader> Reader; 1309bb55568SKyungwoo Lee // Create the reader. 1319bb55568SKyungwoo Lee if (IndexedCodeGenDataReader::hasFormat(*Buffer)) 1329bb55568SKyungwoo Lee Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer)); 1339bb55568SKyungwoo Lee else if (TextCodeGenDataReader::hasFormat(*Buffer)) 1349bb55568SKyungwoo Lee Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer)); 1359bb55568SKyungwoo Lee else 1369bb55568SKyungwoo Lee return make_error<CGDataError>(cgdata_error::malformed); 1379bb55568SKyungwoo Lee 1389bb55568SKyungwoo Lee // Initialize the reader and return the result. 1399bb55568SKyungwoo Lee if (Error E = Reader->read()) 1409bb55568SKyungwoo Lee return std::move(E); 1419bb55568SKyungwoo Lee 1429bb55568SKyungwoo Lee return std::move(Reader); 1439bb55568SKyungwoo Lee } 1449bb55568SKyungwoo Lee 1459bb55568SKyungwoo Lee bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) { 1469bb55568SKyungwoo Lee using namespace support; 1479bb55568SKyungwoo Lee if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic)) 1489bb55568SKyungwoo Lee return false; 1499bb55568SKyungwoo Lee 1509bb55568SKyungwoo Lee uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>( 1519bb55568SKyungwoo Lee DataBuffer.getBufferStart()); 1529bb55568SKyungwoo Lee // Verify that it's magical. 1539bb55568SKyungwoo Lee return Magic == IndexedCGData::Magic; 1549bb55568SKyungwoo Lee } 1559bb55568SKyungwoo Lee 1569bb55568SKyungwoo Lee bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) { 1579bb55568SKyungwoo Lee // Verify that this really looks like plain ASCII text by checking a 1589bb55568SKyungwoo Lee // 'reasonable' number of characters (up to the magic size). 1599bb55568SKyungwoo Lee StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t)); 1609bb55568SKyungwoo Lee return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); }); 1619bb55568SKyungwoo Lee } 1629bb55568SKyungwoo Lee Error TextCodeGenDataReader::read() { 1639bb55568SKyungwoo Lee using namespace support; 1649bb55568SKyungwoo Lee 1659bb55568SKyungwoo Lee // Parse the custom header line by line. 1669bb55568SKyungwoo Lee for (; !Line.is_at_eof(); ++Line) { 1679bb55568SKyungwoo Lee // Skip empty or whitespace-only lines 1689bb55568SKyungwoo Lee if (Line->trim().empty()) 1699bb55568SKyungwoo Lee continue; 1709bb55568SKyungwoo Lee 1719bb55568SKyungwoo Lee if (!Line->starts_with(":")) 1729bb55568SKyungwoo Lee break; 1739bb55568SKyungwoo Lee StringRef Str = Line->drop_front().rtrim(); 1749bb55568SKyungwoo Lee if (Str.equals_insensitive("outlined_hash_tree")) 1759bb55568SKyungwoo Lee DataKind |= CGDataKind::FunctionOutlinedHashTree; 176*ffcf3c86SKyungwoo Lee else if (Str.equals_insensitive("stable_function_map")) 177*ffcf3c86SKyungwoo Lee DataKind |= CGDataKind::StableFunctionMergingMap; 1789bb55568SKyungwoo Lee else 1799bb55568SKyungwoo Lee return error(cgdata_error::bad_header); 1809bb55568SKyungwoo Lee } 1819bb55568SKyungwoo Lee 1829bb55568SKyungwoo Lee // We treat an empty header (that is a comment # only) as a valid header. 1839bb55568SKyungwoo Lee if (Line.is_at_eof()) { 1849bb55568SKyungwoo Lee if (DataKind == CGDataKind::Unknown) 1859bb55568SKyungwoo Lee return Error::success(); 1869bb55568SKyungwoo Lee return error(cgdata_error::bad_header); 1879bb55568SKyungwoo Lee } 1889bb55568SKyungwoo Lee 1899bb55568SKyungwoo Lee // The YAML docs follow after the header. 1909bb55568SKyungwoo Lee const char *Pos = Line->data(); 1919bb55568SKyungwoo Lee size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) - 1929bb55568SKyungwoo Lee reinterpret_cast<size_t>(Pos); 1939bb55568SKyungwoo Lee yaml::Input YOS(StringRef(Pos, Size)); 1949bb55568SKyungwoo Lee if (hasOutlinedHashTree()) 1959bb55568SKyungwoo Lee HashTreeRecord.deserializeYAML(YOS); 196*ffcf3c86SKyungwoo Lee if (hasStableFunctionMap()) 197*ffcf3c86SKyungwoo Lee FunctionMapRecord.deserializeYAML(YOS); 1989bb55568SKyungwoo Lee 1999bb55568SKyungwoo Lee return Error::success(); 2009bb55568SKyungwoo Lee } 2019bb55568SKyungwoo Lee } // end namespace llvm 202