1 //===- CodeGenDataReader.cpp ----------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for reading codegen data. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "llvm/CGData/CodeGenDataReader.h" 14 #include "llvm/CGData/OutlinedHashTreeRecord.h" 15 #include "llvm/Object/ObjectFile.h" 16 #include "llvm/Support/MemoryBuffer.h" 17 18 #define DEBUG_TYPE "cg-data-reader" 19 20 using namespace llvm; 21 22 namespace llvm { 23 24 static Expected<std::unique_ptr<MemoryBuffer>> 25 setupMemoryBuffer(const Twine &Filename, vfs::FileSystem &FS) { 26 auto BufferOrErr = Filename.str() == "-" ? MemoryBuffer::getSTDIN() 27 : FS.getBufferForFile(Filename); 28 if (std::error_code EC = BufferOrErr.getError()) 29 return errorCodeToError(EC); 30 return std::move(BufferOrErr.get()); 31 } 32 33 Error CodeGenDataReader::mergeFromObjectFile( 34 const object::ObjectFile *Obj, OutlinedHashTreeRecord &GlobalOutlineRecord, 35 StableFunctionMapRecord &GlobalFunctionMapRecord, 36 stable_hash *CombinedHash) { 37 Triple TT = Obj->makeTriple(); 38 auto CGOutlineName = 39 getCodeGenDataSectionName(CG_outline, TT.getObjectFormat(), false); 40 auto CGMergeName = 41 getCodeGenDataSectionName(CG_merge, TT.getObjectFormat(), false); 42 43 auto processSectionContents = [&](const StringRef &Name, 44 const StringRef &Contents) { 45 if (Name != CGOutlineName && Name != CGMergeName) 46 return; 47 if (CombinedHash) 48 *CombinedHash = stable_hash_combine(*CombinedHash, xxh3_64bits(Contents)); 49 auto *Data = reinterpret_cast<const unsigned char *>(Contents.data()); 50 auto *EndData = Data + Contents.size(); 51 // In case dealing with an executable that has concatenated cgdata, 52 // we want to merge them into a single cgdata. 53 // Although it's not a typical workflow, we support this scenario 54 // by looping over all data in the sections. 55 if (Name == CGOutlineName) { 56 while (Data != EndData) { 57 OutlinedHashTreeRecord LocalOutlineRecord; 58 LocalOutlineRecord.deserialize(Data); 59 GlobalOutlineRecord.merge(LocalOutlineRecord); 60 } 61 } else if (Name == CGMergeName) { 62 while (Data != EndData) { 63 StableFunctionMapRecord LocalFunctionMapRecord; 64 LocalFunctionMapRecord.deserialize(Data); 65 GlobalFunctionMapRecord.merge(LocalFunctionMapRecord); 66 } 67 } 68 }; 69 70 for (auto &Section : Obj->sections()) { 71 Expected<StringRef> NameOrErr = Section.getName(); 72 if (!NameOrErr) 73 return NameOrErr.takeError(); 74 Expected<StringRef> ContentsOrErr = Section.getContents(); 75 if (!ContentsOrErr) 76 return ContentsOrErr.takeError(); 77 processSectionContents(*NameOrErr, *ContentsOrErr); 78 } 79 80 return Error::success(); 81 } 82 83 Error IndexedCodeGenDataReader::read() { 84 using namespace support; 85 86 // The smallest header with the version 1 is 24 bytes. 87 // Do not update this value even with the new version of the header. 88 const unsigned MinHeaderSize = 24; 89 if (DataBuffer->getBufferSize() < MinHeaderSize) 90 return error(cgdata_error::bad_header); 91 92 auto *Start = 93 reinterpret_cast<const unsigned char *>(DataBuffer->getBufferStart()); 94 auto *End = 95 reinterpret_cast<const unsigned char *>(DataBuffer->getBufferEnd()); 96 if (auto E = IndexedCGData::Header::readFromBuffer(Start).moveInto(Header)) 97 return E; 98 99 if (hasOutlinedHashTree()) { 100 const unsigned char *Ptr = Start + Header.OutlinedHashTreeOffset; 101 if (Ptr >= End) 102 return error(cgdata_error::eof); 103 HashTreeRecord.deserialize(Ptr); 104 } 105 if (hasStableFunctionMap()) { 106 const unsigned char *Ptr = Start + Header.StableFunctionMapOffset; 107 if (Ptr >= End) 108 return error(cgdata_error::eof); 109 FunctionMapRecord.deserialize(Ptr); 110 } 111 112 return success(); 113 } 114 115 Expected<std::unique_ptr<CodeGenDataReader>> 116 CodeGenDataReader::create(const Twine &Path, vfs::FileSystem &FS) { 117 // Set up the buffer to read. 118 auto BufferOrError = setupMemoryBuffer(Path, FS); 119 if (Error E = BufferOrError.takeError()) 120 return std::move(E); 121 return CodeGenDataReader::create(std::move(BufferOrError.get())); 122 } 123 124 Expected<std::unique_ptr<CodeGenDataReader>> 125 CodeGenDataReader::create(std::unique_ptr<MemoryBuffer> Buffer) { 126 if (Buffer->getBufferSize() == 0) 127 return make_error<CGDataError>(cgdata_error::empty_cgdata); 128 129 std::unique_ptr<CodeGenDataReader> Reader; 130 // Create the reader. 131 if (IndexedCodeGenDataReader::hasFormat(*Buffer)) 132 Reader = std::make_unique<IndexedCodeGenDataReader>(std::move(Buffer)); 133 else if (TextCodeGenDataReader::hasFormat(*Buffer)) 134 Reader = std::make_unique<TextCodeGenDataReader>(std::move(Buffer)); 135 else 136 return make_error<CGDataError>(cgdata_error::malformed); 137 138 // Initialize the reader and return the result. 139 if (Error E = Reader->read()) 140 return std::move(E); 141 142 return std::move(Reader); 143 } 144 145 bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer &DataBuffer) { 146 using namespace support; 147 if (DataBuffer.getBufferSize() < sizeof(IndexedCGData::Magic)) 148 return false; 149 150 uint64_t Magic = endian::read<uint64_t, llvm::endianness::little, aligned>( 151 DataBuffer.getBufferStart()); 152 // Verify that it's magical. 153 return Magic == IndexedCGData::Magic; 154 } 155 156 bool TextCodeGenDataReader::hasFormat(const MemoryBuffer &Buffer) { 157 // Verify that this really looks like plain ASCII text by checking a 158 // 'reasonable' number of characters (up to the magic size). 159 StringRef Prefix = Buffer.getBuffer().take_front(sizeof(uint64_t)); 160 return llvm::all_of(Prefix, [](char c) { return isPrint(c) || isSpace(c); }); 161 } 162 Error TextCodeGenDataReader::read() { 163 using namespace support; 164 165 // Parse the custom header line by line. 166 for (; !Line.is_at_eof(); ++Line) { 167 // Skip empty or whitespace-only lines 168 if (Line->trim().empty()) 169 continue; 170 171 if (!Line->starts_with(":")) 172 break; 173 StringRef Str = Line->drop_front().rtrim(); 174 if (Str.equals_insensitive("outlined_hash_tree")) 175 DataKind |= CGDataKind::FunctionOutlinedHashTree; 176 else if (Str.equals_insensitive("stable_function_map")) 177 DataKind |= CGDataKind::StableFunctionMergingMap; 178 else 179 return error(cgdata_error::bad_header); 180 } 181 182 // We treat an empty header (that is a comment # only) as a valid header. 183 if (Line.is_at_eof()) { 184 if (DataKind == CGDataKind::Unknown) 185 return Error::success(); 186 return error(cgdata_error::bad_header); 187 } 188 189 // The YAML docs follow after the header. 190 const char *Pos = Line->data(); 191 size_t Size = reinterpret_cast<size_t>(DataBuffer->getBufferEnd()) - 192 reinterpret_cast<size_t>(Pos); 193 yaml::Input YOS(StringRef(Pos, Size)); 194 if (hasOutlinedHashTree()) 195 HashTreeRecord.deserializeYAML(YOS); 196 if (hasStableFunctionMap()) 197 FunctionMapRecord.deserializeYAML(YOS); 198 199 return Error::success(); 200 } 201 } // end namespace llvm 202