1 //===-- CodeGenData.cpp ---------------------------------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains support for codegen data that has stable summary which 10 // can be used to optimize the code in the subsequent codegen. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Bitcode/BitcodeWriter.h" 15 #include "llvm/CGData/CodeGenDataReader.h" 16 #include "llvm/CGData/OutlinedHashTreeRecord.h" 17 #include "llvm/CGData/StableFunctionMapRecord.h" 18 #include "llvm/Object/ObjectFile.h" 19 #include "llvm/Support/Caching.h" 20 #include "llvm/Support/CommandLine.h" 21 #include "llvm/Support/FileSystem.h" 22 #include "llvm/Support/Path.h" 23 #include "llvm/Support/WithColor.h" 24 25 #define DEBUG_TYPE "cg-data" 26 27 using namespace llvm; 28 using namespace cgdata; 29 30 cl::opt<bool> 31 CodeGenDataGenerate("codegen-data-generate", cl::init(false), cl::Hidden, 32 cl::desc("Emit CodeGen Data into custom sections")); 33 cl::opt<std::string> 34 CodeGenDataUsePath("codegen-data-use-path", cl::init(""), cl::Hidden, 35 cl::desc("File path to where .cgdata file is read")); 36 cl::opt<bool> CodeGenDataThinLTOTwoRounds( 37 "codegen-data-thinlto-two-rounds", cl::init(false), cl::Hidden, 38 cl::desc("Enable two-round ThinLTO code generation. The first round " 39 "emits codegen data, while the second round uses the emitted " 40 "codegen data for further optimizations.")); 41 42 static std::string getCGDataErrString(cgdata_error Err, 43 const std::string &ErrMsg = "") { 44 std::string Msg; 45 raw_string_ostream OS(Msg); 46 47 switch (Err) { 48 case cgdata_error::success: 49 OS << "success"; 50 break; 51 case cgdata_error::eof: 52 OS << "end of File"; 53 break; 54 case cgdata_error::bad_magic: 55 OS << "invalid codegen data (bad magic)"; 56 break; 57 case cgdata_error::bad_header: 58 OS << "invalid codegen data (file header is corrupt)"; 59 break; 60 case cgdata_error::empty_cgdata: 61 OS << "empty codegen data"; 62 break; 63 case cgdata_error::malformed: 64 OS << "malformed codegen data"; 65 break; 66 case cgdata_error::unsupported_version: 67 OS << "unsupported codegen data version"; 68 break; 69 } 70 71 // If optional error message is not empty, append it to the message. 72 if (!ErrMsg.empty()) 73 OS << ": " << ErrMsg; 74 75 return OS.str(); 76 } 77 78 namespace { 79 80 // FIXME: This class is only here to support the transition to llvm::Error. It 81 // will be removed once this transition is complete. Clients should prefer to 82 // deal with the Error value directly, rather than converting to error_code. 83 class CGDataErrorCategoryType : public std::error_category { 84 const char *name() const noexcept override { return "llvm.cgdata"; } 85 86 std::string message(int IE) const override { 87 return getCGDataErrString(static_cast<cgdata_error>(IE)); 88 } 89 }; 90 91 } // end anonymous namespace 92 93 const std::error_category &llvm::cgdata_category() { 94 static CGDataErrorCategoryType ErrorCategory; 95 return ErrorCategory; 96 } 97 98 std::string CGDataError::message() const { 99 return getCGDataErrString(Err, Msg); 100 } 101 102 char CGDataError::ID = 0; 103 104 namespace { 105 106 const char *CodeGenDataSectNameCommon[] = { 107 #define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ 108 SectNameCommon, 109 #include "llvm/CGData/CodeGenData.inc" 110 }; 111 112 const char *CodeGenDataSectNameCoff[] = { 113 #define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) \ 114 SectNameCoff, 115 #include "llvm/CGData/CodeGenData.inc" 116 }; 117 118 const char *CodeGenDataSectNamePrefix[] = { 119 #define CG_DATA_SECT_ENTRY(Kind, SectNameCommon, SectNameCoff, Prefix) Prefix, 120 #include "llvm/CGData/CodeGenData.inc" 121 }; 122 123 } // namespace 124 125 namespace llvm { 126 127 std::string getCodeGenDataSectionName(CGDataSectKind CGSK, 128 Triple::ObjectFormatType OF, 129 bool AddSegmentInfo) { 130 std::string SectName; 131 132 if (OF == Triple::MachO && AddSegmentInfo) 133 SectName = CodeGenDataSectNamePrefix[CGSK]; 134 135 if (OF == Triple::COFF) 136 SectName += CodeGenDataSectNameCoff[CGSK]; 137 else 138 SectName += CodeGenDataSectNameCommon[CGSK]; 139 140 return SectName; 141 } 142 143 std::unique_ptr<CodeGenData> CodeGenData::Instance = nullptr; 144 std::once_flag CodeGenData::OnceFlag; 145 146 CodeGenData &CodeGenData::getInstance() { 147 std::call_once(CodeGenData::OnceFlag, []() { 148 Instance = std::unique_ptr<CodeGenData>(new CodeGenData()); 149 150 if (CodeGenDataGenerate || CodeGenDataThinLTOTwoRounds) 151 Instance->EmitCGData = true; 152 else if (!CodeGenDataUsePath.empty()) { 153 // Initialize the global CGData if the input file name is given. 154 // We do not error-out when failing to parse the input file. 155 // Instead, just emit an warning message and fall back as if no CGData 156 // were available. 157 auto FS = vfs::getRealFileSystem(); 158 auto ReaderOrErr = CodeGenDataReader::create(CodeGenDataUsePath, *FS); 159 if (Error E = ReaderOrErr.takeError()) { 160 warn(std::move(E), CodeGenDataUsePath); 161 return; 162 } 163 // Publish each CGData based on the data type in the header. 164 auto Reader = ReaderOrErr->get(); 165 if (Reader->hasOutlinedHashTree()) 166 Instance->publishOutlinedHashTree(Reader->releaseOutlinedHashTree()); 167 if (Reader->hasStableFunctionMap()) 168 Instance->publishStableFunctionMap(Reader->releaseStableFunctionMap()); 169 } 170 }); 171 return *Instance; 172 } 173 174 namespace IndexedCGData { 175 176 Expected<Header> Header::readFromBuffer(const unsigned char *Curr) { 177 using namespace support; 178 179 static_assert(std::is_standard_layout_v<llvm::IndexedCGData::Header>, 180 "The header should be standard layout type since we use offset " 181 "of fields to read."); 182 Header H; 183 H.Magic = endian::readNext<uint64_t, endianness::little, unaligned>(Curr); 184 if (H.Magic != IndexedCGData::Magic) 185 return make_error<CGDataError>(cgdata_error::bad_magic); 186 H.Version = endian::readNext<uint32_t, endianness::little, unaligned>(Curr); 187 if (H.Version > IndexedCGData::CGDataVersion::CurrentVersion) 188 return make_error<CGDataError>(cgdata_error::unsupported_version); 189 H.DataKind = endian::readNext<uint32_t, endianness::little, unaligned>(Curr); 190 191 static_assert(IndexedCGData::CGDataVersion::CurrentVersion == Version2, 192 "Please update the offset computation below if a new field has " 193 "been added to the header."); 194 H.OutlinedHashTreeOffset = 195 endian::readNext<uint64_t, endianness::little, unaligned>(Curr); 196 if (H.Version >= 2) 197 H.StableFunctionMapOffset = 198 endian::readNext<uint64_t, endianness::little, unaligned>(Curr); 199 200 return H; 201 } 202 203 } // end namespace IndexedCGData 204 205 namespace cgdata { 206 207 void warn(Twine Message, std::string Whence, std::string Hint) { 208 WithColor::warning(); 209 if (!Whence.empty()) 210 errs() << Whence << ": "; 211 errs() << Message << "\n"; 212 if (!Hint.empty()) 213 WithColor::note() << Hint << "\n"; 214 } 215 216 void warn(Error E, StringRef Whence) { 217 if (E.isA<CGDataError>()) { 218 handleAllErrors(std::move(E), [&](const CGDataError &IPE) { 219 warn(IPE.message(), Whence.str(), ""); 220 }); 221 } 222 } 223 224 void saveModuleForTwoRounds(const Module &TheModule, unsigned Task, 225 AddStreamFn AddStream) { 226 LLVM_DEBUG(dbgs() << "Saving module: " << TheModule.getModuleIdentifier() 227 << " in Task " << Task << "\n"); 228 Expected<std::unique_ptr<CachedFileStream>> StreamOrErr = 229 AddStream(Task, TheModule.getModuleIdentifier()); 230 if (Error Err = StreamOrErr.takeError()) 231 report_fatal_error(std::move(Err)); 232 std::unique_ptr<CachedFileStream> &Stream = *StreamOrErr; 233 234 WriteBitcodeToFile(TheModule, *Stream->OS, 235 /*ShouldPreserveUseListOrder=*/true); 236 } 237 238 std::unique_ptr<Module> loadModuleForTwoRounds(BitcodeModule &OrigModule, 239 unsigned Task, 240 LLVMContext &Context, 241 ArrayRef<StringRef> IRFiles) { 242 LLVM_DEBUG(dbgs() << "Loading module: " << OrigModule.getModuleIdentifier() 243 << " in Task " << Task << "\n"); 244 auto FileBuffer = MemoryBuffer::getMemBuffer( 245 IRFiles[Task], "in-memory IR file", /*RequiresNullTerminator=*/false); 246 auto RestoredModule = parseBitcodeFile(*FileBuffer, Context); 247 if (!RestoredModule) 248 report_fatal_error( 249 Twine("Failed to parse optimized bitcode loaded for Task: ") + 250 Twine(Task) + "\n"); 251 252 // Restore the original module identifier. 253 (*RestoredModule)->setModuleIdentifier(OrigModule.getModuleIdentifier()); 254 return std::move(*RestoredModule); 255 } 256 257 Expected<stable_hash> mergeCodeGenData(ArrayRef<StringRef> ObjFiles) { 258 OutlinedHashTreeRecord GlobalOutlineRecord; 259 StableFunctionMapRecord GlobalStableFunctionMapRecord; 260 stable_hash CombinedHash = 0; 261 for (auto File : ObjFiles) { 262 if (File.empty()) 263 continue; 264 std::unique_ptr<MemoryBuffer> Buffer = MemoryBuffer::getMemBuffer( 265 File, "in-memory object file", /*RequiresNullTerminator=*/false); 266 Expected<std::unique_ptr<object::ObjectFile>> BinOrErr = 267 object::ObjectFile::createObjectFile(Buffer->getMemBufferRef()); 268 if (!BinOrErr) 269 return BinOrErr.takeError(); 270 271 std::unique_ptr<object::ObjectFile> &Obj = BinOrErr.get(); 272 if (auto E = CodeGenDataReader::mergeFromObjectFile( 273 Obj.get(), GlobalOutlineRecord, GlobalStableFunctionMapRecord, 274 &CombinedHash)) 275 return E; 276 } 277 278 GlobalStableFunctionMapRecord.finalize(); 279 280 if (!GlobalOutlineRecord.empty()) 281 cgdata::publishOutlinedHashTree(std::move(GlobalOutlineRecord.HashTree)); 282 if (!GlobalStableFunctionMapRecord.empty()) 283 cgdata::publishStableFunctionMap( 284 std::move(GlobalStableFunctionMapRecord.FunctionMap)); 285 286 return CombinedHash; 287 } 288 289 } // end namespace cgdata 290 291 } // end namespace llvm 292