1 //===- ModuleFile.h - Module file description -------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines the Module class, which describes a module that has 10 // been loaded from an AST file. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_SERIALIZATION_MODULEFILE_H 15 #define LLVM_CLANG_SERIALIZATION_MODULEFILE_H 16 17 #include "clang/Basic/FileManager.h" 18 #include "clang/Basic/LLVM.h" 19 #include "clang/Basic/Module.h" 20 #include "clang/Basic/SourceLocation.h" 21 #include "clang/Serialization/ASTBitCodes.h" 22 #include "clang/Serialization/ContinuousRangeMap.h" 23 #include "clang/Serialization/ModuleFileExtension.h" 24 #include "llvm/ADT/BitVector.h" 25 #include "llvm/ADT/DenseMap.h" 26 #include "llvm/ADT/PointerIntPair.h" 27 #include "llvm/ADT/SetVector.h" 28 #include "llvm/ADT/SmallVector.h" 29 #include "llvm/ADT/StringRef.h" 30 #include "llvm/Bitstream/BitstreamReader.h" 31 #include "llvm/Support/Endian.h" 32 #include <cassert> 33 #include <cstdint> 34 #include <memory> 35 #include <string> 36 #include <vector> 37 38 namespace clang { 39 40 namespace serialization { 41 42 /// Specifies the kind of module that has been loaded. 43 enum ModuleKind { 44 /// File is an implicitly-loaded module. 45 MK_ImplicitModule, 46 47 /// File is an explicitly-loaded module. 48 MK_ExplicitModule, 49 50 /// File is a PCH file treated as such. 51 MK_PCH, 52 53 /// File is a PCH file treated as the preamble. 54 MK_Preamble, 55 56 /// File is a PCH file treated as the actual main file. 57 MK_MainFile, 58 59 /// File is from a prebuilt module path. 60 MK_PrebuiltModule 61 }; 62 63 /// The input file info that has been loaded from an AST file. 64 struct InputFileInfo { 65 StringRef UnresolvedImportedFilenameAsRequested; 66 StringRef UnresolvedImportedFilename; 67 68 uint64_t ContentHash; 69 off_t StoredSize; 70 time_t StoredTime; 71 bool Overridden; 72 bool Transient; 73 bool TopLevel; 74 bool ModuleMap; 75 76 bool isValid() const { 77 return !UnresolvedImportedFilenameAsRequested.empty(); 78 } 79 }; 80 81 /// The input file that has been loaded from this AST file, along with 82 /// bools indicating whether this was an overridden buffer or if it was 83 /// out-of-date or not-found. 84 class InputFile { 85 enum { 86 Overridden = 1, 87 OutOfDate = 2, 88 NotFound = 3 89 }; 90 llvm::PointerIntPair<const FileEntryRef::MapEntry *, 2, unsigned> Val; 91 92 public: 93 InputFile() = default; 94 95 InputFile(FileEntryRef File, bool isOverridden = false, 96 bool isOutOfDate = false) { 97 unsigned intVal = 0; 98 // Make isOutOfDate with higher priority than isOverridden. 99 // It is possible if the recorded hash value mismatches. 100 if (isOutOfDate) 101 intVal = OutOfDate; 102 else if (isOverridden) 103 intVal = Overridden; 104 Val.setPointerAndInt(&File.getMapEntry(), intVal); 105 } 106 107 static InputFile getNotFound() { 108 InputFile File; 109 File.Val.setInt(NotFound); 110 return File; 111 } 112 113 OptionalFileEntryRef getFile() const { 114 if (auto *P = Val.getPointer()) 115 return FileEntryRef(*P); 116 return std::nullopt; 117 } 118 bool isOverridden() const { return Val.getInt() == Overridden; } 119 bool isOutOfDate() const { return Val.getInt() == OutOfDate; } 120 bool isNotFound() const { return Val.getInt() == NotFound; } 121 }; 122 123 /// Information about a module that has been loaded by the ASTReader. 124 /// 125 /// Each instance of the Module class corresponds to a single AST file, which 126 /// may be a precompiled header, precompiled preamble, a module, or an AST file 127 /// of some sort loaded as the main file, all of which are specific formulations 128 /// of the general notion of a "module". A module may depend on any number of 129 /// other modules. 130 class ModuleFile { 131 public: 132 ModuleFile(ModuleKind Kind, FileEntryRef File, unsigned Generation) 133 : Kind(Kind), File(File), Generation(Generation) {} 134 ~ModuleFile(); 135 136 // === General information === 137 138 /// The index of this module in the list of modules. 139 unsigned Index = 0; 140 141 /// The type of this module. 142 ModuleKind Kind; 143 144 /// The file name of the module file. 145 std::string FileName; 146 147 /// The name of the module. 148 std::string ModuleName; 149 150 /// The base directory of the module. 151 std::string BaseDirectory; 152 153 static std::string getTimestampFilename(StringRef FileName) { 154 return (FileName + ".timestamp").str(); 155 } 156 157 /// The original source file name that was used to build the 158 /// primary AST file, which may have been modified for 159 /// relocatable-pch support. 160 std::string OriginalSourceFileName; 161 162 /// The actual original source file name that was used to 163 /// build this AST file. 164 std::string ActualOriginalSourceFileName; 165 166 /// The file ID for the original source file that was used to 167 /// build this AST file. 168 FileID OriginalSourceFileID; 169 170 std::string ModuleMapPath; 171 172 /// Whether this precompiled header is a relocatable PCH file. 173 bool RelocatablePCH = false; 174 175 /// Whether this module file is a standard C++ module. 176 bool StandardCXXModule = false; 177 178 /// Whether timestamps are included in this module file. 179 bool HasTimestamps = false; 180 181 /// Whether the top-level module has been read from the AST file. 182 bool DidReadTopLevelSubmodule = false; 183 184 /// The file entry for the module file. 185 FileEntryRef File; 186 187 /// The signature of the module file, which may be used instead of the size 188 /// and modification time to identify this particular file. 189 ASTFileSignature Signature; 190 191 /// The signature of the AST block of the module file, this can be used to 192 /// unique module files based on AST contents. 193 ASTFileSignature ASTBlockHash; 194 195 /// The bit vector denoting usage of each header search entry (true = used). 196 llvm::BitVector SearchPathUsage; 197 198 /// The bit vector denoting usage of each VFS entry (true = used). 199 llvm::BitVector VFSUsage; 200 201 /// Whether this module has been directly imported by the 202 /// user. 203 bool DirectlyImported = false; 204 205 /// The generation of which this module file is a part. 206 unsigned Generation; 207 208 /// The memory buffer that stores the data associated with 209 /// this AST file, owned by the InMemoryModuleCache. 210 llvm::MemoryBuffer *Buffer = nullptr; 211 212 /// The size of this file, in bits. 213 uint64_t SizeInBits = 0; 214 215 /// The global bit offset (or base) of this module 216 uint64_t GlobalBitOffset = 0; 217 218 /// The bit offset of the AST block of this module. 219 uint64_t ASTBlockStartOffset = 0; 220 221 /// The serialized bitstream data for this file. 222 StringRef Data; 223 224 /// The main bitstream cursor for the main block. 225 llvm::BitstreamCursor Stream; 226 227 /// The source location where the module was explicitly or implicitly 228 /// imported in the local translation unit. 229 /// 230 /// If module A depends on and imports module B, both modules will have the 231 /// same DirectImportLoc, but different ImportLoc (B's ImportLoc will be a 232 /// source location inside module A). 233 /// 234 /// WARNING: This is largely useless. It doesn't tell you when a module was 235 /// made visible, just when the first submodule of that module was imported. 236 SourceLocation DirectImportLoc; 237 238 /// The source location where this module was first imported. 239 SourceLocation ImportLoc; 240 241 /// The first source location in this module. 242 SourceLocation FirstLoc; 243 244 /// The list of extension readers that are attached to this module 245 /// file. 246 std::vector<std::unique_ptr<ModuleFileExtensionReader>> ExtensionReaders; 247 248 /// The module offset map data for this file. If non-empty, the various 249 /// ContinuousRangeMaps described below have not yet been populated. 250 StringRef ModuleOffsetMap; 251 252 // === Input Files === 253 254 /// The cursor to the start of the input-files block. 255 llvm::BitstreamCursor InputFilesCursor; 256 257 /// Absolute offset of the start of the input-files block. 258 uint64_t InputFilesOffsetBase = 0; 259 260 /// Relative offsets for all of the input file entries in the AST file. 261 const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr; 262 263 /// The input files that have been loaded from this AST file. 264 std::vector<InputFile> InputFilesLoaded; 265 266 /// The input file infos that have been loaded from this AST file. 267 std::vector<InputFileInfo> InputFileInfosLoaded; 268 269 // All user input files reside at the index range [0, NumUserInputFiles), and 270 // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()). 271 unsigned NumUserInputFiles = 0; 272 273 /// If non-zero, specifies the time when we last validated input 274 /// files. Zero means we never validated them. 275 /// 276 /// The time is specified in seconds since the start of the Epoch. 277 uint64_t InputFilesValidationTimestamp = 0; 278 279 // === Source Locations === 280 281 /// Cursor used to read source location entries. 282 llvm::BitstreamCursor SLocEntryCursor; 283 284 /// The bit offset to the start of the SOURCE_MANAGER_BLOCK. 285 uint64_t SourceManagerBlockStartOffset = 0; 286 287 /// The number of source location entries in this AST file. 288 unsigned LocalNumSLocEntries = 0; 289 290 /// The base ID in the source manager's view of this module. 291 int SLocEntryBaseID = 0; 292 293 /// The base offset in the source manager's view of this module. 294 SourceLocation::UIntTy SLocEntryBaseOffset = 0; 295 296 /// Base file offset for the offsets in SLocEntryOffsets. Real file offset 297 /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i]. 298 uint64_t SLocEntryOffsetsBase = 0; 299 300 /// Offsets for all of the source location entries in the 301 /// AST file. 302 const uint32_t *SLocEntryOffsets = nullptr; 303 304 // === Identifiers === 305 306 /// The number of identifiers in this AST file. 307 unsigned LocalNumIdentifiers = 0; 308 309 /// Offsets into the identifier table data. 310 /// 311 /// This array is indexed by the identifier ID (-1), and provides 312 /// the offset into IdentifierTableData where the string data is 313 /// stored. 314 const uint32_t *IdentifierOffsets = nullptr; 315 316 /// Base identifier ID for identifiers local to this module. 317 serialization::IdentifierID BaseIdentifierID = 0; 318 319 /// Actual data for the on-disk hash table of identifiers. 320 /// 321 /// This pointer points into a memory buffer, where the on-disk hash 322 /// table for identifiers actually lives. 323 const unsigned char *IdentifierTableData = nullptr; 324 325 /// A pointer to an on-disk hash table of opaque type 326 /// IdentifierHashTable. 327 void *IdentifierLookupTable = nullptr; 328 329 /// Offsets of identifiers that we're going to preload within 330 /// IdentifierTableData. 331 std::vector<unsigned> PreloadIdentifierOffsets; 332 333 // === Macros === 334 335 /// The cursor to the start of the preprocessor block, which stores 336 /// all of the macro definitions. 337 llvm::BitstreamCursor MacroCursor; 338 339 /// The number of macros in this AST file. 340 unsigned LocalNumMacros = 0; 341 342 /// Base file offset for the offsets in MacroOffsets. Real file offset for 343 /// the entry is MacroOffsetsBase + MacroOffsets[i]. 344 uint64_t MacroOffsetsBase = 0; 345 346 /// Offsets of macros in the preprocessor block. 347 /// 348 /// This array is indexed by the macro ID (-1), and provides 349 /// the offset into the preprocessor block where macro definitions are 350 /// stored. 351 const uint32_t *MacroOffsets = nullptr; 352 353 /// Base macro ID for macros local to this module. 354 serialization::MacroID BaseMacroID = 0; 355 356 /// Remapping table for macro IDs in this module. 357 ContinuousRangeMap<uint32_t, int, 2> MacroRemap; 358 359 /// The offset of the start of the set of defined macros. 360 uint64_t MacroStartOffset = 0; 361 362 // === Detailed PreprocessingRecord === 363 364 /// The cursor to the start of the (optional) detailed preprocessing 365 /// record block. 366 llvm::BitstreamCursor PreprocessorDetailCursor; 367 368 /// The offset of the start of the preprocessor detail cursor. 369 uint64_t PreprocessorDetailStartOffset = 0; 370 371 /// Base preprocessed entity ID for preprocessed entities local to 372 /// this module. 373 serialization::PreprocessedEntityID BasePreprocessedEntityID = 0; 374 375 /// Remapping table for preprocessed entity IDs in this module. 376 ContinuousRangeMap<uint32_t, int, 2> PreprocessedEntityRemap; 377 378 const PPEntityOffset *PreprocessedEntityOffsets = nullptr; 379 unsigned NumPreprocessedEntities = 0; 380 381 /// Base ID for preprocessed skipped ranges local to this module. 382 unsigned BasePreprocessedSkippedRangeID = 0; 383 384 const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr; 385 unsigned NumPreprocessedSkippedRanges = 0; 386 387 // === Header search information === 388 389 /// The number of local HeaderFileInfo structures. 390 unsigned LocalNumHeaderFileInfos = 0; 391 392 /// Actual data for the on-disk hash table of header file 393 /// information. 394 /// 395 /// This pointer points into a memory buffer, where the on-disk hash 396 /// table for header file information actually lives. 397 const char *HeaderFileInfoTableData = nullptr; 398 399 /// The on-disk hash table that contains information about each of 400 /// the header files. 401 void *HeaderFileInfoTable = nullptr; 402 403 // === Submodule information === 404 405 /// The number of submodules in this module. 406 unsigned LocalNumSubmodules = 0; 407 408 /// Base submodule ID for submodules local to this module. 409 serialization::SubmoduleID BaseSubmoduleID = 0; 410 411 /// Remapping table for submodule IDs in this module. 412 ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap; 413 414 // === Selectors === 415 416 /// The number of selectors new to this file. 417 /// 418 /// This is the number of entries in SelectorOffsets. 419 unsigned LocalNumSelectors = 0; 420 421 /// Offsets into the selector lookup table's data array 422 /// where each selector resides. 423 const uint32_t *SelectorOffsets = nullptr; 424 425 /// Base selector ID for selectors local to this module. 426 serialization::SelectorID BaseSelectorID = 0; 427 428 /// Remapping table for selector IDs in this module. 429 ContinuousRangeMap<uint32_t, int, 2> SelectorRemap; 430 431 /// A pointer to the character data that comprises the selector table 432 /// 433 /// The SelectorOffsets table refers into this memory. 434 const unsigned char *SelectorLookupTableData = nullptr; 435 436 /// A pointer to an on-disk hash table of opaque type 437 /// ASTSelectorLookupTable. 438 /// 439 /// This hash table provides the IDs of all selectors, and the associated 440 /// instance and factory methods. 441 void *SelectorLookupTable = nullptr; 442 443 // === Declarations === 444 445 /// DeclsCursor - This is a cursor to the start of the DECLTYPES_BLOCK block. 446 /// It has read all the abbreviations at the start of the block and is ready 447 /// to jump around with these in context. 448 llvm::BitstreamCursor DeclsCursor; 449 450 /// The offset to the start of the DECLTYPES_BLOCK block. 451 uint64_t DeclsBlockStartOffset = 0; 452 453 /// The number of declarations in this AST file. 454 unsigned LocalNumDecls = 0; 455 456 /// Offset of each declaration within the bitstream, indexed 457 /// by the declaration ID (-1). 458 const DeclOffset *DeclOffsets = nullptr; 459 460 /// Base declaration index in ASTReader for declarations local to this module. 461 unsigned BaseDeclIndex = 0; 462 463 /// Array of file-level DeclIDs sorted by file. 464 const serialization::unaligned_decl_id_t *FileSortedDecls = nullptr; 465 unsigned NumFileSortedDecls = 0; 466 467 /// Array of category list location information within this 468 /// module file, sorted by the definition ID. 469 const serialization::ObjCCategoriesInfo *ObjCCategoriesMap = nullptr; 470 471 /// The number of redeclaration info entries in ObjCCategoriesMap. 472 unsigned LocalNumObjCCategoriesInMap = 0; 473 474 /// The Objective-C category lists for categories known to this 475 /// module. 476 SmallVector<uint64_t, 1> ObjCCategories; 477 478 // === Types === 479 480 /// The number of types in this AST file. 481 unsigned LocalNumTypes = 0; 482 483 /// Offset of each type within the bitstream, indexed by the 484 /// type ID, or the representation of a Type*. 485 const UnalignedUInt64 *TypeOffsets = nullptr; 486 487 /// Base type ID for types local to this module as represented in 488 /// the global type ID space. 489 serialization::TypeID BaseTypeIndex = 0; 490 491 // === Miscellaneous === 492 493 /// Diagnostic IDs and their mappings that the user changed. 494 SmallVector<uint64_t, 8> PragmaDiagMappings; 495 496 /// List of modules which depend on this module 497 llvm::SetVector<ModuleFile *> ImportedBy; 498 499 /// List of modules which this module directly imported 500 llvm::SetVector<ModuleFile *> Imports; 501 502 /// List of modules which this modules dependent on. Different 503 /// from `Imports`, this includes indirectly imported modules too. 504 /// The order of TransitiveImports is significant. It should keep 505 /// the same order with that module file manager when we write 506 /// the current module file. The value of the member will be initialized 507 /// in `ASTReader::ReadModuleOffsetMap`. 508 llvm::SmallVector<ModuleFile *, 16> TransitiveImports; 509 510 /// Determine whether this module was directly imported at 511 /// any point during translation. 512 bool isDirectlyImported() const { return DirectlyImported; } 513 514 /// Is this a module file for a module (rather than a PCH or similar). 515 bool isModule() const { 516 return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule || 517 Kind == MK_PrebuiltModule; 518 } 519 520 /// Dump debugging output for this module. 521 void dump(); 522 }; 523 524 } // namespace serialization 525 526 } // namespace clang 527 528 #endif // LLVM_CLANG_SERIALIZATION_MODULEFILE_H 529