xref: /llvm-project/clang/include/clang/Serialization/ModuleFile.h (revision 9d4837f47c48c634d4a0ac799188e1f5332495ef)
1 //===- ModuleFile.h - Module file description -------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
//  This file defines the ModuleFile class, which describes a module that has
//  been loaded from an AST file.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_SERIALIZATION_MODULEFILE_H
15 #define LLVM_CLANG_SERIALIZATION_MODULEFILE_H
16 
17 #include "clang/Basic/FileManager.h"
18 #include "clang/Basic/LLVM.h"
19 #include "clang/Basic/Module.h"
20 #include "clang/Basic/SourceLocation.h"
21 #include "clang/Serialization/ASTBitCodes.h"
22 #include "clang/Serialization/ContinuousRangeMap.h"
23 #include "clang/Serialization/ModuleFileExtension.h"
24 #include "llvm/ADT/BitVector.h"
25 #include "llvm/ADT/DenseMap.h"
26 #include "llvm/ADT/PointerIntPair.h"
27 #include "llvm/ADT/SetVector.h"
28 #include "llvm/ADT/SmallVector.h"
29 #include "llvm/ADT/StringRef.h"
30 #include "llvm/Bitstream/BitstreamReader.h"
31 #include "llvm/Support/Endian.h"
32 #include <cassert>
33 #include <cstdint>
34 #include <memory>
35 #include <string>
36 #include <vector>
37 
38 namespace clang {
39 
40 namespace serialization {
41 
/// Describes how a loaded AST file is being used.
enum ModuleKind {
  /// A module that was loaded implicitly.
  MK_ImplicitModule,

  /// A module that was loaded explicitly.
  MK_ExplicitModule,

  /// A PCH file that is treated as a PCH.
  MK_PCH,

  /// A PCH file that is treated as the preamble.
  MK_Preamble,

  /// A PCH file that is treated as the actual main file.
  MK_MainFile,

  /// A module that comes from a prebuilt module path.
  MK_PrebuiltModule,
};
62 
63 /// The input file info that has been loaded from an AST file.
64 struct InputFileInfo {
65   StringRef UnresolvedImportedFilenameAsRequested;
66   StringRef UnresolvedImportedFilename;
67 
68   uint64_t ContentHash;
69   off_t StoredSize;
70   time_t StoredTime;
71   bool Overridden;
72   bool Transient;
73   bool TopLevel;
74   bool ModuleMap;
75 
76   bool isValid() const {
77     return !UnresolvedImportedFilenameAsRequested.empty();
78   }
79 };
80 
81 /// The input file that has been loaded from this AST file, along with
82 /// bools indicating whether this was an overridden buffer or if it was
83 /// out-of-date or not-found.
84 class InputFile {
85   enum {
86     Overridden = 1,
87     OutOfDate = 2,
88     NotFound = 3
89   };
90   llvm::PointerIntPair<const FileEntryRef::MapEntry *, 2, unsigned> Val;
91 
92 public:
93   InputFile() = default;
94 
95   InputFile(FileEntryRef File, bool isOverridden = false,
96             bool isOutOfDate = false) {
97     unsigned intVal = 0;
98     // Make isOutOfDate with higher priority than isOverridden.
99     // It is possible if the recorded hash value mismatches.
100     if (isOutOfDate)
101       intVal = OutOfDate;
102     else if (isOverridden)
103       intVal = Overridden;
104     Val.setPointerAndInt(&File.getMapEntry(), intVal);
105   }
106 
107   static InputFile getNotFound() {
108     InputFile File;
109     File.Val.setInt(NotFound);
110     return File;
111   }
112 
113   OptionalFileEntryRef getFile() const {
114     if (auto *P = Val.getPointer())
115       return FileEntryRef(*P);
116     return std::nullopt;
117   }
118   bool isOverridden() const { return Val.getInt() == Overridden; }
119   bool isOutOfDate() const { return Val.getInt() == OutOfDate; }
120   bool isNotFound() const { return Val.getInt() == NotFound; }
121 };
122 
123 /// Information about a module that has been loaded by the ASTReader.
124 ///
125 /// Each instance of the Module class corresponds to a single AST file, which
126 /// may be a precompiled header, precompiled preamble, a module, or an AST file
127 /// of some sort loaded as the main file, all of which are specific formulations
128 /// of the general notion of a "module". A module may depend on any number of
129 /// other modules.
130 class ModuleFile {
131 public:
132   ModuleFile(ModuleKind Kind, FileEntryRef File, unsigned Generation)
133       : Kind(Kind), File(File), Generation(Generation) {}
134   ~ModuleFile();
135 
136   // === General information ===
137 
138   /// The index of this module in the list of modules.
139   unsigned Index = 0;
140 
141   /// The type of this module.
142   ModuleKind Kind;
143 
144   /// The file name of the module file.
145   std::string FileName;
146 
147   /// The name of the module.
148   std::string ModuleName;
149 
150   /// The base directory of the module.
151   std::string BaseDirectory;
152 
153   static std::string getTimestampFilename(StringRef FileName) {
154     return (FileName + ".timestamp").str();
155   }
156 
157   /// The original source file name that was used to build the
158   /// primary AST file, which may have been modified for
159   /// relocatable-pch support.
160   std::string OriginalSourceFileName;
161 
162   /// The actual original source file name that was used to
163   /// build this AST file.
164   std::string ActualOriginalSourceFileName;
165 
166   /// The file ID for the original source file that was used to
167   /// build this AST file.
168   FileID OriginalSourceFileID;
169 
170   std::string ModuleMapPath;
171 
172   /// Whether this precompiled header is a relocatable PCH file.
173   bool RelocatablePCH = false;
174 
175   /// Whether this module file is a standard C++ module.
176   bool StandardCXXModule = false;
177 
178   /// Whether timestamps are included in this module file.
179   bool HasTimestamps = false;
180 
181   /// Whether the top-level module has been read from the AST file.
182   bool DidReadTopLevelSubmodule = false;
183 
184   /// The file entry for the module file.
185   FileEntryRef File;
186 
187   /// The signature of the module file, which may be used instead of the size
188   /// and modification time to identify this particular file.
189   ASTFileSignature Signature;
190 
191   /// The signature of the AST block of the module file, this can be used to
192   /// unique module files based on AST contents.
193   ASTFileSignature ASTBlockHash;
194 
195   /// The bit vector denoting usage of each header search entry (true = used).
196   llvm::BitVector SearchPathUsage;
197 
198   /// The bit vector denoting usage of each VFS entry (true = used).
199   llvm::BitVector VFSUsage;
200 
201   /// Whether this module has been directly imported by the
202   /// user.
203   bool DirectlyImported = false;
204 
205   /// The generation of which this module file is a part.
206   unsigned Generation;
207 
208   /// The memory buffer that stores the data associated with
209   /// this AST file, owned by the InMemoryModuleCache.
210   llvm::MemoryBuffer *Buffer = nullptr;
211 
212   /// The size of this file, in bits.
213   uint64_t SizeInBits = 0;
214 
215   /// The global bit offset (or base) of this module
216   uint64_t GlobalBitOffset = 0;
217 
218   /// The bit offset of the AST block of this module.
219   uint64_t ASTBlockStartOffset = 0;
220 
221   /// The serialized bitstream data for this file.
222   StringRef Data;
223 
224   /// The main bitstream cursor for the main block.
225   llvm::BitstreamCursor Stream;
226 
227   /// The source location where the module was explicitly or implicitly
228   /// imported in the local translation unit.
229   ///
230   /// If module A depends on and imports module B, both modules will have the
231   /// same DirectImportLoc, but different ImportLoc (B's ImportLoc will be a
232   /// source location inside module A).
233   ///
234   /// WARNING: This is largely useless. It doesn't tell you when a module was
235   /// made visible, just when the first submodule of that module was imported.
236   SourceLocation DirectImportLoc;
237 
238   /// The source location where this module was first imported.
239   SourceLocation ImportLoc;
240 
241   /// The first source location in this module.
242   SourceLocation FirstLoc;
243 
244   /// The list of extension readers that are attached to this module
245   /// file.
246   std::vector<std::unique_ptr<ModuleFileExtensionReader>> ExtensionReaders;
247 
248   /// The module offset map data for this file. If non-empty, the various
249   /// ContinuousRangeMaps described below have not yet been populated.
250   StringRef ModuleOffsetMap;
251 
252   // === Input Files ===
253 
254   /// The cursor to the start of the input-files block.
255   llvm::BitstreamCursor InputFilesCursor;
256 
257   /// Absolute offset of the start of the input-files block.
258   uint64_t InputFilesOffsetBase = 0;
259 
260   /// Relative offsets for all of the input file entries in the AST file.
261   const llvm::support::unaligned_uint64_t *InputFileOffsets = nullptr;
262 
263   /// The input files that have been loaded from this AST file.
264   std::vector<InputFile> InputFilesLoaded;
265 
266   /// The input file infos that have been loaded from this AST file.
267   std::vector<InputFileInfo> InputFileInfosLoaded;
268 
269   // All user input files reside at the index range [0, NumUserInputFiles), and
270   // system input files reside at [NumUserInputFiles, InputFilesLoaded.size()).
271   unsigned NumUserInputFiles = 0;
272 
273   /// If non-zero, specifies the time when we last validated input
274   /// files.  Zero means we never validated them.
275   ///
276   /// The time is specified in seconds since the start of the Epoch.
277   uint64_t InputFilesValidationTimestamp = 0;
278 
279   // === Source Locations ===
280 
281   /// Cursor used to read source location entries.
282   llvm::BitstreamCursor SLocEntryCursor;
283 
284   /// The bit offset to the start of the SOURCE_MANAGER_BLOCK.
285   uint64_t SourceManagerBlockStartOffset = 0;
286 
287   /// The number of source location entries in this AST file.
288   unsigned LocalNumSLocEntries = 0;
289 
290   /// The base ID in the source manager's view of this module.
291   int SLocEntryBaseID = 0;
292 
293   /// The base offset in the source manager's view of this module.
294   SourceLocation::UIntTy SLocEntryBaseOffset = 0;
295 
296   /// Base file offset for the offsets in SLocEntryOffsets. Real file offset
297   /// for the entry is SLocEntryOffsetsBase + SLocEntryOffsets[i].
298   uint64_t SLocEntryOffsetsBase = 0;
299 
300   /// Offsets for all of the source location entries in the
301   /// AST file.
302   const uint32_t *SLocEntryOffsets = nullptr;
303 
304   // === Identifiers ===
305 
306   /// The number of identifiers in this AST file.
307   unsigned LocalNumIdentifiers = 0;
308 
309   /// Offsets into the identifier table data.
310   ///
311   /// This array is indexed by the identifier ID (-1), and provides
312   /// the offset into IdentifierTableData where the string data is
313   /// stored.
314   const uint32_t *IdentifierOffsets = nullptr;
315 
316   /// Base identifier ID for identifiers local to this module.
317   serialization::IdentifierID BaseIdentifierID = 0;
318 
319   /// Actual data for the on-disk hash table of identifiers.
320   ///
321   /// This pointer points into a memory buffer, where the on-disk hash
322   /// table for identifiers actually lives.
323   const unsigned char *IdentifierTableData = nullptr;
324 
325   /// A pointer to an on-disk hash table of opaque type
326   /// IdentifierHashTable.
327   void *IdentifierLookupTable = nullptr;
328 
329   /// Offsets of identifiers that we're going to preload within
330   /// IdentifierTableData.
331   std::vector<unsigned> PreloadIdentifierOffsets;
332 
333   // === Macros ===
334 
335   /// The cursor to the start of the preprocessor block, which stores
336   /// all of the macro definitions.
337   llvm::BitstreamCursor MacroCursor;
338 
339   /// The number of macros in this AST file.
340   unsigned LocalNumMacros = 0;
341 
342   /// Base file offset for the offsets in MacroOffsets. Real file offset for
343   /// the entry is MacroOffsetsBase + MacroOffsets[i].
344   uint64_t MacroOffsetsBase = 0;
345 
346   /// Offsets of macros in the preprocessor block.
347   ///
348   /// This array is indexed by the macro ID (-1), and provides
349   /// the offset into the preprocessor block where macro definitions are
350   /// stored.
351   const uint32_t *MacroOffsets = nullptr;
352 
353   /// Base macro ID for macros local to this module.
354   serialization::MacroID BaseMacroID = 0;
355 
356   /// Remapping table for macro IDs in this module.
357   ContinuousRangeMap<uint32_t, int, 2> MacroRemap;
358 
359   /// The offset of the start of the set of defined macros.
360   uint64_t MacroStartOffset = 0;
361 
362   // === Detailed PreprocessingRecord ===
363 
364   /// The cursor to the start of the (optional) detailed preprocessing
365   /// record block.
366   llvm::BitstreamCursor PreprocessorDetailCursor;
367 
368   /// The offset of the start of the preprocessor detail cursor.
369   uint64_t PreprocessorDetailStartOffset = 0;
370 
371   /// Base preprocessed entity ID for preprocessed entities local to
372   /// this module.
373   serialization::PreprocessedEntityID BasePreprocessedEntityID = 0;
374 
375   /// Remapping table for preprocessed entity IDs in this module.
376   ContinuousRangeMap<uint32_t, int, 2> PreprocessedEntityRemap;
377 
378   const PPEntityOffset *PreprocessedEntityOffsets = nullptr;
379   unsigned NumPreprocessedEntities = 0;
380 
381   /// Base ID for preprocessed skipped ranges local to this module.
382   unsigned BasePreprocessedSkippedRangeID = 0;
383 
384   const PPSkippedRange *PreprocessedSkippedRangeOffsets = nullptr;
385   unsigned NumPreprocessedSkippedRanges = 0;
386 
387   // === Header search information ===
388 
389   /// The number of local HeaderFileInfo structures.
390   unsigned LocalNumHeaderFileInfos = 0;
391 
392   /// Actual data for the on-disk hash table of header file
393   /// information.
394   ///
395   /// This pointer points into a memory buffer, where the on-disk hash
396   /// table for header file information actually lives.
397   const char *HeaderFileInfoTableData = nullptr;
398 
399   /// The on-disk hash table that contains information about each of
400   /// the header files.
401   void *HeaderFileInfoTable = nullptr;
402 
403   // === Submodule information ===
404 
405   /// The number of submodules in this module.
406   unsigned LocalNumSubmodules = 0;
407 
408   /// Base submodule ID for submodules local to this module.
409   serialization::SubmoduleID BaseSubmoduleID = 0;
410 
411   /// Remapping table for submodule IDs in this module.
412   ContinuousRangeMap<uint32_t, int, 2> SubmoduleRemap;
413 
414   // === Selectors ===
415 
416   /// The number of selectors new to this file.
417   ///
418   /// This is the number of entries in SelectorOffsets.
419   unsigned LocalNumSelectors = 0;
420 
421   /// Offsets into the selector lookup table's data array
422   /// where each selector resides.
423   const uint32_t *SelectorOffsets = nullptr;
424 
425   /// Base selector ID for selectors local to this module.
426   serialization::SelectorID BaseSelectorID = 0;
427 
428   /// Remapping table for selector IDs in this module.
429   ContinuousRangeMap<uint32_t, int, 2> SelectorRemap;
430 
431   /// A pointer to the character data that comprises the selector table
432   ///
433   /// The SelectorOffsets table refers into this memory.
434   const unsigned char *SelectorLookupTableData = nullptr;
435 
436   /// A pointer to an on-disk hash table of opaque type
437   /// ASTSelectorLookupTable.
438   ///
439   /// This hash table provides the IDs of all selectors, and the associated
440   /// instance and factory methods.
441   void *SelectorLookupTable = nullptr;
442 
443   // === Declarations ===
444 
445   /// DeclsCursor - This is a cursor to the start of the DECLTYPES_BLOCK block.
446   /// It has read all the abbreviations at the start of the block and is ready
447   /// to jump around with these in context.
448   llvm::BitstreamCursor DeclsCursor;
449 
450   /// The offset to the start of the DECLTYPES_BLOCK block.
451   uint64_t DeclsBlockStartOffset = 0;
452 
453   /// The number of declarations in this AST file.
454   unsigned LocalNumDecls = 0;
455 
456   /// Offset of each declaration within the bitstream, indexed
457   /// by the declaration ID (-1).
458   const DeclOffset *DeclOffsets = nullptr;
459 
460   /// Base declaration index in ASTReader for declarations local to this module.
461   unsigned BaseDeclIndex = 0;
462 
463   /// Array of file-level DeclIDs sorted by file.
464   const serialization::unaligned_decl_id_t *FileSortedDecls = nullptr;
465   unsigned NumFileSortedDecls = 0;
466 
467   /// Array of category list location information within this
468   /// module file, sorted by the definition ID.
469   const serialization::ObjCCategoriesInfo *ObjCCategoriesMap = nullptr;
470 
471   /// The number of redeclaration info entries in ObjCCategoriesMap.
472   unsigned LocalNumObjCCategoriesInMap = 0;
473 
474   /// The Objective-C category lists for categories known to this
475   /// module.
476   SmallVector<uint64_t, 1> ObjCCategories;
477 
478   // === Types ===
479 
480   /// The number of types in this AST file.
481   unsigned LocalNumTypes = 0;
482 
483   /// Offset of each type within the bitstream, indexed by the
484   /// type ID, or the representation of a Type*.
485   const UnalignedUInt64 *TypeOffsets = nullptr;
486 
487   /// Base type ID for types local to this module as represented in
488   /// the global type ID space.
489   serialization::TypeID BaseTypeIndex = 0;
490 
491   // === Miscellaneous ===
492 
493   /// Diagnostic IDs and their mappings that the user changed.
494   SmallVector<uint64_t, 8> PragmaDiagMappings;
495 
496   /// List of modules which depend on this module
497   llvm::SetVector<ModuleFile *> ImportedBy;
498 
499   /// List of modules which this module directly imported
500   llvm::SetVector<ModuleFile *> Imports;
501 
502   /// List of modules which this modules dependent on. Different
503   /// from `Imports`, this includes indirectly imported modules too.
504   /// The order of TransitiveImports is significant. It should keep
505   /// the same order with that module file manager when we write
506   /// the current module file. The value of the member will be initialized
507   /// in `ASTReader::ReadModuleOffsetMap`.
508   llvm::SmallVector<ModuleFile *, 16> TransitiveImports;
509 
510   /// Determine whether this module was directly imported at
511   /// any point during translation.
512   bool isDirectlyImported() const { return DirectlyImported; }
513 
514   /// Is this a module file for a module (rather than a PCH or similar).
515   bool isModule() const {
516     return Kind == MK_ImplicitModule || Kind == MK_ExplicitModule ||
517            Kind == MK_PrebuiltModule;
518   }
519 
520   /// Dump debugging output for this module.
521   void dump();
522 };
523 
524 } // namespace serialization
525 
526 } // namespace clang
527 
528 #endif // LLVM_CLANG_SERIALIZATION_MODULEFILE_H
529