xref: /llvm-project/clang/lib/Serialization/ModuleManager.cpp (revision 8f0df9f3bbc6d7f3d5cbfd955c5ee4404c53a75d)
1 //===- ModuleManager.cpp - Module Manager ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines the ModuleManager class, which manages a set of loaded
10 //  modules for the ASTReader.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Serialization/ModuleManager.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/LLVM.h"
17 #include "clang/Lex/HeaderSearch.h"
18 #include "clang/Lex/ModuleMap.h"
19 #include "clang/Serialization/GlobalModuleIndex.h"
20 #include "clang/Serialization/InMemoryModuleCache.h"
21 #include "clang/Serialization/ModuleFile.h"
22 #include "clang/Serialization/PCHContainerOperations.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/ADT/iterator.h"
29 #include "llvm/Support/Chrono.h"
30 #include "llvm/Support/DOTGraphTraits.h"
31 #include "llvm/Support/ErrorOr.h"
32 #include "llvm/Support/GraphWriter.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Support/VirtualFileSystem.h"
35 #include <algorithm>
36 #include <cassert>
37 #include <memory>
38 #include <optional>
39 #include <string>
40 #include <system_error>
41 
42 using namespace clang;
43 using namespace serialization;
44 
45 ModuleFile *ModuleManager::lookupByFileName(StringRef Name) const {
46   auto Entry = FileMgr.getFile(Name, /*OpenFile=*/false,
47                                /*CacheFailure=*/false);
48   if (Entry)
49     return lookup(*Entry);
50 
51   return nullptr;
52 }
53 
54 ModuleFile *ModuleManager::lookupByModuleName(StringRef Name) const {
55   if (const Module *Mod = HeaderSearchInfo.getModuleMap().findModule(Name))
56     if (const FileEntry *File = Mod->getASTFile())
57       return lookup(File);
58 
59   return nullptr;
60 }
61 
62 ModuleFile *ModuleManager::lookup(const FileEntry *File) const {
63   auto Known = Modules.find(File);
64   if (Known == Modules.end())
65     return nullptr;
66 
67   return Known->second;
68 }
69 
70 std::unique_ptr<llvm::MemoryBuffer>
71 ModuleManager::lookupBuffer(StringRef Name) {
72   auto Entry = FileMgr.getFile(Name, /*OpenFile=*/false,
73                                /*CacheFailure=*/false);
74   if (!Entry)
75     return nullptr;
76   return std::move(InMemoryBuffers[*Entry]);
77 }
78 
79 static bool checkSignature(ASTFileSignature Signature,
80                            ASTFileSignature ExpectedSignature,
81                            std::string &ErrorStr) {
82   if (!ExpectedSignature || Signature == ExpectedSignature)
83     return false;
84 
85   ErrorStr =
86       Signature ? "signature mismatch" : "could not read module signature";
87   return true;
88 }
89 
90 static void updateModuleImports(ModuleFile &MF, ModuleFile *ImportedBy,
91                                 SourceLocation ImportLoc) {
92   if (ImportedBy) {
93     MF.ImportedBy.insert(ImportedBy);
94     ImportedBy->Imports.insert(&MF);
95   } else {
96     if (!MF.DirectlyImported)
97       MF.ImportLoc = ImportLoc;
98 
99     MF.DirectlyImported = true;
100   }
101 }
102 
103 ModuleManager::AddModuleResult
104 ModuleManager::addModule(StringRef FileName, ModuleKind Type,
105                          SourceLocation ImportLoc, ModuleFile *ImportedBy,
106                          unsigned Generation,
107                          off_t ExpectedSize, time_t ExpectedModTime,
108                          ASTFileSignature ExpectedSignature,
109                          ASTFileSignatureReader ReadSignature,
110                          ModuleFile *&Module,
111                          std::string &ErrorStr) {
112   Module = nullptr;
113 
114   // Look for the file entry. This only fails if the expected size or
115   // modification time differ.
116   OptionalFileEntryRefDegradesToFileEntryPtr Entry;
117   if (Type == MK_ExplicitModule || Type == MK_PrebuiltModule) {
118     // If we're not expecting to pull this file out of the module cache, it
119     // might have a different mtime due to being moved across filesystems in
120     // a distributed build. The size must still match, though. (As must the
121     // contents, but we can't check that.)
122     ExpectedModTime = 0;
123   }
124   // Note: ExpectedSize and ExpectedModTime will be 0 for MK_ImplicitModule
125   // when using an ASTFileSignature.
126   if (lookupModuleFile(FileName, ExpectedSize, ExpectedModTime, Entry)) {
127     ErrorStr = "module file out of date";
128     return OutOfDate;
129   }
130 
131   if (!Entry && FileName != "-") {
132     ErrorStr = "module file not found";
133     return Missing;
134   }
135 
136   // The ModuleManager's use of FileEntry nodes as the keys for its map of
137   // loaded modules is less than ideal. Uniqueness for FileEntry nodes is
138   // maintained by FileManager, which in turn uses inode numbers on hosts
139   // that support that. When coupled with the module cache's proclivity for
140   // turning over and deleting stale PCMs, this means entries for different
141   // module files can wind up reusing the same underlying inode. When this
142   // happens, subsequent accesses to the Modules map will disagree on the
143   // ModuleFile associated with a given file. In general, it is not sufficient
144   // to resolve this conundrum with a type like FileEntryRef that stores the
145   // name of the FileEntry node on first access because of path canonicalization
146   // issues. However, the paths constructed for implicit module builds are
147   // fully under Clang's control. We *can*, therefore, rely on their structure
148   // being consistent across operating systems and across subsequent accesses
149   // to the Modules map.
150   auto implicitModuleNamesMatch = [](ModuleKind Kind, const ModuleFile *MF,
151                                      const FileEntry *Entry) -> bool {
152     if (Kind != MK_ImplicitModule)
153       return true;
154     return Entry->getName() == MF->FileName;
155   };
156 
157   // Check whether we already loaded this module, before
158   if (ModuleFile *ModuleEntry = Modules.lookup(Entry)) {
159     if (implicitModuleNamesMatch(Type, ModuleEntry, Entry)) {
160       // Check the stored signature.
161       if (checkSignature(ModuleEntry->Signature, ExpectedSignature, ErrorStr))
162         return OutOfDate;
163 
164       Module = ModuleEntry;
165       updateModuleImports(*ModuleEntry, ImportedBy, ImportLoc);
166       return AlreadyLoaded;
167     }
168   }
169 
170   // Allocate a new module.
171   auto NewModule = std::make_unique<ModuleFile>(Type, Generation);
172   NewModule->Index = Chain.size();
173   NewModule->FileName = FileName.str();
174   NewModule->File = Entry;
175   NewModule->ImportLoc = ImportLoc;
176   NewModule->InputFilesValidationTimestamp = 0;
177 
178   if (NewModule->Kind == MK_ImplicitModule) {
179     std::string TimestampFilename = NewModule->getTimestampFilename();
180     llvm::vfs::Status Status;
181     // A cached stat value would be fine as well.
182     if (!FileMgr.getNoncachedStatValue(TimestampFilename, Status))
183       NewModule->InputFilesValidationTimestamp =
184           llvm::sys::toTimeT(Status.getLastModificationTime());
185   }
186 
187   // Load the contents of the module
188   if (std::unique_ptr<llvm::MemoryBuffer> Buffer = lookupBuffer(FileName)) {
189     // The buffer was already provided for us.
190     NewModule->Buffer = &ModuleCache->addBuiltPCM(FileName, std::move(Buffer));
191     // Since the cached buffer is reused, it is safe to close the file
192     // descriptor that was opened while stat()ing the PCM in
193     // lookupModuleFile() above, it won't be needed any longer.
194     Entry->closeFile();
195   } else if (llvm::MemoryBuffer *Buffer =
196                  getModuleCache().lookupPCM(FileName)) {
197     NewModule->Buffer = Buffer;
198     // As above, the file descriptor is no longer needed.
199     Entry->closeFile();
200   } else if (getModuleCache().shouldBuildPCM(FileName)) {
201     // Report that the module is out of date, since we tried (and failed) to
202     // import it earlier.
203     Entry->closeFile();
204     return OutOfDate;
205   } else {
206     // Open the AST file.
207     llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Buf((std::error_code()));
208     if (FileName == "-") {
209       Buf = llvm::MemoryBuffer::getSTDIN();
210     } else {
211       // Get a buffer of the file and close the file descriptor when done.
212       // The file is volatile because in a parallel build we expect multiple
213       // compiler processes to use the same module file rebuilding it if needed.
214       //
215       // RequiresNullTerminator is false because module files don't need it, and
216       // this allows the file to still be mmapped.
217       Buf = FileMgr.getBufferForFile(NewModule->File,
218                                      /*IsVolatile=*/true,
219                                      /*RequiresNullTerminator=*/false);
220     }
221 
222     if (!Buf) {
223       ErrorStr = Buf.getError().message();
224       return Missing;
225     }
226 
227     NewModule->Buffer = &getModuleCache().addPCM(FileName, std::move(*Buf));
228   }
229 
230   // Initialize the stream.
231   NewModule->Data = PCHContainerRdr.ExtractPCH(*NewModule->Buffer);
232 
233   // Read the signature eagerly now so that we can check it.  Avoid calling
234   // ReadSignature unless there's something to check though.
235   if (ExpectedSignature && checkSignature(ReadSignature(NewModule->Data),
236                                           ExpectedSignature, ErrorStr))
237     return OutOfDate;
238 
239   // We're keeping this module.  Store it everywhere.
240   Module = Modules[Entry] = NewModule.get();
241 
242   updateModuleImports(*NewModule, ImportedBy, ImportLoc);
243 
244   if (!NewModule->isModule())
245     PCHChain.push_back(NewModule.get());
246   if (!ImportedBy)
247     Roots.push_back(NewModule.get());
248 
249   Chain.push_back(std::move(NewModule));
250   return NewlyLoaded;
251 }
252 
253 void ModuleManager::removeModules(ModuleIterator First) {
254   auto Last = end();
255   if (First == Last)
256     return;
257 
258   // Explicitly clear VisitOrder since we might not notice it is stale.
259   VisitOrder.clear();
260 
261   // Collect the set of module file pointers that we'll be removing.
262   llvm::SmallPtrSet<ModuleFile *, 4> victimSet(
263       (llvm::pointer_iterator<ModuleIterator>(First)),
264       (llvm::pointer_iterator<ModuleIterator>(Last)));
265 
266   auto IsVictim = [&](ModuleFile *MF) {
267     return victimSet.count(MF);
268   };
269   // Remove any references to the now-destroyed modules.
270   for (auto I = begin(); I != First; ++I) {
271     I->Imports.remove_if(IsVictim);
272     I->ImportedBy.remove_if(IsVictim);
273   }
274   llvm::erase_if(Roots, IsVictim);
275 
276   // Remove the modules from the PCH chain.
277   for (auto I = First; I != Last; ++I) {
278     if (!I->isModule()) {
279       PCHChain.erase(llvm::find(PCHChain, &*I), PCHChain.end());
280       break;
281     }
282   }
283 
284   // Delete the modules.
285   for (ModuleIterator victim = First; victim != Last; ++victim)
286     Modules.erase(victim->File);
287 
288   Chain.erase(Chain.begin() + (First - begin()), Chain.end());
289 }
290 
291 void
292 ModuleManager::addInMemoryBuffer(StringRef FileName,
293                                  std::unique_ptr<llvm::MemoryBuffer> Buffer) {
294   const FileEntry *Entry =
295       FileMgr.getVirtualFile(FileName, Buffer->getBufferSize(), 0);
296   InMemoryBuffers[Entry] = std::move(Buffer);
297 }
298 
299 std::unique_ptr<ModuleManager::VisitState> ModuleManager::allocateVisitState() {
300   // Fast path: if we have a cached state, use it.
301   if (FirstVisitState) {
302     auto Result = std::move(FirstVisitState);
303     FirstVisitState = std::move(Result->NextState);
304     return Result;
305   }
306 
307   // Allocate and return a new state.
308   return std::make_unique<VisitState>(size());
309 }
310 
311 void ModuleManager::returnVisitState(std::unique_ptr<VisitState> State) {
312   assert(State->NextState == nullptr && "Visited state is in list?");
313   State->NextState = std::move(FirstVisitState);
314   FirstVisitState = std::move(State);
315 }
316 
317 void ModuleManager::setGlobalIndex(GlobalModuleIndex *Index) {
318   GlobalIndex = Index;
319   if (!GlobalIndex) {
320     ModulesInCommonWithGlobalIndex.clear();
321     return;
322   }
323 
324   // Notify the global module index about all of the modules we've already
325   // loaded.
326   for (ModuleFile &M : *this)
327     if (!GlobalIndex->loadedModuleFile(&M))
328       ModulesInCommonWithGlobalIndex.push_back(&M);
329 }
330 
331 void ModuleManager::moduleFileAccepted(ModuleFile *MF) {
332   if (!GlobalIndex || GlobalIndex->loadedModuleFile(MF))
333     return;
334 
335   ModulesInCommonWithGlobalIndex.push_back(MF);
336 }
337 
338 ModuleManager::ModuleManager(FileManager &FileMgr,
339                              InMemoryModuleCache &ModuleCache,
340                              const PCHContainerReader &PCHContainerRdr,
341                              const HeaderSearch &HeaderSearchInfo)
342     : FileMgr(FileMgr), ModuleCache(&ModuleCache),
343       PCHContainerRdr(PCHContainerRdr), HeaderSearchInfo(HeaderSearchInfo) {}
344 
345 void ModuleManager::visit(llvm::function_ref<bool(ModuleFile &M)> Visitor,
346                           llvm::SmallPtrSetImpl<ModuleFile *> *ModuleFilesHit) {
347   // If the visitation order vector is the wrong size, recompute the order.
348   if (VisitOrder.size() != Chain.size()) {
349     unsigned N = size();
350     VisitOrder.clear();
351     VisitOrder.reserve(N);
352 
353     // Record the number of incoming edges for each module. When we
354     // encounter a module with no incoming edges, push it into the queue
355     // to seed the queue.
356     SmallVector<ModuleFile *, 4> Queue;
357     Queue.reserve(N);
358     llvm::SmallVector<unsigned, 4> UnusedIncomingEdges;
359     UnusedIncomingEdges.resize(size());
360     for (ModuleFile &M : llvm::reverse(*this)) {
361       unsigned Size = M.ImportedBy.size();
362       UnusedIncomingEdges[M.Index] = Size;
363       if (!Size)
364         Queue.push_back(&M);
365     }
366 
367     // Traverse the graph, making sure to visit a module before visiting any
368     // of its dependencies.
369     while (!Queue.empty()) {
370       ModuleFile *CurrentModule = Queue.pop_back_val();
371       VisitOrder.push_back(CurrentModule);
372 
373       // For any module that this module depends on, push it on the
374       // stack (if it hasn't already been marked as visited).
375       for (ModuleFile *M : llvm::reverse(CurrentModule->Imports)) {
376         // Remove our current module as an impediment to visiting the
377         // module we depend on. If we were the last unvisited module
378         // that depends on this particular module, push it into the
379         // queue to be visited.
380         unsigned &NumUnusedEdges = UnusedIncomingEdges[M->Index];
381         if (NumUnusedEdges && (--NumUnusedEdges == 0))
382           Queue.push_back(M);
383       }
384     }
385 
386     assert(VisitOrder.size() == N && "Visitation order is wrong?");
387 
388     FirstVisitState = nullptr;
389   }
390 
391   auto State = allocateVisitState();
392   unsigned VisitNumber = State->NextVisitNumber++;
393 
394   // If the caller has provided us with a hit-set that came from the global
395   // module index, mark every module file in common with the global module
396   // index that is *not* in that set as 'visited'.
397   if (ModuleFilesHit && !ModulesInCommonWithGlobalIndex.empty()) {
398     for (unsigned I = 0, N = ModulesInCommonWithGlobalIndex.size(); I != N; ++I)
399     {
400       ModuleFile *M = ModulesInCommonWithGlobalIndex[I];
401       if (!ModuleFilesHit->count(M))
402         State->VisitNumber[M->Index] = VisitNumber;
403     }
404   }
405 
406   for (unsigned I = 0, N = VisitOrder.size(); I != N; ++I) {
407     ModuleFile *CurrentModule = VisitOrder[I];
408     // Should we skip this module file?
409     if (State->VisitNumber[CurrentModule->Index] == VisitNumber)
410       continue;
411 
412     // Visit the module.
413     assert(State->VisitNumber[CurrentModule->Index] == VisitNumber - 1);
414     State->VisitNumber[CurrentModule->Index] = VisitNumber;
415     if (!Visitor(*CurrentModule))
416       continue;
417 
418     // The visitor has requested that cut off visitation of any
419     // module that the current module depends on. To indicate this
420     // behavior, we mark all of the reachable modules as having been visited.
421     ModuleFile *NextModule = CurrentModule;
422     do {
423       // For any module that this module depends on, push it on the
424       // stack (if it hasn't already been marked as visited).
425       for (llvm::SetVector<ModuleFile *>::iterator
426              M = NextModule->Imports.begin(),
427              MEnd = NextModule->Imports.end();
428            M != MEnd; ++M) {
429         if (State->VisitNumber[(*M)->Index] != VisitNumber) {
430           State->Stack.push_back(*M);
431           State->VisitNumber[(*M)->Index] = VisitNumber;
432         }
433       }
434 
435       if (State->Stack.empty())
436         break;
437 
438       // Pop the next module off the stack.
439       NextModule = State->Stack.pop_back_val();
440     } while (true);
441   }
442 
443   returnVisitState(std::move(State));
444 }
445 
446 bool ModuleManager::lookupModuleFile(StringRef FileName, off_t ExpectedSize,
447                                      time_t ExpectedModTime,
448                                      std::optional<FileEntryRef> &File) {
449   File = std::nullopt;
450   if (FileName == "-")
451     return false;
452 
453   // Open the file immediately to ensure there is no race between stat'ing and
454   // opening the file.
455   Optional<FileEntryRef> FileOrErr =
456       expectedToOptional(FileMgr.getFileRef(FileName, /*OpenFile=*/true,
457                                             /*CacheFailure=*/false));
458   if (!FileOrErr)
459     return false;
460 
461   File = *FileOrErr;
462 
463   if ((ExpectedSize && ExpectedSize != File->getSize()) ||
464       (ExpectedModTime && ExpectedModTime != File->getModificationTime()))
465     // Do not destroy File, as it may be referenced. If we need to rebuild it,
466     // it will be destroyed by removeModules.
467     return true;
468 
469   return false;
470 }
471 
472 #ifndef NDEBUG
473 namespace llvm {
474 
475   template<>
476   struct GraphTraits<ModuleManager> {
477     using NodeRef = ModuleFile *;
478     using ChildIteratorType = llvm::SetVector<ModuleFile *>::const_iterator;
479     using nodes_iterator = pointer_iterator<ModuleManager::ModuleConstIterator>;
480 
481     static ChildIteratorType child_begin(NodeRef Node) {
482       return Node->Imports.begin();
483     }
484 
485     static ChildIteratorType child_end(NodeRef Node) {
486       return Node->Imports.end();
487     }
488 
489     static nodes_iterator nodes_begin(const ModuleManager &Manager) {
490       return nodes_iterator(Manager.begin());
491     }
492 
493     static nodes_iterator nodes_end(const ModuleManager &Manager) {
494       return nodes_iterator(Manager.end());
495     }
496   };
497 
498   template<>
499   struct DOTGraphTraits<ModuleManager> : public DefaultDOTGraphTraits {
500     explicit DOTGraphTraits(bool IsSimple = false)
501         : DefaultDOTGraphTraits(IsSimple) {}
502 
503     static bool renderGraphFromBottomUp() { return true; }
504 
505     std::string getNodeLabel(ModuleFile *M, const ModuleManager&) {
506       return M->ModuleName;
507     }
508   };
509 
510 } // namespace llvm
511 
512 void ModuleManager::viewGraph() {
513   llvm::ViewGraph(*this, "Modules");
514 }
515 #endif
516