xref: /llvm-project/clang/lib/Serialization/ModuleManager.cpp (revision 0ffa29fe8152e247eea87017e8c5aeedc6329c15)
1 //===- ModuleManager.cpp - Module Manager ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines the ModuleManager class, which manages a set of loaded
10 //  modules for the ASTReader.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Serialization/ModuleManager.h"
15 #include "clang/Basic/FileManager.h"
16 #include "clang/Basic/LLVM.h"
17 #include "clang/Lex/HeaderSearch.h"
18 #include "clang/Lex/ModuleMap.h"
19 #include "clang/Serialization/GlobalModuleIndex.h"
20 #include "clang/Serialization/InMemoryModuleCache.h"
21 #include "clang/Serialization/ModuleFile.h"
22 #include "clang/Serialization/PCHContainerOperations.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SetVector.h"
25 #include "llvm/ADT/SmallPtrSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/ADT/iterator.h"
29 #include "llvm/Support/Chrono.h"
30 #include "llvm/Support/DOTGraphTraits.h"
31 #include "llvm/Support/ErrorOr.h"
32 #include "llvm/Support/GraphWriter.h"
33 #include "llvm/Support/MemoryBuffer.h"
34 #include "llvm/Support/VirtualFileSystem.h"
35 #include <algorithm>
36 #include <cassert>
37 #include <memory>
38 #include <string>
39 #include <system_error>
40 
41 using namespace clang;
42 using namespace serialization;
43 
44 ModuleFile *ModuleManager::lookupByFileName(StringRef Name) const {
45   auto Entry = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/false,
46                                           /*CacheFailure=*/false);
47   if (Entry)
48     return lookup(*Entry);
49 
50   return nullptr;
51 }
52 
53 ModuleFile *ModuleManager::lookupByModuleName(StringRef Name) const {
54   if (const Module *Mod = HeaderSearchInfo.getModuleMap().findModule(Name))
55     if (OptionalFileEntryRef File = Mod->getASTFile())
56       return lookup(*File);
57 
58   return nullptr;
59 }
60 
61 ModuleFile *ModuleManager::lookup(const FileEntry *File) const {
62   return Modules.lookup(File);
63 }
64 
65 std::unique_ptr<llvm::MemoryBuffer>
66 ModuleManager::lookupBuffer(StringRef Name) {
67   auto Entry = FileMgr.getOptionalFileRef(Name, /*OpenFile=*/false,
68                                           /*CacheFailure=*/false);
69   if (!Entry)
70     return nullptr;
71   return std::move(InMemoryBuffers[*Entry]);
72 }
73 
74 static bool checkSignature(ASTFileSignature Signature,
75                            ASTFileSignature ExpectedSignature,
76                            std::string &ErrorStr) {
77   if (!ExpectedSignature || Signature == ExpectedSignature)
78     return false;
79 
80   ErrorStr =
81       Signature ? "signature mismatch" : "could not read module signature";
82   return true;
83 }
84 
85 static void updateModuleImports(ModuleFile &MF, ModuleFile *ImportedBy,
86                                 SourceLocation ImportLoc) {
87   if (ImportedBy) {
88     MF.ImportedBy.insert(ImportedBy);
89     ImportedBy->Imports.insert(&MF);
90   } else {
91     if (!MF.DirectlyImported)
92       MF.ImportLoc = ImportLoc;
93 
94     MF.DirectlyImported = true;
95   }
96 }
97 
98 ModuleManager::AddModuleResult
99 ModuleManager::addModule(StringRef FileName, ModuleKind Type,
100                          SourceLocation ImportLoc, ModuleFile *ImportedBy,
101                          unsigned Generation,
102                          off_t ExpectedSize, time_t ExpectedModTime,
103                          ASTFileSignature ExpectedSignature,
104                          ASTFileSignatureReader ReadSignature,
105                          ModuleFile *&Module,
106                          std::string &ErrorStr) {
107   Module = nullptr;
108 
109   // Look for the file entry. This only fails if the expected size or
110   // modification time differ.
111   OptionalFileEntryRef Entry;
112   if (Type == MK_ExplicitModule || Type == MK_PrebuiltModule) {
113     // If we're not expecting to pull this file out of the module cache, it
114     // might have a different mtime due to being moved across filesystems in
115     // a distributed build. The size must still match, though. (As must the
116     // contents, but we can't check that.)
117     ExpectedModTime = 0;
118   }
119   // Note: ExpectedSize and ExpectedModTime will be 0 for MK_ImplicitModule
120   // when using an ASTFileSignature.
121   if (lookupModuleFile(FileName, ExpectedSize, ExpectedModTime, Entry)) {
122     ErrorStr = "module file out of date";
123     return OutOfDate;
124   }
125 
126   if (!Entry) {
127     ErrorStr = "module file not found";
128     return Missing;
129   }
130 
131   // The ModuleManager's use of FileEntry nodes as the keys for its map of
132   // loaded modules is less than ideal. Uniqueness for FileEntry nodes is
133   // maintained by FileManager, which in turn uses inode numbers on hosts
134   // that support that. When coupled with the module cache's proclivity for
135   // turning over and deleting stale PCMs, this means entries for different
136   // module files can wind up reusing the same underlying inode. When this
137   // happens, subsequent accesses to the Modules map will disagree on the
138   // ModuleFile associated with a given file. In general, it is not sufficient
139   // to resolve this conundrum with a type like FileEntryRef that stores the
140   // name of the FileEntry node on first access because of path canonicalization
141   // issues. However, the paths constructed for implicit module builds are
142   // fully under Clang's control. We *can*, therefore, rely on their structure
143   // being consistent across operating systems and across subsequent accesses
144   // to the Modules map.
145   auto implicitModuleNamesMatch = [](ModuleKind Kind, const ModuleFile *MF,
146                                      FileEntryRef Entry) -> bool {
147     if (Kind != MK_ImplicitModule)
148       return true;
149     return Entry.getName() == MF->FileName;
150   };
151 
152   // Check whether we already loaded this module, before
153   if (ModuleFile *ModuleEntry = Modules.lookup(*Entry)) {
154     if (implicitModuleNamesMatch(Type, ModuleEntry, *Entry)) {
155       // Check the stored signature.
156       if (checkSignature(ModuleEntry->Signature, ExpectedSignature, ErrorStr))
157         return OutOfDate;
158 
159       Module = ModuleEntry;
160       updateModuleImports(*ModuleEntry, ImportedBy, ImportLoc);
161       return AlreadyLoaded;
162     }
163   }
164 
165   // Allocate a new module.
166   auto NewModule = std::make_unique<ModuleFile>(Type, *Entry, Generation);
167   NewModule->Index = Chain.size();
168   NewModule->FileName = FileName.str();
169   NewModule->ImportLoc = ImportLoc;
170   NewModule->InputFilesValidationTimestamp = 0;
171 
172   if (NewModule->Kind == MK_ImplicitModule) {
173     std::string TimestampFilename =
174         ModuleFile::getTimestampFilename(NewModule->FileName);
175     llvm::vfs::Status Status;
176     // A cached stat value would be fine as well.
177     if (!FileMgr.getNoncachedStatValue(TimestampFilename, Status))
178       NewModule->InputFilesValidationTimestamp =
179           llvm::sys::toTimeT(Status.getLastModificationTime());
180   }
181 
182   // Load the contents of the module
183   if (std::unique_ptr<llvm::MemoryBuffer> Buffer = lookupBuffer(FileName)) {
184     // The buffer was already provided for us.
185     NewModule->Buffer = &ModuleCache->addBuiltPCM(FileName, std::move(Buffer));
186     // Since the cached buffer is reused, it is safe to close the file
187     // descriptor that was opened while stat()ing the PCM in
188     // lookupModuleFile() above, it won't be needed any longer.
189     Entry->closeFile();
190   } else if (llvm::MemoryBuffer *Buffer =
191                  getModuleCache().lookupPCM(FileName)) {
192     NewModule->Buffer = Buffer;
193     // As above, the file descriptor is no longer needed.
194     Entry->closeFile();
195   } else if (getModuleCache().shouldBuildPCM(FileName)) {
196     // Report that the module is out of date, since we tried (and failed) to
197     // import it earlier.
198     Entry->closeFile();
199     return OutOfDate;
200   } else {
201     // Get a buffer of the file and close the file descriptor when done.
202     // The file is volatile because in a parallel build we expect multiple
203     // compiler processes to use the same module file rebuilding it if needed.
204     //
205     // RequiresNullTerminator is false because module files don't need it, and
206     // this allows the file to still be mmapped.
207     auto Buf = FileMgr.getBufferForFile(NewModule->File,
208                                         /*IsVolatile=*/true,
209                                         /*RequiresNullTerminator=*/false);
210 
211     if (!Buf) {
212       ErrorStr = Buf.getError().message();
213       return Missing;
214     }
215 
216     NewModule->Buffer = &getModuleCache().addPCM(FileName, std::move(*Buf));
217   }
218 
219   // Initialize the stream.
220   NewModule->Data = PCHContainerRdr.ExtractPCH(*NewModule->Buffer);
221 
222   // Read the signature eagerly now so that we can check it.  Avoid calling
223   // ReadSignature unless there's something to check though.
224   if (ExpectedSignature && checkSignature(ReadSignature(NewModule->Data),
225                                           ExpectedSignature, ErrorStr))
226     return OutOfDate;
227 
228   // We're keeping this module.  Store it everywhere.
229   Module = Modules[*Entry] = NewModule.get();
230 
231   updateModuleImports(*NewModule, ImportedBy, ImportLoc);
232 
233   if (!NewModule->isModule())
234     PCHChain.push_back(NewModule.get());
235   if (!ImportedBy)
236     Roots.push_back(NewModule.get());
237 
238   Chain.push_back(std::move(NewModule));
239   return NewlyLoaded;
240 }
241 
242 void ModuleManager::removeModules(ModuleIterator First) {
243   auto Last = end();
244   if (First == Last)
245     return;
246 
247   // Explicitly clear VisitOrder since we might not notice it is stale.
248   VisitOrder.clear();
249 
250   // Collect the set of module file pointers that we'll be removing.
251   llvm::SmallPtrSet<ModuleFile *, 4> victimSet(
252       (llvm::pointer_iterator<ModuleIterator>(First)),
253       (llvm::pointer_iterator<ModuleIterator>(Last)));
254 
255   auto IsVictim = [&](ModuleFile *MF) {
256     return victimSet.count(MF);
257   };
258   // Remove any references to the now-destroyed modules.
259   for (auto I = begin(); I != First; ++I) {
260     I->Imports.remove_if(IsVictim);
261     I->ImportedBy.remove_if(IsVictim);
262   }
263   llvm::erase_if(Roots, IsVictim);
264 
265   // Remove the modules from the PCH chain.
266   for (auto I = First; I != Last; ++I) {
267     if (!I->isModule()) {
268       PCHChain.erase(llvm::find(PCHChain, &*I), PCHChain.end());
269       break;
270     }
271   }
272 
273   // Delete the modules.
274   for (ModuleIterator victim = First; victim != Last; ++victim)
275     Modules.erase(victim->File);
276 
277   Chain.erase(Chain.begin() + (First - begin()), Chain.end());
278 }
279 
280 void
281 ModuleManager::addInMemoryBuffer(StringRef FileName,
282                                  std::unique_ptr<llvm::MemoryBuffer> Buffer) {
283   FileEntryRef Entry =
284       FileMgr.getVirtualFileRef(FileName, Buffer->getBufferSize(), 0);
285   InMemoryBuffers[Entry] = std::move(Buffer);
286 }
287 
288 std::unique_ptr<ModuleManager::VisitState> ModuleManager::allocateVisitState() {
289   // Fast path: if we have a cached state, use it.
290   if (FirstVisitState) {
291     auto Result = std::move(FirstVisitState);
292     FirstVisitState = std::move(Result->NextState);
293     return Result;
294   }
295 
296   // Allocate and return a new state.
297   return std::make_unique<VisitState>(size());
298 }
299 
300 void ModuleManager::returnVisitState(std::unique_ptr<VisitState> State) {
301   assert(State->NextState == nullptr && "Visited state is in list?");
302   State->NextState = std::move(FirstVisitState);
303   FirstVisitState = std::move(State);
304 }
305 
306 void ModuleManager::setGlobalIndex(GlobalModuleIndex *Index) {
307   GlobalIndex = Index;
308   if (!GlobalIndex) {
309     ModulesInCommonWithGlobalIndex.clear();
310     return;
311   }
312 
313   // Notify the global module index about all of the modules we've already
314   // loaded.
315   for (ModuleFile &M : *this)
316     if (!GlobalIndex->loadedModuleFile(&M))
317       ModulesInCommonWithGlobalIndex.push_back(&M);
318 }
319 
320 void ModuleManager::moduleFileAccepted(ModuleFile *MF) {
321   if (!GlobalIndex || GlobalIndex->loadedModuleFile(MF))
322     return;
323 
324   ModulesInCommonWithGlobalIndex.push_back(MF);
325 }
326 
327 ModuleManager::ModuleManager(FileManager &FileMgr,
328                              InMemoryModuleCache &ModuleCache,
329                              const PCHContainerReader &PCHContainerRdr,
330                              const HeaderSearch &HeaderSearchInfo)
331     : FileMgr(FileMgr), ModuleCache(&ModuleCache),
332       PCHContainerRdr(PCHContainerRdr), HeaderSearchInfo(HeaderSearchInfo) {}
333 
334 void ModuleManager::visit(llvm::function_ref<bool(ModuleFile &M)> Visitor,
335                           llvm::SmallPtrSetImpl<ModuleFile *> *ModuleFilesHit) {
336   // If the visitation order vector is the wrong size, recompute the order.
337   if (VisitOrder.size() != Chain.size()) {
338     unsigned N = size();
339     VisitOrder.clear();
340     VisitOrder.reserve(N);
341 
342     // Record the number of incoming edges for each module. When we
343     // encounter a module with no incoming edges, push it into the queue
344     // to seed the queue.
345     SmallVector<ModuleFile *, 4> Queue;
346     Queue.reserve(N);
347     llvm::SmallVector<unsigned, 4> UnusedIncomingEdges;
348     UnusedIncomingEdges.resize(size());
349     for (ModuleFile &M : llvm::reverse(*this)) {
350       unsigned Size = M.ImportedBy.size();
351       UnusedIncomingEdges[M.Index] = Size;
352       if (!Size)
353         Queue.push_back(&M);
354     }
355 
356     // Traverse the graph, making sure to visit a module before visiting any
357     // of its dependencies.
358     while (!Queue.empty()) {
359       ModuleFile *CurrentModule = Queue.pop_back_val();
360       VisitOrder.push_back(CurrentModule);
361 
362       // For any module that this module depends on, push it on the
363       // stack (if it hasn't already been marked as visited).
364       for (ModuleFile *M : llvm::reverse(CurrentModule->Imports)) {
365         // Remove our current module as an impediment to visiting the
366         // module we depend on. If we were the last unvisited module
367         // that depends on this particular module, push it into the
368         // queue to be visited.
369         unsigned &NumUnusedEdges = UnusedIncomingEdges[M->Index];
370         if (NumUnusedEdges && (--NumUnusedEdges == 0))
371           Queue.push_back(M);
372       }
373     }
374 
375     assert(VisitOrder.size() == N && "Visitation order is wrong?");
376 
377     FirstVisitState = nullptr;
378   }
379 
380   auto State = allocateVisitState();
381   unsigned VisitNumber = State->NextVisitNumber++;
382 
383   // If the caller has provided us with a hit-set that came from the global
384   // module index, mark every module file in common with the global module
385   // index that is *not* in that set as 'visited'.
386   if (ModuleFilesHit && !ModulesInCommonWithGlobalIndex.empty()) {
387     for (unsigned I = 0, N = ModulesInCommonWithGlobalIndex.size(); I != N; ++I)
388     {
389       ModuleFile *M = ModulesInCommonWithGlobalIndex[I];
390       if (!ModuleFilesHit->count(M))
391         State->VisitNumber[M->Index] = VisitNumber;
392     }
393   }
394 
395   for (unsigned I = 0, N = VisitOrder.size(); I != N; ++I) {
396     ModuleFile *CurrentModule = VisitOrder[I];
397     // Should we skip this module file?
398     if (State->VisitNumber[CurrentModule->Index] == VisitNumber)
399       continue;
400 
401     // Visit the module.
402     assert(State->VisitNumber[CurrentModule->Index] == VisitNumber - 1);
403     State->VisitNumber[CurrentModule->Index] = VisitNumber;
404     if (!Visitor(*CurrentModule))
405       continue;
406 
407     // The visitor has requested that cut off visitation of any
408     // module that the current module depends on. To indicate this
409     // behavior, we mark all of the reachable modules as having been visited.
410     ModuleFile *NextModule = CurrentModule;
411     do {
412       // For any module that this module depends on, push it on the
413       // stack (if it hasn't already been marked as visited).
414       for (llvm::SetVector<ModuleFile *>::iterator
415              M = NextModule->Imports.begin(),
416              MEnd = NextModule->Imports.end();
417            M != MEnd; ++M) {
418         if (State->VisitNumber[(*M)->Index] != VisitNumber) {
419           State->Stack.push_back(*M);
420           State->VisitNumber[(*M)->Index] = VisitNumber;
421         }
422       }
423 
424       if (State->Stack.empty())
425         break;
426 
427       // Pop the next module off the stack.
428       NextModule = State->Stack.pop_back_val();
429     } while (true);
430   }
431 
432   returnVisitState(std::move(State));
433 }
434 
435 bool ModuleManager::lookupModuleFile(StringRef FileName, off_t ExpectedSize,
436                                      time_t ExpectedModTime,
437                                      OptionalFileEntryRef &File) {
438   if (FileName == "-") {
439     File = expectedToOptional(FileMgr.getSTDIN());
440     return false;
441   }
442 
443   // Open the file immediately to ensure there is no race between stat'ing and
444   // opening the file.
445   File = FileMgr.getOptionalFileRef(FileName, /*OpenFile=*/true,
446                                     /*CacheFailure=*/false);
447 
448   if (File &&
449       ((ExpectedSize && ExpectedSize != File->getSize()) ||
450        (ExpectedModTime && ExpectedModTime != File->getModificationTime())))
451     // Do not destroy File, as it may be referenced. If we need to rebuild it,
452     // it will be destroyed by removeModules.
453     return true;
454 
455   return false;
456 }
457 
458 #ifndef NDEBUG
459 namespace llvm {
460 
461   template<>
462   struct GraphTraits<ModuleManager> {
463     using NodeRef = ModuleFile *;
464     using ChildIteratorType = llvm::SetVector<ModuleFile *>::const_iterator;
465     using nodes_iterator = pointer_iterator<ModuleManager::ModuleConstIterator>;
466 
467     static ChildIteratorType child_begin(NodeRef Node) {
468       return Node->Imports.begin();
469     }
470 
471     static ChildIteratorType child_end(NodeRef Node) {
472       return Node->Imports.end();
473     }
474 
475     static nodes_iterator nodes_begin(const ModuleManager &Manager) {
476       return nodes_iterator(Manager.begin());
477     }
478 
479     static nodes_iterator nodes_end(const ModuleManager &Manager) {
480       return nodes_iterator(Manager.end());
481     }
482   };
483 
484   template<>
485   struct DOTGraphTraits<ModuleManager> : public DefaultDOTGraphTraits {
486     explicit DOTGraphTraits(bool IsSimple = false)
487         : DefaultDOTGraphTraits(IsSimple) {}
488 
489     static bool renderGraphFromBottomUp() { return true; }
490 
491     std::string getNodeLabel(ModuleFile *M, const ModuleManager&) {
492       return M->ModuleName;
493     }
494   };
495 
496 } // namespace llvm
497 
498 void ModuleManager::viewGraph() {
499   llvm::ViewGraph(*this, "Modules");
500 }
501 #endif
502