xref: /llvm-project/clang-tools-extra/clangd/ModulesBuilder.cpp (revision 6bb5d6ae23cace42bd108ca14e17e863c73bbb5c)
1 //===----------------- ModulesBuilder.cpp ------------------------*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #include "ModulesBuilder.h"
10 #include "Compiler.h"
11 #include "support/Logger.h"
12 #include "clang/Frontend/FrontendAction.h"
13 #include "clang/Frontend/FrontendActions.h"
14 #include "clang/Serialization/ASTReader.h"
15 #include "clang/Serialization/InMemoryModuleCache.h"
16 #include "llvm/ADT/ScopeExit.h"
17 #include <queue>
18 
19 namespace clang {
20 namespace clangd {
21 
22 namespace {
23 
24 // Create a path to store module files. Generally it should be:
25 //
26 //   {TEMP_DIRS}/clangd/module_files/{hashed-file-name}-%%-%%-%%-%%-%%-%%/.
27 //
28 // {TEMP_DIRS} is the temporary directory for the system, e.g., "/var/tmp"
29 // or "C:/TEMP".
30 //
31 // '%%' means random value to make the generated path unique.
32 //
33 // \param MainFile is used to get the root of the project from global
34 // compilation database.
35 //
36 // TODO: Move these module fils out of the temporary directory if the module
37 // files are persistent.
38 llvm::SmallString<256> getUniqueModuleFilesPath(PathRef MainFile) {
39   llvm::SmallString<128> HashedPrefix = llvm::sys::path::filename(MainFile);
40   // There might be multiple files with the same name in a project. So appending
41   // the hash value of the full path to make sure they won't conflict.
42   HashedPrefix += std::to_string(llvm::hash_value(MainFile));
43 
44   llvm::SmallString<256> ResultPattern;
45 
46   llvm::sys::path::system_temp_directory(/*erasedOnReboot=*/true,
47                                          ResultPattern);
48 
49   llvm::sys::path::append(ResultPattern, "clangd");
50   llvm::sys::path::append(ResultPattern, "module_files");
51 
52   llvm::sys::path::append(ResultPattern, HashedPrefix);
53 
54   ResultPattern.append("-%%-%%-%%-%%-%%-%%");
55 
56   llvm::SmallString<256> Result;
57   llvm::sys::fs::createUniquePath(ResultPattern, Result,
58                                   /*MakeAbsolute=*/false);
59 
60   llvm::sys::fs::create_directories(Result);
61   return Result;
62 }
63 
64 // Get a unique module file path under \param ModuleFilesPrefix.
65 std::string getModuleFilePath(llvm::StringRef ModuleName,
66                               PathRef ModuleFilesPrefix) {
67   llvm::SmallString<256> ModuleFilePath(ModuleFilesPrefix);
68   auto [PrimaryModuleName, PartitionName] = ModuleName.split(':');
69   llvm::sys::path::append(ModuleFilePath, PrimaryModuleName);
70   if (!PartitionName.empty()) {
71     ModuleFilePath.append("-");
72     ModuleFilePath.append(PartitionName);
73   }
74 
75   ModuleFilePath.append(".pcm");
76   return std::string(ModuleFilePath);
77 }
78 
79 // FailedPrerequisiteModules - stands for the PrerequisiteModules which has
80 // errors happened during the building process.
81 class FailedPrerequisiteModules : public PrerequisiteModules {
82 public:
83   ~FailedPrerequisiteModules() override = default;
84 
85   // We shouldn't adjust the compilation commands based on
86   // FailedPrerequisiteModules.
87   void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override {
88   }
89 
90   // FailedPrerequisiteModules can never be reused.
91   bool
92   canReuse(const CompilerInvocation &CI,
93            llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) const override {
94     return false;
95   }
96 };
97 
98 struct ModuleFile {
99   ModuleFile(StringRef ModuleName, PathRef ModuleFilePath)
100       : ModuleName(ModuleName.str()), ModuleFilePath(ModuleFilePath.str()) {}
101 
102   ModuleFile() = delete;
103 
104   ModuleFile(const ModuleFile &) = delete;
105   ModuleFile operator=(const ModuleFile &) = delete;
106 
107   // The move constructor is needed for llvm::SmallVector.
108   ModuleFile(ModuleFile &&Other)
109       : ModuleName(std::move(Other.ModuleName)),
110         ModuleFilePath(std::move(Other.ModuleFilePath)) {
111     Other.ModuleName.clear();
112     Other.ModuleFilePath.clear();
113   }
114 
115   ModuleFile &operator=(ModuleFile &&Other) {
116     if (this == &Other)
117       return *this;
118 
119     this->~ModuleFile();
120     new (this) ModuleFile(std::move(Other));
121     return *this;
122   }
123 
124   ~ModuleFile() {
125     if (!ModuleFilePath.empty())
126       llvm::sys::fs::remove(ModuleFilePath);
127   }
128 
129   StringRef getModuleName() const { return ModuleName; }
130 
131   StringRef getModuleFilePath() const { return ModuleFilePath; }
132 
133 private:
134   std::string ModuleName;
135   std::string ModuleFilePath;
136 };
137 
138 // ReusablePrerequisiteModules - stands for PrerequisiteModules for which all
139 // the required modules are built successfully. All the module files
140 // are owned by the modules builder.
141 class ReusablePrerequisiteModules : public PrerequisiteModules {
142 public:
143   ReusablePrerequisiteModules() = default;
144 
145   ReusablePrerequisiteModules(const ReusablePrerequisiteModules &Other) =
146       default;
147   ReusablePrerequisiteModules &
148   operator=(const ReusablePrerequisiteModules &) = default;
149   ReusablePrerequisiteModules(ReusablePrerequisiteModules &&) = delete;
150   ReusablePrerequisiteModules
151   operator=(ReusablePrerequisiteModules &&) = delete;
152 
153   ~ReusablePrerequisiteModules() override = default;
154 
155   void adjustHeaderSearchOptions(HeaderSearchOptions &Options) const override {
156     // Appending all built module files.
157     for (const auto &RequiredModule : RequiredModules)
158       Options.PrebuiltModuleFiles.insert_or_assign(
159           RequiredModule->getModuleName().str(),
160           RequiredModule->getModuleFilePath().str());
161   }
162 
163   bool canReuse(const CompilerInvocation &CI,
164                 llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem>) const override;
165 
166   bool isModuleUnitBuilt(llvm::StringRef ModuleName) const {
167     return BuiltModuleNames.contains(ModuleName);
168   }
169 
170   void addModuleFile(std::shared_ptr<const ModuleFile> ModuleFile) {
171     BuiltModuleNames.insert(ModuleFile->getModuleName());
172     RequiredModules.emplace_back(std::move(ModuleFile));
173   }
174 
175 private:
176   llvm::SmallVector<std::shared_ptr<const ModuleFile>, 8> RequiredModules;
177   // A helper class to speedup the query if a module is built.
178   llvm::StringSet<> BuiltModuleNames;
179 };
180 
181 bool IsModuleFileUpToDate(PathRef ModuleFilePath,
182                           const PrerequisiteModules &RequisiteModules,
183                           llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) {
184   auto HSOpts = std::make_shared<HeaderSearchOptions>();
185   RequisiteModules.adjustHeaderSearchOptions(*HSOpts);
186   HSOpts->ForceCheckCXX20ModulesInputFiles = true;
187   HSOpts->ValidateASTInputFilesContent = true;
188 
189   clang::clangd::IgnoreDiagnostics IgnoreDiags;
190   IntrusiveRefCntPtr<DiagnosticsEngine> Diags =
191       CompilerInstance::createDiagnostics(*VFS, new DiagnosticOptions,
192                                           &IgnoreDiags,
193                                           /*ShouldOwnClient=*/false);
194 
195   LangOptions LangOpts;
196   LangOpts.SkipODRCheckInGMF = true;
197 
198   FileManager FileMgr(FileSystemOptions(), VFS);
199 
200   SourceManager SourceMgr(*Diags, FileMgr);
201 
202   HeaderSearch HeaderInfo(std::move(HSOpts), SourceMgr, *Diags, LangOpts,
203                           /*Target=*/nullptr);
204 
205   TrivialModuleLoader ModuleLoader;
206   Preprocessor PP(std::make_shared<PreprocessorOptions>(), *Diags, LangOpts,
207                   SourceMgr, HeaderInfo, ModuleLoader);
208 
209   IntrusiveRefCntPtr<InMemoryModuleCache> ModuleCache = new InMemoryModuleCache;
210   PCHContainerOperations PCHOperations;
211   ASTReader Reader(PP, *ModuleCache, /*ASTContext=*/nullptr,
212                    PCHOperations.getRawReader(), {});
213 
214   // We don't need any listener here. By default it will use a validator
215   // listener.
216   Reader.setListener(nullptr);
217 
218   if (Reader.ReadAST(ModuleFilePath, serialization::MK_MainFile,
219                      SourceLocation(),
220                      ASTReader::ARR_None) != ASTReader::Success)
221     return false;
222 
223   bool UpToDate = true;
224   Reader.getModuleManager().visit([&](serialization::ModuleFile &MF) -> bool {
225     Reader.visitInputFiles(
226         MF, /*IncludeSystem=*/false, /*Complain=*/false,
227         [&](const serialization::InputFile &IF, bool isSystem) {
228           if (!IF.getFile() || IF.isOutOfDate())
229             UpToDate = false;
230         });
231     return !UpToDate;
232   });
233   return UpToDate;
234 }
235 
236 bool IsModuleFilesUpToDate(
237     llvm::SmallVector<PathRef> ModuleFilePaths,
238     const PrerequisiteModules &RequisiteModules,
239     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) {
240   return llvm::all_of(
241       ModuleFilePaths, [&RequisiteModules, VFS](auto ModuleFilePath) {
242         return IsModuleFileUpToDate(ModuleFilePath, RequisiteModules, VFS);
243       });
244 }
245 
246 /// Build a module file for module with `ModuleName`. The information of built
247 /// module file are stored in \param BuiltModuleFiles.
248 llvm::Expected<ModuleFile>
249 buildModuleFile(llvm::StringRef ModuleName, PathRef ModuleUnitFileName,
250                 const GlobalCompilationDatabase &CDB, const ThreadsafeFS &TFS,
251                 const ReusablePrerequisiteModules &BuiltModuleFiles) {
252   // Try cheap operation earlier to boil-out cheaply if there are problems.
253   auto Cmd = CDB.getCompileCommand(ModuleUnitFileName);
254   if (!Cmd)
255     return llvm::createStringError(
256         llvm::formatv("No compile command for {0}", ModuleUnitFileName));
257 
258   llvm::SmallString<256> ModuleFilesPrefix =
259       getUniqueModuleFilesPath(ModuleUnitFileName);
260 
261   Cmd->Output = getModuleFilePath(ModuleName, ModuleFilesPrefix);
262 
263   ParseInputs Inputs;
264   Inputs.TFS = &TFS;
265   Inputs.CompileCommand = std::move(*Cmd);
266 
267   IgnoreDiagnostics IgnoreDiags;
268   auto CI = buildCompilerInvocation(Inputs, IgnoreDiags);
269   if (!CI)
270     return llvm::createStringError("Failed to build compiler invocation");
271 
272   auto FS = Inputs.TFS->view(Inputs.CompileCommand.Directory);
273   auto Buf = FS->getBufferForFile(Inputs.CompileCommand.Filename);
274   if (!Buf)
275     return llvm::createStringError("Failed to create buffer");
276 
277   // In clang's driver, we will suppress the check for ODR violation in GMF.
278   // See the implementation of RenderModulesOptions in Clang.cpp.
279   CI->getLangOpts().SkipODRCheckInGMF = true;
280 
281   // Hash the contents of input files and store the hash value to the BMI files.
282   // So that we can check if the files are still valid when we want to reuse the
283   // BMI files.
284   CI->getHeaderSearchOpts().ValidateASTInputFilesContent = true;
285 
286   BuiltModuleFiles.adjustHeaderSearchOptions(CI->getHeaderSearchOpts());
287 
288   CI->getFrontendOpts().OutputFile = Inputs.CompileCommand.Output;
289   auto Clang =
290       prepareCompilerInstance(std::move(CI), /*Preamble=*/nullptr,
291                               std::move(*Buf), std::move(FS), IgnoreDiags);
292   if (!Clang)
293     return llvm::createStringError("Failed to prepare compiler instance");
294 
295   GenerateReducedModuleInterfaceAction Action;
296   Clang->ExecuteAction(Action);
297 
298   if (Clang->getDiagnostics().hasErrorOccurred())
299     return llvm::createStringError("Compilation failed");
300 
301   return ModuleFile{ModuleName, Inputs.CompileCommand.Output};
302 }
303 
304 bool ReusablePrerequisiteModules::canReuse(
305     const CompilerInvocation &CI,
306     llvm::IntrusiveRefCntPtr<llvm::vfs::FileSystem> VFS) const {
307   if (RequiredModules.empty())
308     return true;
309 
310   llvm::SmallVector<llvm::StringRef> BMIPaths;
311   for (auto &MF : RequiredModules)
312     BMIPaths.push_back(MF->getModuleFilePath());
313   return IsModuleFilesUpToDate(BMIPaths, *this, VFS);
314 }
315 
316 class ModuleFileCache {
317 public:
318   ModuleFileCache(const GlobalCompilationDatabase &CDB) : CDB(CDB) {}
319   const GlobalCompilationDatabase &getCDB() const { return CDB; }
320 
321   std::shared_ptr<const ModuleFile> getModule(StringRef ModuleName);
322 
323   void add(StringRef ModuleName, std::shared_ptr<const ModuleFile> ModuleFile) {
324     std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
325 
326     ModuleFiles[ModuleName] = ModuleFile;
327   }
328 
329   void remove(StringRef ModuleName);
330 
331 private:
332   const GlobalCompilationDatabase &CDB;
333 
334   llvm::StringMap<std::weak_ptr<const ModuleFile>> ModuleFiles;
335   // Mutex to guard accesses to ModuleFiles.
336   std::mutex ModuleFilesMutex;
337 };
338 
339 std::shared_ptr<const ModuleFile>
340 ModuleFileCache::getModule(StringRef ModuleName) {
341   std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
342 
343   auto Iter = ModuleFiles.find(ModuleName);
344   if (Iter == ModuleFiles.end())
345     return nullptr;
346 
347   if (auto Res = Iter->second.lock())
348     return Res;
349 
350   ModuleFiles.erase(Iter);
351   return nullptr;
352 }
353 
354 void ModuleFileCache::remove(StringRef ModuleName) {
355   std::lock_guard<std::mutex> Lock(ModuleFilesMutex);
356 
357   ModuleFiles.erase(ModuleName);
358 }
359 
360 /// Collect the directly and indirectly required module names for \param
361 /// ModuleName in topological order. The \param ModuleName is guaranteed to
362 /// be the last element in \param ModuleNames.
363 llvm::SmallVector<StringRef> getAllRequiredModules(ProjectModules &MDB,
364                                                    StringRef ModuleName) {
365   llvm::SmallVector<llvm::StringRef> ModuleNames;
366   llvm::StringSet<> ModuleNamesSet;
367 
368   auto VisitDeps = [&](StringRef ModuleName, auto Visitor) -> void {
369     ModuleNamesSet.insert(ModuleName);
370 
371     for (StringRef RequiredModuleName :
372          MDB.getRequiredModules(MDB.getSourceForModuleName(ModuleName)))
373       if (ModuleNamesSet.insert(RequiredModuleName).second)
374         Visitor(RequiredModuleName, Visitor);
375 
376     ModuleNames.push_back(ModuleName);
377   };
378   VisitDeps(ModuleName, VisitDeps);
379 
380   return ModuleNames;
381 }
382 
383 } // namespace
384 
385 class ModulesBuilder::ModulesBuilderImpl {
386 public:
387   ModulesBuilderImpl(const GlobalCompilationDatabase &CDB) : Cache(CDB) {}
388 
389   const GlobalCompilationDatabase &getCDB() const { return Cache.getCDB(); }
390 
391   llvm::Error
392   getOrBuildModuleFile(StringRef ModuleName, const ThreadsafeFS &TFS,
393                        ProjectModules &MDB,
394                        ReusablePrerequisiteModules &BuiltModuleFiles);
395 
396 private:
397   ModuleFileCache Cache;
398 };
399 
400 llvm::Error ModulesBuilder::ModulesBuilderImpl::getOrBuildModuleFile(
401     StringRef ModuleName, const ThreadsafeFS &TFS, ProjectModules &MDB,
402     ReusablePrerequisiteModules &BuiltModuleFiles) {
403   if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
404     return llvm::Error::success();
405 
406   PathRef ModuleUnitFileName = MDB.getSourceForModuleName(ModuleName);
407   /// It is possible that we're meeting third party modules (modules whose
408   /// source are not in the project. e.g, the std module may be a third-party
409   /// module for most project) or something wrong with the implementation of
410   /// ProjectModules.
411   /// FIXME: How should we treat third party modules here? If we want to ignore
412   /// third party modules, we should return true instead of false here.
413   /// Currently we simply bail out.
414   if (ModuleUnitFileName.empty())
415     return llvm::createStringError(
416         llvm::formatv("Don't get the module unit for module {0}", ModuleName));
417 
418   // Get Required modules in topological order.
419   auto ReqModuleNames = getAllRequiredModules(MDB, ModuleName);
420   for (llvm::StringRef ReqModuleName : ReqModuleNames) {
421     if (BuiltModuleFiles.isModuleUnitBuilt(ModuleName))
422       continue;
423 
424     if (auto Cached = Cache.getModule(ReqModuleName)) {
425       if (IsModuleFileUpToDate(Cached->getModuleFilePath(), BuiltModuleFiles,
426                                TFS.view(std::nullopt))) {
427         log("Reusing module {0} from {1}", ModuleName,
428             Cached->getModuleFilePath());
429         BuiltModuleFiles.addModuleFile(std::move(Cached));
430         continue;
431       }
432       Cache.remove(ReqModuleName);
433     }
434 
435     llvm::Expected<ModuleFile> MF = buildModuleFile(
436         ModuleName, ModuleUnitFileName, getCDB(), TFS, BuiltModuleFiles);
437     if (llvm::Error Err = MF.takeError())
438       return Err;
439 
440     log("Built module {0} to {1}", ModuleName, MF->getModuleFilePath());
441     auto BuiltModuleFile = std::make_shared<const ModuleFile>(std::move(*MF));
442     Cache.add(ModuleName, BuiltModuleFile);
443     BuiltModuleFiles.addModuleFile(std::move(BuiltModuleFile));
444   }
445 
446   return llvm::Error::success();
447 }
448 
449 std::unique_ptr<PrerequisiteModules>
450 ModulesBuilder::buildPrerequisiteModulesFor(PathRef File,
451                                             const ThreadsafeFS &TFS) {
452   std::unique_ptr<ProjectModules> MDB = Impl->getCDB().getProjectModules(File);
453   if (!MDB) {
454     elog("Failed to get Project Modules information for {0}", File);
455     return std::make_unique<FailedPrerequisiteModules>();
456   }
457 
458   std::vector<std::string> RequiredModuleNames = MDB->getRequiredModules(File);
459   if (RequiredModuleNames.empty())
460     return std::make_unique<ReusablePrerequisiteModules>();
461 
462   auto RequiredModules = std::make_unique<ReusablePrerequisiteModules>();
463   for (llvm::StringRef RequiredModuleName : RequiredModuleNames) {
464     // Return early if there is any error.
465     if (llvm::Error Err = Impl->getOrBuildModuleFile(
466             RequiredModuleName, TFS, *MDB.get(), *RequiredModules.get())) {
467       elog("Failed to build module {0}; due to {1}", RequiredModuleName,
468            toString(std::move(Err)));
469       return std::make_unique<FailedPrerequisiteModules>();
470     }
471   }
472 
473   return std::move(RequiredModules);
474 }
475 
476 ModulesBuilder::ModulesBuilder(const GlobalCompilationDatabase &CDB) {
477   Impl = std::make_unique<ModulesBuilderImpl>(CDB);
478 }
479 
480 ModulesBuilder::~ModulesBuilder() {}
481 
482 } // namespace clangd
483 } // namespace clang
484