xref: /llvm-project/clang/lib/Tooling/JSONCompilationDatabase.cpp (revision a0d032eacf3c40bdfef53b6d160dc895fa5df131)
1 //===--- JSONCompilationDatabase.cpp - ------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file contains the implementation of the JSONCompilationDatabase.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Tooling/JSONCompilationDatabase.h"
15 #include "clang/Tooling/CompilationDatabase.h"
16 #include "clang/Tooling/CompilationDatabasePluginRegistry.h"
17 #include "clang/Tooling/Tooling.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/Support/Allocator.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/Path.h"
22 #include "llvm/Support/StringSaver.h"
23 #include <system_error>
24 
25 namespace clang {
26 namespace tooling {
27 
28 namespace {
29 
30 /// \brief A parser for escaped strings of command line arguments.
31 ///
32 /// Assumes \-escaping for quoted arguments (see the documentation of
33 /// unescapeCommandLine(...)).
34 class CommandLineArgumentParser {
35  public:
36   CommandLineArgumentParser(StringRef CommandLine)
37       : Input(CommandLine), Position(Input.begin()-1) {}
38 
39   std::vector<std::string> parse() {
40     bool HasMoreInput = true;
41     while (HasMoreInput && nextNonWhitespace()) {
42       std::string Argument;
43       HasMoreInput = parseStringInto(Argument);
44       CommandLine.push_back(Argument);
45     }
46     return CommandLine;
47   }
48 
49  private:
50   // All private methods return true if there is more input available.
51 
52   bool parseStringInto(std::string &String) {
53     do {
54       if (*Position == '"') {
55         if (!parseDoubleQuotedStringInto(String)) return false;
56       } else if (*Position == '\'') {
57         if (!parseSingleQuotedStringInto(String)) return false;
58       } else {
59         if (!parseFreeStringInto(String)) return false;
60       }
61     } while (*Position != ' ');
62     return true;
63   }
64 
65   bool parseDoubleQuotedStringInto(std::string &String) {
66     if (!next()) return false;
67     while (*Position != '"') {
68       if (!skipEscapeCharacter()) return false;
69       String.push_back(*Position);
70       if (!next()) return false;
71     }
72     return next();
73   }
74 
75   bool parseSingleQuotedStringInto(std::string &String) {
76     if (!next()) return false;
77     while (*Position != '\'') {
78       String.push_back(*Position);
79       if (!next()) return false;
80     }
81     return next();
82   }
83 
84   bool parseFreeStringInto(std::string &String) {
85     do {
86       if (!skipEscapeCharacter()) return false;
87       String.push_back(*Position);
88       if (!next()) return false;
89     } while (*Position != ' ' && *Position != '"' && *Position != '\'');
90     return true;
91   }
92 
93   bool skipEscapeCharacter() {
94     if (*Position == '\\') {
95       return next();
96     }
97     return true;
98   }
99 
100   bool nextNonWhitespace() {
101     do {
102       if (!next()) return false;
103     } while (*Position == ' ');
104     return true;
105   }
106 
107   bool next() {
108     ++Position;
109     return Position != Input.end();
110   }
111 
112   const StringRef Input;
113   StringRef::iterator Position;
114   std::vector<std::string> CommandLine;
115 };
116 
117 std::vector<std::string> unescapeCommandLine(
118     StringRef EscapedCommandLine) {
119   llvm::Triple Triple(llvm::sys::getProcessTriple());
120   if (Triple.getOS() == llvm::Triple::OSType::Win32) {
121     // Assume Windows command line parsing on Win32 unless the triple explicitly
122     // tells us otherwise.
123     if (!Triple.hasEnvironment() ||
124         Triple.getEnvironment() == llvm::Triple::EnvironmentType::MSVC) {
125       llvm::BumpPtrAllocator Alloc;
126       llvm::StringSaver Saver(Alloc);
127       llvm::SmallVector<const char *, 64> T;
128       llvm::cl::TokenizeWindowsCommandLine(EscapedCommandLine, Saver, T);
129       std::vector<std::string> Result(T.begin(), T.end());
130       return Result;
131     }
132   }
133 
134   CommandLineArgumentParser parser(EscapedCommandLine);
135   return parser.parse();
136 }
137 
138 class JSONCompilationDatabasePlugin : public CompilationDatabasePlugin {
139   std::unique_ptr<CompilationDatabase>
140   loadFromDirectory(StringRef Directory, std::string &ErrorMessage) override {
141     SmallString<1024> JSONDatabasePath(Directory);
142     llvm::sys::path::append(JSONDatabasePath, "compile_commands.json");
143     std::unique_ptr<CompilationDatabase> Database(
144         JSONCompilationDatabase::loadFromFile(JSONDatabasePath, ErrorMessage));
145     if (!Database)
146       return nullptr;
147     return Database;
148   }
149 };
150 
151 } // end namespace
152 
// Register the JSONCompilationDatabasePlugin with the
// CompilationDatabasePluginRegistry using this statically initialized variable.
// The first string is the name the plugin is registered under, the second its
// human-readable description.
static CompilationDatabasePluginRegistry::Add<JSONCompilationDatabasePlugin>
X("json-compilation-database", "Reads JSON formatted compilation databases");
157 
// This anchor is used to force the linker to link in the generated object file
// and thus register the JSONCompilationDatabasePlugin.
// NOTE(review): presumably read from another translation unit to create the
// link-time dependency; the referencing code is not in this file.
volatile int JSONAnchorSource = 0;
161 
162 std::unique_ptr<JSONCompilationDatabase>
163 JSONCompilationDatabase::loadFromFile(StringRef FilePath,
164                                       std::string &ErrorMessage) {
165   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> DatabaseBuffer =
166       llvm::MemoryBuffer::getFile(FilePath);
167   if (std::error_code Result = DatabaseBuffer.getError()) {
168     ErrorMessage = "Error while opening JSON database: " + Result.message();
169     return nullptr;
170   }
171   std::unique_ptr<JSONCompilationDatabase> Database(
172       new JSONCompilationDatabase(std::move(*DatabaseBuffer)));
173   if (!Database->parse(ErrorMessage))
174     return nullptr;
175   return Database;
176 }
177 
178 std::unique_ptr<JSONCompilationDatabase>
179 JSONCompilationDatabase::loadFromBuffer(StringRef DatabaseString,
180                                         std::string &ErrorMessage) {
181   std::unique_ptr<llvm::MemoryBuffer> DatabaseBuffer(
182       llvm::MemoryBuffer::getMemBuffer(DatabaseString));
183   std::unique_ptr<JSONCompilationDatabase> Database(
184       new JSONCompilationDatabase(std::move(DatabaseBuffer)));
185   if (!Database->parse(ErrorMessage))
186     return nullptr;
187   return Database;
188 }
189 
190 std::vector<CompileCommand>
191 JSONCompilationDatabase::getCompileCommands(StringRef FilePath) const {
192   SmallString<128> NativeFilePath;
193   llvm::sys::path::native(FilePath, NativeFilePath);
194 
195   std::string Error;
196   llvm::raw_string_ostream ES(Error);
197   StringRef Match = MatchTrie.findEquivalent(NativeFilePath, ES);
198   if (Match.empty())
199     return std::vector<CompileCommand>();
200   llvm::StringMap< std::vector<CompileCommandRef> >::const_iterator
201     CommandsRefI = IndexByFile.find(Match);
202   if (CommandsRefI == IndexByFile.end())
203     return std::vector<CompileCommand>();
204   std::vector<CompileCommand> Commands;
205   getCommands(CommandsRefI->getValue(), Commands);
206   return Commands;
207 }
208 
209 std::vector<std::string>
210 JSONCompilationDatabase::getAllFiles() const {
211   std::vector<std::string> Result;
212 
213   llvm::StringMap< std::vector<CompileCommandRef> >::const_iterator
214     CommandsRefI = IndexByFile.begin();
215   const llvm::StringMap< std::vector<CompileCommandRef> >::const_iterator
216     CommandsRefEnd = IndexByFile.end();
217   for (; CommandsRefI != CommandsRefEnd; ++CommandsRefI) {
218     Result.push_back(CommandsRefI->first().str());
219   }
220 
221   return Result;
222 }
223 
224 std::vector<CompileCommand>
225 JSONCompilationDatabase::getAllCompileCommands() const {
226   std::vector<CompileCommand> Commands;
227   getCommands(AllCommands, Commands);
228   return Commands;
229 }
230 
231 static std::vector<std::string>
232 nodeToCommandLine(const std::vector<llvm::yaml::ScalarNode *> &Nodes) {
233   SmallString<1024> Storage;
234   if (Nodes.size() == 1) {
235     return unescapeCommandLine(Nodes[0]->getValue(Storage));
236   }
237   std::vector<std::string> Arguments;
238   for (auto *Node : Nodes) {
239     Arguments.push_back(Node->getValue(Storage));
240   }
241   return Arguments;
242 }
243 
244 void JSONCompilationDatabase::getCommands(
245     ArrayRef<CompileCommandRef> CommandsRef,
246     std::vector<CompileCommand> &Commands) const {
247   for (int I = 0, E = CommandsRef.size(); I != E; ++I) {
248     SmallString<8> DirectoryStorage;
249     SmallString<32> FilenameStorage;
250     Commands.emplace_back(
251       std::get<0>(CommandsRef[I])->getValue(DirectoryStorage),
252       std::get<1>(CommandsRef[I])->getValue(FilenameStorage),
253       nodeToCommandLine(std::get<2>(CommandsRef[I])));
254   }
255 }
256 
/// Parses the first document of YAMLStream and fills IndexByFile, AllCommands
/// and MatchTrie from it.
///
/// The document root must be a sequence of mappings. Each mapping must have
/// "directory" and "file" string values plus either a "command" string or an
/// "arguments" sequence of strings; any other key is rejected.
///
/// \param ErrorMessage set to a description of the first problem found.
/// \returns true on success, false on the first error. Entries accepted before
/// the error are not removed from the member containers.
bool JSONCompilationDatabase::parse(std::string &ErrorMessage) {
  llvm::yaml::document_iterator I = YAMLStream.begin();
  if (I == YAMLStream.end()) {
    ErrorMessage = "Error while parsing YAML.";
    return false;
  }
  llvm::yaml::Node *Root = I->getRoot();
  if (!Root) {
    ErrorMessage = "Error while parsing YAML.";
    return false;
  }
  // The top-level value has to be an array of command objects.
  llvm::yaml::SequenceNode *Array = dyn_cast<llvm::yaml::SequenceNode>(Root);
  if (!Array) {
    ErrorMessage = "Expected array.";
    return false;
  }
  for (auto& NextObject : *Array) {
    llvm::yaml::MappingNode *Object = dyn_cast<llvm::yaml::MappingNode>(&NextObject);
    if (!Object) {
      ErrorMessage = "Expected object.";
      return false;
    }
    // Values collected from this object; each stays unset until its key is
    // seen.
    llvm::yaml::ScalarNode *Directory = nullptr;
    llvm::Optional<std::vector<llvm::yaml::ScalarNode *>> Command;
    llvm::yaml::ScalarNode *File = nullptr;
    for (auto& NextKeyValue : *Object) {
      llvm::yaml::ScalarNode *KeyString =
          dyn_cast<llvm::yaml::ScalarNode>(NextKeyValue.getKey());
      if (!KeyString) {
        ErrorMessage = "Expected strings as key.";
        return false;
      }
      SmallString<10> KeyStorage;
      StringRef KeyValue = KeyString->getValue(KeyStorage);
      llvm::yaml::Node *Value = NextKeyValue.getValue();
      if (!Value) {
        ErrorMessage = "Expected value.";
        return false;
      }
      // At most one of these casts succeeds; which one is required depends on
      // the key.
      llvm::yaml::ScalarNode *ValueString =
          dyn_cast<llvm::yaml::ScalarNode>(Value);
      llvm::yaml::SequenceNode *SequenceString =
          dyn_cast<llvm::yaml::SequenceNode>(Value);
      // The value type is validated before the key name, so an unknown key
      // with a non-string value reports "Expected string as value.".
      if (KeyValue == "arguments" && !SequenceString) {
        ErrorMessage = "Expected sequence as value.";
        return false;
      } else if (KeyValue != "arguments" && !ValueString) {
        ErrorMessage = "Expected string as value.";
        return false;
      }
      if (KeyValue == "directory") {
        Directory = ValueString;
      } else if (KeyValue == "arguments") {
        // "arguments" always replaces whatever Command held before.
        Command = std::vector<llvm::yaml::ScalarNode *>();
        for (auto &Argument : *SequenceString) {
          auto Scalar = dyn_cast<llvm::yaml::ScalarNode>(&Argument);
          if (!Scalar) {
            ErrorMessage = "Only strings are allowed in 'arguments'.";
            return false;
          }
          Command->push_back(Scalar);
        }
      } else if (KeyValue == "command") {
        // "command" is only used if nothing has been stored yet, so
        // "arguments" wins regardless of key order.
        if (!Command)
          Command = std::vector<llvm::yaml::ScalarNode *>(1, ValueString);
      } else if (KeyValue == "file") {
        File = ValueString;
      } else {
        ErrorMessage = ("Unknown key: \"" +
                        KeyString->getRawValue() + "\"").str();
        return false;
      }
    }
    // All three pieces of information are mandatory.
    if (!File) {
      ErrorMessage = "Missing key: \"file\".";
      return false;
    }
    if (!Command) {
      ErrorMessage = "Missing key: \"command\" or \"arguments\".";
      return false;
    }
    if (!Directory) {
      ErrorMessage = "Missing key: \"directory\".";
      return false;
    }
    SmallString<8> FileStorage;
    StringRef FileName = File->getValue(FileStorage);
    // Compute the lookup key: the file path in native form, with relative
    // paths first appended to the entry's directory.
    SmallString<128> NativeFilePath;
    if (llvm::sys::path::is_relative(FileName)) {
      SmallString<8> DirectoryStorage;
      SmallString<128> AbsolutePath(
          Directory->getValue(DirectoryStorage));
      llvm::sys::path::append(AbsolutePath, FileName);
      llvm::sys::path::native(AbsolutePath, NativeFilePath);
    } else {
      llvm::sys::path::native(FileName, NativeFilePath);
    }
    // Record the entry in the per-file index, the flat list and the trie used
    // for path matching.
    auto Cmd = CompileCommandRef(Directory, File, *Command);
    IndexByFile[NativeFilePath].push_back(Cmd);
    AllCommands.push_back(Cmd);
    MatchTrie.insert(NativeFilePath);
  }
  return true;
}
361 
362 } // end namespace tooling
363 } // end namespace clang
364