xref: /llvm-project/clang/lib/Tooling/JSONCompilationDatabase.cpp (revision 9e60a2ad734e1d598f93e7148c6339795b57359c)
1 //===--- JSONCompilationDatabase.cpp - ------------------------------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 //  This file contains the implementation of the JSONCompilationDatabase.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Tooling/JSONCompilationDatabase.h"
15 #include "clang/Tooling/CompilationDatabase.h"
16 #include "clang/Tooling/CompilationDatabasePluginRegistry.h"
17 #include "clang/Tooling/Tooling.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/Support/Allocator.h"
20 #include "llvm/Support/CommandLine.h"
21 #include "llvm/Support/Path.h"
22 #include "llvm/Support/StringSaver.h"
23 #include <system_error>
24 
25 namespace clang {
26 namespace tooling {
27 
28 namespace {
29 
30 /// \brief A parser for escaped strings of command line arguments.
31 ///
32 /// Assumes \-escaping for quoted arguments (see the documentation of
33 /// unescapeCommandLine(...)).
34 class CommandLineArgumentParser {
35  public:
36   CommandLineArgumentParser(StringRef CommandLine)
37       : Input(CommandLine), Position(Input.begin()-1) {}
38 
39   std::vector<std::string> parse() {
40     bool HasMoreInput = true;
41     while (HasMoreInput && nextNonWhitespace()) {
42       std::string Argument;
43       HasMoreInput = parseStringInto(Argument);
44       CommandLine.push_back(Argument);
45     }
46     return CommandLine;
47   }
48 
49  private:
50   // All private methods return true if there is more input available.
51 
52   bool parseStringInto(std::string &String) {
53     do {
54       if (*Position == '"') {
55         if (!parseDoubleQuotedStringInto(String)) return false;
56       } else if (*Position == '\'') {
57         if (!parseSingleQuotedStringInto(String)) return false;
58       } else {
59         if (!parseFreeStringInto(String)) return false;
60       }
61     } while (*Position != ' ');
62     return true;
63   }
64 
65   bool parseDoubleQuotedStringInto(std::string &String) {
66     if (!next()) return false;
67     while (*Position != '"') {
68       if (!skipEscapeCharacter()) return false;
69       String.push_back(*Position);
70       if (!next()) return false;
71     }
72     return next();
73   }
74 
75   bool parseSingleQuotedStringInto(std::string &String) {
76     if (!next()) return false;
77     while (*Position != '\'') {
78       String.push_back(*Position);
79       if (!next()) return false;
80     }
81     return next();
82   }
83 
84   bool parseFreeStringInto(std::string &String) {
85     do {
86       if (!skipEscapeCharacter()) return false;
87       String.push_back(*Position);
88       if (!next()) return false;
89     } while (*Position != ' ' && *Position != '"' && *Position != '\'');
90     return true;
91   }
92 
93   bool skipEscapeCharacter() {
94     if (*Position == '\\') {
95       return next();
96     }
97     return true;
98   }
99 
100   bool nextNonWhitespace() {
101     do {
102       if (!next()) return false;
103     } while (*Position == ' ');
104     return true;
105   }
106 
107   bool next() {
108     ++Position;
109     return Position != Input.end();
110   }
111 
112   const StringRef Input;
113   StringRef::iterator Position;
114   std::vector<std::string> CommandLine;
115 };
116 
117 std::vector<std::string> unescapeCommandLine(JSONCommandLineSyntax Syntax,
118                                              StringRef EscapedCommandLine) {
119   if (Syntax == JSONCommandLineSyntax::AutoDetect) {
120     llvm::Triple Triple(llvm::sys::getProcessTriple());
121     if (Triple.getOS() == llvm::Triple::OSType::Win32) {
122       // Assume Windows command line parsing on Win32 unless the triple
123       // explicitly
124       // tells us otherwise.
125       if (!Triple.hasEnvironment() ||
126           Triple.getEnvironment() == llvm::Triple::EnvironmentType::MSVC)
127         Syntax = JSONCommandLineSyntax::Windows;
128       else
129         Syntax = JSONCommandLineSyntax::Gnu;
130     }
131   }
132 
133   if (Syntax == JSONCommandLineSyntax::Windows) {
134     llvm::BumpPtrAllocator Alloc;
135     llvm::StringSaver Saver(Alloc);
136     llvm::SmallVector<const char *, 64> T;
137     llvm::cl::TokenizeWindowsCommandLine(EscapedCommandLine, Saver, T);
138     std::vector<std::string> Result(T.begin(), T.end());
139     return Result;
140   }
141   assert(Syntax == JSONCommandLineSyntax::Gnu);
142   CommandLineArgumentParser parser(EscapedCommandLine);
143   return parser.parse();
144 }
145 
146 class JSONCompilationDatabasePlugin : public CompilationDatabasePlugin {
147   std::unique_ptr<CompilationDatabase>
148   loadFromDirectory(StringRef Directory, std::string &ErrorMessage) override {
149     SmallString<1024> JSONDatabasePath(Directory);
150     llvm::sys::path::append(JSONDatabasePath, "compile_commands.json");
151     std::unique_ptr<CompilationDatabase> Database(
152         JSONCompilationDatabase::loadFromFile(
153             JSONDatabasePath, ErrorMessage, JSONCommandLineSyntax::AutoDetect));
154     if (!Database)
155       return nullptr;
156     return Database;
157   }
158 };
159 
160 } // end namespace
161 
162 // Register the JSONCompilationDatabasePlugin with the
163 // CompilationDatabasePluginRegistry using this statically initialized variable.
164 static CompilationDatabasePluginRegistry::Add<JSONCompilationDatabasePlugin>
165 X("json-compilation-database", "Reads JSON formatted compilation databases");
166 
167 // This anchor is used to force the linker to link in the generated object file
168 // and thus register the JSONCompilationDatabasePlugin.
169 volatile int JSONAnchorSource = 0;
170 
171 std::unique_ptr<JSONCompilationDatabase>
172 JSONCompilationDatabase::loadFromFile(StringRef FilePath,
173                                       std::string &ErrorMessage,
174                                       JSONCommandLineSyntax Syntax) {
175   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> DatabaseBuffer =
176       llvm::MemoryBuffer::getFile(FilePath);
177   if (std::error_code Result = DatabaseBuffer.getError()) {
178     ErrorMessage = "Error while opening JSON database: " + Result.message();
179     return nullptr;
180   }
181   std::unique_ptr<JSONCompilationDatabase> Database(
182       new JSONCompilationDatabase(std::move(*DatabaseBuffer), Syntax));
183   if (!Database->parse(ErrorMessage))
184     return nullptr;
185   return Database;
186 }
187 
188 std::unique_ptr<JSONCompilationDatabase>
189 JSONCompilationDatabase::loadFromBuffer(StringRef DatabaseString,
190                                         std::string &ErrorMessage,
191                                         JSONCommandLineSyntax Syntax) {
192   std::unique_ptr<llvm::MemoryBuffer> DatabaseBuffer(
193       llvm::MemoryBuffer::getMemBuffer(DatabaseString));
194   std::unique_ptr<JSONCompilationDatabase> Database(
195       new JSONCompilationDatabase(std::move(DatabaseBuffer), Syntax));
196   if (!Database->parse(ErrorMessage))
197     return nullptr;
198   return Database;
199 }
200 
201 std::vector<CompileCommand>
202 JSONCompilationDatabase::getCompileCommands(StringRef FilePath) const {
203   SmallString<128> NativeFilePath;
204   llvm::sys::path::native(FilePath, NativeFilePath);
205 
206   std::string Error;
207   llvm::raw_string_ostream ES(Error);
208   StringRef Match = MatchTrie.findEquivalent(NativeFilePath, ES);
209   if (Match.empty())
210     return std::vector<CompileCommand>();
211   llvm::StringMap< std::vector<CompileCommandRef> >::const_iterator
212     CommandsRefI = IndexByFile.find(Match);
213   if (CommandsRefI == IndexByFile.end())
214     return std::vector<CompileCommand>();
215   std::vector<CompileCommand> Commands;
216   getCommands(CommandsRefI->getValue(), Commands);
217   return Commands;
218 }
219 
220 std::vector<std::string>
221 JSONCompilationDatabase::getAllFiles() const {
222   std::vector<std::string> Result;
223 
224   llvm::StringMap< std::vector<CompileCommandRef> >::const_iterator
225     CommandsRefI = IndexByFile.begin();
226   const llvm::StringMap< std::vector<CompileCommandRef> >::const_iterator
227     CommandsRefEnd = IndexByFile.end();
228   for (; CommandsRefI != CommandsRefEnd; ++CommandsRefI) {
229     Result.push_back(CommandsRefI->first().str());
230   }
231 
232   return Result;
233 }
234 
235 std::vector<CompileCommand>
236 JSONCompilationDatabase::getAllCompileCommands() const {
237   std::vector<CompileCommand> Commands;
238   getCommands(AllCommands, Commands);
239   return Commands;
240 }
241 
242 static std::vector<std::string>
243 nodeToCommandLine(JSONCommandLineSyntax Syntax,
244                   const std::vector<llvm::yaml::ScalarNode *> &Nodes) {
245   SmallString<1024> Storage;
246   if (Nodes.size() == 1) {
247     return unescapeCommandLine(Syntax, Nodes[0]->getValue(Storage));
248   }
249   std::vector<std::string> Arguments;
250   for (auto *Node : Nodes) {
251     Arguments.push_back(Node->getValue(Storage));
252   }
253   return Arguments;
254 }
255 
256 void JSONCompilationDatabase::getCommands(
257     ArrayRef<CompileCommandRef> CommandsRef,
258     std::vector<CompileCommand> &Commands) const {
259   for (int I = 0, E = CommandsRef.size(); I != E; ++I) {
260     SmallString<8> DirectoryStorage;
261     SmallString<32> FilenameStorage;
262     Commands.emplace_back(
263         std::get<0>(CommandsRef[I])->getValue(DirectoryStorage),
264         std::get<1>(CommandsRef[I])->getValue(FilenameStorage),
265         nodeToCommandLine(Syntax, std::get<2>(CommandsRef[I])));
266   }
267 }
268 
269 bool JSONCompilationDatabase::parse(std::string &ErrorMessage) {
270   llvm::yaml::document_iterator I = YAMLStream.begin();
271   if (I == YAMLStream.end()) {
272     ErrorMessage = "Error while parsing YAML.";
273     return false;
274   }
275   llvm::yaml::Node *Root = I->getRoot();
276   if (!Root) {
277     ErrorMessage = "Error while parsing YAML.";
278     return false;
279   }
280   llvm::yaml::SequenceNode *Array = dyn_cast<llvm::yaml::SequenceNode>(Root);
281   if (!Array) {
282     ErrorMessage = "Expected array.";
283     return false;
284   }
285   for (auto& NextObject : *Array) {
286     llvm::yaml::MappingNode *Object = dyn_cast<llvm::yaml::MappingNode>(&NextObject);
287     if (!Object) {
288       ErrorMessage = "Expected object.";
289       return false;
290     }
291     llvm::yaml::ScalarNode *Directory = nullptr;
292     llvm::Optional<std::vector<llvm::yaml::ScalarNode *>> Command;
293     llvm::yaml::ScalarNode *File = nullptr;
294     for (auto& NextKeyValue : *Object) {
295       llvm::yaml::ScalarNode *KeyString =
296           dyn_cast<llvm::yaml::ScalarNode>(NextKeyValue.getKey());
297       if (!KeyString) {
298         ErrorMessage = "Expected strings as key.";
299         return false;
300       }
301       SmallString<10> KeyStorage;
302       StringRef KeyValue = KeyString->getValue(KeyStorage);
303       llvm::yaml::Node *Value = NextKeyValue.getValue();
304       if (!Value) {
305         ErrorMessage = "Expected value.";
306         return false;
307       }
308       llvm::yaml::ScalarNode *ValueString =
309           dyn_cast<llvm::yaml::ScalarNode>(Value);
310       llvm::yaml::SequenceNode *SequenceString =
311           dyn_cast<llvm::yaml::SequenceNode>(Value);
312       if (KeyValue == "arguments" && !SequenceString) {
313         ErrorMessage = "Expected sequence as value.";
314         return false;
315       } else if (KeyValue != "arguments" && !ValueString) {
316         ErrorMessage = "Expected string as value.";
317         return false;
318       }
319       if (KeyValue == "directory") {
320         Directory = ValueString;
321       } else if (KeyValue == "arguments") {
322         Command = std::vector<llvm::yaml::ScalarNode *>();
323         for (auto &Argument : *SequenceString) {
324           auto Scalar = dyn_cast<llvm::yaml::ScalarNode>(&Argument);
325           if (!Scalar) {
326             ErrorMessage = "Only strings are allowed in 'arguments'.";
327             return false;
328           }
329           Command->push_back(Scalar);
330         }
331       } else if (KeyValue == "command") {
332         if (!Command)
333           Command = std::vector<llvm::yaml::ScalarNode *>(1, ValueString);
334       } else if (KeyValue == "file") {
335         File = ValueString;
336       } else {
337         ErrorMessage = ("Unknown key: \"" +
338                         KeyString->getRawValue() + "\"").str();
339         return false;
340       }
341     }
342     if (!File) {
343       ErrorMessage = "Missing key: \"file\".";
344       return false;
345     }
346     if (!Command) {
347       ErrorMessage = "Missing key: \"command\" or \"arguments\".";
348       return false;
349     }
350     if (!Directory) {
351       ErrorMessage = "Missing key: \"directory\".";
352       return false;
353     }
354     SmallString<8> FileStorage;
355     StringRef FileName = File->getValue(FileStorage);
356     SmallString<128> NativeFilePath;
357     if (llvm::sys::path::is_relative(FileName)) {
358       SmallString<8> DirectoryStorage;
359       SmallString<128> AbsolutePath(
360           Directory->getValue(DirectoryStorage));
361       llvm::sys::path::append(AbsolutePath, FileName);
362       llvm::sys::path::native(AbsolutePath, NativeFilePath);
363     } else {
364       llvm::sys::path::native(FileName, NativeFilePath);
365     }
366     auto Cmd = CompileCommandRef(Directory, File, *Command);
367     IndexByFile[NativeFilePath].push_back(Cmd);
368     AllCommands.push_back(Cmd);
369     MatchTrie.insert(NativeFilePath);
370   }
371   return true;
372 }
373 
374 } // end namespace tooling
375 } // end namespace clang
376