xref: /llvm-project/clang-tools-extra/clangd/ConfigYAML.cpp (revision 1f90797f6a9d91d61e0f66b465b0467e4c66d0e0)
1 //===--- ConfigYAML.cpp - Loading configuration fragments from YAML files -===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 #include "ConfigFragment.h"
9 #include "llvm/ADT/SmallSet.h"
10 #include "llvm/ADT/SmallString.h"
11 #include "llvm/ADT/StringRef.h"
12 #include "llvm/Support/MemoryBuffer.h"
13 #include "llvm/Support/SourceMgr.h"
14 #include "llvm/Support/YAMLParser.h"
15 #include <optional>
16 #include <string>
17 
18 namespace clang {
19 namespace clangd {
20 namespace config {
21 namespace {
22 using llvm::yaml::BlockScalarNode;
23 using llvm::yaml::MappingNode;
24 using llvm::yaml::Node;
25 using llvm::yaml::ScalarNode;
26 using llvm::yaml::SequenceNode;
27 
28 std::optional<llvm::StringRef>
29 bestGuess(llvm::StringRef Search,
30           llvm::ArrayRef<llvm::StringRef> AllowedValues) {
31   unsigned MaxEdit = (Search.size() + 1) / 3;
32   if (!MaxEdit)
33     return std::nullopt;
34   std::optional<llvm::StringRef> Result;
35   for (const auto &AllowedValue : AllowedValues) {
36     unsigned EditDistance = Search.edit_distance(AllowedValue, true, MaxEdit);
37     // We can't do better than an edit distance of 1, so just return this and
38     // save computing other values.
39     if (EditDistance == 1U)
40       return AllowedValue;
41     if (EditDistance == MaxEdit && !Result) {
42       Result = AllowedValue;
43     } else if (EditDistance < MaxEdit) {
44       Result = AllowedValue;
45       MaxEdit = EditDistance;
46     }
47   }
48   return Result;
49 }
50 
51 class Parser {
52   llvm::SourceMgr &SM;
53   bool HadError = false;
54 
55 public:
56   Parser(llvm::SourceMgr &SM) : SM(SM) {}
57 
58   // Tries to parse N into F, returning false if it failed and we couldn't
59   // meaningfully recover (YAML syntax error, or hard semantic error).
60   bool parse(Fragment &F, Node &N) {
61     DictParser Dict("Config", this);
62     Dict.handle("If", [&](Node &N) { parse(F.If, N); });
63     Dict.handle("CompileFlags", [&](Node &N) { parse(F.CompileFlags, N); });
64     Dict.handle("Index", [&](Node &N) { parse(F.Index, N); });
65     Dict.handle("Style", [&](Node &N) { parse(F.Style, N); });
66     Dict.handle("Diagnostics", [&](Node &N) { parse(F.Diagnostics, N); });
67     Dict.handle("Completion", [&](Node &N) { parse(F.Completion, N); });
68     Dict.handle("Hover", [&](Node &N) { parse(F.Hover, N); });
69     Dict.handle("InlayHints", [&](Node &N) { parse(F.InlayHints, N); });
70     Dict.handle("SemanticTokens", [&](Node &N) { parse(F.SemanticTokens, N); });
71     Dict.parse(N);
72     return !(N.failed() || HadError);
73   }
74 
75 private:
76   void parse(Fragment::IfBlock &F, Node &N) {
77     DictParser Dict("If", this);
78     Dict.unrecognized([&](Located<std::string>, Node &) {
79       F.HasUnrecognizedCondition = true;
80       return true; // Emit a warning for the unrecognized key.
81     });
82     Dict.handle("PathMatch", [&](Node &N) {
83       if (auto Values = scalarValues(N))
84         F.PathMatch = std::move(*Values);
85     });
86     Dict.handle("PathExclude", [&](Node &N) {
87       if (auto Values = scalarValues(N))
88         F.PathExclude = std::move(*Values);
89     });
90     Dict.parse(N);
91   }
92 
93   void parse(Fragment::CompileFlagsBlock &F, Node &N) {
94     DictParser Dict("CompileFlags", this);
95     Dict.handle("Compiler", [&](Node &N) {
96       if (auto Value = scalarValue(N, "Compiler"))
97         F.Compiler = std::move(*Value);
98     });
99     Dict.handle("Add", [&](Node &N) {
100       if (auto Values = scalarValues(N))
101         F.Add = std::move(*Values);
102     });
103     Dict.handle("Remove", [&](Node &N) {
104       if (auto Values = scalarValues(N))
105         F.Remove = std::move(*Values);
106     });
107     Dict.handle("CompilationDatabase", [&](Node &N) {
108       F.CompilationDatabase = scalarValue(N, "CompilationDatabase");
109     });
110     Dict.parse(N);
111   }
112 
113   void parse(Fragment::StyleBlock &F, Node &N) {
114     DictParser Dict("Style", this);
115     Dict.handle("FullyQualifiedNamespaces", [&](Node &N) {
116       if (auto Values = scalarValues(N))
117         F.FullyQualifiedNamespaces = std::move(*Values);
118     });
119     Dict.handle("QuotedHeaders", [&](Node &N) {
120       if (auto Values = scalarValues(N))
121         F.QuotedHeaders = std::move(*Values);
122     });
123     Dict.handle("AngledHeaders", [&](Node &N) {
124       if (auto Values = scalarValues(N))
125         F.AngledHeaders = std::move(*Values);
126     });
127     Dict.parse(N);
128   }
129 
130   void parse(Fragment::DiagnosticsBlock &F, Node &N) {
131     DictParser Dict("Diagnostics", this);
132     Dict.handle("Suppress", [&](Node &N) {
133       if (auto Values = scalarValues(N))
134         F.Suppress = std::move(*Values);
135     });
136     Dict.handle("UnusedIncludes", [&](Node &N) {
137       F.UnusedIncludes = scalarValue(N, "UnusedIncludes");
138     });
139     Dict.handle("MissingIncludes", [&](Node &N) {
140       F.MissingIncludes = scalarValue(N, "MissingIncludes");
141     });
142     Dict.handle("Includes", [&](Node &N) { parse(F.Includes, N); });
143     Dict.handle("ClangTidy", [&](Node &N) { parse(F.ClangTidy, N); });
144     Dict.parse(N);
145   }
146 
147   void parse(Fragment::DiagnosticsBlock::ClangTidyBlock &F, Node &N) {
148     DictParser Dict("ClangTidy", this);
149     Dict.handle("Add", [&](Node &N) {
150       if (auto Values = scalarValues(N))
151         F.Add = std::move(*Values);
152     });
153     Dict.handle("Remove", [&](Node &N) {
154       if (auto Values = scalarValues(N))
155         F.Remove = std::move(*Values);
156     });
157     Dict.handle("CheckOptions", [&](Node &N) {
158       DictParser CheckOptDict("CheckOptions", this);
159       CheckOptDict.unrecognized([&](Located<std::string> &&Key, Node &Val) {
160         if (auto Value = scalarValue(Val, *Key))
161           F.CheckOptions.emplace_back(std::move(Key), std::move(*Value));
162         return false; // Don't emit a warning
163       });
164       CheckOptDict.parse(N);
165     });
166     Dict.handle("FastCheckFilter", [&](Node &N) {
167       if (auto FastCheckFilter = scalarValue(N, "FastCheckFilter"))
168         F.FastCheckFilter = *FastCheckFilter;
169     });
170     Dict.parse(N);
171   }
172 
173   void parse(Fragment::DiagnosticsBlock::IncludesBlock &F, Node &N) {
174     DictParser Dict("Includes", this);
175     Dict.handle("IgnoreHeader", [&](Node &N) {
176       if (auto Values = scalarValues(N))
177         F.IgnoreHeader = std::move(*Values);
178     });
179     Dict.handle("AnalyzeAngledIncludes", [&](Node &N) {
180       if (auto Value = boolValue(N, "AnalyzeAngledIncludes"))
181         F.AnalyzeAngledIncludes = *Value;
182     });
183     Dict.parse(N);
184   }
185 
186   void parse(Fragment::IndexBlock &F, Node &N) {
187     DictParser Dict("Index", this);
188     Dict.handle("Background",
189                 [&](Node &N) { F.Background = scalarValue(N, "Background"); });
190     Dict.handle("External", [&](Node &N) {
191       Fragment::IndexBlock::ExternalBlock External;
192       // External block can either be a mapping or a scalar value. Dispatch
193       // accordingly.
194       if (N.getType() == Node::NK_Mapping) {
195         parse(External, N);
196       } else if (N.getType() == Node::NK_Scalar ||
197                  N.getType() == Node::NK_BlockScalar) {
198         parse(External, *scalarValue(N, "External"));
199       } else {
200         error("External must be either a scalar or a mapping.", N);
201         return;
202       }
203       F.External.emplace(std::move(External));
204       F.External->Range = N.getSourceRange();
205     });
206     Dict.handle("StandardLibrary", [&](Node &N) {
207       if (auto StandardLibrary = boolValue(N, "StandardLibrary"))
208         F.StandardLibrary = *StandardLibrary;
209     });
210     Dict.parse(N);
211   }
212 
213   void parse(Fragment::IndexBlock::ExternalBlock &F,
214              Located<std::string> ExternalVal) {
215     if (!llvm::StringRef(*ExternalVal).equals_insensitive("none")) {
216       error("Only scalar value supported for External is 'None'",
217             ExternalVal.Range);
218       return;
219     }
220     F.IsNone = true;
221     F.IsNone.Range = ExternalVal.Range;
222   }
223 
224   void parse(Fragment::IndexBlock::ExternalBlock &F, Node &N) {
225     DictParser Dict("External", this);
226     Dict.handle("File", [&](Node &N) { F.File = scalarValue(N, "File"); });
227     Dict.handle("Server",
228                 [&](Node &N) { F.Server = scalarValue(N, "Server"); });
229     Dict.handle("MountPoint",
230                 [&](Node &N) { F.MountPoint = scalarValue(N, "MountPoint"); });
231     Dict.parse(N);
232   }
233 
234   void parse(Fragment::CompletionBlock &F, Node &N) {
235     DictParser Dict("Completion", this);
236     Dict.handle("AllScopes", [&](Node &N) {
237       if (auto AllScopes = boolValue(N, "AllScopes"))
238         F.AllScopes = *AllScopes;
239     });
240     Dict.handle("ArgumentLists", [&](Node &N) {
241       if (auto ArgumentLists = scalarValue(N, "ArgumentLists"))
242         F.ArgumentLists = *ArgumentLists;
243     });
244     Dict.parse(N);
245   }
246 
247   void parse(Fragment::HoverBlock &F, Node &N) {
248     DictParser Dict("Hover", this);
249     Dict.handle("ShowAKA", [&](Node &N) {
250       if (auto ShowAKA = boolValue(N, "ShowAKA"))
251         F.ShowAKA = *ShowAKA;
252     });
253     Dict.parse(N);
254   }
255 
256   void parse(Fragment::InlayHintsBlock &F, Node &N) {
257     DictParser Dict("InlayHints", this);
258     Dict.handle("Enabled", [&](Node &N) {
259       if (auto Value = boolValue(N, "Enabled"))
260         F.Enabled = *Value;
261     });
262     Dict.handle("ParameterNames", [&](Node &N) {
263       if (auto Value = boolValue(N, "ParameterNames"))
264         F.ParameterNames = *Value;
265     });
266     Dict.handle("DeducedTypes", [&](Node &N) {
267       if (auto Value = boolValue(N, "DeducedTypes"))
268         F.DeducedTypes = *Value;
269     });
270     Dict.handle("Designators", [&](Node &N) {
271       if (auto Value = boolValue(N, "Designators"))
272         F.Designators = *Value;
273     });
274     Dict.handle("BlockEnd", [&](Node &N) {
275       if (auto Value = boolValue(N, "BlockEnd"))
276         F.BlockEnd = *Value;
277     });
278     Dict.handle("DefaultArguments", [&](Node &N) {
279       if (auto Value = boolValue(N, "DefaultArguments"))
280         F.DefaultArguments = *Value;
281     });
282     Dict.handle("TypeNameLimit", [&](Node &N) {
283       if (auto Value = uint32Value(N, "TypeNameLimit"))
284         F.TypeNameLimit = *Value;
285     });
286     Dict.parse(N);
287   }
288 
289   void parse(Fragment::SemanticTokensBlock &F, Node &N) {
290     DictParser Dict("SemanticTokens", this);
291     Dict.handle("DisabledKinds", [&](Node &N) {
292       if (auto Values = scalarValues(N))
293         F.DisabledKinds = std::move(*Values);
294     });
295     Dict.handle("DisabledModifiers", [&](Node &N) {
296       if (auto Values = scalarValues(N))
297         F.DisabledModifiers = std::move(*Values);
298     });
299     Dict.parse(N);
300   }
301 
302   // Helper for parsing mapping nodes (dictionaries).
303   // We don't use YamlIO as we want to control over unknown keys.
304   class DictParser {
305     llvm::StringRef Description;
306     std::vector<std::pair<llvm::StringRef, std::function<void(Node &)>>> Keys;
307     std::function<bool(Located<std::string>, Node &)> UnknownHandler;
308     Parser *Outer;
309 
310   public:
311     DictParser(llvm::StringRef Description, Parser *Outer)
312         : Description(Description), Outer(Outer) {}
313 
314     // Parse is called when Key is encountered, and passed the associated value.
315     // It should emit diagnostics if the value is invalid (e.g. wrong type).
316     // If Key is seen twice, Parse runs only once and an error is reported.
317     void handle(llvm::StringLiteral Key, std::function<void(Node &)> Parse) {
318       for (const auto &Entry : Keys) {
319         (void)Entry;
320         assert(Entry.first != Key && "duplicate key handler");
321       }
322       Keys.emplace_back(Key, std::move(Parse));
323     }
324 
325     // Handler is called when a Key is not matched by any handle().
326     // If this is unset or the Handler returns true, a warning is emitted for
327     // the unknown key.
328     void
329     unrecognized(std::function<bool(Located<std::string>, Node &)> Handler) {
330       UnknownHandler = std::move(Handler);
331     }
332 
333     // Process a mapping node and call handlers for each key/value pair.
334     void parse(Node &N) const {
335       if (N.getType() != Node::NK_Mapping) {
336         Outer->error(Description + " should be a dictionary", N);
337         return;
338       }
339       llvm::SmallSet<std::string, 8> Seen;
340       llvm::SmallVector<Located<std::string>, 0> UnknownKeys;
341       // We *must* consume all items, even on error, or the parser will assert.
342       for (auto &KV : llvm::cast<MappingNode>(N)) {
343         auto *K = KV.getKey();
344         if (!K) // YAMLParser emitted an error.
345           continue;
346         auto Key = Outer->scalarValue(*K, "Dictionary key");
347         if (!Key)
348           continue;
349         if (!Seen.insert(**Key).second) {
350           Outer->warning("Duplicate key " + **Key + " is ignored", *K);
351           if (auto *Value = KV.getValue())
352             Value->skip();
353           continue;
354         }
355         auto *Value = KV.getValue();
356         if (!Value) // YAMLParser emitted an error.
357           continue;
358         bool Matched = false;
359         for (const auto &Handler : Keys) {
360           if (Handler.first == **Key) {
361             Matched = true;
362             Handler.second(*Value);
363             break;
364           }
365         }
366         if (!Matched) {
367           bool Warn = !UnknownHandler;
368           if (UnknownHandler)
369             Warn = UnknownHandler(
370                 Located<std::string>(**Key, K->getSourceRange()), *Value);
371           if (Warn)
372             UnknownKeys.push_back(std::move(*Key));
373         }
374       }
375       if (!UnknownKeys.empty())
376         warnUnknownKeys(UnknownKeys, Seen);
377     }
378 
379   private:
380     void warnUnknownKeys(llvm::ArrayRef<Located<std::string>> UnknownKeys,
381                          const llvm::SmallSet<std::string, 8> &SeenKeys) const {
382       llvm::SmallVector<llvm::StringRef> UnseenKeys;
383       for (const auto &KeyAndHandler : Keys)
384         if (!SeenKeys.count(KeyAndHandler.first.str()))
385           UnseenKeys.push_back(KeyAndHandler.first);
386 
387       for (const Located<std::string> &UnknownKey : UnknownKeys)
388         if (auto BestGuess = bestGuess(*UnknownKey, UnseenKeys))
389           Outer->warning("Unknown " + Description + " key '" + *UnknownKey +
390                              "'; did you mean '" + *BestGuess + "'?",
391                          UnknownKey.Range);
392         else
393           Outer->warning("Unknown " + Description + " key '" + *UnknownKey +
394                              "'",
395                          UnknownKey.Range);
396     }
397   };
398 
399   // Try to parse a single scalar value from the node, warn on failure.
400   std::optional<Located<std::string>> scalarValue(Node &N,
401                                                   llvm::StringRef Desc) {
402     llvm::SmallString<256> Buf;
403     if (auto *S = llvm::dyn_cast<ScalarNode>(&N))
404       return Located<std::string>(S->getValue(Buf).str(), N.getSourceRange());
405     if (auto *BS = llvm::dyn_cast<BlockScalarNode>(&N))
406       return Located<std::string>(BS->getValue().str(), N.getSourceRange());
407     warning(Desc + " should be scalar", N);
408     return std::nullopt;
409   }
410 
411   std::optional<Located<bool>> boolValue(Node &N, llvm::StringRef Desc) {
412     if (auto Scalar = scalarValue(N, Desc)) {
413       if (auto Bool = llvm::yaml::parseBool(**Scalar))
414         return Located<bool>(*Bool, Scalar->Range);
415       warning(Desc + " should be a boolean", N);
416     }
417     return std::nullopt;
418   }
419 
420   std::optional<Located<uint32_t>> uint32Value(Node &N, llvm::StringRef Desc) {
421     if (auto Scalar = scalarValue(N, Desc)) {
422       unsigned long long Num;
423       if (!llvm::getAsUnsignedInteger(**Scalar, 0, Num)) {
424         return Located<uint32_t>(Num, Scalar->Range);
425       }
426     }
427     warning(Desc + " invalid number", N);
428     return std::nullopt;
429   }
430 
431   // Try to parse a list of single scalar values, or just a single value.
432   std::optional<std::vector<Located<std::string>>> scalarValues(Node &N) {
433     std::vector<Located<std::string>> Result;
434     if (auto *S = llvm::dyn_cast<ScalarNode>(&N)) {
435       llvm::SmallString<256> Buf;
436       Result.emplace_back(S->getValue(Buf).str(), N.getSourceRange());
437     } else if (auto *S = llvm::dyn_cast<BlockScalarNode>(&N)) {
438       Result.emplace_back(S->getValue().str(), N.getSourceRange());
439     } else if (auto *S = llvm::dyn_cast<SequenceNode>(&N)) {
440       // We *must* consume all items, even on error, or the parser will assert.
441       for (auto &Child : *S) {
442         if (auto Value = scalarValue(Child, "List item"))
443           Result.push_back(std::move(*Value));
444       }
445     } else {
446       warning("Expected scalar or list of scalars", N);
447       return std::nullopt;
448     }
449     return Result;
450   }
451 
452   // Report a "hard" error, reflecting a config file that can never be valid.
453   void error(const llvm::Twine &Msg, llvm::SMRange Range) {
454     HadError = true;
455     SM.PrintMessage(Range.Start, llvm::SourceMgr::DK_Error, Msg, Range);
456   }
457   void error(const llvm::Twine &Msg, const Node &N) {
458     return error(Msg, N.getSourceRange());
459   }
460 
461   // Report a "soft" error that could be caused by e.g. version skew.
462   void warning(const llvm::Twine &Msg, llvm::SMRange Range) {
463     SM.PrintMessage(Range.Start, llvm::SourceMgr::DK_Warning, Msg, Range);
464   }
465   void warning(const llvm::Twine &Msg, const Node &N) {
466     return warning(Msg, N.getSourceRange());
467   }
468 };
469 
470 } // namespace
471 
472 std::vector<Fragment> Fragment::parseYAML(llvm::StringRef YAML,
473                                           llvm::StringRef BufferName,
474                                           DiagnosticCallback Diags) {
475   // The YAML document may contain multiple conditional fragments.
476   // The SourceManager is shared for all of them.
477   auto SM = std::make_shared<llvm::SourceMgr>();
478   auto Buf = llvm::MemoryBuffer::getMemBufferCopy(YAML, BufferName);
479   // Adapt DiagnosticCallback to function-pointer interface.
480   // Callback receives both errors we emit and those from the YAML parser.
481   SM->setDiagHandler(
482       [](const llvm::SMDiagnostic &Diag, void *Ctx) {
483         (*reinterpret_cast<DiagnosticCallback *>(Ctx))(Diag);
484       },
485       &Diags);
486   std::vector<Fragment> Result;
487   for (auto &Doc : llvm::yaml::Stream(*Buf, *SM)) {
488     if (Node *N = Doc.getRoot()) {
489       Fragment Fragment;
490       Fragment.Source.Manager = SM;
491       Fragment.Source.Location = N->getSourceRange().Start;
492       SM->PrintMessage(Fragment.Source.Location, llvm::SourceMgr::DK_Note,
493                        "Parsing config fragment");
494       if (Parser(*SM).parse(Fragment, *N))
495         Result.push_back(std::move(Fragment));
496     }
497   }
498   SM->PrintMessage(SM->FindLocForLineAndColumn(SM->getMainFileID(), 0, 0),
499                    llvm::SourceMgr::DK_Note,
500                    "Parsed " + llvm::Twine(Result.size()) +
501                        " fragments from file");
502   // Hack: stash the buffer in the SourceMgr to keep it alive.
503   // SM has two entries: "main" non-owning buffer, and ignored owning buffer.
504   SM->AddNewSourceBuffer(std::move(Buf), llvm::SMLoc());
505   return Result;
506 }
507 
508 } // namespace config
509 } // namespace clangd
510 } // namespace clang
511