xref: /llvm-project/clang/lib/Tooling/FileMatchTrie.cpp (revision d19f0666bcd8f7d26aaf4019244c3ed91e47b1b7)
16366efedSEugene Zelenko //===- FileMatchTrie.cpp --------------------------------------------------===//
226cf9c43SDaniel Jasper //
32946cd70SChandler Carruth // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
42946cd70SChandler Carruth // See https://llvm.org/LICENSE.txt for license information.
52946cd70SChandler Carruth // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
626cf9c43SDaniel Jasper //
726cf9c43SDaniel Jasper //===----------------------------------------------------------------------===//
826cf9c43SDaniel Jasper //
926cf9c43SDaniel Jasper //  This file contains the implementation of a FileMatchTrie.
1026cf9c43SDaniel Jasper //
1126cf9c43SDaniel Jasper //===----------------------------------------------------------------------===//
1226cf9c43SDaniel Jasper 
1326cf9c43SDaniel Jasper #include "clang/Tooling/FileMatchTrie.h"
1426cf9c43SDaniel Jasper #include "llvm/ADT/StringMap.h"
156366efedSEugene Zelenko #include "llvm/ADT/StringRef.h"
1626cf9c43SDaniel Jasper #include "llvm/Support/FileSystem.h"
17552c169eSRafael Espindola #include "llvm/Support/Path.h"
188c059026SDaniel Jasper #include "llvm/Support/raw_ostream.h"
196366efedSEugene Zelenko #include <string>
206366efedSEugene Zelenko #include <vector>
216366efedSEugene Zelenko 
226a1457e6SBenjamin Kramer using namespace clang;
236a1457e6SBenjamin Kramer using namespace tooling;
2426cf9c43SDaniel Jasper 
256a1457e6SBenjamin Kramer namespace {
266366efedSEugene Zelenko 
279fc8faf9SAdrian Prantl /// Default \c PathComparator using \c llvm::sys::fs::equivalent().
2826cf9c43SDaniel Jasper struct DefaultPathComparator : public PathComparator {
equivalent__anon0dd710d90111::DefaultPathComparator29fb6b25b5SCraig Topper   bool equivalent(StringRef FileA, StringRef FileB) const override {
30fddb32c3SDaniel Jasper     return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB);
3126cf9c43SDaniel Jasper   }
3226cf9c43SDaniel Jasper };
336366efedSEugene Zelenko 
346366efedSEugene Zelenko } // namespace
3526cf9c43SDaniel Jasper 
366a1457e6SBenjamin Kramer namespace clang {
376a1457e6SBenjamin Kramer namespace tooling {
386366efedSEugene Zelenko 
399fc8faf9SAdrian Prantl /// A node of the \c FileMatchTrie.
4026cf9c43SDaniel Jasper ///
4126cf9c43SDaniel Jasper /// Each node has storage for up to one path and a map mapping a path segment to
4226cf9c43SDaniel Jasper /// child nodes. The trie starts with an empty root node.
4326cf9c43SDaniel Jasper class FileMatchTrieNode {
4426cf9c43SDaniel Jasper public:
459fc8faf9SAdrian Prantl   /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes
4626cf9c43SDaniel Jasper   /// the number of \c NewPath's trailing characters already consumed during
4726cf9c43SDaniel Jasper   /// recursion.
4826cf9c43SDaniel Jasper   ///
4926cf9c43SDaniel Jasper   /// An insert of a path
5026cf9c43SDaniel Jasper   /// 'p'starts at the root node and does the following:
5126cf9c43SDaniel Jasper   /// - If the node is empty, insert 'p' into its storage and abort.
5226cf9c43SDaniel Jasper   /// - If the node has a path 'p2' but no children, take the last path segment
5326cf9c43SDaniel Jasper   ///   's' of 'p2', put a new child into the map at 's' an insert the rest of
5426cf9c43SDaniel Jasper   ///   'p2' there.
5526cf9c43SDaniel Jasper   /// - Insert a new child for the last segment of 'p' and insert the rest of
5626cf9c43SDaniel Jasper   ///   'p' there.
5726cf9c43SDaniel Jasper   ///
5826cf9c43SDaniel Jasper   /// An insert operation is linear in the number of a path's segments.
insert(StringRef NewPath,unsigned ConsumedLength=0)5926cf9c43SDaniel Jasper   void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
6026cf9c43SDaniel Jasper     // We cannot put relative paths into the FileMatchTrie as then a path can be
6126cf9c43SDaniel Jasper     // a postfix of another path, violating a core assumption of the trie.
6226cf9c43SDaniel Jasper     if (llvm::sys::path::is_relative(NewPath))
6326cf9c43SDaniel Jasper       return;
6426cf9c43SDaniel Jasper     if (Path.empty()) {
6526cf9c43SDaniel Jasper       // This is an empty leaf. Store NewPath and return.
66adcd0268SBenjamin Kramer       Path = std::string(NewPath);
6726cf9c43SDaniel Jasper       return;
6826cf9c43SDaniel Jasper     }
6926cf9c43SDaniel Jasper     if (Children.empty()) {
7026cf9c43SDaniel Jasper       // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
7126cf9c43SDaniel Jasper       if (NewPath == Path)
7226cf9c43SDaniel Jasper           return;
7326cf9c43SDaniel Jasper       // Make this a node and create a child-leaf with 'Path'.
7426cf9c43SDaniel Jasper       StringRef Element(llvm::sys::path::filename(
7526cf9c43SDaniel Jasper           StringRef(Path).drop_back(ConsumedLength)));
7626cf9c43SDaniel Jasper       Children[Element].Path = Path;
7726cf9c43SDaniel Jasper     }
7826cf9c43SDaniel Jasper     StringRef Element(llvm::sys::path::filename(
7926cf9c43SDaniel Jasper           StringRef(NewPath).drop_back(ConsumedLength)));
8026cf9c43SDaniel Jasper     Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
8126cf9c43SDaniel Jasper   }
8226cf9c43SDaniel Jasper 
839fc8faf9SAdrian Prantl   /// Tries to find the node under this \c FileMatchTrieNode that best
8426cf9c43SDaniel Jasper   /// matches 'FileName'.
8526cf9c43SDaniel Jasper   ///
8626cf9c43SDaniel Jasper   /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to
8726cf9c43SDaniel Jasper   /// \c true and an empty string is returned. If no path fits 'FileName', an
8826cf9c43SDaniel Jasper   /// empty string is returned. \c ConsumedLength denotes the number of
8926cf9c43SDaniel Jasper   /// \c Filename's trailing characters already consumed during recursion.
9026cf9c43SDaniel Jasper   ///
9126cf9c43SDaniel Jasper   /// To find the best matching node for a given path 'p', the
9226cf9c43SDaniel Jasper   /// \c findEquivalent() function is called recursively for each path segment
932a8c18d9SAlexander Kornienko   /// (back to front) of 'p' until a node 'n' is reached that does not ..
9426cf9c43SDaniel Jasper   /// - .. have children. In this case it is checked
9526cf9c43SDaniel Jasper   ///   whether the stored path is equivalent to 'p'. If yes, the best match is
9626cf9c43SDaniel Jasper   ///   found. Otherwise continue with the parent node as if this node did not
9726cf9c43SDaniel Jasper   ///   exist.
9826cf9c43SDaniel Jasper   /// - .. a child matching the next path segment. In this case, all children of
9926cf9c43SDaniel Jasper   ///   'n' are an equally good match for 'p'. All children are of 'n' are found
10026cf9c43SDaniel Jasper   ///   recursively and their equivalence to 'p' is determined. If none are
10126cf9c43SDaniel Jasper   ///   equivalent, continue with the parent node as if 'n' didn't exist. If one
10226cf9c43SDaniel Jasper   ///   is equivalent, the best match is found. Otherwise, report and ambigiuity
10326cf9c43SDaniel Jasper   ///   error.
findEquivalent(const PathComparator & Comparator,StringRef FileName,bool & IsAmbiguous,unsigned ConsumedLength=0) const10426cf9c43SDaniel Jasper   StringRef findEquivalent(const PathComparator& Comparator,
10526cf9c43SDaniel Jasper                            StringRef FileName,
10626cf9c43SDaniel Jasper                            bool &IsAmbiguous,
10726cf9c43SDaniel Jasper                            unsigned ConsumedLength = 0) const {
108*d19f0666SAleksandr Platonov     // Note: we support only directory symlinks for performance reasons.
10926cf9c43SDaniel Jasper     if (Children.empty()) {
110*d19f0666SAleksandr Platonov       // As far as we do not support file symlinks, compare
111*d19f0666SAleksandr Platonov       // basenames here to avoid request to file system.
112*d19f0666SAleksandr Platonov       if (llvm::sys::path::filename(Path) ==
113*d19f0666SAleksandr Platonov               llvm::sys::path::filename(FileName) &&
114*d19f0666SAleksandr Platonov           Comparator.equivalent(StringRef(Path), FileName))
11526cf9c43SDaniel Jasper         return StringRef(Path);
1166366efedSEugene Zelenko       return {};
11726cf9c43SDaniel Jasper     }
11826cf9c43SDaniel Jasper     StringRef Element(llvm::sys::path::filename(FileName.drop_back(
11926cf9c43SDaniel Jasper         ConsumedLength)));
12026cf9c43SDaniel Jasper     llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild =
12126cf9c43SDaniel Jasper         Children.find(Element);
12226cf9c43SDaniel Jasper     if (MatchingChild != Children.end()) {
12326cf9c43SDaniel Jasper       StringRef Result = MatchingChild->getValue().findEquivalent(
12426cf9c43SDaniel Jasper           Comparator, FileName, IsAmbiguous,
12526cf9c43SDaniel Jasper           ConsumedLength + Element.size() + 1);
12626cf9c43SDaniel Jasper       if (!Result.empty() || IsAmbiguous)
12726cf9c43SDaniel Jasper         return Result;
12826cf9c43SDaniel Jasper     }
129*d19f0666SAleksandr Platonov 
130*d19f0666SAleksandr Platonov     // If `ConsumedLength` is zero, this is the root and we have no filename
131*d19f0666SAleksandr Platonov     // match. Give up in this case, we don't try to find symlinks with
132*d19f0666SAleksandr Platonov     // different names.
133*d19f0666SAleksandr Platonov     if (ConsumedLength == 0)
134*d19f0666SAleksandr Platonov       return {};
135*d19f0666SAleksandr Platonov 
13626cf9c43SDaniel Jasper     std::vector<StringRef> AllChildren;
13726cf9c43SDaniel Jasper     getAll(AllChildren, MatchingChild);
13826cf9c43SDaniel Jasper     StringRef Result;
1396366efedSEugene Zelenko     for (const auto &Child : AllChildren) {
1406366efedSEugene Zelenko       if (Comparator.equivalent(Child, FileName)) {
14126cf9c43SDaniel Jasper         if (Result.empty()) {
1426366efedSEugene Zelenko           Result = Child;
14326cf9c43SDaniel Jasper         } else {
14426cf9c43SDaniel Jasper           IsAmbiguous = true;
1456366efedSEugene Zelenko           return {};
14626cf9c43SDaniel Jasper         }
14726cf9c43SDaniel Jasper       }
14826cf9c43SDaniel Jasper     }
14926cf9c43SDaniel Jasper     return Result;
15026cf9c43SDaniel Jasper   }
15126cf9c43SDaniel Jasper 
15226cf9c43SDaniel Jasper private:
1539fc8faf9SAdrian Prantl   /// Gets all paths under this FileMatchTrieNode.
getAll(std::vector<StringRef> & Results,llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const15426cf9c43SDaniel Jasper   void getAll(std::vector<StringRef> &Results,
15526cf9c43SDaniel Jasper               llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
15626cf9c43SDaniel Jasper     if (Path.empty())
15726cf9c43SDaniel Jasper       return;
15826cf9c43SDaniel Jasper     if (Children.empty()) {
15926cf9c43SDaniel Jasper       Results.push_back(StringRef(Path));
16026cf9c43SDaniel Jasper       return;
16126cf9c43SDaniel Jasper     }
16226cf9c43SDaniel Jasper     for (llvm::StringMap<FileMatchTrieNode>::const_iterator
16326cf9c43SDaniel Jasper          It = Children.begin(), E = Children.end();
16426cf9c43SDaniel Jasper          It != E; ++It) {
16526cf9c43SDaniel Jasper       if (It == Except)
16626cf9c43SDaniel Jasper         continue;
16726cf9c43SDaniel Jasper       It->getValue().getAll(Results, Children.end());
16826cf9c43SDaniel Jasper     }
16926cf9c43SDaniel Jasper   }
17026cf9c43SDaniel Jasper 
17126cf9c43SDaniel Jasper   // The stored absolute path in this node. Only valid for leaf nodes, i.e.
17226cf9c43SDaniel Jasper   // nodes where Children.empty().
17326cf9c43SDaniel Jasper   std::string Path;
17426cf9c43SDaniel Jasper 
17526cf9c43SDaniel Jasper   // The children of this node stored in a map based on the next path segment.
17626cf9c43SDaniel Jasper   llvm::StringMap<FileMatchTrieNode> Children;
17726cf9c43SDaniel Jasper };
1786366efedSEugene Zelenko 
1796366efedSEugene Zelenko } // namespace tooling
1806366efedSEugene Zelenko } // namespace clang
18126cf9c43SDaniel Jasper 
FileMatchTrie()18226cf9c43SDaniel Jasper FileMatchTrie::FileMatchTrie()
18326cf9c43SDaniel Jasper     : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}
18426cf9c43SDaniel Jasper 
FileMatchTrie(PathComparator * Comparator)18526cf9c43SDaniel Jasper FileMatchTrie::FileMatchTrie(PathComparator *Comparator)
18626cf9c43SDaniel Jasper     : Root(new FileMatchTrieNode), Comparator(Comparator) {}
18726cf9c43SDaniel Jasper 
~FileMatchTrie()18826cf9c43SDaniel Jasper FileMatchTrie::~FileMatchTrie() {
18926cf9c43SDaniel Jasper   delete Root;
19026cf9c43SDaniel Jasper }
19126cf9c43SDaniel Jasper 
insert(StringRef NewPath)19226cf9c43SDaniel Jasper void FileMatchTrie::insert(StringRef NewPath) {
19326cf9c43SDaniel Jasper   Root->insert(NewPath);
19426cf9c43SDaniel Jasper }
19526cf9c43SDaniel Jasper 
findEquivalent(StringRef FileName,raw_ostream & Error) const19626cf9c43SDaniel Jasper StringRef FileMatchTrie::findEquivalent(StringRef FileName,
197f857950dSDmitri Gribenko                                         raw_ostream &Error) const {
19826cf9c43SDaniel Jasper   if (llvm::sys::path::is_relative(FileName)) {
19926cf9c43SDaniel Jasper     Error << "Cannot resolve relative paths";
2006366efedSEugene Zelenko     return {};
20126cf9c43SDaniel Jasper   }
20226cf9c43SDaniel Jasper   bool IsAmbiguous = false;
20326cf9c43SDaniel Jasper   StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous);
20426cf9c43SDaniel Jasper   if (IsAmbiguous)
20526cf9c43SDaniel Jasper     Error << "Path is ambiguous";
20626cf9c43SDaniel Jasper   return Result;
20726cf9c43SDaniel Jasper }
208