10b57cec5SDimitry Andric //===- FileMatchTrie.cpp --------------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file contains the implementation of a FileMatchTrie.
100b57cec5SDimitry Andric //
110b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
120b57cec5SDimitry Andric
130b57cec5SDimitry Andric #include "clang/Tooling/FileMatchTrie.h"
140b57cec5SDimitry Andric #include "llvm/ADT/StringMap.h"
150b57cec5SDimitry Andric #include "llvm/ADT/StringRef.h"
160b57cec5SDimitry Andric #include "llvm/Support/FileSystem.h"
170b57cec5SDimitry Andric #include "llvm/Support/Path.h"
180b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
190b57cec5SDimitry Andric #include <string>
200b57cec5SDimitry Andric #include <vector>
210b57cec5SDimitry Andric
220b57cec5SDimitry Andric using namespace clang;
230b57cec5SDimitry Andric using namespace tooling;
240b57cec5SDimitry Andric
250b57cec5SDimitry Andric namespace {
260b57cec5SDimitry Andric
270b57cec5SDimitry Andric /// Default \c PathComparator using \c llvm::sys::fs::equivalent().
280b57cec5SDimitry Andric struct DefaultPathComparator : public PathComparator {
equivalent__anondbd58fb80111::DefaultPathComparator290b57cec5SDimitry Andric bool equivalent(StringRef FileA, StringRef FileB) const override {
300b57cec5SDimitry Andric return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB);
310b57cec5SDimitry Andric }
320b57cec5SDimitry Andric };
330b57cec5SDimitry Andric
340b57cec5SDimitry Andric } // namespace
350b57cec5SDimitry Andric
360b57cec5SDimitry Andric namespace clang {
370b57cec5SDimitry Andric namespace tooling {
380b57cec5SDimitry Andric
390b57cec5SDimitry Andric /// A node of the \c FileMatchTrie.
400b57cec5SDimitry Andric ///
410b57cec5SDimitry Andric /// Each node has storage for up to one path and a map mapping a path segment to
420b57cec5SDimitry Andric /// child nodes. The trie starts with an empty root node.
430b57cec5SDimitry Andric class FileMatchTrieNode {
440b57cec5SDimitry Andric public:
450b57cec5SDimitry Andric /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes
460b57cec5SDimitry Andric /// the number of \c NewPath's trailing characters already consumed during
470b57cec5SDimitry Andric /// recursion.
480b57cec5SDimitry Andric ///
490b57cec5SDimitry Andric /// An insert of a path
500b57cec5SDimitry Andric /// 'p'starts at the root node and does the following:
510b57cec5SDimitry Andric /// - If the node is empty, insert 'p' into its storage and abort.
520b57cec5SDimitry Andric /// - If the node has a path 'p2' but no children, take the last path segment
530b57cec5SDimitry Andric /// 's' of 'p2', put a new child into the map at 's' an insert the rest of
540b57cec5SDimitry Andric /// 'p2' there.
550b57cec5SDimitry Andric /// - Insert a new child for the last segment of 'p' and insert the rest of
560b57cec5SDimitry Andric /// 'p' there.
570b57cec5SDimitry Andric ///
580b57cec5SDimitry Andric /// An insert operation is linear in the number of a path's segments.
insert(StringRef NewPath,unsigned ConsumedLength=0)590b57cec5SDimitry Andric void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
600b57cec5SDimitry Andric // We cannot put relative paths into the FileMatchTrie as then a path can be
610b57cec5SDimitry Andric // a postfix of another path, violating a core assumption of the trie.
620b57cec5SDimitry Andric if (llvm::sys::path::is_relative(NewPath))
630b57cec5SDimitry Andric return;
640b57cec5SDimitry Andric if (Path.empty()) {
650b57cec5SDimitry Andric // This is an empty leaf. Store NewPath and return.
665ffd83dbSDimitry Andric Path = std::string(NewPath);
670b57cec5SDimitry Andric return;
680b57cec5SDimitry Andric }
690b57cec5SDimitry Andric if (Children.empty()) {
700b57cec5SDimitry Andric // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
710b57cec5SDimitry Andric if (NewPath == Path)
720b57cec5SDimitry Andric return;
730b57cec5SDimitry Andric // Make this a node and create a child-leaf with 'Path'.
740b57cec5SDimitry Andric StringRef Element(llvm::sys::path::filename(
750b57cec5SDimitry Andric StringRef(Path).drop_back(ConsumedLength)));
760b57cec5SDimitry Andric Children[Element].Path = Path;
770b57cec5SDimitry Andric }
780b57cec5SDimitry Andric StringRef Element(llvm::sys::path::filename(
790b57cec5SDimitry Andric StringRef(NewPath).drop_back(ConsumedLength)));
800b57cec5SDimitry Andric Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
810b57cec5SDimitry Andric }
820b57cec5SDimitry Andric
830b57cec5SDimitry Andric /// Tries to find the node under this \c FileMatchTrieNode that best
840b57cec5SDimitry Andric /// matches 'FileName'.
850b57cec5SDimitry Andric ///
860b57cec5SDimitry Andric /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to
870b57cec5SDimitry Andric /// \c true and an empty string is returned. If no path fits 'FileName', an
880b57cec5SDimitry Andric /// empty string is returned. \c ConsumedLength denotes the number of
890b57cec5SDimitry Andric /// \c Filename's trailing characters already consumed during recursion.
900b57cec5SDimitry Andric ///
910b57cec5SDimitry Andric /// To find the best matching node for a given path 'p', the
920b57cec5SDimitry Andric /// \c findEquivalent() function is called recursively for each path segment
930b57cec5SDimitry Andric /// (back to front) of 'p' until a node 'n' is reached that does not ..
940b57cec5SDimitry Andric /// - .. have children. In this case it is checked
950b57cec5SDimitry Andric /// whether the stored path is equivalent to 'p'. If yes, the best match is
960b57cec5SDimitry Andric /// found. Otherwise continue with the parent node as if this node did not
970b57cec5SDimitry Andric /// exist.
980b57cec5SDimitry Andric /// - .. a child matching the next path segment. In this case, all children of
990b57cec5SDimitry Andric /// 'n' are an equally good match for 'p'. All children are of 'n' are found
1000b57cec5SDimitry Andric /// recursively and their equivalence to 'p' is determined. If none are
1010b57cec5SDimitry Andric /// equivalent, continue with the parent node as if 'n' didn't exist. If one
1020b57cec5SDimitry Andric /// is equivalent, the best match is found. Otherwise, report and ambigiuity
1030b57cec5SDimitry Andric /// error.
findEquivalent(const PathComparator & Comparator,StringRef FileName,bool & IsAmbiguous,unsigned ConsumedLength=0) const1040b57cec5SDimitry Andric StringRef findEquivalent(const PathComparator& Comparator,
1050b57cec5SDimitry Andric StringRef FileName,
1060b57cec5SDimitry Andric bool &IsAmbiguous,
1070b57cec5SDimitry Andric unsigned ConsumedLength = 0) const {
108*e8d8bef9SDimitry Andric // Note: we support only directory symlinks for performance reasons.
1090b57cec5SDimitry Andric if (Children.empty()) {
110*e8d8bef9SDimitry Andric // As far as we do not support file symlinks, compare
111*e8d8bef9SDimitry Andric // basenames here to avoid request to file system.
112*e8d8bef9SDimitry Andric if (llvm::sys::path::filename(Path) ==
113*e8d8bef9SDimitry Andric llvm::sys::path::filename(FileName) &&
114*e8d8bef9SDimitry Andric Comparator.equivalent(StringRef(Path), FileName))
1150b57cec5SDimitry Andric return StringRef(Path);
1160b57cec5SDimitry Andric return {};
1170b57cec5SDimitry Andric }
1180b57cec5SDimitry Andric StringRef Element(llvm::sys::path::filename(FileName.drop_back(
1190b57cec5SDimitry Andric ConsumedLength)));
1200b57cec5SDimitry Andric llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild =
1210b57cec5SDimitry Andric Children.find(Element);
1220b57cec5SDimitry Andric if (MatchingChild != Children.end()) {
1230b57cec5SDimitry Andric StringRef Result = MatchingChild->getValue().findEquivalent(
1240b57cec5SDimitry Andric Comparator, FileName, IsAmbiguous,
1250b57cec5SDimitry Andric ConsumedLength + Element.size() + 1);
1260b57cec5SDimitry Andric if (!Result.empty() || IsAmbiguous)
1270b57cec5SDimitry Andric return Result;
1280b57cec5SDimitry Andric }
129*e8d8bef9SDimitry Andric
130*e8d8bef9SDimitry Andric // If `ConsumedLength` is zero, this is the root and we have no filename
131*e8d8bef9SDimitry Andric // match. Give up in this case, we don't try to find symlinks with
132*e8d8bef9SDimitry Andric // different names.
133*e8d8bef9SDimitry Andric if (ConsumedLength == 0)
134*e8d8bef9SDimitry Andric return {};
135*e8d8bef9SDimitry Andric
1360b57cec5SDimitry Andric std::vector<StringRef> AllChildren;
1370b57cec5SDimitry Andric getAll(AllChildren, MatchingChild);
1380b57cec5SDimitry Andric StringRef Result;
1390b57cec5SDimitry Andric for (const auto &Child : AllChildren) {
1400b57cec5SDimitry Andric if (Comparator.equivalent(Child, FileName)) {
1410b57cec5SDimitry Andric if (Result.empty()) {
1420b57cec5SDimitry Andric Result = Child;
1430b57cec5SDimitry Andric } else {
1440b57cec5SDimitry Andric IsAmbiguous = true;
1450b57cec5SDimitry Andric return {};
1460b57cec5SDimitry Andric }
1470b57cec5SDimitry Andric }
1480b57cec5SDimitry Andric }
1490b57cec5SDimitry Andric return Result;
1500b57cec5SDimitry Andric }
1510b57cec5SDimitry Andric
1520b57cec5SDimitry Andric private:
1530b57cec5SDimitry Andric /// Gets all paths under this FileMatchTrieNode.
getAll(std::vector<StringRef> & Results,llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const1540b57cec5SDimitry Andric void getAll(std::vector<StringRef> &Results,
1550b57cec5SDimitry Andric llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
1560b57cec5SDimitry Andric if (Path.empty())
1570b57cec5SDimitry Andric return;
1580b57cec5SDimitry Andric if (Children.empty()) {
1590b57cec5SDimitry Andric Results.push_back(StringRef(Path));
1600b57cec5SDimitry Andric return;
1610b57cec5SDimitry Andric }
1620b57cec5SDimitry Andric for (llvm::StringMap<FileMatchTrieNode>::const_iterator
1630b57cec5SDimitry Andric It = Children.begin(), E = Children.end();
1640b57cec5SDimitry Andric It != E; ++It) {
1650b57cec5SDimitry Andric if (It == Except)
1660b57cec5SDimitry Andric continue;
1670b57cec5SDimitry Andric It->getValue().getAll(Results, Children.end());
1680b57cec5SDimitry Andric }
1690b57cec5SDimitry Andric }
1700b57cec5SDimitry Andric
1710b57cec5SDimitry Andric // The stored absolute path in this node. Only valid for leaf nodes, i.e.
1720b57cec5SDimitry Andric // nodes where Children.empty().
1730b57cec5SDimitry Andric std::string Path;
1740b57cec5SDimitry Andric
1750b57cec5SDimitry Andric // The children of this node stored in a map based on the next path segment.
1760b57cec5SDimitry Andric llvm::StringMap<FileMatchTrieNode> Children;
1770b57cec5SDimitry Andric };
1780b57cec5SDimitry Andric
1790b57cec5SDimitry Andric } // namespace tooling
1800b57cec5SDimitry Andric } // namespace clang
1810b57cec5SDimitry Andric
FileMatchTrie()1820b57cec5SDimitry Andric FileMatchTrie::FileMatchTrie()
1830b57cec5SDimitry Andric : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}
1840b57cec5SDimitry Andric
FileMatchTrie(PathComparator * Comparator)1850b57cec5SDimitry Andric FileMatchTrie::FileMatchTrie(PathComparator *Comparator)
1860b57cec5SDimitry Andric : Root(new FileMatchTrieNode), Comparator(Comparator) {}
1870b57cec5SDimitry Andric
~FileMatchTrie()1880b57cec5SDimitry Andric FileMatchTrie::~FileMatchTrie() {
1890b57cec5SDimitry Andric delete Root;
1900b57cec5SDimitry Andric }
1910b57cec5SDimitry Andric
insert(StringRef NewPath)1920b57cec5SDimitry Andric void FileMatchTrie::insert(StringRef NewPath) {
1930b57cec5SDimitry Andric Root->insert(NewPath);
1940b57cec5SDimitry Andric }
1950b57cec5SDimitry Andric
findEquivalent(StringRef FileName,raw_ostream & Error) const1960b57cec5SDimitry Andric StringRef FileMatchTrie::findEquivalent(StringRef FileName,
1970b57cec5SDimitry Andric raw_ostream &Error) const {
1980b57cec5SDimitry Andric if (llvm::sys::path::is_relative(FileName)) {
1990b57cec5SDimitry Andric Error << "Cannot resolve relative paths";
2000b57cec5SDimitry Andric return {};
2010b57cec5SDimitry Andric }
2020b57cec5SDimitry Andric bool IsAmbiguous = false;
2030b57cec5SDimitry Andric StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous);
2040b57cec5SDimitry Andric if (IsAmbiguous)
2050b57cec5SDimitry Andric Error << "Path is ambiguous";
2060b57cec5SDimitry Andric return Result;
2070b57cec5SDimitry Andric }
208