xref: /openbsd-src/gnu/llvm/clang/lib/Tooling/FileMatchTrie.cpp (revision a9ac8606c53d55cee9c3a39778b249c51df111ef)
1e5dd7070Spatrick //===- FileMatchTrie.cpp --------------------------------------------------===//
2e5dd7070Spatrick //
3e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information.
5e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6e5dd7070Spatrick //
7e5dd7070Spatrick //===----------------------------------------------------------------------===//
8e5dd7070Spatrick //
9e5dd7070Spatrick //  This file contains the implementation of a FileMatchTrie.
10e5dd7070Spatrick //
11e5dd7070Spatrick //===----------------------------------------------------------------------===//
12e5dd7070Spatrick 
13e5dd7070Spatrick #include "clang/Tooling/FileMatchTrie.h"
14e5dd7070Spatrick #include "llvm/ADT/StringMap.h"
15e5dd7070Spatrick #include "llvm/ADT/StringRef.h"
16e5dd7070Spatrick #include "llvm/Support/FileSystem.h"
17e5dd7070Spatrick #include "llvm/Support/Path.h"
18e5dd7070Spatrick #include "llvm/Support/raw_ostream.h"
19e5dd7070Spatrick #include <string>
20e5dd7070Spatrick #include <vector>
21e5dd7070Spatrick 
22e5dd7070Spatrick using namespace clang;
23e5dd7070Spatrick using namespace tooling;
24e5dd7070Spatrick 
25e5dd7070Spatrick namespace {
26e5dd7070Spatrick 
27e5dd7070Spatrick /// Default \c PathComparator using \c llvm::sys::fs::equivalent().
28e5dd7070Spatrick struct DefaultPathComparator : public PathComparator {
equivalent__anon8f596cbd0111::DefaultPathComparator29e5dd7070Spatrick   bool equivalent(StringRef FileA, StringRef FileB) const override {
30e5dd7070Spatrick     return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB);
31e5dd7070Spatrick   }
32e5dd7070Spatrick };
33e5dd7070Spatrick 
34e5dd7070Spatrick } // namespace
35e5dd7070Spatrick 
36e5dd7070Spatrick namespace clang {
37e5dd7070Spatrick namespace tooling {
38e5dd7070Spatrick 
39e5dd7070Spatrick /// A node of the \c FileMatchTrie.
40e5dd7070Spatrick ///
41e5dd7070Spatrick /// Each node has storage for up to one path and a map mapping a path segment to
42e5dd7070Spatrick /// child nodes. The trie starts with an empty root node.
43e5dd7070Spatrick class FileMatchTrieNode {
44e5dd7070Spatrick public:
45e5dd7070Spatrick   /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes
46e5dd7070Spatrick   /// the number of \c NewPath's trailing characters already consumed during
47e5dd7070Spatrick   /// recursion.
48e5dd7070Spatrick   ///
49e5dd7070Spatrick   /// An insert of a path
50e5dd7070Spatrick   /// 'p'starts at the root node and does the following:
51e5dd7070Spatrick   /// - If the node is empty, insert 'p' into its storage and abort.
52e5dd7070Spatrick   /// - If the node has a path 'p2' but no children, take the last path segment
53e5dd7070Spatrick   ///   's' of 'p2', put a new child into the map at 's' an insert the rest of
54e5dd7070Spatrick   ///   'p2' there.
55e5dd7070Spatrick   /// - Insert a new child for the last segment of 'p' and insert the rest of
56e5dd7070Spatrick   ///   'p' there.
57e5dd7070Spatrick   ///
58e5dd7070Spatrick   /// An insert operation is linear in the number of a path's segments.
insert(StringRef NewPath,unsigned ConsumedLength=0)59e5dd7070Spatrick   void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
60e5dd7070Spatrick     // We cannot put relative paths into the FileMatchTrie as then a path can be
61e5dd7070Spatrick     // a postfix of another path, violating a core assumption of the trie.
62e5dd7070Spatrick     if (llvm::sys::path::is_relative(NewPath))
63e5dd7070Spatrick       return;
64e5dd7070Spatrick     if (Path.empty()) {
65e5dd7070Spatrick       // This is an empty leaf. Store NewPath and return.
66ec727ea7Spatrick       Path = std::string(NewPath);
67e5dd7070Spatrick       return;
68e5dd7070Spatrick     }
69e5dd7070Spatrick     if (Children.empty()) {
70e5dd7070Spatrick       // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
71e5dd7070Spatrick       if (NewPath == Path)
72e5dd7070Spatrick           return;
73e5dd7070Spatrick       // Make this a node and create a child-leaf with 'Path'.
74e5dd7070Spatrick       StringRef Element(llvm::sys::path::filename(
75e5dd7070Spatrick           StringRef(Path).drop_back(ConsumedLength)));
76e5dd7070Spatrick       Children[Element].Path = Path;
77e5dd7070Spatrick     }
78e5dd7070Spatrick     StringRef Element(llvm::sys::path::filename(
79e5dd7070Spatrick           StringRef(NewPath).drop_back(ConsumedLength)));
80e5dd7070Spatrick     Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
81e5dd7070Spatrick   }
82e5dd7070Spatrick 
83e5dd7070Spatrick   /// Tries to find the node under this \c FileMatchTrieNode that best
84e5dd7070Spatrick   /// matches 'FileName'.
85e5dd7070Spatrick   ///
86e5dd7070Spatrick   /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to
87e5dd7070Spatrick   /// \c true and an empty string is returned. If no path fits 'FileName', an
88e5dd7070Spatrick   /// empty string is returned. \c ConsumedLength denotes the number of
89e5dd7070Spatrick   /// \c Filename's trailing characters already consumed during recursion.
90e5dd7070Spatrick   ///
91e5dd7070Spatrick   /// To find the best matching node for a given path 'p', the
92e5dd7070Spatrick   /// \c findEquivalent() function is called recursively for each path segment
93e5dd7070Spatrick   /// (back to front) of 'p' until a node 'n' is reached that does not ..
94e5dd7070Spatrick   /// - .. have children. In this case it is checked
95e5dd7070Spatrick   ///   whether the stored path is equivalent to 'p'. If yes, the best match is
96e5dd7070Spatrick   ///   found. Otherwise continue with the parent node as if this node did not
97e5dd7070Spatrick   ///   exist.
98e5dd7070Spatrick   /// - .. a child matching the next path segment. In this case, all children of
99e5dd7070Spatrick   ///   'n' are an equally good match for 'p'. All children are of 'n' are found
100e5dd7070Spatrick   ///   recursively and their equivalence to 'p' is determined. If none are
101e5dd7070Spatrick   ///   equivalent, continue with the parent node as if 'n' didn't exist. If one
102e5dd7070Spatrick   ///   is equivalent, the best match is found. Otherwise, report and ambigiuity
103e5dd7070Spatrick   ///   error.
findEquivalent(const PathComparator & Comparator,StringRef FileName,bool & IsAmbiguous,unsigned ConsumedLength=0) const104e5dd7070Spatrick   StringRef findEquivalent(const PathComparator& Comparator,
105e5dd7070Spatrick                            StringRef FileName,
106e5dd7070Spatrick                            bool &IsAmbiguous,
107e5dd7070Spatrick                            unsigned ConsumedLength = 0) const {
108*a9ac8606Spatrick     // Note: we support only directory symlinks for performance reasons.
109e5dd7070Spatrick     if (Children.empty()) {
110*a9ac8606Spatrick       // As far as we do not support file symlinks, compare
111*a9ac8606Spatrick       // basenames here to avoid request to file system.
112*a9ac8606Spatrick       if (llvm::sys::path::filename(Path) ==
113*a9ac8606Spatrick               llvm::sys::path::filename(FileName) &&
114*a9ac8606Spatrick           Comparator.equivalent(StringRef(Path), FileName))
115e5dd7070Spatrick         return StringRef(Path);
116e5dd7070Spatrick       return {};
117e5dd7070Spatrick     }
118e5dd7070Spatrick     StringRef Element(llvm::sys::path::filename(FileName.drop_back(
119e5dd7070Spatrick         ConsumedLength)));
120e5dd7070Spatrick     llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild =
121e5dd7070Spatrick         Children.find(Element);
122e5dd7070Spatrick     if (MatchingChild != Children.end()) {
123e5dd7070Spatrick       StringRef Result = MatchingChild->getValue().findEquivalent(
124e5dd7070Spatrick           Comparator, FileName, IsAmbiguous,
125e5dd7070Spatrick           ConsumedLength + Element.size() + 1);
126e5dd7070Spatrick       if (!Result.empty() || IsAmbiguous)
127e5dd7070Spatrick         return Result;
128e5dd7070Spatrick     }
129*a9ac8606Spatrick 
130*a9ac8606Spatrick     // If `ConsumedLength` is zero, this is the root and we have no filename
131*a9ac8606Spatrick     // match. Give up in this case, we don't try to find symlinks with
132*a9ac8606Spatrick     // different names.
133*a9ac8606Spatrick     if (ConsumedLength == 0)
134*a9ac8606Spatrick       return {};
135*a9ac8606Spatrick 
136e5dd7070Spatrick     std::vector<StringRef> AllChildren;
137e5dd7070Spatrick     getAll(AllChildren, MatchingChild);
138e5dd7070Spatrick     StringRef Result;
139e5dd7070Spatrick     for (const auto &Child : AllChildren) {
140e5dd7070Spatrick       if (Comparator.equivalent(Child, FileName)) {
141e5dd7070Spatrick         if (Result.empty()) {
142e5dd7070Spatrick           Result = Child;
143e5dd7070Spatrick         } else {
144e5dd7070Spatrick           IsAmbiguous = true;
145e5dd7070Spatrick           return {};
146e5dd7070Spatrick         }
147e5dd7070Spatrick       }
148e5dd7070Spatrick     }
149e5dd7070Spatrick     return Result;
150e5dd7070Spatrick   }
151e5dd7070Spatrick 
152e5dd7070Spatrick private:
153e5dd7070Spatrick   /// Gets all paths under this FileMatchTrieNode.
getAll(std::vector<StringRef> & Results,llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const154e5dd7070Spatrick   void getAll(std::vector<StringRef> &Results,
155e5dd7070Spatrick               llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
156e5dd7070Spatrick     if (Path.empty())
157e5dd7070Spatrick       return;
158e5dd7070Spatrick     if (Children.empty()) {
159e5dd7070Spatrick       Results.push_back(StringRef(Path));
160e5dd7070Spatrick       return;
161e5dd7070Spatrick     }
162e5dd7070Spatrick     for (llvm::StringMap<FileMatchTrieNode>::const_iterator
163e5dd7070Spatrick          It = Children.begin(), E = Children.end();
164e5dd7070Spatrick          It != E; ++It) {
165e5dd7070Spatrick       if (It == Except)
166e5dd7070Spatrick         continue;
167e5dd7070Spatrick       It->getValue().getAll(Results, Children.end());
168e5dd7070Spatrick     }
169e5dd7070Spatrick   }
170e5dd7070Spatrick 
171e5dd7070Spatrick   // The stored absolute path in this node. Only valid for leaf nodes, i.e.
172e5dd7070Spatrick   // nodes where Children.empty().
173e5dd7070Spatrick   std::string Path;
174e5dd7070Spatrick 
175e5dd7070Spatrick   // The children of this node stored in a map based on the next path segment.
176e5dd7070Spatrick   llvm::StringMap<FileMatchTrieNode> Children;
177e5dd7070Spatrick };
178e5dd7070Spatrick 
179e5dd7070Spatrick } // namespace tooling
180e5dd7070Spatrick } // namespace clang
181e5dd7070Spatrick 
FileMatchTrie()182e5dd7070Spatrick FileMatchTrie::FileMatchTrie()
183e5dd7070Spatrick     : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}
184e5dd7070Spatrick 
FileMatchTrie(PathComparator * Comparator)185e5dd7070Spatrick FileMatchTrie::FileMatchTrie(PathComparator *Comparator)
186e5dd7070Spatrick     : Root(new FileMatchTrieNode), Comparator(Comparator) {}
187e5dd7070Spatrick 
~FileMatchTrie()188e5dd7070Spatrick FileMatchTrie::~FileMatchTrie() {
189e5dd7070Spatrick   delete Root;
190e5dd7070Spatrick }
191e5dd7070Spatrick 
insert(StringRef NewPath)192e5dd7070Spatrick void FileMatchTrie::insert(StringRef NewPath) {
193e5dd7070Spatrick   Root->insert(NewPath);
194e5dd7070Spatrick }
195e5dd7070Spatrick 
findEquivalent(StringRef FileName,raw_ostream & Error) const196e5dd7070Spatrick StringRef FileMatchTrie::findEquivalent(StringRef FileName,
197e5dd7070Spatrick                                         raw_ostream &Error) const {
198e5dd7070Spatrick   if (llvm::sys::path::is_relative(FileName)) {
199e5dd7070Spatrick     Error << "Cannot resolve relative paths";
200e5dd7070Spatrick     return {};
201e5dd7070Spatrick   }
202e5dd7070Spatrick   bool IsAmbiguous = false;
203e5dd7070Spatrick   StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous);
204e5dd7070Spatrick   if (IsAmbiguous)
205e5dd7070Spatrick     Error << "Path is ambiguous";
206e5dd7070Spatrick   return Result;
207e5dd7070Spatrick }
208