xref: /openbsd-src/gnu/llvm/clang/lib/Tooling/FileMatchTrie.cpp (revision e5dd70708596ae51455a0ffa086a00c5b29f8583)
1*e5dd7070Spatrick //===- FileMatchTrie.cpp --------------------------------------------------===//
2*e5dd7070Spatrick //
3*e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information.
5*e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*e5dd7070Spatrick //
7*e5dd7070Spatrick //===----------------------------------------------------------------------===//
8*e5dd7070Spatrick //
9*e5dd7070Spatrick //  This file contains the implementation of a FileMatchTrie.
10*e5dd7070Spatrick //
11*e5dd7070Spatrick //===----------------------------------------------------------------------===//
12*e5dd7070Spatrick 
13*e5dd7070Spatrick #include "clang/Tooling/FileMatchTrie.h"
14*e5dd7070Spatrick #include "llvm/ADT/StringMap.h"
15*e5dd7070Spatrick #include "llvm/ADT/StringRef.h"
16*e5dd7070Spatrick #include "llvm/Support/FileSystem.h"
17*e5dd7070Spatrick #include "llvm/Support/Path.h"
18*e5dd7070Spatrick #include "llvm/Support/raw_ostream.h"
19*e5dd7070Spatrick #include <string>
20*e5dd7070Spatrick #include <vector>
21*e5dd7070Spatrick 
22*e5dd7070Spatrick using namespace clang;
23*e5dd7070Spatrick using namespace tooling;
24*e5dd7070Spatrick 
25*e5dd7070Spatrick namespace {
26*e5dd7070Spatrick 
27*e5dd7070Spatrick /// Default \c PathComparator using \c llvm::sys::fs::equivalent().
28*e5dd7070Spatrick struct DefaultPathComparator : public PathComparator {
29*e5dd7070Spatrick   bool equivalent(StringRef FileA, StringRef FileB) const override {
30*e5dd7070Spatrick     return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB);
31*e5dd7070Spatrick   }
32*e5dd7070Spatrick };
33*e5dd7070Spatrick 
34*e5dd7070Spatrick } // namespace
35*e5dd7070Spatrick 
36*e5dd7070Spatrick namespace clang {
37*e5dd7070Spatrick namespace tooling {
38*e5dd7070Spatrick 
39*e5dd7070Spatrick /// A node of the \c FileMatchTrie.
40*e5dd7070Spatrick ///
41*e5dd7070Spatrick /// Each node has storage for up to one path and a map mapping a path segment to
42*e5dd7070Spatrick /// child nodes. The trie starts with an empty root node.
43*e5dd7070Spatrick class FileMatchTrieNode {
44*e5dd7070Spatrick public:
45*e5dd7070Spatrick   /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes
46*e5dd7070Spatrick   /// the number of \c NewPath's trailing characters already consumed during
47*e5dd7070Spatrick   /// recursion.
48*e5dd7070Spatrick   ///
49*e5dd7070Spatrick   /// An insert of a path
50*e5dd7070Spatrick   /// 'p'starts at the root node and does the following:
51*e5dd7070Spatrick   /// - If the node is empty, insert 'p' into its storage and abort.
52*e5dd7070Spatrick   /// - If the node has a path 'p2' but no children, take the last path segment
53*e5dd7070Spatrick   ///   's' of 'p2', put a new child into the map at 's' an insert the rest of
54*e5dd7070Spatrick   ///   'p2' there.
55*e5dd7070Spatrick   /// - Insert a new child for the last segment of 'p' and insert the rest of
56*e5dd7070Spatrick   ///   'p' there.
57*e5dd7070Spatrick   ///
58*e5dd7070Spatrick   /// An insert operation is linear in the number of a path's segments.
59*e5dd7070Spatrick   void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
60*e5dd7070Spatrick     // We cannot put relative paths into the FileMatchTrie as then a path can be
61*e5dd7070Spatrick     // a postfix of another path, violating a core assumption of the trie.
62*e5dd7070Spatrick     if (llvm::sys::path::is_relative(NewPath))
63*e5dd7070Spatrick       return;
64*e5dd7070Spatrick     if (Path.empty()) {
65*e5dd7070Spatrick       // This is an empty leaf. Store NewPath and return.
66*e5dd7070Spatrick       Path = NewPath;
67*e5dd7070Spatrick       return;
68*e5dd7070Spatrick     }
69*e5dd7070Spatrick     if (Children.empty()) {
70*e5dd7070Spatrick       // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
71*e5dd7070Spatrick       if (NewPath == Path)
72*e5dd7070Spatrick           return;
73*e5dd7070Spatrick       // Make this a node and create a child-leaf with 'Path'.
74*e5dd7070Spatrick       StringRef Element(llvm::sys::path::filename(
75*e5dd7070Spatrick           StringRef(Path).drop_back(ConsumedLength)));
76*e5dd7070Spatrick       Children[Element].Path = Path;
77*e5dd7070Spatrick     }
78*e5dd7070Spatrick     StringRef Element(llvm::sys::path::filename(
79*e5dd7070Spatrick           StringRef(NewPath).drop_back(ConsumedLength)));
80*e5dd7070Spatrick     Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
81*e5dd7070Spatrick   }
82*e5dd7070Spatrick 
83*e5dd7070Spatrick   /// Tries to find the node under this \c FileMatchTrieNode that best
84*e5dd7070Spatrick   /// matches 'FileName'.
85*e5dd7070Spatrick   ///
86*e5dd7070Spatrick   /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to
87*e5dd7070Spatrick   /// \c true and an empty string is returned. If no path fits 'FileName', an
88*e5dd7070Spatrick   /// empty string is returned. \c ConsumedLength denotes the number of
89*e5dd7070Spatrick   /// \c Filename's trailing characters already consumed during recursion.
90*e5dd7070Spatrick   ///
91*e5dd7070Spatrick   /// To find the best matching node for a given path 'p', the
92*e5dd7070Spatrick   /// \c findEquivalent() function is called recursively for each path segment
93*e5dd7070Spatrick   /// (back to front) of 'p' until a node 'n' is reached that does not ..
94*e5dd7070Spatrick   /// - .. have children. In this case it is checked
95*e5dd7070Spatrick   ///   whether the stored path is equivalent to 'p'. If yes, the best match is
96*e5dd7070Spatrick   ///   found. Otherwise continue with the parent node as if this node did not
97*e5dd7070Spatrick   ///   exist.
98*e5dd7070Spatrick   /// - .. a child matching the next path segment. In this case, all children of
99*e5dd7070Spatrick   ///   'n' are an equally good match for 'p'. All children are of 'n' are found
100*e5dd7070Spatrick   ///   recursively and their equivalence to 'p' is determined. If none are
101*e5dd7070Spatrick   ///   equivalent, continue with the parent node as if 'n' didn't exist. If one
102*e5dd7070Spatrick   ///   is equivalent, the best match is found. Otherwise, report and ambigiuity
103*e5dd7070Spatrick   ///   error.
104*e5dd7070Spatrick   StringRef findEquivalent(const PathComparator& Comparator,
105*e5dd7070Spatrick                            StringRef FileName,
106*e5dd7070Spatrick                            bool &IsAmbiguous,
107*e5dd7070Spatrick                            unsigned ConsumedLength = 0) const {
108*e5dd7070Spatrick     if (Children.empty()) {
109*e5dd7070Spatrick       if (Comparator.equivalent(StringRef(Path), FileName))
110*e5dd7070Spatrick         return StringRef(Path);
111*e5dd7070Spatrick       return {};
112*e5dd7070Spatrick     }
113*e5dd7070Spatrick     StringRef Element(llvm::sys::path::filename(FileName.drop_back(
114*e5dd7070Spatrick         ConsumedLength)));
115*e5dd7070Spatrick     llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild =
116*e5dd7070Spatrick         Children.find(Element);
117*e5dd7070Spatrick     if (MatchingChild != Children.end()) {
118*e5dd7070Spatrick       StringRef Result = MatchingChild->getValue().findEquivalent(
119*e5dd7070Spatrick           Comparator, FileName, IsAmbiguous,
120*e5dd7070Spatrick           ConsumedLength + Element.size() + 1);
121*e5dd7070Spatrick       if (!Result.empty() || IsAmbiguous)
122*e5dd7070Spatrick         return Result;
123*e5dd7070Spatrick     }
124*e5dd7070Spatrick     std::vector<StringRef> AllChildren;
125*e5dd7070Spatrick     getAll(AllChildren, MatchingChild);
126*e5dd7070Spatrick     StringRef Result;
127*e5dd7070Spatrick     for (const auto &Child : AllChildren) {
128*e5dd7070Spatrick       if (Comparator.equivalent(Child, FileName)) {
129*e5dd7070Spatrick         if (Result.empty()) {
130*e5dd7070Spatrick           Result = Child;
131*e5dd7070Spatrick         } else {
132*e5dd7070Spatrick           IsAmbiguous = true;
133*e5dd7070Spatrick           return {};
134*e5dd7070Spatrick         }
135*e5dd7070Spatrick       }
136*e5dd7070Spatrick     }
137*e5dd7070Spatrick     return Result;
138*e5dd7070Spatrick   }
139*e5dd7070Spatrick 
140*e5dd7070Spatrick private:
141*e5dd7070Spatrick   /// Gets all paths under this FileMatchTrieNode.
142*e5dd7070Spatrick   void getAll(std::vector<StringRef> &Results,
143*e5dd7070Spatrick               llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
144*e5dd7070Spatrick     if (Path.empty())
145*e5dd7070Spatrick       return;
146*e5dd7070Spatrick     if (Children.empty()) {
147*e5dd7070Spatrick       Results.push_back(StringRef(Path));
148*e5dd7070Spatrick       return;
149*e5dd7070Spatrick     }
150*e5dd7070Spatrick     for (llvm::StringMap<FileMatchTrieNode>::const_iterator
151*e5dd7070Spatrick          It = Children.begin(), E = Children.end();
152*e5dd7070Spatrick          It != E; ++It) {
153*e5dd7070Spatrick       if (It == Except)
154*e5dd7070Spatrick         continue;
155*e5dd7070Spatrick       It->getValue().getAll(Results, Children.end());
156*e5dd7070Spatrick     }
157*e5dd7070Spatrick   }
158*e5dd7070Spatrick 
159*e5dd7070Spatrick   // The stored absolute path in this node. Only valid for leaf nodes, i.e.
160*e5dd7070Spatrick   // nodes where Children.empty().
161*e5dd7070Spatrick   std::string Path;
162*e5dd7070Spatrick 
163*e5dd7070Spatrick   // The children of this node stored in a map based on the next path segment.
164*e5dd7070Spatrick   llvm::StringMap<FileMatchTrieNode> Children;
165*e5dd7070Spatrick };
166*e5dd7070Spatrick 
167*e5dd7070Spatrick } // namespace tooling
168*e5dd7070Spatrick } // namespace clang
169*e5dd7070Spatrick 
170*e5dd7070Spatrick FileMatchTrie::FileMatchTrie()
171*e5dd7070Spatrick     : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}
172*e5dd7070Spatrick 
173*e5dd7070Spatrick FileMatchTrie::FileMatchTrie(PathComparator *Comparator)
174*e5dd7070Spatrick     : Root(new FileMatchTrieNode), Comparator(Comparator) {}
175*e5dd7070Spatrick 
176*e5dd7070Spatrick FileMatchTrie::~FileMatchTrie() {
177*e5dd7070Spatrick   delete Root;
178*e5dd7070Spatrick }
179*e5dd7070Spatrick 
180*e5dd7070Spatrick void FileMatchTrie::insert(StringRef NewPath) {
181*e5dd7070Spatrick   Root->insert(NewPath);
182*e5dd7070Spatrick }
183*e5dd7070Spatrick 
184*e5dd7070Spatrick StringRef FileMatchTrie::findEquivalent(StringRef FileName,
185*e5dd7070Spatrick                                         raw_ostream &Error) const {
186*e5dd7070Spatrick   if (llvm::sys::path::is_relative(FileName)) {
187*e5dd7070Spatrick     Error << "Cannot resolve relative paths";
188*e5dd7070Spatrick     return {};
189*e5dd7070Spatrick   }
190*e5dd7070Spatrick   bool IsAmbiguous = false;
191*e5dd7070Spatrick   StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous);
192*e5dd7070Spatrick   if (IsAmbiguous)
193*e5dd7070Spatrick     Error << "Path is ambiguous";
194*e5dd7070Spatrick   return Result;
195*e5dd7070Spatrick }
196