1*e5dd7070Spatrick //===- FileMatchTrie.cpp --------------------------------------------------===// 2*e5dd7070Spatrick // 3*e5dd7070Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*e5dd7070Spatrick // See https://llvm.org/LICENSE.txt for license information. 5*e5dd7070Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*e5dd7070Spatrick // 7*e5dd7070Spatrick //===----------------------------------------------------------------------===// 8*e5dd7070Spatrick // 9*e5dd7070Spatrick // This file contains the implementation of a FileMatchTrie. 10*e5dd7070Spatrick // 11*e5dd7070Spatrick //===----------------------------------------------------------------------===// 12*e5dd7070Spatrick 13*e5dd7070Spatrick #include "clang/Tooling/FileMatchTrie.h" 14*e5dd7070Spatrick #include "llvm/ADT/StringMap.h" 15*e5dd7070Spatrick #include "llvm/ADT/StringRef.h" 16*e5dd7070Spatrick #include "llvm/Support/FileSystem.h" 17*e5dd7070Spatrick #include "llvm/Support/Path.h" 18*e5dd7070Spatrick #include "llvm/Support/raw_ostream.h" 19*e5dd7070Spatrick #include <string> 20*e5dd7070Spatrick #include <vector> 21*e5dd7070Spatrick 22*e5dd7070Spatrick using namespace clang; 23*e5dd7070Spatrick using namespace tooling; 24*e5dd7070Spatrick 25*e5dd7070Spatrick namespace { 26*e5dd7070Spatrick 27*e5dd7070Spatrick /// Default \c PathComparator using \c llvm::sys::fs::equivalent(). 28*e5dd7070Spatrick struct DefaultPathComparator : public PathComparator { 29*e5dd7070Spatrick bool equivalent(StringRef FileA, StringRef FileB) const override { 30*e5dd7070Spatrick return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB); 31*e5dd7070Spatrick } 32*e5dd7070Spatrick }; 33*e5dd7070Spatrick 34*e5dd7070Spatrick } // namespace 35*e5dd7070Spatrick 36*e5dd7070Spatrick namespace clang { 37*e5dd7070Spatrick namespace tooling { 38*e5dd7070Spatrick 39*e5dd7070Spatrick /// A node of the \c FileMatchTrie. 40*e5dd7070Spatrick /// 41*e5dd7070Spatrick /// Each node has storage for up to one path and a map mapping a path segment to 42*e5dd7070Spatrick /// child nodes. The trie starts with an empty root node. 43*e5dd7070Spatrick class FileMatchTrieNode { 44*e5dd7070Spatrick public: 45*e5dd7070Spatrick /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes 46*e5dd7070Spatrick /// the number of \c NewPath's trailing characters already consumed during 47*e5dd7070Spatrick /// recursion. 48*e5dd7070Spatrick /// 49*e5dd7070Spatrick /// An insert of a path 50*e5dd7070Spatrick /// 'p'starts at the root node and does the following: 51*e5dd7070Spatrick /// - If the node is empty, insert 'p' into its storage and abort. 52*e5dd7070Spatrick /// - If the node has a path 'p2' but no children, take the last path segment 53*e5dd7070Spatrick /// 's' of 'p2', put a new child into the map at 's' an insert the rest of 54*e5dd7070Spatrick /// 'p2' there. 55*e5dd7070Spatrick /// - Insert a new child for the last segment of 'p' and insert the rest of 56*e5dd7070Spatrick /// 'p' there. 57*e5dd7070Spatrick /// 58*e5dd7070Spatrick /// An insert operation is linear in the number of a path's segments. 59*e5dd7070Spatrick void insert(StringRef NewPath, unsigned ConsumedLength = 0) { 60*e5dd7070Spatrick // We cannot put relative paths into the FileMatchTrie as then a path can be 61*e5dd7070Spatrick // a postfix of another path, violating a core assumption of the trie. 62*e5dd7070Spatrick if (llvm::sys::path::is_relative(NewPath)) 63*e5dd7070Spatrick return; 64*e5dd7070Spatrick if (Path.empty()) { 65*e5dd7070Spatrick // This is an empty leaf. Store NewPath and return. 66*e5dd7070Spatrick Path = NewPath; 67*e5dd7070Spatrick return; 68*e5dd7070Spatrick } 69*e5dd7070Spatrick if (Children.empty()) { 70*e5dd7070Spatrick // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'. 71*e5dd7070Spatrick if (NewPath == Path) 72*e5dd7070Spatrick return; 73*e5dd7070Spatrick // Make this a node and create a child-leaf with 'Path'. 74*e5dd7070Spatrick StringRef Element(llvm::sys::path::filename( 75*e5dd7070Spatrick StringRef(Path).drop_back(ConsumedLength))); 76*e5dd7070Spatrick Children[Element].Path = Path; 77*e5dd7070Spatrick } 78*e5dd7070Spatrick StringRef Element(llvm::sys::path::filename( 79*e5dd7070Spatrick StringRef(NewPath).drop_back(ConsumedLength))); 80*e5dd7070Spatrick Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1); 81*e5dd7070Spatrick } 82*e5dd7070Spatrick 83*e5dd7070Spatrick /// Tries to find the node under this \c FileMatchTrieNode that best 84*e5dd7070Spatrick /// matches 'FileName'. 85*e5dd7070Spatrick /// 86*e5dd7070Spatrick /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to 87*e5dd7070Spatrick /// \c true and an empty string is returned. If no path fits 'FileName', an 88*e5dd7070Spatrick /// empty string is returned. \c ConsumedLength denotes the number of 89*e5dd7070Spatrick /// \c Filename's trailing characters already consumed during recursion. 90*e5dd7070Spatrick /// 91*e5dd7070Spatrick /// To find the best matching node for a given path 'p', the 92*e5dd7070Spatrick /// \c findEquivalent() function is called recursively for each path segment 93*e5dd7070Spatrick /// (back to front) of 'p' until a node 'n' is reached that does not .. 94*e5dd7070Spatrick /// - .. have children. In this case it is checked 95*e5dd7070Spatrick /// whether the stored path is equivalent to 'p'. If yes, the best match is 96*e5dd7070Spatrick /// found. Otherwise continue with the parent node as if this node did not 97*e5dd7070Spatrick /// exist. 98*e5dd7070Spatrick /// - .. a child matching the next path segment. In this case, all children of 99*e5dd7070Spatrick /// 'n' are an equally good match for 'p'. All children are of 'n' are found 100*e5dd7070Spatrick /// recursively and their equivalence to 'p' is determined. If none are 101*e5dd7070Spatrick /// equivalent, continue with the parent node as if 'n' didn't exist. If one 102*e5dd7070Spatrick /// is equivalent, the best match is found. Otherwise, report and ambigiuity 103*e5dd7070Spatrick /// error. 104*e5dd7070Spatrick StringRef findEquivalent(const PathComparator& Comparator, 105*e5dd7070Spatrick StringRef FileName, 106*e5dd7070Spatrick bool &IsAmbiguous, 107*e5dd7070Spatrick unsigned ConsumedLength = 0) const { 108*e5dd7070Spatrick if (Children.empty()) { 109*e5dd7070Spatrick if (Comparator.equivalent(StringRef(Path), FileName)) 110*e5dd7070Spatrick return StringRef(Path); 111*e5dd7070Spatrick return {}; 112*e5dd7070Spatrick } 113*e5dd7070Spatrick StringRef Element(llvm::sys::path::filename(FileName.drop_back( 114*e5dd7070Spatrick ConsumedLength))); 115*e5dd7070Spatrick llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild = 116*e5dd7070Spatrick Children.find(Element); 117*e5dd7070Spatrick if (MatchingChild != Children.end()) { 118*e5dd7070Spatrick StringRef Result = MatchingChild->getValue().findEquivalent( 119*e5dd7070Spatrick Comparator, FileName, IsAmbiguous, 120*e5dd7070Spatrick ConsumedLength + Element.size() + 1); 121*e5dd7070Spatrick if (!Result.empty() || IsAmbiguous) 122*e5dd7070Spatrick return Result; 123*e5dd7070Spatrick } 124*e5dd7070Spatrick std::vector<StringRef> AllChildren; 125*e5dd7070Spatrick getAll(AllChildren, MatchingChild); 126*e5dd7070Spatrick StringRef Result; 127*e5dd7070Spatrick for (const auto &Child : AllChildren) { 128*e5dd7070Spatrick if (Comparator.equivalent(Child, FileName)) { 129*e5dd7070Spatrick if (Result.empty()) { 130*e5dd7070Spatrick Result = Child; 131*e5dd7070Spatrick } else { 132*e5dd7070Spatrick IsAmbiguous = true; 133*e5dd7070Spatrick return {}; 134*e5dd7070Spatrick } 135*e5dd7070Spatrick } 136*e5dd7070Spatrick } 137*e5dd7070Spatrick return Result; 138*e5dd7070Spatrick } 139*e5dd7070Spatrick 140*e5dd7070Spatrick private: 141*e5dd7070Spatrick /// Gets all paths under this FileMatchTrieNode. 142*e5dd7070Spatrick void getAll(std::vector<StringRef> &Results, 143*e5dd7070Spatrick llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const { 144*e5dd7070Spatrick if (Path.empty()) 145*e5dd7070Spatrick return; 146*e5dd7070Spatrick if (Children.empty()) { 147*e5dd7070Spatrick Results.push_back(StringRef(Path)); 148*e5dd7070Spatrick return; 149*e5dd7070Spatrick } 150*e5dd7070Spatrick for (llvm::StringMap<FileMatchTrieNode>::const_iterator 151*e5dd7070Spatrick It = Children.begin(), E = Children.end(); 152*e5dd7070Spatrick It != E; ++It) { 153*e5dd7070Spatrick if (It == Except) 154*e5dd7070Spatrick continue; 155*e5dd7070Spatrick It->getValue().getAll(Results, Children.end()); 156*e5dd7070Spatrick } 157*e5dd7070Spatrick } 158*e5dd7070Spatrick 159*e5dd7070Spatrick // The stored absolute path in this node. Only valid for leaf nodes, i.e. 160*e5dd7070Spatrick // nodes where Children.empty(). 161*e5dd7070Spatrick std::string Path; 162*e5dd7070Spatrick 163*e5dd7070Spatrick // The children of this node stored in a map based on the next path segment. 164*e5dd7070Spatrick llvm::StringMap<FileMatchTrieNode> Children; 165*e5dd7070Spatrick }; 166*e5dd7070Spatrick 167*e5dd7070Spatrick } // namespace tooling 168*e5dd7070Spatrick } // namespace clang 169*e5dd7070Spatrick 170*e5dd7070Spatrick FileMatchTrie::FileMatchTrie() 171*e5dd7070Spatrick : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {} 172*e5dd7070Spatrick 173*e5dd7070Spatrick FileMatchTrie::FileMatchTrie(PathComparator *Comparator) 174*e5dd7070Spatrick : Root(new FileMatchTrieNode), Comparator(Comparator) {} 175*e5dd7070Spatrick 176*e5dd7070Spatrick FileMatchTrie::~FileMatchTrie() { 177*e5dd7070Spatrick delete Root; 178*e5dd7070Spatrick } 179*e5dd7070Spatrick 180*e5dd7070Spatrick void FileMatchTrie::insert(StringRef NewPath) { 181*e5dd7070Spatrick Root->insert(NewPath); 182*e5dd7070Spatrick } 183*e5dd7070Spatrick 184*e5dd7070Spatrick StringRef FileMatchTrie::findEquivalent(StringRef FileName, 185*e5dd7070Spatrick raw_ostream &Error) const { 186*e5dd7070Spatrick if (llvm::sys::path::is_relative(FileName)) { 187*e5dd7070Spatrick Error << "Cannot resolve relative paths"; 188*e5dd7070Spatrick return {}; 189*e5dd7070Spatrick } 190*e5dd7070Spatrick bool IsAmbiguous = false; 191*e5dd7070Spatrick StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous); 192*e5dd7070Spatrick if (IsAmbiguous) 193*e5dd7070Spatrick Error << "Path is ambiguous"; 194*e5dd7070Spatrick return Result; 195*e5dd7070Spatrick } 196