1 //===--- FileDistance.h - File proximity scoring -----------------*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This library measures the distance between file paths. 10 // It's used for ranking symbols, e.g. in code completion. 11 // |foo/bar.h -> foo/bar.h| = 0. 12 // |foo/bar.h -> foo/baz.h| < |foo/bar.h -> baz.h|. 13 // This is an edit-distance, where edits go up or down the directory tree. 14 // It's not symmetrical, the costs of going up and down may not match. 15 // 16 // Dealing with multiple sources: 17 // In practice we care about the distance from a source file, but files near 18 // its main-header and #included files are considered "close". 19 // So we start with a set of (anchor, cost) pairs, and call the distance to a 20 // path the minimum of `cost + |source -> path|`. 21 // 22 // We allow each source to limit the number of up-traversals paths may start 23 // with. Up-traversals may reach things that are not "semantically near". 24 // 25 // Symbol URI schemes: 26 // Symbol locations may be represented by URIs rather than file paths directly. 27 // In this case we want to perform distance computations in URI space rather 28 // than in file-space, without performing redundant conversions. 29 // Therefore we have a lookup structure that accepts URIs, so that intermediate 30 // calculations for the same scheme can be reused. 31 // 32 // Caveats: 33 // Assuming up and down traversals each have uniform costs is simplistic. 34 // Often there are "semantic roots" whose children are almost unrelated. 35 // (e.g. /usr/include/, or / in an umbrella repository). We ignore this. 36 // 37 //===----------------------------------------------------------------------===// 38 39 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H 40 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H 41 42 #include "llvm/ADT/ArrayRef.h" 43 #include "llvm/ADT/DenseMap.h" 44 #include "llvm/ADT/StringMap.h" 45 #include "llvm/ADT/StringRef.h" 46 #include <memory> 47 48 namespace clang { 49 namespace clangd { 50 51 struct FileDistanceOptions { 52 unsigned UpCost = 2; // |foo/bar.h -> foo| 53 unsigned DownCost = 1; // |foo -> foo/bar.h| 54 unsigned IncludeCost = 2; // |foo.cc -> included_header.h| 55 bool AllowDownTraversalFromRoot = true; // | / -> /a | 56 }; 57 58 struct SourceParams { 59 // Base cost for paths starting at this source. 60 unsigned Cost = 0; 61 // Limits the number of upwards traversals allowed from this source. 62 unsigned MaxUpTraversals = std::numeric_limits<unsigned>::max(); 63 }; 64 65 // Supports lookups to find the minimum distance to a file from any source. 66 // This object should be reused, it memoizes intermediate computations. 67 class FileDistance { 68 public: 69 static constexpr unsigned Unreachable = std::numeric_limits<unsigned>::max(); 70 static const llvm::hash_code RootHash; 71 72 FileDistance(llvm::StringMap<SourceParams> Sources, 73 const FileDistanceOptions &Opts = {}); 74 75 // Computes the minimum distance from any source to the file path. 76 unsigned distance(llvm::StringRef Path); 77 78 private: 79 // Costs computed so far. Always contains sources and their ancestors. 80 // We store hash codes only. Collisions are rare and consequences aren't dire. 81 llvm::DenseMap<llvm::hash_code, unsigned> Cache; 82 FileDistanceOptions Opts; 83 }; 84 85 // Supports lookups like FileDistance, but the lookup keys are URIs. 86 // We convert each of the sources to the scheme of the URI and do a FileDistance 87 // comparison on the bodies. 88 class URIDistance { 89 public: 90 // \p Sources must contain absolute paths, not URIs. 91 URIDistance(llvm::StringMap<SourceParams> Sources, 92 const FileDistanceOptions &Opts = {}) Sources(Sources)93 : Sources(Sources), Opts(Opts) {} 94 95 // Computes the minimum distance from any source to the URI. 96 // Only sources that can be mapped into the URI's scheme are considered. 97 unsigned distance(llvm::StringRef URI); 98 99 private: 100 // Returns the FileDistance for a URI scheme, creating it if needed. 101 FileDistance &forScheme(llvm::StringRef Scheme); 102 103 // We cache the results using the original strings so we can skip URI parsing. 104 llvm::DenseMap<llvm::hash_code, unsigned> Cache; 105 llvm::StringMap<SourceParams> Sources; 106 llvm::StringMap<std::unique_ptr<FileDistance>> ByScheme; 107 FileDistanceOptions Opts; 108 }; 109 110 /// Support lookups like FileDistance, but the lookup keys are symbol scopes. 111 /// For example, a scope "na::nb::" is converted to "/na/nb". 112 class ScopeDistance { 113 public: 114 /// QueryScopes[0] is the preferred scope. 115 ScopeDistance(llvm::ArrayRef<std::string> QueryScopes); 116 117 unsigned distance(llvm::StringRef SymbolScope); 118 119 private: 120 FileDistance Distance; 121 }; 122 123 } // namespace clangd 124 } // namespace clang 125 126 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_FILEDISTANCE_H 127