1 //===--- Quality.h - Ranking alternatives for ambiguous queries --*- C++-*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 /// 9 /// Some operations such as code completion produce a set of candidates. 10 /// Usually the user can choose between them, but we should put the best options 11 /// at the top (they're easier to select, and more likely to be seen). 12 /// 13 /// This file defines building blocks for ranking candidates. 14 /// It's used by the features directly and also in the implementation of 15 /// indexes, as indexes also need to heuristically limit their results. 16 /// 17 /// The facilities here are: 18 /// - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString 19 /// These are structured in a way that they can be debugged, and are fairly 20 /// consistent regardless of the source. 21 /// - compute scores from scoring signals. These are suitable for sorting. 22 /// - sorting utilities like the TopN container. 23 /// These could be split up further to isolate dependencies if we care. 24 /// 25 //===----------------------------------------------------------------------===// 26 27 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H 28 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H 29 30 #include "FileDistance.h" 31 #include "clang/Sema/CodeCompleteConsumer.h" 32 #include "llvm/ADT/StringRef.h" 33 #include "llvm/ADT/StringSet.h" 34 #include <algorithm> 35 #include <functional> 36 #include <optional> 37 #include <vector> 38 39 namespace llvm { 40 class raw_ostream; 41 } // namespace llvm 42 43 namespace clang { 44 class CodeCompletionResult; 45 46 namespace clangd { 47 struct ASTSignals; 48 struct Symbol; 49 class URIDistance; 50 51 // Signals structs are designed to be aggregated from 0 or more sources. 52 // A default instance has neutral signals, and sources are merged into it. 53 // They can be dumped for debugging, and evaluate()d into a score. 54 55 /// Attributes of a symbol that affect how much we like it. 56 struct SymbolQualitySignals { 57 bool Deprecated = false; 58 bool ReservedName = false; // __foo, _Foo are usually implementation details. 59 // FIXME: make these findable once user types _. 60 bool ImplementationDetail = false; 61 unsigned References = 0; 62 63 enum SymbolCategory { 64 Unknown = 0, 65 Variable, 66 Macro, 67 Type, 68 Function, 69 Constructor, 70 Destructor, 71 Namespace, 72 Keyword, 73 Operator, 74 } Category = Unknown; 75 76 void merge(const CodeCompletionResult &SemaCCResult); 77 void merge(const Symbol &IndexResult); 78 79 // Condense these signals down to a single number, higher is better. 80 float evaluateHeuristics() const; 81 }; 82 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 83 const SymbolQualitySignals &); 84 85 /// Attributes of a symbol-query pair that affect how much we like it. 86 struct SymbolRelevanceSignals { 87 /// The name of the symbol (for ContextWords). Must be explicitly assigned. 88 llvm::StringRef Name; 89 /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned. 90 float NameMatch = 1; 91 /// Lowercase words relevant to the context (e.g. near the completion point). 92 llvm::StringSet<>* ContextWords = nullptr; 93 bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private). 94 /// Whether fixits needs to be applied for that completion or not. 95 bool NeedsFixIts = false; 96 bool InBaseClass = false; // A member from base class of the accessed class. 97 98 URIDistance *FileProximityMatch = nullptr; 99 /// These are used to calculate proximity between the index symbol and the 100 /// query. 101 llvm::StringRef SymbolURI; 102 /// FIXME: unify with index proximity score - signals should be 103 /// source-independent. 104 /// Proximity between best declaration and the query. [0-1], 1 is closest. 105 float SemaFileProximityScore = 0; 106 107 // Scope proximity is only considered (both index and sema) when this is set. 108 ScopeDistance *ScopeProximityMatch = nullptr; 109 std::optional<llvm::StringRef> SymbolScope; 110 // A symbol from sema should be accessible from the current scope. 111 bool SemaSaysInScope = false; 112 113 // An approximate measure of where we expect the symbol to be used. 114 enum AccessibleScope { 115 FunctionScope, 116 ClassScope, 117 FileScope, 118 GlobalScope, 119 } Scope = GlobalScope; 120 121 enum QueryType { 122 CodeComplete, 123 Generic, 124 } Query = Generic; 125 126 CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other; 127 128 // Whether symbol is an instance member of a class. 129 bool IsInstanceMember = false; 130 131 // Whether clang provided a preferred type in the completion context. 132 bool HadContextType = false; 133 // Whether a source completion item or a symbol had a type information. 134 bool HadSymbolType = false; 135 // Whether the item matches the type expected in the completion context. 136 bool TypeMatchesPreferred = false; 137 138 /// Length of the unqualified partial name of Symbol typed in 139 /// CompletionPrefix. 140 unsigned FilterLength = 0; 141 142 const ASTSignals *MainFileSignals = nullptr; 143 /// Number of references to the candidate in the main file. 144 unsigned MainFileRefs = 0; 145 /// Number of unique symbols in the main file which belongs to candidate's 146 /// namespace. This indicates how relevant the namespace is in the current 147 /// file. 148 unsigned ScopeRefsInFile = 0; 149 150 /// Set of derived signals computed by calculateDerivedSignals(). Must not be 151 /// set explicitly. 152 struct DerivedSignals { 153 /// Whether Name contains some word from context. 154 bool NameMatchesContext = false; 155 /// Min distance between SymbolURI and all the headers included by the TU. 156 unsigned FileProximityDistance = FileDistance::Unreachable; 157 /// Min distance between SymbolScope and all the available scopes. 158 unsigned ScopeProximityDistance = FileDistance::Unreachable; 159 }; 160 161 DerivedSignals calculateDerivedSignals() const; 162 163 void merge(const CodeCompletionResult &SemaResult); 164 void merge(const Symbol &IndexResult); 165 void computeASTSignals(const CodeCompletionResult &SemaResult); 166 167 // Condense these signals down to a single number, higher is better. 168 float evaluateHeuristics() const; 169 }; 170 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 171 const SymbolRelevanceSignals &); 172 173 /// Combine symbol quality and relevance into a single score. 174 float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance); 175 176 /// Same semantics as CodeComplete::Score. Quality score and Relevance score 177 /// have been removed since DecisionForest cannot assign individual scores to 178 /// Quality and Relevance signals. 179 struct DecisionForestScores { 180 float Total = 0.f; 181 float ExcludingName = 0.f; 182 }; 183 184 DecisionForestScores 185 evaluateDecisionForest(const SymbolQualitySignals &Quality, 186 const SymbolRelevanceSignals &Relevance, float Base); 187 188 /// TopN<T> is a lossy container that preserves only the "best" N elements. 189 template <typename T, typename Compare = std::greater<T>> class TopN { 190 public: 191 using value_type = T; 192 TopN(size_t N, Compare Greater = Compare()) N(N)193 : N(N), Greater(std::move(Greater)) {} 194 195 // Adds a candidate to the set. 196 // Returns true if a candidate was dropped to get back under N. push(value_type && V)197 bool push(value_type &&V) { 198 bool Dropped = false; 199 if (Heap.size() >= N) { 200 Dropped = true; 201 if (N > 0 && Greater(V, Heap.front())) { 202 std::pop_heap(Heap.begin(), Heap.end(), Greater); 203 Heap.back() = std::move(V); 204 std::push_heap(Heap.begin(), Heap.end(), Greater); 205 } 206 } else { 207 Heap.push_back(std::move(V)); 208 std::push_heap(Heap.begin(), Heap.end(), Greater); 209 } 210 assert(Heap.size() <= N); 211 assert(std::is_heap(Heap.begin(), Heap.end(), Greater)); 212 return Dropped; 213 } 214 215 // Returns candidates from best to worst. items()216 std::vector<value_type> items() && { 217 std::sort_heap(Heap.begin(), Heap.end(), Greater); 218 assert(Heap.size() <= N); 219 return std::move(Heap); 220 } 221 222 private: 223 const size_t N; 224 std::vector<value_type> Heap; // Min-heap, comparator is Greater. 225 Compare Greater; 226 }; 227 228 /// Returns a string that sorts in the same order as (-Score, Tiebreak), for 229 /// LSP. (The highest score compares smallest so it sorts at the top). 230 std::string sortText(float Score, llvm::StringRef Tiebreak = ""); 231 232 struct SignatureQualitySignals { 233 uint32_t NumberOfParameters = 0; 234 uint32_t NumberOfOptionalParameters = 0; 235 CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind = 236 CodeCompleteConsumer::OverloadCandidate::CandidateKind::CK_Function; 237 }; 238 llvm::raw_ostream &operator<<(llvm::raw_ostream &, 239 const SignatureQualitySignals &); 240 241 } // namespace clangd 242 } // namespace clang 243 244 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H 245