xref: /llvm-project/clang-tools-extra/clangd/Quality.h (revision f71ffd3b735b4d6ae3c12be1806cdd6205b3b378)
1 //===--- Quality.h - Ranking alternatives for ambiguous queries --*- C++-*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// Some operations such as code completion produce a set of candidates.
10 /// Usually the user can choose between them, but we should put the best options
11 /// at the top (they're easier to select, and more likely to be seen).
12 ///
13 /// This file defines building blocks for ranking candidates.
14 /// It's used by the features directly and also in the implementation of
15 /// indexes, as indexes also need to heuristically limit their results.
16 ///
17 /// The facilities here are:
18 ///   - retrieving scoring signals from e.g. indexes, AST, CodeCompletionString
19 ///     These are structured in a way that they can be debugged, and are fairly
20 ///     consistent regardless of the source.
21 ///   - compute scores from scoring signals. These are suitable for sorting.
22 ///   - sorting utilities like the TopN container.
23 /// These could be split up further to isolate dependencies if we care.
24 ///
25 //===----------------------------------------------------------------------===//
26 
27 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
28 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
29 
30 #include "FileDistance.h"
31 #include "clang/Sema/CodeCompleteConsumer.h"
32 #include "llvm/ADT/StringRef.h"
33 #include "llvm/ADT/StringSet.h"
34 #include <algorithm>
35 #include <functional>
36 #include <optional>
37 #include <vector>
38 
39 namespace llvm {
40 class raw_ostream;
41 } // namespace llvm
42 
43 namespace clang {
44 class CodeCompletionResult;
45 
46 namespace clangd {
47 struct ASTSignals;
48 struct Symbol;
49 class URIDistance;
50 
51 // Signals structs are designed to be aggregated from 0 or more sources.
52 // A default instance has neutral signals, and sources are merged into it.
53 // They can be dumped for debugging, and evaluate()d into a score.
54 
55 /// Attributes of a symbol that affect how much we like it.
56 struct SymbolQualitySignals {
57   bool Deprecated = false;
58   bool ReservedName = false; // __foo, _Foo are usually implementation details.
59                              // FIXME: make these findable once user types _.
60   bool ImplementationDetail = false;
61   unsigned References = 0;
62 
63   enum SymbolCategory {
64     Unknown = 0,
65     Variable,
66     Macro,
67     Type,
68     Function,
69     Constructor,
70     Destructor,
71     Namespace,
72     Keyword,
73     Operator,
74   } Category = Unknown;
75 
76   void merge(const CodeCompletionResult &SemaCCResult);
77   void merge(const Symbol &IndexResult);
78 
79   // Condense these signals down to a single number, higher is better.
80   float evaluateHeuristics() const;
81 };
82 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
83                               const SymbolQualitySignals &);
84 
85 /// Attributes of a symbol-query pair that affect how much we like it.
86 struct SymbolRelevanceSignals {
87   /// The name of the symbol (for ContextWords). Must be explicitly assigned.
88   llvm::StringRef Name;
89   /// 0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
90   float NameMatch = 1;
91   /// Lowercase words relevant to the context (e.g. near the completion point).
92   llvm::StringSet<>* ContextWords = nullptr;
93   bool Forbidden = false; // Unavailable (e.g const) or inaccessible (private).
94   /// Whether fixits needs to be applied for that completion or not.
95   bool NeedsFixIts = false;
96   bool InBaseClass = false; // A member from base class of the accessed class.
97 
98   URIDistance *FileProximityMatch = nullptr;
99   /// These are used to calculate proximity between the index symbol and the
100   /// query.
101   llvm::StringRef SymbolURI;
102   /// FIXME: unify with index proximity score - signals should be
103   /// source-independent.
104   /// Proximity between best declaration and the query. [0-1], 1 is closest.
105   float SemaFileProximityScore = 0;
106 
107   // Scope proximity is only considered (both index and sema) when this is set.
108   ScopeDistance *ScopeProximityMatch = nullptr;
109   std::optional<llvm::StringRef> SymbolScope;
110   // A symbol from sema should be accessible from the current scope.
111   bool SemaSaysInScope = false;
112 
113   // An approximate measure of where we expect the symbol to be used.
114   enum AccessibleScope {
115     FunctionScope,
116     ClassScope,
117     FileScope,
118     GlobalScope,
119   } Scope = GlobalScope;
120 
121   enum QueryType {
122     CodeComplete,
123     Generic,
124   } Query = Generic;
125 
126   CodeCompletionContext::Kind Context = CodeCompletionContext::CCC_Other;
127 
128   // Whether symbol is an instance member of a class.
129   bool IsInstanceMember = false;
130 
131   // Whether clang provided a preferred type in the completion context.
132   bool HadContextType = false;
133   // Whether a source completion item or a symbol had a type information.
134   bool HadSymbolType = false;
135   // Whether the item matches the type expected in the completion context.
136   bool TypeMatchesPreferred = false;
137 
138   /// Length of the unqualified partial name of Symbol typed in
139   /// CompletionPrefix.
140   unsigned FilterLength = 0;
141 
142   const ASTSignals *MainFileSignals = nullptr;
143   /// Number of references to the candidate in the main file.
144   unsigned MainFileRefs = 0;
145   /// Number of unique symbols in the main file which belongs to candidate's
146   /// namespace. This indicates how relevant the namespace is in the current
147   /// file.
148   unsigned ScopeRefsInFile = 0;
149 
150   /// Set of derived signals computed by calculateDerivedSignals(). Must not be
151   /// set explicitly.
152   struct DerivedSignals {
153     /// Whether Name contains some word from context.
154     bool NameMatchesContext = false;
155     /// Min distance between SymbolURI and all the headers included by the TU.
156     unsigned FileProximityDistance = FileDistance::Unreachable;
157     /// Min distance between SymbolScope and all the available scopes.
158     unsigned ScopeProximityDistance = FileDistance::Unreachable;
159   };
160 
161   DerivedSignals calculateDerivedSignals() const;
162 
163   void merge(const CodeCompletionResult &SemaResult);
164   void merge(const Symbol &IndexResult);
165   void computeASTSignals(const CodeCompletionResult &SemaResult);
166 
167   // Condense these signals down to a single number, higher is better.
168   float evaluateHeuristics() const;
169 };
170 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
171                               const SymbolRelevanceSignals &);
172 
173 /// Combine symbol quality and relevance into a single score.
174 float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance);
175 
176 /// Same semantics as CodeComplete::Score. Quality score and Relevance score
177 /// have been removed since DecisionForest cannot assign individual scores to
178 /// Quality and Relevance signals.
179 struct DecisionForestScores {
180   float Total = 0.f;
181   float ExcludingName = 0.f;
182 };
183 
184 DecisionForestScores
185 evaluateDecisionForest(const SymbolQualitySignals &Quality,
186                        const SymbolRelevanceSignals &Relevance, float Base);
187 
188 /// TopN<T> is a lossy container that preserves only the "best" N elements.
189 template <typename T, typename Compare = std::greater<T>> class TopN {
190 public:
191   using value_type = T;
192   TopN(size_t N, Compare Greater = Compare())
N(N)193       : N(N), Greater(std::move(Greater)) {}
194 
195   // Adds a candidate to the set.
196   // Returns true if a candidate was dropped to get back under N.
push(value_type && V)197   bool push(value_type &&V) {
198     bool Dropped = false;
199     if (Heap.size() >= N) {
200       Dropped = true;
201       if (N > 0 && Greater(V, Heap.front())) {
202         std::pop_heap(Heap.begin(), Heap.end(), Greater);
203         Heap.back() = std::move(V);
204         std::push_heap(Heap.begin(), Heap.end(), Greater);
205       }
206     } else {
207       Heap.push_back(std::move(V));
208       std::push_heap(Heap.begin(), Heap.end(), Greater);
209     }
210     assert(Heap.size() <= N);
211     assert(std::is_heap(Heap.begin(), Heap.end(), Greater));
212     return Dropped;
213   }
214 
215   // Returns candidates from best to worst.
items()216   std::vector<value_type> items() && {
217     std::sort_heap(Heap.begin(), Heap.end(), Greater);
218     assert(Heap.size() <= N);
219     return std::move(Heap);
220   }
221 
222 private:
223   const size_t N;
224   std::vector<value_type> Heap; // Min-heap, comparator is Greater.
225   Compare Greater;
226 };
227 
228 /// Returns a string that sorts in the same order as (-Score, Tiebreak), for
229 /// LSP. (The highest score compares smallest so it sorts at the top).
230 std::string sortText(float Score, llvm::StringRef Tiebreak = "");
231 
232 struct SignatureQualitySignals {
233   uint32_t NumberOfParameters = 0;
234   uint32_t NumberOfOptionalParameters = 0;
235   CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind =
236       CodeCompleteConsumer::OverloadCandidate::CandidateKind::CK_Function;
237 };
238 llvm::raw_ostream &operator<<(llvm::raw_ostream &,
239                               const SignatureQualitySignals &);
240 
241 } // namespace clangd
242 } // namespace clang
243 
244 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_QUALITY_H
245