xref: /dflybsd-src/contrib/gcc-8.0/gcc/spellcheck.h (revision 38fd149817dfbff97799f62fcb70be98c4e32523)
1*38fd1498Szrj /* Find near-matches for strings and identifiers.
2*38fd1498Szrj    Copyright (C) 2015-2018 Free Software Foundation, Inc.
3*38fd1498Szrj 
4*38fd1498Szrj This file is part of GCC.
5*38fd1498Szrj 
6*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under
7*38fd1498Szrj the terms of the GNU General Public License as published by the Free
8*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later
9*38fd1498Szrj version.
10*38fd1498Szrj 
11*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY
12*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or
13*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
14*38fd1498Szrj for more details.
15*38fd1498Szrj 
16*38fd1498Szrj You should have received a copy of the GNU General Public License
17*38fd1498Szrj along with GCC; see the file COPYING3.  If not see
18*38fd1498Szrj <http://www.gnu.org/licenses/>.  */
19*38fd1498Szrj 
20*38fd1498Szrj #ifndef GCC_SPELLCHECK_H
21*38fd1498Szrj #define GCC_SPELLCHECK_H
22*38fd1498Szrj 
23*38fd1498Szrj typedef unsigned int edit_distance_t;
24*38fd1498Szrj const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX;
25*38fd1498Szrj 
26*38fd1498Szrj /* spellcheck.c  */
27*38fd1498Szrj extern edit_distance_t
28*38fd1498Szrj levenshtein_distance (const char *s, int len_s,
29*38fd1498Szrj 		      const char *t, int len_t);
30*38fd1498Szrj 
31*38fd1498Szrj extern edit_distance_t
32*38fd1498Szrj levenshtein_distance (const char *s, const char *t);
33*38fd1498Szrj 
34*38fd1498Szrj extern const char *
35*38fd1498Szrj find_closest_string (const char *target,
36*38fd1498Szrj 		     const auto_vec<const char *> *candidates);
37*38fd1498Szrj 
38*38fd1498Szrj /* A traits class for describing a string-like type usable by
39*38fd1498Szrj    class best_match.
40*38fd1498Szrj    Specializations should provide the implementations of the following:
41*38fd1498Szrj 
42*38fd1498Szrj      static size_t get_length (TYPE);
43*38fd1498Szrj      static const char *get_string (TYPE);
44*38fd1498Szrj 
45*38fd1498Szrj    get_string should return a non-NULL ptr, which does not need to be
46*38fd1498Szrj    0-terminated.  */
47*38fd1498Szrj 
48*38fd1498Szrj template <typename TYPE>
49*38fd1498Szrj struct edit_distance_traits {};
50*38fd1498Szrj 
51*38fd1498Szrj /* Specialization of edit_distance_traits for C-style strings.  */
52*38fd1498Szrj 
53*38fd1498Szrj template <>
54*38fd1498Szrj struct edit_distance_traits<const char *>
55*38fd1498Szrj {
56*38fd1498Szrj   static size_t get_length (const char *str)
57*38fd1498Szrj   {
58*38fd1498Szrj     gcc_assert (str);
59*38fd1498Szrj     return strlen (str);
60*38fd1498Szrj   }
61*38fd1498Szrj 
62*38fd1498Szrj   static const char *get_string (const char *str)
63*38fd1498Szrj   {
64*38fd1498Szrj     gcc_assert (str);
65*38fd1498Szrj     return str;
66*38fd1498Szrj   }
67*38fd1498Szrj };
68*38fd1498Szrj 
69*38fd1498Szrj /* A type for use when determining the best match against a string,
70*38fd1498Szrj    expressed as a template so that we can match against various
71*38fd1498Szrj    string-like types (const char *, frontend identifiers, and preprocessor
72*38fd1498Szrj    macros).
73*38fd1498Szrj 
74*38fd1498Szrj    This type accumulates the best possible match against GOAL_TYPE for
75*38fd1498Szrj    a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
76*38fd1498Szrj    number of calls to levenshtein_distance and to
77*38fd1498Szrj    edit_distance_traits<T>::get_length.  */
78*38fd1498Szrj 
79*38fd1498Szrj template <typename GOAL_TYPE, typename CANDIDATE_TYPE>
80*38fd1498Szrj class best_match
81*38fd1498Szrj {
82*38fd1498Szrj  public:
83*38fd1498Szrj   typedef GOAL_TYPE goal_t;
84*38fd1498Szrj   typedef CANDIDATE_TYPE candidate_t;
85*38fd1498Szrj   typedef edit_distance_traits<goal_t> goal_traits;
86*38fd1498Szrj   typedef edit_distance_traits<candidate_t> candidate_traits;
87*38fd1498Szrj 
88*38fd1498Szrj   /* Constructor.  */
89*38fd1498Szrj 
90*38fd1498Szrj   best_match (GOAL_TYPE goal,
91*38fd1498Szrj 	      edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE)
92*38fd1498Szrj   : m_goal (goal_traits::get_string (goal)),
93*38fd1498Szrj     m_goal_len (goal_traits::get_length (goal)),
94*38fd1498Szrj     m_best_candidate (NULL),
95*38fd1498Szrj     m_best_distance (best_distance_so_far)
96*38fd1498Szrj   {}
97*38fd1498Szrj 
98*38fd1498Szrj   /* Compare the edit distance between CANDIDATE and m_goal,
99*38fd1498Szrj      and if it's the best so far, record it.  */
100*38fd1498Szrj 
101*38fd1498Szrj   void consider (candidate_t candidate)
102*38fd1498Szrj   {
103*38fd1498Szrj     size_t candidate_len = candidate_traits::get_length (candidate);
104*38fd1498Szrj 
105*38fd1498Szrj     /* Calculate a lower bound on the candidate's distance to the goal,
106*38fd1498Szrj        based on the difference in lengths; it will require at least
107*38fd1498Szrj        this many insertions/deletions.  */
108*38fd1498Szrj     edit_distance_t min_candidate_distance
109*38fd1498Szrj       = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len);
110*38fd1498Szrj 
111*38fd1498Szrj     /* If the candidate's length is sufficiently different to that
112*38fd1498Szrj        of the goal string, then the number of insertions/deletions
113*38fd1498Szrj        may be >= the best distance so far.  If so, we can reject
114*38fd1498Szrj        the candidate immediately without needing to compute
115*38fd1498Szrj        the exact distance, since it won't be an improvement.  */
116*38fd1498Szrj     if (min_candidate_distance >= m_best_distance)
117*38fd1498Szrj       return;
118*38fd1498Szrj 
119*38fd1498Szrj     /* If the candidate will be unable to beat the criterion in
120*38fd1498Szrj        get_best_meaningful_candidate, reject it without computing
121*38fd1498Szrj        the exact distance.  */
122*38fd1498Szrj     unsigned int cutoff = MAX (m_goal_len, candidate_len) / 2;
123*38fd1498Szrj     if (min_candidate_distance > cutoff)
124*38fd1498Szrj       return;
125*38fd1498Szrj 
126*38fd1498Szrj     /* Otherwise, compute the distance and see if the candidate
127*38fd1498Szrj        has beaten the previous best value.  */
128*38fd1498Szrj     edit_distance_t dist
129*38fd1498Szrj       = levenshtein_distance (m_goal, m_goal_len,
130*38fd1498Szrj 			      candidate_traits::get_string (candidate),
131*38fd1498Szrj 			      candidate_len);
132*38fd1498Szrj     if (dist < m_best_distance)
133*38fd1498Szrj       {
134*38fd1498Szrj 	m_best_distance = dist;
135*38fd1498Szrj 	m_best_candidate = candidate;
136*38fd1498Szrj 	m_best_candidate_len = candidate_len;
137*38fd1498Szrj       }
138*38fd1498Szrj   }
139*38fd1498Szrj 
140*38fd1498Szrj   /* Assuming that BEST_CANDIDATE is known to be better than
141*38fd1498Szrj      m_best_candidate, update (without recomputing the edit distance to
142*38fd1498Szrj      the goal).  */
143*38fd1498Szrj 
144*38fd1498Szrj   void set_best_so_far (CANDIDATE_TYPE best_candidate,
145*38fd1498Szrj 			edit_distance_t best_distance,
146*38fd1498Szrj 			size_t best_candidate_len)
147*38fd1498Szrj   {
148*38fd1498Szrj     gcc_assert (best_distance < m_best_distance);
149*38fd1498Szrj     m_best_candidate = best_candidate;
150*38fd1498Szrj     m_best_distance = best_distance;
151*38fd1498Szrj     m_best_candidate_len = best_candidate_len;
152*38fd1498Szrj   }
153*38fd1498Szrj 
154*38fd1498Szrj   /* Get the best candidate so far, but applying a filter to ensure
155*38fd1498Szrj      that we return NULL if none of the candidates are close to the goal,
156*38fd1498Szrj      to avoid offering nonsensical suggestions to the user.  */
157*38fd1498Szrj 
158*38fd1498Szrj   candidate_t get_best_meaningful_candidate () const
159*38fd1498Szrj   {
160*38fd1498Szrj     /* If more than half of the letters were misspelled, the suggestion is
161*38fd1498Szrj        likely to be meaningless.  */
162*38fd1498Szrj     if (m_best_candidate)
163*38fd1498Szrj       {
164*38fd1498Szrj 	unsigned int cutoff = MAX (m_goal_len, m_best_candidate_len) / 2;
165*38fd1498Szrj 	if (m_best_distance > cutoff)
166*38fd1498Szrj 	  return NULL;
167*38fd1498Szrj     }
168*38fd1498Szrj 
169*38fd1498Szrj     /* If the goal string somehow makes it into the candidate list, offering
170*38fd1498Szrj        it as a suggestion will be nonsensical e.g.
171*38fd1498Szrj          'constexpr' does not name a type; did you mean 'constexpr'?
172*38fd1498Szrj        Ultimately such suggestions are due to bugs in constructing the
173*38fd1498Szrj        candidate list, but as a band-aid, do not offer suggestions for
174*38fd1498Szrj        distance == 0 (where candidate == goal).  */
175*38fd1498Szrj     if (m_best_distance == 0)
176*38fd1498Szrj       return NULL;
177*38fd1498Szrj 
178*38fd1498Szrj     return m_best_candidate;
179*38fd1498Szrj   }
180*38fd1498Szrj 
181*38fd1498Szrj   /* Get the closest candidate so far, without applying any filtering.  */
182*38fd1498Szrj 
183*38fd1498Szrj   candidate_t blithely_get_best_candidate () const
184*38fd1498Szrj   {
185*38fd1498Szrj     return m_best_candidate;
186*38fd1498Szrj   }
187*38fd1498Szrj 
188*38fd1498Szrj   edit_distance_t get_best_distance () const { return m_best_distance; }
189*38fd1498Szrj   size_t get_best_candidate_length () const { return m_best_candidate_len; }
190*38fd1498Szrj 
191*38fd1498Szrj  private:
192*38fd1498Szrj   const char *m_goal;
193*38fd1498Szrj   size_t m_goal_len;
194*38fd1498Szrj   candidate_t m_best_candidate;
195*38fd1498Szrj   edit_distance_t m_best_distance;
196*38fd1498Szrj   size_t m_best_candidate_len;
197*38fd1498Szrj };
198*38fd1498Szrj 
199*38fd1498Szrj #endif  /* GCC_SPELLCHECK_H  */
200