1*38fd1498Szrj /* Find near-matches for strings and identifiers. 2*38fd1498Szrj Copyright (C) 2015-2018 Free Software Foundation, Inc. 3*38fd1498Szrj 4*38fd1498Szrj This file is part of GCC. 5*38fd1498Szrj 6*38fd1498Szrj GCC is free software; you can redistribute it and/or modify it under 7*38fd1498Szrj the terms of the GNU General Public License as published by the Free 8*38fd1498Szrj Software Foundation; either version 3, or (at your option) any later 9*38fd1498Szrj version. 10*38fd1498Szrj 11*38fd1498Szrj GCC is distributed in the hope that it will be useful, but WITHOUT ANY 12*38fd1498Szrj WARRANTY; without even the implied warranty of MERCHANTABILITY or 13*38fd1498Szrj FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14*38fd1498Szrj for more details. 15*38fd1498Szrj 16*38fd1498Szrj You should have received a copy of the GNU General Public License 17*38fd1498Szrj along with GCC; see the file COPYING3. If not see 18*38fd1498Szrj <http://www.gnu.org/licenses/>. */ 19*38fd1498Szrj 20*38fd1498Szrj #ifndef GCC_SPELLCHECK_H 21*38fd1498Szrj #define GCC_SPELLCHECK_H 22*38fd1498Szrj 23*38fd1498Szrj typedef unsigned int edit_distance_t; 24*38fd1498Szrj const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX; 25*38fd1498Szrj 26*38fd1498Szrj /* spellcheck.c */ 27*38fd1498Szrj extern edit_distance_t 28*38fd1498Szrj levenshtein_distance (const char *s, int len_s, 29*38fd1498Szrj const char *t, int len_t); 30*38fd1498Szrj 31*38fd1498Szrj extern edit_distance_t 32*38fd1498Szrj levenshtein_distance (const char *s, const char *t); 33*38fd1498Szrj 34*38fd1498Szrj extern const char * 35*38fd1498Szrj find_closest_string (const char *target, 36*38fd1498Szrj const auto_vec<const char *> *candidates); 37*38fd1498Szrj 38*38fd1498Szrj /* A traits class for describing a string-like type usable by 39*38fd1498Szrj class best_match. 40*38fd1498Szrj Specializations should provide the implementations of the following: 41*38fd1498Szrj 42*38fd1498Szrj static size_t get_length (TYPE); 43*38fd1498Szrj static const char *get_string (TYPE); 44*38fd1498Szrj 45*38fd1498Szrj get_string should return a non-NULL ptr, which does not need to be 46*38fd1498Szrj 0-terminated. */ 47*38fd1498Szrj 48*38fd1498Szrj template <typename TYPE> 49*38fd1498Szrj struct edit_distance_traits {}; 50*38fd1498Szrj 51*38fd1498Szrj /* Specialization of edit_distance_traits for C-style strings. */ 52*38fd1498Szrj 53*38fd1498Szrj template <> 54*38fd1498Szrj struct edit_distance_traits<const char *> 55*38fd1498Szrj { 56*38fd1498Szrj static size_t get_length (const char *str) 57*38fd1498Szrj { 58*38fd1498Szrj gcc_assert (str); 59*38fd1498Szrj return strlen (str); 60*38fd1498Szrj } 61*38fd1498Szrj 62*38fd1498Szrj static const char *get_string (const char *str) 63*38fd1498Szrj { 64*38fd1498Szrj gcc_assert (str); 65*38fd1498Szrj return str; 66*38fd1498Szrj } 67*38fd1498Szrj }; 68*38fd1498Szrj 69*38fd1498Szrj /* A type for use when determining the best match against a string, 70*38fd1498Szrj expressed as a template so that we can match against various 71*38fd1498Szrj string-like types (const char *, frontend identifiers, and preprocessor 72*38fd1498Szrj macros). 73*38fd1498Szrj 74*38fd1498Szrj This type accumulates the best possible match against GOAL_TYPE for 75*38fd1498Szrj a sequence of elements of CANDIDATE_TYPE, whilst minimizing the 76*38fd1498Szrj number of calls to levenshtein_distance and to 77*38fd1498Szrj edit_distance_traits<T>::get_length. */ 78*38fd1498Szrj 79*38fd1498Szrj template <typename GOAL_TYPE, typename CANDIDATE_TYPE> 80*38fd1498Szrj class best_match 81*38fd1498Szrj { 82*38fd1498Szrj public: 83*38fd1498Szrj typedef GOAL_TYPE goal_t; 84*38fd1498Szrj typedef CANDIDATE_TYPE candidate_t; 85*38fd1498Szrj typedef edit_distance_traits<goal_t> goal_traits; 86*38fd1498Szrj typedef edit_distance_traits<candidate_t> candidate_traits; 87*38fd1498Szrj 88*38fd1498Szrj /* Constructor. */ 89*38fd1498Szrj 90*38fd1498Szrj best_match (GOAL_TYPE goal, 91*38fd1498Szrj edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE) 92*38fd1498Szrj : m_goal (goal_traits::get_string (goal)), 93*38fd1498Szrj m_goal_len (goal_traits::get_length (goal)), 94*38fd1498Szrj m_best_candidate (NULL), 95*38fd1498Szrj m_best_distance (best_distance_so_far) 96*38fd1498Szrj {} 97*38fd1498Szrj 98*38fd1498Szrj /* Compare the edit distance between CANDIDATE and m_goal, 99*38fd1498Szrj and if it's the best so far, record it. */ 100*38fd1498Szrj 101*38fd1498Szrj void consider (candidate_t candidate) 102*38fd1498Szrj { 103*38fd1498Szrj size_t candidate_len = candidate_traits::get_length (candidate); 104*38fd1498Szrj 105*38fd1498Szrj /* Calculate a lower bound on the candidate's distance to the goal, 106*38fd1498Szrj based on the difference in lengths; it will require at least 107*38fd1498Szrj this many insertions/deletions. */ 108*38fd1498Szrj edit_distance_t min_candidate_distance 109*38fd1498Szrj = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len); 110*38fd1498Szrj 111*38fd1498Szrj /* If the candidate's length is sufficiently different to that 112*38fd1498Szrj of the goal string, then the number of insertions/deletions 113*38fd1498Szrj may be >= the best distance so far. If so, we can reject 114*38fd1498Szrj the candidate immediately without needing to compute 115*38fd1498Szrj the exact distance, since it won't be an improvement. */ 116*38fd1498Szrj if (min_candidate_distance >= m_best_distance) 117*38fd1498Szrj return; 118*38fd1498Szrj 119*38fd1498Szrj /* If the candidate will be unable to beat the criterion in 120*38fd1498Szrj get_best_meaningful_candidate, reject it without computing 121*38fd1498Szrj the exact distance. */ 122*38fd1498Szrj unsigned int cutoff = MAX (m_goal_len, candidate_len) / 2; 123*38fd1498Szrj if (min_candidate_distance > cutoff) 124*38fd1498Szrj return; 125*38fd1498Szrj 126*38fd1498Szrj /* Otherwise, compute the distance and see if the candidate 127*38fd1498Szrj has beaten the previous best value. */ 128*38fd1498Szrj edit_distance_t dist 129*38fd1498Szrj = levenshtein_distance (m_goal, m_goal_len, 130*38fd1498Szrj candidate_traits::get_string (candidate), 131*38fd1498Szrj candidate_len); 132*38fd1498Szrj if (dist < m_best_distance) 133*38fd1498Szrj { 134*38fd1498Szrj m_best_distance = dist; 135*38fd1498Szrj m_best_candidate = candidate; 136*38fd1498Szrj m_best_candidate_len = candidate_len; 137*38fd1498Szrj } 138*38fd1498Szrj } 139*38fd1498Szrj 140*38fd1498Szrj /* Assuming that BEST_CANDIDATE is known to be better than 141*38fd1498Szrj m_best_candidate, update (without recomputing the edit distance to 142*38fd1498Szrj the goal). */ 143*38fd1498Szrj 144*38fd1498Szrj void set_best_so_far (CANDIDATE_TYPE best_candidate, 145*38fd1498Szrj edit_distance_t best_distance, 146*38fd1498Szrj size_t best_candidate_len) 147*38fd1498Szrj { 148*38fd1498Szrj gcc_assert (best_distance < m_best_distance); 149*38fd1498Szrj m_best_candidate = best_candidate; 150*38fd1498Szrj m_best_distance = best_distance; 151*38fd1498Szrj m_best_candidate_len = best_candidate_len; 152*38fd1498Szrj } 153*38fd1498Szrj 154*38fd1498Szrj /* Get the best candidate so far, but applying a filter to ensure 155*38fd1498Szrj that we return NULL if none of the candidates are close to the goal, 156*38fd1498Szrj to avoid offering nonsensical suggestions to the user. */ 157*38fd1498Szrj 158*38fd1498Szrj candidate_t get_best_meaningful_candidate () const 159*38fd1498Szrj { 160*38fd1498Szrj /* If more than half of the letters were misspelled, the suggestion is 161*38fd1498Szrj likely to be meaningless. */ 162*38fd1498Szrj if (m_best_candidate) 163*38fd1498Szrj { 164*38fd1498Szrj unsigned int cutoff = MAX (m_goal_len, m_best_candidate_len) / 2; 165*38fd1498Szrj if (m_best_distance > cutoff) 166*38fd1498Szrj return NULL; 167*38fd1498Szrj } 168*38fd1498Szrj 169*38fd1498Szrj /* If the goal string somehow makes it into the candidate list, offering 170*38fd1498Szrj it as a suggestion will be nonsensical e.g. 171*38fd1498Szrj 'constexpr' does not name a type; did you mean 'constexpr'? 172*38fd1498Szrj Ultimately such suggestions are due to bugs in constructing the 173*38fd1498Szrj candidate list, but as a band-aid, do not offer suggestions for 174*38fd1498Szrj distance == 0 (where candidate == goal). */ 175*38fd1498Szrj if (m_best_distance == 0) 176*38fd1498Szrj return NULL; 177*38fd1498Szrj 178*38fd1498Szrj return m_best_candidate; 179*38fd1498Szrj } 180*38fd1498Szrj 181*38fd1498Szrj /* Get the closest candidate so far, without applying any filtering. */ 182*38fd1498Szrj 183*38fd1498Szrj candidate_t blithely_get_best_candidate () const 184*38fd1498Szrj { 185*38fd1498Szrj return m_best_candidate; 186*38fd1498Szrj } 187*38fd1498Szrj 188*38fd1498Szrj edit_distance_t get_best_distance () const { return m_best_distance; } 189*38fd1498Szrj size_t get_best_candidate_length () const { return m_best_candidate_len; } 190*38fd1498Szrj 191*38fd1498Szrj private: 192*38fd1498Szrj const char *m_goal; 193*38fd1498Szrj size_t m_goal_len; 194*38fd1498Szrj candidate_t m_best_candidate; 195*38fd1498Szrj edit_distance_t m_best_distance; 196*38fd1498Szrj size_t m_best_candidate_len; 197*38fd1498Szrj }; 198*38fd1498Szrj 199*38fd1498Szrj #endif /* GCC_SPELLCHECK_H */ 200