111
|
1 /* Find near-matches for strings and identifiers.
|
131
|
2 Copyright (C) 2015-2018 Free Software Foundation, Inc.
|
111
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify it under
|
|
7 the terms of the GNU General Public License as published by the Free
|
|
8 Software Foundation; either version 3, or (at your option) any later
|
|
9 version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with GCC; see the file COPYING3. If not see
|
|
18 <http://www.gnu.org/licenses/>. */
|
|
19
|
|
20 #ifndef GCC_SPELLCHECK_H
|
|
21 #define GCC_SPELLCHECK_H
|
|
22
|
|
23 typedef unsigned int edit_distance_t;
|
|
24 const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX;
|
|
25
|
|
26 /* spellcheck.c */
|
|
27 extern edit_distance_t
|
131
|
28 get_edit_distance (const char *s, int len_s,
|
|
29 const char *t, int len_t);
|
111
|
30
|
|
31 extern edit_distance_t
|
131
|
32 get_edit_distance (const char *s, const char *t);
|
111
|
33
|
|
34 extern const char *
|
|
35 find_closest_string (const char *target,
|
|
36 const auto_vec<const char *> *candidates);
|
|
37
|
|
38 /* A traits class for describing a string-like type usable by
|
|
39 class best_match.
|
|
40 Specializations should provide the implementations of the following:
|
|
41
|
|
42 static size_t get_length (TYPE);
|
|
43 static const char *get_string (TYPE);
|
|
44
|
|
45 get_string should return a non-NULL ptr, which does not need to be
|
|
46 0-terminated. */
|
|
47
|
|
48 template <typename TYPE>
|
|
49 struct edit_distance_traits {};
|
|
50
|
|
51 /* Specialization of edit_distance_traits for C-style strings. */
|
|
52
|
|
53 template <>
|
|
54 struct edit_distance_traits<const char *>
|
|
55 {
|
|
56 static size_t get_length (const char *str)
|
|
57 {
|
|
58 gcc_assert (str);
|
|
59 return strlen (str);
|
|
60 }
|
|
61
|
|
62 static const char *get_string (const char *str)
|
|
63 {
|
|
64 gcc_assert (str);
|
|
65 return str;
|
|
66 }
|
|
67 };
|
|
68
|
131
|
69 extern edit_distance_t get_edit_distance_cutoff (size_t goal_len,
|
|
70 size_t candidate_len);
|
|
71
|
111
|
72 /* A type for use when determining the best match against a string,
|
|
73 expressed as a template so that we can match against various
|
|
74 string-like types (const char *, frontend identifiers, and preprocessor
|
|
75 macros).
|
|
76
|
|
77 This type accumulates the best possible match against GOAL_TYPE for
|
|
78 a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
|
131
|
79 number of calls to get_edit_distance and to
|
111
|
80 edit_distance_traits<T>::get_length. */
|
|
81
|
|
82 template <typename GOAL_TYPE, typename CANDIDATE_TYPE>
|
|
83 class best_match
|
|
84 {
|
|
85 public:
|
|
86 typedef GOAL_TYPE goal_t;
|
|
87 typedef CANDIDATE_TYPE candidate_t;
|
|
88 typedef edit_distance_traits<goal_t> goal_traits;
|
|
89 typedef edit_distance_traits<candidate_t> candidate_traits;
|
|
90
|
|
91 /* Constructor. */
|
|
92
|
|
93 best_match (GOAL_TYPE goal,
|
|
94 edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE)
|
|
95 : m_goal (goal_traits::get_string (goal)),
|
|
96 m_goal_len (goal_traits::get_length (goal)),
|
|
97 m_best_candidate (NULL),
|
|
98 m_best_distance (best_distance_so_far)
|
|
99 {}
|
|
100
|
|
101 /* Compare the edit distance between CANDIDATE and m_goal,
|
|
102 and if it's the best so far, record it. */
|
|
103
|
|
104 void consider (candidate_t candidate)
|
|
105 {
|
|
106 size_t candidate_len = candidate_traits::get_length (candidate);
|
|
107
|
|
108 /* Calculate a lower bound on the candidate's distance to the goal,
|
|
109 based on the difference in lengths; it will require at least
|
|
110 this many insertions/deletions. */
|
|
111 edit_distance_t min_candidate_distance
|
|
112 = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len);
|
|
113
|
|
114 /* If the candidate's length is sufficiently different to that
|
|
115 of the goal string, then the number of insertions/deletions
|
|
116 may be >= the best distance so far. If so, we can reject
|
|
117 the candidate immediately without needing to compute
|
|
118 the exact distance, since it won't be an improvement. */
|
|
119 if (min_candidate_distance >= m_best_distance)
|
|
120 return;
|
|
121
|
|
122 /* If the candidate will be unable to beat the criterion in
|
|
123 get_best_meaningful_candidate, reject it without computing
|
|
124 the exact distance. */
|
131
|
125 edit_distance_t cutoff = get_cutoff (candidate_len);
|
111
|
126 if (min_candidate_distance > cutoff)
|
|
127 return;
|
|
128
|
|
129 /* Otherwise, compute the distance and see if the candidate
|
|
130 has beaten the previous best value. */
|
|
131 edit_distance_t dist
|
131
|
132 = get_edit_distance (m_goal, m_goal_len,
|
|
133 candidate_traits::get_string (candidate),
|
|
134 candidate_len);
|
111
|
135 if (dist < m_best_distance)
|
|
136 {
|
|
137 m_best_distance = dist;
|
|
138 m_best_candidate = candidate;
|
|
139 m_best_candidate_len = candidate_len;
|
|
140 }
|
|
141 }
|
|
142
|
|
143 /* Assuming that BEST_CANDIDATE is known to be better than
|
|
144 m_best_candidate, update (without recomputing the edit distance to
|
|
145 the goal). */
|
|
146
|
|
147 void set_best_so_far (CANDIDATE_TYPE best_candidate,
|
|
148 edit_distance_t best_distance,
|
|
149 size_t best_candidate_len)
|
|
150 {
|
|
151 gcc_assert (best_distance < m_best_distance);
|
|
152 m_best_candidate = best_candidate;
|
|
153 m_best_distance = best_distance;
|
|
154 m_best_candidate_len = best_candidate_len;
|
|
155 }
|
|
156
|
131
|
157 /* Generate the maximum edit distance for which we consider a suggestion
|
|
158 to be meaningful, given a candidate of length CANDIDATE_LEN. */
|
|
159
|
|
160 edit_distance_t get_cutoff (size_t candidate_len) const
|
|
161 {
|
|
162 return ::get_edit_distance_cutoff (m_goal_len, candidate_len);
|
|
163 }
|
|
164
|
111
|
165 /* Get the best candidate so far, but applying a filter to ensure
|
|
166 that we return NULL if none of the candidates are close to the goal,
|
|
167 to avoid offering nonsensical suggestions to the user. */
|
|
168
|
|
169 candidate_t get_best_meaningful_candidate () const
|
|
170 {
|
131
|
171 /* If the edit distance is too high, the suggestion is likely to be
|
|
172 meaningless. */
|
111
|
173 if (m_best_candidate)
|
|
174 {
|
131
|
175 edit_distance_t cutoff = get_cutoff (m_best_candidate_len);
|
111
|
176 if (m_best_distance > cutoff)
|
|
177 return NULL;
|
|
178 }
|
|
179
|
|
180 /* If the goal string somehow makes it into the candidate list, offering
|
|
181 it as a suggestion will be nonsensical e.g.
|
|
182 'constexpr' does not name a type; did you mean 'constexpr'?
|
|
183 Ultimately such suggestions are due to bugs in constructing the
|
|
184 candidate list, but as a band-aid, do not offer suggestions for
|
|
185 distance == 0 (where candidate == goal). */
|
|
186 if (m_best_distance == 0)
|
|
187 return NULL;
|
|
188
|
|
189 return m_best_candidate;
|
|
190 }
|
|
191
|
131
|
192 /* Get the closest candidate so far, without applying any filtering. */
|
|
193
|
|
194 candidate_t blithely_get_best_candidate () const
|
|
195 {
|
|
196 return m_best_candidate;
|
|
197 }
|
|
198
|
111
|
199 edit_distance_t get_best_distance () const { return m_best_distance; }
|
|
200 size_t get_best_candidate_length () const { return m_best_candidate_len; }
|
|
201
|
|
202 private:
|
|
203 const char *m_goal;
|
|
204 size_t m_goal_len;
|
|
205 candidate_t m_best_candidate;
|
|
206 edit_distance_t m_best_distance;
|
|
207 size_t m_best_candidate_len;
|
|
208 };
|
|
209
|
|
210 #endif /* GCC_SPELLCHECK_H */
|