annotate gcc/spellcheck.c @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
111
kono
parents:
diff changeset
1 /* Find near-matches for strings.
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
2 Copyright (C) 2015-2018 Free Software Foundation, Inc.
111
kono
parents:
diff changeset
3
kono
parents:
diff changeset
4 This file is part of GCC.
kono
parents:
diff changeset
5
kono
parents:
diff changeset
6 GCC is free software; you can redistribute it and/or modify it under
kono
parents:
diff changeset
7 the terms of the GNU General Public License as published by the Free
kono
parents:
diff changeset
8 Software Foundation; either version 3, or (at your option) any later
kono
parents:
diff changeset
9 version.
kono
parents:
diff changeset
10
kono
parents:
diff changeset
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
kono
parents:
diff changeset
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
kono
parents:
diff changeset
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
kono
parents:
diff changeset
14 for more details.
kono
parents:
diff changeset
15
kono
parents:
diff changeset
16 You should have received a copy of the GNU General Public License
kono
parents:
diff changeset
17 along with GCC; see the file COPYING3. If not see
kono
parents:
diff changeset
18 <http://www.gnu.org/licenses/>. */
kono
parents:
diff changeset
19
kono
parents:
diff changeset
20 #include "config.h"
kono
parents:
diff changeset
21 #include "system.h"
kono
parents:
diff changeset
22 #include "coretypes.h"
kono
parents:
diff changeset
23 #include "tm.h"
kono
parents:
diff changeset
24 #include "tree.h"
kono
parents:
diff changeset
25 #include "spellcheck.h"
kono
parents:
diff changeset
26 #include "selftest.h"
kono
parents:
diff changeset
27
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
28 /* Get the edit distance between the two strings: the minimal
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
29 number of edits that are needed to change one string into another,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
30 where edits can be one-character insertions, removals, or substitutions,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
31 or transpositions of two adjacent characters (counting as one "edit").
111
kono
parents:
diff changeset
32
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
33 This implementation uses the Wagner-Fischer algorithm for the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
34 Damerau-Levenshtein distance; specifically, the "optimal string alignment
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
35 distance" or "restricted edit distance" variant. */
111
kono
parents:
diff changeset
36
kono
parents:
diff changeset
37 edit_distance_t
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
38 get_edit_distance (const char *s, int len_s,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
39 const char *t, int len_t)
111
kono
parents:
diff changeset
40 {
kono
parents:
diff changeset
41 const bool debug = false;
kono
parents:
diff changeset
42
kono
parents:
diff changeset
43 if (debug)
kono
parents:
diff changeset
44 {
kono
parents:
diff changeset
45 printf ("s: \"%s\" (len_s=%i)\n", s, len_s);
kono
parents:
diff changeset
46 printf ("t: \"%s\" (len_t=%i)\n", t, len_t);
kono
parents:
diff changeset
47 }
kono
parents:
diff changeset
48
kono
parents:
diff changeset
49 if (len_s == 0)
kono
parents:
diff changeset
50 return len_t;
kono
parents:
diff changeset
51 if (len_t == 0)
kono
parents:
diff changeset
52 return len_s;
kono
parents:
diff changeset
53
kono
parents:
diff changeset
54 /* We effectively build a matrix where each (i, j) contains the
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
55 distance between the prefix strings s[0:j] and t[0:i].
111
kono
parents:
diff changeset
56 Rather than actually build an (len_t + 1) * (len_s + 1) matrix,
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
57 we simply keep track of the last two rows, v_one_ago and v_two_ago,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
58 and a new row, v_next, which avoids an (len_t + 1) * (len_s + 1)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
59 allocation and memory accesses in favor of three (len_s + 1)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
60 allocations. These could potentially be
111
kono
parents:
diff changeset
61 statically-allocated if we impose a maximum length on the
kono
parents:
diff changeset
62 strings of interest. */
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
63 edit_distance_t *v_two_ago = new edit_distance_t[len_s + 1];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
64 edit_distance_t *v_one_ago = new edit_distance_t[len_s + 1];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
65 edit_distance_t *v_next = new edit_distance_t[len_s + 1];
111
kono
parents:
diff changeset
66
kono
parents:
diff changeset
67 /* The first row is for the case of an empty target string, which
kono
parents:
diff changeset
68 we can reach by deleting every character in the source string. */
kono
parents:
diff changeset
69 for (int i = 0; i < len_s + 1; i++)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
70 v_one_ago[i] = i;
111
kono
parents:
diff changeset
71
kono
parents:
diff changeset
72 /* Build successive rows. */
kono
parents:
diff changeset
73 for (int i = 0; i < len_t; i++)
kono
parents:
diff changeset
74 {
kono
parents:
diff changeset
75 if (debug)
kono
parents:
diff changeset
76 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
77 printf ("i:%i v_one_ago = ", i);
111
kono
parents:
diff changeset
78 for (int j = 0; j < len_s + 1; j++)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
79 printf ("%i ", v_one_ago[j]);
111
kono
parents:
diff changeset
80 printf ("\n");
kono
parents:
diff changeset
81 }
kono
parents:
diff changeset
82
kono
parents:
diff changeset
83 /* The initial column is for the case of an empty source string; we
kono
parents:
diff changeset
84 can reach prefixes of the target string of length i
kono
parents:
diff changeset
85 by inserting i characters. */
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
86 v_next[0] = i + 1;
111
kono
parents:
diff changeset
87
kono
parents:
diff changeset
88 /* Build the rest of the row by considering neighbors to
kono
parents:
diff changeset
89 the north, west and northwest. */
kono
parents:
diff changeset
90 for (int j = 0; j < len_s; j++)
kono
parents:
diff changeset
91 {
kono
parents:
diff changeset
92 edit_distance_t cost = (s[j] == t[i] ? 0 : 1);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
93 edit_distance_t deletion = v_next[j] + 1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
94 edit_distance_t insertion = v_one_ago[j + 1] + 1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
95 edit_distance_t substitution = v_one_ago[j] + cost;
111
kono
parents:
diff changeset
96 edit_distance_t cheapest = MIN (deletion, insertion);
kono
parents:
diff changeset
97 cheapest = MIN (cheapest, substitution);
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
98 if (i > 0 && j > 0 && s[j] == t[i - 1] && s[j - 1] == t[i])
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
99 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
100 edit_distance_t transposition = v_two_ago[j - 1] + 1;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
101 cheapest = MIN (cheapest, transposition);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
102 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
103 v_next[j + 1] = cheapest;
111
kono
parents:
diff changeset
104 }
kono
parents:
diff changeset
105
kono
parents:
diff changeset
106 /* Prepare to move on to next row. */
kono
parents:
diff changeset
107 for (int j = 0; j < len_s + 1; j++)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
108 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
109 v_two_ago[j] = v_one_ago[j];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
110 v_one_ago[j] = v_next[j];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
111 }
111
kono
parents:
diff changeset
112 }
kono
parents:
diff changeset
113
kono
parents:
diff changeset
114 if (debug)
kono
parents:
diff changeset
115 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
116 printf ("final v_next = ");
111
kono
parents:
diff changeset
117 for (int j = 0; j < len_s + 1; j++)
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
118 printf ("%i ", v_next[j]);
111
kono
parents:
diff changeset
119 printf ("\n");
kono
parents:
diff changeset
120 }
kono
parents:
diff changeset
121
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
122 edit_distance_t result = v_next[len_s];
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
123 delete[] v_two_ago;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
124 delete[] v_one_ago;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
125 delete[] v_next;
111
kono
parents:
diff changeset
126 return result;
kono
parents:
diff changeset
127 }
kono
parents:
diff changeset
128
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
129 /* Get the edit distance between two nil-terminated strings. */
111
kono
parents:
diff changeset
130
kono
parents:
diff changeset
131 edit_distance_t
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
132 get_edit_distance (const char *s, const char *t)
111
kono
parents:
diff changeset
133 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
134 return get_edit_distance (s, strlen (s), t, strlen (t));
111
kono
parents:
diff changeset
135 }
kono
parents:
diff changeset
136
kono
parents:
diff changeset
137 /* Given TARGET, a non-NULL string, and CANDIDATES, a non-NULL ptr to
kono
parents:
diff changeset
138 an autovec of non-NULL strings, determine which element within
kono
parents:
diff changeset
139 CANDIDATES has the lowest edit distance to TARGET. If there are
kono
parents:
diff changeset
140 multiple elements with the same minimal distance, the first in the
kono
parents:
diff changeset
141 vector wins.
kono
parents:
diff changeset
142
kono
parents:
diff changeset
143 If more than half of the letters were misspelled, the suggestion is
kono
parents:
diff changeset
144 likely to be meaningless, so return NULL for this case. */
kono
parents:
diff changeset
145
kono
parents:
diff changeset
146 const char *
kono
parents:
diff changeset
147 find_closest_string (const char *target,
kono
parents:
diff changeset
148 const auto_vec<const char *> *candidates)
kono
parents:
diff changeset
149 {
kono
parents:
diff changeset
150 gcc_assert (target);
kono
parents:
diff changeset
151 gcc_assert (candidates);
kono
parents:
diff changeset
152
kono
parents:
diff changeset
153 int i;
kono
parents:
diff changeset
154 const char *candidate;
kono
parents:
diff changeset
155 best_match<const char *, const char *> bm (target);
kono
parents:
diff changeset
156 FOR_EACH_VEC_ELT (*candidates, i, candidate)
kono
parents:
diff changeset
157 {
kono
parents:
diff changeset
158 gcc_assert (candidate);
kono
parents:
diff changeset
159 bm.consider (candidate);
kono
parents:
diff changeset
160 }
kono
parents:
diff changeset
161
kono
parents:
diff changeset
162 return bm.get_best_meaningful_candidate ();
kono
parents:
diff changeset
163 }
kono
parents:
diff changeset
164
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
165 /* Generate the maximum edit distance for which we consider a suggestion
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
166 to be meaningful, given a goal of length GOAL_LEN and a candidate of
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
167 length CANDIDATE_LEN.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
168
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
169 This is a third of the the length of the candidate or of the goal,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
170 whichever is bigger. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
171
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
172 edit_distance_t
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
173 get_edit_distance_cutoff (size_t goal_len, size_t candidate_len)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
174 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
175 size_t max_length = MAX (goal_len, candidate_len);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
176 size_t min_length = MIN (goal_len, candidate_len);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
177
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
178 gcc_assert (max_length >= min_length);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
179
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
180 /* Special case: don't offer suggestions for a pair of
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
181 length == 1 strings (or empty strings). */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
182 if (max_length <= 1)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
183 return 0;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
184
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
185 /* If the lengths are close, then round down. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
186 if (max_length - min_length <= 1)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
187 /* ...but allow an edit distance of at least 1. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
188 return MAX (max_length / 3, 1);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
189
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
190 /* Otherwise, round up (thus giving a little extra leeway to some cases
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
191 involving insertions/deletions). */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
192 return (max_length + 2) / 3;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
193 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
194
111
kono
parents:
diff changeset
195 #if CHECKING_P
kono
parents:
diff changeset
196
kono
parents:
diff changeset
197 namespace selftest {
kono
parents:
diff changeset
198
kono
parents:
diff changeset
199 /* Selftests. */
kono
parents:
diff changeset
200
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
201 /* Verify that get_edit_distance (A, B) equals the expected value. */
111
kono
parents:
diff changeset
202
kono
parents:
diff changeset
203 static void
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
204 test_get_edit_distance_one_way (const char *a, const char *b,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
205 edit_distance_t expected)
111
kono
parents:
diff changeset
206 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
207 edit_distance_t actual = get_edit_distance (a, b);
111
kono
parents:
diff changeset
208 ASSERT_EQ (actual, expected);
kono
parents:
diff changeset
209 }
kono
parents:
diff changeset
210
kono
parents:
diff changeset
211 /* Verify that both
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
212 get_edit_distance (A, B)
111
kono
parents:
diff changeset
213 and
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
214 get_edit_distance (B, A)
111
kono
parents:
diff changeset
215 equal the expected value, to ensure that the function is symmetric. */
kono
parents:
diff changeset
216
kono
parents:
diff changeset
217 static void
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
218 test_get_edit_distance_both_ways (const char *a, const char *b,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
219 edit_distance_t expected)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
220 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
221 test_get_edit_distance_one_way (a, b, expected);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
222 test_get_edit_distance_one_way (b, a, expected);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
223 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
224
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
225 /* Verify get_edit_distance for a variety of pairs of pre-canned
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
226 inputs, comparing against known-good values. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
227
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
228 static void
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
229 test_edit_distances ()
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
230 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
231 test_get_edit_distance_both_ways ("", "nonempty", strlen ("nonempty"));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
232 test_get_edit_distance_both_ways ("saturday", "sunday", 3);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
233 test_get_edit_distance_both_ways ("foo", "m_foo", 2);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
234 test_get_edit_distance_both_ways ("hello_world", "HelloWorld", 3);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
235 test_get_edit_distance_both_ways
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
236 ("the quick brown fox jumps over the lazy dog", "dog", 40);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
237 test_get_edit_distance_both_ways
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
238 ("the quick brown fox jumps over the lazy dog",
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
239 "the quick brown dog jumps over the lazy fox",
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
240 4);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
241 test_get_edit_distance_both_ways
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
242 ("Lorem ipsum dolor sit amet, consectetur adipiscing elit,",
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
243 "All your base are belong to us",
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
244 44);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
245 test_get_edit_distance_both_ways ("foo", "FOO", 3);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
246 test_get_edit_distance_both_ways ("fee", "deed", 2);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
247 test_get_edit_distance_both_ways ("coorzd1", "coordx1", 2);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
248 test_get_edit_distance_both_ways ("assert", "sqrt", 3);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
249 test_get_edit_distance_both_ways ("PATH_MAX", "INT8_MAX", 3);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
250 test_get_edit_distance_both_ways ("time", "nice", 2);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
251 test_get_edit_distance_both_ways ("bar", "carg", 2);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
252 test_get_edit_distance_both_ways ("gtk_widget_show_all",
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
253 "GtkWidgetShowAll", 7);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
254 test_get_edit_distance_both_ways ("m_bar", "bar", 2);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
255 test_get_edit_distance_both_ways ("MACRO", "MACRAME", 3);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
256 test_get_edit_distance_both_ways ("ab", "ac", 1);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
257 test_get_edit_distance_both_ways ("ab", "a", 1);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
258 test_get_edit_distance_both_ways ("a", "b", 1);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
259 test_get_edit_distance_both_ways ("nanl", "name", 2);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
260 test_get_edit_distance_both_ways ("char", "bar", 2);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
261 test_get_edit_distance_both_ways ("-optimize", "fsanitize", 5);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
262 test_get_edit_distance_both_ways ("__DATE__", "__i386__", 4);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
263
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
264 /* Examples where transposition helps. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
265 test_get_edit_distance_both_ways ("ab", "ba", 1);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
266 test_get_edit_distance_both_ways ("ba", "abc", 2);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
267 test_get_edit_distance_both_ways ("coorzd1", "coordz1", 1);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
268 test_get_edit_distance_both_ways ("abcdefghijklmnopqrstuvwxyz",
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
269 "bacdefghijklmnopqrstuvwxzy", 2);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
270 test_get_edit_distance_both_ways ("saturday", "sundya", 4);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
271 test_get_edit_distance_both_ways ("signed", "singed", 1);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
272 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
273
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
274 /* Subroutine of test_get_edit_distance_cutoff, for emulating the
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
275 spellchecking cutoff in up to GCC 8. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
276
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
277 static edit_distance_t
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
278 get_old_cutoff (size_t goal_len, size_t candidate_len)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
279 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
280 return MAX (goal_len, candidate_len) / 2;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
281 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
282
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
283 /* Verify that the cutoff for "meaningfulness" of suggestions is at least as
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
284 conservative as in older GCC releases.
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
285
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
286 This should ensure that we don't offer additional meaningless
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
287 suggestions (apart from those for which transposition has helped). */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
288
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
289 static void
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
290 test_get_edit_distance_cutoff ()
111
kono
parents:
diff changeset
291 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
292 for (size_t goal_len = 0; goal_len < 30; goal_len++)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
293 for (size_t candidate_len = 0; candidate_len < 30; candidate_len++)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
294 ASSERT_TRUE (get_edit_distance_cutoff (goal_len, candidate_len)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
295 <= get_old_cutoff (goal_len, candidate_len));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
296 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
297
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
298 /* Assert that CANDIDATE is offered as a suggestion for TARGET. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
299
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
300 static void
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
301 assert_suggested_for (const location &loc, const char *candidate,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
302 const char *target)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
303 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
304 auto_vec<const char *> candidates;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
305 candidates.safe_push (candidate);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
306 ASSERT_EQ_AT (loc, candidate, find_closest_string (target, &candidates));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
307 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
308
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
309 /* Assert that CANDIDATE is offered as a suggestion for TARGET. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
310
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
311 #define ASSERT_SUGGESTED_FOR(CANDIDATE, TARGET) \
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
312 SELFTEST_BEGIN_STMT \
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
313 assert_suggested_for (SELFTEST_LOCATION, CANDIDATE, TARGET); \
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
314 SELFTEST_END_STMT
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
315
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
316 /* Assert that CANDIDATE is not offered as a suggestion for TARGET. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
317
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
318 static void
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
319 assert_not_suggested_for (const location &loc, const char *candidate,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
320 const char *target)
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
321 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
322 auto_vec<const char *> candidates;
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
323 candidates.safe_push (candidate);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
324 ASSERT_EQ_AT (loc, NULL, find_closest_string (target, &candidates));
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
325 }
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
326
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
327 /* Assert that CANDIDATE is not offered as a suggestion for TARGET. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
328
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
329 #define ASSERT_NOT_SUGGESTED_FOR(CANDIDATE, TARGET) \
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
330 SELFTEST_BEGIN_STMT \
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
331 assert_not_suggested_for (SELFTEST_LOCATION, CANDIDATE, TARGET); \
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
332 SELFTEST_END_STMT
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
333
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
334 /* Verify that we offer varous suggestions that are meaningful,
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
335 and that we don't offer various other ones that aren't (PR c/82967). */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
336
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
337 static void
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
338 test_suggestions ()
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
339 {
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
340 /* Good suggestions. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
341
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
342 ASSERT_SUGGESTED_FOR ("m_bar", "bar");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
343 // dist == 2, max_length == 5, min_length == 3
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
344
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
345 ASSERT_SUGGESTED_FOR ("MACRO", "MACRAME");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
346 // dist == 3, max_length == 7, min_length == 5
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
347
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
348 ASSERT_SUGGESTED_FOR ("gtk_widget_show_all", "GtkWidgetShowAll");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
349 // dist == 7, max_length == 16, min_length = 19
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
350
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
351 ASSERT_SUGGESTED_FOR ("ab", "ac");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
352 // dist == 1, max_length == min_length = 2
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
353
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
354 ASSERT_SUGGESTED_FOR ("ab", "a");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
355 // dist == 1, max_length == 2, min_length = 1
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
356
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
357 /* Bad suggestions. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
358
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
359 ASSERT_NOT_SUGGESTED_FOR ("a", "b");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
360 // dist == 1, max_length == min_length = 1
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
361
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
362 ASSERT_NOT_SUGGESTED_FOR ("sqrt", "assert");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
363 // dist == 3, max_length 6, min_length == 4
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
364
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
365 ASSERT_NOT_SUGGESTED_FOR ("INT8_MAX", "PATH_MAX");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
366 // dist == 3, max_length == min_length == 8
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
367
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
368 ASSERT_NOT_SUGGESTED_FOR ("nice", "time");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
369 ASSERT_NOT_SUGGESTED_FOR ("nanl", "name");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
370 // dist == 2, max_length == min_length == 4
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
371
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
372 ASSERT_NOT_SUGGESTED_FOR ("carg", "bar");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
373 ASSERT_NOT_SUGGESTED_FOR ("char", "bar");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
374 // dist == 2, max_length == 4, min_length == 3
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
375
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
376 ASSERT_NOT_SUGGESTED_FOR ("-optimize", "fsanitize");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
377 // dist == 5, max_length == min_length == 9
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
378
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
379 ASSERT_NOT_SUGGESTED_FOR ("__DATE__", "__i386__");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
380 // dist == 4, max_length == min_length == 8
111
kono
parents:
diff changeset
381 }
kono
parents:
diff changeset
382
kono
parents:
diff changeset
383 /* Verify that find_closest_string is sane. */
kono
parents:
diff changeset
384
kono
parents:
diff changeset
385 static void
kono
parents:
diff changeset
386 test_find_closest_string ()
kono
parents:
diff changeset
387 {
kono
parents:
diff changeset
388 auto_vec<const char *> candidates;
kono
parents:
diff changeset
389
kono
parents:
diff changeset
390 /* Verify that it can handle an empty vec. */
kono
parents:
diff changeset
391 ASSERT_EQ (NULL, find_closest_string ("", &candidates));
kono
parents:
diff changeset
392
kono
parents:
diff changeset
393 /* Verify that it works sanely for non-empty vecs. */
kono
parents:
diff changeset
394 candidates.safe_push ("apple");
kono
parents:
diff changeset
395 candidates.safe_push ("banana");
kono
parents:
diff changeset
396 candidates.safe_push ("cherry");
kono
parents:
diff changeset
397
kono
parents:
diff changeset
398 ASSERT_STREQ ("apple", find_closest_string ("app", &candidates));
kono
parents:
diff changeset
399 ASSERT_STREQ ("banana", find_closest_string ("banyan", &candidates));
kono
parents:
diff changeset
400 ASSERT_STREQ ("cherry", find_closest_string ("berry", &candidates));
kono
parents:
diff changeset
401 ASSERT_EQ (NULL, find_closest_string ("not like the others", &candidates));
kono
parents:
diff changeset
402
kono
parents:
diff changeset
403 /* The order of the vec can matter, but it should not matter for these
kono
parents:
diff changeset
404 inputs. */
kono
parents:
diff changeset
405 candidates.truncate (0);
kono
parents:
diff changeset
406 candidates.safe_push ("cherry");
kono
parents:
diff changeset
407 candidates.safe_push ("banana");
kono
parents:
diff changeset
408 candidates.safe_push ("apple");
kono
parents:
diff changeset
409 ASSERT_STREQ ("apple", find_closest_string ("app", &candidates));
kono
parents:
diff changeset
410 ASSERT_STREQ ("banana", find_closest_string ("banyan", &candidates));
kono
parents:
diff changeset
411 ASSERT_STREQ ("cherry", find_closest_string ("berry", &candidates));
kono
parents:
diff changeset
412 ASSERT_EQ (NULL, find_closest_string ("not like the others", &candidates));
kono
parents:
diff changeset
413
kono
parents:
diff changeset
414 /* If the goal string somehow makes it into the candidate list, offering
kono
parents:
diff changeset
415 it as a suggestion will be nonsensical. Verify that we don't offer such
kono
parents:
diff changeset
416 suggestions. */
kono
parents:
diff changeset
417 ASSERT_EQ (NULL, find_closest_string ("banana", &candidates));
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
418
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
419 /* Example from PR 69968 where transposition helps. */
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
420 candidates.truncate (0);
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
421 candidates.safe_push("coordx");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
422 candidates.safe_push("coordy");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
423 candidates.safe_push("coordz");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
424 candidates.safe_push("coordx1");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
425 candidates.safe_push("coordy1");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
426 candidates.safe_push("coordz1");
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
427 ASSERT_STREQ ("coordz1", find_closest_string ("coorzd1", &candidates));
111
kono
parents:
diff changeset
428 }
kono
parents:
diff changeset
429
kono
parents:
diff changeset
430 /* Test data for test_metric_conditions. */
kono
parents:
diff changeset
431
kono
parents:
diff changeset
432 static const char * const test_data[] = {
kono
parents:
diff changeset
433 "",
kono
parents:
diff changeset
434 "foo",
kono
parents:
diff changeset
435 "food",
kono
parents:
diff changeset
436 "boo",
kono
parents:
diff changeset
437 "1234567890123456789012345678901234567890123456789012345678901234567890"
kono
parents:
diff changeset
438 };
kono
parents:
diff changeset
439
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
440 /* Verify that get_edit_distance appears to be a sane distance function,
111
kono
parents:
diff changeset
441 i.e. the conditions for being a metric. This is done directly for a
kono
parents:
diff changeset
442 small set of examples, using test_data above. This is O(N^3) in the size
kono
parents:
diff changeset
443 of the array, due to the test for the triangle inequality, so we keep the
kono
parents:
diff changeset
444 array small. */
kono
parents:
diff changeset
445
kono
parents:
diff changeset
446 static void
kono
parents:
diff changeset
447 test_metric_conditions ()
kono
parents:
diff changeset
448 {
kono
parents:
diff changeset
449 const int num_test_cases = sizeof (test_data) / sizeof (test_data[0]);
kono
parents:
diff changeset
450
kono
parents:
diff changeset
451 for (int i = 0; i < num_test_cases; i++)
kono
parents:
diff changeset
452 {
kono
parents:
diff changeset
453 for (int j = 0; j < num_test_cases; j++)
kono
parents:
diff changeset
454 {
kono
parents:
diff changeset
455 edit_distance_t dist_ij
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
456 = get_edit_distance (test_data[i], test_data[j]);
111
kono
parents:
diff changeset
457
kono
parents:
diff changeset
458 /* Identity of indiscernibles: d(i, j) > 0 iff i == j. */
kono
parents:
diff changeset
459 if (i == j)
kono
parents:
diff changeset
460 ASSERT_EQ (dist_ij, 0);
kono
parents:
diff changeset
461 else
kono
parents:
diff changeset
462 ASSERT_TRUE (dist_ij > 0);
kono
parents:
diff changeset
463
kono
parents:
diff changeset
464 /* Symmetry: d(i, j) == d(j, i). */
kono
parents:
diff changeset
465 edit_distance_t dist_ji
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
466 = get_edit_distance (test_data[j], test_data[i]);
111
kono
parents:
diff changeset
467 ASSERT_EQ (dist_ij, dist_ji);
kono
parents:
diff changeset
468
kono
parents:
diff changeset
469 /* Triangle inequality. */
kono
parents:
diff changeset
470 for (int k = 0; k < num_test_cases; k++)
kono
parents:
diff changeset
471 {
kono
parents:
diff changeset
472 edit_distance_t dist_ik
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
473 = get_edit_distance (test_data[i], test_data[k]);
111
kono
parents:
diff changeset
474 edit_distance_t dist_jk
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
475 = get_edit_distance (test_data[j], test_data[k]);
111
kono
parents:
diff changeset
476 ASSERT_TRUE (dist_ik <= dist_ij + dist_jk);
kono
parents:
diff changeset
477 }
kono
parents:
diff changeset
478 }
kono
parents:
diff changeset
479 }
kono
parents:
diff changeset
480 }
kono
parents:
diff changeset
481
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
482 /* Run all of the selftests within this file. */
111
kono
parents:
diff changeset
483
kono
parents:
diff changeset
484 void
kono
parents:
diff changeset
485 spellcheck_c_tests ()
kono
parents:
diff changeset
486 {
131
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
487 test_edit_distances ();
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
488 test_get_edit_distance_cutoff ();
84e7813d76e9 gcc-8.2
mir3636
parents: 111
diff changeset
489 test_suggestions ();
111
kono
parents:
diff changeset
490 test_find_closest_string ();
kono
parents:
diff changeset
491 test_metric_conditions ();
kono
parents:
diff changeset
492 }
kono
parents:
diff changeset
493
kono
parents:
diff changeset
494 } // namespace selftest
kono
parents:
diff changeset
495
kono
parents:
diff changeset
496 #endif /* #if CHECKING_P */