Mercurial > hg > CbC > CbC_gcc
comparison gcc/spellcheck.c @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
comparison
equal
deleted
inserted
replaced
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 /* Find near-matches for strings. | 1 /* Find near-matches for strings. |
2 Copyright (C) 2015-2017 Free Software Foundation, Inc. | 2 Copyright (C) 2015-2018 Free Software Foundation, Inc. |
3 | 3 |
4 This file is part of GCC. | 4 This file is part of GCC. |
5 | 5 |
6 GCC is free software; you can redistribute it and/or modify it under | 6 GCC is free software; you can redistribute it and/or modify it under |
7 the terms of the GNU General Public License as published by the Free | 7 the terms of the GNU General Public License as published by the Free |
23 #include "tm.h" | 23 #include "tm.h" |
24 #include "tree.h" | 24 #include "tree.h" |
25 #include "spellcheck.h" | 25 #include "spellcheck.h" |
26 #include "selftest.h" | 26 #include "selftest.h" |
27 | 27 |
28 /* The Levenshtein distance is an "edit-distance": the minimal | 28 /* Get the edit distance between the two strings: the minimal |
29 number of one-character insertions, removals or substitutions | 29 number of edits that are needed to change one string into another, |
30 that are needed to change one string into another. | 30 where edits can be one-character insertions, removals, or substitutions, |
31 | 31 or transpositions of two adjacent characters (counting as one "edit"). |
32 This implementation uses the Wagner-Fischer algorithm. */ | 32 |
33 This implementation uses the Wagner-Fischer algorithm for the | |
34 Damerau-Levenshtein distance; specifically, the "optimal string alignment | |
35 distance" or "restricted edit distance" variant. */ | |
33 | 36 |
34 edit_distance_t | 37 edit_distance_t |
35 levenshtein_distance (const char *s, int len_s, | 38 get_edit_distance (const char *s, int len_s, |
36 const char *t, int len_t) | 39 const char *t, int len_t) |
37 { | 40 { |
38 const bool debug = false; | 41 const bool debug = false; |
39 | 42 |
40 if (debug) | 43 if (debug) |
41 { | 44 { |
47 return len_t; | 50 return len_t; |
48 if (len_t == 0) | 51 if (len_t == 0) |
49 return len_s; | 52 return len_s; |
50 | 53 |
51 /* We effectively build a matrix where each (i, j) contains the | 54 /* We effectively build a matrix where each (i, j) contains the |
52 Levenshtein distance between the prefix strings s[0:j] | 55 distance between the prefix strings s[0:j] and t[0:i]. |
53 and t[0:i]. | |
54 Rather than actually build an (len_t + 1) * (len_s + 1) matrix, | 56 Rather than actually build an (len_t + 1) * (len_s + 1) matrix, |
55 we simply keep track of the last row, v0 and a new row, v1, | 57 we simply keep track of the last two rows, v_one_ago and v_two_ago, |
56 which avoids an (len_t + 1) * (len_s + 1) allocation and memory accesses | 58 and a new row, v_next, which avoids an (len_t + 1) * (len_s + 1) |
57 in favor of two (len_s + 1) allocations. These could potentially be | 59 allocation and memory accesses in favor of three (len_s + 1) |
60 allocations. These could potentially be | |
58 statically-allocated if we impose a maximum length on the | 61 statically-allocated if we impose a maximum length on the |
59 strings of interest. */ | 62 strings of interest. */ |
60 edit_distance_t *v0 = new edit_distance_t[len_s + 1]; | 63 edit_distance_t *v_two_ago = new edit_distance_t[len_s + 1]; |
61 edit_distance_t *v1 = new edit_distance_t[len_s + 1]; | 64 edit_distance_t *v_one_ago = new edit_distance_t[len_s + 1]; |
65 edit_distance_t *v_next = new edit_distance_t[len_s + 1]; | |
62 | 66 |
63 /* The first row is for the case of an empty target string, which | 67 /* The first row is for the case of an empty target string, which |
64 we can reach by deleting every character in the source string. */ | 68 we can reach by deleting every character in the source string. */ |
65 for (int i = 0; i < len_s + 1; i++) | 69 for (int i = 0; i < len_s + 1; i++) |
66 v0[i] = i; | 70 v_one_ago[i] = i; |
67 | 71 |
68 /* Build successive rows. */ | 72 /* Build successive rows. */ |
69 for (int i = 0; i < len_t; i++) | 73 for (int i = 0; i < len_t; i++) |
70 { | 74 { |
71 if (debug) | 75 if (debug) |
72 { | 76 { |
73 printf ("i:%i v0 = ", i); | 77 printf ("i:%i v_one_ago = ", i); |
74 for (int j = 0; j < len_s + 1; j++) | 78 for (int j = 0; j < len_s + 1; j++) |
75 printf ("%i ", v0[j]); | 79 printf ("%i ", v_one_ago[j]); |
76 printf ("\n"); | 80 printf ("\n"); |
77 } | 81 } |
78 | 82 |
79 /* The initial column is for the case of an empty source string; we | 83 /* The initial column is for the case of an empty source string; we |
80 can reach prefixes of the target string of length i | 84 can reach prefixes of the target string of length i |
81 by inserting i characters. */ | 85 by inserting i characters. */ |
82 v1[0] = i + 1; | 86 v_next[0] = i + 1; |
83 | 87 |
84 /* Build the rest of the row by considering neighbors to | 88 /* Build the rest of the row by considering neighbors to |
85 the north, west and northwest. */ | 89 the north, west and northwest. */ |
86 for (int j = 0; j < len_s; j++) | 90 for (int j = 0; j < len_s; j++) |
87 { | 91 { |
88 edit_distance_t cost = (s[j] == t[i] ? 0 : 1); | 92 edit_distance_t cost = (s[j] == t[i] ? 0 : 1); |
89 edit_distance_t deletion = v1[j] + 1; | 93 edit_distance_t deletion = v_next[j] + 1; |
90 edit_distance_t insertion = v0[j + 1] + 1; | 94 edit_distance_t insertion = v_one_ago[j + 1] + 1; |
91 edit_distance_t substitution = v0[j] + cost; | 95 edit_distance_t substitution = v_one_ago[j] + cost; |
92 edit_distance_t cheapest = MIN (deletion, insertion); | 96 edit_distance_t cheapest = MIN (deletion, insertion); |
93 cheapest = MIN (cheapest, substitution); | 97 cheapest = MIN (cheapest, substitution); |
94 v1[j + 1] = cheapest; | 98 if (i > 0 && j > 0 && s[j] == t[i - 1] && s[j - 1] == t[i]) |
99 { | |
100 edit_distance_t transposition = v_two_ago[j - 1] + 1; | |
101 cheapest = MIN (cheapest, transposition); | |
102 } | |
103 v_next[j + 1] = cheapest; | |
95 } | 104 } |
96 | 105 |
97 /* Prepare to move on to next row. */ | 106 /* Prepare to move on to next row. */ |
98 for (int j = 0; j < len_s + 1; j++) | 107 for (int j = 0; j < len_s + 1; j++) |
99 v0[j] = v1[j]; | 108 { |
109 v_two_ago[j] = v_one_ago[j]; | |
110 v_one_ago[j] = v_next[j]; | |
111 } | |
100 } | 112 } |
101 | 113 |
102 if (debug) | 114 if (debug) |
103 { | 115 { |
104 printf ("final v1 = "); | 116 printf ("final v_next = "); |
105 for (int j = 0; j < len_s + 1; j++) | 117 for (int j = 0; j < len_s + 1; j++) |
106 printf ("%i ", v1[j]); | 118 printf ("%i ", v_next[j]); |
107 printf ("\n"); | 119 printf ("\n"); |
108 } | 120 } |
109 | 121 |
110 edit_distance_t result = v1[len_s]; | 122 edit_distance_t result = v_next[len_s]; |
111 delete[] v0; | 123 delete[] v_two_ago; |
112 delete[] v1; | 124 delete[] v_one_ago; |
125 delete[] v_next; | |
113 return result; | 126 return result; |
114 } | 127 } |
115 | 128 |
116 /* Calculate Levenshtein distance between two nil-terminated strings. */ | 129 /* Get the edit distance between two nil-terminated strings. */ |
117 | 130 |
118 edit_distance_t | 131 edit_distance_t |
119 levenshtein_distance (const char *s, const char *t) | 132 get_edit_distance (const char *s, const char *t) |
120 { | 133 { |
121 return levenshtein_distance (s, strlen (s), t, strlen (t)); | 134 return get_edit_distance (s, strlen (s), t, strlen (t)); |
122 } | 135 } |
123 | 136 |
124 /* Given TARGET, a non-NULL string, and CANDIDATES, a non-NULL ptr to | 137 /* Given TARGET, a non-NULL string, and CANDIDATES, a non-NULL ptr to |
125 an autovec of non-NULL strings, determine which element within | 138 an autovec of non-NULL strings, determine which element within |
126 CANDIDATES has the lowest edit distance to TARGET. If there are | 139 CANDIDATES has the lowest edit distance to TARGET. If there are |
147 } | 160 } |
148 | 161 |
149 return bm.get_best_meaningful_candidate (); | 162 return bm.get_best_meaningful_candidate (); |
150 } | 163 } |
151 | 164 |
165 /* Generate the maximum edit distance for which we consider a suggestion | |
166 to be meaningful, given a goal of length GOAL_LEN and a candidate of | |
167 length CANDIDATE_LEN. | |
168 | |
169 This is a third of the length of the candidate or of the goal, | |
170 whichever is bigger. */ | |
171 | |
172 edit_distance_t | |
173 get_edit_distance_cutoff (size_t goal_len, size_t candidate_len) | |
174 { | |
175 size_t max_length = MAX (goal_len, candidate_len); | |
176 size_t min_length = MIN (goal_len, candidate_len); | |
177 | |
178 gcc_assert (max_length >= min_length); | |
179 | |
180 /* Special case: don't offer suggestions for a pair of | |
181 length == 1 strings (or empty strings). */ | |
182 if (max_length <= 1) | |
183 return 0; | |
184 | |
185 /* If the lengths are close, then round down. */ | |
186 if (max_length - min_length <= 1) | |
187 /* ...but allow an edit distance of at least 1. */ | |
188 return MAX (max_length / 3, 1); | |
189 | |
190 /* Otherwise, round up (thus giving a little extra leeway to some cases | |
191 involving insertions/deletions). */ | |
192 return (max_length + 2) / 3; | |
193 } | |
194 | |
152 #if CHECKING_P | 195 #if CHECKING_P |
153 | 196 |
154 namespace selftest { | 197 namespace selftest { |
155 | 198 |
156 /* Selftests. */ | 199 /* Selftests. */ |
157 | 200 |
158 /* Verify that the levenshtein_distance (A, B) equals the expected | 201 /* Verify that get_edit_distance (A, B) equals the expected value. */ |
159 value. */ | 202 |
160 | 203 static void |
161 static void | 204 test_get_edit_distance_one_way (const char *a, const char *b, |
162 levenshtein_distance_unit_test_oneway (const char *a, const char *b, | 205 edit_distance_t expected) |
163 edit_distance_t expected) | 206 { |
164 { | 207 edit_distance_t actual = get_edit_distance (a, b); |
165 edit_distance_t actual = levenshtein_distance (a, b); | |
166 ASSERT_EQ (actual, expected); | 208 ASSERT_EQ (actual, expected); |
167 } | 209 } |
168 | 210 |
169 /* Verify that both | 211 /* Verify that both |
170 levenshtein_distance (A, B) | 212 get_edit_distance (A, B) |
171 and | 213 and |
172 levenshtein_distance (B, A) | 214 get_edit_distance (B, A) |
173 equal the expected value, to ensure that the function is symmetric. */ | 215 equal the expected value, to ensure that the function is symmetric. */ |
174 | 216 |
175 static void | 217 static void |
176 levenshtein_distance_unit_test (const char *a, const char *b, | 218 test_get_edit_distance_both_ways (const char *a, const char *b, |
177 edit_distance_t expected) | 219 edit_distance_t expected) |
178 { | 220 { |
179 levenshtein_distance_unit_test_oneway (a, b, expected); | 221 test_get_edit_distance_one_way (a, b, expected); |
180 levenshtein_distance_unit_test_oneway (b, a, expected); | 222 test_get_edit_distance_one_way (b, a, expected); |
223 } | |
224 | |
225 /* Verify get_edit_distance for a variety of pairs of pre-canned | |
226 inputs, comparing against known-good values. */ | |
227 | |
228 static void | |
229 test_edit_distances () | |
230 { | |
231 test_get_edit_distance_both_ways ("", "nonempty", strlen ("nonempty")); | |
232 test_get_edit_distance_both_ways ("saturday", "sunday", 3); | |
233 test_get_edit_distance_both_ways ("foo", "m_foo", 2); | |
234 test_get_edit_distance_both_ways ("hello_world", "HelloWorld", 3); | |
235 test_get_edit_distance_both_ways | |
236 ("the quick brown fox jumps over the lazy dog", "dog", 40); | |
237 test_get_edit_distance_both_ways | |
238 ("the quick brown fox jumps over the lazy dog", | |
239 "the quick brown dog jumps over the lazy fox", | |
240 4); | |
241 test_get_edit_distance_both_ways | |
242 ("Lorem ipsum dolor sit amet, consectetur adipiscing elit,", | |
243 "All your base are belong to us", | |
244 44); | |
245 test_get_edit_distance_both_ways ("foo", "FOO", 3); | |
246 test_get_edit_distance_both_ways ("fee", "deed", 2); | |
247 test_get_edit_distance_both_ways ("coorzd1", "coordx1", 2); | |
248 test_get_edit_distance_both_ways ("assert", "sqrt", 3); | |
249 test_get_edit_distance_both_ways ("PATH_MAX", "INT8_MAX", 3); | |
250 test_get_edit_distance_both_ways ("time", "nice", 2); | |
251 test_get_edit_distance_both_ways ("bar", "carg", 2); | |
252 test_get_edit_distance_both_ways ("gtk_widget_show_all", | |
253 "GtkWidgetShowAll", 7); | |
254 test_get_edit_distance_both_ways ("m_bar", "bar", 2); | |
255 test_get_edit_distance_both_ways ("MACRO", "MACRAME", 3); | |
256 test_get_edit_distance_both_ways ("ab", "ac", 1); | |
257 test_get_edit_distance_both_ways ("ab", "a", 1); | |
258 test_get_edit_distance_both_ways ("a", "b", 1); | |
259 test_get_edit_distance_both_ways ("nanl", "name", 2); | |
260 test_get_edit_distance_both_ways ("char", "bar", 2); | |
261 test_get_edit_distance_both_ways ("-optimize", "fsanitize", 5); | |
262 test_get_edit_distance_both_ways ("__DATE__", "__i386__", 4); | |
263 | |
264 /* Examples where transposition helps. */ | |
265 test_get_edit_distance_both_ways ("ab", "ba", 1); | |
266 test_get_edit_distance_both_ways ("ba", "abc", 2); | |
267 test_get_edit_distance_both_ways ("coorzd1", "coordz1", 1); | |
268 test_get_edit_distance_both_ways ("abcdefghijklmnopqrstuvwxyz", | |
269 "bacdefghijklmnopqrstuvwxzy", 2); | |
270 test_get_edit_distance_both_ways ("saturday", "sundya", 4); | |
271 test_get_edit_distance_both_ways ("signed", "singed", 1); | |
272 } | |
273 | |
274 /* Subroutine of test_get_edit_distance_cutoff, for emulating the | |
275 spellchecking cutoff used in releases up to GCC 8. */ | |
276 | |
277 static edit_distance_t | |
278 get_old_cutoff (size_t goal_len, size_t candidate_len) | |
279 { | |
280 return MAX (goal_len, candidate_len) / 2; | |
281 } | |
282 | |
283 /* Verify that the cutoff for "meaningfulness" of suggestions is at least as | |
284 conservative as in older GCC releases. | |
285 | |
286 This should ensure that we don't offer additional meaningless | |
287 suggestions (apart from those for which transposition has helped). */ | |
288 | |
289 static void | |
290 test_get_edit_distance_cutoff () | |
291 { | |
292 for (size_t goal_len = 0; goal_len < 30; goal_len++) | |
293 for (size_t candidate_len = 0; candidate_len < 30; candidate_len++) | |
294 ASSERT_TRUE (get_edit_distance_cutoff (goal_len, candidate_len) | |
295 <= get_old_cutoff (goal_len, candidate_len)); | |
296 } | |
297 | |
298 /* Assert that CANDIDATE is offered as a suggestion for TARGET. */ | |
299 | |
300 static void | |
301 assert_suggested_for (const location &loc, const char *candidate, | |
302 const char *target) | |
303 { | |
304 auto_vec<const char *> candidates; | |
305 candidates.safe_push (candidate); | |
306 ASSERT_EQ_AT (loc, candidate, find_closest_string (target, &candidates)); | |
307 } | |
308 | |
309 /* Assert that CANDIDATE is offered as a suggestion for TARGET. */ | |
310 | |
311 #define ASSERT_SUGGESTED_FOR(CANDIDATE, TARGET) \ | |
312 SELFTEST_BEGIN_STMT \ | |
313 assert_suggested_for (SELFTEST_LOCATION, CANDIDATE, TARGET); \ | |
314 SELFTEST_END_STMT | |
315 | |
316 /* Assert that CANDIDATE is not offered as a suggestion for TARGET. */ | |
317 | |
318 static void | |
319 assert_not_suggested_for (const location &loc, const char *candidate, | |
320 const char *target) | |
321 { | |
322 auto_vec<const char *> candidates; | |
323 candidates.safe_push (candidate); | |
324 ASSERT_EQ_AT (loc, NULL, find_closest_string (target, &candidates)); | |
325 } | |
326 | |
327 /* Assert that CANDIDATE is not offered as a suggestion for TARGET. */ | |
328 | |
329 #define ASSERT_NOT_SUGGESTED_FOR(CANDIDATE, TARGET) \ | |
330 SELFTEST_BEGIN_STMT \ | |
331 assert_not_suggested_for (SELFTEST_LOCATION, CANDIDATE, TARGET); \ | |
332 SELFTEST_END_STMT | |
333 | |
334 /* Verify that we offer various suggestions that are meaningful, | |
335 and that we don't offer various other ones that aren't (PR c/82967). */ | |
336 | |
337 static void | |
338 test_suggestions () | |
339 { | |
340 /* Good suggestions. */ | |
341 | |
342 ASSERT_SUGGESTED_FOR ("m_bar", "bar"); | |
343 // dist == 2, max_length == 5, min_length == 3 | |
344 | |
345 ASSERT_SUGGESTED_FOR ("MACRO", "MACRAME"); | |
346 // dist == 3, max_length == 7, min_length == 5 | |
347 | |
348 ASSERT_SUGGESTED_FOR ("gtk_widget_show_all", "GtkWidgetShowAll"); | |
349 // dist == 7, max_length == 19, min_length == 16 | |
350 | |
351 ASSERT_SUGGESTED_FOR ("ab", "ac"); | |
352 // dist == 1, max_length == min_length = 2 | |
353 | |
354 ASSERT_SUGGESTED_FOR ("ab", "a"); | |
355 // dist == 1, max_length == 2, min_length = 1 | |
356 | |
357 /* Bad suggestions. */ | |
358 | |
359 ASSERT_NOT_SUGGESTED_FOR ("a", "b"); | |
360 // dist == 1, max_length == min_length = 1 | |
361 | |
362 ASSERT_NOT_SUGGESTED_FOR ("sqrt", "assert"); | |
363 // dist == 3, max_length == 6, min_length == 4 | |
364 | |
365 ASSERT_NOT_SUGGESTED_FOR ("INT8_MAX", "PATH_MAX"); | |
366 // dist == 3, max_length == min_length == 8 | |
367 | |
368 ASSERT_NOT_SUGGESTED_FOR ("nice", "time"); | |
369 ASSERT_NOT_SUGGESTED_FOR ("nanl", "name"); | |
370 // dist == 2, max_length == min_length == 4 | |
371 | |
372 ASSERT_NOT_SUGGESTED_FOR ("carg", "bar"); | |
373 ASSERT_NOT_SUGGESTED_FOR ("char", "bar"); | |
374 // dist == 2, max_length == 4, min_length == 3 | |
375 | |
376 ASSERT_NOT_SUGGESTED_FOR ("-optimize", "fsanitize"); | |
377 // dist == 5, max_length == min_length == 9 | |
378 | |
379 ASSERT_NOT_SUGGESTED_FOR ("__DATE__", "__i386__"); | |
380 // dist == 4, max_length == min_length == 8 | |
181 } | 381 } |
182 | 382 |
183 /* Verify that find_closest_string is sane. */ | 383 /* Verify that find_closest_string is sane. */ |
184 | 384 |
185 static void | 385 static void |
213 | 413 |
214 /* If the goal string somehow makes it into the candidate list, offering | 414 /* If the goal string somehow makes it into the candidate list, offering |
215 it as a suggestion will be nonsensical. Verify that we don't offer such | 415 it as a suggestion will be nonsensical. Verify that we don't offer such |
216 suggestions. */ | 416 suggestions. */ |
217 ASSERT_EQ (NULL, find_closest_string ("banana", &candidates)); | 417 ASSERT_EQ (NULL, find_closest_string ("banana", &candidates)); |
418 | |
419 /* Example from PR 69968 where transposition helps. */ | |
420 candidates.truncate (0); | |
421 candidates.safe_push("coordx"); | |
422 candidates.safe_push("coordy"); | |
423 candidates.safe_push("coordz"); | |
424 candidates.safe_push("coordx1"); | |
425 candidates.safe_push("coordy1"); | |
426 candidates.safe_push("coordz1"); | |
427 ASSERT_STREQ ("coordz1", find_closest_string ("coorzd1", &candidates)); | |
218 } | 428 } |
219 | 429 |
220 /* Test data for test_metric_conditions. */ | 430 /* Test data for test_metric_conditions. */ |
221 | 431 |
222 static const char * const test_data[] = { | 432 static const char * const test_data[] = { |
225 "food", | 435 "food", |
226 "boo", | 436 "boo", |
227 "1234567890123456789012345678901234567890123456789012345678901234567890" | 437 "1234567890123456789012345678901234567890123456789012345678901234567890" |
228 }; | 438 }; |
229 | 439 |
230 /* Verify that levenshtein_distance appears to be a sane distance function, | 440 /* Verify that get_edit_distance appears to be a sane distance function, |
231 i.e. the conditions for being a metric. This is done directly for a | 441 i.e. the conditions for being a metric. This is done directly for a |
232 small set of examples, using test_data above. This is O(N^3) in the size | 442 small set of examples, using test_data above. This is O(N^3) in the size |
233 of the array, due to the test for the triangle inequality, so we keep the | 443 of the array, due to the test for the triangle inequality, so we keep the |
234 array small. */ | 444 array small. */ |
235 | 445 |
241 for (int i = 0; i < num_test_cases; i++) | 451 for (int i = 0; i < num_test_cases; i++) |
242 { | 452 { |
243 for (int j = 0; j < num_test_cases; j++) | 453 for (int j = 0; j < num_test_cases; j++) |
244 { | 454 { |
245 edit_distance_t dist_ij | 455 edit_distance_t dist_ij |
246 = levenshtein_distance (test_data[i], test_data[j]); | 456 = get_edit_distance (test_data[i], test_data[j]); |
247 | 457 |
248 /* Identity of indiscernibles: d(i, j) == 0 iff i == j. */ | 458 /* Identity of indiscernibles: d(i, j) == 0 iff i == j. */ |
249 if (i == j) | 459 if (i == j) |
250 ASSERT_EQ (dist_ij, 0); | 460 ASSERT_EQ (dist_ij, 0); |
251 else | 461 else |
252 ASSERT_TRUE (dist_ij > 0); | 462 ASSERT_TRUE (dist_ij > 0); |
253 | 463 |
254 /* Symmetry: d(i, j) == d(j, i). */ | 464 /* Symmetry: d(i, j) == d(j, i). */ |
255 edit_distance_t dist_ji | 465 edit_distance_t dist_ji |
256 = levenshtein_distance (test_data[j], test_data[i]); | 466 = get_edit_distance (test_data[j], test_data[i]); |
257 ASSERT_EQ (dist_ij, dist_ji); | 467 ASSERT_EQ (dist_ij, dist_ji); |
258 | 468 |
259 /* Triangle inequality. */ | 469 /* Triangle inequality. */ |
260 for (int k = 0; k < num_test_cases; k++) | 470 for (int k = 0; k < num_test_cases; k++) |
261 { | 471 { |
262 edit_distance_t dist_ik | 472 edit_distance_t dist_ik |
263 = levenshtein_distance (test_data[i], test_data[k]); | 473 = get_edit_distance (test_data[i], test_data[k]); |
264 edit_distance_t dist_jk | 474 edit_distance_t dist_jk |
265 = levenshtein_distance (test_data[j], test_data[k]); | 475 = get_edit_distance (test_data[j], test_data[k]); |
266 ASSERT_TRUE (dist_ik <= dist_ij + dist_jk); | 476 ASSERT_TRUE (dist_ik <= dist_ij + dist_jk); |
267 } | 477 } |
268 } | 478 } |
269 } | 479 } |
270 } | 480 } |
271 | 481 |
272 /* Verify levenshtein_distance for a variety of pairs of pre-canned | 482 /* Run all of the selftests within this file. */ |
273 inputs, comparing against known-good values. */ | |
274 | 483 |
275 void | 484 void |
276 spellcheck_c_tests () | 485 spellcheck_c_tests () |
277 { | 486 { |
278 levenshtein_distance_unit_test ("", "nonempty", strlen ("nonempty")); | 487 test_edit_distances (); |
279 levenshtein_distance_unit_test ("saturday", "sunday", 3); | 488 test_get_edit_distance_cutoff (); |
280 levenshtein_distance_unit_test ("foo", "m_foo", 2); | 489 test_suggestions (); |
281 levenshtein_distance_unit_test ("hello_world", "HelloWorld", 3); | |
282 levenshtein_distance_unit_test | |
283 ("the quick brown fox jumps over the lazy dog", "dog", 40); | |
284 levenshtein_distance_unit_test | |
285 ("the quick brown fox jumps over the lazy dog", | |
286 "the quick brown dog jumps over the lazy fox", | |
287 4); | |
288 levenshtein_distance_unit_test | |
289 ("Lorem ipsum dolor sit amet, consectetur adipiscing elit,", | |
290 "All your base are belong to us", | |
291 44); | |
292 levenshtein_distance_unit_test ("foo", "FOO", 3); | |
293 | |
294 test_find_closest_string (); | 490 test_find_closest_string (); |
295 test_metric_conditions (); | 491 test_metric_conditions (); |
296 } | 492 } |
297 | 493 |
298 } // namespace selftest | 494 } // namespace selftest |