view gcc/spellcheck.h @ 16:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
children 84e7813d76e9
line wrap: on
line source

/* Find near-matches for strings and identifiers.
   Copyright (C) 2015-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<>.  */


typedef unsigned int edit_distance_t;
const edit_distance_t MAX_EDIT_DISTANCE = UINT_MAX;

/* spellcheck.c  */
extern edit_distance_t
levenshtein_distance (const char *s, int len_s,
		      const char *t, int len_t);

extern edit_distance_t
levenshtein_distance (const char *s, const char *t);

extern const char *
find_closest_string (const char *target,
		     const auto_vec<const char *> *candidates);

/* A traits class for describing a string-like type usable by
   class best_match.
   Specializations should provide the implementations of the following:

     static size_t get_length (TYPE);
     static const char *get_string (TYPE);

   get_string should return a non-NULL ptr, which does not need to be
   0-terminated.  */

template <typename TYPE>
struct edit_distance_traits {};

/* Specialization of edit_distance_traits for C-style strings.  */

template <>
struct edit_distance_traits<const char *>
  static size_t get_length (const char *str)
    gcc_assert (str);
    return strlen (str);

  static const char *get_string (const char *str)
    gcc_assert (str);
    return str;

/* A type for use when determining the best match against a string,
   expressed as a template so that we can match against various
   string-like types (const char *, frontend identifiers, and preprocessor

   This type accumulates the best possible match against GOAL_TYPE for
   a sequence of elements of CANDIDATE_TYPE, whilst minimizing the
   number of calls to levenshtein_distance and to
   edit_distance_traits<T>::get_length.  */

template <typename GOAL_TYPE, typename CANDIDATE_TYPE>
class best_match
  typedef GOAL_TYPE goal_t;
  typedef CANDIDATE_TYPE candidate_t;
  typedef edit_distance_traits<goal_t> goal_traits;
  typedef edit_distance_traits<candidate_t> candidate_traits;

  /* Constructor.  */

  best_match (GOAL_TYPE goal,
	      edit_distance_t best_distance_so_far = MAX_EDIT_DISTANCE)
  : m_goal (goal_traits::get_string (goal)),
    m_goal_len (goal_traits::get_length (goal)),
    m_best_candidate (NULL),
    m_best_distance (best_distance_so_far)

  /* Compare the edit distance between CANDIDATE and m_goal,
     and if it's the best so far, record it.  */

  void consider (candidate_t candidate)
    size_t candidate_len = candidate_traits::get_length (candidate);

    /* Calculate a lower bound on the candidate's distance to the goal,
       based on the difference in lengths; it will require at least
       this many insertions/deletions.  */
    edit_distance_t min_candidate_distance
      = abs ((ssize_t)candidate_len - (ssize_t)m_goal_len);

    /* If the candidate's length is sufficiently different to that
       of the goal string, then the number of insertions/deletions
       may be >= the best distance so far.  If so, we can reject
       the candidate immediately without needing to compute
       the exact distance, since it won't be an improvement.  */
    if (min_candidate_distance >= m_best_distance)

    /* If the candidate will be unable to beat the criterion in
       get_best_meaningful_candidate, reject it without computing
       the exact distance.  */
    unsigned int cutoff = MAX (m_goal_len, candidate_len) / 2;
    if (min_candidate_distance > cutoff)

    /* Otherwise, compute the distance and see if the candidate
       has beaten the previous best value.  */
    edit_distance_t dist
      = levenshtein_distance (m_goal, m_goal_len,
			      candidate_traits::get_string (candidate),
    if (dist < m_best_distance)
	m_best_distance = dist;
	m_best_candidate = candidate;
	m_best_candidate_len = candidate_len;

  /* Assuming that BEST_CANDIDATE is known to be better than
     m_best_candidate, update (without recomputing the edit distance to
     the goal).  */

  void set_best_so_far (CANDIDATE_TYPE best_candidate,
			edit_distance_t best_distance,
			size_t best_candidate_len)
    gcc_assert (best_distance < m_best_distance);
    m_best_candidate = best_candidate;
    m_best_distance = best_distance;
    m_best_candidate_len = best_candidate_len;

  /* Get the best candidate so far, but applying a filter to ensure
     that we return NULL if none of the candidates are close to the goal,
     to avoid offering nonsensical suggestions to the user.  */

  candidate_t get_best_meaningful_candidate () const
    /* If more than half of the letters were misspelled, the suggestion is
       likely to be meaningless.  */
    if (m_best_candidate)
	unsigned int cutoff = MAX (m_goal_len, m_best_candidate_len) / 2;
	if (m_best_distance > cutoff)
	  return NULL;

    /* If the goal string somehow makes it into the candidate list, offering
       it as a suggestion will be nonsensical e.g.
         'constexpr' does not name a type; did you mean 'constexpr'?
       Ultimately such suggestions are due to bugs in constructing the
       candidate list, but as a band-aid, do not offer suggestions for
       distance == 0 (where candidate == goal).  */
    if (m_best_distance == 0)
      return NULL;

    return m_best_candidate;

  edit_distance_t get_best_distance () const { return m_best_distance; }
  size_t get_best_candidate_length () const { return m_best_candidate_len; }

  const char *m_goal;
  size_t m_goal_len;
  candidate_t m_best_candidate;
  edit_distance_t m_best_distance;
  size_t m_best_candidate_len;

#endif  /* GCC_SPELLCHECK_H  */