/* view gcc/c-family/c-indentation.c @ 158:494b0b89df80 default tip

   ...
   author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
   date Mon, 25 May 2020 18:13:55 +0900
   parents 1830386684a0
   children
   line wrap: on
   line source  */

/* Implementation of -Wmisleading-indentation
   Copyright (C) 2015-2020 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.

GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "c-common.h"
#include "c-indentation.h"
#include "selftest.h"

extern cpp_options *cpp_opts;

/* Return the first tab stop strictly after VIS_COLUMN, where tab stops
   are the multiples of TAB_WIDTH.  A column that already sits on a tab
   stop is advanced to the following one.  TAB_WIDTH must be nonzero.  */

static unsigned int
next_tab_stop (unsigned int vis_column, unsigned int tab_width)
{
  /* Step one full tab width forward, then drop back to the tab stop
     at or below that position.  */
  const unsigned int shifted = vis_column + tab_width;
  return shifted - shifted % tab_width;
}

/* Translate the column of EXPLOC (libcpp's 1-based character count)
   into a 0-based "visual column" that accounts for tab expansion using
   TAB_WIDTH, by reading the source line that EXPLOC refers to.

   On success, store the visual column in *OUT and return true.  If
   FIRST_NWS is non-NULL, also store in *FIRST_NWS the visual column of
   the first non-whitespace character found before EXPLOC's column, or
   the visual column of EXPLOC itself when only whitespace precedes it.

   Return false, leaving *OUT and *FIRST_NWS untouched, if EXPLOC has no
   column information, if the source line cannot be read, or if the
   column lies beyond the end of the line.  LOC is used only as the
   location of the one-off note issued when column information is
   missing.  */

static bool
get_visual_column (expanded_location exploc, location_t loc,
		   unsigned int *out,
		   unsigned int *first_nws,
		   unsigned int tab_width)
{
  /* PR c++/68819: a zero column presumably means the location_t was
     above LINE_MAP_MAX_LOCATION_WITH_COLS, so no column was tracked.
     Report failure so that the caller takes its error-handling path.  */
  if (exploc.column == 0)
    {
      /* Only tell the user about this once.  */
      static bool issued_note = false;
      if (issued_note)
	return false;
      issued_note = true;
      inform (loc,
	      "%<-Wmisleading-indentation%> is disabled from this point"
	      " onwards, since column-tracking was disabled due to"
	      " the size of the code/headers");
      return false;
    }

  char_span line = location_get_source_line (exploc.file, exploc.line);
  if (!line || (size_t) exploc.column > line.length ())
    return false;

  /* Walk over the characters that precede the requested column.  */
  const int num_preceding = exploc.column - 1;
  bool nws_pending = (first_nws != NULL);
  unsigned int col = 0;
  for (int idx = 0; idx < num_preceding; idx++)
    {
      const unsigned char ch = line[idx];

      if (nws_pending && !ISSPACE (ch))
	{
	  *first_nws = col;
	  nws_pending = false;
	}

      col = (ch == '\t') ? next_tab_stop (col, tab_width) : col + 1;
    }

  /* Nothing but whitespace came before the requested column.  */
  if (nws_pending)
    *first_nws = col;

  *out = col;
  return true;
}

/* Attempt to determine the first non-whitespace character in line LINE_NUM
   of source file FILE.

   If this is possible, return true and write its "visual column"
   (0-based, with tabs expanded according to TAB_WIDTH) to *FIRST_NWS.
   Otherwise (the line cannot be read, or it contains nothing but
   whitespace), return false, leaving *FIRST_NWS untouched.  */

static bool
get_first_nws_vis_column (const char *file, int line_num,
			  unsigned int *first_nws,
			  unsigned int tab_width)
{
  gcc_assert (first_nws);

  char_span line = location_get_source_line (file, line_num);
  if (!line)
    return false;
  unsigned int vis_column = 0;
  /* Examine every character of the line, including the final one, so
     that a line whose only non-whitespace character is its last one
     (e.g. a lone one-character token) is not mistaken for a blank
     line.  */
  for (size_t i = 0; i < line.length (); i++)
    {
      unsigned char ch = line[i];

      if (!ISSPACE (ch))
	{
	  *first_nws = vis_column;
	  return true;
	}

      if (ch == '\t')
	vis_column = next_tab_stop (vis_column, tab_width);
      else
	vis_column++;
    }

  /* No non-whitespace characters found.  */
  return false;
}

/* Determine if there is an unindent/outdent between
   BODY_EXPLOC and NEXT_STMT_EXPLOC, to ensure that we don't
   issue a warning for cases like the following:

   (1) Preprocessor logic

	if (flagA)
	  foo ();
	  ^ BODY_EXPLOC
      #if SOME_CONDITION_THAT_DOES_NOT_HOLD
	if (flagB)
      #endif
	  bar ();
	  ^ NEXT_STMT_EXPLOC

   "bar ();" is visually aligned below "foo ();" and
   is (as far as the parser sees) the next token, but
   this isn't misleading to a human reader.

   (2) Empty macro with bad indentation

   In the following, the
     "if (i > 0)"
   is poorly indented, and ought to be on the same column as
      "engine_ref_debug(e, 0, -1)"
   However, it is not misleadingly indented, due to the presence
   of that macro.

      #define engine_ref_debug(X, Y, Z)

      if (locked)
        i = foo (0);
      else
        i = foo (1);
      engine_ref_debug(e, 0, -1)
        if (i > 0)
        return 1;

   Return true if such an unindent/outdent is detected.  */

static bool
detect_intervening_unindent (const char *file,
			     int body_line,
			     int next_stmt_line,
			     unsigned int vis_column,
			     unsigned int tab_width)
{
  gcc_assert (file);
  gcc_assert (next_stmt_line > body_line);

  /* Look at each line strictly between the body and the next statement;
     any of them starting to the left of VIS_COLUMN is an unindent.  */
  int cur_line = body_line;
  while (++cur_line < next_stmt_line)
    {
      unsigned int first_col;

      /* Lines for which no first non-whitespace column can be
	 determined tell us nothing.  */
      if (!get_first_nws_vis_column (file, cur_line, &first_col, tab_width))
	continue;

      if (first_col < vis_column)
	return true;
    }

  /* No intervening line was outdented.  */
  return false;
}


/* Helper function for warn_for_misleading_indentation; see
   description of that function below.

   GUARD_TINFO, BODY_TINFO and NEXT_TINFO describe the guard, the body
   and the token following the body respectively; their location, token
   type and keyword are consulted here.

   Return true if a warning should be issued, i.e. if the token after
   the body is laid out as though it were guarded.  Return false
   otherwise, including whenever the layout cannot be judged reliably
   (macro expansions, #line directives, body and next statement in
   different files, or missing column/source information).  */

static bool
should_warn_for_misleading_indentation (const token_indent_info &guard_tinfo,
					const token_indent_info &body_tinfo,
					const token_indent_info &next_tinfo)
{
  location_t guard_loc = guard_tinfo.location;
  location_t body_loc = body_tinfo.location;
  location_t next_stmt_loc = next_tinfo.location;

  enum cpp_ttype body_type = body_tinfo.type;
  enum cpp_ttype next_tok_type = next_tinfo.type;

  /* Don't attempt to compare the indentation of BODY_LOC and NEXT_STMT_LOC
     if either are within macros.  */
  if (linemap_location_from_macro_expansion_p (line_table, body_loc)
      || linemap_location_from_macro_expansion_p (line_table, next_stmt_loc))
    return false;

  /* Don't attempt to compare indentation if #line or # 44 "file"-style
     directives are present, suggesting generated code.

     All bets are off if these are present: the file that the #line
     directive refers to could have an entirely different coding layout
     from C/C++ (e.g. .md files).

     To determine if a #line is present, in theory we could look for a
     map with reason == LC_RENAME_VERBATIM.  However, if there has
     subsequently been a long line requiring a column number larger than
     that representable by the original LC_RENAME_VERBATIM map, then
     we'll have a map with reason LC_RENAME.
     Rather than attempting to search all of the maps for a
     LC_RENAME_VERBATIM, instead we have libcpp set a flag whenever one
     is seen, and we check for the flag here.
  */
  if (line_table->seen_line_directive)
    return false;

  /* We can't usefully warn about do-while and switch statements since the
     bodies of these statements are always explicitly delimited at both ends,
     so control flow is quite obvious.  */
  if (guard_tinfo.keyword == RID_DO
      || guard_tinfo.keyword == RID_SWITCH)
    return false;

  /* If the token following the body is a close brace or an "else"
     then while indentation may be sloppy, there is not much ambiguity
     about control flow, e.g.

     if (foo)       <- GUARD
       bar ();      <- BODY
       else baz (); <- NEXT

     {
     while (foo)  <- GUARD
     bar ();      <- BODY
     }            <- NEXT
     baz ();
  */
  if (next_tok_type == CPP_CLOSE_BRACE
      || next_tinfo.keyword == RID_ELSE)
    return false;

  /* Likewise, if the body of the guard is a compound statement then control
     flow is quite visually explicit regardless of the code's possibly poor
     indentation, e.g.

     while (foo)  <- GUARD
       {          <- BODY
       bar ();
       }
       baz ();    <- NEXT

    Things only get muddy when the body of the guard does not have
    braces, e.g.

    if (foo)  <- GUARD
      bar (); <- BODY
      baz (); <- NEXT
  */
  if (body_type == CPP_OPEN_BRACE)
    return false;

  /* Don't warn here about spurious semicolons.  */
  if (next_tok_type == CPP_SEMICOLON)
    return false;

  expanded_location body_exploc = expand_location (body_loc);
  expanded_location next_stmt_exploc = expand_location (next_stmt_loc);
  expanded_location guard_exploc = expand_location (guard_loc);

  /* Tab width used when converting character columns to visual columns.  */
  const unsigned int tab_width = cpp_opts->tabstop;

  /* They must be in the same file.  */
  if (next_stmt_exploc.file != body_exploc.file)
    return false;

  /* If NEXT_STMT_LOC and BODY_LOC are on the same line, consider
     the location of the guard.

     Cases where we want to issue a warning:

       if (flag)
         foo ();  bar ();
                  ^ WARN HERE

       if (flag) foo (); bar ();
                         ^ WARN HERE


       if (flag) ; {
                   ^ WARN HERE

       if (flag)
        ; {
          ^ WARN HERE

     Cases where we don't want to issue a warning:

       various_code (); if (flag) foo (); bar (); more_code ();
                                          ^ DON'T WARN HERE.  */
  if (next_stmt_exploc.line == body_exploc.line)
    {
      /* The guard is in a different file from the line holding both the
	 body and the next stmt.  */
      if (guard_exploc.file != body_exploc.file)
	return true;
      if (guard_exploc.line < body_exploc.line)
	/* The guard is on a line before a line that contains both
	   the body and the next stmt.  */
	return true;
      else if (guard_exploc.line == body_exploc.line)
	{
	  /* They're all on the same line.  */
	  gcc_assert (guard_exploc.file == next_stmt_exploc.file);
	  gcc_assert (guard_exploc.line == next_stmt_exploc.line);
	  unsigned int guard_vis_column;
	  unsigned int guard_line_first_nws;
	  if (!get_visual_column (guard_exploc, guard_loc,
				  &guard_vis_column,
				  &guard_line_first_nws, tab_width))
	    return false;
	  /* Heuristic: only warn if the guard is the first thing
	     on its line.  */
	  if (guard_vis_column == guard_line_first_nws)
	    return true;
	}
      /* Otherwise fall through: the multi-line check below does not apply
	 when the lines are equal, so the final "return false" is
	 reached.  */
    }

  /* If NEXT_STMT_LOC is on a line after BODY_LOC, consider
     their relative locations, and of the guard.

     Cases where we want to issue a warning:
        if (flag)
          foo ();
          bar ();
          ^ WARN HERE

     Cases where we don't want to issue a warning:
        if (flag)
        foo ();
        bar ();
        ^ DON'T WARN HERE (autogenerated code?)

	if (flagA)
	  foo ();
      #if SOME_CONDITION_THAT_DOES_NOT_HOLD
	if (flagB)
      #endif
	  bar ();
	  ^ DON'T WARN HERE

	if (flag)
	  ;
	  foo ();
	  ^ DON'T WARN HERE

	#define emit
	if (flag)
	     foo ();
	emit bar ();
	     ^ DON'T WARN HERE

  */
  if (next_stmt_exploc.line > body_exploc.line)
    {
      /* Compute the "visual column" of NEXT_STMT_LOC, BODY_LOC and
	 GUARD_LOC, together with the first non-whitespace column of each
	 of their lines, so that their alignment can be compared
	 below...  */
      unsigned int next_stmt_vis_column;
      unsigned int next_stmt_line_first_nws;
      unsigned int body_vis_column;
      unsigned int body_line_first_nws;
      unsigned int guard_vis_column;
      unsigned int guard_line_first_nws;
      /* If we can't determine it, don't issue a warning.  This is sometimes
	 the case for input files containing #line directives, and these
	 are often for autogenerated sources (e.g. from .md files), where
	 it's not clear that it's meaningful to look at indentation.  */
      if (!get_visual_column (next_stmt_exploc, next_stmt_loc,
			      &next_stmt_vis_column,
			      &next_stmt_line_first_nws, tab_width))
	return false;
      if (!get_visual_column (body_exploc, body_loc,
			      &body_vis_column,
			      &body_line_first_nws, tab_width))
	return false;
      if (!get_visual_column (guard_exploc, guard_loc,
			      &guard_vis_column,
			      &guard_line_first_nws, tab_width))
	return false;

      /* If the line where the next stmt starts has non-whitespace
	 on it before the stmt, then don't warn:
	  #define emit
	  if (flag)
	       foo ();
	  emit bar ();
	       ^ DON'T WARN HERE
	 (PR c/69122).  */
      if (next_stmt_line_first_nws < next_stmt_vis_column)
	return false;

      /* The usual case: the next stmt starts on the same visual column as
	 a non-semicolon body.  */
      if ((body_type != CPP_SEMICOLON
	   && next_stmt_vis_column == body_vis_column)
	  /* As a special case handle the case where the body is a semicolon
	     that may be hidden by a preceding comment, e.g.  */

	  // if (p)
	  //   /* blah */;
	  //   foo (1);

	  /*  by looking instead at the column of the first non-whitespace
	      character on the body line.  */
	  || (body_type == CPP_SEMICOLON
	      && body_exploc.line > guard_exploc.line
	      && body_line_first_nws != body_vis_column
	      && next_stmt_vis_column > guard_line_first_nws))
	{
	  /* Don't warn if they are aligned on the same column
	     as the guard itself (suggesting autogenerated code that doesn't
	     bother indenting at all).
	     For "else" clauses, we consider the column of the first
	     non-whitespace character on the guard line instead of the column
	     of the actual guard token itself because it is more sensible.
	     Consider:

	     if (p) {
	     foo (1);
	     } else     // GUARD
	     foo (2);   // BODY
	     foo (3);   // NEXT

	     and:

	     if (p) {
	       foo (1);
	     } else       // GUARD
	       foo (2);   // BODY
	       foo (3);   // NEXT

	     If we just used the column of the "else" token, we would warn on
	     the first example and not warn on the second.  But we want the
	     exact opposite to happen: to not warn on the first example (which
	     is probably autogenerated) and to warn on the second (whose
	     indentation is misleading).  Using the column of the first
	     non-whitespace character on the guard line makes that
	     happen.  */
	  unsigned int guard_column = (guard_tinfo.keyword == RID_ELSE
				       ? guard_line_first_nws
				       : guard_vis_column);
	  if (guard_column == body_vis_column)
	    return false;

	  /* We may have something like:

	     if (p)
	       {
	       foo (1);
	       } else  // GUARD
	     foo (2);  // BODY
	     foo (3);  // NEXT

	     in which case the columns are not aligned but the code is not
	     misleadingly indented.  If the column of the body isn't indented
	     more than the guard line then don't warn.  */
	  if (body_vis_column <= guard_line_first_nws)
	    return false;

	  /* Don't warn if there is an unindent between the two statements.
	     NOTE(review): the minimum of two unsigned columns is stored in
	     an int and then passed as an unsigned parameter; this looks
	     harmless for realistic column values, but confirm.  */
	  int vis_column = MIN (next_stmt_vis_column, body_vis_column);
	  if (detect_intervening_unindent (body_exploc.file, body_exploc.line,
					   next_stmt_exploc.line,
					   vis_column, tab_width))
	    return false;

	  /* Otherwise, they are visually aligned: issue a warning.  */
	  return true;
	}

	/* Also issue a warning for code having the form:

	   if (flag);
	     foo ();

	   while (flag);
	   {
	     ...
	   }

	   for (...);
	     {
	       ...
	     }

	   if (flag)
	     ;
	   else if (flag);
	     foo ();

	   where the semicolon at the end of each guard is most likely spurious.

	   But do not warn on:

	   for (..);
	   foo ();

	   where the next statement is aligned with the guard.
	*/
	if (body_type == CPP_SEMICOLON)
	  {
	    /* Only consider a semicolon sitting on the guard's own line.  */
	    if (body_exploc.line == guard_exploc.line)
	      {
		/* Warn if the next stmt is indented further than the start
		   of the guard line, or if it is an open brace aligned
		   exactly with the start of the guard line.  */
		if (next_stmt_vis_column > guard_line_first_nws
		    || (next_tok_type == CPP_OPEN_BRACE
			&& next_stmt_vis_column == guard_line_first_nws))
		  return true;
	      }
	  }
    }

  /* None of the misleading layouts above was recognized.  */
  return false;
}

/* Map KEYWORD, the keyword of a guard token, to its spelling.  Only the
   guards handled by this file ("if", "else", "for", "while", "do" and
   "switch") are supported; any other keyword is treated as
   unreachable.  */

const char *
guard_tinfo_to_string (enum rid keyword)
{
  const char *spelling = NULL;

  switch (keyword)
    {
    case RID_IF:
      spelling = "if";
      break;
    case RID_ELSE:
      spelling = "else";
      break;
    case RID_FOR:
      spelling = "for";
      break;
    case RID_WHILE:
      spelling = "while";
      break;
    case RID_DO:
      spelling = "do";
      break;
    case RID_SWITCH:
      spelling = "switch";
      break;
    default:
      gcc_unreachable ();
    }

  return spelling;
}

/* Entry point used by the C/C++ frontends for a guarding statement
   whose body was written without braces, like this:

     if (flag)
       foo ();

   GUARD_TINFO describes the guard token, BODY_TINFO the guarded
   statement and NEXT_TINFO the token that follows it.  Together they
   let us detect followup statements that sit in the same "visual block"
   as the guarded statement without being logically grouped within the
   guarding statement, such as:

     if (flag)   <- GUARD_TINFO
       foo ();   <- BODY_TINFO
       bar ();   <- NEXT_TINFO

   Here "bar ();" isn't guarded by the "if", but its indentation
   misleadingly suggests that it belongs to the same block as
   "foo ();".

   When such a layout is found, a warning is issued at the guard,
   followed by a note at the misleadingly indented statement.  */

void
warn_for_misleading_indentation (const token_indent_info &guard_tinfo,
				 const token_indent_info &body_tinfo,
				 const token_indent_info &next_tinfo)
{
  /* Bail out up front when -Wmisleading-indentation is disabled, rather
     than doing the analysis only for the diagnostic machinery to drop
     the result.  */
  if (!warn_misleading_indentation)
    return;

  if (!should_warn_for_misleading_indentation (guard_tinfo,
					       body_tinfo,
					       next_tinfo))
    return;

  /* Group the warning and its follow-up note.  */
  auto_diagnostic_group d;
  const char *guard_name = guard_tinfo_to_string (guard_tinfo.keyword);
  const bool warned
    = warning_at (guard_tinfo.location, OPT_Wmisleading_indentation,
		  "this %qs clause does not guard...",
		  guard_name);

  /* Only emit the note if the warning itself was emitted.  */
  if (warned)
    inform (next_tinfo.location,
	    "...this statement, but the latter is misleadingly indented"
	    " as if it were guarded by the %qs",
	    guard_name);
}

#if CHECKING_P

namespace selftest {

/* Selftest for next_tab_stop: with a tab width of 8, columns at the
   start, just after the start, and at the end of each of the first
   three tab cells must all advance to the following tab stop.  */

static void
test_next_tab_stop ()
{
  const unsigned int tab_width = 8;

  /* Offsets within a tab cell: first column, second column, last
     column.  */
  static const unsigned int offsets[] = { 0, 1, 7 };
  const size_t num_offsets = sizeof offsets / sizeof offsets[0];

  /* Cells starting at columns 0, 8 and 16.  */
  for (unsigned int cell_start = 0; cell_start <= 16; cell_start += tab_width)
    for (size_t i = 0; i < num_offsets; i++)
      {
	const unsigned int expected_stop = cell_start + tab_width;
	ASSERT_EQ (next_tab_stop (cell_start + offsets[i], tab_width),
		   expected_stop);
      }
}

/* Verify that the given call to get_visual_column succeeds, with
   the given results.  */

static void
assert_get_visual_column_succeeds (const location &loc,
				   const char *file, int line, int column,
				   const unsigned int tab_width,
				   unsigned int expected_visual_column,
				   unsigned int expected_first_nws)
{
  expanded_location exploc;
  exploc.file = file;
  exploc.line = line;
  exploc.column = column;
  exploc.data = NULL;
  exploc.sysp = false;
  unsigned int actual_visual_column;
  unsigned int actual_first_nws;
  bool result = get_visual_column (exploc, UNKNOWN_LOCATION,
				   &actual_visual_column,
				   &actual_first_nws, tab_width);
  ASSERT_TRUE_AT (loc, result);
  ASSERT_EQ_AT (loc, actual_visual_column, expected_visual_column);
  ASSERT_EQ_AT (loc, actual_first_nws, expected_first_nws);
}

/* Verify that the given call to get_visual_column succeeds, with
   the given results.

   Convenience wrapper around assert_get_visual_column_succeeds that
   passes SELFTEST_LOCATION as the location argument; all other
   arguments are forwarded unchanged.  */

#define ASSERT_GET_VISUAL_COLUMN_SUCCEEDS(FILENAME, LINE, COLUMN,	\
					  TAB_WIDTH,			\
					  EXPECTED_VISUAL_COLUMN,	\
					  EXPECTED_FIRST_NWS)		\
  SELFTEST_BEGIN_STMT							\
    assert_get_visual_column_succeeds (SELFTEST_LOCATION,		\
				       FILENAME, LINE, COLUMN,		\
				       TAB_WIDTH,			\
				       EXPECTED_VISUAL_COLUMN,		\
				       EXPECTED_FIRST_NWS);		\
  SELFTEST_END_STMT

/* Verify that the given call to get_visual_column fails gracefully.  */

static void
assert_get_visual_column_fails (const location &loc,
				const char *file, int line, int column,
				const unsigned int tab_width)
{
  expanded_location exploc;
  exploc.file = file;
  exploc.line = line;
  exploc.column = column;
  exploc.data = NULL;
  exploc.sysp = false;
  unsigned int actual_visual_column;
  unsigned int actual_first_nws;
  bool result = get_visual_column (exploc, UNKNOWN_LOCATION,
				   &actual_visual_column,
				   &actual_first_nws, tab_width);
  ASSERT_FALSE_AT (loc, result);
}

/* Verify that the given call to get_visual_column fails gracefully.

   Convenience wrapper around assert_get_visual_column_fails that
   passes SELFTEST_LOCATION as the location argument; all other
   arguments are forwarded unchanged.  */

#define ASSERT_GET_VISUAL_COLUMN_FAILS(FILENAME, LINE, COLUMN,	\
				       TAB_WIDTH)		\
  SELFTEST_BEGIN_STMT						\
    assert_get_visual_column_fails (SELFTEST_LOCATION,		\
				    FILENAME, LINE, COLUMN,	\
				    TAB_WIDTH);		\
  SELFTEST_END_STMT

/* Selftest for get_visual_column, covering both space-based and
   tab-based indentation as well as the end-of-line boundary.  */

static void
test_get_visual_column ()
{
  /* Write a temporary file holding two lines of 8 characters each:

       line 1: two spaces, then "line 1"
       line 2: a tab and a space, then "line 2"

     so that the first non-whitespace character is at 1-based column 3
     on both lines, but at different visual columns.  */
  const char *content = ("  line 1\n"
			 "\t line 2\n");
  line_table_test ltt;
  temp_source_file tmp (SELFTEST_LOCATION, ".txt", content);

  const unsigned int tab_width = 8;
  const char *file = tmp.get_filename ();
  const int space_line = 1;
  const int tab_line = 2;

  /* Space-indented line: visual column is simply column - 1.  */
  ASSERT_GET_VISUAL_COLUMN_SUCCEEDS (file, space_line, 1, tab_width, 0, 0);
  ASSERT_GET_VISUAL_COLUMN_SUCCEEDS (file, space_line, 2, tab_width, 1, 1);
  ASSERT_GET_VISUAL_COLUMN_SUCCEEDS (file, space_line, 3, tab_width, 2, 2);
  /* Past the indentation, first_nws no longer grows.  */
  ASSERT_GET_VISUAL_COLUMN_SUCCEEDS (file, space_line, 4, tab_width, 3, 2);
  /* The last character is accepted; one past it is rejected.  */
  ASSERT_GET_VISUAL_COLUMN_SUCCEEDS (file, space_line, 8, tab_width, 7, 2);
  ASSERT_GET_VISUAL_COLUMN_FAILS (file, space_line, 9, tab_width);

  /* Tab-indented line: the leading tab jumps to visual column 8.  */
  ASSERT_GET_VISUAL_COLUMN_SUCCEEDS (file, tab_line, 1, tab_width, 0, 0);
  ASSERT_GET_VISUAL_COLUMN_SUCCEEDS (file, tab_line, 2, tab_width, 8, 8);
  ASSERT_GET_VISUAL_COLUMN_SUCCEEDS (file, tab_line, 3, tab_width, 9, 9);
  /* Past the indentation, first_nws no longer grows.  */
  ASSERT_GET_VISUAL_COLUMN_SUCCEEDS (file, tab_line, 4, tab_width, 10, 9);
  /* The last character is accepted; one past it is rejected.  */
  ASSERT_GET_VISUAL_COLUMN_SUCCEEDS (file, tab_line, 8, tab_width, 14, 9);
  ASSERT_GET_VISUAL_COLUMN_FAILS (file, tab_line, 9, tab_width);
}

/* Run all of the selftests within this file: the checks of
   next_tab_stop followed by those of get_visual_column.  */

void
c_indentation_c_tests ()
{
  test_next_tab_stop ();
  test_get_visual_column ();
}

} // namespace selftest

#endif /* CHECKING_P */