diff gcc/config/tilepro/gen-mul-tables.cc @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents
children 84e7813d76e9
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gcc/config/tilepro/gen-mul-tables.cc	Fri Oct 27 22:46:09 2017 +0900
@@ -0,0 +1,1365 @@
+/* Multiply table generator for tile.
+   Copyright (C) 2011-2017 Free Software Foundation, Inc.
+   Contributed by Walter Lee (walt@tilera.com)
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+/* This program creates a table used to compile multiply by constant
+   efficiently.
+
+   This program should be compiled by a c++ compiler.  If it's
+   compiled with -DTILEPRO, it generates the multiply table for
+   TILEPro; otherwise it generates the multiply table for TILE-Gx.
+   Running the program produces the table in stdout.
+
+   The program works by generating every possible combination of up to
+   MAX_INSTRUCTIONS linear operators (such as add, sub, s2a, left
+   shift) and computing the multiplier computed by those instructions.
+   For example,
+
+   s2a r2,r1,r1
+   s2a r3,r2,r2
+
+   multiplies r1 by 25.
+
+   There are usually multiple instruction sequences to multiply by a
+   given constant. This program keeps only the cheapest one.
+   "Cheapest" is defined first by the minimum theoretical schedule
+   length, and if those are equal then by the number of instructions,
+   and if those are equal then by which instructions we "prefer"
+   (e.g. because we think one might take infinitesimally less power
+   than another, or simply because we arbitrarily pick one to be more
+   canonical).
+
+   Once this program has determined the best instruction sequence for
+   each multiplier, it emits them in a table sorted by the multiplier
+   so the user can binary-search it to look for a match.  The table is
+   pruned based on various criteria to keep its sizes reasonable.  */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <string.h>
+#define __STDC_LIMIT_MACROS
+#include <stdint.h>
+
+#include <map>
+
+#ifdef TILEPRO
+
+/* The string representing the architecture.  */
+#define ARCH "tilepro"
+
+/* The type for the multiplication.  */
+typedef int MUL_TYPE;
+
+#else
+
+/* The string representing the architecture.  */
+#define ARCH "tilegx"
+
+/* The type for the multiplication.  */
+typedef long long MUL_TYPE;
+
+#endif
+
+/* Longest instruction sequence this will produce. With the current
+   stupid algorithm runtime grows very quickly with this number.  */
+#define MAX_INSTRUCTIONS 4
+
+/* Maximum number of subexpressions in the expression DAG being
+   generated.  This is the same as the number of instructions, except
+   that zero and the original register we'd like to multiply by a
+   constant are also thrown into the mix.  */
+#define MAX_SUBEXPRS (2 + MAX_INSTRUCTIONS)
+
+#define MIN(x, y)  ((x) <= (y) ? (x) : (y))
+#define MAX(x, y)  ((x) >= (y) ? (x) : (y))
+
+/* For this program a unary op is one which has only one nonconstant
+   operand.  So shift left by 5 is considered unary.  */
+typedef MUL_TYPE (*unary_op_func) (MUL_TYPE);
+typedef MUL_TYPE (*binary_op_func) (MUL_TYPE, MUL_TYPE);
+
+/* This describes an operation like 'add two registers' or 'left-shift
+   by 7'.
+
+   We call something a 'unary' operator if it only takes in one
+   register as input, even though it might have an implicit second
+   constant operand.  Currently this is used for left-shift by
+   constant.  */
+class Operator
+{
+public:
+  /* Construct for a binary operator.  */
+  Operator (const char *pattern, const char *name, binary_op_func func,
+	    int cost)
+    : m_pattern (pattern), m_name (name), m_top_index (-1),
+      m_unary_func (0), m_binary_func (func), m_cost (cost),
+      m_rhs_if_unary (0)
+  {
+  }
+
+  /* Construct for a unary operator.  */
+  Operator (const char *pattern, const char *name, unary_op_func func,
+	    int rhs_if_unary, int cost)
+    : m_pattern (pattern), m_name (name), m_top_index (-1),
+      m_unary_func (func), m_binary_func (0), m_cost (cost),
+      m_rhs_if_unary (rhs_if_unary)
+  {
+  }
+
+  bool is_unary () const
+  {
+    return m_binary_func == NULL;
+  }
+
+  /* Name of the pattern for this operation, e.g. CODE_FOR_addsi3.  */
+  const char *m_pattern;
+
+  /* Name of the opcode for this operation, e.g. add.  */
+  const char *m_name;
+
+  /* We don't have enough bits in our output representation to store
+     the original insn_code value, so we store a compressed form
+     instead.  These values are decoded back into insn_code via the
+     machine-generated multiply_insn_seq_decode_opcode lookup
+     table.  */
+  int m_top_index;
+
+  /* Unary operator to apply, or NULL if this is a binary operator.  */
+  unary_op_func m_unary_func;
+
+  /* Binary operator to apply, or NULL if this is a unary operator.  */
+  binary_op_func m_binary_func;
+
+  /* Function of how expensive we consider this operator. Higher is
+     worse.  */
+  int m_cost;
+
+  /* the RHS value to write into the C file if unary; used for shift
+     count.  */
+  int m_rhs_if_unary;
+};
+
+
+/* An entry in an expression DAG.  */
+class Expr
+{
+public:
+  Expr () : m_op (NULL), m_lhs (0), m_rhs (0), m_produced_val (0),
+    m_critical_path_length (0)
+  {
+  }
+
+  /* The operator being applied to the operands.  */
+  const Operator *m_op;
+
+  /* The index of the left-hand operand in the array of subexpressions
+     already computed.  */
+  int m_lhs;
+
+  /* For binary ops ,this is the index of the left-hand operand in the
+     array of subexpressions already computed. For unary ops, it is
+     specific to the op (e.g. it might hold a constant shift
+     count).  */
+  int m_rhs;
+
+  /* The multiplier produced by this expression tree. For example, for
+     the tree ((x << 5) + x), the value would be 33.  */
+  MUL_TYPE m_produced_val;
+
+  /* How far is this expression from the root, i.e. how many cycles
+     minimum will it take to compute this?  */
+  int m_critical_path_length;
+};
+
+
+/* Make function pointers for the various linear operators we can
+   apply to compute a multiplicative value.  */
+
+static MUL_TYPE
+add (MUL_TYPE n1, MUL_TYPE n2)
+{
+  return n1 + n2;
+}
+
+static MUL_TYPE
+sub (MUL_TYPE n1, MUL_TYPE n2)
+{
+  return n1 - n2;
+}
+
+static MUL_TYPE
+s1a (MUL_TYPE n1, MUL_TYPE n2)
+{
+  return n1 * 2 + n2;
+}
+
+static MUL_TYPE
+s2a (MUL_TYPE n1, MUL_TYPE n2)
+{
+  return n1 * 4 + n2;
+}
+
+static MUL_TYPE
+s3a (MUL_TYPE n1, MUL_TYPE n2)
+{
+  return n1 * 8 + n2;
+}
+
+#define SHIFT(count)                            \
+static MUL_TYPE                                 \
+shift##count(MUL_TYPE n)                        \
+{                                               \
+  return n << (count);                          \
+}
+
+SHIFT (1);
+SHIFT (2);
+SHIFT (3);
+SHIFT (4);
+SHIFT (5);
+SHIFT (6);
+SHIFT (7);
+SHIFT (8);
+SHIFT (9);
+SHIFT (10);
+SHIFT (11);
+SHIFT (12);
+SHIFT (13);
+SHIFT (14);
+SHIFT (15);
+SHIFT (16);
+SHIFT (17);
+SHIFT (18);
+SHIFT (19);
+SHIFT (20);
+SHIFT (21);
+SHIFT (22);
+SHIFT (23);
+SHIFT (24);
+SHIFT (25);
+SHIFT (26);
+SHIFT (27);
+SHIFT (28);
+SHIFT (29);
+SHIFT (30);
+SHIFT (31);
+#ifndef TILEPRO
+SHIFT (32);
+SHIFT (33);
+SHIFT (34);
+SHIFT (35);
+SHIFT (36);
+SHIFT (37);
+SHIFT (38);
+SHIFT (39);
+SHIFT (40);
+SHIFT (41);
+SHIFT (42);
+SHIFT (43);
+SHIFT (44);
+SHIFT (45);
+SHIFT (46);
+SHIFT (47);
+SHIFT (48);
+SHIFT (49);
+SHIFT (50);
+SHIFT (51);
+SHIFT (52);
+SHIFT (53);
+SHIFT (54);
+SHIFT (55);
+SHIFT (56);
+SHIFT (57);
+SHIFT (58);
+SHIFT (59);
+SHIFT (60);
+SHIFT (61);
+SHIFT (62);
+SHIFT (63);
+#endif
+
+#ifdef TILEPRO
+static Operator ops[] = {
+  Operator ("CODE_FOR_addsi3", "add", add, 1040),
+  Operator ("CODE_FOR_subsi3", "sub", sub, 1041),
+  Operator ("CODE_FOR_insn_s1a", "s1a", s1a, 1042),
+  Operator ("CODE_FOR_insn_s2a", "s2a", s2a, 1043),
+  Operator ("CODE_FOR_insn_s3a", "s3a", s3a, 1044),
+  /* Note: shl by 1 is not necessary, since adding a value to itself
+     produces the same result. But the architecture team thinks
+     left-shift may use slightly less power, so we might as well
+     prefer it.  */
+  Operator ("CODE_FOR_ashlsi3", "shli", shift1, 1, 1001),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift2, 2, 1002),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift3, 3, 1003),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift4, 4, 1004),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift5, 5, 1005),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift6, 6, 1006),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift7, 7, 1007),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift8, 8, 1008),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift9, 9, 1009),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift10, 10, 1010),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift11, 11, 1011),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift12, 12, 1012),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift13, 13, 1013),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift14, 14, 1014),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift15, 15, 1015),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift16, 16, 1016),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift17, 17, 1017),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift18, 18, 1018),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift19, 19, 1019),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift20, 20, 1020),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift21, 21, 1021),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift22, 22, 1022),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift23, 23, 1023),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift24, 24, 1024),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift25, 25, 1025),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift26, 26, 1026),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift27, 27, 1027),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift28, 28, 1028),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift29, 29, 1029),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift30, 30, 1030),
+  Operator ("CODE_FOR_ashlsi3", "shli", shift31, 31, 1031)
+};
+#else
+static Operator ops[] = {
+  Operator ("CODE_FOR_adddi3", "add", add, 1070),
+  Operator ("CODE_FOR_subdi3", "sub", sub, 1071),
+  Operator ("CODE_FOR_insn_shl1add", "shl1add", s1a, 1072),
+  Operator ("CODE_FOR_insn_shl2add", "shl2add", s2a, 1073),
+  Operator ("CODE_FOR_insn_shl3add", "shl3add", s3a, 1074),
+  // Note: shl by 1 is not necessary, since adding a value to itself
+  // produces the same result. But the architecture team thinks left-shift
+  // may use slightly less power, so we might as well prefer it.
+  Operator ("CODE_FOR_ashldi3", "shli", shift1, 1, 1001),
+  Operator ("CODE_FOR_ashldi3", "shli", shift2, 2, 1002),
+  Operator ("CODE_FOR_ashldi3", "shli", shift3, 3, 1003),
+  Operator ("CODE_FOR_ashldi3", "shli", shift4, 4, 1004),
+  Operator ("CODE_FOR_ashldi3", "shli", shift5, 5, 1005),
+  Operator ("CODE_FOR_ashldi3", "shli", shift6, 6, 1006),
+  Operator ("CODE_FOR_ashldi3", "shli", shift7, 7, 1007),
+  Operator ("CODE_FOR_ashldi3", "shli", shift8, 8, 1008),
+  Operator ("CODE_FOR_ashldi3", "shli", shift9, 9, 1009),
+  Operator ("CODE_FOR_ashldi3", "shli", shift10, 10, 1010),
+  Operator ("CODE_FOR_ashldi3", "shli", shift11, 11, 1011),
+  Operator ("CODE_FOR_ashldi3", "shli", shift12, 12, 1012),
+  Operator ("CODE_FOR_ashldi3", "shli", shift13, 13, 1013),
+  Operator ("CODE_FOR_ashldi3", "shli", shift14, 14, 1014),
+  Operator ("CODE_FOR_ashldi3", "shli", shift15, 15, 1015),
+  Operator ("CODE_FOR_ashldi3", "shli", shift16, 16, 1016),
+  Operator ("CODE_FOR_ashldi3", "shli", shift17, 17, 1017),
+  Operator ("CODE_FOR_ashldi3", "shli", shift18, 18, 1018),
+  Operator ("CODE_FOR_ashldi3", "shli", shift19, 19, 1019),
+  Operator ("CODE_FOR_ashldi3", "shli", shift20, 20, 1020),
+  Operator ("CODE_FOR_ashldi3", "shli", shift21, 21, 1021),
+  Operator ("CODE_FOR_ashldi3", "shli", shift22, 22, 1022),
+  Operator ("CODE_FOR_ashldi3", "shli", shift23, 23, 1023),
+  Operator ("CODE_FOR_ashldi3", "shli", shift24, 24, 1024),
+  Operator ("CODE_FOR_ashldi3", "shli", shift25, 25, 1025),
+  Operator ("CODE_FOR_ashldi3", "shli", shift26, 26, 1026),
+  Operator ("CODE_FOR_ashldi3", "shli", shift27, 27, 1027),
+  Operator ("CODE_FOR_ashldi3", "shli", shift28, 28, 1028),
+  Operator ("CODE_FOR_ashldi3", "shli", shift29, 29, 1029),
+  Operator ("CODE_FOR_ashldi3", "shli", shift30, 30, 1030),
+  Operator ("CODE_FOR_ashldi3", "shli", shift31, 31, 1031),
+  Operator ("CODE_FOR_ashldi3", "shli", shift32, 32, 1032),
+  Operator ("CODE_FOR_ashldi3", "shli", shift33, 33, 1033),
+  Operator ("CODE_FOR_ashldi3", "shli", shift34, 34, 1034),
+  Operator ("CODE_FOR_ashldi3", "shli", shift35, 35, 1035),
+  Operator ("CODE_FOR_ashldi3", "shli", shift36, 36, 1036),
+  Operator ("CODE_FOR_ashldi3", "shli", shift37, 37, 1037),
+  Operator ("CODE_FOR_ashldi3", "shli", shift38, 38, 1038),
+  Operator ("CODE_FOR_ashldi3", "shli", shift39, 39, 1039),
+  Operator ("CODE_FOR_ashldi3", "shli", shift40, 40, 1040),
+  Operator ("CODE_FOR_ashldi3", "shli", shift41, 41, 1041),
+  Operator ("CODE_FOR_ashldi3", "shli", shift42, 42, 1042),
+  Operator ("CODE_FOR_ashldi3", "shli", shift43, 43, 1043),
+  Operator ("CODE_FOR_ashldi3", "shli", shift44, 44, 1044),
+  Operator ("CODE_FOR_ashldi3", "shli", shift45, 45, 1045),
+  Operator ("CODE_FOR_ashldi3", "shli", shift46, 46, 1046),
+  Operator ("CODE_FOR_ashldi3", "shli", shift47, 47, 1047),
+  Operator ("CODE_FOR_ashldi3", "shli", shift48, 48, 1048),
+  Operator ("CODE_FOR_ashldi3", "shli", shift49, 49, 1049),
+  Operator ("CODE_FOR_ashldi3", "shli", shift50, 50, 1050),
+  Operator ("CODE_FOR_ashldi3", "shli", shift51, 51, 1051),
+  Operator ("CODE_FOR_ashldi3", "shli", shift52, 52, 1052),
+  Operator ("CODE_FOR_ashldi3", "shli", shift53, 53, 1053),
+  Operator ("CODE_FOR_ashldi3", "shli", shift54, 54, 1054),
+  Operator ("CODE_FOR_ashldi3", "shli", shift55, 55, 1055),
+  Operator ("CODE_FOR_ashldi3", "shli", shift56, 56, 1056),
+  Operator ("CODE_FOR_ashldi3", "shli", shift57, 57, 1057),
+  Operator ("CODE_FOR_ashldi3", "shli", shift58, 58, 1058),
+  Operator ("CODE_FOR_ashldi3", "shli", shift59, 59, 1059),
+  Operator ("CODE_FOR_ashldi3", "shli", shift60, 60, 1060),
+  Operator ("CODE_FOR_ashldi3", "shli", shift61, 61, 1061),
+  Operator ("CODE_FOR_ashldi3", "shli", shift62, 62, 1062),
+  Operator ("CODE_FOR_ashldi3", "shli", shift63, 63, 1063)
+};
+#endif
+
+/* An ExpressionTree is an expression DAG.  */
+class ExpressionTree
+{
+public:
+  ExpressionTree () : m_num_vals (2)
+  {
+    m_exprs[0].m_produced_val = 0;
+    m_exprs[1].m_produced_val = 1;
+  }
+
+  void copy_technique_from (ExpressionTree * other)
+  {
+    /* TODO: write this; other can compute the same products with less
+       cost.  We update this ExpressionTree object because some int is
+       already mapped to it.  */
+  }
+
+  int m_num_vals;
+  Expr m_exprs[MAX_SUBEXPRS];
+
+  int cost () const
+  {
+    int cost = 0;
+    for (int j = 2; j < m_num_vals; j++)
+        cost += m_exprs[j].m_op->m_cost;
+      return cost + m_exprs[m_num_vals - 1].m_critical_path_length * 1000000;
+  }
+};
+
+
+typedef std::map<MUL_TYPE, ExpressionTree *> ExpressionTreeMap;
+
+
+static void
+find_sequences (ExpressionTree &s, ExpressionTreeMap &best_solution)
+{
+  /* Don't look more if we can't add any new values to the expression
+     tree.  */
+  const int num_vals = s.m_num_vals;
+  if (num_vals == MAX_SUBEXPRS)
+    return;
+
+  /* Grow array to make room for new values added as we loop.  */
+  s.m_num_vals = num_vals + 1;
+
+  const Operator *const prev_op = s.m_exprs[num_vals - 1].m_op;
+  const int prev_top_index = (prev_op != NULL) ? prev_op->m_top_index : -1;
+
+  for (size_t f = 0; f < sizeof ops / sizeof ops[0]; f++)
+    {
+      const Operator *const op = &ops[f];
+
+      for (int i = 0; i < num_vals; i++)
+	{
+	  /* Only allow zero as the first operand to sub, otherwise
+	     it is useless.  */
+	  if (i == 0 && op->m_binary_func != sub)
+	    continue;
+
+	  /* Unary ops don't actually use the second operand, so as a
+	     big hack we trick it into only looping once in the inner
+	     loop.  */
+	  const int j_end = op->is_unary () ? 2 : num_vals;
+
+	  /* We never allow zero as the second operand, as it is
+	     always useless.  */
+	  for (int j = 1; j < j_end; j++)
+	    {
+	      /* Does this expression use the immediately previous
+		 expression?  */
+	      const bool uses_prev_value =
+		(i == num_vals - 1
+		 || (!op->is_unary () && j == num_vals - 1));
+
+	      if (!uses_prev_value)
+		{
+		  /* For search efficiency, prune redundant
+		     instruction orderings.
+
+		     This op does not take the immediately preceding
+		     value as input, which means we could have done it
+		     in the previous slot. If its opcode is less than
+		     the previous instruction's, we'll declare this
+		     instruction order non-canonical and not pursue
+		     this branch of the search.  */
+		  if (op->m_top_index < prev_top_index)
+		    continue;
+		}
+
+	      MUL_TYPE n;
+	      if (op->is_unary ())
+		{
+		  n = op->m_unary_func (s.m_exprs[i].m_produced_val);
+		}
+	      else
+		{
+		  n = op->m_binary_func (s.m_exprs[i].m_produced_val,
+					 s.m_exprs[j].m_produced_val);
+		}
+
+	      bool duplicate = false;
+	      for (int k = 0; k < num_vals; k++)
+		if (n == s.m_exprs[j].m_produced_val)
+		  {
+		    duplicate = true;
+		    break;
+		  }
+
+	      if (duplicate)
+		continue;
+
+	      /* See if we found the best solution for n.  */
+	      Expr *e = &s.m_exprs[num_vals];
+	      e->m_op = op;
+	      e->m_lhs = i;
+	      e->m_rhs = op->is_unary () ? op->m_rhs_if_unary : j;
+	      e->m_produced_val = n;
+	      e->m_critical_path_length =
+		1 + MAX (s.m_exprs[i].m_critical_path_length,
+			 s.m_exprs[j].m_critical_path_length);
+
+	      ExpressionTreeMap::iterator best (best_solution.find (n));
+	      if (best == best_solution.end ()
+		  || (*best).second->cost () > s.cost ())
+		best_solution[n] = new ExpressionTree (s);
+
+	      /* Recurse and find more.  */
+	      find_sequences (s, best_solution);
+	    }
+	}
+    }
+
+  /* Restore old size.  */
+  s.m_num_vals = num_vals;
+}
+
+
+/* For each insn_code used by an operator, choose a compact number so
+   it takes less space in the output data structure. This prints out a
+   lookup table used to map the compactified number back to an
+   insn_code.  */
+static void
+create_insn_code_compression_table ()
+{
+  int next_index = 1;
+
+  /* Entry 0 must hold CODE_FOR_nothing to mean "end of array".  */
+  printf ("const enum insn_code %s_multiply_insn_seq_decode_opcode[] = {\n"
+	  "  CODE_FOR_nothing /* must be first */ ", ARCH);
+
+  for (size_t i = 0; i < sizeof ops / sizeof ops[0]; i++)
+    {
+      Operator *op = &ops[i];
+      int index = -1;
+
+      /* See if some previous Operator was using the same insn_code.
+	 If so, reuse its table entry.  */
+      for (size_t j = 0; j < i; j++)
+	{
+	  Operator *old = &ops[j];
+	  if (strcmp (old->m_pattern, op->m_pattern) == 0)
+	    {
+	      index = old->m_top_index;
+	      break;
+	    }
+	}
+
+      if (index == -1)
+	{
+	  /* We need to make a new entry in the table.  */
+	  printf (",\n  %s", op->m_pattern);
+	  index = next_index++;
+	}
+
+      op->m_top_index = index;
+    }
+
+  printf ("\n};\n\n");
+}
+
+
+/* These are constants we've seen in code, that we want to keep table
+   entries for.  */
+static int multiply_constants_used[] = {
+  -11796480,
+  -27439,
+  -25148,
+  -22820,
+  -21709,
+  -20995,
+  -20284,
+  -20239,
+  -19595,
+  -19447,
+  -19183,
+  -19165,
+  -18730,
+  -17828,
+  -17799,
+  -17237,
+  -17036,
+  -16549,
+  -16423,
+  -16294,
+  -16244,
+  -16069,
+  -15137,
+  -15083,
+  -15038,
+  -14924,
+  -14905,
+  -14752,
+  -14731,
+  -14529,
+  -14273,
+  -14090,
+  -14084,
+  -14043,
+  -13850,
+  -13802,
+  -13631,
+  -13455,
+  -13275,
+  -12879,
+  -12700,
+  -12534,
+  -12399,
+  -12131,
+  -12112,
+  -12054,
+  -12019,
+  -11759,
+  -11585,
+  -11467,
+  -11395,
+  -11295,
+  -11248,
+  -11148,
+  -11116,
+  -11086,
+  -11059,
+  -11018,
+  -10811,
+  -10538,
+  -10258,
+  -10217,
+  -10033,
+  -9766,
+  -9754,
+  -9534,
+  -9527,
+  -9467,
+  -9262,
+  -9232,
+  -9222,
+  -9198,
+  -9191,
+  -9113,
+  -8825,
+  -8756,
+  -8697,
+  -8693,
+  -8565,
+  -8342,
+  -8208,
+  -8200,
+  -8170,
+  -8102,
+  -7770,
+  -7678,
+  -7562,
+  -7376,
+  -7373,
+  -7221,
+  -7121,
+  -6835,
+  -6810,
+  -6626,
+  -6581,
+  -6461,
+  -6278,
+  -6263,
+  -6163,
+  -6029,
+  -5816,
+  -5540,
+  -5461,
+  -5384,
+  -5329,
+  -4985,
+  -4926,
+  -4815,
+  -4788,
+  -4758,
+  -4433,
+  -4229,
+  -4209,
+  -4176,
+  -4104,
+  -4095,
+  -4078,
+  -3941,
+  -3818,
+  -3600,
+  -3474,
+  -3314,
+  -3264,
+  -3196,
+  -3072,
+  -2912,
+  -2836,
+  -2773,
+  -2269,
+  -2184,
+  -2100,
+  -1730,
+  -1512,
+  -1500,
+  -1396,
+  -1344,
+  -1312,
+  -1297,
+  -1059,
+  -1058,
+  1027,
+  1049,
+  1059,
+  1100,
+  1104,
+  1108,
+  1136,
+  1200,
+  1204,
+  1242,
+  1292,
+  1304,
+  1312,
+  1320,
+  1336,
+  1344,
+  1348,
+  1360,
+  1364,
+  1395,
+  1448,
+  1460,
+  1461,
+  1472,
+  1488,
+  1500,
+  1512,
+  1568,
+  1576,
+  1649,
+  1664,
+  1684,
+  1696,
+  1744,
+  1812,
+  1822,
+  1884,
+  1963,
+  1978,
+  2000,
+  2012,
+  2014,
+  2037,
+  2039,
+  2100,
+  2139,
+  2144,
+  2184,
+  2237,
+  2260,
+  2320,
+  2408,
+  2446,
+  2447,
+  2499,
+  2531,
+  2578,
+  2592,
+  2611,
+  2633,
+  2704,
+  2730,
+  2773,
+  2880,
+  2896,
+  2998,
+  3000,
+  3001,
+  3021,
+  3079,
+  3112,
+  3150,
+  3179,
+  3192,
+  3240,
+  3264,
+  3271,
+  3283,
+  3328,
+  3363,
+  3367,
+  3453,
+  3529,
+  3570,
+  3580,
+  3600,
+  3624,
+  3707,
+  3783,
+  3826,
+  3897,
+  3941,
+  3962,
+  3989,
+  4000,
+  4025,
+  4073,
+  4093,
+  4099,
+  4108,
+  4184,
+  4209,
+  4369,
+  4376,
+  4416,
+  4433,
+  4434,
+  4482,
+  4582,
+  4712,
+  4717,
+  4813,
+  4815,
+  4864,
+  5000,
+  5027,
+  5040,
+  5091,
+  5195,
+  5243,
+  5260,
+  5285,
+  5329,
+  5331,
+  5350,
+  5361,
+  5387,
+  5461,
+  5492,
+  5529,
+  5573,
+  5793,
+  5819,
+  5915,
+  5946,
+  5992,
+  6000,
+  6164,
+  6205,
+  6262,
+  6263,
+  6269,
+  6270,
+  6387,
+  6400,
+  6406,
+  6476,
+  6541,
+  6565,
+  6568,
+  6626,
+  6656,
+  6732,
+  6810,
+  6817,
+  6859,
+  7040,
+  7053,
+  7141,
+  7169,
+  7221,
+  7223,
+  7274,
+  7282,
+  7350,
+  7369,
+  7373,
+  7442,
+  7447,
+  7471,
+  7518,
+  7542,
+  7566,
+  7587,
+  7663,
+  7678,
+  7682,
+  7748,
+  7752,
+  7791,
+  8000,
+  8026,
+  8048,
+  8170,
+  8203,
+  8204,
+  8290,
+  8368,
+  8520,
+  8640,
+  8666,
+  8672,
+  8697,
+  8716,
+  8728,
+  8756,
+  8820,
+  8875,
+  8918,
+  8956,
+  9058,
+  9154,
+  9175,
+  9191,
+  9217,
+  9262,
+  9321,
+  9373,
+  9434,
+  9465,
+  9514,
+  9534,
+  9633,
+  9746,
+  9810,
+  9850,
+  9947,
+  9973,
+  10000,
+  10009,
+  10033,
+  10055,
+  10217,
+  10248,
+  10298,
+  10310,
+  10323,
+  10368,
+  10438,
+  10456,
+  10486,
+  10538,
+  10664,
+  10695,
+  10700,
+  10703,
+  10832,
+  10887,
+  10935,
+  10958,
+  11018,
+  11059,
+  11061,
+  11086,
+  11116,
+  11148,
+  11190,
+  11249,
+  11314,
+  11332,
+  11363,
+  11409,
+  11415,
+  11443,
+  11467,
+  11512,
+  11522,
+  11529,
+  11585,
+  11759,
+  11768,
+  11795,
+  11893,
+  11997,
+  12131,
+  12299,
+  12536,
+  12543,
+  12893,
+  12945,
+  12998,
+  13109,
+  13213,
+  13685,
+  13930,
+  14023,
+  14024,
+  14271,
+  14564,
+  14647,
+  15326,
+  15850,
+  15855,
+  15929,
+  16000,
+  16154,
+  16496,
+  16807,
+  16819,
+  16984,
+  17203,
+  17223,
+  17333,
+  17760,
+  17799,
+  17837,
+  18029,
+  18068,
+  18336,
+  18515,
+  19595,
+  20017,
+  20131,
+  20862,
+  20995,
+  21709,
+  22554,
+  25000,
+  25172,
+  25600,
+  25733,
+  27439,
+  38470,
+  46802,
+  50000,
+  11796480,
+  16843009,
+  23592960,
+};
+
+
+const int num_mult_constants_used =
+  (int)(sizeof multiply_constants_used
+	/ sizeof multiply_constants_used[0]);
+
+
+#define XSIZE (sizeof multiply_constants_used / \
+	       sizeof multiply_constants_used[0] + 32) / 32
+unsigned multiply_constants_avail[XSIZE];
+#undef XSIZE
+
+
+/* bsearch helper function.  */
+static int
+compare_constants (const void *key, const void *t)
+{
+  return (*(int*)key) - *((int*)t);
+}
+
+
+static int *
+find_mult_constants_used (int multiplier)
+{
+  return (int *) bsearch (&multiplier, multiply_constants_used,
+			  num_mult_constants_used,
+			  sizeof multiply_constants_used[0],
+			  compare_constants);
+}
+
+
+int num_ops (ExpressionTree *s)
+{
+  int n = 0;
+  for (int i = 0; i < s->m_num_vals; i++)
+    {
+      Expr *e = &s->m_exprs[i];
+      if (e->m_op != NULL)
+	n++;
+    }
+  return n;
+}
+
+
+#ifdef TILEPRO
+bool
+tilepro_emit (int multiplier, int num_ops)
+{
+  int abs_multiplier = (multiplier >= 0) ? multiplier : -multiplier;
+
+  /* Keep constants in range [-1024, 1024].  */
+  if (abs_multiplier <= 1024)
+    return true;
+
+  /* Keep constants near powers of two.  */
+  int prev_pow2 = 1 << (31 - __builtin_clz (abs_multiplier));
+  int next_pow2 = prev_pow2 << 1;
+
+  if ((abs_multiplier - prev_pow2 <= 10)
+      || (next_pow2 - abs_multiplier <= 10))
+    return true;
+
+  /* Keep constants near powers of ten.  */
+  {
+    long long j = 1;
+    long long prev_pow10;
+    long long next_pow10;
+
+    while (((j * 10) < abs_multiplier)
+	   && (j < (j * 10)))
+      j = j * 10;
+
+    prev_pow10 = j;
+    next_pow10 = j * 10;
+
+    if ((abs_multiplier - prev_pow10 <= 10)
+	|| (next_pow10 - abs_multiplier <= 10))
+      return true;
+  }
+
+  /* Keep short sequences that have two or fewer ops.  */
+  if (num_ops <= 2)
+    return true;
+
+  /* Keep constants that are mostly 0's or mostly 1's.  */
+  if (__builtin_popcount (multiplier) <= 2 ||
+      __builtin_popcount (multiplier) >= 30)
+    return true;
+
+  /* Keep constants seen in actual code.  */
+  if ((find_mult_constants_used (multiplier)))
+    return true;
+
+  return false;
+}
+#else
+bool
+tilegx_emit (long long multiplier, int num_ops)
+{
+  long long abs_multiplier = (multiplier >= 0) ? multiplier : - multiplier;
+
+  /* Keep constants in range [-1024, 1024].  */
+  if (abs_multiplier <= 1024)
+    return true;
+
+  /* Otherwise exclude sequences with four ops.  */
+  if (num_ops > 3)
+    return false;
+
+  /* Keep constants near powers of two.  */
+  {
+    unsigned long long prev_pow2 =
+      1LL << (63 - __builtin_clzll (abs_multiplier));
+    unsigned long long next_pow2 = prev_pow2 << 1;
+
+    /* handle overflow case. */
+    if (next_pow2 == 0)
+      {
+	if (prev_pow2 - abs_multiplier <= 10)
+	  return true;
+      }
+    else if ((abs_multiplier - prev_pow2 <= 10)
+	     || (next_pow2 - abs_multiplier <= 10))
+      return true;
+  }
+
+  /* Keep constants near powers of ten.  */
+  {
+    long long j = 1;
+    long long prev_pow10;
+    long long next_pow10;
+
+    while (((j * 10) < abs_multiplier)
+	   && (j < (INTMAX_MAX / 10)))
+      j = j * 10;
+
+    prev_pow10 = j;
+    next_pow10 = (j > (INTMAX_MAX / 10)) ? 0 : j * 10;
+
+    if ((abs_multiplier - prev_pow10 <= 100)
+	|| (next_pow10
+	    && (next_pow10 - abs_multiplier <= 100)))
+      return true;
+  }
+
+  if (num_ops <= 2)
+    return true;
+
+  /* Keep constants that are mostly 0's or mostly 1's.  */
+  if (__builtin_popcountll (multiplier) <= 2 ||
+      __builtin_popcountll (multiplier) >= 62)
+    return true;
+
+  /* Keep constants seen in actual code.  */
+  if (find_mult_constants_used (multiplier))
+    return true;
+
+  return false;
+}
+#endif
+
+
+int
+main ()
+{
+  ExpressionTreeMap best_solution;
+  ExpressionTree s;
+
+#ifdef TILEPRO
+  printf ("/* Constant multiply table for TILEPro.\n");
+#else
+  printf ("/* Constant multiply table for TILE-Gx.\n");
+#endif
+  printf ("   Copyright (C) 2011-2017 Free Software Foundation, Inc.\n");
+  printf ("   Contributed by Walter Lee (walt@tilera.com)\n");
+  printf ("\n");
+  printf ("   This file is part of GCC.\n");
+  printf ("\n");
+  printf ("   GCC is free software; you can redistribute it and/or modify it\n");
+  printf ("   under the terms of the GNU General Public License as published\n");
+  printf ("   by the Free Software Foundation; either version 3, or (at your\n");
+  printf ("   option) any later version.\n");
+  printf ("\n");
+  printf ("   GCC is distributed in the hope that it will be useful, but WITHOUT\n");
+  printf ("   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY\n");
+  printf ("   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public\n");
+  printf ("   License for more details.\n");
+  printf ("\n");
+  printf ("   You should have received a copy of the GNU General Public License\n");
+  printf ("   along with GCC; see the file COPYING3.  If not see\n");
+  printf ("   <http://www.gnu.org/licenses/>.  */\n");
+  printf ("\n");
+  printf ("/* Note this file is auto-generated from gen-mul-tables.cc.\n");
+  printf ("   Make any required changes there.  */\n");
+  printf ("\n");
+  printf ("#include \"config.h\"\n");
+  printf ("#include \"system.h\"\n");
+  printf ("#include \"coretypes.h\"\n");
+  printf ("#include \"backend.h\"\n");
+  printf ("#include \"rtl.h\"\n");
+  printf ("#include \"expmed.h\"\n");
+  printf ("#include \"%s-multiply.h\"\n\n", ARCH);
+  create_insn_code_compression_table ();
+
+  /* Try all combinations of operators and see what constants we
+     produce.  For each possible constant, record the most efficient
+     way to generate it.  */
+  find_sequences (s, best_solution);
+
+  printf ("const struct %s_multiply_insn_seq "
+	  "%s_multiply_insn_seq_table[] = {\n",
+	  ARCH, ARCH);
+
+  const char *comma_separator = "";
+
+  ExpressionTreeMap::iterator i (best_solution.begin ());
+  for (; i != best_solution.end (); ++i)
+    {
+      ExpressionTree *s = (*i).second;
+      const MUL_TYPE n = (*i).first;
+
+      if (n == 0 || n == 1)
+	{
+	  /* Both of these require zero operations, so these entries
+	     are bogus and should never be used.  */
+	  continue;
+	}
+
+      /* Prune the list of entries to keep the table to a reasonable
+	 size.  */
+#ifdef TILEPRO
+      if (!tilepro_emit (n, num_ops (s)))
+	continue;
+#else
+      if (!tilegx_emit (n, num_ops (s)))
+	continue;
+#endif
+
+      printf ("%s", comma_separator);
+
+#ifdef TILEPRO
+      const MUL_TYPE int_min = INT32_MIN;
+#else
+      const MUL_TYPE int_min = INT64_MIN;
+#endif
+      if (n == int_min)
+	{
+	  /* Handle C's woeful lack of unary negation. Without this,
+	     printing out INT_MIN in decimal will yield an unsigned
+	     int which could generate a compiler warning.  */
+#ifdef TILEPRO
+	  printf ("  {%d - 1 /* 0x%x */ ,\n   {", n + 1,
+		  (unsigned) n);
+#else
+	  printf ("  {%lldll - 1 /* 0x%llx */ ,\n   {", n + 1,
+		  (unsigned MUL_TYPE) n);
+#endif
+	}
+      else
+	{
+#ifdef TILEPRO
+	  printf ("  {%d /* 0x%x */ ,\n   {", n, (unsigned) n);
+#else
+	  printf ("  {%lldll /* 0x%llx */ ,\n   {", n, (unsigned MUL_TYPE) n);
+#endif
+	}
+
+      bool first = true;
+      for (int j = 0; j < s->m_num_vals; j++)
+	{
+	  Expr *e = &s->m_exprs[j];
+
+	  const Operator *op = e->m_op;
+	  if (op == NULL)
+	    continue;
+
+	  char buf[1024];
+	  snprintf (buf, sizeof buf, "%s{%d, %d, %d}%s",
+		    first ? "" : "    ",
+		    op->m_top_index,
+		    e->m_lhs, e->m_rhs, (j + 1) == s->m_num_vals ? "}" : ",");
+
+	  char opnd0[10];
+	  if (e->m_lhs)
+	    snprintf (opnd0, sizeof opnd0, "r%d", e->m_lhs);
+	  else
+	    snprintf (opnd0, sizeof opnd0, "zero");
+
+	  printf ("%s\t\t\t/* %s r%d, %s, %s%d */\n",
+		  buf, op->m_name, j, opnd0,
+		  op->is_unary () ? "" : "r", e->m_rhs);
+
+	  first = false;
+	}
+      printf ("   }");
+      comma_separator = ",\n";
+    }
+
+  printf ("\n};\n\n");
+  printf ("const int %s_multiply_insn_seq_table_size =\n"
+	  "  (int) (sizeof %s_multiply_insn_seq_table\n"
+	  "         / sizeof %s_multiply_insn_seq_table[0]);\n",
+	  ARCH, ARCH, ARCH);
+
+  return EXIT_SUCCESS;
+}