comparison gcc/tree-ssa-reassoc.c @ 145:1830386684a0

gcc-9.2.0
author anatofuz
date Thu, 13 Feb 2020 11:34:05 +0900
parents 84e7813d76e9
comparing 131:84e7813d76e9 with 145:1830386684a0
1 /* Reassociation for trees. 1 /* Reassociation for trees.
2 Copyright (C) 2005-2018 Free Software Foundation, Inc. 2 Copyright (C) 2005-2020 Free Software Foundation, Inc.
3 Contributed by Daniel Berlin <dan@dberlin.org> 3 Contributed by Daniel Berlin <dan@dberlin.org>
4 4
5 This file is part of GCC. 5 This file is part of GCC.
6 6
7 GCC is free software; you can redistribute it and/or modify 7 GCC is free software; you can redistribute it and/or modify
46 #include "tree-ssa-loop.h" 46 #include "tree-ssa-loop.h"
47 #include "flags.h" 47 #include "flags.h"
48 #include "tree-ssa.h" 48 #include "tree-ssa.h"
49 #include "langhooks.h" 49 #include "langhooks.h"
50 #include "cfgloop.h" 50 #include "cfgloop.h"
51 #include "params.h"
52 #include "builtins.h" 51 #include "builtins.h"
53 #include "gimplify.h" 52 #include "gimplify.h"
54 #include "case-cfn-macros.h" 53 #include "case-cfn-macros.h"
55 54
56 /* This is a simple global reassociation pass. It is, in part, based 55 /* This is a simple global reassociation pass. It is, in part, based
268 block rank of its containing block. */ 267 block rank of its containing block. */
269 static long 268 static long
270 phi_rank (gimple *stmt) 269 phi_rank (gimple *stmt)
271 { 270 {
272 basic_block bb = gimple_bb (stmt); 271 basic_block bb = gimple_bb (stmt);
273 struct loop *father = bb->loop_father; 272 class loop *father = bb->loop_father;
274 tree res; 273 tree res;
275 unsigned i; 274 unsigned i;
276 use_operand_p use; 275 use_operand_p use;
277 gimple *use_stmt; 276 gimple *use_stmt;
278 277
601 600
602 /* Return true if STMT is reassociable operation containing a binary 601 /* Return true if STMT is reassociable operation containing a binary
603 operation with tree code CODE, and is inside LOOP. */ 602 operation with tree code CODE, and is inside LOOP. */
604 603
605 static bool 604 static bool
606 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop) 605 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop)
607 { 606 {
608 basic_block bb = gimple_bb (stmt); 607 basic_block bb = gimple_bb (stmt);
609 608
610 if (gimple_bb (stmt) == NULL) 609 if (gimple_bb (stmt) == NULL)
611 return false; 610 return false;
1013 { 1012 {
1014 if (dump_file && (dump_flags & TDF_DETAILS)) 1013 if (dump_file && (dump_flags & TDF_DETAILS))
1015 fprintf (dump_file, "Found * 0, removing all other ops\n"); 1014 fprintf (dump_file, "Found * 0, removing all other ops\n");
1016 1015
1017 reassociate_stats.ops_eliminated += ops->length () - 1; 1016 reassociate_stats.ops_eliminated += ops->length () - 1;
1018 ops->truncate (1); 1017 ops->truncate (0);
1019 ops->quick_push (oelast); 1018 ops->quick_push (oelast);
1020 return; 1019 return;
1021 } 1020 }
1022 } 1021 }
1023 else if (integer_onep (oelast->op) 1022 else if (integer_onep (oelast->op)
1558 reassociation algorithm which allows optimizing a * x * y + b * y * x 1557 reassociation algorithm which allows optimizing a * x * y + b * y * x
1559 to (a + b ) * x * y in one invocation of the reassociation pass. */ 1558 to (a + b ) * x * y in one invocation of the reassociation pass. */
1560 1559
1561 static bool 1560 static bool
1562 undistribute_ops_list (enum tree_code opcode, 1561 undistribute_ops_list (enum tree_code opcode,
1563 vec<operand_entry *> *ops, struct loop *loop) 1562 vec<operand_entry *> *ops, class loop *loop)
1564 { 1563 {
1565 unsigned int length = ops->length (); 1564 unsigned int length = ops->length ();
1566 operand_entry *oe1; 1565 operand_entry *oe1;
1567 unsigned i, j; 1566 unsigned i, j;
1568 unsigned nr_candidates, nr_candidates2; 1567 unsigned nr_candidates, nr_candidates2;
1770 cvec.release (); 1769 cvec.release ();
1771 1770
1772 return changed; 1771 return changed;
1773 } 1772 }
1774 1773
1774 /* Pair to hold the information for one specific VECTOR_TYPE SSA_NAME:
1775 first: element index of the relevant BIT_FIELD_REF within the vector.
1776 second: the index into the vec *OPS for the relevant BIT_FIELD_REF. */
1777 typedef std::pair<unsigned, unsigned> v_info_elem;
1778 struct v_info {
1779 tree vec_type;
1780 auto_vec<v_info_elem, 32> vec;
1781 };
1782 typedef v_info *v_info_ptr;
1783
1784 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */
1785 static int
1786 sort_by_mach_mode (const void *p_i, const void *p_j)
1787 {
1788 const tree tr1 = *((const tree *) p_i);
1789 const tree tr2 = *((const tree *) p_j);
1790 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1));
1791 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2));
1792 if (mode1 > mode2)
1793 return 1;
1794 else if (mode1 < mode2)
1795 return -1;
1796 else
1797 return 0;
1798 }
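
The comparator above follows the usual qsort three-way idiom (return
negative/zero/positive). A standalone sketch of the same shape on plain
unsigned keys, purely for illustration (cmp_unsigned is a hypothetical
helper, not part of this file):

  #include <stdlib.h>

  /* Three-way comparator, same shape as sort_by_mach_mode above.  */
  static int
  cmp_unsigned (const void *p_i, const void *p_j)
  {
    unsigned a = *(const unsigned *) p_i;
    unsigned b = *(const unsigned *) p_j;
    return (a > b) - (a < b);
  }

  /* Usage: qsort (keys, n, sizeof (unsigned), cmp_unsigned);  */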
1799
1800 /* Clean up the hash map holding VECTOR information. */
1801 static void
1802 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map)
1803 {
1804 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin ();
1805 it != info_map.end (); ++it)
1806 {
1807 v_info_ptr info = (*it).second;
1808 delete info;
1809 (*it).second = NULL;
1810 }
1811 }
1812
1813 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE.
1814 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k]
1815 is transformed to
1816 Vs = (V1 + V2 + ... + Vn)
1817 Vs[0] + Vs[1] + ... + Vs[k]
1818
1819 The basic steps are listed below:
1820
1821 1) Check the addition chain *OPS by looking at those summands coming from
1822 a VECTOR BIT_FIELD_REF on a VECTOR type. Put the information into
1823 v_info_map for each satisfying summand, using the VECTOR SSA_NAME as key.
1824
1825 2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs are
1826 contiguous and cover the whole VECTOR perfectly without any holes.
1827 Obtain one VECTOR list which contains the candidates to be transformed.
1828
1829 3) Sort the VECTOR list by the machine mode of the VECTOR type; for each
1830 group of candidates with the same mode, build the addition statements
1831 for them and generate BIT_FIELD_REFs accordingly.
1832
1833 TODO:
1834 The current implementation requires that whole VECTORs be fully
1835 covered, but it could be extended to support partial coverage
1836 (adjacent elements that do not fill the whole vector); that would
1837 likely need a cost model to decide when the transform pays off.
1838 */
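
Concretely, the source-level shape this transform targets looks like the
following, written with GCC's generic vector extensions (a minimal sketch;
the function names are hypothetical and only illustrate the before/after
form, not this file's internals):

  typedef int v4si __attribute__ ((vector_size (16)));

  /* Before: each lane is extracted (BIT_FIELD_REF) and summed.  */
  int sum_before (v4si v1, v4si v2)
  {
    return v1[0] + v1[1] + v1[2] + v1[3]
           + v2[0] + v2[1] + v2[2] + v2[3];
  }

  /* After: one vector addition, then the lane extractions.  */
  int sum_after (v4si v1, v4si v2)
  {
    v4si vs = v1 + v2;
    return vs[0] + vs[1] + vs[2] + vs[3];
  }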
1839 static bool
1840 undistribute_bitref_for_vector (enum tree_code opcode,
1841 vec<operand_entry *> *ops, struct loop *loop)
1842 {
1843 if (ops->length () <= 1)
1844 return false;
1845
1846 if (opcode != PLUS_EXPR
1847 && opcode != MULT_EXPR
1848 && opcode != BIT_XOR_EXPR
1849 && opcode != BIT_IOR_EXPR
1850 && opcode != BIT_AND_EXPR)
1851 return false;
1852
1853 hash_map<tree, v_info_ptr> v_info_map;
1854 operand_entry *oe1;
1855 unsigned i;
1856
1857 /* Find the summands that come from a VECTOR BIT_FIELD_REF in the addition
1858 chain and record them in the map. */
1859 FOR_EACH_VEC_ELT (*ops, i, oe1)
1860 {
1861 enum tree_code dcode;
1862 gimple *oe1def;
1863
1864 if (TREE_CODE (oe1->op) != SSA_NAME)
1865 continue;
1866 oe1def = SSA_NAME_DEF_STMT (oe1->op);
1867 if (!is_gimple_assign (oe1def))
1868 continue;
1869 dcode = gimple_assign_rhs_code (oe1def);
1870 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop))
1871 continue;
1872
1873 tree rhs = gimple_assign_rhs1 (oe1def);
1874 tree vec = TREE_OPERAND (rhs, 0);
1875 tree vec_type = TREE_TYPE (vec);
1876
1877 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type))
1878 continue;
1879
1880 /* Ignore it if target machine can't support this VECTOR type. */
1881 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1882 continue;
1883
1884 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */
1885 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1886 continue;
1887
1888 if (VECTOR_TYPE_P (TREE_TYPE (rhs))
1889 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs))))
1890 continue;
1891
1892 /* The type of BIT_FIELD_REF might not be equal to the element type of
1893 the vector. We want to use a vector type with element type the
1894 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */
1895 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type)))
1896 {
1897 machine_mode simd_mode;
1898 unsigned HOST_WIDE_INT size, nunits;
1899 unsigned HOST_WIDE_INT elem_size
1900 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs)));
1901 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size))
1902 continue;
1903 if (size <= elem_size || (size % elem_size) != 0)
1904 continue;
1905 nunits = size / elem_size;
1906 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)),
1907 nunits).exists (&simd_mode))
1908 continue;
1909 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode);
1910
1911 /* Ignore it if target machine can't support this VECTOR type. */
1912 if (!VECTOR_MODE_P (TYPE_MODE (vec_type)))
1913 continue;
1914
1915 /* Check const vector type, constrain BIT_FIELD_REF offset and
1916 size. */
1917 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ())
1918 continue;
1919
1920 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)),
1921 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec)))))
1922 continue;
1923 }
1924
1925 tree elem_type = TREE_TYPE (vec_type);
1926 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type));
1927 if (maybe_ne (bit_field_size (rhs), elem_size))
1928 continue;
1929
1930 unsigned idx;
1931 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx))
1932 continue;
1933
1934 /* Ignore it if target machine can't support this type of VECTOR
1935 operation. */
1936 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector);
1937 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing)
1938 continue;
1939
1940 bool existed;
1941 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed);
1942 if (!existed)
1943 {
1944 info = new v_info;
1945 info->vec_type = vec_type;
1946 }
1947 else if (!types_compatible_p (vec_type, info->vec_type))
1948 continue;
1949 info->vec.safe_push (std::make_pair (idx, i));
1950 }
1951
1952 /* We need at least two VECTORs to combine. */
1953 if (v_info_map.elements () <= 1)
1954 {
1955 cleanup_vinfo_map (v_info_map);
1956 return false;
1957 }
1958
1959 /* Verify all VECTOR candidates by checking two conditions:
1960 1) sorted offsets are adjacent, with no holes;
1961 2) they fill the whole VECTOR perfectly.
1962 Add the valid candidates to a vector for further handling. */
1963 auto_vec<tree> valid_vecs (v_info_map.elements ());
1964 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin ();
1965 it != v_info_map.end (); ++it)
1966 {
1967 tree cand_vec = (*it).first;
1968 v_info_ptr cand_info = (*it).second;
1969 unsigned int num_elems
1970 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant ();
1971 if (cand_info->vec.length () != num_elems)
1972 continue;
1973 sbitmap holes = sbitmap_alloc (num_elems);
1974 bitmap_ones (holes);
1975 bool valid = true;
1976 v_info_elem *curr;
1977 FOR_EACH_VEC_ELT (cand_info->vec, i, curr)
1978 {
1979 if (!bitmap_bit_p (holes, curr->first))
1980 {
1981 valid = false;
1982 break;
1983 }
1984 else
1985 bitmap_clear_bit (holes, curr->first);
1986 }
1987 if (valid && bitmap_empty_p (holes))
1988 valid_vecs.quick_push (cand_vec);
1989 sbitmap_free (holes);
1990 }
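
A hedged sketch of the hole check above, using a plain unsigned mask in
place of GCC's sbitmap (covers_whole_vector is a hypothetical helper for
illustration only; it assumes at most 32 elements and element indices
below num_elems):

  #include <stdbool.h>

  static bool
  covers_whole_vector (const unsigned *elem_idx, unsigned n_refs,
                       unsigned num_elems)
  {
    if (n_refs != num_elems || num_elems > 32)
      return false;
    unsigned holes = (num_elems == 32) ? ~0u : (1u << num_elems) - 1;
    for (unsigned k = 0; k < n_refs; k++)
      {
        unsigned bit = 1u << elem_idx[k];
        if (!(holes & bit))
          return false;  /* duplicate element index  */
        holes &= ~bit;
      }
    return holes == 0;   /* every element seen exactly once  */
  }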
1991
1992 /* We need at least two VECTORs to combine. */
1993 if (valid_vecs.length () <= 1)
1994 {
1995 cleanup_vinfo_map (v_info_map);
1996 return false;
1997 }
1998
1999 valid_vecs.qsort (sort_by_mach_mode);
2000 /* Go through all candidates in machine mode order, combining each run of
2001 candidates with the same mode and skipping modes that occur only once. */
2002 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i)
2003 {
2004 tree tvec = valid_vecs[i];
2005 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec));
2006
2007 /* Skip modes with only a single candidate. */
2008 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode)
2009 continue;
2010
2011 unsigned int idx, j;
2012 gimple *sum = NULL;
2013 tree sum_vec = tvec;
2014 v_info_ptr info_ptr = *(v_info_map.get (tvec));
2015 v_info_elem *elem;
2016 tree vec_type = info_ptr->vec_type;
2017
2018 /* Build the sum for all candidates with the same mode. */
2019 do
2020 {
2021 sum = build_and_add_sum (vec_type, sum_vec,
2022 valid_vecs[i + 1], opcode);
2023 if (!useless_type_conversion_p (vec_type,
2024 TREE_TYPE (valid_vecs[i + 1])))
2025 {
2026 /* Update the operands only after build_and_add_sum,
2027 so that we don't have to repeat the placement algorithm
2028 of build_and_add_sum. */
2029 gimple_stmt_iterator gsi = gsi_for_stmt (sum);
2030 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type,
2031 valid_vecs[i + 1]);
2032 tree lhs = make_ssa_name (vec_type);
2033 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2034 gimple_set_uid (g, gimple_uid (sum));
2035 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2036 gimple_assign_set_rhs2 (sum, lhs);
2037 if (sum_vec == tvec)
2038 {
2039 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec);
2040 lhs = make_ssa_name (vec_type);
2041 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce);
2042 gimple_set_uid (g, gimple_uid (sum));
2043 gsi_insert_before (&gsi, g, GSI_NEW_STMT);
2044 gimple_assign_set_rhs1 (sum, lhs);
2045 }
2046 update_stmt (sum);
2047 }
2048 sum_vec = gimple_get_lhs (sum);
2049 info_ptr = *(v_info_map.get (valid_vecs[i + 1]));
2050 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type));
2051 /* Update the related ops of the current candidate VECTOR. */
2052 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2053 {
2054 idx = elem->second;
2055 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2056 /* Set this so that the op definition will get DCEd later. */
2057 gimple_set_visited (def, true);
2058 if (opcode == PLUS_EXPR
2059 || opcode == BIT_XOR_EXPR
2060 || opcode == BIT_IOR_EXPR)
2061 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op));
2062 else if (opcode == MULT_EXPR)
2063 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op));
2064 else
2065 {
2066 gcc_assert (opcode == BIT_AND_EXPR);
2067 (*ops)[idx]->op
2068 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op));
2069 }
2070 (*ops)[idx]->rank = 0;
2071 }
2072 if (dump_file && (dump_flags & TDF_DETAILS))
2073 {
2074 fprintf (dump_file, "Generating addition -> ");
2075 print_gimple_stmt (dump_file, sum, 0);
2076 }
2077 i++;
2078 }
2079 while ((i < valid_vecs.length () - 1)
2080 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode);
2081
2082 /* Referring to the first valid VECTOR with this mode, generate the
2083 BIT_FIELD_REF statements accordingly. */
2084 info_ptr = *(v_info_map.get (tvec));
2085 gcc_assert (sum);
2086 tree elem_type = TREE_TYPE (vec_type);
2087 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem)
2088 {
2089 idx = elem->second;
2090 tree dst = make_ssa_name (elem_type);
2091 tree pos = bitsize_int (elem->first
2092 * tree_to_uhwi (TYPE_SIZE (elem_type)));
2093 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec,
2094 TYPE_SIZE (elem_type), pos);
2095 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr);
2096 insert_stmt_after (gs, sum);
2097 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op);
2099 /* Set this so that the op definition will get DCEd later. */
2099 gimple_set_visited (def, true);
2100 (*ops)[idx]->op = gimple_assign_lhs (gs);
2101 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op);
2102 if (dump_file && (dump_flags & TDF_DETAILS))
2103 {
2104 fprintf (dump_file, "Generating bit_field_ref -> ");
2105 print_gimple_stmt (dump_file, gs, 0);
2106 }
2107 }
2108 }
2109
2110 if (dump_file && (dump_flags & TDF_DETAILS))
2111 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n");
2112
2113 cleanup_vinfo_map (v_info_map);
2114
2115 return true;
2116 }
2117
1775 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison 2118 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison
1776 expression, examine the other OPS to see if any of them are comparisons 2119 expression, examine the other OPS to see if any of them are comparisons
1777 of the same values, which we may be able to combine or eliminate. 2120 of the same values, which we may be able to combine or eliminate.
1778 For example, we can rewrite (a < b) | (a == b) as (a <= b). */ 2121 For example, we can rewrite (a < b) | (a == b) as (a <= b). */
1779 2122
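For intuition, a minimal source-level sketch of that rewrite in plain C
(illustration only; the pass performs the equivalent merge on GIMPLE via
maybe_fold_or_comparisons):

  /* (a < b) | (a == b) has the same truth table as (a <= b).  */
  int or_form (int a, int b) { return (a < b) | (a == b); }
  int le_form (int a, int b) { return a <= b; }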
1818 if (TREE_CODE_CLASS (rcode) != tcc_comparison) 2161 if (TREE_CODE_CLASS (rcode) != tcc_comparison)
1819 continue; 2162 continue;
1820 2163
1821 /* If we got here, we have a match. See if we can combine the 2164 /* If we got here, we have a match. See if we can combine the
1822 two comparisons. */ 2165 two comparisons. */
2166 tree type = TREE_TYPE (gimple_assign_lhs (def1));
1823 if (opcode == BIT_IOR_EXPR) 2167 if (opcode == BIT_IOR_EXPR)
1824 t = maybe_fold_or_comparisons (lcode, op1, op2, 2168 t = maybe_fold_or_comparisons (type,
2169 lcode, op1, op2,
1825 rcode, gimple_assign_rhs1 (def2), 2170 rcode, gimple_assign_rhs1 (def2),
1826 gimple_assign_rhs2 (def2)); 2171 gimple_assign_rhs2 (def2));
1827 else 2172 else
1828 t = maybe_fold_and_comparisons (lcode, op1, op2, 2173 t = maybe_fold_and_comparisons (type,
2174 lcode, op1, op2,
1829 rcode, gimple_assign_rhs1 (def2), 2175 rcode, gimple_assign_rhs1 (def2),
1830 gimple_assign_rhs2 (def2)); 2176 gimple_assign_rhs2 (def2));
1831 if (!t) 2177 if (!t)
1832 continue; 2178 continue;
1833 2179
2037 } 2383 }
2038 oelast = oe; 2384 oelast = oe;
2039 i++; 2385 i++;
2040 } 2386 }
2041 2387
2042 length = ops->length ();
2043 oelast = ops->last ();
2044
2045 if (iterate) 2388 if (iterate)
2046 optimize_ops_list (opcode, ops); 2389 optimize_ops_list (opcode, ops);
2047 } 2390 }
2048 2391
2049 /* The following functions are subroutines to optimize_range_tests and allow 2392 /* The following functions are subroutines to optimize_range_tests and allow
2141 arg0 = gimple_cond_lhs (stmt); 2484 arg0 = gimple_cond_lhs (stmt);
2142 arg1 = gimple_cond_rhs (stmt); 2485 arg1 = gimple_cond_rhs (stmt);
2143 exp_type = boolean_type_node; 2486 exp_type = boolean_type_node;
2144 } 2487 }
2145 2488
2146 if (TREE_CODE (arg0) != SSA_NAME) 2489 if (TREE_CODE (arg0) != SSA_NAME
2490 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0))
2147 break; 2491 break;
2148 loc = gimple_location (stmt); 2492 loc = gimple_location (stmt);
2149 switch (code) 2493 switch (code)
2150 { 2494 {
2151 case BIT_NOT_EXPR: 2495 case BIT_NOT_EXPR:
2535 return false; 2879 return false;
2536 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); 2880 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj);
2537 if (!tree_int_cst_equal (lowxor, highxor)) 2881 if (!tree_int_cst_equal (lowxor, highxor))
2538 return false; 2882 return false;
2539 2883
2884 exp = rangei->exp;
2885 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2886 int prec = GET_MODE_PRECISION (mode);
2887 if (TYPE_PRECISION (type) < prec
2888 || (wi::to_wide (TYPE_MIN_VALUE (type))
2889 != wi::min_value (prec, TYPE_SIGN (type)))
2890 || (wi::to_wide (TYPE_MAX_VALUE (type))
2891 != wi::max_value (prec, TYPE_SIGN (type))))
2892 {
2893 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type));
2894 exp = fold_convert (type, exp);
2895 lowxor = fold_convert (type, lowxor);
2896 lowi = fold_convert (type, lowi);
2897 highi = fold_convert (type, highi);
2898 }
2540 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); 2899 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor);
2541 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem); 2900 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem);
2542 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); 2901 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem);
2543 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); 2902 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem);
2544 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp, 2903 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp,
2545 NULL, rangei->in_p, lowj, highj, 2904 NULL, rangei->in_p, lowj, highj,
2546 rangei->strict_overflow_p 2905 rangei->strict_overflow_p
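A worked instance of the XOR merge above: for the ranges [0, 1] and [4, 5]
we get lowxor = 0 ^ 4 = 4 and highxor = 1 ^ 5 = 4, equal and a power of
two, so masking that bit out of EXP lets one range test cover both
(hedged sketch in plain C, not this function's exact output):

  /* Both forms accept exactly a in {0, 1, 4, 5}.  */
  int before (unsigned a) { return a <= 1 || (a >= 4 && a <= 5); }
  int after  (unsigned a) { return (a & ~4u) <= 1; }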
2579 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) 2938 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST)
2580 return false; 2939 return false;
2581 if (!integer_pow2p (tem1)) 2940 if (!integer_pow2p (tem1))
2582 return false; 2941 return false;
2583 2942
2584 type = unsigned_type_for (type); 2943 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type));
2944 int prec = GET_MODE_PRECISION (mode);
2945 if (TYPE_PRECISION (type) < prec
2946 || (wi::to_wide (TYPE_MIN_VALUE (type))
2947 != wi::min_value (prec, TYPE_SIGN (type)))
2948 || (wi::to_wide (TYPE_MAX_VALUE (type))
2949 != wi::max_value (prec, TYPE_SIGN (type))))
2950 type = build_nonstandard_integer_type (prec, 1);
2951 else
2952 type = unsigned_type_for (type);
2585 tem1 = fold_convert (type, tem1); 2953 tem1 = fold_convert (type, tem1);
2586 tem2 = fold_convert (type, tem2); 2954 tem2 = fold_convert (type, tem2);
2587 lowi = fold_convert (type, lowi); 2955 lowi = fold_convert (type, lowi);
2588 mask = fold_build1 (BIT_NOT_EXPR, type, tem1); 2956 mask = fold_build1 (BIT_NOT_EXPR, type, tem1);
2589 tem1 = fold_build2 (MINUS_EXPR, type, 2957 tem1 = fold_build2 (MINUS_EXPR, type,
3453 return any_changes; 3821 return any_changes;
3454 } 3822 }
3455 3823
3456 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize 3824 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize
3457 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure, 3825 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure,
3458 otherwise the comparison code. */ 3826 otherwise the comparison code. TYPE is a return value that is set
3827 to the type of the comparison. */
3459 3828
3460 static tree_code 3829 static tree_code
3461 ovce_extract_ops (tree var, gassign **rets, bool *reti) 3830 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type)
3462 { 3831 {
3463 if (TREE_CODE (var) != SSA_NAME) 3832 if (TREE_CODE (var) != SSA_NAME)
3464 return ERROR_MARK; 3833 return ERROR_MARK;
3465 3834
3466 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var)); 3835 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var));
3498 /* Success! */ 3867 /* Success! */
3499 if (rets) 3868 if (rets)
3500 *rets = stmt; 3869 *rets = stmt;
3501 if (reti) 3870 if (reti)
3502 *reti = inv; 3871 *reti = inv;
3872 if (type)
3873 *type = TREE_TYPE (cond);
3503 return cmp; 3874 return cmp;
3504 } 3875 }
3505 3876
3506 /* Optimize the condition of VEC_COND_EXPRs which have been combined 3877 /* Optimize the condition of VEC_COND_EXPRs which have been combined
3507 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */ 3878 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */
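With GCC's generic vector extensions, the kind of merge this enables looks
as follows (a hedged sketch; elementwise comparisons yield -1/0 masks, and
the pass performs the analogous fold on VEC_COND_EXPR conditions):

  typedef int v4si __attribute__ ((vector_size (16)));

  /* Elementwise, (x < y) | (x == y) is equivalent to (x <= y).  */
  v4si or_form (v4si x, v4si y) { return (x < y) | (x == y); }
  v4si le_form (v4si x, v4si y) { return x <= y; }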
3519 { 3890 {
3520 tree elt0 = (*ops)[i]->op; 3891 tree elt0 = (*ops)[i]->op;
3521 3892
3522 gassign *stmt0; 3893 gassign *stmt0;
3523 bool invert; 3894 bool invert;
3524 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert); 3895 tree type;
3896 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type);
3525 if (cmp0 == ERROR_MARK) 3897 if (cmp0 == ERROR_MARK)
3526 continue; 3898 continue;
3527 3899
3528 for (j = i + 1; j < length; ++j) 3900 for (j = i + 1; j < length; ++j)
3529 { 3901 {
3530 tree &elt1 = (*ops)[j]->op; 3902 tree &elt1 = (*ops)[j]->op;
3531 3903
3532 gassign *stmt1; 3904 gassign *stmt1;
3533 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL); 3905 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL);
3534 if (cmp1 == ERROR_MARK) 3906 if (cmp1 == ERROR_MARK)
3535 continue; 3907 continue;
3536 3908
3537 tree cond0 = gimple_assign_rhs1 (stmt0); 3909 tree cond0 = gimple_assign_rhs1 (stmt0);
3538 tree x0 = TREE_OPERAND (cond0, 0); 3910 tree x0 = TREE_OPERAND (cond0, 0);
3542 tree x1 = TREE_OPERAND (cond1, 0); 3914 tree x1 = TREE_OPERAND (cond1, 0);
3543 tree y1 = TREE_OPERAND (cond1, 1); 3915 tree y1 = TREE_OPERAND (cond1, 1);
3544 3916
3545 tree comb; 3917 tree comb;
3546 if (opcode == BIT_AND_EXPR) 3918 if (opcode == BIT_AND_EXPR)
3547 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1); 3919 comb = maybe_fold_and_comparisons (type, cmp0, x0, y0, cmp1, x1,
3920 y1);
3548 else if (opcode == BIT_IOR_EXPR) 3921 else if (opcode == BIT_IOR_EXPR)
3549 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1); 3922 comb = maybe_fold_or_comparisons (type, cmp0, x0, y0, cmp1, x1,
3923 y1);
3550 else 3924 else
3551 gcc_unreachable (); 3925 gcc_unreachable ();
3552 if (comb == NULL) 3926 if (comb == NULL)
3553 continue; 3927 continue;
3554 3928
3837 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, 4211 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable,
3838 return true and fill in *OPS recursively. */ 4212 return true and fill in *OPS recursively. */
3839 4213
3840 static bool 4214 static bool
3841 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops, 4215 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops,
3842 struct loop *loop) 4216 class loop *loop)
3843 { 4217 {
3844 gimple *stmt = SSA_NAME_DEF_STMT (var); 4218 gimple *stmt = SSA_NAME_DEF_STMT (var);
3845 tree rhs[2]; 4219 tree rhs[2];
3846 int i; 4220 int i;
3847 4221
3872 they were changed during update_range_test and if yes, create new 4246 they were changed during update_range_test and if yes, create new
3873 stmts. */ 4247 stmts. */
3874 4248
3875 static tree 4249 static tree
3876 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops, 4250 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops,
3877 unsigned int *pidx, struct loop *loop) 4251 unsigned int *pidx, class loop *loop)
3878 { 4252 {
3879 gimple *stmt = SSA_NAME_DEF_STMT (var); 4253 gimple *stmt = SSA_NAME_DEF_STMT (var);
3880 tree rhs[4]; 4254 tree rhs[4];
3881 int i; 4255 int i;
3882 4256
4644 5018
4645 static int 5019 static int
4646 get_reassociation_width (int ops_num, enum tree_code opc, 5020 get_reassociation_width (int ops_num, enum tree_code opc,
4647 machine_mode mode) 5021 machine_mode mode)
4648 { 5022 {
4649 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH); 5023 int param_width = param_tree_reassoc_width;
4650 int width; 5024 int width;
4651 int width_min; 5025 int width_min;
4652 int cycles_best; 5026 int cycles_best;
4653 5027
4654 if (param_width > 0) 5028 if (param_width > 0)
4785 update_stmt (stmts[i]); 5159 update_stmt (stmts[i]);
4786 } 5160 }
4787 else 5161 else
4788 { 5162 {
4789 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); 5163 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode);
5164 gimple_set_visited (stmts[i], true);
4790 } 5165 }
4791 if (dump_file && (dump_flags & TDF_DETAILS)) 5166 if (dump_file && (dump_flags & TDF_DETAILS))
4792 { 5167 {
4793 fprintf (dump_file, " into "); 5168 fprintf (dump_file, " into ");
4794 print_gimple_stmt (dump_file, stmts[i], 0); 5169 print_gimple_stmt (dump_file, stmts[i], 0);
4809 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); 5184 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
4810 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); 5185 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
4811 gimple *oldbinrhs = binrhs; 5186 gimple *oldbinrhs = binrhs;
4812 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 5187 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
4813 gimple *newbinrhs = NULL; 5188 gimple *newbinrhs = NULL;
4814 struct loop *loop = loop_containing_stmt (stmt); 5189 class loop *loop = loop_containing_stmt (stmt);
4815 tree lhs = gimple_assign_lhs (stmt); 5190 tree lhs = gimple_assign_lhs (stmt);
4816 5191
4817 gcc_assert (is_reassociable_op (binlhs, rhscode, loop) 5192 gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
4818 && is_reassociable_op (binrhs, rhscode, loop)); 5193 && is_reassociable_op (binrhs, rhscode, loop));
4819 5194
4943 { 5318 {
4944 tree lhs = gimple_assign_lhs (stmt); 5319 tree lhs = gimple_assign_lhs (stmt);
4945 tree binlhs = gimple_assign_rhs1 (stmt); 5320 tree binlhs = gimple_assign_rhs1 (stmt);
4946 tree binrhs = gimple_assign_rhs2 (stmt); 5321 tree binrhs = gimple_assign_rhs2 (stmt);
4947 gimple *immusestmt; 5322 gimple *immusestmt;
4948 struct loop *loop = loop_containing_stmt (stmt); 5323 class loop *loop = loop_containing_stmt (stmt);
4949 5324
4950 if (TREE_CODE (binlhs) == SSA_NAME 5325 if (TREE_CODE (binlhs) == SSA_NAME
4951 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) 5326 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
4952 return true; 5327 return true;
4953 5328
5098 tree binrhs = gimple_assign_rhs2 (stmt); 5473 tree binrhs = gimple_assign_rhs2 (stmt);
5099 gimple *binlhsdef = NULL, *binrhsdef = NULL; 5474 gimple *binlhsdef = NULL, *binrhsdef = NULL;
5100 bool binlhsisreassoc = false; 5475 bool binlhsisreassoc = false;
5101 bool binrhsisreassoc = false; 5476 bool binrhsisreassoc = false;
5102 enum tree_code rhscode = gimple_assign_rhs_code (stmt); 5477 enum tree_code rhscode = gimple_assign_rhs_code (stmt);
5103 struct loop *loop = loop_containing_stmt (stmt); 5478 class loop *loop = loop_containing_stmt (stmt);
5104 5479
5105 if (set_visited) 5480 if (set_visited)
5106 gimple_set_visited (stmt, true); 5481 gimple_set_visited (stmt, true);
5107 5482
5108 if (TREE_CODE (binlhs) == SSA_NAME) 5483 if (TREE_CODE (binlhs) == SSA_NAME)
5854 && !stmt_could_throw_p (cfun, stmt)) 6229 && !stmt_could_throw_p (cfun, stmt))
5855 { 6230 {
5856 tree lhs, rhs1, rhs2; 6231 tree lhs, rhs1, rhs2;
5857 enum tree_code rhs_code = gimple_assign_rhs_code (stmt); 6232 enum tree_code rhs_code = gimple_assign_rhs_code (stmt);
5858 6233
5859 /* If this is not a gimple binary expression, there is
5860 nothing for us to do with it. */
5861 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
5862 continue;
5863
5864 /* If this was part of an already processed statement, 6234 /* If this was part of an already processed statement,
5865 we don't need to touch it again. */ 6235 we don't need to touch it again. */
5866 if (gimple_visited_p (stmt)) 6236 if (gimple_visited_p (stmt))
5867 { 6237 {
5868 /* This statement might have become dead because of previous 6238 /* This statement might have become dead because of previous
5885 } 6255 }
5886 } 6256 }
5887 continue; 6257 continue;
5888 } 6258 }
5889 6259
6260 /* If this is not a gimple binary expression, there is
6261 nothing for us to do with it. */
6262 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
6263 continue;
6264
5890 lhs = gimple_assign_lhs (stmt); 6265 lhs = gimple_assign_lhs (stmt);
5891 rhs1 = gimple_assign_rhs1 (stmt); 6266 rhs1 = gimple_assign_rhs1 (stmt);
5892 rhs2 = gimple_assign_rhs2 (stmt); 6267 rhs2 = gimple_assign_rhs2 (stmt);
5893 6268
5894 /* For non-bit or min/max operations we can't associate 6269 /* For non-bit or min/max operations we can't associate
5923 loop_containing_stmt (stmt))) 6298 loop_containing_stmt (stmt)))
5924 { 6299 {
5925 ops.qsort (sort_by_operand_rank); 6300 ops.qsort (sort_by_operand_rank);
5926 optimize_ops_list (rhs_code, &ops); 6301 optimize_ops_list (rhs_code, &ops);
5927 } 6302 }
5928 6303 if (undistribute_bitref_for_vector (rhs_code, &ops,
6304 loop_containing_stmt (stmt)))
6305 {
6306 ops.qsort (sort_by_operand_rank);
6307 optimize_ops_list (rhs_code, &ops);
6308 }
5929 if (rhs_code == PLUS_EXPR 6309 if (rhs_code == PLUS_EXPR
5930 && transform_add_to_multiply (&ops)) 6310 && transform_add_to_multiply (&ops))
5931 ops.qsort (sort_by_operand_rank); 6311 ops.qsort (sort_by_operand_rank);
5932 6312
5933 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR) 6313 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR)
5985 } 6365 }
5986 else 6366 else
5987 { 6367 {
5988 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); 6368 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs));
5989 int ops_num = ops.length (); 6369 int ops_num = ops.length ();
5990 int width = get_reassociation_width (ops_num, rhs_code, mode); 6370 int width;
5991
5992 if (dump_file && (dump_flags & TDF_DETAILS))
5993 fprintf (dump_file,
5994 "Width = %d was chosen for reassociation\n", width);
5995
5996 6371
5997 /* For binary bit operations, if there are at least 3 6372 /* For binary bit operations, if there are at least 3
5998 operands and the last operand in OPS is a constant, 6373 operands and the last operand in OPS is a constant,
5999 move it to the front. This helps ensure that we generate 6374 move it to the front. This helps ensure that we generate
6000 (X & Y) & C rather than (X & C) & Y. The former will 6375 (X & Y) & C rather than (X & C) & Y. The former will
6004 || rhs_code == BIT_IOR_EXPR 6379 || rhs_code == BIT_IOR_EXPR
6005 || rhs_code == BIT_XOR_EXPR) 6380 || rhs_code == BIT_XOR_EXPR)
6006 && TREE_CODE (ops.last ()->op) == INTEGER_CST) 6381 && TREE_CODE (ops.last ()->op) == INTEGER_CST)
6007 std::swap (*ops[0], *ops[ops_num - 1]); 6382 std::swap (*ops[0], *ops[ops_num - 1]);
6008 6383
6009 if (width > 1 6384 /* Only rewrite the expression tree to parallel in the
6010 && ops.length () > 3) 6385 last reassoc pass to avoid useless work back-and-forth
6011 rewrite_expr_tree_parallel (as_a <gassign *> (stmt), 6386 with initial linearization. */
6012 width, ops); 6387 if (!reassoc_insert_powi_p
6388 && ops.length () > 3
6389 && (width = get_reassociation_width (ops_num, rhs_code,
6390 mode)) > 1)
6391 {
6392 if (dump_file && (dump_flags & TDF_DETAILS))
6393 fprintf (dump_file,
6394 "Width = %d was chosen for reassociation\n",
6395 width);
6396 rewrite_expr_tree_parallel (as_a <gassign *> (stmt),
6397 width, ops);
6398 }
6013 else 6399 else
6014 { 6400 {
6015 /* When there are three operands left, we want 6401 /* When there are three operands left, we want
6016 to make sure the ones that get the double 6402 to make sure the ones that get the double
6017 binary op are chosen wisely. */ 6403 binary op are chosen wisely. */
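
To make the width decision concrete, here is a minimal sketch of the shape
rewrite_expr_tree_parallel aims for (plain C on integers, where
reassociation is exact; serial/parallel are illustrative names):

  /* Serial chain: each addition depends on the previous one (depth 3).  */
  int serial (int a, int b, int c, int d)
  { return ((a + b) + c) + d; }

  /* Width-2 rewrite: the two halves are independent (depth 2).  */
  int parallel (int a, int b, int c, int d)
  { return (a + b) + (c + d); }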