comparison gcc/tree-ssa-reassoc.c @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | 84e7813d76e9 |
children | |
131:84e7813d76e9 | 145:1830386684a0 |
---|---|
1 /* Reassociation for trees. | 1 /* Reassociation for trees. |
2 Copyright (C) 2005-2018 Free Software Foundation, Inc. | 2 Copyright (C) 2005-2020 Free Software Foundation, Inc. |
3 Contributed by Daniel Berlin <dan@dberlin.org> | 3 Contributed by Daniel Berlin <dan@dberlin.org> |
4 | 4 |
5 This file is part of GCC. | 5 This file is part of GCC. |
6 | 6 |
7 GCC is free software; you can redistribute it and/or modify | 7 GCC is free software; you can redistribute it and/or modify |
46 #include "tree-ssa-loop.h" | 46 #include "tree-ssa-loop.h" |
47 #include "flags.h" | 47 #include "flags.h" |
48 #include "tree-ssa.h" | 48 #include "tree-ssa.h" |
49 #include "langhooks.h" | 49 #include "langhooks.h" |
50 #include "cfgloop.h" | 50 #include "cfgloop.h" |
51 #include "params.h" | |
52 #include "builtins.h" | 51 #include "builtins.h" |
53 #include "gimplify.h" | 52 #include "gimplify.h" |
54 #include "case-cfn-macros.h" | 53 #include "case-cfn-macros.h" |
55 | 54 |
56 /* This is a simple global reassociation pass. It is, in part, based | 55 /* This is a simple global reassociation pass. It is, in part, based |
268 block rank of its containing block. */ | 267 block rank of its containing block. */ |
269 static long | 268 static long |
270 phi_rank (gimple *stmt) | 269 phi_rank (gimple *stmt) |
271 { | 270 { |
272 basic_block bb = gimple_bb (stmt); | 271 basic_block bb = gimple_bb (stmt); |
273 struct loop *father = bb->loop_father; | 272 class loop *father = bb->loop_father; |
274 tree res; | 273 tree res; |
275 unsigned i; | 274 unsigned i; |
276 use_operand_p use; | 275 use_operand_p use; |
277 gimple *use_stmt; | 276 gimple *use_stmt; |
278 | 277 |
601 | 600 |
602 /* Return true if STMT is reassociable operation containing a binary | 601 /* Return true if STMT is reassociable operation containing a binary |
603 operation with tree code CODE, and is inside LOOP. */ | 602 operation with tree code CODE, and is inside LOOP. */ |
604 | 603 |
605 static bool | 604 static bool |
606 is_reassociable_op (gimple *stmt, enum tree_code code, struct loop *loop) | 605 is_reassociable_op (gimple *stmt, enum tree_code code, class loop *loop) |
607 { | 606 { |
608 basic_block bb = gimple_bb (stmt); | 607 basic_block bb = gimple_bb (stmt); |
609 | 608 |
610 if (gimple_bb (stmt) == NULL) | 609 if (gimple_bb (stmt) == NULL) |
611 return false; | 610 return false; |
1013 { | 1012 { |
1014 if (dump_file && (dump_flags & TDF_DETAILS)) | 1013 if (dump_file && (dump_flags & TDF_DETAILS)) |
1015 fprintf (dump_file, "Found * 0, removing all other ops\n"); | 1014 fprintf (dump_file, "Found * 0, removing all other ops\n"); |
1016 | 1015 |
1017 reassociate_stats.ops_eliminated += ops->length () - 1; | 1016 reassociate_stats.ops_eliminated += ops->length () - 1; |
1018 ops->truncate (1); | 1017 ops->truncate (0); |
1019 ops->quick_push (oelast); | 1018 ops->quick_push (oelast); |
1020 return; | 1019 return; |
1021 } | 1020 } |
1022 } | 1021 } |
1023 else if (integer_onep (oelast->op) | 1022 else if (integer_onep (oelast->op) |
1558 reassociation algorithm which allows optimizing a * x * y + b * y * x | 1557 reassociation algorithm which allows optimizing a * x * y + b * y * x |
1559 to (a + b) * x * y in one invocation of the reassociation pass. */ | 1558 to (a + b) * x * y in one invocation of the reassociation pass. */ |
1560 | 1559 |
1561 static bool | 1560 static bool |
1562 undistribute_ops_list (enum tree_code opcode, | 1561 undistribute_ops_list (enum tree_code opcode, |
1563 vec<operand_entry *> *ops, struct loop *loop) | 1562 vec<operand_entry *> *ops, class loop *loop) |
1564 { | 1563 { |
1565 unsigned int length = ops->length (); | 1564 unsigned int length = ops->length (); |
1566 operand_entry *oe1; | 1565 operand_entry *oe1; |
1567 unsigned i, j; | 1566 unsigned i, j; |
1568 unsigned nr_candidates, nr_candidates2; | 1567 unsigned nr_candidates, nr_candidates2; |
1770 cvec.release (); | 1769 cvec.release (); |
1771 | 1770 |
1772 return changed; | 1771 return changed; |
1773 } | 1772 } |
1774 | 1773 |
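As a hedged source-level sketch of the undistribution described in the comment above (the function and variable names here are illustrative assumptions, not from the patch):

    /* Two multiply chains that share the factors x and y.  */
    int
    f (int a, int b, int x, int y)
    {
      return a * x * y + b * y * x;
    }

    /* undistribute_ops_list factors out the shared operands so the sum is
       computed as (a + b) * x * y, replacing four multiplies with two.  */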
1774 /* Pair to hold the information of one specific VECTOR_TYPE SSA_NAME: | |
1775 first: element index for each relevant BIT_FIELD_REF. | |
1776 second: the index of vec ops* for each relevant BIT_FIELD_REF. */ | |
1777 typedef std::pair<unsigned, unsigned> v_info_elem; | |
1778 struct v_info { | |
1779 tree vec_type; | |
1780 auto_vec<v_info_elem, 32> vec; | |
1781 }; | |
1782 typedef v_info *v_info_ptr; | |
1783 | |
1784 /* Comparison function for qsort on VECTOR SSA_NAME trees by machine mode. */ | |
1785 static int | |
1786 sort_by_mach_mode (const void *p_i, const void *p_j) | |
1787 { | |
1788 const tree tr1 = *((const tree *) p_i); | |
1789 const tree tr2 = *((const tree *) p_j); | |
1790 unsigned int mode1 = TYPE_MODE (TREE_TYPE (tr1)); | |
1791 unsigned int mode2 = TYPE_MODE (TREE_TYPE (tr2)); | |
1792 if (mode1 > mode2) | |
1793 return 1; | |
1794 else if (mode1 < mode2) | |
1795 return -1; | |
1796 else | |
1797 return 0; | |
1798 } | |
1799 | |
1800 /* Cleanup hash map for VECTOR information. */ | |
1801 static void | |
1802 cleanup_vinfo_map (hash_map<tree, v_info_ptr> &info_map) | |
1803 { | |
1804 for (hash_map<tree, v_info_ptr>::iterator it = info_map.begin (); | |
1805 it != info_map.end (); ++it) | |
1806 { | |
1807 v_info_ptr info = (*it).second; | |
1808 delete info; | |
1809 (*it).second = NULL; | |
1810 } | |
1811 } | |
1812 | |
1813 /* Perform un-distribution of BIT_FIELD_REF on VECTOR_TYPE. | |
1814 V1[0] + V1[1] + ... + V1[k] + V2[0] + V2[1] + ... + V2[k] + ... Vn[k] | |
1815 is transformed to | |
1816 Vs = (V1 + V2 + ... + Vn) | |
1817 Vs[0] + Vs[1] + ... + Vs[k] | |
1818 | |
1819 The basic steps are listed below: | |
1820 | |
1821 1) Check the addition chain *OPS by looking at the summands that come | |
1822 from a BIT_FIELD_REF on a VECTOR type. Put the information into | |
1823 v_info_map for each qualifying summand, using the VECTOR SSA_NAME as key. | |
1824 | |
1825 2) For each key (VECTOR SSA_NAME), validate that all its BIT_FIELD_REFs | |
1826 are contiguous and cover the whole VECTOR perfectly without any holes. | |
1827 Obtain one VECTOR list which contains the candidates to be transformed. | |
1828 | |
1829 3) Sort the VECTOR list by machine mode of the VECTOR type; for each group | |
1830 of candidates with the same mode, build the addition statements for them | |
1831 and generate BIT_FIELD_REFs accordingly. | |
1832 | |
1833 TODO: | |
1834 The current implementation requires that each VECTOR be fully covered. | |
1835 It could be extended to support partial coverage (adjacent elements | |
1836 that do not fill the whole vector), which may need a cost model to | |
1837 decide where to draw the boundary. | |
1838 */ | |
1839 static bool | |
1840 undistribute_bitref_for_vector (enum tree_code opcode, | |
1841 vec<operand_entry *> *ops, struct loop *loop) | |
1842 { | |
1843 if (ops->length () <= 1) | |
1844 return false; | |
1845 | |
1846 if (opcode != PLUS_EXPR | |
1847 && opcode != MULT_EXPR | |
1848 && opcode != BIT_XOR_EXPR | |
1849 && opcode != BIT_IOR_EXPR | |
1850 && opcode != BIT_AND_EXPR) | |
1851 return false; | |
1852 | |
1853 hash_map<tree, v_info_ptr> v_info_map; | |
1854 operand_entry *oe1; | |
1855 unsigned i; | |
1856 | |
1857 /* Find the summands that come from VECTOR BIT_FIELD_REFs in the addition | |
1858 chain, and put the information into the map. */ | |
1859 FOR_EACH_VEC_ELT (*ops, i, oe1) | |
1860 { | |
1861 enum tree_code dcode; | |
1862 gimple *oe1def; | |
1863 | |
1864 if (TREE_CODE (oe1->op) != SSA_NAME) | |
1865 continue; | |
1866 oe1def = SSA_NAME_DEF_STMT (oe1->op); | |
1867 if (!is_gimple_assign (oe1def)) | |
1868 continue; | |
1869 dcode = gimple_assign_rhs_code (oe1def); | |
1870 if (dcode != BIT_FIELD_REF || !is_reassociable_op (oe1def, dcode, loop)) | |
1871 continue; | |
1872 | |
1873 tree rhs = gimple_assign_rhs1 (oe1def); | |
1874 tree vec = TREE_OPERAND (rhs, 0); | |
1875 tree vec_type = TREE_TYPE (vec); | |
1876 | |
1877 if (TREE_CODE (vec) != SSA_NAME || !VECTOR_TYPE_P (vec_type)) | |
1878 continue; | |
1879 | |
1880 /* Ignore it if target machine can't support this VECTOR type. */ | |
1881 if (!VECTOR_MODE_P (TYPE_MODE (vec_type))) | |
1882 continue; | |
1883 | |
1884 /* Check const vector type, constrain BIT_FIELD_REF offset and size. */ | |
1885 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ()) | |
1886 continue; | |
1887 | |
1888 if (VECTOR_TYPE_P (TREE_TYPE (rhs)) | |
1889 || !is_a <scalar_mode> (TYPE_MODE (TREE_TYPE (rhs)))) | |
1890 continue; | |
1891 | |
1892 /* The type of BIT_FIELD_REF might not be equal to the element type of | |
1893 the vector. We want to use a vector type with element type the | |
1894 same as the BIT_FIELD_REF and size the same as TREE_TYPE (vec). */ | |
1895 if (!useless_type_conversion_p (TREE_TYPE (rhs), TREE_TYPE (vec_type))) | |
1896 { | |
1897 machine_mode simd_mode; | |
1898 unsigned HOST_WIDE_INT size, nunits; | |
1899 unsigned HOST_WIDE_INT elem_size | |
1900 = tree_to_uhwi (TYPE_SIZE (TREE_TYPE (rhs))); | |
1901 if (!GET_MODE_BITSIZE (TYPE_MODE (vec_type)).is_constant (&size)) | |
1902 continue; | |
1903 if (size <= elem_size || (size % elem_size) != 0) | |
1904 continue; | |
1905 nunits = size / elem_size; | |
1906 if (!mode_for_vector (SCALAR_TYPE_MODE (TREE_TYPE (rhs)), | |
1907 nunits).exists (&simd_mode)) | |
1908 continue; | |
1909 vec_type = build_vector_type_for_mode (TREE_TYPE (rhs), simd_mode); | |
1910 | |
1911 /* Ignore it if target machine can't support this VECTOR type. */ | |
1912 if (!VECTOR_MODE_P (TYPE_MODE (vec_type))) | |
1913 continue; | |
1914 | |
1915 /* Check const vector type, constrain BIT_FIELD_REF offset and | |
1916 size. */ | |
1917 if (!TYPE_VECTOR_SUBPARTS (vec_type).is_constant ()) | |
1918 continue; | |
1919 | |
1920 if (maybe_ne (GET_MODE_SIZE (TYPE_MODE (vec_type)), | |
1921 GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (vec))))) | |
1922 continue; | |
1923 } | |
1924 | |
1925 tree elem_type = TREE_TYPE (vec_type); | |
1926 unsigned HOST_WIDE_INT elem_size = tree_to_uhwi (TYPE_SIZE (elem_type)); | |
1927 if (maybe_ne (bit_field_size (rhs), elem_size)) | |
1928 continue; | |
1929 | |
1930 unsigned idx; | |
1931 if (!constant_multiple_p (bit_field_offset (rhs), elem_size, &idx)) | |
1932 continue; | |
1933 | |
1934 /* Ignore it if target machine can't support this type of VECTOR | |
1935 operation. */ | |
1936 optab op_tab = optab_for_tree_code (opcode, vec_type, optab_vector); | |
1937 if (optab_handler (op_tab, TYPE_MODE (vec_type)) == CODE_FOR_nothing) | |
1938 continue; | |
1939 | |
1940 bool existed; | |
1941 v_info_ptr &info = v_info_map.get_or_insert (vec, &existed); | |
1942 if (!existed) | |
1943 { | |
1944 info = new v_info; | |
1945 info->vec_type = vec_type; | |
1946 } | |
1947 else if (!types_compatible_p (vec_type, info->vec_type)) | |
1948 continue; | |
1949 info->vec.safe_push (std::make_pair (idx, i)); | |
1950 } | |
1951 | |
1952 /* We need at least two VECTORs to combine. */ | |
1953 if (v_info_map.elements () <= 1) | |
1954 { | |
1955 cleanup_vinfo_map (v_info_map); | |
1956 return false; | |
1957 } | |
1958 | |
1959 /* Verify all VECTOR candidates by checking two conditions: | |
1960 1) sorted offsets are adjacent, no holes. | |
1961 2) can fill the whole VECTOR perfectly. | |
1962 And add the valid candidates to a vector for further handling. */ | |
1963 auto_vec<tree> valid_vecs (v_info_map.elements ()); | |
1964 for (hash_map<tree, v_info_ptr>::iterator it = v_info_map.begin (); | |
1965 it != v_info_map.end (); ++it) | |
1966 { | |
1967 tree cand_vec = (*it).first; | |
1968 v_info_ptr cand_info = (*it).second; | |
1969 unsigned int num_elems | |
1970 = TYPE_VECTOR_SUBPARTS (cand_info->vec_type).to_constant (); | |
1971 if (cand_info->vec.length () != num_elems) | |
1972 continue; | |
1973 sbitmap holes = sbitmap_alloc (num_elems); | |
1974 bitmap_ones (holes); | |
1975 bool valid = true; | |
1976 v_info_elem *curr; | |
1977 FOR_EACH_VEC_ELT (cand_info->vec, i, curr) | |
1978 { | |
1979 if (!bitmap_bit_p (holes, curr->first)) | |
1980 { | |
1981 valid = false; | |
1982 break; | |
1983 } | |
1984 else | |
1985 bitmap_clear_bit (holes, curr->first); | |
1986 } | |
1987 if (valid && bitmap_empty_p (holes)) | |
1988 valid_vecs.quick_push (cand_vec); | |
1989 sbitmap_free (holes); | |
1990 } | |
1991 | |
1992 /* We need at least two VECTORs to combine. */ | |
1993 if (valid_vecs.length () <= 1) | |
1994 { | |
1995 cleanup_vinfo_map (v_info_map); | |
1996 return false; | |
1997 } | |
1998 | |
1999 valid_vecs.qsort (sort_by_mach_mode); | |
2000 /* Go through all candidates in machine mode order; modes with only | |
2001 a single candidate are skipped. */ | |
2002 for (unsigned i = 0; i < valid_vecs.length () - 1; ++i) | |
2003 { | |
2004 tree tvec = valid_vecs[i]; | |
2005 enum machine_mode mode = TYPE_MODE (TREE_TYPE (tvec)); | |
2006 | |
2007 /* Skip modes with only a single candidate. */ | |
2008 if (TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) != mode) | |
2009 continue; | |
2010 | |
2011 unsigned int idx, j; | |
2012 gimple *sum = NULL; | |
2013 tree sum_vec = tvec; | |
2014 v_info_ptr info_ptr = *(v_info_map.get (tvec)); | |
2015 v_info_elem *elem; | |
2016 tree vec_type = info_ptr->vec_type; | |
2017 | |
2018 /* Build the sum for all candidates with same mode. */ | |
2019 do | |
2020 { | |
2021 sum = build_and_add_sum (vec_type, sum_vec, | |
2022 valid_vecs[i + 1], opcode); | |
2023 if (!useless_type_conversion_p (vec_type, | |
2024 TREE_TYPE (valid_vecs[i + 1]))) | |
2025 { | |
2026 /* Update the operands only after build_and_add_sum, | |
2027 so that we don't have to repeat the placement algorithm | |
2028 of build_and_add_sum. */ | |
2029 gimple_stmt_iterator gsi = gsi_for_stmt (sum); | |
2030 tree vce = build1 (VIEW_CONVERT_EXPR, vec_type, | |
2031 valid_vecs[i + 1]); | |
2032 tree lhs = make_ssa_name (vec_type); | |
2033 gimple *g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce); | |
2034 gimple_set_uid (g, gimple_uid (sum)); | |
2035 gsi_insert_before (&gsi, g, GSI_NEW_STMT); | |
2036 gimple_assign_set_rhs2 (sum, lhs); | |
2037 if (sum_vec == tvec) | |
2038 { | |
2039 vce = build1 (VIEW_CONVERT_EXPR, vec_type, sum_vec); | |
2040 lhs = make_ssa_name (vec_type); | |
2041 g = gimple_build_assign (lhs, VIEW_CONVERT_EXPR, vce); | |
2042 gimple_set_uid (g, gimple_uid (sum)); | |
2043 gsi_insert_before (&gsi, g, GSI_NEW_STMT); | |
2044 gimple_assign_set_rhs1 (sum, lhs); | |
2045 } | |
2046 update_stmt (sum); | |
2047 } | |
2048 sum_vec = gimple_get_lhs (sum); | |
2049 info_ptr = *(v_info_map.get (valid_vecs[i + 1])); | |
2050 gcc_assert (types_compatible_p (vec_type, info_ptr->vec_type)); | |
2051 /* Update the related ops of the current candidate VECTOR. */ | |
2052 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem) | |
2053 { | |
2054 idx = elem->second; | |
2055 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op); | |
2056 /* Set this so that the op definition will get DCEd later. */ | |
2057 gimple_set_visited (def, true); | |
2058 if (opcode == PLUS_EXPR | |
2059 || opcode == BIT_XOR_EXPR | |
2060 || opcode == BIT_IOR_EXPR) | |
2061 (*ops)[idx]->op = build_zero_cst (TREE_TYPE ((*ops)[idx]->op)); | |
2062 else if (opcode == MULT_EXPR) | |
2063 (*ops)[idx]->op = build_one_cst (TREE_TYPE ((*ops)[idx]->op)); | |
2064 else | |
2065 { | |
2066 gcc_assert (opcode == BIT_AND_EXPR); | |
2067 (*ops)[idx]->op | |
2068 = build_all_ones_cst (TREE_TYPE ((*ops)[idx]->op)); | |
2069 } | |
2070 (*ops)[idx]->rank = 0; | |
2071 } | |
2072 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2073 { | |
2074 fprintf (dump_file, "Generating addition -> "); | |
2075 print_gimple_stmt (dump_file, sum, 0); | |
2076 } | |
2077 i++; | |
2078 } | |
2079 while ((i < valid_vecs.length () - 1) | |
2080 && TYPE_MODE (TREE_TYPE (valid_vecs[i + 1])) == mode); | |
2081 | |
2082 /* Referring to the first valid VECTOR with this mode, generate the | |
2083 BIT_FIELD_REF statements accordingly. */ | |
2084 info_ptr = *(v_info_map.get (tvec)); | |
2085 gcc_assert (sum); | |
2086 tree elem_type = TREE_TYPE (vec_type); | |
2087 FOR_EACH_VEC_ELT (info_ptr->vec, j, elem) | |
2088 { | |
2089 idx = elem->second; | |
2090 tree dst = make_ssa_name (elem_type); | |
2091 tree pos = bitsize_int (elem->first | |
2092 * tree_to_uhwi (TYPE_SIZE (elem_type))); | |
2093 tree bfr = build3 (BIT_FIELD_REF, elem_type, sum_vec, | |
2094 TYPE_SIZE (elem_type), pos); | |
2095 gimple *gs = gimple_build_assign (dst, BIT_FIELD_REF, bfr); | |
2096 insert_stmt_after (gs, sum); | |
2097 gimple *def = SSA_NAME_DEF_STMT ((*ops)[idx]->op); | |
2098 /* Set this so that the op definition will get DCEd later. */ | |
2099 gimple_set_visited (def, true); | |
2100 (*ops)[idx]->op = gimple_assign_lhs (gs); | |
2101 (*ops)[idx]->rank = get_rank ((*ops)[idx]->op); | |
2102 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2103 { | |
2104 fprintf (dump_file, "Generating bit_field_ref -> "); | |
2105 print_gimple_stmt (dump_file, gs, 0); | |
2106 } | |
2107 } | |
2108 } | |
2109 | |
2110 if (dump_file && (dump_flags & TDF_DETAILS)) | |
2111 fprintf (dump_file, "undistributing bit_field_ref for vector done.\n"); | |
2112 | |
2113 cleanup_vinfo_map (v_info_map); | |
2114 | |
2115 return true; | |
2116 } | |
2117 | |
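To make the transformation above concrete, a hedged source-level sketch (the v4sf typedef and function name are illustrative assumptions, using GCC's vector extensions):

    typedef float v4sf __attribute__ ((vector_size (16)));

    /* Before: eight lane extracts (BIT_FIELD_REFs) feeding scalar adds.  */
    float
    sum_lanes (v4sf v1, v4sf v2)
    {
      return v1[0] + v1[1] + v1[2] + v1[3]
             + v2[0] + v2[1] + v2[2] + v2[3];
    }

    /* After undistribute_bitref_for_vector: one vector add Vs = v1 + v2
       followed by four lane extracts, Vs[0] + Vs[1] + Vs[2] + Vs[3].  */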
1775 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison | 2118 /* If OPCODE is BIT_IOR_EXPR or BIT_AND_EXPR and CURR is a comparison |
1776 expression, examine the other OPS to see if any of them are comparisons | 2119 expression, examine the other OPS to see if any of them are comparisons |
1777 of the same values, which we may be able to combine or eliminate. | 2120 of the same values, which we may be able to combine or eliminate. |
1778 For example, we can rewrite (a < b) | (a == b) as (a <= b). */ | 2121 For example, we can rewrite (a < b) | (a == b) as (a <= b). */ |
1779 | 2122 |
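A hedged sketch of the rewrite named in the comment (the function name is an illustrative assumption):

    int
    le (int a, int b)
    {
      /* maybe_fold_or_comparisons merges two comparisons of the same
         operands: (a < b) | (a == b) folds to (a <= b).  */
      return (a < b) | (a == b);
    }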
1818 if (TREE_CODE_CLASS (rcode) != tcc_comparison) | 2161 if (TREE_CODE_CLASS (rcode) != tcc_comparison) |
1819 continue; | 2162 continue; |
1820 | 2163 |
1821 /* If we got here, we have a match. See if we can combine the | 2164 /* If we got here, we have a match. See if we can combine the |
1822 two comparisons. */ | 2165 two comparisons. */ |
2166 tree type = TREE_TYPE (gimple_assign_lhs (def1)); | |
1823 if (opcode == BIT_IOR_EXPR) | 2167 if (opcode == BIT_IOR_EXPR) |
1824 t = maybe_fold_or_comparisons (lcode, op1, op2, | 2168 t = maybe_fold_or_comparisons (type, |
2169 lcode, op1, op2, | |
1825 rcode, gimple_assign_rhs1 (def2), | 2170 rcode, gimple_assign_rhs1 (def2), |
1826 gimple_assign_rhs2 (def2)); | 2171 gimple_assign_rhs2 (def2)); |
1827 else | 2172 else |
1828 t = maybe_fold_and_comparisons (lcode, op1, op2, | 2173 t = maybe_fold_and_comparisons (type, |
2174 lcode, op1, op2, | |
1829 rcode, gimple_assign_rhs1 (def2), | 2175 rcode, gimple_assign_rhs1 (def2), |
1830 gimple_assign_rhs2 (def2)); | 2176 gimple_assign_rhs2 (def2)); |
1831 if (!t) | 2177 if (!t) |
1832 continue; | 2178 continue; |
1833 | 2179 |
2037 } | 2383 } |
2038 oelast = oe; | 2384 oelast = oe; |
2039 i++; | 2385 i++; |
2040 } | 2386 } |
2041 | 2387 |
2042 length = ops->length (); | |
2043 oelast = ops->last (); | |
2044 | |
2045 if (iterate) | 2388 if (iterate) |
2046 optimize_ops_list (opcode, ops); | 2389 optimize_ops_list (opcode, ops); |
2047 } | 2390 } |
2048 | 2391 |
2049 /* The following functions are subroutines to optimize_range_tests and allow | 2392 /* The following functions are subroutines to optimize_range_tests and allow |
2141 arg0 = gimple_cond_lhs (stmt); | 2484 arg0 = gimple_cond_lhs (stmt); |
2142 arg1 = gimple_cond_rhs (stmt); | 2485 arg1 = gimple_cond_rhs (stmt); |
2143 exp_type = boolean_type_node; | 2486 exp_type = boolean_type_node; |
2144 } | 2487 } |
2145 | 2488 |
2146 if (TREE_CODE (arg0) != SSA_NAME) | 2489 if (TREE_CODE (arg0) != SSA_NAME |
2490 || SSA_NAME_OCCURS_IN_ABNORMAL_PHI (arg0)) | |
2147 break; | 2491 break; |
2148 loc = gimple_location (stmt); | 2492 loc = gimple_location (stmt); |
2149 switch (code) | 2493 switch (code) |
2150 { | 2494 { |
2151 case BIT_NOT_EXPR: | 2495 case BIT_NOT_EXPR: |
2535 return false; | 2879 return false; |
2536 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); | 2880 highxor = fold_binary (BIT_XOR_EXPR, type, highi, highj); |
2537 if (!tree_int_cst_equal (lowxor, highxor)) | 2881 if (!tree_int_cst_equal (lowxor, highxor)) |
2538 return false; | 2882 return false; |
2539 | 2883 |
2884 exp = rangei->exp; | |
2885 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); | |
2886 int prec = GET_MODE_PRECISION (mode); | |
2887 if (TYPE_PRECISION (type) < prec | |
2888 || (wi::to_wide (TYPE_MIN_VALUE (type)) | |
2889 != wi::min_value (prec, TYPE_SIGN (type))) | |
2890 || (wi::to_wide (TYPE_MAX_VALUE (type)) | |
2891 != wi::max_value (prec, TYPE_SIGN (type)))) | |
2892 { | |
2893 type = build_nonstandard_integer_type (prec, TYPE_UNSIGNED (type)); | |
2894 exp = fold_convert (type, exp); | |
2895 lowxor = fold_convert (type, lowxor); | |
2896 lowi = fold_convert (type, lowi); | |
2897 highi = fold_convert (type, highi); | |
2898 } | |
2540 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); | 2899 tem = fold_build1 (BIT_NOT_EXPR, type, lowxor); |
2541 exp = fold_build2 (BIT_AND_EXPR, type, rangei->exp, tem); | 2900 exp = fold_build2 (BIT_AND_EXPR, type, exp, tem); |
2542 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); | 2901 lowj = fold_build2 (BIT_AND_EXPR, type, lowi, tem); |
2543 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); | 2902 highj = fold_build2 (BIT_AND_EXPR, type, highi, tem); |
2544 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp, | 2903 if (update_range_test (rangei, rangej, NULL, 1, opcode, ops, exp, |
2545 NULL, rangei->in_p, lowj, highj, | 2904 NULL, rangei->in_p, lowj, highj, |
2546 rangei->strict_overflow_p | 2905 rangei->strict_overflow_p |
2579 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) | 2938 if (tem1 == NULL_TREE || TREE_CODE (tem1) != INTEGER_CST) |
2580 return false; | 2939 return false; |
2581 if (!integer_pow2p (tem1)) | 2940 if (!integer_pow2p (tem1)) |
2582 return false; | 2941 return false; |
2583 | 2942 |
2584 type = unsigned_type_for (type); | 2943 scalar_int_mode mode = as_a <scalar_int_mode> (TYPE_MODE (type)); |
2944 int prec = GET_MODE_PRECISION (mode); | |
2945 if (TYPE_PRECISION (type) < prec | |
2946 || (wi::to_wide (TYPE_MIN_VALUE (type)) | |
2947 != wi::min_value (prec, TYPE_SIGN (type))) | |
2948 || (wi::to_wide (TYPE_MAX_VALUE (type)) | |
2949 != wi::max_value (prec, TYPE_SIGN (type)))) | |
2950 type = build_nonstandard_integer_type (prec, 1); | |
2951 else | |
2952 type = unsigned_type_for (type); | |
2585 tem1 = fold_convert (type, tem1); | 2953 tem1 = fold_convert (type, tem1); |
2586 tem2 = fold_convert (type, tem2); | 2954 tem2 = fold_convert (type, tem2); |
2587 lowi = fold_convert (type, lowi); | 2955 lowi = fold_convert (type, lowi); |
2588 mask = fold_build1 (BIT_NOT_EXPR, type, tem1); | 2956 mask = fold_build1 (BIT_NOT_EXPR, type, tem1); |
2589 tem1 = fold_build2 (MINUS_EXPR, type, | 2957 tem1 = fold_build2 (MINUS_EXPR, type, |
3453 return any_changes; | 3821 return any_changes; |
3454 } | 3822 } |
3455 | 3823 |
3456 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize | 3824 /* A subroutine of optimize_vec_cond_expr to extract and canonicalize |
3457 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure, | 3825 the operands of the VEC_COND_EXPR. Returns ERROR_MARK on failure, |
3458 otherwise the comparison code. */ | 3826 otherwise the comparison code. TYPE is a return value that is set |
3827 to the type of the comparison. */ | |
3459 | 3828 |
3460 static tree_code | 3829 static tree_code |
3461 ovce_extract_ops (tree var, gassign **rets, bool *reti) | 3830 ovce_extract_ops (tree var, gassign **rets, bool *reti, tree *type) |
3462 { | 3831 { |
3463 if (TREE_CODE (var) != SSA_NAME) | 3832 if (TREE_CODE (var) != SSA_NAME) |
3464 return ERROR_MARK; | 3833 return ERROR_MARK; |
3465 | 3834 |
3466 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var)); | 3835 gassign *stmt = dyn_cast <gassign *> (SSA_NAME_DEF_STMT (var)); |
3498 /* Success! */ | 3867 /* Success! */ |
3499 if (rets) | 3868 if (rets) |
3500 *rets = stmt; | 3869 *rets = stmt; |
3501 if (reti) | 3870 if (reti) |
3502 *reti = inv; | 3871 *reti = inv; |
3872 if (type) | |
3873 *type = TREE_TYPE (cond); | |
3503 return cmp; | 3874 return cmp; |
3504 } | 3875 } |
3505 | 3876 |
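A hedged sketch of the vector-condition combining this enables (the v4si typedef and function name are illustrative assumptions, using GCC's vector extensions):

    typedef int v4si __attribute__ ((vector_size (16)));

    v4si
    both (v4si x, v4si y)
    {
      /* Each vector comparison yields a mask; with the comparison TYPE now
         passed through, maybe_fold_and_comparisons can fold the pair:
         (x <= y) & (x >= y) becomes (x == y).  */
      return (x <= y) & (x >= y);
    }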
3506 /* Optimize the condition of VEC_COND_EXPRs which have been combined | 3877 /* Optimize the condition of VEC_COND_EXPRs which have been combined |
3507 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */ | 3878 with OPCODE (either BIT_AND_EXPR or BIT_IOR_EXPR). */ |
3519 { | 3890 { |
3520 tree elt0 = (*ops)[i]->op; | 3891 tree elt0 = (*ops)[i]->op; |
3521 | 3892 |
3522 gassign *stmt0; | 3893 gassign *stmt0; |
3523 bool invert; | 3894 bool invert; |
3524 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert); | 3895 tree type; |
3896 tree_code cmp0 = ovce_extract_ops (elt0, &stmt0, &invert, &type); | |
3525 if (cmp0 == ERROR_MARK) | 3897 if (cmp0 == ERROR_MARK) |
3526 continue; | 3898 continue; |
3527 | 3899 |
3528 for (j = i + 1; j < length; ++j) | 3900 for (j = i + 1; j < length; ++j) |
3529 { | 3901 { |
3530 tree &elt1 = (*ops)[j]->op; | 3902 tree &elt1 = (*ops)[j]->op; |
3531 | 3903 |
3532 gassign *stmt1; | 3904 gassign *stmt1; |
3533 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL); | 3905 tree_code cmp1 = ovce_extract_ops (elt1, &stmt1, NULL, NULL); |
3534 if (cmp1 == ERROR_MARK) | 3906 if (cmp1 == ERROR_MARK) |
3535 continue; | 3907 continue; |
3536 | 3908 |
3537 tree cond0 = gimple_assign_rhs1 (stmt0); | 3909 tree cond0 = gimple_assign_rhs1 (stmt0); |
3538 tree x0 = TREE_OPERAND (cond0, 0); | 3910 tree x0 = TREE_OPERAND (cond0, 0); |
3542 tree x1 = TREE_OPERAND (cond1, 0); | 3914 tree x1 = TREE_OPERAND (cond1, 0); |
3543 tree y1 = TREE_OPERAND (cond1, 1); | 3915 tree y1 = TREE_OPERAND (cond1, 1); |
3544 | 3916 |
3545 tree comb; | 3917 tree comb; |
3546 if (opcode == BIT_AND_EXPR) | 3918 if (opcode == BIT_AND_EXPR) |
3547 comb = maybe_fold_and_comparisons (cmp0, x0, y0, cmp1, x1, y1); | 3919 comb = maybe_fold_and_comparisons (type, cmp0, x0, y0, cmp1, x1, |
3920 y1); | |
3548 else if (opcode == BIT_IOR_EXPR) | 3921 else if (opcode == BIT_IOR_EXPR) |
3549 comb = maybe_fold_or_comparisons (cmp0, x0, y0, cmp1, x1, y1); | 3922 comb = maybe_fold_or_comparisons (type, cmp0, x0, y0, cmp1, x1, |
3923 y1); | |
3550 else | 3924 else |
3551 gcc_unreachable (); | 3925 gcc_unreachable (); |
3552 if (comb == NULL) | 3926 if (comb == NULL) |
3553 continue; | 3927 continue; |
3554 | 3928 |
3837 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, | 4211 /* If VAR is set by CODE (BIT_{AND,IOR}_EXPR) which is reassociable, |
3838 return true and fill in *OPS recursively. */ | 4212 return true and fill in *OPS recursively. */ |
3839 | 4213 |
3840 static bool | 4214 static bool |
3841 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops, | 4215 get_ops (tree var, enum tree_code code, vec<operand_entry *> *ops, |
3842 struct loop *loop) | 4216 class loop *loop) |
3843 { | 4217 { |
3844 gimple *stmt = SSA_NAME_DEF_STMT (var); | 4218 gimple *stmt = SSA_NAME_DEF_STMT (var); |
3845 tree rhs[2]; | 4219 tree rhs[2]; |
3846 int i; | 4220 int i; |
3847 | 4221 |
3872 they were changed during update_range_test and if yes, create new | 4246 they were changed during update_range_test and if yes, create new |
3873 stmts. */ | 4247 stmts. */ |
3874 | 4248 |
3875 static tree | 4249 static tree |
3876 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops, | 4250 update_ops (tree var, enum tree_code code, vec<operand_entry *> ops, |
3877 unsigned int *pidx, struct loop *loop) | 4251 unsigned int *pidx, class loop *loop) |
3878 { | 4252 { |
3879 gimple *stmt = SSA_NAME_DEF_STMT (var); | 4253 gimple *stmt = SSA_NAME_DEF_STMT (var); |
3880 tree rhs[4]; | 4254 tree rhs[4]; |
3881 int i; | 4255 int i; |
3882 | 4256 |
4644 | 5018 |
4645 static int | 5019 static int |
4646 get_reassociation_width (int ops_num, enum tree_code opc, | 5020 get_reassociation_width (int ops_num, enum tree_code opc, |
4647 machine_mode mode) | 5021 machine_mode mode) |
4648 { | 5022 { |
4649 int param_width = PARAM_VALUE (PARAM_TREE_REASSOC_WIDTH); | 5023 int param_width = param_tree_reassoc_width; |
4650 int width; | 5024 int width; |
4651 int width_min; | 5025 int width_min; |
4652 int cycles_best; | 5026 int cycles_best; |
4653 | 5027 |
4654 if (param_width > 0) | 5028 if (param_width > 0) |
4785 update_stmt (stmts[i]); | 5159 update_stmt (stmts[i]); |
4786 } | 5160 } |
4787 else | 5161 else |
4788 { | 5162 { |
4789 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); | 5163 stmts[i] = build_and_add_sum (TREE_TYPE (last_rhs1), op1, op2, opcode); |
5164 gimple_set_visited (stmts[i], true); | |
4790 } | 5165 } |
4791 if (dump_file && (dump_flags & TDF_DETAILS)) | 5166 if (dump_file && (dump_flags & TDF_DETAILS)) |
4792 { | 5167 { |
4793 fprintf (dump_file, " into "); | 5168 fprintf (dump_file, " into "); |
4794 print_gimple_stmt (dump_file, stmts[i], 0); | 5169 print_gimple_stmt (dump_file, stmts[i], 0); |
4809 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); | 5184 gimple *binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt)); |
4810 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); | 5185 gimple *binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt)); |
4811 gimple *oldbinrhs = binrhs; | 5186 gimple *oldbinrhs = binrhs; |
4812 enum tree_code rhscode = gimple_assign_rhs_code (stmt); | 5187 enum tree_code rhscode = gimple_assign_rhs_code (stmt); |
4813 gimple *newbinrhs = NULL; | 5188 gimple *newbinrhs = NULL; |
4814 struct loop *loop = loop_containing_stmt (stmt); | 5189 class loop *loop = loop_containing_stmt (stmt); |
4815 tree lhs = gimple_assign_lhs (stmt); | 5190 tree lhs = gimple_assign_lhs (stmt); |
4816 | 5191 |
4817 gcc_assert (is_reassociable_op (binlhs, rhscode, loop) | 5192 gcc_assert (is_reassociable_op (binlhs, rhscode, loop) |
4818 && is_reassociable_op (binrhs, rhscode, loop)); | 5193 && is_reassociable_op (binrhs, rhscode, loop)); |
4819 | 5194 |
4943 { | 5318 { |
4944 tree lhs = gimple_assign_lhs (stmt); | 5319 tree lhs = gimple_assign_lhs (stmt); |
4945 tree binlhs = gimple_assign_rhs1 (stmt); | 5320 tree binlhs = gimple_assign_rhs1 (stmt); |
4946 tree binrhs = gimple_assign_rhs2 (stmt); | 5321 tree binrhs = gimple_assign_rhs2 (stmt); |
4947 gimple *immusestmt; | 5322 gimple *immusestmt; |
4948 struct loop *loop = loop_containing_stmt (stmt); | 5323 class loop *loop = loop_containing_stmt (stmt); |
4949 | 5324 |
4950 if (TREE_CODE (binlhs) == SSA_NAME | 5325 if (TREE_CODE (binlhs) == SSA_NAME |
4951 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) | 5326 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) |
4952 return true; | 5327 return true; |
4953 | 5328 |
5098 tree binrhs = gimple_assign_rhs2 (stmt); | 5473 tree binrhs = gimple_assign_rhs2 (stmt); |
5099 gimple *binlhsdef = NULL, *binrhsdef = NULL; | 5474 gimple *binlhsdef = NULL, *binrhsdef = NULL; |
5100 bool binlhsisreassoc = false; | 5475 bool binlhsisreassoc = false; |
5101 bool binrhsisreassoc = false; | 5476 bool binrhsisreassoc = false; |
5102 enum tree_code rhscode = gimple_assign_rhs_code (stmt); | 5477 enum tree_code rhscode = gimple_assign_rhs_code (stmt); |
5103 struct loop *loop = loop_containing_stmt (stmt); | 5478 class loop *loop = loop_containing_stmt (stmt); |
5104 | 5479 |
5105 if (set_visited) | 5480 if (set_visited) |
5106 gimple_set_visited (stmt, true); | 5481 gimple_set_visited (stmt, true); |
5107 | 5482 |
5108 if (TREE_CODE (binlhs) == SSA_NAME) | 5483 if (TREE_CODE (binlhs) == SSA_NAME) |
5854 && !stmt_could_throw_p (cfun, stmt)) | 6229 && !stmt_could_throw_p (cfun, stmt)) |
5855 { | 6230 { |
5856 tree lhs, rhs1, rhs2; | 6231 tree lhs, rhs1, rhs2; |
5857 enum tree_code rhs_code = gimple_assign_rhs_code (stmt); | 6232 enum tree_code rhs_code = gimple_assign_rhs_code (stmt); |
5858 | 6233 |
5859 /* If this is not a gimple binary expression, there is | |
5860 nothing for us to do with it. */ | |
5861 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) | |
5862 continue; | |
5863 | |
5864 /* If this was part of an already processed statement, | 6234 /* If this was part of an already processed statement, |
5865 we don't need to touch it again. */ | 6235 we don't need to touch it again. */ |
5866 if (gimple_visited_p (stmt)) | 6236 if (gimple_visited_p (stmt)) |
5867 { | 6237 { |
5868 /* This statement might have become dead because of previous | 6238 /* This statement might have become dead because of previous |
5885 } | 6255 } |
5886 } | 6256 } |
5887 continue; | 6257 continue; |
5888 } | 6258 } |
5889 | 6259 |
6260 /* If this is not a gimple binary expression, there is | |
6261 nothing for us to do with it. */ | |
6262 if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS) | |
6263 continue; | |
6264 | |
5890 lhs = gimple_assign_lhs (stmt); | 6265 lhs = gimple_assign_lhs (stmt); |
5891 rhs1 = gimple_assign_rhs1 (stmt); | 6266 rhs1 = gimple_assign_rhs1 (stmt); |
5892 rhs2 = gimple_assign_rhs2 (stmt); | 6267 rhs2 = gimple_assign_rhs2 (stmt); |
5893 | 6268 |
5894 /* For non-bit or min/max operations we can't associate | 6269 /* For non-bit or min/max operations we can't associate |
5923 loop_containing_stmt (stmt))) | 6298 loop_containing_stmt (stmt))) |
5924 { | 6299 { |
5925 ops.qsort (sort_by_operand_rank); | 6300 ops.qsort (sort_by_operand_rank); |
5926 optimize_ops_list (rhs_code, &ops); | 6301 optimize_ops_list (rhs_code, &ops); |
5927 } | 6302 } |
5928 | 6303 if (undistribute_bitref_for_vector (rhs_code, &ops, |
6304 loop_containing_stmt (stmt))) | |
6305 { | |
6306 ops.qsort (sort_by_operand_rank); | |
6307 optimize_ops_list (rhs_code, &ops); | |
6308 } | |
5929 if (rhs_code == PLUS_EXPR | 6309 if (rhs_code == PLUS_EXPR |
5930 && transform_add_to_multiply (&ops)) | 6310 && transform_add_to_multiply (&ops)) |
5931 ops.qsort (sort_by_operand_rank); | 6311 ops.qsort (sort_by_operand_rank); |
5932 | 6312 |
5933 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR) | 6313 if (rhs_code == BIT_IOR_EXPR || rhs_code == BIT_AND_EXPR) |
5985 } | 6365 } |
5986 else | 6366 else |
5987 { | 6367 { |
5988 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); | 6368 machine_mode mode = TYPE_MODE (TREE_TYPE (lhs)); |
5989 int ops_num = ops.length (); | 6369 int ops_num = ops.length (); |
5990 int width = get_reassociation_width (ops_num, rhs_code, mode); | 6370 int width; |
5991 | |
5992 if (dump_file && (dump_flags & TDF_DETAILS)) | |
5993 fprintf (dump_file, | |
5994 "Width = %d was chosen for reassociation\n", width); | |
5995 | |
5996 | 6371 |
5997 /* For binary bit operations, if there are at least 3 | 6372 /* For binary bit operations, if there are at least 3 |
5998 operands and the last operand in OPS is a constant, | 6373 operands and the last operand in OPS is a constant, |
5999 move it to the front. This helps ensure that we generate | 6374 move it to the front. This helps ensure that we generate |
6000 (X & Y) & C rather than (X & C) & Y. The former will | 6375 (X & Y) & C rather than (X & C) & Y. The former will |
6004 || rhs_code == BIT_IOR_EXPR | 6379 || rhs_code == BIT_IOR_EXPR |
6005 || rhs_code == BIT_XOR_EXPR) | 6380 || rhs_code == BIT_XOR_EXPR) |
6006 && TREE_CODE (ops.last ()->op) == INTEGER_CST) | 6381 && TREE_CODE (ops.last ()->op) == INTEGER_CST) |
6007 std::swap (*ops[0], *ops[ops_num - 1]); | 6382 std::swap (*ops[0], *ops[ops_num - 1]); |
6008 | 6383 |
6009 if (width > 1 | 6384 /* Only rewrite the expression tree to parallel in the |
6010 && ops.length () > 3) | 6385 last reassoc pass to avoid useless work back-and-forth |
6011 rewrite_expr_tree_parallel (as_a <gassign *> (stmt), | 6386 with initial linearization. */ |
6012 width, ops); | 6387 if (!reassoc_insert_powi_p |
6388 && ops.length () > 3 | |
6389 && (width = get_reassociation_width (ops_num, rhs_code, | |
6390 mode)) > 1) | |
6391 { | |
6392 if (dump_file && (dump_flags & TDF_DETAILS)) | |
6393 fprintf (dump_file, | |
6394 "Width = %d was chosen for reassociation\n", | |
6395 width); | |
6396 rewrite_expr_tree_parallel (as_a <gassign *> (stmt), | |
6397 width, ops); | |
6398 } | |
6013 else | 6399 else |
6014 { | 6400 { |
6015 /* When there are three operands left, we want | 6401 /* When there are three operands left, we want |
6016 to make sure the ones that get the double | 6402 to make sure the ones that get the double |
6017 binary op are chosen wisely. */ | 6403 binary op are chosen wisely. */ |
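For context on the width-guarded rewrite above, a hedged sketch of what rewrite_expr_tree_parallel does with a four-operand sum (shapes are illustrative; the width actually chosen depends on the target):

    int
    sum4 (int a, int b, int c, int d)
    {
      /* Linearized serial chain, dependence depth 3.  */
      return ((a + b) + c) + d;
    }

    /* With width 2 the chain is rewritten as (a + b) + (c + d); the two
       inner additions are independent and can issue in parallel.  */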