comparison gcc/tree-vect-generic.c @ 16:04ced10e8804
gcc 7
author    kono
date      Fri, 27 Oct 2017 22:46:09 +0900
parents   f6334be47118
children  84e7813d76e9
comparison
15:561a7518be6b | 16:04ced10e8804 |
---|---|
1 /* Lower vector operations to scalar operations. | 1 /* Lower vector operations to scalar operations. |
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 | 2 Copyright (C) 2004-2017 Free Software Foundation, Inc. |
3 Free Software Foundation, Inc. | |
4 | 3 |
5 This file is part of GCC. | 4 This file is part of GCC. |
6 | 5 |
7 GCC is free software; you can redistribute it and/or modify it | 6 GCC is free software; you can redistribute it and/or modify it |
8 under the terms of the GNU General Public License as published by the | 7 under the terms of the GNU General Public License as published by the |
19 <http://www.gnu.org/licenses/>. */ | 18 <http://www.gnu.org/licenses/>. */ |
20 | 19 |
21 #include "config.h" | 20 #include "config.h" |
22 #include "system.h" | 21 #include "system.h" |
23 #include "coretypes.h" | 22 #include "coretypes.h" |
23 #include "backend.h" | |
24 #include "rtl.h" | |
24 #include "tree.h" | 25 #include "tree.h" |
25 #include "tm.h" | 26 #include "gimple.h" |
27 #include "tree-pass.h" | |
28 #include "ssa.h" | |
29 #include "expmed.h" | |
30 #include "optabs-tree.h" | |
31 #include "diagnostic.h" | |
32 #include "fold-const.h" | |
33 #include "stor-layout.h" | |
26 #include "langhooks.h" | 34 #include "langhooks.h" |
27 #include "tree-flow.h" | 35 #include "tree-eh.h" |
28 #include "gimple.h" | 36 #include "gimple-iterator.h" |
29 #include "tree-iterator.h" | 37 #include "gimplify-me.h" |
30 #include "tree-pass.h" | 38 #include "gimplify.h" |
31 #include "flags.h" | 39 #include "tree-cfg.h" |
32 #include "ggc.h" | 40 |
33 | 41 |
34 /* Need to include rtl.h, expr.h, etc. for optabs. */ | 42 static void expand_vector_operations_1 (gimple_stmt_iterator *); |
35 #include "expr.h" | 43 |
36 #include "optabs.h" | |
37 | 44 |
38 /* Build a constant of type TYPE, made of VALUE's bits replicated | 45 /* Build a constant of type TYPE, made of VALUE's bits replicated |
39 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */ | 46 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */ |
40 static tree | 47 static tree |
41 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value) | 48 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value) |
42 { | 49 { |
43 int width = tree_low_cst (TYPE_SIZE (inner_type), 1); | 50 int width = tree_to_uhwi (TYPE_SIZE (inner_type)); |
44 int n = HOST_BITS_PER_WIDE_INT / width; | 51 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1) |
45 unsigned HOST_WIDE_INT low, high, mask; | 52 / HOST_BITS_PER_WIDE_INT; |
46 tree ret; | 53 unsigned HOST_WIDE_INT low, mask; |
47 | 54 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS]; |
48 gcc_assert (n); | 55 int i; |
56 | |
57 gcc_assert (n && n <= WIDE_INT_MAX_ELTS); | |
49 | 58 |
50 if (width == HOST_BITS_PER_WIDE_INT) | 59 if (width == HOST_BITS_PER_WIDE_INT) |
51 low = value; | 60 low = value; |
52 else | 61 else |
53 { | 62 { |
54 mask = ((HOST_WIDE_INT)1 << width) - 1; | 63 mask = ((HOST_WIDE_INT)1 << width) - 1; |
55 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask); | 64 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask); |
56 } | 65 } |
57 | 66 |
58 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT) | 67 for (i = 0; i < n; i++) |
59 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0; | 68 a[i] = low; |
60 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT) | 69 |
61 high = 0; | 70 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT); |
62 else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT) | 71 return wide_int_to_tree |
63 high = low; | 72 (type, wide_int::from_array (a, n, TYPE_PRECISION (type))); |
64 else | |
65 gcc_unreachable (); | |
66 | |
67 ret = build_int_cst_wide (type, low, high); | |
68 return ret; | |
69 } | 73 } |
70 | 74 |
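
For reference, the bit-replication trick used by build_replicated_const above can be shown as a standalone C sketch (illustrative only, not GCC internals; the helper name is invented): dividing an all-ones word by the lane mask yields 0x0101...01, so a single multiply stamps the value into every lane, exactly as in low = ~0 / mask * (value & mask) above.

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative helper, not from GCC: replicate an 8-bit value across
       a 64-bit word.  */
    static uint64_t
    replicate_byte (uint8_t value)
    {
      uint64_t mask = 0xFF;
      return (~UINT64_C (0) / mask) * (value & mask);
    }

    int
    main (void)
    {
      printf ("%016llx\n", (unsigned long long) replicate_byte (0x5A));
      /* prints 5a5a5a5a5a5a5a5a */
      return 0;
    }
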
71 static GTY(()) tree vector_inner_type; | 75 static GTY(()) tree vector_inner_type; |
72 static GTY(()) tree vector_last_type; | 76 static GTY(()) tree vector_last_type; |
73 static GTY(()) int vector_last_nunits; | 77 static GTY(()) int vector_last_nunits; |
93 nunits)); | 97 nunits)); |
94 return vector_last_type; | 98 return vector_last_type; |
95 } | 99 } |
96 | 100 |
97 typedef tree (*elem_op_func) (gimple_stmt_iterator *, | 101 typedef tree (*elem_op_func) (gimple_stmt_iterator *, |
98 tree, tree, tree, tree, tree, enum tree_code); | 102 tree, tree, tree, tree, tree, enum tree_code, |
103 tree); | |
99 | 104 |
100 static inline tree | 105 static inline tree |
101 tree_vec_extract (gimple_stmt_iterator *gsi, tree type, | 106 tree_vec_extract (gimple_stmt_iterator *gsi, tree type, |
102 tree t, tree bitsize, tree bitpos) | 107 tree t, tree bitsize, tree bitpos) |
103 { | 108 { |
109 if (TREE_CODE (t) == SSA_NAME) | |
110 { | |
111 gimple *def_stmt = SSA_NAME_DEF_STMT (t); | |
112 if (is_gimple_assign (def_stmt) | |
113 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST | |
114 || (bitpos | |
115 && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))) | |
116 t = gimple_assign_rhs1 (def_stmt); | |
117 } | |
104 if (bitpos) | 118 if (bitpos) |
105 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos); | 119 { |
120 if (TREE_CODE (type) == BOOLEAN_TYPE) | |
121 { | |
122 tree itype | |
123 = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0); | |
124 tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t, | |
125 bitsize, bitpos); | |
126 return gimplify_build2 (gsi, NE_EXPR, type, field, | |
127 build_zero_cst (itype)); | |
128 } | |
129 else | |
130 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos); | |
131 } | |
106 else | 132 else |
107 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t); | 133 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t); |
108 } | 134 } |
109 | 135 |
110 static tree | 136 static tree |
111 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a, | 137 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a, |
112 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize, | 138 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize, |
113 enum tree_code code) | 139 enum tree_code code, tree type ATTRIBUTE_UNUSED) |
114 { | 140 { |
115 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); | 141 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); |
116 return gimplify_build1 (gsi, code, inner_type, a); | 142 return gimplify_build1 (gsi, code, inner_type, a); |
117 } | 143 } |
118 | 144 |
119 static tree | 145 static tree |
120 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, | 146 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, |
121 tree bitpos, tree bitsize, enum tree_code code) | 147 tree bitpos, tree bitsize, enum tree_code code, |
122 { | 148 tree type ATTRIBUTE_UNUSED) |
149 { | |
150 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE) | |
151 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); | |
152 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE) | |
153 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); | |
154 return gimplify_build2 (gsi, code, inner_type, a, b); | |
155 } | |
156 | |
157 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0 | |
158 | |
159 INNER_TYPE is the type of A and B elements | |
160 | |
161 returned expression is of signed integer type with the | |
162 size equal to the size of INNER_TYPE. */ | |
163 static tree | |
164 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, | |
165 tree bitpos, tree bitsize, enum tree_code code, tree type) | |
166 { | |
167 tree stype = TREE_TYPE (type); | |
168 tree cst_false = build_zero_cst (stype); | |
169 tree cst_true = build_all_ones_cst (stype); | |
170 tree cmp; | |
171 | |
123 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); | 172 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); |
124 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); | 173 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); |
125 return gimplify_build2 (gsi, code, inner_type, a, b); | 174 |
175 cmp = build2 (code, boolean_type_node, a, b); | |
176 return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false); | |
126 } | 177 } |
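
As a standalone GNU C illustration (not part of this file; the function name is invented), the -1/0 lanes that do_compare materializes with a COND_EXPR are the same values GCC's generic vector extension defines for element-wise comparisons:

    typedef int v4si __attribute__ ((vector_size (16)));

    /* Illustrative only: each result lane is -1 where a[i] < b[i] and 0
       otherwise, matching the cst_true/cst_false values above.  */
    v4si
    cmp_lt (v4si a, v4si b)
    {
      return a < b;
    }
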
127 | 178 |
128 /* Expand vector addition to scalars. This does bit twiddling | 179 /* Expand vector addition to scalars. This does bit twiddling |
129 in order to increase parallelism: | 180 in order to increase parallelism: |
130 | 181 |
139 This optimization should be done only if 4 vector items or more | 190 This optimization should be done only if 4 vector items or more |
140 fit into a word. */ | 191 fit into a word. */ |
141 static tree | 192 static tree |
142 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b, | 193 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b, |
143 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, | 194 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, |
144 enum tree_code code) | 195 enum tree_code code, tree type ATTRIBUTE_UNUSED) |
145 { | 196 { |
146 tree inner_type = TREE_TYPE (TREE_TYPE (a)); | 197 tree inner_type = TREE_TYPE (TREE_TYPE (a)); |
147 unsigned HOST_WIDE_INT max; | 198 unsigned HOST_WIDE_INT max; |
148 tree low_bits, high_bits, a_low, b_low, result_low, signs; | 199 tree low_bits, high_bits, a_low, b_low, result_low, signs; |
149 | 200 |
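
The word-parallel "bit twiddling" addition that do_plus_minus implements can be sketched in plain C (illustrative only, assuming four 8-bit lanes packed in a 32-bit word; the helper name is invented): mask off each lane's top bit so carries cannot cross lane boundaries, add, then restore the top bits with XOR.

    #include <stdint.h>

    /* Illustrative helper, not from GCC.  */
    static uint32_t
    add_lanes_u8x4 (uint32_t a, uint32_t b)
    {
      uint32_t low_bits  = 0x7f7f7f7f;   /* everything except each lane's MSB */
      uint32_t high_bits = 0x80808080;   /* each lane's MSB */
      uint32_t a_low = a & low_bits;
      uint32_t b_low = b & low_bits;
      uint32_t signs = (a ^ b) & high_bits;
      return (a_low + b_low) ^ signs;    /* per-lane sum, modulo 256 */
    }
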
171 | 222 |
172 static tree | 223 static tree |
173 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b, | 224 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b, |
174 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED, | 225 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED, |
175 tree bitsize ATTRIBUTE_UNUSED, | 226 tree bitsize ATTRIBUTE_UNUSED, |
176 enum tree_code code ATTRIBUTE_UNUSED) | 227 enum tree_code code ATTRIBUTE_UNUSED, |
228 tree type ATTRIBUTE_UNUSED) | |
177 { | 229 { |
178 tree inner_type = TREE_TYPE (TREE_TYPE (b)); | 230 tree inner_type = TREE_TYPE (TREE_TYPE (b)); |
179 HOST_WIDE_INT max; | 231 HOST_WIDE_INT max; |
180 tree low_bits, high_bits, b_low, result_low, signs; | 232 tree low_bits, high_bits, b_low, result_low, signs; |
181 | 233 |
197 static tree | 249 static tree |
198 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f, | 250 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f, |
199 tree type, tree inner_type, | 251 tree type, tree inner_type, |
200 tree a, tree b, enum tree_code code) | 252 tree a, tree b, enum tree_code code) |
201 { | 253 { |
202 VEC(constructor_elt,gc) *v; | 254 vec<constructor_elt, va_gc> *v; |
203 tree part_width = TYPE_SIZE (inner_type); | 255 tree part_width = TYPE_SIZE (inner_type); |
204 tree index = bitsize_int (0); | 256 tree index = bitsize_int (0); |
205 int nunits = TYPE_VECTOR_SUBPARTS (type); | 257 int nunits = TYPE_VECTOR_SUBPARTS (type); |
206 int delta = tree_low_cst (part_width, 1) | 258 int delta = tree_to_uhwi (part_width) |
207 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1); | 259 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type))); |
208 int i; | 260 int i; |
209 | 261 location_t loc = gimple_location (gsi_stmt (*gsi)); |
210 v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta); | 262 |
263 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type)) | |
264 warning_at (loc, OPT_Wvector_operation_performance, | |
265 "vector operation will be expanded piecewise"); | |
266 else | |
267 warning_at (loc, OPT_Wvector_operation_performance, | |
268 "vector operation will be expanded in parallel"); | |
269 | |
270 vec_alloc (v, (nunits + delta - 1) / delta); | |
211 for (i = 0; i < nunits; | 271 for (i = 0; i < nunits; |
212 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0)) | 272 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width)) |
213 { | 273 { |
214 tree result = f (gsi, inner_type, a, b, index, part_width, code); | 274 tree result = f (gsi, inner_type, a, b, index, part_width, code, type); |
215 constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL); | 275 constructor_elt ce = {NULL_TREE, result}; |
216 ce->index = NULL_TREE; | 276 v->quick_push (ce); |
217 ce->value = result; | |
218 } | 277 } |
219 | 278 |
220 return build_constructor (type, v); | 279 return build_constructor (type, v); |
221 } | 280 } |
222 | 281 |
227 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type, | 286 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type, |
228 tree a, tree b, | 287 tree a, tree b, |
229 enum tree_code code) | 288 enum tree_code code) |
230 { | 289 { |
231 tree result, compute_type; | 290 tree result, compute_type; |
232 enum machine_mode mode; | 291 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD; |
233 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD; | 292 location_t loc = gimple_location (gsi_stmt (*gsi)); |
234 | 293 |
235 /* We have three strategies. If the type is already correct, just do | 294 /* We have three strategies. If the type is already correct, just do |
236 the operation an element at a time. Else, if the vector is wider than | 295 the operation an element at a time. Else, if the vector is wider than |
237 one word, do it a word at a time; finally, if the vector is smaller | 296 one word, do it a word at a time; finally, if the vector is smaller |
238 than one word, do it as a scalar. */ | 297 than one word, do it as a scalar. */ |
250 GSI_SAME_STMT); | 309 GSI_SAME_STMT); |
251 } | 310 } |
252 else | 311 else |
253 { | 312 { |
254 /* Use a single scalar operation with a mode no wider than word_mode. */ | 313 /* Use a single scalar operation with a mode no wider than word_mode. */ |
255 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0); | 314 scalar_int_mode mode |
315 = int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require (); | |
256 compute_type = lang_hooks.types.type_for_mode (mode, 1); | 316 compute_type = lang_hooks.types.type_for_mode (mode, 1); |
257 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code); | 317 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type); |
318 warning_at (loc, OPT_Wvector_operation_performance, | |
319 "vector operation will be expanded with a " | |
320 "single scalar operation"); | |
258 } | 321 } |
259 | 322 |
260 return result; | 323 return result; |
261 } | 324 } |
262 | 325 |
269 expand_vector_addition (gimple_stmt_iterator *gsi, | 332 expand_vector_addition (gimple_stmt_iterator *gsi, |
270 elem_op_func f, elem_op_func f_parallel, | 333 elem_op_func f, elem_op_func f_parallel, |
271 tree type, tree a, tree b, enum tree_code code) | 334 tree type, tree a, tree b, enum tree_code code) |
272 { | 335 { |
273 int parts_per_word = UNITS_PER_WORD | 336 int parts_per_word = UNITS_PER_WORD |
274 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1); | 337 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type))); |
275 | 338 |
276 if (INTEGRAL_TYPE_P (TREE_TYPE (type)) | 339 if (INTEGRAL_TYPE_P (TREE_TYPE (type)) |
277 && parts_per_word >= 4 | 340 && parts_per_word >= 4 |
278 && TYPE_VECTOR_SUBPARTS (type) >= 4) | 341 && TYPE_VECTOR_SUBPARTS (type) >= 4) |
279 return expand_vector_parallel (gsi, f_parallel, | 342 return expand_vector_parallel (gsi, f_parallel, |
282 return expand_vector_piecewise (gsi, f, | 345 return expand_vector_piecewise (gsi, f, |
283 type, TREE_TYPE (type), | 346 type, TREE_TYPE (type), |
284 a, b, code); | 347 a, b, code); |
285 } | 348 } |
286 | 349 |
287 /* Check if vector VEC consists of all the equal elements and | 350 /* Try to expand vector comparison expression OP0 CODE OP1 by |
288 that the number of elements corresponds to the type of VEC. | 351 querying optab if the following expression: |
289 The function returns first element of the vector | 352 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}> |
290 or NULL_TREE if the vector is not uniform. */ | 353 can be expanded. */ |
291 static tree | 354 static tree |
292 uniform_vector_p (tree vec) | 355 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0, |
293 { | 356 tree op1, enum tree_code code) |
294 tree first, t, els; | 357 { |
295 unsigned i; | 358 tree t; |
296 | 359 if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code) |
297 if (vec == NULL_TREE) | 360 && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code)) |
361 t = expand_vector_piecewise (gsi, do_compare, type, | |
362 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code); | |
363 else | |
364 t = NULL_TREE; | |
365 | |
366 return t; | |
367 } | |
368 | |
369 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type | |
370 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding | |
371 the result if successful, otherwise return NULL_TREE. */ | |
372 static tree | |
373 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts) | |
374 { | |
375 optab op; | |
376 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type); | |
377 bool scalar_shift = true; | |
378 | |
379 for (i = 1; i < nunits; i++) | |
380 { | |
381 if (shiftcnts[i] != shiftcnts[0]) | |
382 scalar_shift = false; | |
383 } | |
384 | |
385 if (scalar_shift && shiftcnts[0] == 0) | |
386 return op0; | |
387 | |
388 if (scalar_shift) | |
389 { | |
390 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar); | |
391 if (op != unknown_optab | |
392 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) | |
393 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, | |
394 build_int_cst (NULL_TREE, shiftcnts[0])); | |
395 } | |
396 | |
397 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector); | |
398 if (op != unknown_optab | |
399 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) | |
400 { | |
401 auto_vec<tree, 32> vec (nunits); | |
402 for (i = 0; i < nunits; i++) | |
403 vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i])); | |
404 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0, | |
405 build_vector (type, vec)); | |
406 } | |
407 | |
408 return NULL_TREE; | |
409 } | |
410 | |
411 /* Try to expand integer vector division by constant using | |
412 widening multiply, shifts and additions. */ | |
413 static tree | |
414 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0, | |
415 tree op1, enum tree_code code) | |
416 { | |
417 bool use_pow2 = true; | |
418 bool has_vector_shift = true; | |
419 int mode = -1, this_mode; | |
420 int pre_shift = -1, post_shift; | |
421 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type); | |
422 int *shifts = XALLOCAVEC (int, nunits * 4); | |
423 int *pre_shifts = shifts + nunits; | |
424 int *post_shifts = pre_shifts + nunits; | |
425 int *shift_temps = post_shifts + nunits; | |
426 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits); | |
427 int prec = TYPE_PRECISION (TREE_TYPE (type)); | |
428 int dummy_int; | |
429 unsigned int i; | |
430 signop sign_p = TYPE_SIGN (TREE_TYPE (type)); | |
431 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type))); | |
432 tree cur_op, mulcst, tem; | |
433 optab op; | |
434 | |
435 if (prec > HOST_BITS_PER_WIDE_INT) | |
298 return NULL_TREE; | 436 return NULL_TREE; |
299 | 437 |
300 if (TREE_CODE (vec) == VECTOR_CST) | 438 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector); |
301 { | 439 if (op == unknown_optab |
302 els = TREE_VECTOR_CST_ELTS (vec); | 440 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) |
303 first = TREE_VALUE (els); | 441 has_vector_shift = false; |
304 els = TREE_CHAIN (els); | 442 |
305 | 443 /* Analysis phase. Determine if all op1 elements are either power |
306 for (t = els; t; t = TREE_CHAIN (t)) | 444 of two and it is possible to expand it using shifts (or for remainder |
307 if (!operand_equal_p (first, TREE_VALUE (t), 0)) | 445 using masking). Additionally compute the multiplicative constants |
308 return NULL_TREE; | 446 and pre and post shifts if the division is to be expanded using |
309 | 447 widening or high part multiplication plus shifts. */ |
310 return first; | 448 for (i = 0; i < nunits; i++) |
311 } | 449 { |
312 | 450 tree cst = VECTOR_CST_ELT (op1, i); |
313 else if (TREE_CODE (vec) == CONSTRUCTOR) | 451 unsigned HOST_WIDE_INT ml; |
314 { | 452 |
315 first = error_mark_node; | 453 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst)) |
316 | 454 return NULL_TREE; |
317 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t) | 455 pre_shifts[i] = 0; |
318 { | 456 post_shifts[i] = 0; |
319 if (i == 0) | 457 mulc[i] = 0; |
320 { | 458 if (use_pow2 |
321 first = t; | 459 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1)) |
322 continue; | 460 use_pow2 = false; |
323 } | 461 if (use_pow2) |
324 if (!operand_equal_p (first, t, 0)) | 462 { |
463 shifts[i] = tree_log2 (cst); | |
464 if (shifts[i] != shifts[0] | |
465 && code == TRUNC_DIV_EXPR | |
466 && !has_vector_shift) | |
467 use_pow2 = false; | |
468 } | |
469 if (mode == -2) | |
470 continue; | |
471 if (sign_p == UNSIGNED) | |
472 { | |
473 unsigned HOST_WIDE_INT mh; | |
474 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask; | |
475 | |
476 if (d >= (HOST_WIDE_INT_1U << (prec - 1))) | |
477 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */ | |
325 return NULL_TREE; | 478 return NULL_TREE; |
326 } | 479 |
327 if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec))) | 480 if (d <= 1) |
481 { | |
482 mode = -2; | |
483 continue; | |
484 } | |
485 | |
486 /* Find a suitable multiplier and right shift count | |
487 instead of multiplying with D. */ | |
488 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int); | |
489 | |
490 /* If the suggested multiplier is more than SIZE bits, we can | |
491 do better for even divisors, using an initial right shift. */ | |
492 if ((mh != 0 && (d & 1) == 0) | |
493 || (!has_vector_shift && pre_shift != -1)) | |
494 { | |
495 if (has_vector_shift) | |
496 pre_shift = ctz_or_zero (d); | |
497 else if (pre_shift == -1) | |
498 { | |
499 unsigned int j; | |
500 for (j = 0; j < nunits; j++) | |
501 { | |
502 tree cst2 = VECTOR_CST_ELT (op1, j); | |
503 unsigned HOST_WIDE_INT d2; | |
504 int this_pre_shift; | |
505 | |
506 if (!tree_fits_uhwi_p (cst2)) | |
507 return NULL_TREE; | |
508 d2 = tree_to_uhwi (cst2) & mask; | |
509 if (d2 == 0) | |
510 return NULL_TREE; | |
511 this_pre_shift = floor_log2 (d2 & -d2); | |
512 if (pre_shift == -1 || this_pre_shift < pre_shift) | |
513 pre_shift = this_pre_shift; | |
514 } | |
515 if (i != 0 && pre_shift != 0) | |
516 { | |
517 /* Restart. */ | |
518 i = -1U; | |
519 mode = -1; | |
520 continue; | |
521 } | |
522 } | |
523 if (pre_shift != 0) | |
524 { | |
525 if ((d >> pre_shift) <= 1) | |
526 { | |
527 mode = -2; | |
528 continue; | |
529 } | |
530 mh = choose_multiplier (d >> pre_shift, prec, | |
531 prec - pre_shift, | |
532 &ml, &post_shift, &dummy_int); | |
533 gcc_assert (!mh); | |
534 pre_shifts[i] = pre_shift; | |
535 } | |
536 } | |
537 if (!mh) | |
538 this_mode = 0; | |
539 else | |
540 this_mode = 1; | |
541 } | |
542 else | |
543 { | |
544 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst); | |
545 unsigned HOST_WIDE_INT abs_d; | |
546 | |
547 if (d == -1) | |
548 return NULL_TREE; | |
549 | |
550 /* Since d might be INT_MIN, we have to cast to | |
551 unsigned HOST_WIDE_INT before negating to avoid | |
552 undefined signed overflow. */ | |
553 abs_d = (d >= 0 | |
554 ? (unsigned HOST_WIDE_INT) d | |
555 : - (unsigned HOST_WIDE_INT) d); | |
556 | |
557 /* n rem d = n rem -d */ | |
558 if (code == TRUNC_MOD_EXPR && d < 0) | |
559 d = abs_d; | |
560 else if (abs_d == HOST_WIDE_INT_1U << (prec - 1)) | |
561 { | |
562 /* This case is not handled correctly below. */ | |
563 mode = -2; | |
564 continue; | |
565 } | |
566 if (abs_d <= 1) | |
567 { | |
568 mode = -2; | |
569 continue; | |
570 } | |
571 | |
572 choose_multiplier (abs_d, prec, prec - 1, &ml, | |
573 &post_shift, &dummy_int); | |
574 if (ml >= HOST_WIDE_INT_1U << (prec - 1)) | |
575 { | |
576 this_mode = 4 + (d < 0); | |
577 ml |= HOST_WIDE_INT_M1U << (prec - 1); | |
578 } | |
579 else | |
580 this_mode = 2 + (d < 0); | |
581 } | |
582 mulc[i] = ml; | |
583 post_shifts[i] = post_shift; | |
584 if ((i && !has_vector_shift && post_shifts[0] != post_shift) | |
585 || post_shift >= prec | |
586 || pre_shifts[i] >= prec) | |
587 this_mode = -2; | |
588 | |
589 if (i == 0) | |
590 mode = this_mode; | |
591 else if (mode != this_mode) | |
592 mode = -2; | |
593 } | |
594 | |
595 if (use_pow2) | |
596 { | |
597 tree addend = NULL_TREE; | |
598 if (sign_p == SIGNED) | |
599 { | |
600 tree uns_type; | |
601 | |
602 /* Both division and remainder sequences need | |
603 op0 < 0 ? mask : 0 computed. It can be either computed as | |
604 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i])) | |
605 if none of the shifts is 0, or as the conditional. */ | |
606 for (i = 0; i < nunits; i++) | |
607 if (shifts[i] == 0) | |
608 break; | |
609 uns_type | |
610 = build_vector_type (build_nonstandard_integer_type (prec, 1), | |
611 nunits); | |
612 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type)) | |
613 { | |
614 for (i = 0; i < nunits; i++) | |
615 shift_temps[i] = prec - 1; | |
616 cur_op = add_rshift (gsi, type, op0, shift_temps); | |
617 if (cur_op != NULL_TREE) | |
618 { | |
619 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, | |
620 uns_type, cur_op); | |
621 for (i = 0; i < nunits; i++) | |
622 shift_temps[i] = prec - shifts[i]; | |
623 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps); | |
624 if (cur_op != NULL_TREE) | |
625 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, | |
626 type, cur_op); | |
627 } | |
628 } | |
629 if (addend == NULL_TREE | |
630 && expand_vec_cond_expr_p (type, type, LT_EXPR)) | |
631 { | |
632 tree zero, cst, cond, mask_type; | |
633 gimple *stmt; | |
634 | |
635 mask_type = build_same_sized_truth_vector_type (type); | |
636 zero = build_zero_cst (type); | |
637 cond = build2 (LT_EXPR, mask_type, op0, zero); | |
638 auto_vec<tree, 32> vec (nunits); | |
639 for (i = 0; i < nunits; i++) | |
640 vec.quick_push (build_int_cst (TREE_TYPE (type), | |
641 (HOST_WIDE_INT_1U | |
642 << shifts[i]) - 1)); | |
643 cst = build_vector (type, vec); | |
644 addend = make_ssa_name (type); | |
645 stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond, | |
646 cst, zero); | |
647 gsi_insert_before (gsi, stmt, GSI_SAME_STMT); | |
648 } | |
649 } | |
650 if (code == TRUNC_DIV_EXPR) | |
651 { | |
652 if (sign_p == UNSIGNED) | |
653 { | |
654 /* q = op0 >> shift; */ | |
655 cur_op = add_rshift (gsi, type, op0, shifts); | |
656 if (cur_op != NULL_TREE) | |
657 return cur_op; | |
658 } | |
659 else if (addend != NULL_TREE) | |
660 { | |
661 /* t1 = op0 + addend; | |
662 q = t1 >> shift; */ | |
663 op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
664 if (op != unknown_optab | |
665 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) | |
666 { | |
667 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend); | |
668 cur_op = add_rshift (gsi, type, cur_op, shifts); | |
669 if (cur_op != NULL_TREE) | |
670 return cur_op; | |
671 } | |
672 } | |
673 } | |
674 else | |
675 { | |
676 tree mask; | |
677 auto_vec<tree, 32> vec (nunits); | |
678 for (i = 0; i < nunits; i++) | |
679 vec.quick_push (build_int_cst (TREE_TYPE (type), | |
680 (HOST_WIDE_INT_1U | |
681 << shifts[i]) - 1)); | |
682 mask = build_vector (type, vec); | |
683 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default); | |
684 if (op != unknown_optab | |
685 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing) | |
686 { | |
687 if (sign_p == UNSIGNED) | |
688 /* r = op0 & mask; */ | |
689 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask); | |
690 else if (addend != NULL_TREE) | |
691 { | |
692 /* t1 = op0 + addend; | |
693 t2 = t1 & mask; | |
694 r = t2 - addend; */ | |
695 op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
696 if (op != unknown_optab | |
697 && optab_handler (op, TYPE_MODE (type)) | |
698 != CODE_FOR_nothing) | |
699 { | |
700 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, | |
701 addend); | |
702 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type, | |
703 cur_op, mask); | |
704 op = optab_for_tree_code (MINUS_EXPR, type, | |
705 optab_default); | |
706 if (op != unknown_optab | |
707 && optab_handler (op, TYPE_MODE (type)) | |
708 != CODE_FOR_nothing) | |
709 return gimplify_build2 (gsi, MINUS_EXPR, type, | |
710 cur_op, addend); | |
711 } | |
712 } | |
713 } | |
714 } | |
715 } | |
716 | |
717 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN) | |
718 return NULL_TREE; | |
719 | |
720 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type))) | |
721 return NULL_TREE; | |
722 | |
723 cur_op = op0; | |
724 | |
725 switch (mode) | |
726 { | |
727 case 0: | |
728 gcc_assert (sign_p == UNSIGNED); | |
729 /* t1 = oprnd0 >> pre_shift; | |
730 t2 = t1 h* ml; | |
731 q = t2 >> post_shift; */ | |
732 cur_op = add_rshift (gsi, type, cur_op, pre_shifts); | |
733 if (cur_op == NULL_TREE) | |
328 return NULL_TREE; | 734 return NULL_TREE; |
329 | 735 break; |
330 return first; | 736 case 1: |
331 } | 737 gcc_assert (sign_p == UNSIGNED); |
332 | 738 for (i = 0; i < nunits; i++) |
333 return NULL_TREE; | 739 { |
740 shift_temps[i] = 1; | |
741 post_shifts[i]--; | |
742 } | |
743 break; | |
744 case 2: | |
745 case 3: | |
746 case 4: | |
747 case 5: | |
748 gcc_assert (sign_p == SIGNED); | |
749 for (i = 0; i < nunits; i++) | |
750 shift_temps[i] = prec - 1; | |
751 break; | |
752 default: | |
753 return NULL_TREE; | |
754 } | |
755 | |
756 auto_vec<tree, 32> vec (nunits); | |
757 for (i = 0; i < nunits; i++) | |
758 vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i])); | |
759 mulcst = build_vector (type, vec); | |
760 | |
761 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst); | |
762 | |
763 switch (mode) | |
764 { | |
765 case 0: | |
766 /* t1 = oprnd0 >> pre_shift; | |
767 t2 = t1 h* ml; | |
768 q = t2 >> post_shift; */ | |
769 cur_op = add_rshift (gsi, type, cur_op, post_shifts); | |
770 break; | |
771 case 1: | |
772 /* t1 = oprnd0 h* ml; | |
773 t2 = oprnd0 - t1; | |
774 t3 = t2 >> 1; | |
775 t4 = t1 + t3; | |
776 q = t4 >> (post_shift - 1); */ | |
777 op = optab_for_tree_code (MINUS_EXPR, type, optab_default); | |
778 if (op == unknown_optab | |
779 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) | |
780 return NULL_TREE; | |
781 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op); | |
782 tem = add_rshift (gsi, type, tem, shift_temps); | |
783 op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
784 if (op == unknown_optab | |
785 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) | |
786 return NULL_TREE; | |
787 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem); | |
788 cur_op = add_rshift (gsi, type, tem, post_shifts); | |
789 if (cur_op == NULL_TREE) | |
790 return NULL_TREE; | |
791 break; | |
792 case 2: | |
793 case 3: | |
794 case 4: | |
795 case 5: | |
796 /* t1 = oprnd0 h* ml; | |
797 t2 = t1; [ iff (mode & 2) != 0 ] | |
798 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ] | |
799 t3 = t2 >> post_shift; | |
800 t4 = oprnd0 >> (prec - 1); | |
801 q = t3 - t4; [ iff (mode & 1) == 0 ] | |
802 q = t4 - t3; [ iff (mode & 1) != 0 ] */ | |
803 if ((mode & 2) == 0) | |
804 { | |
805 op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
806 if (op == unknown_optab | |
807 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) | |
808 return NULL_TREE; | |
809 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0); | |
810 } | |
811 cur_op = add_rshift (gsi, type, cur_op, post_shifts); | |
812 if (cur_op == NULL_TREE) | |
813 return NULL_TREE; | |
814 tem = add_rshift (gsi, type, op0, shift_temps); | |
815 if (tem == NULL_TREE) | |
816 return NULL_TREE; | |
817 op = optab_for_tree_code (MINUS_EXPR, type, optab_default); | |
818 if (op == unknown_optab | |
819 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) | |
820 return NULL_TREE; | |
821 if ((mode & 1) == 0) | |
822 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem); | |
823 else | |
824 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op); | |
825 break; | |
826 default: | |
827 gcc_unreachable (); | |
828 } | |
829 | |
830 if (code == TRUNC_DIV_EXPR) | |
831 return cur_op; | |
832 | |
833 /* We divided. Now finish by: | |
834 t1 = q * oprnd1; | |
835 r = oprnd0 - t1; */ | |
836 op = optab_for_tree_code (MULT_EXPR, type, optab_default); | |
837 if (op == unknown_optab | |
838 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) | |
839 return NULL_TREE; | |
840 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1); | |
841 op = optab_for_tree_code (MINUS_EXPR, type, optab_default); | |
842 if (op == unknown_optab | |
843 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) | |
844 return NULL_TREE; | |
845 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem); | |
846 } | |
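
The multiply-highpart sequence this function emits (t1 = oprnd0 >> pre_shift; t2 = t1 h* ml; q = t2 >> post_shift) has a familiar scalar counterpart. A standalone sketch for unsigned 32-bit division by 3, using the classic multiplier 0xAAAAAAAB with post_shift = 1 (illustrative only, not GCC internals; the helper name is invented):

    #include <stdint.h>
    #include <assert.h>

    /* Illustrative helper, not from GCC: q = (x h* ml) >> post_shift.  */
    static uint32_t
    udiv3 (uint32_t x)
    {
      uint32_t hi = (uint32_t) (((uint64_t) x * 0xAAAAAAABu) >> 32);  /* x h* ml */
      return hi >> 1;                                                 /* post_shift */
    }

    int
    main (void)
    {
      for (uint32_t x = 0; x < 1000000; x++)
        assert (udiv3 (x) == x / 3);
      return 0;
    }
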
847 | |
848 /* Expand a vector condition to scalars, by using many conditions | |
849 on the vector's elements. */ | |
850 static void | |
851 expand_vector_condition (gimple_stmt_iterator *gsi) | |
852 { | |
853 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi)); | |
854 tree type = gimple_expr_type (stmt); | |
855 tree a = gimple_assign_rhs1 (stmt); | |
856 tree a1 = a; | |
857 tree a2 = NULL_TREE; | |
858 bool a_is_comparison = false; | |
859 tree b = gimple_assign_rhs2 (stmt); | |
860 tree c = gimple_assign_rhs3 (stmt); | |
861 vec<constructor_elt, va_gc> *v; | |
862 tree constr; | |
863 tree inner_type = TREE_TYPE (type); | |
864 tree cond_type = TREE_TYPE (TREE_TYPE (a)); | |
865 tree comp_inner_type = cond_type; | |
866 tree width = TYPE_SIZE (inner_type); | |
867 tree index = bitsize_int (0); | |
868 tree comp_width = width; | |
869 tree comp_index = index; | |
870 int nunits = TYPE_VECTOR_SUBPARTS (type); | |
871 int i; | |
872 location_t loc = gimple_location (gsi_stmt (*gsi)); | |
873 | |
874 if (!is_gimple_val (a)) | |
875 { | |
876 gcc_assert (COMPARISON_CLASS_P (a)); | |
877 a_is_comparison = true; | |
878 a1 = TREE_OPERAND (a, 0); | |
879 a2 = TREE_OPERAND (a, 1); | |
880 comp_inner_type = TREE_TYPE (TREE_TYPE (a1)); | |
881 comp_width = TYPE_SIZE (comp_inner_type); | |
882 } | |
883 | |
884 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a))) | |
885 return; | |
886 | |
887 /* Handle vector boolean types with bitmasks. If there is a comparison | |
888 and we can expand the comparison into the vector boolean bitmask, | |
889 or otherwise if it is compatible with type, we can transform | |
890 vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5; | |
891 into | |
892 tmp_6 = x_2 < y_3; | |
893 tmp_7 = tmp_6 & vbfld_4; | |
894 tmp_8 = ~tmp_6; | |
895 tmp_9 = tmp_8 & vbfld_5; | |
896 vbfld_1 = tmp_7 | tmp_9; | |
897 Similarly for vbfld_10 instead of x_2 < y_3. */ | |
898 if (VECTOR_BOOLEAN_TYPE_P (type) | |
899 && SCALAR_INT_MODE_P (TYPE_MODE (type)) | |
900 && (GET_MODE_BITSIZE (TYPE_MODE (type)) | |
901 < (TYPE_VECTOR_SUBPARTS (type) | |
902 * GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (type))))) | |
903 && (a_is_comparison | |
904 ? useless_type_conversion_p (type, TREE_TYPE (a)) | |
905 : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a)))) | |
906 { | |
907 if (a_is_comparison) | |
908 a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2); | |
909 a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b); | |
910 a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a); | |
911 a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c); | |
912 a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2); | |
913 gimple_assign_set_rhs_from_tree (gsi, a); | |
914 update_stmt (gsi_stmt (*gsi)); | |
915 return; | |
916 } | |
917 | |
918 /* TODO: try and find a smaller vector type. */ | |
919 | |
920 warning_at (loc, OPT_Wvector_operation_performance, | |
921 "vector condition will be expanded piecewise"); | |
922 | |
923 vec_alloc (v, nunits); | |
924 for (i = 0; i < nunits; i++) | |
925 { | |
926 tree aa, result; | |
927 tree bb = tree_vec_extract (gsi, inner_type, b, width, index); | |
928 tree cc = tree_vec_extract (gsi, inner_type, c, width, index); | |
929 if (a_is_comparison) | |
930 { | |
931 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1, | |
932 comp_width, comp_index); | |
933 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2, | |
934 comp_width, comp_index); | |
935 aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2); | |
936 } | |
937 else | |
938 aa = tree_vec_extract (gsi, cond_type, a, width, index); | |
939 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc); | |
940 constructor_elt ce = {NULL_TREE, result}; | |
941 v->quick_push (ce); | |
942 index = int_const_binop (PLUS_EXPR, index, width); | |
943 if (width == comp_width) | |
944 comp_index = index; | |
945 else | |
946 comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width); | |
947 } | |
948 | |
949 constr = build_constructor (type, v); | |
950 gimple_assign_set_rhs_from_tree (gsi, constr); | |
951 update_stmt (gsi_stmt (*gsi)); | |
334 } | 952 } |
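
The AND/NOT/IOR select built by the vector-boolean transformation above corresponds to the usual bitwise formulation of a vector conditional. A standalone GNU C sketch (illustrative only; the function name is invented):

    typedef int v4si __attribute__ ((vector_size (16)));

    /* Illustrative only: pick c[i] where a[i] < b[i], else d[i], using the
       -1/0 mask lanes produced by the comparison.  */
    v4si
    select_lt (v4si a, v4si b, v4si c, v4si d)
    {
      v4si m = a < b;
      return (m & c) | (~m & d);
    }
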
335 | 953 |
336 static tree | 954 static tree |
337 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type, | 955 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type, |
338 gimple assign, enum tree_code code) | 956 gassign *assign, enum tree_code code) |
339 { | 957 { |
340 enum machine_mode compute_mode = TYPE_MODE (compute_type); | 958 machine_mode compute_mode = TYPE_MODE (compute_type); |
341 | 959 |
342 /* If the compute mode is not a vector mode (hence we are not decomposing | 960 /* If the compute mode is not a vector mode (hence we are not decomposing |
343 a BLKmode vector to smaller, hardware-supported vectors), we may want | 961 a BLKmode vector to smaller, hardware-supported vectors), we may want |
344 to expand the operations in parallel. */ | 962 to expand the operations in parallel. */ |
345 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT | 963 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT |
350 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM) | 968 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM) |
351 switch (code) | 969 switch (code) |
352 { | 970 { |
353 case PLUS_EXPR: | 971 case PLUS_EXPR: |
354 case MINUS_EXPR: | 972 case MINUS_EXPR: |
355 if (!TYPE_OVERFLOW_TRAPS (type)) | 973 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)) |
356 return expand_vector_addition (gsi, do_binop, do_plus_minus, type, | 974 return expand_vector_addition (gsi, do_binop, do_plus_minus, type, |
357 gimple_assign_rhs1 (assign), | 975 gimple_assign_rhs1 (assign), |
358 gimple_assign_rhs2 (assign), code); | 976 gimple_assign_rhs2 (assign), code); |
359 break; | 977 break; |
360 | 978 |
361 case NEGATE_EXPR: | 979 case NEGATE_EXPR: |
362 if (!TYPE_OVERFLOW_TRAPS (type)) | 980 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type)) |
363 return expand_vector_addition (gsi, do_unop, do_negate, type, | 981 return expand_vector_addition (gsi, do_unop, do_negate, type, |
364 gimple_assign_rhs1 (assign), | 982 gimple_assign_rhs1 (assign), |
365 NULL_TREE, code); | 983 NULL_TREE, code); |
366 break; | 984 break; |
367 | 985 |
373 gimple_assign_rhs2 (assign), code); | 991 gimple_assign_rhs2 (assign), code); |
374 | 992 |
375 case BIT_NOT_EXPR: | 993 case BIT_NOT_EXPR: |
376 return expand_vector_parallel (gsi, do_unop, type, | 994 return expand_vector_parallel (gsi, do_unop, type, |
377 gimple_assign_rhs1 (assign), | 995 gimple_assign_rhs1 (assign), |
378 NULL_TREE, code); | 996 NULL_TREE, code); |
997 case EQ_EXPR: | |
998 case NE_EXPR: | |
999 case GT_EXPR: | |
1000 case LT_EXPR: | |
1001 case GE_EXPR: | |
1002 case LE_EXPR: | |
1003 case UNEQ_EXPR: | |
1004 case UNGT_EXPR: | |
1005 case UNLT_EXPR: | |
1006 case UNGE_EXPR: | |
1007 case UNLE_EXPR: | |
1008 case LTGT_EXPR: | |
1009 case ORDERED_EXPR: | |
1010 case UNORDERED_EXPR: | |
1011 { | |
1012 tree rhs1 = gimple_assign_rhs1 (assign); | |
1013 tree rhs2 = gimple_assign_rhs2 (assign); | |
1014 | |
1015 return expand_vector_comparison (gsi, type, rhs1, rhs2, code); | |
1016 } | |
1017 | |
1018 case TRUNC_DIV_EXPR: | |
1019 case TRUNC_MOD_EXPR: | |
1020 { | |
1021 tree rhs1 = gimple_assign_rhs1 (assign); | |
1022 tree rhs2 = gimple_assign_rhs2 (assign); | |
1023 tree ret; | |
1024 | |
1025 if (!optimize | |
1026 || !VECTOR_INTEGER_TYPE_P (type) | |
1027 || TREE_CODE (rhs2) != VECTOR_CST | |
1028 || !VECTOR_MODE_P (TYPE_MODE (type))) | |
1029 break; | |
1030 | |
1031 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code); | |
1032 if (ret != NULL_TREE) | |
1033 return ret; | |
1034 break; | |
1035 } | |
379 | 1036 |
380 default: | 1037 default: |
381 break; | 1038 break; |
382 } | 1039 } |
383 | 1040 |
388 else | 1045 else |
389 return expand_vector_piecewise (gsi, do_binop, type, compute_type, | 1046 return expand_vector_piecewise (gsi, do_binop, type, compute_type, |
390 gimple_assign_rhs1 (assign), | 1047 gimple_assign_rhs1 (assign), |
391 gimple_assign_rhs2 (assign), code); | 1048 gimple_assign_rhs2 (assign), code); |
392 } | 1049 } |
1050 | |
1051 /* Try to optimize | |
1052 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 }; | |
1053 style stmts into: | |
1054 _9 = { b_7, b_7, b_7, b_7 }; | |
1055 a_5 = _9 + { 0, 3, 6, 9 }; | |
1056 because vector splat operation is usually more efficient | |
1057 than piecewise initialization of the vector. */ | |
1058 | |
1059 static void | |
1060 optimize_vector_constructor (gimple_stmt_iterator *gsi) | |
1061 { | |
1062 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi)); | |
1063 tree lhs = gimple_assign_lhs (stmt); | |
1064 tree rhs = gimple_assign_rhs1 (stmt); | |
1065 tree type = TREE_TYPE (rhs); | |
1066 unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type); | |
1067 bool all_same = true; | |
1068 constructor_elt *elt; | |
1069 gimple *g; | |
1070 tree base = NULL_TREE; | |
1071 optab op; | |
1072 | |
1073 if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts) | |
1074 return; | |
1075 op = optab_for_tree_code (PLUS_EXPR, type, optab_default); | |
1076 if (op == unknown_optab | |
1077 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing) | |
1078 return; | |
1079 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt) | |
1080 if (TREE_CODE (elt->value) != SSA_NAME | |
1081 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE) | |
1082 return; | |
1083 else | |
1084 { | |
1085 tree this_base = elt->value; | |
1086 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value) | |
1087 all_same = false; | |
1088 for (j = 0; j < nelts + 1; j++) | |
1089 { | |
1090 g = SSA_NAME_DEF_STMT (this_base); | |
1091 if (is_gimple_assign (g) | |
1092 && gimple_assign_rhs_code (g) == PLUS_EXPR | |
1093 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST | |
1094 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME | |
1095 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g))) | |
1096 this_base = gimple_assign_rhs1 (g); | |
1097 else | |
1098 break; | |
1099 } | |
1100 if (i == 0) | |
1101 base = this_base; | |
1102 else if (this_base != base) | |
1103 return; | |
1104 } | |
1105 if (all_same) | |
1106 return; | |
1107 auto_vec<tree, 32> cst (nelts); | |
1108 for (i = 0; i < nelts; i++) | |
1109 { | |
1110 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value; | |
1111 tree elt = build_zero_cst (TREE_TYPE (base)); | |
1112 while (this_base != base) | |
1113 { | |
1114 g = SSA_NAME_DEF_STMT (this_base); | |
1115 elt = fold_binary (PLUS_EXPR, TREE_TYPE (base), | |
1116 elt, gimple_assign_rhs2 (g)); | |
1117 if (elt == NULL_TREE | |
1118 || TREE_CODE (elt) != INTEGER_CST | |
1119 || TREE_OVERFLOW (elt)) | |
1120 return; | |
1121 this_base = gimple_assign_rhs1 (g); | |
1122 } | |
1123 cst.quick_push (elt); | |
1124 } | |
1125 for (i = 0; i < nelts; i++) | |
1126 CONSTRUCTOR_ELT (rhs, i)->value = base; | |
1127 g = gimple_build_assign (make_ssa_name (type), rhs); | |
1128 gsi_insert_before (gsi, g, GSI_SAME_STMT); | |
1129 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g), | |
1130 build_vector (type, cst)); | |
1131 gsi_replace (gsi, g, false); | |
1132 } | |
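
A GNU C constructor of the shape this optimization targets (illustrative only, not a GCC test case; the function name is invented): the pass rewrites the element-wise initializer below into a splat of b plus the constant vector {0, 3, 6, 9}, as described in the comment above.

    typedef int v4si __attribute__ ((vector_size (16)));

    /* Illustrative only.  */
    v4si
    build_series (int b)
    {
      return (v4si) { b, b + 3, b + 6, b + 9 };
    }
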
393 | 1133 |
394 /* Return a type for the widest vector mode whose components are of mode | 1134 /* Return a type for the widest vector mode whose components are of type |
395 INNER_MODE, or NULL_TREE if none is found. | 1135 TYPE, or NULL_TREE if none is found. */ |
396 SATP is true for saturating fixed-point types. */ | |
397 | 1136 |
398 static tree | 1137 static tree |
399 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op, int satp) | 1138 type_for_widest_vector_mode (tree type, optab op) |
400 { | 1139 { |
401 enum machine_mode best_mode = VOIDmode, mode; | 1140 machine_mode inner_mode = TYPE_MODE (type); |
1141 machine_mode best_mode = VOIDmode, mode; | |
402 int best_nunits = 0; | 1142 int best_nunits = 0; |
403 | 1143 |
404 if (SCALAR_FLOAT_MODE_P (inner_mode)) | 1144 if (SCALAR_FLOAT_MODE_P (inner_mode)) |
405 mode = MIN_MODE_VECTOR_FLOAT; | 1145 mode = MIN_MODE_VECTOR_FLOAT; |
406 else if (SCALAR_FRACT_MODE_P (inner_mode)) | 1146 else if (SCALAR_FRACT_MODE_P (inner_mode)) |
412 else if (SCALAR_UACCUM_MODE_P (inner_mode)) | 1152 else if (SCALAR_UACCUM_MODE_P (inner_mode)) |
413 mode = MIN_MODE_VECTOR_UACCUM; | 1153 mode = MIN_MODE_VECTOR_UACCUM; |
414 else | 1154 else |
415 mode = MIN_MODE_VECTOR_INT; | 1155 mode = MIN_MODE_VECTOR_INT; |
416 | 1156 |
417 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) | 1157 FOR_EACH_MODE_FROM (mode, mode) |
418 if (GET_MODE_INNER (mode) == inner_mode | 1158 if (GET_MODE_INNER (mode) == inner_mode |
419 && GET_MODE_NUNITS (mode) > best_nunits | 1159 && GET_MODE_NUNITS (mode) > best_nunits |
420 && optab_handler (op, mode) != CODE_FOR_nothing) | 1160 && optab_handler (op, mode) != CODE_FOR_nothing) |
421 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode); | 1161 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode); |
422 | 1162 |
423 if (best_mode == VOIDmode) | 1163 if (best_mode == VOIDmode) |
424 return NULL_TREE; | 1164 return NULL_TREE; |
425 else | 1165 else |
426 { | 1166 return build_vector_type_for_mode (type, best_mode); |
427 /* For fixed-point modes, we need to pass satp as the 2nd parameter. */ | 1167 } |
428 if (ALL_FIXED_POINT_MODE_P (best_mode)) | 1168 |
429 return lang_hooks.types.type_for_mode (best_mode, satp); | 1169 |
430 | 1170 /* Build a reference to the element of the vector VECT. Function |
431 return lang_hooks.types.type_for_mode (best_mode, 1); | 1171 returns either the element itself, either BIT_FIELD_REF, or an |
432 } | 1172 ARRAY_REF expression. |
1173 | |
1174 GSI is required to insert temporary variables while building a | |
1175 refernece to the element of the vector VECT. | |
1176 | |
1177 PTMPVEC is a pointer to the temporary variable for caching | |
1178 purposes. In case when PTMPVEC is NULL new temporary variable | |
1179 will be created. */ | |
1180 static tree | |
1181 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec) | |
1182 { | |
1183 tree vect_type, vect_elt_type; | |
1184 gimple *asgn; | |
1185 tree tmpvec; | |
1186 tree arraytype; | |
1187 bool need_asgn = true; | |
1188 unsigned int elements; | |
1189 | |
1190 vect_type = TREE_TYPE (vect); | |
1191 vect_elt_type = TREE_TYPE (vect_type); | |
1192 elements = TYPE_VECTOR_SUBPARTS (vect_type); | |
1193 | |
1194 if (TREE_CODE (idx) == INTEGER_CST) | |
1195 { | |
1196 unsigned HOST_WIDE_INT index; | |
1197 | |
1198 /* Given that we're about to compute a binary modulus, | |
1199 we don't care about the high bits of the value. */ | |
1200 index = TREE_INT_CST_LOW (idx); | |
1201 if (!tree_fits_uhwi_p (idx) || index >= elements) | |
1202 { | |
1203 index &= elements - 1; | |
1204 idx = build_int_cst (TREE_TYPE (idx), index); | |
1205 } | |
1206 | |
1207 /* When lowering a vector statement sequence do some easy | |
1208 simplification by looking through intermediate vector results. */ | |
1209 if (TREE_CODE (vect) == SSA_NAME) | |
1210 { | |
1211 gimple *def_stmt = SSA_NAME_DEF_STMT (vect); | |
1212 if (is_gimple_assign (def_stmt) | |
1213 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST | |
1214 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)) | |
1215 vect = gimple_assign_rhs1 (def_stmt); | |
1216 } | |
1217 | |
1218 if (TREE_CODE (vect) == VECTOR_CST) | |
1219 return VECTOR_CST_ELT (vect, index); | |
1220 else if (TREE_CODE (vect) == CONSTRUCTOR | |
1221 && (CONSTRUCTOR_NELTS (vect) == 0 | |
1222 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value)) | |
1223 != VECTOR_TYPE)) | |
1224 { | |
1225 if (index < CONSTRUCTOR_NELTS (vect)) | |
1226 return CONSTRUCTOR_ELT (vect, index)->value; | |
1227 return build_zero_cst (vect_elt_type); | |
1228 } | |
1229 else | |
1230 { | |
1231 tree size = TYPE_SIZE (vect_elt_type); | |
1232 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index), | |
1233 size); | |
1234 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos); | |
1235 } | |
1236 } | |
1237 | |
1238 if (!ptmpvec) | |
1239 tmpvec = create_tmp_var (vect_type, "vectmp"); | |
1240 else if (!*ptmpvec) | |
1241 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp"); | |
1242 else | |
1243 { | |
1244 tmpvec = *ptmpvec; | |
1245 need_asgn = false; | |
1246 } | |
1247 | |
1248 if (need_asgn) | |
1249 { | |
1250 TREE_ADDRESSABLE (tmpvec) = 1; | |
1251 asgn = gimple_build_assign (tmpvec, vect); | |
1252 gsi_insert_before (gsi, asgn, GSI_SAME_STMT); | |
1253 } | |
1254 | |
1255 arraytype = build_array_type_nelts (vect_elt_type, elements); | |
1256 return build4 (ARRAY_REF, vect_elt_type, | |
1257 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec), | |
1258 idx, NULL_TREE, NULL_TREE); | |
1259 } | |
1260 | |
1261 /* Check if VEC_PERM_EXPR within the given setting is supported | |
1262 by hardware, or lower it piecewise. | |
1263 | |
1264 When VEC_PERM_EXPR has the same first and second operands: | |
1265 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be | |
1266 {v0[mask[0]], v0[mask[1]], ...} | |
1267 MASK and V0 must have the same number of elements. | |
1268 | |
1269 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to | |
1270 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...} | |
1271 V0 and V1 must have the same type. MASK, V0, V1 must have the | |
1272 same number of arguments. */ | |
1273 | |
1274 static void | |
1275 lower_vec_perm (gimple_stmt_iterator *gsi) | |
1276 { | |
1277 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi)); | |
1278 tree mask = gimple_assign_rhs3 (stmt); | |
1279 tree vec0 = gimple_assign_rhs1 (stmt); | |
1280 tree vec1 = gimple_assign_rhs2 (stmt); | |
1281 tree vect_type = TREE_TYPE (vec0); | |
1282 tree mask_type = TREE_TYPE (mask); | |
1283 tree vect_elt_type = TREE_TYPE (vect_type); | |
1284 tree mask_elt_type = TREE_TYPE (mask_type); | |
1285 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type); | |
1286 vec<constructor_elt, va_gc> *v; | |
1287 tree constr, t, si, i_val; | |
1288 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE; | |
1289 bool two_operand_p = !operand_equal_p (vec0, vec1, 0); | |
1290 location_t loc = gimple_location (gsi_stmt (*gsi)); | |
1291 unsigned i; | |
1292 | |
1293 if (TREE_CODE (mask) == SSA_NAME) | |
1294 { | |
1295 gimple *def_stmt = SSA_NAME_DEF_STMT (mask); | |
1296 if (is_gimple_assign (def_stmt) | |
1297 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST) | |
1298 mask = gimple_assign_rhs1 (def_stmt); | |
1299 } | |
1300 | |
1301 if (TREE_CODE (mask) == VECTOR_CST) | |
1302 { | |
1303 auto_vec_perm_indices sel_int (elements); | |
1304 | |
1305 for (i = 0; i < elements; ++i) | |
1306 sel_int.quick_push (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i)) | |
1307 & (2 * elements - 1)); | |
1308 | |
1309 if (can_vec_perm_p (TYPE_MODE (vect_type), false, &sel_int)) | |
1310 { | |
1311 gimple_assign_set_rhs3 (stmt, mask); | |
1312 update_stmt (stmt); | |
1313 return; | |
1314 } | |
1315 /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero | |
1316 vector as VEC1 and a right element shift MASK. */ | |
1317 if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type)) | |
1318 != CODE_FOR_nothing | |
1319 && TREE_CODE (vec1) == VECTOR_CST | |
1320 && initializer_zerop (vec1) | |
1321 && sel_int[0] | |
1322 && sel_int[0] < elements) | |
1323 { | |
1324 for (i = 1; i < elements; ++i) | |
1325 { | |
1326 unsigned int expected = i + sel_int[0]; | |
1327 /* Indices into the second vector are all equivalent. */ | |
1328 if (MIN (elements, (unsigned) sel_int[i]) | |
1329 != MIN (elements, expected)) | |
1330 break; | |
1331 } | |
1332 if (i == elements) | |
1333 { | |
1334 gimple_assign_set_rhs3 (stmt, mask); | |
1335 update_stmt (stmt); | |
1336 return; | |
1337 } | |
1338 } | |
1339 } | |
1340 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL)) | |
1341 return; | |
1342 | |
1343 warning_at (loc, OPT_Wvector_operation_performance, | |
1344 "vector shuffling operation will be expanded piecewise"); | |
1345 | |
1346 vec_alloc (v, elements); | |
1347 for (i = 0; i < elements; i++) | |
1348 { | |
1349 si = size_int (i); | |
1350 i_val = vector_element (gsi, mask, si, &masktmp); | |
1351 | |
1352 if (TREE_CODE (i_val) == INTEGER_CST) | |
1353 { | |
1354 unsigned HOST_WIDE_INT index; | |
1355 | |
1356 index = TREE_INT_CST_LOW (i_val); | |
1357 if (!tree_fits_uhwi_p (i_val) || index >= elements) | |
1358 i_val = build_int_cst (mask_elt_type, index & (elements - 1)); | |
1359 | |
1360 if (two_operand_p && (index & elements) != 0) | |
1361 t = vector_element (gsi, vec1, i_val, &vec1tmp); | |
1362 else | |
1363 t = vector_element (gsi, vec0, i_val, &vec0tmp); | |
1364 | |
1365 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE, | |
1366 true, GSI_SAME_STMT); | |
1367 } | |
1368 else | |
1369 { | |
1370 tree cond = NULL_TREE, v0_val; | |
1371 | |
1372 if (two_operand_p) | |
1373 { | |
1374 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val, | |
1375 build_int_cst (mask_elt_type, elements)); | |
1376 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, | |
1377 true, GSI_SAME_STMT); | |
1378 } | |
1379 | |
1380 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val, | |
1381 build_int_cst (mask_elt_type, elements - 1)); | |
1382 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE, | |
1383 true, GSI_SAME_STMT); | |
1384 | |
1385 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp); | |
1386 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE, | |
1387 true, GSI_SAME_STMT); | |
1388 | |
1389 if (two_operand_p) | |
1390 { | |
1391 tree v1_val; | |
1392 | |
1393 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp); | |
1394 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE, | |
1395 true, GSI_SAME_STMT); | |
1396 | |
1397 cond = fold_build2 (EQ_EXPR, boolean_type_node, | |
1398 cond, build_zero_cst (mask_elt_type)); | |
1399 cond = fold_build3 (COND_EXPR, vect_elt_type, | |
1400 cond, v0_val, v1_val); | |
1401 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE, | |
1402 true, GSI_SAME_STMT); | |
1403 } | |
1404 else | |
1405 t = v0_val; | |
1406 } | |
1407 | |
1408 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t); | |
1409 } | |
1410 | |
1411 constr = build_constructor (vect_type, v); | |
1412 gimple_assign_set_rhs_from_tree (gsi, constr); | |
1413 update_stmt (gsi_stmt (*gsi)); | |
1414 } | |
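
VEC_PERM_EXPR is the GIMPLE form of GCC's __builtin_shuffle, so its semantics can be shown with a standalone GNU C sketch (illustrative only; the function name is invented): mask indices 0..3 select from v0 and 4..7 from v1, taken modulo twice the element count as in the lowering above.

    typedef int v4si __attribute__ ((vector_size (16)));

    /* Illustrative only: interleave the low halves of v0 and v1.  */
    v4si
    shuffle_interleave (v4si v0, v4si v1)
    {
      v4si mask = { 0, 4, 1, 5 };
      return __builtin_shuffle (v0, v1, mask);
    }
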
1415 | |
1416 /* If OP is a uniform vector return the element it is a splat from. */ | |
1417 | |
1418 static tree | |
1419 ssa_uniform_vector_p (tree op) | |
1420 { | |
1421 if (TREE_CODE (op) == VECTOR_CST | |
1422 || TREE_CODE (op) == CONSTRUCTOR) | |
1423 return uniform_vector_p (op); | |
1424 if (TREE_CODE (op) == SSA_NAME) | |
1425 { | |
1426 gimple *def_stmt = SSA_NAME_DEF_STMT (op); | |
1427 if (gimple_assign_single_p (def_stmt)) | |
1428 return uniform_vector_p (gimple_assign_rhs1 (def_stmt)); | |
1429 } | |
1430 return NULL_TREE; | |
1431 } | |
1432 | |
1433 /* Return type in which CODE operation with optab OP can be | |
1434 computed. */ | |
1435 | |
1436 static tree | |
1437 get_compute_type (enum tree_code code, optab op, tree type) | |
1438 { | |
1439 /* For very wide vectors, try using a smaller vector mode. */ | |
1440 tree compute_type = type; | |
1441 if (op | |
1442 && (!VECTOR_MODE_P (TYPE_MODE (type)) | |
1443 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)) | |
1444 { | |
1445 tree vector_compute_type | |
1446 = type_for_widest_vector_mode (TREE_TYPE (type), op); | |
1447 if (vector_compute_type != NULL_TREE | |
1448 && (TYPE_VECTOR_SUBPARTS (vector_compute_type) | |
1449 < TYPE_VECTOR_SUBPARTS (compute_type)) | |
1450 && TYPE_VECTOR_SUBPARTS (vector_compute_type) > 1 | |
1451 && (optab_handler (op, TYPE_MODE (vector_compute_type)) | |
1452 != CODE_FOR_nothing)) | |
1453 compute_type = vector_compute_type; | |
1454 } | |
1455 | |
1456 /* If we are breaking a BLKmode vector into smaller pieces, | |
1457 type_for_widest_vector_mode has already looked into the optab, | |
1458 so skip these checks. */ | |
1459 if (compute_type == type) | |
1460 { | |
1461 machine_mode compute_mode = TYPE_MODE (compute_type); | |
1462 if (VECTOR_MODE_P (compute_mode)) | |
1463 { | |
1464 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing) | |
1465 return compute_type; | |
1466 if (code == MULT_HIGHPART_EXPR | |
1467 && can_mult_highpart_p (compute_mode, | |
1468 TYPE_UNSIGNED (compute_type))) | |
1469 return compute_type; | |
1470 } | |
1471 /* There is no operation in hardware, so fall back to scalars. */ | |
1472 compute_type = TREE_TYPE (type); | |
1473 } | |
1474 | |
1475 return compute_type; | |
1476 } | |
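Put differently, an over-wide vector operation is first retried in the widest narrower vector mode whose optab handler exists, and only degenerates to element-wise scalar code when no vector mode fits. A rough sketch of the effect, assuming a target whose widest supported integer vector mode is 128 bits (the widths are illustrative, not taken from the sources):

    typedef int v16si __attribute__ ((vector_size (64)));   /* BLKmode vector type.  */

    v16si
    add_wide (v16si a, v16si b)
    {
      /* get_compute_type would pick a four-lane 128-bit compute type here,
         so the addition is expanded as four narrower vector additions
         rather than sixteen scalar ones.  */
      return a + b;
    }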
1477 | |
1478 /* Helper function of expand_vector_operations_1. Return number of | |
1479 vector elements for vector types or 1 for other types. */ | |
1480 | |
1481 static inline int | |
1482 count_type_subparts (tree type) | |
1483 { | |
1484 return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1; | |
1485 } | |
1486 | |
1487 static tree | |
1488 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, | |
1489 tree bitpos, tree bitsize, enum tree_code code, | |
1490 tree type ATTRIBUTE_UNUSED) | |
1491 { | |
1492 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE) | |
1493 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); | |
1494 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE) | |
1495 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); | |
1496 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi)); | |
1497 return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b); | |
1498 } | |
1499 | |
1500 /* Expand a vector COND_EXPR to scalars, piecewise. */ | |
1501 static void | |
1502 expand_vector_scalar_condition (gimple_stmt_iterator *gsi) | |
1503 { | |
1504 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi)); | |
1505 tree type = gimple_expr_type (stmt); | |
1506 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type); | |
1507 machine_mode compute_mode = TYPE_MODE (compute_type); | |
1508 gcc_assert (compute_mode != BLKmode); | |
1509 tree lhs = gimple_assign_lhs (stmt); | |
1510 tree rhs2 = gimple_assign_rhs2 (stmt); | |
1511 tree rhs3 = gimple_assign_rhs3 (stmt); | |
1512 tree new_rhs; | |
1513 | |
1514 /* If the compute mode is not a vector mode (hence we are not decomposing | |
1515 a BLKmode vector to smaller, hardware-supported vectors), we may want | |
1516 to expand the operations in parallel. */ | |
1517 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT | |
1518 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT | |
1519 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT | |
1520 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT | |
1521 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM | |
1522 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM) | |
1523 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3, | |
1524 COND_EXPR); | |
1525 else | |
1526 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type, | |
1527 rhs2, rhs3, COND_EXPR); | |
1528 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs))) | |
1529 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), | |
1530 new_rhs); | |
1531 | |
1532 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One | |
1533 way to do it is to change expand_vector_operation and its callees to | |
1534 return a tree_code, RHS1 and RHS2 instead of a tree. */ | |
1535 gimple_assign_set_rhs_from_tree (gsi, new_rhs); | |
1536 update_stmt (gsi_stmt (*gsi)); | |
433 } | 1537 } |
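This path covers a COND_EXPR whose condition is scalar while the selected values are vectors of a mode the target cannot handle directly; do_cond then re-emits the same condition for every extracted piece. A hedged sketch of source that earlier passes might turn into such a statement (whether it actually does depends on how the ternary is gimplified; the widths are again illustrative):

    typedef int v16si __attribute__ ((vector_size (64)));

    v16si
    pick (int c, v16si a, v16si b)
    {
      /* If this survives as a single GIMPLE COND_EXPR with a BLKmode vector
         result, it is split into narrower pieces, each guarded by C.  */
      return c ? a : b;
    }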
434 | 1538 |
435 /* Process one statement. If we identify a vector operation, expand it. */ | 1539 /* Process one statement. If we identify a vector operation, expand it. */ |
436 | 1540 |
437 static void | 1541 static void |
438 expand_vector_operations_1 (gimple_stmt_iterator *gsi) | 1542 expand_vector_operations_1 (gimple_stmt_iterator *gsi) |
439 { | 1543 { |
440 gimple stmt = gsi_stmt (*gsi); | 1544 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE; |
441 tree lhs, rhs1, rhs2 = NULL, type, compute_type; | |
442 enum tree_code code; | 1545 enum tree_code code; |
443 enum machine_mode compute_mode; | 1546 optab op = unknown_optab; |
444 optab op = NULL; | |
445 enum gimple_rhs_class rhs_class; | 1547 enum gimple_rhs_class rhs_class; |
446 tree new_rhs; | 1548 tree new_rhs; |
447 | 1549 |
448 if (gimple_code (stmt) != GIMPLE_ASSIGN) | 1550 /* Only consider code == GIMPLE_ASSIGN. */ |
1551 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi)); | |
1552 if (!stmt) | |
449 return; | 1553 return; |
450 | 1554 |
451 code = gimple_assign_rhs_code (stmt); | 1555 code = gimple_assign_rhs_code (stmt); |
452 rhs_class = get_gimple_rhs_class (code); | 1556 rhs_class = get_gimple_rhs_class (code); |
1557 lhs = gimple_assign_lhs (stmt); | |
1558 | |
1559 if (code == VEC_PERM_EXPR) | |
1560 { | |
1561 lower_vec_perm (gsi); | |
1562 return; | |
1563 } | |
1564 | |
1565 if (code == VEC_COND_EXPR) | |
1566 { | |
1567 expand_vector_condition (gsi); | |
1568 return; | |
1569 } | |
1570 | |
1571 if (code == COND_EXPR | |
1572 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE | |
1573 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode) | |
1574 { | |
1575 expand_vector_scalar_condition (gsi); | |
1576 return; | |
1577 } | |
1578 | |
1579 if (code == CONSTRUCTOR | |
1580 && TREE_CODE (lhs) == SSA_NAME | |
1581 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs))) | |
1582 && !gimple_clobber_p (stmt) | |
1583 && optimize) | |
1584 { | |
1585 optimize_vector_constructor (gsi); | |
1586 return; | |
1587 } | |
453 | 1588 |
454 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS) | 1589 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS) |
455 return; | 1590 return; |
456 | 1591 |
457 lhs = gimple_assign_lhs (stmt); | |
458 rhs1 = gimple_assign_rhs1 (stmt); | 1592 rhs1 = gimple_assign_rhs1 (stmt); |
459 type = gimple_expr_type (stmt); | 1593 type = gimple_expr_type (stmt); |
460 if (rhs_class == GIMPLE_BINARY_RHS) | 1594 if (rhs_class == GIMPLE_BINARY_RHS) |
461 rhs2 = gimple_assign_rhs2 (stmt); | 1595 rhs2 = gimple_assign_rhs2 (stmt); |
462 | 1596 |
463 if (TREE_CODE (type) != VECTOR_TYPE) | 1597 if (TREE_CODE (type) != VECTOR_TYPE) |
464 return; | 1598 return; |
465 | 1599 |
466 if (code == NOP_EXPR | 1600 /* If the vector operation is operating on all same vector elements |
1601 implement it with a scalar operation and a splat if the target | |
1602 supports the scalar operation. */ | |
1603 tree srhs1, srhs2 = NULL_TREE; | |
1604 if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE | |
1605 && (rhs2 == NULL_TREE | |
1606 || (! VECTOR_TYPE_P (TREE_TYPE (rhs2)) | |
1607 && (srhs2 = rhs2)) | |
1608 || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE) | |
1609 /* As we query direct optabs, restrict to non-convert operations. */ | |
1610 && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1))) | |
1611 { | |
1612 op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar); | |
1613 if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB | |
1614 && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing) | |
1615 { | |
1616 tree slhs = make_ssa_name (TREE_TYPE (srhs1)); | |
1617 gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2); | |
1618 gsi_insert_before (gsi, repl, GSI_SAME_STMT); | |
1619 gimple_assign_set_rhs_from_tree (gsi, | |
1620 build_vector_from_val (type, slhs)); | |
1621 update_stmt (stmt); | |
1622 return; | |
1623 } | |
1624 } | |
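      /* Illustrative effect of the splat shortcut above (a sketch, not part
         of the original sources): for a four-lane integer addition where both
         operands are splats,

             {x,x,x,x} + {y,y,y,y}

         is rewritten as one scalar addition whose result is then broadcast:

             slhs = x + y;
             lhs = {slhs, slhs, slhs, slhs};

         provided the scalar optab for the element mode is available.  */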
1625 | |
1626 /* A scalar operation pretending to be a vector one. */ | |
1627 if (VECTOR_BOOLEAN_TYPE_P (type) | |
1628 && !VECTOR_MODE_P (TYPE_MODE (type)) | |
1629 && TYPE_MODE (type) != BLKmode) | |
1630 return; | |
1631 | |
1632 if (CONVERT_EXPR_CODE_P (code) | |
467 || code == FLOAT_EXPR | 1633 || code == FLOAT_EXPR |
468 || code == FIX_TRUNC_EXPR | 1634 || code == FIX_TRUNC_EXPR |
469 || code == VIEW_CONVERT_EXPR) | 1635 || code == VIEW_CONVERT_EXPR) |
470 return; | 1636 return; |
471 | 1637 |
472 gcc_assert (code != CONVERT_EXPR); | |
473 | |
474 /* The signedness is determined from the input argument. */ | 1638 /* The signedness is determined from the input argument. */ |
475 if (code == VEC_UNPACK_FLOAT_HI_EXPR | 1639 if (code == VEC_UNPACK_FLOAT_HI_EXPR |
476 || code == VEC_UNPACK_FLOAT_LO_EXPR) | 1640 || code == VEC_UNPACK_FLOAT_LO_EXPR) |
477 type = TREE_TYPE (rhs1); | 1641 { |
1642 type = TREE_TYPE (rhs1); | |
1643 /* We do not know how to scalarize those. */ | |
1644 return; | |
1645 } | |
1646 | |
1647 /* For widening/narrowing vector operations, the relevant type is that of | |
1648 the arguments, not of the widened result. VEC_UNPACK_FLOAT_*_EXPR was | |
1649 handled the same way above. */ | |
1650 if (code == WIDEN_SUM_EXPR | |
1651 || code == VEC_WIDEN_MULT_HI_EXPR | |
1652 || code == VEC_WIDEN_MULT_LO_EXPR | |
1653 || code == VEC_WIDEN_MULT_EVEN_EXPR | |
1654 || code == VEC_WIDEN_MULT_ODD_EXPR | |
1655 || code == VEC_UNPACK_HI_EXPR | |
1656 || code == VEC_UNPACK_LO_EXPR | |
1657 || code == VEC_PACK_TRUNC_EXPR | |
1658 || code == VEC_PACK_SAT_EXPR | |
1659 || code == VEC_PACK_FIX_TRUNC_EXPR | |
1660 || code == VEC_WIDEN_LSHIFT_HI_EXPR | |
1661 || code == VEC_WIDEN_LSHIFT_LO_EXPR) | |
1662 { | |
1663 type = TREE_TYPE (rhs1); | |
1664 /* We do not know how to scalarize those. */ | |
1665 return; | |
1666 } | |
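          /* Example (illustrative only): VEC_UNPACK_HI_EXPR takes, say, a
             vector of eight shorts and yields a vector of four ints built
             from its high half, so the mode that matters is that of RHS1,
             not of the LHS; these widening/narrowing operations are simply
             left alone here.  */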
478 | 1667 |
479 /* Choose between vector shift/rotate by vector and vector shift/rotate by | 1668 /* Choose between vector shift/rotate by vector and vector shift/rotate by |
480 scalar */ | 1669 scalar */ |
481 if (code == LSHIFT_EXPR | 1670 if (code == LSHIFT_EXPR |
482 || code == RSHIFT_EXPR | 1671 || code == RSHIFT_EXPR |
483 || code == LROTATE_EXPR | 1672 || code == LROTATE_EXPR |
484 || code == RROTATE_EXPR) | 1673 || code == RROTATE_EXPR) |
485 { | 1674 { |
486 bool vector_scalar_shift; | 1675 optab opv; |
487 op = optab_for_tree_code (code, type, optab_scalar); | 1676 |
488 | 1677 /* Check whether we have vector <op> {x,x,x,x} where x |
489 /* Vector/Scalar shift is supported. */ | 1678 could be a scalar variable or a constant. Transform |
490 vector_scalar_shift = (op && (optab_handler (op, TYPE_MODE (type)) | 1679 vector <op> {x,x,x,x} ==> vector <op> scalar. */ |
491 != CODE_FOR_nothing)); | 1680 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2))) |
492 | |
493 /* If the 2nd argument is vector, we need a vector/vector shift. | |
494 Except all the elements in the second vector are the same. */ | |
495 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2)))) | |
496 { | 1681 { |
497 tree first; | 1682 tree first; |
498 gimple def_stmt; | 1683 |
499 | 1684 if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE) |
500 /* Check whether we have vector <op> {x,x,x,x} where x | |
501 could be a scalar variable or a constant. Transform | |
502 vector <op> {x,x,x,x} ==> vector <op> scalar. */ | |
503 if (vector_scalar_shift | |
504 && ((TREE_CODE (rhs2) == VECTOR_CST | |
505 && (first = uniform_vector_p (rhs2)) != NULL_TREE) | |
506 || (TREE_CODE (rhs2) == SSA_NAME | |
507 && (def_stmt = SSA_NAME_DEF_STMT (rhs2)) | |
508 && gimple_assign_single_p (def_stmt) | |
509 && (first = uniform_vector_p | |
510 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE))) | |
511 { | 1685 { |
512 gimple_assign_set_rhs2 (stmt, first); | 1686 gimple_assign_set_rhs2 (stmt, first); |
513 update_stmt (stmt); | 1687 update_stmt (stmt); |
514 rhs2 = first; | 1688 rhs2 = first; |
515 } | 1689 } |
516 else | |
517 op = optab_for_tree_code (code, type, optab_vector); | |
518 } | 1690 } |
519 | 1691 |
520 /* Try for a vector/scalar shift, and if we don't have one, see if we | 1692 opv = optab_for_tree_code (code, type, optab_vector); |
521 have a vector/vector shift */ | 1693 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2))) |
522 else if (!vector_scalar_shift) | 1694 op = opv; |
523 { | 1695 else |
524 op = optab_for_tree_code (code, type, optab_vector); | 1696 { |
525 | 1697 op = optab_for_tree_code (code, type, optab_scalar); |
526 if (op && (optab_handler (op, TYPE_MODE (type)) | 1698 |
527 != CODE_FOR_nothing)) | 1699 compute_type = get_compute_type (code, op, type); |
1700 if (compute_type == type) | |
1701 return; | |
1702 /* The rtl expander will expand vector/scalar as vector/vector | |
1703 if necessary. Pick one with wider vector type. */ | |
1704 tree compute_vtype = get_compute_type (code, opv, type); | |
1705 if (count_type_subparts (compute_vtype) | |
1706 > count_type_subparts (compute_type)) | |
528 { | 1707 { |
529 /* Transform vector <op> scalar => vector <op> {x,x,x,x}. */ | 1708 compute_type = compute_vtype; |
530 int n_parts = TYPE_VECTOR_SUBPARTS (type); | 1709 op = opv; |
531 int part_size = tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1); | 1710 } |
532 tree part_type = lang_hooks.types.type_for_size (part_size, 1); | 1711 } |
533 tree vect_type = build_vector_type (part_type, n_parts); | 1712 |
534 | 1713 if (code == LROTATE_EXPR || code == RROTATE_EXPR) |
535 rhs2 = fold_convert (part_type, rhs2); | 1714 { |
536 rhs2 = build_vector_from_val (vect_type, rhs2); | 1715 if (compute_type == NULL_TREE) |
537 gimple_assign_set_rhs2 (stmt, rhs2); | 1716 compute_type = get_compute_type (code, op, type); |
538 update_stmt (stmt); | 1717 if (compute_type == type) |
1718 return; | |
1719 /* Before splitting vector rotates into scalar rotates, | |
1720 see if we can use vector shifts and BIT_IOR_EXPR | |
1721 instead. For vector-by-vector rotates we would also | |
1722 need to check BIT_AND_EXPR and NEGATE_EXPR; punt on those | |
1723 for now, since fold does not seem to create such rotates anyway. */ | |
1724 if (compute_type == TREE_TYPE (type) | |
1725 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2))) | |
1726 { | |
1727 optab oplv = vashl_optab, opl = ashl_optab; | |
1728 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab; | |
1729 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type); | |
1730 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type); | |
1731 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type); | |
1732 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type); | |
1733 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type); | |
1734 /* The rtl expander will expand vector/scalar as vector/vector | |
1735 if necessary. Pick one with wider vector type. */ | |
1736 if (count_type_subparts (compute_lvtype) | |
1737 > count_type_subparts (compute_ltype)) | |
1738 { | |
1739 compute_ltype = compute_lvtype; | |
1740 opl = oplv; | |
1741 } | |
1742 if (count_type_subparts (compute_rvtype) | |
1743 > count_type_subparts (compute_rtype)) | |
1744 { | |
1745 compute_rtype = compute_rvtype; | |
1746 opr = oprv; | |
1747 } | |
1748 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and | |
1749 BIT_IOR_EXPR. */ | |
1750 compute_type = compute_ltype; | |
1751 if (count_type_subparts (compute_type) | |
1752 > count_type_subparts (compute_rtype)) | |
1753 compute_type = compute_rtype; | |
1754 if (count_type_subparts (compute_type) | |
1755 > count_type_subparts (compute_otype)) | |
1756 compute_type = compute_otype; | |
1757 /* Verify all 3 operations can be performed in that type. */ | |
1758 if (compute_type != TREE_TYPE (type)) | |
1759 { | |
1760 if (optab_handler (opl, TYPE_MODE (compute_type)) | |
1761 == CODE_FOR_nothing | |
1762 || optab_handler (opr, TYPE_MODE (compute_type)) | |
1763 == CODE_FOR_nothing | |
1764 || optab_handler (opo, TYPE_MODE (compute_type)) | |
1765 == CODE_FOR_nothing) | |
1766 compute_type = TREE_TYPE (type); | |
1767 } | |
539 } | 1768 } |
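              /* Sketch of the fallback prepared above (illustrative, not part
                 of the original sources): when vector shifts and BIT_IOR_EXPR
                 are available but a vector rotate is not, a rotate by N over
                 ELEM_BITS-wide lanes can later be expanded as

                     (x << N) | (x >> (ELEM_BITS - N))

                 which is why the LSHIFT_EXPR, RSHIFT_EXPR and BIT_IOR_EXPR
                 compute types are compared and verified here.  */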
540 } | 1769 } |
541 } | 1770 } |
542 else | 1771 else |
543 op = optab_for_tree_code (code, type, optab_default); | 1772 op = optab_for_tree_code (code, type, optab_default); |
544 | |
545 /* For widening/narrowing vector operations, the relevant type is of the | |
546 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is | |
547 calculated in the same way above. */ | |
548 if (code == WIDEN_SUM_EXPR | |
549 || code == VEC_WIDEN_MULT_HI_EXPR | |
550 || code == VEC_WIDEN_MULT_LO_EXPR | |
551 || code == VEC_UNPACK_HI_EXPR | |
552 || code == VEC_UNPACK_LO_EXPR | |
553 || code == VEC_PACK_TRUNC_EXPR | |
554 || code == VEC_PACK_SAT_EXPR | |
555 || code == VEC_PACK_FIX_TRUNC_EXPR) | |
556 type = TREE_TYPE (rhs1); | |
557 | 1773 |
558 /* Optabs will try converting a negation into a subtraction, so | 1774 /* Optabs will try converting a negation into a subtraction, so |
559 look for it as well. TODO: negation of floating-point vectors | 1775 look for it as well. TODO: negation of floating-point vectors |
560 might be turned into an exclusive OR toggling the sign bit. */ | 1776 might be turned into an exclusive OR toggling the sign bit. */ |
561 if (op == NULL | 1777 if (op == unknown_optab |
562 && code == NEGATE_EXPR | 1778 && code == NEGATE_EXPR |
563 && INTEGRAL_TYPE_P (TREE_TYPE (type))) | 1779 && INTEGRAL_TYPE_P (TREE_TYPE (type))) |
564 op = optab_for_tree_code (MINUS_EXPR, type, optab_default); | 1780 op = optab_for_tree_code (MINUS_EXPR, type, optab_default); |
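  /* Illustrative reading (not new behaviour): without a direct vector
     negation optab, -v can still be computed in vector mode as
     {0,...,0} - v, so finding a MINUS_EXPR optab is enough to keep a
     vector compute type.  */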
565 | 1781 |
566 /* For very wide vectors, try using a smaller vector mode. */ | 1782 if (compute_type == NULL_TREE) |
567 compute_type = type; | 1783 compute_type = get_compute_type (code, op, type); |
568 if (TYPE_MODE (type) == BLKmode && op) | |
569 { | |
570 tree vector_compute_type | |
571 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op, | |
572 TYPE_SATURATING (TREE_TYPE (type))); | |
573 if (vector_compute_type != NULL_TREE | |
574 && (TYPE_VECTOR_SUBPARTS (vector_compute_type) | |
575 < TYPE_VECTOR_SUBPARTS (compute_type))) | |
576 compute_type = vector_compute_type; | |
577 } | |
578 | |
579 /* If we are breaking a BLKmode vector into smaller pieces, | |
580 type_for_widest_vector_mode has already looked into the optab, | |
581 so skip these checks. */ | |
582 if (compute_type == type) | 1784 if (compute_type == type) |
583 { | 1785 return; |
584 compute_mode = TYPE_MODE (compute_type); | 1786 |
585 if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT | |
586 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT | |
587 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FRACT | |
588 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_UFRACT | |
589 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_ACCUM | |
590 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_UACCUM) | |
591 && op != NULL | |
592 && optab_handler (op, compute_mode) != CODE_FOR_nothing) | |
593 return; | |
594 else | |
595 /* There is no operation in hardware, so fall back to scalars. */ | |
596 compute_type = TREE_TYPE (type); | |
597 } | |
598 | |
599 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR); | |
600 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code); | 1787 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code); |
1788 | |
1789 /* Leave expression untouched for later expansion. */ | |
1790 if (new_rhs == NULL_TREE) | |
1791 return; | |
1792 | |
601 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs))) | 1793 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs))) |
602 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), | 1794 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), |
603 new_rhs); | 1795 new_rhs); |
604 | 1796 |
605 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One | 1797 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One |
610 } | 1802 } |
611 | 1803 |
612 /* Use this to lower vector operations introduced by the vectorizer, | 1804 /* Use this to lower vector operations introduced by the vectorizer, |
613 if it may need the bit-twiddling tricks implemented in this file. */ | 1805 if it may need the bit-twiddling tricks implemented in this file. */ |
614 | 1806 |
615 static bool | |
616 gate_expand_vector_operations (void) | |
617 { | |
618 return flag_tree_vectorize != 0; | |
619 } | |
620 | |
621 static unsigned int | 1807 static unsigned int |
622 expand_vector_operations (void) | 1808 expand_vector_operations (void) |
623 { | 1809 { |
624 gimple_stmt_iterator gsi; | 1810 gimple_stmt_iterator gsi; |
625 basic_block bb; | 1811 basic_block bb; |
626 bool cfg_changed = false; | 1812 bool cfg_changed = false; |
627 | 1813 |
628 FOR_EACH_BB (bb) | 1814 FOR_EACH_BB_FN (bb, cfun) |
629 { | 1815 { |
630 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | 1816 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) |
631 { | 1817 { |
632 expand_vector_operations_1 (&gsi); | 1818 expand_vector_operations_1 (&gsi); |
633 /* ??? If we do not cleanup EH then we will ICE in | 1819 /* ??? If we do not cleanup EH then we will ICE in |
641 } | 1827 } |
642 | 1828 |
643 return cfg_changed ? TODO_cleanup_cfg : 0; | 1829 return cfg_changed ? TODO_cleanup_cfg : 0; |
644 } | 1830 } |
645 | 1831 |
646 struct gimple_opt_pass pass_lower_vector = | 1832 namespace { |
647 { | 1833 |
648 { | 1834 const pass_data pass_data_lower_vector = |
649 GIMPLE_PASS, | 1835 { |
650 "veclower", /* name */ | 1836 GIMPLE_PASS, /* type */ |
651 0, /* gate */ | 1837 "veclower", /* name */ |
652 expand_vector_operations, /* execute */ | 1838 OPTGROUP_VEC, /* optinfo_flags */ |
653 NULL, /* sub */ | 1839 TV_NONE, /* tv_id */ |
654 NULL, /* next */ | 1840 PROP_cfg, /* properties_required */ |
655 0, /* static_pass_number */ | 1841 PROP_gimple_lvec, /* properties_provided */ |
656 TV_NONE, /* tv_id */ | 1842 0, /* properties_destroyed */ |
657 PROP_cfg, /* properties_required */ | 1843 0, /* todo_flags_start */ |
658 0, /* properties_provided */ | 1844 TODO_update_ssa, /* todo_flags_finish */ |
659 0, /* properties_destroyed */ | |
660 0, /* todo_flags_start */ | |
661 TODO_dump_func | TODO_update_ssa /* todo_flags_finish */ | |
662 | TODO_verify_ssa | |
663 | TODO_verify_stmts | TODO_verify_flow | |
664 } | |
665 }; | 1845 }; |
666 | 1846 |
667 struct gimple_opt_pass pass_lower_vector_ssa = | 1847 class pass_lower_vector : public gimple_opt_pass |
668 { | 1848 { |
669 { | 1849 public: |
670 GIMPLE_PASS, | 1850 pass_lower_vector (gcc::context *ctxt) |
671 "veclower2", /* name */ | 1851 : gimple_opt_pass (pass_data_lower_vector, ctxt) |
672 gate_expand_vector_operations, /* gate */ | 1852 {} |
673 expand_vector_operations, /* execute */ | 1853 |
674 NULL, /* sub */ | 1854 /* opt_pass methods: */ |
675 NULL, /* next */ | 1855 virtual bool gate (function *fun) |
676 0, /* static_pass_number */ | 1856 { |
677 TV_NONE, /* tv_id */ | 1857 return !(fun->curr_properties & PROP_gimple_lvec); |
678 PROP_cfg, /* properties_required */ | 1858 } |
679 0, /* properties_provided */ | 1859 |
680 0, /* properties_destroyed */ | 1860 virtual unsigned int execute (function *) |
681 0, /* todo_flags_start */ | 1861 { |
682 TODO_dump_func | TODO_update_ssa /* todo_flags_finish */ | 1862 return expand_vector_operations (); |
683 | TODO_verify_ssa | 1863 } |
684 | TODO_verify_stmts | TODO_verify_flow | 1864 |
685 } | 1865 }; // class pass_lower_vector |
1866 | |
1867 } // anon namespace | |
1868 | |
1869 gimple_opt_pass * | |
1870 make_pass_lower_vector (gcc::context *ctxt) | |
1871 { | |
1872 return new pass_lower_vector (ctxt); | |
1873 } | |
1874 | |
1875 namespace { | |
1876 | |
1877 const pass_data pass_data_lower_vector_ssa = | |
1878 { | |
1879 GIMPLE_PASS, /* type */ | |
1880 "veclower2", /* name */ | |
1881 OPTGROUP_VEC, /* optinfo_flags */ | |
1882 TV_NONE, /* tv_id */ | |
1883 PROP_cfg, /* properties_required */ | |
1884 PROP_gimple_lvec, /* properties_provided */ | |
1885 0, /* properties_destroyed */ | |
1886 0, /* todo_flags_start */ | |
1887 ( TODO_update_ssa | |
1888 | TODO_cleanup_cfg ), /* todo_flags_finish */ | |
686 }; | 1889 }; |
687 | 1890 |
1891 class pass_lower_vector_ssa : public gimple_opt_pass | |
1892 { | |
1893 public: | |
1894 pass_lower_vector_ssa (gcc::context *ctxt) | |
1895 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt) | |
1896 {} | |
1897 | |
1898 /* opt_pass methods: */ | |
1899 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); } | |
1900 virtual unsigned int execute (function *) | |
1901 { | |
1902 return expand_vector_operations (); | |
1903 } | |
1904 | |
1905 }; // class pass_lower_vector_ssa | |
1906 | |
1907 } // anon namespace | |
1908 | |
1909 gimple_opt_pass * | |
1910 make_pass_lower_vector_ssa (gcc::context *ctxt) | |
1911 { | |
1912 return new pass_lower_vector_ssa (ctxt); | |
1913 } | |
1914 | |
688 #include "gt-tree-vect-generic.h" | 1915 #include "gt-tree-vect-generic.h" |