comparison gcc/tree-vect-generic.c @ 16:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents f6334be47118
children 84e7813d76e9
15:561a7518be6b 16:04ced10e8804
1 /* Lower vector operations to scalar operations. 1 /* Lower vector operations to scalar operations.
2 Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 2 Copyright (C) 2004-2017 Free Software Foundation, Inc.
3 Free Software Foundation, Inc.
4 3
5 This file is part of GCC. 4 This file is part of GCC.
6 5
7 GCC is free software; you can redistribute it and/or modify it 6 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published by the 7 under the terms of the GNU General Public License as published by the
19 <http://www.gnu.org/licenses/>. */ 18 <http://www.gnu.org/licenses/>. */
20 19
21 #include "config.h" 20 #include "config.h"
22 #include "system.h" 21 #include "system.h"
23 #include "coretypes.h" 22 #include "coretypes.h"
23 #include "backend.h"
24 #include "rtl.h"
24 #include "tree.h" 25 #include "tree.h"
25 #include "tm.h" 26 #include "gimple.h"
27 #include "tree-pass.h"
28 #include "ssa.h"
29 #include "expmed.h"
30 #include "optabs-tree.h"
31 #include "diagnostic.h"
32 #include "fold-const.h"
33 #include "stor-layout.h"
26 #include "langhooks.h" 34 #include "langhooks.h"
27 #include "tree-flow.h" 35 #include "tree-eh.h"
28 #include "gimple.h" 36 #include "gimple-iterator.h"
29 #include "tree-iterator.h" 37 #include "gimplify-me.h"
30 #include "tree-pass.h" 38 #include "gimplify.h"
31 #include "flags.h" 39 #include "tree-cfg.h"
32 #include "ggc.h" 40
33 41
34 /* Need to include rtl.h, expr.h, etc. for optabs. */ 42 static void expand_vector_operations_1 (gimple_stmt_iterator *);
35 #include "expr.h" 43
36 #include "optabs.h"
37 44
38 /* Build a constant of type TYPE, made of VALUE's bits replicated 45 /* Build a constant of type TYPE, made of VALUE's bits replicated
39 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */ 46 every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
40 static tree 47 static tree
41 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value) 48 build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
42 { 49 {
43 int width = tree_low_cst (TYPE_SIZE (inner_type), 1); 50 int width = tree_to_uhwi (TYPE_SIZE (inner_type));
44 int n = HOST_BITS_PER_WIDE_INT / width; 51 int n = (TYPE_PRECISION (type) + HOST_BITS_PER_WIDE_INT - 1)
45 unsigned HOST_WIDE_INT low, high, mask; 52 / HOST_BITS_PER_WIDE_INT;
46 tree ret; 53 unsigned HOST_WIDE_INT low, mask;
47 54 HOST_WIDE_INT a[WIDE_INT_MAX_ELTS];
48 gcc_assert (n); 55 int i;
56
57 gcc_assert (n && n <= WIDE_INT_MAX_ELTS);
49 58
50 if (width == HOST_BITS_PER_WIDE_INT) 59 if (width == HOST_BITS_PER_WIDE_INT)
51 low = value; 60 low = value;
52 else 61 else
53 { 62 {
54 mask = ((HOST_WIDE_INT)1 << width) - 1; 63 mask = ((HOST_WIDE_INT)1 << width) - 1;
55 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask); 64 low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
56 } 65 }
57 66
58 if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT) 67 for (i = 0; i < n; i++)
59 low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0; 68 a[i] = low;
60 else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT) 69
61 high = 0; 70 gcc_assert (TYPE_PRECISION (type) <= MAX_BITSIZE_MODE_ANY_INT);
62 else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT) 71 return wide_int_to_tree
63 high = low; 72 (type, wide_int::from_array (a, n, TYPE_PRECISION (type)));
64 else
65 gcc_unreachable ();
66
67 ret = build_int_cst_wide (type, low, high);
68 return ret;
69 } 73 }
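As an aside, the replication trick in build_replicated_const can be sketched on plain 64-bit host arithmetic (this is an illustration, not part of the diff; replicate_bits is an invented name): ~0 / mask yields 0x01...01 with one bit set per WIDTH-bit lane, so multiplying it by the lane value copies that value into every lane, assuming WIDTH divides the word size.

  #include <stdint.h>
  #include <assert.h>

  /* Copy the low WIDTH bits of VALUE into every WIDTH-bit lane of a
     64-bit word, mirroring the ~0 / mask * value trick used above.  */
  static uint64_t
  replicate_bits (uint64_t value, int width)
  {
    if (width == 64)
      return value;
    uint64_t mask = (UINT64_C (1) << width) - 1;
    return (~UINT64_C (0) / mask) * (value & mask);
  }

  int
  main (void)
  {
    assert (replicate_bits (0x80, 8) == UINT64_C (0x8080808080808080));
    assert (replicate_bits (0x1, 16) == UINT64_C (0x0001000100010001));
    return 0;
  }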
70 74
71 static GTY(()) tree vector_inner_type; 75 static GTY(()) tree vector_inner_type;
72 static GTY(()) tree vector_last_type; 76 static GTY(()) tree vector_last_type;
73 static GTY(()) int vector_last_nunits; 77 static GTY(()) int vector_last_nunits;
93 nunits)); 97 nunits));
94 return vector_last_type; 98 return vector_last_type;
95 } 99 }
96 100
97 typedef tree (*elem_op_func) (gimple_stmt_iterator *, 101 typedef tree (*elem_op_func) (gimple_stmt_iterator *,
98 tree, tree, tree, tree, tree, enum tree_code); 102 tree, tree, tree, tree, tree, enum tree_code,
103 tree);
99 104
100 static inline tree 105 static inline tree
101 tree_vec_extract (gimple_stmt_iterator *gsi, tree type, 106 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
102 tree t, tree bitsize, tree bitpos) 107 tree t, tree bitsize, tree bitpos)
103 { 108 {
109 if (TREE_CODE (t) == SSA_NAME)
110 {
111 gimple *def_stmt = SSA_NAME_DEF_STMT (t);
112 if (is_gimple_assign (def_stmt)
113 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
114 || (bitpos
115 && gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR)))
116 t = gimple_assign_rhs1 (def_stmt);
117 }
104 if (bitpos) 118 if (bitpos)
105 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos); 119 {
120 if (TREE_CODE (type) == BOOLEAN_TYPE)
121 {
122 tree itype
123 = build_nonstandard_integer_type (tree_to_uhwi (bitsize), 0);
124 tree field = gimplify_build3 (gsi, BIT_FIELD_REF, itype, t,
125 bitsize, bitpos);
126 return gimplify_build2 (gsi, NE_EXPR, type, field,
127 build_zero_cst (itype));
128 }
129 else
130 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
131 }
106 else 132 else
107 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t); 133 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
108 } 134 }
109 135
110 static tree 136 static tree
111 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a, 137 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
112 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize, 138 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
113 enum tree_code code) 139 enum tree_code code, tree type ATTRIBUTE_UNUSED)
114 { 140 {
115 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); 141 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
116 return gimplify_build1 (gsi, code, inner_type, a); 142 return gimplify_build1 (gsi, code, inner_type, a);
117 } 143 }
118 144
119 static tree 145 static tree
120 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b, 146 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
121 tree bitpos, tree bitsize, enum tree_code code) 147 tree bitpos, tree bitsize, enum tree_code code,
122 { 148 tree type ATTRIBUTE_UNUSED)
149 {
150 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
151 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
152 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
153 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
154 return gimplify_build2 (gsi, code, inner_type, a, b);
155 }
156
157 /* Construct expression (A[BITPOS] code B[BITPOS]) ? -1 : 0
158
159 INNER_TYPE is the type of A and B elements
160
161 returned expression is of signed integer type with the
162 size equal to the size of INNER_TYPE. */
163 static tree
164 do_compare (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
165 tree bitpos, tree bitsize, enum tree_code code, tree type)
166 {
167 tree stype = TREE_TYPE (type);
168 tree cst_false = build_zero_cst (stype);
169 tree cst_true = build_all_ones_cst (stype);
170 tree cmp;
171
123 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos); 172 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
124 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos); 173 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
125 return gimplify_build2 (gsi, code, inner_type, a, b); 174
175 cmp = build2 (code, boolean_type_node, a, b);
176 return gimplify_build3 (gsi, COND_EXPR, stype, cmp, cst_true, cst_false);
126 } 177 }
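In scalar C terms, the per-element result do_compare builds looks roughly like the sketch below (illustrative only; the 4-lane int32 shape and the function name are assumptions): each lane of the lowered comparison becomes all-ones (cst_true) when the predicate holds and zero (cst_false) otherwise.

  #include <stdint.h>

  /* Piecewise lowering of  mask = (a < b)  for a 4-lane int32 vector:
     every lane is -1 when the comparison holds, 0 otherwise.  */
  static void
  compare_lt_v4si (const int32_t a[4], const int32_t b[4], int32_t mask[4])
  {
    for (int i = 0; i < 4; i++)
      mask[i] = (a[i] < b[i]) ? -1 : 0;
  }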
127 178
128 /* Expand vector addition to scalars. This does bit twiddling 179 /* Expand vector addition to scalars. This does bit twiddling
129 in order to increase parallelism: 180 in order to increase parallelism:
130 181
139 This optimization should be done only if 4 vector items or more 190 This optimization should be done only if 4 vector items or more
140 fit into a word. */ 191 fit into a word. */
141 static tree 192 static tree
142 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b, 193 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
143 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED, 194 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
144 enum tree_code code) 195 enum tree_code code, tree type ATTRIBUTE_UNUSED)
145 { 196 {
146 tree inner_type = TREE_TYPE (TREE_TYPE (a)); 197 tree inner_type = TREE_TYPE (TREE_TYPE (a));
147 unsigned HOST_WIDE_INT max; 198 unsigned HOST_WIDE_INT max;
148 tree low_bits, high_bits, a_low, b_low, result_low, signs; 199 tree low_bits, high_bits, a_low, b_low, result_low, signs;
149 200
171 222
172 static tree 223 static tree
173 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b, 224 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
174 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED, 225 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
175 tree bitsize ATTRIBUTE_UNUSED, 226 tree bitsize ATTRIBUTE_UNUSED,
176 enum tree_code code ATTRIBUTE_UNUSED) 227 enum tree_code code ATTRIBUTE_UNUSED,
228 tree type ATTRIBUTE_UNUSED)
177 { 229 {
178 tree inner_type = TREE_TYPE (TREE_TYPE (b)); 230 tree inner_type = TREE_TYPE (TREE_TYPE (b));
179 HOST_WIDE_INT max; 231 HOST_WIDE_INT max;
180 tree low_bits, high_bits, b_low, result_low, signs; 232 tree low_bits, high_bits, b_low, result_low, signs;
181 233
197 static tree 249 static tree
198 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f, 250 expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
199 tree type, tree inner_type, 251 tree type, tree inner_type,
200 tree a, tree b, enum tree_code code) 252 tree a, tree b, enum tree_code code)
201 { 253 {
202 VEC(constructor_elt,gc) *v; 254 vec<constructor_elt, va_gc> *v;
203 tree part_width = TYPE_SIZE (inner_type); 255 tree part_width = TYPE_SIZE (inner_type);
204 tree index = bitsize_int (0); 256 tree index = bitsize_int (0);
205 int nunits = TYPE_VECTOR_SUBPARTS (type); 257 int nunits = TYPE_VECTOR_SUBPARTS (type);
206 int delta = tree_low_cst (part_width, 1) 258 int delta = tree_to_uhwi (part_width)
207 / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1); 259 / tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
208 int i; 260 int i;
209 261 location_t loc = gimple_location (gsi_stmt (*gsi));
210 v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta); 262
263 if (types_compatible_p (gimple_expr_type (gsi_stmt (*gsi)), type))
264 warning_at (loc, OPT_Wvector_operation_performance,
265 "vector operation will be expanded piecewise");
266 else
267 warning_at (loc, OPT_Wvector_operation_performance,
268 "vector operation will be expanded in parallel");
269
270 vec_alloc (v, (nunits + delta - 1) / delta);
211 for (i = 0; i < nunits; 271 for (i = 0; i < nunits;
212 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0)) 272 i += delta, index = int_const_binop (PLUS_EXPR, index, part_width))
213 { 273 {
214 tree result = f (gsi, inner_type, a, b, index, part_width, code); 274 tree result = f (gsi, inner_type, a, b, index, part_width, code, type);
215 constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL); 275 constructor_elt ce = {NULL_TREE, result};
216 ce->index = NULL_TREE; 276 v->quick_push (ce);
217 ce->value = result;
218 } 277 }
219 278
220 return build_constructor (type, v); 279 return build_constructor (type, v);
221 } 280 }
222 281
227 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type, 286 expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
228 tree a, tree b, 287 tree a, tree b,
229 enum tree_code code) 288 enum tree_code code)
230 { 289 {
231 tree result, compute_type; 290 tree result, compute_type;
232 enum machine_mode mode; 291 int n_words = tree_to_uhwi (TYPE_SIZE_UNIT (type)) / UNITS_PER_WORD;
233 int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD; 292 location_t loc = gimple_location (gsi_stmt (*gsi));
234 293
235 /* We have three strategies. If the type is already correct, just do 294 /* We have three strategies. If the type is already correct, just do
236 the operation an element at a time. Else, if the vector is wider than 295 the operation an element at a time. Else, if the vector is wider than
237 one word, do it a word at a time; finally, if the vector is smaller 296 one word, do it a word at a time; finally, if the vector is smaller
238 than one word, do it as a scalar. */ 297 than one word, do it as a scalar. */
250 GSI_SAME_STMT); 309 GSI_SAME_STMT);
251 } 310 }
252 else 311 else
253 { 312 {
254 /* Use a single scalar operation with a mode no wider than word_mode. */ 313 /* Use a single scalar operation with a mode no wider than word_mode. */
255 mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0); 314 scalar_int_mode mode
315 = int_mode_for_size (tree_to_uhwi (TYPE_SIZE (type)), 0).require ();
256 compute_type = lang_hooks.types.type_for_mode (mode, 1); 316 compute_type = lang_hooks.types.type_for_mode (mode, 1);
257 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code); 317 result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code, type);
318 warning_at (loc, OPT_Wvector_operation_performance,
319 "vector operation will be expanded with a "
320 "single scalar operation");
258 } 321 }
259 322
260 return result; 323 return result;
261 } 324 }
262 325
269 expand_vector_addition (gimple_stmt_iterator *gsi, 332 expand_vector_addition (gimple_stmt_iterator *gsi,
270 elem_op_func f, elem_op_func f_parallel, 333 elem_op_func f, elem_op_func f_parallel,
271 tree type, tree a, tree b, enum tree_code code) 334 tree type, tree a, tree b, enum tree_code code)
272 { 335 {
273 int parts_per_word = UNITS_PER_WORD 336 int parts_per_word = UNITS_PER_WORD
274 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1); 337 / tree_to_uhwi (TYPE_SIZE_UNIT (TREE_TYPE (type)));
275 338
276 if (INTEGRAL_TYPE_P (TREE_TYPE (type)) 339 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
277 && parts_per_word >= 4 340 && parts_per_word >= 4
278 && TYPE_VECTOR_SUBPARTS (type) >= 4) 341 && TYPE_VECTOR_SUBPARTS (type) >= 4)
279 return expand_vector_parallel (gsi, f_parallel, 342 return expand_vector_parallel (gsi, f_parallel,
282 return expand_vector_piecewise (gsi, f, 345 return expand_vector_piecewise (gsi, f,
283 type, TREE_TYPE (type), 346 type, TREE_TYPE (type),
284 a, b, code); 347 a, b, code);
285 } 348 }
286 349
287 /* Check if vector VEC consists of all the equal elements and 350 /* Try to expand vector comparison expression OP0 CODE OP1 by
288 that the number of elements corresponds to the type of VEC. 351 querying optab if the following expression:
289 The function returns first element of the vector 352 VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
290 or NULL_TREE if the vector is not uniform. */ 353 can be expanded. */
291 static tree 354 static tree
292 uniform_vector_p (tree vec) 355 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
293 { 356 tree op1, enum tree_code code)
294 tree first, t, els; 357 {
295 unsigned i; 358 tree t;
296 359 if (!expand_vec_cmp_expr_p (TREE_TYPE (op0), type, code)
297 if (vec == NULL_TREE) 360 && !expand_vec_cond_expr_p (type, TREE_TYPE (op0), code))
361 t = expand_vector_piecewise (gsi, do_compare, type,
362 TREE_TYPE (TREE_TYPE (op0)), op0, op1, code);
363 else
364 t = NULL_TREE;
365
366 return t;
367 }
368
369 /* Helper function of expand_vector_divmod. Gimplify a RSHIFT_EXPR in type
370 of OP0 with shift counts in SHIFTCNTS array and return the temporary holding
371 the result if successful, otherwise return NULL_TREE. */
372 static tree
373 add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
374 {
375 optab op;
376 unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type);
377 bool scalar_shift = true;
378
379 for (i = 1; i < nunits; i++)
380 {
381 if (shiftcnts[i] != shiftcnts[0])
382 scalar_shift = false;
383 }
384
385 if (scalar_shift && shiftcnts[0] == 0)
386 return op0;
387
388 if (scalar_shift)
389 {
390 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_scalar);
391 if (op != unknown_optab
392 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
393 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
394 build_int_cst (NULL_TREE, shiftcnts[0]));
395 }
396
397 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
398 if (op != unknown_optab
399 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
400 {
401 auto_vec<tree, 32> vec (nunits);
402 for (i = 0; i < nunits; i++)
403 vec.quick_push (build_int_cst (TREE_TYPE (type), shiftcnts[i]));
404 return gimplify_build2 (gsi, RSHIFT_EXPR, type, op0,
405 build_vector (type, vec));
406 }
407
408 return NULL_TREE;
409 }
410
411 /* Try to expand integer vector division by constant using
412 widening multiply, shifts and additions. */
413 static tree
414 expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
415 tree op1, enum tree_code code)
416 {
417 bool use_pow2 = true;
418 bool has_vector_shift = true;
419 int mode = -1, this_mode;
420 int pre_shift = -1, post_shift;
421 unsigned int nunits = TYPE_VECTOR_SUBPARTS (type);
422 int *shifts = XALLOCAVEC (int, nunits * 4);
423 int *pre_shifts = shifts + nunits;
424 int *post_shifts = pre_shifts + nunits;
425 int *shift_temps = post_shifts + nunits;
426 unsigned HOST_WIDE_INT *mulc = XALLOCAVEC (unsigned HOST_WIDE_INT, nunits);
427 int prec = TYPE_PRECISION (TREE_TYPE (type));
428 int dummy_int;
429 unsigned int i;
430 signop sign_p = TYPE_SIGN (TREE_TYPE (type));
431 unsigned HOST_WIDE_INT mask = GET_MODE_MASK (TYPE_MODE (TREE_TYPE (type)));
432 tree cur_op, mulcst, tem;
433 optab op;
434
435 if (prec > HOST_BITS_PER_WIDE_INT)
298 return NULL_TREE; 436 return NULL_TREE;
299 437
300 if (TREE_CODE (vec) == VECTOR_CST) 438 op = optab_for_tree_code (RSHIFT_EXPR, type, optab_vector);
301 { 439 if (op == unknown_optab
302 els = TREE_VECTOR_CST_ELTS (vec); 440 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
303 first = TREE_VALUE (els); 441 has_vector_shift = false;
304 els = TREE_CHAIN (els); 442
305 443 /* Analysis phase. Determine if all op1 elements are either power
306 for (t = els; t; t = TREE_CHAIN (t)) 444 of two and it is possible to expand it using shifts (or for remainder
307 if (!operand_equal_p (first, TREE_VALUE (t), 0)) 445 using masking). Additionally compute the multiplicative constants
308 return NULL_TREE; 446 and pre and post shifts if the division is to be expanded using
309 447 widening or high part multiplication plus shifts. */
310 return first; 448 for (i = 0; i < nunits; i++)
311 } 449 {
312 450 tree cst = VECTOR_CST_ELT (op1, i);
313 else if (TREE_CODE (vec) == CONSTRUCTOR) 451 unsigned HOST_WIDE_INT ml;
314 { 452
315 first = error_mark_node; 453 if (TREE_CODE (cst) != INTEGER_CST || integer_zerop (cst))
316 454 return NULL_TREE;
317 FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t) 455 pre_shifts[i] = 0;
318 { 456 post_shifts[i] = 0;
319 if (i == 0) 457 mulc[i] = 0;
320 { 458 if (use_pow2
321 first = t; 459 && (!integer_pow2p (cst) || tree_int_cst_sgn (cst) != 1))
322 continue; 460 use_pow2 = false;
323 } 461 if (use_pow2)
324 if (!operand_equal_p (first, t, 0)) 462 {
463 shifts[i] = tree_log2 (cst);
464 if (shifts[i] != shifts[0]
465 && code == TRUNC_DIV_EXPR
466 && !has_vector_shift)
467 use_pow2 = false;
468 }
469 if (mode == -2)
470 continue;
471 if (sign_p == UNSIGNED)
472 {
473 unsigned HOST_WIDE_INT mh;
474 unsigned HOST_WIDE_INT d = TREE_INT_CST_LOW (cst) & mask;
475
476 if (d >= (HOST_WIDE_INT_1U << (prec - 1)))
477 /* FIXME: Can transform this into op0 >= op1 ? 1 : 0. */
325 return NULL_TREE; 478 return NULL_TREE;
326 } 479
327 if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec))) 480 if (d <= 1)
481 {
482 mode = -2;
483 continue;
484 }
485
486 /* Find a suitable multiplier and right shift count
487 instead of multiplying with D. */
488 mh = choose_multiplier (d, prec, prec, &ml, &post_shift, &dummy_int);
489
490 /* If the suggested multiplier is more than SIZE bits, we can
491 do better for even divisors, using an initial right shift. */
492 if ((mh != 0 && (d & 1) == 0)
493 || (!has_vector_shift && pre_shift != -1))
494 {
495 if (has_vector_shift)
496 pre_shift = ctz_or_zero (d);
497 else if (pre_shift == -1)
498 {
499 unsigned int j;
500 for (j = 0; j < nunits; j++)
501 {
502 tree cst2 = VECTOR_CST_ELT (op1, j);
503 unsigned HOST_WIDE_INT d2;
504 int this_pre_shift;
505
506 if (!tree_fits_uhwi_p (cst2))
507 return NULL_TREE;
508 d2 = tree_to_uhwi (cst2) & mask;
509 if (d2 == 0)
510 return NULL_TREE;
511 this_pre_shift = floor_log2 (d2 & -d2);
512 if (pre_shift == -1 || this_pre_shift < pre_shift)
513 pre_shift = this_pre_shift;
514 }
515 if (i != 0 && pre_shift != 0)
516 {
517 /* Restart. */
518 i = -1U;
519 mode = -1;
520 continue;
521 }
522 }
523 if (pre_shift != 0)
524 {
525 if ((d >> pre_shift) <= 1)
526 {
527 mode = -2;
528 continue;
529 }
530 mh = choose_multiplier (d >> pre_shift, prec,
531 prec - pre_shift,
532 &ml, &post_shift, &dummy_int);
533 gcc_assert (!mh);
534 pre_shifts[i] = pre_shift;
535 }
536 }
537 if (!mh)
538 this_mode = 0;
539 else
540 this_mode = 1;
541 }
542 else
543 {
544 HOST_WIDE_INT d = TREE_INT_CST_LOW (cst);
545 unsigned HOST_WIDE_INT abs_d;
546
547 if (d == -1)
548 return NULL_TREE;
549
550 /* Since d might be INT_MIN, we have to cast to
551 unsigned HOST_WIDE_INT before negating to avoid
552 undefined signed overflow. */
553 abs_d = (d >= 0
554 ? (unsigned HOST_WIDE_INT) d
555 : - (unsigned HOST_WIDE_INT) d);
556
557 /* n rem d = n rem -d */
558 if (code == TRUNC_MOD_EXPR && d < 0)
559 d = abs_d;
560 else if (abs_d == HOST_WIDE_INT_1U << (prec - 1))
561 {
562 /* This case is not handled correctly below. */
563 mode = -2;
564 continue;
565 }
566 if (abs_d <= 1)
567 {
568 mode = -2;
569 continue;
570 }
571
572 choose_multiplier (abs_d, prec, prec - 1, &ml,
573 &post_shift, &dummy_int);
574 if (ml >= HOST_WIDE_INT_1U << (prec - 1))
575 {
576 this_mode = 4 + (d < 0);
577 ml |= HOST_WIDE_INT_M1U << (prec - 1);
578 }
579 else
580 this_mode = 2 + (d < 0);
581 }
582 mulc[i] = ml;
583 post_shifts[i] = post_shift;
584 if ((i && !has_vector_shift && post_shifts[0] != post_shift)
585 || post_shift >= prec
586 || pre_shifts[i] >= prec)
587 this_mode = -2;
588
589 if (i == 0)
590 mode = this_mode;
591 else if (mode != this_mode)
592 mode = -2;
593 }
594
595 if (use_pow2)
596 {
597 tree addend = NULL_TREE;
598 if (sign_p == SIGNED)
599 {
600 tree uns_type;
601
602 /* Both division and remainder sequences need
603 op0 < 0 ? mask : 0 computed. It can be either computed as
604 (type) (((uns_type) (op0 >> (prec - 1))) >> (prec - shifts[i]))
605 if none of the shifts is 0, or as the conditional. */
606 for (i = 0; i < nunits; i++)
607 if (shifts[i] == 0)
608 break;
609 uns_type
610 = build_vector_type (build_nonstandard_integer_type (prec, 1),
611 nunits);
612 if (i == nunits && TYPE_MODE (uns_type) == TYPE_MODE (type))
613 {
614 for (i = 0; i < nunits; i++)
615 shift_temps[i] = prec - 1;
616 cur_op = add_rshift (gsi, type, op0, shift_temps);
617 if (cur_op != NULL_TREE)
618 {
619 cur_op = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
620 uns_type, cur_op);
621 for (i = 0; i < nunits; i++)
622 shift_temps[i] = prec - shifts[i];
623 cur_op = add_rshift (gsi, uns_type, cur_op, shift_temps);
624 if (cur_op != NULL_TREE)
625 addend = gimplify_build1 (gsi, VIEW_CONVERT_EXPR,
626 type, cur_op);
627 }
628 }
629 if (addend == NULL_TREE
630 && expand_vec_cond_expr_p (type, type, LT_EXPR))
631 {
632 tree zero, cst, cond, mask_type;
633 gimple *stmt;
634
635 mask_type = build_same_sized_truth_vector_type (type);
636 zero = build_zero_cst (type);
637 cond = build2 (LT_EXPR, mask_type, op0, zero);
638 auto_vec<tree, 32> vec (nunits);
639 for (i = 0; i < nunits; i++)
640 vec.quick_push (build_int_cst (TREE_TYPE (type),
641 (HOST_WIDE_INT_1U
642 << shifts[i]) - 1));
643 cst = build_vector (type, vec);
644 addend = make_ssa_name (type);
645 stmt = gimple_build_assign (addend, VEC_COND_EXPR, cond,
646 cst, zero);
647 gsi_insert_before (gsi, stmt, GSI_SAME_STMT);
648 }
649 }
650 if (code == TRUNC_DIV_EXPR)
651 {
652 if (sign_p == UNSIGNED)
653 {
654 /* q = op0 >> shift; */
655 cur_op = add_rshift (gsi, type, op0, shifts);
656 if (cur_op != NULL_TREE)
657 return cur_op;
658 }
659 else if (addend != NULL_TREE)
660 {
661 /* t1 = op0 + addend;
662 q = t1 >> shift; */
663 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
664 if (op != unknown_optab
665 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
666 {
667 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0, addend);
668 cur_op = add_rshift (gsi, type, cur_op, shifts);
669 if (cur_op != NULL_TREE)
670 return cur_op;
671 }
672 }
673 }
674 else
675 {
676 tree mask;
677 auto_vec<tree, 32> vec (nunits);
678 for (i = 0; i < nunits; i++)
679 vec.quick_push (build_int_cst (TREE_TYPE (type),
680 (HOST_WIDE_INT_1U
681 << shifts[i]) - 1));
682 mask = build_vector (type, vec);
683 op = optab_for_tree_code (BIT_AND_EXPR, type, optab_default);
684 if (op != unknown_optab
685 && optab_handler (op, TYPE_MODE (type)) != CODE_FOR_nothing)
686 {
687 if (sign_p == UNSIGNED)
688 /* r = op0 & mask; */
689 return gimplify_build2 (gsi, BIT_AND_EXPR, type, op0, mask);
690 else if (addend != NULL_TREE)
691 {
692 /* t1 = op0 + addend;
693 t2 = t1 & mask;
694 r = t2 - addend; */
695 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
696 if (op != unknown_optab
697 && optab_handler (op, TYPE_MODE (type))
698 != CODE_FOR_nothing)
699 {
700 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, op0,
701 addend);
702 cur_op = gimplify_build2 (gsi, BIT_AND_EXPR, type,
703 cur_op, mask);
704 op = optab_for_tree_code (MINUS_EXPR, type,
705 optab_default);
706 if (op != unknown_optab
707 && optab_handler (op, TYPE_MODE (type))
708 != CODE_FOR_nothing)
709 return gimplify_build2 (gsi, MINUS_EXPR, type,
710 cur_op, addend);
711 }
712 }
713 }
714 }
715 }
716
717 if (mode == -2 || BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
718 return NULL_TREE;
719
720 if (!can_mult_highpart_p (TYPE_MODE (type), TYPE_UNSIGNED (type)))
721 return NULL_TREE;
722
723 cur_op = op0;
724
725 switch (mode)
726 {
727 case 0:
728 gcc_assert (sign_p == UNSIGNED);
729 /* t1 = oprnd0 >> pre_shift;
730 t2 = t1 h* ml;
731 q = t2 >> post_shift; */
732 cur_op = add_rshift (gsi, type, cur_op, pre_shifts);
733 if (cur_op == NULL_TREE)
328 return NULL_TREE; 734 return NULL_TREE;
329 735 break;
330 return first; 736 case 1:
331 } 737 gcc_assert (sign_p == UNSIGNED);
332 738 for (i = 0; i < nunits; i++)
333 return NULL_TREE; 739 {
740 shift_temps[i] = 1;
741 post_shifts[i]--;
742 }
743 break;
744 case 2:
745 case 3:
746 case 4:
747 case 5:
748 gcc_assert (sign_p == SIGNED);
749 for (i = 0; i < nunits; i++)
750 shift_temps[i] = prec - 1;
751 break;
752 default:
753 return NULL_TREE;
754 }
755
756 auto_vec<tree, 32> vec (nunits);
757 for (i = 0; i < nunits; i++)
758 vec.quick_push (build_int_cst (TREE_TYPE (type), mulc[i]));
759 mulcst = build_vector (type, vec);
760
761 cur_op = gimplify_build2 (gsi, MULT_HIGHPART_EXPR, type, cur_op, mulcst);
762
763 switch (mode)
764 {
765 case 0:
766 /* t1 = oprnd0 >> pre_shift;
767 t2 = t1 h* ml;
768 q = t2 >> post_shift; */
769 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
770 break;
771 case 1:
772 /* t1 = oprnd0 h* ml;
773 t2 = oprnd0 - t1;
774 t3 = t2 >> 1;
775 t4 = t1 + t3;
776 q = t4 >> (post_shift - 1); */
777 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
778 if (op == unknown_optab
779 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
780 return NULL_TREE;
781 tem = gimplify_build2 (gsi, MINUS_EXPR, type, op0, cur_op);
782 tem = add_rshift (gsi, type, tem, shift_temps);
783 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
784 if (op == unknown_optab
785 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
786 return NULL_TREE;
787 tem = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, tem);
788 cur_op = add_rshift (gsi, type, tem, post_shifts);
789 if (cur_op == NULL_TREE)
790 return NULL_TREE;
791 break;
792 case 2:
793 case 3:
794 case 4:
795 case 5:
796 /* t1 = oprnd0 h* ml;
797 t2 = t1; [ iff (mode & 2) != 0 ]
798 t2 = t1 + oprnd0; [ iff (mode & 2) == 0 ]
799 t3 = t2 >> post_shift;
800 t4 = oprnd0 >> (prec - 1);
801 q = t3 - t4; [ iff (mode & 1) == 0 ]
802 q = t4 - t3; [ iff (mode & 1) != 0 ] */
803 if ((mode & 2) == 0)
804 {
805 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
806 if (op == unknown_optab
807 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
808 return NULL_TREE;
809 cur_op = gimplify_build2 (gsi, PLUS_EXPR, type, cur_op, op0);
810 }
811 cur_op = add_rshift (gsi, type, cur_op, post_shifts);
812 if (cur_op == NULL_TREE)
813 return NULL_TREE;
814 tem = add_rshift (gsi, type, op0, shift_temps);
815 if (tem == NULL_TREE)
816 return NULL_TREE;
817 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
818 if (op == unknown_optab
819 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
820 return NULL_TREE;
821 if ((mode & 1) == 0)
822 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, cur_op, tem);
823 else
824 cur_op = gimplify_build2 (gsi, MINUS_EXPR, type, tem, cur_op);
825 break;
826 default:
827 gcc_unreachable ();
828 }
829
830 if (code == TRUNC_DIV_EXPR)
831 return cur_op;
832
833 /* We divided. Now finish by:
834 t1 = q * oprnd1;
835 r = oprnd0 - t1; */
836 op = optab_for_tree_code (MULT_EXPR, type, optab_default);
837 if (op == unknown_optab
838 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
839 return NULL_TREE;
840 tem = gimplify_build2 (gsi, MULT_EXPR, type, cur_op, op1);
841 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
842 if (op == unknown_optab
843 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
844 return NULL_TREE;
845 return gimplify_build2 (gsi, MINUS_EXPR, type, op0, tem);
846 }
847
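A scalar model of the "highpart multiply plus shift" division built above may help; the sketch below follows the mode 0 sequence (t2 = oprnd0 h* ml; q = t2 >> post_shift, with no pre-shift) for unsigned 32-bit division by 3, using the well-known magic constant 0xAAAAAAAB and post_shift of 1, which is what choose_multiplier would select here. This is an illustration, not code from the pass.

  #include <stdint.h>
  #include <assert.h>

  static uint32_t
  udiv3 (uint32_t n)
  {
    uint64_t prod = (uint64_t) n * UINT64_C (0xAAAAAAAB);
    uint32_t hi = (uint32_t) (prod >> 32); /* t2 = oprnd0 h* ml       */
    return hi >> 1;                        /* q  = t2 >> post_shift   */
  }

  int
  main (void)
  {
    for (uint32_t n = 0; n < 1000000; n++)
      assert (udiv3 (n) == n / 3);
    assert (udiv3 (UINT32_MAX) == UINT32_MAX / 3);
    return 0;
  }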
848 /* Expand a vector condition to scalars, by using many conditions
849 on the vector's elements. */
850 static void
851 expand_vector_condition (gimple_stmt_iterator *gsi)
852 {
853 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
854 tree type = gimple_expr_type (stmt);
855 tree a = gimple_assign_rhs1 (stmt);
856 tree a1 = a;
857 tree a2 = NULL_TREE;
858 bool a_is_comparison = false;
859 tree b = gimple_assign_rhs2 (stmt);
860 tree c = gimple_assign_rhs3 (stmt);
861 vec<constructor_elt, va_gc> *v;
862 tree constr;
863 tree inner_type = TREE_TYPE (type);
864 tree cond_type = TREE_TYPE (TREE_TYPE (a));
865 tree comp_inner_type = cond_type;
866 tree width = TYPE_SIZE (inner_type);
867 tree index = bitsize_int (0);
868 tree comp_width = width;
869 tree comp_index = index;
870 int nunits = TYPE_VECTOR_SUBPARTS (type);
871 int i;
872 location_t loc = gimple_location (gsi_stmt (*gsi));
873
874 if (!is_gimple_val (a))
875 {
876 gcc_assert (COMPARISON_CLASS_P (a));
877 a_is_comparison = true;
878 a1 = TREE_OPERAND (a, 0);
879 a2 = TREE_OPERAND (a, 1);
880 comp_inner_type = TREE_TYPE (TREE_TYPE (a1));
881 comp_width = TYPE_SIZE (comp_inner_type);
882 }
883
884 if (expand_vec_cond_expr_p (type, TREE_TYPE (a1), TREE_CODE (a)))
885 return;
886
887 /* Handle vector boolean types with bitmasks. If there is a comparison
888 and we can expand the comparison into the vector boolean bitmask,
889 or otherwise if it is compatible with type, we can transform
890 vbfld_1 = x_2 < y_3 ? vbfld_4 : vbfld_5;
891 into
892 tmp_6 = x_2 < y_3;
893 tmp_7 = tmp_6 & vbfld_4;
894 tmp_8 = ~tmp_6;
895 tmp_9 = tmp_8 & vbfld_5;
896 vbfld_1 = tmp_7 | tmp_9;
897 Similarly for vbfld_10 instead of x_2 < y_3. */
898 if (VECTOR_BOOLEAN_TYPE_P (type)
899 && SCALAR_INT_MODE_P (TYPE_MODE (type))
900 && (GET_MODE_BITSIZE (TYPE_MODE (type))
901 < (TYPE_VECTOR_SUBPARTS (type)
902 * GET_MODE_BITSIZE (TYPE_MODE (TREE_TYPE (type)))))
903 && (a_is_comparison
904 ? useless_type_conversion_p (type, TREE_TYPE (a))
905 : expand_vec_cmp_expr_p (TREE_TYPE (a1), type, TREE_CODE (a))))
906 {
907 if (a_is_comparison)
908 a = gimplify_build2 (gsi, TREE_CODE (a), type, a1, a2);
909 a1 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a, b);
910 a2 = gimplify_build1 (gsi, BIT_NOT_EXPR, type, a);
911 a2 = gimplify_build2 (gsi, BIT_AND_EXPR, type, a2, c);
912 a = gimplify_build2 (gsi, BIT_IOR_EXPR, type, a1, a2);
913 gimple_assign_set_rhs_from_tree (gsi, a);
914 update_stmt (gsi_stmt (*gsi));
915 return;
916 }
917
918 /* TODO: try and find a smaller vector type. */
919
920 warning_at (loc, OPT_Wvector_operation_performance,
921 "vector condition will be expanded piecewise");
922
923 vec_alloc (v, nunits);
924 for (i = 0; i < nunits; i++)
925 {
926 tree aa, result;
927 tree bb = tree_vec_extract (gsi, inner_type, b, width, index);
928 tree cc = tree_vec_extract (gsi, inner_type, c, width, index);
929 if (a_is_comparison)
930 {
931 tree aa1 = tree_vec_extract (gsi, comp_inner_type, a1,
932 comp_width, comp_index);
933 tree aa2 = tree_vec_extract (gsi, comp_inner_type, a2,
934 comp_width, comp_index);
935 aa = fold_build2 (TREE_CODE (a), cond_type, aa1, aa2);
936 }
937 else
938 aa = tree_vec_extract (gsi, cond_type, a, width, index);
939 result = gimplify_build3 (gsi, COND_EXPR, inner_type, aa, bb, cc);
940 constructor_elt ce = {NULL_TREE, result};
941 v->quick_push (ce);
942 index = int_const_binop (PLUS_EXPR, index, width);
943 if (width == comp_width)
944 comp_index = index;
945 else
946 comp_index = int_const_binop (PLUS_EXPR, comp_index, comp_width);
947 }
948
949 constr = build_constructor (type, v);
950 gimple_assign_set_rhs_from_tree (gsi, constr);
951 update_stmt (gsi_stmt (*gsi));
334 } 952 }
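The bitmask branch in the function above boils down to a bitwise select; a minimal scalar picture (a uint8_t standing in for the integer-mode vector-boolean mask, names invented for illustration) is:

  #include <stdint.h>

  /* With bitmask booleans, the VEC_COND_EXPR  m ? b : c  is rewritten
     as  (m & b) | (~m & c)  on the whole mask word.  */
  static uint8_t
  mask_select (uint8_t m, uint8_t b, uint8_t c)
  {
    return (uint8_t) ((m & b) | (~m & c));
  }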
335 953
336 static tree 954 static tree
337 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type, 955 expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
338 gimple assign, enum tree_code code) 956 gassign *assign, enum tree_code code)
339 { 957 {
340 enum machine_mode compute_mode = TYPE_MODE (compute_type); 958 machine_mode compute_mode = TYPE_MODE (compute_type);
341 959
342 /* If the compute mode is not a vector mode (hence we are not decomposing 960 /* If the compute mode is not a vector mode (hence we are not decomposing
343 a BLKmode vector to smaller, hardware-supported vectors), we may want 961 a BLKmode vector to smaller, hardware-supported vectors), we may want
344 to expand the operations in parallel. */ 962 to expand the operations in parallel. */
345 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT 963 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
350 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM) 968 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
351 switch (code) 969 switch (code)
352 { 970 {
353 case PLUS_EXPR: 971 case PLUS_EXPR:
354 case MINUS_EXPR: 972 case MINUS_EXPR:
355 if (!TYPE_OVERFLOW_TRAPS (type)) 973 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
356 return expand_vector_addition (gsi, do_binop, do_plus_minus, type, 974 return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
357 gimple_assign_rhs1 (assign), 975 gimple_assign_rhs1 (assign),
358 gimple_assign_rhs2 (assign), code); 976 gimple_assign_rhs2 (assign), code);
359 break; 977 break;
360 978
361 case NEGATE_EXPR: 979 case NEGATE_EXPR:
362 if (!TYPE_OVERFLOW_TRAPS (type)) 980 if (ANY_INTEGRAL_TYPE_P (type) && !TYPE_OVERFLOW_TRAPS (type))
363 return expand_vector_addition (gsi, do_unop, do_negate, type, 981 return expand_vector_addition (gsi, do_unop, do_negate, type,
364 gimple_assign_rhs1 (assign), 982 gimple_assign_rhs1 (assign),
365 NULL_TREE, code); 983 NULL_TREE, code);
366 break; 984 break;
367 985
373 gimple_assign_rhs2 (assign), code); 991 gimple_assign_rhs2 (assign), code);
374 992
375 case BIT_NOT_EXPR: 993 case BIT_NOT_EXPR:
376 return expand_vector_parallel (gsi, do_unop, type, 994 return expand_vector_parallel (gsi, do_unop, type,
377 gimple_assign_rhs1 (assign), 995 gimple_assign_rhs1 (assign),
378 NULL_TREE, code); 996 NULL_TREE, code);
997 case EQ_EXPR:
998 case NE_EXPR:
999 case GT_EXPR:
1000 case LT_EXPR:
1001 case GE_EXPR:
1002 case LE_EXPR:
1003 case UNEQ_EXPR:
1004 case UNGT_EXPR:
1005 case UNLT_EXPR:
1006 case UNGE_EXPR:
1007 case UNLE_EXPR:
1008 case LTGT_EXPR:
1009 case ORDERED_EXPR:
1010 case UNORDERED_EXPR:
1011 {
1012 tree rhs1 = gimple_assign_rhs1 (assign);
1013 tree rhs2 = gimple_assign_rhs2 (assign);
1014
1015 return expand_vector_comparison (gsi, type, rhs1, rhs2, code);
1016 }
1017
1018 case TRUNC_DIV_EXPR:
1019 case TRUNC_MOD_EXPR:
1020 {
1021 tree rhs1 = gimple_assign_rhs1 (assign);
1022 tree rhs2 = gimple_assign_rhs2 (assign);
1023 tree ret;
1024
1025 if (!optimize
1026 || !VECTOR_INTEGER_TYPE_P (type)
1027 || TREE_CODE (rhs2) != VECTOR_CST
1028 || !VECTOR_MODE_P (TYPE_MODE (type)))
1029 break;
1030
1031 ret = expand_vector_divmod (gsi, type, rhs1, rhs2, code);
1032 if (ret != NULL_TREE)
1033 return ret;
1034 break;
1035 }
379 1036
380 default: 1037 default:
381 break; 1038 break;
382 } 1039 }
383 1040
388 else 1045 else
389 return expand_vector_piecewise (gsi, do_binop, type, compute_type, 1046 return expand_vector_piecewise (gsi, do_binop, type, compute_type,
390 gimple_assign_rhs1 (assign), 1047 gimple_assign_rhs1 (assign),
391 gimple_assign_rhs2 (assign), code); 1048 gimple_assign_rhs2 (assign), code);
392 } 1049 }
1050
1051 /* Try to optimize
1052 a_5 = { b_7, b_7 + 3, b_7 + 6, b_7 + 9 };
1053 style stmts into:
1054 _9 = { b_7, b_7, b_7, b_7 };
1055 a_5 = _9 + { 0, 3, 6, 9 };
1056 because vector splat operation is usually more efficient
1057 than piecewise initialization of the vector. */
1058
1059 static void
1060 optimize_vector_constructor (gimple_stmt_iterator *gsi)
1061 {
1062 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1063 tree lhs = gimple_assign_lhs (stmt);
1064 tree rhs = gimple_assign_rhs1 (stmt);
1065 tree type = TREE_TYPE (rhs);
1066 unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type);
1067 bool all_same = true;
1068 constructor_elt *elt;
1069 gimple *g;
1070 tree base = NULL_TREE;
1071 optab op;
1072
1073 if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
1074 return;
1075 op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
1076 if (op == unknown_optab
1077 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing)
1078 return;
1079 FOR_EACH_VEC_SAFE_ELT (CONSTRUCTOR_ELTS (rhs), i, elt)
1080 if (TREE_CODE (elt->value) != SSA_NAME
1081 || TREE_CODE (TREE_TYPE (elt->value)) == VECTOR_TYPE)
1082 return;
1083 else
1084 {
1085 tree this_base = elt->value;
1086 if (this_base != CONSTRUCTOR_ELT (rhs, 0)->value)
1087 all_same = false;
1088 for (j = 0; j < nelts + 1; j++)
1089 {
1090 g = SSA_NAME_DEF_STMT (this_base);
1091 if (is_gimple_assign (g)
1092 && gimple_assign_rhs_code (g) == PLUS_EXPR
1093 && TREE_CODE (gimple_assign_rhs2 (g)) == INTEGER_CST
1094 && TREE_CODE (gimple_assign_rhs1 (g)) == SSA_NAME
1095 && !SSA_NAME_OCCURS_IN_ABNORMAL_PHI (gimple_assign_rhs1 (g)))
1096 this_base = gimple_assign_rhs1 (g);
1097 else
1098 break;
1099 }
1100 if (i == 0)
1101 base = this_base;
1102 else if (this_base != base)
1103 return;
1104 }
1105 if (all_same)
1106 return;
1107 auto_vec<tree, 32> cst (nelts);
1108 for (i = 0; i < nelts; i++)
1109 {
1110 tree this_base = CONSTRUCTOR_ELT (rhs, i)->value;
1111 tree elt = build_zero_cst (TREE_TYPE (base));
1112 while (this_base != base)
1113 {
1114 g = SSA_NAME_DEF_STMT (this_base);
1115 elt = fold_binary (PLUS_EXPR, TREE_TYPE (base),
1116 elt, gimple_assign_rhs2 (g));
1117 if (elt == NULL_TREE
1118 || TREE_CODE (elt) != INTEGER_CST
1119 || TREE_OVERFLOW (elt))
1120 return;
1121 this_base = gimple_assign_rhs1 (g);
1122 }
1123 cst.quick_push (elt);
1124 }
1125 for (i = 0; i < nelts; i++)
1126 CONSTRUCTOR_ELT (rhs, i)->value = base;
1127 g = gimple_build_assign (make_ssa_name (type), rhs);
1128 gsi_insert_before (gsi, g, GSI_SAME_STMT);
1129 g = gimple_build_assign (lhs, PLUS_EXPR, gimple_assign_lhs (g),
1130 build_vector (type, cst));
1131 gsi_replace (gsi, g, false);
1132 }
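In GNU C vector-extension terms, the rewrite performed by optimize_vector_constructor corresponds to the sketch below (v4si and build_strided are illustrative names, not part of the source): the stride-initialized constructor becomes a splat of the base plus a constant offset vector, so the variable part is inserted only once.

  typedef int v4si __attribute__ ((vector_size (16)));

  static v4si
  build_strided (int b)
  {
    v4si splat = { b, b, b, b };   /* _9  = { b_7, b_7, b_7, b_7 } */
    v4si offs  = { 0, 3, 6, 9 };
    return splat + offs;           /* a_5 = _9 + { 0, 3, 6, 9 }    */
  }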
393 1133
394 /* Return a type for the widest vector mode whose components are of mode 1134 /* Return a type for the widest vector mode whose components are of type
395 INNER_MODE, or NULL_TREE if none is found. 1135 TYPE, or NULL_TREE if none is found. */
396 SATP is true for saturating fixed-point types. */
397 1136
398 static tree 1137 static tree
399 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op, int satp) 1138 type_for_widest_vector_mode (tree type, optab op)
400 { 1139 {
401 enum machine_mode best_mode = VOIDmode, mode; 1140 machine_mode inner_mode = TYPE_MODE (type);
1141 machine_mode best_mode = VOIDmode, mode;
402 int best_nunits = 0; 1142 int best_nunits = 0;
403 1143
404 if (SCALAR_FLOAT_MODE_P (inner_mode)) 1144 if (SCALAR_FLOAT_MODE_P (inner_mode))
405 mode = MIN_MODE_VECTOR_FLOAT; 1145 mode = MIN_MODE_VECTOR_FLOAT;
406 else if (SCALAR_FRACT_MODE_P (inner_mode)) 1146 else if (SCALAR_FRACT_MODE_P (inner_mode))
412 else if (SCALAR_UACCUM_MODE_P (inner_mode)) 1152 else if (SCALAR_UACCUM_MODE_P (inner_mode))
413 mode = MIN_MODE_VECTOR_UACCUM; 1153 mode = MIN_MODE_VECTOR_UACCUM;
414 else 1154 else
415 mode = MIN_MODE_VECTOR_INT; 1155 mode = MIN_MODE_VECTOR_INT;
416 1156
417 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) 1157 FOR_EACH_MODE_FROM (mode, mode)
418 if (GET_MODE_INNER (mode) == inner_mode 1158 if (GET_MODE_INNER (mode) == inner_mode
419 && GET_MODE_NUNITS (mode) > best_nunits 1159 && GET_MODE_NUNITS (mode) > best_nunits
420 && optab_handler (op, mode) != CODE_FOR_nothing) 1160 && optab_handler (op, mode) != CODE_FOR_nothing)
421 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode); 1161 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
422 1162
423 if (best_mode == VOIDmode) 1163 if (best_mode == VOIDmode)
424 return NULL_TREE; 1164 return NULL_TREE;
425 else 1165 else
426 { 1166 return build_vector_type_for_mode (type, best_mode);
427 /* For fixed-point modes, we need to pass satp as the 2nd parameter. */ 1167 }
428 if (ALL_FIXED_POINT_MODE_P (best_mode)) 1168
429 return lang_hooks.types.type_for_mode (best_mode, satp); 1169
430 1170 /* Build a reference to the element of the vector VECT. Function
431 return lang_hooks.types.type_for_mode (best_mode, 1); 1171 returns either the element itself, either BIT_FIELD_REF, or an
432 } 1172 ARRAY_REF expression.
1173
1174 GSI is required to insert temporary variables while building a
1175 reference to the element of the vector VECT.
1176
1177 PTMPVEC is a pointer to the temporary variable for caching
1178 purposes. In case when PTMPVEC is NULL new temporary variable
1179 will be created. */
1180 static tree
1181 vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
1182 {
1183 tree vect_type, vect_elt_type;
1184 gimple *asgn;
1185 tree tmpvec;
1186 tree arraytype;
1187 bool need_asgn = true;
1188 unsigned int elements;
1189
1190 vect_type = TREE_TYPE (vect);
1191 vect_elt_type = TREE_TYPE (vect_type);
1192 elements = TYPE_VECTOR_SUBPARTS (vect_type);
1193
1194 if (TREE_CODE (idx) == INTEGER_CST)
1195 {
1196 unsigned HOST_WIDE_INT index;
1197
1198 /* Given that we're about to compute a binary modulus,
1199 we don't care about the high bits of the value. */
1200 index = TREE_INT_CST_LOW (idx);
1201 if (!tree_fits_uhwi_p (idx) || index >= elements)
1202 {
1203 index &= elements - 1;
1204 idx = build_int_cst (TREE_TYPE (idx), index);
1205 }
1206
1207 /* When lowering a vector statement sequence do some easy
1208 simplification by looking through intermediate vector results. */
1209 if (TREE_CODE (vect) == SSA_NAME)
1210 {
1211 gimple *def_stmt = SSA_NAME_DEF_STMT (vect);
1212 if (is_gimple_assign (def_stmt)
1213 && (gimple_assign_rhs_code (def_stmt) == VECTOR_CST
1214 || gimple_assign_rhs_code (def_stmt) == CONSTRUCTOR))
1215 vect = gimple_assign_rhs1 (def_stmt);
1216 }
1217
1218 if (TREE_CODE (vect) == VECTOR_CST)
1219 return VECTOR_CST_ELT (vect, index);
1220 else if (TREE_CODE (vect) == CONSTRUCTOR
1221 && (CONSTRUCTOR_NELTS (vect) == 0
1222 || TREE_CODE (TREE_TYPE (CONSTRUCTOR_ELT (vect, 0)->value))
1223 != VECTOR_TYPE))
1224 {
1225 if (index < CONSTRUCTOR_NELTS (vect))
1226 return CONSTRUCTOR_ELT (vect, index)->value;
1227 return build_zero_cst (vect_elt_type);
1228 }
1229 else
1230 {
1231 tree size = TYPE_SIZE (vect_elt_type);
1232 tree pos = fold_build2 (MULT_EXPR, bitsizetype, bitsize_int (index),
1233 size);
1234 return fold_build3 (BIT_FIELD_REF, vect_elt_type, vect, size, pos);
1235 }
1236 }
1237
1238 if (!ptmpvec)
1239 tmpvec = create_tmp_var (vect_type, "vectmp");
1240 else if (!*ptmpvec)
1241 tmpvec = *ptmpvec = create_tmp_var (vect_type, "vectmp");
1242 else
1243 {
1244 tmpvec = *ptmpvec;
1245 need_asgn = false;
1246 }
1247
1248 if (need_asgn)
1249 {
1250 TREE_ADDRESSABLE (tmpvec) = 1;
1251 asgn = gimple_build_assign (tmpvec, vect);
1252 gsi_insert_before (gsi, asgn, GSI_SAME_STMT);
1253 }
1254
1255 arraytype = build_array_type_nelts (vect_elt_type, elements);
1256 return build4 (ARRAY_REF, vect_elt_type,
1257 build1 (VIEW_CONVERT_EXPR, arraytype, tmpvec),
1258 idx, NULL_TREE, NULL_TREE);
1259 }
1260
1261 /* Check if VEC_PERM_EXPR within the given setting is supported
1262 by hardware, or lower it piecewise.
1263
1264 When VEC_PERM_EXPR has the same first and second operands:
1265 VEC_PERM_EXPR <v0, v0, mask> the lowered version would be
1266 {v0[mask[0]], v0[mask[1]], ...}
1267 MASK and V0 must have the same number of elements.
1268
1269 Otherwise VEC_PERM_EXPR <v0, v1, mask> is lowered to
1270 {mask[0] < len(v0) ? v0[mask[0]] : v1[mask[0]], ...}
1271 V0 and V1 must have the same type. MASK, V0, V1 must have the
1272 same number of arguments. */
1273
1274 static void
1275 lower_vec_perm (gimple_stmt_iterator *gsi)
1276 {
1277 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1278 tree mask = gimple_assign_rhs3 (stmt);
1279 tree vec0 = gimple_assign_rhs1 (stmt);
1280 tree vec1 = gimple_assign_rhs2 (stmt);
1281 tree vect_type = TREE_TYPE (vec0);
1282 tree mask_type = TREE_TYPE (mask);
1283 tree vect_elt_type = TREE_TYPE (vect_type);
1284 tree mask_elt_type = TREE_TYPE (mask_type);
1285 unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type);
1286 vec<constructor_elt, va_gc> *v;
1287 tree constr, t, si, i_val;
1288 tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
1289 bool two_operand_p = !operand_equal_p (vec0, vec1, 0);
1290 location_t loc = gimple_location (gsi_stmt (*gsi));
1291 unsigned i;
1292
1293 if (TREE_CODE (mask) == SSA_NAME)
1294 {
1295 gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
1296 if (is_gimple_assign (def_stmt)
1297 && gimple_assign_rhs_code (def_stmt) == VECTOR_CST)
1298 mask = gimple_assign_rhs1 (def_stmt);
1299 }
1300
1301 if (TREE_CODE (mask) == VECTOR_CST)
1302 {
1303 auto_vec_perm_indices sel_int (elements);
1304
1305 for (i = 0; i < elements; ++i)
1306 sel_int.quick_push (TREE_INT_CST_LOW (VECTOR_CST_ELT (mask, i))
1307 & (2 * elements - 1));
1308
1309 if (can_vec_perm_p (TYPE_MODE (vect_type), false, &sel_int))
1310 {
1311 gimple_assign_set_rhs3 (stmt, mask);
1312 update_stmt (stmt);
1313 return;
1314 }
1315 /* Also detect vec_shr pattern - VEC_PERM_EXPR with zero
1316 vector as VEC1 and a right element shift MASK. */
1317 if (optab_handler (vec_shr_optab, TYPE_MODE (vect_type))
1318 != CODE_FOR_nothing
1319 && TREE_CODE (vec1) == VECTOR_CST
1320 && initializer_zerop (vec1)
1321 && sel_int[0]
1322 && sel_int[0] < elements)
1323 {
1324 for (i = 1; i < elements; ++i)
1325 {
1326 unsigned int expected = i + sel_int[0];
1327 /* Indices into the second vector are all equivalent. */
1328 if (MIN (elements, (unsigned) sel_int[i])
1329 != MIN (elements, expected))
1330 break;
1331 }
1332 if (i == elements)
1333 {
1334 gimple_assign_set_rhs3 (stmt, mask);
1335 update_stmt (stmt);
1336 return;
1337 }
1338 }
1339 }
1340 else if (can_vec_perm_p (TYPE_MODE (vect_type), true, NULL))
1341 return;
1342
1343 warning_at (loc, OPT_Wvector_operation_performance,
1344 "vector shuffling operation will be expanded piecewise");
1345
1346 vec_alloc (v, elements);
1347 for (i = 0; i < elements; i++)
1348 {
1349 si = size_int (i);
1350 i_val = vector_element (gsi, mask, si, &masktmp);
1351
1352 if (TREE_CODE (i_val) == INTEGER_CST)
1353 {
1354 unsigned HOST_WIDE_INT index;
1355
1356 index = TREE_INT_CST_LOW (i_val);
1357 if (!tree_fits_uhwi_p (i_val) || index >= elements)
1358 i_val = build_int_cst (mask_elt_type, index & (elements - 1));
1359
1360 if (two_operand_p && (index & elements) != 0)
1361 t = vector_element (gsi, vec1, i_val, &vec1tmp);
1362 else
1363 t = vector_element (gsi, vec0, i_val, &vec0tmp);
1364
1365 t = force_gimple_operand_gsi (gsi, t, true, NULL_TREE,
1366 true, GSI_SAME_STMT);
1367 }
1368 else
1369 {
1370 tree cond = NULL_TREE, v0_val;
1371
1372 if (two_operand_p)
1373 {
1374 cond = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1375 build_int_cst (mask_elt_type, elements));
1376 cond = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1377 true, GSI_SAME_STMT);
1378 }
1379
1380 i_val = fold_build2 (BIT_AND_EXPR, mask_elt_type, i_val,
1381 build_int_cst (mask_elt_type, elements - 1));
1382 i_val = force_gimple_operand_gsi (gsi, i_val, true, NULL_TREE,
1383 true, GSI_SAME_STMT);
1384
1385 v0_val = vector_element (gsi, vec0, i_val, &vec0tmp);
1386 v0_val = force_gimple_operand_gsi (gsi, v0_val, true, NULL_TREE,
1387 true, GSI_SAME_STMT);
1388
1389 if (two_operand_p)
1390 {
1391 tree v1_val;
1392
1393 v1_val = vector_element (gsi, vec1, i_val, &vec1tmp);
1394 v1_val = force_gimple_operand_gsi (gsi, v1_val, true, NULL_TREE,
1395 true, GSI_SAME_STMT);
1396
1397 cond = fold_build2 (EQ_EXPR, boolean_type_node,
1398 cond, build_zero_cst (mask_elt_type));
1399 cond = fold_build3 (COND_EXPR, vect_elt_type,
1400 cond, v0_val, v1_val);
1401 t = force_gimple_operand_gsi (gsi, cond, true, NULL_TREE,
1402 true, GSI_SAME_STMT);
1403 }
1404 else
1405 t = v0_val;
1406 }
1407
1408 CONSTRUCTOR_APPEND_ELT (v, NULL_TREE, t);
1409 }
1410
1411 constr = build_constructor (vect_type, v);
1412 gimple_assign_set_rhs_from_tree (gsi, constr);
1413 update_stmt (gsi_stmt (*gsi));
1414 }
1415
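The piecewise fallback in lower_vec_perm behaves, element by element, like the scalar model below (a hedged sketch for two 4-lane vectors; the function name is invented): each mask element is reduced modulo 2*nelts, and indices at or above nelts select from the second operand.

  #include <stdint.h>

  static void
  vec_perm_4 (const int32_t v0[4], const int32_t v1[4],
              const int32_t mask[4], int32_t out[4])
  {
    for (int i = 0; i < 4; i++)
      {
        int idx = mask[i] & 7;                     /* & (2 * elements - 1) */
        out[i] = idx < 4 ? v0[idx] : v1[idx - 4];  /* 2nd operand if >= nelts */
      }
  }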
1416 /* If OP is a uniform vector return the element it is a splat from. */
1417
1418 static tree
1419 ssa_uniform_vector_p (tree op)
1420 {
1421 if (TREE_CODE (op) == VECTOR_CST
1422 || TREE_CODE (op) == CONSTRUCTOR)
1423 return uniform_vector_p (op);
1424 if (TREE_CODE (op) == SSA_NAME)
1425 {
1426 gimple *def_stmt = SSA_NAME_DEF_STMT (op);
1427 if (gimple_assign_single_p (def_stmt))
1428 return uniform_vector_p (gimple_assign_rhs1 (def_stmt));
1429 }
1430 return NULL_TREE;
1431 }
1432
1433 /* Return type in which CODE operation with optab OP can be
1434 computed. */
1435
1436 static tree
1437 get_compute_type (enum tree_code code, optab op, tree type)
1438 {
1439 /* For very wide vectors, try using a smaller vector mode. */
1440 tree compute_type = type;
1441 if (op
1442 && (!VECTOR_MODE_P (TYPE_MODE (type))
1443 || optab_handler (op, TYPE_MODE (type)) == CODE_FOR_nothing))
1444 {
1445 tree vector_compute_type
1446 = type_for_widest_vector_mode (TREE_TYPE (type), op);
1447 if (vector_compute_type != NULL_TREE
1448 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
1449 < TYPE_VECTOR_SUBPARTS (compute_type))
1450 && TYPE_VECTOR_SUBPARTS (vector_compute_type) > 1
1451 && (optab_handler (op, TYPE_MODE (vector_compute_type))
1452 != CODE_FOR_nothing))
1453 compute_type = vector_compute_type;
1454 }
1455
1456 /* If we are breaking a BLKmode vector into smaller pieces,
1457 type_for_widest_vector_mode has already looked into the optab,
1458 so skip these checks. */
1459 if (compute_type == type)
1460 {
1461 machine_mode compute_mode = TYPE_MODE (compute_type);
1462 if (VECTOR_MODE_P (compute_mode))
1463 {
1464 if (op && optab_handler (op, compute_mode) != CODE_FOR_nothing)
1465 return compute_type;
1466 if (code == MULT_HIGHPART_EXPR
1467 && can_mult_highpart_p (compute_mode,
1468 TYPE_UNSIGNED (compute_type)))
1469 return compute_type;
1470 }
1471 /* There is no operation in hardware, so fall back to scalars. */
1472 compute_type = TREE_TYPE (type);
1473 }
1474
1475 return compute_type;
1476 }
1477
1478 /* Helper function of expand_vector_operations_1. Return number of
1479 vector elements for vector types or 1 for other types. */
1480
1481 static inline int
1482 count_type_subparts (tree type)
1483 {
1484 return VECTOR_TYPE_P (type) ? TYPE_VECTOR_SUBPARTS (type) : 1;
1485 }
1486
1487 static tree
1488 do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
1489 tree bitpos, tree bitsize, enum tree_code code,
1490 tree type ATTRIBUTE_UNUSED)
1491 {
1492 if (TREE_CODE (TREE_TYPE (a)) == VECTOR_TYPE)
1493 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
1494 if (TREE_CODE (TREE_TYPE (b)) == VECTOR_TYPE)
1495 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
1496 tree cond = gimple_assign_rhs1 (gsi_stmt (*gsi));
1497 return gimplify_build3 (gsi, code, inner_type, unshare_expr (cond), a, b);
1498 }
1499
1500 /* Expand a vector COND_EXPR to scalars, piecewise. */
1501 static void
1502 expand_vector_scalar_condition (gimple_stmt_iterator *gsi)
1503 {
1504 gassign *stmt = as_a <gassign *> (gsi_stmt (*gsi));
1505 tree type = gimple_expr_type (stmt);
1506 tree compute_type = get_compute_type (COND_EXPR, mov_optab, type);
1507 machine_mode compute_mode = TYPE_MODE (compute_type);
1508 gcc_assert (compute_mode != BLKmode);
1509 tree lhs = gimple_assign_lhs (stmt);
1510 tree rhs2 = gimple_assign_rhs2 (stmt);
1511 tree rhs3 = gimple_assign_rhs3 (stmt);
1512 tree new_rhs;
1513
1514 /* If the compute mode is not a vector mode (hence we are not decomposing
1515 a BLKmode vector to smaller, hardware-supported vectors), we may want
1516 to expand the operations in parallel. */
1517 if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
1518 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
1519 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
1520 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
1521 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
1522 && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
1523 new_rhs = expand_vector_parallel (gsi, do_cond, type, rhs2, rhs3,
1524 COND_EXPR);
1525 else
1526 new_rhs = expand_vector_piecewise (gsi, do_cond, type, compute_type,
1527 rhs2, rhs3, COND_EXPR);
1528 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
1529 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
1530 new_rhs);
1531
1532 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
1533 way to do it is change expand_vector_operation and its callees to
1534 return a tree_code, RHS1 and RHS2 instead of a tree. */
1535 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
1536 update_stmt (gsi_stmt (*gsi));
433 } 1537 }
434 1538
435 /* Process one statement. If we identify a vector operation, expand it. */ 1539 /* Process one statement. If we identify a vector operation, expand it. */
436 1540
437 static void 1541 static void
438 expand_vector_operations_1 (gimple_stmt_iterator *gsi) 1542 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
439 { 1543 {
440 gimple stmt = gsi_stmt (*gsi); 1544 tree lhs, rhs1, rhs2 = NULL, type, compute_type = NULL_TREE;
441 tree lhs, rhs1, rhs2 = NULL, type, compute_type;
442 enum tree_code code; 1545 enum tree_code code;
443 enum machine_mode compute_mode; 1546 optab op = unknown_optab;
444 optab op = NULL;
445 enum gimple_rhs_class rhs_class; 1547 enum gimple_rhs_class rhs_class;
446 tree new_rhs; 1548 tree new_rhs;
447 1549
448 if (gimple_code (stmt) != GIMPLE_ASSIGN) 1550 /* Only consider code == GIMPLE_ASSIGN. */
1551 gassign *stmt = dyn_cast <gassign *> (gsi_stmt (*gsi));
1552 if (!stmt)
449 return; 1553 return;
450 1554
451 code = gimple_assign_rhs_code (stmt); 1555 code = gimple_assign_rhs_code (stmt);
452 rhs_class = get_gimple_rhs_class (code); 1556 rhs_class = get_gimple_rhs_class (code);
1557 lhs = gimple_assign_lhs (stmt);
1558
1559 if (code == VEC_PERM_EXPR)
1560 {
1561 lower_vec_perm (gsi);
1562 return;
1563 }
1564
1565 if (code == VEC_COND_EXPR)
1566 {
1567 expand_vector_condition (gsi);
1568 return;
1569 }
1570
1571 if (code == COND_EXPR
1572 && TREE_CODE (TREE_TYPE (gimple_assign_lhs (stmt))) == VECTOR_TYPE
1573 && TYPE_MODE (TREE_TYPE (gimple_assign_lhs (stmt))) == BLKmode)
1574 {
1575 expand_vector_scalar_condition (gsi);
1576 return;
1577 }
1578
1579 if (code == CONSTRUCTOR
1580 && TREE_CODE (lhs) == SSA_NAME
1581 && VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (lhs)))
1582 && !gimple_clobber_p (stmt)
1583 && optimize)
1584 {
1585 optimize_vector_constructor (gsi);
1586 return;
1587 }
453 1588
454 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS) 1589 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
455 return; 1590 return;
456 1591
457 lhs = gimple_assign_lhs (stmt);
458 rhs1 = gimple_assign_rhs1 (stmt); 1592 rhs1 = gimple_assign_rhs1 (stmt);
459 type = gimple_expr_type (stmt); 1593 type = gimple_expr_type (stmt);
460 if (rhs_class == GIMPLE_BINARY_RHS) 1594 if (rhs_class == GIMPLE_BINARY_RHS)
461 rhs2 = gimple_assign_rhs2 (stmt); 1595 rhs2 = gimple_assign_rhs2 (stmt);
462 1596
463 if (TREE_CODE (type) != VECTOR_TYPE) 1597 if (TREE_CODE (type) != VECTOR_TYPE)
464 return; 1598 return;
465 1599
466 if (code == NOP_EXPR 1600 /* If the vector operation operates on vectors whose elements are all
1601 the same, implement it with a scalar operation and a splat, provided
1602 the target supports the scalar operation. */
1603 tree srhs1, srhs2 = NULL_TREE;
1604 if ((srhs1 = ssa_uniform_vector_p (rhs1)) != NULL_TREE
1605 && (rhs2 == NULL_TREE
1606 || (! VECTOR_TYPE_P (TREE_TYPE (rhs2))
1607 && (srhs2 = rhs2))
1608 || (srhs2 = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
1609 /* Since we query direct optabs, restrict this to non-convert operations. */
1610 && TYPE_MODE (TREE_TYPE (type)) == TYPE_MODE (TREE_TYPE (srhs1)))
1611 {
1612 op = optab_for_tree_code (code, TREE_TYPE (type), optab_scalar);
1613 if (op >= FIRST_NORM_OPTAB && op <= LAST_NORM_OPTAB
1614 && optab_handler (op, TYPE_MODE (TREE_TYPE (type))) != CODE_FOR_nothing)
1615 {
1616 tree slhs = make_ssa_name (TREE_TYPE (srhs1));
1617 gimple *repl = gimple_build_assign (slhs, code, srhs1, srhs2);
1618 gsi_insert_before (gsi, repl, GSI_SAME_STMT);
1619 gimple_assign_set_rhs_from_tree (gsi,
1620 build_vector_from_val (type, slhs));
1621 update_stmt (stmt);
1622 return;
1623 }
1624 }
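Editor's note: the uniform-operand shortcut added in this hunk can be illustrated as follows. This is an editorial sketch, not part of the patch; v4si and the initializer splat rely on GCC's vector extension.

/* Sketch only: when both operands are splats of scalars x and y, the
   statement is rewritten as one scalar operation followed by a splat
   (build_vector_from_val) instead of being decomposed per element.  */
typedef int v4si __attribute__ ((vector_size (16)));

static v4si
uniform_add (int x, int y)
{
  /* Before: {x,x,x,x} + {y,y,y,y}  -- a V4SI addition.  */
  /* After:  s = x + y; result = {s,s,s,s}.  */
  int s = x + y;
  return (v4si) { s, s, s, s };
}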
1625
1626 /* A scalar operation pretending to be a vector one. */
1627 if (VECTOR_BOOLEAN_TYPE_P (type)
1628 && !VECTOR_MODE_P (TYPE_MODE (type))
1629 && TYPE_MODE (type) != BLKmode)
1630 return;
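Editor's note: the early return above covers vector boolean types whose mode is a scalar integer, i.e. bit-mask style vectors. The sketch below illustrates that idea under that assumption; mask8 is a hypothetical stand-in type, not something defined by the patch.

/* Sketch only: a "vector" of booleans living in a scalar integer mode.
   Operations on it are already scalar bitwise operations, so there is
   nothing for this pass to lower.  */
typedef unsigned char mask8;	/* stand-in for an 8-lane boolean mask in QImode */

static mask8
mask_and (mask8 a, mask8 b)
{
  return a & b;			/* one scalar AND covers all eight lanes */
}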
1631
1632 if (CONVERT_EXPR_CODE_P (code)
467 || code == FLOAT_EXPR 1633 || code == FLOAT_EXPR
468 || code == FIX_TRUNC_EXPR 1634 || code == FIX_TRUNC_EXPR
469 || code == VIEW_CONVERT_EXPR) 1635 || code == VIEW_CONVERT_EXPR)
470 return; 1636 return;
471 1637
472 gcc_assert (code != CONVERT_EXPR);
473
474 /* The signedness is determined from input argument. */ 1638 /* The signedness is determined from input argument. */
475 if (code == VEC_UNPACK_FLOAT_HI_EXPR 1639 if (code == VEC_UNPACK_FLOAT_HI_EXPR
476 || code == VEC_UNPACK_FLOAT_LO_EXPR) 1640 || code == VEC_UNPACK_FLOAT_LO_EXPR)
477 type = TREE_TYPE (rhs1); 1641 {
1642 type = TREE_TYPE (rhs1);
1643 /* We do not know how to scalarize those. */
1644 return;
1645 }
1646
1647 /* For widening/narrowing vector operations, the relevant type is that of
1648 the arguments, not the widened result; VEC_UNPACK_FLOAT_*_EXPR above is
1649 handled the same way. */
1650 if (code == WIDEN_SUM_EXPR
1651 || code == VEC_WIDEN_MULT_HI_EXPR
1652 || code == VEC_WIDEN_MULT_LO_EXPR
1653 || code == VEC_WIDEN_MULT_EVEN_EXPR
1654 || code == VEC_WIDEN_MULT_ODD_EXPR
1655 || code == VEC_UNPACK_HI_EXPR
1656 || code == VEC_UNPACK_LO_EXPR
1657 || code == VEC_PACK_TRUNC_EXPR
1658 || code == VEC_PACK_SAT_EXPR
1659 || code == VEC_PACK_FIX_TRUNC_EXPR
1660 || code == VEC_WIDEN_LSHIFT_HI_EXPR
1661 || code == VEC_WIDEN_LSHIFT_LO_EXPR)
1662 {
1663 type = TREE_TYPE (rhs1);
1664 /* We do not know how to scalarize those. */
1665 return;
1666 }
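Editor's note: to make the "relevant type" remark concrete, for the widening operations listed the element count comes from the argument type, not the result type. The sketch below is editorial, not from the patch; the V8HI/V4SI shapes are just a common example, and which half counts as "high" is target-endian dependent.

/* Sketch only: VEC_WIDEN_MULT_HI_EXPR takes two 8-element short vectors and
   yields a 4-element int vector of full products from one half of the lanes,
   so any lowering has to reason in terms of the 8-element argument type.  */
typedef short v8hi __attribute__ ((vector_size (16)));
typedef int   v4si __attribute__ ((vector_size (16)));

static v4si
widen_mult_hi (v8hi a, v8hi b)
{
  v4si r;
  for (int i = 0; i < 4; i++)
    r[i] = (int) a[i + 4] * (int) b[i + 4];	/* upper-numbered lanes here */
  return r;
}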
478 1667
479 /* Choose between vector shift/rotate by vector and vector shift/rotate by 1668 /* Choose between vector shift/rotate by vector and vector shift/rotate by
480 scalar */ 1669 scalar */
481 if (code == LSHIFT_EXPR 1670 if (code == LSHIFT_EXPR
482 || code == RSHIFT_EXPR 1671 || code == RSHIFT_EXPR
483 || code == LROTATE_EXPR 1672 || code == LROTATE_EXPR
484 || code == RROTATE_EXPR) 1673 || code == RROTATE_EXPR)
485 { 1674 {
486 bool vector_scalar_shift; 1675 optab opv;
487 op = optab_for_tree_code (code, type, optab_scalar); 1676
488 1677 /* Check whether we have vector <op> {x,x,x,x} where x
489 /* Vector/Scalar shift is supported. */ 1678 could be a scalar variable or a constant. Transform
490 vector_scalar_shift = (op && (optab_handler (op, TYPE_MODE (type)) 1679 vector <op> {x,x,x,x} ==> vector <op> scalar. */
491 != CODE_FOR_nothing)); 1680 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
492
493 /* If the 2nd argument is vector, we need a vector/vector shift.
494 Except all the elements in the second vector are the same. */
495 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2))))
496 { 1681 {
497 tree first; 1682 tree first;
498 gimple def_stmt; 1683
499 1684 if ((first = ssa_uniform_vector_p (rhs2)) != NULL_TREE)
500 /* Check whether we have vector <op> {x,x,x,x} where x
501 could be a scalar variable or a constant. Transform
502 vector <op> {x,x,x,x} ==> vector <op> scalar. */
503 if (vector_scalar_shift
504 && ((TREE_CODE (rhs2) == VECTOR_CST
505 && (first = uniform_vector_p (rhs2)) != NULL_TREE)
506 || (TREE_CODE (rhs2) == SSA_NAME
507 && (def_stmt = SSA_NAME_DEF_STMT (rhs2))
508 && gimple_assign_single_p (def_stmt)
509 && (first = uniform_vector_p
510 (gimple_assign_rhs1 (def_stmt))) != NULL_TREE)))
511 { 1685 {
512 gimple_assign_set_rhs2 (stmt, first); 1686 gimple_assign_set_rhs2 (stmt, first);
513 update_stmt (stmt); 1687 update_stmt (stmt);
514 rhs2 = first; 1688 rhs2 = first;
515 } 1689 }
516 else
517 op = optab_for_tree_code (code, type, optab_vector);
518 } 1690 }
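Editor's note: the "vector <op> {x,x,x,x} ==> vector <op> scalar" rewrite amounts to the following editorial sketch (GCC vector extension syntax; not part of the patch).

/* Sketch only: a shift by a uniform vector is rewritten to use the scalar
   amount, so a vector-by-scalar shift optab can be used if available.  */
typedef int v4si __attribute__ ((vector_size (16)));

static v4si
shift_by_uniform (v4si v)
{
  /* Before: v >> (v4si) {3, 3, 3, 3}   -- vector/vector shift.  */
  /* After:  v >> 3                     -- vector/scalar shift.  */
  return v >> 3;
}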
519 1691
520 /* Try for a vector/scalar shift, and if we don't have one, see if we 1692 opv = optab_for_tree_code (code, type, optab_vector);
521 have a vector/vector shift */ 1693 if (VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
522 else if (!vector_scalar_shift) 1694 op = opv;
523 { 1695 else
524 op = optab_for_tree_code (code, type, optab_vector); 1696 {
525 1697 op = optab_for_tree_code (code, type, optab_scalar);
526 if (op && (optab_handler (op, TYPE_MODE (type)) 1698
527 != CODE_FOR_nothing)) 1699 compute_type = get_compute_type (code, op, type);
1700 if (compute_type == type)
1701 return;
1702 /* The rtl expander will expand vector/scalar as vector/vector
1703 if necessary. Pick one with wider vector type. */
1704 tree compute_vtype = get_compute_type (code, opv, type);
1705 if (count_type_subparts (compute_vtype)
1706 > count_type_subparts (compute_type))
528 { 1707 {
529 /* Transform vector <op> scalar => vector <op> {x,x,x,x}. */ 1708 compute_type = compute_vtype;
530 int n_parts = TYPE_VECTOR_SUBPARTS (type); 1709 op = opv;
531 int part_size = tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1); 1710 }
532 tree part_type = lang_hooks.types.type_for_size (part_size, 1); 1711 }
533 tree vect_type = build_vector_type (part_type, n_parts); 1712
534 1713 if (code == LROTATE_EXPR || code == RROTATE_EXPR)
535 rhs2 = fold_convert (part_type, rhs2); 1714 {
536 rhs2 = build_vector_from_val (vect_type, rhs2); 1715 if (compute_type == NULL_TREE)
537 gimple_assign_set_rhs2 (stmt, rhs2); 1716 compute_type = get_compute_type (code, op, type);
538 update_stmt (stmt); 1717 if (compute_type == type)
1718 return;
1719 /* Before splitting vector rotates into scalar rotates,
1720 see whether we can use vector shifts and BIT_IOR_EXPR
1721 instead. For vector-by-vector rotates we would also
1722 need to check BIT_AND_EXPR and NEGATE_EXPR; punt on those
1723 for now, since fold does not seem to create such rotates anyway. */
1724 if (compute_type == TREE_TYPE (type)
1725 && !VECTOR_INTEGER_TYPE_P (TREE_TYPE (rhs2)))
1726 {
1727 optab oplv = vashl_optab, opl = ashl_optab;
1728 optab oprv = vlshr_optab, opr = lshr_optab, opo = ior_optab;
1729 tree compute_lvtype = get_compute_type (LSHIFT_EXPR, oplv, type);
1730 tree compute_rvtype = get_compute_type (RSHIFT_EXPR, oprv, type);
1731 tree compute_otype = get_compute_type (BIT_IOR_EXPR, opo, type);
1732 tree compute_ltype = get_compute_type (LSHIFT_EXPR, opl, type);
1733 tree compute_rtype = get_compute_type (RSHIFT_EXPR, opr, type);
1734 /* The rtl expander will expand vector/scalar as vector/vector
1735 if necessary. Pick one with wider vector type. */
1736 if (count_type_subparts (compute_lvtype)
1737 > count_type_subparts (compute_ltype))
1738 {
1739 compute_ltype = compute_lvtype;
1740 opl = oplv;
1741 }
1742 if (count_type_subparts (compute_rvtype)
1743 > count_type_subparts (compute_rtype))
1744 {
1745 compute_rtype = compute_rvtype;
1746 opr = oprv;
1747 }
1748 /* Pick the narrowest type from LSHIFT_EXPR, RSHIFT_EXPR and
1749 BIT_IOR_EXPR. */
1750 compute_type = compute_ltype;
1751 if (count_type_subparts (compute_type)
1752 > count_type_subparts (compute_rtype))
1753 compute_type = compute_rtype;
1754 if (count_type_subparts (compute_type)
1755 > count_type_subparts (compute_otype))
1756 compute_type = compute_otype;
1757 /* Verify all 3 operations can be performed in that type. */
1758 if (compute_type != TREE_TYPE (type))
1759 {
1760 if (optab_handler (opl, TYPE_MODE (compute_type))
1761 == CODE_FOR_nothing
1762 || optab_handler (opr, TYPE_MODE (compute_type))
1763 == CODE_FOR_nothing
1764 || optab_handler (opo, TYPE_MODE (compute_type))
1765 == CODE_FOR_nothing)
1766 compute_type = TREE_TYPE (type);
1767 }
539 } 1768 }
540 } 1769 }
541 } 1770 }
542 else 1771 else
543 op = optab_for_tree_code (code, type, optab_default); 1772 op = optab_for_tree_code (code, type, optab_default);
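Editor's note: the rotate fallback in the hunk above relies on the standard shift-and-IOR identity. A minimal sketch for a 32-bit element, assuming the shift count is in 1..31 (the lowering itself handles the general case); editorial, not part of the patch.

/* Sketch only: a left rotate expressed with two shifts and a BIT_IOR_EXPR,
   which is exactly what the code above verifies compute_type can do.  */
static unsigned int
rotate_left (unsigned int x, unsigned int n)
{
  return (x << n) | (x >> (32 - n));	/* assumes 0 < n < 32 */
}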
544
545 /* For widening/narrowing vector operations, the relevant type is of the
546 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
547 calculated in the same way above. */
548 if (code == WIDEN_SUM_EXPR
549 || code == VEC_WIDEN_MULT_HI_EXPR
550 || code == VEC_WIDEN_MULT_LO_EXPR
551 || code == VEC_UNPACK_HI_EXPR
552 || code == VEC_UNPACK_LO_EXPR
553 || code == VEC_PACK_TRUNC_EXPR
554 || code == VEC_PACK_SAT_EXPR
555 || code == VEC_PACK_FIX_TRUNC_EXPR)
556 type = TREE_TYPE (rhs1);
557 1773
558 /* Optabs will try converting a negation into a subtraction, so 1774 /* Optabs will try converting a negation into a subtraction, so
559 look for it as well. TODO: negation of floating-point vectors 1775 look for it as well. TODO: negation of floating-point vectors
560 might be turned into an exclusive OR toggling the sign bit. */ 1776 might be turned into an exclusive OR toggling the sign bit. */
561 if (op == NULL 1777 if (op == unknown_optab
562 && code == NEGATE_EXPR 1778 && code == NEGATE_EXPR
563 && INTEGRAL_TYPE_P (TREE_TYPE (type))) 1779 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
564 op = optab_for_tree_code (MINUS_EXPR, type, optab_default); 1780 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
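Editor's note: the fallback above uses the identity -x == 0 - x for integral element types; the TODO refers to flipping the IEEE sign bit for floating-point vectors. A sketch of both, assuming two's-complement integers and IEEE-754 binary32 floats (editorial, not part of the patch):

/* Sketch only: why the MINUS_EXPR optab can stand in for NEGATE_EXPR, and
   what the TODO's sign-bit trick would look like for a single float.  */
#include <stdint.h>
#include <string.h>

static int32_t
neg_as_sub (int32_t x)
{
  return 0 - x;			/* same bits as -x in two's complement */
}

static float
neg_by_sign_bit (float x)
{
  uint32_t bits;
  memcpy (&bits, &x, sizeof bits);
  bits ^= UINT32_C (0x80000000);	/* toggle the IEEE-754 sign bit */
  memcpy (&x, &bits, sizeof x);
  return x;
}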
565 1781
566 /* For very wide vectors, try using a smaller vector mode. */ 1782 if (compute_type == NULL_TREE)
567 compute_type = type; 1783 compute_type = get_compute_type (code, op, type);
568 if (TYPE_MODE (type) == BLKmode && op)
569 {
570 tree vector_compute_type
571 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op,
572 TYPE_SATURATING (TREE_TYPE (type)));
573 if (vector_compute_type != NULL_TREE
574 && (TYPE_VECTOR_SUBPARTS (vector_compute_type)
575 < TYPE_VECTOR_SUBPARTS (compute_type)))
576 compute_type = vector_compute_type;
577 }
578
579 /* If we are breaking a BLKmode vector into smaller pieces,
580 type_for_widest_vector_mode has already looked into the optab,
581 so skip these checks. */
582 if (compute_type == type) 1784 if (compute_type == type)
583 { 1785 return;
584 compute_mode = TYPE_MODE (compute_type); 1786
585 if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
586 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT
587 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FRACT
588 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_UFRACT
589 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_ACCUM
590 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_UACCUM)
591 && op != NULL
592 && optab_handler (op, compute_mode) != CODE_FOR_nothing)
593 return;
594 else
595 /* There is no operation in hardware, so fall back to scalars. */
596 compute_type = TREE_TYPE (type);
597 }
598
599 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
600 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code); 1787 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
1788
1789 /* Leave expression untouched for later expansion. */
1790 if (new_rhs == NULL_TREE)
1791 return;
1792
601 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs))) 1793 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
602 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs), 1794 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
603 new_rhs); 1795 new_rhs);
604 1796
605 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One 1797 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
610 } 1802 }
611 1803
612 /* Use this to lower vector operations introduced by the vectorizer, 1804 /* Use this to lower vector operations introduced by the vectorizer,
613 if it may need the bit-twiddling tricks implemented in this file. */ 1805 if it may need the bit-twiddling tricks implemented in this file. */
614 1806
615 static bool
616 gate_expand_vector_operations (void)
617 {
618 return flag_tree_vectorize != 0;
619 }
620
621 static unsigned int 1807 static unsigned int
622 expand_vector_operations (void) 1808 expand_vector_operations (void)
623 { 1809 {
624 gimple_stmt_iterator gsi; 1810 gimple_stmt_iterator gsi;
625 basic_block bb; 1811 basic_block bb;
626 bool cfg_changed = false; 1812 bool cfg_changed = false;
627 1813
628 FOR_EACH_BB (bb) 1814 FOR_EACH_BB_FN (bb, cfun)
629 { 1815 {
630 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 1816 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
631 { 1817 {
632 expand_vector_operations_1 (&gsi); 1818 expand_vector_operations_1 (&gsi);
633 /* ??? If we do not cleanup EH then we will ICE in 1819 /* ??? If we do not cleanup EH then we will ICE in
641 } 1827 }
642 1828
643 return cfg_changed ? TODO_cleanup_cfg : 0; 1829 return cfg_changed ? TODO_cleanup_cfg : 0;
644 } 1830 }
645 1831
646 struct gimple_opt_pass pass_lower_vector = 1832 namespace {
647 { 1833
648 { 1834 const pass_data pass_data_lower_vector =
649 GIMPLE_PASS, 1835 {
650 "veclower", /* name */ 1836 GIMPLE_PASS, /* type */
651 0, /* gate */ 1837 "veclower", /* name */
652 expand_vector_operations, /* execute */ 1838 OPTGROUP_VEC, /* optinfo_flags */
653 NULL, /* sub */ 1839 TV_NONE, /* tv_id */
654 NULL, /* next */ 1840 PROP_cfg, /* properties_required */
655 0, /* static_pass_number */ 1841 PROP_gimple_lvec, /* properties_provided */
656 TV_NONE, /* tv_id */ 1842 0, /* properties_destroyed */
657 PROP_cfg, /* properties_required */ 1843 0, /* todo_flags_start */
658 0, /* properties_provided */ 1844 TODO_update_ssa, /* todo_flags_finish */
659 0, /* properties_destroyed */
660 0, /* todo_flags_start */
661 TODO_dump_func | TODO_update_ssa /* todo_flags_finish */
662 | TODO_verify_ssa
663 | TODO_verify_stmts | TODO_verify_flow
664 }
665 }; 1845 };
666 1846
667 struct gimple_opt_pass pass_lower_vector_ssa = 1847 class pass_lower_vector : public gimple_opt_pass
668 { 1848 {
669 { 1849 public:
670 GIMPLE_PASS, 1850 pass_lower_vector (gcc::context *ctxt)
671 "veclower2", /* name */ 1851 : gimple_opt_pass (pass_data_lower_vector, ctxt)
672 gate_expand_vector_operations, /* gate */ 1852 {}
673 expand_vector_operations, /* execute */ 1853
674 NULL, /* sub */ 1854 /* opt_pass methods: */
675 NULL, /* next */ 1855 virtual bool gate (function *fun)
676 0, /* static_pass_number */ 1856 {
677 TV_NONE, /* tv_id */ 1857 return !(fun->curr_properties & PROP_gimple_lvec);
678 PROP_cfg, /* properties_required */ 1858 }
679 0, /* properties_provided */ 1859
680 0, /* properties_destroyed */ 1860 virtual unsigned int execute (function *)
681 0, /* todo_flags_start */ 1861 {
682 TODO_dump_func | TODO_update_ssa /* todo_flags_finish */ 1862 return expand_vector_operations ();
683 | TODO_verify_ssa 1863 }
684 | TODO_verify_stmts | TODO_verify_flow 1864
685 } 1865 }; // class pass_lower_vector
1866
1867 } // anon namespace
1868
1869 gimple_opt_pass *
1870 make_pass_lower_vector (gcc::context *ctxt)
1871 {
1872 return new pass_lower_vector (ctxt);
1873 }
1874
1875 namespace {
1876
1877 const pass_data pass_data_lower_vector_ssa =
1878 {
1879 GIMPLE_PASS, /* type */
1880 "veclower2", /* name */
1881 OPTGROUP_VEC, /* optinfo_flags */
1882 TV_NONE, /* tv_id */
1883 PROP_cfg, /* properties_required */
1884 PROP_gimple_lvec, /* properties_provided */
1885 0, /* properties_destroyed */
1886 0, /* todo_flags_start */
1887 ( TODO_update_ssa
1888 | TODO_cleanup_cfg ), /* todo_flags_finish */
686 }; 1889 };
687 1890
1891 class pass_lower_vector_ssa : public gimple_opt_pass
1892 {
1893 public:
1894 pass_lower_vector_ssa (gcc::context *ctxt)
1895 : gimple_opt_pass (pass_data_lower_vector_ssa, ctxt)
1896 {}
1897
1898 /* opt_pass methods: */
1899 opt_pass * clone () { return new pass_lower_vector_ssa (m_ctxt); }
1900 virtual unsigned int execute (function *)
1901 {
1902 return expand_vector_operations ();
1903 }
1904
1905 }; // class pass_lower_vector_ssa
1906
1907 } // anon namespace
1908
1909 gimple_opt_pass *
1910 make_pass_lower_vector_ssa (gcc::context *ctxt)
1911 {
1912 return new pass_lower_vector_ssa (ctxt);
1913 }
1914
688 #include "gt-tree-vect-generic.h" 1915 #include "gt-tree-vect-generic.h"