0
|
1 /* Lower vector operations to scalar operations.
|
|
2 Copyright (C) 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
|
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify it
|
|
7 under the terms of the GNU General Public License as published by the
|
|
8 Free Software Foundation; either version 3, or (at your option) any
|
|
9 later version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful, but WITHOUT
|
|
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with GCC; see the file COPYING3. If not see
|
|
18 <http://www.gnu.org/licenses/>. */
|
|
19
|
|
20 #include "config.h"
|
|
21 #include "system.h"
|
|
22 #include "coretypes.h"
|
|
23 #include "tree.h"
|
|
24 #include "tm.h"
|
|
25 #include "rtl.h"
|
|
26 #include "expr.h"
|
|
27 #include "insn-codes.h"
|
|
28 #include "diagnostic.h"
|
|
29 #include "optabs.h"
|
|
30 #include "machmode.h"
|
|
31 #include "langhooks.h"
|
|
32 #include "tree-flow.h"
|
|
33 #include "gimple.h"
|
|
34 #include "tree-iterator.h"
|
|
35 #include "tree-pass.h"
|
|
36 #include "flags.h"
|
|
37 #include "ggc.h"
|
|
38
|
|
39
|
|
/* Build a constant of type TYPE, made of VALUE's bits replicated
   every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision.  */
static tree
build_replicated_const (tree type, tree inner_type, HOST_WIDE_INT value)
{
  /* Width in bits of one replicated chunk, and how many such chunks
     fit in a HOST_WIDE_INT.  */
  int width = tree_low_cst (TYPE_SIZE (inner_type), 1);
  int n = HOST_BITS_PER_WIDE_INT / width;
  unsigned HOST_WIDE_INT low, high, mask;
  tree ret;

  /* INNER_TYPE must be no wider than a HOST_WIDE_INT.  */
  gcc_assert (n);

  if (width == HOST_BITS_PER_WIDE_INT)
    low = value;
  else
    {
      /* ~0 / mask yields the 0x...010101 pattern; multiplying by the
	 masked value replicates it into every chunk position.  */
      mask = ((HOST_WIDE_INT)1 << width) - 1;
      low = (unsigned HOST_WIDE_INT) ~0 / mask * (value & mask);
    }

  /* Distribute the replicated bits over the low/high words of the
     double-word integer constant according to TYPE's precision.  */
  if (TYPE_PRECISION (type) < HOST_BITS_PER_WIDE_INT)
    low &= ((HOST_WIDE_INT)1 << TYPE_PRECISION (type)) - 1, high = 0;
  else if (TYPE_PRECISION (type) == HOST_BITS_PER_WIDE_INT)
    high = 0;
  else if (TYPE_PRECISION (type) == 2 * HOST_BITS_PER_WIDE_INT)
    high = low;
  else
    gcc_unreachable ();

  ret = build_int_cst_wide (type, low, high);
  return ret;
}
|
|
72
|
|
/* Cache used by build_word_mode_vector_type: the scalar type whose mode
   is word_mode, plus the most recently built word-mode vector type and
   its unit count.  GTY(()) registers them as garbage-collector roots.  */
static GTY(()) tree vector_inner_type;
static GTY(()) tree vector_last_type;
static GTY(()) int vector_last_nunits;
|
|
76
|
|
/* Return a suitable vector type made of SUBPARTS units each of mode
   "word_mode" (the global variable).  */
static tree
build_word_mode_vector_type (int nunits)
{
  /* Lazily create the word-mode element type on first use.  */
  if (!vector_inner_type)
    vector_inner_type = lang_hooks.types.type_for_mode (word_mode, 1);
  else if (vector_last_nunits == nunits)
    {
      /* Cache hit: reuse the type built on the previous call.  */
      gcc_assert (TREE_CODE (vector_last_type) == VECTOR_TYPE);
      return vector_last_type;
    }

  /* We build a new type, but we canonicalize it nevertheless,
     because it still saves some memory.  */
  vector_last_nunits = nunits;
  vector_last_type = type_hash_canon (nunits,
				      build_vector_type (vector_inner_type,
							 nunits));
  return vector_last_type;
}
|
|
98
|
|
/* Callback that expands one element of a vector operation.  Arguments:
   the iterator at the statement being lowered, the element (compute)
   type, the two operands (the fourth is NULL_TREE for unary ops), the
   bit position and bit width of the piece to process (both NULL_TREE
   when the whole operand is the piece), and the operation's code.  */
typedef tree (*elem_op_func) (gimple_stmt_iterator *,
			      tree, tree, tree, tree, tree, enum tree_code);
|
|
101
|
|
102 static inline tree
|
|
103 tree_vec_extract (gimple_stmt_iterator *gsi, tree type,
|
|
104 tree t, tree bitsize, tree bitpos)
|
|
105 {
|
|
106 if (bitpos)
|
|
107 return gimplify_build3 (gsi, BIT_FIELD_REF, type, t, bitsize, bitpos);
|
|
108 else
|
|
109 return gimplify_build1 (gsi, VIEW_CONVERT_EXPR, type, t);
|
|
110 }
|
|
111
|
|
112 static tree
|
|
113 do_unop (gimple_stmt_iterator *gsi, tree inner_type, tree a,
|
|
114 tree b ATTRIBUTE_UNUSED, tree bitpos, tree bitsize,
|
|
115 enum tree_code code)
|
|
116 {
|
|
117 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
|
|
118 return gimplify_build1 (gsi, code, inner_type, a);
|
|
119 }
|
|
120
|
|
121 static tree
|
|
122 do_binop (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
|
|
123 tree bitpos, tree bitsize, enum tree_code code)
|
|
124 {
|
|
125 a = tree_vec_extract (gsi, inner_type, a, bitsize, bitpos);
|
|
126 b = tree_vec_extract (gsi, inner_type, b, bitsize, bitpos);
|
|
127 return gimplify_build2 (gsi, code, inner_type, a, b);
|
|
128 }
|
|
129
|
|
130 /* Expand vector addition to scalars. This does bit twiddling
|
|
131 in order to increase parallelism:
|
|
132
|
|
133 a + b = (((int) a & 0x7f7f7f7f) + ((int) b & 0x7f7f7f7f)) ^
|
|
134 (a ^ b) & 0x80808080
|
|
135
|
|
136 a - b = (((int) a | 0x80808080) - ((int) b & 0x7f7f7f7f)) ^
|
|
137 (a ^ ~b) & 0x80808080
|
|
138
|
|
139 -b = (0x80808080 - ((int) b & 0x7f7f7f7f)) ^ (~b & 0x80808080)
|
|
140
|
|
141 This optimization should be done only if 4 vector items or more
|
|
142 fit into a word. */
|
|
143 static tree
|
|
144 do_plus_minus (gimple_stmt_iterator *gsi, tree word_type, tree a, tree b,
|
|
145 tree bitpos ATTRIBUTE_UNUSED, tree bitsize ATTRIBUTE_UNUSED,
|
|
146 enum tree_code code)
|
|
147 {
|
|
148 tree inner_type = TREE_TYPE (TREE_TYPE (a));
|
|
149 unsigned HOST_WIDE_INT max;
|
|
150 tree low_bits, high_bits, a_low, b_low, result_low, signs;
|
|
151
|
|
152 max = GET_MODE_MASK (TYPE_MODE (inner_type));
|
|
153 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
|
|
154 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
|
|
155
|
|
156 a = tree_vec_extract (gsi, word_type, a, bitsize, bitpos);
|
|
157 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
|
|
158
|
|
159 signs = gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, a, b);
|
|
160 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
|
|
161 if (code == PLUS_EXPR)
|
|
162 a_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, a, low_bits);
|
|
163 else
|
|
164 {
|
|
165 a_low = gimplify_build2 (gsi, BIT_IOR_EXPR, word_type, a, high_bits);
|
|
166 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, signs);
|
|
167 }
|
|
168
|
|
169 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
|
|
170 result_low = gimplify_build2 (gsi, code, word_type, a_low, b_low);
|
|
171 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
|
|
172 }
|
|
173
|
|
174 static tree
|
|
175 do_negate (gimple_stmt_iterator *gsi, tree word_type, tree b,
|
|
176 tree unused ATTRIBUTE_UNUSED, tree bitpos ATTRIBUTE_UNUSED,
|
|
177 tree bitsize ATTRIBUTE_UNUSED,
|
|
178 enum tree_code code ATTRIBUTE_UNUSED)
|
|
179 {
|
|
180 tree inner_type = TREE_TYPE (TREE_TYPE (b));
|
|
181 HOST_WIDE_INT max;
|
|
182 tree low_bits, high_bits, b_low, result_low, signs;
|
|
183
|
|
184 max = GET_MODE_MASK (TYPE_MODE (inner_type));
|
|
185 low_bits = build_replicated_const (word_type, inner_type, max >> 1);
|
|
186 high_bits = build_replicated_const (word_type, inner_type, max & ~(max >> 1));
|
|
187
|
|
188 b = tree_vec_extract (gsi, word_type, b, bitsize, bitpos);
|
|
189
|
|
190 b_low = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, b, low_bits);
|
|
191 signs = gimplify_build1 (gsi, BIT_NOT_EXPR, word_type, b);
|
|
192 signs = gimplify_build2 (gsi, BIT_AND_EXPR, word_type, signs, high_bits);
|
|
193 result_low = gimplify_build2 (gsi, MINUS_EXPR, word_type, high_bits, b_low);
|
|
194 return gimplify_build2 (gsi, BIT_XOR_EXPR, word_type, result_low, signs);
|
|
195 }
|
|
196
|
|
/* Expand a vector operation to scalars, by using many operations
   whose type is the vector type's inner type.  F computes one piece;
   the pieces are collected into a CONSTRUCTOR of TYPE.  */
static tree
expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
			 tree type, tree inner_type,
			 tree a, tree b, enum tree_code code)
{
  VEC(constructor_elt,gc) *v;
  tree part_width = TYPE_SIZE (inner_type);
  tree index = bitsize_int (0);
  int nunits = TYPE_VECTOR_SUBPARTS (type);
  /* How many elements of TYPE one INNER_TYPE piece covers; greater than
     one when lowering to word-sized chunks.  */
  int delta = tree_low_cst (part_width, 1)
	      / tree_low_cst (TYPE_SIZE (TREE_TYPE (type)), 1);
  int i;

  v = VEC_alloc(constructor_elt, gc, (nunits + delta - 1) / delta);
  /* INDEX advances by PART_WIDTH bits per iteration, tracking the bit
     position of the piece being processed.  */
  for (i = 0; i < nunits;
       i += delta, index = int_const_binop (PLUS_EXPR, index, part_width, 0))
    {
      tree result = f (gsi, inner_type, a, b, index, part_width, code);
      constructor_elt *ce = VEC_quick_push (constructor_elt, v, NULL);
      ce->index = NULL_TREE;
      ce->value = result;
    }

  return build_constructor (type, v);
}
|
|
224
|
|
/* Expand a vector operation to scalars with the freedom to use
   a scalar integer type, or to use a different size for the items
   in the vector type.  */
static tree
expand_vector_parallel (gimple_stmt_iterator *gsi, elem_op_func f, tree type,
			tree a, tree b,
			enum tree_code code)
{
  tree result, compute_type;
  enum machine_mode mode;
  int n_words = tree_low_cst (TYPE_SIZE_UNIT (type), 1) / UNITS_PER_WORD;

  /* We have three strategies.  If the type is already correct, just do
     the operation an element at a time.  Else, if the vector is wider than
     one word, do it a word at a time; finally, if the vector is smaller
     than one word, do it as a scalar.  */
  if (TYPE_MODE (TREE_TYPE (type)) == word_mode)
     return expand_vector_piecewise (gsi, f,
				     type, TREE_TYPE (type),
				     a, b, code);
  else if (n_words > 1)
    {
      /* Process the vector as N_WORDS word-mode chunks, then force the
	 resulting CONSTRUCTOR into a gimple operand.  */
      tree word_type = build_word_mode_vector_type (n_words);
      result = expand_vector_piecewise (gsi, f,
					word_type, TREE_TYPE (word_type),
					a, b, code);
      result = force_gimple_operand_gsi (gsi, result, true, NULL, true,
                                         GSI_SAME_STMT);
    }
  else
    {
      /* Use a single scalar operation with a mode no wider than word_mode.  */
      mode = mode_for_size (tree_low_cst (TYPE_SIZE (type), 1), MODE_INT, 0);
      compute_type = lang_hooks.types.type_for_mode (mode, 1);
      /* NULL bitpos/bitsize tell F to treat the whole operand as one
	 piece (see tree_vec_extract).  */
      result = f (gsi, compute_type, a, b, NULL_TREE, NULL_TREE, code);
    }

  return result;
}
|
|
264
|
|
265 /* Expand a vector operation to scalars; for integer types we can use
|
|
266 special bit twiddling tricks to do the sums a word at a time, using
|
|
267 function F_PARALLEL instead of F. These tricks are done only if
|
|
268 they can process at least four items, that is, only if the vector
|
|
269 holds at least four items and if a word can hold four items. */
|
|
270 static tree
|
|
271 expand_vector_addition (gimple_stmt_iterator *gsi,
|
|
272 elem_op_func f, elem_op_func f_parallel,
|
|
273 tree type, tree a, tree b, enum tree_code code)
|
|
274 {
|
|
275 int parts_per_word = UNITS_PER_WORD
|
|
276 / tree_low_cst (TYPE_SIZE_UNIT (TREE_TYPE (type)), 1);
|
|
277
|
|
278 if (INTEGRAL_TYPE_P (TREE_TYPE (type))
|
|
279 && parts_per_word >= 4
|
|
280 && TYPE_VECTOR_SUBPARTS (type) >= 4)
|
|
281 return expand_vector_parallel (gsi, f_parallel,
|
|
282 type, a, b, code);
|
|
283 else
|
|
284 return expand_vector_piecewise (gsi, f,
|
|
285 type, TREE_TYPE (type),
|
|
286 a, b, code);
|
|
287 }
|
|
288
|
|
/* Expand the vector operation CODE performed by ASSIGN, whose result has
   vector type TYPE, using pieces of COMPUTE_TYPE.  Return the lowered
   replacement expression for the right-hand side.  */
static tree
expand_vector_operation (gimple_stmt_iterator *gsi, tree type, tree compute_type,
			 gimple assign, enum tree_code code)
{
  enum machine_mode compute_mode = TYPE_MODE (compute_type);

  /* If the compute mode is not a vector mode (hence we are not decomposing
     a BLKmode vector to smaller, hardware-supported vectors), we may want
     to expand the operations in parallel.  */
  if (GET_MODE_CLASS (compute_mode) != MODE_VECTOR_INT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FLOAT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_FRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UFRACT
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_ACCUM
      && GET_MODE_CLASS (compute_mode) != MODE_VECTOR_UACCUM)
    switch (code)
      {
      case PLUS_EXPR:
      case MINUS_EXPR:
	/* Word-parallel add/sub is only valid when overflow need not
	   trap; otherwise fall through to piecewise expansion below.  */
        if (!TYPE_OVERFLOW_TRAPS (type))
	  return expand_vector_addition (gsi, do_binop, do_plus_minus, type,
				         gimple_assign_rhs1 (assign),
					 gimple_assign_rhs2 (assign), code);
	break;

      case NEGATE_EXPR:
        if (!TYPE_OVERFLOW_TRAPS (type))
          return expand_vector_addition (gsi, do_unop, do_negate, type,
		      		         gimple_assign_rhs1 (assign),
					 NULL_TREE, code);
	break;

      case BIT_AND_EXPR:
      case BIT_IOR_EXPR:
      case BIT_XOR_EXPR:
	/* Bitwise operations never carry between items, so a word at a
	   time is always safe.  */
        return expand_vector_parallel (gsi, do_binop, type,
		      		       gimple_assign_rhs1 (assign),
				       gimple_assign_rhs2 (assign), code);

      case BIT_NOT_EXPR:
        return expand_vector_parallel (gsi, do_unop, type,
		      		       gimple_assign_rhs1 (assign),
        			       NULL_TREE, code);

      default:
	break;
      }

  /* Fallback: expand one COMPUTE_TYPE piece at a time.  */
  if (TREE_CODE_CLASS (code) == tcc_unary)
    return expand_vector_piecewise (gsi, do_unop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    NULL_TREE, code);
  else
    return expand_vector_piecewise (gsi, do_binop, type, compute_type,
				    gimple_assign_rhs1 (assign),
				    gimple_assign_rhs2 (assign), code);
}
|
|
346
|
|
347 /* Return a type for the widest vector mode whose components are of mode
|
|
348 INNER_MODE, or NULL_TREE if none is found.
|
|
349 SATP is true for saturating fixed-point types. */
|
|
350
|
|
351 static tree
|
|
352 type_for_widest_vector_mode (enum machine_mode inner_mode, optab op, int satp)
|
|
353 {
|
|
354 enum machine_mode best_mode = VOIDmode, mode;
|
|
355 int best_nunits = 0;
|
|
356
|
|
357 if (SCALAR_FLOAT_MODE_P (inner_mode))
|
|
358 mode = MIN_MODE_VECTOR_FLOAT;
|
|
359 else if (SCALAR_FRACT_MODE_P (inner_mode))
|
|
360 mode = MIN_MODE_VECTOR_FRACT;
|
|
361 else if (SCALAR_UFRACT_MODE_P (inner_mode))
|
|
362 mode = MIN_MODE_VECTOR_UFRACT;
|
|
363 else if (SCALAR_ACCUM_MODE_P (inner_mode))
|
|
364 mode = MIN_MODE_VECTOR_ACCUM;
|
|
365 else if (SCALAR_UACCUM_MODE_P (inner_mode))
|
|
366 mode = MIN_MODE_VECTOR_UACCUM;
|
|
367 else
|
|
368 mode = MIN_MODE_VECTOR_INT;
|
|
369
|
|
370 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
|
|
371 if (GET_MODE_INNER (mode) == inner_mode
|
|
372 && GET_MODE_NUNITS (mode) > best_nunits
|
|
373 && optab_handler (op, mode)->insn_code != CODE_FOR_nothing)
|
|
374 best_mode = mode, best_nunits = GET_MODE_NUNITS (mode);
|
|
375
|
|
376 if (best_mode == VOIDmode)
|
|
377 return NULL_TREE;
|
|
378 else
|
|
379 {
|
|
380 /* For fixed-point modes, we need to pass satp as the 2nd parameter. */
|
|
381 if (ALL_FIXED_POINT_MODE_P (best_mode))
|
|
382 return lang_hooks.types.type_for_mode (best_mode, satp);
|
|
383
|
|
384 return lang_hooks.types.type_for_mode (best_mode, 1);
|
|
385 }
|
|
386 }
|
|
387
|
|
388 /* Process one statement. If we identify a vector operation, expand it. */
|
|
389
|
|
390 static void
|
|
391 expand_vector_operations_1 (gimple_stmt_iterator *gsi)
|
|
392 {
|
|
393 gimple stmt = gsi_stmt (*gsi);
|
|
394 tree lhs, rhs1, rhs2 = NULL, type, compute_type;
|
|
395 enum tree_code code;
|
|
396 enum machine_mode compute_mode;
|
|
397 optab op;
|
|
398 enum gimple_rhs_class rhs_class;
|
|
399 tree new_rhs;
|
|
400
|
|
401 if (gimple_code (stmt) != GIMPLE_ASSIGN)
|
|
402 return;
|
|
403
|
|
404 code = gimple_assign_rhs_code (stmt);
|
|
405 rhs_class = get_gimple_rhs_class (code);
|
|
406
|
|
407 if (rhs_class != GIMPLE_UNARY_RHS && rhs_class != GIMPLE_BINARY_RHS)
|
|
408 return;
|
|
409
|
|
410 lhs = gimple_assign_lhs (stmt);
|
|
411 rhs1 = gimple_assign_rhs1 (stmt);
|
|
412 type = gimple_expr_type (stmt);
|
|
413 if (rhs_class == GIMPLE_BINARY_RHS)
|
|
414 rhs2 = gimple_assign_rhs2 (stmt);
|
|
415
|
|
416 if (TREE_CODE (type) != VECTOR_TYPE)
|
|
417 return;
|
|
418
|
|
419 if (code == NOP_EXPR
|
|
420 || code == FLOAT_EXPR
|
|
421 || code == FIX_TRUNC_EXPR
|
|
422 || code == VIEW_CONVERT_EXPR)
|
|
423 return;
|
|
424
|
|
425 gcc_assert (code != CONVERT_EXPR);
|
|
426
|
|
427 /* The signedness is determined from input argument. */
|
|
428 if (code == VEC_UNPACK_FLOAT_HI_EXPR
|
|
429 || code == VEC_UNPACK_FLOAT_LO_EXPR)
|
|
430 type = TREE_TYPE (rhs1);
|
|
431
|
|
432 /* Choose between vector shift/rotate by vector and vector shift/rotate by
|
|
433 scalar */
|
|
434 if (code == LSHIFT_EXPR
|
|
435 || code == RSHIFT_EXPR
|
|
436 || code == LROTATE_EXPR
|
|
437 || code == RROTATE_EXPR)
|
|
438 {
|
|
439 /* If the 2nd argument is vector, we need a vector/vector shift */
|
|
440 if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2))))
|
|
441 op = optab_for_tree_code (code, type, optab_vector);
|
|
442 else
|
|
443 {
|
|
444 /* Try for a vector/scalar shift, and if we don't have one, see if we
|
|
445 have a vector/vector shift */
|
|
446 op = optab_for_tree_code (code, type, optab_scalar);
|
|
447 if (!op
|
|
448 || (op->handlers[(int) TYPE_MODE (type)].insn_code
|
|
449 == CODE_FOR_nothing))
|
|
450 op = optab_for_tree_code (code, type, optab_vector);
|
|
451 }
|
|
452 }
|
|
453 else
|
|
454 op = optab_for_tree_code (code, type, optab_default);
|
|
455
|
|
456 /* For widening/narrowing vector operations, the relevant type is of the
|
|
457 arguments, not the widened result. VEC_UNPACK_FLOAT_*_EXPR is
|
|
458 calculated in the same way above. */
|
|
459 if (code == WIDEN_SUM_EXPR
|
|
460 || code == VEC_WIDEN_MULT_HI_EXPR
|
|
461 || code == VEC_WIDEN_MULT_LO_EXPR
|
|
462 || code == VEC_UNPACK_HI_EXPR
|
|
463 || code == VEC_UNPACK_LO_EXPR
|
|
464 || code == VEC_PACK_TRUNC_EXPR
|
|
465 || code == VEC_PACK_SAT_EXPR
|
|
466 || code == VEC_PACK_FIX_TRUNC_EXPR)
|
|
467 type = TREE_TYPE (rhs1);
|
|
468
|
|
469 /* Optabs will try converting a negation into a subtraction, so
|
|
470 look for it as well. TODO: negation of floating-point vectors
|
|
471 might be turned into an exclusive OR toggling the sign bit. */
|
|
472 if (op == NULL
|
|
473 && code == NEGATE_EXPR
|
|
474 && INTEGRAL_TYPE_P (TREE_TYPE (type)))
|
|
475 op = optab_for_tree_code (MINUS_EXPR, type, optab_default);
|
|
476
|
|
477 /* For very wide vectors, try using a smaller vector mode. */
|
|
478 compute_type = type;
|
|
479 if (TYPE_MODE (type) == BLKmode && op)
|
|
480 {
|
|
481 tree vector_compute_type
|
|
482 = type_for_widest_vector_mode (TYPE_MODE (TREE_TYPE (type)), op,
|
|
483 TYPE_SATURATING (TREE_TYPE (type)));
|
|
484 if (vector_compute_type != NULL_TREE)
|
|
485 compute_type = vector_compute_type;
|
|
486 }
|
|
487
|
|
488 /* If we are breaking a BLKmode vector into smaller pieces,
|
|
489 type_for_widest_vector_mode has already looked into the optab,
|
|
490 so skip these checks. */
|
|
491 if (compute_type == type)
|
|
492 {
|
|
493 compute_mode = TYPE_MODE (compute_type);
|
|
494 if ((GET_MODE_CLASS (compute_mode) == MODE_VECTOR_INT
|
|
495 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FLOAT
|
|
496 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_FRACT
|
|
497 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_UFRACT
|
|
498 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_ACCUM
|
|
499 || GET_MODE_CLASS (compute_mode) == MODE_VECTOR_UACCUM)
|
|
500 && op != NULL
|
|
501 && optab_handler (op, compute_mode)->insn_code != CODE_FOR_nothing)
|
|
502 return;
|
|
503 else
|
|
504 /* There is no operation in hardware, so fall back to scalars. */
|
|
505 compute_type = TREE_TYPE (type);
|
|
506 }
|
|
507
|
|
508 gcc_assert (code != VEC_LSHIFT_EXPR && code != VEC_RSHIFT_EXPR);
|
|
509 new_rhs = expand_vector_operation (gsi, type, compute_type, stmt, code);
|
|
510 if (!useless_type_conversion_p (TREE_TYPE (lhs), TREE_TYPE (new_rhs)))
|
|
511 new_rhs = gimplify_build1 (gsi, VIEW_CONVERT_EXPR, TREE_TYPE (lhs),
|
|
512 new_rhs);
|
|
513
|
|
514 /* NOTE: We should avoid using gimple_assign_set_rhs_from_tree. One
|
|
515 way to do it is change expand_vector_operation and its callees to
|
|
516 return a tree_code, RHS1 and RHS2 instead of a tree. */
|
|
517 gimple_assign_set_rhs_from_tree (gsi, new_rhs);
|
|
518
|
|
519 gimple_set_modified (gsi_stmt (*gsi), true);
|
|
520 }
|
|
521
|
|
522 /* Use this to lower vector operations introduced by the vectorizer,
|
|
523 if it may need the bit-twiddling tricks implemented in this file. */
|
|
524
|
|
525 static bool
|
|
526 gate_expand_vector_operations (void)
|
|
527 {
|
|
528 return flag_tree_vectorize != 0;
|
|
529 }
|
|
530
|
|
531 static unsigned int
|
|
532 expand_vector_operations (void)
|
|
533 {
|
|
534 gimple_stmt_iterator gsi;
|
|
535 basic_block bb;
|
|
536
|
|
537 FOR_EACH_BB (bb)
|
|
538 {
|
|
539 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
540 {
|
|
541 expand_vector_operations_1 (&gsi);
|
|
542 update_stmt_if_modified (gsi_stmt (gsi));
|
|
543 }
|
|
544 }
|
|
545 return 0;
|
|
546 }
|
|
547
|
|
/* Pass descriptor for the unconditional vector-lowering pass
   ("veclower"): no gate, so it always runs where scheduled.  */
struct gimple_opt_pass pass_lower_vector =
{
 {
  GIMPLE_PASS,
  "veclower",				/* name */
  0,					/* gate */
  expand_vector_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  0,					/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_ggc_collect
    | TODO_verify_stmts			/* todo_flags_finish */
 }
};
|
|
567
|
|
/* Pass descriptor for the post-vectorizer lowering pass ("veclower2"):
   gated on -ftree-vectorize, and requests an SSA update afterwards.  */
struct gimple_opt_pass pass_lower_vector_ssa =
{
 {
  GIMPLE_PASS,
  "veclower2",				/* name */
  gate_expand_vector_operations,	/* gate */
  expand_vector_operations,		/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  0,					/* tv_id */
  PROP_cfg,				/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_update_ssa	/* todo_flags_finish */
    | TODO_verify_ssa
    | TODO_verify_stmts | TODO_verify_flow
 }
};
|
|
588
|
|
589 #include "gt-tree-vect-generic.h"
|