0
|
1 /* Reassociation for trees.
|
|
2 Copyright (C) 2005, 2007, 2008, 2009 Free Software Foundation, Inc.
|
|
3 Contributed by Daniel Berlin <dan@dberlin.org>
|
|
4
|
|
5 This file is part of GCC.
|
|
6
|
|
7 GCC is free software; you can redistribute it and/or modify
|
|
8 it under the terms of the GNU General Public License as published by
|
|
9 the Free Software Foundation; either version 3, or (at your option)
|
|
10 any later version.
|
|
11
|
|
12 GCC is distributed in the hope that it will be useful,
|
|
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
15 GNU General Public License for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with GCC; see the file COPYING3. If not see
|
|
19 <http://www.gnu.org/licenses/>. */
|
|
20
|
|
21 #include "config.h"
|
|
22 #include "system.h"
|
|
23 #include "coretypes.h"
|
|
24 #include "tm.h"
|
|
25 #include "ggc.h"
|
|
26 #include "tree.h"
|
|
27 #include "basic-block.h"
|
|
28 #include "diagnostic.h"
|
|
29 #include "tree-inline.h"
|
|
30 #include "tree-flow.h"
|
|
31 #include "gimple.h"
|
|
32 #include "tree-dump.h"
|
|
33 #include "timevar.h"
|
|
34 #include "tree-iterator.h"
|
|
35 #include "tree-pass.h"
|
|
36 #include "alloc-pool.h"
|
|
37 #include "vec.h"
|
|
38 #include "langhooks.h"
|
|
39 #include "pointer-set.h"
|
|
40 #include "cfgloop.h"
|
|
41 #include "flags.h"
|
|
42
|
|
43 /* This is a simple global reassociation pass. It is, in part, based
|
|
44 on the LLVM pass of the same name (They do some things more/less
|
|
45 than we do, in different orders, etc).
|
|
46
|
|
47 It consists of five steps:
|
|
48
|
|
49 1. Breaking up subtract operations into addition + negate, where
|
|
50 it would promote the reassociation of adds.
|
|
51
|
|
52 2. Left linearization of the expression trees, so that (A+B)+(C+D)
|
|
53 becomes (((A+B)+C)+D), which is easier for us to rewrite later.
|
|
54 During linearization, we place the operands of the binary
|
|
55 expressions into a vector of operand_entry_t
|
|
56
|
|
57 3. Optimization of the operand lists, eliminating things like a +
|
|
58 -a, a & a, etc.
|
|
59
|
|
60 4. Rewrite the expression trees we linearized and optimized so
|
|
61 they are in proper rank order.
|
|
62
|
|
63 5. Repropagate negates, as nothing else will clean it up ATM.
|
|
64
|
|
65 A bit of theory on #4, since nobody seems to write anything down
|
|
66 about why it makes sense to do it the way they do it:
|
|
67
|
|
68 We could do this much nicer theoretically, but don't (for reasons
|
|
69 explained after how to do it theoretically nice :P).
|
|
70
|
|
71 In order to promote the most redundancy elimination, you want
|
|
72 binary expressions whose operands are the same rank (or
|
|
73 preferably, the same value) exposed to the redundancy eliminator,
|
|
74 for possible elimination.
|
|
75
|
|
76 So the way to do this if we really cared, is to build the new op
|
|
77 tree from the leaves to the roots, merging as you go, and putting the
|
|
78 new op on the end of the worklist, until you are left with one
|
|
79 thing on the worklist.
|
|
80
|
|
81 IE if you have to rewrite the following set of operands (listed with
|
|
82 rank in parentheses), with opcode PLUS_EXPR:
|
|
83
|
|
84 a (1), b (1), c (1), d (2), e (2)
|
|
85
|
|
86
|
|
87 We start with our merge worklist empty, and the ops list with all of
|
|
88 those on it.
|
|
89
|
|
90 You want to first merge all leaves of the same rank, as much as
|
|
91 possible.
|
|
92
|
|
93 So first build a binary op of
|
|
94
|
|
95 mergetmp = a + b, and put "mergetmp" on the merge worklist.
|
|
96
|
|
97 Because there is no three operand form of PLUS_EXPR, c is not going to
|
|
98 be exposed to redundancy elimination as a rank 1 operand.
|
|
99
|
|
100 So you might as well throw it on the merge worklist (you could also
|
|
101 consider it to now be a rank two operand, and merge it with d and e,
|
|
102 but in this case, you then have evicted e from a binary op. So at
|
|
103 least in this situation, you can't win.)
|
|
104
|
|
105 Then build a binary op of d + e
|
|
106 mergetmp2 = d + e
|
|
107
|
|
108 and put mergetmp2 on the merge worklist.
|
|
109
|
|
110 so merge worklist = {mergetmp, c, mergetmp2}
|
|
111
|
|
112 Continue building binary ops of these operations until you have only
|
|
113 one operation left on the worklist.
|
|
114
|
|
115 So we have
|
|
116
|
|
117 build binary op
|
|
118 mergetmp3 = mergetmp + c
|
|
119
|
|
120 worklist = {mergetmp2, mergetmp3}
|
|
121
|
|
122 mergetmp4 = mergetmp2 + mergetmp3
|
|
123
|
|
124 worklist = {mergetmp4}
|
|
125
|
|
126 because we have one operation left, we can now just set the original
|
|
127 statement equal to the result of that operation.
|
|
128
|
|
129 This will at least expose a + b and d + e to redundancy elimination
|
|
130 as binary operations.
|
|
131
|
|
132 For extra points, you can reuse the old statements to build the
|
|
133 mergetmps, since you shouldn't run out.
|
|
134
|
|
135 So why don't we do this?
|
|
136
|
|
137 Because it's expensive, and rarely will help. Most trees we are
|
|
138 reassociating have 3 or less ops. If they have 2 ops, they already
|
|
139 will be written into a nice single binary op. If you have 3 ops, a
|
|
140 single simple check suffices to tell you whether the first two are of the
|
|
141 same rank. If so, you know to order it
|
|
142
|
|
143 mergetmp = op1 + op2
|
|
144 newstmt = mergetmp + op3
|
|
145
|
|
146 instead of
|
|
147 mergetmp = op2 + op3
|
|
148 newstmt = mergetmp + op1
|
|
149
|
|
150 If all three are of the same rank, you can't expose them all in a
|
|
151 single binary operator anyway, so the above is *still* the best you
|
|
152 can do.
|
|
153
|
|
154 Thus, this is what we do. When we have three ops left, we check to see
|
|
155 what order to put them in, and call it a day. As a nod to vector sum
|
|
156 reduction, we check if any of the ops are really a phi node that is a
|
|
157 destructive update for the associating op, and keep the destructive
|
|
158 update together for vector sum reduction recognition. */
|
|
159
|
|
160
|
|
/* Statistics */
static struct
{
  /* Number of expression chains flattened into operand lists.  */
  int linearized;
  /* Number of constant operands folded away.  */
  int constants_eliminated;
  /* Number of operands removed through algebraic identities.  */
  int ops_eliminated;
  /* Number of expression trees rewritten into rank order.  */
  int rewritten;
} reassociate_stats;
|
|
169
|
|
/* Operator, rank pair.  One entry per operand of a linearized
   expression chain; see get_rank for how RANK is computed.  */
typedef struct operand_entry
{
  /* Rank of the operand, as computed by get_rank.  */
  unsigned int rank;
  /* The operand itself.  */
  tree op;
} *operand_entry_t;

/* Allocation pool all operand_entry structures are drawn from.  */
static alloc_pool operand_entry_pool;
|
|
178
|
|
179
|
|
/* Starting rank number for a given basic block, so that we can rank
   operations using unmovable instructions in that BB based on the bb
   depth.  Indexed by basic block index.  */
static long *bb_rank;

/* Operand->rank hashtable.  Maps an expression (keyed by pointer) to
   the rank stored for it; see find_operand_rank and
   insert_operand_rank.  */
static struct pointer_map_t *operand_rank;
|
|
187
|
|
188
|
|
189 /* Look up the operand rank structure for expression E. */
|
|
190
|
|
191 static inline long
|
|
192 find_operand_rank (tree e)
|
|
193 {
|
|
194 void **slot = pointer_map_contains (operand_rank, e);
|
|
195 return slot ? (long) *slot : -1;
|
|
196 }
|
|
197
|
|
198 /* Insert {E,RANK} into the operand rank hashtable. */
|
|
199
|
|
200 static inline void
|
|
201 insert_operand_rank (tree e, long rank)
|
|
202 {
|
|
203 void **slot;
|
|
204 gcc_assert (rank > 0);
|
|
205 slot = pointer_map_insert (operand_rank, e);
|
|
206 gcc_assert (!*slot);
|
|
207 *slot = (void *) rank;
|
|
208 }
|
|
209
|
|
/* Given an expression E, return the rank of the expression.  */

static long
get_rank (tree e)
{
  /* Constants have rank 0.  */
  if (is_gimple_min_invariant (e))
    return 0;

  /* SSA_NAME's have the rank of the expression they are the result
     of.
     For globals and uninitialized values, the rank is 0.
     For function arguments, use the pre-setup rank.
     For PHI nodes, stores, asm statements, etc, we use the rank of
     the BB.
     For simple operations, the rank is the maximum rank of any of
     its operands, or the bb_rank, whichever is less.
     I make no claims that this is optimal, however, it gives good
     results.  */

  if (TREE_CODE (e) == SSA_NAME)
    {
      gimple stmt;
      long rank, maxrank;
      int i, n;

      /* Default definitions of parameters got their rank assigned
	 up front; just look it up.  */
      if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL
	  && SSA_NAME_IS_DEFAULT_DEF (e))
	return find_operand_rank (e);

      stmt = SSA_NAME_DEF_STMT (e);
      /* No defining block means no real definition (e.g. a nop);
	 treat like a global.  */
      if (gimple_bb (stmt) == NULL)
	return 0;

      /* Non-assignments and statements with virtual defs (stores,
	 calls, asms...) are pinned to their block's rank.  */
      if (!is_gimple_assign (stmt)
	  || !ZERO_SSA_OPERANDS (stmt, SSA_OP_VIRTUAL_DEFS))
	return bb_rank[gimple_bb (stmt)->index];

      /* If we already have a rank for this expression, use that.  */
      rank = find_operand_rank (e);
      if (rank != -1)
	return rank;

      /* Otherwise, find the maximum rank for the operands, or the bb
	 rank, whichever is less.  */
      rank = 0;
      maxrank = bb_rank[gimple_bb(stmt)->index];
      if (gimple_assign_single_p (stmt))
	{
	  tree rhs = gimple_assign_rhs1 (stmt);
	  n = TREE_OPERAND_LENGTH (rhs);
	  /* A leaf RHS (no operands) is ranked directly; otherwise
	     recurse on each operand, stopping early once the bb rank
	     cap is reached.  */
	  if (n == 0)
	    rank = MAX (rank, get_rank (rhs));
	  else
	    {
	      for (i = 0;
		   i < n && TREE_OPERAND (rhs, i) && rank != maxrank; i++)
		rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i)));
	    }
	}
      else
	{
	  /* Operand 0 is the LHS; rank the RHS operands only,
	     again capping at the bb rank.  */
	  n = gimple_num_ops (stmt);
	  for (i = 1; i < n && rank != maxrank; i++)
	    {
	      gcc_assert (gimple_op (stmt, i));
	      rank = MAX(rank, get_rank (gimple_op (stmt, i)));
	    }
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Rank for ");
	  print_generic_expr (dump_file, e, 0);
	  fprintf (dump_file, " is %ld\n", (rank + 1));
	}

      /* Note the rank in the hashtable so we don't recompute it.  */
      insert_operand_rank (e, (rank + 1));
      return (rank + 1);
    }

  /* Globals, etc, are rank 0 */
  return 0;
}
|
|
295
|
|
/* Declare heap-allocated vectors of operand_entry_t pointers.  */
DEF_VEC_P(operand_entry_t);
DEF_VEC_ALLOC_P(operand_entry_t, heap);
|
|
298
|
|
299 /* We want integer ones to end up last no matter what, since they are
|
|
300 the ones we can do the most with. */
|
|
301 #define INTEGER_CONST_TYPE 1 << 3
|
|
302 #define FLOAT_CONST_TYPE 1 << 2
|
|
303 #define OTHER_CONST_TYPE 1 << 1
|
|
304
|
|
305 /* Classify an invariant tree into integer, float, or other, so that
|
|
306 we can sort them to be near other constants of the same type. */
|
|
307 static inline int
|
|
308 constant_type (tree t)
|
|
309 {
|
|
310 if (INTEGRAL_TYPE_P (TREE_TYPE (t)))
|
|
311 return INTEGER_CONST_TYPE;
|
|
312 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t)))
|
|
313 return FLOAT_CONST_TYPE;
|
|
314 else
|
|
315 return OTHER_CONST_TYPE;
|
|
316 }
|
|
317
|
|
/* qsort comparison function to sort operand entries PA and PB by rank
   so that the sorted array is ordered by rank in decreasing order.
   (Note the "b - a" form throughout: higher ranks sort first.)  */
static int
sort_by_operand_rank (const void *pa, const void *pb)
{
  const operand_entry_t oea = *(const operand_entry_t *)pa;
  const operand_entry_t oeb = *(const operand_entry_t *)pb;

  /* It's nicer for optimize_expression if constants that are likely
     to fold when added/multiplied//whatever are put next to each
     other.  Since all constants have rank 0, order them by type.  */
  if (oeb->rank == 0 && oea->rank == 0)
    return constant_type (oeb->op) - constant_type (oea->op);

  /* Lastly, make sure the versions that are the same go next to each
     other.  We use SSA_NAME_VERSION because it's stable.  */
  if ((oeb->rank - oea->rank == 0)
      && TREE_CODE (oea->op) == SSA_NAME
      && TREE_CODE (oeb->op) == SSA_NAME)
    return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op);

  return oeb->rank - oea->rank;
}
|
|
341
|
|
342 /* Add an operand entry to *OPS for the tree operand OP. */
|
|
343
|
|
344 static void
|
|
345 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op)
|
|
346 {
|
|
347 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool);
|
|
348
|
|
349 oe->op = op;
|
|
350 oe->rank = get_rank (op);
|
|
351 VEC_safe_push (operand_entry_t, heap, *ops, oe);
|
|
352 }
|
|
353
|
|
354 /* Return true if STMT is reassociable operation containing a binary
|
|
355 operation with tree code CODE, and is inside LOOP. */
|
|
356
|
|
357 static bool
|
|
358 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop)
|
|
359 {
|
|
360 basic_block bb = gimple_bb (stmt);
|
|
361
|
|
362 if (gimple_bb (stmt) == NULL)
|
|
363 return false;
|
|
364
|
|
365 if (!flow_bb_inside_loop_p (loop, bb))
|
|
366 return false;
|
|
367
|
|
368 if (is_gimple_assign (stmt)
|
|
369 && gimple_assign_rhs_code (stmt) == code
|
|
370 && has_single_use (gimple_assign_lhs (stmt)))
|
|
371 return true;
|
|
372
|
|
373 return false;
|
|
374 }
|
|
375
|
|
376
|
|
377 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the
|
|
378 operand of the negate operation. Otherwise, return NULL. */
|
|
379
|
|
380 static tree
|
|
381 get_unary_op (tree name, enum tree_code opcode)
|
|
382 {
|
|
383 gimple stmt = SSA_NAME_DEF_STMT (name);
|
|
384
|
|
385 if (!is_gimple_assign (stmt))
|
|
386 return NULL_TREE;
|
|
387
|
|
388 if (gimple_assign_rhs_code (stmt) == opcode)
|
|
389 return gimple_assign_rhs1 (stmt);
|
|
390 return NULL_TREE;
|
|
391 }
|
|
392
|
|
/* If CURR and LAST are a pair of ops that OPCODE allows us to
   eliminate through equivalences, do so, remove them from OPS, and
   return true.  Otherwise, return false.

   OPS is the linearized operand list, I is CURR's index in it, and
   ALL_DONE is set when the whole expression has been reduced to a
   single value (the XOR case).  LAST is the operand preceding CURR
   (NULL for the first one).  */

static bool
eliminate_duplicate_pair (enum tree_code opcode,
			  VEC (operand_entry_t, heap) **ops,
			  bool *all_done,
			  unsigned int i,
			  operand_entry_t curr,
			  operand_entry_t last)
{

  /* If we have two of the same op, and the opcode is & |, min, or max,
     we can eliminate one of them.
     If we have two of the same op, and the opcode is ^, we can
     eliminate both of them.  */

  if (last && last->op == curr->op)
    {
      switch (opcode)
	{
	case MAX_EXPR:
	case MIN_EXPR:
	case BIT_IOR_EXPR:
	case BIT_AND_EXPR:
	  /* Idempotent ops: x OP x == x, so drop CURR.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " [&|minmax] ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> ");
	      /* NOTE(review): uses print_generic_stmt here while the
		 other dumps use print_generic_expr -- confirm this is
		 intentional.  */
	      print_generic_stmt (dump_file, last->op, 0);
	    }

	  VEC_ordered_remove (operand_entry_t, *ops, i);
	  reassociate_stats.ops_eliminated ++;

	  return true;

	case BIT_XOR_EXPR:
	  /* x ^ x == 0, so both operands disappear.  */
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, curr->op, 0);
	      fprintf (dump_file, " ^ ");
	      print_generic_expr (dump_file, last->op, 0);
	      fprintf (dump_file, " -> nothing\n");
	    }

	  reassociate_stats.ops_eliminated += 2;

	  /* If these were the only two operands the whole expression
	     collapses to zero and we are completely done.  */
	  if (VEC_length (operand_entry_t, *ops) == 2)
	    {
	      VEC_free (operand_entry_t, heap, *ops);
	      *ops = NULL;
	      add_to_ops_vec (ops, fold_convert (TREE_TYPE (last->op),
						 integer_zero_node));
	      *all_done = true;
	    }
	  else
	    {
	      /* Remove LAST (at i-1), then CURR (which has shifted
		 down to i-1).  */
	      VEC_ordered_remove (operand_entry_t, *ops, i-1);
	      VEC_ordered_remove (operand_entry_t, *ops, i-1);
	    }

	  return true;

	default:
	  break;
	}
    }
  return false;
}
|
|
468
|
|
/* If OPCODE is PLUS_EXPR, CURR->OP is really a negate expression,
   look in OPS for a corresponding positive operation to cancel it
   out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */

static bool
eliminate_plus_minus_pair (enum tree_code opcode,
			   VEC (operand_entry_t, heap) **ops,
			   unsigned int currindex,
			   operand_entry_t curr)
{
  tree negateop;
  unsigned int i;
  operand_entry_t oe;

  /* Only PLUS chains with an SSA_NAME operand can hold a negate.  */
  if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  negateop = get_unary_op (curr->op, NEGATE_EXPR);
  if (negateop == NULL_TREE)
    return false;

  /* Any non-negated version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       VEC_iterate (operand_entry_t, *ops, i, oe)
       && oe->rank >= curr->rank - 1 ;
       i++)
    {
      if (oe->op == negateop)
	{

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, negateop, 0);
	      fprintf (dump_file, " + -");
	      print_generic_expr (dump_file, oe->op, 0);
	      fprintf (dump_file, " -> 0\n");
	    }

	  /* x + -x == 0: drop the positive op, replace the negate
	     with a zero constant (appended, then the stale entry at
	     CURRINDEX removed).  */
	  VEC_ordered_remove (operand_entry_t, *ops, i);
	  add_to_ops_vec (ops, fold_convert(TREE_TYPE (oe->op),
					    integer_zero_node));
	  VEC_ordered_remove (operand_entry_t, *ops, currindex);
	  reassociate_stats.ops_eliminated ++;

	  return true;
	}
    }

  return false;
}
|
|
525
|
|
/* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a
   bitwise not expression, look in OPS for a corresponding operand to
   cancel it out.  If we find one, remove the other from OPS, replace
   OPS[CURRINDEX] with 0, and return true.  Otherwise, return
   false.  */

static bool
eliminate_not_pairs (enum tree_code opcode,
		     VEC (operand_entry_t, heap) **ops,
		     unsigned int currindex,
		     operand_entry_t curr)
{
  tree notop;
  unsigned int i;
  operand_entry_t oe;

  /* Only & and | chains with an SSA_NAME operand can hold a ~x.  */
  if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR)
      || TREE_CODE (curr->op) != SSA_NAME)
    return false;

  notop = get_unary_op (curr->op, BIT_NOT_EXPR);
  if (notop == NULL_TREE)
    return false;

  /* Any non-not version will have a rank that is one less than
     the current rank.  So once we hit those ranks, if we don't find
     one, we can stop.  */

  for (i = currindex + 1;
       VEC_iterate (operand_entry_t, *ops, i, oe)
       && oe->rank >= curr->rank - 1;
       i++)
    {
      if (oe->op == notop)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Equivalence: ");
	      print_generic_expr (dump_file, notop, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " & ~");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " | ~");
	      print_generic_expr (dump_file, oe->op, 0);
	      if (opcode == BIT_AND_EXPR)
		fprintf (dump_file, " -> 0\n");
	      else if (opcode == BIT_IOR_EXPR)
		fprintf (dump_file, " -> -1\n");
	    }

	  /* x & ~x == 0; x | ~x == all-ones.  The result absorbs the
	     entire chain, so the whole list collapses to it.  */
	  if (opcode == BIT_AND_EXPR)
	    oe->op = fold_convert (TREE_TYPE (oe->op), integer_zero_node);
	  else if (opcode == BIT_IOR_EXPR)
	    oe->op = build_low_bits_mask (TREE_TYPE (oe->op),
					  TYPE_PRECISION (TREE_TYPE (oe->op)));

	  reassociate_stats.ops_eliminated
	    += VEC_length (operand_entry_t, *ops) - 1;
	  VEC_free (operand_entry_t, heap, *ops);
	  *ops = NULL;
	  VEC_safe_push (operand_entry_t, heap, *ops, oe);
	  return true;
	}
    }

  return false;
}
|
|
593
|
|
/* Use constant value that may be present in OPS to try to eliminate
   operands.  Note that this function is only really used when we've
   eliminated ops for other reasons, or merged constants.  Across
   single statements, fold already does all of this, plus more.  There
   is little point in duplicating logic, so I've only included the
   identities that I could ever construct testcases to trigger.

   Only the last entry of OPS is examined (constants sort last); it
   either absorbs the whole chain (& 0, | -1, * 0) or is dropped as an
   identity element (& -1, | 0, * 1, +/^ 0).  */

static void
eliminate_using_constants (enum tree_code opcode,
			   VEC(operand_entry_t, heap) **ops)
{
  operand_entry_t oelast = VEC_last (operand_entry_t, *ops);
  tree type = TREE_TYPE (oelast->op);

  if (oelast->rank == 0
      && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type)))
    {
      switch (opcode)
	{
	case BIT_AND_EXPR:
	  /* x & 0 == 0: the constant absorbs everything else.  */
	  if (integer_zerop (oelast->op))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated
		    += VEC_length (operand_entry_t, *ops) - 1;

		  VEC_free (operand_entry_t, heap, *ops);
		  *ops = NULL;
		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
		  return;
		}
	    }
	  /* x & -1 == x: the constant is an identity, drop it.  */
	  else if (integer_all_onesp (oelast->op))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found & -1, removing\n");
		  VEC_pop (operand_entry_t, *ops);
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case BIT_IOR_EXPR:
	  /* x | -1 == -1: the constant absorbs everything else.  */
	  if (integer_all_onesp (oelast->op))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | -1, removing all other ops\n");

		  reassociate_stats.ops_eliminated
		    += VEC_length (operand_entry_t, *ops) - 1;

		  VEC_free (operand_entry_t, heap, *ops);
		  *ops = NULL;
		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
		  return;
		}
	    }
	  /* x | 0 == x: identity, drop the constant.  */
	  else if (integer_zerop (oelast->op))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found | 0, removing\n");
		  VEC_pop (operand_entry_t, *ops);
		  reassociate_stats.ops_eliminated++;
		}
	    }
	  break;
	case MULT_EXPR:
	  /* x * 0 == 0, but for floats only when NaNs and signed
	     zeros cannot change the result.  */
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && !HONOR_NANS (TYPE_MODE (type))
		  && !HONOR_SIGNED_ZEROS (TYPE_MODE (type))
		  && real_zerop (oelast->op)))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 0, removing all other ops\n");

		  reassociate_stats.ops_eliminated
		    += VEC_length (operand_entry_t, *ops) - 1;
		  VEC_free (operand_entry_t, heap, *ops);
		  *ops = NULL;
		  VEC_safe_push (operand_entry_t, heap, *ops, oelast);
		  return;
		}
	    }
	  /* x * 1 == x (for floats only when signaling NaNs are not
	     honored); identity, drop the constant.  */
	  else if (integer_onep (oelast->op)
		   || (FLOAT_TYPE_P (type)
		       && !HONOR_SNANS (TYPE_MODE (type))
		       && real_onep (oelast->op)))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found * 1, removing\n");
		  VEC_pop (operand_entry_t, *ops);
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	case BIT_XOR_EXPR:
	case PLUS_EXPR:
	case MINUS_EXPR:
	  /* x [^+-] 0 == x; for floats only when adding/subtracting
	     this particular zero cannot change the sign of a zero
	     result.  */
	  if (integer_zerop (oelast->op)
	      || (FLOAT_TYPE_P (type)
		  && (opcode == PLUS_EXPR || opcode == MINUS_EXPR)
		  && fold_real_zero_addition_p (type, oelast->op,
						opcode == MINUS_EXPR)))
	    {
	      if (VEC_length (operand_entry_t, *ops) != 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file, "Found [|^+] 0, removing\n");
		  VEC_pop (operand_entry_t, *ops);
		  reassociate_stats.ops_eliminated++;
		  return;
		}
	    }
	  break;
	default:
	  break;
	}
    }
}
|
|
728
|
|
729
|
|
730 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple,
|
|
731 bool, bool);
|
|
732
|
|
/* Structure for tracking and counting operands.  */
typedef struct oecount_s {
  /* Occurrence count for this (op, oecode) pair.  */
  int cnt;
  /* The operation code of the chain entry.  */
  enum tree_code oecode;
  /* The operand being counted.  */
  tree op;
} oecount;

/* Declare heap-allocated vectors holding oecount objects by value.  */
DEF_VEC_O(oecount);
DEF_VEC_ALLOC_O(oecount,heap);

/* The heap for the oecount hashtable and the sorted list of operands.  */
static VEC (oecount, heap) *cvec;
|
|
745
|
|
/* Hash function for oecount.  The hashtable does not store pointers
   to oecount structs; instead each entry P encodes an index into the
   CVEC vector, biased by 42 (presumably so valid entries never
   collide with the table's NULL/deleted marker values -- TODO confirm
   against the insertion code).  */

static hashval_t
oecount_hash (const void *p)
{
  const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42);
  return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode;
}
|
|
754
|
|
/* Comparison function for oecount.  P1 and P2 are, as in
   oecount_hash, CVEC indexes biased by 42 rather than real pointers;
   two entries match when both the operation code and the operand
   agree.  */

static int
oecount_eq (const void *p1, const void *p2)
{
  const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42);
  const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42);
  return (c1->oecode == c2->oecode
	  && c1->op == c2->op);
}
|
|
765
|
|
766 /* Comparison function for qsort sorting oecount elements by count. */
|
|
767
|
|
768 static int
|
|
769 oecount_cmp (const void *p1, const void *p2)
|
|
770 {
|
|
771 const oecount *c1 = (const oecount *)p1;
|
|
772 const oecount *c2 = (const oecount *)p2;
|
|
773 return c1->cnt - c2->cnt;
|
|
774 }
|
|
775
|
|
776 /* Walks the linear chain with result *DEF searching for an operation
|
|
777 with operand OP and code OPCODE removing that from the chain. *DEF
|
|
778 is updated if there is only one operand but no operation left. */
|
|
779
|
|
780 static void
|
|
781 zero_one_operation (tree *def, enum tree_code opcode, tree op)
|
|
782 {
|
|
783 gimple stmt = SSA_NAME_DEF_STMT (*def);
|
|
784
|
|
785 do
|
|
786 {
|
|
787 tree name = gimple_assign_rhs1 (stmt);
|
|
788
|
|
789 /* If this is the operation we look for and one of the operands
|
|
790 is ours simply propagate the other operand into the stmts
|
|
791 single use. */
|
|
792 if (gimple_assign_rhs_code (stmt) == opcode
|
|
793 && (name == op
|
|
794 || gimple_assign_rhs2 (stmt) == op))
|
|
795 {
|
|
796 gimple use_stmt;
|
|
797 use_operand_p use;
|
|
798 gimple_stmt_iterator gsi;
|
|
799 if (name == op)
|
|
800 name = gimple_assign_rhs2 (stmt);
|
|
801 gcc_assert (has_single_use (gimple_assign_lhs (stmt)));
|
|
802 single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt);
|
|
803 if (gimple_assign_lhs (stmt) == *def)
|
|
804 *def = name;
|
|
805 SET_USE (use, name);
|
|
806 if (TREE_CODE (name) != SSA_NAME)
|
|
807 update_stmt (use_stmt);
|
|
808 gsi = gsi_for_stmt (stmt);
|
|
809 gsi_remove (&gsi, true);
|
|
810 release_defs (stmt);
|
|
811 return;
|
|
812 }
|
|
813
|
|
814 /* Continue walking the chain. */
|
|
815 gcc_assert (name != op
|
|
816 && TREE_CODE (name) == SSA_NAME);
|
|
817 stmt = SSA_NAME_DEF_STMT (name);
|
|
818 }
|
|
819 while (1);
|
|
820 }
|
|
821
|
|
822 /* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
|
|
823 the result. Places the statement after the definition of either
|
|
824 OP1 or OP2. Returns the new statement. */
|
|
825
|
|
826 static gimple
|
|
827 build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
|
|
828 {
|
|
829 gimple op1def = NULL, op2def = NULL;
|
|
830 gimple_stmt_iterator gsi;
|
|
831 tree op;
|
|
832 gimple sum;
|
|
833
|
|
834 /* Create the addition statement. */
|
|
835 sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
|
|
836 op = make_ssa_name (tmpvar, sum);
|
|
837 gimple_assign_set_lhs (sum, op);
|
|
838
|
|
839 /* Find an insertion place and insert. */
|
|
840 if (TREE_CODE (op1) == SSA_NAME)
|
|
841 op1def = SSA_NAME_DEF_STMT (op1);
|
|
842 if (TREE_CODE (op2) == SSA_NAME)
|
|
843 op2def = SSA_NAME_DEF_STMT (op2);
|
|
844 if ((!op1def || gimple_nop_p (op1def))
|
|
845 && (!op2def || gimple_nop_p (op2def)))
|
|
846 {
|
|
847 gsi = gsi_start_bb (single_succ (ENTRY_BLOCK_PTR));
|
|
848 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
|
|
849 }
|
|
850 else if ((!op1def || gimple_nop_p (op1def))
|
|
851 || (op2def && !gimple_nop_p (op2def)
|
|
852 && stmt_dominates_stmt_p (op1def, op2def)))
|
|
853 {
|
|
854 if (gimple_code (op2def) == GIMPLE_PHI)
|
|
855 {
|
|
856 gsi = gsi_start_bb (gimple_bb (op2def));
|
|
857 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
|
|
858 }
|
|
859 else
|
|
860 {
|
|
861 if (!stmt_ends_bb_p (op2def))
|
|
862 {
|
|
863 gsi = gsi_for_stmt (op2def);
|
|
864 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
|
|
865 }
|
|
866 else
|
|
867 {
|
|
868 edge e;
|
|
869 edge_iterator ei;
|
|
870
|
|
871 FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
|
|
872 if (e->flags & EDGE_FALLTHRU)
|
|
873 gsi_insert_on_edge_immediate (e, sum);
|
|
874 }
|
|
875 }
|
|
876 }
|
|
877 else
|
|
878 {
|
|
879 if (gimple_code (op1def) == GIMPLE_PHI)
|
|
880 {
|
|
881 gsi = gsi_start_bb (gimple_bb (op1def));
|
|
882 gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
|
|
883 }
|
|
884 else
|
|
885 {
|
|
886 if (!stmt_ends_bb_p (op1def))
|
|
887 {
|
|
888 gsi = gsi_for_stmt (op1def);
|
|
889 gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
|
|
890 }
|
|
891 else
|
|
892 {
|
|
893 edge e;
|
|
894 edge_iterator ei;
|
|
895
|
|
896 FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
|
|
897 if (e->flags & EDGE_FALLTHRU)
|
|
898 gsi_insert_on_edge_immediate (e, sum);
|
|
899 }
|
|
900 }
|
|
901 }
|
|
902 update_stmt (sum);
|
|
903
|
|
904 return sum;
|
|
905 }
|
|
906
|
|
907 /* Perform un-distribution of divisions and multiplications.
|
|
908 A * X + B * X is transformed into (A + B) * X and A / X + B / X
|
|
909 to (A + B) / X for real X.
|
|
910
|
|
911 The algorithm is organized as follows.
|
|
912
|
|
913 - First we walk the addition chain *OPS looking for summands that
|
|
914 are defined by a multiplication or a real division. This results
|
|
915 in the candidates bitmap with relevant indices into *OPS.
|
|
916
|
|
917 - Second we build the chains of multiplications or divisions for
|
|
918      these candidates, counting the number of occurrences of (operand, code)
|
|
919 pairs in all of the candidates chains.
|
|
920
|
|
921    - Third we sort the (operand, code) pairs by number of occurrence and
|
|
922 process them starting with the pair with the most uses.
|
|
923
|
|
924 * For each such pair we walk the candidates again to build a
|
|
925 second candidate bitmap noting all multiplication/division chains
|
|
926        that have at least one occurrence of (operand, code).
|
|
927
|
|
928 * We build an alternate addition chain only covering these
|
|
929 candidates with one (operand, code) operation removed from their
|
|
930 multiplication/division chain.
|
|
931
|
|
932 * The first candidate gets replaced by the alternate addition chain
|
|
933 multiplied/divided by the operand.
|
|
934
|
|
935 * All candidate chains get disabled for further processing and
|
|
936 processing of (operand, code) pairs continues.
|
|
937
|
|
938 The alternate addition chains built are re-processed by the main
|
|
939 reassociation algorithm which allows optimizing a * x * y + b * y * x
|
|
940    to (a + b) * x * y in one invocation of the reassociation pass.  */
|
|
941
|
|
942 static bool
|
|
943 undistribute_ops_list (enum tree_code opcode,
|
|
944 VEC (operand_entry_t, heap) **ops, struct loop *loop)
|
|
945 {
|
|
946 unsigned int length = VEC_length (operand_entry_t, *ops);
|
|
947 operand_entry_t oe1;
|
|
948 unsigned i, j;
|
|
949 sbitmap candidates, candidates2;
|
|
950 unsigned nr_candidates, nr_candidates2;
|
|
951 sbitmap_iterator sbi0;
|
|
952 VEC (operand_entry_t, heap) **subops;
|
|
953 htab_t ctable;
|
|
954 bool changed = false;
|
|
955
|
|
956 if (length <= 1
|
|
957 || opcode != PLUS_EXPR)
|
|
958 return false;
|
|
959
|
|
960 /* Build a list of candidates to process. */
|
|
961 candidates = sbitmap_alloc (length);
|
|
962 sbitmap_zero (candidates);
|
|
963 nr_candidates = 0;
|
|
964 for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe1); ++i)
|
|
965 {
|
|
966 enum tree_code dcode;
|
|
967 gimple oe1def;
|
|
968
|
|
969 if (TREE_CODE (oe1->op) != SSA_NAME)
|
|
970 continue;
|
|
971 oe1def = SSA_NAME_DEF_STMT (oe1->op);
|
|
972 if (!is_gimple_assign (oe1def))
|
|
973 continue;
|
|
974 dcode = gimple_assign_rhs_code (oe1def);
|
|
975 if ((dcode != MULT_EXPR
|
|
976 && dcode != RDIV_EXPR)
|
|
977 || !is_reassociable_op (oe1def, dcode, loop))
|
|
978 continue;
|
|
979
|
|
980 SET_BIT (candidates, i);
|
|
981 nr_candidates++;
|
|
982 }
|
|
983
|
|
984 if (nr_candidates < 2)
|
|
985 {
|
|
986 sbitmap_free (candidates);
|
|
987 return false;
|
|
988 }
|
|
989
|
|
990 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
991 {
|
|
992 fprintf (dump_file, "searching for un-distribute opportunities ");
|
|
993 print_generic_expr (dump_file,
|
|
994 VEC_index (operand_entry_t, *ops,
|
|
995 sbitmap_first_set_bit (candidates))->op, 0);
|
|
996 fprintf (dump_file, " %d\n", nr_candidates);
|
|
997 }
|
|
998
|
|
999 /* Build linearized sub-operand lists and the counting table. */
|
|
1000 cvec = NULL;
|
|
1001 ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
|
|
1002 subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
|
|
1003 VEC_length (operand_entry_t, *ops));
|
|
1004 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
|
|
1005 {
|
|
1006 gimple oedef;
|
|
1007 enum tree_code oecode;
|
|
1008 unsigned j;
|
|
1009
|
|
1010 oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
|
|
1011 oecode = gimple_assign_rhs_code (oedef);
|
|
1012 linearize_expr_tree (&subops[i], oedef,
|
|
1013 associative_tree_code (oecode), false);
|
|
1014
|
|
1015 for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
|
|
1016 {
|
|
1017 oecount c;
|
|
1018 void **slot;
|
|
1019 size_t idx;
|
|
1020 c.oecode = oecode;
|
|
1021 c.cnt = 1;
|
|
1022 c.op = oe1->op;
|
|
1023 VEC_safe_push (oecount, heap, cvec, &c);
|
|
1024 idx = VEC_length (oecount, cvec) + 41;
|
|
1025 slot = htab_find_slot (ctable, (void *)idx, INSERT);
|
|
1026 if (!*slot)
|
|
1027 {
|
|
1028 *slot = (void *)idx;
|
|
1029 }
|
|
1030 else
|
|
1031 {
|
|
1032 VEC_pop (oecount, cvec);
|
|
1033 VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
|
|
1034 }
|
|
1035 }
|
|
1036 }
|
|
1037 htab_delete (ctable);
|
|
1038
|
|
1039 /* Sort the counting table. */
|
|
1040 qsort (VEC_address (oecount, cvec), VEC_length (oecount, cvec),
|
|
1041 sizeof (oecount), oecount_cmp);
|
|
1042
|
|
1043 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
1044 {
|
|
1045 oecount *c;
|
|
1046 fprintf (dump_file, "Candidates:\n");
|
|
1047 for (j = 0; VEC_iterate (oecount, cvec, j, c); ++j)
|
|
1048 {
|
|
1049 fprintf (dump_file, " %u %s: ", c->cnt,
|
|
1050 c->oecode == MULT_EXPR
|
|
1051 ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
|
|
1052 print_generic_expr (dump_file, c->op, 0);
|
|
1053 fprintf (dump_file, "\n");
|
|
1054 }
|
|
1055 }
|
|
1056
|
|
1057 /* Process the (operand, code) pairs in order of most occurence. */
|
|
1058 candidates2 = sbitmap_alloc (length);
|
|
1059 while (!VEC_empty (oecount, cvec))
|
|
1060 {
|
|
1061 oecount *c = VEC_last (oecount, cvec);
|
|
1062 if (c->cnt < 2)
|
|
1063 break;
|
|
1064
|
|
1065 /* Now collect the operands in the outer chain that contain
|
|
1066 the common operand in their inner chain. */
|
|
1067 sbitmap_zero (candidates2);
|
|
1068 nr_candidates2 = 0;
|
|
1069 EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
|
|
1070 {
|
|
1071 gimple oedef;
|
|
1072 enum tree_code oecode;
|
|
1073 unsigned j;
|
|
1074 tree op = VEC_index (operand_entry_t, *ops, i)->op;
|
|
1075
|
|
1076 /* If we undistributed in this chain already this may be
|
|
1077 a constant. */
|
|
1078 if (TREE_CODE (op) != SSA_NAME)
|
|
1079 continue;
|
|
1080
|
|
1081 oedef = SSA_NAME_DEF_STMT (op);
|
|
1082 oecode = gimple_assign_rhs_code (oedef);
|
|
1083 if (oecode != c->oecode)
|
|
1084 continue;
|
|
1085
|
|
1086 for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
|
|
1087 {
|
|
1088 if (oe1->op == c->op)
|
|
1089 {
|
|
1090 SET_BIT (candidates2, i);
|
|
1091 ++nr_candidates2;
|
|
1092 break;
|
|
1093 }
|
|
1094 }
|
|
1095 }
|
|
1096
|
|
1097 if (nr_candidates2 >= 2)
|
|
1098 {
|
|
1099 operand_entry_t oe1, oe2;
|
|
1100 tree tmpvar;
|
|
1101 gimple prod;
|
|
1102 int first = sbitmap_first_set_bit (candidates2);
|
|
1103
|
|
1104 /* Build the new addition chain. */
|
|
1105 oe1 = VEC_index (operand_entry_t, *ops, first);
|
|
1106 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
1107 {
|
|
1108 fprintf (dump_file, "Building (");
|
|
1109 print_generic_expr (dump_file, oe1->op, 0);
|
|
1110 }
|
|
1111 tmpvar = create_tmp_var (TREE_TYPE (oe1->op), NULL);
|
|
1112 add_referenced_var (tmpvar);
|
|
1113 zero_one_operation (&oe1->op, c->oecode, c->op);
|
|
1114 EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
|
|
1115 {
|
|
1116 gimple sum;
|
|
1117 oe2 = VEC_index (operand_entry_t, *ops, i);
|
|
1118 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
1119 {
|
|
1120 fprintf (dump_file, " + ");
|
|
1121 print_generic_expr (dump_file, oe2->op, 0);
|
|
1122 }
|
|
1123 zero_one_operation (&oe2->op, c->oecode, c->op);
|
|
1124 sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
|
|
1125 oe2->op = fold_convert (TREE_TYPE (oe2->op), integer_zero_node);
|
|
1126 oe2->rank = 0;
|
|
1127 oe1->op = gimple_get_lhs (sum);
|
|
1128 }
|
|
1129
|
|
1130 /* Apply the multiplication/division. */
|
|
1131 prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
|
|
1132 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
1133 {
|
|
1134 fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
|
|
1135 print_generic_expr (dump_file, c->op, 0);
|
|
1136 fprintf (dump_file, "\n");
|
|
1137 }
|
|
1138
|
|
1139 /* Record it in the addition chain and disable further
|
|
1140 undistribution with this op. */
|
|
1141 oe1->op = gimple_assign_lhs (prod);
|
|
1142 oe1->rank = get_rank (oe1->op);
|
|
1143 VEC_free (operand_entry_t, heap, subops[first]);
|
|
1144
|
|
1145 changed = true;
|
|
1146 }
|
|
1147
|
|
1148 VEC_pop (oecount, cvec);
|
|
1149 }
|
|
1150
|
|
1151 for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
|
|
1152 VEC_free (operand_entry_t, heap, subops[i]);
|
|
1153 free (subops);
|
|
1154 VEC_free (oecount, heap, cvec);
|
|
1155 sbitmap_free (candidates);
|
|
1156 sbitmap_free (candidates2);
|
|
1157
|
|
1158 return changed;
|
|
1159 }
|
|
1160
|
|
1161
|
|
1162 /* Perform various identities and other optimizations on the list of
|
|
1163 operand entries, stored in OPS. The tree code for the binary
|
|
1164 operation between all the operands is OPCODE. */
|
|
1165
|
|
1166 static void
|
|
1167 optimize_ops_list (enum tree_code opcode,
|
|
1168 VEC (operand_entry_t, heap) **ops)
|
|
1169 {
|
|
1170 unsigned int length = VEC_length (operand_entry_t, *ops);
|
|
1171 unsigned int i;
|
|
1172 operand_entry_t oe;
|
|
1173 operand_entry_t oelast = NULL;
|
|
1174 bool iterate = false;
|
|
1175
|
|
1176 if (length == 1)
|
|
1177 return;
|
|
1178
|
|
1179 oelast = VEC_last (operand_entry_t, *ops);
|
|
1180
|
|
1181 /* If the last two are constants, pop the constants off, merge them
|
|
1182 and try the next two. */
|
|
1183 if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
|
|
1184 {
|
|
1185 operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);
|
|
1186
|
|
1187 if (oelm1->rank == 0
|
|
1188 && is_gimple_min_invariant (oelm1->op)
|
|
1189 && useless_type_conversion_p (TREE_TYPE (oelm1->op),
|
|
1190 TREE_TYPE (oelast->op)))
|
|
1191 {
|
|
1192 tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
|
|
1193 oelm1->op, oelast->op);
|
|
1194
|
|
1195 if (folded && is_gimple_min_invariant (folded))
|
|
1196 {
|
|
1197 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
1198 fprintf (dump_file, "Merging constants\n");
|
|
1199
|
|
1200 VEC_pop (operand_entry_t, *ops);
|
|
1201 VEC_pop (operand_entry_t, *ops);
|
|
1202
|
|
1203 add_to_ops_vec (ops, folded);
|
|
1204 reassociate_stats.constants_eliminated++;
|
|
1205
|
|
1206 optimize_ops_list (opcode, ops);
|
|
1207 return;
|
|
1208 }
|
|
1209 }
|
|
1210 }
|
|
1211
|
|
1212 eliminate_using_constants (opcode, ops);
|
|
1213 oelast = NULL;
|
|
1214
|
|
1215 for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
|
|
1216 {
|
|
1217 bool done = false;
|
|
1218
|
|
1219 if (eliminate_not_pairs (opcode, ops, i, oe))
|
|
1220 return;
|
|
1221 if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
|
|
1222 || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)))
|
|
1223 {
|
|
1224 if (done)
|
|
1225 return;
|
|
1226 iterate = true;
|
|
1227 oelast = NULL;
|
|
1228 continue;
|
|
1229 }
|
|
1230 oelast = oe;
|
|
1231 i++;
|
|
1232 }
|
|
1233
|
|
1234 length = VEC_length (operand_entry_t, *ops);
|
|
1235 oelast = VEC_last (operand_entry_t, *ops);
|
|
1236
|
|
1237 if (iterate)
|
|
1238 optimize_ops_list (opcode, ops);
|
|
1239 }
|
|
1240
|
|
1241 /* Return true if OPERAND is defined by a PHI node which uses the LHS
|
|
1242 of STMT in it's operands. This is also known as a "destructive
|
|
1243 update" operation. */
|
|
1244
|
|
1245 static bool
|
|
1246 is_phi_for_stmt (gimple stmt, tree operand)
|
|
1247 {
|
|
1248 gimple def_stmt;
|
|
1249 tree lhs;
|
|
1250 use_operand_p arg_p;
|
|
1251 ssa_op_iter i;
|
|
1252
|
|
1253 if (TREE_CODE (operand) != SSA_NAME)
|
|
1254 return false;
|
|
1255
|
|
1256 lhs = gimple_assign_lhs (stmt);
|
|
1257
|
|
1258 def_stmt = SSA_NAME_DEF_STMT (operand);
|
|
1259 if (gimple_code (def_stmt) != GIMPLE_PHI)
|
|
1260 return false;
|
|
1261
|
|
1262 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE)
|
|
1263 if (lhs == USE_FROM_PTR (arg_p))
|
|
1264 return true;
|
|
1265 return false;
|
|
1266 }
|
|
1267
|
|
1268 /* Remove def stmt of VAR if VAR has zero uses and recurse
|
|
1269 on rhs1 operand if so. */
|
|
1270
|
|
1271 static void
|
|
1272 remove_visited_stmt_chain (tree var)
|
|
1273 {
|
|
1274 gimple stmt;
|
|
1275 gimple_stmt_iterator gsi;
|
|
1276
|
|
1277 while (1)
|
|
1278 {
|
|
1279 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var))
|
|
1280 return;
|
|
1281 stmt = SSA_NAME_DEF_STMT (var);
|
|
1282 if (!is_gimple_assign (stmt)
|
|
1283 || !gimple_visited_p (stmt))
|
|
1284 return;
|
|
1285 var = gimple_assign_rhs1 (stmt);
|
|
1286 gsi = gsi_for_stmt (stmt);
|
|
1287 gsi_remove (&gsi, true);
|
|
1288 release_defs (stmt);
|
|
1289 }
|
|
1290 }
|
|
1291
|
|
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  STMT is the current statement of the left-linearized chain;
   MOVED is true once the chain's feeding statements have already been
   moved next to STMT.  */

static void
rewrite_expr_tree (gimple stmt, unsigned int opindex,
		   VEC(operand_entry_t, heap) * ops, bool moved)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  operand_entry_t oe;

  /* If we have three operands left, then we want to make sure the ones
     that get the double binary op are the ones with the same rank.

     The alternative we try is to see if this is a destructive
     update style statement, which is like:
     b = phi (a, ...)
     a = c + b;
     In that case, we want to use the destructive update form to
     expose the possible vectorizer sum reduction opportunity.
     In that case, the third operand will be the phi node.

     We could, of course, try to be better as noted above, and do a
     lot of work to try to find these opportunities in >3 operand
     cases, but it is unlikely to be worth it.  */
  if (opindex + 3 == VEC_length (operand_entry_t, ops))
    {
      operand_entry_t oe1, oe2, oe3;

      oe1 = VEC_index (operand_entry_t, ops, opindex);
      oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
      oe3 = VEC_index (operand_entry_t, ops, opindex + 2);

      /* Swap oe1 and oe3 so the equal-ranked pair (or the non-PHI
	 operands) end up in the inner (double) operation.  */
      if ((oe1->rank == oe2->rank
	   && oe2->rank != oe3->rank)
	  || (is_phi_for_stmt (stmt, oe3->op)
	      && !is_phi_for_stmt (stmt, oe1->op)
	      && !is_phi_for_stmt (stmt, oe2->op)))
	{
	  struct operand_entry temp = *oe3;
	  oe3->op = oe1->op;
	  oe3->rank = oe1->rank;
	  oe1->op = temp.op;
	  oe1->rank= temp.rank;
	}
      /* Likewise for oe1 and oe2.  */
      else if ((oe1->rank == oe3->rank
		&& oe2->rank != oe3->rank)
	       || (is_phi_for_stmt (stmt, oe2->op)
		   && !is_phi_for_stmt (stmt, oe1->op)
		   && !is_phi_for_stmt (stmt, oe3->op)))
	{
	  struct operand_entry temp = *oe2;
	  oe2->op = oe1->op;
	  oe2->rank = oe1->rank;
	  oe1->op = temp.op;
	  oe1->rank= temp.rank;
	}
    }

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == VEC_length (operand_entry_t, ops))
    {
      operand_entry_t oe1, oe2;

      oe1 = VEC_index (operand_entry_t, ops, opindex);
      oe2 = VEC_index (operand_entry_t, ops, opindex + 1);

      /* Only touch the statement if one of its operands actually
	 changes.  */
      if (rhs1 != oe1->op || rhs2 != oe2->op)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Transforming ");
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }

	  gimple_assign_set_rhs1 (stmt, oe1->op);
	  gimple_assign_set_rhs2 (stmt, oe2->op);
	  update_stmt (stmt);
	  /* The old rhs1 chain may now be dead; clean it up unless it
	     is still used as one of the new operands.  */
	  if (rhs1 != oe1->op && rhs1 != oe2->op)
	    remove_visited_stmt_chain (rhs1);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " into ");
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }

	}
      return;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));

  /* Rewrite the next operator.  */
  oe = VEC_index (operand_entry_t, ops, opindex);

  if (oe->op != rhs2)
    {
      /* Before the first actual change, move the remaining feeding
	 statements of the chain directly before STMT so that uses are
	 still dominated by their defs after we rewrite operands.  */
      if (!moved)
	{
	  gimple_stmt_iterator gsinow, gsirhs1;
	  gimple stmt1 = stmt, stmt2;
	  unsigned int count;

	  gsinow = gsi_for_stmt (stmt);
	  count = VEC_length (operand_entry_t, ops) - opindex - 2;
	  while (count-- != 0)
	    {
	      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
	      gsirhs1 = gsi_for_stmt (stmt2);
	      gsi_move_before (&gsirhs1, &gsinow);
	      gsi_prev (&gsinow);
	      stmt1 = stmt2;
	    }
	  moved = true;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      gimple_assign_set_rhs2 (stmt, oe->op);
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}
    }
  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
}
|
|
1434
|
|
/* Transform STMT, which is really (A +B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */

static void
linearize_expr (gimple stmt)
{
  gimple_stmt_iterator gsinow, gsirhs;
  gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple newbinrhs = NULL;
  struct loop *loop = loop_containing_stmt (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));

  /* Move BINRHS up to just before STMT so its operands keep dominating
     their uses after the rewrite below.  */
  gsinow = gsi_for_stmt (stmt);
  gsirhs = gsi_for_stmt (binrhs);
  gsi_move_before (&gsirhs, &gsinow);

  /* Rotate the tree: STMT = (A+B) + (C+D) becomes
     binrhs = (A+B) + C, STMT = binrhs + D.  */
  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));

  /* The new rhs2 of STMT (D above) may itself still be a reassociable
     operation.  */
  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (binrhs);
  update_stmt (binlhs);
  update_stmt (stmt);

  /* Mark all three as processed so they are not re-linearized and so
     remove_visited_stmt_chain may delete them if they go dead.  */
  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
	   want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}
|
|
1484
|
|
1485 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return
|
|
1486 it. Otherwise, return NULL. */
|
|
1487
|
|
1488 static gimple
|
|
1489 get_single_immediate_use (tree lhs)
|
|
1490 {
|
|
1491 use_operand_p immuse;
|
|
1492 gimple immusestmt;
|
|
1493
|
|
1494 if (TREE_CODE (lhs) == SSA_NAME
|
|
1495 && single_imm_use (lhs, &immuse, &immusestmt)
|
|
1496 && is_gimple_assign (immusestmt))
|
|
1497 return immusestmt;
|
|
1498
|
|
1499 return NULL;
|
|
1500 }
|
|
1501
|
|
/* SSA names produced by negate_value while breaking up subtracts;
   repropagate_negates later turns surviving negate/plus pairs back
   into MINUS_EXPRs.  */
static VEC(tree, heap) *broken_up_subtracts;
|
|
1503
|
|
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsi)
{
  gimple negatedefstmt= NULL;
  tree resultofnegate;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      gimple_stmt_iterator gsi;
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);

      /* The iterator is re-acquired before each recursive call because
	 negate_value may insert statements, invalidating the previous
	 iterator position.  */
      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);
      gimple_assign_set_rhs1 (negatedefstmt, rhs1);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);
      gimple_assign_set_rhs2 (negatedefstmt, rhs2);

      update_stmt (negatedefstmt);
      return gimple_assign_lhs (negatedefstmt);
    }

  /* Base case: materialize -TONEGATE as a new statement before GSI and
     remember the result so repropagate_negates can revisit it.  */
  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
					     NULL_TREE, true, GSI_SAME_STMT);
  VEC_safe_push (tree, heap, broken_up_subtracts, resultofnegate);
  return resultofnegate;
}
|
|
1549
|
|
1550 /* Return true if we should break up the subtract in STMT into an add
|
|
1551 with negate. This is true when we the subtract operands are really
|
|
1552 adds, or the subtract itself is used in an add expression. In
|
|
1553 either case, breaking up the subtract into an add with negate
|
|
1554 exposes the adds to reassociation. */
|
|
1555
|
|
1556 static bool
|
|
1557 should_break_up_subtract (gimple stmt)
|
|
1558 {
|
|
1559 tree lhs = gimple_assign_lhs (stmt);
|
|
1560 tree binlhs = gimple_assign_rhs1 (stmt);
|
|
1561 tree binrhs = gimple_assign_rhs2 (stmt);
|
|
1562 gimple immusestmt;
|
|
1563 struct loop *loop = loop_containing_stmt (stmt);
|
|
1564
|
|
1565 if (TREE_CODE (binlhs) == SSA_NAME
|
|
1566 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop))
|
|
1567 return true;
|
|
1568
|
|
1569 if (TREE_CODE (binrhs) == SSA_NAME
|
|
1570 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop))
|
|
1571 return true;
|
|
1572
|
|
1573 if (TREE_CODE (lhs) == SSA_NAME
|
|
1574 && (immusestmt = get_single_immediate_use (lhs))
|
|
1575 && is_gimple_assign (immusestmt)
|
|
1576 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR
|
|
1577 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR))
|
|
1578 return true;
|
|
1579 return false;
|
|
1580 }
|
|
1581
|
|
1582 /* Transform STMT from A - B into A + -B. */
|
|
1583
|
|
1584 static void
|
|
1585 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip)
|
|
1586 {
|
|
1587 tree rhs1 = gimple_assign_rhs1 (stmt);
|
|
1588 tree rhs2 = gimple_assign_rhs2 (stmt);
|
|
1589
|
|
1590 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
1591 {
|
|
1592 fprintf (dump_file, "Breaking up subtract ");
|
|
1593 print_gimple_stmt (dump_file, stmt, 0, 0);
|
|
1594 }
|
|
1595
|
|
1596 rhs2 = negate_value (rhs2, gsip);
|
|
1597 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2);
|
|
1598 update_stmt (stmt);
|
|
1599 }
|
|
1600
|
|
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.
   IS_ASSOCIATIVE is false for codes (like division) that must not be
   reordered; SET_VISITED controls whether processed statements are
   marked visited.  */

static void
linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
		     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple binlhsdef, binrhsdef;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  struct loop *loop = loop_containing_stmt (stmt);

  if (set_visited)
    gimple_set_visited (stmt, true);

  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode, loop);
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode, loop);
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */

  if (!binlhsisreassoc)
    {
      tree temp;

      /* If this is not a associative operation like division, give up.  */
      if (!is_associative)
	{
	  add_to_ops_vec (ops, binrhs);
	  return;
	}

      /* Neither side continues the chain: both are leaves.  */
      if (!binrhsisreassoc)
	{
	  add_to_ops_vec (ops, binrhs);
	  add_to_ops_vec (ops, binlhs);
	  return;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "swapping operands of ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      /* Swap the operands in the IR so the reassociable one becomes
	 rhs1.  */
      swap_tree_operands (stmt,
			  gimple_assign_rhs1_ptr (stmt),
			  gimple_assign_rhs2_ptr (stmt));
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " is now ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      /* We want to make it so the lhs is always the reassociative op,
	 so swap.  */
      temp = binlhs;
      binlhs = binrhs;
      binrhs = temp;
    }
  else if (binrhsisreassoc)
    {
      /* Both sides are chains: left-linearize first so only the LHS
	 continues the chain.  */
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
    }

  /* At this point binrhs must be a leaf of the chain.  */
  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
				      rhscode, loop));
  /* Recurse down the (reassociable) LHS, then record the RHS leaf.  */
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
		       is_associative, set_visited);
  add_to_ops_vec (ops, binrhs);
}
|
|
1692
|
|
1693 /* Repropagate the negates back into subtracts, since no other pass
|
|
1694 currently does it. */
|
|
1695
|
|
1696 static void
|
|
1697 repropagate_negates (void)
|
|
1698 {
|
|
1699 unsigned int i = 0;
|
|
1700 tree negate;
|
|
1701
|
|
1702 for (i = 0; VEC_iterate (tree, broken_up_subtracts, i, negate); i++)
|
|
1703 {
|
|
1704 gimple user = get_single_immediate_use (negate);
|
|
1705
|
|
1706 /* The negate operand can be either operand of a PLUS_EXPR
|
|
1707 (it can be the LHS if the RHS is a constant for example).
|
|
1708
|
|
1709 Force the negate operand to the RHS of the PLUS_EXPR, then
|
|
1710 transform the PLUS_EXPR into a MINUS_EXPR. */
|
|
1711 if (user
|
|
1712 && is_gimple_assign (user)
|
|
1713 && gimple_assign_rhs_code (user) == PLUS_EXPR)
|
|
1714 {
|
|
1715 /* If the negated operand appears on the LHS of the
|
|
1716 PLUS_EXPR, exchange the operands of the PLUS_EXPR
|
|
1717 to force the negated operand to the RHS of the PLUS_EXPR. */
|
|
1718 if (gimple_assign_rhs1 (user) == negate)
|
|
1719 {
|
|
1720 swap_tree_operands (user,
|
|
1721 gimple_assign_rhs1_ptr (user),
|
|
1722 gimple_assign_rhs2_ptr (user));
|
|
1723 }
|
|
1724
|
|
1725 /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
|
|
1726 the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR. */
|
|
1727 if (gimple_assign_rhs2 (user) == negate)
|
|
1728 {
|
|
1729 tree rhs1 = gimple_assign_rhs1 (user);
|
|
1730 tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
|
|
1731 gimple_stmt_iterator gsi = gsi_for_stmt (user);
|
|
1732 gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
|
|
1733 update_stmt (user);
|
|
1734 }
|
|
1735 }
|
|
1736 }
|
|
1737 }
|
|
1738
|
|
1739 /* Break up subtract operations in block BB.
|
|
1740
|
|
1741 We do this top down because we don't know whether the subtract is
|
|
1742 part of a possible chain of reassociation except at the top.
|
|
1743
|
|
1744 IE given
|
|
1745 d = f + g
|
|
1746 c = a + e
|
|
1747 b = c - d
|
|
1748 q = b - r
|
|
1749 k = t - q
|
|
1750
|
|
1751 we want to break up k = t - q, but we won't until we've transformed q
|
|
1752 = b - r, which won't be broken up until we transform b = c - d.
|
|
1753
|
|
1754 En passant, clear the GIMPLE visited flag on every statement. */
|
|
1755
|
|
1756 static void
|
|
1757 break_up_subtract_bb (basic_block bb)
|
|
1758 {
|
|
1759 gimple_stmt_iterator gsi;
|
|
1760 basic_block son;
|
|
1761
|
|
1762 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
1763 {
|
|
1764 gimple stmt = gsi_stmt (gsi);
|
|
1765 gimple_set_visited (stmt, false);
|
|
1766
|
|
1767 /* Look for simple gimple subtract operations. */
|
|
1768 if (is_gimple_assign (stmt)
|
|
1769 && gimple_assign_rhs_code (stmt) == MINUS_EXPR)
|
|
1770 {
|
|
1771 tree lhs = gimple_assign_lhs (stmt);
|
|
1772 tree rhs1 = gimple_assign_rhs1 (stmt);
|
|
1773 tree rhs2 = gimple_assign_rhs2 (stmt);
|
|
1774
|
|
1775 /* If associative-math we can do reassociation for
|
|
1776 non-integral types. Or, we can do reassociation for
|
|
1777 non-saturating fixed-point types. */
|
|
1778 if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
|
|
1779 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
|
|
1780 || !INTEGRAL_TYPE_P (TREE_TYPE (rhs2)))
|
|
1781 && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs))
|
|
1782 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs1))
|
|
1783 || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs2))
|
|
1784 || !flag_associative_math)
|
|
1785 && (!NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE (lhs))
|
|
1786 || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs1))
|
|
1787 || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs2))))
|
|
1788 continue;
|
|
1789
|
|
1790 /* Check for a subtract used only in an addition. If this
|
|
1791 is the case, transform it into add of a negate for better
|
|
1792 reassociation. IE transform C = A-B into C = A + -B if C
|
|
1793 is only used in an addition. */
|
|
1794 if (should_break_up_subtract (stmt))
|
|
1795 break_up_subtract (stmt, &gsi);
|
|
1796 }
|
|
1797 }
|
|
1798 for (son = first_dom_son (CDI_DOMINATORS, bb);
|
|
1799 son;
|
|
1800 son = next_dom_son (CDI_DOMINATORS, son))
|
|
1801 break_up_subtract_bb (son);
|
|
1802 }
|
|
1803
|
|
1804 /* Reassociate expressions in basic block BB and its post-dominator as
|
|
1805 children. */
|
|
1806
|
|
1807 static void
|
|
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;

  /* Walk the statements backwards so that when a statement's operands
     are reassociated (and its feeding statements marked visited), the
     feeding statements are reached afterwards and can be skipped or
     removed as dead.  */
  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt))
	{
	  tree lhs, rhs1, rhs2;
	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

	  /* If this is not a gimple binary expression, there is
	     nothing for us to do with it.  */
	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
	    continue;

	  /* If this was part of an already processed statement,
	     we don't need to touch it again.  */
	  if (gimple_visited_p (stmt))
	    {
	      /* This statement might have become dead because of previous
		 reassociations.  */
	      if (has_zero_uses (gimple_get_lhs (stmt)))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  /* We might end up removing the last stmt above which
		     places the iterator to the end of the sequence.
		     Reset it to the last stmt in this case which might
		     be the end of the sequence as well if we removed
		     the last statement of the sequence.  In which case
		     we need to bail out.  */
		  if (gsi_end_p (gsi))
		    {
		      gsi = gsi_last_bb (bb);
		      if (gsi_end_p (gsi))
			break;
		    }
		}
	      continue;
	    }

	  lhs = gimple_assign_lhs (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);

	  /* If associative-math we can do reassociation for
	     non-integral types.  Or, we can do reassociation for
	     non-saturating fixed-point types.
	     I.e. reassociate when all three operands are integral, or
	     all are scalar floats and -fassociative-math is on, or all
	     are non-saturating fixed-point; otherwise skip.  */
	  if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	       || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
	       || !INTEGRAL_TYPE_P (TREE_TYPE (rhs2)))
	      && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs))
		  || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs1))
		  || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs2))
		  || !flag_associative_math)
	      && (!NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE (lhs))
		  || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs1))
		  || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs2))))
	    continue;

	  if (associative_tree_code (rhs_code))
	    {
	      VEC(operand_entry_t, heap) *ops = NULL;

	      /* There may be no immediate uses left by the time we
		 get here because we may have eliminated them all.  */
	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
		continue;

	      /* Gather and canonicalize the full operand list of the
		 linearized expression tree rooted at STMT, then run the
		 optimizations over it.  */
	      gimple_set_visited (stmt, true);
	      linearize_expr_tree (&ops, stmt, true, true);
	      qsort (VEC_address (operand_entry_t, ops),
		     VEC_length (operand_entry_t, ops),
		     sizeof (operand_entry_t),
		     sort_by_operand_rank);
	      optimize_ops_list (rhs_code, &ops);
	      /* Undistribution may have changed the operand list, so
		 re-sort and re-optimize if it did anything.  */
	      if (undistribute_ops_list (rhs_code, &ops,
					 loop_containing_stmt (stmt)))
		{
		  qsort (VEC_address (operand_entry_t, ops),
			 VEC_length (operand_entry_t, ops),
			 sizeof (operand_entry_t),
			 sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}

	      /* A single remaining operand means the whole expression
		 collapsed to a copy; rewrite STMT in place and clean up
		 the now-unused feeding statements.  */
	      if (VEC_length (operand_entry_t, ops) == 1)
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, "Transforming ");
		      print_gimple_stmt (dump_file, stmt, 0, 0);
		    }

		  rhs1 = gimple_assign_rhs1 (stmt);
		  gimple_assign_set_rhs_from_tree (&gsi,
						   VEC_last (operand_entry_t,
							     ops)->op);
		  update_stmt (stmt);
		  remove_visited_stmt_chain (rhs1);

		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, " into ");
		      print_gimple_stmt (dump_file, stmt, 0, 0);
		    }
		}
	      else
		rewrite_expr_tree (stmt, 0, ops, false);

	      VEC_free (operand_entry_t, heap, ops);
	    }
	}
    }
  /* Recurse over the post-dominator tree so children are processed
     after their post-dominator (matching the backward walk above).  */
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}
|
|
1931
|
|
/* Forward declarations.  These dump helpers deliberately have external
   linkage (no 'static') so they can be called by hand from a debugger
   even though nothing in the compiler references them.  */
void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops);
void debug_ops_vector (VEC (operand_entry_t, heap) *ops);
|
|
1934
|
|
1935 /* Dump the operand entry vector OPS to FILE. */
|
|
1936
|
|
1937 void
|
|
1938 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops)
|
|
1939 {
|
|
1940 operand_entry_t oe;
|
|
1941 unsigned int i;
|
|
1942
|
|
1943 for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++)
|
|
1944 {
|
|
1945 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank);
|
|
1946 print_generic_expr (file, oe->op, 0);
|
|
1947 }
|
|
1948 }
|
|
1949
|
|
/* Dump the operand entry vector OPS to STDERR.  Convenience wrapper
   around dump_ops_vector for interactive use from a debugger.  */

void
debug_ops_vector (VEC (operand_entry_t, heap) *ops)
{
  dump_ops_vector (stderr, ops);
}
|
|
1957
|
|
/* Run the two phases of the pass over the whole function: first break
   up subtracts starting from the entry block, then reassociate
   expressions starting from the exit block (reassociate_bb recurses
   over the post-dominator tree from there).  */

static void
do_reassoc (void)
{
  break_up_subtract_bb (ENTRY_BLOCK_PTR);
  reassociate_bb (EXIT_BLOCK_PTR);
}
|
|
1964
|
|
1965 /* Initialize the reassociation pass. */
|
|
1966
|
|
1967 static void
|
|
1968 init_reassoc (void)
|
|
1969 {
|
|
1970 int i;
|
|
1971 long rank = 2;
|
|
1972 tree param;
|
|
1973 int *bbs = XNEWVEC (int, last_basic_block + 1);
|
|
1974
|
|
1975 /* Find the loops, so that we can prevent moving calculations in
|
|
1976 them. */
|
|
1977 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
|
|
1978
|
|
1979 memset (&reassociate_stats, 0, sizeof (reassociate_stats));
|
|
1980
|
|
1981 operand_entry_pool = create_alloc_pool ("operand entry pool",
|
|
1982 sizeof (struct operand_entry), 30);
|
|
1983
|
|
1984 /* Reverse RPO (Reverse Post Order) will give us something where
|
|
1985 deeper loops come later. */
|
|
1986 pre_and_rev_post_order_compute (NULL, bbs, false);
|
|
1987 bb_rank = XCNEWVEC (long, last_basic_block + 1);
|
|
1988 operand_rank = pointer_map_create ();
|
|
1989
|
|
1990 /* Give each argument a distinct rank. */
|
|
1991 for (param = DECL_ARGUMENTS (current_function_decl);
|
|
1992 param;
|
|
1993 param = TREE_CHAIN (param))
|
|
1994 {
|
|
1995 if (gimple_default_def (cfun, param) != NULL)
|
|
1996 {
|
|
1997 tree def = gimple_default_def (cfun, param);
|
|
1998 insert_operand_rank (def, ++rank);
|
|
1999 }
|
|
2000 }
|
|
2001
|
|
2002 /* Give the chain decl a distinct rank. */
|
|
2003 if (cfun->static_chain_decl != NULL)
|
|
2004 {
|
|
2005 tree def = gimple_default_def (cfun, cfun->static_chain_decl);
|
|
2006 if (def != NULL)
|
|
2007 insert_operand_rank (def, ++rank);
|
|
2008 }
|
|
2009
|
|
2010 /* Set up rank for each BB */
|
|
2011 for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
|
|
2012 bb_rank[bbs[i]] = ++rank << 16;
|
|
2013
|
|
2014 free (bbs);
|
|
2015 calculate_dominance_info (CDI_POST_DOMINATORS);
|
|
2016 broken_up_subtracts = NULL;
|
|
2017 }
|
|
2018
|
|
/* Cleanup after the reassociation pass, and print stats if
   requested.  */

static void
fini_reassoc (void)
{
  /* Report the counters accumulated in reassociate_stats during the
     pass (visible with -fdump-statistics).  */
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);

  /* Release everything allocated by init_reassoc, in the reverse
     spirit of its setup.  */
  pointer_map_destroy (operand_rank);
  free_alloc_pool (operand_entry_pool);
  free (bb_rank);
  VEC_free (tree, heap, broken_up_subtracts);
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
|
|
2041
|
|
/* Gate and execute functions for Reassociation.  */

/* Pass entry point: initialize, run both reassociation phases, then
   tear down.  Always returns 0 (no extra TODO flags requested).  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  /* NOTE(review): presumably cleans up negates introduced when
     subtracts were broken up — confirm against its definition.  */
  repropagate_negates ();

  fini_reassoc ();
  return 0;
}
|
|
2055
|
|
/* Gate function: run the pass only when -ftree-reassoc is enabled.  */

static bool
gate_tree_ssa_reassoc (void)
{
  return flag_tree_reassoc != 0;
}
|
|
2061
|
|
/* Pass descriptor registering "reassoc" with the pass manager.  The
   fields initialize the embedded struct opt_pass positionally.  */

struct gimple_opt_pass pass_reassoc =
{
 {
  GIMPLE_PASS,
  "reassoc",				/* name */
  gate_tree_ssa_reassoc,		/* gate */
  execute_reassoc,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_TREE_REASSOC,			/* tv_id */
  PROP_cfg | PROP_ssa | PROP_alias,	/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */
 }
};
|
|
2080
|