Mercurial > hg > CbC > CbC_gcc
annotate gcc/tree-ssa-reassoc.c @ 55:77e2b8dfacca gcc-4.4.5
update it from 4.4.3 to 4.5.0
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 12 Feb 2010 23:39:51 +0900 |
parents | a06113de4d67 |
children | b7f97abdc517 |
rev | line source |
---|---|
0 | 1 /* Reassociation for trees. |
2 Copyright (C) 2005, 2007, 2008, 2009 Free Software Foundation, Inc. | |
3 Contributed by Daniel Berlin <dan@dberlin.org> | |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify | |
8 it under the terms of the GNU General Public License as published by | |
9 the Free Software Foundation; either version 3, or (at your option) | |
10 any later version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, | |
13 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 GNU General Public License for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
21 #include "config.h" | |
22 #include "system.h" | |
23 #include "coretypes.h" | |
24 #include "tm.h" | |
25 #include "ggc.h" | |
26 #include "tree.h" | |
27 #include "basic-block.h" | |
28 #include "diagnostic.h" | |
29 #include "tree-inline.h" | |
30 #include "tree-flow.h" | |
31 #include "gimple.h" | |
32 #include "tree-dump.h" | |
33 #include "timevar.h" | |
34 #include "tree-iterator.h" | |
35 #include "tree-pass.h" | |
36 #include "alloc-pool.h" | |
37 #include "vec.h" | |
38 #include "langhooks.h" | |
39 #include "pointer-set.h" | |
40 #include "cfgloop.h" | |
41 #include "flags.h" | |
42 | |
43 /* This is a simple global reassociation pass. It is, in part, based | |
44 on the LLVM pass of the same name (They do some things more/less | |
45 than we do, in different orders, etc). | |
46 | |
47 It consists of five steps: | |
48 | |
49 1. Breaking up subtract operations into addition + negate, where | |
50 it would promote the reassociation of adds. | |
51 | |
52 2. Left linearization of the expression trees, so that (A+B)+(C+D) | |
53 becomes (((A+B)+C)+D), which is easier for us to rewrite later. | |
54 During linearization, we place the operands of the binary | |
55 expressions into a vector of operand_entry_t | |
56 | |
57 3. Optimization of the operand lists, eliminating things like a + | |
58 -a, a & a, etc. | |
59 | |
60 4. Rewrite the expression trees we linearized and optimized so | |
61 they are in proper rank order. | |
62 | |
63 5. Repropagate negates, as nothing else will clean it up ATM. | |
64 | |
65 A bit of theory on #4, since nobody seems to write anything down | |
66 about why it makes sense to do it the way they do it: | |
67 | |
68 We could do this much nicer theoretically, but don't (for reasons | |
69 explained after how to do it theoretically nice :P). | |
70 | |
71 In order to promote the most redundancy elimination, you want | |
72 binary expressions whose operands are the same rank (or | |
73 preferably, the same value) exposed to the redundancy eliminator, | |
74 for possible elimination. | |
75 | |
76 So the way to do this if we really cared, is to build the new op | |
77 tree from the leaves to the roots, merging as you go, and putting the | |
78 new op on the end of the worklist, until you are left with one | |
79 thing on the worklist. | |
80 | |
81 IE if you have to rewrite the following set of operands (listed with | |
82 rank in parentheses), with opcode PLUS_EXPR: | |
83 | |
84 a (1), b (1), c (1), d (2), e (2) | |
85 | |
86 | |
87 We start with our merge worklist empty, and the ops list with all of | |
88 those on it. | |
89 | |
90 You want to first merge all leaves of the same rank, as much as | |
91 possible. | |
92 | |
93 So first build a binary op of | |
94 | |
95 mergetmp = a + b, and put "mergetmp" on the merge worklist. | |
96 | |
97 Because there is no three operand form of PLUS_EXPR, c is not going to | |
98 be exposed to redundancy elimination as a rank 1 operand. | |
99 | |
100 So you might as well throw it on the merge worklist (you could also | |
101 consider it to now be a rank two operand, and merge it with d and e, | |
102 but in this case, you then have evicted e from a binary op. So at | |
103 least in this situation, you can't win.) | |
104 | |
105 Then build a binary op of d + e | |
106 mergetmp2 = d + e | |
107 | |
108 and put mergetmp2 on the merge worklist. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
109 |
0 | 110 so merge worklist = {mergetmp, c, mergetmp2} |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
111 |
0 | 112 Continue building binary ops of these operations until you have only |
113 one operation left on the worklist. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
114 |
0 | 115 So we have |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
116 |
0 | 117 build binary op |
118 mergetmp3 = mergetmp + c | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
119 |
0 | 120 worklist = {mergetmp2, mergetmp3} |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
121 |
0 | 122 mergetmp4 = mergetmp2 + mergetmp3 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
123 |
0 | 124 worklist = {mergetmp4} |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
125 |
0 | 126 because we have one operation left, we can now just set the original |
127 statement equal to the result of that operation. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
128 |
0 | 129 This will at least expose a + b and d + e to redundancy elimination |
130 as binary operations. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
131 |
0 | 132 For extra points, you can reuse the old statements to build the |
133 mergetmps, since you shouldn't run out. | |
134 | |
135 So why don't we do this? | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
136 |
0 | 137 Because it's expensive, and rarely will help. Most trees we are |
138 reassociating have 3 or less ops. If they have 2 ops, they already | |
139 will be written into a nice single binary op. If you have 3 ops, a | |
140 single simple check suffices to tell you whether the first two are of the | |
141 same rank. If so, you know to order it | |
142 | |
143 mergetmp = op1 + op2 | |
144 newstmt = mergetmp + op3 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
145 |
0 | 146 instead of |
147 mergetmp = op2 + op3 | |
148 newstmt = mergetmp + op1 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
149 |
0 | 150 If all three are of the same rank, you can't expose them all in a |
151 single binary operator anyway, so the above is *still* the best you | |
152 can do. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
153 |
0 | 154 Thus, this is what we do. When we have three ops left, we check to see |
155 what order to put them in, and call it a day. As a nod to vector sum | |
156 reduction, we check if any of the ops are really a phi node that is a | |
157 destructive update for the associating op, and keep the destructive | |
158 update together for vector sum reduction recognition. */ | |
159 | |
160 | |
/* Counters gathered while the pass runs, reported in the dump file at
   the end of reassociation.  */
static struct
{
  int linearized;		/* Expression trees flattened.  */
  int constants_eliminated;	/* Constants folded away.  */
  int ops_eliminated;		/* Operands removed via identities.  */
  int rewritten;		/* Statements rewritten in rank order.  */
} reassociate_stats;
169 | |
170 /* Operator, rank pair. */ | |
171 typedef struct operand_entry | |
172 { | |
173 unsigned int rank; | |
174 tree op; | |
175 } *operand_entry_t; | |
176 | |
177 static alloc_pool operand_entry_pool; | |
178 | |
179 | |
180 /* Starting rank number for a given basic block, so that we can rank | |
181 operations using unmovable instructions in that BB based on the bb | |
182 depth. */ | |
183 static long *bb_rank; | |
184 | |
185 /* Operand->rank hashtable. */ | |
186 static struct pointer_map_t *operand_rank; | |
187 | |
188 | |
189 /* Look up the operand rank structure for expression E. */ | |
190 | |
191 static inline long | |
192 find_operand_rank (tree e) | |
193 { | |
194 void **slot = pointer_map_contains (operand_rank, e); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
195 return slot ? (long) (intptr_t) *slot : -1; |
0 | 196 } |
197 | |
198 /* Insert {E,RANK} into the operand rank hashtable. */ | |
199 | |
200 static inline void | |
201 insert_operand_rank (tree e, long rank) | |
202 { | |
203 void **slot; | |
204 gcc_assert (rank > 0); | |
205 slot = pointer_map_insert (operand_rank, e); | |
206 gcc_assert (!*slot); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
207 *slot = (void *) (intptr_t) rank; |
0 | 208 } |
209 | |
210 /* Given an expression E, return the rank of the expression. */ | |
211 | |
212 static long | |
213 get_rank (tree e) | |
214 { | |
215 /* Constants have rank 0. */ | |
216 if (is_gimple_min_invariant (e)) | |
217 return 0; | |
218 | |
219 /* SSA_NAME's have the rank of the expression they are the result | |
220 of. | |
221 For globals and uninitialized values, the rank is 0. | |
222 For function arguments, use the pre-setup rank. | |
223 For PHI nodes, stores, asm statements, etc, we use the rank of | |
224 the BB. | |
225 For simple operations, the rank is the maximum rank of any of | |
226 its operands, or the bb_rank, whichever is less. | |
227 I make no claims that this is optimal, however, it gives good | |
228 results. */ | |
229 | |
230 if (TREE_CODE (e) == SSA_NAME) | |
231 { | |
232 gimple stmt; | |
233 long rank, maxrank; | |
234 int i, n; | |
235 | |
236 if (TREE_CODE (SSA_NAME_VAR (e)) == PARM_DECL | |
237 && SSA_NAME_IS_DEFAULT_DEF (e)) | |
238 return find_operand_rank (e); | |
239 | |
240 stmt = SSA_NAME_DEF_STMT (e); | |
241 if (gimple_bb (stmt) == NULL) | |
242 return 0; | |
243 | |
244 if (!is_gimple_assign (stmt) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
245 || gimple_vdef (stmt)) |
0 | 246 return bb_rank[gimple_bb (stmt)->index]; |
247 | |
248 /* If we already have a rank for this expression, use that. */ | |
249 rank = find_operand_rank (e); | |
250 if (rank != -1) | |
251 return rank; | |
252 | |
253 /* Otherwise, find the maximum rank for the operands, or the bb | |
254 rank, whichever is less. */ | |
255 rank = 0; | |
256 maxrank = bb_rank[gimple_bb(stmt)->index]; | |
257 if (gimple_assign_single_p (stmt)) | |
258 { | |
259 tree rhs = gimple_assign_rhs1 (stmt); | |
260 n = TREE_OPERAND_LENGTH (rhs); | |
261 if (n == 0) | |
262 rank = MAX (rank, get_rank (rhs)); | |
263 else | |
264 { | |
265 for (i = 0; | |
266 i < n && TREE_OPERAND (rhs, i) && rank != maxrank; i++) | |
267 rank = MAX(rank, get_rank (TREE_OPERAND (rhs, i))); | |
268 } | |
269 } | |
270 else | |
271 { | |
272 n = gimple_num_ops (stmt); | |
273 for (i = 1; i < n && rank != maxrank; i++) | |
274 { | |
275 gcc_assert (gimple_op (stmt, i)); | |
276 rank = MAX(rank, get_rank (gimple_op (stmt, i))); | |
277 } | |
278 } | |
279 | |
280 if (dump_file && (dump_flags & TDF_DETAILS)) | |
281 { | |
282 fprintf (dump_file, "Rank for "); | |
283 print_generic_expr (dump_file, e, 0); | |
284 fprintf (dump_file, " is %ld\n", (rank + 1)); | |
285 } | |
286 | |
287 /* Note the rank in the hashtable so we don't recompute it. */ | |
288 insert_operand_rank (e, (rank + 1)); | |
289 return (rank + 1); | |
290 } | |
291 | |
292 /* Globals, etc, are rank 0 */ | |
293 return 0; | |
294 } | |
295 | |
296 DEF_VEC_P(operand_entry_t); | |
297 DEF_VEC_ALLOC_P(operand_entry_t, heap); | |
298 | |
299 /* We want integer ones to end up last no matter what, since they are | |
300 the ones we can do the most with. */ | |
301 #define INTEGER_CONST_TYPE 1 << 3 | |
302 #define FLOAT_CONST_TYPE 1 << 2 | |
303 #define OTHER_CONST_TYPE 1 << 1 | |
304 | |
305 /* Classify an invariant tree into integer, float, or other, so that | |
306 we can sort them to be near other constants of the same type. */ | |
307 static inline int | |
308 constant_type (tree t) | |
309 { | |
310 if (INTEGRAL_TYPE_P (TREE_TYPE (t))) | |
311 return INTEGER_CONST_TYPE; | |
312 else if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (t))) | |
313 return FLOAT_CONST_TYPE; | |
314 else | |
315 return OTHER_CONST_TYPE; | |
316 } | |
317 | |
318 /* qsort comparison function to sort operand entries PA and PB by rank | |
319 so that the sorted array is ordered by rank in decreasing order. */ | |
320 static int | |
321 sort_by_operand_rank (const void *pa, const void *pb) | |
322 { | |
323 const operand_entry_t oea = *(const operand_entry_t *)pa; | |
324 const operand_entry_t oeb = *(const operand_entry_t *)pb; | |
325 | |
326 /* It's nicer for optimize_expression if constants that are likely | |
327 to fold when added/multiplied//whatever are put next to each | |
328 other. Since all constants have rank 0, order them by type. */ | |
329 if (oeb->rank == 0 && oea->rank == 0) | |
330 return constant_type (oeb->op) - constant_type (oea->op); | |
331 | |
332 /* Lastly, make sure the versions that are the same go next to each | |
333 other. We use SSA_NAME_VERSION because it's stable. */ | |
334 if ((oeb->rank - oea->rank == 0) | |
335 && TREE_CODE (oea->op) == SSA_NAME | |
336 && TREE_CODE (oeb->op) == SSA_NAME) | |
337 return SSA_NAME_VERSION (oeb->op) - SSA_NAME_VERSION (oea->op); | |
338 | |
339 return oeb->rank - oea->rank; | |
340 } | |
341 | |
342 /* Add an operand entry to *OPS for the tree operand OP. */ | |
343 | |
344 static void | |
345 add_to_ops_vec (VEC(operand_entry_t, heap) **ops, tree op) | |
346 { | |
347 operand_entry_t oe = (operand_entry_t) pool_alloc (operand_entry_pool); | |
348 | |
349 oe->op = op; | |
350 oe->rank = get_rank (op); | |
351 VEC_safe_push (operand_entry_t, heap, *ops, oe); | |
352 } | |
353 | |
354 /* Return true if STMT is reassociable operation containing a binary | |
355 operation with tree code CODE, and is inside LOOP. */ | |
356 | |
357 static bool | |
358 is_reassociable_op (gimple stmt, enum tree_code code, struct loop *loop) | |
359 { | |
360 basic_block bb = gimple_bb (stmt); | |
361 | |
362 if (gimple_bb (stmt) == NULL) | |
363 return false; | |
364 | |
365 if (!flow_bb_inside_loop_p (loop, bb)) | |
366 return false; | |
367 | |
368 if (is_gimple_assign (stmt) | |
369 && gimple_assign_rhs_code (stmt) == code | |
370 && has_single_use (gimple_assign_lhs (stmt))) | |
371 return true; | |
372 | |
373 return false; | |
374 } | |
375 | |
376 | |
377 /* Given NAME, if NAME is defined by a unary operation OPCODE, return the | |
378 operand of the negate operation. Otherwise, return NULL. */ | |
379 | |
380 static tree | |
381 get_unary_op (tree name, enum tree_code opcode) | |
382 { | |
383 gimple stmt = SSA_NAME_DEF_STMT (name); | |
384 | |
385 if (!is_gimple_assign (stmt)) | |
386 return NULL_TREE; | |
387 | |
388 if (gimple_assign_rhs_code (stmt) == opcode) | |
389 return gimple_assign_rhs1 (stmt); | |
390 return NULL_TREE; | |
391 } | |
392 | |
393 /* If CURR and LAST are a pair of ops that OPCODE allows us to | |
394 eliminate through equivalences, do so, remove them from OPS, and | |
395 return true. Otherwise, return false. */ | |
396 | |
397 static bool | |
398 eliminate_duplicate_pair (enum tree_code opcode, | |
399 VEC (operand_entry_t, heap) **ops, | |
400 bool *all_done, | |
401 unsigned int i, | |
402 operand_entry_t curr, | |
403 operand_entry_t last) | |
404 { | |
405 | |
406 /* If we have two of the same op, and the opcode is & |, min, or max, | |
407 we can eliminate one of them. | |
408 If we have two of the same op, and the opcode is ^, we can | |
409 eliminate both of them. */ | |
410 | |
411 if (last && last->op == curr->op) | |
412 { | |
413 switch (opcode) | |
414 { | |
415 case MAX_EXPR: | |
416 case MIN_EXPR: | |
417 case BIT_IOR_EXPR: | |
418 case BIT_AND_EXPR: | |
419 if (dump_file && (dump_flags & TDF_DETAILS)) | |
420 { | |
421 fprintf (dump_file, "Equivalence: "); | |
422 print_generic_expr (dump_file, curr->op, 0); | |
423 fprintf (dump_file, " [&|minmax] "); | |
424 print_generic_expr (dump_file, last->op, 0); | |
425 fprintf (dump_file, " -> "); | |
426 print_generic_stmt (dump_file, last->op, 0); | |
427 } | |
428 | |
429 VEC_ordered_remove (operand_entry_t, *ops, i); | |
430 reassociate_stats.ops_eliminated ++; | |
431 | |
432 return true; | |
433 | |
434 case BIT_XOR_EXPR: | |
435 if (dump_file && (dump_flags & TDF_DETAILS)) | |
436 { | |
437 fprintf (dump_file, "Equivalence: "); | |
438 print_generic_expr (dump_file, curr->op, 0); | |
439 fprintf (dump_file, " ^ "); | |
440 print_generic_expr (dump_file, last->op, 0); | |
441 fprintf (dump_file, " -> nothing\n"); | |
442 } | |
443 | |
444 reassociate_stats.ops_eliminated += 2; | |
445 | |
446 if (VEC_length (operand_entry_t, *ops) == 2) | |
447 { | |
448 VEC_free (operand_entry_t, heap, *ops); | |
449 *ops = NULL; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
450 add_to_ops_vec (ops, fold_convert (TREE_TYPE (last->op), |
0 | 451 integer_zero_node)); |
452 *all_done = true; | |
453 } | |
454 else | |
455 { | |
456 VEC_ordered_remove (operand_entry_t, *ops, i-1); | |
457 VEC_ordered_remove (operand_entry_t, *ops, i-1); | |
458 } | |
459 | |
460 return true; | |
461 | |
462 default: | |
463 break; | |
464 } | |
465 } | |
466 return false; | |
467 } | |
468 | |
469 /* If OPCODE is PLUS_EXPR, CURR->OP is really a negate expression, | |
470 look in OPS for a corresponding positive operation to cancel it | |
471 out. If we find one, remove the other from OPS, replace | |
472 OPS[CURRINDEX] with 0, and return true. Otherwise, return | |
473 false. */ | |
474 | |
475 static bool | |
476 eliminate_plus_minus_pair (enum tree_code opcode, | |
477 VEC (operand_entry_t, heap) **ops, | |
478 unsigned int currindex, | |
479 operand_entry_t curr) | |
480 { | |
481 tree negateop; | |
482 unsigned int i; | |
483 operand_entry_t oe; | |
484 | |
485 if (opcode != PLUS_EXPR || TREE_CODE (curr->op) != SSA_NAME) | |
486 return false; | |
487 | |
488 negateop = get_unary_op (curr->op, NEGATE_EXPR); | |
489 if (negateop == NULL_TREE) | |
490 return false; | |
491 | |
492 /* Any non-negated version will have a rank that is one less than | |
493 the current rank. So once we hit those ranks, if we don't find | |
494 one, we can stop. */ | |
495 | |
496 for (i = currindex + 1; | |
497 VEC_iterate (operand_entry_t, *ops, i, oe) | |
498 && oe->rank >= curr->rank - 1 ; | |
499 i++) | |
500 { | |
501 if (oe->op == negateop) | |
502 { | |
503 | |
504 if (dump_file && (dump_flags & TDF_DETAILS)) | |
505 { | |
506 fprintf (dump_file, "Equivalence: "); | |
507 print_generic_expr (dump_file, negateop, 0); | |
508 fprintf (dump_file, " + -"); | |
509 print_generic_expr (dump_file, oe->op, 0); | |
510 fprintf (dump_file, " -> 0\n"); | |
511 } | |
512 | |
513 VEC_ordered_remove (operand_entry_t, *ops, i); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
514 add_to_ops_vec (ops, fold_convert(TREE_TYPE (oe->op), |
0 | 515 integer_zero_node)); |
516 VEC_ordered_remove (operand_entry_t, *ops, currindex); | |
517 reassociate_stats.ops_eliminated ++; | |
518 | |
519 return true; | |
520 } | |
521 } | |
522 | |
523 return false; | |
524 } | |
525 | |
526 /* If OPCODE is BIT_IOR_EXPR, BIT_AND_EXPR, and, CURR->OP is really a | |
527 bitwise not expression, look in OPS for a corresponding operand to | |
528 cancel it out. If we find one, remove the other from OPS, replace | |
529 OPS[CURRINDEX] with 0, and return true. Otherwise, return | |
530 false. */ | |
531 | |
532 static bool | |
533 eliminate_not_pairs (enum tree_code opcode, | |
534 VEC (operand_entry_t, heap) **ops, | |
535 unsigned int currindex, | |
536 operand_entry_t curr) | |
537 { | |
538 tree notop; | |
539 unsigned int i; | |
540 operand_entry_t oe; | |
541 | |
542 if ((opcode != BIT_IOR_EXPR && opcode != BIT_AND_EXPR) | |
543 || TREE_CODE (curr->op) != SSA_NAME) | |
544 return false; | |
545 | |
546 notop = get_unary_op (curr->op, BIT_NOT_EXPR); | |
547 if (notop == NULL_TREE) | |
548 return false; | |
549 | |
550 /* Any non-not version will have a rank that is one less than | |
551 the current rank. So once we hit those ranks, if we don't find | |
552 one, we can stop. */ | |
553 | |
554 for (i = currindex + 1; | |
555 VEC_iterate (operand_entry_t, *ops, i, oe) | |
556 && oe->rank >= curr->rank - 1; | |
557 i++) | |
558 { | |
559 if (oe->op == notop) | |
560 { | |
561 if (dump_file && (dump_flags & TDF_DETAILS)) | |
562 { | |
563 fprintf (dump_file, "Equivalence: "); | |
564 print_generic_expr (dump_file, notop, 0); | |
565 if (opcode == BIT_AND_EXPR) | |
566 fprintf (dump_file, " & ~"); | |
567 else if (opcode == BIT_IOR_EXPR) | |
568 fprintf (dump_file, " | ~"); | |
569 print_generic_expr (dump_file, oe->op, 0); | |
570 if (opcode == BIT_AND_EXPR) | |
571 fprintf (dump_file, " -> 0\n"); | |
572 else if (opcode == BIT_IOR_EXPR) | |
573 fprintf (dump_file, " -> -1\n"); | |
574 } | |
575 | |
576 if (opcode == BIT_AND_EXPR) | |
577 oe->op = fold_convert (TREE_TYPE (oe->op), integer_zero_node); | |
578 else if (opcode == BIT_IOR_EXPR) | |
579 oe->op = build_low_bits_mask (TREE_TYPE (oe->op), | |
580 TYPE_PRECISION (TREE_TYPE (oe->op))); | |
581 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
582 reassociate_stats.ops_eliminated |
0 | 583 += VEC_length (operand_entry_t, *ops) - 1; |
584 VEC_free (operand_entry_t, heap, *ops); | |
585 *ops = NULL; | |
586 VEC_safe_push (operand_entry_t, heap, *ops, oe); | |
587 return true; | |
588 } | |
589 } | |
590 | |
591 return false; | |
592 } | |
593 | |
594 /* Use constant value that may be present in OPS to try to eliminate | |
595 operands. Note that this function is only really used when we've | |
596 eliminated ops for other reasons, or merged constants. Across | |
597 single statements, fold already does all of this, plus more. There | |
598 is little point in duplicating logic, so I've only included the | |
599 identities that I could ever construct testcases to trigger. */ | |
600 | |
601 static void | |
602 eliminate_using_constants (enum tree_code opcode, | |
603 VEC(operand_entry_t, heap) **ops) | |
604 { | |
605 operand_entry_t oelast = VEC_last (operand_entry_t, *ops); | |
606 tree type = TREE_TYPE (oelast->op); | |
607 | |
608 if (oelast->rank == 0 | |
609 && (INTEGRAL_TYPE_P (type) || FLOAT_TYPE_P (type))) | |
610 { | |
611 switch (opcode) | |
612 { | |
613 case BIT_AND_EXPR: | |
614 if (integer_zerop (oelast->op)) | |
615 { | |
616 if (VEC_length (operand_entry_t, *ops) != 1) | |
617 { | |
618 if (dump_file && (dump_flags & TDF_DETAILS)) | |
619 fprintf (dump_file, "Found & 0, removing all other ops\n"); | |
620 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
621 reassociate_stats.ops_eliminated |
0 | 622 += VEC_length (operand_entry_t, *ops) - 1; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
623 |
0 | 624 VEC_free (operand_entry_t, heap, *ops); |
625 *ops = NULL; | |
626 VEC_safe_push (operand_entry_t, heap, *ops, oelast); | |
627 return; | |
628 } | |
629 } | |
630 else if (integer_all_onesp (oelast->op)) | |
631 { | |
632 if (VEC_length (operand_entry_t, *ops) != 1) | |
633 { | |
634 if (dump_file && (dump_flags & TDF_DETAILS)) | |
635 fprintf (dump_file, "Found & -1, removing\n"); | |
636 VEC_pop (operand_entry_t, *ops); | |
637 reassociate_stats.ops_eliminated++; | |
638 } | |
639 } | |
640 break; | |
641 case BIT_IOR_EXPR: | |
642 if (integer_all_onesp (oelast->op)) | |
643 { | |
644 if (VEC_length (operand_entry_t, *ops) != 1) | |
645 { | |
646 if (dump_file && (dump_flags & TDF_DETAILS)) | |
647 fprintf (dump_file, "Found | -1, removing all other ops\n"); | |
648 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
649 reassociate_stats.ops_eliminated |
0 | 650 += VEC_length (operand_entry_t, *ops) - 1; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
651 |
0 | 652 VEC_free (operand_entry_t, heap, *ops); |
653 *ops = NULL; | |
654 VEC_safe_push (operand_entry_t, heap, *ops, oelast); | |
655 return; | |
656 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
657 } |
0 | 658 else if (integer_zerop (oelast->op)) |
659 { | |
660 if (VEC_length (operand_entry_t, *ops) != 1) | |
661 { | |
662 if (dump_file && (dump_flags & TDF_DETAILS)) | |
663 fprintf (dump_file, "Found | 0, removing\n"); | |
664 VEC_pop (operand_entry_t, *ops); | |
665 reassociate_stats.ops_eliminated++; | |
666 } | |
667 } | |
668 break; | |
669 case MULT_EXPR: | |
670 if (integer_zerop (oelast->op) | |
671 || (FLOAT_TYPE_P (type) | |
672 && !HONOR_NANS (TYPE_MODE (type)) | |
673 && !HONOR_SIGNED_ZEROS (TYPE_MODE (type)) | |
674 && real_zerop (oelast->op))) | |
675 { | |
676 if (VEC_length (operand_entry_t, *ops) != 1) | |
677 { | |
678 if (dump_file && (dump_flags & TDF_DETAILS)) | |
679 fprintf (dump_file, "Found * 0, removing all other ops\n"); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
680 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
681 reassociate_stats.ops_eliminated |
0 | 682 += VEC_length (operand_entry_t, *ops) - 1; |
683 VEC_free (operand_entry_t, heap, *ops); | |
684 *ops = NULL; | |
685 VEC_safe_push (operand_entry_t, heap, *ops, oelast); | |
686 return; | |
687 } | |
688 } | |
689 else if (integer_onep (oelast->op) | |
690 || (FLOAT_TYPE_P (type) | |
691 && !HONOR_SNANS (TYPE_MODE (type)) | |
692 && real_onep (oelast->op))) | |
693 { | |
694 if (VEC_length (operand_entry_t, *ops) != 1) | |
695 { | |
696 if (dump_file && (dump_flags & TDF_DETAILS)) | |
697 fprintf (dump_file, "Found * 1, removing\n"); | |
698 VEC_pop (operand_entry_t, *ops); | |
699 reassociate_stats.ops_eliminated++; | |
700 return; | |
701 } | |
702 } | |
703 break; | |
704 case BIT_XOR_EXPR: | |
705 case PLUS_EXPR: | |
706 case MINUS_EXPR: | |
707 if (integer_zerop (oelast->op) | |
708 || (FLOAT_TYPE_P (type) | |
709 && (opcode == PLUS_EXPR || opcode == MINUS_EXPR) | |
710 && fold_real_zero_addition_p (type, oelast->op, | |
711 opcode == MINUS_EXPR))) | |
712 { | |
713 if (VEC_length (operand_entry_t, *ops) != 1) | |
714 { | |
715 if (dump_file && (dump_flags & TDF_DETAILS)) | |
716 fprintf (dump_file, "Found [|^+] 0, removing\n"); | |
717 VEC_pop (operand_entry_t, *ops); | |
718 reassociate_stats.ops_eliminated++; | |
719 return; | |
720 } | |
721 } | |
722 break; | |
723 default: | |
724 break; | |
725 } | |
726 } | |
727 } | |
728 | |
729 | |
730 static void linearize_expr_tree (VEC(operand_entry_t, heap) **, gimple, | |
731 bool, bool); | |
732 | |
733 /* Structure for tracking and counting operands. */ | |
734 typedef struct oecount_s { | |
735 int cnt; | |
736 enum tree_code oecode; | |
737 tree op; | |
738 } oecount; | |
739 | |
740 DEF_VEC_O(oecount); | |
741 DEF_VEC_ALLOC_O(oecount,heap); | |
742 | |
743 /* The heap for the oecount hashtable and the sorted list of operands. */ | |
744 static VEC (oecount, heap) *cvec; | |
745 | |
746 /* Hash function for oecount. */ | |
747 | |
748 static hashval_t | |
749 oecount_hash (const void *p) | |
750 { | |
751 const oecount *c = VEC_index (oecount, cvec, (size_t)p - 42); | |
752 return htab_hash_pointer (c->op) ^ (hashval_t)c->oecode; | |
753 } | |
754 | |
755 /* Comparison function for oecount. */ | |
756 | |
757 static int | |
758 oecount_eq (const void *p1, const void *p2) | |
759 { | |
760 const oecount *c1 = VEC_index (oecount, cvec, (size_t)p1 - 42); | |
761 const oecount *c2 = VEC_index (oecount, cvec, (size_t)p2 - 42); | |
762 return (c1->oecode == c2->oecode | |
763 && c1->op == c2->op); | |
764 } | |
765 | |
766 /* Comparison function for qsort sorting oecount elements by count. */ | |
767 | |
768 static int | |
769 oecount_cmp (const void *p1, const void *p2) | |
770 { | |
771 const oecount *c1 = (const oecount *)p1; | |
772 const oecount *c2 = (const oecount *)p2; | |
773 return c1->cnt - c2->cnt; | |
774 } | |
775 | |
776 /* Walks the linear chain with result *DEF searching for an operation | |
777 with operand OP and code OPCODE removing that from the chain. *DEF | |
778 is updated if there is only one operand but no operation left. */ | |
779 | |
780 static void | |
781 zero_one_operation (tree *def, enum tree_code opcode, tree op) | |
782 { | |
783 gimple stmt = SSA_NAME_DEF_STMT (*def); | |
784 | |
785 do | |
786 { | |
787 tree name = gimple_assign_rhs1 (stmt); | |
788 | |
789 /* If this is the operation we look for and one of the operands | |
790 is ours simply propagate the other operand into the stmts | |
791 single use. */ | |
792 if (gimple_assign_rhs_code (stmt) == opcode | |
793 && (name == op | |
794 || gimple_assign_rhs2 (stmt) == op)) | |
795 { | |
796 gimple use_stmt; | |
797 use_operand_p use; | |
798 gimple_stmt_iterator gsi; | |
799 if (name == op) | |
800 name = gimple_assign_rhs2 (stmt); | |
801 gcc_assert (has_single_use (gimple_assign_lhs (stmt))); | |
802 single_imm_use (gimple_assign_lhs (stmt), &use, &use_stmt); | |
803 if (gimple_assign_lhs (stmt) == *def) | |
804 *def = name; | |
805 SET_USE (use, name); | |
806 if (TREE_CODE (name) != SSA_NAME) | |
807 update_stmt (use_stmt); | |
808 gsi = gsi_for_stmt (stmt); | |
809 gsi_remove (&gsi, true); | |
810 release_defs (stmt); | |
811 return; | |
812 } | |
813 | |
814 /* Continue walking the chain. */ | |
815 gcc_assert (name != op | |
816 && TREE_CODE (name) == SSA_NAME); | |
817 stmt = SSA_NAME_DEF_STMT (name); | |
818 } | |
819 while (1); | |
820 } | |
821 | |
/* Builds one statement performing OP1 OPCODE OP2 using TMPVAR for
   the result.  Places the statement after the definition of either
   OP1 or OP2.  Returns the new statement.  */

static gimple
build_and_add_sum (tree tmpvar, tree op1, tree op2, enum tree_code opcode)
{
  gimple op1def = NULL, op2def = NULL;
  gimple_stmt_iterator gsi;
  tree op;
  gimple sum;

  /* Create the addition statement.  */
  sum = gimple_build_assign_with_ops (opcode, tmpvar, op1, op2);
  op = make_ssa_name (tmpvar, sum);
  gimple_assign_set_lhs (sum, op);

  /* Find an insertion place and insert.  There are three cases,
     depending on which operand definitions exist and which dominates
     the other.  */
  if (TREE_CODE (op1) == SSA_NAME)
    op1def = SSA_NAME_DEF_STMT (op1);
  if (TREE_CODE (op2) == SSA_NAME)
    op2def = SSA_NAME_DEF_STMT (op2);
  if ((!op1def || gimple_nop_p (op1def))
      && (!op2def || gimple_nop_p (op2def)))
    {
      /* Neither operand has a real defining statement (constants or
	 default definitions); insert at the start of the function's
	 first real block.  */
      gsi = gsi_start_bb (single_succ (ENTRY_BLOCK_PTR));
      gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
    }
  else if ((!op1def || gimple_nop_p (op1def))
	   || (op2def && !gimple_nop_p (op2def)
	       && stmt_dominates_stmt_p (op1def, op2def)))
    {
      /* OP2's definition is the later of the two; insert right after
	 it so both operands are available.  */
      if (gimple_code (op2def) == GIMPLE_PHI)
	{
	  /* Cannot insert among PHIs; use the start of the PHI's
	     block instead.  */
	  gsi = gsi_start_bb (gimple_bb (op2def));
	  gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
	}
      else
	{
	  if (!stmt_ends_bb_p (op2def))
	    {
	      gsi = gsi_for_stmt (op2def);
	      gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
	    }
	  else
	    {
	      /* The definition ends its block (e.g. a statement that
		 can throw); insert on the fallthru edge.  */
	      edge e;
	      edge_iterator ei;

	      FOR_EACH_EDGE (e, ei, gimple_bb (op2def)->succs)
		if (e->flags & EDGE_FALLTHRU)
		  gsi_insert_on_edge_immediate (e, sum);
	    }
	}
    }
  else
    {
      /* Symmetric case: OP1's definition is the later one.  */
      if (gimple_code (op1def) == GIMPLE_PHI)
	{
	  gsi = gsi_start_bb (gimple_bb (op1def));
	  gsi_insert_before (&gsi, sum, GSI_NEW_STMT);
	}
      else
	{
	  if (!stmt_ends_bb_p (op1def))
	    {
	      gsi = gsi_for_stmt (op1def);
	      gsi_insert_after (&gsi, sum, GSI_NEW_STMT);
	    }
	  else
	    {
	      edge e;
	      edge_iterator ei;

	      FOR_EACH_EDGE (e, ei, gimple_bb (op1def)->succs)
		if (e->flags & EDGE_FALLTHRU)
		  gsi_insert_on_edge_immediate (e, sum);
	    }
	}
    }
  update_stmt (sum);

  return sum;
}
906 | |
/* Perform un-distribution of divisions and multiplications.
   A * X + B * X is transformed into (A + B) * X and A / X + B / X
   to (A + B) / X for real X.

   The algorithm is organized as follows.

   - First we walk the addition chain *OPS looking for summands that
     are defined by a multiplication or a real division.  This results
     in the candidates bitmap with relevant indices into *OPS.

   - Second we build the chains of multiplications or divisions for
     these candidates, counting the number of occurences of (operand, code)
     pairs in all of the candidates chains.

   - Third we sort the (operand, code) pairs by number of occurence and
     process them starting with the pair with the most uses.

     * For each such pair we walk the candidates again to build a
       second candidate bitmap noting all multiplication/division chains
       that have at least one occurence of (operand, code).

     * We build an alternate addition chain only covering these
       candidates with one (operand, code) operation removed from their
       multiplication/division chain.

     * The first candidate gets replaced by the alternate addition chain
       multiplied/divided by the operand.

     * All candidate chains get disabled for further processing and
       processing of (operand, code) pairs continues.

   The alternate addition chains built are re-processed by the main
   reassociation algorithm which allows optimizing a * x * y + b * y * x
   to (a + b ) * x * y in one invocation of the reassociation pass.

   Returns true if any transformation was applied, so the caller can
   re-run optimization over the modified *OPS.  */

static bool
undistribute_ops_list (enum tree_code opcode,
		       VEC (operand_entry_t, heap) **ops, struct loop *loop)
{
  unsigned int length = VEC_length (operand_entry_t, *ops);
  operand_entry_t oe1;
  unsigned i, j;
  sbitmap candidates, candidates2;
  unsigned nr_candidates, nr_candidates2;
  sbitmap_iterator sbi0;
  VEC (operand_entry_t, heap) **subops;
  htab_t ctable;
  bool changed = false;

  /* Only addition chains with at least two summands are handled.  */
  if (length <= 1
      || opcode != PLUS_EXPR)
    return false;

  /* Build a list of candidates to process: summands defined by a
     reassociable multiplication or real division.  */
  candidates = sbitmap_alloc (length);
  sbitmap_zero (candidates);
  nr_candidates = 0;
  for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe1); ++i)
    {
      enum tree_code dcode;
      gimple oe1def;

      if (TREE_CODE (oe1->op) != SSA_NAME)
	continue;
      oe1def = SSA_NAME_DEF_STMT (oe1->op);
      if (!is_gimple_assign (oe1def))
	continue;
      dcode = gimple_assign_rhs_code (oe1def);
      if ((dcode != MULT_EXPR
	   && dcode != RDIV_EXPR)
	  || !is_reassociable_op (oe1def, dcode, loop))
	continue;

      SET_BIT (candidates, i);
      nr_candidates++;
    }

  if (nr_candidates < 2)
    {
      sbitmap_free (candidates);
      return false;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "searching for un-distribute opportunities ");
      print_generic_expr (dump_file,
			  VEC_index (operand_entry_t, *ops,
				     sbitmap_first_set_bit (candidates))->op, 0);
      fprintf (dump_file, " %d\n", nr_candidates);
    }

  /* Build linearized sub-operand lists and the counting table.
     CVEC is a file-scope vector (declared earlier in this file) that
     accumulates one oecount per distinct (operand, code) pair.  */
  cvec = NULL;
  ctable = htab_create (15, oecount_hash, oecount_eq, NULL);
  subops = XCNEWVEC (VEC (operand_entry_t, heap) *,
		     VEC_length (operand_entry_t, *ops));
  EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
    {
      gimple oedef;
      enum tree_code oecode;
      unsigned j;

      oedef = SSA_NAME_DEF_STMT (VEC_index (operand_entry_t, *ops, i)->op);
      oecode = gimple_assign_rhs_code (oedef);
      linearize_expr_tree (&subops[i], oedef,
			   associative_tree_code (oecode), false);

      for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
	{
	  oecount c;
	  void **slot;
	  size_t idx;
	  c.oecode = oecode;
	  c.cnt = 1;
	  c.op = oe1->op;
	  VEC_safe_push (oecount, heap, cvec, &c);
	  /* The hash table stores the 1-based index into CVEC encoded
	     as a pointer, biased by 41 so the stored value can never
	     collide with the table's NULL/empty markers.  */
	  idx = VEC_length (oecount, cvec) + 41;
	  slot = htab_find_slot (ctable, (void *)idx, INSERT);
	  if (!*slot)
	    {
	      *slot = (void *)idx;
	    }
	  else
	    {
	      /* Pair already seen: drop the entry we just pushed and
		 bump the existing count.  Subtracting 42 undoes both
		 the bias of 41 and the 1-based offset.  */
	      VEC_pop (oecount, cvec);
	      VEC_index (oecount, cvec, (size_t)*slot - 42)->cnt++;
	    }
	}
    }
  htab_delete (ctable);

  /* Sort the counting table so the most frequent pair comes last.  */
  qsort (VEC_address (oecount, cvec), VEC_length (oecount, cvec),
	 sizeof (oecount), oecount_cmp);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      oecount *c;
      fprintf (dump_file, "Candidates:\n");
      for (j = 0; VEC_iterate (oecount, cvec, j, c); ++j)
	{
	  fprintf (dump_file, "  %u %s: ", c->cnt,
		   c->oecode == MULT_EXPR
		   ? "*" : c->oecode == RDIV_EXPR ? "/" : "?");
	  print_generic_expr (dump_file, c->op, 0);
	  fprintf (dump_file, "\n");
	}
    }

  /* Process the (operand, code) pairs in order of most occurence.  */
  candidates2 = sbitmap_alloc (length);
  while (!VEC_empty (oecount, cvec))
    {
      oecount *c = VEC_last (oecount, cvec);
      /* CVEC is sorted ascending by count, so once the last entry has
	 fewer than two uses no remaining pair is profitable.  */
      if (c->cnt < 2)
	break;

      /* Now collect the operands in the outer chain that contain
	 the common operand in their inner chain.  */
      sbitmap_zero (candidates2);
      nr_candidates2 = 0;
      EXECUTE_IF_SET_IN_SBITMAP (candidates, 0, i, sbi0)
	{
	  gimple oedef;
	  enum tree_code oecode;
	  unsigned j;
	  tree op = VEC_index (operand_entry_t, *ops, i)->op;

	  /* If we undistributed in this chain already this may be
	     a constant.  */
	  if (TREE_CODE (op) != SSA_NAME)
	    continue;

	  oedef = SSA_NAME_DEF_STMT (op);
	  oecode = gimple_assign_rhs_code (oedef);
	  if (oecode != c->oecode)
	    continue;

	  for (j = 0; VEC_iterate (operand_entry_t, subops[i], j, oe1); ++j)
	    {
	      if (oe1->op == c->op)
		{
		  SET_BIT (candidates2, i);
		  ++nr_candidates2;
		  break;
		}
	    }
	}

      if (nr_candidates2 >= 2)
	{
	  operand_entry_t oe1, oe2;
	  tree tmpvar;
	  gimple prod;
	  int first = sbitmap_first_set_bit (candidates2);

	  /* Build the new addition chain.  */
	  oe1 = VEC_index (operand_entry_t, *ops, first);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Building (");
	      print_generic_expr (dump_file, oe1->op, 0);
	    }
	  tmpvar = create_tmp_var (TREE_TYPE (oe1->op), NULL);
	  add_referenced_var (tmpvar);
	  /* Strip one occurrence of C->OP from the first candidate's
	     inner chain, then sum it with the other stripped
	     candidates.  */
	  zero_one_operation (&oe1->op, c->oecode, c->op);
	  EXECUTE_IF_SET_IN_SBITMAP (candidates2, first+1, i, sbi0)
	    {
	      gimple sum;
	      oe2 = VEC_index (operand_entry_t, *ops, i);
	      if (dump_file && (dump_flags & TDF_DETAILS))
		{
		  fprintf (dump_file, " + ");
		  print_generic_expr (dump_file, oe2->op, 0);
		}
	      zero_one_operation (&oe2->op, c->oecode, c->op);
	      sum = build_and_add_sum (tmpvar, oe1->op, oe2->op, opcode);
	      /* OE2's contribution is now folded into the sum; neutralize
		 its slot in the outer addition chain with a zero.  */
	      oe2->op = fold_convert (TREE_TYPE (oe2->op), integer_zero_node);
	      oe2->rank = 0;
	      oe1->op = gimple_get_lhs (sum);
	    }

	  /* Apply the multiplication/division.  */
	  prod = build_and_add_sum (tmpvar, oe1->op, c->op, c->oecode);
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, ") %s ", c->oecode == MULT_EXPR ? "*" : "/");
	      print_generic_expr (dump_file, c->op, 0);
	      fprintf (dump_file, "\n");
	    }

	  /* Record it in the addition chain and disable further
	     undistribution with this op.  */
	  oe1->op = gimple_assign_lhs (prod);
	  oe1->rank = get_rank (oe1->op);
	  VEC_free (operand_entry_t, heap, subops[first]);

	  changed = true;
	}

      VEC_pop (oecount, cvec);
    }

  /* Release all temporary structures.  */
  for (i = 0; i < VEC_length (operand_entry_t, *ops); ++i)
    VEC_free (operand_entry_t, heap, subops[i]);
  free (subops);
  VEC_free (oecount, heap, cvec);
  sbitmap_free (candidates);
  sbitmap_free (candidates2);

  return changed;
}
1160 | |
1161 | |
/* Perform various identities and other optimizations on the list of
   operand entries, stored in OPS.  The tree code for the binary
   operation between all the operands is OPCODE.  May recurse on the
   (shortened) list after eliminations until it reaches a fixpoint.  */

static void
optimize_ops_list (enum tree_code opcode,
		   VEC (operand_entry_t, heap) **ops)
{
  unsigned int length = VEC_length (operand_entry_t, *ops);
  unsigned int i;
  operand_entry_t oe;
  operand_entry_t oelast = NULL;
  bool iterate = false;

  if (length == 1)
    return;

  oelast = VEC_last (operand_entry_t, *ops);

  /* If the last two are constants, pop the constants off, merge them
     and try the next two.  (Constants sort to the end of the vector,
     so rank 0 + invariant identifies them.)  */
  if (oelast->rank == 0 && is_gimple_min_invariant (oelast->op))
    {
      operand_entry_t oelm1 = VEC_index (operand_entry_t, *ops, length - 2);

      if (oelm1->rank == 0
	  && is_gimple_min_invariant (oelm1->op)
	  && useless_type_conversion_p (TREE_TYPE (oelm1->op),
					TREE_TYPE (oelast->op)))
	{
	  tree folded = fold_binary (opcode, TREE_TYPE (oelm1->op),
				     oelm1->op, oelast->op);

	  /* Only merge if folding produced another invariant;
	     otherwise leave both operands in place.  */
	  if (folded && is_gimple_min_invariant (folded))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, "Merging constants\n");

	      VEC_pop (operand_entry_t, *ops);
	      VEC_pop (operand_entry_t, *ops);

	      add_to_ops_vec (ops, folded);
	      reassociate_stats.constants_eliminated++;

	      /* Restart: the merged constant may combine further.  */
	      optimize_ops_list (opcode, ops);
	      return;
	    }
	}
    }

  eliminate_using_constants (opcode, ops);
  oelast = NULL;

  /* Walk the list looking for pairwise eliminations (NOT pairs,
     duplicates, x + -x).  The index only advances when nothing was
     removed at position I.  */
  for (i = 0; VEC_iterate (operand_entry_t, *ops, i, oe);)
    {
      bool done = false;

      if (eliminate_not_pairs (opcode, ops, i, oe))
	return;
      if (eliminate_duplicate_pair (opcode, ops, &done, i, oe, oelast)
	  || (!done && eliminate_plus_minus_pair (opcode, ops, i, oe)))
	{
	  if (done)
	    return;
	  iterate = true;
	  oelast = NULL;
	  continue;
	}
      oelast = oe;
      i++;
    }

  length = VEC_length (operand_entry_t, *ops);
  oelast = VEC_last (operand_entry_t, *ops);

  /* An elimination may have exposed new opportunities; iterate.  */
  if (iterate)
    optimize_ops_list (opcode, ops);
}
1240 | |
1241 /* Return true if OPERAND is defined by a PHI node which uses the LHS | |
1242 of STMT in it's operands. This is also known as a "destructive | |
1243 update" operation. */ | |
1244 | |
1245 static bool | |
1246 is_phi_for_stmt (gimple stmt, tree operand) | |
1247 { | |
1248 gimple def_stmt; | |
1249 tree lhs; | |
1250 use_operand_p arg_p; | |
1251 ssa_op_iter i; | |
1252 | |
1253 if (TREE_CODE (operand) != SSA_NAME) | |
1254 return false; | |
1255 | |
1256 lhs = gimple_assign_lhs (stmt); | |
1257 | |
1258 def_stmt = SSA_NAME_DEF_STMT (operand); | |
1259 if (gimple_code (def_stmt) != GIMPLE_PHI) | |
1260 return false; | |
1261 | |
1262 FOR_EACH_PHI_ARG (arg_p, def_stmt, i, SSA_OP_USE) | |
1263 if (lhs == USE_FROM_PTR (arg_p)) | |
1264 return true; | |
1265 return false; | |
1266 } | |
1267 | |
1268 /* Remove def stmt of VAR if VAR has zero uses and recurse | |
1269 on rhs1 operand if so. */ | |
1270 | |
1271 static void | |
1272 remove_visited_stmt_chain (tree var) | |
1273 { | |
1274 gimple stmt; | |
1275 gimple_stmt_iterator gsi; | |
1276 | |
1277 while (1) | |
1278 { | |
1279 if (TREE_CODE (var) != SSA_NAME || !has_zero_uses (var)) | |
1280 return; | |
1281 stmt = SSA_NAME_DEF_STMT (var); | |
1282 if (!is_gimple_assign (stmt) | |
1283 || !gimple_visited_p (stmt)) | |
1284 return; | |
1285 var = gimple_assign_rhs1 (stmt); | |
1286 gsi = gsi_for_stmt (stmt); | |
1287 gsi_remove (&gsi, true); | |
1288 release_defs (stmt); | |
1289 } | |
1290 } | |
1291 | |
/* Recursively rewrite our linearized statements so that the operators
   match those in OPS[OPINDEX], putting the computation in rank
   order.  MOVED indicates whether the chain of defining statements
   has already been moved adjacent to STMT (done lazily on the first
   rewrite that needs it).  */

static void
rewrite_expr_tree (gimple stmt, unsigned int opindex,
		   VEC(operand_entry_t, heap) * ops, bool moved)
{
  tree rhs1 = gimple_assign_rhs1 (stmt);
  tree rhs2 = gimple_assign_rhs2 (stmt);
  operand_entry_t oe;

  /* If we have three operands left, then we want to make sure the one
     that gets the double binary op are the ones with the same rank.

     The alternative we try is to see if this is a destructive
     update style statement, which is like:
     b = phi (a, ...)
     a = c + b;
     In that case, we want to use the destructive update form to
     expose the possible vectorizer sum reduction opportunity.
     In that case, the third operand will be the phi node.

     We could, of course, try to be better as noted above, and do a
     lot of work to try to find these opportunities in >3 operand
     cases, but it is unlikely to be worth it.  */
  if (opindex + 3 == VEC_length (operand_entry_t, ops))
    {
      operand_entry_t oe1, oe2, oe3;

      oe1 = VEC_index (operand_entry_t, ops, opindex);
      oe2 = VEC_index (operand_entry_t, ops, opindex + 1);
      oe3 = VEC_index (operand_entry_t, ops, opindex + 2);

      /* Swap OE1 and OE3 (or OE1 and OE2) so the preferred operand
	 ends up last, i.e. deepest in the rewritten chain.  */
      if ((oe1->rank == oe2->rank
	   && oe2->rank != oe3->rank)
	  || (is_phi_for_stmt (stmt, oe3->op)
	      && !is_phi_for_stmt (stmt, oe1->op)
	      && !is_phi_for_stmt (stmt, oe2->op)))
	{
	  struct operand_entry temp = *oe3;
	  oe3->op = oe1->op;
	  oe3->rank = oe1->rank;
	  oe1->op = temp.op;
	  oe1->rank= temp.rank;
	}
      else if ((oe1->rank == oe3->rank
		&& oe2->rank != oe3->rank)
	       || (is_phi_for_stmt (stmt, oe2->op)
		   && !is_phi_for_stmt (stmt, oe1->op)
		   && !is_phi_for_stmt (stmt, oe3->op)))
	{
	  struct operand_entry temp = *oe2;
	  oe2->op = oe1->op;
	  oe2->rank = oe1->rank;
	  oe1->op = temp.op;
	  oe1->rank= temp.rank;
	}
    }

  /* The final recursion case for this function is that you have
     exactly two operations left.
     If we had one exactly one op in the entire list to start with, we
     would have never called this function, and the tail recursion
     rewrites them one at a time.  */
  if (opindex + 2 == VEC_length (operand_entry_t, ops))
    {
      operand_entry_t oe1, oe2;

      oe1 = VEC_index (operand_entry_t, ops, opindex);
      oe2 = VEC_index (operand_entry_t, ops, opindex + 1);

      if (rhs1 != oe1->op || rhs2 != oe2->op)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "Transforming ");
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }

	  gimple_assign_set_rhs1 (stmt, oe1->op);
	  gimple_assign_set_rhs2 (stmt, oe2->op);
	  update_stmt (stmt);
	  /* The old rhs1 chain may now be dead; clean it up.  */
	  if (rhs1 != oe1->op && rhs1 != oe2->op)
	    remove_visited_stmt_chain (rhs1);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, " into ");
	      print_gimple_stmt (dump_file, stmt, 0, 0);
	    }

	}
      return;
    }

  /* If we hit here, we should have 3 or more ops left.  */
  gcc_assert (opindex + 2 < VEC_length (operand_entry_t, ops));

  /* Rewrite the next operator.  */
  oe = VEC_index (operand_entry_t, ops, opindex);

  if (oe->op != rhs2)
    {
      if (!moved)
	{
	  /* Move the remaining chain of defining statements (reached
	     through successive rhs1 operands) immediately before STMT
	     so the rewritten operands' definitions dominate their
	     uses.  Only done once per chain.  */
	  gimple_stmt_iterator gsinow, gsirhs1;
	  gimple stmt1 = stmt, stmt2;
	  unsigned int count;

	  gsinow = gsi_for_stmt (stmt);
	  count = VEC_length (operand_entry_t, ops) - opindex - 2;
	  while (count-- != 0)
	    {
	      stmt2 = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt1));
	      gsirhs1 = gsi_for_stmt (stmt2);
	      gsi_move_before (&gsirhs1, &gsinow);
	      gsi_prev (&gsinow);
	      stmt1 = stmt2;
	    }
	  moved = true;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Transforming ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      gimple_assign_set_rhs2 (stmt, oe->op);
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " into ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}
    }
  /* Recurse on the LHS of the binary operator, which is guaranteed to
     be the non-leaf side.  */
  rewrite_expr_tree (SSA_NAME_DEF_STMT (rhs1), opindex + 1, ops, moved);
}
1434 | |
/* Transform STMT, which is really (A +B) + (C + D) into the left
   linear form, ((A+B)+C)+D.
   Recurse on D if necessary.  */

static void
linearize_expr (gimple stmt)
{
  gimple_stmt_iterator gsinow, gsirhs;
  gimple binlhs = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (stmt));
  gimple binrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  gimple newbinrhs = NULL;
  struct loop *loop = loop_containing_stmt (stmt);

  gcc_assert (is_reassociable_op (binlhs, rhscode, loop)
	      && is_reassociable_op (binrhs, rhscode, loop));

  /* Move BINRHS up next to STMT so that after the operand rewiring
     below its definition still dominates its use in STMT.  */
  gsinow = gsi_for_stmt (stmt);
  gsirhs = gsi_for_stmt (binrhs);
  gsi_move_before (&gsirhs, &gsinow);

  /* Rewire: STMT = (BINLHS + C) + D where BINRHS was (C + D).  */
  gimple_assign_set_rhs2 (stmt, gimple_assign_rhs1 (binrhs));
  gimple_assign_set_rhs1 (binrhs, gimple_assign_lhs (binlhs));
  gimple_assign_set_rhs1 (stmt, gimple_assign_lhs (binrhs));

  if (TREE_CODE (gimple_assign_rhs2 (stmt)) == SSA_NAME)
    newbinrhs = SSA_NAME_DEF_STMT (gimple_assign_rhs2 (stmt));

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Linearized: ");
      print_gimple_stmt (dump_file, stmt, 0, 0);
    }

  reassociate_stats.linearized++;
  update_stmt (binrhs);
  update_stmt (binlhs);
  update_stmt (stmt);

  gimple_set_visited (stmt, true);
  gimple_set_visited (binlhs, true);
  gimple_set_visited (binrhs, true);

  /* Tail recurse on the new rhs if it still needs reassociation.  */
  if (newbinrhs && is_reassociable_op (newbinrhs, rhscode, loop))
    /* ??? This should probably be linearize_expr (newbinrhs) but I don't
	want to change the algorithm while converting to tuples.  */
    linearize_expr (stmt);
}
1484 | |
1485 /* If LHS has a single immediate use that is a GIMPLE_ASSIGN statement, return | |
1486 it. Otherwise, return NULL. */ | |
1487 | |
1488 static gimple | |
1489 get_single_immediate_use (tree lhs) | |
1490 { | |
1491 use_operand_p immuse; | |
1492 gimple immusestmt; | |
1493 | |
1494 if (TREE_CODE (lhs) == SSA_NAME | |
1495 && single_imm_use (lhs, &immuse, &immusestmt) | |
1496 && is_gimple_assign (immusestmt)) | |
1497 return immusestmt; | |
1498 | |
1499 return NULL; | |
1500 } | |
1501 | |
1502 static VEC(tree, heap) *broken_up_subtracts; | |
1503 | |
/* Recursively negate the value of TONEGATE, and return the SSA_NAME
   representing the negated value.  Insertions of any necessary
   instructions go before GSI.
   This function is recursive in that, if you hand it "a_5" as the
   value to negate, and a_5 is defined by "a_5 = b_3 + b_4", it will
   transform b_3 + b_4 into a_5 = -b_3 + -b_4.  */

static tree
negate_value (tree tonegate, gimple_stmt_iterator *gsi)
{
  gimple negatedefstmt= NULL;
  tree resultofnegate;

  /* If we are trying to negate a name, defined by an add, negate the
     add operands instead.  Only done when the defining add has a
     single use, so rewriting it in place is safe.  */
  if (TREE_CODE (tonegate) == SSA_NAME)
    negatedefstmt = SSA_NAME_DEF_STMT (tonegate);
  if (TREE_CODE (tonegate) == SSA_NAME
      && is_gimple_assign (negatedefstmt)
      && TREE_CODE (gimple_assign_lhs (negatedefstmt)) == SSA_NAME
      && has_single_use (gimple_assign_lhs (negatedefstmt))
      && gimple_assign_rhs_code (negatedefstmt) == PLUS_EXPR)
    {
      gimple_stmt_iterator gsi;
      tree rhs1 = gimple_assign_rhs1 (negatedefstmt);
      tree rhs2 = gimple_assign_rhs2 (negatedefstmt);

      /* Re-fetch the iterator after each recursive call since the
	 recursion may insert statements before NEGATEDEFSTMT.  */
      gsi = gsi_for_stmt (negatedefstmt);
      rhs1 = negate_value (rhs1, &gsi);
      gimple_assign_set_rhs1 (negatedefstmt, rhs1);

      gsi = gsi_for_stmt (negatedefstmt);
      rhs2 = negate_value (rhs2, &gsi);
      gimple_assign_set_rhs2 (negatedefstmt, rhs2);

      update_stmt (negatedefstmt);
      return gimple_assign_lhs (negatedefstmt);
    }

  /* Base case: emit an explicit NEGATE_EXPR and record the result so
     repropagate_negates can revisit it.  */
  tonegate = fold_build1 (NEGATE_EXPR, TREE_TYPE (tonegate), tonegate);
  resultofnegate = force_gimple_operand_gsi (gsi, tonegate, true,
					     NULL_TREE, true, GSI_SAME_STMT);
  VEC_safe_push (tree, heap, broken_up_subtracts, resultofnegate);
  return resultofnegate;
}
1549 | |
1550 /* Return true if we should break up the subtract in STMT into an add | |
1551 with negate. This is true when we the subtract operands are really | |
1552 adds, or the subtract itself is used in an add expression. In | |
1553 either case, breaking up the subtract into an add with negate | |
1554 exposes the adds to reassociation. */ | |
1555 | |
1556 static bool | |
1557 should_break_up_subtract (gimple stmt) | |
1558 { | |
1559 tree lhs = gimple_assign_lhs (stmt); | |
1560 tree binlhs = gimple_assign_rhs1 (stmt); | |
1561 tree binrhs = gimple_assign_rhs2 (stmt); | |
1562 gimple immusestmt; | |
1563 struct loop *loop = loop_containing_stmt (stmt); | |
1564 | |
1565 if (TREE_CODE (binlhs) == SSA_NAME | |
1566 && is_reassociable_op (SSA_NAME_DEF_STMT (binlhs), PLUS_EXPR, loop)) | |
1567 return true; | |
1568 | |
1569 if (TREE_CODE (binrhs) == SSA_NAME | |
1570 && is_reassociable_op (SSA_NAME_DEF_STMT (binrhs), PLUS_EXPR, loop)) | |
1571 return true; | |
1572 | |
1573 if (TREE_CODE (lhs) == SSA_NAME | |
1574 && (immusestmt = get_single_immediate_use (lhs)) | |
1575 && is_gimple_assign (immusestmt) | |
1576 && (gimple_assign_rhs_code (immusestmt) == PLUS_EXPR | |
1577 || gimple_assign_rhs_code (immusestmt) == MULT_EXPR)) | |
1578 return true; | |
1579 return false; | |
1580 } | |
1581 | |
1582 /* Transform STMT from A - B into A + -B. */ | |
1583 | |
1584 static void | |
1585 break_up_subtract (gimple stmt, gimple_stmt_iterator *gsip) | |
1586 { | |
1587 tree rhs1 = gimple_assign_rhs1 (stmt); | |
1588 tree rhs2 = gimple_assign_rhs2 (stmt); | |
1589 | |
1590 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1591 { | |
1592 fprintf (dump_file, "Breaking up subtract "); | |
1593 print_gimple_stmt (dump_file, stmt, 0, 0); | |
1594 } | |
1595 | |
1596 rhs2 = negate_value (rhs2, gsip); | |
1597 gimple_assign_set_rhs_with_ops (gsip, PLUS_EXPR, rhs1, rhs2); | |
1598 update_stmt (stmt); | |
1599 } | |
1600 | |
/* Recursively linearize a binary expression that is the RHS of STMT.
   Place the operands of the expression tree in the vector named OPS.
   IS_ASSOCIATIVE says whether the operation may have its operands
   reordered; SET_VISITED controls whether visited flags are marked
   on the statements walked.  */

static void
linearize_expr_tree (VEC(operand_entry_t, heap) **ops, gimple stmt,
		     bool is_associative, bool set_visited)
{
  tree binlhs = gimple_assign_rhs1 (stmt);
  tree binrhs = gimple_assign_rhs2 (stmt);
  gimple binlhsdef, binrhsdef;
  bool binlhsisreassoc = false;
  bool binrhsisreassoc = false;
  enum tree_code rhscode = gimple_assign_rhs_code (stmt);
  struct loop *loop = loop_containing_stmt (stmt);

  if (set_visited)
    gimple_set_visited (stmt, true);

  if (TREE_CODE (binlhs) == SSA_NAME)
    {
      binlhsdef = SSA_NAME_DEF_STMT (binlhs);
      binlhsisreassoc = is_reassociable_op (binlhsdef, rhscode, loop);
    }

  if (TREE_CODE (binrhs) == SSA_NAME)
    {
      binrhsdef = SSA_NAME_DEF_STMT (binrhs);
      binrhsisreassoc = is_reassociable_op (binrhsdef, rhscode, loop);
    }

  /* If the LHS is not reassociable, but the RHS is, we need to swap
     them.  If neither is reassociable, there is nothing we can do, so
     just put them in the ops vector.  If the LHS is reassociable,
     linearize it.  If both are reassociable, then linearize the RHS
     and the LHS.  */

  if (!binlhsisreassoc)
    {
      tree temp;

      /* If this is not a associative operation like division, give up.  */
      if (!is_associative)
	{
	  add_to_ops_vec (ops, binrhs);
	  return;
	}

      if (!binrhsisreassoc)
	{
	  add_to_ops_vec (ops, binrhs);
	  add_to_ops_vec (ops, binlhs);
	  return;
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "swapping operands of ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      swap_tree_operands (stmt,
			  gimple_assign_rhs1_ptr (stmt),
			  gimple_assign_rhs2_ptr (stmt));
      update_stmt (stmt);

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, " is now ");
	  print_gimple_stmt (dump_file, stmt, 0, 0);
	}

      /* We want to make it so the lhs is always the reassociative op,
	 so swap.  */
      temp = binlhs;
      binlhs = binrhs;
      binrhs = temp;
    }
  else if (binrhsisreassoc)
    {
      /* Both sides reassociable: flatten (A+B)+(C+D) into left-linear
	 form first so only the LHS side remains non-leaf.  */
      linearize_expr (stmt);
      binlhs = gimple_assign_rhs1 (stmt);
      binrhs = gimple_assign_rhs2 (stmt);
    }

  /* At this point BINRHS must be a leaf of the expression tree.  */
  gcc_assert (TREE_CODE (binrhs) != SSA_NAME
	      || !is_reassociable_op (SSA_NAME_DEF_STMT (binrhs),
				      rhscode, loop));
  linearize_expr_tree (ops, SSA_NAME_DEF_STMT (binlhs),
		       is_associative, set_visited);
  add_to_ops_vec (ops, binrhs);
}
1692 | |
/* Repropagate the negates back into subtracts, since no other pass
   currently does it.  Walks the broken_up_subtracts vector filled in
   by negate_value.  */

static void
repropagate_negates (void)
{
  unsigned int i = 0;
  tree negate;

  for (i = 0; VEC_iterate (tree, broken_up_subtracts, i, negate); i++)
    {
      gimple user = get_single_immediate_use (negate);

      /* The negate operand can be either operand of a PLUS_EXPR
	 (it can be the LHS if the RHS is a constant for example).

	 Force the negate operand to the RHS of the PLUS_EXPR, then
	 transform the PLUS_EXPR into a MINUS_EXPR.  */
      if (user
	  && is_gimple_assign (user)
	  && gimple_assign_rhs_code (user) == PLUS_EXPR)
	{
	  /* If the negated operand appears on the LHS of the
	     PLUS_EXPR, exchange the operands of the PLUS_EXPR
	     to force the negated operand to the RHS of the PLUS_EXPR.  */
	  if (gimple_assign_rhs1 (user) == negate)
	    {
	      swap_tree_operands (user,
				  gimple_assign_rhs1_ptr (user),
				  gimple_assign_rhs2_ptr (user));
	    }

	  /* Now transform the PLUS_EXPR into a MINUS_EXPR and replace
	     the RHS of the PLUS_EXPR with the operand of the NEGATE_EXPR.  */
	  if (gimple_assign_rhs2 (user) == negate)
	    {
	      tree rhs1 = gimple_assign_rhs1 (user);
	      tree rhs2 = get_unary_op (negate, NEGATE_EXPR);
	      gimple_stmt_iterator gsi = gsi_for_stmt (user);
	      gimple_assign_set_rhs_with_ops (&gsi, MINUS_EXPR, rhs1, rhs2);
	      update_stmt (user);
	    }
	}
    }
}
1738 | |
/* Break up subtract operations in block BB.

   We do this top down because we don't know whether the subtract is
   part of a possible chain of reassociation except at the top.

   IE given
   d = f + g
   c = a + e
   b = c - d
   q = b - r
   k = t - q

   we want to break up k = t - q, but we won't until we've transformed q
   = b - r, which won't be broken up until we transform b = c - d.

   En passant, clear the GIMPLE visited flag on every statement.  */

static void
break_up_subtract_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;

  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);
      gimple_set_visited (stmt, false);

      /* Look for simple gimple subtract operations.  */
      if (is_gimple_assign (stmt)
	  && gimple_assign_rhs_code (stmt) == MINUS_EXPR)
	{
	  tree lhs = gimple_assign_lhs (stmt);
	  tree rhs1 = gimple_assign_rhs1 (stmt);
	  tree rhs2 = gimple_assign_rhs2 (stmt);

	  /* If associative-math we can do reassociation for
	     non-integral types.  Or, we can do reassociation for
	     non-saturating fixed-point types.  */
	  if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	       || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
	       || !INTEGRAL_TYPE_P (TREE_TYPE (rhs2)))
	      && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs))
		  || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs1))
		  || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs2))
		  || !flag_associative_math)
	      && (!NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE (lhs))
		  || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs1))
		  || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs2))))
	    continue;

	  /* Check for a subtract used only in an addition.  If this
	     is the case, transform it into add of a negate for better
	     reassociation.  IE transform C = A-B into C = A + -B if C
	     is only used in an addition.  */
	  if (should_break_up_subtract (stmt))
	    break_up_subtract (stmt, &gsi);
	}
    }
  /* Recurse on the dominator children so subtracts are processed
     top down.  */
  for (son = first_dom_son (CDI_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_DOMINATORS, son))
    break_up_subtract_bb (son);
}
1803 | |
/* Reassociate expressions in basic block BB and its post-dominator as
   children.  */

static void
reassociate_bb (basic_block bb)
{
  gimple_stmt_iterator gsi;
  basic_block son;

  /* Walk the block backwards so that uses are seen before their
     defining statements; visited defs encountered later can then be
     removed if reassociation made them dead.  */
  for (gsi = gsi_last_bb (bb); !gsi_end_p (gsi); gsi_prev (&gsi))
    {
      gimple stmt = gsi_stmt (gsi);

      if (is_gimple_assign (stmt))
	{
	  tree lhs, rhs1, rhs2;
	  enum tree_code rhs_code = gimple_assign_rhs_code (stmt);

	  /* If this is not a gimple binary expression, there is
	     nothing for us to do with it.  */
	  if (get_gimple_rhs_class (rhs_code) != GIMPLE_BINARY_RHS)
	    continue;

	  /* If this was part of an already processed statement,
	     we don't need to touch it again.  */
	  if (gimple_visited_p (stmt))
	    {
	      /* This statement might have become dead because of previous
		 reassociations.  */
	      if (has_zero_uses (gimple_get_lhs (stmt)))
		{
		  gsi_remove (&gsi, true);
		  release_defs (stmt);
		  /* We might end up removing the last stmt above which
		     places the iterator to the end of the sequence.
		     Reset it to the last stmt in this case which might
		     be the end of the sequence as well if we removed
		     the last statement of the sequence.  In which case
		     we need to bail out.  */
		  if (gsi_end_p (gsi))
		    {
		      gsi = gsi_last_bb (bb);
		      if (gsi_end_p (gsi))
			break;
		    }
		}
	      continue;
	    }

	  lhs = gimple_assign_lhs (stmt);
	  rhs1 = gimple_assign_rhs1 (stmt);
	  rhs2 = gimple_assign_rhs2 (stmt);

	  /* If associative-math we can do reassociation for
	     non-integral types.  Or, we can do reassociation for
	     non-saturating fixed-point types.  (Same eligibility test
	     as in break_up_subtract_bb.)  */
	  if ((!INTEGRAL_TYPE_P (TREE_TYPE (lhs))
	       || !INTEGRAL_TYPE_P (TREE_TYPE (rhs1))
	       || !INTEGRAL_TYPE_P (TREE_TYPE (rhs2)))
	      && (!SCALAR_FLOAT_TYPE_P (TREE_TYPE (lhs))
		  || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs1))
		  || !SCALAR_FLOAT_TYPE_P (TREE_TYPE(rhs2))
		  || !flag_associative_math)
	      && (!NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE (lhs))
		  || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs1))
		  || !NON_SAT_FIXED_POINT_TYPE_P (TREE_TYPE(rhs2))))
	    continue;

	  if (associative_tree_code (rhs_code))
	    {
	      VEC(operand_entry_t, heap) *ops = NULL;

	      /* There may be no immediate uses left by the time we
		 get here because we may have eliminated them all.  */
	      if (TREE_CODE (lhs) == SSA_NAME && has_zero_uses (lhs))
		continue;

	      gimple_set_visited (stmt, true);
	      /* Flatten the whole op chain feeding this statement into
		 OPS, sort by rank, then run the list optimizations.  */
	      linearize_expr_tree (&ops, stmt, true, true);
	      qsort (VEC_address (operand_entry_t, ops),
		     VEC_length (operand_entry_t, ops),
		     sizeof (operand_entry_t),
		     sort_by_operand_rank);
	      optimize_ops_list (rhs_code, &ops);
	      if (undistribute_ops_list (rhs_code, &ops,
					 loop_containing_stmt (stmt)))
		{
		  /* Undistribution changed the operand list; re-sort
		     and re-run the list optimizations.  */
		  qsort (VEC_address (operand_entry_t, ops),
			 VEC_length (operand_entry_t, ops),
			 sizeof (operand_entry_t),
			 sort_by_operand_rank);
		  optimize_ops_list (rhs_code, &ops);
		}

	      if (VEC_length (operand_entry_t, ops) == 1)
		{
		  /* Everything folded to a single operand; replace the
		     whole RHS with it and clean up the now-dead chain.  */
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, "Transforming ");
		      print_gimple_stmt (dump_file, stmt, 0, 0);
		    }

		  rhs1 = gimple_assign_rhs1 (stmt);
		  gimple_assign_set_rhs_from_tree (&gsi,
						   VEC_last (operand_entry_t,
							     ops)->op);
		  update_stmt (stmt);
		  remove_visited_stmt_chain (rhs1);

		  if (dump_file && (dump_flags & TDF_DETAILS))
		    {
		      fprintf (dump_file, " into ");
		      print_gimple_stmt (dump_file, stmt, 0, 0);
		    }
		}
	      else
		rewrite_expr_tree (stmt, 0, ops, false);

	      VEC_free (operand_entry_t, heap, ops);
	    }
	}
    }
  for (son = first_dom_son (CDI_POST_DOMINATORS, bb);
       son;
       son = next_dom_son (CDI_POST_DOMINATORS, son))
    reassociate_bb (son);
}
1931 | |
1932 void dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops); | |
1933 void debug_ops_vector (VEC (operand_entry_t, heap) *ops); | |
1934 | |
1935 /* Dump the operand entry vector OPS to FILE. */ | |
1936 | |
1937 void | |
1938 dump_ops_vector (FILE *file, VEC (operand_entry_t, heap) *ops) | |
1939 { | |
1940 operand_entry_t oe; | |
1941 unsigned int i; | |
1942 | |
1943 for (i = 0; VEC_iterate (operand_entry_t, ops, i, oe); i++) | |
1944 { | |
1945 fprintf (file, "Op %d -> rank: %d, tree: ", i, oe->rank); | |
1946 print_generic_expr (file, oe->op, 0); | |
1947 } | |
1948 } | |
1949 | |
/* Dump the operand entry vector OPS to STDERR.  Convenience wrapper
   around dump_ops_vector intended for interactive use in a debugger.  */

void
debug_ops_vector (VEC (operand_entry_t, heap) *ops)
{
  dump_ops_vector (stderr, ops);
}
1957 | |
/* Run both phases of the pass over the whole function.  */

static void
do_reassoc (void)
{
  /* Phase 1: top-down over the dominator tree, rewrite eligible
     subtracts as additions of negates (also clears visited flags).  */
  break_up_subtract_bb (ENTRY_BLOCK_PTR);
  /* Phase 2: over the post-dominator tree, linearize and reassociate
     the operand chains.  */
  reassociate_bb (EXIT_BLOCK_PTR);
}
1964 | |
/* Initialize the reassociation pass.  Sets up the pass-global state:
   loop info, statistics, the operand-entry allocation pool, per-BB and
   per-SSA-name rank tables, and post-dominator info.  */

static void
init_reassoc (void)
{
  int i;
  /* Ranks start at 2; rank is pre-incremented before each use below.  */
  long rank = 2;
  tree param;
  int *bbs = XNEWVEC (int, last_basic_block + 1);

  /* Find the loops, so that we can prevent moving calculations in
     them.  */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

  memset (&reassociate_stats, 0, sizeof (reassociate_stats));

  operand_entry_pool = create_alloc_pool ("operand entry pool",
					  sizeof (struct operand_entry), 30);

  /* Reverse RPO (Reverse Post Order) will give us something where
     deeper loops come later.  */
  pre_and_rev_post_order_compute (NULL, bbs, false);
  bb_rank = XCNEWVEC (long, last_basic_block + 1);
  operand_rank = pointer_map_create ();

  /* Give each argument a distinct rank.  */
  for (param = DECL_ARGUMENTS (current_function_decl);
       param;
       param = TREE_CHAIN (param))
    {
      if (gimple_default_def (cfun, param) != NULL)
	{
	  tree def = gimple_default_def (cfun, param);
	  insert_operand_rank (def, ++rank);
	}
    }

  /* Give the chain decl a distinct rank.  */
  if (cfun->static_chain_decl != NULL)
    {
      tree def = gimple_default_def (cfun, cfun->static_chain_decl);
      if (def != NULL)
	insert_operand_rank (def, ++rank);
    }

  /* Set up rank for each BB.  The shift leaves the low 16 bits free
     for within-block operand ranks.  */
  for (i = 0; i < n_basic_blocks - NUM_FIXED_BLOCKS; i++)
    bb_rank[bbs[i]] = ++rank << 16;

  free (bbs);
  calculate_dominance_info (CDI_POST_DOMINATORS);
  broken_up_subtracts = NULL;
}
2018 | |
/* Cleanup after the reassociation pass, and print stats if
   requested.  Releases everything init_reassoc allocated.  */

static void
fini_reassoc (void)
{
  statistics_counter_event (cfun, "Linearized",
			    reassociate_stats.linearized);
  statistics_counter_event (cfun, "Constants eliminated",
			    reassociate_stats.constants_eliminated);
  statistics_counter_event (cfun, "Ops eliminated",
			    reassociate_stats.ops_eliminated);
  statistics_counter_event (cfun, "Statements rewritten",
			    reassociate_stats.rewritten);

  pointer_map_destroy (operand_rank);
  free_alloc_pool (operand_entry_pool);
  free (bb_rank);
  VEC_free (tree, heap, broken_up_subtracts);
  free_dominance_info (CDI_POST_DOMINATORS);
  loop_optimizer_finalize ();
}
2041 | |
/* Gate and execute functions for Reassociation.  */

/* Pass entry point: set up, reassociate, push negates back into
   subtracts, tear down.  Returns 0 (no extra TODO flags).  */

static unsigned int
execute_reassoc (void)
{
  init_reassoc ();

  do_reassoc ();
  repropagate_negates ();

  fini_reassoc ();
  return 0;
}
2055 | |
2056 static bool | |
2057 gate_tree_ssa_reassoc (void) | |
2058 { | |
2059 return flag_tree_reassoc != 0; | |
2060 } | |
2061 | |
/* Pass descriptor for the tree reassociation pass.  */

struct gimple_opt_pass pass_reassoc =
{
 {
  GIMPLE_PASS,
  "reassoc",				/* name */
  gate_tree_ssa_reassoc,		/* gate */
  execute_reassoc,			/* execute */
  NULL,					/* sub */
  NULL,					/* next */
  0,					/* static_pass_number */
  TV_TREE_REASSOC,			/* tv_id */
  PROP_cfg | PROP_ssa,			/* properties_required */
  0,					/* properties_provided */
  0,					/* properties_destroyed */
  0,					/* todo_flags_start */
  TODO_dump_func | TODO_ggc_collect | TODO_verify_ssa /* todo_flags_finish */
 }
};
2080 |