Mercurial > hg > CbC > CbC_gcc
annotate gcc/tree-ssa-coalesce.c @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | 84e7813d76e9 |
children |
rev | line source |
---|---|
0 | 1 /* Coalesce SSA_NAMES together for the out-of-ssa pass. |
145 | 2 Copyright (C) 2004-2020 Free Software Foundation, Inc. |
0 | 3 Contributed by Andrew MacLeod <amacleod@redhat.com> |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify | |
8 it under the terms of the GNU General Public License as published by | |
9 the Free Software Foundation; either version 3, or (at your option) | |
10 any later version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, | |
13 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 GNU General Public License for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. */ | |
20 | |
21 #include "config.h" | |
22 #include "system.h" | |
23 #include "coretypes.h" | |
111 | 24 #include "backend.h" |
0 | 25 #include "tree.h" |
111 | 26 #include "gimple.h" |
27 #include "predict.h" | |
28 #include "memmodel.h" | |
29 #include "tm_p.h" | |
30 #include "ssa.h" | |
31 #include "tree-ssa.h" | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
32 #include "tree-pretty-print.h" |
111 | 33 #include "diagnostic-core.h" |
34 #include "dumpfile.h" | |
35 #include "gimple-iterator.h" | |
0 | 36 #include "tree-ssa-live.h" |
111 | 37 #include "tree-ssa-coalesce.h" |
38 #include "explow.h" | |
39 #include "tree-dfa.h" | |
40 #include "stor-layout.h" | |
0 | 41 |
42 /* This set of routines implements a coalesce_list. This is an object which | |
43 is used to track pairs of ssa_names which are desirable to coalesce | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
44 together to avoid copies. Costs are associated with each pair, and when |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
45 all desired information has been collected, the object can be used to |
0 | 46 order the pairs for processing. */ |
47 | |
48 /* This structure defines a pair entry. */ | |
49 | |
111 | 50 struct coalesce_pair |
0 | 51 { |
52 int first_element; | |
53 int second_element; | |
54 int cost; | |
111 | 55 |
56 /* A count of the number of unique partitions this pair would conflict | |
57 with if coalescing was successful. This is the secondary sort key, | |
58 given two pairs with equal costs, we will prefer the pair with a smaller | |
59 conflict set. | |
60 | |
61 This is lazily initialized when we discover two coalescing pairs have | |
62 the same primary cost. | |
63 | |
64 Note this is not updated and propagated as pairs are coalesced. */ | |
65 int conflict_count; | |
66 | |
67 /* The order in which coalescing pairs are discovered is recorded in this | |
68 field, which is used as the final tie breaker when sorting coalesce | |
69 pairs. */ | |
70 int index; | |
71 }; | |
72 | |
73 /* This represents a conflict graph. Implemented as an array of bitmaps. | |
74 A full matrix is used for conflicts rather than just upper triangular form. | |
75 this makes it much simpler and faster to perform conflict merges. */ | |
76 | |
77 struct ssa_conflicts | |
78 { | |
79 bitmap_obstack obstack; /* A place to allocate our bitmaps. */ | |
80 vec<bitmap> conflicts; | |
81 }; | |
82 | |
83 /* The narrow API of the qsort comparison function doesn't allow easy | |
84 access to additional arguments. So we have two globals (ick) to hold | |
85 the data we need. They're initialized before the call to qsort and | |
86 wiped immediately after. */ | |
87 static ssa_conflicts *conflicts_; | |
88 static var_map map_; | |
0 | 89 |
111 | 90 /* Coalesce pair hashtable helpers. */ |
91 | |
92 struct coalesce_pair_hasher : nofree_ptr_hash <coalesce_pair> | |
93 { | |
94 static inline hashval_t hash (const coalesce_pair *); | |
95 static inline bool equal (const coalesce_pair *, const coalesce_pair *); | |
96 }; | |
97 | |
98 /* Hash function for coalesce list. Calculate hash for PAIR. */ | |
99 | |
100 inline hashval_t | |
101 coalesce_pair_hasher::hash (const coalesce_pair *pair) | |
102 { | |
103 hashval_t a = (hashval_t)(pair->first_element); | |
104 hashval_t b = (hashval_t)(pair->second_element); | |
105 | |
106 return b * (b - 1) / 2 + a; | |
107 } | |
108 | |
109 /* Equality function for coalesce list hash table. Compare PAIR1 and PAIR2, | |
110 returning TRUE if the two pairs are equivalent. */ | |
111 | |
112 inline bool | |
113 coalesce_pair_hasher::equal (const coalesce_pair *p1, const coalesce_pair *p2) | |
114 { | |
115 return (p1->first_element == p2->first_element | |
116 && p1->second_element == p2->second_element); | |
117 } | |
118 | |
119 typedef hash_table<coalesce_pair_hasher> coalesce_table_type; | |
120 typedef coalesce_table_type::iterator coalesce_iterator_type; | |
121 | |
122 | |
123 struct cost_one_pair | |
0 | 124 { |
125 int first_element; | |
126 int second_element; | |
111 | 127 cost_one_pair *next; |
128 }; | |
0 | 129 |
130 /* This structure maintains the list of coalesce pairs. */ | |
131 | |
111 | 132 struct coalesce_list |
0 | 133 { |
111 | 134 coalesce_table_type *list; /* Hash table. */ |
135 coalesce_pair **sorted; /* List when sorted. */ | |
0 | 136 int num_sorted; /* Number in the sorted list. */ |
111 | 137 cost_one_pair *cost_one_list;/* Single use coalesces with cost 1. */ |
145 | 138 obstack ob; |
111 | 139 }; |
0 | 140 |
141 #define NO_BEST_COALESCE -1 | |
142 #define MUST_COALESCE_COST INT_MAX | |
143 | |
144 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
145 /* Return cost of execution of copy instruction with FREQUENCY. */ |
0 | 146 |
147 static inline int | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
148 coalesce_cost (int frequency, bool optimize_for_size) |
0 | 149 { |
150 /* Base costs on BB frequencies bounded by 1. */ | |
151 int cost = frequency; | |
152 | |
153 if (!cost) | |
154 cost = 1; | |
155 | |
156 if (optimize_for_size) | |
157 cost = 1; | |
158 | |
159 return cost; | |
160 } | |
161 | |
162 | |
163 /* Return the cost of executing a copy instruction in basic block BB. */ | |
164 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
165 static inline int |
0 | 166 coalesce_cost_bb (basic_block bb) |
167 { | |
131 | 168 return coalesce_cost (bb->count.to_frequency (cfun), |
169 optimize_bb_for_size_p (bb)); | |
0 | 170 } |
171 | |
172 | |
173 /* Return the cost of executing a copy instruction on edge E. */ | |
174 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
175 static inline int |
0 | 176 coalesce_cost_edge (edge e) |
177 { | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
178 int mult = 1; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
179 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
180 /* Inserting copy on critical edge costs more than inserting it elsewhere. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
181 if (EDGE_CRITICAL_P (e)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
182 mult = 2; |
0 | 183 if (e->flags & EDGE_ABNORMAL) |
184 return MUST_COALESCE_COST; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
185 if (e->flags & EDGE_EH) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
186 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
187 edge e2; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
188 edge_iterator ei; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
189 FOR_EACH_EDGE (e2, ei, e->dest->preds) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
190 if (e2 != e) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
191 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
192 /* Putting code on EH edge that leads to BB |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
193 with multiple predecestors imply splitting of |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
194 edge too. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
195 if (mult < 2) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
196 mult = 2; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
197 /* If there are multiple EH predecestors, we |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
198 also copy EH regions and produce separate |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
199 landing pad. This is expensive. */ |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
200 if (e2->flags & EDGE_EH) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
201 { |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
202 mult = 5; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
203 break; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
204 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
205 } |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
206 } |
0 | 207 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
208 return coalesce_cost (EDGE_FREQUENCY (e), |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
209 optimize_edge_for_size_p (e)) * mult; |
0 | 210 } |
211 | |
212 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
213 /* Retrieve a pair to coalesce from the cost_one_list in CL. Returns the |
0 | 214 2 elements via P1 and P2. 1 is returned by the function if there is a pair, |
215 NO_BEST_COALESCE is returned if there aren't any. */ | |
216 | |
217 static inline int | |
111 | 218 pop_cost_one_pair (coalesce_list *cl, int *p1, int *p2) |
0 | 219 { |
111 | 220 cost_one_pair *ptr; |
0 | 221 |
222 ptr = cl->cost_one_list; | |
223 if (!ptr) | |
224 return NO_BEST_COALESCE; | |
225 | |
226 *p1 = ptr->first_element; | |
227 *p2 = ptr->second_element; | |
228 cl->cost_one_list = ptr->next; | |
229 | |
230 return 1; | |
231 } | |
232 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
233 /* Retrieve the most expensive remaining pair to coalesce from CL. Returns the |
0 | 234 2 elements via P1 and P2. Their calculated cost is returned by the function. |
235 NO_BEST_COALESCE is returned if the coalesce list is empty. */ | |
236 | |
237 static inline int | |
111 | 238 pop_best_coalesce (coalesce_list *cl, int *p1, int *p2) |
0 | 239 { |
111 | 240 coalesce_pair *node; |
0 | 241 int ret; |
242 | |
243 if (cl->sorted == NULL) | |
244 return pop_cost_one_pair (cl, p1, p2); | |
245 | |
246 if (cl->num_sorted == 0) | |
247 return pop_cost_one_pair (cl, p1, p2); | |
248 | |
249 node = cl->sorted[--(cl->num_sorted)]; | |
250 *p1 = node->first_element; | |
251 *p2 = node->second_element; | |
252 ret = node->cost; | |
253 | |
254 return ret; | |
255 } | |
256 | |
257 | |
258 /* Create a new empty coalesce list object and return it. */ | |
259 | |
111 | 260 static inline coalesce_list * |
0 | 261 create_coalesce_list (void) |
262 { | |
111 | 263 coalesce_list *list; |
0 | 264 unsigned size = num_ssa_names * 3; |
265 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
266 if (size < 40) |
0 | 267 size = 40; |
268 | |
111 | 269 list = (coalesce_list *) xmalloc (sizeof (struct coalesce_list)); |
270 list->list = new coalesce_table_type (size); | |
0 | 271 list->sorted = NULL; |
272 list->num_sorted = 0; | |
273 list->cost_one_list = NULL; | |
145 | 274 gcc_obstack_init (&list->ob); |
0 | 275 return list; |
276 } | |
277 | |
278 | |
279 /* Delete coalesce list CL. */ | |
280 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
281 static inline void |
111 | 282 delete_coalesce_list (coalesce_list *cl) |
0 | 283 { |
284 gcc_assert (cl->cost_one_list == NULL); | |
111 | 285 delete cl->list; |
286 cl->list = NULL; | |
287 free (cl->sorted); | |
0 | 288 gcc_assert (cl->num_sorted == 0); |
145 | 289 obstack_free (&cl->ob, NULL); |
0 | 290 free (cl); |
291 } | |
292 | |
111 | 293 /* Return the number of unique coalesce pairs in CL. */ |
294 | |
295 static inline int | |
296 num_coalesce_pairs (coalesce_list *cl) | |
297 { | |
298 return cl->list->elements (); | |
299 } | |
0 | 300 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
301 /* Find a matching coalesce pair object in CL for the pair P1 and P2. If |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
302 one isn't found, return NULL if CREATE is false, otherwise create a new |
0 | 303 coalesce pair object and return it. */ |
304 | |
111 | 305 static coalesce_pair * |
306 find_coalesce_pair (coalesce_list *cl, int p1, int p2, bool create) | |
0 | 307 { |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
308 struct coalesce_pair p; |
111 | 309 coalesce_pair **slot; |
0 | 310 unsigned int hash; |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
311 |
0 | 312 /* Normalize so that p1 is the smaller value. */ |
313 if (p2 < p1) | |
314 { | |
315 p.first_element = p2; | |
316 p.second_element = p1; | |
317 } | |
318 else | |
319 { | |
320 p.first_element = p1; | |
321 p.second_element = p2; | |
322 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
323 |
111 | 324 hash = coalesce_pair_hasher::hash (&p); |
325 slot = cl->list->find_slot_with_hash (&p, hash, create ? INSERT : NO_INSERT); | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
326 if (!slot) |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
327 return NULL; |
0 | 328 |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
329 if (!*slot) |
0 | 330 { |
145 | 331 struct coalesce_pair * pair = XOBNEW (&cl->ob, struct coalesce_pair); |
0 | 332 gcc_assert (cl->sorted == NULL); |
333 pair->first_element = p.first_element; | |
334 pair->second_element = p.second_element; | |
335 pair->cost = 0; | |
111 | 336 pair->index = num_coalesce_pairs (cl); |
337 pair->conflict_count = 0; | |
338 *slot = pair; | |
0 | 339 } |
340 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
341 return (struct coalesce_pair *) *slot; |
0 | 342 } |
343 | |
344 static inline void | |
111 | 345 add_cost_one_coalesce (coalesce_list *cl, int p1, int p2) |
0 | 346 { |
111 | 347 cost_one_pair *pair; |
0 | 348 |
145 | 349 pair = XOBNEW (&cl->ob, cost_one_pair); |
0 | 350 pair->first_element = p1; |
351 pair->second_element = p2; | |
352 pair->next = cl->cost_one_list; | |
353 cl->cost_one_list = pair; | |
354 } | |
355 | |
356 | |
357 /* Add a coalesce between P1 and P2 in list CL with a cost of VALUE. */ | |
358 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
359 static inline void |
111 | 360 add_coalesce (coalesce_list *cl, int p1, int p2, int value) |
0 | 361 { |
111 | 362 coalesce_pair *node; |
0 | 363 |
364 gcc_assert (cl->sorted == NULL); | |
365 if (p1 == p2) | |
366 return; | |
367 | |
368 node = find_coalesce_pair (cl, p1, p2, true); | |
369 | |
370 /* Once the value is at least MUST_COALESCE_COST - 1, leave it that way. */ | |
371 if (node->cost < MUST_COALESCE_COST - 1) | |
372 { | |
373 if (value < MUST_COALESCE_COST - 1) | |
374 node->cost += value; | |
375 else | |
376 node->cost = value; | |
377 } | |
378 } | |
379 | |
111 | 380 /* Compute and record how many unique conflicts would exist for the |
381 representative partition for each coalesce pair in CL. | |
382 | |
383 CONFLICTS is the conflict graph and MAP is the current partition view. */ | |
384 | |
385 static void | |
386 initialize_conflict_count (coalesce_pair *p, | |
387 ssa_conflicts *conflicts, | |
388 var_map map) | |
389 { | |
390 int p1 = var_to_partition (map, ssa_name (p->first_element)); | |
391 int p2 = var_to_partition (map, ssa_name (p->second_element)); | |
392 | |
393 /* 4 cases. If both P1 and P2 have conflicts, then build their | |
394 union and count the members. Else handle the degenerate cases | |
395 in the obvious ways. */ | |
396 if (conflicts->conflicts[p1] && conflicts->conflicts[p2]) | |
397 p->conflict_count = bitmap_count_unique_bits (conflicts->conflicts[p1], | |
398 conflicts->conflicts[p2]); | |
399 else if (conflicts->conflicts[p1]) | |
400 p->conflict_count = bitmap_count_bits (conflicts->conflicts[p1]); | |
401 else if (conflicts->conflicts[p2]) | |
402 p->conflict_count = bitmap_count_bits (conflicts->conflicts[p2]); | |
403 else | |
404 p->conflict_count = 0; | |
405 } | |
406 | |
0 | 407 |
408 /* Comparison function to allow qsort to sort P1 and P2 in Ascending order. */ | |
409 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
410 static int |
0 | 411 compare_pairs (const void *p1, const void *p2) |
412 { | |
111 | 413 coalesce_pair *const *const pp1 = (coalesce_pair *const *) p1; |
414 coalesce_pair *const *const pp2 = (coalesce_pair *const *) p2; | |
0 | 415 int result; |
416 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
417 result = (* pp1)->cost - (* pp2)->cost; |
111 | 418 /* We use the size of the resulting conflict set as the secondary sort key. |
419 Given two equal costing coalesce pairs, we want to prefer the pair that | |
420 has the smaller conflict set. */ | |
0 | 421 if (result == 0) |
422 { | |
111 | 423 if (flag_expensive_optimizations) |
424 { | |
425 /* Lazily initialize the conflict counts as it's fairly expensive | |
426 to compute. */ | |
427 if ((*pp2)->conflict_count == 0) | |
428 initialize_conflict_count (*pp2, conflicts_, map_); | |
429 if ((*pp1)->conflict_count == 0) | |
430 initialize_conflict_count (*pp1, conflicts_, map_); | |
431 | |
432 result = (*pp2)->conflict_count - (*pp1)->conflict_count; | |
433 } | |
434 | |
435 /* And if everything else is equal, then sort based on which | |
436 coalesce pair was found first. */ | |
0 | 437 if (result == 0) |
111 | 438 result = (*pp2)->index - (*pp1)->index; |
0 | 439 } |
440 | |
441 return result; | |
442 } | |
443 | |
444 /* Iterate over CL using ITER, returning values in PAIR. */ | |
445 | |
446 #define FOR_EACH_PARTITION_PAIR(PAIR, ITER, CL) \ | |
111 | 447 FOR_EACH_HASH_TABLE_ELEMENT (*(CL)->list, (PAIR), coalesce_pair_p, (ITER)) |
0 | 448 |
449 | |
450 /* Prepare CL for removal of preferred pairs. When finished they are sorted | |
451 in order from most important coalesce to least important. */ | |
452 | |
453 static void | |
111 | 454 sort_coalesce_list (coalesce_list *cl, ssa_conflicts *conflicts, var_map map) |
0 | 455 { |
456 unsigned x, num; | |
111 | 457 coalesce_pair *p; |
458 coalesce_iterator_type ppi; | |
0 | 459 |
460 gcc_assert (cl->sorted == NULL); | |
461 | |
462 num = num_coalesce_pairs (cl); | |
463 cl->num_sorted = num; | |
464 if (num == 0) | |
465 return; | |
466 | |
467 /* Allocate a vector for the pair pointers. */ | |
111 | 468 cl->sorted = XNEWVEC (coalesce_pair *, num); |
0 | 469 |
470 /* Populate the vector with pointers to the pairs. */ | |
471 x = 0; | |
472 FOR_EACH_PARTITION_PAIR (p, ppi, cl) | |
473 cl->sorted[x++] = p; | |
474 gcc_assert (x == num); | |
475 | |
476 /* Already sorted. */ | |
477 if (num == 1) | |
478 return; | |
479 | |
111 | 480 /* We don't want to depend on qsort_r, so we have to stuff away |
481 additional data into globals so it can be referenced in | |
482 compare_pairs. */ | |
483 conflicts_ = conflicts; | |
484 map_ = map; | |
485 qsort (cl->sorted, num, sizeof (coalesce_pair *), compare_pairs); | |
486 conflicts_ = NULL; | |
487 map_ = NULL; | |
0 | 488 } |
489 | |
490 | |
491 /* Send debug info for coalesce list CL to file F. */ | |
492 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
493 static void |
111 | 494 dump_coalesce_list (FILE *f, coalesce_list *cl) |
0 | 495 { |
111 | 496 coalesce_pair *node; |
497 coalesce_iterator_type ppi; | |
498 | |
0 | 499 int x; |
500 tree var; | |
501 | |
502 if (cl->sorted == NULL) | |
503 { | |
504 fprintf (f, "Coalesce List:\n"); | |
505 FOR_EACH_PARTITION_PAIR (node, ppi, cl) | |
506 { | |
507 tree var1 = ssa_name (node->first_element); | |
508 tree var2 = ssa_name (node->second_element); | |
509 print_generic_expr (f, var1, TDF_SLIM); | |
510 fprintf (f, " <-> "); | |
511 print_generic_expr (f, var2, TDF_SLIM); | |
111 | 512 fprintf (f, " (%1d, %1d), ", node->cost, node->conflict_count); |
0 | 513 fprintf (f, "\n"); |
514 } | |
515 } | |
516 else | |
517 { | |
518 fprintf (f, "Sorted Coalesce list:\n"); | |
519 for (x = cl->num_sorted - 1 ; x >=0; x--) | |
520 { | |
521 node = cl->sorted[x]; | |
111 | 522 fprintf (f, "(%d, %d) ", node->cost, node->conflict_count); |
0 | 523 var = ssa_name (node->first_element); |
524 print_generic_expr (f, var, TDF_SLIM); | |
525 fprintf (f, " <-> "); | |
526 var = ssa_name (node->second_element); | |
527 print_generic_expr (f, var, TDF_SLIM); | |
528 fprintf (f, "\n"); | |
529 } | |
530 } | |
531 } | |
532 | |
533 | |
534 /* Return an empty new conflict graph for SIZE elements. */ | |
535 | |
111 | 536 static inline ssa_conflicts * |
0 | 537 ssa_conflicts_new (unsigned size) |
538 { | |
111 | 539 ssa_conflicts *ptr; |
0 | 540 |
111 | 541 ptr = XNEW (ssa_conflicts); |
542 bitmap_obstack_initialize (&ptr->obstack); | |
543 ptr->conflicts.create (size); | |
544 ptr->conflicts.safe_grow_cleared (size); | |
0 | 545 return ptr; |
546 } | |
547 | |
548 | |
549 /* Free storage for conflict graph PTR. */ | |
550 | |
551 static inline void | |
111 | 552 ssa_conflicts_delete (ssa_conflicts *ptr) |
0 | 553 { |
111 | 554 bitmap_obstack_release (&ptr->obstack); |
555 ptr->conflicts.release (); | |
0 | 556 free (ptr); |
557 } | |
558 | |
559 | |
560 /* Test if elements X and Y conflict in graph PTR. */ | |
561 | |
562 static inline bool | |
111 | 563 ssa_conflicts_test_p (ssa_conflicts *ptr, unsigned x, unsigned y) |
0 | 564 { |
111 | 565 bitmap bx = ptr->conflicts[x]; |
566 bitmap by = ptr->conflicts[y]; | |
0 | 567 |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
568 gcc_checking_assert (x != y); |
0 | 569 |
111 | 570 if (bx) |
0 | 571 /* Avoid the lookup if Y has no conflicts. */ |
111 | 572 return by ? bitmap_bit_p (bx, y) : false; |
0 | 573 else |
574 return false; | |
575 } | |
576 | |
577 | |
578 /* Add a conflict with Y to the bitmap for X in graph PTR. */ | |
579 | |
580 static inline void | |
111 | 581 ssa_conflicts_add_one (ssa_conflicts *ptr, unsigned x, unsigned y) |
0 | 582 { |
111 | 583 bitmap bx = ptr->conflicts[x]; |
0 | 584 /* If there are no conflicts yet, allocate the bitmap and set bit. */ |
111 | 585 if (! bx) |
586 bx = ptr->conflicts[x] = BITMAP_ALLOC (&ptr->obstack); | |
587 bitmap_set_bit (bx, y); | |
0 | 588 } |
589 | |
590 | |
591 /* Add conflicts between X and Y in graph PTR. */ | |
592 | |
593 static inline void | |
111 | 594 ssa_conflicts_add (ssa_conflicts *ptr, unsigned x, unsigned y) |
0 | 595 { |
67
f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
parents:
63
diff
changeset
|
596 gcc_checking_assert (x != y); |
0 | 597 ssa_conflicts_add_one (ptr, x, y); |
598 ssa_conflicts_add_one (ptr, y, x); | |
599 } | |
600 | |
601 | |
602 /* Merge all Y's conflict into X in graph PTR. */ | |
603 | |
604 static inline void | |
111 | 605 ssa_conflicts_merge (ssa_conflicts *ptr, unsigned x, unsigned y) |
0 | 606 { |
607 unsigned z; | |
608 bitmap_iterator bi; | |
111 | 609 bitmap bx = ptr->conflicts[x]; |
610 bitmap by = ptr->conflicts[y]; | |
0 | 611 |
111 | 612 gcc_checking_assert (x != y); |
613 if (! by) | |
0 | 614 return; |
615 | |
616 /* Add a conflict between X and every one Y has. If the bitmap doesn't | |
617 exist, then it has already been coalesced, and we don't need to add a | |
618 conflict. */ | |
111 | 619 EXECUTE_IF_SET_IN_BITMAP (by, 0, z, bi) |
620 { | |
621 bitmap bz = ptr->conflicts[z]; | |
622 if (bz) | |
131 | 623 { |
624 bool was_there = bitmap_clear_bit (bz, y); | |
625 gcc_checking_assert (was_there); | |
626 bitmap_set_bit (bz, x); | |
627 } | |
111 | 628 } |
0 | 629 |
111 | 630 if (bx) |
0 | 631 { |
632 /* If X has conflicts, add Y's to X. */ | |
111 | 633 bitmap_ior_into (bx, by); |
634 BITMAP_FREE (by); | |
635 ptr->conflicts[y] = NULL; | |
0 | 636 } |
637 else | |
638 { | |
639 /* If X has no conflicts, simply use Y's. */ | |
111 | 640 ptr->conflicts[x] = by; |
0 | 641 ptr->conflicts[y] = NULL; |
642 } | |
643 } | |
644 | |
645 | |
646 /* Dump a conflicts graph. */ | |
647 | |
648 static void | |
111 | 649 ssa_conflicts_dump (FILE *file, ssa_conflicts *ptr) |
0 | 650 { |
651 unsigned x; | |
111 | 652 bitmap b; |
0 | 653 |
654 fprintf (file, "\nConflict graph:\n"); | |
655 | |
111 | 656 FOR_EACH_VEC_ELT (ptr->conflicts, x, b) |
657 if (b) | |
0 | 658 { |
111 | 659 fprintf (file, "%d: ", x); |
660 dump_bitmap (file, b); | |
0 | 661 } |
662 } | |
663 | |
664 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
665 /* This structure is used to efficiently record the current status of live |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
666 SSA_NAMES when building a conflict graph. |
0 | 667 LIVE_BASE_VAR has a bit set for each base variable which has at least one |
668 ssa version live. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
669 LIVE_BASE_PARTITIONS is an array of bitmaps using the basevar table as an |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
670 index, and is used to track what partitions of each base variable are |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
671 live. This makes it easy to add conflicts between just live partitions |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
672 with the same base variable. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
673 The values in LIVE_BASE_PARTITIONS are only valid if the base variable is |
0 | 674 marked as being live. This delays clearing of these bitmaps until |
675 they are actually needed again. */ | |
676 | |
145 | 677 class live_track |
0 | 678 { |
145 | 679 public: |
111 | 680 bitmap_obstack obstack; /* A place to allocate our bitmaps. */ |
145 | 681 bitmap_head live_base_var; /* Indicates if a basevar is live. */ |
682 bitmap_head *live_base_partitions; /* Live partitions for each basevar. */ | |
0 | 683 var_map map; /* Var_map being used for partition mapping. */ |
111 | 684 }; |
0 | 685 |
686 | |
687 /* This routine will create a new live track structure based on the partitions | |
688 in MAP. */ | |
689 | |
111 | 690 static live_track * |
0 | 691 new_live_track (var_map map) |
692 { | |
111 | 693 live_track *ptr; |
0 | 694 int lim, x; |
695 | |
696 /* Make sure there is a partition view in place. */ | |
697 gcc_assert (map->partition_to_base_index != NULL); | |
698 | |
145 | 699 ptr = XNEW (live_track); |
0 | 700 ptr->map = map; |
701 lim = num_basevars (map); | |
111 | 702 bitmap_obstack_initialize (&ptr->obstack); |
145 | 703 ptr->live_base_partitions = XNEWVEC (bitmap_head, lim); |
704 bitmap_initialize (&ptr->live_base_var, &ptr->obstack); | |
0 | 705 for (x = 0; x < lim; x++) |
145 | 706 bitmap_initialize (&ptr->live_base_partitions[x], &ptr->obstack); |
0 | 707 return ptr; |
708 } | |
709 | |
710 | |
711 /* This routine will free the memory associated with PTR. */ | |
712 | |
713 static void | |
111 | 714 delete_live_track (live_track *ptr) |
0 | 715 { |
111 | 716 bitmap_obstack_release (&ptr->obstack); |
145 | 717 XDELETEVEC (ptr->live_base_partitions); |
718 XDELETE (ptr); | |
0 | 719 } |
720 | |
721 | |
722 /* This function will remove PARTITION from the live list in PTR. */ | |
723 | |
724 static inline void | |
111 | 725 live_track_remove_partition (live_track *ptr, int partition) |
0 | 726 { |
727 int root; | |
728 | |
729 root = basevar_index (ptr->map, partition); | |
145 | 730 bitmap_clear_bit (&ptr->live_base_partitions[root], partition); |
0 | 731 /* If the element list is empty, make the base variable not live either. */ |
145 | 732 if (bitmap_empty_p (&ptr->live_base_partitions[root])) |
733 bitmap_clear_bit (&ptr->live_base_var, root); | |
0 | 734 } |
735 | |
736 | |
737 /* This function will adds PARTITION to the live list in PTR. */ | |
738 | |
739 static inline void | |
111 | 740 live_track_add_partition (live_track *ptr, int partition) |
0 | 741 { |
742 int root; | |
743 | |
744 root = basevar_index (ptr->map, partition); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
745 /* If this base var wasn't live before, it is now. Clear the element list |
0 | 746 since it was delayed until needed. */ |
145 | 747 if (bitmap_set_bit (&ptr->live_base_var, root)) |
748 bitmap_clear (&ptr->live_base_partitions[root]); | |
749 bitmap_set_bit (&ptr->live_base_partitions[root], partition); | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
750 |
0 | 751 } |
752 | |
753 | |
754 /* Clear the live bit for VAR in PTR. */ | |
755 | |
756 static inline void | |
111 | 757 live_track_clear_var (live_track *ptr, tree var) |
0 | 758 { |
759 int p; | |
760 | |
761 p = var_to_partition (ptr->map, var); | |
762 if (p != NO_PARTITION) | |
763 live_track_remove_partition (ptr, p); | |
764 } | |
765 | |
766 | |
767 /* Return TRUE if VAR is live in PTR. */ | |
768 | |
769 static inline bool | |
111 | 770 live_track_live_p (live_track *ptr, tree var) |
0 | 771 { |
772 int p, root; | |
773 | |
774 p = var_to_partition (ptr->map, var); | |
775 if (p != NO_PARTITION) | |
776 { | |
777 root = basevar_index (ptr->map, p); | |
145 | 778 if (bitmap_bit_p (&ptr->live_base_var, root)) |
779 return bitmap_bit_p (&ptr->live_base_partitions[root], p); | |
0 | 780 } |
781 return false; | |
782 } | |
783 | |
784 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
785 /* This routine will add USE to PTR. USE will be marked as live in both the |
0 | 786 ssa live map and the live bitmap for the root of USE. */ |
787 | |
788 static inline void | |
111 | 789 live_track_process_use (live_track *ptr, tree use) |
0 | 790 { |
791 int p; | |
792 | |
793 p = var_to_partition (ptr->map, use); | |
794 if (p == NO_PARTITION) | |
795 return; | |
796 | |
797 /* Mark as live in the appropriate live list. */ | |
798 live_track_add_partition (ptr, p); | |
799 } | |
800 | |
801 | |
802 /* This routine will process a DEF in PTR. DEF will be removed from the live | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
803 lists, and if there are any other live partitions with the same base |
0 | 804 variable, conflicts will be added to GRAPH. */ |
805 | |
806 static inline void | |
111 | 807 live_track_process_def (live_track *ptr, tree def, ssa_conflicts *graph) |
0 | 808 { |
809 int p, root; | |
810 bitmap b; | |
811 unsigned x; | |
812 bitmap_iterator bi; | |
813 | |
814 p = var_to_partition (ptr->map, def); | |
815 if (p == NO_PARTITION) | |
816 return; | |
817 | |
818 /* Clear the liveness bit. */ | |
819 live_track_remove_partition (ptr, p); | |
820 | |
821 /* If the bitmap isn't empty now, conflicts need to be added. */ | |
822 root = basevar_index (ptr->map, p); | |
145 | 823 if (bitmap_bit_p (&ptr->live_base_var, root)) |
0 | 824 { |
145 | 825 b = &ptr->live_base_partitions[root]; |
0 | 826 EXECUTE_IF_SET_IN_BITMAP (b, 0, x, bi) |
827 ssa_conflicts_add (graph, p, x); | |
828 } | |
829 } | |
830 | |
831 | |
832 /* Initialize PTR with the partitions set in INIT. */ | |
833 | |
834 static inline void | |
111 | 835 live_track_init (live_track *ptr, bitmap init) |
0 | 836 { |
837 unsigned p; | |
838 bitmap_iterator bi; | |
839 | |
840 /* Mark all live on exit partitions. */ | |
841 EXECUTE_IF_SET_IN_BITMAP (init, 0, p, bi) | |
842 live_track_add_partition (ptr, p); | |
843 } | |
844 | |
845 | |
846 /* This routine will clear all live partitions in PTR. */ | |
847 | |
848 static inline void | |
111 | 849 live_track_clear_base_vars (live_track *ptr) |
0 | 850 { |
851 /* Simply clear the live base list. Anything marked as live in the element | |
852 lists will be cleared later if/when the base variable ever comes alive | |
853 again. */ | |
145 | 854 bitmap_clear (&ptr->live_base_var); |
0 | 855 } |
856 | |
857 | |
858 /* Build a conflict graph based on LIVEINFO. Any partitions which are in the | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
859 partition view of the var_map liveinfo is based on get entries in the |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
860 conflict graph. Only conflicts between ssa_name partitions with the same |
0 | 861 base variable are added. */ |
862 | |
111 | 863 static ssa_conflicts * |
0 | 864 build_ssa_conflict_graph (tree_live_info_p liveinfo) |
865 { | |
111 | 866 ssa_conflicts *graph; |
0 | 867 var_map map; |
868 basic_block bb; | |
869 ssa_op_iter iter; | |
111 | 870 live_track *live; |
871 basic_block entry; | |
872 | |
873 /* If inter-variable coalescing is enabled, we may attempt to | |
874 coalesce variables from different base variables, including | |
875 different parameters, so we have to make sure default defs live | |
876 at the entry block conflict with each other. */ | |
877 if (flag_tree_coalesce_vars) | |
878 entry = single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)); | |
879 else | |
880 entry = NULL; | |
0 | 881 |
882 map = live_var_map (liveinfo); | |
883 graph = ssa_conflicts_new (num_var_partitions (map)); | |
884 | |
885 live = new_live_track (map); | |
886 | |
131 | 887 for (unsigned i = 0; liveinfo->map->vec_bbs.iterate (i, &bb); ++i) |
0 | 888 { |
889 /* Start with live on exit temporaries. */ | |
890 live_track_init (live, live_on_exit (liveinfo, bb)); | |
891 | |
111 | 892 for (gimple_stmt_iterator gsi = gsi_last_bb (bb); !gsi_end_p (gsi); |
893 gsi_prev (&gsi)) | |
0 | 894 { |
895 tree var; | |
111 | 896 gimple *stmt = gsi_stmt (gsi); |
0 | 897 |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
898 /* A copy between 2 partitions does not introduce an interference |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
899 by itself. If they did, you would never be able to coalesce |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
900 two things which are copied. If the two variables really do |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
901 conflict, they will conflict elsewhere in the program. |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
902 |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
903 This is handled by simply removing the SRC of the copy from the |
0 | 904 live list, and processing the stmt normally. */ |
905 if (is_gimple_assign (stmt)) | |
906 { | |
907 tree lhs = gimple_assign_lhs (stmt); | |
908 tree rhs1 = gimple_assign_rhs1 (stmt); | |
909 if (gimple_assign_copy_p (stmt) | |
910 && TREE_CODE (lhs) == SSA_NAME | |
911 && TREE_CODE (rhs1) == SSA_NAME) | |
912 live_track_clear_var (live, rhs1); | |
913 } | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
914 else if (is_gimple_debug (stmt)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
915 continue; |
0 | 916 |
111 | 917 /* For stmts with more than one SSA_NAME definition pretend all the |
918 SSA_NAME outputs but the first one are live at this point, so | |
919 that conflicts are added in between all those even when they are | |
920 actually not really live after the asm, because expansion might | |
921 copy those into pseudos after the asm and if multiple outputs | |
922 share the same partition, it might overwrite those that should | |
923 be live. E.g. | |
924 asm volatile (".." : "=r" (a) : "=r" (b) : "0" (a), "1" (a)); | |
925 return a; | |
926 See PR70593. */ | |
927 bool first = true; | |
928 FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_DEF) | |
929 if (first) | |
930 first = false; | |
931 else | |
932 live_track_process_use (live, var); | |
933 | |
0 | 934 FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_DEF) |
935 live_track_process_def (live, var, graph); | |
936 | |
937 FOR_EACH_SSA_TREE_OPERAND (var, stmt, iter, SSA_OP_USE) | |
938 live_track_process_use (live, var); | |
939 } | |
940 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
941 /* If result of a PHI is unused, looping over the statements will not |
0 | 942 record any conflicts since the def was never live. Since the PHI node |
943 is going to be translated out of SSA form, it will insert a copy. | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
944 There must be a conflict recorded between the result of the PHI and |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
945 any variables that are live. Otherwise the out-of-ssa translation |
0 | 946 may create incorrect code. */ |
111 | 947 for (gphi_iterator gsi = gsi_start_phis (bb); !gsi_end_p (gsi); |
948 gsi_next (&gsi)) | |
0 | 949 { |
111 | 950 gphi *phi = gsi.phi (); |
0 | 951 tree result = PHI_RESULT (phi); |
131 | 952 if (virtual_operand_p (result)) |
953 continue; | |
0 | 954 if (live_track_live_p (live, result)) |
955 live_track_process_def (live, result, graph); | |
956 } | |
957 | |
111 | 958 /* Pretend there are defs for params' default defs at the start |
959 of the (post-)entry block. This will prevent PARM_DECLs from | |
960 coalescing into the same partition. Although RESULT_DECLs' | |
961 default defs don't have a useful initial value, we have to | |
962 prevent them from coalescing with PARM_DECLs' default defs | |
963 too, otherwise assign_parms would attempt to assign different | |
964 RTL to the same partition. */ | |
965 if (bb == entry) | |
966 { | |
967 unsigned i; | |
968 tree var; | |
969 | |
970 FOR_EACH_SSA_NAME (i, var, cfun) | |
971 { | |
972 if (!SSA_NAME_IS_DEFAULT_DEF (var) | |
973 || !SSA_NAME_VAR (var) | |
974 || VAR_P (SSA_NAME_VAR (var))) | |
975 continue; | |
976 | |
977 live_track_process_def (live, var, graph); | |
978 /* Process a use too, so that it remains live and | |
979 conflicts with other parms' default defs, even unused | |
980 ones. */ | |
981 live_track_process_use (live, var); | |
982 } | |
983 } | |
984 | |
0 | 985 live_track_clear_base_vars (live); |
986 } | |
987 | |
988 delete_live_track (live); | |
989 return graph; | |
990 } | |
991 | |
992 /* Print a failure to coalesce a MUST_COALESCE pair X and Y. */ | |
993 | |
994 static inline void | |
995 fail_abnormal_edge_coalesce (int x, int y) | |
996 { | |
997 fprintf (stderr, "\nUnable to coalesce ssa_names %d and %d",x, y); | |
998 fprintf (stderr, " which are marked as MUST COALESCE.\n"); | |
999 print_generic_expr (stderr, ssa_name (x), TDF_SLIM); | |
1000 fprintf (stderr, " and "); | |
1001 print_generic_stmt (stderr, ssa_name (y), TDF_SLIM); | |
1002 | |
1003 internal_error ("SSA corruption"); | |
1004 } | |
1005 | |
111 | 1006 /* If VAR is an SSA_NAME associated with a PARM_DECL or a RESULT_DECL, |
1007 and the DECL's default def is unused (i.e., it was introduced by | |
131 | 1008 create_default_def for out-of-ssa), mark VAR and the default def for |
111 | 1009 coalescing. */ |
1010 | |
1011 static void | |
1012 coalesce_with_default (tree var, coalesce_list *cl, bitmap used_in_copy) | |
1013 { | |
1014 if (SSA_NAME_IS_DEFAULT_DEF (var) | |
1015 || !SSA_NAME_VAR (var) | |
1016 || VAR_P (SSA_NAME_VAR (var))) | |
1017 return; | |
1018 | |
1019 tree ssa = ssa_default_def (cfun, SSA_NAME_VAR (var)); | |
1020 if (!has_zero_uses (ssa)) | |
1021 return; | |
1022 | |
1023 add_cost_one_coalesce (cl, SSA_NAME_VERSION (ssa), SSA_NAME_VERSION (var)); | |
1024 bitmap_set_bit (used_in_copy, SSA_NAME_VERSION (var)); | |
131 | 1025 /* Default defs will have their used_in_copy bits set at the beginning of |
1026 populate_coalesce_list_for_outofssa. */ | |
111 | 1027 } |
0 | 1028 |
1029 | |
131 | 1030 /* Given var_map MAP for a region, this function creates and returns a coalesce |
1031 list as well as recording related ssa names in USED_IN_COPIES for use later | |
1032 in the out-of-ssa or live range computation process. */ | |
1033 | |
1034 static coalesce_list * | |
1035 create_coalesce_list_for_region (var_map map, bitmap used_in_copy) | |
0 | 1036 { |
1037 gimple_stmt_iterator gsi; | |
1038 basic_block bb; | |
131 | 1039 coalesce_list *cl = create_coalesce_list (); |
111 | 1040 gimple *stmt; |
0 | 1041 int v1, v2, cost; |
1042 | |
131 | 1043 for (unsigned j = 0; map->vec_bbs.iterate (j, &bb); ++j) |
0 | 1044 { |
1045 tree arg; | |
1046 | |
111 | 1047 for (gphi_iterator gpi = gsi_start_phis (bb); |
1048 !gsi_end_p (gpi); | |
1049 gsi_next (&gpi)) | |
0 | 1050 { |
111 | 1051 gphi *phi = gpi.phi (); |
0 | 1052 size_t i; |
1053 int ver; | |
1054 tree res; | |
1055 bool saw_copy = false; | |
1056 | |
1057 res = gimple_phi_result (phi); | |
131 | 1058 if (virtual_operand_p (res)) |
1059 continue; | |
0 | 1060 ver = SSA_NAME_VERSION (res); |
1061 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1062 /* Register ssa_names and coalesces between the args and the result |
0 | 1063 of all PHI. */ |
1064 for (i = 0; i < gimple_phi_num_args (phi); i++) | |
1065 { | |
1066 edge e = gimple_phi_arg_edge (phi, i); | |
1067 arg = PHI_ARG_DEF (phi, i); | |
111 | 1068 if (TREE_CODE (arg) != SSA_NAME) |
1069 continue; | |
1070 | |
1071 if (gimple_can_coalesce_p (arg, res) | |
1072 || (e->flags & EDGE_ABNORMAL)) | |
1073 { | |
0 | 1074 saw_copy = true; |
1075 bitmap_set_bit (used_in_copy, SSA_NAME_VERSION (arg)); | |
1076 if ((e->flags & EDGE_ABNORMAL) == 0) | |
1077 { | |
1078 int cost = coalesce_cost_edge (e); | |
1079 if (cost == 1 && has_single_use (arg)) | |
111 | 1080 add_cost_one_coalesce (cl, ver, SSA_NAME_VERSION (arg)); |
0 | 1081 else |
1082 add_coalesce (cl, ver, SSA_NAME_VERSION (arg), cost); | |
1083 } | |
1084 } | |
1085 } | |
1086 if (saw_copy) | |
1087 bitmap_set_bit (used_in_copy, ver); | |
1088 } | |
1089 | |
1090 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) | |
1091 { | |
1092 stmt = gsi_stmt (gsi); | |
1093 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1094 if (is_gimple_debug (stmt)) |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1095 continue; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1096 |
0 | 1097 /* Check for copy coalesces. */ |
1098 switch (gimple_code (stmt)) | |
1099 { | |
1100 case GIMPLE_ASSIGN: | |
1101 { | |
1102 tree lhs = gimple_assign_lhs (stmt); | |
1103 tree rhs1 = gimple_assign_rhs1 (stmt); | |
111 | 1104 if (gimple_assign_ssa_name_copy_p (stmt) |
1105 && gimple_can_coalesce_p (lhs, rhs1)) | |
0 | 1106 { |
1107 v1 = SSA_NAME_VERSION (lhs); | |
1108 v2 = SSA_NAME_VERSION (rhs1); | |
1109 cost = coalesce_cost_bb (bb); | |
1110 add_coalesce (cl, v1, v2, cost); | |
1111 bitmap_set_bit (used_in_copy, v1); | |
1112 bitmap_set_bit (used_in_copy, v2); | |
1113 } | |
1114 } | |
1115 break; | |
1116 | |
111 | 1117 case GIMPLE_RETURN: |
1118 { | |
1119 tree res = DECL_RESULT (current_function_decl); | |
1120 if (VOID_TYPE_P (TREE_TYPE (res)) | |
1121 || !is_gimple_reg (res)) | |
1122 break; | |
1123 tree rhs1 = gimple_return_retval (as_a <greturn *> (stmt)); | |
1124 if (!rhs1) | |
1125 break; | |
1126 tree lhs = ssa_default_def (cfun, res); | |
1127 gcc_assert (lhs); | |
1128 if (TREE_CODE (rhs1) == SSA_NAME | |
1129 && gimple_can_coalesce_p (lhs, rhs1)) | |
1130 { | |
1131 v1 = SSA_NAME_VERSION (lhs); | |
1132 v2 = SSA_NAME_VERSION (rhs1); | |
1133 cost = coalesce_cost_bb (bb); | |
1134 add_coalesce (cl, v1, v2, cost); | |
1135 bitmap_set_bit (used_in_copy, v1); | |
1136 bitmap_set_bit (used_in_copy, v2); | |
1137 } | |
1138 break; | |
1139 } | |
1140 | |
0 | 1141 case GIMPLE_ASM: |
1142 { | |
111 | 1143 gasm *asm_stmt = as_a <gasm *> (stmt); |
0 | 1144 unsigned long noutputs, i; |
1145 unsigned long ninputs; | |
1146 tree *outputs, link; | |
111 | 1147 noutputs = gimple_asm_noutputs (asm_stmt); |
1148 ninputs = gimple_asm_ninputs (asm_stmt); | |
0 | 1149 outputs = (tree *) alloca (noutputs * sizeof (tree)); |
111 | 1150 for (i = 0; i < noutputs; ++i) |
1151 { | |
1152 link = gimple_asm_output_op (asm_stmt, i); | |
1153 outputs[i] = TREE_VALUE (link); | |
1154 } | |
0 | 1155 |
1156 for (i = 0; i < ninputs; ++i) | |
1157 { | |
1158 const char *constraint; | |
1159 tree input; | |
1160 char *end; | |
1161 unsigned long match; | |
1162 | |
111 | 1163 link = gimple_asm_input_op (asm_stmt, i); |
0 | 1164 constraint |
1165 = TREE_STRING_POINTER (TREE_VALUE (TREE_PURPOSE (link))); | |
1166 input = TREE_VALUE (link); | |
1167 | |
1168 if (TREE_CODE (input) != SSA_NAME) | |
1169 continue; | |
1170 | |
1171 match = strtoul (constraint, &end, 10); | |
1172 if (match >= noutputs || end == constraint) | |
1173 continue; | |
1174 | |
1175 if (TREE_CODE (outputs[match]) != SSA_NAME) | |
1176 continue; | |
1177 | |
1178 v1 = SSA_NAME_VERSION (outputs[match]); | |
1179 v2 = SSA_NAME_VERSION (input); | |
1180 | |
111 | 1181 if (gimple_can_coalesce_p (outputs[match], input)) |
0 | 1182 { |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1183 cost = coalesce_cost (REG_BR_PROB_BASE, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1184 optimize_bb_for_size_p (bb)); |
0 | 1185 add_coalesce (cl, v1, v2, cost); |
1186 bitmap_set_bit (used_in_copy, v1); | |
1187 bitmap_set_bit (used_in_copy, v2); | |
1188 } | |
1189 } | |
1190 break; | |
1191 } | |
1192 | |
1193 default: | |
1194 break; | |
1195 } | |
1196 } | |
1197 } | |
1198 | |
131 | 1199 return cl; |
1200 } | |
1201 | |
1202 | |
1203 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */ | |
1204 | |
1205 struct ssa_name_var_hash : nofree_ptr_hash <tree_node> | |
1206 { | |
1207 static inline hashval_t hash (const tree_node *); | |
1208 static inline int equal (const tree_node *, const tree_node *); | |
1209 }; | |
1210 | |
1211 inline hashval_t | |
1212 ssa_name_var_hash::hash (const_tree n) | |
1213 { | |
1214 return DECL_UID (SSA_NAME_VAR (n)); | |
1215 } | |
1216 | |
1217 inline int | |
1218 ssa_name_var_hash::equal (const tree_node *n1, const tree_node *n2) | |
1219 { | |
1220 return SSA_NAME_VAR (n1) == SSA_NAME_VAR (n2); | |
1221 } | |
1222 | |
1223 | |
1224 /* This function populates coalesce list CL as well as recording related ssa | |
1225 names in USED_IN_COPIES for use later in the out-of-ssa process. */ | |
1226 | |
1227 static void | |
1228 populate_coalesce_list_for_outofssa (coalesce_list *cl, bitmap used_in_copy) | |
1229 { | |
1230 tree var; | |
1231 tree first; | |
1232 int v1, v2, cost; | |
1233 unsigned i; | |
1234 | |
1235 /* Process result decls and live on entry variables for entry into the | |
1236 coalesce list. */ | |
0 | 1237 first = NULL_TREE; |
111 | 1238 FOR_EACH_SSA_NAME (i, var, cfun) |
0 | 1239 { |
111 | 1240 if (!virtual_operand_p (var)) |
0 | 1241 { |
111 | 1242 coalesce_with_default (var, cl, used_in_copy); |
1243 | |
0 | 1244 /* Add coalesces between all the result decls. */ |
111 | 1245 if (SSA_NAME_VAR (var) |
1246 && TREE_CODE (SSA_NAME_VAR (var)) == RESULT_DECL) | |
0 | 1247 { |
111 | 1248 bitmap_set_bit (used_in_copy, SSA_NAME_VERSION (var)); |
0 | 1249 if (first == NULL_TREE) |
1250 first = var; | |
1251 else | |
1252 { | |
111 | 1253 gcc_assert (gimple_can_coalesce_p (var, first)); |
0 | 1254 v1 = SSA_NAME_VERSION (first); |
1255 v2 = SSA_NAME_VERSION (var); | |
111 | 1256 cost = coalesce_cost_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)); |
0 | 1257 add_coalesce (cl, v1, v2, cost); |
1258 } | |
1259 } | |
1260 /* Mark any default_def variables as being in the coalesce list | |
1261 since they will have to be coalesced with the base variable. If | |
1262 not marked as present, they won't be in the coalesce view. */ | |
111 | 1263 if (SSA_NAME_IS_DEFAULT_DEF (var) |
1264 && (!has_zero_uses (var) | |
1265 || (SSA_NAME_VAR (var) | |
1266 && !VAR_P (SSA_NAME_VAR (var))))) | |
0 | 1267 bitmap_set_bit (used_in_copy, SSA_NAME_VERSION (var)); |
1268 } | |
1269 } | |
1270 | |
131 | 1271 /* If this optimization is disabled, we need to coalesce all the |
1272 names originating from the same SSA_NAME_VAR so debug info | |
1273 remains undisturbed. */ | |
1274 if (!flag_tree_coalesce_vars) | |
1275 { | |
1276 tree a; | |
1277 hash_table<ssa_name_var_hash> ssa_name_hash (10); | |
1278 | |
1279 FOR_EACH_SSA_NAME (i, a, cfun) | |
1280 { | |
1281 if (SSA_NAME_VAR (a) | |
1282 && !DECL_IGNORED_P (SSA_NAME_VAR (a)) | |
1283 && (!has_zero_uses (a) || !SSA_NAME_IS_DEFAULT_DEF (a) | |
1284 || !VAR_P (SSA_NAME_VAR (a)))) | |
1285 { | |
1286 tree *slot = ssa_name_hash.find_slot (a, INSERT); | |
1287 | |
1288 if (!*slot) | |
1289 *slot = a; | |
1290 else | |
1291 { | |
1292 /* If the variable is a PARM_DECL or a RESULT_DECL, we | |
1293 _require_ that all the names originating from it be | |
1294 coalesced, because there must be a single partition | |
1295 containing all the names so that it can be assigned | |
1296 the canonical RTL location of the DECL safely. | |
1297 If in_lto_p, a function could have been compiled | |
1298 originally with optimizations and only the link | |
1299 performed at -O0, so we can't actually require it. */ | |
1300 const int cost | |
1301 = (TREE_CODE (SSA_NAME_VAR (a)) == VAR_DECL || in_lto_p) | |
1302 ? MUST_COALESCE_COST - 1 : MUST_COALESCE_COST; | |
1303 add_coalesce (cl, SSA_NAME_VERSION (a), | |
1304 SSA_NAME_VERSION (*slot), cost); | |
1305 bitmap_set_bit (used_in_copy, SSA_NAME_VERSION (a)); | |
1306 bitmap_set_bit (used_in_copy, SSA_NAME_VERSION (*slot)); | |
1307 } | |
1308 } | |
1309 } | |
1310 } | |
0 | 1311 } |
1312 | |
1313 | |
1314 /* Attempt to coalesce ssa versions X and Y together using the partition | |
1315 mapping in MAP and checking conflicts in GRAPH. Output any debug info to | |
1316 DEBUG, if it is nun-NULL. */ | |
1317 | |
1318 static inline bool | |
111 | 1319 attempt_coalesce (var_map map, ssa_conflicts *graph, int x, int y, |
0 | 1320 FILE *debug) |
1321 { | |
1322 int z; | |
1323 tree var1, var2; | |
1324 int p1, p2; | |
1325 | |
1326 p1 = var_to_partition (map, ssa_name (x)); | |
1327 p2 = var_to_partition (map, ssa_name (y)); | |
1328 | |
1329 if (debug) | |
1330 { | |
1331 fprintf (debug, "(%d)", x); | |
1332 print_generic_expr (debug, partition_to_var (map, p1), TDF_SLIM); | |
1333 fprintf (debug, " & (%d)", y); | |
1334 print_generic_expr (debug, partition_to_var (map, p2), TDF_SLIM); | |
1335 } | |
1336 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1337 if (p1 == p2) |
0 | 1338 { |
1339 if (debug) | |
1340 fprintf (debug, ": Already Coalesced.\n"); | |
1341 return true; | |
1342 } | |
1343 | |
1344 if (debug) | |
1345 fprintf (debug, " [map: %d, %d] ", p1, p2); | |
1346 | |
1347 | |
1348 if (!ssa_conflicts_test_p (graph, p1, p2)) | |
1349 { | |
1350 var1 = partition_to_var (map, p1); | |
1351 var2 = partition_to_var (map, p2); | |
111 | 1352 |
0 | 1353 z = var_union (map, var1, var2); |
1354 if (z == NO_PARTITION) | |
1355 { | |
1356 if (debug) | |
1357 fprintf (debug, ": Unable to perform partition union.\n"); | |
1358 return false; | |
1359 } | |
1360 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1361 /* z is the new combined partition. Remove the other partition from |
0 | 1362 the list, and merge the conflicts. */ |
1363 if (z == p1) | |
1364 ssa_conflicts_merge (graph, p1, p2); | |
1365 else | |
1366 ssa_conflicts_merge (graph, p2, p1); | |
1367 | |
1368 if (debug) | |
1369 fprintf (debug, ": Success -> %d\n", z); | |
111 | 1370 |
0 | 1371 return true; |
1372 } | |
1373 | |
1374 if (debug) | |
1375 fprintf (debug, ": Fail due to conflict\n"); | |
1376 | |
1377 return false; | |
1378 } | |
1379 | |
1380 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1381 /* Attempt to Coalesce partitions in MAP which occur in the list CL using |
0 | 1382 GRAPH. Debug output is sent to DEBUG if it is non-NULL. */ |
1383 | |
1384 static void | |
111 | 1385 coalesce_partitions (var_map map, ssa_conflicts *graph, coalesce_list *cl, |
0 | 1386 FILE *debug) |
1387 { | |
1388 int x = 0, y = 0; | |
1389 tree var1, var2; | |
1390 int cost; | |
1391 basic_block bb; | |
1392 edge e; | |
1393 edge_iterator ei; | |
1394 | |
1395 /* First, coalesce all the copies across abnormal edges. These are not placed | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1396 in the coalesce list because they do not need to be sorted, and simply |
0 | 1397 consume extra memory/compilation time in large programs. */ |
1398 | |
111 | 1399 FOR_EACH_BB_FN (bb, cfun) |
0 | 1400 { |
1401 FOR_EACH_EDGE (e, ei, bb->preds) | |
1402 if (e->flags & EDGE_ABNORMAL) | |
1403 { | |
111 | 1404 gphi_iterator gsi; |
0 | 1405 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); |
1406 gsi_next (&gsi)) | |
1407 { | |
111 | 1408 gphi *phi = gsi.phi (); |
131 | 1409 tree res = PHI_RESULT (phi); |
1410 if (virtual_operand_p (res)) | |
1411 continue; | |
111 | 1412 tree arg = PHI_ARG_DEF (phi, e->dest_idx); |
1413 if (SSA_NAME_IS_DEFAULT_DEF (arg) | |
1414 && (!SSA_NAME_VAR (arg) | |
1415 || TREE_CODE (SSA_NAME_VAR (arg)) != PARM_DECL)) | |
1416 continue; | |
1417 | |
0 | 1418 int v1 = SSA_NAME_VERSION (res); |
1419 int v2 = SSA_NAME_VERSION (arg); | |
1420 | |
1421 if (debug) | |
1422 fprintf (debug, "Abnormal coalesce: "); | |
1423 | |
1424 if (!attempt_coalesce (map, graph, v1, v2, debug)) | |
1425 fail_abnormal_edge_coalesce (v1, v2); | |
1426 } | |
1427 } | |
1428 } | |
1429 | |
1430 /* Now process the items in the coalesce list. */ | |
1431 | |
1432 while ((cost = pop_best_coalesce (cl, &x, &y)) != NO_BEST_COALESCE) | |
1433 { | |
1434 var1 = ssa_name (x); | |
1435 var2 = ssa_name (y); | |
1436 | |
1437 /* Assert the coalesces have the same base variable. */ | |
111 | 1438 gcc_assert (gimple_can_coalesce_p (var1, var2)); |
0 | 1439 |
1440 if (debug) | |
1441 fprintf (debug, "Coalesce list: "); | |
1442 attempt_coalesce (map, graph, x, y, debug); | |
1443 } | |
1444 } | |
1445 | |
111 | 1446 |
1447 /* Output partition map MAP with coalescing plan PART to file F. */ | |
1448 | |
1449 void | |
1450 dump_part_var_map (FILE *f, partition part, var_map map) | |
1451 { | |
1452 int t; | |
1453 unsigned x, y; | |
1454 int p; | |
1455 | |
1456 fprintf (f, "\nCoalescible Partition map \n\n"); | |
1457 | |
1458 for (x = 0; x < map->num_partitions; x++) | |
1459 { | |
1460 if (map->view_to_partition != NULL) | |
1461 p = map->view_to_partition[x]; | |
1462 else | |
1463 p = x; | |
1464 | |
1465 if (ssa_name (p) == NULL_TREE | |
1466 || virtual_operand_p (ssa_name (p))) | |
1467 continue; | |
1468 | |
1469 t = 0; | |
1470 for (y = 1; y < num_ssa_names; y++) | |
1471 { | |
1472 tree var = version_to_var (map, y); | |
1473 if (!var) | |
1474 continue; | |
1475 int q = var_to_partition (map, var); | |
1476 p = partition_find (part, q); | |
1477 gcc_assert (map->partition_to_base_index[q] | |
1478 == map->partition_to_base_index[p]); | |
1479 | |
1480 if (p == (int)x) | |
1481 { | |
1482 if (t++ == 0) | |
1483 { | |
1484 fprintf (f, "Partition %d, base %d (", x, | |
1485 map->partition_to_base_index[q]); | |
1486 print_generic_expr (f, partition_to_var (map, q), TDF_SLIM); | |
1487 fprintf (f, " - "); | |
1488 } | |
1489 fprintf (f, "%d ", y); | |
1490 } | |
1491 } | |
1492 if (t != 0) | |
1493 fprintf (f, ")\n"); | |
1494 } | |
1495 fprintf (f, "\n"); | |
1496 } | |
1497 | |
1498 /* Given SSA_NAMEs NAME1 and NAME2, return true if they are candidates for | |
1499 coalescing together, false otherwise. | |
1500 | |
1501 This must stay consistent with compute_samebase_partition_bases and | |
1502 compute_optimized_partition_bases. */ | |
0 | 1503 |
111 | 1504 bool |
1505 gimple_can_coalesce_p (tree name1, tree name2) | |
0 | 1506 { |
111 | 1507 /* First check the SSA_NAME's associated DECL. Without |
1508 optimization, we only want to coalesce if they have the same DECL | |
1509 or both have no associated DECL. */ | |
1510 tree var1 = SSA_NAME_VAR (name1); | |
1511 tree var2 = SSA_NAME_VAR (name2); | |
1512 var1 = (var1 && (!VAR_P (var1) || !DECL_IGNORED_P (var1))) ? var1 : NULL_TREE; | |
1513 var2 = (var2 && (!VAR_P (var2) || !DECL_IGNORED_P (var2))) ? var2 : NULL_TREE; | |
1514 if (var1 != var2 && !flag_tree_coalesce_vars) | |
1515 return false; | |
1516 | |
1517 /* Now check the types. If the types are the same, then we should | |
1518 try to coalesce V1 and V2. */ | |
1519 tree t1 = TREE_TYPE (name1); | |
1520 tree t2 = TREE_TYPE (name2); | |
1521 if (t1 == t2) | |
1522 { | |
1523 check_modes: | |
1524 /* If the base variables are the same, we're good: none of the | |
1525 other tests below could possibly fail. */ | |
1526 var1 = SSA_NAME_VAR (name1); | |
1527 var2 = SSA_NAME_VAR (name2); | |
1528 if (var1 == var2) | |
1529 return true; | |
1530 | |
1531 /* We don't want to coalesce two SSA names if one of the base | |
1532 variables is supposed to be a register while the other is | |
1533 supposed to be on the stack. Anonymous SSA names most often | |
1534 take registers, but when not optimizing, user variables | |
1535 should go on the stack, so coalescing them with the anonymous | |
1536 variable as the partition leader would end up assigning the | |
1537 user variable to a register. Don't do that! */ | |
1538 bool reg1 = use_register_for_decl (name1); | |
1539 bool reg2 = use_register_for_decl (name2); | |
1540 if (reg1 != reg2) | |
1541 return false; | |
1542 | |
1543 /* Check that the promoted modes and unsignedness are the same. | |
1544 We don't want to coalesce if the promoted modes would be | |
1545 different, or if they would sign-extend differently. Only | |
1546 PARM_DECLs and RESULT_DECLs have different promotion rules, | |
1547 so skip the test if both are variables, or both are anonymous | |
1548 SSA_NAMEs. */ | |
1549 int unsigned1, unsigned2; | |
1550 return ((!var1 || VAR_P (var1)) && (!var2 || VAR_P (var2))) | |
1551 || ((promote_ssa_mode (name1, &unsigned1) | |
1552 == promote_ssa_mode (name2, &unsigned2)) | |
1553 && unsigned1 == unsigned2); | |
1554 } | |
1555 | |
1556 /* If alignment requirements are different, we can't coalesce. */ | |
1557 if (MINIMUM_ALIGNMENT (t1, | |
1558 var1 ? DECL_MODE (var1) : TYPE_MODE (t1), | |
1559 var1 ? LOCAL_DECL_ALIGNMENT (var1) : TYPE_ALIGN (t1)) | |
1560 != MINIMUM_ALIGNMENT (t2, | |
1561 var2 ? DECL_MODE (var2) : TYPE_MODE (t2), | |
1562 var2 ? LOCAL_DECL_ALIGNMENT (var2) : TYPE_ALIGN (t2))) | |
1563 return false; | |
1564 | |
1565 /* If the types are not the same, see whether they are compatible. This | |
1566 (for example) allows coalescing when the types are fundamentally the | |
131 | 1567 same, but just have different names. */ |
1568 if (types_compatible_p (t1, t2)) | |
1569 goto check_modes; | |
111 | 1570 |
1571 return false; | |
0 | 1572 } |
1573 | |
111 | 1574 /* Fill in MAP's partition_to_base_index, with one index for each |
1575 partition of SSA names USED_IN_COPIES and related by CL coalesce | |
1576 possibilities. This must match gimple_can_coalesce_p in the | |
1577 optimized case. */ | |
1578 | |
1579 static void | |
1580 compute_optimized_partition_bases (var_map map, bitmap used_in_copies, | |
1581 coalesce_list *cl) | |
1582 { | |
1583 int parts = num_var_partitions (map); | |
1584 partition tentative = partition_new (parts); | |
1585 | |
1586 /* Partition the SSA versions so that, for each coalescible | |
1587 pair, both of its members are in the same partition in | |
1588 TENTATIVE. */ | |
1589 gcc_assert (!cl->sorted); | |
1590 coalesce_pair *node; | |
1591 coalesce_iterator_type ppi; | |
1592 FOR_EACH_PARTITION_PAIR (node, ppi, cl) | |
1593 { | |
1594 tree v1 = ssa_name (node->first_element); | |
1595 int p1 = partition_find (tentative, var_to_partition (map, v1)); | |
1596 tree v2 = ssa_name (node->second_element); | |
1597 int p2 = partition_find (tentative, var_to_partition (map, v2)); | |
1598 | |
1599 if (p1 == p2) | |
1600 continue; | |
1601 | |
1602 partition_union (tentative, p1, p2); | |
1603 } | |
1604 | |
1605 /* We have to deal with cost one pairs too. */ | |
1606 for (cost_one_pair *co = cl->cost_one_list; co; co = co->next) | |
1607 { | |
1608 tree v1 = ssa_name (co->first_element); | |
1609 int p1 = partition_find (tentative, var_to_partition (map, v1)); | |
1610 tree v2 = ssa_name (co->second_element); | |
1611 int p2 = partition_find (tentative, var_to_partition (map, v2)); | |
1612 | |
1613 if (p1 == p2) | |
1614 continue; | |
1615 | |
1616 partition_union (tentative, p1, p2); | |
1617 } | |
1618 | |
1619 /* And also with abnormal edges. */ | |
1620 basic_block bb; | |
1621 edge e; | |
131 | 1622 unsigned i; |
111 | 1623 edge_iterator ei; |
131 | 1624 for (i = 0; map->vec_bbs.iterate (i, &bb); ++i) |
111 | 1625 { |
1626 FOR_EACH_EDGE (e, ei, bb->preds) | |
1627 if (e->flags & EDGE_ABNORMAL) | |
1628 { | |
1629 gphi_iterator gsi; | |
1630 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); | |
1631 gsi_next (&gsi)) | |
1632 { | |
1633 gphi *phi = gsi.phi (); | |
131 | 1634 tree res = PHI_RESULT (phi); |
1635 if (virtual_operand_p (res)) | |
1636 continue; | |
111 | 1637 tree arg = PHI_ARG_DEF (phi, e->dest_idx); |
1638 if (SSA_NAME_IS_DEFAULT_DEF (arg) | |
1639 && (!SSA_NAME_VAR (arg) | |
1640 || TREE_CODE (SSA_NAME_VAR (arg)) != PARM_DECL)) | |
1641 continue; | |
1642 | |
1643 int p1 = partition_find (tentative, var_to_partition (map, res)); | |
1644 int p2 = partition_find (tentative, var_to_partition (map, arg)); | |
1645 | |
1646 if (p1 == p2) | |
1647 continue; | |
1648 | |
1649 partition_union (tentative, p1, p2); | |
1650 } | |
1651 } | |
1652 } | |
1653 | |
1654 map->partition_to_base_index = XCNEWVEC (int, parts); | |
1655 auto_vec<unsigned int> index_map (parts); | |
1656 if (parts) | |
1657 index_map.quick_grow (parts); | |
1658 | |
1659 const unsigned no_part = -1; | |
1660 unsigned count = parts; | |
1661 while (count) | |
1662 index_map[--count] = no_part; | |
0 | 1663 |
111 | 1664 /* Initialize MAP's mapping from partition to base index, using |
1665 as base indices an enumeration of the TENTATIVE partitions in | |
1666 which each SSA version ended up, so that we compute conflicts | |
1667 between all SSA versions that ended up in the same potential | |
1668 coalesce partition. */ | |
1669 bitmap_iterator bi; | |
1670 EXECUTE_IF_SET_IN_BITMAP (used_in_copies, 0, i, bi) | |
1671 { | |
1672 int pidx = var_to_partition (map, ssa_name (i)); | |
1673 int base = partition_find (tentative, pidx); | |
1674 if (index_map[base] != no_part) | |
1675 continue; | |
1676 index_map[base] = count++; | |
1677 } | |
1678 | |
1679 map->num_basevars = count; | |
1680 | |
1681 EXECUTE_IF_SET_IN_BITMAP (used_in_copies, 0, i, bi) | |
1682 { | |
1683 int pidx = var_to_partition (map, ssa_name (i)); | |
1684 int base = partition_find (tentative, pidx); | |
1685 gcc_assert (index_map[base] < count); | |
1686 map->partition_to_base_index[pidx] = index_map[base]; | |
1687 } | |
1688 | |
1689 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1690 dump_part_var_map (dump_file, tentative, map); | |
1691 | |
1692 partition_delete (tentative); | |
1693 } | |
1694 | |
131 | 1695 /* Given an initial var_map MAP, coalesce variables and return a partition map |
1696 with the resulting coalesce. Note that this function is called in either | |
1697 live range computation context or out-of-ssa context, indicated by MAP. */ | |
111 | 1698 |
131 | 1699 extern void |
1700 coalesce_ssa_name (var_map map) | |
0 | 1701 { |
1702 tree_live_info_p liveinfo; | |
111 | 1703 ssa_conflicts *graph; |
1704 coalesce_list *cl; | |
1705 auto_bitmap used_in_copies; | |
0 | 1706 |
131 | 1707 bitmap_tree_view (used_in_copies); |
1708 cl = create_coalesce_list_for_region (map, used_in_copies); | |
1709 if (map->outofssa_p) | |
1710 populate_coalesce_list_for_outofssa (cl, used_in_copies); | |
1711 bitmap_list_view (used_in_copies); | |
1712 | |
0 | 1713 if (dump_file && (dump_flags & TDF_DETAILS)) |
1714 dump_var_map (dump_file, map); | |
1715 | |
111 | 1716 partition_view_bitmap (map, used_in_copies); |
1717 | |
131 | 1718 compute_optimized_partition_bases (map, used_in_copies, cl); |
0 | 1719 |
1720 if (num_var_partitions (map) < 1) | |
1721 { | |
1722 delete_coalesce_list (cl); | |
131 | 1723 return; |
0 | 1724 } |
1725 | |
1726 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1727 dump_var_map (dump_file, map); | |
1728 | |
111 | 1729 liveinfo = calculate_live_ranges (map, false); |
0 | 1730 |
1731 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1732 dump_live_info (dump_file, liveinfo, LIVEDUMP_ENTRY); | |
1733 | |
1734 /* Build a conflict graph. */ | |
1735 graph = build_ssa_conflict_graph (liveinfo); | |
1736 delete_tree_live_info (liveinfo); | |
1737 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1738 ssa_conflicts_dump (dump_file, graph); | |
1739 | |
111 | 1740 sort_coalesce_list (cl, graph, map); |
0 | 1741 |
1742 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1743 { | |
1744 fprintf (dump_file, "\nAfter sorting:\n"); | |
1745 dump_coalesce_list (dump_file, cl); | |
1746 } | |
1747 | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1748 /* First, coalesce all live on entry variables to their base variable. |
0 | 1749 This will ensure the first use is coming from the correct location. */ |
1750 | |
1751 if (dump_file && (dump_flags & TDF_DETAILS)) | |
1752 dump_var_map (dump_file, map); | |
1753 | |
1754 /* Now coalesce everything in the list. */ | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1755 coalesce_partitions (map, graph, cl, |
111 | 1756 ((dump_flags & TDF_DETAILS) ? dump_file : NULL)); |
0 | 1757 |
1758 delete_coalesce_list (cl); | |
1759 ssa_conflicts_delete (graph); | |
111 | 1760 } |
1761 |