0
|
1 /* Interprocedural constant propagation
|
|
2 Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc.
|
|
3 Contributed by Razya Ladelsky <RAZYA@il.ibm.com>
|
|
4
|
|
5 This file is part of GCC.
|
|
6
|
|
7 GCC is free software; you can redistribute it and/or modify it under
|
|
8 the terms of the GNU General Public License as published by the Free
|
|
9 Software Foundation; either version 3, or (at your option) any later
|
|
10 version.
|
|
11
|
|
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
15 for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with GCC; see the file COPYING3. If not see
|
|
19 <http://www.gnu.org/licenses/>. */
|
|
20
|
|
21 /* Interprocedural constant propagation. The aim of interprocedural constant
|
|
22 propagation (IPCP) is to find which function's argument has the same
|
|
23 constant value in each invocation throughout the whole program. For example,
|
|
24 consider the following program:
|
|
25
|
|
26 int g (int y)
|
|
27 {
|
|
28 printf ("value is %d",y);
|
|
29 }
|
|
30
|
|
31 int f (int x)
|
|
32 {
|
|
33 g (x);
|
|
34 }
|
|
35
|
|
36 int h (int y)
|
|
37 {
|
|
38 g (y);
|
|
39 }
|
|
40
|
|
41 void main (void)
|
|
42 {
|
|
43 f (3);
|
|
44 h (3);
|
|
45 }
|
|
46
|
|
47
|
|
48 The IPCP algorithm will find that g's formal argument y is always called
|
|
49 with the value 3.
|
|
50
|
|
51 The algorithm used is based on "Interprocedural Constant Propagation", by
|
|
52 Challahan David, Keith D Cooper, Ken Kennedy, Linda Torczon, Comp86, pg
|
|
53 152-161
|
|
54
|
|
55 The optimization is divided into three stages:
|
|
56
|
|
57 First stage - intraprocedural analysis
|
|
58 =======================================
|
|
59 This phase computes jump_function and modification flags.
|
|
60
|
|
61 A jump function for a callsite represents the values passed as an actual
|
|
62 arguments of a given callsite. There are three types of values:
|
|
63 Pass through - the caller's formal parameter is passed as an actual argument.
|
|
64 Constant - a constant is passed as an actual argument.
|
|
65 Unknown - neither of the above.
|
|
66
|
|
67 The jump function info, ipa_jump_func, is stored in ipa_edge_args
|
|
68 structure (defined in ipa_prop.h and pointed to by cgraph_node->aux)
|
|
69 modified_flags are defined in ipa_node_params structure
|
|
70 (defined in ipa_prop.h and pointed to by cgraph_edge->aux).
|
|
71
|
|
72 -ipcp_init_stage() is the first stage driver.
|
|
73
|
|
74 Second stage - interprocedural analysis
|
|
75 ========================================
|
|
76 This phase does the interprocedural constant propagation.
|
|
77 It computes lattices for all formal parameters in the program
|
|
78 and their value that may be:
|
|
79 TOP - unknown.
|
|
80 BOTTOM - non constant.
|
|
81 CONSTANT - constant value.
|
|
82
|
|
83 Lattice describing a formal parameter p will have a constant value if all
|
|
84 callsites invoking this function have the same constant value passed to p.
|
|
85
|
|
86 The lattices are stored in ipcp_lattice which is itself in ipa_node_params
|
|
87 structure (defined in ipa_prop.h and pointed to by cgraph_edge->aux).
|
|
88
|
|
89 -ipcp_iterate_stage() is the second stage driver.
|
|
90
|
|
91 Third phase - transformation of function code
|
|
92 ============================================
|
|
93 Propagates the constant-valued formals into the function.
|
|
94 For each function whose parameters are constants, we create its clone.
|
|
95
|
|
96 Then we process the clone in two ways:
|
|
97 1. We insert an assignment statement 'parameter = const' at the beginning
|
|
98 of the cloned function.
|
|
99 2. For read-only parameters that do not live in memory, we replace all their
|
|
100 uses with the constant.
|
|
101
|
|
102 We also need to modify some callsites to call the cloned functions instead
|
|
103 of the original ones. For a callsite passing an argument found to be a
|
|
104 constant by IPCP, there are two different cases to handle:
|
|
105 1. A constant is passed as an argument. In this case the callsite in the
|
|
106 should be redirected to call the cloned callee.
|
|
107 2. A parameter (of the caller) passed as an argument (pass through
|
|
108 argument). In such cases both the caller and the callee have clones and
|
|
109 only the callsite in the cloned caller is redirected to call to the
|
|
110 cloned callee.
|
|
111
|
|
112 This update is done in two steps: First all cloned functions are created
|
|
113 during a traversal of the call graph, during which all callsites are
|
|
114 redirected to call the cloned function. Then the callsites are traversed
|
|
115 and many calls redirected back to fit the description above.
|
|
116
|
|
117 -ipcp_insert_stage() is the third phase driver.
|
|
118
|
|
119 */
|
|
120
|
|
121 #include "config.h"
|
|
122 #include "system.h"
|
|
123 #include "coretypes.h"
|
|
124 #include "tree.h"
|
|
125 #include "target.h"
|
|
126 #include "cgraph.h"
|
|
127 #include "ipa-prop.h"
|
|
128 #include "tree-flow.h"
|
|
129 #include "tree-pass.h"
|
|
130 #include "flags.h"
|
|
131 #include "timevar.h"
|
|
132 #include "diagnostic.h"
|
|
133 #include "tree-dump.h"
|
|
134 #include "tree-inline.h"
|
|
135 #include "fibheap.h"
|
|
136 #include "params.h"
|
|
137
|
|
138 /* Number of functions identified as candidates for cloning. When not cloning
|
|
139 we can simplify iterate stage not forcing it to go through the decision
|
|
140 on what is profitable and what not. */
|
|
141 static int n_cloning_candidates;
|
|
142
|
|
143 /* Maximal count found in program. */
|
|
144 static gcov_type max_count;
|
|
145
|
|
146 /* Cgraph nodes that has been completely replaced by cloning during iterate
|
|
147 * stage and will be removed after ipcp is finished. */
|
|
148 static bitmap dead_nodes;
|
|
149
|
|
150 static void ipcp_print_profile_data (FILE *);
|
|
151 static void ipcp_function_scale_print (FILE *);
|
|
152
|
|
153 /* Get the original node field of ipa_node_params associated with node NODE. */
|
|
154 static inline struct cgraph_node *
|
|
155 ipcp_get_orig_node (struct cgraph_node *node)
|
|
156 {
|
|
157 return IPA_NODE_REF (node)->ipcp_orig_node;
|
|
158 }
|
|
159
|
|
160 /* Return true if NODE describes a cloned/versioned function. */
|
|
161 static inline bool
|
|
162 ipcp_node_is_clone (struct cgraph_node *node)
|
|
163 {
|
|
164 return (ipcp_get_orig_node (node) != NULL);
|
|
165 }
|
|
166
|
|
167 /* Create ipa_node_params and its data structures for NEW_NODE. Set ORIG_NODE
|
|
168 as the ipcp_orig_node field in ipa_node_params. */
|
|
169 static void
|
|
170 ipcp_init_cloned_node (struct cgraph_node *orig_node,
|
|
171 struct cgraph_node *new_node)
|
|
172 {
|
|
173 ipa_check_create_node_params ();
|
|
174 ipa_initialize_node_params (new_node);
|
|
175 IPA_NODE_REF (new_node)->ipcp_orig_node = orig_node;
|
|
176 }
|
|
177
|
|
178 /* Perform intraprocedrual analysis needed for ipcp. */
|
|
179 static void
|
|
180 ipcp_analyze_node (struct cgraph_node *node)
|
|
181 {
|
|
182 /* Unreachable nodes should have been eliminated before ipcp. */
|
|
183 gcc_assert (node->needed || node->reachable);
|
|
184
|
|
185 ipa_initialize_node_params (node);
|
|
186 ipa_detect_param_modifications (node);
|
|
187 }
|
|
188
|
|
189 /* Recompute all local information since node might've got new
|
|
190 direct calls after cloning. */
|
|
191 static void
|
|
192 ipcp_update_cloned_node (struct cgraph_node *new_node)
|
|
193 {
|
|
194 /* We might've introduced new direct calls. */
|
|
195 push_cfun (DECL_STRUCT_FUNCTION (new_node->decl));
|
|
196 current_function_decl = new_node->decl;
|
|
197 rebuild_cgraph_edges ();
|
|
198
|
|
199 /* Indirect inlinng rely on fact that we've already analyzed
|
|
200 the body.. */
|
|
201 if (flag_indirect_inlining)
|
|
202 {
|
|
203 struct cgraph_edge *cs;
|
|
204
|
|
205 ipcp_analyze_node (new_node);
|
|
206
|
|
207 for (cs = new_node->callees; cs; cs = cs->next_callee)
|
|
208 {
|
|
209 ipa_count_arguments (cs);
|
|
210 ipa_compute_jump_functions (cs);
|
|
211 }
|
|
212 }
|
|
213 pop_cfun ();
|
|
214 current_function_decl = NULL;
|
|
215 }
|
|
216
|
|
217 /* Return scale for NODE. */
|
|
218 static inline gcov_type
|
|
219 ipcp_get_node_scale (struct cgraph_node *node)
|
|
220 {
|
|
221 return IPA_NODE_REF (node)->count_scale;
|
|
222 }
|
|
223
|
|
224 /* Set COUNT as scale for NODE. */
|
|
225 static inline void
|
|
226 ipcp_set_node_scale (struct cgraph_node *node, gcov_type count)
|
|
227 {
|
|
228 IPA_NODE_REF (node)->count_scale = count;
|
|
229 }
|
|
230
|
|
231 /* Return whether LAT is a constant lattice. */
|
|
232 static inline bool
|
|
233 ipcp_lat_is_const (struct ipcp_lattice *lat)
|
|
234 {
|
|
235 if (lat->type == IPA_CONST_VALUE)
|
|
236 return true;
|
|
237 else
|
|
238 return false;
|
|
239 }
|
|
240
|
|
241 /* Return whether LAT is a constant lattice that ipa-cp can actually insert
|
|
242 into the code (i.e. constants excluding member pointers and pointers). */
|
|
243 static inline bool
|
|
244 ipcp_lat_is_insertable (struct ipcp_lattice *lat)
|
|
245 {
|
|
246 return lat->type == IPA_CONST_VALUE;
|
|
247 }
|
|
248
|
|
249 /* Return true if LAT1 and LAT2 are equal. */
|
|
250 static inline bool
|
|
251 ipcp_lats_are_equal (struct ipcp_lattice *lat1, struct ipcp_lattice *lat2)
|
|
252 {
|
|
253 gcc_assert (ipcp_lat_is_const (lat1) && ipcp_lat_is_const (lat2));
|
|
254 if (lat1->type != lat2->type)
|
|
255 return false;
|
|
256
|
|
257 if (operand_equal_p (lat1->constant, lat2->constant, 0))
|
|
258 return true;
|
|
259
|
|
260 return false;
|
|
261 }
|
|
262
|
|
263 /* Compute Meet arithmetics:
|
|
264 Meet (IPA_BOTTOM, x) = IPA_BOTTOM
|
|
265 Meet (IPA_TOP,x) = x
|
|
266 Meet (const_a,const_b) = IPA_BOTTOM, if const_a != const_b.
|
|
267 MEET (const_a,const_b) = const_a, if const_a == const_b.*/
|
|
268 static void
|
|
269 ipa_lattice_meet (struct ipcp_lattice *res, struct ipcp_lattice *lat1,
|
|
270 struct ipcp_lattice *lat2)
|
|
271 {
|
|
272 if (lat1->type == IPA_BOTTOM || lat2->type == IPA_BOTTOM)
|
|
273 {
|
|
274 res->type = IPA_BOTTOM;
|
|
275 return;
|
|
276 }
|
|
277 if (lat1->type == IPA_TOP)
|
|
278 {
|
|
279 res->type = lat2->type;
|
|
280 res->constant = lat2->constant;
|
|
281 return;
|
|
282 }
|
|
283 if (lat2->type == IPA_TOP)
|
|
284 {
|
|
285 res->type = lat1->type;
|
|
286 res->constant = lat1->constant;
|
|
287 return;
|
|
288 }
|
|
289 if (!ipcp_lats_are_equal (lat1, lat2))
|
|
290 {
|
|
291 res->type = IPA_BOTTOM;
|
|
292 return;
|
|
293 }
|
|
294 res->type = lat1->type;
|
|
295 res->constant = lat1->constant;
|
|
296 }
|
|
297
|
|
298 /* Return the lattice corresponding to the Ith formal parameter of the function
|
|
299 described by INFO. */
|
|
300 static inline struct ipcp_lattice *
|
|
301 ipcp_get_lattice (struct ipa_node_params *info, int i)
|
|
302 {
|
|
303 return &(info->params[i].ipcp_lattice);
|
|
304 }
|
|
305
|
|
306 /* Given the jump function JFUNC, compute the lattice LAT that describes the
|
|
307 value coming down the callsite. INFO describes the caller node so that
|
|
308 pass-through jump functions can be evaluated. */
|
|
309 static void
|
|
310 ipcp_lattice_from_jfunc (struct ipa_node_params *info, struct ipcp_lattice *lat,
|
|
311 struct ipa_jump_func *jfunc)
|
|
312 {
|
|
313 if (jfunc->type == IPA_CONST)
|
|
314 {
|
|
315 lat->type = IPA_CONST_VALUE;
|
|
316 lat->constant = jfunc->value.constant;
|
|
317 }
|
|
318 else if (jfunc->type == IPA_PASS_THROUGH)
|
|
319 {
|
|
320 struct ipcp_lattice *caller_lat;
|
|
321
|
|
322 caller_lat = ipcp_get_lattice (info, jfunc->value.formal_id);
|
|
323 lat->type = caller_lat->type;
|
|
324 lat->constant = caller_lat->constant;
|
|
325 }
|
|
326 else
|
|
327 lat->type = IPA_BOTTOM;
|
|
328 }
|
|
329
|
|
330 /* True when OLD_LAT and NEW_LAT values are not the same. */
|
|
331
|
|
332 static bool
|
|
333 ipcp_lattice_changed (struct ipcp_lattice *old_lat,
|
|
334 struct ipcp_lattice *new_lat)
|
|
335 {
|
|
336 if (old_lat->type == new_lat->type)
|
|
337 {
|
|
338 if (!ipcp_lat_is_const (old_lat))
|
|
339 return false;
|
|
340 if (ipcp_lats_are_equal (old_lat, new_lat))
|
|
341 return false;
|
|
342 }
|
|
343 return true;
|
|
344 }
|
|
345
|
|
346 /* Print all ipcp_lattices of all functions to F. */
|
|
347 static void
|
|
348 ipcp_print_all_lattices (FILE * f)
|
|
349 {
|
|
350 struct cgraph_node *node;
|
|
351 int i, count;
|
|
352
|
|
353 fprintf (f, "\nLattice:\n");
|
|
354 for (node = cgraph_nodes; node; node = node->next)
|
|
355 {
|
|
356 struct ipa_node_params *info;
|
|
357
|
|
358 if (!node->analyzed)
|
|
359 continue;
|
|
360 info = IPA_NODE_REF (node);
|
|
361 fprintf (f, " Node: %s:\n", cgraph_node_name (node));
|
|
362 count = ipa_get_param_count (info);
|
|
363 for (i = 0; i < count; i++)
|
|
364 {
|
|
365 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
|
|
366
|
|
367 fprintf (f, " param [%d]: ", i);
|
|
368 if (lat->type == IPA_CONST_VALUE)
|
|
369 {
|
|
370 fprintf (f, "type is CONST ");
|
|
371 print_generic_expr (f, lat->constant, 0);
|
|
372 fprintf (f, "\n");
|
|
373 }
|
|
374 else if (lat->type == IPA_TOP)
|
|
375 fprintf (f, "type is TOP\n");
|
|
376 else
|
|
377 fprintf (f, "type is BOTTOM\n");
|
|
378 }
|
|
379 }
|
|
380 }
|
|
381
|
|
382 /* Return true if this NODE is viable candidate for cloning. */
|
|
383 static bool
|
|
384 ipcp_cloning_candidate_p (struct cgraph_node *node)
|
|
385 {
|
|
386 int n_calls = 0;
|
|
387 int n_hot_calls = 0;
|
|
388 gcov_type direct_call_sum = 0;
|
|
389 struct cgraph_edge *e;
|
|
390
|
|
391 /* We never clone functions that are not visible from outside.
|
|
392 FIXME: in future we should clone such functions when they are called with
|
|
393 different constants, but current ipcp implementation is not good on this.
|
|
394 */
|
|
395 if (!node->needed || !node->analyzed)
|
|
396 return false;
|
|
397
|
|
398 if (cgraph_function_body_availability (node) <= AVAIL_OVERWRITABLE)
|
|
399 {
|
|
400 if (dump_file)
|
|
401 fprintf (dump_file, "Not considering %s for cloning; body is overwrittable.\n",
|
|
402 cgraph_node_name (node));
|
|
403 return false;
|
|
404 }
|
|
405 if (!tree_versionable_function_p (node->decl))
|
|
406 {
|
|
407 if (dump_file)
|
|
408 fprintf (dump_file, "Not considering %s for cloning; body is not versionable.\n",
|
|
409 cgraph_node_name (node));
|
|
410 return false;
|
|
411 }
|
|
412 for (e = node->callers; e; e = e->next_caller)
|
|
413 {
|
|
414 direct_call_sum += e->count;
|
|
415 n_calls ++;
|
|
416 if (cgraph_maybe_hot_edge_p (e))
|
|
417 n_hot_calls ++;
|
|
418 }
|
|
419
|
|
420 if (!n_calls)
|
|
421 {
|
|
422 if (dump_file)
|
|
423 fprintf (dump_file, "Not considering %s for cloning; no direct calls.\n",
|
|
424 cgraph_node_name (node));
|
|
425 return false;
|
|
426 }
|
|
427 if (node->local.inline_summary.self_insns < n_calls)
|
|
428 {
|
|
429 if (dump_file)
|
|
430 fprintf (dump_file, "Considering %s for cloning; code would shrink.\n",
|
|
431 cgraph_node_name (node));
|
|
432 return true;
|
|
433 }
|
|
434
|
|
435 if (!flag_ipa_cp_clone)
|
|
436 {
|
|
437 if (dump_file)
|
|
438 fprintf (dump_file, "Not considering %s for cloning; -fipa-cp-clone disabled.\n",
|
|
439 cgraph_node_name (node));
|
|
440 return false;
|
|
441 }
|
|
442
|
|
443 if (!optimize_function_for_speed_p (DECL_STRUCT_FUNCTION (node->decl)))
|
|
444 {
|
|
445 if (dump_file)
|
|
446 fprintf (dump_file, "Not considering %s for cloning; optimizing it for size.\n",
|
|
447 cgraph_node_name (node));
|
|
448 return false;
|
|
449 }
|
|
450
|
|
451 /* When profile is available and function is hot, propagate into it even if
|
|
452 calls seems cold; constant propagation can improve function's speed
|
|
453 significandly. */
|
|
454 if (max_count)
|
|
455 {
|
|
456 if (direct_call_sum > node->count * 90 / 100)
|
|
457 {
|
|
458 if (dump_file)
|
|
459 fprintf (dump_file, "Considering %s for cloning; usually called directly.\n",
|
|
460 cgraph_node_name (node));
|
|
461 return true;
|
|
462 }
|
|
463 }
|
|
464 if (!n_hot_calls)
|
|
465 {
|
|
466 if (dump_file)
|
|
467 fprintf (dump_file, "Not considering %s for cloning; no hot calls.\n",
|
|
468 cgraph_node_name (node));
|
|
469 }
|
|
470 if (dump_file)
|
|
471 fprintf (dump_file, "Considering %s for cloning.\n",
|
|
472 cgraph_node_name (node));
|
|
473 return true;
|
|
474 }
|
|
475
|
|
476 /* Initialize ipcp_lattices array. The lattices corresponding to supported
|
|
477 types (integers, real types and Fortran constants defined as const_decls)
|
|
478 are initialized to IPA_TOP, the rest of them to IPA_BOTTOM. */
|
|
479 static void
|
|
480 ipcp_initialize_node_lattices (struct cgraph_node *node)
|
|
481 {
|
|
482 int i;
|
|
483 struct ipa_node_params *info = IPA_NODE_REF (node);
|
|
484 enum ipa_lattice_type type;
|
|
485
|
|
486 if (ipa_is_called_with_var_arguments (info))
|
|
487 type = IPA_BOTTOM;
|
|
488 else if (!node->needed)
|
|
489 type = IPA_TOP;
|
|
490 /* When cloning is allowed, we can assume that externally visible functions
|
|
491 are not called. We will compensate this by cloning later. */
|
|
492 else if (ipcp_cloning_candidate_p (node))
|
|
493 type = IPA_TOP, n_cloning_candidates ++;
|
|
494 else
|
|
495 type = IPA_BOTTOM;
|
|
496
|
|
497 for (i = 0; i < ipa_get_param_count (info) ; i++)
|
|
498 ipcp_get_lattice (info, i)->type = type;
|
|
499 }
|
|
500
|
|
501 /* build INTEGER_CST tree with type TREE_TYPE and value according to LAT.
|
|
502 Return the tree. */
|
|
503 static tree
|
|
504 build_const_val (struct ipcp_lattice *lat, tree tree_type)
|
|
505 {
|
|
506 tree val;
|
|
507
|
|
508 gcc_assert (ipcp_lat_is_const (lat));
|
|
509 val = lat->constant;
|
|
510
|
|
511 if (!useless_type_conversion_p (tree_type, TREE_TYPE (val)))
|
|
512 {
|
|
513 if (fold_convertible_p (tree_type, val))
|
|
514 return fold_build1 (NOP_EXPR, tree_type, val);
|
|
515 else
|
|
516 return fold_build1 (VIEW_CONVERT_EXPR, tree_type, val);
|
|
517 }
|
|
518 return val;
|
|
519 }
|
|
520
|
|
521 /* Compute the proper scale for NODE. It is the ratio between the number of
|
|
522 direct calls (represented on the incoming cgraph_edges) and sum of all
|
|
523 invocations of NODE (represented as count in cgraph_node). */
|
|
524 static void
|
|
525 ipcp_compute_node_scale (struct cgraph_node *node)
|
|
526 {
|
|
527 gcov_type sum;
|
|
528 struct cgraph_edge *cs;
|
|
529
|
|
530 sum = 0;
|
|
531 /* Compute sum of all counts of callers. */
|
|
532 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
|
|
533 sum += cs->count;
|
|
534 if (node->count == 0)
|
|
535 ipcp_set_node_scale (node, 0);
|
|
536 else
|
|
537 ipcp_set_node_scale (node, sum * REG_BR_PROB_BASE / node->count);
|
|
538 }
|
|
539
|
|
540 /* Initialization and computation of IPCP data structures. This is the initial
|
|
541 intraprocedural analysis of functions, which gathers information to be
|
|
542 propagated later on. */
|
|
543 static void
|
|
544 ipcp_init_stage (void)
|
|
545 {
|
|
546 struct cgraph_node *node;
|
|
547 struct cgraph_edge *cs;
|
|
548
|
|
549 for (node = cgraph_nodes; node; node = node->next)
|
|
550 if (node->analyzed)
|
|
551 ipcp_analyze_node (node);
|
|
552 for (node = cgraph_nodes; node; node = node->next)
|
|
553 {
|
|
554 if (!node->analyzed)
|
|
555 continue;
|
|
556 /* building jump functions */
|
|
557 for (cs = node->callees; cs; cs = cs->next_callee)
|
|
558 {
|
|
559 if (!cs->callee->analyzed)
|
|
560 continue;
|
|
561 ipa_count_arguments (cs);
|
|
562 if (ipa_get_cs_argument_count (IPA_EDGE_REF (cs))
|
|
563 != ipa_get_param_count (IPA_NODE_REF (cs->callee)))
|
|
564 {
|
|
565 /* Handle cases of functions with
|
|
566 a variable number of parameters. */
|
|
567 ipa_set_called_with_variable_arg (IPA_NODE_REF (cs->callee));
|
|
568 if (flag_indirect_inlining)
|
|
569 ipa_compute_jump_functions (cs);
|
|
570 }
|
|
571 else
|
|
572 ipa_compute_jump_functions (cs);
|
|
573 }
|
|
574 }
|
|
575 }
|
|
576
|
|
577 /* Return true if there are some formal parameters whose value is IPA_TOP (in
|
|
578 the whole compilation unit). Change their values to IPA_BOTTOM, since they
|
|
579 most probably get their values from outside of this compilation unit. */
|
|
580 static bool
|
|
581 ipcp_change_tops_to_bottom (void)
|
|
582 {
|
|
583 int i, count;
|
|
584 struct cgraph_node *node;
|
|
585 bool prop_again;
|
|
586
|
|
587 prop_again = false;
|
|
588 for (node = cgraph_nodes; node; node = node->next)
|
|
589 {
|
|
590 struct ipa_node_params *info = IPA_NODE_REF (node);
|
|
591 count = ipa_get_param_count (info);
|
|
592 for (i = 0; i < count; i++)
|
|
593 {
|
|
594 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
|
|
595 if (lat->type == IPA_TOP)
|
|
596 {
|
|
597 prop_again = true;
|
|
598 if (dump_file)
|
|
599 {
|
|
600 fprintf (dump_file, "Forcing param ");
|
|
601 print_generic_expr (dump_file, ipa_get_param (info, i), 0);
|
|
602 fprintf (dump_file, " of node %s to bottom.\n",
|
|
603 cgraph_node_name (node));
|
|
604 }
|
|
605 lat->type = IPA_BOTTOM;
|
|
606 }
|
|
607 }
|
|
608 }
|
|
609 return prop_again;
|
|
610 }
|
|
611
|
|
612 /* Interprocedural analysis. The algorithm propagates constants from the
|
|
613 caller's parameters to the callee's arguments. */
|
|
614 static void
|
|
615 ipcp_propagate_stage (void)
|
|
616 {
|
|
617 int i;
|
|
618 struct ipcp_lattice inc_lat = { IPA_BOTTOM, NULL };
|
|
619 struct ipcp_lattice new_lat = { IPA_BOTTOM, NULL };
|
|
620 struct ipcp_lattice *dest_lat;
|
|
621 struct cgraph_edge *cs;
|
|
622 struct ipa_jump_func *jump_func;
|
|
623 struct ipa_func_list *wl;
|
|
624 int count;
|
|
625
|
|
626 ipa_check_create_node_params ();
|
|
627 ipa_check_create_edge_args ();
|
|
628
|
|
629 /* Initialize worklist to contain all functions. */
|
|
630 wl = ipa_init_func_list ();
|
|
631 while (wl)
|
|
632 {
|
|
633 struct cgraph_node *node = ipa_pop_func_from_list (&wl);
|
|
634 struct ipa_node_params *info = IPA_NODE_REF (node);
|
|
635
|
|
636 for (cs = node->callees; cs; cs = cs->next_callee)
|
|
637 {
|
|
638 struct ipa_node_params *callee_info = IPA_NODE_REF (cs->callee);
|
|
639 struct ipa_edge_args *args = IPA_EDGE_REF (cs);
|
|
640
|
|
641 if (ipa_is_called_with_var_arguments (callee_info))
|
|
642 continue;
|
|
643
|
|
644 count = ipa_get_cs_argument_count (args);
|
|
645 for (i = 0; i < count; i++)
|
|
646 {
|
|
647 jump_func = ipa_get_ith_jump_func (args, i);
|
|
648 ipcp_lattice_from_jfunc (info, &inc_lat, jump_func);
|
|
649 dest_lat = ipcp_get_lattice (callee_info, i);
|
|
650 ipa_lattice_meet (&new_lat, &inc_lat, dest_lat);
|
|
651 if (ipcp_lattice_changed (&new_lat, dest_lat))
|
|
652 {
|
|
653 dest_lat->type = new_lat.type;
|
|
654 dest_lat->constant = new_lat.constant;
|
|
655 ipa_push_func_to_list (&wl, cs->callee);
|
|
656 }
|
|
657 }
|
|
658 }
|
|
659 }
|
|
660 }
|
|
661
|
|
662 /* Call the constant propagation algorithm and re-call it if necessary
|
|
663 (if there are undetermined values left). */
|
|
664 static void
|
|
665 ipcp_iterate_stage (void)
|
|
666 {
|
|
667 struct cgraph_node *node;
|
|
668 n_cloning_candidates = 0;
|
|
669
|
|
670 if (dump_file)
|
|
671 fprintf (dump_file, "\nIPA iterate stage:\n\n");
|
|
672 for (node = cgraph_nodes; node; node = node->next)
|
|
673 {
|
|
674 ipcp_initialize_node_lattices (node);
|
|
675 ipcp_compute_node_scale (node);
|
|
676 }
|
|
677 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
678 {
|
|
679 ipcp_print_all_lattices (dump_file);
|
|
680 ipcp_function_scale_print (dump_file);
|
|
681 }
|
|
682
|
|
683 ipcp_propagate_stage ();
|
|
684 if (ipcp_change_tops_to_bottom ())
|
|
685 /* Some lattices have changed from IPA_TOP to IPA_BOTTOM.
|
|
686 This change should be propagated. */
|
|
687 {
|
|
688 gcc_assert (n_cloning_candidates);
|
|
689 ipcp_propagate_stage ();
|
|
690 }
|
|
691 if (dump_file)
|
|
692 {
|
|
693 fprintf (dump_file, "\nIPA lattices after propagation:\n");
|
|
694 ipcp_print_all_lattices (dump_file);
|
|
695 if (dump_flags & TDF_DETAILS)
|
|
696 ipcp_print_profile_data (dump_file);
|
|
697 }
|
|
698 }
|
|
699
|
|
700 /* Check conditions to forbid constant insertion to function described by
|
|
701 NODE. */
|
|
702 static inline bool
|
|
703 ipcp_node_modifiable_p (struct cgraph_node *node)
|
|
704 {
|
|
705 /* Once we will be able to do in-place replacement, we can be more
|
|
706 lax here. */
|
|
707 return tree_versionable_function_p (node->decl);
|
|
708 }
|
|
709
|
|
710 /* Print count scale data structures. */
|
|
711 static void
|
|
712 ipcp_function_scale_print (FILE * f)
|
|
713 {
|
|
714 struct cgraph_node *node;
|
|
715
|
|
716 for (node = cgraph_nodes; node; node = node->next)
|
|
717 {
|
|
718 if (!node->analyzed)
|
|
719 continue;
|
|
720 fprintf (f, "printing scale for %s: ", cgraph_node_name (node));
|
|
721 fprintf (f, "value is " HOST_WIDE_INT_PRINT_DEC
|
|
722 " \n", (HOST_WIDE_INT) ipcp_get_node_scale (node));
|
|
723 }
|
|
724 }
|
|
725
|
|
726 /* Print counts of all cgraph nodes. */
|
|
727 static void
|
|
728 ipcp_print_func_profile_counts (FILE * f)
|
|
729 {
|
|
730 struct cgraph_node *node;
|
|
731
|
|
732 for (node = cgraph_nodes; node; node = node->next)
|
|
733 {
|
|
734 fprintf (f, "function %s: ", cgraph_node_name (node));
|
|
735 fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC
|
|
736 " \n", (HOST_WIDE_INT) node->count);
|
|
737 }
|
|
738 }
|
|
739
|
|
740 /* Print counts of all cgraph edges. */
|
|
741 static void
|
|
742 ipcp_print_call_profile_counts (FILE * f)
|
|
743 {
|
|
744 struct cgraph_node *node;
|
|
745 struct cgraph_edge *cs;
|
|
746
|
|
747 for (node = cgraph_nodes; node; node = node->next)
|
|
748 {
|
|
749 for (cs = node->callees; cs; cs = cs->next_callee)
|
|
750 {
|
|
751 fprintf (f, "%s -> %s ", cgraph_node_name (cs->caller),
|
|
752 cgraph_node_name (cs->callee));
|
|
753 fprintf (f, "count is " HOST_WIDE_INT_PRINT_DEC " \n",
|
|
754 (HOST_WIDE_INT) cs->count);
|
|
755 }
|
|
756 }
|
|
757 }
|
|
758
|
|
759 /* Print all counts and probabilities of cfg edges of all functions. */
|
|
760 static void
|
|
761 ipcp_print_edge_profiles (FILE * f)
|
|
762 {
|
|
763 struct cgraph_node *node;
|
|
764 basic_block bb;
|
|
765 edge_iterator ei;
|
|
766 edge e;
|
|
767
|
|
768 for (node = cgraph_nodes; node; node = node->next)
|
|
769 {
|
|
770 fprintf (f, "function %s: \n", cgraph_node_name (node));
|
|
771 if (node->analyzed)
|
|
772 {
|
|
773 bb =
|
|
774 ENTRY_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION (node->decl));
|
|
775 fprintf (f, "ENTRY: ");
|
|
776 fprintf (f, " " HOST_WIDE_INT_PRINT_DEC
|
|
777 " %d\n", (HOST_WIDE_INT) bb->count, bb->frequency);
|
|
778
|
|
779 if (bb->succs)
|
|
780 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
781 {
|
|
782 if (e->dest ==
|
|
783 EXIT_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION
|
|
784 (node->decl)))
|
|
785 fprintf (f, "edge ENTRY -> EXIT, Count");
|
|
786 else
|
|
787 fprintf (f, "edge ENTRY -> %d, Count", e->dest->index);
|
|
788 fprintf (f, " " HOST_WIDE_INT_PRINT_DEC
|
|
789 " Prob %d\n", (HOST_WIDE_INT) e->count,
|
|
790 e->probability);
|
|
791 }
|
|
792 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
|
|
793 {
|
|
794 fprintf (f, "bb[%d]: ", bb->index);
|
|
795 fprintf (f, " " HOST_WIDE_INT_PRINT_DEC
|
|
796 " %d\n", (HOST_WIDE_INT) bb->count, bb->frequency);
|
|
797 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
798 {
|
|
799 if (e->dest ==
|
|
800 EXIT_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION
|
|
801 (node->decl)))
|
|
802 fprintf (f, "edge %d -> EXIT, Count", e->src->index);
|
|
803 else
|
|
804 fprintf (f, "edge %d -> %d, Count", e->src->index,
|
|
805 e->dest->index);
|
|
806 fprintf (f, " " HOST_WIDE_INT_PRINT_DEC " Prob %d\n",
|
|
807 (HOST_WIDE_INT) e->count, e->probability);
|
|
808 }
|
|
809 }
|
|
810 }
|
|
811 }
|
|
812 }
|
|
813
|
|
814 /* Print counts and frequencies for all basic blocks of all functions. */
|
|
815 static void
|
|
816 ipcp_print_bb_profiles (FILE * f)
|
|
817 {
|
|
818 basic_block bb;
|
|
819 struct cgraph_node *node;
|
|
820
|
|
821 for (node = cgraph_nodes; node; node = node->next)
|
|
822 {
|
|
823 fprintf (f, "function %s: \n", cgraph_node_name (node));
|
|
824 if (node->analyzed)
|
|
825 {
|
|
826 bb =
|
|
827 ENTRY_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION (node->decl));
|
|
828 fprintf (f, "ENTRY: Count");
|
|
829 fprintf (f, " " HOST_WIDE_INT_PRINT_DEC
|
|
830 " Frequency %d\n", (HOST_WIDE_INT) bb->count,
|
|
831 bb->frequency);
|
|
832
|
|
833 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
|
|
834 {
|
|
835 fprintf (f, "bb[%d]: Count", bb->index);
|
|
836 fprintf (f, " " HOST_WIDE_INT_PRINT_DEC
|
|
837 " Frequency %d\n", (HOST_WIDE_INT) bb->count,
|
|
838 bb->frequency);
|
|
839 }
|
|
840 bb =
|
|
841 EXIT_BLOCK_PTR_FOR_FUNCTION (DECL_STRUCT_FUNCTION (node->decl));
|
|
842 fprintf (f, "EXIT: Count");
|
|
843 fprintf (f, " " HOST_WIDE_INT_PRINT_DEC
|
|
844 " Frequency %d\n", (HOST_WIDE_INT) bb->count,
|
|
845 bb->frequency);
|
|
846
|
|
847 }
|
|
848 }
|
|
849 }
|
|
850
|
|
851 /* Print profile info for all functions. */
|
|
852 static void
|
|
853 ipcp_print_profile_data (FILE * f)
|
|
854 {
|
|
855 fprintf (f, "\nNODE COUNTS :\n");
|
|
856 ipcp_print_func_profile_counts (f);
|
|
857 fprintf (f, "\nCS COUNTS stage:\n");
|
|
858 ipcp_print_call_profile_counts (f);
|
|
859 fprintf (f, "\nBB COUNTS and FREQUENCIES :\n");
|
|
860 ipcp_print_bb_profiles (f);
|
|
861 fprintf (f, "\nCFG EDGES COUNTS and PROBABILITIES :\n");
|
|
862 ipcp_print_edge_profiles (f);
|
|
863 }
|
|
864
|
|
865 /* Build and initialize ipa_replace_map struct according to LAT. This struct is
|
|
866 processed by versioning, which operates according to the flags set.
|
|
867 PARM_TREE is the formal parameter found to be constant. LAT represents the
|
|
868 constant. */
|
|
869 static struct ipa_replace_map *
|
|
870 ipcp_create_replace_map (tree parm_tree, struct ipcp_lattice *lat)
|
|
871 {
|
|
872 struct ipa_replace_map *replace_map;
|
|
873 tree const_val;
|
|
874
|
|
875 replace_map = XCNEW (struct ipa_replace_map);
|
|
876 const_val = build_const_val (lat, TREE_TYPE (parm_tree));
|
|
877 if (dump_file)
|
|
878 {
|
|
879 fprintf (dump_file, " replacing param ");
|
|
880 print_generic_expr (dump_file, parm_tree, 0);
|
|
881 fprintf (dump_file, " with const ");
|
|
882 print_generic_expr (dump_file, const_val, 0);
|
|
883 fprintf (dump_file, "\n");
|
|
884 }
|
|
885 replace_map->old_tree = parm_tree;
|
|
886 replace_map->new_tree = const_val;
|
|
887 replace_map->replace_p = true;
|
|
888 replace_map->ref_p = false;
|
|
889
|
|
890 return replace_map;
|
|
891 }
|
|
892
|
|
893 /* Return true if this callsite should be redirected to the original callee
|
|
894 (instead of the cloned one). */
|
|
895 static bool
|
|
896 ipcp_need_redirect_p (struct cgraph_edge *cs)
|
|
897 {
|
|
898 struct ipa_node_params *orig_callee_info;
|
|
899 int i, count;
|
|
900 struct ipa_jump_func *jump_func;
|
|
901 struct cgraph_node *node = cs->callee, *orig;
|
|
902
|
|
903 if (!n_cloning_candidates)
|
|
904 return false;
|
|
905
|
|
906 if ((orig = ipcp_get_orig_node (node)) != NULL)
|
|
907 node = orig;
|
|
908 if (ipcp_get_orig_node (cs->caller))
|
|
909 return false;
|
|
910
|
|
911 orig_callee_info = IPA_NODE_REF (node);
|
|
912 count = ipa_get_param_count (orig_callee_info);
|
|
913 for (i = 0; i < count; i++)
|
|
914 {
|
|
915 struct ipcp_lattice *lat = ipcp_get_lattice (orig_callee_info, i);
|
|
916 if (ipcp_lat_is_const (lat))
|
|
917 {
|
|
918 jump_func = ipa_get_ith_jump_func (IPA_EDGE_REF (cs), i);
|
|
919 if (jump_func->type != IPA_CONST)
|
|
920 return true;
|
|
921 }
|
|
922 }
|
|
923
|
|
924 return false;
|
|
925 }
|
|
926
|
|
927 /* Fix the callsites and the call graph after function cloning was done. */
|
|
928 static void
|
|
929 ipcp_update_callgraph (void)
|
|
930 {
|
|
931 struct cgraph_node *node;
|
|
932
|
|
933 for (node = cgraph_nodes; node; node = node->next)
|
|
934 if (node->analyzed && ipcp_node_is_clone (node))
|
|
935 {
|
|
936 bitmap args_to_skip = BITMAP_ALLOC (NULL);
|
|
937 struct cgraph_node *orig_node = ipcp_get_orig_node (node);
|
|
938 struct ipa_node_params *info = IPA_NODE_REF (orig_node);
|
|
939 int i, count = ipa_get_param_count (info);
|
|
940 struct cgraph_edge *cs, *next;
|
|
941
|
|
942 for (i = 0; i < count; i++)
|
|
943 {
|
|
944 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
|
|
945 tree parm_tree = ipa_get_param (info, i);
|
|
946
|
|
947 /* We can proactively remove obviously unused arguments. */
|
|
948 if (is_gimple_reg (parm_tree)
|
|
949 && !gimple_default_def (DECL_STRUCT_FUNCTION (orig_node->decl),
|
|
950 parm_tree))
|
|
951 {
|
|
952 bitmap_set_bit (args_to_skip, i);
|
|
953 continue;
|
|
954 }
|
|
955
|
|
956 if (lat->type == IPA_CONST_VALUE)
|
|
957 bitmap_set_bit (args_to_skip, i);
|
|
958 }
|
|
959 for (cs = node->callers; cs; cs = next)
|
|
960 {
|
|
961 next = cs->next_caller;
|
|
962 if (ipcp_node_is_clone (cs->caller) || !ipcp_need_redirect_p (cs))
|
|
963 {
|
|
964 gimple new_stmt;
|
|
965 gimple_stmt_iterator gsi;
|
|
966
|
|
967 current_function_decl = cs->caller->decl;
|
|
968 push_cfun (DECL_STRUCT_FUNCTION (cs->caller->decl));
|
|
969
|
|
970 new_stmt = gimple_call_copy_skip_args (cs->call_stmt,
|
|
971 args_to_skip);
|
|
972 gsi = gsi_for_stmt (cs->call_stmt);
|
|
973 gsi_replace (&gsi, new_stmt, true);
|
|
974 cgraph_set_call_stmt (cs, new_stmt);
|
|
975 pop_cfun ();
|
|
976 current_function_decl = NULL;
|
|
977 }
|
|
978 else
|
|
979 {
|
|
980 cgraph_redirect_edge_callee (cs, orig_node);
|
|
981 gimple_call_set_fndecl (cs->call_stmt, orig_node->decl);
|
|
982 }
|
|
983 }
|
|
984 }
|
|
985 }
|
|
986
|
|
987 /* Update all cfg basic blocks in NODE according to SCALE. */
|
|
988 static void
|
|
989 ipcp_update_bb_counts (struct cgraph_node *node, gcov_type scale)
|
|
990 {
|
|
991 basic_block bb;
|
|
992
|
|
993 FOR_ALL_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
|
|
994 bb->count = bb->count * scale / REG_BR_PROB_BASE;
|
|
995 }
|
|
996
|
|
997 /* Update all cfg edges in NODE according to SCALE. */
|
|
998 static void
|
|
999 ipcp_update_edges_counts (struct cgraph_node *node, gcov_type scale)
|
|
1000 {
|
|
1001 basic_block bb;
|
|
1002 edge_iterator ei;
|
|
1003 edge e;
|
|
1004
|
|
1005 FOR_ALL_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
|
|
1006 FOR_EACH_EDGE (e, ei, bb->succs)
|
|
1007 e->count = e->count * scale / REG_BR_PROB_BASE;
|
|
1008 }
|
|
1009
|
|
1010 /* Update profiling info for versioned functions and the functions they were
|
|
1011 versioned from. */
|
|
1012 static void
|
|
1013 ipcp_update_profiling (void)
|
|
1014 {
|
|
1015 struct cgraph_node *node, *orig_node;
|
|
1016 gcov_type scale, scale_complement;
|
|
1017 struct cgraph_edge *cs;
|
|
1018
|
|
1019 for (node = cgraph_nodes; node; node = node->next)
|
|
1020 {
|
|
1021 if (ipcp_node_is_clone (node))
|
|
1022 {
|
|
1023 orig_node = ipcp_get_orig_node (node);
|
|
1024 scale = ipcp_get_node_scale (orig_node);
|
|
1025 node->count = orig_node->count * scale / REG_BR_PROB_BASE;
|
|
1026 scale_complement = REG_BR_PROB_BASE - scale;
|
|
1027 orig_node->count =
|
|
1028 orig_node->count * scale_complement / REG_BR_PROB_BASE;
|
|
1029 for (cs = node->callees; cs; cs = cs->next_callee)
|
|
1030 cs->count = cs->count * scale / REG_BR_PROB_BASE;
|
|
1031 for (cs = orig_node->callees; cs; cs = cs->next_callee)
|
|
1032 cs->count = cs->count * scale_complement / REG_BR_PROB_BASE;
|
|
1033 ipcp_update_bb_counts (node, scale);
|
|
1034 ipcp_update_bb_counts (orig_node, scale_complement);
|
|
1035 ipcp_update_edges_counts (node, scale);
|
|
1036 ipcp_update_edges_counts (orig_node, scale_complement);
|
|
1037 }
|
|
1038 }
|
|
1039 }
|
|
1040
|
|
1041 /* If NODE was cloned, how much would program grow? */
|
|
1042 static long
|
|
1043 ipcp_estimate_growth (struct cgraph_node *node)
|
|
1044 {
|
|
1045 struct cgraph_edge *cs;
|
|
1046 int redirectable_node_callers = 0;
|
|
1047 int removable_args = 0;
|
|
1048 bool need_original = node->needed;
|
|
1049 struct ipa_node_params *info;
|
|
1050 int i, count;
|
|
1051 int growth;
|
|
1052
|
|
1053 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
|
|
1054 if (cs->caller == node || !ipcp_need_redirect_p (cs))
|
|
1055 redirectable_node_callers++;
|
|
1056 else
|
|
1057 need_original = true;
|
|
1058
|
|
1059 /* If we will be able to fully replace orignal node, we never increase
|
|
1060 program size. */
|
|
1061 if (!need_original)
|
|
1062 return 0;
|
|
1063
|
|
1064 info = IPA_NODE_REF (node);
|
|
1065 count = ipa_get_param_count (info);
|
|
1066 for (i = 0; i < count; i++)
|
|
1067 {
|
|
1068 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
|
|
1069 tree parm_tree = ipa_get_param (info, i);
|
|
1070
|
|
1071 /* We can proactively remove obviously unused arguments. */
|
|
1072 if (is_gimple_reg (parm_tree)
|
|
1073 && !gimple_default_def (DECL_STRUCT_FUNCTION (node->decl),
|
|
1074 parm_tree))
|
|
1075 removable_args++;
|
|
1076
|
|
1077 if (lat->type == IPA_CONST_VALUE)
|
|
1078 removable_args++;
|
|
1079 }
|
|
1080
|
|
1081 /* We make just very simple estimate of savings for removal of operand from
|
|
1082 call site. Precise cost is dificult to get, as our size metric counts
|
|
1083 constants and moves as free. Generally we are looking for cases that
|
|
1084 small function is called very many times. */
|
|
1085 growth = node->local.inline_summary.self_insns
|
|
1086 - removable_args * redirectable_node_callers;
|
|
1087 if (growth < 0)
|
|
1088 return 0;
|
|
1089 return growth;
|
|
1090 }
|
|
1091
|
|
1092
|
|
1093 /* Estimate cost of cloning NODE. */
|
|
1094 static long
|
|
1095 ipcp_estimate_cloning_cost (struct cgraph_node *node)
|
|
1096 {
|
|
1097 int freq_sum = 1;
|
|
1098 gcov_type count_sum = 1;
|
|
1099 struct cgraph_edge *e;
|
|
1100 int cost;
|
|
1101
|
|
1102 cost = ipcp_estimate_growth (node) * 1000;
|
|
1103 if (!cost)
|
|
1104 {
|
|
1105 if (dump_file)
|
|
1106 fprintf (dump_file, "Versioning of %s will save code size\n",
|
|
1107 cgraph_node_name (node));
|
|
1108 return 0;
|
|
1109 }
|
|
1110
|
|
1111 for (e = node->callers; e; e = e->next_caller)
|
|
1112 if (!bitmap_bit_p (dead_nodes, e->caller->uid)
|
|
1113 && !ipcp_need_redirect_p (e))
|
|
1114 {
|
|
1115 count_sum += e->count;
|
|
1116 freq_sum += e->frequency + 1;
|
|
1117 }
|
|
1118
|
|
1119 if (max_count)
|
|
1120 cost /= count_sum * 1000 / max_count + 1;
|
|
1121 else
|
|
1122 cost /= freq_sum * 1000 / REG_BR_PROB_BASE + 1;
|
|
1123 if (dump_file)
|
|
1124 fprintf (dump_file, "Cost of versioning %s is %i, (size: %i, freq: %i)\n",
|
|
1125 cgraph_node_name (node), cost, node->local.inline_summary.self_insns,
|
|
1126 freq_sum);
|
|
1127 return cost + 1;
|
|
1128 }
|
|
1129
|
|
1130 /* Return number of live constant parameters. */
|
|
1131 static int
|
|
1132 ipcp_const_param_count (struct cgraph_node *node)
|
|
1133 {
|
|
1134 int const_param = 0;
|
|
1135 struct ipa_node_params *info = IPA_NODE_REF (node);
|
|
1136 int count = ipa_get_param_count (info);
|
|
1137 int i;
|
|
1138
|
|
1139 for (i = 0; i < count; i++)
|
|
1140 {
|
|
1141 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
|
|
1142 tree parm_tree = ipa_get_param (info, i);
|
|
1143 if (ipcp_lat_is_insertable (lat)
|
|
1144 /* Do not count obviously unused arguments. */
|
|
1145 && (!is_gimple_reg (parm_tree)
|
|
1146 || gimple_default_def (DECL_STRUCT_FUNCTION (node->decl),
|
|
1147 parm_tree)))
|
|
1148 const_param++;
|
|
1149 }
|
|
1150 return const_param;
|
|
1151 }
|
|
1152
|
|
1153 /* Propagate the constant parameters found by ipcp_iterate_stage()
|
|
1154 to the function's code. */
|
|
1155 static void
|
|
1156 ipcp_insert_stage (void)
|
|
1157 {
|
|
1158 struct cgraph_node *node, *node1 = NULL;
|
|
1159 int i;
|
|
1160 VEC (cgraph_edge_p, heap) * redirect_callers;
|
|
1161 varray_type replace_trees;
|
|
1162 int node_callers, count;
|
|
1163 tree parm_tree;
|
|
1164 struct ipa_replace_map *replace_param;
|
|
1165 fibheap_t heap;
|
|
1166 long overall_insns = 0, new_insns = 0;
|
|
1167 long max_new_insns;
|
|
1168
|
|
1169 ipa_check_create_node_params ();
|
|
1170 ipa_check_create_edge_args ();
|
|
1171 if (dump_file)
|
|
1172 fprintf (dump_file, "\nIPA insert stage:\n\n");
|
|
1173
|
|
1174 dead_nodes = BITMAP_ALLOC (NULL);
|
|
1175
|
|
1176 for (node = cgraph_nodes; node; node = node->next)
|
|
1177 if (node->analyzed)
|
|
1178 {
|
|
1179 if (node->count > max_count)
|
|
1180 max_count = node->count;
|
|
1181 overall_insns += node->local.inline_summary.self_insns;
|
|
1182 }
|
|
1183
|
|
1184 max_new_insns = overall_insns;
|
|
1185 if (max_new_insns < PARAM_VALUE (PARAM_LARGE_UNIT_INSNS))
|
|
1186 max_new_insns = PARAM_VALUE (PARAM_LARGE_UNIT_INSNS);
|
|
1187 max_new_insns = max_new_insns * PARAM_VALUE (PARAM_IPCP_UNIT_GROWTH) / 100 + 1;
|
|
1188
|
|
1189 /* First collect all functions we proved to have constant arguments to heap. */
|
|
1190 heap = fibheap_new ();
|
|
1191 for (node = cgraph_nodes; node; node = node->next)
|
|
1192 {
|
|
1193 struct ipa_node_params *info;
|
|
1194 /* Propagation of the constant is forbidden in certain conditions. */
|
|
1195 if (!node->analyzed || !ipcp_node_modifiable_p (node))
|
|
1196 continue;
|
|
1197 info = IPA_NODE_REF (node);
|
|
1198 if (ipa_is_called_with_var_arguments (info))
|
|
1199 continue;
|
|
1200 if (ipcp_const_param_count (node))
|
|
1201 node->aux = fibheap_insert (heap, ipcp_estimate_cloning_cost (node), node);
|
|
1202 }
|
|
1203
|
|
1204 /* Now clone in priority order until code size growth limits are met or
|
|
1205 heap is emptied. */
|
|
1206 while (!fibheap_empty (heap))
|
|
1207 {
|
|
1208 struct ipa_node_params *info;
|
|
1209 int growth = 0;
|
|
1210 bitmap args_to_skip;
|
|
1211 struct cgraph_edge *cs;
|
|
1212
|
|
1213 node = (struct cgraph_node *)fibheap_extract_min (heap);
|
|
1214 node->aux = NULL;
|
|
1215 if (dump_file)
|
|
1216 fprintf (dump_file, "considering function %s\n",
|
|
1217 cgraph_node_name (node));
|
|
1218
|
|
1219 growth = ipcp_estimate_growth (node);
|
|
1220
|
|
1221 if (new_insns + growth > max_new_insns)
|
|
1222 break;
|
|
1223 if (growth
|
|
1224 && optimize_function_for_size_p (DECL_STRUCT_FUNCTION (node->decl)))
|
|
1225 {
|
|
1226 if (dump_file)
|
|
1227 fprintf (dump_file, "Not versioning, cold code would grow");
|
|
1228 continue;
|
|
1229 }
|
|
1230
|
|
1231 new_insns += growth;
|
|
1232
|
|
1233 /* Look if original function becomes dead after clonning. */
|
|
1234 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
|
|
1235 if (cs->caller == node || ipcp_need_redirect_p (cs))
|
|
1236 break;
|
|
1237 if (!cs && !node->needed)
|
|
1238 bitmap_set_bit (dead_nodes, node->uid);
|
|
1239
|
|
1240 info = IPA_NODE_REF (node);
|
|
1241 count = ipa_get_param_count (info);
|
|
1242
|
|
1243 VARRAY_GENERIC_PTR_INIT (replace_trees, ipcp_const_param_count (node),
|
|
1244 "replace_trees");
|
|
1245 args_to_skip = BITMAP_ALLOC (NULL);
|
|
1246 for (i = 0; i < count; i++)
|
|
1247 {
|
|
1248 struct ipcp_lattice *lat = ipcp_get_lattice (info, i);
|
|
1249 parm_tree = ipa_get_param (info, i);
|
|
1250
|
|
1251 /* We can proactively remove obviously unused arguments. */
|
|
1252 if (is_gimple_reg (parm_tree)
|
|
1253 && !gimple_default_def (DECL_STRUCT_FUNCTION (node->decl),
|
|
1254 parm_tree))
|
|
1255 {
|
|
1256 bitmap_set_bit (args_to_skip, i);
|
|
1257 continue;
|
|
1258 }
|
|
1259
|
|
1260 if (lat->type == IPA_CONST_VALUE)
|
|
1261 {
|
|
1262 replace_param =
|
|
1263 ipcp_create_replace_map (parm_tree, lat);
|
|
1264 VARRAY_PUSH_GENERIC_PTR (replace_trees, replace_param);
|
|
1265 bitmap_set_bit (args_to_skip, i);
|
|
1266 }
|
|
1267 }
|
|
1268
|
|
1269 /* Compute how many callers node has. */
|
|
1270 node_callers = 0;
|
|
1271 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
|
|
1272 node_callers++;
|
|
1273 redirect_callers = VEC_alloc (cgraph_edge_p, heap, node_callers);
|
|
1274 for (cs = node->callers; cs != NULL; cs = cs->next_caller)
|
|
1275 VEC_quick_push (cgraph_edge_p, redirect_callers, cs);
|
|
1276
|
|
1277 /* Redirecting all the callers of the node to the
|
|
1278 new versioned node. */
|
|
1279 node1 =
|
|
1280 cgraph_function_versioning (node, redirect_callers, replace_trees,
|
|
1281 args_to_skip);
|
|
1282 BITMAP_FREE (args_to_skip);
|
|
1283 VEC_free (cgraph_edge_p, heap, redirect_callers);
|
|
1284 VARRAY_CLEAR (replace_trees);
|
|
1285 if (node1 == NULL)
|
|
1286 continue;
|
|
1287 if (dump_file)
|
|
1288 fprintf (dump_file, "versioned function %s with growth %i, overall %i\n",
|
|
1289 cgraph_node_name (node), (int)growth, (int)new_insns);
|
|
1290 ipcp_init_cloned_node (node, node1);
|
|
1291
|
|
1292 /* We've possibly introduced direct calls. */
|
|
1293 ipcp_update_cloned_node (node1);
|
|
1294
|
|
1295 if (dump_file)
|
|
1296 dump_function_to_file (node1->decl, dump_file, dump_flags);
|
|
1297
|
|
1298 for (cs = node->callees; cs; cs = cs->next_callee)
|
|
1299 if (cs->callee->aux)
|
|
1300 {
|
|
1301 fibheap_delete_node (heap, (fibnode_t) cs->callee->aux);
|
|
1302 cs->callee->aux = fibheap_insert (heap,
|
|
1303 ipcp_estimate_cloning_cost (cs->callee),
|
|
1304 cs->callee);
|
|
1305 }
|
|
1306 }
|
|
1307
|
|
1308 while (!fibheap_empty (heap))
|
|
1309 {
|
|
1310 if (dump_file)
|
|
1311 fprintf (dump_file, "skipping function %s\n",
|
|
1312 cgraph_node_name (node));
|
|
1313 node = (struct cgraph_node *) fibheap_extract_min (heap);
|
|
1314 node->aux = NULL;
|
|
1315 }
|
|
1316 fibheap_delete (heap);
|
|
1317 BITMAP_FREE (dead_nodes);
|
|
1318 ipcp_update_callgraph ();
|
|
1319 ipcp_update_profiling ();
|
|
1320 }
|
|
1321
|
|
1322 /* The IPCP driver. */
|
|
1323 static unsigned int
|
|
1324 ipcp_driver (void)
|
|
1325 {
|
|
1326 cgraph_remove_unreachable_nodes (true,dump_file);
|
|
1327 if (dump_file)
|
|
1328 {
|
|
1329 fprintf (dump_file, "\nIPA structures before propagation:\n");
|
|
1330 if (dump_flags & TDF_DETAILS)
|
|
1331 ipa_print_all_params (dump_file);
|
|
1332 ipa_print_all_jump_functions (dump_file);
|
|
1333 }
|
|
1334 /* 2. Do the interprocedural propagation. */
|
|
1335 ipcp_iterate_stage ();
|
|
1336 /* 3. Insert the constants found to the functions. */
|
|
1337 ipcp_insert_stage ();
|
|
1338 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
1339 {
|
|
1340 fprintf (dump_file, "\nProfiling info after insert stage:\n");
|
|
1341 ipcp_print_profile_data (dump_file);
|
|
1342 }
|
|
1343 /* Free all IPCP structures. */
|
|
1344 free_all_ipa_structures_after_ipa_cp ();
|
|
1345 if (dump_file)
|
|
1346 fprintf (dump_file, "\nIPA constant propagation end\n");
|
|
1347 return 0;
|
|
1348 }
|
|
1349
|
|
1350 /* Note function body size. */
|
|
1351 static void
|
|
1352 ipcp_generate_summary (void)
|
|
1353 {
|
|
1354 if (dump_file)
|
|
1355 fprintf (dump_file, "\nIPA constant propagation start:\n");
|
|
1356 ipa_check_create_node_params ();
|
|
1357 ipa_check_create_edge_args ();
|
|
1358 ipa_register_cgraph_hooks ();
|
|
1359 /* 1. Call the init stage to initialize
|
|
1360 the ipa_node_params and ipa_edge_args structures. */
|
|
1361 ipcp_init_stage ();
|
|
1362 }
|
|
1363
|
|
1364 /* Gate for IPCP optimization. */
|
|
1365 static bool
|
|
1366 cgraph_gate_cp (void)
|
|
1367 {
|
|
1368 return flag_ipa_cp;
|
|
1369 }
|
|
1370
|
|
1371 struct ipa_opt_pass pass_ipa_cp =
|
|
1372 {
|
|
1373 {
|
|
1374 IPA_PASS,
|
|
1375 "cp", /* name */
|
|
1376 cgraph_gate_cp, /* gate */
|
|
1377 ipcp_driver, /* execute */
|
|
1378 NULL, /* sub */
|
|
1379 NULL, /* next */
|
|
1380 0, /* static_pass_number */
|
|
1381 TV_IPA_CONSTANT_PROP, /* tv_id */
|
|
1382 0, /* properties_required */
|
|
1383 PROP_trees, /* properties_provided */
|
|
1384 0, /* properties_destroyed */
|
|
1385 0, /* todo_flags_start */
|
|
1386 TODO_dump_cgraph | TODO_dump_func |
|
|
1387 TODO_remove_functions /* todo_flags_finish */
|
|
1388 },
|
|
1389 ipcp_generate_summary, /* generate_summary */
|
|
1390 NULL, /* write_summary */
|
|
1391 NULL, /* read_summary */
|
|
1392 NULL, /* function_read_summary */
|
|
1393 0, /* TODOs */
|
|
1394 NULL, /* function_transform */
|
|
1395 NULL, /* variable_transform */
|
|
1396 };
|