111
|
1 /* Basic IPA optimizations based on profile.
|
131
|
2 Copyright (C) 2003-2018 Free Software Foundation, Inc.
|
111
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify it under
|
|
7 the terms of the GNU General Public License as published by the Free
|
|
8 Software Foundation; either version 3, or (at your option) any later
|
|
9 version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with GCC; see the file COPYING3. If not see
|
|
18 <http://www.gnu.org/licenses/>. */
|
|
19
|
|
20 /* ipa-profile pass implements the following analysis propagating profile
|
|
21 inter-procedurally.
|
|
22
|
|
23 - Count histogram construction. This is a histogram analyzing how much
|
|
24 time is spent executing statements with a given execution count read
|
|
25 from profile feedback. This histogram is complete only with LTO,
|
|
26 otherwise it contains information only about the current unit.
|
|
27
|
|
28 The information is used to set hot/cold thresholds.
|
|
29 - Next speculative indirect call resolution is performed: the local
|
|
30 profile pass assigns profile-id to each function and provide us with a
|
|
31 histogram specifying the most common target. We look up the callgraph
|
|
32 node corresponding to the target and produce a speculative call.
|
|
33
|
|
34 This call may or may not survive through IPA optimization based on decision
|
|
35 of inliner.
|
|
36 - Finally we propagate the following flags: unlikely executed, executed
|
|
37 once, executed at startup and executed at exit. These flags are used to
|
|
38 control code size/performance threshold and code placement (by producing
|
|
39 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
|
|
40 #include "config.h"
|
|
41 #include "system.h"
|
|
42 #include "coretypes.h"
|
|
43 #include "backend.h"
|
|
44 #include "tree.h"
|
|
45 #include "gimple.h"
|
|
46 #include "predict.h"
|
|
47 #include "alloc-pool.h"
|
|
48 #include "tree-pass.h"
|
|
49 #include "cgraph.h"
|
|
50 #include "data-streamer.h"
|
|
51 #include "gimple-iterator.h"
|
|
52 #include "ipa-utils.h"
|
|
53 #include "profile.h"
|
|
54 #include "params.h"
|
|
55 #include "value-prof.h"
|
|
56 #include "tree-inline.h"
|
|
57 #include "symbol-summary.h"
|
|
58 #include "tree-vrp.h"
|
|
59 #include "ipa-prop.h"
|
|
60 #include "ipa-fnsummary.h"
|
|
61
|
|
62 /* Entry in the histogram. */
|
|
63
|
|
struct histogram_entry
{
  /* Execution count read from profile feedback; used as the bucket key.  */
  gcov_type count;
  /* Accumulated estimated time of all statements executed COUNT times.  */
  int time;
  /* Accumulated estimated size of all statements executed COUNT times.  */
  int size;
};
|
|
70
|
|
71 /* Histogram of profile values.
|
|
72 The histogram is represented as an ordered vector of entries allocated via
|
|
73 histogram_pool. During construction a separate hashtable is kept to lookup
|
|
74 duplicate entries. */
|
|
75
|
|
/* Global histogram; kept sorted by decreasing count (cmp_counts).  */
vec<histogram_entry *> histogram;
/* Allocator backing the entries stored in HISTOGRAM.  */
static object_allocator<histogram_entry> histogram_pool ("IPA histogram");
|
|
78
|
|
79 /* Hashtable support for storing histogram entries hashed by their execution
   counts.  */
|
|
80
|
|
struct histogram_hash : nofree_ptr_hash <histogram_entry>
{
  /* Hash an entry by its execution count.  */
  static inline hashval_t hash (const histogram_entry *);
  /* Two entries are equal when their counts match.  */
  static inline int equal (const histogram_entry *, const histogram_entry *);
};
|
|
86
|
|
inline hashval_t
histogram_hash::hash (const histogram_entry *val)
{
  /* The count itself is the hash; truncation of gcov_type to hashval_t
     is acceptable for hashing purposes.  */
  return val->count;
}
|
|
92
|
|
inline int
histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
{
  /* Entries represent the same bucket iff their counts are equal;
     time and size are accumulators, not part of the key.  */
  return val->count == val2->count;
}
|
|
98
|
|
99 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
|
|
100 HASHTABLE is the on-side hash kept to avoid duplicates. */
|
|
101
|
|
102 static void
|
|
103 account_time_size (hash_table<histogram_hash> *hashtable,
|
|
104 vec<histogram_entry *> &histogram,
|
|
105 gcov_type count, int time, int size)
|
|
106 {
|
|
107 histogram_entry key = {count, 0, 0};
|
|
108 histogram_entry **val = hashtable->find_slot (&key, INSERT);
|
|
109
|
|
110 if (!*val)
|
|
111 {
|
|
112 *val = histogram_pool.allocate ();
|
|
113 **val = key;
|
|
114 histogram.safe_push (*val);
|
|
115 }
|
|
116 (*val)->time += time;
|
|
117 (*val)->size += size;
|
|
118 }
|
|
119
|
|
120 int
|
|
121 cmp_counts (const void *v1, const void *v2)
|
|
122 {
|
|
123 const histogram_entry *h1 = *(const histogram_entry * const *)v1;
|
|
124 const histogram_entry *h2 = *(const histogram_entry * const *)v2;
|
|
125 if (h1->count < h2->count)
|
|
126 return 1;
|
|
127 if (h1->count > h2->count)
|
|
128 return -1;
|
|
129 return 0;
|
|
130 }
|
|
131
|
|
132 /* Dump HISTOGRAM to FILE. */
|
|
133
|
|
134 static void
|
|
135 dump_histogram (FILE *file, vec<histogram_entry *> histogram)
|
|
136 {
|
|
137 unsigned int i;
|
|
138 gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, overall_size = 0;
|
|
139
|
|
140 fprintf (dump_file, "Histogram:\n");
|
|
141 for (i = 0; i < histogram.length (); i++)
|
|
142 {
|
|
143 overall_time += histogram[i]->count * histogram[i]->time;
|
|
144 overall_size += histogram[i]->size;
|
|
145 }
|
|
146 if (!overall_time)
|
|
147 overall_time = 1;
|
|
148 if (!overall_size)
|
|
149 overall_size = 1;
|
|
150 for (i = 0; i < histogram.length (); i++)
|
|
151 {
|
|
152 cumulated_time += histogram[i]->count * histogram[i]->time;
|
|
153 cumulated_size += histogram[i]->size;
|
|
154 fprintf (file, " %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n",
|
|
155 (int64_t) histogram[i]->count,
|
|
156 histogram[i]->time,
|
|
157 cumulated_time * 100.0 / overall_time,
|
|
158 histogram[i]->size,
|
|
159 cumulated_size * 100.0 / overall_size);
|
|
160 }
|
|
161 }
|
|
162
|
|
163 /* Collect histogram from CFG profiles. */
|
|
164
|
|
static void
ipa_profile_generate_summary (void)
{
  struct cgraph_node *node;
  gimple_stmt_iterator gsi;
  basic_block bb;

  /* On-side hash used by account_time_size to merge equal counts.  */
  hash_table<histogram_hash> hashtable (10);

  /* Walk every function with a gimple body whose entry count is
     IPA-quality and accumulate per-count time/size into the global
     histogram.  */
  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    if (ENTRY_BLOCK_PTR_FOR_FN (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ())
      FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
	{
	  int time = 0;
	  int size = 0;
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);
	      /* Indirect calls (no known fndecl) may carry an
		 indirect-call value histogram; transfer its data onto
		 the corresponding call edge.  */
	      if (gimple_code (stmt) == GIMPLE_CALL
		  && !gimple_call_fndecl (stmt))
		{
		  histogram_value h;
		  h = gimple_histogram_value_of_type
			(DECL_STRUCT_FUNCTION (node->decl),
			 stmt, HIST_TYPE_INDIR_CALL);
		  /* No need to do sanity check: gimple_ic_transform already
		     takes away bad histograms.  */
		  if (h)
		    {
		      /* counter 0 is target, counter 1 is number of
			 execution we called target, counter 2 is total
			 number of executions.  */
		      if (h->hvalue.counters[2])
			{
			  struct cgraph_edge * e = node->get_edge (stmt);
			  /* Skip edges already resolved to a known callee;
			     only true indirect edges carry indirect_info.
			     NOTE(review): if E were NULL the dereference
			     below would crash — presumably get_edge always
			     finds an edge for a call statement here;
			     verify.  */
			  if (e && !e->indirect_unknown_callee)
			    continue;
			  e->indirect_info->common_target_id
			    = h->hvalue.counters [0];
			  /* Scale "calls to common target / total calls"
			     into REG_BR_PROB_BASE units.  */
			  e->indirect_info->common_target_probability
			    = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]);
			  if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE)
			    {
			      if (dump_file)
				fprintf (dump_file, "Probability capped to 1\n");
			      e->indirect_info->common_target_probability = REG_BR_PROB_BASE;
			    }
			}
		      /* The data now lives on the edge; drop the
			 histogram from the statement.  */
		      gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl),
						     stmt, h);
		    }
		}
	      time += estimate_num_insns (stmt, &eni_time_weights);
	      size += estimate_num_insns (stmt, &eni_size_weights);
	    }
	  /* Record this block's time/size under its execution count.  */
	  if (bb->count.ipa_p () && bb->count.initialized_p ())
	    account_time_size (&hashtable, histogram, bb->count.ipa ().to_gcov_type (),
			       time, size);
	}
  /* Keep the histogram sorted by decreasing count.  */
  histogram.qsort (cmp_counts);
}
|
|
225
|
|
226 /* Serialize the ipa info for lto. */
|
|
227
|
|
228 static void
|
|
229 ipa_profile_write_summary (void)
|
|
230 {
|
|
231 struct lto_simple_output_block *ob
|
|
232 = lto_create_simple_output_block (LTO_section_ipa_profile);
|
|
233 unsigned int i;
|
|
234
|
|
235 streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
|
|
236 for (i = 0; i < histogram.length (); i++)
|
|
237 {
|
|
238 streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
|
|
239 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
|
|
240 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
|
|
241 }
|
|
242 lto_destroy_simple_output_block (ob);
|
|
243 }
|
|
244
|
|
245 /* Deserialize the ipa info for lto. */
|
|
246
|
|
static void
ipa_profile_read_summary (void)
{
  struct lto_file_decl_data ** file_data_vec
    = lto_get_file_decl_data ();
  struct lto_file_decl_data * file_data;
  int j = 0;

  /* On-side hash so entries with equal counts coming from different
     files are merged into one bucket.  */
  hash_table<histogram_hash> hashtable (10);

  while ((file_data = file_data_vec[j++]))
    {
      const char *data;
      size_t len;
      struct lto_input_block *ib
	= lto_create_simple_input_block (file_data,
					 LTO_section_ipa_profile,
					 &data, &len);
      /* IB is NULL when this file has no ipa-profile section.  */
      if (ib)
	{
	  unsigned int num = streamer_read_uhwi (ib);
	  unsigned int n;
	  /* Read fields in the order the writer emitted them:
	     count, time, size.  */
	  for (n = 0; n < num; n++)
	    {
	      gcov_type count = streamer_read_gcov_count (ib);
	      int time = streamer_read_uhwi (ib);
	      int size = streamer_read_uhwi (ib);
	      account_time_size (&hashtable, histogram,
				 count, time, size);
	    }
	  lto_destroy_simple_input_block (file_data,
					  LTO_section_ipa_profile,
					  ib, data, len);
	}
    }
  /* Keep the merged global histogram sorted by decreasing count.  */
  histogram.qsort (cmp_counts);
}
|
|
284
|
|
285 /* Data used by ipa_propagate_frequency. */
|
|
286
|
|
struct ipa_propagate_frequency_data
{
  /* The node whose frequency is being computed; edges from this symbol
     itself (self-recursion) are excluded from the only_called_at_*
     computation.  */
  cgraph_node *function_symbol;
  /* The flags below start out true and are cleared by
     ipa_propagate_frequency_1 as soon as some caller contradicts
     them.  */
  bool maybe_unlikely_executed;
  bool maybe_executed_once;
  bool only_called_at_startup;
  bool only_called_at_exit;
};
|
|
295
|
|
296 /* Worker for ipa_propagate_frequency_1. */
|
|
297
|
|
/* Worker for ipa_propagate_frequency: inspect all callers of NODE and
   narrow the flags in DATA (an ipa_propagate_frequency_data).  Returns
   nonzero when the walk stopped early because every flag was already
   cleared.  */

static bool
ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
{
  struct ipa_propagate_frequency_data *d;
  struct cgraph_edge *edge;

  d = (struct ipa_propagate_frequency_data *)data;
  /* Stop as soon as every flag has been disproved.  */
  for (edge = node->callers;
       edge && (d->maybe_unlikely_executed || d->maybe_executed_once
		|| d->only_called_at_startup || d->only_called_at_exit);
       edge = edge->next_caller)
    {
      /* Ignore self-recursive calls for the startup/exit flags.  */
      if (edge->caller != d->function_symbol)
	{
	  d->only_called_at_startup &= edge->caller->only_called_at_startup;
	  /* It makes sense to put main() together with the static constructors.
	     It will be executed for sure, but rest of functions called from
	     main are definitely not at startup only.  */
	  if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
	    d->only_called_at_startup = 0;
	  d->only_called_at_exit &= edge->caller->only_called_at_exit;
	}

      /* When profile feedback is available, do not try to propagate too hard;
	 counts are already good guide on function frequencies and roundoff
	 errors can make us to push function into unlikely section even when
	 it is executed by the train run.  Transfer the function only if all
	 callers are unlikely executed.  */
      if (profile_info
	  && !(edge->callee->count.ipa () == profile_count::zero ())
	  && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
	      || (edge->caller->global.inlined_to
		  && edge->caller->global.inlined_to->frequency
		     != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
	d->maybe_unlikely_executed = false;
      /* Edges known (from IPA profile) to never execute tell us nothing
	 about the remaining flags; skip them.  */
      if (edge->count.ipa ().initialized_p ()
	  && !edge->count.ipa ().nonzero_p ())
	continue;
      switch (edge->caller->frequency)
	{
	case NODE_FREQUENCY_UNLIKELY_EXECUTED:
	  break;
	case NODE_FREQUENCY_EXECUTED_ONCE:
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file, " Called by %s that is executed once\n",
		       edge->caller->name ());
	    d->maybe_unlikely_executed = false;
	    ipa_call_summary *s = ipa_call_summaries->get (edge);
	    /* A call inside a loop of a once-executed function may still
	       run many times.  */
	    if (s != NULL && s->loop_depth)
	      {
		d->maybe_executed_once = false;
		if (dump_file && (dump_flags & TDF_DETAILS))
		  fprintf (dump_file, " Called in loop\n");
	      }
	    break;
	  }
	case NODE_FREQUENCY_HOT:
	case NODE_FREQUENCY_NORMAL:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " Called by %s that is normal or hot\n",
		     edge->caller->name ());
	  d->maybe_unlikely_executed = false;
	  d->maybe_executed_once = false;
	  break;
	}
    }
  /* Nonzero (stop iterating aliases) iff the loop above exited early
     with all flags cleared.  */
  return edge != NULL;
}
|
|
367
|
|
368 /* Return true if NODE contains hot calls.  */
|
|
369
|
|
370 bool
|
|
371 contains_hot_call_p (struct cgraph_node *node)
|
|
372 {
|
|
373 struct cgraph_edge *e;
|
|
374 for (e = node->callees; e; e = e->next_callee)
|
|
375 if (e->maybe_hot_p ())
|
|
376 return true;
|
|
377 else if (!e->inline_failed
|
|
378 && contains_hot_call_p (e->callee))
|
|
379 return true;
|
|
380 for (e = node->indirect_calls; e; e = e->next_callee)
|
|
381 if (e->maybe_hot_p ())
|
|
382 return true;
|
|
383 return false;
|
|
384 }
|
|
385
|
|
386 /* See if the frequency of NODE can be updated based on frequencies of its
|
|
387 callers. */
|
|
bool
ipa_propagate_frequency (struct cgraph_node *node)
{
  /* All flags start optimistically true and are narrowed by the
     caller walk below.  */
  struct ipa_propagate_frequency_data d = {node, true, true, true, true};
  bool changed = false;

  /* We can not propagate anything useful about externally visible functions
     nor about virtuals.  */
  if (!node->local.local
      || node->alias
      || (opt_for_fn (node->decl, flag_devirtualize)
	  && DECL_VIRTUAL_P (node->decl)))
    return false;
  gcc_assert (node->analyzed);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing frequency %s\n", node->name ());

  /* Narrow the flags in D by walking callers of NODE and of all its
     aliases.  */
  node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d,
				     true);

  /* A function called both at startup and at exit is effectively
     "always"; promote only when exactly one of the two holds.  */
  if ((d.only_called_at_startup && !d.only_called_at_exit)
      && !node->only_called_at_startup)
    {
      node->only_called_at_startup = true;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to only called at startup.\n",
		 node->name ());
      changed = true;
    }
  if ((d.only_called_at_exit && !d.only_called_at_startup)
      && !node->only_called_at_exit)
    {
      node->only_called_at_exit = true;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to only called at exit.\n",
		 node->name ());
      changed = true;
    }

  /* With profile we can decide on hot/normal based on count.  */
  if (node->count.ipa ().initialized_p ())
    {
      bool hot = false;
      /* Hot if the node's own count reaches the threshold...  */
      if (!(node->count.ipa () == profile_count::zero ())
	  && node->count.ipa () >= get_hot_bb_threshold ())
	hot = true;
      /* ...or if any call it contains is hot.  */
      if (!hot)
	hot |= contains_hot_call_p (node);
      if (hot)
	{
	  if (node->frequency != NODE_FREQUENCY_HOT)
	    {
	      if (dump_file)
		fprintf (dump_file, "Node %s promoted to hot.\n",
			 node->name ());
	      node->frequency = NODE_FREQUENCY_HOT;
	      return true;
	    }
	  return false;
	}
      else if (node->frequency == NODE_FREQUENCY_HOT)
	{
	  if (dump_file)
	    fprintf (dump_file, "Node %s reduced to normal.\n",
		     node->name ());
	  node->frequency = NODE_FREQUENCY_NORMAL;
	  changed = true;
	}
    }
  /* These come either from profile or user hints; never update them.  */
  if (node->frequency == NODE_FREQUENCY_HOT
      || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
    return changed;
  if (d.maybe_unlikely_executed)
    {
      node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
		 node->name ());
      changed = true;
    }
  else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
    {
      node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to executed once.\n",
		 node->name ());
      changed = true;
    }
  return changed;
}
|
|
479
|
|
480 /* Simple ipa profile pass propagating frequencies across the callgraph. */
|
|
481
|
|
/* Simple ipa profile pass propagating frequencies across the callgraph.
   Runs in three phases: (1) derive the hot-count threshold from the
   histogram, (2) turn profiled common indirect-call targets into
   speculative direct calls, (3) iteratively propagate frequency flags
   over the callgraph in reverse postorder.  Always returns 0 (no TODO
   flags).  */

static unsigned int
ipa_profile (void)
{
  struct cgraph_node **order;
  struct cgraph_edge *e;
  int order_pos;
  bool something_changed = false;
  int i;
  gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
  struct cgraph_node *n,*n2;
  /* Statistics for the dump at the end of phase 2.  */
  int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
  int nmismatch = 0, nimpossible = 0;
  bool node_map_initialized = false;

  if (dump_file)
    dump_histogram (dump_file, histogram);

  /* Phase 1: compute the count THRESHOLD under which the hottest
     HOT_BB_COUNT_WS_PERMILLE permille of the runtime falls, and update
     the global hot-BB threshold.  */
  for (i = 0; i < (int)histogram.length (); i++)
    {
      overall_time += histogram[i]->count * histogram[i]->time;
      overall_size += histogram[i]->size;
    }
  if (overall_time)
    {
      gcov_type threshold;

      gcc_assert (overall_size);

      /* +500 rounds to the nearest permille.  */
      cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
      threshold = 0;
      /* Histogram is sorted by decreasing count, so walk from the top
	 until the cutoff share of time is covered.  */
      for (i = 0; cumulated < cutoff; i++)
	{
	  cumulated += histogram[i]->count * histogram[i]->time;
	  threshold = histogram[i]->count;
	}
      if (!threshold)
	threshold = 1;
      if (dump_file)
	{
	  gcov_type cumulated_time = 0, cumulated_size = 0;

	  for (i = 0;
	       i < (int)histogram.length () && histogram[i]->count >= threshold;
	       i++)
	    {
	      cumulated_time += histogram[i]->count * histogram[i]->time;
	      cumulated_size += histogram[i]->size;
	    }
	  fprintf (dump_file, "Determined min count: %" PRId64
		   " Time:%3.2f%% Size:%3.2f%%\n",
		   (int64_t)threshold,
		   cumulated_time * 100.0 / overall_time,
		   cumulated_size * 100.0 / overall_size);
	}

      /* With LTO the histogram covers the whole program, so trust it
	 unconditionally; otherwise only ever raise the threshold.  */
      if (threshold > get_hot_bb_threshold ()
	  || in_lto_p)
	{
	  if (dump_file)
	    fprintf (dump_file, "Threshold updated.\n");
	  set_hot_bb_threshold (threshold);
	}
    }
  /* The histogram is no longer needed.  */
  histogram.release ();
  histogram_pool.release ();

  /* Phase 2: produce speculative calls: we saved common target from
     profiling into e->common_target_id.  Now, at link time, we can look
     up corresponding function node and produce speculative call.  */

  FOR_EACH_DEFINED_FUNCTION (n)
    {
      bool update = false;

      if (!opt_for_fn (n->decl, flag_ipa_profile))
	continue;

      for (e = n->indirect_calls; e; e = e->next_callee)
	{
	  if (n->count.initialized_p ())
	    nindirect++;
	  if (e->indirect_info->common_target_id)
	    {
	      /* The profile-id -> node map is built lazily, only when
		 some edge actually carries a common target.  */
	      if (!node_map_initialized)
		init_node_map (false);
	      node_map_initialized = true;
	      ncommon++;
	      n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
	      if (n2)
		{
		  if (dump_file)
		    {
		      fprintf (dump_file, "Indirect call -> direct call from"
			       " other module %s => %s, prob %3.2f\n",
			       n->dump_name (),
			       n2->dump_name (),
			       e->indirect_info->common_target_probability
			       / (float)REG_BR_PROB_BASE);
		    }
		  /* The filters below reject speculations that are
		     unlikely to pay off; each bumps its statistic.  */
		  if (e->indirect_info->common_target_probability
		      < REG_BR_PROB_BASE / 2)
		    {
		      nuseless++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: probability is too low.\n");
		    }
		  else if (!e->maybe_hot_p ())
		    {
		      nuseless++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: call is cold.\n");
		    }
		  else if (n2->get_availability () <= AVAIL_INTERPOSABLE
			   && n2->can_be_discarded_p ())
		    {
		      nuseless++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: target is overwritable "
				 "and can be discarded.\n");
		    }
		  /* Reject when jump-function info shows the call site's
		     argument count cannot match the target's parameters
		     (allowing fewer args only for stdarg targets).  */
		  else if (ipa_node_params_sum && ipa_edge_args_sum
			   && (!vec_safe_is_empty
			       (IPA_NODE_REF (n2)->descriptors))
			   && ipa_get_param_count (IPA_NODE_REF (n2))
			      != ipa_get_cs_argument_count (IPA_EDGE_REF (e))
			   && (ipa_get_param_count (IPA_NODE_REF (n2))
			       >= ipa_get_cs_argument_count (IPA_EDGE_REF (e))
			       || !stdarg_p (TREE_TYPE (n2->decl))))
		    {
		      nmismatch++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: "
				 "parameter count mistmatch\n");
		    }
		  else if (e->indirect_info->polymorphic
			   && !opt_for_fn (n->decl, flag_devirtualize)
			   && !possible_polymorphic_call_target_p (e, n2))
		    {
		      nimpossible++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: "
				 "function is not in the polymorphic "
				 "call target list\n");
		    }
		  else
		    {
		      /* Target may be overwritable, but profile says that
			 control flow goes to this particular implementation
			 of N2.  Speculate on the local alias to allow inlining.
			 */
		      if (!n2->can_be_discarded_p ())
			{
			  cgraph_node *alias;
			  alias = dyn_cast<cgraph_node *> (n2->noninterposable_alias ());
			  if (alias)
			    n2 = alias;
			}
		      nconverted++;
		      e->make_speculative
			(n2,
			 e->count.apply_probability
			     (e->indirect_info->common_target_probability));
		      update = true;
		    }
		}
	      else
		{
		  if (dump_file)
		    fprintf (dump_file, "Function with profile-id %i not found.\n",
			     e->indirect_info->common_target_id);
		  nunknown++;
		}
	    }
	}
      if (update)
	ipa_update_overall_fn_summary (n);
    }
  if (node_map_initialized)
    del_node_map ();
  if (dump_file && nindirect)
    fprintf (dump_file,
	     "%i indirect calls trained.\n"
	     "%i (%3.2f%%) have common target.\n"
	     "%i (%3.2f%%) targets was not found.\n"
	     "%i (%3.2f%%) targets had parameter count mismatch.\n"
	     "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
	     "%i (%3.2f%%) speculations seems useless.\n"
	     "%i (%3.2f%%) speculations produced.\n",
	     nindirect,
	     ncommon, ncommon * 100.0 / nindirect,
	     nunknown, nunknown * 100.0 / nindirect,
	     nmismatch, nmismatch * 100.0 / nindirect,
	     nimpossible, nimpossible * 100.0 / nindirect,
	     nuseless, nuseless * 100.0 / nindirect,
	     nconverted, nconverted * 100.0 / nindirect);

  /* Phase 3: propagate frequencies.  First sweep in reverse postorder;
     whenever a node changes, mark its local callees (via AUX) for
     re-examination, then iterate until a fixed point.  */
  order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  order_pos = ipa_reverse_postorder (order);
  for (i = order_pos - 1; i >= 0; i--)
    {
      if (order[i]->local.local
	  && opt_for_fn (order[i]->decl, flag_ipa_profile)
	  && ipa_propagate_frequency (order[i]))
	{
	  for (e = order[i]->callees; e; e = e->next_callee)
	    if (e->callee->local.local && !e->callee->aux)
	      {
		something_changed = true;
		e->callee->aux = (void *)1;
	      }
	}
      order[i]->aux = NULL;
    }

  while (something_changed)
    {
      something_changed = false;
      for (i = order_pos - 1; i >= 0; i--)
	{
	  if (order[i]->aux
	      && opt_for_fn (order[i]->decl, flag_ipa_profile)
	      && ipa_propagate_frequency (order[i]))
	    {
	      for (e = order[i]->callees; e; e = e->next_callee)
		if (e->callee->local.local && !e->callee->aux)
		  {
		    something_changed = true;
		    e->callee->aux = (void *)1;
		  }
	    }
	  order[i]->aux = NULL;
	}
    }
  free (order);
  return 0;
}
|
|
722
|
|
namespace {

/* Pass metadata for the ipa-profile IPA pass.  */

const pass_data pass_data_ipa_profile =
{
  IPA_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_PROFILE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* IPA pass wiring the summary generate/write/read hooks to the
   histogram machinery above and executing ipa_profile.  */

class pass_ipa_profile : public ipa_opt_pass_d
{
public:
  pass_ipa_profile (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
		      ipa_profile_generate_summary, /* generate_summary */
		      ipa_profile_write_summary, /* write_summary */
		      ipa_profile_read_summary, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  /* Gate also opens under LTO without -fipa-profile — presumably so
     streamed summaries are still consumed; confirm before changing.  */
  virtual bool gate (function *) { return flag_ipa_profile || in_lto_p; }
  virtual unsigned int execute (function *) { return ipa_profile (); }

}; // class pass_ipa_profile

} // anon namespace
|
|
761
|
|
/* Factory returning a fresh instance of the ipa-profile pass for
   context CTXT; the caller takes ownership.  */

ipa_opt_pass_d *
make_pass_ipa_profile (gcc::context *ctxt)
{
  return new pass_ipa_profile (ctxt);
}
|