111
|
1 /* Basic IPA optimizations based on profile.
|
|
2 Copyright (C) 2003-2017 Free Software Foundation, Inc.
|
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify it under
|
|
7 the terms of the GNU General Public License as published by the Free
|
|
8 Software Foundation; either version 3, or (at your option) any later
|
|
9 version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with GCC; see the file COPYING3. If not see
|
|
18 <http://www.gnu.org/licenses/>. */
|
|
19
|
|
/* ipa-profile pass implements the following analysis propagating profile
|
|
21 inter-procedurally.
|
|
22
|
|
23 - Count histogram construction. This is a histogram analyzing how much
|
|
24 time is spent executing statements with a given execution count read
|
|
25 from profile feedback. This histogram is complete only with LTO,
|
|
26 otherwise it contains information only about the current unit.
|
|
27
|
|
28 Similar histogram is also estimated by coverage runtime. This histogram
|
|
29 is not dependent on LTO, but it suffers from various defects; first
|
|
30 gcov runtime is not weighting individual basic block by estimated execution
|
|
31 time and second the merging of multiple runs makes assumption that the
|
|
   histogram distribution did not change.  Consequently the histogram constructed
|
|
33 here may be more precise.
|
|
34
|
|
35 The information is used to set hot/cold thresholds.
|
|
36 - Next speculative indirect call resolution is performed: the local
|
|
37 profile pass assigns profile-id to each function and provide us with a
|
|
38 histogram specifying the most common target. We look up the callgraph
|
|
39 node corresponding to the target and produce a speculative call.
|
|
40
|
|
41 This call may or may not survive through IPA optimization based on decision
|
|
42 of inliner.
|
|
43 - Finally we propagate the following flags: unlikely executed, executed
|
|
44 once, executed at startup and executed at exit. These flags are used to
|
|
45 control code size/performance threshold and code placement (by producing
|
|
46 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
|
|
47 #include "config.h"
|
|
48 #include "system.h"
|
|
49 #include "coretypes.h"
|
|
50 #include "backend.h"
|
|
51 #include "tree.h"
|
|
52 #include "gimple.h"
|
|
53 #include "predict.h"
|
|
54 #include "alloc-pool.h"
|
|
55 #include "tree-pass.h"
|
|
56 #include "cgraph.h"
|
|
57 #include "data-streamer.h"
|
|
58 #include "gimple-iterator.h"
|
|
59 #include "ipa-utils.h"
|
|
60 #include "profile.h"
|
|
61 #include "params.h"
|
|
62 #include "value-prof.h"
|
|
63 #include "tree-inline.h"
|
|
64 #include "symbol-summary.h"
|
|
65 #include "tree-vrp.h"
|
|
66 #include "ipa-prop.h"
|
|
67 #include "ipa-fnsummary.h"
|
|
68
|
|
/* Entry in the histogram.  One entry exists per distinct execution COUNT
   seen in the profile; TIME and SIZE accumulate the estimated cost of all
   statements executed with that count.  */

struct histogram_entry
{
  gcov_type count;	/* Execution count read from profile feedback.  */
  int time;		/* Accumulated estimated time of stmts with this count.  */
  int size;		/* Accumulated estimated size of stmts with this count.  */
};
|
|
77
|
|
/* Histogram of profile values.
   The histogram is represented as an ordered vector of entries allocated via
   histogram_pool.  During construction a separate hashtable is kept to lookup
   duplicate entries.  After construction the vector is sorted by decreasing
   count (see cmp_counts).  */

vec<histogram_entry *> histogram;
static object_allocator<histogram_entry> histogram_pool ("IPA histogram");
|
|
85
|
|
/* Hash-table traits for looking up histogram entries by their execution count.  */
|
|
87
|
|
struct histogram_hash : nofree_ptr_hash <histogram_entry>
{
  /* Hash an entry; keyed purely by its execution count.  */
  static inline hashval_t hash (const histogram_entry *);
  /* Two entries are equal iff their counts match.  */
  static inline int equal (const histogram_entry *, const histogram_entry *);
};
|
|
93
|
|
94 inline hashval_t
|
|
95 histogram_hash::hash (const histogram_entry *val)
|
|
96 {
|
|
97 return val->count;
|
|
98 }
|
|
99
|
|
100 inline int
|
|
101 histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
|
|
102 {
|
|
103 return val->count == val2->count;
|
|
104 }
|
|
105
|
|
106 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
|
|
107 HASHTABLE is the on-side hash kept to avoid duplicates. */
|
|
108
|
|
109 static void
|
|
110 account_time_size (hash_table<histogram_hash> *hashtable,
|
|
111 vec<histogram_entry *> &histogram,
|
|
112 gcov_type count, int time, int size)
|
|
113 {
|
|
114 histogram_entry key = {count, 0, 0};
|
|
115 histogram_entry **val = hashtable->find_slot (&key, INSERT);
|
|
116
|
|
117 if (!*val)
|
|
118 {
|
|
119 *val = histogram_pool.allocate ();
|
|
120 **val = key;
|
|
121 histogram.safe_push (*val);
|
|
122 }
|
|
123 (*val)->time += time;
|
|
124 (*val)->size += size;
|
|
125 }
|
|
126
|
|
127 int
|
|
128 cmp_counts (const void *v1, const void *v2)
|
|
129 {
|
|
130 const histogram_entry *h1 = *(const histogram_entry * const *)v1;
|
|
131 const histogram_entry *h2 = *(const histogram_entry * const *)v2;
|
|
132 if (h1->count < h2->count)
|
|
133 return 1;
|
|
134 if (h1->count > h2->count)
|
|
135 return -1;
|
|
136 return 0;
|
|
137 }
|
|
138
|
|
139 /* Dump HISTOGRAM to FILE. */
|
|
140
|
|
141 static void
|
|
142 dump_histogram (FILE *file, vec<histogram_entry *> histogram)
|
|
143 {
|
|
144 unsigned int i;
|
|
145 gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0, overall_size = 0;
|
|
146
|
|
147 fprintf (dump_file, "Histogram:\n");
|
|
148 for (i = 0; i < histogram.length (); i++)
|
|
149 {
|
|
150 overall_time += histogram[i]->count * histogram[i]->time;
|
|
151 overall_size += histogram[i]->size;
|
|
152 }
|
|
153 if (!overall_time)
|
|
154 overall_time = 1;
|
|
155 if (!overall_size)
|
|
156 overall_size = 1;
|
|
157 for (i = 0; i < histogram.length (); i++)
|
|
158 {
|
|
159 cumulated_time += histogram[i]->count * histogram[i]->time;
|
|
160 cumulated_size += histogram[i]->size;
|
|
161 fprintf (file, " %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n",
|
|
162 (int64_t) histogram[i]->count,
|
|
163 histogram[i]->time,
|
|
164 cumulated_time * 100.0 / overall_time,
|
|
165 histogram[i]->size,
|
|
166 cumulated_size * 100.0 / overall_size);
|
|
167 }
|
|
168 }
|
|
169
|
|
/* Collect histogram from CFG profiles.  Walks every basic block of every
   function with a gimple body, accumulating estimated time/size per
   execution count into the global HISTOGRAM.  While walking, indirect-call
   value histograms are consumed: the most common target and its probability
   are transferred onto the corresponding indirect call graph edge for later
   speculative devirtualization, and the gimple histogram is removed.  */

static void
ipa_profile_generate_summary (void)
{
  struct cgraph_node *node;
  gimple_stmt_iterator gsi;
  basic_block bb;

  hash_table<histogram_hash> hashtable (10);

  FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
    FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
      {
	int time = 0;
	int size = 0;
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  {
	    gimple *stmt = gsi_stmt (gsi);
	    /* Only indirect calls (no known fndecl) carry the
	       indirect-call target histogram.  */
	    if (gimple_code (stmt) == GIMPLE_CALL
		&& !gimple_call_fndecl (stmt))
	      {
		histogram_value h;
		h = gimple_histogram_value_of_type
		      (DECL_STRUCT_FUNCTION (node->decl),
		       stmt, HIST_TYPE_INDIR_CALL);
		/* No need to do sanity check: gimple_ic_transform already
		   takes away bad histograms.  */
		if (h)
		  {
		    /* counter 0 is target, counter 1 is number of execution we called target,
		       counter 2 is total number of executions.  */
		    if (h->hvalue.counters[2])
		      {
			struct cgraph_edge * e = node->get_edge (stmt);
			/* Skip calls that were already resolved to a direct
			   callee.  NOTE(review): if get_edge returned NULL the
			   dereference below would crash — presumably every
			   indirect call stmt has an edge here; confirm.  */
			if (e && !e->indirect_unknown_callee)
			  continue;
			e->indirect_info->common_target_id
			  = h->hvalue.counters [0];
			e->indirect_info->common_target_probability
			  = GCOV_COMPUTE_SCALE (h->hvalue.counters [1], h->hvalue.counters [2]);
			/* Rounding in GCOV_COMPUTE_SCALE may overshoot;
			   clamp to certainty.  */
			if (e->indirect_info->common_target_probability > REG_BR_PROB_BASE)
			  {
			    if (dump_file)
			      fprintf (dump_file, "Probability capped to 1\n");
			    e->indirect_info->common_target_probability = REG_BR_PROB_BASE;
			  }
		      }
		    gimple_remove_histogram_value (DECL_STRUCT_FUNCTION (node->decl),
						   stmt, h);
		  }
	      }
	    time += estimate_num_insns (stmt, &eni_time_weights);
	    size += estimate_num_insns (stmt, &eni_size_weights);
	  }
	/* Only blocks with a real profile count contribute to the
	   histogram.  */
	if (bb->count.initialized_p ())
	  account_time_size (&hashtable, histogram, bb->count.to_gcov_type (),
			     time, size);
      }
  /* Order hottest-first for threshold computation in ipa_profile.  */
  histogram.qsort (cmp_counts);
}
|
|
231
|
|
232 /* Serialize the ipa info for lto. */
|
|
233
|
|
234 static void
|
|
235 ipa_profile_write_summary (void)
|
|
236 {
|
|
237 struct lto_simple_output_block *ob
|
|
238 = lto_create_simple_output_block (LTO_section_ipa_profile);
|
|
239 unsigned int i;
|
|
240
|
|
241 streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
|
|
242 for (i = 0; i < histogram.length (); i++)
|
|
243 {
|
|
244 streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
|
|
245 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
|
|
246 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
|
|
247 }
|
|
248 lto_destroy_simple_output_block (ob);
|
|
249 }
|
|
250
|
|
251 /* Deserialize the ipa info for lto. */
|
|
252
|
|
253 static void
|
|
254 ipa_profile_read_summary (void)
|
|
255 {
|
|
256 struct lto_file_decl_data ** file_data_vec
|
|
257 = lto_get_file_decl_data ();
|
|
258 struct lto_file_decl_data * file_data;
|
|
259 int j = 0;
|
|
260
|
|
261 hash_table<histogram_hash> hashtable (10);
|
|
262
|
|
263 while ((file_data = file_data_vec[j++]))
|
|
264 {
|
|
265 const char *data;
|
|
266 size_t len;
|
|
267 struct lto_input_block *ib
|
|
268 = lto_create_simple_input_block (file_data,
|
|
269 LTO_section_ipa_profile,
|
|
270 &data, &len);
|
|
271 if (ib)
|
|
272 {
|
|
273 unsigned int num = streamer_read_uhwi (ib);
|
|
274 unsigned int n;
|
|
275 for (n = 0; n < num; n++)
|
|
276 {
|
|
277 gcov_type count = streamer_read_gcov_count (ib);
|
|
278 int time = streamer_read_uhwi (ib);
|
|
279 int size = streamer_read_uhwi (ib);
|
|
280 account_time_size (&hashtable, histogram,
|
|
281 count, time, size);
|
|
282 }
|
|
283 lto_destroy_simple_input_block (file_data,
|
|
284 LTO_section_ipa_profile,
|
|
285 ib, data, len);
|
|
286 }
|
|
287 }
|
|
288 histogram.qsort (cmp_counts);
|
|
289 }
|
|
290
|
|
/* Data used by ipa_propagate_frequency.  The four flags start out true and
   are cleared by ipa_propagate_frequency_1 as contradicting callers are
   discovered; whatever survives is applied to the node.  */

struct ipa_propagate_frequency_data
{
  cgraph_node *function_symbol;		/* The node whose flags we compute.  */
  bool maybe_unlikely_executed;		/* No caller disproves "unlikely".  */
  bool maybe_executed_once;		/* No caller disproves "once".  */
  bool only_called_at_startup;		/* All callers are startup-only.  */
  bool only_called_at_exit;		/* All callers are exit-only.  */
};
|
|
301
|
|
/* Worker for ipa_propagate_frequency.  Called for NODE and each of its
   aliases; DATA is an ipa_propagate_frequency_data.  Inspect every caller
   edge and clear whichever of the four optimistic flags the caller
   contradicts.  Iteration stops early once all flags are cleared.
   Returns true iff the walk was cut short (some flag-clearing caller
   remained unvisited).  */

static bool
ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
{
  struct ipa_propagate_frequency_data *d;
  struct cgraph_edge *edge;

  d = (struct ipa_propagate_frequency_data *)data;
  for (edge = node->callers;
       edge && (d->maybe_unlikely_executed || d->maybe_executed_once
		|| d->only_called_at_startup || d->only_called_at_exit);
       edge = edge->next_caller)
    {
      /* Self-recursive edges do not constrain startup/exit placement.  */
      if (edge->caller != d->function_symbol)
	{
	  d->only_called_at_startup &= edge->caller->only_called_at_startup;
	  /* It makes sense to put main() together with the static constructors.
	     It will be executed for sure, but rest of functions called from
	     main are definitely not at startup only.  */
	  if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
	    d->only_called_at_startup = 0;
	  d->only_called_at_exit &= edge->caller->only_called_at_exit;
	}

      /* When profile feedback is available, do not try to propagate too hard;
	 counts are already good guide on function frequencies and roundoff
	 errors can make us to push function into unlikely section even when
	 it is executed by the train run.  Transfer the function only if all
	 callers are unlikely executed.  */
      if (profile_info
	  && edge->callee->count.initialized_p ()
	  /* Thunks are not profiled.  This is more or less implementation
	     bug.  */
	  && !d->function_symbol->thunk.thunk_p
	  && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
	      || (edge->caller->global.inlined_to
		  && edge->caller->global.inlined_to->frequency
		     != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
	  d->maybe_unlikely_executed = false;
      /* Edges that never execute do not constrain the frequency flags.  */
      if (!edge->frequency)
	continue;
      switch (edge->caller->frequency)
	{
	case NODE_FREQUENCY_UNLIKELY_EXECUTED:
	  break;
	case NODE_FREQUENCY_EXECUTED_ONCE:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " Called by %s that is executed once\n",
		     edge->caller->name ());
	  d->maybe_unlikely_executed = false;
	  /* An executed-once caller invoking us from inside a loop may call
	     us many times.  */
	  if (ipa_call_summaries->get (edge)->loop_depth)
	    {
	      d->maybe_executed_once = false;
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file, " Called in loop\n");
	    }
	  break;
	case NODE_FREQUENCY_HOT:
	case NODE_FREQUENCY_NORMAL:
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, " Called by %s that is normal or hot\n",
		     edge->caller->name ());
	  d->maybe_unlikely_executed = false;
	  d->maybe_executed_once = false;
	  break;
	}
    }
  return edge != NULL;
}
|
|
372
|
|
373 /* Return ture if NODE contains hot calls. */
|
|
374
|
|
375 bool
|
|
376 contains_hot_call_p (struct cgraph_node *node)
|
|
377 {
|
|
378 struct cgraph_edge *e;
|
|
379 for (e = node->callees; e; e = e->next_callee)
|
|
380 if (e->maybe_hot_p ())
|
|
381 return true;
|
|
382 else if (!e->inline_failed
|
|
383 && contains_hot_call_p (e->callee))
|
|
384 return true;
|
|
385 for (e = node->indirect_calls; e; e = e->next_callee)
|
|
386 if (e->maybe_hot_p ())
|
|
387 return true;
|
|
388 return false;
|
|
389 }
|
|
390
|
|
/* See if the frequency of NODE can be updated based on frequencies of its
   callers.  Updates only_called_at_startup/exit and node->frequency;
   returns true iff anything changed.  */
bool
ipa_propagate_frequency (struct cgraph_node *node)
{
  /* Start optimistic: every flag true until a caller contradicts it.  */
  struct ipa_propagate_frequency_data d = {node, true, true, true, true};
  bool changed = false;

  /* We can not propagate anything useful about externally visible functions
     nor about virtuals.  */
  if (!node->local.local
      || node->alias
      || (opt_for_fn (node->decl, flag_devirtualize)
	  && DECL_VIRTUAL_P (node->decl)))
    return false;
  gcc_assert (node->analyzed);
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Processing frequency %s\n", node->name ());

  /* Examine callers of the node and of all its aliases.  */
  node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d,
				     true);

  /* Startup-only and exit-only are mutually exclusive; promote only when
     exactly one of the two survived.  */
  if ((d.only_called_at_startup && !d.only_called_at_exit)
      && !node->only_called_at_startup)
    {
      node->only_called_at_startup = true;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to only called at startup.\n",
		 node->name ());
      changed = true;
    }
  if ((d.only_called_at_exit && !d.only_called_at_startup)
      && !node->only_called_at_exit)
    {
      node->only_called_at_exit = true;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to only called at exit.\n",
		 node->name ());
      changed = true;
    }

  /* With profile we can decide on hot/normal based on count.  */
  if (node->count.initialized_p ())
    {
      bool hot = false;
      if (!(node->count == profile_count::zero ())
	  && node->count >= get_hot_bb_threshold ())
	hot = true;
      if (!hot)
	hot |= contains_hot_call_p (node);
      if (hot)
	{
	  if (node->frequency != NODE_FREQUENCY_HOT)
	    {
	      if (dump_file)
		fprintf (dump_file, "Node %s promoted to hot.\n",
			 node->name ());
	      node->frequency = NODE_FREQUENCY_HOT;
	      return true;
	    }
	  return false;
	}
      else if (node->frequency == NODE_FREQUENCY_HOT)
	{
	  if (dump_file)
	    fprintf (dump_file, "Node %s reduced to normal.\n",
		     node->name ());
	  node->frequency = NODE_FREQUENCY_NORMAL;
	  changed = true;
	}
    }
  /* These come either from profile or user hints; never update them.  */
  if (node->frequency == NODE_FREQUENCY_HOT
      || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
    return changed;
  if (d.maybe_unlikely_executed)
    {
      node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
		 node->name ());
      changed = true;
    }
  else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
    {
      node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
      if (dump_file)
	fprintf (dump_file, "Node %s promoted to executed once.\n",
		 node->name ());
      changed = true;
    }
  return changed;
}
|
|
484
|
|
/* Simple ipa profile pass propagating frequencies across the callgraph.

   Three phases:
   1. Use the histogram to compute (and possibly raise) the hot basic-block
      count threshold so that roughly HOT_BB_COUNT_WS_PERMILLE of execution
      time falls above it.
   2. Turn recorded common indirect-call targets into speculative direct
      calls where the probability and availability checks pass.
   3. Propagate frequency flags (unlikely/once/startup/exit) over the
      callgraph in reverse postorder until a fixed point.  */

static unsigned int
ipa_profile (void)
{
  struct cgraph_node **order;
  struct cgraph_edge *e;
  int order_pos;
  bool something_changed = false;
  int i;
  gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
  struct cgraph_node *n,*n2;
  int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
  int nmismatch = 0, nimpossible = 0;
  bool node_map_initialized = false;

  if (dump_file)
    dump_histogram (dump_file, histogram);
  for (i = 0; i < (int)histogram.length (); i++)
    {
      overall_time += histogram[i]->count * histogram[i]->time;
      overall_size += histogram[i]->size;
    }
  if (overall_time)
    {
      gcov_type threshold;

      gcc_assert (overall_size);
      if (dump_file)
	{
	  gcov_type min, cumulated_time = 0, cumulated_size = 0;

	  fprintf (dump_file, "Overall time: %" PRId64"\n",
		   (int64_t)overall_time);
	  min = get_hot_bb_threshold ();
	  for (i = 0; i < (int)histogram.length () && histogram[i]->count >= min;
	       i++)
	    {
	      cumulated_time += histogram[i]->count * histogram[i]->time;
	      cumulated_size += histogram[i]->size;
	    }
	  fprintf (dump_file, "GCOV min count: %" PRId64
		   " Time:%3.2f%% Size:%3.2f%%\n",
		   (int64_t)min,
		   cumulated_time * 100.0 / overall_time,
		   cumulated_size * 100.0 / overall_size);
	}
      /* Find the count threshold covering HOT_BB_COUNT_WS_PERMILLE permille
	 of overall execution time (histogram is sorted hottest-first).  */
      cutoff = (overall_time * PARAM_VALUE (HOT_BB_COUNT_WS_PERMILLE) + 500) / 1000;
      threshold = 0;
      for (i = 0; cumulated < cutoff; i++)
	{
	  cumulated += histogram[i]->count * histogram[i]->time;
	  threshold = histogram[i]->count;
	}
      if (!threshold)
	threshold = 1;
      if (dump_file)
	{
	  gcov_type cumulated_time = 0, cumulated_size = 0;

	  for (i = 0;
	       i < (int)histogram.length () && histogram[i]->count >= threshold;
	       i++)
	    {
	      cumulated_time += histogram[i]->count * histogram[i]->time;
	      cumulated_size += histogram[i]->size;
	    }
	  fprintf (dump_file, "Determined min count: %" PRId64
		   " Time:%3.2f%% Size:%3.2f%%\n",
		   (int64_t)threshold,
		   cumulated_time * 100.0 / overall_time,
		   cumulated_size * 100.0 / overall_size);
	}
      /* Only ever raise the threshold; at LTO time the whole-program
	 histogram is authoritative.  */
      if (threshold > get_hot_bb_threshold ()
	  || in_lto_p)
	{
	  if (dump_file)
	    fprintf (dump_file, "Threshold updated.\n");
	  set_hot_bb_threshold (threshold);
	}
    }
  histogram.release ();
  histogram_pool.release ();

  /* Produce speculative calls: we saved common target from profiling into
     e->common_target_id.  Now, at link time, we can look up corresponding
     function node and produce speculative call.  */

  FOR_EACH_DEFINED_FUNCTION (n)
    {
      bool update = false;

      if (!opt_for_fn (n->decl, flag_ipa_profile))
	continue;

      for (e = n->indirect_calls; e; e = e->next_callee)
	{
	  if (n->count.initialized_p ())
	    nindirect++;
	  if (e->indirect_info->common_target_id)
	    {
	      if (!node_map_initialized)
		init_node_map (false);
	      node_map_initialized = true;
	      ncommon++;
	      n2 = find_func_by_profile_id (e->indirect_info->common_target_id);
	      if (n2)
		{
		  if (dump_file)
		    {
		      fprintf (dump_file, "Indirect call -> direct call from"
			       " other module %s => %s, prob %3.2f\n",
			       n->dump_name (),
			       n2->dump_name (),
			       e->indirect_info->common_target_probability
			       / (float)REG_BR_PROB_BASE);
		    }
		  /* Speculate only when the recorded target is likely
		     (>= 50%), the call is hot, and the target is usable.  */
		  if (e->indirect_info->common_target_probability
		      < REG_BR_PROB_BASE / 2)
		    {
		      nuseless++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: probability is too low.\n");
		    }
		  else if (!e->maybe_hot_p ())
		    {
		      nuseless++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: call is cold.\n");
		    }
		  else if (n2->get_availability () <= AVAIL_INTERPOSABLE
			   && n2->can_be_discarded_p ())
		    {
		      nuseless++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: target is overwritable "
				 "and can be discarded.\n");
		    }
		  else if (ipa_node_params_sum && ipa_edge_args_sum
			   && (!vec_safe_is_empty
			       (IPA_NODE_REF (n2)->descriptors))
			   && ipa_get_param_count (IPA_NODE_REF (n2))
			      != ipa_get_cs_argument_count (IPA_EDGE_REF (e))
			   && (ipa_get_param_count (IPA_NODE_REF (n2))
			       >= ipa_get_cs_argument_count (IPA_EDGE_REF (e))
			       || !stdarg_p (TREE_TYPE (n2->decl))))
		    {
		      nmismatch++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: "
				 "parameter count mistmatch\n");
		    }
		  else if (e->indirect_info->polymorphic
			   && !opt_for_fn (n->decl, flag_devirtualize)
			   && !possible_polymorphic_call_target_p (e, n2))
		    {
		      nimpossible++;
		      if (dump_file)
			fprintf (dump_file,
				 "Not speculating: "
				 "function is not in the polymorphic "
				 "call target list\n");
		    }
		  else
		    {
		      /* Target may be overwritable, but profile says that
			 control flow goes to this particular implementation
			 of N2.  Speculate on the local alias to allow inlining.
		       */
		      if (!n2->can_be_discarded_p ())
			{
			  cgraph_node *alias;
			  alias = dyn_cast<cgraph_node *> (n2->noninterposable_alias ());
			  if (alias)
			    n2 = alias;
			}
		      nconverted++;
		      e->make_speculative
			(n2,
			 e->count.apply_probability
			     (e->indirect_info->common_target_probability),
			 apply_scale (e->frequency,
				      e->indirect_info->common_target_probability));
		      update = true;
		    }
		}
	      else
		{
		  if (dump_file)
		    fprintf (dump_file, "Function with profile-id %i not found.\n",
			     e->indirect_info->common_target_id);
		  nunknown++;
		}
	    }
	}
      if (update)
	ipa_update_overall_fn_summary (n);
    }
  if (node_map_initialized)
    del_node_map ();
  /* nindirect != 0 also guards the divisions below.  */
  if (dump_file && nindirect)
    fprintf (dump_file,
	     "%i indirect calls trained.\n"
	     "%i (%3.2f%%) have common target.\n"
	     "%i (%3.2f%%) targets was not found.\n"
	     "%i (%3.2f%%) targets had parameter count mismatch.\n"
	     "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
	     "%i (%3.2f%%) speculations seems useless.\n"
	     "%i (%3.2f%%) speculations produced.\n",
	     nindirect,
	     ncommon, ncommon * 100.0 / nindirect,
	     nunknown, nunknown * 100.0 / nindirect,
	     nmismatch, nmismatch * 100.0 / nindirect,
	     nimpossible, nimpossible * 100.0 / nindirect,
	     nuseless, nuseless * 100.0 / nindirect,
	     nconverted, nconverted * 100.0 / nindirect);

  /* Propagate frequency flags bottom-up in reverse postorder; nodes whose
     flags changed mark their callees' aux so they are revisited.  */
  order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
  order_pos = ipa_reverse_postorder (order);
  for (i = order_pos - 1; i >= 0; i--)
    {
      if (order[i]->local.local
	  && opt_for_fn (order[i]->decl, flag_ipa_profile)
	  && ipa_propagate_frequency (order[i]))
	{
	  for (e = order[i]->callees; e; e = e->next_callee)
	    if (e->callee->local.local && !e->callee->aux)
	      {
		something_changed = true;
		e->callee->aux = (void *)1;
	      }
	}
      order[i]->aux = NULL;
    }

  /* Iterate until no node's flags change (fixed point).  */
  while (something_changed)
    {
      something_changed = false;
      for (i = order_pos - 1; i >= 0; i--)
	{
	  if (order[i]->aux
	      && opt_for_fn (order[i]->decl, flag_ipa_profile)
	      && ipa_propagate_frequency (order[i]))
	    {
	      for (e = order[i]->callees; e; e = e->next_callee)
		if (e->callee->local.local && !e->callee->aux)
		  {
		    something_changed = true;
		    e->callee->aux = (void *)1;
		  }
	    }
	  order[i]->aux = NULL;
	}
    }
  free (order);
  return 0;
}
|
|
746
|
|
namespace {

/* Pass descriptor for the IPA profile pass; runs under TV_IPA_PROFILE
   and appears in dumps as "profile_estimate".  */
const pass_data pass_data_ipa_profile =
{
  IPA_PASS, /* type */
  "profile_estimate", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_IPA_PROFILE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

/* IPA pass wiring: summaries are generated per-unit by
   ipa_profile_generate_summary, streamed by ipa_profile_write_summary /
   ipa_profile_read_summary for LTO, and the actual work happens in
   ipa_profile at execute time.  */
class pass_ipa_profile : public ipa_opt_pass_d
{
public:
  pass_ipa_profile (gcc::context *ctxt)
    : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
		      ipa_profile_generate_summary, /* generate_summary */
		      ipa_profile_write_summary, /* write_summary */
		      ipa_profile_read_summary, /* read_summary */
		      NULL, /* write_optimization_summary */
		      NULL, /* read_optimization_summary */
		      NULL, /* stmt_fixup */
		      0, /* function_transform_todo_flags_start */
		      NULL, /* function_transform */
		      NULL) /* variable_transform */
  {}

  /* opt_pass methods: */
  /* Run when -fipa-profile is enabled or during LTO.  */
  virtual bool gate (function *) { return flag_ipa_profile || in_lto_p; }
  virtual unsigned int execute (function *) { return ipa_profile (); }

}; // class pass_ipa_profile

} // anon namespace
|
|
785
|
|
/* Factory used by the pass manager to create the IPA profile pass.  */

ipa_opt_pass_d *
make_pass_ipa_profile (gcc::context *ctxt)
{
  return new pass_ipa_profile (ctxt);
}
|