111
|
1 /* Basic IPA optimizations based on profile.
|
145
|
2 Copyright (C) 2003-2020 Free Software Foundation, Inc.
|
111
|
3
|
|
4 This file is part of GCC.
|
|
5
|
|
6 GCC is free software; you can redistribute it and/or modify it under
|
|
7 the terms of the GNU General Public License as published by the Free
|
|
8 Software Foundation; either version 3, or (at your option) any later
|
|
9 version.
|
|
10
|
|
11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
14 for more details.
|
|
15
|
|
16 You should have received a copy of the GNU General Public License
|
|
17 along with GCC; see the file COPYING3. If not see
|
|
18 <http://www.gnu.org/licenses/>. */
|
|
19
|
|
20 /* ipa-profile pass implements the following analysis propagating profille
|
|
21 inter-procedurally.
|
|
22
|
|
23 - Count histogram construction. This is a histogram analyzing how much
|
|
24 time is spent executing statements with a given execution count read
|
|
25 from profile feedback. This histogram is complete only with LTO,
|
|
26 otherwise it contains information only about the current unit.
|
|
27
|
|
28 The information is used to set hot/cold thresholds.
|
|
29 - Next speculative indirect call resolution is performed: the local
|
|
30 profile pass assigns profile-id to each function and provide us with a
|
|
31 histogram specifying the most common target. We look up the callgraph
|
|
32 node corresponding to the target and produce a speculative call.
|
|
33
|
|
34 This call may or may not survive through IPA optimization based on decision
|
|
35 of inliner.
|
|
36 - Finally we propagate the following flags: unlikely executed, executed
|
|
37 once, executed at startup and executed at exit. These flags are used to
|
|
38 control code size/performance threshold and code placement (by producing
|
|
39 .text.unlikely/.text.hot/.text.startup/.text.exit subsections). */
|
|
40 #include "config.h"
|
|
41 #include "system.h"
|
|
42 #include "coretypes.h"
|
|
43 #include "backend.h"
|
|
44 #include "tree.h"
|
|
45 #include "gimple.h"
|
|
46 #include "predict.h"
|
|
47 #include "alloc-pool.h"
|
|
48 #include "tree-pass.h"
|
|
49 #include "cgraph.h"
|
|
50 #include "data-streamer.h"
|
|
51 #include "gimple-iterator.h"
|
|
52 #include "ipa-utils.h"
|
|
53 #include "profile.h"
|
|
54 #include "value-prof.h"
|
|
55 #include "tree-inline.h"
|
|
56 #include "symbol-summary.h"
|
|
57 #include "tree-vrp.h"
|
|
58 #include "ipa-prop.h"
|
|
59 #include "ipa-fnsummary.h"
|
|
60
|
|
61 /* Entry in the histogram. */
|
|
62
|
|
63 struct histogram_entry
|
|
64 {
|
|
65 gcov_type count;
|
|
66 int time;
|
|
67 int size;
|
|
68 };
|
|
69
|
|
70 /* Histogram of profile values.
|
|
71 The histogram is represented as an ordered vector of entries allocated via
|
|
72 histogram_pool. During construction a separate hashtable is kept to lookup
|
|
73 duplicate entries. */
|
|
74
|
|
75 vec<histogram_entry *> histogram;
|
|
76 static object_allocator<histogram_entry> histogram_pool ("IPA histogram");
|
|
77
|
|
78 /* Hashtable support for storing SSA names hashed by their SSA_NAME_VAR. */
|
|
79
|
|
80 struct histogram_hash : nofree_ptr_hash <histogram_entry>
|
|
81 {
|
|
82 static inline hashval_t hash (const histogram_entry *);
|
|
83 static inline int equal (const histogram_entry *, const histogram_entry *);
|
|
84 };
|
|
85
|
|
86 inline hashval_t
|
|
87 histogram_hash::hash (const histogram_entry *val)
|
|
88 {
|
|
89 return val->count;
|
|
90 }
|
|
91
|
|
92 inline int
|
|
93 histogram_hash::equal (const histogram_entry *val, const histogram_entry *val2)
|
|
94 {
|
|
95 return val->count == val2->count;
|
|
96 }
|
|
97
|
|
98 /* Account TIME and SIZE executed COUNT times into HISTOGRAM.
|
|
99 HASHTABLE is the on-side hash kept to avoid duplicates. */
|
|
100
|
|
101 static void
|
|
102 account_time_size (hash_table<histogram_hash> *hashtable,
|
|
103 vec<histogram_entry *> &histogram,
|
|
104 gcov_type count, int time, int size)
|
|
105 {
|
|
106 histogram_entry key = {count, 0, 0};
|
|
107 histogram_entry **val = hashtable->find_slot (&key, INSERT);
|
|
108
|
|
109 if (!*val)
|
|
110 {
|
|
111 *val = histogram_pool.allocate ();
|
|
112 **val = key;
|
|
113 histogram.safe_push (*val);
|
|
114 }
|
|
115 (*val)->time += time;
|
|
116 (*val)->size += size;
|
|
117 }
|
|
118
|
|
119 int
|
|
120 cmp_counts (const void *v1, const void *v2)
|
|
121 {
|
|
122 const histogram_entry *h1 = *(const histogram_entry * const *)v1;
|
|
123 const histogram_entry *h2 = *(const histogram_entry * const *)v2;
|
|
124 if (h1->count < h2->count)
|
|
125 return 1;
|
|
126 if (h1->count > h2->count)
|
|
127 return -1;
|
|
128 return 0;
|
|
129 }
|
|
130
|
|
131 /* Dump HISTOGRAM to FILE. */
|
|
132
|
|
133 static void
|
|
134 dump_histogram (FILE *file, vec<histogram_entry *> histogram)
|
|
135 {
|
|
136 unsigned int i;
|
145
|
137 gcov_type overall_time = 0, cumulated_time = 0, cumulated_size = 0,
|
|
138 overall_size = 0;
|
111
|
139
|
|
140 fprintf (dump_file, "Histogram:\n");
|
|
141 for (i = 0; i < histogram.length (); i++)
|
|
142 {
|
|
143 overall_time += histogram[i]->count * histogram[i]->time;
|
|
144 overall_size += histogram[i]->size;
|
|
145 }
|
|
146 if (!overall_time)
|
|
147 overall_time = 1;
|
|
148 if (!overall_size)
|
|
149 overall_size = 1;
|
|
150 for (i = 0; i < histogram.length (); i++)
|
|
151 {
|
|
152 cumulated_time += histogram[i]->count * histogram[i]->time;
|
|
153 cumulated_size += histogram[i]->size;
|
|
154 fprintf (file, " %" PRId64": time:%i (%2.2f) size:%i (%2.2f)\n",
|
|
155 (int64_t) histogram[i]->count,
|
|
156 histogram[i]->time,
|
|
157 cumulated_time * 100.0 / overall_time,
|
|
158 histogram[i]->size,
|
|
159 cumulated_size * 100.0 / overall_size);
|
|
160 }
|
|
161 }
|
|
162
|
145
|
163 /* Structure containing speculative target information from profile. */
|
|
164
|
|
165 struct speculative_call_target
|
|
166 {
|
|
167 speculative_call_target (unsigned int id = 0, int prob = 0)
|
|
168 : target_id (id), target_probability (prob)
|
|
169 {
|
|
170 }
|
|
171
|
|
172 /* Profile_id of target obtained from profile. */
|
|
173 unsigned int target_id;
|
|
174 /* Probability that call will land in function with target_id. */
|
|
175 unsigned int target_probability;
|
|
176 };
|
|
177
|
|
178 class speculative_call_summary
|
|
179 {
|
|
180 public:
|
|
181 speculative_call_summary () : speculative_call_targets ()
|
|
182 {}
|
|
183
|
|
184 auto_vec<speculative_call_target> speculative_call_targets;
|
|
185
|
|
186 void dump (FILE *f);
|
|
187
|
|
188 };
|
|
189
|
|
190 /* Class to manage call summaries. */
|
|
191
|
|
192 class ipa_profile_call_summaries
|
|
193 : public call_summary<speculative_call_summary *>
|
|
194 {
|
|
195 public:
|
|
196 ipa_profile_call_summaries (symbol_table *table)
|
|
197 : call_summary<speculative_call_summary *> (table)
|
|
198 {}
|
|
199
|
|
200 /* Duplicate info when an edge is cloned. */
|
|
201 virtual void duplicate (cgraph_edge *, cgraph_edge *,
|
|
202 speculative_call_summary *old_sum,
|
|
203 speculative_call_summary *new_sum);
|
|
204 };
|
|
205
|
|
206 static ipa_profile_call_summaries *call_sums = NULL;
|
|
207
|
|
208 /* Dump all information in speculative call summary to F. */
|
|
209
|
|
210 void
|
|
211 speculative_call_summary::dump (FILE *f)
|
|
212 {
|
|
213 cgraph_node *n2;
|
|
214
|
|
215 unsigned spec_count = speculative_call_targets.length ();
|
|
216 for (unsigned i = 0; i < spec_count; i++)
|
|
217 {
|
|
218 speculative_call_target item = speculative_call_targets[i];
|
|
219 n2 = find_func_by_profile_id (item.target_id);
|
|
220 if (n2)
|
|
221 fprintf (f, " The %i speculative target is %s with prob %3.2f\n", i,
|
|
222 n2->dump_name (),
|
|
223 item.target_probability / (float) REG_BR_PROB_BASE);
|
|
224 else
|
|
225 fprintf (f, " The %i speculative target is %u with prob %3.2f\n", i,
|
|
226 item.target_id,
|
|
227 item.target_probability / (float) REG_BR_PROB_BASE);
|
|
228 }
|
|
229 }
|
|
230
|
|
231 /* Duplicate info when an edge is cloned. */
|
|
232
|
|
233 void
|
|
234 ipa_profile_call_summaries::duplicate (cgraph_edge *, cgraph_edge *,
|
|
235 speculative_call_summary *old_sum,
|
|
236 speculative_call_summary *new_sum)
|
|
237 {
|
|
238 if (!old_sum)
|
|
239 return;
|
|
240
|
|
241 unsigned old_count = old_sum->speculative_call_targets.length ();
|
|
242 if (!old_count)
|
|
243 return;
|
|
244
|
|
245 new_sum->speculative_call_targets.reserve_exact (old_count);
|
|
246 new_sum->speculative_call_targets.quick_grow_cleared (old_count);
|
|
247
|
|
248 for (unsigned i = 0; i < old_count; i++)
|
|
249 {
|
|
250 new_sum->speculative_call_targets[i]
|
|
251 = old_sum->speculative_call_targets[i];
|
|
252 }
|
|
253 }
|
|
254
|
|
255 /* Collect histogram and speculative target summaries from CFG profiles. */
|
111
|
256
|
|
257 static void
|
|
258 ipa_profile_generate_summary (void)
|
|
259 {
|
|
260 struct cgraph_node *node;
|
|
261 gimple_stmt_iterator gsi;
|
|
262 basic_block bb;
|
|
263
|
|
264 hash_table<histogram_hash> hashtable (10);
|
145
|
265
|
|
266 gcc_checking_assert (!call_sums);
|
|
267 call_sums = new ipa_profile_call_summaries (symtab);
|
|
268
|
111
|
269 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
|
145
|
270 if (ENTRY_BLOCK_PTR_FOR_FN
|
|
271 (DECL_STRUCT_FUNCTION (node->decl))->count.ipa_p ())
|
131
|
272 FOR_EACH_BB_FN (bb, DECL_STRUCT_FUNCTION (node->decl))
|
|
273 {
|
|
274 int time = 0;
|
|
275 int size = 0;
|
|
276 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
|
|
277 {
|
|
278 gimple *stmt = gsi_stmt (gsi);
|
|
279 if (gimple_code (stmt) == GIMPLE_CALL
|
|
280 && !gimple_call_fndecl (stmt))
|
|
281 {
|
|
282 histogram_value h;
|
|
283 h = gimple_histogram_value_of_type
|
|
284 (DECL_STRUCT_FUNCTION (node->decl),
|
|
285 stmt, HIST_TYPE_INDIR_CALL);
|
|
286 /* No need to do sanity check: gimple_ic_transform already
|
|
287 takes away bad histograms. */
|
|
288 if (h)
|
|
289 {
|
145
|
290 gcov_type val, count, all;
|
|
291 struct cgraph_edge *e = node->get_edge (stmt);
|
|
292 if (e && !e->indirect_unknown_callee)
|
|
293 continue;
|
|
294
|
|
295 speculative_call_summary *csum
|
|
296 = call_sums->get_create (e);
|
|
297
|
|
298 for (unsigned j = 0; j < GCOV_TOPN_VALUES; j++)
|
131
|
299 {
|
145
|
300 if (!get_nth_most_common_value (NULL, "indirect call",
|
|
301 h, &val, &count, &all,
|
|
302 j))
|
131
|
303 continue;
|
145
|
304
|
|
305 if (val == 0 || count == 0)
|
|
306 continue;
|
|
307
|
|
308 if (count > all)
|
131
|
309 {
|
|
310 if (dump_file)
|
145
|
311 fprintf (dump_file,
|
|
312 "Probability capped to 1\n");
|
|
313 count = all;
|
131
|
314 }
|
145
|
315 speculative_call_target item (
|
|
316 val, GCOV_COMPUTE_SCALE (count, all));
|
|
317 csum->speculative_call_targets.safe_push (item);
|
131
|
318 }
|
145
|
319
|
|
320 gimple_remove_histogram_value
|
|
321 (DECL_STRUCT_FUNCTION (node->decl), stmt, h);
|
131
|
322 }
|
|
323 }
|
|
324 time += estimate_num_insns (stmt, &eni_time_weights);
|
|
325 size += estimate_num_insns (stmt, &eni_size_weights);
|
|
326 }
|
|
327 if (bb->count.ipa_p () && bb->count.initialized_p ())
|
145
|
328 account_time_size (&hashtable, histogram,
|
|
329 bb->count.ipa ().to_gcov_type (),
|
131
|
330 time, size);
|
|
331 }
|
111
|
332 histogram.qsort (cmp_counts);
|
|
333 }
|
|
334
|
145
|
335 /* Serialize the speculative summary info for LTO. */
|
|
336
|
|
337 static void
|
|
338 ipa_profile_write_edge_summary (lto_simple_output_block *ob,
|
|
339 speculative_call_summary *csum)
|
|
340 {
|
|
341 unsigned len = 0;
|
|
342
|
|
343 len = csum->speculative_call_targets.length ();
|
|
344
|
|
345 gcc_assert (len <= GCOV_TOPN_VALUES);
|
|
346
|
|
347 streamer_write_hwi_stream (ob->main_stream, len);
|
|
348
|
|
349 if (len)
|
|
350 {
|
|
351 unsigned spec_count = csum->speculative_call_targets.length ();
|
|
352 for (unsigned i = 0; i < spec_count; i++)
|
|
353 {
|
|
354 speculative_call_target item = csum->speculative_call_targets[i];
|
|
355 gcc_assert (item.target_id);
|
|
356 streamer_write_hwi_stream (ob->main_stream, item.target_id);
|
|
357 streamer_write_hwi_stream (ob->main_stream, item.target_probability);
|
|
358 }
|
|
359 }
|
|
360 }
|
|
361
|
111
|
362 /* Serialize the ipa info for lto. */
|
|
363
|
|
364 static void
|
|
365 ipa_profile_write_summary (void)
|
|
366 {
|
|
367 struct lto_simple_output_block *ob
|
|
368 = lto_create_simple_output_block (LTO_section_ipa_profile);
|
|
369 unsigned int i;
|
|
370
|
|
371 streamer_write_uhwi_stream (ob->main_stream, histogram.length ());
|
|
372 for (i = 0; i < histogram.length (); i++)
|
|
373 {
|
|
374 streamer_write_gcov_count_stream (ob->main_stream, histogram[i]->count);
|
|
375 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->time);
|
|
376 streamer_write_uhwi_stream (ob->main_stream, histogram[i]->size);
|
|
377 }
|
145
|
378
|
|
379 if (!call_sums)
|
|
380 return;
|
|
381
|
|
382 /* Serialize speculative targets information. */
|
|
383 unsigned int count = 0;
|
|
384 lto_symtab_encoder_t encoder = ob->decl_state->symtab_node_encoder;
|
|
385 lto_symtab_encoder_iterator lsei;
|
|
386 cgraph_node *node;
|
|
387
|
|
388 for (lsei = lsei_start_function_in_partition (encoder); !lsei_end_p (lsei);
|
|
389 lsei_next_function_in_partition (&lsei))
|
|
390 {
|
|
391 node = lsei_cgraph_node (lsei);
|
|
392 if (node->definition && node->has_gimple_body_p ()
|
|
393 && node->indirect_calls)
|
|
394 count++;
|
|
395 }
|
|
396
|
|
397 streamer_write_uhwi_stream (ob->main_stream, count);
|
|
398
|
|
399 /* Process all of the functions. */
|
|
400 for (lsei = lsei_start_function_in_partition (encoder);
|
|
401 !lsei_end_p (lsei) && count; lsei_next_function_in_partition (&lsei))
|
|
402 {
|
|
403 cgraph_node *node = lsei_cgraph_node (lsei);
|
|
404 if (node->definition && node->has_gimple_body_p ()
|
|
405 && node->indirect_calls)
|
|
406 {
|
|
407 int node_ref = lto_symtab_encoder_encode (encoder, node);
|
|
408 streamer_write_uhwi_stream (ob->main_stream, node_ref);
|
|
409
|
|
410 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
|
|
411 {
|
|
412 speculative_call_summary *csum = call_sums->get_create (e);
|
|
413 ipa_profile_write_edge_summary (ob, csum);
|
|
414 }
|
|
415 }
|
|
416 }
|
|
417
|
111
|
418 lto_destroy_simple_output_block (ob);
|
|
419 }
|
|
420
|
145
|
421 /* Dump all profile summary data for all cgraph nodes and edges to file F. */
|
|
422
|
|
423 static void
|
|
424 ipa_profile_dump_all_summaries (FILE *f)
|
|
425 {
|
|
426 fprintf (dump_file,
|
|
427 "\n========== IPA-profile speculative targets: ==========\n");
|
|
428 cgraph_node *node;
|
|
429 FOR_EACH_FUNCTION_WITH_GIMPLE_BODY (node)
|
|
430 {
|
|
431 fprintf (f, "\nSummary for node %s:\n", node->dump_name ());
|
|
432 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
|
|
433 {
|
|
434 fprintf (f, " Summary for %s of indirect edge %d:\n",
|
|
435 e->caller->dump_name (), e->lto_stmt_uid);
|
|
436 speculative_call_summary *csum = call_sums->get_create (e);
|
|
437 csum->dump (f);
|
|
438 }
|
|
439 }
|
|
440 fprintf (f, "\n\n");
|
|
441 }
|
|
442
|
|
443 /* Read speculative targets information about edge for LTO WPA. */
|
|
444
|
|
445 static void
|
|
446 ipa_profile_read_edge_summary (class lto_input_block *ib, cgraph_edge *edge)
|
|
447 {
|
|
448 unsigned i, len;
|
|
449
|
|
450 len = streamer_read_hwi (ib);
|
|
451 gcc_assert (len <= GCOV_TOPN_VALUES);
|
|
452
|
|
453 speculative_call_summary *csum = call_sums->get_create (edge);
|
|
454
|
|
455 for (i = 0; i < len; i++)
|
|
456 {
|
|
457 unsigned int target_id = streamer_read_hwi (ib);
|
|
458 int target_probability = streamer_read_hwi (ib);
|
|
459 speculative_call_target item (target_id, target_probability);
|
|
460 csum->speculative_call_targets.safe_push (item);
|
|
461 }
|
|
462 }
|
|
463
|
|
464 /* Read profile speculative targets section information for LTO WPA. */
|
|
465
|
|
466 static void
|
|
467 ipa_profile_read_summary_section (struct lto_file_decl_data *file_data,
|
|
468 class lto_input_block *ib)
|
|
469 {
|
|
470 if (!ib)
|
|
471 return;
|
|
472
|
|
473 lto_symtab_encoder_t encoder = file_data->symtab_node_encoder;
|
|
474
|
|
475 unsigned int count = streamer_read_uhwi (ib);
|
|
476
|
|
477 unsigned int i;
|
|
478 unsigned int index;
|
|
479 cgraph_node * node;
|
|
480
|
|
481 for (i = 0; i < count; i++)
|
|
482 {
|
|
483 index = streamer_read_uhwi (ib);
|
|
484 encoder = file_data->symtab_node_encoder;
|
|
485 node
|
|
486 = dyn_cast<cgraph_node *> (lto_symtab_encoder_deref (encoder, index));
|
|
487
|
|
488 for (cgraph_edge *e = node->indirect_calls; e; e = e->next_callee)
|
|
489 ipa_profile_read_edge_summary (ib, e);
|
|
490 }
|
|
491 }
|
|
492
|
|
493 /* Deserialize the IPA histogram and speculative targets summary info for LTO.
|
|
494 */
|
111
|
495
|
|
496 static void
|
|
497 ipa_profile_read_summary (void)
|
|
498 {
|
|
499 struct lto_file_decl_data ** file_data_vec
|
|
500 = lto_get_file_decl_data ();
|
|
501 struct lto_file_decl_data * file_data;
|
|
502 int j = 0;
|
|
503
|
|
504 hash_table<histogram_hash> hashtable (10);
|
|
505
|
145
|
506 gcc_checking_assert (!call_sums);
|
|
507 call_sums = new ipa_profile_call_summaries (symtab);
|
|
508
|
111
|
509 while ((file_data = file_data_vec[j++]))
|
|
510 {
|
|
511 const char *data;
|
|
512 size_t len;
|
145
|
513 class lto_input_block *ib
|
111
|
514 = lto_create_simple_input_block (file_data,
|
|
515 LTO_section_ipa_profile,
|
|
516 &data, &len);
|
|
517 if (ib)
|
|
518 {
|
|
519 unsigned int num = streamer_read_uhwi (ib);
|
|
520 unsigned int n;
|
|
521 for (n = 0; n < num; n++)
|
|
522 {
|
|
523 gcov_type count = streamer_read_gcov_count (ib);
|
|
524 int time = streamer_read_uhwi (ib);
|
|
525 int size = streamer_read_uhwi (ib);
|
|
526 account_time_size (&hashtable, histogram,
|
|
527 count, time, size);
|
|
528 }
|
145
|
529
|
|
530 ipa_profile_read_summary_section (file_data, ib);
|
|
531
|
111
|
532 lto_destroy_simple_input_block (file_data,
|
|
533 LTO_section_ipa_profile,
|
|
534 ib, data, len);
|
|
535 }
|
|
536 }
|
|
537 histogram.qsort (cmp_counts);
|
|
538 }
|
|
539
|
|
540 /* Data used by ipa_propagate_frequency. */
|
|
541
|
|
542 struct ipa_propagate_frequency_data
|
|
543 {
|
|
544 cgraph_node *function_symbol;
|
|
545 bool maybe_unlikely_executed;
|
|
546 bool maybe_executed_once;
|
|
547 bool only_called_at_startup;
|
|
548 bool only_called_at_exit;
|
|
549 };
|
|
550
|
|
551 /* Worker for ipa_propagate_frequency_1. */
|
|
552
|
|
553 static bool
|
|
554 ipa_propagate_frequency_1 (struct cgraph_node *node, void *data)
|
|
555 {
|
|
556 struct ipa_propagate_frequency_data *d;
|
|
557 struct cgraph_edge *edge;
|
|
558
|
|
559 d = (struct ipa_propagate_frequency_data *)data;
|
|
560 for (edge = node->callers;
|
|
561 edge && (d->maybe_unlikely_executed || d->maybe_executed_once
|
|
562 || d->only_called_at_startup || d->only_called_at_exit);
|
|
563 edge = edge->next_caller)
|
|
564 {
|
|
565 if (edge->caller != d->function_symbol)
|
|
566 {
|
|
567 d->only_called_at_startup &= edge->caller->only_called_at_startup;
|
|
568 /* It makes sense to put main() together with the static constructors.
|
|
569 It will be executed for sure, but rest of functions called from
|
|
570 main are definitely not at startup only. */
|
|
571 if (MAIN_NAME_P (DECL_NAME (edge->caller->decl)))
|
|
572 d->only_called_at_startup = 0;
|
|
573 d->only_called_at_exit &= edge->caller->only_called_at_exit;
|
|
574 }
|
|
575
|
|
576 /* When profile feedback is available, do not try to propagate too hard;
|
|
577 counts are already good guide on function frequencies and roundoff
|
|
578 errors can make us to push function into unlikely section even when
|
|
579 it is executed by the train run. Transfer the function only if all
|
|
580 callers are unlikely executed. */
|
|
581 if (profile_info
|
131
|
582 && !(edge->callee->count.ipa () == profile_count::zero ())
|
111
|
583 && (edge->caller->frequency != NODE_FREQUENCY_UNLIKELY_EXECUTED
|
145
|
584 || (edge->caller->inlined_to
|
|
585 && edge->caller->inlined_to->frequency
|
111
|
586 != NODE_FREQUENCY_UNLIKELY_EXECUTED)))
|
|
587 d->maybe_unlikely_executed = false;
|
131
|
588 if (edge->count.ipa ().initialized_p ()
|
|
589 && !edge->count.ipa ().nonzero_p ())
|
111
|
590 continue;
|
|
591 switch (edge->caller->frequency)
|
|
592 {
|
|
593 case NODE_FREQUENCY_UNLIKELY_EXECUTED:
|
|
594 break;
|
|
595 case NODE_FREQUENCY_EXECUTED_ONCE:
|
131
|
596 {
|
|
597 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
598 fprintf (dump_file, " Called by %s that is executed once\n",
|
145
|
599 edge->caller->dump_name ());
|
131
|
600 d->maybe_unlikely_executed = false;
|
|
601 ipa_call_summary *s = ipa_call_summaries->get (edge);
|
|
602 if (s != NULL && s->loop_depth)
|
|
603 {
|
|
604 d->maybe_executed_once = false;
|
|
605 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
606 fprintf (dump_file, " Called in loop\n");
|
|
607 }
|
|
608 break;
|
|
609 }
|
111
|
610 case NODE_FREQUENCY_HOT:
|
|
611 case NODE_FREQUENCY_NORMAL:
|
|
612 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
613 fprintf (dump_file, " Called by %s that is normal or hot\n",
|
145
|
614 edge->caller->dump_name ());
|
111
|
615 d->maybe_unlikely_executed = false;
|
|
616 d->maybe_executed_once = false;
|
|
617 break;
|
|
618 }
|
|
619 }
|
|
620 return edge != NULL;
|
|
621 }
|
|
622
|
|
623 /* Return ture if NODE contains hot calls. */
|
|
624
|
|
625 bool
|
|
626 contains_hot_call_p (struct cgraph_node *node)
|
|
627 {
|
|
628 struct cgraph_edge *e;
|
|
629 for (e = node->callees; e; e = e->next_callee)
|
|
630 if (e->maybe_hot_p ())
|
|
631 return true;
|
|
632 else if (!e->inline_failed
|
|
633 && contains_hot_call_p (e->callee))
|
|
634 return true;
|
|
635 for (e = node->indirect_calls; e; e = e->next_callee)
|
|
636 if (e->maybe_hot_p ())
|
|
637 return true;
|
|
638 return false;
|
|
639 }
|
|
640
|
|
641 /* See if the frequency of NODE can be updated based on frequencies of its
|
|
642 callers. */
|
|
643 bool
|
|
644 ipa_propagate_frequency (struct cgraph_node *node)
|
|
645 {
|
|
646 struct ipa_propagate_frequency_data d = {node, true, true, true, true};
|
|
647 bool changed = false;
|
|
648
|
145
|
649 /* We cannot propagate anything useful about externally visible functions
|
111
|
650 nor about virtuals. */
|
145
|
651 if (!node->local
|
111
|
652 || node->alias
|
|
653 || (opt_for_fn (node->decl, flag_devirtualize)
|
|
654 && DECL_VIRTUAL_P (node->decl)))
|
|
655 return false;
|
|
656 gcc_assert (node->analyzed);
|
|
657 if (dump_file && (dump_flags & TDF_DETAILS))
|
145
|
658 fprintf (dump_file, "Processing frequency %s\n", node->dump_name ());
|
111
|
659
|
|
660 node->call_for_symbol_and_aliases (ipa_propagate_frequency_1, &d,
|
|
661 true);
|
|
662
|
|
663 if ((d.only_called_at_startup && !d.only_called_at_exit)
|
|
664 && !node->only_called_at_startup)
|
|
665 {
|
|
666 node->only_called_at_startup = true;
|
|
667 if (dump_file)
|
|
668 fprintf (dump_file, "Node %s promoted to only called at startup.\n",
|
145
|
669 node->dump_name ());
|
111
|
670 changed = true;
|
|
671 }
|
|
672 if ((d.only_called_at_exit && !d.only_called_at_startup)
|
|
673 && !node->only_called_at_exit)
|
|
674 {
|
|
675 node->only_called_at_exit = true;
|
|
676 if (dump_file)
|
|
677 fprintf (dump_file, "Node %s promoted to only called at exit.\n",
|
145
|
678 node->dump_name ());
|
111
|
679 changed = true;
|
|
680 }
|
|
681
|
|
682 /* With profile we can decide on hot/normal based on count. */
|
131
|
683 if (node->count. ipa().initialized_p ())
|
111
|
684 {
|
|
685 bool hot = false;
|
131
|
686 if (!(node->count. ipa() == profile_count::zero ())
|
|
687 && node->count. ipa() >= get_hot_bb_threshold ())
|
111
|
688 hot = true;
|
|
689 if (!hot)
|
|
690 hot |= contains_hot_call_p (node);
|
|
691 if (hot)
|
|
692 {
|
|
693 if (node->frequency != NODE_FREQUENCY_HOT)
|
|
694 {
|
|
695 if (dump_file)
|
|
696 fprintf (dump_file, "Node %s promoted to hot.\n",
|
145
|
697 node->dump_name ());
|
111
|
698 node->frequency = NODE_FREQUENCY_HOT;
|
|
699 return true;
|
|
700 }
|
|
701 return false;
|
|
702 }
|
|
703 else if (node->frequency == NODE_FREQUENCY_HOT)
|
|
704 {
|
|
705 if (dump_file)
|
|
706 fprintf (dump_file, "Node %s reduced to normal.\n",
|
145
|
707 node->dump_name ());
|
111
|
708 node->frequency = NODE_FREQUENCY_NORMAL;
|
|
709 changed = true;
|
|
710 }
|
|
711 }
|
|
712 /* These come either from profile or user hints; never update them. */
|
|
713 if (node->frequency == NODE_FREQUENCY_HOT
|
|
714 || node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
|
|
715 return changed;
|
|
716 if (d.maybe_unlikely_executed)
|
|
717 {
|
|
718 node->frequency = NODE_FREQUENCY_UNLIKELY_EXECUTED;
|
|
719 if (dump_file)
|
|
720 fprintf (dump_file, "Node %s promoted to unlikely executed.\n",
|
145
|
721 node->dump_name ());
|
111
|
722 changed = true;
|
|
723 }
|
|
724 else if (d.maybe_executed_once && node->frequency != NODE_FREQUENCY_EXECUTED_ONCE)
|
|
725 {
|
|
726 node->frequency = NODE_FREQUENCY_EXECUTED_ONCE;
|
|
727 if (dump_file)
|
|
728 fprintf (dump_file, "Node %s promoted to executed once.\n",
|
145
|
729 node->dump_name ());
|
111
|
730 changed = true;
|
|
731 }
|
|
732 return changed;
|
|
733 }
|
|
734
|
145
|
735 /* Check that number of arguments of N agrees with E.
|
|
736 Be conservative when summaries are not present. */
|
|
737
|
|
738 static bool
|
|
739 check_argument_count (struct cgraph_node *n, struct cgraph_edge *e)
|
|
740 {
|
|
741 if (!ipa_node_params_sum || !ipa_edge_args_sum)
|
|
742 return true;
|
|
743 class ipa_node_params *info = IPA_NODE_REF (n->function_symbol ());
|
|
744 if (!info)
|
|
745 return true;
|
|
746 ipa_edge_args *e_info = IPA_EDGE_REF (e);
|
|
747 if (!e_info)
|
|
748 return true;
|
|
749 if (ipa_get_param_count (info) != ipa_get_cs_argument_count (e_info)
|
|
750 && (ipa_get_param_count (info) >= ipa_get_cs_argument_count (e_info)
|
|
751 || !stdarg_p (TREE_TYPE (n->decl))))
|
|
752 return false;
|
|
753 return true;
|
|
754 }
|
|
755
|
111
|
756 /* Simple ipa profile pass propagating frequencies across the callgraph. */
|
|
757
|
|
758 static unsigned int
|
|
759 ipa_profile (void)
|
|
760 {
|
|
761 struct cgraph_node **order;
|
|
762 struct cgraph_edge *e;
|
|
763 int order_pos;
|
|
764 bool something_changed = false;
|
|
765 int i;
|
|
766 gcov_type overall_time = 0, cutoff = 0, cumulated = 0, overall_size = 0;
|
|
767 struct cgraph_node *n,*n2;
|
|
768 int nindirect = 0, ncommon = 0, nunknown = 0, nuseless = 0, nconverted = 0;
|
|
769 int nmismatch = 0, nimpossible = 0;
|
|
770 bool node_map_initialized = false;
|
145
|
771 gcov_type threshold;
|
111
|
772
|
|
773 if (dump_file)
|
|
774 dump_histogram (dump_file, histogram);
|
|
775 for (i = 0; i < (int)histogram.length (); i++)
|
|
776 {
|
|
777 overall_time += histogram[i]->count * histogram[i]->time;
|
|
778 overall_size += histogram[i]->size;
|
|
779 }
|
145
|
780 threshold = 0;
|
111
|
781 if (overall_time)
|
|
782 {
|
|
783 gcc_assert (overall_size);
|
|
784
|
145
|
785 cutoff = (overall_time * param_hot_bb_count_ws_permille + 500) / 1000;
|
111
|
786 for (i = 0; cumulated < cutoff; i++)
|
|
787 {
|
|
788 cumulated += histogram[i]->count * histogram[i]->time;
|
|
789 threshold = histogram[i]->count;
|
|
790 }
|
|
791 if (!threshold)
|
|
792 threshold = 1;
|
|
793 if (dump_file)
|
|
794 {
|
|
795 gcov_type cumulated_time = 0, cumulated_size = 0;
|
|
796
|
|
797 for (i = 0;
|
|
798 i < (int)histogram.length () && histogram[i]->count >= threshold;
|
|
799 i++)
|
|
800 {
|
|
801 cumulated_time += histogram[i]->count * histogram[i]->time;
|
|
802 cumulated_size += histogram[i]->size;
|
|
803 }
|
|
804 fprintf (dump_file, "Determined min count: %" PRId64
|
|
805 " Time:%3.2f%% Size:%3.2f%%\n",
|
|
806 (int64_t)threshold,
|
|
807 cumulated_time * 100.0 / overall_time,
|
|
808 cumulated_size * 100.0 / overall_size);
|
|
809 }
|
131
|
810
|
145
|
811 if (in_lto_p)
|
111
|
812 {
|
|
813 if (dump_file)
|
145
|
814 fprintf (dump_file, "Setting hotness threshold in LTO mode.\n");
|
111
|
815 set_hot_bb_threshold (threshold);
|
|
816 }
|
|
817 }
|
|
818 histogram.release ();
|
|
819 histogram_pool.release ();
|
|
820
|
145
|
821 /* Produce speculative calls: we saved common target from profiling into
|
|
822 e->target_id. Now, at link time, we can look up corresponding
|
111
|
823 function node and produce speculative call. */
|
|
824
|
145
|
825 gcc_checking_assert (call_sums);
|
|
826
|
|
827 if (dump_file)
|
|
828 {
|
|
829 if (!node_map_initialized)
|
|
830 init_node_map (false);
|
|
831 node_map_initialized = true;
|
|
832
|
|
833 ipa_profile_dump_all_summaries (dump_file);
|
|
834 }
|
|
835
|
111
|
836 FOR_EACH_DEFINED_FUNCTION (n)
|
|
837 {
|
|
838 bool update = false;
|
|
839
|
|
840 if (!opt_for_fn (n->decl, flag_ipa_profile))
|
|
841 continue;
|
|
842
|
|
843 for (e = n->indirect_calls; e; e = e->next_callee)
|
|
844 {
|
|
845 if (n->count.initialized_p ())
|
|
846 nindirect++;
|
145
|
847
|
|
848 speculative_call_summary *csum = call_sums->get_create (e);
|
|
849 unsigned spec_count = csum->speculative_call_targets.length ();
|
|
850 if (spec_count)
|
111
|
851 {
|
|
852 if (!node_map_initialized)
|
145
|
853 init_node_map (false);
|
111
|
854 node_map_initialized = true;
|
|
855 ncommon++;
|
145
|
856
|
|
857 if (in_lto_p)
|
111
|
858 {
|
|
859 if (dump_file)
|
|
860 {
|
145
|
861 fprintf (dump_file,
|
|
862 "Updating hotness threshold in LTO mode.\n");
|
|
863 fprintf (dump_file, "Updated min count: %" PRId64 "\n",
|
|
864 (int64_t) threshold / spec_count);
|
111
|
865 }
|
145
|
866 set_hot_bb_threshold (threshold / spec_count);
|
|
867 }
|
|
868
|
|
869 unsigned speculative_id = 0;
|
|
870 profile_count orig = e->count;
|
|
871 for (unsigned i = 0; i < spec_count; i++)
|
|
872 {
|
|
873 speculative_call_target item
|
|
874 = csum->speculative_call_targets[i];
|
|
875 n2 = find_func_by_profile_id (item.target_id);
|
|
876 if (n2)
|
111
|
877 {
|
|
878 if (dump_file)
|
145
|
879 {
|
|
880 fprintf (dump_file,
|
|
881 "Indirect call -> direct call from"
|
|
882 " other module %s => %s, prob %3.2f\n",
|
|
883 n->dump_name (),
|
|
884 n2->dump_name (),
|
|
885 item.target_probability
|
|
886 / (float) REG_BR_PROB_BASE);
|
|
887 }
|
|
888 if (item.target_probability
|
|
889 < REG_BR_PROB_BASE / GCOV_TOPN_VALUES / 2)
|
|
890 {
|
|
891 nuseless++;
|
|
892 if (dump_file)
|
|
893 fprintf (dump_file,
|
|
894 "Not speculating: "
|
|
895 "probability is too low.\n");
|
|
896 }
|
|
897 else if (!e->maybe_hot_p ())
|
|
898 {
|
|
899 nuseless++;
|
|
900 if (dump_file)
|
|
901 fprintf (dump_file,
|
|
902 "Not speculating: call is cold.\n");
|
|
903 }
|
|
904 else if (n2->get_availability () <= AVAIL_INTERPOSABLE
|
|
905 && n2->can_be_discarded_p ())
|
|
906 {
|
|
907 nuseless++;
|
|
908 if (dump_file)
|
|
909 fprintf (dump_file,
|
|
910 "Not speculating: target is overwritable "
|
|
911 "and can be discarded.\n");
|
|
912 }
|
|
913 else if (!check_argument_count (n2, e))
|
|
914 {
|
|
915 nmismatch++;
|
|
916 if (dump_file)
|
|
917 fprintf (dump_file,
|
|
918 "Not speculating: "
|
|
919 "parameter count mismatch\n");
|
|
920 }
|
|
921 else if (e->indirect_info->polymorphic
|
|
922 && !opt_for_fn (n->decl, flag_devirtualize)
|
|
923 && !possible_polymorphic_call_target_p (e, n2))
|
|
924 {
|
|
925 nimpossible++;
|
|
926 if (dump_file)
|
|
927 fprintf (dump_file,
|
|
928 "Not speculating: "
|
|
929 "function is not in the polymorphic "
|
|
930 "call target list\n");
|
|
931 }
|
|
932 else
|
|
933 {
|
|
934 /* Target may be overwritable, but profile says that
|
|
935 control flow goes to this particular implementation
|
|
936 of N2. Speculate on the local alias to allow
|
|
937 inlining. */
|
|
938 if (!n2->can_be_discarded_p ())
|
|
939 {
|
|
940 cgraph_node *alias;
|
|
941 alias = dyn_cast<cgraph_node *>
|
|
942 (n2->noninterposable_alias ());
|
|
943 if (alias)
|
|
944 n2 = alias;
|
|
945 }
|
|
946 nconverted++;
|
|
947 profile_probability prob
|
|
948 = profile_probability::from_reg_br_prob_base
|
|
949 (item.target_probability).adjusted ();
|
|
950 e->make_speculative (n2,
|
|
951 orig.apply_probability (prob),
|
|
952 speculative_id);
|
|
953 update = true;
|
|
954 speculative_id++;
|
|
955 }
|
111
|
956 }
|
|
957 else
|
|
958 {
|
145
|
959 if (dump_file)
|
|
960 fprintf (dump_file,
|
|
961 "Function with profile-id %i not found.\n",
|
|
962 item.target_id);
|
|
963 nunknown++;
|
111
|
964 }
|
|
965 }
|
|
966 }
|
145
|
967 }
|
|
968 if (update)
|
|
969 ipa_update_overall_fn_summary (n);
|
|
970 }
|
111
|
971 if (node_map_initialized)
|
|
972 del_node_map ();
|
|
973 if (dump_file && nindirect)
|
|
974 fprintf (dump_file,
|
|
975 "%i indirect calls trained.\n"
|
|
976 "%i (%3.2f%%) have common target.\n"
|
|
977 "%i (%3.2f%%) targets was not found.\n"
|
|
978 "%i (%3.2f%%) targets had parameter count mismatch.\n"
|
|
979 "%i (%3.2f%%) targets was not in polymorphic call target list.\n"
|
|
980 "%i (%3.2f%%) speculations seems useless.\n"
|
|
981 "%i (%3.2f%%) speculations produced.\n",
|
|
982 nindirect,
|
|
983 ncommon, ncommon * 100.0 / nindirect,
|
|
984 nunknown, nunknown * 100.0 / nindirect,
|
|
985 nmismatch, nmismatch * 100.0 / nindirect,
|
|
986 nimpossible, nimpossible * 100.0 / nindirect,
|
|
987 nuseless, nuseless * 100.0 / nindirect,
|
|
988 nconverted, nconverted * 100.0 / nindirect);
|
|
989
|
|
990 order = XCNEWVEC (struct cgraph_node *, symtab->cgraph_count);
|
|
991 order_pos = ipa_reverse_postorder (order);
|
|
992 for (i = order_pos - 1; i >= 0; i--)
|
|
993 {
|
145
|
994 if (order[i]->local
|
111
|
995 && opt_for_fn (order[i]->decl, flag_ipa_profile)
|
|
996 && ipa_propagate_frequency (order[i]))
|
|
997 {
|
|
998 for (e = order[i]->callees; e; e = e->next_callee)
|
145
|
999 if (e->callee->local && !e->callee->aux)
|
111
|
1000 {
|
|
1001 something_changed = true;
|
|
1002 e->callee->aux = (void *)1;
|
|
1003 }
|
|
1004 }
|
|
1005 order[i]->aux = NULL;
|
|
1006 }
|
|
1007
|
|
1008 while (something_changed)
|
|
1009 {
|
|
1010 something_changed = false;
|
|
1011 for (i = order_pos - 1; i >= 0; i--)
|
|
1012 {
|
|
1013 if (order[i]->aux
|
|
1014 && opt_for_fn (order[i]->decl, flag_ipa_profile)
|
|
1015 && ipa_propagate_frequency (order[i]))
|
|
1016 {
|
|
1017 for (e = order[i]->callees; e; e = e->next_callee)
|
145
|
1018 if (e->callee->local && !e->callee->aux)
|
111
|
1019 {
|
|
1020 something_changed = true;
|
|
1021 e->callee->aux = (void *)1;
|
|
1022 }
|
|
1023 }
|
|
1024 order[i]->aux = NULL;
|
|
1025 }
|
|
1026 }
|
|
1027 free (order);
|
145
|
1028
|
|
1029 if (dump_file && (dump_flags & TDF_DETAILS))
|
|
1030 symtab->dump (dump_file);
|
|
1031
|
|
1032 delete call_sums;
|
|
1033 call_sums = NULL;
|
|
1034
|
111
|
1035 return 0;
|
|
1036 }
|
|
1037
|
|
1038 namespace {
|
|
1039
|
|
1040 const pass_data pass_data_ipa_profile =
|
|
1041 {
|
|
1042 IPA_PASS, /* type */
|
|
1043 "profile_estimate", /* name */
|
|
1044 OPTGROUP_NONE, /* optinfo_flags */
|
|
1045 TV_IPA_PROFILE, /* tv_id */
|
|
1046 0, /* properties_required */
|
|
1047 0, /* properties_provided */
|
|
1048 0, /* properties_destroyed */
|
|
1049 0, /* todo_flags_start */
|
|
1050 0, /* todo_flags_finish */
|
|
1051 };
|
|
1052
|
|
1053 class pass_ipa_profile : public ipa_opt_pass_d
|
|
1054 {
|
|
1055 public:
|
|
1056 pass_ipa_profile (gcc::context *ctxt)
|
|
1057 : ipa_opt_pass_d (pass_data_ipa_profile, ctxt,
|
|
1058 ipa_profile_generate_summary, /* generate_summary */
|
|
1059 ipa_profile_write_summary, /* write_summary */
|
|
1060 ipa_profile_read_summary, /* read_summary */
|
|
1061 NULL, /* write_optimization_summary */
|
|
1062 NULL, /* read_optimization_summary */
|
|
1063 NULL, /* stmt_fixup */
|
|
1064 0, /* function_transform_todo_flags_start */
|
|
1065 NULL, /* function_transform */
|
|
1066 NULL) /* variable_transform */
|
|
1067 {}
|
|
1068
|
|
1069 /* opt_pass methods: */
|
|
1070 virtual bool gate (function *) { return flag_ipa_profile || in_lto_p; }
|
|
1071 virtual unsigned int execute (function *) { return ipa_profile (); }
|
|
1072
|
|
1073 }; // class pass_ipa_profile
|
|
1074
|
|
1075 } // anon namespace
|
|
1076
|
|
1077 ipa_opt_pass_d *
|
|
1078 make_pass_ipa_profile (gcc::context *ctxt)
|
|
1079 {
|
|
1080 return new pass_ipa_profile (ctxt);
|
|
1081 }
|