111
|
1 /* FMA steering optimization pass for Cortex-A57.
|
145
|
2 Copyright (C) 2015-2020 Free Software Foundation, Inc.
|
111
|
3 Contributed by ARM Ltd.
|
|
4
|
|
5 This file is part of GCC.
|
|
6
|
|
7 GCC is free software; you can redistribute it and/or modify it
|
|
8 under the terms of the GNU General Public License as published by
|
|
9 the Free Software Foundation; either version 3, or (at your option)
|
|
10 any later version.
|
|
11
|
|
12 GCC is distributed in the hope that it will be useful, but
|
|
13 WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
15 General Public License for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with GCC; see the file COPYING3. If not see
|
|
19 <http://www.gnu.org/licenses/>. */
|
|
20
|
131
|
21 #define IN_TARGET_CODE 1
|
|
22
|
111
|
23 #include "config.h"
|
|
24 #define INCLUDE_LIST
|
|
25 #include "system.h"
|
|
26 #include "coretypes.h"
|
|
27 #include "backend.h"
|
|
28 #include "target.h"
|
|
29 #include "rtl.h"
|
|
30 #include "df.h"
|
|
31 #include "insn-config.h"
|
|
32 #include "regs.h"
|
|
33 #include "memmodel.h"
|
|
34 #include "emit-rtl.h"
|
|
35 #include "recog.h"
|
|
36 #include "cfganal.h"
|
|
37 #include "insn-attr.h"
|
|
38 #include "context.h"
|
|
39 #include "tree-pass.h"
|
145
|
40 #include "function-abi.h"
|
111
|
41 #include "regrename.h"
|
|
42 #include "aarch64-protos.h"
|
|
43
|
|
44 /* For better performance, the destination of FMADD/FMSUB instructions should
|
|
45 have the same parity as their accumulator register if the accumulator
|
|
46 contains the result of a previous FMUL or FMADD/FMSUB instruction if
|
|
47 targetting Cortex-A57 processors. Performance is also increased by
|
|
48 otherwise keeping a good balance in the parity of the destination register
|
|
49 of FMUL or FMADD/FMSUB.
|
|
50
|
|
51 This pass ensure that registers are renamed so that these conditions hold.
|
|
52 We reuse the existing register renaming facility from regrename.c to build
|
|
53 dependency chains and expose candidate registers for renaming.
|
|
54
|
|
55
|
|
56 The algorithm has three steps:
|
|
57
|
|
58 First, the functions of the register renaming pass are called. These
|
|
59 analyze the instructions and produce a list of def/use chains of
|
|
60 instructions.
|
|
61
|
|
62 Next, this information is used to build trees of multiply and
|
|
63 multiply-accumulate instructions. The roots of these trees are any
|
|
64 multiply, or any multiply-accumulate whose accumulator is not dependent on
|
|
65 a multiply or multiply-accumulate instruction. A child is added to the
|
|
66 tree where a dependency chain exists between the result of the parent
|
|
67 instruction and the accumulator operand of the child, as in the diagram
|
|
68 below:
|
|
69
|
|
70 fmul s2, s0, s1
|
|
71 / \
|
|
72 fmadd s0, s1, s1, s2 fmadd s4, s1, s1 s2
|
|
73 |
|
|
74 fmadd s3, s1, s1, s0
|
|
75
|
|
76 Trees made of a single instruction are permitted.
|
|
77
|
|
78 Finally, renaming is performed. The parity of the destination register at
|
|
79 the root of a tree is checked against the current balance of multiply and
|
|
80 multiply-accumulate on each pipeline. If necessary, the root of a tree is
|
|
81 renamed, in which case the rest of the tree is then renamed to keep the same
|
|
82 parity in the destination registers of all instructions in the tree. */
|
|
83
|
|
84
|
|
85
|
|
86 /* Forward declarations. */
|
|
87 class fma_node;
|
|
88 class fma_root_node;
|
|
89 class func_fma_steering;
|
|
90
|
|
91 /* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent
|
|
92 FMADD/FMSUB instructions form a graph. This is because alternatives can
|
|
93 make a register be set by several FMUL or FMADD/FMSUB instructions in
|
|
94 different basic blocks and because of loops. For ease of browsing, the
|
|
95 connected components of this graph are broken up into forests of trees.
|
|
96 Forests are represented by fma_forest objects, contained in the fma_forests
|
|
97 list. Using a separate object for the forests allows for a better use of
|
|
98 memory as there is some information that is global to each forest, such as
|
|
99 the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each
|
|
100 floating-point execution pipelines. */
|
|
101
|
|
102 class fma_forest
|
|
103 {
|
|
104 public:
|
|
105 fma_forest (func_fma_steering *, fma_root_node *, int);
|
|
106 ~fma_forest ();
|
|
107
|
|
108 int get_id ();
|
|
109 std::list<fma_root_node *> *get_roots ();
|
|
110 func_fma_steering *get_globals ();
|
|
111 int get_target_parity ();
|
|
112 void fma_node_created (fma_node *);
|
|
113 void merge_forest (fma_forest *);
|
|
114 void dump_info ();
|
|
115 void dispatch ();
|
|
116
|
|
117 private:
|
145
|
118 /* Prohibit copy construction. */
|
|
119 fma_forest (const fma_forest &);
|
|
120
|
111
|
121 /* The list of roots that form this forest. */
|
|
122 std::list<fma_root_node *> *m_roots;
|
|
123
|
|
124 /* Target parity the destination register of all FMUL and FMADD/FMSUB
|
|
125 instructions in this forest should have. */
|
|
126 int m_target_parity;
|
|
127
|
|
128 /* Link to the instance of func_fma_steering holding data related to the
|
|
129 FMA steering of the current function (cfun). */
|
|
130 func_fma_steering *m_globals;
|
|
131
|
|
132 /* Identifier for the forest (used for dumps). */
|
|
133 int m_id;
|
|
134
|
|
135 /* Total number of nodes in the forest (for statistics). */
|
|
136 int m_nb_nodes;
|
|
137 };
|
|
138
|
|
139 class fma_node
|
|
140 {
|
|
141 public:
|
|
142 fma_node (fma_node *parent, du_chain *chain);
|
|
143 ~fma_node ();
|
|
144
|
|
145 bool root_p ();
|
|
146 fma_forest *get_forest ();
|
|
147 std::list<fma_node *> *get_children ();
|
|
148 rtx_insn *get_insn ();
|
|
149 void add_child (fma_node *);
|
|
150 int get_parity ();
|
|
151 void set_head (du_head *);
|
|
152 void rename (fma_forest *);
|
|
153 void dump_info (fma_forest *);
|
|
154
|
145
|
155 private:
|
|
156 /* Prohibit copy construction. */
|
|
157 fma_node (const fma_node &);
|
|
158
|
111
|
159 protected:
|
|
160 /* Root node that lead to this node. */
|
|
161 fma_root_node *m_root;
|
|
162
|
|
163 /* The parent node of this node. If the node belong to a chain with several
|
|
164 parent nodes, the first one encountered in a depth-first search is chosen
|
|
165 as canonical parent. */
|
|
166 fma_node *m_parent;
|
|
167
|
|
168 /* The list of child nodes. If a chain contains several parent nodes, one is
|
|
169 chosen as canonical parent and the others will have no children. */
|
|
170 std::list<fma_node *> *m_children;
|
|
171
|
|
172 /* The associated DU_HEAD chain that the insn represented by this object
|
|
173 is (one of) the root of. When a chain contains several roots, the non
|
|
174 canonical ones have this field set to NULL. */
|
|
175 struct du_head *m_head;
|
|
176
|
|
177 /* The FMUL or FMADD/FMSUB instruction this object corresponds to. */
|
|
178 rtx_insn *m_insn;
|
|
179 };
|
|
180
|
|
181 class fma_root_node : public fma_node
|
|
182 {
|
|
183 public:
|
|
184 fma_root_node (func_fma_steering *, du_chain *, int);
|
|
185
|
|
186 fma_forest *get_forest ();
|
|
187 void set_forest (fma_forest *);
|
|
188 void dump_info (fma_forest *);
|
|
189
|
|
190 private:
|
|
191 /* The forest this node belonged to when it was created. */
|
|
192 fma_forest *m_forest;
|
|
193 };
|
|
194
|
|
195 /* Class holding all data and methods relative to the FMA steering of a given
|
|
196 function. The FMA steering pass could then run in parallel for different
|
|
197 functions. */
|
|
198
|
|
199 class func_fma_steering
|
|
200 {
|
|
201 public:
|
|
202 func_fma_steering ();
|
|
203 ~func_fma_steering ();
|
|
204
|
|
205 int get_fpu_balance ();
|
|
206 void remove_forest (fma_forest *);
|
|
207 bool put_node (fma_node *);
|
|
208 void update_balance (int);
|
|
209 fma_node *get_fma_node (rtx_insn *);
|
|
210 void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p);
|
|
211 void execute_fma_steering ();
|
|
212
|
|
213 private:
|
145
|
214 /* Prohibit copy construction. */
|
|
215 func_fma_steering (const func_fma_steering &);
|
|
216
|
111
|
217 void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *),
|
|
218 void (*) (fma_forest *, fma_node *), bool);
|
|
219 void analyze ();
|
|
220 void rename_fma_trees ();
|
|
221
|
|
222 /* Mapping between FMUL or FMADD/FMSUB instructions and the associated
|
|
223 fma_node object. Used when analyzing an instruction that is a root of
|
|
224 a chain to find if such an object was created because this instruction
|
|
225 is also a use in another chain. */
|
|
226 hash_map<rtx_insn *, fma_node *> *m_insn_fma_head_map;
|
|
227
|
|
228 /* A list of all the forests in a given function. */
|
|
229 std::list<fma_forest *> m_fma_forests;
|
|
230
|
|
231 /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU
|
|
232 pipelines:
|
|
233 < 0: more instruction dispatched to the first pipeline
|
|
234 == 0: perfect balance
|
|
235 > 0: more instruction dispatched to the second pipeline. */
|
|
236 int m_fpu_balance;
|
|
237
|
|
238 /* Identifier for the next forest created. */
|
|
239 int m_next_forest_id;
|
|
240 };
|
|
241
|
|
242 /* Rename the register HEAD->regno in all the insns in the chain HEAD to any
|
|
243 register not in the set UNAVAILABLE. Adapted from rename_chains in
|
|
244 regrename.c. */
|
|
245
|
|
246 static bool
|
|
247 rename_single_chain (du_head_p head, HARD_REG_SET *unavailable)
|
|
248 {
|
|
249 int best_new_reg;
|
|
250 int n_uses = 0;
|
|
251 struct du_chain *tmp;
|
|
252 int reg = head->regno;
|
|
253 enum reg_class super_class = NO_REGS;
|
|
254
|
|
255 if (head->cannot_rename)
|
|
256 return false;
|
|
257
|
|
258 if (fixed_regs[reg] || global_regs[reg]
|
|
259 || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM))
|
|
260 return false;
|
|
261
|
|
262 /* Iterate over elements in the chain in order to:
|
|
263 1. Count number of uses, and narrow the set of registers we can
|
|
264 use for renaming.
|
|
265 2. Compute the superunion of register classes in this chain. */
|
|
266 for (tmp = head->first; tmp; tmp = tmp->next_use)
|
|
267 {
|
|
268 if (DEBUG_INSN_P (tmp->insn))
|
|
269 continue;
|
|
270 n_uses++;
|
145
|
271 *unavailable |= ~reg_class_contents[tmp->cl];
|
111
|
272 super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
|
|
273 }
|
|
274
|
|
275 if (n_uses < 1)
|
|
276 return false;
|
|
277
|
|
278 best_new_reg = find_rename_reg (head, super_class, unavailable, reg,
|
|
279 false);
|
|
280
|
|
281 if (dump_file)
|
|
282 {
|
|
283 fprintf (dump_file, "Register %s in insn %d", reg_names[reg],
|
|
284 INSN_UID (head->first->insn));
|
145
|
285 if (head->call_abis)
|
111
|
286 fprintf (dump_file, " crosses a call");
|
|
287 }
|
|
288
|
|
289 if (best_new_reg == reg)
|
|
290 {
|
|
291 if (dump_file)
|
|
292 fprintf (dump_file, "; no available better choice\n");
|
|
293 return false;
|
|
294 }
|
|
295
|
|
296 if (regrename_do_replace (head, best_new_reg))
|
|
297 {
|
|
298 if (dump_file)
|
|
299 fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]);
|
|
300 df_set_regs_ever_live (best_new_reg, true);
|
|
301 }
|
|
302 else
|
|
303 {
|
|
304 if (dump_file)
|
|
305 fprintf (dump_file, ", renaming as %s failed\n",
|
|
306 reg_names[best_new_reg]);
|
|
307 return false;
|
|
308 }
|
|
309 return true;
|
|
310 }
|
|
311
|
|
312 /* Return whether T is the attribute of a FMADD/FMSUB-like instruction. */
|
|
313
|
|
314 static bool
|
|
315 is_fmac_op (enum attr_type t)
|
|
316 {
|
|
317 return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S);
|
|
318 }
|
|
319
|
|
320 /* Return whether T is the attribute of a FMUL instruction. */
|
|
321
|
|
322 static bool
|
|
323 is_fmul_op (enum attr_type t)
|
|
324 {
|
|
325 return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S);
|
|
326 }
|
|
327
|
|
328 /* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB
|
|
329 instruction. */
|
|
330
|
|
331 static bool
|
|
332 is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok)
|
|
333 {
|
|
334 enum attr_type t;
|
|
335
|
|
336 if (!NONDEBUG_INSN_P (insn))
|
|
337 return false;
|
|
338
|
|
339 if (recog_memoized (insn) < 0)
|
|
340 return false;
|
|
341
|
|
342 /* Only consider chain(s) this instruction is a root of if this is an FMUL or
|
|
343 FMADD/FMSUB instruction. This allows to avoid browsing chains of all
|
|
344 instructions for FMUL or FMADD/FMSUB in them. */
|
|
345 t = get_attr_type (insn);
|
|
346 return is_fmac_op (t) || (fmul_ok && is_fmul_op (t));
|
|
347 }
|
|
348
|
|
349
|
|
350 /*
|
|
351 * Class fma_forest method definitions.
|
|
352 */
|
|
353
|
|
354 fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root,
|
|
355 int id)
|
|
356 {
|
|
357 memset (this, 0, sizeof (*this));
|
|
358 this->m_globals = fma_steer;
|
|
359 this->m_roots = new std::list<fma_root_node *>;
|
|
360 this->m_roots->push_back (fma_root);
|
|
361 this->m_id = id;
|
|
362 }
|
|
363
|
|
364 fma_forest::~fma_forest ()
|
|
365 {
|
|
366 delete this->m_roots;
|
|
367 }
|
|
368
|
|
369 int
|
|
370 fma_forest::get_id ()
|
|
371 {
|
|
372 return this->m_id;
|
|
373 }
|
|
374
|
|
375 std::list<fma_root_node *> *
|
|
376 fma_forest::get_roots ()
|
|
377 {
|
|
378 return this->m_roots;
|
|
379 }
|
|
380
|
|
381 func_fma_steering *
|
|
382 fma_forest::get_globals ()
|
|
383 {
|
|
384 return this->m_globals;
|
|
385 }
|
|
386
|
|
387 int
|
|
388 fma_forest::get_target_parity ()
|
|
389 {
|
|
390 return this->m_target_parity;
|
|
391 }
|
|
392
|
|
393 /* Act on the creation of NODE by updating statistics in FOREST and adding an
|
|
394 entry for it in the func_fma_steering hashmap. */
|
|
395
|
|
396 void fma_forest::fma_node_created (fma_node *node)
|
|
397 {
|
|
398 bool created = !this->m_globals->put_node (node);
|
|
399
|
|
400 gcc_assert (created);
|
|
401 this->m_nb_nodes++;
|
|
402 }
|
|
403
|
|
404 /* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical
|
|
405 fma_forest object to represent both. */
|
|
406
|
|
407 void
|
|
408 fma_forest::merge_forest (fma_forest *other_forest)
|
|
409 {
|
|
410 std::list<fma_root_node *> *other_roots;
|
|
411 std::list<fma_root_node *>::iterator other_root_iter;
|
|
412
|
|
413 if (this == other_forest)
|
|
414 return;
|
|
415
|
|
416 other_roots = other_forest->m_roots;
|
|
417
|
|
418 /* Update root nodes' pointer to forest. */
|
|
419 for (other_root_iter = other_roots->begin ();
|
131
|
420 other_root_iter != other_roots->end (); ++other_root_iter)
|
111
|
421 (*other_root_iter)->set_forest (this);
|
|
422
|
|
423 /* Remove other_forest from the list of forests and move its tree roots in
|
|
424 the list of tree roots of ref_forest. */
|
|
425 this->m_globals->remove_forest (other_forest);
|
|
426 this->m_roots->splice (this->m_roots->begin (), *other_roots);
|
|
427 this->m_nb_nodes += other_forest->m_nb_nodes;
|
|
428
|
|
429 delete other_forest;
|
|
430 }
|
|
431
|
|
432 /* Dump information about the forest FOREST. */
|
|
433
|
|
434 void
|
|
435 fma_forest::dump_info ()
|
|
436 {
|
|
437 gcc_assert (dump_file);
|
|
438
|
|
439 fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id,
|
|
440 this->m_nb_nodes);
|
|
441 }
|
|
442
|
|
443 /* Wrapper around fma_forest::dump_info for use as parameter of function
|
|
444 pointer type in func_fma_steering::dfs. */
|
|
445
|
|
446 static void
|
|
447 dump_forest_info (fma_forest *forest)
|
|
448 {
|
|
449 forest->dump_info ();
|
|
450 }
|
|
451
|
|
452 /* Dispatch forest to the least utilized pipeline. */
|
|
453
|
|
454 void
|
|
455 fma_forest::dispatch ()
|
|
456 {
|
|
457 this->m_target_parity = this->m_roots->front ()->get_parity ();
|
|
458 int fpu_balance = this->m_globals->get_fpu_balance ();
|
|
459 if (fpu_balance != 0)
|
|
460 this->m_target_parity = (fpu_balance < 0);
|
|
461
|
|
462 if (dump_file)
|
|
463 fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id,
|
|
464 this->m_target_parity ? "odd" : "even");
|
|
465 }
|
|
466
|
|
467 /* Wrapper around fma_forest::dispatch for use as parameter of function pointer
|
|
468 type in func_fma_steering::dfs. */
|
|
469
|
|
470 static void
|
|
471 dispatch_forest (fma_forest *forest)
|
|
472 {
|
|
473 forest->dispatch ();
|
|
474 }
|
|
475
|
|
476 fma_node::fma_node (fma_node *parent, du_chain *chain)
|
|
477 {
|
|
478 memset (this, 0, sizeof (*this));
|
|
479 this->m_parent = parent;
|
|
480 this->m_children = new std::list<fma_node *>;
|
|
481 this->m_insn = chain->insn;
|
|
482 /* root_p () cannot be used to check for root before root is set. */
|
|
483 if (this->m_parent == this)
|
|
484 this->m_root = static_cast<fma_root_node *> (parent);
|
|
485 else
|
|
486 {
|
|
487 this->m_root = parent->m_root;
|
|
488 this->get_forest ()->fma_node_created (this);
|
|
489 }
|
|
490 }
|
|
491
|
|
492 fma_node::~fma_node ()
|
|
493 {
|
|
494 delete this->m_children;
|
|
495 }
|
|
496
|
|
497 std::list<fma_node *> *
|
|
498 fma_node::get_children ()
|
|
499 {
|
|
500 return this->m_children;
|
|
501 }
|
|
502
|
|
503 rtx_insn *
|
|
504 fma_node::get_insn ()
|
|
505 {
|
|
506 return this->m_insn;
|
|
507 }
|
|
508
|
|
509 void
|
|
510 fma_node::set_head (du_head *head)
|
|
511 {
|
|
512 gcc_assert (!this->m_head);
|
|
513 this->m_head = head;
|
|
514 }
|
|
515
|
|
516 /* Add a child to this node in the list of children. */
|
|
517
|
|
518 void
|
|
519 fma_node::add_child (fma_node *child)
|
|
520 {
|
|
521 this->m_children->push_back (child);
|
|
522 }
|
|
523
|
|
524 /* Return the parity of the destination register of the instruction represented
|
|
525 by this node. */
|
|
526
|
|
527 int
|
|
528 fma_node::get_parity ()
|
|
529 {
|
|
530 return this->m_head->regno % 2;
|
|
531 }
|
|
532
|
|
533 /* Get the actual forest associated with a non root node as the one the node
|
|
534 points to might have been merged into another one. In that case the pointer
|
|
535 in the root nodes are updated so we return the forest pointer of a root node
|
|
536 pointed to by the initial forest. Despite being a oneliner, this method is
|
|
537 defined here as it references a method from fma_root_node. */
|
|
538
|
|
539 fma_forest *
|
|
540 fma_node::get_forest ()
|
|
541 {
|
|
542 return this->m_root->get_forest ();
|
|
543 }
|
|
544
|
|
545 /* Return whether a node is a root node. */
|
|
546
|
|
547 bool
|
|
548 fma_node::root_p ()
|
|
549 {
|
|
550 return this->m_root == this;
|
|
551 }
|
|
552
|
|
553 /* Dump information about the children of node FMA_NODE in forest FOREST. */
|
|
554
|
|
555 void
|
|
556 fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest)
|
|
557 {
|
|
558 struct du_chain *chain;
|
|
559 std::list<fma_node *>::iterator fma_child;
|
|
560
|
|
561 gcc_assert (dump_file);
|
|
562
|
|
563 if (this->get_children ()->empty ())
|
|
564 return;
|
|
565
|
|
566 fprintf (dump_file, "Instruction(s)");
|
|
567 for (chain = this->m_head->first; chain; chain = chain->next_use)
|
|
568 {
|
|
569 if (!is_fmul_fmac_insn (chain->insn, true))
|
|
570 continue;
|
|
571
|
|
572 if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
|
|
573 continue;
|
|
574
|
|
575 fprintf (dump_file, " %d", INSN_UID (chain->insn));
|
|
576 }
|
|
577
|
|
578 fprintf (dump_file, " is(are) accumulator dependency of instructions");
|
|
579 for (fma_child = this->get_children ()->begin ();
|
|
580 fma_child != this->get_children ()->end (); fma_child++)
|
|
581 fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn));
|
|
582 fprintf (dump_file, "\n");
|
|
583 }
|
|
584
|
|
585 /* Wrapper around fma_node::dump_info for use as parameter of function pointer
|
|
586 type in func_fma_steering::dfs. */
|
|
587
|
|
588 static void
|
|
589 dump_tree_node_info (fma_forest *forest, fma_node *node)
|
|
590 {
|
|
591 node->dump_info (forest);
|
|
592 }
|
|
593
|
|
594 /* Rename the destination register of a single FMUL or FMADD/FMSUB instruction
|
|
595 represented by FMA_NODE to a register that respect the target parity for
|
|
596 FOREST or with same parity of the instruction represented by its parent node
|
|
597 if it has one. */
|
|
598
|
|
599 void
|
|
600 fma_node::rename (fma_forest *forest)
|
|
601 {
|
|
602 int cur_parity, target_parity;
|
|
603
|
|
604 /* This is alternate root of a chain and thus has no children. It will be
|
|
605 renamed when processing the canonical root for that chain. */
|
|
606 if (!this->m_head)
|
|
607 return;
|
|
608
|
|
609 target_parity = forest->get_target_parity ();
|
|
610 if (this->m_parent)
|
|
611 target_parity = this->m_parent->get_parity ();
|
|
612 cur_parity = this->get_parity ();
|
|
613
|
|
614 /* Rename if parity differs. */
|
|
615 if (cur_parity != target_parity)
|
|
616 {
|
|
617 rtx_insn *insn = this->m_insn;
|
|
618 HARD_REG_SET unavailable;
|
|
619 machine_mode mode;
|
|
620 int reg;
|
|
621
|
|
622 if (dump_file)
|
|
623 {
|
|
624 unsigned cur_dest_reg = this->m_head->regno;
|
|
625
|
|
626 fprintf (dump_file, "FMA or FMUL at insn %d but destination "
|
|
627 "register (%s) has different parity from expected to "
|
|
628 "maximize FPU pipeline utilization\n", INSN_UID (insn),
|
|
629 reg_names[cur_dest_reg]);
|
|
630 }
|
|
631
|
|
632 /* Don't clobber traceback for noreturn functions. */
|
|
633 CLEAR_HARD_REG_SET (unavailable);
|
|
634 if (frame_pointer_needed)
|
|
635 {
|
|
636 add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM);
|
|
637 add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM);
|
|
638 }
|
|
639
|
|
640 /* Exclude registers with wrong parity. */
|
|
641 mode = GET_MODE (SET_DEST (PATTERN (insn)));
|
|
642 for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2)
|
|
643 add_to_hard_reg_set (&unavailable, mode, reg);
|
|
644
|
|
645 if (!rename_single_chain (this->m_head, &unavailable))
|
|
646 {
|
|
647 if (dump_file)
|
|
648 fprintf (dump_file, "Destination register of insn %d could not be "
|
|
649 "renamed. Dependent FMA insns will use this parity from "
|
|
650 "there on.\n", INSN_UID (insn));
|
|
651 }
|
|
652 else
|
|
653 cur_parity = target_parity;
|
|
654 }
|
|
655
|
|
656 forest->get_globals ()->update_balance (cur_parity);
|
|
657 }
|
|
658
|
|
659 /* Wrapper around fma_node::dump_info for use as parameter of function pointer
|
|
660 type in func_fma_steering::dfs. */
|
|
661
|
|
662 static void
|
|
663 rename_fma_node (fma_forest *forest, fma_node *node)
|
|
664 {
|
|
665 node->rename (forest);
|
|
666 }
|
|
667
|
|
668 fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain,
|
|
669 int id) : fma_node (this, chain)
|
|
670 {
|
|
671 this->m_forest = new fma_forest (globals, this, id);
|
|
672 this->m_forest->fma_node_created (this);
|
|
673 }
|
|
674
|
|
675 fma_forest *
|
|
676 fma_root_node::get_forest ()
|
|
677 {
|
|
678 return this->m_forest;
|
|
679 }
|
|
680
|
|
681 void
|
|
682 fma_root_node::set_forest (fma_forest *ref_forest)
|
|
683 {
|
|
684 this->m_forest = ref_forest;
|
|
685 }
|
|
686
|
|
687 /* Dump information about the roots of forest FOREST. */
|
|
688
|
|
689 void
|
|
690 fma_root_node::dump_info (fma_forest *forest)
|
|
691 {
|
|
692 gcc_assert (dump_file);
|
|
693
|
|
694 if (this == forest->get_roots ()->front ())
|
|
695 fprintf (dump_file, "Instruction(s) at root of forest #%d:",
|
|
696 forest->get_id ());
|
|
697 fprintf (dump_file, " %d", INSN_UID (this->m_insn));
|
|
698 if (this == forest->get_roots ()->back ())
|
|
699 fprintf (dump_file, "\n");
|
|
700 }
|
|
701
|
|
702 /* Wrapper around fma_root_node::dump_info for use as parameter of function
|
|
703 pointer type in func_fma_steering::dfs. */
|
|
704
|
|
705 static void
|
|
706 dump_tree_root_info (fma_forest *forest, fma_root_node *node)
|
|
707 {
|
|
708 node->dump_info (forest);
|
|
709 }
|
|
710
|
|
711 func_fma_steering::func_fma_steering () : m_fpu_balance (0)
|
|
712 {
|
|
713 this->m_insn_fma_head_map = new hash_map<rtx_insn *, fma_node *>;
|
|
714 this->m_fma_forests.clear ();
|
|
715 this->m_next_forest_id = 0;
|
|
716 }
|
|
717
|
|
718 func_fma_steering::~func_fma_steering ()
|
|
719 {
|
|
720 delete this->m_insn_fma_head_map;
|
|
721 }
|
|
722
|
|
723 int
|
|
724 func_fma_steering::get_fpu_balance ()
|
|
725 {
|
|
726 return this->m_fpu_balance;
|
|
727 }
|
|
728
|
|
729 void
|
|
730 func_fma_steering::remove_forest (fma_forest *forest)
|
|
731 {
|
|
732 this->m_fma_forests.remove (forest);
|
|
733 }
|
|
734
|
|
735 /* Memorize the mapping of this instruction to its fma_node object and return
|
|
736 whether such a mapping existed. */
|
|
737
|
|
738 bool
|
|
739 func_fma_steering::put_node (fma_node *node)
|
|
740 {
|
|
741 return this->m_insn_fma_head_map->put (node->get_insn (), node);
|
|
742 }
|
|
743
|
|
744 /* Update the current balance considering a node with the given PARITY. */
|
|
745
|
|
746 void
|
|
747 func_fma_steering::update_balance (int parity)
|
|
748 {
|
|
749 this->m_fpu_balance = parity ? this->m_fpu_balance + 1
|
|
750 : this->m_fpu_balance - 1;
|
|
751 }
|
|
752
|
|
753 /* Return whether an fma_node object exists for instruction INSN and, if not,
|
|
754 allocate one in *RET. */
|
|
755
|
|
756 fma_node *
|
|
757 func_fma_steering::get_fma_node (rtx_insn *insn)
|
|
758 {
|
|
759 fma_node **fma_slot;
|
|
760
|
|
761 fma_slot = this->m_insn_fma_head_map->get (insn);
|
|
762 if (fma_slot)
|
|
763 return *fma_slot;
|
|
764 return NULL;
|
|
765 }
|
|
766
|
|
767 /* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB
|
|
768 instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all
|
|
769 part of FOREST. For the children, the associated head is left untouched
|
|
770 (and thus null) as this function will be called again when considering the
|
|
771 chain where they are def. For the parent, the chain is given in HEAD. */
|
|
772
|
|
773 void
|
|
774 func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest,
|
|
775 du_chain *chain, du_head_p head)
|
|
776 {
|
|
777 fma_forest *forest;
|
|
778 fma_node *node = this->get_fma_node (chain->insn);
|
|
779
|
|
780 /* This is a root node. */
|
|
781 if (!node)
|
|
782 {
|
|
783 fma_root_node *root_node;
|
|
784
|
|
785 root_node = new fma_root_node (this, chain, this->m_next_forest_id++);
|
|
786 forest = root_node->get_forest ();
|
|
787 node = root_node;
|
|
788
|
|
789 /* Until proved otherwise, assume this root is not part of an existing
|
|
790 forest and thus add its forest to the list of forests. */
|
|
791 this->m_fma_forests.push_back (forest);
|
|
792 }
|
|
793 else
|
|
794 forest = node->get_forest ();
|
|
795
|
|
796 node->set_head (head);
|
|
797
|
|
798 /* fma_node is part of a chain with several defs, one of them having already
|
|
799 been processed. The root of that already processed def is the canonical
|
|
800 one and the root of fma_node is added to its forest. No need to process
|
|
801 the children nodes as they were already processed when the other def was
|
|
802 processed. */
|
|
803 if (ref_forest)
|
|
804 {
|
|
805 ref_forest->merge_forest (forest);
|
|
806 return;
|
|
807 }
|
|
808
|
|
809 for (chain = head->first; chain; chain = chain->next_use)
|
|
810 {
|
|
811 fma_node *child_fma;
|
|
812 rtx fma_rtx, *accum_rtx_p;
|
|
813
|
|
814 if (!is_fmul_fmac_insn (chain->insn, false))
|
|
815 continue;
|
|
816
|
|
817 /* Get FMA rtx. */
|
|
818 fma_rtx = SET_SRC (PATTERN (chain->insn));
|
|
819 /* FMA is negated. */
|
|
820 if (GET_CODE (fma_rtx) == NEG)
|
|
821 fma_rtx = XEXP (fma_rtx, 0);
|
|
822 /* Get accumulator rtx. */
|
|
823 accum_rtx_p = &XEXP (fma_rtx, 2);
|
|
824 /* Accumulator is negated. */
|
|
825 if (!REG_P (*accum_rtx_p))
|
|
826 accum_rtx_p = &XEXP (*accum_rtx_p, 0);
|
|
827
|
|
828 /* This du_chain structure is not for the accumulator register. */
|
|
829 if (accum_rtx_p != chain->loc)
|
|
830 continue;
|
|
831
|
|
832 /* If object already created, this is a loop carried dependency. We
|
|
833 don't include this object in the children as we want trees for
|
|
834 rename_fma_trees to not be an infinite loop. */
|
|
835 if (this->get_fma_node (chain->insn))
|
|
836 continue;
|
|
837
|
|
838 child_fma = new fma_node (node, chain);
|
|
839
|
|
840 /* Memorize the mapping of this instruction to its fma_node object
|
|
841 as it will be processed for the chain starting at its destination
|
|
842 register later. */
|
|
843
|
|
844 /* Link to siblings. */
|
|
845 node->add_child (child_fma);
|
|
846 }
|
|
847 }
|
|
848
|
|
849 /* Perform a depth-first search of the forests of fma_node in
|
|
850 THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in
|
|
851 THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and
|
|
852 PROCESS_NODE () on each node. If FREE is true, free all std::list in the
|
|
853 same dfs. */
|
|
854
|
|
855 void
|
|
856 func_fma_steering::dfs (void (*process_forest) (fma_forest *),
|
|
857 void (*process_root) (fma_forest *, fma_root_node *),
|
|
858 void (*process_node) (fma_forest *, fma_node *),
|
|
859 bool free)
|
|
860 {
|
131
|
861 auto_vec<fma_node *> to_process;
|
|
862 auto_vec<fma_node *> to_free;
|
111
|
863 std::list<fma_forest *>::iterator forest_iter;
|
|
864
|
|
865 /* For each forest. */
|
|
866 for (forest_iter = this->m_fma_forests.begin ();
|
131
|
867 forest_iter != this->m_fma_forests.end (); ++forest_iter)
|
111
|
868 {
|
|
869 std::list<fma_root_node *>::iterator root_iter;
|
|
870
|
|
871 if (process_forest)
|
|
872 process_forest (*forest_iter);
|
|
873
|
|
874 /* For each tree root in this forest. */
|
|
875 for (root_iter = (*forest_iter)->get_roots ()->begin ();
|
131
|
876 root_iter != (*forest_iter)->get_roots ()->end (); ++root_iter)
|
111
|
877 {
|
|
878 if (process_root)
|
|
879 process_root (*forest_iter, *root_iter);
|
|
880 to_process.safe_push (*root_iter);
|
|
881 }
|
|
882
|
|
883 /* For each tree node in this forest. */
|
|
884 while (!to_process.is_empty ())
|
|
885 {
|
|
886 fma_node *node;
|
|
887 std::list<fma_node *>::iterator child_iter;
|
|
888
|
|
889 node = to_process.pop ();
|
|
890
|
|
891 if (process_node)
|
|
892 process_node (*forest_iter, node);
|
|
893
|
131
|
894 for (child_iter = node->get_children ()->begin ();
|
|
895 child_iter != node->get_children ()->end (); ++child_iter)
|
|
896 to_process.safe_push (*child_iter);
|
111
|
897
|
131
|
898 /* Defer freeing so that the process_node callback can access the
|
|
899 parent and children of the node being processed. */
|
111
|
900 if (free)
|
131
|
901 to_free.safe_push (node);
|
|
902 }
|
|
903
|
|
904 if (free)
|
|
905 {
|
|
906 delete *forest_iter;
|
|
907
|
|
908 while (!to_free.is_empty ())
|
111
|
909 {
|
131
|
910 fma_node *node = to_free.pop ();
|
111
|
911 if (node->root_p ())
|
|
912 delete static_cast<fma_root_node *> (node);
|
|
913 else
|
|
914 delete node;
|
|
915 }
|
|
916 }
|
|
917 }
|
|
918 }
|
|
919
|
|
920 /* Build the dependency trees of FMUL and FMADD/FMSUB instructions. */
|
|
921
|
|
922 void
|
|
923 func_fma_steering::analyze ()
|
|
924 {
|
|
925 int i, n_blocks, *bb_dfs_preorder;
|
|
926 basic_block bb;
|
|
927 rtx_insn *insn;
|
|
928
|
|
929 bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
|
|
930 n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false);
|
|
931
|
|
932 /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB
|
|
933 instructions. */
|
|
934 for (i = 0; i < n_blocks; i++)
|
|
935 {
|
|
936 bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]);
|
|
937 FOR_BB_INSNS (bb, insn)
|
|
938 {
|
|
939 operand_rr_info *dest_op_info;
|
|
940 struct du_chain *chain = NULL;
|
|
941 unsigned dest_regno;
|
|
942 fma_forest *forest = NULL;
|
|
943 du_head_p head = NULL;
|
|
944 int i;
|
|
945
|
|
946 if (!is_fmul_fmac_insn (insn, true))
|
|
947 continue;
|
|
948
|
|
949 /* Search the chain where this instruction is (one of) the root. */
|
|
950 dest_op_info = insn_rr[INSN_UID (insn)].op_info;
|
|
951 dest_regno = REGNO (SET_DEST (PATTERN (insn)));
|
|
952 for (i = 0; i < dest_op_info->n_chains; i++)
|
|
953 {
|
|
954 /* The register tracked by this chain does not match the
|
|
955 destination register of insn. */
|
|
956 if (dest_op_info->heads[i]->regno != dest_regno)
|
|
957 continue;
|
|
958
|
|
959 head = dest_op_info->heads[i];
|
|
960 /* The chain was merged in another, find the new head. */
|
|
961 if (!head->first)
|
|
962 head = regrename_chain_from_id (head->id);
|
|
963
|
|
964 /* Search the chain element for this instruction and, if another
|
|
965 FMUL or FMADD/FMSUB instruction was already processed, note
|
|
966 the forest of its tree. */
|
|
967 forest = NULL;
|
|
968 for (chain = head->first; chain; chain = chain->next_use)
|
|
969 {
|
|
970 fma_node **fma_slot;
|
|
971
|
|
972 if (!is_fmul_fmac_insn (chain->insn, true))
|
|
973 continue;
|
|
974
|
|
975 /* This is a use, continue. */
|
|
976 if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
|
|
977 continue;
|
|
978
|
|
979 if (chain->insn == insn)
|
|
980 break;
|
|
981
|
|
982 fma_slot = this->m_insn_fma_head_map->get (chain->insn);
|
|
983 if (fma_slot && (*fma_slot)->get_children ())
|
|
984 forest = (*fma_slot)->get_forest ();
|
|
985 }
|
|
986 if (chain)
|
|
987 break;
|
|
988 }
|
|
989
|
|
990 /* Due to implementation of regrename, dest register can slip away
|
|
991 from regrename's analysis. As a result, there is no chain for
|
|
992 the destination register of insn. We simply skip the insn even
|
|
993 it is a fmul/fmac instruction. This can happen when the dest
|
|
994 register is also a source register of insn and one of the below
|
|
995 conditions is satisfied:
|
|
996 1) the source reg is setup in larger mode than this insn;
|
|
997 2) the source reg is uninitialized;
|
|
998 3) the source reg is passed in as parameter. */
|
|
999 if (i < dest_op_info->n_chains)
|
|
1000 this->analyze_fma_fmul_insn (forest, chain, head);
|
|
1001 }
|
|
1002 }
|
|
1003 free (bb_dfs_preorder);
|
|
1004
|
|
1005 if (dump_file)
|
|
1006 this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info,
|
|
1007 false);
|
|
1008 }
|
|
1009
|
|
1010 /* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with
|
|
1011 the objective of keeping FPU pipeline balanced in term of instructions and
|
|
1012 having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be
|
|
1013 scheduled on the same pipeline. */
|
|
1014
|
|
1015 void
|
|
1016 func_fma_steering::rename_fma_trees ()
|
|
1017 {
|
|
1018 this->dfs (dispatch_forest, NULL, rename_fma_node, true);
|
|
1019
|
|
1020 if (dump_file && !this->m_fma_forests.empty ())
|
|
1021 {
|
|
1022 fprintf (dump_file, "Function %s has ", current_function_name ());
|
|
1023 if (this->m_fpu_balance == 0)
|
|
1024 fprintf (dump_file, "perfect balance of FMUL/FMA chains between the "
|
|
1025 "two FPU pipelines\n");
|
|
1026 else if (this->m_fpu_balance > 0)
|
|
1027 fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second "
|
|
1028 "FPU pipeline\n", this->m_fpu_balance);
|
|
1029 else /* this->m_fpu_balance < 0 */
|
|
1030 fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first "
|
|
1031 "FPU pipeline\n", - this->m_fpu_balance);
|
|
1032 }
|
|
1033 }
|
|
1034
|
|
1035 /* Execute FMA steering pass. */
|
|
1036
|
|
1037 void
|
|
1038 func_fma_steering::execute_fma_steering ()
|
|
1039 {
|
|
1040 df_set_flags (DF_LR_RUN_DCE);
|
|
1041 df_note_add_problem ();
|
|
1042 df_analyze ();
|
|
1043 df_set_flags (DF_DEFER_INSN_RESCAN);
|
|
1044
|
|
1045 regrename_init (true);
|
|
1046 regrename_analyze (NULL);
|
|
1047 this->analyze ();
|
|
1048 this->rename_fma_trees ();
|
|
1049 regrename_finish ();
|
|
1050 }
|
|
1051
|
|
1052 const pass_data pass_data_fma_steering =
|
|
1053 {
|
|
1054 RTL_PASS, /* type */
|
|
1055 "fma_steering", /* name */
|
|
1056 OPTGROUP_NONE, /* optinfo_flags */
|
|
1057 TV_NONE, /* tv_id */
|
|
1058 0, /* properties_required */
|
|
1059 0, /* properties_provided */
|
|
1060 0, /* properties_destroyed */
|
|
1061 0, /* todo_flags_start */
|
|
1062 TODO_df_finish, /* todo_flags_finish */
|
|
1063 };
|
|
1064
|
|
1065 class pass_fma_steering : public rtl_opt_pass
|
|
1066 {
|
|
1067 public:
|
|
1068 pass_fma_steering (gcc::context *ctxt)
|
|
1069 : rtl_opt_pass (pass_data_fma_steering, ctxt)
|
|
1070 {}
|
|
1071
|
|
1072 /* opt_pass methods: */
|
|
1073 virtual bool gate (function *)
|
|
1074 {
|
|
1075 return (aarch64_tune_params.extra_tuning_flags
|
|
1076 & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS)
|
|
1077 && optimize >= 2;
|
|
1078 }
|
|
1079
|
|
1080 virtual unsigned int execute (function *)
|
|
1081 {
|
|
1082 func_fma_steering *fma_steering = new func_fma_steering;
|
|
1083 fma_steering->execute_fma_steering ();
|
|
1084 delete fma_steering;
|
|
1085 return 0;
|
|
1086 }
|
|
1087
|
|
1088 }; // class pass_fma_steering
|
|
1089
|
|
1090 /* Create a new fma steering pass instance. */
|
|
1091
|
|
1092 rtl_opt_pass *
|
|
1093 make_pass_fma_steering (gcc::context *ctxt)
|
|
1094 {
|
|
1095 return new pass_fma_steering (ctxt);
|
|
1096 }
|