111
|
1 /* FMA steering optimization pass for Cortex-A57.
|
131
|
2 Copyright (C) 2015-2018 Free Software Foundation, Inc.
|
111
|
3 Contributed by ARM Ltd.
|
|
4
|
|
5 This file is part of GCC.
|
|
6
|
|
7 GCC is free software; you can redistribute it and/or modify it
|
|
8 under the terms of the GNU General Public License as published by
|
|
9 the Free Software Foundation; either version 3, or (at your option)
|
|
10 any later version.
|
|
11
|
|
12 GCC is distributed in the hope that it will be useful, but
|
|
13 WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
15 General Public License for more details.
|
|
16
|
|
17 You should have received a copy of the GNU General Public License
|
|
18 along with GCC; see the file COPYING3. If not see
|
|
19 <http://www.gnu.org/licenses/>. */
|
|
20
|
131
|
21 #define IN_TARGET_CODE 1
|
|
22
|
111
|
23 #include "config.h"
|
|
24 #define INCLUDE_LIST
|
|
25 #include "system.h"
|
|
26 #include "coretypes.h"
|
|
27 #include "backend.h"
|
|
28 #include "target.h"
|
|
29 #include "rtl.h"
|
|
30 #include "df.h"
|
|
31 #include "insn-config.h"
|
|
32 #include "regs.h"
|
|
33 #include "memmodel.h"
|
|
34 #include "emit-rtl.h"
|
|
35 #include "recog.h"
|
|
36 #include "cfganal.h"
|
|
37 #include "insn-attr.h"
|
|
38 #include "context.h"
|
|
39 #include "tree-pass.h"
|
|
40 #include "regrename.h"
|
|
41 #include "aarch64-protos.h"
|
|
42
|
|
43 /* For better performance, the destination of FMADD/FMSUB instructions should
|
|
44 have the same parity as their accumulator register if the accumulator
|
|
45 contains the result of a previous FMUL or FMADD/FMSUB instruction if
|
|
46 targetting Cortex-A57 processors. Performance is also increased by
|
|
47 otherwise keeping a good balance in the parity of the destination register
|
|
48 of FMUL or FMADD/FMSUB.
|
|
49
|
|
50 This pass ensure that registers are renamed so that these conditions hold.
|
|
51 We reuse the existing register renaming facility from regrename.c to build
|
|
52 dependency chains and expose candidate registers for renaming.
|
|
53
|
|
54
|
|
55 The algorithm has three steps:
|
|
56
|
|
57 First, the functions of the register renaming pass are called. These
|
|
58 analyze the instructions and produce a list of def/use chains of
|
|
59 instructions.
|
|
60
|
|
61 Next, this information is used to build trees of multiply and
|
|
62 multiply-accumulate instructions. The roots of these trees are any
|
|
63 multiply, or any multiply-accumulate whose accumulator is not dependent on
|
|
64 a multiply or multiply-accumulate instruction. A child is added to the
|
|
65 tree where a dependency chain exists between the result of the parent
|
|
66 instruction and the accumulator operand of the child, as in the diagram
|
|
67 below:
|
|
68
|
|
69 fmul s2, s0, s1
|
|
70 / \
|
|
71 fmadd s0, s1, s1, s2 fmadd s4, s1, s1 s2
|
|
72 |
|
|
73 fmadd s3, s1, s1, s0
|
|
74
|
|
75 Trees made of a single instruction are permitted.
|
|
76
|
|
77 Finally, renaming is performed. The parity of the destination register at
|
|
78 the root of a tree is checked against the current balance of multiply and
|
|
79 multiply-accumulate on each pipeline. If necessary, the root of a tree is
|
|
80 renamed, in which case the rest of the tree is then renamed to keep the same
|
|
81 parity in the destination registers of all instructions in the tree. */
|
|
82
|
|
83
|
|
84
|
|
85 /* Forward declarations. */
|
|
86 class fma_node;
|
|
87 class fma_root_node;
|
|
88 class func_fma_steering;
|
|
89
|
|
90 /* Dependencies between FMUL or FMADD/FMSUB instructions and subsequent
|
|
91 FMADD/FMSUB instructions form a graph. This is because alternatives can
|
|
92 make a register be set by several FMUL or FMADD/FMSUB instructions in
|
|
93 different basic blocks and because of loops. For ease of browsing, the
|
|
94 connected components of this graph are broken up into forests of trees.
|
|
95 Forests are represented by fma_forest objects, contained in the fma_forests
|
|
96 list. Using a separate object for the forests allows for a better use of
|
|
97 memory as there is some information that is global to each forest, such as
|
|
98 the number of FMSUB and FMADD/FMSUB instructions currently scheduled on each
|
|
99 floating-point execution pipelines. */
|
|
100
|
|
101 class fma_forest
|
|
102 {
|
|
103 public:
|
|
104 fma_forest (func_fma_steering *, fma_root_node *, int);
|
|
105 ~fma_forest ();
|
|
106
|
|
107 int get_id ();
|
|
108 std::list<fma_root_node *> *get_roots ();
|
|
109 func_fma_steering *get_globals ();
|
|
110 int get_target_parity ();
|
|
111 void fma_node_created (fma_node *);
|
|
112 void merge_forest (fma_forest *);
|
|
113 void dump_info ();
|
|
114 void dispatch ();
|
|
115
|
|
116 private:
|
|
117 /* The list of roots that form this forest. */
|
|
118 std::list<fma_root_node *> *m_roots;
|
|
119
|
|
120 /* Target parity the destination register of all FMUL and FMADD/FMSUB
|
|
121 instructions in this forest should have. */
|
|
122 int m_target_parity;
|
|
123
|
|
124 /* Link to the instance of func_fma_steering holding data related to the
|
|
125 FMA steering of the current function (cfun). */
|
|
126 func_fma_steering *m_globals;
|
|
127
|
|
128 /* Identifier for the forest (used for dumps). */
|
|
129 int m_id;
|
|
130
|
|
131 /* Total number of nodes in the forest (for statistics). */
|
|
132 int m_nb_nodes;
|
|
133 };
|
|
134
|
|
135 class fma_node
|
|
136 {
|
|
137 public:
|
|
138 fma_node (fma_node *parent, du_chain *chain);
|
|
139 ~fma_node ();
|
|
140
|
|
141 bool root_p ();
|
|
142 fma_forest *get_forest ();
|
|
143 std::list<fma_node *> *get_children ();
|
|
144 rtx_insn *get_insn ();
|
|
145 void add_child (fma_node *);
|
|
146 int get_parity ();
|
|
147 void set_head (du_head *);
|
|
148 void rename (fma_forest *);
|
|
149 void dump_info (fma_forest *);
|
|
150
|
|
151 protected:
|
|
152 /* Root node that lead to this node. */
|
|
153 fma_root_node *m_root;
|
|
154
|
|
155 /* The parent node of this node. If the node belong to a chain with several
|
|
156 parent nodes, the first one encountered in a depth-first search is chosen
|
|
157 as canonical parent. */
|
|
158 fma_node *m_parent;
|
|
159
|
|
160 /* The list of child nodes. If a chain contains several parent nodes, one is
|
|
161 chosen as canonical parent and the others will have no children. */
|
|
162 std::list<fma_node *> *m_children;
|
|
163
|
|
164 /* The associated DU_HEAD chain that the insn represented by this object
|
|
165 is (one of) the root of. When a chain contains several roots, the non
|
|
166 canonical ones have this field set to NULL. */
|
|
167 struct du_head *m_head;
|
|
168
|
|
169 /* The FMUL or FMADD/FMSUB instruction this object corresponds to. */
|
|
170 rtx_insn *m_insn;
|
|
171 };
|
|
172
|
|
173 class fma_root_node : public fma_node
|
|
174 {
|
|
175 public:
|
|
176 fma_root_node (func_fma_steering *, du_chain *, int);
|
|
177
|
|
178 fma_forest *get_forest ();
|
|
179 void set_forest (fma_forest *);
|
|
180 void dump_info (fma_forest *);
|
|
181
|
|
182 private:
|
|
183 /* The forest this node belonged to when it was created. */
|
|
184 fma_forest *m_forest;
|
|
185 };
|
|
186
|
|
187 /* Class holding all data and methods relative to the FMA steering of a given
|
|
188 function. The FMA steering pass could then run in parallel for different
|
|
189 functions. */
|
|
190
|
|
191 class func_fma_steering
|
|
192 {
|
|
193 public:
|
|
194 func_fma_steering ();
|
|
195 ~func_fma_steering ();
|
|
196
|
|
197 int get_fpu_balance ();
|
|
198 void remove_forest (fma_forest *);
|
|
199 bool put_node (fma_node *);
|
|
200 void update_balance (int);
|
|
201 fma_node *get_fma_node (rtx_insn *);
|
|
202 void analyze_fma_fmul_insn (fma_forest *, du_chain *, du_head_p);
|
|
203 void execute_fma_steering ();
|
|
204
|
|
205 private:
|
|
206 void dfs (void (*) (fma_forest *), void (*) (fma_forest *, fma_root_node *),
|
|
207 void (*) (fma_forest *, fma_node *), bool);
|
|
208 void analyze ();
|
|
209 void rename_fma_trees ();
|
|
210
|
|
211 /* Mapping between FMUL or FMADD/FMSUB instructions and the associated
|
|
212 fma_node object. Used when analyzing an instruction that is a root of
|
|
213 a chain to find if such an object was created because this instruction
|
|
214 is also a use in another chain. */
|
|
215 hash_map<rtx_insn *, fma_node *> *m_insn_fma_head_map;
|
|
216
|
|
217 /* A list of all the forests in a given function. */
|
|
218 std::list<fma_forest *> m_fma_forests;
|
|
219
|
|
220 /* Balance of FMUL and FMADD/FMSUB instructions between the two FPU
|
|
221 pipelines:
|
|
222 < 0: more instruction dispatched to the first pipeline
|
|
223 == 0: perfect balance
|
|
224 > 0: more instruction dispatched to the second pipeline. */
|
|
225 int m_fpu_balance;
|
|
226
|
|
227 /* Identifier for the next forest created. */
|
|
228 int m_next_forest_id;
|
|
229 };
|
|
230
|
|
231 /* Rename the register HEAD->regno in all the insns in the chain HEAD to any
|
|
232 register not in the set UNAVAILABLE. Adapted from rename_chains in
|
|
233 regrename.c. */
|
|
234
|
|
235 static bool
|
|
236 rename_single_chain (du_head_p head, HARD_REG_SET *unavailable)
|
|
237 {
|
|
238 int best_new_reg;
|
|
239 int n_uses = 0;
|
|
240 struct du_chain *tmp;
|
|
241 int reg = head->regno;
|
|
242 enum reg_class super_class = NO_REGS;
|
|
243
|
|
244 if (head->cannot_rename)
|
|
245 return false;
|
|
246
|
|
247 if (fixed_regs[reg] || global_regs[reg]
|
|
248 || (frame_pointer_needed && reg == HARD_FRAME_POINTER_REGNUM))
|
|
249 return false;
|
|
250
|
|
251 /* Iterate over elements in the chain in order to:
|
|
252 1. Count number of uses, and narrow the set of registers we can
|
|
253 use for renaming.
|
|
254 2. Compute the superunion of register classes in this chain. */
|
|
255 for (tmp = head->first; tmp; tmp = tmp->next_use)
|
|
256 {
|
|
257 if (DEBUG_INSN_P (tmp->insn))
|
|
258 continue;
|
|
259 n_uses++;
|
|
260 IOR_COMPL_HARD_REG_SET (*unavailable, reg_class_contents[tmp->cl]);
|
|
261 super_class = reg_class_superunion[(int) super_class][(int) tmp->cl];
|
|
262 }
|
|
263
|
|
264 if (n_uses < 1)
|
|
265 return false;
|
|
266
|
|
267 best_new_reg = find_rename_reg (head, super_class, unavailable, reg,
|
|
268 false);
|
|
269
|
|
270 if (dump_file)
|
|
271 {
|
|
272 fprintf (dump_file, "Register %s in insn %d", reg_names[reg],
|
|
273 INSN_UID (head->first->insn));
|
|
274 if (head->need_caller_save_reg)
|
|
275 fprintf (dump_file, " crosses a call");
|
|
276 }
|
|
277
|
|
278 if (best_new_reg == reg)
|
|
279 {
|
|
280 if (dump_file)
|
|
281 fprintf (dump_file, "; no available better choice\n");
|
|
282 return false;
|
|
283 }
|
|
284
|
|
285 if (regrename_do_replace (head, best_new_reg))
|
|
286 {
|
|
287 if (dump_file)
|
|
288 fprintf (dump_file, ", renamed as %s\n", reg_names[best_new_reg]);
|
|
289 df_set_regs_ever_live (best_new_reg, true);
|
|
290 }
|
|
291 else
|
|
292 {
|
|
293 if (dump_file)
|
|
294 fprintf (dump_file, ", renaming as %s failed\n",
|
|
295 reg_names[best_new_reg]);
|
|
296 return false;
|
|
297 }
|
|
298 return true;
|
|
299 }
|
|
300
|
|
301 /* Return whether T is the attribute of a FMADD/FMSUB-like instruction. */
|
|
302
|
|
303 static bool
|
|
304 is_fmac_op (enum attr_type t)
|
|
305 {
|
|
306 return (t == TYPE_FMACS) || (t == TYPE_FMACD) || (t == TYPE_NEON_FP_MLA_S);
|
|
307 }
|
|
308
|
|
309 /* Return whether T is the attribute of a FMUL instruction. */
|
|
310
|
|
311 static bool
|
|
312 is_fmul_op (enum attr_type t)
|
|
313 {
|
|
314 return (t == TYPE_FMULS) || (t == TYPE_FMULD) || (t == TYPE_NEON_FP_MUL_S);
|
|
315 }
|
|
316
|
|
317 /* Return whether INSN is an FMUL (if FMUL_OK is true) or FMADD/FMSUB
|
|
318 instruction. */
|
|
319
|
|
320 static bool
|
|
321 is_fmul_fmac_insn (rtx_insn *insn, bool fmul_ok)
|
|
322 {
|
|
323 enum attr_type t;
|
|
324
|
|
325 if (!NONDEBUG_INSN_P (insn))
|
|
326 return false;
|
|
327
|
|
328 if (recog_memoized (insn) < 0)
|
|
329 return false;
|
|
330
|
|
331 /* Only consider chain(s) this instruction is a root of if this is an FMUL or
|
|
332 FMADD/FMSUB instruction. This allows to avoid browsing chains of all
|
|
333 instructions for FMUL or FMADD/FMSUB in them. */
|
|
334 t = get_attr_type (insn);
|
|
335 return is_fmac_op (t) || (fmul_ok && is_fmul_op (t));
|
|
336 }
|
|
337
|
|
338
|
|
339 /*
|
|
340 * Class fma_forest method definitions.
|
|
341 */
|
|
342
|
|
343 fma_forest::fma_forest (func_fma_steering *fma_steer, fma_root_node *fma_root,
|
|
344 int id)
|
|
345 {
|
|
346 memset (this, 0, sizeof (*this));
|
|
347 this->m_globals = fma_steer;
|
|
348 this->m_roots = new std::list<fma_root_node *>;
|
|
349 this->m_roots->push_back (fma_root);
|
|
350 this->m_id = id;
|
|
351 }
|
|
352
|
|
353 fma_forest::~fma_forest ()
|
|
354 {
|
|
355 delete this->m_roots;
|
|
356 }
|
|
357
|
|
358 int
|
|
359 fma_forest::get_id ()
|
|
360 {
|
|
361 return this->m_id;
|
|
362 }
|
|
363
|
|
364 std::list<fma_root_node *> *
|
|
365 fma_forest::get_roots ()
|
|
366 {
|
|
367 return this->m_roots;
|
|
368 }
|
|
369
|
|
370 func_fma_steering *
|
|
371 fma_forest::get_globals ()
|
|
372 {
|
|
373 return this->m_globals;
|
|
374 }
|
|
375
|
|
376 int
|
|
377 fma_forest::get_target_parity ()
|
|
378 {
|
|
379 return this->m_target_parity;
|
|
380 }
|
|
381
|
|
382 /* Act on the creation of NODE by updating statistics in FOREST and adding an
|
|
383 entry for it in the func_fma_steering hashmap. */
|
|
384
|
|
385 void fma_forest::fma_node_created (fma_node *node)
|
|
386 {
|
|
387 bool created = !this->m_globals->put_node (node);
|
|
388
|
|
389 gcc_assert (created);
|
|
390 this->m_nb_nodes++;
|
|
391 }
|
|
392
|
|
393 /* Merge REF_FOREST and OTHER_FOREST together, making REF_FOREST the canonical
|
|
394 fma_forest object to represent both. */
|
|
395
|
|
396 void
|
|
397 fma_forest::merge_forest (fma_forest *other_forest)
|
|
398 {
|
|
399 std::list<fma_root_node *> *other_roots;
|
|
400 std::list<fma_root_node *>::iterator other_root_iter;
|
|
401
|
|
402 if (this == other_forest)
|
|
403 return;
|
|
404
|
|
405 other_roots = other_forest->m_roots;
|
|
406
|
|
407 /* Update root nodes' pointer to forest. */
|
|
408 for (other_root_iter = other_roots->begin ();
|
131
|
409 other_root_iter != other_roots->end (); ++other_root_iter)
|
111
|
410 (*other_root_iter)->set_forest (this);
|
|
411
|
|
412 /* Remove other_forest from the list of forests and move its tree roots in
|
|
413 the list of tree roots of ref_forest. */
|
|
414 this->m_globals->remove_forest (other_forest);
|
|
415 this->m_roots->splice (this->m_roots->begin (), *other_roots);
|
|
416 this->m_nb_nodes += other_forest->m_nb_nodes;
|
|
417
|
|
418 delete other_forest;
|
|
419 }
|
|
420
|
|
421 /* Dump information about the forest FOREST. */
|
|
422
|
|
423 void
|
|
424 fma_forest::dump_info ()
|
|
425 {
|
|
426 gcc_assert (dump_file);
|
|
427
|
|
428 fprintf (dump_file, "Forest #%d has %d nodes\n", this->m_id,
|
|
429 this->m_nb_nodes);
|
|
430 }
|
|
431
|
|
432 /* Wrapper around fma_forest::dump_info for use as parameter of function
|
|
433 pointer type in func_fma_steering::dfs. */
|
|
434
|
|
435 static void
|
|
436 dump_forest_info (fma_forest *forest)
|
|
437 {
|
|
438 forest->dump_info ();
|
|
439 }
|
|
440
|
|
441 /* Dispatch forest to the least utilized pipeline. */
|
|
442
|
|
443 void
|
|
444 fma_forest::dispatch ()
|
|
445 {
|
|
446 this->m_target_parity = this->m_roots->front ()->get_parity ();
|
|
447 int fpu_balance = this->m_globals->get_fpu_balance ();
|
|
448 if (fpu_balance != 0)
|
|
449 this->m_target_parity = (fpu_balance < 0);
|
|
450
|
|
451 if (dump_file)
|
|
452 fprintf (dump_file, "Target parity for forest #%d: %s\n", this->m_id,
|
|
453 this->m_target_parity ? "odd" : "even");
|
|
454 }
|
|
455
|
|
456 /* Wrapper around fma_forest::dispatch for use as parameter of function pointer
|
|
457 type in func_fma_steering::dfs. */
|
|
458
|
|
459 static void
|
|
460 dispatch_forest (fma_forest *forest)
|
|
461 {
|
|
462 forest->dispatch ();
|
|
463 }
|
|
464
|
|
465 fma_node::fma_node (fma_node *parent, du_chain *chain)
|
|
466 {
|
|
467 memset (this, 0, sizeof (*this));
|
|
468 this->m_parent = parent;
|
|
469 this->m_children = new std::list<fma_node *>;
|
|
470 this->m_insn = chain->insn;
|
|
471 /* root_p () cannot be used to check for root before root is set. */
|
|
472 if (this->m_parent == this)
|
|
473 this->m_root = static_cast<fma_root_node *> (parent);
|
|
474 else
|
|
475 {
|
|
476 this->m_root = parent->m_root;
|
|
477 this->get_forest ()->fma_node_created (this);
|
|
478 }
|
|
479 }
|
|
480
|
|
481 fma_node::~fma_node ()
|
|
482 {
|
|
483 delete this->m_children;
|
|
484 }
|
|
485
|
|
486 std::list<fma_node *> *
|
|
487 fma_node::get_children ()
|
|
488 {
|
|
489 return this->m_children;
|
|
490 }
|
|
491
|
|
492 rtx_insn *
|
|
493 fma_node::get_insn ()
|
|
494 {
|
|
495 return this->m_insn;
|
|
496 }
|
|
497
|
|
498 void
|
|
499 fma_node::set_head (du_head *head)
|
|
500 {
|
|
501 gcc_assert (!this->m_head);
|
|
502 this->m_head = head;
|
|
503 }
|
|
504
|
|
505 /* Add a child to this node in the list of children. */
|
|
506
|
|
507 void
|
|
508 fma_node::add_child (fma_node *child)
|
|
509 {
|
|
510 this->m_children->push_back (child);
|
|
511 }
|
|
512
|
|
513 /* Return the parity of the destination register of the instruction represented
|
|
514 by this node. */
|
|
515
|
|
516 int
|
|
517 fma_node::get_parity ()
|
|
518 {
|
|
519 return this->m_head->regno % 2;
|
|
520 }
|
|
521
|
|
522 /* Get the actual forest associated with a non root node as the one the node
|
|
523 points to might have been merged into another one. In that case the pointer
|
|
524 in the root nodes are updated so we return the forest pointer of a root node
|
|
525 pointed to by the initial forest. Despite being a oneliner, this method is
|
|
526 defined here as it references a method from fma_root_node. */
|
|
527
|
|
528 fma_forest *
|
|
529 fma_node::get_forest ()
|
|
530 {
|
|
531 return this->m_root->get_forest ();
|
|
532 }
|
|
533
|
|
534 /* Return whether a node is a root node. */
|
|
535
|
|
536 bool
|
|
537 fma_node::root_p ()
|
|
538 {
|
|
539 return this->m_root == this;
|
|
540 }
|
|
541
|
|
542 /* Dump information about the children of node FMA_NODE in forest FOREST. */
|
|
543
|
|
544 void
|
|
545 fma_node::dump_info (ATTRIBUTE_UNUSED fma_forest *forest)
|
|
546 {
|
|
547 struct du_chain *chain;
|
|
548 std::list<fma_node *>::iterator fma_child;
|
|
549
|
|
550 gcc_assert (dump_file);
|
|
551
|
|
552 if (this->get_children ()->empty ())
|
|
553 return;
|
|
554
|
|
555 fprintf (dump_file, "Instruction(s)");
|
|
556 for (chain = this->m_head->first; chain; chain = chain->next_use)
|
|
557 {
|
|
558 if (!is_fmul_fmac_insn (chain->insn, true))
|
|
559 continue;
|
|
560
|
|
561 if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
|
|
562 continue;
|
|
563
|
|
564 fprintf (dump_file, " %d", INSN_UID (chain->insn));
|
|
565 }
|
|
566
|
|
567 fprintf (dump_file, " is(are) accumulator dependency of instructions");
|
|
568 for (fma_child = this->get_children ()->begin ();
|
|
569 fma_child != this->get_children ()->end (); fma_child++)
|
|
570 fprintf (dump_file, " %d", INSN_UID ((*fma_child)->m_insn));
|
|
571 fprintf (dump_file, "\n");
|
|
572 }
|
|
573
|
|
574 /* Wrapper around fma_node::dump_info for use as parameter of function pointer
|
|
575 type in func_fma_steering::dfs. */
|
|
576
|
|
577 static void
|
|
578 dump_tree_node_info (fma_forest *forest, fma_node *node)
|
|
579 {
|
|
580 node->dump_info (forest);
|
|
581 }
|
|
582
|
|
583 /* Rename the destination register of a single FMUL or FMADD/FMSUB instruction
|
|
584 represented by FMA_NODE to a register that respect the target parity for
|
|
585 FOREST or with same parity of the instruction represented by its parent node
|
|
586 if it has one. */
|
|
587
|
|
588 void
|
|
589 fma_node::rename (fma_forest *forest)
|
|
590 {
|
|
591 int cur_parity, target_parity;
|
|
592
|
|
593 /* This is alternate root of a chain and thus has no children. It will be
|
|
594 renamed when processing the canonical root for that chain. */
|
|
595 if (!this->m_head)
|
|
596 return;
|
|
597
|
|
598 target_parity = forest->get_target_parity ();
|
|
599 if (this->m_parent)
|
|
600 target_parity = this->m_parent->get_parity ();
|
|
601 cur_parity = this->get_parity ();
|
|
602
|
|
603 /* Rename if parity differs. */
|
|
604 if (cur_parity != target_parity)
|
|
605 {
|
|
606 rtx_insn *insn = this->m_insn;
|
|
607 HARD_REG_SET unavailable;
|
|
608 machine_mode mode;
|
|
609 int reg;
|
|
610
|
|
611 if (dump_file)
|
|
612 {
|
|
613 unsigned cur_dest_reg = this->m_head->regno;
|
|
614
|
|
615 fprintf (dump_file, "FMA or FMUL at insn %d but destination "
|
|
616 "register (%s) has different parity from expected to "
|
|
617 "maximize FPU pipeline utilization\n", INSN_UID (insn),
|
|
618 reg_names[cur_dest_reg]);
|
|
619 }
|
|
620
|
|
621 /* Don't clobber traceback for noreturn functions. */
|
|
622 CLEAR_HARD_REG_SET (unavailable);
|
|
623 if (frame_pointer_needed)
|
|
624 {
|
|
625 add_to_hard_reg_set (&unavailable, Pmode, FRAME_POINTER_REGNUM);
|
|
626 add_to_hard_reg_set (&unavailable, Pmode, HARD_FRAME_POINTER_REGNUM);
|
|
627 }
|
|
628
|
|
629 /* Exclude registers with wrong parity. */
|
|
630 mode = GET_MODE (SET_DEST (PATTERN (insn)));
|
|
631 for (reg = cur_parity; reg < FIRST_PSEUDO_REGISTER; reg += 2)
|
|
632 add_to_hard_reg_set (&unavailable, mode, reg);
|
|
633
|
|
634 if (!rename_single_chain (this->m_head, &unavailable))
|
|
635 {
|
|
636 if (dump_file)
|
|
637 fprintf (dump_file, "Destination register of insn %d could not be "
|
|
638 "renamed. Dependent FMA insns will use this parity from "
|
|
639 "there on.\n", INSN_UID (insn));
|
|
640 }
|
|
641 else
|
|
642 cur_parity = target_parity;
|
|
643 }
|
|
644
|
|
645 forest->get_globals ()->update_balance (cur_parity);
|
|
646 }
|
|
647
|
|
648 /* Wrapper around fma_node::dump_info for use as parameter of function pointer
|
|
649 type in func_fma_steering::dfs. */
|
|
650
|
|
651 static void
|
|
652 rename_fma_node (fma_forest *forest, fma_node *node)
|
|
653 {
|
|
654 node->rename (forest);
|
|
655 }
|
|
656
|
|
657 fma_root_node::fma_root_node (func_fma_steering *globals, du_chain *chain,
|
|
658 int id) : fma_node (this, chain)
|
|
659 {
|
|
660 this->m_forest = new fma_forest (globals, this, id);
|
|
661 this->m_forest->fma_node_created (this);
|
|
662 }
|
|
663
|
|
664 fma_forest *
|
|
665 fma_root_node::get_forest ()
|
|
666 {
|
|
667 return this->m_forest;
|
|
668 }
|
|
669
|
|
670 void
|
|
671 fma_root_node::set_forest (fma_forest *ref_forest)
|
|
672 {
|
|
673 this->m_forest = ref_forest;
|
|
674 }
|
|
675
|
|
676 /* Dump information about the roots of forest FOREST. */
|
|
677
|
|
678 void
|
|
679 fma_root_node::dump_info (fma_forest *forest)
|
|
680 {
|
|
681 gcc_assert (dump_file);
|
|
682
|
|
683 if (this == forest->get_roots ()->front ())
|
|
684 fprintf (dump_file, "Instruction(s) at root of forest #%d:",
|
|
685 forest->get_id ());
|
|
686 fprintf (dump_file, " %d", INSN_UID (this->m_insn));
|
|
687 if (this == forest->get_roots ()->back ())
|
|
688 fprintf (dump_file, "\n");
|
|
689 }
|
|
690
|
|
691 /* Wrapper around fma_root_node::dump_info for use as parameter of function
|
|
692 pointer type in func_fma_steering::dfs. */
|
|
693
|
|
694 static void
|
|
695 dump_tree_root_info (fma_forest *forest, fma_root_node *node)
|
|
696 {
|
|
697 node->dump_info (forest);
|
|
698 }
|
|
699
|
|
700 func_fma_steering::func_fma_steering () : m_fpu_balance (0)
|
|
701 {
|
|
702 this->m_insn_fma_head_map = new hash_map<rtx_insn *, fma_node *>;
|
|
703 this->m_fma_forests.clear ();
|
|
704 this->m_next_forest_id = 0;
|
|
705 }
|
|
706
|
|
707 func_fma_steering::~func_fma_steering ()
|
|
708 {
|
|
709 delete this->m_insn_fma_head_map;
|
|
710 }
|
|
711
|
|
712 int
|
|
713 func_fma_steering::get_fpu_balance ()
|
|
714 {
|
|
715 return this->m_fpu_balance;
|
|
716 }
|
|
717
|
|
718 void
|
|
719 func_fma_steering::remove_forest (fma_forest *forest)
|
|
720 {
|
|
721 this->m_fma_forests.remove (forest);
|
|
722 }
|
|
723
|
|
724 /* Memorize the mapping of this instruction to its fma_node object and return
|
|
725 whether such a mapping existed. */
|
|
726
|
|
727 bool
|
|
728 func_fma_steering::put_node (fma_node *node)
|
|
729 {
|
|
730 return this->m_insn_fma_head_map->put (node->get_insn (), node);
|
|
731 }
|
|
732
|
|
733 /* Update the current balance considering a node with the given PARITY. */
|
|
734
|
|
735 void
|
|
736 func_fma_steering::update_balance (int parity)
|
|
737 {
|
|
738 this->m_fpu_balance = parity ? this->m_fpu_balance + 1
|
|
739 : this->m_fpu_balance - 1;
|
|
740 }
|
|
741
|
|
742 /* Return whether an fma_node object exists for instruction INSN and, if not,
|
|
743 allocate one in *RET. */
|
|
744
|
|
745 fma_node *
|
|
746 func_fma_steering::get_fma_node (rtx_insn *insn)
|
|
747 {
|
|
748 fma_node **fma_slot;
|
|
749
|
|
750 fma_slot = this->m_insn_fma_head_map->get (insn);
|
|
751 if (fma_slot)
|
|
752 return *fma_slot;
|
|
753 return NULL;
|
|
754 }
|
|
755
|
|
756 /* Allocate and initialize fma_node objects for the FMUL or FMADD/FMSUB
|
|
757 instruction in CHAIN->insn and its dependent FMADD/FMSUB instructions, all
|
|
758 part of FOREST. For the children, the associated head is left untouched
|
|
759 (and thus null) as this function will be called again when considering the
|
|
760 chain where they are def. For the parent, the chain is given in HEAD. */
|
|
761
|
|
762 void
|
|
763 func_fma_steering::analyze_fma_fmul_insn (fma_forest *ref_forest,
|
|
764 du_chain *chain, du_head_p head)
|
|
765 {
|
|
766 fma_forest *forest;
|
|
767 fma_node *node = this->get_fma_node (chain->insn);
|
|
768
|
|
769 /* This is a root node. */
|
|
770 if (!node)
|
|
771 {
|
|
772 fma_root_node *root_node;
|
|
773
|
|
774 root_node = new fma_root_node (this, chain, this->m_next_forest_id++);
|
|
775 forest = root_node->get_forest ();
|
|
776 node = root_node;
|
|
777
|
|
778 /* Until proved otherwise, assume this root is not part of an existing
|
|
779 forest and thus add its forest to the list of forests. */
|
|
780 this->m_fma_forests.push_back (forest);
|
|
781 }
|
|
782 else
|
|
783 forest = node->get_forest ();
|
|
784
|
|
785 node->set_head (head);
|
|
786
|
|
787 /* fma_node is part of a chain with several defs, one of them having already
|
|
788 been processed. The root of that already processed def is the canonical
|
|
789 one and the root of fma_node is added to its forest. No need to process
|
|
790 the children nodes as they were already processed when the other def was
|
|
791 processed. */
|
|
792 if (ref_forest)
|
|
793 {
|
|
794 ref_forest->merge_forest (forest);
|
|
795 return;
|
|
796 }
|
|
797
|
|
798 for (chain = head->first; chain; chain = chain->next_use)
|
|
799 {
|
|
800 fma_node *child_fma;
|
|
801 rtx fma_rtx, *accum_rtx_p;
|
|
802
|
|
803 if (!is_fmul_fmac_insn (chain->insn, false))
|
|
804 continue;
|
|
805
|
|
806 /* Get FMA rtx. */
|
|
807 fma_rtx = SET_SRC (PATTERN (chain->insn));
|
|
808 /* FMA is negated. */
|
|
809 if (GET_CODE (fma_rtx) == NEG)
|
|
810 fma_rtx = XEXP (fma_rtx, 0);
|
|
811 /* Get accumulator rtx. */
|
|
812 accum_rtx_p = &XEXP (fma_rtx, 2);
|
|
813 /* Accumulator is negated. */
|
|
814 if (!REG_P (*accum_rtx_p))
|
|
815 accum_rtx_p = &XEXP (*accum_rtx_p, 0);
|
|
816
|
|
817 /* This du_chain structure is not for the accumulator register. */
|
|
818 if (accum_rtx_p != chain->loc)
|
|
819 continue;
|
|
820
|
|
821 /* If object already created, this is a loop carried dependency. We
|
|
822 don't include this object in the children as we want trees for
|
|
823 rename_fma_trees to not be an infinite loop. */
|
|
824 if (this->get_fma_node (chain->insn))
|
|
825 continue;
|
|
826
|
|
827 child_fma = new fma_node (node, chain);
|
|
828
|
|
829 /* Memorize the mapping of this instruction to its fma_node object
|
|
830 as it will be processed for the chain starting at its destination
|
|
831 register later. */
|
|
832
|
|
833 /* Link to siblings. */
|
|
834 node->add_child (child_fma);
|
|
835 }
|
|
836 }
|
|
837
|
|
838 /* Perform a depth-first search of the forests of fma_node in
|
|
839 THIS->m_fma_forests, calling PROCESS_FOREST () on each fma_forest object in
|
|
840 THIS->m_fma_forests list, PROCESS_ROOT () on each tree root and
|
|
841 PROCESS_NODE () on each node. If FREE is true, free all std::list in the
|
|
842 same dfs. */
|
|
843
|
|
844 void
|
|
845 func_fma_steering::dfs (void (*process_forest) (fma_forest *),
|
|
846 void (*process_root) (fma_forest *, fma_root_node *),
|
|
847 void (*process_node) (fma_forest *, fma_node *),
|
|
848 bool free)
|
|
849 {
|
131
|
850 auto_vec<fma_node *> to_process;
|
|
851 auto_vec<fma_node *> to_free;
|
111
|
852 std::list<fma_forest *>::iterator forest_iter;
|
|
853
|
|
854 /* For each forest. */
|
|
855 for (forest_iter = this->m_fma_forests.begin ();
|
131
|
856 forest_iter != this->m_fma_forests.end (); ++forest_iter)
|
111
|
857 {
|
|
858 std::list<fma_root_node *>::iterator root_iter;
|
|
859
|
|
860 if (process_forest)
|
|
861 process_forest (*forest_iter);
|
|
862
|
|
863 /* For each tree root in this forest. */
|
|
864 for (root_iter = (*forest_iter)->get_roots ()->begin ();
|
131
|
865 root_iter != (*forest_iter)->get_roots ()->end (); ++root_iter)
|
111
|
866 {
|
|
867 if (process_root)
|
|
868 process_root (*forest_iter, *root_iter);
|
|
869 to_process.safe_push (*root_iter);
|
|
870 }
|
|
871
|
|
872 /* For each tree node in this forest. */
|
|
873 while (!to_process.is_empty ())
|
|
874 {
|
|
875 fma_node *node;
|
|
876 std::list<fma_node *>::iterator child_iter;
|
|
877
|
|
878 node = to_process.pop ();
|
|
879
|
|
880 if (process_node)
|
|
881 process_node (*forest_iter, node);
|
|
882
|
131
|
883 for (child_iter = node->get_children ()->begin ();
|
|
884 child_iter != node->get_children ()->end (); ++child_iter)
|
|
885 to_process.safe_push (*child_iter);
|
111
|
886
|
131
|
887 /* Defer freeing so that the process_node callback can access the
|
|
888 parent and children of the node being processed. */
|
111
|
889 if (free)
|
131
|
890 to_free.safe_push (node);
|
|
891 }
|
|
892
|
|
893 if (free)
|
|
894 {
|
|
895 delete *forest_iter;
|
|
896
|
|
897 while (!to_free.is_empty ())
|
111
|
898 {
|
131
|
899 fma_node *node = to_free.pop ();
|
111
|
900 if (node->root_p ())
|
|
901 delete static_cast<fma_root_node *> (node);
|
|
902 else
|
|
903 delete node;
|
|
904 }
|
|
905 }
|
|
906 }
|
|
907 }
|
|
908
|
|
909 /* Build the dependency trees of FMUL and FMADD/FMSUB instructions. */
|
|
910
|
|
911 void
|
|
912 func_fma_steering::analyze ()
|
|
913 {
|
|
914 int i, n_blocks, *bb_dfs_preorder;
|
|
915 basic_block bb;
|
|
916 rtx_insn *insn;
|
|
917
|
|
918 bb_dfs_preorder = XNEWVEC (int, last_basic_block_for_fn (cfun));
|
|
919 n_blocks = pre_and_rev_post_order_compute (bb_dfs_preorder, NULL, false);
|
|
920
|
|
921 /* Browse the graph of basic blocks looking for FMUL or FMADD/FMSUB
|
|
922 instructions. */
|
|
923 for (i = 0; i < n_blocks; i++)
|
|
924 {
|
|
925 bb = BASIC_BLOCK_FOR_FN (cfun, bb_dfs_preorder[i]);
|
|
926 FOR_BB_INSNS (bb, insn)
|
|
927 {
|
|
928 operand_rr_info *dest_op_info;
|
|
929 struct du_chain *chain = NULL;
|
|
930 unsigned dest_regno;
|
|
931 fma_forest *forest = NULL;
|
|
932 du_head_p head = NULL;
|
|
933 int i;
|
|
934
|
|
935 if (!is_fmul_fmac_insn (insn, true))
|
|
936 continue;
|
|
937
|
|
938 /* Search the chain where this instruction is (one of) the root. */
|
|
939 dest_op_info = insn_rr[INSN_UID (insn)].op_info;
|
|
940 dest_regno = REGNO (SET_DEST (PATTERN (insn)));
|
|
941 for (i = 0; i < dest_op_info->n_chains; i++)
|
|
942 {
|
|
943 /* The register tracked by this chain does not match the
|
|
944 destination register of insn. */
|
|
945 if (dest_op_info->heads[i]->regno != dest_regno)
|
|
946 continue;
|
|
947
|
|
948 head = dest_op_info->heads[i];
|
|
949 /* The chain was merged in another, find the new head. */
|
|
950 if (!head->first)
|
|
951 head = regrename_chain_from_id (head->id);
|
|
952
|
|
953 /* Search the chain element for this instruction and, if another
|
|
954 FMUL or FMADD/FMSUB instruction was already processed, note
|
|
955 the forest of its tree. */
|
|
956 forest = NULL;
|
|
957 for (chain = head->first; chain; chain = chain->next_use)
|
|
958 {
|
|
959 fma_node **fma_slot;
|
|
960
|
|
961 if (!is_fmul_fmac_insn (chain->insn, true))
|
|
962 continue;
|
|
963
|
|
964 /* This is a use, continue. */
|
|
965 if (chain->loc != &SET_DEST (PATTERN (chain->insn)))
|
|
966 continue;
|
|
967
|
|
968 if (chain->insn == insn)
|
|
969 break;
|
|
970
|
|
971 fma_slot = this->m_insn_fma_head_map->get (chain->insn);
|
|
972 if (fma_slot && (*fma_slot)->get_children ())
|
|
973 forest = (*fma_slot)->get_forest ();
|
|
974 }
|
|
975 if (chain)
|
|
976 break;
|
|
977 }
|
|
978
|
|
979 /* Due to implementation of regrename, dest register can slip away
|
|
980 from regrename's analysis. As a result, there is no chain for
|
|
981 the destination register of insn. We simply skip the insn even
|
|
982 it is a fmul/fmac instruction. This can happen when the dest
|
|
983 register is also a source register of insn and one of the below
|
|
984 conditions is satisfied:
|
|
985 1) the source reg is setup in larger mode than this insn;
|
|
986 2) the source reg is uninitialized;
|
|
987 3) the source reg is passed in as parameter. */
|
|
988 if (i < dest_op_info->n_chains)
|
|
989 this->analyze_fma_fmul_insn (forest, chain, head);
|
|
990 }
|
|
991 }
|
|
992 free (bb_dfs_preorder);
|
|
993
|
|
994 if (dump_file)
|
|
995 this->dfs (dump_forest_info, dump_tree_root_info, dump_tree_node_info,
|
|
996 false);
|
|
997 }
|
|
998
|
|
999 /* Perform the renaming of all chains with FMUL or FMADD/FMSUB involved with
|
|
1000 the objective of keeping FPU pipeline balanced in term of instructions and
|
|
1001 having FMADD/FMSUB with dependencies on previous FMUL or FMADD/FMSUB be
|
|
1002 scheduled on the same pipeline. */
|
|
1003
|
|
1004 void
|
|
1005 func_fma_steering::rename_fma_trees ()
|
|
1006 {
|
|
1007 this->dfs (dispatch_forest, NULL, rename_fma_node, true);
|
|
1008
|
|
1009 if (dump_file && !this->m_fma_forests.empty ())
|
|
1010 {
|
|
1011 fprintf (dump_file, "Function %s has ", current_function_name ());
|
|
1012 if (this->m_fpu_balance == 0)
|
|
1013 fprintf (dump_file, "perfect balance of FMUL/FMA chains between the "
|
|
1014 "two FPU pipelines\n");
|
|
1015 else if (this->m_fpu_balance > 0)
|
|
1016 fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the second "
|
|
1017 "FPU pipeline\n", this->m_fpu_balance);
|
|
1018 else /* this->m_fpu_balance < 0 */
|
|
1019 fprintf (dump_file, "%d more FMUL/FMA chains scheduled on the first "
|
|
1020 "FPU pipeline\n", - this->m_fpu_balance);
|
|
1021 }
|
|
1022 }
|
|
1023
|
|
1024 /* Execute FMA steering pass. */
|
|
1025
|
|
1026 void
|
|
1027 func_fma_steering::execute_fma_steering ()
|
|
1028 {
|
|
1029 df_set_flags (DF_LR_RUN_DCE);
|
|
1030 df_note_add_problem ();
|
|
1031 df_analyze ();
|
|
1032 df_set_flags (DF_DEFER_INSN_RESCAN);
|
|
1033
|
|
1034 regrename_init (true);
|
|
1035 regrename_analyze (NULL);
|
|
1036 this->analyze ();
|
|
1037 this->rename_fma_trees ();
|
|
1038 regrename_finish ();
|
|
1039 }
|
|
1040
|
|
1041 const pass_data pass_data_fma_steering =
|
|
1042 {
|
|
1043 RTL_PASS, /* type */
|
|
1044 "fma_steering", /* name */
|
|
1045 OPTGROUP_NONE, /* optinfo_flags */
|
|
1046 TV_NONE, /* tv_id */
|
|
1047 0, /* properties_required */
|
|
1048 0, /* properties_provided */
|
|
1049 0, /* properties_destroyed */
|
|
1050 0, /* todo_flags_start */
|
|
1051 TODO_df_finish, /* todo_flags_finish */
|
|
1052 };
|
|
1053
|
|
1054 class pass_fma_steering : public rtl_opt_pass
|
|
1055 {
|
|
1056 public:
|
|
1057 pass_fma_steering (gcc::context *ctxt)
|
|
1058 : rtl_opt_pass (pass_data_fma_steering, ctxt)
|
|
1059 {}
|
|
1060
|
|
1061 /* opt_pass methods: */
|
|
1062 virtual bool gate (function *)
|
|
1063 {
|
|
1064 return (aarch64_tune_params.extra_tuning_flags
|
|
1065 & AARCH64_EXTRA_TUNE_RENAME_FMA_REGS)
|
|
1066 && optimize >= 2;
|
|
1067 }
|
|
1068
|
|
1069 virtual unsigned int execute (function *)
|
|
1070 {
|
|
1071 func_fma_steering *fma_steering = new func_fma_steering;
|
|
1072 fma_steering->execute_fma_steering ();
|
|
1073 delete fma_steering;
|
|
1074 return 0;
|
|
1075 }
|
|
1076
|
|
1077 }; // class pass_fma_steering
|
|
1078
|
|
1079 /* Create a new fma steering pass instance. */
|
|
1080
|
|
1081 rtl_opt_pass *
|
|
1082 make_pass_fma_steering (gcc::context *ctxt)
|
|
1083 {
|
|
1084 return new pass_fma_steering (ctxt);
|
|
1085 }
|