1 /* Integrated Register Allocator (IRA) entry point.
|
|
2 Copyright (C) 2006, 2007, 2008, 2009
|
|
3 Free Software Foundation, Inc.
|
|
4 Contributed by Vladimir Makarov <vmakarov@redhat.com>.
|
|
5
|
|
6 This file is part of GCC.
|
|
7
|
|
8 GCC is free software; you can redistribute it and/or modify it under
|
|
9 the terms of the GNU General Public License as published by the Free
|
|
10 Software Foundation; either version 3, or (at your option) any later
|
|
11 version.
|
|
12
|
|
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
16 for more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License
|
|
19 along with GCC; see the file COPYING3. If not see
|
|
20 <http://www.gnu.org/licenses/>. */
|
|
21
|
|
22 /* The integrated register allocator (IRA) is a
|
|
23 regional register allocator performing graph coloring on a top-down
|
|
24 traversal of nested regions. Graph coloring in a region is based
|
|
25 on Chaitin-Briggs algorithm. It is called integrated because
|
|
26 register coalescing, register live range splitting, and choosing a
|
|
27 better hard register are done on-the-fly during coloring. Register
|
|
coalescing and choosing a cheaper hard register are done by hard
register preferencing during hard register assignment.  The live
range splitting is a byproduct of the regional register allocation.
|
|
31
|
|
32 Major IRA notions are:
|
|
33
|
|
34 o *Region* is a part of CFG where graph coloring based on
|
|
35 Chaitin-Briggs algorithm is done. IRA can work on any set of
|
|
36 nested CFG regions forming a tree. Currently the regions are
|
|
37 the entire function for the root region and natural loops for
|
|
38 the other regions. Therefore data structure representing a
|
|
39 region is called loop_tree_node.
|
|
40
|
|
41 o *Cover class* is a register class belonging to a set of
|
|
42 non-intersecting register classes containing all of the
|
|
43 hard-registers available for register allocation. The set of
|
|
44 all cover classes for a target is defined in the corresponding
|
|
machine-description file according to some criteria.  Such a notion
is needed because the Chaitin-Briggs algorithm works on
non-intersecting register classes.
|
|
48
|
|
49 o *Allocno* represents the live range of a pseudo-register in a
|
|
50 region. Besides the obvious attributes like the corresponding
|
|
51 pseudo-register number, cover class, conflicting allocnos and
|
|
52 conflicting hard-registers, there are a few allocno attributes
|
|
53 which are important for understanding the allocation algorithm:
|
|
54
|
|
55 - *Live ranges*. This is a list of ranges of *program
|
|
56 points* where the allocno lives. Program points represent
|
|
57 places where a pseudo can be born or become dead (there are
|
|
58 approximately two times more program points than the insns)
|
|
59 and they are represented by integers starting with 0. The
|
|
60 live ranges are used to find conflicts between allocnos of
|
|
different cover classes.  They also play a very important role
in the transformation of the IRA internal representation of
several regions into a single-region representation.  The latter is
used during the reload pass because each allocno then
represents all of the corresponding pseudo-registers.
|
|
66
|
|
67 - *Hard-register costs*. This is a vector of size equal to the
|
|
68 number of available hard-registers of the allocno's cover
|
|
69 class. The cost of a callee-clobbered hard-register for an
|
|
70 allocno is increased by the cost of save/restore code around
|
|
71 the calls through the given allocno's life. If the allocno
|
|
72 is a move instruction operand and another operand is a
|
|
73 hard-register of the allocno's cover class, the cost of the
|
|
74 hard-register is decreased by the move cost.
|
|
75
|
|
76 When an allocno is assigned, the hard-register with minimal
|
|
77 full cost is used. Initially, a hard-register's full cost is
|
|
78 the corresponding value from the hard-register's cost vector.
|
|
79 If the allocno is connected by a *copy* (see below) to
|
|
80 another allocno which has just received a hard-register, the
|
|
81 cost of the hard-register is decreased. Before choosing a
|
|
82 hard-register for an allocno, the allocno's current costs of
|
|
83 the hard-registers are modified by the conflict hard-register
|
|
84 costs of all of the conflicting allocnos which are not
|
|
85 assigned yet.
|
|
86
|
|
87 - *Conflict hard-register costs*. This is a vector of the same
|
|
88 size as the hard-register costs vector. To permit an
|
|
89 unassigned allocno to get a better hard-register, IRA uses
|
|
90 this vector to calculate the final full cost of the
|
|
91 available hard-registers. Conflict hard-register costs of an
|
|
92 unassigned allocno are also changed with a change of the
|
|
93 hard-register cost of the allocno when a copy involving the
|
|
94 allocno is processed as described above. This is done to
|
|
show other unassigned allocnos that a given allocno prefers
some hard-registers in order to remove the move instruction
corresponding to the copy (a small worked sketch of this cost
bookkeeping follows this list).
|
|
98
|
|
99 o *Cap*. If a pseudo-register does not live in a region but
|
|
100 lives in a nested region, IRA creates a special allocno called
|
|
101 a cap in the outer region. A region cap is also created for a
|
|
102 subregion cap.
|
|
103
|
|
104 o *Copy*. Allocnos can be connected by copies. Copies are used
|
|
105 to modify hard-register costs for allocnos during coloring.
|
|
Such modifications reflect a preference to use the same
|
|
107 hard-register for the allocnos connected by copies. Usually
|
|
108 copies are created for move insns (in this case it results in
|
|
109 register coalescing). But IRA also creates copies for operands
|
|
110 of an insn which should be assigned to the same hard-register
|
|
111 due to constraints in the machine description (it usually
|
|
112 results in removing a move generated in reload to satisfy
|
|
113 the constraints) and copies referring to the allocno which is
|
|
114 the output operand of an instruction and the allocno which is
|
|
115 an input operand dying in the instruction (creation of such
|
|
116 copies results in less register shuffling). IRA *does not*
|
|
117 create copies between the same register allocnos from different
|
|
118 regions because we use another technique for propagating
|
|
119 hard-register preference on the borders of regions.
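
A small worked sketch of the cost bookkeeping described above (the
register names and numbers are purely illustrative and do not come
from any real target):

o allocno A has hard-register costs {r0: 10, r1: 14}, e.g. because
  r1 is callee-clobbered and A lives across a call;
o a copy connects A to allocno B, and B has just been assigned r1;
  processing the copy decreases A's cost of r1, say by 4, to 10;
o before A is colored, each still unassigned allocno conflicting
  with A adds its conflict hard-register costs, say {r0: +2, r1: +0};
o the resulting full costs are {r0: 12, r1: 10}, so A gets r1, and
  the move corresponding to the copy can be removed.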
|
|
120
|
|
121 Allocnos (including caps) for the upper region in the region tree
|
|
122 *accumulate* information important for coloring from allocnos with
|
|
123 the same pseudo-register from nested regions. This includes
|
|
124 hard-register and memory costs, conflicts with hard-registers,
|
|
125 allocno conflicts, allocno copies and more. *Thus, attributes for
|
|
126 allocnos in a region have the same values as if the region had no
|
|
127 subregions*. It means that attributes for allocnos in the
|
|
128 outermost region corresponding to the function have the same values
|
|
129 as though the allocation used only one region which is the entire
|
|
function.  It also means that we can view IRA's work as if it
first did the allocation for the whole function and then improved
it for loops, then for their subloops, and so on.
|
|
133
|
|
134 IRA major passes are:
|
|
135
|
|
136 o Building IRA internal representation which consists of the
|
|
137 following subpasses:
|
|
138
|
|
139 * First, IRA builds regions and creates allocnos (file
|
|
140 ira-build.c) and initializes most of their attributes.
|
|
141
|
|
142 * Then IRA finds a cover class for each allocno and calculates
|
|
143 its initial (non-accumulated) cost of memory and each
|
|
hard-register of its cover class (file ira-costs.c).
|
|
145
|
|
* IRA creates live ranges for each allocno, calculates register
pressure for each cover class in each region, sets up
conflict hard registers for each allocno, and records info about
the calls the allocno lives through (file ira-lives.c).
|
|
150
|
|
151 * IRA removes low register pressure loops from the regions
|
|
152 mostly to speed IRA up (file ira-build.c).
|
|
153
|
|
154 * IRA propagates accumulated allocno info from lower region
|
|
155 allocnos to corresponding upper region allocnos (file
|
|
156 ira-build.c).
|
|
157
|
|
158 * IRA creates all caps (file ira-build.c).
|
|
159
|
|
160 * Having live-ranges of allocnos and their cover classes, IRA
|
|
161 creates conflicting allocnos of the same cover class for each
|
|
162 allocno. Conflicting allocnos are stored as a bit vector or
|
|
163 array of pointers to the conflicting allocnos whatever is
|
|
164 more profitable (file ira-conflicts.c). At this point IRA
|
|
165 creates allocno copies.
|
|
166
|
|
o Coloring.  Now IRA has all the necessary info to start the graph
coloring process.  It is done in each region on a top-down traversal
of the region tree (file ira-color.c).  There are the following
subpasses:
|
|
170
|
|
171 * Optional aggressive coalescing of allocnos in the region.
|
|
172
|
|
173 * Putting allocnos onto the coloring stack. IRA uses Briggs
|
|
174 optimistic coloring which is a major improvement over
|
|
175 Chaitin's coloring. Therefore IRA does not spill allocnos at
|
|
176 this point. There is some freedom in the order of putting
|
|
177 allocnos on the stack which can affect the final result of
|
|
178 the allocation. IRA uses some heuristics to improve the order.
|
|
179
|
|
180 * Popping the allocnos from the stack and assigning them hard
|
|
181 registers. If IRA can not assign a hard register to an
|
|
182 allocno and the allocno is coalesced, IRA undoes the
|
|
183 coalescing and puts the uncoalesced allocnos onto the stack in
|
|
184 the hope that some such allocnos will get a hard register
|
|
separately.  If IRA fails to assign a hard register, or if memory
is more profitable for the allocno, IRA spills it.  Otherwise IRA
|
|
187 assigns the allocno the hard-register with minimal full
|
|
188 allocation cost which reflects the cost of usage of the
|
|
189 hard-register for the allocno and cost of usage of the
|
|
190 hard-register for allocnos conflicting with given allocno.
|
|
191
|
|
* After allocno assignment in the region, IRA modifies the hard
register and memory costs for the corresponding allocnos in
the subregions to reflect the cost of possible loads, stores,
or moves on the border of the region and its subregions.  When
the default regional allocation algorithm is used
(-fira-algorithm=mixed), IRA just propagates the assignment
for allocnos if the register pressure in the region for the
corresponding cover class is less than the number of available
hard registers for the given cover class.
|
|
201
|
|
202 o Spill/restore code moving. When IRA performs an allocation
|
|
203 by traversing regions in top-down order, it does not know what
|
|
204 happens below in the region tree. Therefore, sometimes IRA
|
|
205 misses opportunities to perform a better allocation. A simple
|
|
optimization tries to improve the allocation in a region that has
subregions and is itself contained in another region.  If the
corresponding allocnos in the subregion are spilled, it spills
the region allocno if that is profitable.  The optimization
|
|
210 implements a simple iterative algorithm performing profitable
|
|
211 transformations while they are still possible. It is fast in
|
|
212 practice, so there is no real need for a better time complexity
|
|
213 algorithm.
|
|
214
|
|
215 o Code change. After coloring, two allocnos representing the same
|
|
216 pseudo-register outside and inside a region respectively may be
|
|
217 assigned to different locations (hard-registers or memory). In
|
|
218 this case IRA creates and uses a new pseudo-register inside the
|
|
219 region and adds code to move allocno values on the region's
|
|
220 borders. This is done during top-down traversal of the regions
|
|
221 (file ira-emit.c). In some complicated cases IRA can create a
|
|
222 new allocno to move allocno values (e.g. when a swap of values
|
|
223 stored in two hard-registers is needed). At this stage, the
|
|
224 new allocno is marked as spilled. IRA still creates the
|
|
225 pseudo-register and the moves on the region borders even when
|
|
226 both allocnos were assigned to the same hard-register. If the
|
|
227 reload pass spills a pseudo-register for some reason, the
|
|
228 effect will be smaller because another allocno will still be in
|
|
the hard-register.  In most cases, this is better than spilling
|
|
230 both allocnos. If reload does not change the allocation
|
|
231 for the two pseudo-registers, the trivial move will be removed
|
|
232 by post-reload optimizations. IRA does not generate moves for
|
|
233 allocnos assigned to the same hard register when the default
|
|
234 regional allocation algorithm is used and the register pressure
|
|
235 in the region for the corresponding allocno cover class is less
|
|
236 than number of available hard registers for given cover class.
|
|
237 IRA also does some optimizations to remove redundant stores and
|
|
238 to reduce code duplication on the region borders.
|
|
239
|
|
240 o Flattening internal representation. After changing code, IRA
|
|
241 transforms its internal representation for several regions into
|
|
242 one region representation (file ira-build.c). This process is
|
|
called IR flattening.  Such a process is more complicated than IR
rebuilding would be, but it is much faster.
|
|
245
|
|
246 o After IR flattening, IRA tries to assign hard registers to all
|
|
spilled allocnos.  This is implemented by a simple and fast
priority coloring algorithm (see function
ira_reassign_conflict_allocnos in file ira-color.c).  Here new allocnos
|
|
250 created during the code change pass can be assigned to hard
|
|
251 registers.
|
|
252
|
|
253 o At the end IRA calls the reload pass. The reload pass
|
|
254 communicates with IRA through several functions in file
|
|
255 ira-color.c to improve its decisions in
|
|
256
|
|
257 * sharing stack slots for the spilled pseudos based on IRA info
|
|
258 about pseudo-register conflicts.
|
|
259
|
|
260 * reassigning hard-registers to all spilled pseudos at the end
|
|
261 of each reload iteration.
|
|
262
|
|
263 * choosing a better hard-register to spill based on IRA info
|
|
264 about pseudo-register live ranges and the register pressure
|
|
265 in places where the pseudo-register lives.
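
A condensed sketch of this pass sequence (the mapping of passes to
files is approximate and given only for orientation; see the
corresponding files for the real entry points):

  ira ()
    -> build regions, allocnos, costs, live ranges, conflicts
       and copies                  (ira-build.c, ira-costs.c,
                                    ira-lives.c, ira-conflicts.c)
    -> color each region top-down and move spill/restore code
                                   (ira-color.c)
    -> emit code on the region borders and change the code
                                   (ira-emit.c)
    -> flatten the IR              (ira-build.c)
    -> reassign spilled allocnos   (ira-color.c)
    -> call the reload pass        (reload1.c)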
|
|
266
|
|
267 IRA uses a lot of data representing the target processors. These
|
|
data are initialized in file ira.c.
|
|
269
|
|
If the function has no loops (or the loops are ignored when
-fira-algorithm=CB is used), we have classic Chaitin-Briggs
coloring (only instead of a separate coalescing pass, we use hard
register preferencing).  In such a case, IRA works much faster
because many things are not done (like IR flattening, the
spill/restore optimization, and the code change).
|
|
276
|
|
The following literature is worth reading for a better understanding of the code:
|
|
278
|
|
279 o Preston Briggs, Keith D. Cooper, Linda Torczon. Improvements to
|
|
280 Graph Coloring Register Allocation.
|
|
281
|
|
282 o David Callahan, Brian Koblenz. Register allocation via
|
|
283 hierarchical graph coloring.
|
|
284
|
|
285 o Keith Cooper, Anshuman Dasgupta, Jason Eckhardt. Revisiting Graph
|
|
286 Coloring Register Allocation: A Study of the Chaitin-Briggs and
|
|
287 Callahan-Koblenz Algorithms.
|
|
288
|
|
289 o Guei-Yuan Lueh, Thomas Gross, and Ali-Reza Adl-Tabatabai. Global
|
|
290 Register Allocation Based on Graph Fusion.
|
|
291
|
|
292 o Vladimir Makarov. The Integrated Register Allocator for GCC.
|
|
293
|
|
294 o Vladimir Makarov. The top-down register allocator for irregular
|
|
295 register file architectures.
|
|
296
|
|
297 */
|
|
298
|
|
299
|
|
300 #include "config.h"
|
|
301 #include "system.h"
|
|
302 #include "coretypes.h"
|
|
303 #include "tm.h"
|
|
304 #include "regs.h"
|
|
305 #include "rtl.h"
|
|
306 #include "tm_p.h"
|
|
307 #include "target.h"
|
|
308 #include "flags.h"
|
|
309 #include "obstack.h"
|
|
310 #include "bitmap.h"
|
|
311 #include "hard-reg-set.h"
|
|
312 #include "basic-block.h"
|
|
313 #include "expr.h"
|
|
314 #include "recog.h"
|
|
315 #include "params.h"
|
|
316 #include "timevar.h"
|
|
317 #include "tree-pass.h"
|
|
318 #include "output.h"
|
|
319 #include "except.h"
|
|
320 #include "reload.h"
|
|
321 #include "errors.h"
|
|
322 #include "integrate.h"
|
|
323 #include "df.h"
|
|
324 #include "ggc.h"
|
|
325 #include "ira-int.h"
|
|
326
|
|
327
|
|
328 /* A modified value of flag `-fira-verbose' used internally. */
|
|
329 int internal_flag_ira_verbose;
|
|
330
|
|
331 /* Dump file of the allocator if it is not NULL. */
|
|
332 FILE *ira_dump_file;
|
|
333
|
|
334 /* Pools for allocnos, copies, allocno live ranges. */
|
|
335 alloc_pool allocno_pool, copy_pool, allocno_live_range_pool;
|
|
336
|
|
337 /* The number of elements in the following array. */
|
|
338 int ira_spilled_reg_stack_slots_num;
|
|
339
|
|
340 /* The following array contains info about spilled pseudo-registers
|
|
341 stack slots used in current function so far. */
|
|
342 struct ira_spilled_reg_stack_slot *ira_spilled_reg_stack_slots;
|
|
343
|
|
344 /* Correspondingly overall cost of the allocation, cost of the
|
|
345 allocnos assigned to hard-registers, cost of the allocnos assigned
|
|
346 to memory, cost of loads, stores and register move insns generated
|
|
347 for pseudo-register live range splitting (see ira-emit.c). */
|
|
348 int ira_overall_cost;
|
|
349 int ira_reg_cost, ira_mem_cost;
|
|
350 int ira_load_cost, ira_store_cost, ira_shuffle_cost;
|
|
351 int ira_move_loops_num, ira_additional_jumps_num;
|
|
352
|
|
353 /* All registers that can be eliminated. */
|
|
354
|
|
355 HARD_REG_SET eliminable_regset;
|
|
356
|
|
357 /* Map: hard regs X modes -> set of hard registers for storing value
|
|
358 of given mode starting with given hard register. */
|
|
359 HARD_REG_SET ira_reg_mode_hard_regset[FIRST_PSEUDO_REGISTER][NUM_MACHINE_MODES];
|
|
360
|
|
361 /* The following two variables are array analogs of the macros
|
|
362 MEMORY_MOVE_COST and REGISTER_MOVE_COST. */
|
|
363 short int ira_memory_move_cost[MAX_MACHINE_MODE][N_REG_CLASSES][2];
|
|
364 move_table *ira_register_move_cost[MAX_MACHINE_MODE];
|
|
365
|
|
366 /* Similar to may_move_in_cost but it is calculated in IRA instead of
|
|
367 regclass. Another difference is that we take only available hard
|
|
registers into account to figure out whether one register class is a
subset of another one.
|
|
370 move_table *ira_may_move_in_cost[MAX_MACHINE_MODE];
|
|
371
|
|
372 /* Similar to may_move_out_cost but it is calculated in IRA instead of
|
|
373 regclass. Another difference is that we take only available hard
|
|
registers into account to figure out whether one register class is a
subset of another one.
|
|
376 move_table *ira_may_move_out_cost[MAX_MACHINE_MODE];
|
|
377
|
|
378 /* Register class subset relation: TRUE if the first class is a subset
|
|
379 of the second one considering only hard registers available for the
|
|
380 allocation. */
|
|
381 int ira_class_subset_p[N_REG_CLASSES][N_REG_CLASSES];
|
|
382
|
|
383 /* Temporary hard reg set used for a different calculation. */
|
|
384 static HARD_REG_SET temp_hard_regset;
|
|
385
|
|
386
|
|
387
|
|
388 /* The function sets up the map IRA_REG_MODE_HARD_REGSET. */
|
|
389 static void
|
|
390 setup_reg_mode_hard_regset (void)
|
|
391 {
|
|
392 int i, m, hard_regno;
|
|
393
|
|
394 for (m = 0; m < NUM_MACHINE_MODES; m++)
|
|
395 for (hard_regno = 0; hard_regno < FIRST_PSEUDO_REGISTER; hard_regno++)
|
|
396 {
|
|
397 CLEAR_HARD_REG_SET (ira_reg_mode_hard_regset[hard_regno][m]);
|
|
398 for (i = hard_regno_nregs[hard_regno][m] - 1; i >= 0; i--)
|
|
399 if (hard_regno + i < FIRST_PSEUDO_REGISTER)
|
|
400 SET_HARD_REG_BIT (ira_reg_mode_hard_regset[hard_regno][m],
|
|
401 hard_regno + i);
|
|
402 }
|
|
403 }
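
/* As an illustration (the numbers are hypothetical, not from a real
   target): if hard_regno_nregs[3][DImode] is 2, then
   ira_reg_mode_hard_regset[3][DImode] ends up containing hard
   registers 3 and 4, i.e. all registers occupied by a DImode value
   that starts in register 3.  */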
|
|
404
|
|
405
|
|
406
|
|
407 /* Hard registers that can not be used for the register allocator for
|
|
408 all functions of the current compilation unit. */
|
|
409 static HARD_REG_SET no_unit_alloc_regs;
|
|
410
|
|
/* Array of the hard registers of the given class which are
   available for allocation.  The order is defined by the
   allocation order.  */
|
|
414 short ira_class_hard_regs[N_REG_CLASSES][FIRST_PSEUDO_REGISTER];
|
|
415
|
|
416 /* The number of elements of the above array for given register
|
|
417 class. */
|
|
418 int ira_class_hard_regs_num[N_REG_CLASSES];
|
|
419
|
|
420 /* Index (in ira_class_hard_regs) for given register class and hard
|
|
421 register (in general case a hard register can belong to several
|
|
422 register classes). The index is negative for hard registers
|
|
423 unavailable for the allocation. */
|
|
424 short ira_class_hard_reg_index[N_REG_CLASSES][FIRST_PSEUDO_REGISTER];
|
|
425
|
|
426 /* The function sets up the three arrays declared above. */
|
|
427 static void
|
|
428 setup_class_hard_regs (void)
|
|
429 {
|
|
430 int cl, i, hard_regno, n;
|
|
431 HARD_REG_SET processed_hard_reg_set;
|
|
432
|
|
433 ira_assert (SHRT_MAX >= FIRST_PSEUDO_REGISTER);
|
|
434 /* We could call ORDER_REGS_FOR_LOCAL_ALLOC here (it is usually
|
|
putting callee-used hard registers first).  But our
|
|
436 heuristics work better. */
|
|
437 for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--)
|
|
438 {
|
|
439 COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]);
|
|
440 AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs);
|
|
441 CLEAR_HARD_REG_SET (processed_hard_reg_set);
|
|
442 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
|
|
ira_class_hard_reg_index[cl][i] = -1;
|
|
444 for (n = 0, i = 0; i < FIRST_PSEUDO_REGISTER; i++)
|
|
445 {
|
|
446 #ifdef REG_ALLOC_ORDER
|
|
447 hard_regno = reg_alloc_order[i];
|
|
448 #else
|
|
449 hard_regno = i;
|
|
450 #endif
|
|
451 if (TEST_HARD_REG_BIT (processed_hard_reg_set, hard_regno))
|
|
452 continue;
|
|
453 SET_HARD_REG_BIT (processed_hard_reg_set, hard_regno);
|
|
454 if (! TEST_HARD_REG_BIT (temp_hard_regset, hard_regno))
|
|
455 ira_class_hard_reg_index[cl][hard_regno] = -1;
|
|
456 else
|
|
457 {
|
|
458 ira_class_hard_reg_index[cl][hard_regno] = n;
|
|
459 ira_class_hard_regs[cl][n++] = hard_regno;
|
|
460 }
|
|
461 }
|
|
462 ira_class_hard_regs_num[cl] = n;
|
|
463 }
|
|
464 }
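
/* A made-up illustration of the three arrays set up above (not from a
   real target): if REG_ALLOC_ORDER is {2, 0, 1, ...} and class CL
   contains the allocatable hard registers 0 and 2, then
   ira_class_hard_regs[CL] starts with {2, 0},
   ira_class_hard_regs_num[CL] is 2, ira_class_hard_reg_index[CL][2]
   is 0, ira_class_hard_reg_index[CL][0] is 1, and the index is -1
   for every other hard register.  */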
|
|
465
|
|
/* The number of hard registers of the given class available for
   register allocation.  */
|
|
468 int ira_available_class_regs[N_REG_CLASSES];
|
|
469
|
|
470 /* Set up IRA_AVAILABLE_CLASS_REGS. */
|
|
471 static void
|
|
472 setup_available_class_regs (void)
|
|
473 {
|
|
474 int i, j;
|
|
475
|
|
476 memset (ira_available_class_regs, 0, sizeof (ira_available_class_regs));
|
|
477 for (i = 0; i < N_REG_CLASSES; i++)
|
|
478 {
|
|
479 COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[i]);
|
|
480 AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs);
|
|
481 for (j = 0; j < FIRST_PSEUDO_REGISTER; j++)
|
|
482 if (TEST_HARD_REG_BIT (temp_hard_regset, j))
|
|
483 ira_available_class_regs[i]++;
|
|
484 }
|
|
485 }
|
|
486
|
|
487 /* Set up global variables defining info about hard registers for the
|
|
488 allocation. These depend on USE_HARD_FRAME_P whose TRUE value means
|
|
489 that we can use the hard frame pointer for the allocation. */
|
|
490 static void
|
|
491 setup_alloc_regs (bool use_hard_frame_p)
|
|
492 {
|
|
493 COPY_HARD_REG_SET (no_unit_alloc_regs, fixed_reg_set);
|
|
494 if (! use_hard_frame_p)
|
|
495 SET_HARD_REG_BIT (no_unit_alloc_regs, HARD_FRAME_POINTER_REGNUM);
|
|
496 setup_class_hard_regs ();
|
|
497 setup_available_class_regs ();
|
|
498 }
|
|
499
|
|
500
|
|
501
|
|
/* Set up IRA_MEMORY_MOVE_COST and IRA_CLASS_SUBSET_P.  */
|
|
503 static void
|
|
504 setup_class_subset_and_memory_move_costs (void)
|
|
505 {
|
|
506 int cl, cl2;
|
|
507 enum machine_mode mode;
|
|
508 HARD_REG_SET temp_hard_regset2;
|
|
509
|
|
510 for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
|
|
511 ira_memory_move_cost[mode][NO_REGS][0]
|
|
512 = ira_memory_move_cost[mode][NO_REGS][1] = SHRT_MAX;
|
|
513 for (cl = (int) N_REG_CLASSES - 1; cl >= 0; cl--)
|
|
514 {
|
|
515 if (cl != (int) NO_REGS)
|
|
516 for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
|
|
517 {
|
|
518 ira_memory_move_cost[mode][cl][0] = MEMORY_MOVE_COST (mode, cl, 0);
|
|
519 ira_memory_move_cost[mode][cl][1] = MEMORY_MOVE_COST (mode, cl, 1);
|
|
520 /* Costs for NO_REGS are used in cost calculation on the
|
|
521 1st pass when the preferred register classes are not
|
|
522 known yet. In this case we take the best scenario. */
|
|
523 if (ira_memory_move_cost[mode][NO_REGS][0]
|
|
524 > ira_memory_move_cost[mode][cl][0])
|
|
525 ira_memory_move_cost[mode][NO_REGS][0]
|
|
526 = ira_memory_move_cost[mode][cl][0];
|
|
527 if (ira_memory_move_cost[mode][NO_REGS][1]
|
|
528 > ira_memory_move_cost[mode][cl][1])
|
|
529 ira_memory_move_cost[mode][NO_REGS][1]
|
|
530 = ira_memory_move_cost[mode][cl][1];
|
|
531 }
|
|
532 for (cl2 = (int) N_REG_CLASSES - 1; cl2 >= 0; cl2--)
|
|
533 {
|
|
534 COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]);
|
|
535 AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs);
|
|
536 COPY_HARD_REG_SET (temp_hard_regset2, reg_class_contents[cl2]);
|
|
537 AND_COMPL_HARD_REG_SET (temp_hard_regset2, no_unit_alloc_regs);
|
|
538 ira_class_subset_p[cl][cl2]
|
|
539 = hard_reg_set_subset_p (temp_hard_regset, temp_hard_regset2);
|
|
540 }
|
|
541 }
|
|
542 }
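
/* For instance, with made-up costs MEMORY_MOVE_COST (SImode,
   GENERAL_REGS, 0) == 4 and MEMORY_MOVE_COST (SImode, FLOAT_REGS, 0)
   == 6, the loop above leaves ira_memory_move_cost[SImode][NO_REGS][0]
   equal to 4: the best-scenario cost over all classes, used by the
   first cost pass before the preferred classes are known.  */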
|
|
543
|
|
544
|
|
545
|
|
/* Define the following macro if allocation through malloc is
   preferable.  */
|
|
548 #define IRA_NO_OBSTACK
|
|
549
|
|
550 #ifndef IRA_NO_OBSTACK
|
|
551 /* Obstack used for storing all dynamic data (except bitmaps) of the
|
|
552 IRA. */
|
|
553 static struct obstack ira_obstack;
|
|
554 #endif
|
|
555
|
|
556 /* Obstack used for storing all bitmaps of the IRA. */
|
|
557 static struct bitmap_obstack ira_bitmap_obstack;
|
|
558
|
|
559 /* Allocate memory of size LEN for IRA data. */
|
|
560 void *
|
|
561 ira_allocate (size_t len)
|
|
562 {
|
|
563 void *res;
|
|
564
|
|
565 #ifndef IRA_NO_OBSTACK
|
|
566 res = obstack_alloc (&ira_obstack, len);
|
|
567 #else
|
|
568 res = xmalloc (len);
|
|
569 #endif
|
|
570 return res;
|
|
571 }
|
|
572
|
|
573 /* Reallocate memory PTR of size LEN for IRA data. */
|
|
574 void *
|
|
575 ira_reallocate (void *ptr, size_t len)
|
|
576 {
|
|
577 void *res;
|
|
578
|
|
579 #ifndef IRA_NO_OBSTACK
|
|
580 res = obstack_alloc (&ira_obstack, len);
|
|
581 #else
|
|
582 res = xrealloc (ptr, len);
|
|
583 #endif
|
|
584 return res;
|
|
585 }
|
|
586
|
|
587 /* Free memory ADDR allocated for IRA data. */
|
|
588 void
|
|
589 ira_free (void *addr ATTRIBUTE_UNUSED)
|
|
590 {
|
|
591 #ifndef IRA_NO_OBSTACK
|
|
592 /* do nothing */
|
|
593 #else
|
|
594 free (addr);
|
|
595 #endif
|
|
596 }
|
|
597
|
|
598
|
|
/* Allocate and return a bitmap for IRA.  */
|
|
600 bitmap
|
|
601 ira_allocate_bitmap (void)
|
|
602 {
|
|
603 return BITMAP_ALLOC (&ira_bitmap_obstack);
|
|
604 }
|
|
605
|
|
606 /* Free bitmap B allocated for IRA. */
|
|
607 void
|
|
608 ira_free_bitmap (bitmap b ATTRIBUTE_UNUSED)
|
|
609 {
|
|
610 /* do nothing */
|
|
611 }
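
/* A sketch of how the allocation wrappers above are meant to be used
   (illustrative only, not code taken from IRA itself):

     bitmap live = ira_allocate_bitmap ();
     int *vec = (int *) ira_allocate (n * sizeof (int));
     ...
     ira_free (vec);
     ira_free_bitmap (live);

   With IRA_NO_OBSTACK defined, ira_allocate/ira_free map onto
   xmalloc/free; otherwise the memory lives on IRA's obstack and
   ira_free is a no-op.  Bitmaps always come from IRA's bitmap
   obstack, so ira_free_bitmap does nothing in either case.  */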
|
|
612
|
|
613
|
|
614
|
|
615 /* Output information about allocation of all allocnos (except for
|
|
616 caps) into file F. */
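
/* The output looks roughly like this (values are illustrative only):

     Disposition:
         0:r73    b3     1       1:r74    l2   mem       2:r75    b5     0

   i.e. for each allocno: its number and pseudo-register, the basic
   block or loop of its region, and the assigned hard register or
   "mem" if the allocno was spilled.  */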
|
|
617 void
|
|
618 ira_print_disposition (FILE *f)
|
|
619 {
|
|
620 int i, n, max_regno;
|
|
621 ira_allocno_t a;
|
|
622 basic_block bb;
|
|
623
|
|
624 fprintf (f, "Disposition:");
|
|
625 max_regno = max_reg_num ();
|
|
626 for (n = 0, i = FIRST_PSEUDO_REGISTER; i < max_regno; i++)
|
|
627 for (a = ira_regno_allocno_map[i];
|
|
628 a != NULL;
|
|
629 a = ALLOCNO_NEXT_REGNO_ALLOCNO (a))
|
|
630 {
|
|
631 if (n % 4 == 0)
|
|
632 fprintf (f, "\n");
|
|
633 n++;
|
|
634 fprintf (f, " %4d:r%-4d", ALLOCNO_NUM (a), ALLOCNO_REGNO (a));
|
|
635 if ((bb = ALLOCNO_LOOP_TREE_NODE (a)->bb) != NULL)
|
|
636 fprintf (f, "b%-3d", bb->index);
|
|
637 else
|
|
638 fprintf (f, "l%-3d", ALLOCNO_LOOP_TREE_NODE (a)->loop->num);
|
|
639 if (ALLOCNO_HARD_REGNO (a) >= 0)
|
|
640 fprintf (f, " %3d", ALLOCNO_HARD_REGNO (a));
|
|
641 else
|
|
642 fprintf (f, " mem");
|
|
643 }
|
|
644 fprintf (f, "\n");
|
|
645 }
|
|
646
|
|
647 /* Outputs information about allocation of all allocnos into
|
|
648 stderr. */
|
|
649 void
|
|
650 ira_debug_disposition (void)
|
|
651 {
|
|
652 ira_print_disposition (stderr);
|
|
653 }
|
|
654
|
|
655
|
|
656
|
|
/* For each reg class, table listing all the classes contained in it
   (excluding the class itself; non-allocatable registers are
   excluded from consideration).  */
|
|
660 static enum reg_class alloc_reg_class_subclasses[N_REG_CLASSES][N_REG_CLASSES];
|
|
661
|
|
662 /* Initialize the table of subclasses of each reg class. */
|
|
663 static void
|
|
664 setup_reg_subclasses (void)
|
|
665 {
|
|
666 int i, j;
|
|
667 HARD_REG_SET temp_hard_regset2;
|
|
668
|
|
669 for (i = 0; i < N_REG_CLASSES; i++)
|
|
670 for (j = 0; j < N_REG_CLASSES; j++)
|
|
671 alloc_reg_class_subclasses[i][j] = LIM_REG_CLASSES;
|
|
672
|
|
673 for (i = 0; i < N_REG_CLASSES; i++)
|
|
674 {
|
|
675 if (i == (int) NO_REGS)
|
|
676 continue;
|
|
677
|
|
678 COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[i]);
|
|
679 AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs);
|
|
680 if (hard_reg_set_empty_p (temp_hard_regset))
|
|
681 continue;
|
|
682 for (j = 0; j < N_REG_CLASSES; j++)
|
|
683 if (i != j)
|
|
684 {
|
|
685 enum reg_class *p;
|
|
686
|
|
687 COPY_HARD_REG_SET (temp_hard_regset2, reg_class_contents[j]);
|
|
688 AND_COMPL_HARD_REG_SET (temp_hard_regset2, no_unit_alloc_regs);
|
|
689 if (! hard_reg_set_subset_p (temp_hard_regset,
|
|
690 temp_hard_regset2))
|
|
691 continue;
|
|
692 p = &alloc_reg_class_subclasses[j][0];
|
|
693 while (*p != LIM_REG_CLASSES) p++;
|
|
694 *p = (enum reg_class) i;
|
|
695 }
|
|
696 }
|
|
697 }
|
|
698
|
|
699
|
|
700
|
|
/* Number of cover classes.  Cover classes are non-intersecting register
   classes containing all the hard registers available for the
   allocation.  */
|
|
704 int ira_reg_class_cover_size;
|
|
705
|
|
706 /* The array containing cover classes (see also comments for macro
|
|
707 IRA_COVER_CLASSES). Only first IRA_REG_CLASS_COVER_SIZE elements are
|
|
708 used for this. */
|
|
709 enum reg_class ira_reg_class_cover[N_REG_CLASSES];
|
|
710
|
|
711 /* The number of elements in the subsequent array. */
|
|
712 int ira_important_classes_num;
|
|
713
|
|
714 /* The array containing non-empty classes (including non-empty cover
|
|
classes) which are subclasses of cover classes.  Such classes are
important for calculating the hard register usage costs.  */
|
|
717 enum reg_class ira_important_classes[N_REG_CLASSES];
|
|
718
|
|
719 /* The array containing indexes of important classes in the previous
|
|
720 array. The array elements are defined only for important
|
|
721 classes. */
|
|
722 int ira_important_class_nums[N_REG_CLASSES];
|
|
723
|
|
724 /* Set the four global variables defined above. */
|
|
725 static void
|
|
726 setup_cover_and_important_classes (void)
|
|
727 {
|
|
728 int i, j, n;
|
|
729 bool set_p, eq_p;
|
|
730 enum reg_class cl;
|
|
731 const enum reg_class *cover_classes;
|
|
732 HARD_REG_SET temp_hard_regset2;
|
|
733 static enum reg_class classes[LIM_REG_CLASSES + 1];
|
|
734
|
|
735 if (targetm.ira_cover_classes == NULL)
|
|
736 cover_classes = NULL;
|
|
737 else
|
|
738 cover_classes = targetm.ira_cover_classes ();
|
|
739 if (cover_classes == NULL)
|
|
740 ira_assert (flag_ira_algorithm == IRA_ALGORITHM_PRIORITY);
|
|
741 else
|
|
742 {
|
|
743 for (i = 0; (cl = cover_classes[i]) != LIM_REG_CLASSES; i++)
|
|
744 classes[i] = cl;
|
|
745 classes[i] = LIM_REG_CLASSES;
|
|
746 }
|
|
747
|
|
748 if (flag_ira_algorithm == IRA_ALGORITHM_PRIORITY)
|
|
749 {
|
|
750 n = 0;
|
|
751 for (i = 0; i <= LIM_REG_CLASSES; i++)
|
|
752 {
|
|
753 if (i == NO_REGS)
|
|
754 continue;
|
|
755 #ifdef CONSTRAINT__LIMIT
|
|
756 for (j = 0; j < CONSTRAINT__LIMIT; j++)
|
|
757 if ((int) regclass_for_constraint (j) == i)
|
|
758 break;
|
|
759 if (j < CONSTRAINT__LIMIT)
|
|
760 {
|
|
761 classes[n++] = i;
|
|
762 continue;
|
|
763 }
|
|
764 #endif
|
|
765 COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[i]);
|
|
766 AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs);
|
|
767 for (j = 0; j < LIM_REG_CLASSES; j++)
|
|
768 {
|
|
769 if (i == j)
|
|
770 continue;
|
|
771 COPY_HARD_REG_SET (temp_hard_regset2, reg_class_contents[j]);
|
|
772 AND_COMPL_HARD_REG_SET (temp_hard_regset2,
|
|
773 no_unit_alloc_regs);
|
|
774 if (hard_reg_set_equal_p (temp_hard_regset,
|
|
775 temp_hard_regset2))
|
|
776 break;
|
|
777 }
|
|
778 if (j >= i)
|
|
779 classes[n++] = i;
|
|
780 }
|
|
781 classes[n] = LIM_REG_CLASSES;
|
|
782 }
|
|
783
|
|
784 ira_reg_class_cover_size = 0;
|
|
785 for (i = 0; (cl = classes[i]) != LIM_REG_CLASSES; i++)
|
|
786 {
|
|
787 for (j = 0; j < i; j++)
|
|
788 if (flag_ira_algorithm != IRA_ALGORITHM_PRIORITY
|
|
789 && reg_classes_intersect_p (cl, classes[j]))
|
|
790 gcc_unreachable ();
|
|
791 COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]);
|
|
792 AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs);
|
|
793 if (! hard_reg_set_empty_p (temp_hard_regset))
|
|
794 ira_reg_class_cover[ira_reg_class_cover_size++] = cl;
|
|
795 }
|
|
796 ira_important_classes_num = 0;
|
|
797 for (cl = 0; cl < N_REG_CLASSES; cl++)
|
|
798 {
|
|
799 COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]);
|
|
800 AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs);
|
|
801 if (! hard_reg_set_empty_p (temp_hard_regset))
|
|
802 {
|
|
803 set_p = eq_p = false;
|
|
804 for (j = 0; j < ira_reg_class_cover_size; j++)
|
|
805 {
|
|
806 COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]);
|
|
807 AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs);
|
|
808 COPY_HARD_REG_SET (temp_hard_regset2,
|
|
809 reg_class_contents[ira_reg_class_cover[j]]);
|
|
810 AND_COMPL_HARD_REG_SET (temp_hard_regset2, no_unit_alloc_regs);
|
|
811 if (cl == ira_reg_class_cover[j])
|
|
812 {
|
|
813 eq_p = false;
|
|
814 set_p = true;
|
|
815 break;
|
|
816 }
|
|
817 else if (hard_reg_set_equal_p (temp_hard_regset,
|
|
818 temp_hard_regset2))
|
|
819 eq_p = true;
|
|
820 else if (hard_reg_set_subset_p (temp_hard_regset,
|
|
821 temp_hard_regset2))
|
|
822 set_p = true;
|
|
823 }
|
|
824 if (set_p && ! eq_p)
|
|
825 {
|
|
826 ira_important_class_nums[cl] = ira_important_classes_num;
|
|
827 ira_important_classes[ira_important_classes_num++] = cl;
|
|
828 }
|
|
829 }
|
|
830 }
|
|
831 }
|
|
832
|
|
833 /* Map of all register classes to corresponding cover class containing
|
|
834 the given class. If given class is not a subset of a cover class,
|
|
835 we translate it into the cheapest cover class. */
|
|
836 enum reg_class ira_class_translate[N_REG_CLASSES];
|
|
837
|
|
838 /* Set up array IRA_CLASS_TRANSLATE. */
|
|
839 static void
|
|
840 setup_class_translate (void)
|
|
841 {
|
|
842 enum reg_class cl, cover_class, best_class, *cl_ptr;
|
|
843 enum machine_mode mode;
|
|
844 int i, cost, min_cost, best_cost;
|
|
845
|
|
846 for (cl = 0; cl < N_REG_CLASSES; cl++)
|
|
847 ira_class_translate[cl] = NO_REGS;
|
|
848
|
|
849 if (flag_ira_algorithm == IRA_ALGORITHM_PRIORITY)
|
|
850 for (cl = 0; cl < LIM_REG_CLASSES; cl++)
|
|
851 {
|
|
852 COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]);
|
|
853 AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs);
|
|
854 for (i = 0; i < ira_reg_class_cover_size; i++)
|
|
855 {
|
|
856 HARD_REG_SET temp_hard_regset2;
|
|
857
|
|
858 cover_class = ira_reg_class_cover[i];
|
|
859 COPY_HARD_REG_SET (temp_hard_regset2,
|
|
860 reg_class_contents[cover_class]);
|
|
861 AND_COMPL_HARD_REG_SET (temp_hard_regset2, no_unit_alloc_regs);
|
|
862 if (hard_reg_set_equal_p (temp_hard_regset, temp_hard_regset2))
|
|
863 ira_class_translate[cl] = cover_class;
|
|
864 }
|
|
865 }
|
|
866 for (i = 0; i < ira_reg_class_cover_size; i++)
|
|
867 {
|
|
868 cover_class = ira_reg_class_cover[i];
|
|
869 if (flag_ira_algorithm != IRA_ALGORITHM_PRIORITY)
|
|
870 for (cl_ptr = &alloc_reg_class_subclasses[cover_class][0];
|
|
871 (cl = *cl_ptr) != LIM_REG_CLASSES;
|
|
872 cl_ptr++)
|
|
873 {
|
|
874 if (ira_class_translate[cl] == NO_REGS)
|
|
875 ira_class_translate[cl] = cover_class;
|
|
876 #ifdef ENABLE_IRA_CHECKING
|
|
877 else
|
|
878 {
|
|
879 COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]);
|
|
880 AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs);
|
|
881 if (! hard_reg_set_empty_p (temp_hard_regset))
|
|
882 gcc_unreachable ();
|
|
883 }
|
|
884 #endif
|
|
885 }
|
|
886 ira_class_translate[cover_class] = cover_class;
|
|
887 }
|
|
888 /* For classes which are not fully covered by a cover class (in
|
|
other words, covered by more than one cover class), use the cheapest
|
|
890 cover class. */
|
|
891 for (cl = 0; cl < N_REG_CLASSES; cl++)
|
|
892 {
|
|
893 if (cl == NO_REGS || ira_class_translate[cl] != NO_REGS)
|
|
894 continue;
|
|
895 best_class = NO_REGS;
|
|
896 best_cost = INT_MAX;
|
|
897 for (i = 0; i < ira_reg_class_cover_size; i++)
|
|
898 {
|
|
899 cover_class = ira_reg_class_cover[i];
|
|
900 COPY_HARD_REG_SET (temp_hard_regset,
|
|
901 reg_class_contents[cover_class]);
|
|
902 AND_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl]);
|
|
903 AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs);
|
|
904 if (! hard_reg_set_empty_p (temp_hard_regset))
|
|
905 {
|
|
906 min_cost = INT_MAX;
|
|
907 for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
|
|
908 {
|
|
909 cost = (ira_memory_move_cost[mode][cl][0]
|
|
910 + ira_memory_move_cost[mode][cl][1]);
|
|
911 if (min_cost > cost)
|
|
912 min_cost = cost;
|
|
913 }
|
|
914 if (best_class == NO_REGS || best_cost > min_cost)
|
|
915 {
|
|
916 best_class = cover_class;
|
|
917 best_cost = min_cost;
|
|
918 }
|
|
919 }
|
|
920 }
|
|
921 ira_class_translate[cl] = best_class;
|
|
922 }
|
|
923 }
|
|
924
|
|
/* The biggest important reg_class inside the intersection of the two
   reg_classes (this is calculated taking only hard registers
   available for allocation into account).  If both reg_classes
|
|
928 contain no hard registers available for allocation, the value is
|
|
929 calculated by taking all hard-registers including fixed ones into
|
|
930 account. */
|
|
931 enum reg_class ira_reg_class_intersect[N_REG_CLASSES][N_REG_CLASSES];
|
|
932
|
|
/* True if the two classes (calculated taking only hard
   registers available for allocation into account)
   intersect.  */
|
|
936 bool ira_reg_classes_intersect_p[N_REG_CLASSES][N_REG_CLASSES];
|
|
937
|
|
938 /* Important classes with end marker LIM_REG_CLASSES which are
|
|
supersets of the given important class (the first index).  That
includes the given class itself.  This is calculated taking only hard
|
|
941 registers available for allocation into account. */
|
|
942 enum reg_class ira_reg_class_super_classes[N_REG_CLASSES][N_REG_CLASSES];
|
|
943
|
|
/* The biggest important reg_class inside the union of the two
   reg_classes (this is calculated taking only hard registers
   available for allocation into account).  If both reg_classes
|
|
947 contain no hard registers available for allocation, the value is
|
|
948 calculated by taking all hard-registers including fixed ones into
|
|
949 account. In other words, the value is the corresponding
|
|
950 reg_class_subunion value. */
|
|
951 enum reg_class ira_reg_class_union[N_REG_CLASSES][N_REG_CLASSES];
|
|
952
|
|
953 /* Set up the above reg class relations. */
|
|
954 static void
|
|
955 setup_reg_class_relations (void)
|
|
956 {
|
|
957 int i, cl1, cl2, cl3;
|
|
958 HARD_REG_SET intersection_set, union_set, temp_set2;
|
|
959 bool important_class_p[N_REG_CLASSES];
|
|
960
|
|
961 memset (important_class_p, 0, sizeof (important_class_p));
|
|
962 for (i = 0; i < ira_important_classes_num; i++)
|
|
963 important_class_p[ira_important_classes[i]] = true;
|
|
964 for (cl1 = 0; cl1 < N_REG_CLASSES; cl1++)
|
|
965 {
|
|
966 ira_reg_class_super_classes[cl1][0] = LIM_REG_CLASSES;
|
|
967 for (cl2 = 0; cl2 < N_REG_CLASSES; cl2++)
|
|
968 {
|
|
969 ira_reg_classes_intersect_p[cl1][cl2] = false;
|
|
970 ira_reg_class_intersect[cl1][cl2] = NO_REGS;
|
|
971 COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl1]);
|
|
972 AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs);
|
|
973 COPY_HARD_REG_SET (temp_set2, reg_class_contents[cl2]);
|
|
974 AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs);
|
|
975 if (hard_reg_set_empty_p (temp_hard_regset)
|
|
976 && hard_reg_set_empty_p (temp_set2))
|
|
977 {
|
|
978 for (i = 0;; i++)
|
|
979 {
|
|
980 cl3 = reg_class_subclasses[cl1][i];
|
|
981 if (cl3 == LIM_REG_CLASSES)
|
|
982 break;
|
|
983 if (reg_class_subset_p (ira_reg_class_intersect[cl1][cl2],
|
|
984 cl3))
|
|
985 ira_reg_class_intersect[cl1][cl2] = cl3;
|
|
986 }
|
|
987 ira_reg_class_union[cl1][cl2] = reg_class_subunion[cl1][cl2];
|
|
988 continue;
|
|
989 }
|
|
990 ira_reg_classes_intersect_p[cl1][cl2]
|
|
991 = hard_reg_set_intersect_p (temp_hard_regset, temp_set2);
|
|
992 if (important_class_p[cl1] && important_class_p[cl2]
|
|
993 && hard_reg_set_subset_p (temp_hard_regset, temp_set2))
|
|
994 {
|
|
995 enum reg_class *p;
|
|
996
|
|
997 p = &ira_reg_class_super_classes[cl1][0];
|
|
998 while (*p != LIM_REG_CLASSES)
|
|
999 p++;
|
|
1000 *p++ = (enum reg_class) cl2;
|
|
1001 *p = LIM_REG_CLASSES;
|
|
1002 }
|
|
1003 ira_reg_class_union[cl1][cl2] = NO_REGS;
|
|
1004 COPY_HARD_REG_SET (intersection_set, reg_class_contents[cl1]);
|
|
1005 AND_HARD_REG_SET (intersection_set, reg_class_contents[cl2]);
|
|
1006 AND_COMPL_HARD_REG_SET (intersection_set, no_unit_alloc_regs);
|
|
1007 COPY_HARD_REG_SET (union_set, reg_class_contents[cl1]);
|
|
1008 IOR_HARD_REG_SET (union_set, reg_class_contents[cl2]);
|
|
1009 AND_COMPL_HARD_REG_SET (union_set, no_unit_alloc_regs);
|
|
1010 for (i = 0; i < ira_important_classes_num; i++)
|
|
1011 {
|
|
1012 cl3 = ira_important_classes[i];
|
|
1013 COPY_HARD_REG_SET (temp_hard_regset, reg_class_contents[cl3]);
|
|
1014 AND_COMPL_HARD_REG_SET (temp_hard_regset, no_unit_alloc_regs);
|
|
1015 if (hard_reg_set_subset_p (temp_hard_regset, intersection_set))
|
|
1016 {
|
|
1017 COPY_HARD_REG_SET
|
|
1018 (temp_set2,
|
|
1019 reg_class_contents[(int)
|
|
1020 ira_reg_class_intersect[cl1][cl2]]);
|
|
1021 AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs);
|
|
1022 if (! hard_reg_set_subset_p (temp_hard_regset, temp_set2)
|
|
1023 /* Ignore unavailable hard registers and prefer
|
|
1024 smallest class for debugging purposes. */
|
|
1025 || (hard_reg_set_equal_p (temp_hard_regset, temp_set2)
|
|
1026 && hard_reg_set_subset_p
|
|
1027 (reg_class_contents[cl3],
|
|
1028 reg_class_contents
|
|
1029 [(int) ira_reg_class_intersect[cl1][cl2]])))
|
|
1030 ira_reg_class_intersect[cl1][cl2] = (enum reg_class) cl3;
|
|
1031 }
|
|
1032 if (hard_reg_set_subset_p (temp_hard_regset, union_set))
|
|
1033 {
|
|
1034 COPY_HARD_REG_SET
|
|
1035 (temp_set2,
|
|
1036 reg_class_contents[(int) ira_reg_class_union[cl1][cl2]]);
|
|
1037 AND_COMPL_HARD_REG_SET (temp_set2, no_unit_alloc_regs);
|
|
1038 if (ira_reg_class_union[cl1][cl2] == NO_REGS
|
|
1039 || (hard_reg_set_subset_p (temp_set2, temp_hard_regset)
|
|
1040
|
|
1041 && (! hard_reg_set_equal_p (temp_set2,
|
|
1042 temp_hard_regset)
|
|
1043 /* Ignore unavailable hard registers and
|
|
1044 prefer smallest class for debugging
|
|
1045 purposes. */
|
|
1046 || hard_reg_set_subset_p
|
|
1047 (reg_class_contents[cl3],
|
|
1048 reg_class_contents
|
|
1049 [(int) ira_reg_class_union[cl1][cl2]]))))
|
|
1050 ira_reg_class_union[cl1][cl2] = (enum reg_class) cl3;
|
|
1051 }
|
|
1052 }
|
|
1053 }
|
|
1054 }
|
|
1055 }
|
|
1056
|
|
1057 /* Output all cover classes and the translation map into file F. */
|
|
1058 static void
|
|
1059 print_class_cover (FILE *f)
|
|
1060 {
|
|
1061 static const char *const reg_class_names[] = REG_CLASS_NAMES;
|
|
1062 int i;
|
|
1063
|
|
1064 fprintf (f, "Class cover:\n");
|
|
1065 for (i = 0; i < ira_reg_class_cover_size; i++)
|
|
1066 fprintf (f, " %s", reg_class_names[ira_reg_class_cover[i]]);
|
|
1067 fprintf (f, "\nClass translation:\n");
|
|
1068 for (i = 0; i < N_REG_CLASSES; i++)
|
|
1069 fprintf (f, " %s -> %s\n", reg_class_names[i],
|
|
1070 reg_class_names[ira_class_translate[i]]);
|
|
1071 }
|
|
1072
|
|
1073 /* Output all cover classes and the translation map into
|
|
1074 stderr. */
|
|
1075 void
|
|
1076 ira_debug_class_cover (void)
|
|
1077 {
|
|
1078 print_class_cover (stderr);
|
|
1079 }
|
|
1080
|
|
1081 /* Set up different arrays concerning class subsets, cover and
|
|
1082 important classes. */
|
|
1083 static void
|
|
1084 find_reg_class_closure (void)
|
|
1085 {
|
|
1086 setup_reg_subclasses ();
|
|
1087 setup_cover_and_important_classes ();
|
|
1088 setup_class_translate ();
|
|
1089 setup_reg_class_relations ();
|
|
1090 }
|
|
1091
|
|
1092
|
|
1093
|
|
1094 /* Map: hard register number -> cover class it belongs to. If the
|
|
1095 corresponding class is NO_REGS, the hard register is not available
|
|
1096 for allocation. */
|
|
1097 enum reg_class ira_hard_regno_cover_class[FIRST_PSEUDO_REGISTER];
|
|
1098
|
|
1099 /* Set up the array above. */
|
|
1100 static void
|
|
1101 setup_hard_regno_cover_class (void)
|
|
1102 {
|
|
1103 int i, j;
|
|
1104 enum reg_class cl;
|
|
1105
|
|
1106 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
|
|
1107 {
|
|
1108 ira_hard_regno_cover_class[i] = NO_REGS;
|
|
1109 for (j = 0; j < ira_reg_class_cover_size; j++)
|
|
1110 {
|
|
1111 cl = ira_reg_class_cover[j];
|
|
1112 if (ira_class_hard_reg_index[cl][i] >= 0)
|
|
1113 {
|
|
1114 ira_hard_regno_cover_class[i] = cl;
|
|
1115 break;
|
|
1116 }
|
|
1117 }
|
|
1118
|
|
1119 }
|
|
1120 }
|
|
1121
|
|
1122
|
|
1123
|
|
1124 /* Map: register class x machine mode -> number of hard registers of
|
|
1125 given class needed to store value of given mode. If the number is
|
|
1126 different, the size will be negative. */
|
|
1127 int ira_reg_class_nregs[N_REG_CLASSES][MAX_MACHINE_MODE];
|
|
1128
|
|
1129 /* Maximal value of the previous array elements. */
|
|
1130 int ira_max_nregs;
|
|
1131
|
|
1132 /* Form IRA_REG_CLASS_NREGS map. */
|
|
1133 static void
|
|
1134 setup_reg_class_nregs (void)
|
|
1135 {
|
|
1136 int m;
|
|
1137 enum reg_class cl;
|
|
1138
|
|
1139 ira_max_nregs = -1;
|
|
1140 for (cl = 0; cl < N_REG_CLASSES; cl++)
|
|
1141 for (m = 0; m < MAX_MACHINE_MODE; m++)
|
|
1142 {
|
|
1143 ira_reg_class_nregs[cl][m] = CLASS_MAX_NREGS (cl, m);
|
|
1144 if (ira_max_nregs < ira_reg_class_nregs[cl][m])
|
|
1145 ira_max_nregs = ira_reg_class_nregs[cl][m];
|
|
1146 }
|
|
1147 }
|
|
1148
|
|
1149
|
|
1150
|
|
/* Array whose elements are sets of the hard registers that are
   available for allocation in the given register class but whose
   HARD_REGNO_MODE_OK values for the given mode are zero.  */
|
|
1154 HARD_REG_SET prohibited_class_mode_regs[N_REG_CLASSES][NUM_MACHINE_MODES];
|
|
1155
|
|
1156 /* Set up PROHIBITED_CLASS_MODE_REGS. */
|
|
1157 static void
|
|
1158 setup_prohibited_class_mode_regs (void)
|
|
1159 {
|
|
1160 int i, j, k, hard_regno;
|
|
1161 enum reg_class cl;
|
|
1162
|
|
1163 for (i = 0; i < ira_reg_class_cover_size; i++)
|
|
1164 {
|
|
1165 cl = ira_reg_class_cover[i];
|
|
1166 for (j = 0; j < NUM_MACHINE_MODES; j++)
|
|
1167 {
|
|
1168 CLEAR_HARD_REG_SET (prohibited_class_mode_regs[cl][j]);
|
|
1169 for (k = ira_class_hard_regs_num[cl] - 1; k >= 0; k--)
|
|
1170 {
|
|
1171 hard_regno = ira_class_hard_regs[cl][k];
|
|
1172 if (! HARD_REGNO_MODE_OK (hard_regno, j))
|
|
1173 SET_HARD_REG_BIT (prohibited_class_mode_regs[cl][j],
|
|
1174 hard_regno);
|
|
1175 }
|
|
1176 }
|
|
1177 }
|
|
1178 }
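
/* For example, on a hypothetical target where only even-numbered
   registers can hold DFmode values,
   prohibited_class_mode_regs[GENERAL_REGS][DFmode] would end up
   containing all the odd allocatable registers of GENERAL_REGS.  */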
|
|
1179
|
|
1180
|
|
1181
|
|
1182 /* Allocate and initialize IRA_REGISTER_MOVE_COST,
|
|
1183 IRA_MAY_MOVE_IN_COST, and IRA_MAY_MOVE_OUT_COST for MODE if it is
|
|
1184 not done yet. */
|
|
1185 void
|
|
1186 ira_init_register_move_cost (enum machine_mode mode)
|
|
1187 {
|
|
1188 int cl1, cl2;
|
|
1189
|
|
1190 ira_assert (ira_register_move_cost[mode] == NULL
|
|
1191 && ira_may_move_in_cost[mode] == NULL
|
|
1192 && ira_may_move_out_cost[mode] == NULL);
|
|
1193 if (move_cost[mode] == NULL)
|
|
1194 init_move_cost (mode);
|
|
1195 ira_register_move_cost[mode] = move_cost[mode];
|
|
/* Don't use ira_allocate because the tables exist outside the scope
   of an IRA call.  */
|
|
1198 ira_may_move_in_cost[mode]
|
|
1199 = (move_table *) xmalloc (sizeof (move_table) * N_REG_CLASSES);
|
|
1200 memcpy (ira_may_move_in_cost[mode], may_move_in_cost[mode],
|
|
1201 sizeof (move_table) * N_REG_CLASSES);
|
|
1202 ira_may_move_out_cost[mode]
|
|
1203 = (move_table *) xmalloc (sizeof (move_table) * N_REG_CLASSES);
|
|
1204 memcpy (ira_may_move_out_cost[mode], may_move_out_cost[mode],
|
|
1205 sizeof (move_table) * N_REG_CLASSES);
|
|
1206 for (cl1 = 0; cl1 < N_REG_CLASSES; cl1++)
|
|
1207 {
|
|
1208 for (cl2 = 0; cl2 < N_REG_CLASSES; cl2++)
|
|
1209 {
|
|
1210 if (ira_class_subset_p[cl1][cl2])
|
|
1211 ira_may_move_in_cost[mode][cl1][cl2] = 0;
|
|
1212 if (ira_class_subset_p[cl2][cl1])
|
|
1213 ira_may_move_out_cost[mode][cl1][cl2] = 0;
|
|
1214 }
|
|
1215 }
|
|
1216 }
|
|
1217
|
|
1218
|
|
1219
|
|
1220 /* This is called once during compiler work. It sets up
|
|
1221 different arrays whose values don't depend on the compiled
|
|
1222 function. */
|
|
1223 void
|
|
1224 ira_init_once (void)
|
|
1225 {
|
|
1226 enum machine_mode mode;
|
|
1227
|
|
1228 for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
|
|
1229 {
|
|
1230 ira_register_move_cost[mode] = NULL;
|
|
1231 ira_may_move_in_cost[mode] = NULL;
|
|
1232 ira_may_move_out_cost[mode] = NULL;
|
|
1233 }
|
|
1234 ira_init_costs_once ();
|
|
1235 }
|
|
1236
|
|
1237 /* Free ira_register_move_cost, ira_may_move_in_cost, and
|
|
1238 ira_may_move_out_cost for each mode. */
|
|
1239 static void
|
|
1240 free_register_move_costs (void)
|
|
1241 {
|
|
1242 enum machine_mode mode;
|
|
1243
|
|
1244 for (mode = 0; mode < MAX_MACHINE_MODE; mode++)
|
|
1245 {
|
|
1246 if (ira_may_move_in_cost[mode] != NULL)
|
|
1247 free (ira_may_move_in_cost[mode]);
|
|
1248 if (ira_may_move_out_cost[mode] != NULL)
|
|
1249 free (ira_may_move_out_cost[mode]);
|
|
1250 ira_register_move_cost[mode] = NULL;
|
|
1251 ira_may_move_in_cost[mode] = NULL;
|
|
1252 ira_may_move_out_cost[mode] = NULL;
|
|
1253 }
|
|
1254 }
|
|
1255
|
|
/* This is called every time the register-related information is
   changed.  */
|
|
1258 void
|
|
1259 ira_init (void)
|
|
1260 {
|
|
1261 free_register_move_costs ();
|
|
1262 setup_reg_mode_hard_regset ();
|
|
1263 setup_alloc_regs (flag_omit_frame_pointer != 0);
|
|
1264 setup_class_subset_and_memory_move_costs ();
|
|
1265 find_reg_class_closure ();
|
|
1266 setup_hard_regno_cover_class ();
|
|
1267 setup_reg_class_nregs ();
|
|
1268 setup_prohibited_class_mode_regs ();
|
|
1269 ira_init_costs ();
|
|
1270 }
|
|
1271
|
|
1272 /* Function called once at the end of compiler work. */
|
|
1273 void
|
|
1274 ira_finish_once (void)
|
|
1275 {
|
|
1276 ira_finish_costs_once ();
|
|
1277 free_register_move_costs ();
|
|
1278 }
|
|
1279
|
|
1280
|
|
1281
|
|
1282 /* Array whose values are hard regset of hard registers for which
|
|
1283 move of the hard register in given mode into itself is
|
|
1284 prohibited. */
|
|
1285 HARD_REG_SET ira_prohibited_mode_move_regs[NUM_MACHINE_MODES];
|
|
1286
|
|
/* Flag indicating that the above array has been initialized.  */
|
|
1288 static bool ira_prohibited_mode_move_regs_initialized_p = false;
|
|
1289
|
|
1290 /* Set up IRA_PROHIBITED_MODE_MOVE_REGS. */
|
|
1291 static void
|
|
1292 setup_prohibited_mode_move_regs (void)
|
|
1293 {
|
|
1294 int i, j;
|
|
1295 rtx test_reg1, test_reg2, move_pat, move_insn;
|
|
1296
|
|
1297 if (ira_prohibited_mode_move_regs_initialized_p)
|
|
1298 return;
|
|
1299 ira_prohibited_mode_move_regs_initialized_p = true;
|
|
1300 test_reg1 = gen_rtx_REG (VOIDmode, 0);
|
|
1301 test_reg2 = gen_rtx_REG (VOIDmode, 0);
|
|
1302 move_pat = gen_rtx_SET (VOIDmode, test_reg1, test_reg2);
|
|
1303 move_insn = gen_rtx_INSN (VOIDmode, 0, 0, 0, 0, 0, move_pat, -1, 0);
|
|
1304 for (i = 0; i < NUM_MACHINE_MODES; i++)
|
|
1305 {
|
|
1306 SET_HARD_REG_SET (ira_prohibited_mode_move_regs[i]);
|
|
1307 for (j = 0; j < FIRST_PSEUDO_REGISTER; j++)
|
|
1308 {
|
|
1309 if (! HARD_REGNO_MODE_OK (j, i))
|
|
1310 continue;
|
|
1311 SET_REGNO (test_reg1, j);
|
|
1312 PUT_MODE (test_reg1, i);
|
|
1313 SET_REGNO (test_reg2, j);
|
|
1314 PUT_MODE (test_reg2, i);
|
|
1315 INSN_CODE (move_insn) = -1;
|
|
1316 recog_memoized (move_insn);
|
|
1317 if (INSN_CODE (move_insn) < 0)
|
|
1318 continue;
|
|
1319 extract_insn (move_insn);
|
|
1320 if (! constrain_operands (1))
|
|
1321 continue;
|
|
1322 CLEAR_HARD_REG_BIT (ira_prohibited_mode_move_regs[i], j);
|
|
1323 }
|
|
1324 }
|
|
1325 }
|
|
1326
|
|
1327
|
|
1328
|
|
1329 /* Function specific hard registers that can not be used for the
|
|
1330 register allocation. */
|
|
1331 HARD_REG_SET ira_no_alloc_regs;
|
|
1332
|
|
1333 /* Return TRUE if *LOC contains an asm. */
|
|
1334 static int
|
|
1335 insn_contains_asm_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
|
|
1336 {
|
|
1337 if ( !*loc)
|
|
1338 return FALSE;
|
|
1339 if (GET_CODE (*loc) == ASM_OPERANDS)
|
|
1340 return TRUE;
|
|
1341 return FALSE;
|
|
1342 }
|
|
1343
|
|
1344
|
|
1345 /* Return TRUE if INSN contains an ASM. */
|
|
1346 static bool
|
|
1347 insn_contains_asm (rtx insn)
|
|
1348 {
|
|
1349 return for_each_rtx (&insn, insn_contains_asm_1, NULL);
|
|
1350 }
|
|
1351
|
|
1352 /* Set up regs_asm_clobbered. */
|
|
1353 static void
|
|
1354 compute_regs_asm_clobbered (char *regs_asm_clobbered)
|
|
1355 {
|
|
1356 basic_block bb;
|
|
1357
|
|
1358 memset (regs_asm_clobbered, 0, sizeof (char) * FIRST_PSEUDO_REGISTER);
|
|
1359
|
|
1360 FOR_EACH_BB (bb)
|
|
1361 {
|
|
1362 rtx insn;
|
|
1363 FOR_BB_INSNS_REVERSE (bb, insn)
|
|
1364 {
|
|
1365 df_ref *def_rec;
|
|
1366
|
|
1367 if (insn_contains_asm (insn))
|
|
1368 for (def_rec = DF_INSN_DEFS (insn); *def_rec; def_rec++)
|
|
1369 {
|
|
1370 df_ref def = *def_rec;
|
|
1371 unsigned int dregno = DF_REF_REGNO (def);
|
|
1372 if (dregno < FIRST_PSEUDO_REGISTER)
|
|
1373 {
|
|
1374 unsigned int i;
|
|
1375 enum machine_mode mode = GET_MODE (DF_REF_REAL_REG (def));
|
|
1376 unsigned int end = dregno
|
|
1377 + hard_regno_nregs[dregno][mode] - 1;
|
|
1378
|
|
1379 for (i = dregno; i <= end; ++i)
|
|
1380 regs_asm_clobbered[i] = 1;
|
|
1381 }
|
|
1382 }
|
|
1383 }
|
|
1384 }
|
|
1385 }
|
|
1386
|
|
1387
|
|
1388 /* Set up ELIMINABLE_REGSET, IRA_NO_ALLOC_REGS, and REGS_EVER_LIVE. */
|
|
1389 static void
|
|
1390 setup_eliminable_regset (void)
|
|
1391 {
|
|
1392 /* Like regs_ever_live, but 1 if a reg is set or clobbered from an
|
|
1393 asm. Unlike regs_ever_live, elements of this array corresponding
|
|
1394 to eliminable regs (like the frame pointer) are set if an asm
|
|
1395 sets them. */
|
|
1396 char *regs_asm_clobbered
|
|
1397 = (char *) alloca (FIRST_PSEUDO_REGISTER * sizeof (char));
|
|
1398 #ifdef ELIMINABLE_REGS
|
|
1399 int i;
|
|
1400 static const struct {const int from, to; } eliminables[] = ELIMINABLE_REGS;
|
|
1401 #endif
|
|
1402 /* FIXME: If EXIT_IGNORE_STACK is set, we will not save and restore
|
|
1403 sp for alloca. So we can't eliminate the frame pointer in that
|
|
1404 case. At some point, we should improve this by emitting the
|
|
1405 sp-adjusting insns for this case. */
|
|
1406 int need_fp
|
|
1407 = (! flag_omit_frame_pointer
|
|
1408 || (cfun->calls_alloca && EXIT_IGNORE_STACK)
|
|
1409 || crtl->accesses_prior_frames
|
|
1410 || crtl->stack_realign_needed
|
|
1411 || FRAME_POINTER_REQUIRED);
|
|
1412
|
|
1413 frame_pointer_needed = need_fp;
|
|
1414
|
|
1415 COPY_HARD_REG_SET (ira_no_alloc_regs, no_unit_alloc_regs);
|
|
1416 CLEAR_HARD_REG_SET (eliminable_regset);
|
|
1417
|
|
1418 compute_regs_asm_clobbered (regs_asm_clobbered);
|
|
1419 /* Build the regset of all eliminable registers and show we can't
|
|
1420 use those that we already know won't be eliminated. */
|
|
1421 #ifdef ELIMINABLE_REGS
|
|
1422 for (i = 0; i < (int) ARRAY_SIZE (eliminables); i++)
|
|
1423 {
|
|
1424 bool cannot_elim
|
|
1425 = (! CAN_ELIMINATE (eliminables[i].from, eliminables[i].to)
|
|
1426 || (eliminables[i].to == STACK_POINTER_REGNUM && need_fp));
|
|
1427
|
|
1428 if (! regs_asm_clobbered[eliminables[i].from])
|
|
1429 {
|
|
1430 SET_HARD_REG_BIT (eliminable_regset, eliminables[i].from);
|
|
1431
|
|
1432 if (cannot_elim)
|
|
1433 SET_HARD_REG_BIT (ira_no_alloc_regs, eliminables[i].from);
|
|
1434 }
|
|
1435 else if (cannot_elim)
|
|
1436 error ("%s cannot be used in asm here",
|
|
1437 reg_names[eliminables[i].from]);
|
|
1438 else
|
|
1439 df_set_regs_ever_live (eliminables[i].from, true);
|
|
1440 }
|
|
1441 #if FRAME_POINTER_REGNUM != HARD_FRAME_POINTER_REGNUM
|
|
1442 if (! regs_asm_clobbered[HARD_FRAME_POINTER_REGNUM])
|
|
1443 {
|
|
1444 SET_HARD_REG_BIT (eliminable_regset, HARD_FRAME_POINTER_REGNUM);
|
|
1445 if (need_fp)
|
|
1446 SET_HARD_REG_BIT (ira_no_alloc_regs, HARD_FRAME_POINTER_REGNUM);
|
|
1447 }
|
|
1448 else if (need_fp)
|
|
1449 error ("%s cannot be used in asm here",
|
|
1450 reg_names[HARD_FRAME_POINTER_REGNUM]);
|
|
1451 else
|
|
1452 df_set_regs_ever_live (HARD_FRAME_POINTER_REGNUM, true);
|
|
1453 #endif
|
|
1454
|
|
1455 #else
|
|
1456 if (! regs_asm_clobbered[FRAME_POINTER_REGNUM])
|
|
1457 {
|
|
1458 SET_HARD_REG_BIT (eliminable_regset, FRAME_POINTER_REGNUM);
|
|
1459 if (need_fp)
|
|
1460 SET_HARD_REG_BIT (ira_no_alloc_regs, FRAME_POINTER_REGNUM);
|
|
1461 }
|
|
1462 else if (need_fp)
|
|
1463 error ("%s cannot be used in asm here", reg_names[FRAME_POINTER_REGNUM]);
|
|
1464 else
|
|
1465 df_set_regs_ever_live (FRAME_POINTER_REGNUM, true);
|
|
1466 #endif
|
|
1467 }
|
|
1468
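/* For reference, a hedged sketch of the shape that ELIMINABLE_REGS takes
   in a typical machine-description header; the exact pairs are
   target-specific and the ones below are only an illustration of the
   {from, to} initializer consumed by the eliminables[] array in
   setup_eliminable_regset above.  */
#if 0
#define ELIMINABLE_REGS                                    \
 {{ ARG_POINTER_REGNUM,   STACK_POINTER_REGNUM },          \
  { ARG_POINTER_REGNUM,   HARD_FRAME_POINTER_REGNUM },     \
  { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM },          \
  { FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }}
#endif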
|
|
1469
|
|
1470
|
|
1471 /* The length of the following two arrays. */
|
|
1472 int ira_reg_equiv_len;
|
|
1473
|
|
1474 /* The element value is TRUE if the value of the corresponding
|
|
1475    pseudo-register is invariant.  */
|
|
1476 bool *ira_reg_equiv_invariant_p;
|
|
1477
|
|
1478 /* The element value is the equivalent constant of the given
|
|
1479    pseudo-register, or NULL_RTX.  */
|
|
1480 rtx *ira_reg_equiv_const;
|
|
1481
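/* An invented example of what find_reg_equiv_invariant_const below
   typically records (the values are purely illustrative): a pseudo whose
   REG_EQUIV value is a SYMBOL_REF usually ends up with that rtx in
   ira_reg_equiv_const[] and FALSE in ira_reg_equiv_invariant_p[], while
   a pseudo equivalent to a frame-pointer-plus-offset address gets TRUE
   in ira_reg_equiv_invariant_p[] and a NULL_RTX constant.  */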
|
|
1482 /* Set up the two arrays declared above. */
|
|
1483 static void
|
|
1484 find_reg_equiv_invariant_const (void)
|
|
1485 {
|
|
1486 int i;
|
|
1487 bool invariant_p;
|
|
1488 rtx list, insn, note, constant, x;
|
|
1489
|
|
1490 for (i = FIRST_PSEUDO_REGISTER; i < reg_equiv_init_size; i++)
|
|
1491 {
|
|
1492 constant = NULL_RTX;
|
|
1493 invariant_p = false;
|
|
1494 for (list = reg_equiv_init[i]; list != NULL_RTX; list = XEXP (list, 1))
|
|
1495 {
|
|
1496 insn = XEXP (list, 0);
|
|
1497 note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
|
|
1498
|
|
1499 if (note == NULL_RTX)
|
|
1500 continue;
|
|
1501
|
|
1502 x = XEXP (note, 0);
|
|
1503
|
|
1504 if (! function_invariant_p (x)
|
|
1505 || ! flag_pic
|
|
1506 /* A function invariant is often CONSTANT_P but may
|
|
1507 include a register. We promise to only pass CONSTANT_P
|
|
1508 objects to LEGITIMATE_PIC_OPERAND_P. */
|
|
1509 || (CONSTANT_P (x) && LEGITIMATE_PIC_OPERAND_P (x)))
|
|
1510 {
|
|
1511 /* It can happen that a REG_EQUIV note contains a MEM
|
|
1512 that is not a legitimate memory operand. As later
|
|
1513 stages of reload assume that all addresses found
|
|
1514 in the reg_equiv_* arrays were originally legitimate,
|
|
1515 we ignore such REG_EQUIV notes. */
|
|
1516 if (memory_operand (x, VOIDmode))
|
|
1517 invariant_p = MEM_READONLY_P (x);
|
|
1518 else if (function_invariant_p (x))
|
|
1519 {
|
|
1520 if (GET_CODE (x) == PLUS
|
|
1521 || x == frame_pointer_rtx || x == arg_pointer_rtx)
|
|
1522 invariant_p = true;
|
|
1523 else
|
|
1524 constant = x;
|
|
1525 }
|
|
1526 }
|
|
1527 }
|
|
1528 ira_reg_equiv_invariant_p[i] = invariant_p;
|
|
1529 ira_reg_equiv_const[i] = constant;
|
|
1530 }
|
|
1531 }
|
|
1532
|
|
1533
|
|
1534
|
|
1535 /* Vector of substitutions of register numbers,
|
|
1536 used to map pseudo regs into hardware regs.
|
|
1537 This is set up as a result of register allocation.
|
|
1538 Element N is the hard reg assigned to pseudo reg N,
|
|
1539 or is -1 if no hard reg was assigned.
|
|
1540 If N is a hard reg number, element N is N. */
|
|
1541 short *reg_renumber;
|
|
1542
|
|
1543 /* Set up REG_RENUMBER and CALLER_SAVE_NEEDED (used by reload) from
|
|
1544 the allocation found by IRA. */
|
|
1545 static void
|
|
1546 setup_reg_renumber (void)
|
|
1547 {
|
|
1548 int regno, hard_regno;
|
|
1549 ira_allocno_t a;
|
|
1550 ira_allocno_iterator ai;
|
|
1551
|
|
1552 caller_save_needed = 0;
|
|
1553 FOR_EACH_ALLOCNO (a, ai)
|
|
1554 {
|
|
1555 /* There are no caps at this point. */
|
|
1556 ira_assert (ALLOCNO_CAP_MEMBER (a) == NULL);
|
|
1557 if (! ALLOCNO_ASSIGNED_P (a))
|
|
1558 /* This can happen if A is not referenced but is partially
|
|
1559    anticipated somewhere in a region.  */
|
|
1560 ALLOCNO_ASSIGNED_P (a) = true;
|
|
1561 ira_free_allocno_updated_costs (a);
|
|
1562 hard_regno = ALLOCNO_HARD_REGNO (a);
|
|
1563 regno = (int) REGNO (ALLOCNO_REG (a));
|
|
1564 reg_renumber[regno] = (hard_regno < 0 ? -1 : hard_regno);
|
|
1565 if (hard_regno >= 0 && ALLOCNO_CALLS_CROSSED_NUM (a) != 0
|
|
1566 && ! ira_hard_reg_not_in_set_p (hard_regno, ALLOCNO_MODE (a),
|
|
1567 call_used_reg_set))
|
|
1568 {
|
|
1569 ira_assert (!optimize || flag_caller_saves
|
|
1570 || regno >= ira_reg_equiv_len
|
|
1571 || ira_reg_equiv_const[regno]
|
|
1572 || ira_reg_equiv_invariant_p[regno]);
|
|
1573 caller_save_needed = 1;
|
|
1574 }
|
|
1575 }
|
|
1576 }
|
|
1577
|
|
1578 /* Set up allocno assignment flags for further allocation
|
|
1579 improvements. */
|
|
1580 static void
|
|
1581 setup_allocno_assignment_flags (void)
|
|
1582 {
|
|
1583 int hard_regno;
|
|
1584 ira_allocno_t a;
|
|
1585 ira_allocno_iterator ai;
|
|
1586
|
|
1587 FOR_EACH_ALLOCNO (a, ai)
|
|
1588 {
|
|
1589 if (! ALLOCNO_ASSIGNED_P (a))
|
|
1590 /* This can happen if A is not referenced but is partially
|
|
1591    anticipated somewhere in a region.  */
|
|
1592 ira_free_allocno_updated_costs (a);
|
|
1593 hard_regno = ALLOCNO_HARD_REGNO (a);
|
|
1594 /* Don't assign hard registers to allocnos which are the destination
|
|
1595    of a store removed at the end of a loop.  It makes no sense to keep
|
|
1596    the same value in different hard registers.  It is also
|
|
1597    impossible to assign hard registers correctly to such
|
|
1598    allocnos because the cost info and the info about intersected
|
|
1599    calls are incorrect for them.  */
|
|
1600 ALLOCNO_ASSIGNED_P (a) = (hard_regno >= 0
|
|
1601 || ALLOCNO_MEM_OPTIMIZED_DEST_P (a)
|
|
1602 || (ALLOCNO_MEMORY_COST (a)
|
|
1603 - ALLOCNO_COVER_CLASS_COST (a)) < 0);
|
|
1604 ira_assert (hard_regno < 0
|
|
1605 || ! ira_hard_reg_not_in_set_p (hard_regno, ALLOCNO_MODE (a),
|
|
1606 reg_class_contents
|
|
1607 [ALLOCNO_COVER_CLASS (a)]));
|
|
1608 }
|
|
1609 }
|
|
1610
|
|
1611 /* Evaluate overall allocation cost and the costs for using hard
|
|
1612 registers and memory for allocnos. */
|
|
1613 static void
|
|
1614 calculate_allocation_cost (void)
|
|
1615 {
|
|
1616 int hard_regno, cost;
|
|
1617 ira_allocno_t a;
|
|
1618 ira_allocno_iterator ai;
|
|
1619
|
|
1620 ira_overall_cost = ira_reg_cost = ira_mem_cost = 0;
|
|
1621 FOR_EACH_ALLOCNO (a, ai)
|
|
1622 {
|
|
1623 hard_regno = ALLOCNO_HARD_REGNO (a);
|
|
1624 ira_assert (hard_regno < 0
|
|
1625 || ! ira_hard_reg_not_in_set_p
|
|
1626 (hard_regno, ALLOCNO_MODE (a),
|
|
1627 reg_class_contents[ALLOCNO_COVER_CLASS (a)]));
|
|
1628 if (hard_regno < 0)
|
|
1629 {
|
|
1630 cost = ALLOCNO_MEMORY_COST (a);
|
|
1631 ira_mem_cost += cost;
|
|
1632 }
|
|
1633 else if (ALLOCNO_HARD_REG_COSTS (a) != NULL)
|
|
1634 {
|
|
1635 cost = (ALLOCNO_HARD_REG_COSTS (a)
|
|
1636 [ira_class_hard_reg_index
|
|
1637 [ALLOCNO_COVER_CLASS (a)][hard_regno]]);
|
|
1638 ira_reg_cost += cost;
|
|
1639 }
|
|
1640 else
|
|
1641 {
|
|
1642 cost = ALLOCNO_COVER_CLASS_COST (a);
|
|
1643 ira_reg_cost += cost;
|
|
1644 }
|
|
1645 ira_overall_cost += cost;
|
|
1646 }
|
|
1647
|
|
1648 if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL)
|
|
1649 {
|
|
1650 fprintf (ira_dump_file,
|
|
1651 "+++Costs: overall %d, reg %d, mem %d, ld %d, st %d, move %d\n",
|
|
1652 ira_overall_cost, ira_reg_cost, ira_mem_cost,
|
|
1653 ira_load_cost, ira_store_cost, ira_shuffle_cost);
|
|
1654 fprintf (ira_dump_file, "+++ move loops %d, new jumps %d\n",
|
|
1655 ira_move_loops_num, ira_additional_jumps_num);
|
|
1656 }
|
|
1657
|
|
1658 }
|
|
1659
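/* A small worked example of the accounting above (the costs are
   invented): if two allocnos get hard registers at costs 12 and 7 and
   one stays in memory at cost 30, then ira_reg_cost == 19,
   ira_mem_cost == 30 and ira_overall_cost == 49, i.e. at this point the
   overall cost is the sum of the register and memory costs; the load,
   store and shuffle costs printed above are accumulated elsewhere.  */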
|
|
1660 #ifdef ENABLE_IRA_CHECKING
|
|
1661 /* Check the correctness of the allocation.  We need this because of
|
|
1662    the complicated code that transforms the internal representation of
|
|
1663    more than one region into a one-region representation.  */
|
|
1664 static void
|
|
1665 check_allocation (void)
|
|
1666 {
|
|
1667 ira_allocno_t a, conflict_a;
|
|
1668 int hard_regno, conflict_hard_regno, nregs, conflict_nregs;
|
|
1669 ira_allocno_conflict_iterator aci;
|
|
1670 ira_allocno_iterator ai;
|
|
1671
|
|
1672 FOR_EACH_ALLOCNO (a, ai)
|
|
1673 {
|
|
1674 if (ALLOCNO_CAP_MEMBER (a) != NULL
|
|
1675 || (hard_regno = ALLOCNO_HARD_REGNO (a)) < 0)
|
|
1676 continue;
|
|
1677 nregs = hard_regno_nregs[hard_regno][ALLOCNO_MODE (a)];
|
|
1678 FOR_EACH_ALLOCNO_CONFLICT (a, conflict_a, aci)
|
|
1679 if ((conflict_hard_regno = ALLOCNO_HARD_REGNO (conflict_a)) >= 0)
|
|
1680 {
|
|
1681 conflict_nregs
|
|
1682 = (hard_regno_nregs
|
|
1683 [conflict_hard_regno][ALLOCNO_MODE (conflict_a)]);
|
|
1684 if ((conflict_hard_regno <= hard_regno
|
|
1685 && hard_regno < conflict_hard_regno + conflict_nregs)
|
|
1686 || (hard_regno <= conflict_hard_regno
|
|
1687 && conflict_hard_regno < hard_regno + nregs))
|
|
1688 {
|
|
1689 fprintf (stderr, "bad allocation for %d and %d\n",
|
|
1690 ALLOCNO_REGNO (a), ALLOCNO_REGNO (conflict_a));
|
|
1691 gcc_unreachable ();
|
|
1692 }
|
|
1693 }
|
|
1694 }
|
|
1695 }
|
|
1696 #endif
|
|
1697
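/* A minimal standalone sketch (not used by the allocator) of the overlap
   test applied in check_allocation above: two assignments conflict when
   the hard register ranges [R1, R1+N1) and [R2, R2+N2) intersect.  The
   function name is invented.  */
#if 0
static int
hard_reg_ranges_intersect_p (int regno1, int nregs1, int regno2, int nregs2)
{
  return ((regno2 <= regno1 && regno1 < regno2 + nregs2)
          || (regno1 <= regno2 && regno2 < regno1 + nregs1));
}
#endif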
|
|
1698 /* Fix values of array REG_EQUIV_INIT after live range splitting done
|
|
1699 by IRA. */
|
|
1700 static void
|
|
1701 fix_reg_equiv_init (void)
|
|
1702 {
|
|
1703 int max_regno = max_reg_num ();
|
|
1704 int i, new_regno;
|
|
1705 rtx x, prev, next, insn, set;
|
|
1706
|
|
1707 if (reg_equiv_init_size < max_regno)
|
|
1708 {
|
|
1709 reg_equiv_init
|
|
1710 = (rtx *) ggc_realloc (reg_equiv_init, max_regno * sizeof (rtx));
|
|
1711 while (reg_equiv_init_size < max_regno)
|
|
1712 reg_equiv_init[reg_equiv_init_size++] = NULL_RTX;
|
|
1713 for (i = FIRST_PSEUDO_REGISTER; i < reg_equiv_init_size; i++)
|
|
1714 for (prev = NULL_RTX, x = reg_equiv_init[i]; x != NULL_RTX; x = next)
|
|
1715 {
|
|
1716 next = XEXP (x, 1);
|
|
1717 insn = XEXP (x, 0);
|
|
1718 set = single_set (insn);
|
|
1719 ira_assert (set != NULL_RTX
|
|
1720 && (REG_P (SET_DEST (set)) || REG_P (SET_SRC (set))));
|
|
1721 if (REG_P (SET_DEST (set))
|
|
1722 && ((int) REGNO (SET_DEST (set)) == i
|
|
1723 || (int) ORIGINAL_REGNO (SET_DEST (set)) == i))
|
|
1724 new_regno = REGNO (SET_DEST (set));
|
|
1725 else if (REG_P (SET_SRC (set))
|
|
1726 && ((int) REGNO (SET_SRC (set)) == i
|
|
1727 || (int) ORIGINAL_REGNO (SET_SRC (set)) == i))
|
|
1728 new_regno = REGNO (SET_SRC (set));
|
|
1729 else
|
|
1730 gcc_unreachable ();
|
|
1731 if (new_regno == i)
|
|
1732 prev = x;
|
|
1733 else
|
|
1734 {
|
|
1735 if (prev == NULL_RTX)
|
|
1736 reg_equiv_init[i] = next;
|
|
1737 else
|
|
1738 XEXP (prev, 1) = next;
|
|
1739 XEXP (x, 1) = reg_equiv_init[new_regno];
|
|
1740 reg_equiv_init[new_regno] = x;
|
|
1741 }
|
|
1742 }
|
|
1743 }
|
|
1744 }
|
|
1745
|
|
1746 #ifdef ENABLE_IRA_CHECKING
|
|
1747 /* Print redundant memory-memory copies. */
|
|
1748 static void
|
|
1749 print_redundant_copies (void)
|
|
1750 {
|
|
1751 int hard_regno;
|
|
1752 ira_allocno_t a;
|
|
1753 ira_copy_t cp, next_cp;
|
|
1754 ira_allocno_iterator ai;
|
|
1755
|
|
1756 FOR_EACH_ALLOCNO (a, ai)
|
|
1757 {
|
|
1758 if (ALLOCNO_CAP_MEMBER (a) != NULL)
|
|
1759 /* It is a cap. */
|
|
1760 continue;
|
|
1761 hard_regno = ALLOCNO_HARD_REGNO (a);
|
|
1762 if (hard_regno >= 0)
|
|
1763 continue;
|
|
1764 for (cp = ALLOCNO_COPIES (a); cp != NULL; cp = next_cp)
|
|
1765 if (cp->first == a)
|
|
1766 next_cp = cp->next_first_allocno_copy;
|
|
1767 else
|
|
1768 {
|
|
1769 next_cp = cp->next_second_allocno_copy;
|
|
1770 if (internal_flag_ira_verbose > 4 && ira_dump_file != NULL
|
|
1771 && cp->insn != NULL_RTX
|
|
1772 && ALLOCNO_HARD_REGNO (cp->first) == hard_regno)
|
|
1773 fprintf (ira_dump_file,
|
|
1774 " Redundant move from %d(freq %d):%d\n",
|
|
1775 INSN_UID (cp->insn), cp->freq, hard_regno);
|
|
1776 }
|
|
1777 }
|
|
1778 }
|
|
1779 #endif
|
|
1780
|
|
1781 /* Set up preferred and alternative classes for new pseudo-registers
|
|
1782 created by IRA starting with START. */
|
|
1783 static void
|
|
1784 setup_preferred_alternate_classes_for_new_pseudos (int start)
|
|
1785 {
|
|
1786 int i, old_regno;
|
|
1787 int max_regno = max_reg_num ();
|
|
1788
|
|
1789 for (i = start; i < max_regno; i++)
|
|
1790 {
|
|
1791 old_regno = ORIGINAL_REGNO (regno_reg_rtx[i]);
|
|
1792 ira_assert (i != old_regno);
|
|
1793 setup_reg_classes (i, reg_preferred_class (old_regno),
|
|
1794 reg_alternate_class (old_regno));
|
|
1795 if (internal_flag_ira_verbose > 2 && ira_dump_file != NULL)
|
|
1796 fprintf (ira_dump_file,
|
|
1797 " New r%d: setting preferred %s, alternative %s\n",
|
|
1798 i, reg_class_names[reg_preferred_class (old_regno)],
|
|
1799 reg_class_names[reg_alternate_class (old_regno)]);
|
|
1800 }
|
|
1801 }
|
|
1802
|
|
1803
|
|
1804
|
|
1805 /* Regional allocation can create new pseudo-registers. This function
|
|
1806 expands some arrays for pseudo-registers. */
|
|
1807 static void
|
|
1808 expand_reg_info (int old_size)
|
|
1809 {
|
|
1810 int i;
|
|
1811 int size = max_reg_num ();
|
|
1812
|
|
1813 resize_reg_info ();
|
|
1814 for (i = old_size; i < size; i++)
|
|
1815 {
|
|
1816 reg_renumber[i] = -1;
|
|
1817 setup_reg_classes (i, GENERAL_REGS, ALL_REGS);
|
|
1818 }
|
|
1819 }
|
|
1820
|
|
1821 /* Return TRUE if the register pressure in the function is too high.
|
|
1822    It is used to decide whether stack slot sharing is worth doing.  */
|
|
1823 static bool
|
|
1824 too_high_register_pressure_p (void)
|
|
1825 {
|
|
1826 int i;
|
|
1827 enum reg_class cover_class;
|
|
1828
|
|
1829 for (i = 0; i < ira_reg_class_cover_size; i++)
|
|
1830 {
|
|
1831 cover_class = ira_reg_class_cover[i];
|
|
1832 if (ira_loop_tree_root->reg_pressure[cover_class] > 10000)
|
|
1833 return true;
|
|
1834 }
|
|
1835 return false;
|
|
1836 }
|
|
1837
|
|
1838
|
|
1839
|
|
1840 /* Indicate that hard register number FROM was eliminated and replaced with
|
|
1841 an offset from hard register number TO. The status of hard registers live
|
|
1842 at the start of a basic block is updated by replacing a use of FROM with
|
|
1843 a use of TO. */
|
|
1844
|
|
1845 void
|
|
1846 mark_elimination (int from, int to)
|
|
1847 {
|
|
1848 basic_block bb;
|
|
1849
|
|
1850 FOR_EACH_BB (bb)
|
|
1851 {
|
|
1852 /* We don't use LIVE info in IRA. */
|
|
1853 regset r = DF_LR_IN (bb);
|
|
1854
|
|
1855 if (REGNO_REG_SET_P (r, from))
|
|
1856 {
|
|
1857 CLEAR_REGNO_REG_SET (r, from);
|
|
1858 SET_REGNO_REG_SET (r, to);
|
|
1859 }
|
|
1860 }
|
|
1861 }
|
|
1862
|
|
1863
|
|
1864
|
|
1865 struct equivalence
|
|
1866 {
|
|
1867 /* Set when a REG_EQUIV note is found or created.  Used to
|
|
1868    keep track of what memory accesses might be created later,
|
|
1869    e.g. by reload.  */
|
|
1870 rtx replacement;
|
|
1871 rtx *src_p;
|
|
1872 /* The list of the instructions that initialize this register.  */
|
|
1873 rtx init_insns;
|
|
1874 /* Loop depth is used to recognize equivalences which appear
|
|
1875 to be present within the same loop (or in an inner loop). */
|
|
1876 int loop_depth;
|
|
1877 /* Nonzero if this had a preexisting REG_EQUIV note. */
|
|
1878 int is_arg_equivalence;
|
|
1879 /* Set when an attempt should be made to replace a register
|
|
1880 with the associated src_p entry. */
|
|
1881 char replace;
|
|
1882 };
|
|
1883
|
|
1884 /* reg_equiv[N] (where N is a pseudo reg number) is the equivalence
|
|
1885 structure for that register. */
|
|
1886 static struct equivalence *reg_equiv;
|
|
1887
|
|
1888 /* Used for communication between the following two functions: contains
|
|
1889 a MEM that we wish to ensure remains unchanged. */
|
|
1890 static rtx equiv_mem;
|
|
1891
|
|
1892 /* Set nonzero if EQUIV_MEM is modified. */
|
|
1893 static int equiv_mem_modified;
|
|
1894
|
|
1895 /* If EQUIV_MEM is modified by modifying DEST, indicate that it is modified.
|
|
1896 Called via note_stores. */
|
|
1897 static void
|
|
1898 validate_equiv_mem_from_store (rtx dest, const_rtx set ATTRIBUTE_UNUSED,
|
|
1899 void *data ATTRIBUTE_UNUSED)
|
|
1900 {
|
|
1901 if ((REG_P (dest)
|
|
1902 && reg_overlap_mentioned_p (dest, equiv_mem))
|
|
1903 || (MEM_P (dest)
|
|
1904 && true_dependence (dest, VOIDmode, equiv_mem, rtx_varies_p)))
|
|
1905 equiv_mem_modified = 1;
|
|
1906 }
|
|
1907
|
|
1908 /* Verify that no store between START and the death of REG invalidates
|
|
1909 MEMREF. MEMREF is invalidated by modifying a register used in MEMREF,
|
|
1910 by storing into an overlapping memory location, or with a non-const
|
|
1911 CALL_INSN.
|
|
1912
|
|
1913 Return 1 if MEMREF remains valid. */
|
|
1914 static int
|
|
1915 validate_equiv_mem (rtx start, rtx reg, rtx memref)
|
|
1916 {
|
|
1917 rtx insn;
|
|
1918 rtx note;
|
|
1919
|
|
1920 equiv_mem = memref;
|
|
1921 equiv_mem_modified = 0;
|
|
1922
|
|
1923 /* If the memory reference has side effects or is volatile, it isn't a
|
|
1924 valid equivalence. */
|
|
1925 if (side_effects_p (memref))
|
|
1926 return 0;
|
|
1927
|
|
1928 for (insn = start; insn && ! equiv_mem_modified; insn = NEXT_INSN (insn))
|
|
1929 {
|
|
1930 if (! INSN_P (insn))
|
|
1931 continue;
|
|
1932
|
|
1933 if (find_reg_note (insn, REG_DEAD, reg))
|
|
1934 return 1;
|
|
1935
|
|
1936 if (CALL_P (insn) && ! MEM_READONLY_P (memref)
|
|
1937 && ! RTL_CONST_OR_PURE_CALL_P (insn))
|
|
1938 return 0;
|
|
1939
|
|
1940 note_stores (PATTERN (insn), validate_equiv_mem_from_store, NULL);
|
|
1941
|
|
1942 /* If a register mentioned in MEMREF is modified via an
|
|
1943 auto-increment, we lose the equivalence. Do the same if one
|
|
1944 dies; although we could extend the life, it doesn't seem worth
|
|
1945 the trouble. */
|
|
1946
|
|
1947 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
|
|
1948 if ((REG_NOTE_KIND (note) == REG_INC
|
|
1949 || REG_NOTE_KIND (note) == REG_DEAD)
|
|
1950 && REG_P (XEXP (note, 0))
|
|
1951 && reg_overlap_mentioned_p (XEXP (note, 0), memref))
|
|
1952 return 0;
|
|
1953 }
|
|
1954
|
|
1955 return 0;
|
|
1956 }
|
|
1957
|
|
1958 /* Return nonzero if X might vary; return zero if X is known to be invariant.  */
|
|
1959 static int
|
|
1960 equiv_init_varies_p (rtx x)
|
|
1961 {
|
|
1962 RTX_CODE code = GET_CODE (x);
|
|
1963 int i;
|
|
1964 const char *fmt;
|
|
1965
|
|
1966 switch (code)
|
|
1967 {
|
|
1968 case MEM:
|
|
1969 return !MEM_READONLY_P (x) || equiv_init_varies_p (XEXP (x, 0));
|
|
1970
|
|
1971 case CONST:
|
|
1972 case CONST_INT:
|
|
1973 case CONST_DOUBLE:
|
|
1974 case CONST_FIXED:
|
|
1975 case CONST_VECTOR:
|
|
1976 case SYMBOL_REF:
|
|
1977 case LABEL_REF:
|
|
1978 return 0;
|
|
1979
|
|
1980 case REG:
|
|
1981 return reg_equiv[REGNO (x)].replace == 0 && rtx_varies_p (x, 0);
|
|
1982
|
|
1983 case ASM_OPERANDS:
|
|
1984 if (MEM_VOLATILE_P (x))
|
|
1985 return 1;
|
|
1986
|
|
1987 /* Fall through. */
|
|
1988
|
|
1989 default:
|
|
1990 break;
|
|
1991 }
|
|
1992
|
|
1993 fmt = GET_RTX_FORMAT (code);
|
|
1994 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
|
|
1995 if (fmt[i] == 'e')
|
|
1996 {
|
|
1997 if (equiv_init_varies_p (XEXP (x, i)))
|
|
1998 return 1;
|
|
1999 }
|
|
2000 else if (fmt[i] == 'E')
|
|
2001 {
|
|
2002 int j;
|
|
2003 for (j = 0; j < XVECLEN (x, i); j++)
|
|
2004 if (equiv_init_varies_p (XVECEXP (x, i, j)))
|
|
2005 return 1;
|
|
2006 }
|
|
2007
|
|
2008 return 0;
|
|
2009 }
|
|
2010
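/* The recursive RTL walkers in this file (equiv_init_varies_p above,
   equiv_init_movable_p and contains_replace_regs below) all share the
   skeleton sketched here; the predicate name is invented and the code is
   illustrative only.  */
#if 0
static int
walk_rtx_example (rtx x)
{
  enum rtx_code code = GET_CODE (x);
  const char *fmt = GET_RTX_FORMAT (code);
  int i, j;

  /* ... handle the interesting rtx codes in a switch here ...  */

  for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
    if (fmt[i] == 'e')
      {
        if (walk_rtx_example (XEXP (x, i)))
          return 1;
      }
    else if (fmt[i] == 'E')
      {
        for (j = 0; j < XVECLEN (x, i); j++)
          if (walk_rtx_example (XVECEXP (x, i, j)))
            return 1;
      }

  return 0;
}
#endif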
|
|
2011 /* Returns nonzero if X (used to initialize register REGNO) is movable.
|
|
2012 X is only movable if the registers it uses have equivalent initializations
|
|
2013 which appear to be within the same loop (or in an inner loop) and movable
|
|
2014 or if they are not candidates for local_alloc and don't vary. */
|
|
2015 static int
|
|
2016 equiv_init_movable_p (rtx x, int regno)
|
|
2017 {
|
|
2018 int i, j;
|
|
2019 const char *fmt;
|
|
2020 enum rtx_code code = GET_CODE (x);
|
|
2021
|
|
2022 switch (code)
|
|
2023 {
|
|
2024 case SET:
|
|
2025 return equiv_init_movable_p (SET_SRC (x), regno);
|
|
2026
|
|
2027 case CC0:
|
|
2028 case CLOBBER:
|
|
2029 return 0;
|
|
2030
|
|
2031 case PRE_INC:
|
|
2032 case PRE_DEC:
|
|
2033 case POST_INC:
|
|
2034 case POST_DEC:
|
|
2035 case PRE_MODIFY:
|
|
2036 case POST_MODIFY:
|
|
2037 return 0;
|
|
2038
|
|
2039 case REG:
|
|
2040 return (reg_equiv[REGNO (x)].loop_depth >= reg_equiv[regno].loop_depth
|
|
2041 && reg_equiv[REGNO (x)].replace)
|
|
2042 || (REG_BASIC_BLOCK (REGNO (x)) < NUM_FIXED_BLOCKS && ! rtx_varies_p (x, 0));
|
|
2043
|
|
2044 case UNSPEC_VOLATILE:
|
|
2045 return 0;
|
|
2046
|
|
2047 case ASM_OPERANDS:
|
|
2048 if (MEM_VOLATILE_P (x))
|
|
2049 return 0;
|
|
2050
|
|
2051 /* Fall through. */
|
|
2052
|
|
2053 default:
|
|
2054 break;
|
|
2055 }
|
|
2056
|
|
2057 fmt = GET_RTX_FORMAT (code);
|
|
2058 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
|
|
2059 switch (fmt[i])
|
|
2060 {
|
|
2061 case 'e':
|
|
2062 if (! equiv_init_movable_p (XEXP (x, i), regno))
|
|
2063 return 0;
|
|
2064 break;
|
|
2065 case 'E':
|
|
2066 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
|
|
2067 if (! equiv_init_movable_p (XVECEXP (x, i, j), regno))
|
|
2068 return 0;
|
|
2069 break;
|
|
2070 }
|
|
2071
|
|
2072 return 1;
|
|
2073 }
|
|
2074
|
|
2075 /* TRUE if X uses any registers for which reg_equiv[REGNO].replace is true. */
|
|
2076 static int
|
|
2077 contains_replace_regs (rtx x)
|
|
2078 {
|
|
2079 int i, j;
|
|
2080 const char *fmt;
|
|
2081 enum rtx_code code = GET_CODE (x);
|
|
2082
|
|
2083 switch (code)
|
|
2084 {
|
|
2085 case CONST_INT:
|
|
2086 case CONST:
|
|
2087 case LABEL_REF:
|
|
2088 case SYMBOL_REF:
|
|
2089 case CONST_DOUBLE:
|
|
2090 case CONST_FIXED:
|
|
2091 case CONST_VECTOR:
|
|
2092 case PC:
|
|
2093 case CC0:
|
|
2094 case HIGH:
|
|
2095 return 0;
|
|
2096
|
|
2097 case REG:
|
|
2098 return reg_equiv[REGNO (x)].replace;
|
|
2099
|
|
2100 default:
|
|
2101 break;
|
|
2102 }
|
|
2103
|
|
2104 fmt = GET_RTX_FORMAT (code);
|
|
2105 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
|
|
2106 switch (fmt[i])
|
|
2107 {
|
|
2108 case 'e':
|
|
2109 if (contains_replace_regs (XEXP (x, i)))
|
|
2110 return 1;
|
|
2111 break;
|
|
2112 case 'E':
|
|
2113 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
|
|
2114 if (contains_replace_regs (XVECEXP (x, i, j)))
|
|
2115 return 1;
|
|
2116 break;
|
|
2117 }
|
|
2118
|
|
2119 return 0;
|
|
2120 }
|
|
2121
|
|
2122 /* TRUE if X references a memory location that would be affected by a store
|
|
2123 to MEMREF. */
|
|
2124 static int
|
|
2125 memref_referenced_p (rtx memref, rtx x)
|
|
2126 {
|
|
2127 int i, j;
|
|
2128 const char *fmt;
|
|
2129 enum rtx_code code = GET_CODE (x);
|
|
2130
|
|
2131 switch (code)
|
|
2132 {
|
|
2133 case CONST_INT:
|
|
2134 case CONST:
|
|
2135 case LABEL_REF:
|
|
2136 case SYMBOL_REF:
|
|
2137 case CONST_DOUBLE:
|
|
2138 case CONST_FIXED:
|
|
2139 case CONST_VECTOR:
|
|
2140 case PC:
|
|
2141 case CC0:
|
|
2142 case HIGH:
|
|
2143 case LO_SUM:
|
|
2144 return 0;
|
|
2145
|
|
2146 case REG:
|
|
2147 return (reg_equiv[REGNO (x)].replacement
|
|
2148 && memref_referenced_p (memref,
|
|
2149 reg_equiv[REGNO (x)].replacement));
|
|
2150
|
|
2151 case MEM:
|
|
2152 if (true_dependence (memref, VOIDmode, x, rtx_varies_p))
|
|
2153 return 1;
|
|
2154 break;
|
|
2155
|
|
2156 case SET:
|
|
2157 /* If we are setting a MEM, it doesn't count (its address does), but any
|
|
2158 other SET_DEST that has a MEM in it is referencing the MEM. */
|
|
2159 if (MEM_P (SET_DEST (x)))
|
|
2160 {
|
|
2161 if (memref_referenced_p (memref, XEXP (SET_DEST (x), 0)))
|
|
2162 return 1;
|
|
2163 }
|
|
2164 else if (memref_referenced_p (memref, SET_DEST (x)))
|
|
2165 return 1;
|
|
2166
|
|
2167 return memref_referenced_p (memref, SET_SRC (x));
|
|
2168
|
|
2169 default:
|
|
2170 break;
|
|
2171 }
|
|
2172
|
|
2173 fmt = GET_RTX_FORMAT (code);
|
|
2174 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
|
|
2175 switch (fmt[i])
|
|
2176 {
|
|
2177 case 'e':
|
|
2178 if (memref_referenced_p (memref, XEXP (x, i)))
|
|
2179 return 1;
|
|
2180 break;
|
|
2181 case 'E':
|
|
2182 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
|
|
2183 if (memref_referenced_p (memref, XVECEXP (x, i, j)))
|
|
2184 return 1;
|
|
2185 break;
|
|
2186 }
|
|
2187
|
|
2188 return 0;
|
|
2189 }
|
|
2190
|
|
2191 /* TRUE if some insn in the range (START, END] references a memory location
|
|
2192 that would be affected by a store to MEMREF. */
|
|
2193 static int
|
|
2194 memref_used_between_p (rtx memref, rtx start, rtx end)
|
|
2195 {
|
|
2196 rtx insn;
|
|
2197
|
|
2198 for (insn = NEXT_INSN (start); insn != NEXT_INSN (end);
|
|
2199 insn = NEXT_INSN (insn))
|
|
2200 {
|
|
2201 if (!INSN_P (insn))
|
|
2202 continue;
|
|
2203
|
|
2204 if (memref_referenced_p (memref, PATTERN (insn)))
|
|
2205 return 1;
|
|
2206
|
|
2207 /* Nonconst functions may access memory. */
|
|
2208 if (CALL_P (insn) && (! RTL_CONST_CALL_P (insn)))
|
|
2209 return 1;
|
|
2210 }
|
|
2211
|
|
2212 return 0;
|
|
2213 }
|
|
2214
|
|
2215 /* Mark REG as having no known equivalence.
|
|
2216 Some instructions might have been processed before and furnished
|
|
2217 with REG_EQUIV notes for this register; these notes will have to be
|
|
2218 removed.
|
|
2219 STORE is the piece of RTL that does the non-constant / conflicting
|
|
2220 assignment - a SET, CLOBBER or REG_INC note. It is currently not used,
|
|
2221 but needs to be there because this function is called from note_stores. */
|
|
2222 static void
|
|
2223 no_equiv (rtx reg, const_rtx store ATTRIBUTE_UNUSED, void *data ATTRIBUTE_UNUSED)
|
|
2224 {
|
|
2225 int regno;
|
|
2226 rtx list;
|
|
2227
|
|
2228 if (!REG_P (reg))
|
|
2229 return;
|
|
2230 regno = REGNO (reg);
|
|
2231 list = reg_equiv[regno].init_insns;
|
|
2232 if (list == const0_rtx)
|
|
2233 return;
|
|
2234 reg_equiv[regno].init_insns = const0_rtx;
|
|
2235 reg_equiv[regno].replacement = NULL_RTX;
|
|
2236 /* This doesn't matter for equivalences made for argument registers;
|
|
2237    we should keep their initialization insns.  */
|
|
2238 if (reg_equiv[regno].is_arg_equivalence)
|
|
2239 return;
|
|
2240 reg_equiv_init[regno] = NULL_RTX;
|
|
2241 for (; list; list = XEXP (list, 1))
|
|
2242 {
|
|
2243 rtx insn = XEXP (list, 0);
|
|
2244 remove_note (insn, find_reg_note (insn, REG_EQUIV, NULL_RTX));
|
|
2245 }
|
|
2246 }
|
|
2247
|
|
2248 /* Nonzero if we recorded an equivalence for a LABEL_REF. */
|
|
2249 static int recorded_label_ref;
|
|
2250
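/* A hedged RTL-level illustration for update_equiv_regs below (the
   pseudo number and the stack offset are invented): when the only
   definition of a pseudo used in a single basic block is a load like

     (set (reg:SI 90) (mem:SI (plus:SI (reg:SI fp) (const_int -4))))

   and the memory location is unchanged for the life of the pseudo, a
   REG_EQUIV note recording that MEM is attached to the insn, so that
   reload may later delete the load and rematerialize the value from the
   stack slot when that is cheaper.  */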
|
|
2251 /* Find registers that are equivalent to a single value throughout the
|
|
2252 compilation (either because they can be referenced in memory or are set once
|
|
2253 from a single constant).  Lower their priority for getting a hard register.
|
|
2254
|
|
2255 If such a register is only referenced once, try substituting its value
|
|
2256 into the using insn. If it succeeds, we can eliminate the register
|
|
2257 completely.
|
|
2258
|
|
2259 Initialize the REG_EQUIV_INIT array of initializing insns.
|
|
2260
|
|
2261 Return non-zero if jump label rebuilding should be done. */
|
|
2262 static int
|
|
2263 update_equiv_regs (void)
|
|
2264 {
|
|
2265 rtx insn;
|
|
2266 basic_block bb;
|
|
2267 int loop_depth;
|
|
2268 bitmap cleared_regs;
|
|
2269
|
|
2270 /* We need to keep track of whether or not we recorded a LABEL_REF so
|
|
2271 that we know if the jump optimizer needs to be rerun. */
|
|
2272 recorded_label_ref = 0;
|
|
2273
|
|
2274 reg_equiv = XCNEWVEC (struct equivalence, max_regno);
|
|
2275 reg_equiv_init = GGC_CNEWVEC (rtx, max_regno);
|
|
2276 reg_equiv_init_size = max_regno;
|
|
2277
|
|
2278 init_alias_analysis ();
|
|
2279
|
|
2280 /* Scan the insns and find which registers have equivalences. Do this
|
|
2281 in a separate scan of the insns because (due to -fcse-follow-jumps)
|
|
2282 a register can be set below its use. */
|
|
2283 FOR_EACH_BB (bb)
|
|
2284 {
|
|
2285 loop_depth = bb->loop_depth;
|
|
2286
|
|
2287 for (insn = BB_HEAD (bb);
|
|
2288 insn != NEXT_INSN (BB_END (bb));
|
|
2289 insn = NEXT_INSN (insn))
|
|
2290 {
|
|
2291 rtx note;
|
|
2292 rtx set;
|
|
2293 rtx dest, src;
|
|
2294 int regno;
|
|
2295
|
|
2296 if (! INSN_P (insn))
|
|
2297 continue;
|
|
2298
|
|
2299 for (note = REG_NOTES (insn); note; note = XEXP (note, 1))
|
|
2300 if (REG_NOTE_KIND (note) == REG_INC)
|
|
2301 no_equiv (XEXP (note, 0), note, NULL);
|
|
2302
|
|
2303 set = single_set (insn);
|
|
2304
|
|
2305 /* If this insn contains more (or less) than a single SET,
|
|
2306 only mark all destinations as having no known equivalence. */
|
|
2307 if (set == 0)
|
|
2308 {
|
|
2309 note_stores (PATTERN (insn), no_equiv, NULL);
|
|
2310 continue;
|
|
2311 }
|
|
2312 else if (GET_CODE (PATTERN (insn)) == PARALLEL)
|
|
2313 {
|
|
2314 int i;
|
|
2315
|
|
2316 for (i = XVECLEN (PATTERN (insn), 0) - 1; i >= 0; i--)
|
|
2317 {
|
|
2318 rtx part = XVECEXP (PATTERN (insn), 0, i);
|
|
2319 if (part != set)
|
|
2320 note_stores (part, no_equiv, NULL);
|
|
2321 }
|
|
2322 }
|
|
2323
|
|
2324 dest = SET_DEST (set);
|
|
2325 src = SET_SRC (set);
|
|
2326
|
|
2327 /* See if this is setting up the equivalence between an argument
|
|
2328 register and its stack slot. */
|
|
2329 note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
|
|
2330 if (note)
|
|
2331 {
|
|
2332 gcc_assert (REG_P (dest));
|
|
2333 regno = REGNO (dest);
|
|
2334
|
|
2335 /* Note that we don't want to clear reg_equiv_init even if there
|
|
2336 are multiple sets of this register. */
|
|
2337 reg_equiv[regno].is_arg_equivalence = 1;
|
|
2338
|
|
2339 /* Record for reload that this is an equivalencing insn. */
|
|
2340 if (rtx_equal_p (src, XEXP (note, 0)))
|
|
2341 reg_equiv_init[regno]
|
|
2342 = gen_rtx_INSN_LIST (VOIDmode, insn, reg_equiv_init[regno]);
|
|
2343
|
|
2344 /* Continue normally in case this is a candidate for
|
|
2345 replacements. */
|
|
2346 }
|
|
2347
|
|
2348 if (!optimize)
|
|
2349 continue;
|
|
2350
|
|
2351 /* We only handle the case of a pseudo register being set
|
|
2352 once, or always to the same value. */
|
|
2353 /* ??? The mn10200 port breaks if we add equivalences for
|
|
2354 values that need an ADDRESS_REGS register and set them equivalent
|
|
2355 to a MEM of a pseudo. The actual problem is in the over-conservative
|
|
2356 handling of INPADDR_ADDRESS / INPUT_ADDRESS / INPUT triples in
|
|
2357 calculate_needs, but we traditionally work around this problem
|
|
2358 here by rejecting equivalences when the destination is in a register
|
|
2359 that's likely spilled. This is fragile, of course, since the
|
|
2360 preferred class of a pseudo depends on all instructions that set
|
|
2361 or use it. */
|
|
2362
|
|
2363 if (!REG_P (dest)
|
|
2364 || (regno = REGNO (dest)) < FIRST_PSEUDO_REGISTER
|
|
2365 || reg_equiv[regno].init_insns == const0_rtx
|
|
2366 || (CLASS_LIKELY_SPILLED_P (reg_preferred_class (regno))
|
|
2367 && MEM_P (src) && ! reg_equiv[regno].is_arg_equivalence))
|
|
2368 {
|
|
2369 /* This might be setting a SUBREG of a pseudo, a pseudo that is
|
|
2370 also set somewhere else to a constant. */
|
|
2371 note_stores (set, no_equiv, NULL);
|
|
2372 continue;
|
|
2373 }
|
|
2374
|
|
2375 note = find_reg_note (insn, REG_EQUAL, NULL_RTX);
|
|
2376
|
|
2377 /* cse sometimes generates function invariants, but doesn't put a
|
|
2378 REG_EQUAL note on the insn. Since this note would be redundant,
|
|
2379 there's no point creating it earlier than here. */
|
|
2380 if (! note && ! rtx_varies_p (src, 0))
|
|
2381 note = set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
|
|
2382
|
|
2383 /* Don't bother considering a REG_EQUAL note containing an EXPR_LIST
|
|
2384 since it represents a function call.  */
|
|
2385 if (note && GET_CODE (XEXP (note, 0)) == EXPR_LIST)
|
|
2386 note = NULL_RTX;
|
|
2387
|
|
2388 if (DF_REG_DEF_COUNT (regno) != 1
|
|
2389 && (! note
|
|
2390 || rtx_varies_p (XEXP (note, 0), 0)
|
|
2391 || (reg_equiv[regno].replacement
|
|
2392 && ! rtx_equal_p (XEXP (note, 0),
|
|
2393 reg_equiv[regno].replacement))))
|
|
2394 {
|
|
2395 no_equiv (dest, set, NULL);
|
|
2396 continue;
|
|
2397 }
|
|
2398 /* Record this insn as initializing this register. */
|
|
2399 reg_equiv[regno].init_insns
|
|
2400 = gen_rtx_INSN_LIST (VOIDmode, insn, reg_equiv[regno].init_insns);
|
|
2401
|
|
2402 /* If this register is known to be equal to a constant, record that
|
|
2403 it is always equivalent to the constant. */
|
|
2404 if (DF_REG_DEF_COUNT (regno) == 1
|
|
2405 && note && ! rtx_varies_p (XEXP (note, 0), 0))
|
|
2406 {
|
|
2407 rtx note_value = XEXP (note, 0);
|
|
2408 remove_note (insn, note);
|
|
2409 set_unique_reg_note (insn, REG_EQUIV, note_value);
|
|
2410 }
|
|
2411
|
|
2412 /* If this insn introduces a "constant" register, decrease the priority
|
|
2413 of that register. Record this insn if the register is only used once
|
|
2414 more and the equivalence value is the same as our source.
|
|
2415
|
|
2416 The latter condition is checked for two reasons: First, it is an
|
|
2417 indication that it may be more efficient to actually emit the insn
|
|
2418 as written (if no registers are available, reload will substitute
|
|
2419 the equivalence). Secondly, it avoids problems with any registers
|
|
2420 dying in this insn whose death notes would be missed.
|
|
2421
|
|
2422 If we don't have a REG_EQUIV note, see if this insn is loading
|
|
2423 a register used only in one basic block from a MEM. If so, and the
|
|
2424 MEM remains unchanged for the life of the register, add a REG_EQUIV
|
|
2425 note. */
|
|
2426
|
|
2427 note = find_reg_note (insn, REG_EQUIV, NULL_RTX);
|
|
2428
|
|
2429 if (note == 0 && REG_BASIC_BLOCK (regno) >= NUM_FIXED_BLOCKS
|
|
2430 && MEM_P (SET_SRC (set))
|
|
2431 && validate_equiv_mem (insn, dest, SET_SRC (set)))
|
|
2432 note = set_unique_reg_note (insn, REG_EQUIV, copy_rtx (SET_SRC (set)));
|
|
2433
|
|
2434 if (note)
|
|
2435 {
|
|
2436 int regno = REGNO (dest);
|
|
2437 rtx x = XEXP (note, 0);
|
|
2438
|
|
2439 /* If we haven't done so, record for reload that this is an
|
|
2440 equivalencing insn. */
|
|
2441 if (!reg_equiv[regno].is_arg_equivalence)
|
|
2442 reg_equiv_init[regno]
|
|
2443 = gen_rtx_INSN_LIST (VOIDmode, insn, reg_equiv_init[regno]);
|
|
2444
|
|
2445 /* Record whether or not we created a REG_EQUIV note for a LABEL_REF.
|
|
2446 We might end up substituting the LABEL_REF for uses of the
|
|
2447 pseudo here or later. That kind of transformation may turn an
|
|
2448 indirect jump into a direct jump, in which case we must rerun the
|
|
2449 jump optimizer to ensure that the JUMP_LABEL fields are valid. */
|
|
2450 if (GET_CODE (x) == LABEL_REF
|
|
2451 || (GET_CODE (x) == CONST
|
|
2452 && GET_CODE (XEXP (x, 0)) == PLUS
|
|
2453 && (GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF)))
|
|
2454 recorded_label_ref = 1;
|
|
2455
|
|
2456 reg_equiv[regno].replacement = x;
|
|
2457 reg_equiv[regno].src_p = &SET_SRC (set);
|
|
2458 reg_equiv[regno].loop_depth = loop_depth;
|
|
2459
|
|
2460 /* Don't mess with things live during setjmp. */
|
|
2461 if (REG_LIVE_LENGTH (regno) >= 0 && optimize)
|
|
2462 {
|
|
2463 /* Note that the statement below does not affect the priority
|
|
2464 in local-alloc! */
|
|
2465 REG_LIVE_LENGTH (regno) *= 2;
|
|
2466
|
|
2467 /* If the register is referenced exactly twice, meaning it is
|
|
2468 set once and used once, indicate that the reference may be
|
|
2469 replaced by the equivalence we computed above. Do this
|
|
2470 even if the register is only used in one block so that
|
|
2471 dependencies can be handled where the last register is
|
|
2472 used in a different block (i.e. HIGH / LO_SUM sequences)
|
|
2473 and to reduce the number of registers alive across
|
|
2474 calls. */
|
|
2475
|
|
2476 if (REG_N_REFS (regno) == 2
|
|
2477 && (rtx_equal_p (x, src)
|
|
2478 || ! equiv_init_varies_p (src))
|
|
2479 && NONJUMP_INSN_P (insn)
|
|
2480 && equiv_init_movable_p (PATTERN (insn), regno))
|
|
2481 reg_equiv[regno].replace = 1;
|
|
2482 }
|
|
2483 }
|
|
2484 }
|
|
2485 }
|
|
2486
|
|
2487 if (!optimize)
|
|
2488 goto out;
|
|
2489
|
|
2490 /* A second pass, to gather additional equivalences with memory. This needs
|
|
2491 to be done after we know which registers we are going to replace. */
|
|
2492
|
|
2493 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
|
|
2494 {
|
|
2495 rtx set, src, dest;
|
|
2496 unsigned regno;
|
|
2497
|
|
2498 if (! INSN_P (insn))
|
|
2499 continue;
|
|
2500
|
|
2501 set = single_set (insn);
|
|
2502 if (! set)
|
|
2503 continue;
|
|
2504
|
|
2505 dest = SET_DEST (set);
|
|
2506 src = SET_SRC (set);
|
|
2507
|
|
2508 /* If this sets a MEM to the contents of a REG that is only used
|
|
2509 in a single basic block, see if the register is always equivalent
|
|
2510 to that memory location and if moving the store from INSN to the
|
|
2511 insn that set REG is safe. If so, put a REG_EQUIV note on the
|
|
2512 initializing insn.
|
|
2513
|
|
2514 Don't add a REG_EQUIV note if the insn already has one. The existing
|
|
2515 REG_EQUIV is likely more useful than the one we are adding.
|
|
2516
|
|
2517 If one of the regs in the address has reg_equiv[REGNO].replace set,
|
|
2518 then we can't add this REG_EQUIV note. The reg_equiv[REGNO].replace
|
|
2519 optimization may move the set of this register immediately before
|
|
2520 insn, which puts it after reg_equiv[REGNO].init_insns, and hence
|
|
2521 the mention in the REG_EQUIV note would be to an uninitialized
|
|
2522 pseudo. */
|
|
2523
|
|
2524 if (MEM_P (dest) && REG_P (src)
|
|
2525 && (regno = REGNO (src)) >= FIRST_PSEUDO_REGISTER
|
|
2526 && REG_BASIC_BLOCK (regno) >= NUM_FIXED_BLOCKS
|
|
2527 && DF_REG_DEF_COUNT (regno) == 1
|
|
2528 && reg_equiv[regno].init_insns != 0
|
|
2529 && reg_equiv[regno].init_insns != const0_rtx
|
|
2530 && ! find_reg_note (XEXP (reg_equiv[regno].init_insns, 0),
|
|
2531 REG_EQUIV, NULL_RTX)
|
|
2532 && ! contains_replace_regs (XEXP (dest, 0)))
|
|
2533 {
|
|
2534 rtx init_insn = XEXP (reg_equiv[regno].init_insns, 0);
|
|
2535 if (validate_equiv_mem (init_insn, src, dest)
|
|
2536 && ! memref_used_between_p (dest, init_insn, insn)
|
|
2537 /* Attaching a REG_EQUIV note will fail if INIT_INSN has
|
|
2538 multiple sets. */
|
|
2539 && set_unique_reg_note (init_insn, REG_EQUIV, copy_rtx (dest)))
|
|
2540 {
|
|
2541 /* This insn makes the equivalence, not the one initializing
|
|
2542 the register. */
|
|
2543 reg_equiv_init[regno]
|
|
2544 = gen_rtx_INSN_LIST (VOIDmode, insn, NULL_RTX);
|
|
2545 df_notes_rescan (init_insn);
|
|
2546 }
|
|
2547 }
|
|
2548 }
|
|
2549
|
|
2550 cleared_regs = BITMAP_ALLOC (NULL);
|
|
2551 /* Now scan all regs killed in an insn to see if any of them are
|
|
2552 registers only used that once. If so, see if we can replace the
|
|
2553 reference with the equivalent form. If we can, delete the
|
|
2554 initializing reference and this register will go away. If we
|
|
2555 can't replace the reference, and the initializing reference is
|
|
2556 within the same loop (or in an inner loop), then move the register
|
|
2557 initialization just before the use, so that they are in the same
|
|
2558 basic block. */
|
|
2559 FOR_EACH_BB_REVERSE (bb)
|
|
2560 {
|
|
2561 loop_depth = bb->loop_depth;
|
|
2562 for (insn = BB_END (bb);
|
|
2563 insn != PREV_INSN (BB_HEAD (bb));
|
|
2564 insn = PREV_INSN (insn))
|
|
2565 {
|
|
2566 rtx link;
|
|
2567
|
|
2568 if (! INSN_P (insn))
|
|
2569 continue;
|
|
2570
|
|
2571 /* Don't substitute into a non-local goto, this confuses CFG. */
|
|
2572 if (JUMP_P (insn)
|
|
2573 && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
|
|
2574 continue;
|
|
2575
|
|
2576 for (link = REG_NOTES (insn); link; link = XEXP (link, 1))
|
|
2577 {
|
|
2578 if (REG_NOTE_KIND (link) == REG_DEAD
|
|
2579 /* Make sure this insn still refers to the register. */
|
|
2580 && reg_mentioned_p (XEXP (link, 0), PATTERN (insn)))
|
|
2581 {
|
|
2582 int regno = REGNO (XEXP (link, 0));
|
|
2583 rtx equiv_insn;
|
|
2584
|
|
2585 if (! reg_equiv[regno].replace
|
|
2586 || reg_equiv[regno].loop_depth < loop_depth)
|
|
2587 continue;
|
|
2588
|
|
2589 /* reg_equiv[REGNO].replace gets set only when
|
|
2590 REG_N_REFS[REGNO] is 2, i.e. the register is set
|
|
2591 once and used once. (If it were only set, but not used,
|
|
2592 flow would have deleted the setting insns.) Hence
|
|
2593 there can only be one insn in reg_equiv[REGNO].init_insns. */
|
|
2594 gcc_assert (reg_equiv[regno].init_insns
|
|
2595 && !XEXP (reg_equiv[regno].init_insns, 1));
|
|
2596 equiv_insn = XEXP (reg_equiv[regno].init_insns, 0);
|
|
2597
|
|
2598 /* We may not move instructions that can throw, since
|
|
2599 that changes basic block boundaries and we are not
|
|
2600 prepared to adjust the CFG to match. */
|
|
2601 if (can_throw_internal (equiv_insn))
|
|
2602 continue;
|
|
2603
|
|
2604 if (asm_noperands (PATTERN (equiv_insn)) < 0
|
|
2605 && validate_replace_rtx (regno_reg_rtx[regno],
|
|
2606 *(reg_equiv[regno].src_p), insn))
|
|
2607 {
|
|
2608 rtx equiv_link;
|
|
2609 rtx last_link;
|
|
2610 rtx note;
|
|
2611
|
|
2612 /* Find the last note. */
|
|
2613 for (last_link = link; XEXP (last_link, 1);
|
|
2614 last_link = XEXP (last_link, 1))
|
|
2615 ;
|
|
2616
|
|
2617 /* Append the REG_DEAD notes from equiv_insn. */
|
|
2618 equiv_link = REG_NOTES (equiv_insn);
|
|
2619 while (equiv_link)
|
|
2620 {
|
|
2621 note = equiv_link;
|
|
2622 equiv_link = XEXP (equiv_link, 1);
|
|
2623 if (REG_NOTE_KIND (note) == REG_DEAD)
|
|
2624 {
|
|
2625 remove_note (equiv_insn, note);
|
|
2626 XEXP (last_link, 1) = note;
|
|
2627 XEXP (note, 1) = NULL_RTX;
|
|
2628 last_link = note;
|
|
2629 }
|
|
2630 }
|
|
2631
|
|
2632 remove_death (regno, insn);
|
|
2633 SET_REG_N_REFS (regno, 0);
|
|
2634 REG_FREQ (regno) = 0;
|
|
2635 delete_insn (equiv_insn);
|
|
2636
|
|
2637 reg_equiv[regno].init_insns
|
|
2638 = XEXP (reg_equiv[regno].init_insns, 1);
|
|
2639
|
|
2640 reg_equiv_init[regno] = NULL_RTX;
|
|
2641 bitmap_set_bit (cleared_regs, regno);
|
|
2642 }
|
|
2643 /* Move the initialization of the register to just before
|
|
2644 INSN. Update the flow information. */
|
|
2645 else if (PREV_INSN (insn) != equiv_insn)
|
|
2646 {
|
|
2647 rtx new_insn;
|
|
2648
|
|
2649 new_insn = emit_insn_before (PATTERN (equiv_insn), insn);
|
|
2650 REG_NOTES (new_insn) = REG_NOTES (equiv_insn);
|
|
2651 REG_NOTES (equiv_insn) = 0;
|
|
2652 /* Rescan it to process the notes. */
|
|
2653 df_insn_rescan (new_insn);
|
|
2654
|
|
2655 /* Make sure this insn is recognized before
|
|
2656 reload begins, otherwise
|
|
2657 eliminate_regs_in_insn will die. */
|
|
2658 INSN_CODE (new_insn) = INSN_CODE (equiv_insn);
|
|
2659
|
|
2660 delete_insn (equiv_insn);
|
|
2661
|
|
2662 XEXP (reg_equiv[regno].init_insns, 0) = new_insn;
|
|
2663
|
|
2664 REG_BASIC_BLOCK (regno) = bb->index;
|
|
2665 REG_N_CALLS_CROSSED (regno) = 0;
|
|
2666 REG_FREQ_CALLS_CROSSED (regno) = 0;
|
|
2667 REG_N_THROWING_CALLS_CROSSED (regno) = 0;
|
|
2668 REG_LIVE_LENGTH (regno) = 2;
|
|
2669
|
|
2670 if (insn == BB_HEAD (bb))
|
|
2671 BB_HEAD (bb) = PREV_INSN (insn);
|
|
2672
|
|
2673 reg_equiv_init[regno]
|
|
2674 = gen_rtx_INSN_LIST (VOIDmode, new_insn, NULL_RTX);
|
|
2675 bitmap_set_bit (cleared_regs, regno);
|
|
2676 }
|
|
2677 }
|
|
2678 }
|
|
2679 }
|
|
2680 }
|
|
2681
|
|
2682 if (!bitmap_empty_p (cleared_regs))
|
|
2683 FOR_EACH_BB (bb)
|
|
2684 {
|
|
2685 bitmap_and_compl_into (DF_LIVE_IN (bb), cleared_regs);
|
|
2686 bitmap_and_compl_into (DF_LIVE_OUT (bb), cleared_regs);
|
|
2687 bitmap_and_compl_into (DF_LR_IN (bb), cleared_regs);
|
|
2688 bitmap_and_compl_into (DF_LR_OUT (bb), cleared_regs);
|
|
2689 }
|
|
2690
|
|
2691 BITMAP_FREE (cleared_regs);
|
|
2692
|
|
2693 out:
|
|
2694 /* Clean up. */
|
|
2695
|
|
2696 end_alias_analysis ();
|
|
2697 free (reg_equiv);
|
|
2698 return recorded_label_ref;
|
|
2699 }
|
|
2700
|
|
2701
|
|
2702
|
|
2703 /* Print chain C to FILE. */
|
|
2704 static void
|
|
2705 print_insn_chain (FILE *file, struct insn_chain *c)
|
|
2706 {
|
|
2707 fprintf (file, "insn=%d, ", INSN_UID(c->insn));
|
|
2708 bitmap_print (file, &c->live_throughout, "live_throughout: ", ", ");
|
|
2709 bitmap_print (file, &c->dead_or_set, "dead_or_set: ", "\n");
|
|
2710 }
|
|
2711
|
|
2712
|
|
2713 /* Print all reload_insn_chains to FILE. */
|
|
2714 static void
|
|
2715 print_insn_chains (FILE *file)
|
|
2716 {
|
|
2717 struct insn_chain *c;
|
|
2718 for (c = reload_insn_chain; c ; c = c->next)
|
|
2719 print_insn_chain (file, c);
|
|
2720 }
|
|
2721
|
|
2722 /* Return true if pseudo REGNO should be added to set live_throughout
|
|
2723 or dead_or_set of the insn chains for reload consideration. */
|
|
2724 static bool
|
|
2725 pseudo_for_reload_consideration_p (int regno)
|
|
2726 {
|
|
2727 /* Consider spilled pseudos too for IRA because they still have a
|
|
2728    chance to get hard registers during reload when IRA is used.  */
|
|
2729 return (reg_renumber[regno] >= 0
|
|
2730 || (ira_conflicts_p && flag_ira_share_spill_slots));
|
|
2731 }
|
|
2732
|
|
2733 /* Initialize LIVE_SUBREGS[ALLOCNUM] and LIVE_SUBREGS_USED[ALLOCNUM],
|
|
2734    using REG to determine the size in bytes and INIT_VALUE to choose
|
|
2735    the initial contents.  ALLOCNUM need not be the regno of REG.  */
|
|
2736 static void
|
|
2737 init_live_subregs (bool init_value, sbitmap *live_subregs,
|
|
2738 int *live_subregs_used, int allocnum, rtx reg)
|
|
2739 {
|
|
2740 unsigned int regno = REGNO (SUBREG_REG (reg));
|
|
2741 int size = GET_MODE_SIZE (GET_MODE (regno_reg_rtx[regno]));
|
|
2742
|
|
2743 gcc_assert (size > 0);
|
|
2744
|
|
2745 /* Been there, done that. */
|
|
2746 if (live_subregs_used[allocnum])
|
|
2747 return;
|
|
2748
|
|
2749 /* Create a new one with zeros. */
|
|
2750 if (live_subregs[allocnum] == NULL)
|
|
2751 live_subregs[allocnum] = sbitmap_alloc (size);
|
|
2752
|
|
2753 /* If the entire reg was live before being split into subregs, initialize
|
|
2754    all of the subregs to ones; otherwise initialize them to zero.  */
|
|
2755 if (init_value)
|
|
2756 sbitmap_ones (live_subregs[allocnum]);
|
|
2757 else
|
|
2758 sbitmap_zero (live_subregs[allocnum]);
|
|
2759
|
|
2760 /* Set the number of bits that we really want. */
|
|
2761 live_subregs_used[allocnum] = size;
|
|
2762 }
|
|
2763
|
|
2764 /* Walk the insns of the current function and build reload_insn_chain,
|
|
2765 and record register life information. */
|
|
2766 static void
|
|
2767 build_insn_chain (void)
|
|
2768 {
|
|
2769 unsigned int i;
|
|
2770 struct insn_chain **p = &reload_insn_chain;
|
|
2771 basic_block bb;
|
|
2772 struct insn_chain *c = NULL;
|
|
2773 struct insn_chain *next = NULL;
|
|
2774 bitmap live_relevant_regs = BITMAP_ALLOC (NULL);
|
|
2775 bitmap elim_regset = BITMAP_ALLOC (NULL);
|
|
2776 /* live_subregs is a vector used to keep accurate information about
|
|
2777    which bytes of multiword pseudos are live.  live_subregs and
|
|
2778    live_subregs_used are indexed by pseudo number.  The live_subreg
|
|
2779    entry for a particular pseudo is only used if the corresponding
|
|
2780    element is nonzero in live_subregs_used.  The value in
|
|
2781    live_subregs_used is the number of bytes that the pseudo can
|
|
2782    occupy.  */
|
|
2783 sbitmap *live_subregs = XCNEWVEC (sbitmap, max_regno);
|
|
2784 int *live_subregs_used = XNEWVEC (int, max_regno);
|
|
2785
|
|
2786 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
|
|
2787 if (TEST_HARD_REG_BIT (eliminable_regset, i))
|
|
2788 bitmap_set_bit (elim_regset, i);
|
|
2789 FOR_EACH_BB_REVERSE (bb)
|
|
2790 {
|
|
2791 bitmap_iterator bi;
|
|
2792 rtx insn;
|
|
2793
|
|
2794 CLEAR_REG_SET (live_relevant_regs);
|
|
2795 memset (live_subregs_used, 0, max_regno * sizeof (int));
|
|
2796
|
|
2797 EXECUTE_IF_SET_IN_BITMAP (df_get_live_out (bb), 0, i, bi)
|
|
2798 {
|
|
2799 if (i >= FIRST_PSEUDO_REGISTER)
|
|
2800 break;
|
|
2801 bitmap_set_bit (live_relevant_regs, i);
|
|
2802 }
|
|
2803
|
|
2804 EXECUTE_IF_SET_IN_BITMAP (df_get_live_out (bb),
|
|
2805 FIRST_PSEUDO_REGISTER, i, bi)
|
|
2806 {
|
|
2807 if (pseudo_for_reload_consideration_p (i))
|
|
2808 bitmap_set_bit (live_relevant_regs, i);
|
|
2809 }
|
|
2810
|
|
2811 FOR_BB_INSNS_REVERSE (bb, insn)
|
|
2812 {
|
|
2813 if (!NOTE_P (insn) && !BARRIER_P (insn))
|
|
2814 {
|
|
2815 unsigned int uid = INSN_UID (insn);
|
|
2816 df_ref *def_rec;
|
|
2817 df_ref *use_rec;
|
|
2818
|
|
2819 c = new_insn_chain ();
|
|
2820 c->next = next;
|
|
2821 next = c;
|
|
2822 *p = c;
|
|
2823 p = &c->prev;
|
|
2824
|
|
2825 c->insn = insn;
|
|
2826 c->block = bb->index;
|
|
2827
|
|
2828 if (INSN_P (insn))
|
|
2829 for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++)
|
|
2830 {
|
|
2831 df_ref def = *def_rec;
|
|
2832 unsigned int regno = DF_REF_REGNO (def);
|
|
2833
|
|
2834 /* Ignore may clobbers because these are generated
|
|
2835 from calls. However, every other kind of def is
|
|
2836 added to dead_or_set. */
|
|
2837 if (!DF_REF_FLAGS_IS_SET (def, DF_REF_MAY_CLOBBER))
|
|
2838 {
|
|
2839 if (regno < FIRST_PSEUDO_REGISTER)
|
|
2840 {
|
|
2841 if (!fixed_regs[regno])
|
|
2842 bitmap_set_bit (&c->dead_or_set, regno);
|
|
2843 }
|
|
2844 else if (pseudo_for_reload_consideration_p (regno))
|
|
2845 bitmap_set_bit (&c->dead_or_set, regno);
|
|
2846 }
|
|
2847
|
|
2848 if ((regno < FIRST_PSEUDO_REGISTER
|
|
2849 || reg_renumber[regno] >= 0
|
|
2850 || ira_conflicts_p)
|
|
2851 && (!DF_REF_FLAGS_IS_SET (def, DF_REF_CONDITIONAL)))
|
|
2852 {
|
|
2853 rtx reg = DF_REF_REG (def);
|
|
2854
|
|
2855 /* We can model subregs, but not if they are
|
|
2856 wrapped in ZERO_EXTRACTS. */
|
|
2857 if (GET_CODE (reg) == SUBREG
|
|
2858 && !DF_REF_FLAGS_IS_SET (def, DF_REF_ZERO_EXTRACT))
|
|
2859 {
|
|
2860 unsigned int start = SUBREG_BYTE (reg);
|
|
2861 unsigned int last = start
|
|
2862 + GET_MODE_SIZE (GET_MODE (reg));
|
|
2863
|
|
2864 init_live_subregs
|
|
2865 (bitmap_bit_p (live_relevant_regs, regno),
|
|
2866 live_subregs, live_subregs_used, regno, reg);
|
|
2867
|
|
2868 if (!DF_REF_FLAGS_IS_SET
|
|
2869 (def, DF_REF_STRICT_LOW_PART))
|
|
2870 {
|
|
2871 /* Expand the range to cover entire words.
|
|
2872 Bytes added here are "don't care". */
|
|
2873 start
|
|
2874 = start / UNITS_PER_WORD * UNITS_PER_WORD;
|
|
2875 last = ((last + UNITS_PER_WORD - 1)
|
|
2876 / UNITS_PER_WORD * UNITS_PER_WORD);
|
|
2877 }
|
|
2878
|
|
2879 /* Ignore the paradoxical bits. */
|
|
2880 if ((int)last > live_subregs_used[regno])
|
|
2881 last = live_subregs_used[regno];
|
|
2882
|
|
2883 while (start < last)
|
|
2884 {
|
|
2885 RESET_BIT (live_subregs[regno], start);
|
|
2886 start++;
|
|
2887 }
|
|
2888
|
|
2889 if (sbitmap_empty_p (live_subregs[regno]))
|
|
2890 {
|
|
2891 live_subregs_used[regno] = 0;
|
|
2892 bitmap_clear_bit (live_relevant_regs, regno);
|
|
2893 }
|
|
2894 else
|
|
2895 /* Set live_relevant_regs here because
|
|
2896 that bit has to be true to get us to
|
|
2897 look at the live_subregs fields. */
|
|
2898 bitmap_set_bit (live_relevant_regs, regno);
|
|
2899 }
|
|
2900 else
|
|
2901 {
|
|
2902 /* DF_REF_PARTIAL is generated for
|
|
2903 subregs, STRICT_LOW_PART, and
|
|
2904 ZERO_EXTRACT. We handle the subreg
|
|
2905 case above so here we have to keep from
|
|
2906 modeling the def as a killing def. */
|
|
2907 if (!DF_REF_FLAGS_IS_SET (def, DF_REF_PARTIAL))
|
|
2908 {
|
|
2909 bitmap_clear_bit (live_relevant_regs, regno);
|
|
2910 live_subregs_used[regno] = 0;
|
|
2911 }
|
|
2912 }
|
|
2913 }
|
|
2914 }
|
|
2915
|
|
2916 bitmap_and_compl_into (live_relevant_regs, elim_regset);
|
|
2917 bitmap_copy (&c->live_throughout, live_relevant_regs);
|
|
2918
|
|
2919 if (INSN_P (insn))
|
|
2920 for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++)
|
|
2921 {
|
|
2922 df_ref use = *use_rec;
|
|
2923 unsigned int regno = DF_REF_REGNO (use);
|
|
2924 rtx reg = DF_REF_REG (use);
|
|
2925
|
|
2926 /* DF_REF_READ_WRITE on a use means that this use
|
|
2927 is fabricated from a def that is a partial set
|
|
2928 to a multiword reg. Here, we only model the
|
|
2929 subreg case that is not wrapped in ZERO_EXTRACT
|
|
2930 precisely so we do not need to look at the
|
|
2931 fabricated use. */
|
|
2932 if (DF_REF_FLAGS_IS_SET (use, DF_REF_READ_WRITE)
|
|
2933 && !DF_REF_FLAGS_IS_SET (use, DF_REF_ZERO_EXTRACT)
|
|
2934 && DF_REF_FLAGS_IS_SET (use, DF_REF_SUBREG))
|
|
2935 continue;
|
|
2936
|
|
2937 /* Add the last use of each var to dead_or_set. */
|
|
2938 if (!bitmap_bit_p (live_relevant_regs, regno))
|
|
2939 {
|
|
2940 if (regno < FIRST_PSEUDO_REGISTER)
|
|
2941 {
|
|
2942 if (!fixed_regs[regno])
|
|
2943 bitmap_set_bit (&c->dead_or_set, regno);
|
|
2944 }
|
|
2945 else if (pseudo_for_reload_consideration_p (regno))
|
|
2946 bitmap_set_bit (&c->dead_or_set, regno);
|
|
2947 }
|
|
2948
|
|
2949 if (regno < FIRST_PSEUDO_REGISTER
|
|
2950 || pseudo_for_reload_consideration_p (regno))
|
|
2951 {
|
|
2952 if (GET_CODE (reg) == SUBREG
|
|
2953 && !DF_REF_FLAGS_IS_SET (use,
|
|
2954 DF_REF_SIGN_EXTRACT
|
|
2955 | DF_REF_ZERO_EXTRACT))
|
|
2956 {
|
|
2957 unsigned int start = SUBREG_BYTE (reg);
|
|
2958 unsigned int last = start
|
|
2959 + GET_MODE_SIZE (GET_MODE (reg));
|
|
2960
|
|
2961 init_live_subregs
|
|
2962 (bitmap_bit_p (live_relevant_regs, regno),
|
|
2963 live_subregs, live_subregs_used, regno, reg);
|
|
2964
|
|
2965 /* Ignore the paradoxical bits. */
|
|
2966 if ((int)last > live_subregs_used[regno])
|
|
2967 last = live_subregs_used[regno];
|
|
2968
|
|
2969 while (start < last)
|
|
2970 {
|
|
2971 SET_BIT (live_subregs[regno], start);
|
|
2972 start++;
|
|
2973 }
|
|
2974 }
|
|
2975 else
|
|
2976 /* Resetting the live_subregs_used is
|
|
2977 effectively saying do not use the subregs
|
|
2978 because we are reading the whole
|
|
2979 pseudo. */
|
|
2980 live_subregs_used[regno] = 0;
|
|
2981 bitmap_set_bit (live_relevant_regs, regno);
|
|
2982 }
|
|
2983 }
|
|
2984 }
|
|
2985 }
|
|
2986
|
|
2987 /* FIXME!! The following code is a disaster. Reload needs to see the
|
|
2988 labels and jump tables that are just hanging out in between
|
|
2989 the basic blocks. See pr33676. */
|
|
2990 insn = BB_HEAD (bb);
|
|
2991
|
|
2992 /* Skip over the barriers and cruft. */
|
|
2993 while (insn && (BARRIER_P (insn) || NOTE_P (insn)
|
|
2994 || BLOCK_FOR_INSN (insn) == bb))
|
|
2995 insn = PREV_INSN (insn);
|
|
2996
|
|
2997 /* While we add anything except barriers and notes, the focus is
|
|
2998 to get the labels and jump tables into the
|
|
2999 reload_insn_chain. */
|
|
3000 while (insn)
|
|
3001 {
|
|
3002 if (!NOTE_P (insn) && !BARRIER_P (insn))
|
|
3003 {
|
|
3004 if (BLOCK_FOR_INSN (insn))
|
|
3005 break;
|
|
3006
|
|
3007 c = new_insn_chain ();
|
|
3008 c->next = next;
|
|
3009 next = c;
|
|
3010 *p = c;
|
|
3011 p = &c->prev;
|
|
3012
|
|
3013 /* The block makes no sense here, but it is what the old
|
|
3014 code did. */
|
|
3015 c->block = bb->index;
|
|
3016 c->insn = insn;
|
|
3017 bitmap_copy (&c->live_throughout, live_relevant_regs);
|
|
3018 }
|
|
3019 insn = PREV_INSN (insn);
|
|
3020 }
|
|
3021 }
|
|
3022
|
|
3023 for (i = 0; i < (unsigned int) max_regno; i++)
|
|
3024 if (live_subregs[i])
|
|
3025 free (live_subregs[i]);
|
|
3026
|
|
3027 reload_insn_chain = c;
|
|
3028 *p = NULL;
|
|
3029
|
|
3030 free (live_subregs);
|
|
3031 free (live_subregs_used);
|
|
3032 BITMAP_FREE (live_relevant_regs);
|
|
3033 BITMAP_FREE (elim_regset);
|
|
3034
|
|
3035 if (dump_file)
|
|
3036 print_insn_chains (dump_file);
|
|
3037 }
|
|
3038
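/* A standalone sketch (not used by the allocator) of the word rounding
   applied to subreg byte ranges in build_insn_chain above; WORD stands
   in for UNITS_PER_WORD and the function name is invented.  With
   WORD == 4, a subreg covering bytes [6, 7) is widened to the containing
   word [4, 8).  */
#if 0
static void
round_byte_range_to_words (unsigned int *start, unsigned int *last,
                           unsigned int word)
{
  *start = *start / word * word;                /* round down */
  *last = (*last + word - 1) / word * word;     /* round up */
}
#endif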
|
|
3039
|
|
3040
|
|
3041 /* All natural loops. */
|
|
3042 struct loops ira_loops;
|
|
3043
|
|
3044 /* True if we have allocno conflicts. It is false for non-optimized
|
|
3045 mode or when the conflict table is too big. */
|
|
3046 bool ira_conflicts_p;
|
|
3047
|
|
3048 /* This is the main entry point of IRA.  */
|
|
3049 static void
|
|
3050 ira (FILE *f)
|
|
3051 {
|
|
3052 int overall_cost_before, allocated_reg_info_size;
|
|
3053 bool loops_p;
|
|
3054 int max_regno_before_ira, ira_max_point_before_emit;
|
|
3055 int rebuild_p;
|
|
3056 int saved_flag_ira_share_spill_slots;
|
|
3057 basic_block bb;
|
|
3058
|
|
3059 timevar_push (TV_IRA);
|
|
3060
|
|
3061 if (flag_ira_verbose < 10)
|
|
3062 {
|
|
3063 internal_flag_ira_verbose = flag_ira_verbose;
|
|
3064 ira_dump_file = f;
|
|
3065 }
|
|
3066 else
|
|
3067 {
|
|
3068 internal_flag_ira_verbose = flag_ira_verbose - 10;
|
|
3069 ira_dump_file = stderr;
|
|
3070 }
|
|
3071
|
|
  ira_conflicts_p = optimize > 0;
  setup_prohibited_mode_move_regs ();

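  /* Set up the dataflow problems needed below: the note problem for
     REG_DEAD/REG_UNUSED notes and, at -O1, the LIVE problem.  */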
  df_note_add_problem ();

  if (optimize == 1)
    {
      df_live_add_problem ();
      df_live_set_all_dirty ();
    }
#ifdef ENABLE_CHECKING
  df->changeable_flags |= DF_VERIFY_SCHEDULED;
#endif
  df_analyze ();
  df_clear_flags (DF_NO_INSN_RESCAN);
  regstat_init_n_sets_and_refs ();
  regstat_compute_ri ();

  /* If we are not optimizing, then this is the only place before
     register allocation where dataflow is done.  And that is needed
     to generate these warnings.  */
  if (warn_clobbered)
    generate_setjmp_warnings ();

  /* Determine whether the current function is a leaf before running
     IRA, since this can impact optimizations done by the prologue and
     epilogue, thus changing register elimination offsets.  */
  current_function_is_leaf = leaf_function_p ();

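  /* Record pseudos equivalent to a constant, invariant or memory
     location; a nonzero result means jump labels must be rebuilt
     (done below when optimizing).  */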
  rebuild_p = update_equiv_regs ();

#ifndef IRA_NO_OBSTACK
  gcc_obstack_init (&ira_obstack);
#endif
  bitmap_obstack_initialize (&ira_bitmap_obstack);
  if (optimize)
    {
      max_regno = max_reg_num ();
      ira_reg_equiv_len = max_regno;
      ira_reg_equiv_invariant_p
        = (bool *) ira_allocate (max_regno * sizeof (bool));
      memset (ira_reg_equiv_invariant_p, 0, max_regno * sizeof (bool));
      ira_reg_equiv_const = (rtx *) ira_allocate (max_regno * sizeof (rtx));
      memset (ira_reg_equiv_const, 0, max_regno * sizeof (rtx));
      find_reg_equiv_invariant_const ();
      if (rebuild_p)
        {
          timevar_push (TV_JUMP);
          rebuild_jump_labels (get_insns ());
          purge_all_dead_edges ();
          timevar_pop (TV_JUMP);
        }
    }

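  /* Remember the current number of pseudos: register info for pseudos
     created later by IRA is expanded from this size.  */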
  max_regno_before_ira = allocated_reg_info_size = max_reg_num ();
  allocate_reg_info ();
  setup_eliminable_regset ();

  ira_overall_cost = ira_reg_cost = ira_mem_cost = 0;
  ira_load_cost = ira_store_cost = ira_shuffle_cost = 0;
  ira_move_loops_num = ira_additional_jumps_num = 0;

  ira_assert (current_loops == NULL);
  flow_loops_find (&ira_loops);
  current_loops = &ira_loops;

  if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL)
    fprintf (ira_dump_file, "Building IRA IR\n");
  loops_p = ira_build (optimize
                       && (flag_ira_region == IRA_REGION_ALL
                           || flag_ira_region == IRA_REGION_MIXED));

  ira_assert (ira_conflicts_p || !loops_p);

  saved_flag_ira_share_spill_slots = flag_ira_share_spill_slots;
  if (too_high_register_pressure_p ())
    /* It is just wasting the compiler's time to pack spilled pseudos
       into stack slots in this case -- prohibit it.  */
    flag_ira_share_spill_slots = FALSE;

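  /* Assign hard registers to allocnos by graph coloring.  */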
  ira_color ();

  ira_max_point_before_emit = ira_max_point;

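  /* Emit the move insns needed on region borders; this can create new
     pseudos.  */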
  ira_emit (loops_p);

  if (ira_conflicts_p)
    {
      max_regno = max_reg_num ();

      if (! loops_p)
        ira_initiate_assign ();
      else
        {
          expand_reg_info (allocated_reg_info_size);
          setup_preferred_alternate_classes_for_new_pseudos
            (allocated_reg_info_size);
          allocated_reg_info_size = max_regno;

          if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL)
            fprintf (ira_dump_file, "Flattening IR\n");
          ira_flattening (max_regno_before_ira, ira_max_point_before_emit);
          /* New insns were generated: add notes and recalculate live
             info.  */
          df_analyze ();

          flow_loops_find (&ira_loops);
          current_loops = &ira_loops;

          setup_allocno_assignment_flags ();
          ira_initiate_assign ();
          ira_reassign_conflict_allocnos (max_regno);
        }
    }

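  /* Publish the allocation by copying the allocno hard register
     assignments into reg_renumber.  */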
  setup_reg_renumber ();

  calculate_allocation_cost ();

#ifdef ENABLE_IRA_CHECKING
  if (ira_conflicts_p)
    check_allocation ();
#endif

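  /* Clean up trivially dead insns and recompute the pseudo count
     before sizing the reload data below.  */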
  delete_trivially_dead_insns (get_insns (), max_reg_num ());
  max_regno = max_reg_num ();

  /* Grow the reg_equiv_memory_loc array to cover all pseudos.  */
  VEC_safe_grow (rtx, gc, reg_equiv_memory_loc_vec, max_regno);
  memset (VEC_address (rtx, reg_equiv_memory_loc_vec), 0,
          sizeof (rtx) * max_regno);
  reg_equiv_memory_loc = VEC_address (rtx, reg_equiv_memory_loc_vec);

  if (max_regno != max_regno_before_ira)
    {
      regstat_free_n_sets_and_refs ();
      regstat_free_ri ();
      regstat_init_n_sets_and_refs ();
      regstat_compute_ri ();
    }

  allocate_initial_values (reg_equiv_memory_loc);

  overall_cost_before = ira_overall_cost;
  if (ira_conflicts_p)
    {
      fix_reg_equiv_init ();

#ifdef ENABLE_IRA_CHECKING
      print_redundant_copies ();
#endif

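      /* Set up the table of stack slots used for pseudos spilled by
         reload.  */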
      ira_spilled_reg_stack_slots_num = 0;
      ira_spilled_reg_stack_slots
        = ((struct ira_spilled_reg_stack_slot *)
           ira_allocate (max_regno
                         * sizeof (struct ira_spilled_reg_stack_slot)));
      memset (ira_spilled_reg_stack_slots, 0,
              max_regno * sizeof (struct ira_spilled_reg_stack_slot));
    }

  timevar_pop (TV_IRA);

  timevar_push (TV_RELOAD);
  df_set_flags (DF_NO_INSN_RESCAN);
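  /* Build the list of insn_chain structures that reload works from.  */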
  build_insn_chain ();

  reload_completed = !reload (get_insns (), ira_conflicts_p);

  timevar_pop (TV_RELOAD);

  timevar_push (TV_IRA);

  if (ira_conflicts_p)
    {
      ira_free (ira_spilled_reg_stack_slots);

      ira_finish_assign ();
    }
  if (internal_flag_ira_verbose > 0 && ira_dump_file != NULL
      && overall_cost_before != ira_overall_cost)
    fprintf (ira_dump_file, "+++Overall after reload %d\n", ira_overall_cost);
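  /* Free the whole IRA internal representation.  */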
  ira_destroy ();

  flag_ira_share_spill_slots = saved_flag_ira_share_spill_slots;

  flow_loops_free (&ira_loops);
  free_dominance_info (CDI_DOMINATORS);
  FOR_ALL_BB (bb)
    bb->loop_father = NULL;
  current_loops = NULL;

  regstat_free_ri ();
  regstat_free_n_sets_and_refs ();

  if (optimize)
    {
      cleanup_cfg (CLEANUP_EXPENSIVE);

      ira_free (ira_reg_equiv_invariant_p);
      ira_free (ira_reg_equiv_const);
    }

  bitmap_obstack_release (&ira_bitmap_obstack);
#ifndef IRA_NO_OBSTACK
  obstack_free (&ira_obstack, NULL);
#endif

  /* The code after reload has changed so much that at this point we
     might as well just rescan everything.  Note that
     df_rescan_all_insns is not going to help here because it does not
     touch the artificial uses and defs.  */
  df_finish_pass (true);
  if (optimize > 1)
    df_live_add_problem ();
  df_scan_alloc (NULL);
  df_scan_blocks ();

  if (optimize)
    df_analyze ();

  timevar_pop (TV_IRA);
}


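/* IRA is an unconditional pass, so its gate always returns true.  */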
static bool
gate_ira (void)
{
  return true;
}

/* Run the integrated register allocator.  */
static unsigned int
rest_of_handle_ira (void)
{
  ira (dump_file);
  return 0;
}

struct rtl_opt_pass pass_ira =
{
 {
  RTL_PASS,
  "ira",                                /* name */
  gate_ira,                             /* gate */
  rest_of_handle_ira,                   /* execute */
  NULL,                                 /* sub */
  NULL,                                 /* next */
  0,                                    /* static_pass_number */
  0,                                    /* tv_id */
  0,                                    /* properties_required */
  0,                                    /* properties_provided */
  0,                                    /* properties_destroyed */
  0,                                    /* todo_flags_start */
  TODO_dump_func |
  TODO_ggc_collect                      /* todo_flags_finish */
 }
};