Mercurial > hg > CbC > CbC_gcc
comparison gcc/lower-subreg.c @ 111:04ced10e8804
gcc 7
author | kono |
---|---|
date | Fri, 27 Oct 2017 22:46:09 +0900 |
parents | f6334be47118 |
children | 84e7813d76e9 |
comparison
equal
deleted
inserted
replaced
68:561a7518be6b | 111:04ced10e8804 |
---|---|
1 /* Decompose multiword subregs. | 1 /* Decompose multiword subregs. |
2 Copyright (C) 2007, 2008, 2009, 2010 Free Software Foundation, Inc. | 2 Copyright (C) 2007-2017 Free Software Foundation, Inc. |
3 Contributed by Richard Henderson <rth@redhat.com> | 3 Contributed by Richard Henderson <rth@redhat.com> |
4 Ian Lance Taylor <iant@google.com> | 4 Ian Lance Taylor <iant@google.com> |
5 | 5 |
6 This file is part of GCC. | 6 This file is part of GCC. |
7 | 7 |
20 <http://www.gnu.org/licenses/>. */ | 20 <http://www.gnu.org/licenses/>. */ |
21 | 21 |
22 #include "config.h" | 22 #include "config.h" |
23 #include "system.h" | 23 #include "system.h" |
24 #include "coretypes.h" | 24 #include "coretypes.h" |
25 #include "machmode.h" | 25 #include "backend.h" |
26 #include "tm.h" | |
27 #include "rtl.h" | 26 #include "rtl.h" |
27 #include "tree.h" | |
28 #include "cfghooks.h" | |
29 #include "df.h" | |
30 #include "memmodel.h" | |
28 #include "tm_p.h" | 31 #include "tm_p.h" |
29 #include "timevar.h" | 32 #include "expmed.h" |
30 #include "flags.h" | |
31 #include "insn-config.h" | 33 #include "insn-config.h" |
32 #include "obstack.h" | 34 #include "emit-rtl.h" |
33 #include "basic-block.h" | |
34 #include "recog.h" | 35 #include "recog.h" |
35 #include "bitmap.h" | 36 #include "cfgrtl.h" |
37 #include "cfgbuild.h" | |
36 #include "dce.h" | 38 #include "dce.h" |
37 #include "expr.h" | 39 #include "expr.h" |
38 #include "except.h" | |
39 #include "regs.h" | |
40 #include "tree-pass.h" | 40 #include "tree-pass.h" |
41 #include "df.h" | 41 #include "lower-subreg.h" |
42 | 42 #include "rtl-iter.h" |
43 #ifdef STACK_GROWS_DOWNWARD | 43 #include "target.h" |
44 # undef STACK_GROWS_DOWNWARD | 44 |
45 # define STACK_GROWS_DOWNWARD 1 | |
46 #else | |
47 # define STACK_GROWS_DOWNWARD 0 | |
48 #endif | |
49 | |
50 DEF_VEC_P (bitmap); | |
51 DEF_VEC_ALLOC_P (bitmap,heap); | |
52 | 45 |
53 /* Decompose multi-word pseudo-registers into individual | 46 /* Decompose multi-word pseudo-registers into individual |
54 pseudo-registers when possible. This is possible when all the uses | 47 pseudo-registers when possible and profitable. This is possible |
55 of a multi-word register are via SUBREG, or are copies of the | 48 when all the uses of a multi-word register are via SUBREG, or are |
56 register to another location. Breaking apart the register permits | 49 copies of the register to another location. Breaking apart the |
57 more CSE and permits better register allocation. */ | 50 register permits more CSE and permits better register allocation. |
51 This is profitable if the machine does not have move instructions | |
52 to do this. | |
53 | |
54 This pass only splits moves with modes that are wider than | |
55 word_mode and ASHIFTs, LSHIFTRTs, ASHIFTRTs and ZERO_EXTENDs with | |
56 integer modes that are twice the width of word_mode. The latter | |
57 could be generalized if there was a need to do this, but the trend in | |
58 architectures is to not need this. | |
59 | |
60 There are two useful preprocessor defines for use by maintainers: | |
61 | |
62 #define LOG_COSTS 1 | |
63 | |
64 if you wish to see the actual cost estimates that are being used | |
65 for each mode wider than word mode and the cost estimates for zero | |
66 extension and the shifts. This can be useful when port maintainers | |
67 are tuning insn rtx costs. | |
68 | |
69 #define FORCE_LOWERING 1 | |
70 | |
71 if you wish to test the pass with all the transformation forced on. | |
72 This can be useful for finding bugs in the transformations. */ | |
73 | |
74 #define LOG_COSTS 0 | |
75 #define FORCE_LOWERING 0 | |
58 | 76 |
59 /* Bit N in this bitmap is set if regno N is used in a context in | 77 /* Bit N in this bitmap is set if regno N is used in a context in |
60 which we can decompose it. */ | 78 which we can decompose it. */ |
61 static bitmap decomposable_context; | 79 static bitmap decomposable_context; |
62 | 80 |
70 avoid generating accesses to its subwords in integer modes. */ | 88 avoid generating accesses to its subwords in integer modes. */ |
71 static bitmap subreg_context; | 89 static bitmap subreg_context; |
72 | 90 |
73 /* Bit N in the bitmap in element M of this array is set if there is a | 91 /* Bit N in the bitmap in element M of this array is set if there is a |
74 copy from reg M to reg N. */ | 92 copy from reg M to reg N. */ |
75 static VEC(bitmap,heap) *reg_copy_graph; | 93 static vec<bitmap> reg_copy_graph; |
76 | 94 |
77 /* Return whether X is a simple object which we can take a word_mode | 95 struct target_lower_subreg default_target_lower_subreg; |
78 subreg of. */ | 96 #if SWITCHABLE_TARGET |
97 struct target_lower_subreg *this_target_lower_subreg | |
98 = &default_target_lower_subreg; | |
99 #endif | |
100 | |
101 #define twice_word_mode \ | |
102 this_target_lower_subreg->x_twice_word_mode | |
103 #define choices \ | |
104 this_target_lower_subreg->x_choices | |
105 | |
106 /* RTXes used while computing costs. */ | |
107 struct cost_rtxes { | |
108 /* Source and target registers. */ | |
109 rtx source; | |
110 rtx target; | |
111 | |
112 /* A twice_word_mode ZERO_EXTEND of SOURCE. */ | |
113 rtx zext; | |
114 | |
115 /* A shift of SOURCE. */ | |
116 rtx shift; | |
117 | |
118 /* A SET of TARGET. */ | |
119 rtx set; | |
120 }; | |
121 | |
122 /* Return the cost of a CODE shift in mode MODE by OP1 bits, using the | |
123 rtxes in RTXES. SPEED_P selects between the speed and size cost. */ | |
124 | |
125 static int | |
126 shift_cost (bool speed_p, struct cost_rtxes *rtxes, enum rtx_code code, | |
127 machine_mode mode, int op1) | |
128 { | |
129 PUT_CODE (rtxes->shift, code); | |
130 PUT_MODE (rtxes->shift, mode); | |
131 PUT_MODE (rtxes->source, mode); | |
132 XEXP (rtxes->shift, 1) = GEN_INT (op1); | |
133 return set_src_cost (rtxes->shift, mode, speed_p); | |
134 } | |
135 | |
136 /* For each X in the range [0, BITS_PER_WORD), set SPLITTING[X] | |
137 to true if it is profitable to split a double-word CODE shift | |
138 of X + BITS_PER_WORD bits. SPEED_P says whether we are testing | |
139 for speed or size profitability. | |
140 | |
141 Use the rtxes in RTXES to calculate costs. WORD_MOVE_ZERO_COST is | |
142 the cost of moving zero into a word-mode register. WORD_MOVE_COST | |
143 is the cost of moving between word registers. */ | |
144 | |
145 static void | |
146 compute_splitting_shift (bool speed_p, struct cost_rtxes *rtxes, | |
147 bool *splitting, enum rtx_code code, | |
148 int word_move_zero_cost, int word_move_cost) | |
149 { | |
150 int wide_cost, narrow_cost, upper_cost, i; | |
151 | |
152 for (i = 0; i < BITS_PER_WORD; i++) | |
153 { | |
154 wide_cost = shift_cost (speed_p, rtxes, code, twice_word_mode, | |
155 i + BITS_PER_WORD); | |
156 if (i == 0) | |
157 narrow_cost = word_move_cost; | |
158 else | |
159 narrow_cost = shift_cost (speed_p, rtxes, code, word_mode, i); | |
160 | |
161 if (code != ASHIFTRT) | |
162 upper_cost = word_move_zero_cost; | |
163 else if (i == BITS_PER_WORD - 1) | |
164 upper_cost = word_move_cost; | |
165 else | |
166 upper_cost = shift_cost (speed_p, rtxes, code, word_mode, | |
167 BITS_PER_WORD - 1); | |
168 | |
169 if (LOG_COSTS) | |
170 fprintf (stderr, "%s %s by %d: original cost %d, split cost %d + %d\n", | |
171 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code), | |
172 i + BITS_PER_WORD, wide_cost, narrow_cost, upper_cost); | |
173 | |
174 if (FORCE_LOWERING || wide_cost >= narrow_cost + upper_cost) | |
175 splitting[i] = true; | |
176 } | |
177 } | |
178 | |
179 /* Compute what we should do when optimizing for speed or size; SPEED_P | |
180 selects which. Use RTXES for computing costs. */ | |
181 | |
182 static void | |
183 compute_costs (bool speed_p, struct cost_rtxes *rtxes) | |
184 { | |
185 unsigned int i; | |
186 int word_move_zero_cost, word_move_cost; | |
187 | |
188 PUT_MODE (rtxes->target, word_mode); | |
189 SET_SRC (rtxes->set) = CONST0_RTX (word_mode); | |
190 word_move_zero_cost = set_rtx_cost (rtxes->set, speed_p); | |
191 | |
192 SET_SRC (rtxes->set) = rtxes->source; | |
193 word_move_cost = set_rtx_cost (rtxes->set, speed_p); | |
194 | |
195 if (LOG_COSTS) | |
196 fprintf (stderr, "%s move: from zero cost %d, from reg cost %d\n", | |
197 GET_MODE_NAME (word_mode), word_move_zero_cost, word_move_cost); | |
198 | |
199 for (i = 0; i < MAX_MACHINE_MODE; i++) | |
200 { | |
201 machine_mode mode = (machine_mode) i; | |
202 int factor = GET_MODE_SIZE (mode) / UNITS_PER_WORD; | |
203 if (factor > 1) | |
204 { | |
205 int mode_move_cost; | |
206 | |
207 PUT_MODE (rtxes->target, mode); | |
208 PUT_MODE (rtxes->source, mode); | |
209 mode_move_cost = set_rtx_cost (rtxes->set, speed_p); | |
210 | |
211 if (LOG_COSTS) | |
212 fprintf (stderr, "%s move: original cost %d, split cost %d * %d\n", | |
213 GET_MODE_NAME (mode), mode_move_cost, | |
214 word_move_cost, factor); | |
215 | |
216 if (FORCE_LOWERING || mode_move_cost >= word_move_cost * factor) | |
217 { | |
218 choices[speed_p].move_modes_to_split[i] = true; | |
219 choices[speed_p].something_to_do = true; | |
220 } | |
221 } | |
222 } | |
223 | |
224 /* For the moves and shifts, the only case that is checked is one | |
225 where the mode of the target is an integer mode twice the width | |
226 of the word_mode. | |
227 | |
228 If it is not profitable to split a double word move then do not | |
229 even consider the shifts or the zero extension. */ | |
230 if (choices[speed_p].move_modes_to_split[(int) twice_word_mode]) | |
231 { | |
232 int zext_cost; | |
233 | |
234 /* The only case to check here is whether moving the upper part with a | |
235 zero is cheaper than doing the zext itself. */ | |
236 PUT_MODE (rtxes->source, word_mode); | |
237 zext_cost = set_src_cost (rtxes->zext, twice_word_mode, speed_p); | |
238 | |
239 if (LOG_COSTS) | |
240 fprintf (stderr, "%s %s: original cost %d, split cost %d + %d\n", | |
241 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (ZERO_EXTEND), | |
242 zext_cost, word_move_cost, word_move_zero_cost); | |
243 | |
244 if (FORCE_LOWERING || zext_cost >= word_move_cost + word_move_zero_cost) | |
245 choices[speed_p].splitting_zext = true; | |
246 | |
247 compute_splitting_shift (speed_p, rtxes, | |
248 choices[speed_p].splitting_ashift, ASHIFT, | |
249 word_move_zero_cost, word_move_cost); | |
250 compute_splitting_shift (speed_p, rtxes, | |
251 choices[speed_p].splitting_lshiftrt, LSHIFTRT, | |
252 word_move_zero_cost, word_move_cost); | |
253 compute_splitting_shift (speed_p, rtxes, | |
254 choices[speed_p].splitting_ashiftrt, ASHIFTRT, | |
255 word_move_zero_cost, word_move_cost); | |
256 } | |
257 } | |
258 | |
259 /* Do one-per-target initialisation. This involves determining | |
260 which operations on the machine are profitable. If none are found, | |
261 then the pass just returns when called. */ | |
262 | |
263 void | |
264 init_lower_subreg (void) | |
265 { | |
266 struct cost_rtxes rtxes; | |
267 | |
268 memset (this_target_lower_subreg, 0, sizeof (*this_target_lower_subreg)); | |
269 | |
270 twice_word_mode = GET_MODE_2XWIDER_MODE (word_mode).require (); | |
271 | |
272 rtxes.target = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 1); | |
273 rtxes.source = gen_rtx_REG (word_mode, LAST_VIRTUAL_REGISTER + 2); | |
274 rtxes.set = gen_rtx_SET (rtxes.target, rtxes.source); | |
275 rtxes.zext = gen_rtx_ZERO_EXTEND (twice_word_mode, rtxes.source); | |
276 rtxes.shift = gen_rtx_ASHIFT (twice_word_mode, rtxes.source, const0_rtx); | |
277 | |
278 if (LOG_COSTS) | |
279 fprintf (stderr, "\nSize costs\n==========\n\n"); | |
280 compute_costs (false, &rtxes); | |
281 | |
282 if (LOG_COSTS) | |
283 fprintf (stderr, "\nSpeed costs\n===========\n\n"); | |
284 compute_costs (true, &rtxes); | |
285 } | |
79 | 286 |
80 static bool | 287 static bool |
81 simple_move_operand (rtx x) | 288 simple_move_operand (rtx x) |
82 { | 289 { |
83 if (GET_CODE (x) == SUBREG) | 290 if (GET_CODE (x) == SUBREG) |
92 || GET_CODE (x) == CONST) | 299 || GET_CODE (x) == CONST) |
93 return false; | 300 return false; |
94 | 301 |
95 if (MEM_P (x) | 302 if (MEM_P (x) |
96 && (MEM_VOLATILE_P (x) | 303 && (MEM_VOLATILE_P (x) |
97 || mode_dependent_address_p (XEXP (x, 0)))) | 304 || mode_dependent_address_p (XEXP (x, 0), MEM_ADDR_SPACE (x)))) |
98 return false; | 305 return false; |
99 | 306 |
100 return true; | 307 return true; |
101 } | 308 } |
102 | 309 |
103 /* If INSN is a single set between two objects, return the single set. | 310 /* If INSN is a single set between two objects that we want to split, |
104 Such an insn can always be decomposed. INSN should have been | 311 return the single set. SPEED_P says whether we are optimizing |
105 passed to recog and extract_insn before this is called. */ | 312 INSN for speed or size. |
313 | |
314 INSN should have been passed to recog and extract_insn before this | |
315 is called. */ | |
106 | 316 |
107 static rtx | 317 static rtx |
108 simple_move (rtx insn) | 318 simple_move (rtx_insn *insn, bool speed_p) |
109 { | 319 { |
110 rtx x; | 320 rtx x; |
111 rtx set; | 321 rtx set; |
112 enum machine_mode mode; | 322 machine_mode mode; |
113 | 323 |
114 if (recog_data.n_operands != 2) | 324 if (recog_data.n_operands != 2) |
115 return NULL_RTX; | 325 return NULL_RTX; |
116 | 326 |
117 set = single_set (insn); | 327 set = single_set (insn); |
136 /* We try to decompose in integer modes, to avoid generating | 346 /* We try to decompose in integer modes, to avoid generating |
137 inefficient code copying between integer and floating point | 347 inefficient code copying between integer and floating point |
138 registers. That means that we can't decompose if this is a | 348 registers. That means that we can't decompose if this is a |
139 non-integer mode for which there is no integer mode of the same | 349 non-integer mode for which there is no integer mode of the same |
140 size. */ | 350 size. */ |
141 mode = GET_MODE (SET_SRC (set)); | 351 mode = GET_MODE (SET_DEST (set)); |
142 if (!SCALAR_INT_MODE_P (mode) | 352 if (!SCALAR_INT_MODE_P (mode) |
143 && (mode_for_size (GET_MODE_SIZE (mode) * BITS_PER_UNIT, MODE_INT, 0) | 353 && !int_mode_for_size (GET_MODE_BITSIZE (mode), 0).exists ()) |
144 == BLKmode)) | |
145 return NULL_RTX; | 354 return NULL_RTX; |
146 | 355 |
147 /* Reject PARTIAL_INT modes. They are used for processor specific | 356 /* Reject PARTIAL_INT modes. They are used for processor specific |
148 purposes and it's probably best not to tamper with them. */ | 357 purposes and it's probably best not to tamper with them. */ |
149 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) | 358 if (GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) |
150 return NULL_RTX; | 359 return NULL_RTX; |
151 | 360 |
361 if (!choices[speed_p].move_modes_to_split[(int) mode]) | |
362 return NULL_RTX; | |
363 | |
152 return set; | 364 return set; |
153 } | 365 } |
154 | 366 |
155 /* If SET is a copy from one multi-word pseudo-register to another, | 367 /* If SET is a copy from one multi-word pseudo-register to another, |
156 record that in reg_copy_graph. Return whether it is such a | 368 record that in reg_copy_graph. Return whether it is such a |
170 rd = REGNO (dest); | 382 rd = REGNO (dest); |
171 rs = REGNO (src); | 383 rs = REGNO (src); |
172 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs)) | 384 if (HARD_REGISTER_NUM_P (rd) || HARD_REGISTER_NUM_P (rs)) |
173 return false; | 385 return false; |
174 | 386 |
175 if (GET_MODE_SIZE (GET_MODE (dest)) <= UNITS_PER_WORD) | 387 b = reg_copy_graph[rs]; |
176 return false; | |
177 | |
178 b = VEC_index (bitmap, reg_copy_graph, rs); | |
179 if (b == NULL) | 388 if (b == NULL) |
180 { | 389 { |
181 b = BITMAP_ALLOC (NULL); | 390 b = BITMAP_ALLOC (NULL); |
182 VEC_replace (bitmap, reg_copy_graph, rs, b); | 391 reg_copy_graph[rs] = b; |
183 } | 392 } |
184 | 393 |
185 bitmap_set_bit (b, rd); | 394 bitmap_set_bit (b, rd); |
186 | 395 |
187 return true; | 396 return true; |
194 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */ | 403 copies of registers which are in NON_DECOMPOSABLE_CONTEXT. */ |
195 | 404 |
196 static void | 405 static void |
197 propagate_pseudo_copies (void) | 406 propagate_pseudo_copies (void) |
198 { | 407 { |
199 bitmap queue, propagate; | 408 auto_bitmap queue, propagate; |
200 | |
201 queue = BITMAP_ALLOC (NULL); | |
202 propagate = BITMAP_ALLOC (NULL); | |
203 | 409 |
204 bitmap_copy (queue, decomposable_context); | 410 bitmap_copy (queue, decomposable_context); |
205 do | 411 do |
206 { | 412 { |
207 bitmap_iterator iter; | 413 bitmap_iterator iter; |
209 | 415 |
210 bitmap_clear (propagate); | 416 bitmap_clear (propagate); |
211 | 417 |
212 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter) | 418 EXECUTE_IF_SET_IN_BITMAP (queue, 0, i, iter) |
213 { | 419 { |
214 bitmap b = VEC_index (bitmap, reg_copy_graph, i); | 420 bitmap b = reg_copy_graph[i]; |
215 if (b) | 421 if (b) |
216 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context); | 422 bitmap_ior_and_compl_into (propagate, b, non_decomposable_context); |
217 } | 423 } |
218 | 424 |
219 bitmap_and_compl (queue, propagate, decomposable_context); | 425 bitmap_and_compl (queue, propagate, decomposable_context); |
220 bitmap_ior_into (decomposable_context, propagate); | 426 bitmap_ior_into (decomposable_context, propagate); |
221 } | 427 } |
222 while (!bitmap_empty_p (queue)); | 428 while (!bitmap_empty_p (queue)); |
223 | |
224 BITMAP_FREE (queue); | |
225 BITMAP_FREE (propagate); | |
226 } | 429 } |
227 | 430 |
228 /* A pointer to one of these values is passed to | 431 /* A pointer to one of these values is passed to |
229 find_decomposable_subregs via for_each_rtx. */ | 432 find_decomposable_subregs. */ |
230 | 433 |
231 enum classify_move_insn | 434 enum classify_move_insn |
232 { | 435 { |
233 /* Not a simple move from one location to another. */ | 436 /* Not a simple move from one location to another. */ |
234 NOT_SIMPLE_MOVE, | 437 NOT_SIMPLE_MOVE, |
235 /* A simple move from one pseudo-register to another. */ | 438 /* A simple move we want to decompose. */ |
236 SIMPLE_PSEUDO_REG_MOVE, | 439 DECOMPOSABLE_SIMPLE_MOVE, |
237 /* A simple move involving a non-pseudo-register. */ | 440 /* Any other simple move. */ |
238 SIMPLE_MOVE | 441 SIMPLE_MOVE |
239 }; | 442 }; |
240 | 443 |
241 /* This is called via for_each_rtx. If we find a SUBREG which we | 444 /* If we find a SUBREG in *LOC which we could use to decompose a |
242 could use to decompose a pseudo-register, set a bit in | 445 pseudo-register, set a bit in DECOMPOSABLE_CONTEXT. If we find an |
243 DECOMPOSABLE_CONTEXT. If we find an unadorned register which is | 446 unadorned register which is not a simple pseudo-register copy, |
244 not a simple pseudo-register copy, DATA will point at the type of | 447 DATA will point at the type of move, and we set a bit in |
245 move, and we set a bit in DECOMPOSABLE_CONTEXT or | 448 DECOMPOSABLE_CONTEXT or NON_DECOMPOSABLE_CONTEXT as appropriate. */ |
246 NON_DECOMPOSABLE_CONTEXT as appropriate. */ | 449 |
247 | 450 static void |
248 static int | 451 find_decomposable_subregs (rtx *loc, enum classify_move_insn *pcmi) |
249 find_decomposable_subregs (rtx *px, void *data) | 452 { |
250 { | 453 subrtx_var_iterator::array_type array; |
251 enum classify_move_insn *pcmi = (enum classify_move_insn *) data; | 454 FOR_EACH_SUBRTX_VAR (iter, array, *loc, NONCONST) |
252 rtx x = *px; | 455 { |
253 | 456 rtx x = *iter; |
254 if (x == NULL_RTX) | 457 if (GET_CODE (x) == SUBREG) |
255 return 0; | |
256 | |
257 if (GET_CODE (x) == SUBREG) | |
258 { | |
259 rtx inner = SUBREG_REG (x); | |
260 unsigned int regno, outer_size, inner_size, outer_words, inner_words; | |
261 | |
262 if (!REG_P (inner)) | |
263 return 0; | |
264 | |
265 regno = REGNO (inner); | |
266 if (HARD_REGISTER_NUM_P (regno)) | |
267 return -1; | |
268 | |
269 outer_size = GET_MODE_SIZE (GET_MODE (x)); | |
270 inner_size = GET_MODE_SIZE (GET_MODE (inner)); | |
271 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; | |
272 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; | |
273 | |
274 /* We only try to decompose single word subregs of multi-word | |
275 registers. When we find one, we return -1 to avoid iterating | |
276 over the inner register. | |
277 | |
278 ??? This doesn't allow, e.g., DImode subregs of TImode values | |
279 on 32-bit targets. We would need to record the way the | |
280 pseudo-register was used, and only decompose if all the uses | |
281 were the same number and size of pieces. Hopefully this | |
282 doesn't happen much. */ | |
283 | |
284 if (outer_words == 1 && inner_words > 1) | |
285 { | 458 { |
286 bitmap_set_bit (decomposable_context, regno); | 459 rtx inner = SUBREG_REG (x); |
287 return -1; | 460 unsigned int regno, outer_size, inner_size, outer_words, inner_words; |
288 } | 461 |
289 | 462 if (!REG_P (inner)) |
290 /* If this is a cast from one mode to another, where the modes | 463 continue; |
291 have the same size, and they are not tieable, then mark this | 464 |
292 register as non-decomposable. If we decompose it we are | 465 regno = REGNO (inner); |
293 likely to mess up whatever the backend is trying to do. */ | 466 if (HARD_REGISTER_NUM_P (regno)) |
294 if (outer_words > 1 | |
295 && outer_size == inner_size | |
296 && !MODES_TIEABLE_P (GET_MODE (x), GET_MODE (inner))) | |
297 { | |
298 bitmap_set_bit (non_decomposable_context, regno); | |
299 bitmap_set_bit (subreg_context, regno); | |
300 return -1; | |
301 } | |
302 } | |
303 else if (REG_P (x)) | |
304 { | |
305 unsigned int regno; | |
306 | |
307 /* We will see an outer SUBREG before we see the inner REG, so | |
308 when we see a plain REG here it means a direct reference to | |
309 the register. | |
310 | |
311 If this is not a simple copy from one location to another, | |
312 then we can not decompose this register. If this is a simple | |
313 copy from one pseudo-register to another, and the mode is right | |
314 then we mark the register as decomposable. | |
315 Otherwise we don't say anything about this register -- | |
316 it could be decomposed, but whether that would be | |
317 profitable depends upon how it is used elsewhere. | |
318 | |
319 We only set bits in the bitmap for multi-word | |
320 pseudo-registers, since those are the only ones we care about | |
321 and it keeps the size of the bitmaps down. */ | |
322 | |
323 regno = REGNO (x); | |
324 if (!HARD_REGISTER_NUM_P (regno) | |
325 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) | |
326 { | |
327 switch (*pcmi) | |
328 { | 467 { |
329 case NOT_SIMPLE_MOVE: | 468 iter.skip_subrtxes (); |
469 continue; | |
470 } | |
471 | |
472 outer_size = GET_MODE_SIZE (GET_MODE (x)); | |
473 inner_size = GET_MODE_SIZE (GET_MODE (inner)); | |
474 outer_words = (outer_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; | |
475 inner_words = (inner_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; | |
476 | |
477 /* We only try to decompose single word subregs of multi-word | |
478 registers. When we find one, we skip its sub-rtxes to avoid iterating | |
479 over the inner register. | |
480 | |
481 ??? This doesn't allow, e.g., DImode subregs of TImode values | |
482 on 32-bit targets. We would need to record the way the | |
483 pseudo-register was used, and only decompose if all the uses | |
484 were the same number and size of pieces. Hopefully this | |
485 doesn't happen much. */ | |
486 | |
487 if (outer_words == 1 && inner_words > 1) | |
488 { | |
489 bitmap_set_bit (decomposable_context, regno); | |
490 iter.skip_subrtxes (); | |
491 continue; | |
492 } | |
493 | |
494 /* If this is a cast from one mode to another, where the modes | |
495 have the same size, and they are not tieable, then mark this | |
496 register as non-decomposable. If we decompose it we are | |
497 likely to mess up whatever the backend is trying to do. */ | |
498 if (outer_words > 1 | |
499 && outer_size == inner_size | |
500 && !targetm.modes_tieable_p (GET_MODE (x), GET_MODE (inner))) | |
501 { | |
330 bitmap_set_bit (non_decomposable_context, regno); | 502 bitmap_set_bit (non_decomposable_context, regno); |
331 break; | 503 bitmap_set_bit (subreg_context, regno); |
332 case SIMPLE_PSEUDO_REG_MOVE: | 504 iter.skip_subrtxes (); |
333 if (MODES_TIEABLE_P (GET_MODE (x), word_mode)) | 505 continue; |
334 bitmap_set_bit (decomposable_context, regno); | |
335 break; | |
336 case SIMPLE_MOVE: | |
337 break; | |
338 default: | |
339 gcc_unreachable (); | |
340 } | 506 } |
341 } | 507 } |
342 } | 508 else if (REG_P (x)) |
343 else if (MEM_P (x)) | 509 { |
344 { | 510 unsigned int regno; |
345 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE; | 511 |
346 | 512 /* We will see an outer SUBREG before we see the inner REG, so |
347 /* Any registers used in a MEM do not participate in a | 513 when we see a plain REG here it means a direct reference to |
348 SIMPLE_MOVE or SIMPLE_PSEUDO_REG_MOVE. Do our own recursion | 514 the register. |
349 here, and return -1 to block the parent's recursion. */ | 515 |
350 for_each_rtx (&XEXP (x, 0), find_decomposable_subregs, &cmi_mem); | 516 If this is not a simple copy from one location to another, |
351 return -1; | 517 then we can not decompose this register. If this is a simple |
352 } | 518 copy we want to decompose, and the mode is right, |
353 | 519 then we mark the register as decomposable. |
354 return 0; | 520 Otherwise we don't say anything about this register -- |
521 it could be decomposed, but whether that would be | |
522 profitable depends upon how it is used elsewhere. | |
523 | |
524 We only set bits in the bitmap for multi-word | |
525 pseudo-registers, since those are the only ones we care about | |
526 and it keeps the size of the bitmaps down. */ | |
527 | |
528 regno = REGNO (x); | |
529 if (!HARD_REGISTER_NUM_P (regno) | |
530 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD) | |
531 { | |
532 switch (*pcmi) | |
533 { | |
534 case NOT_SIMPLE_MOVE: | |
535 bitmap_set_bit (non_decomposable_context, regno); | |
536 break; | |
537 case DECOMPOSABLE_SIMPLE_MOVE: | |
538 if (targetm.modes_tieable_p (GET_MODE (x), word_mode)) | |
539 bitmap_set_bit (decomposable_context, regno); | |
540 break; | |
541 case SIMPLE_MOVE: | |
542 break; | |
543 default: | |
544 gcc_unreachable (); | |
545 } | |
546 } | |
547 } | |
548 else if (MEM_P (x)) | |
549 { | |
550 enum classify_move_insn cmi_mem = NOT_SIMPLE_MOVE; | |
551 | |
552 /* Any registers used in a MEM do not participate in a | |
553 SIMPLE_MOVE or DECOMPOSABLE_SIMPLE_MOVE. Do our own recursion | |
554 here, and skip the MEM's sub-rtxes to block the parent's iteration. */ | |
555 find_decomposable_subregs (&XEXP (x, 0), &cmi_mem); | |
556 iter.skip_subrtxes (); | |
557 } | |
558 } | |
355 } | 559 } |
356 | 560 |
357 /* Decompose REGNO into word-sized components. We smash the REG node | 561 /* Decompose REGNO into word-sized components. We smash the REG node |
358 in place. This ensures that (1) something goes wrong quickly if we | 562 in place. This ensures that (1) something goes wrong quickly if we |
359 fail to make some replacement, and (2) the debug information inside | 563 fail to make some replacement, and (2) the debug information inside |
390 } | 594 } |
391 | 595 |
392 /* Get a SUBREG of a CONCATN. */ | 596 /* Get a SUBREG of a CONCATN. */ |
393 | 597 |
394 static rtx | 598 static rtx |
395 simplify_subreg_concatn (enum machine_mode outermode, rtx op, | 599 simplify_subreg_concatn (machine_mode outermode, rtx op, |
396 unsigned int byte) | 600 unsigned int byte) |
397 { | 601 { |
398 unsigned int inner_size; | 602 unsigned int inner_size; |
399 enum machine_mode innermode, partmode; | 603 machine_mode innermode, partmode; |
400 rtx part; | 604 rtx part; |
401 unsigned int final_offset; | 605 unsigned int final_offset; |
402 | 606 |
403 gcc_assert (GET_CODE (op) == CONCATN); | 607 gcc_assert (GET_CODE (op) == CONCATN); |
404 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0); | 608 gcc_assert (byte % GET_MODE_SIZE (outermode) == 0); |
405 | 609 |
406 innermode = GET_MODE (op); | 610 innermode = GET_MODE (op); |
407 gcc_assert (byte < GET_MODE_SIZE (innermode)); | 611 gcc_assert (byte < GET_MODE_SIZE (innermode)); |
408 gcc_assert (GET_MODE_SIZE (outermode) <= GET_MODE_SIZE (innermode)); | 612 if (GET_MODE_SIZE (outermode) > GET_MODE_SIZE (innermode)) |
613 return NULL_RTX; | |
409 | 614 |
410 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0); | 615 inner_size = GET_MODE_SIZE (innermode) / XVECLEN (op, 0); |
411 part = XVECEXP (op, 0, byte / inner_size); | 616 part = XVECEXP (op, 0, byte / inner_size); |
412 partmode = GET_MODE (part); | 617 partmode = GET_MODE (part); |
618 | |
619 final_offset = byte % inner_size; | |
620 if (final_offset + GET_MODE_SIZE (outermode) > inner_size) | |
621 return NULL_RTX; | |
413 | 622 |
414 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of | 623 /* VECTOR_CSTs in debug expressions are expanded into CONCATN instead of |
415 regular CONST_VECTORs. They have vector or integer modes, depending | 624 regular CONST_VECTORs. They have vector or integer modes, depending |
416 on the capabilities of the target. Cope with them. */ | 625 on the capabilities of the target. Cope with them. */ |
417 if (partmode == VOIDmode && VECTOR_MODE_P (innermode)) | 626 if (partmode == VOIDmode && VECTOR_MODE_P (innermode)) |
418 partmode = GET_MODE_INNER (innermode); | 627 partmode = GET_MODE_INNER (innermode); |
419 else if (partmode == VOIDmode) | 628 else if (partmode == VOIDmode) |
420 { | 629 partmode = mode_for_size (inner_size * BITS_PER_UNIT, |
421 enum mode_class mclass = GET_MODE_CLASS (innermode); | 630 GET_MODE_CLASS (innermode), 0).require (); |
422 partmode = mode_for_size (inner_size * BITS_PER_UNIT, mclass, 0); | |
423 } | |
424 | |
425 final_offset = byte % inner_size; | |
426 if (final_offset + GET_MODE_SIZE (outermode) > inner_size) | |
427 return NULL_RTX; | |
428 | 631 |
429 return simplify_gen_subreg (outermode, part, partmode, final_offset); | 632 return simplify_gen_subreg (outermode, part, partmode, final_offset); |
430 } | 633 } |
431 | 634 |
432 /* Wrapper around simplify_gen_subreg which handles CONCATN. */ | 635 /* Wrapper around simplify_gen_subreg which handles CONCATN. */ |
433 | 636 |
434 static rtx | 637 static rtx |
435 simplify_gen_subreg_concatn (enum machine_mode outermode, rtx op, | 638 simplify_gen_subreg_concatn (machine_mode outermode, rtx op, |
436 enum machine_mode innermode, unsigned int byte) | 639 machine_mode innermode, unsigned int byte) |
437 { | 640 { |
438 rtx ret; | 641 rtx ret; |
439 | 642 |
440 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN. | 643 /* We have to handle generating a SUBREG of a SUBREG of a CONCATN. |
441 If OP is a SUBREG of a CONCATN, then it must be a simple mode | 644 If OP is a SUBREG of a CONCATN, then it must be a simple mode |
454 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op), | 657 op2 = simplify_subreg_concatn (GET_MODE (op), SUBREG_REG (op), |
455 SUBREG_BYTE (op)); | 658 SUBREG_BYTE (op)); |
456 if (op2 == NULL_RTX) | 659 if (op2 == NULL_RTX) |
457 { | 660 { |
458 /* We don't handle paradoxical subregs here. */ | 661 /* We don't handle paradoxical subregs here. */ |
459 gcc_assert (GET_MODE_SIZE (outermode) | 662 gcc_assert (!paradoxical_subreg_p (outermode, GET_MODE (op))); |
460 <= GET_MODE_SIZE (GET_MODE (op))); | 663 gcc_assert (!paradoxical_subreg_p (op)); |
461 gcc_assert (GET_MODE_SIZE (GET_MODE (op)) | |
462 <= GET_MODE_SIZE (GET_MODE (SUBREG_REG (op)))); | |
463 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op), | 664 op2 = simplify_subreg_concatn (outermode, SUBREG_REG (op), |
464 byte + SUBREG_BYTE (op)); | 665 byte + SUBREG_BYTE (op)); |
465 gcc_assert (op2 != NULL_RTX); | 666 gcc_assert (op2 != NULL_RTX); |
466 return op2; | 667 return op2; |
467 } | 668 } |
478 | 679 |
479 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then | 680 /* If we see an insn like (set (reg:DI) (subreg:DI (reg:SI) 0)) then |
480 resolve_simple_move will ask for the high part of the paradoxical | 681 resolve_simple_move will ask for the high part of the paradoxical |
481 subreg, which does not have a value. Just return a zero. */ | 682 subreg, which does not have a value. Just return a zero. */ |
482 if (ret == NULL_RTX | 683 if (ret == NULL_RTX |
483 && GET_CODE (op) == SUBREG | 684 && paradoxical_subreg_p (op)) |
484 && SUBREG_BYTE (op) == 0 | |
485 && (GET_MODE_SIZE (innermode) | |
486 > GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))))) | |
487 return CONST0_RTX (outermode); | 685 return CONST0_RTX (outermode); |
488 | 686 |
489 gcc_assert (ret != NULL_RTX); | 687 gcc_assert (ret != NULL_RTX); |
490 return ret; | 688 return ret; |
491 } | 689 } |
508 if (GET_CODE (x) != SUBREG) | 706 if (GET_CODE (x) != SUBREG) |
509 return false; | 707 return false; |
510 return resolve_reg_p (SUBREG_REG (x)); | 708 return resolve_reg_p (SUBREG_REG (x)); |
511 } | 709 } |
512 | 710 |
513 /* This is called via for_each_rtx. Look for SUBREGs which need to be | 711 /* Look for SUBREGs in *LOC which need to be decomposed. */ |
514 decomposed. */ | 712 |
515 | 713 static bool |
516 static int | 714 resolve_subreg_use (rtx *loc, rtx insn) |
517 resolve_subreg_use (rtx *px, void *data) | 715 { |
518 { | 716 subrtx_ptr_iterator::array_type array; |
519 rtx insn = (rtx) data; | 717 FOR_EACH_SUBRTX_PTR (iter, array, loc, NONCONST) |
520 rtx x = *px; | 718 { |
521 | 719 rtx *loc = *iter; |
522 if (x == NULL_RTX) | 720 rtx x = *loc; |
523 return 0; | 721 if (resolve_subreg_p (x)) |
524 | |
525 if (resolve_subreg_p (x)) | |
526 { | |
527 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x), | |
528 SUBREG_BYTE (x)); | |
529 | |
530 /* It is possible for a note to contain a reference which we can | |
531 decompose. In this case, return 1 to the caller to indicate | |
532 that the note must be removed. */ | |
533 if (!x) | |
534 { | 722 { |
535 gcc_assert (!insn); | 723 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x), |
536 return 1; | 724 SUBREG_BYTE (x)); |
725 | |
726 /* It is possible for a note to contain a reference which we can | |
727 decompose. In this case, return 1 to the caller to indicate | |
728 that the note must be removed. */ | |
729 if (!x) | |
730 { | |
731 gcc_assert (!insn); | |
732 return true; | |
733 } | |
734 | |
735 validate_change (insn, loc, x, 1); | |
736 iter.skip_subrtxes (); | |
537 } | 737 } |
538 | 738 else if (resolve_reg_p (x)) |
539 validate_change (insn, px, x, 1); | 739 /* Return 1 to the caller to indicate that we found a direct |
540 return -1; | 740 reference to a register which is being decomposed. This can |
541 } | 741 happen inside notes, multiword shift or zero-extend |
542 | 742 instructions. */ |
543 if (resolve_reg_p (x)) | 743 return true; |
544 { | 744 } |
545 /* Return 1 to the caller to indicate that we found a direct | 745 |
546 reference to a register which is being decomposed. This can | 746 return false; |
547 happen inside notes, multiword shift or zero-extend | |
548 instructions. */ | |
549 return 1; | |
550 } | |
551 | |
552 return 0; | |
553 } | |
554 | |
555 /* This is called via for_each_rtx. Look for SUBREGs which can be | |
556 decomposed and decomposed REGs that need copying. */ | |
557 | |
558 static int | |
559 adjust_decomposed_uses (rtx *px, void *data ATTRIBUTE_UNUSED) | |
560 { | |
561 rtx x = *px; | |
562 | |
563 if (x == NULL_RTX) | |
564 return 0; | |
565 | |
566 if (resolve_subreg_p (x)) | |
567 { | |
568 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x), | |
569 SUBREG_BYTE (x)); | |
570 | |
571 if (x) | |
572 *px = x; | |
573 else | |
574 x = copy_rtx (*px); | |
575 } | |
576 | |
577 if (resolve_reg_p (x)) | |
578 *px = copy_rtx (x); | |
579 | |
580 return 0; | |
581 } | 747 } |
582 | 748 |
583 /* Resolve any decomposed registers which appear in register notes on | 749 /* Resolve any decomposed registers which appear in register notes on |
584 INSN. */ | 750 INSN. */ |
585 | 751 |
586 static void | 752 static void |
587 resolve_reg_notes (rtx insn) | 753 resolve_reg_notes (rtx_insn *insn) |
588 { | 754 { |
589 rtx *pnote, note; | 755 rtx *pnote, note; |
590 | 756 |
591 note = find_reg_equal_equiv_note (insn); | 757 note = find_reg_equal_equiv_note (insn); |
592 if (note) | 758 if (note) |
593 { | 759 { |
594 int old_count = num_validated_changes (); | 760 int old_count = num_validated_changes (); |
595 if (for_each_rtx (&XEXP (note, 0), resolve_subreg_use, NULL)) | 761 if (resolve_subreg_use (&XEXP (note, 0), NULL_RTX)) |
596 remove_note (insn, note); | 762 remove_note (insn, note); |
597 else | 763 else |
598 if (old_count != num_validated_changes ()) | 764 if (old_count != num_validated_changes ()) |
599 df_notes_rescan (insn); | 765 df_notes_rescan (insn); |
600 } | 766 } |
632 if (REG_P (x)) | 798 if (REG_P (x)) |
633 { | 799 { |
634 unsigned int regno = REGNO (x); | 800 unsigned int regno = REGNO (x); |
635 | 801 |
636 if (HARD_REGISTER_NUM_P (regno)) | 802 if (HARD_REGISTER_NUM_P (regno)) |
637 return (validate_subreg (word_mode, GET_MODE (x), x, UNITS_PER_WORD) | 803 { |
638 && HARD_REGNO_MODE_OK (regno, word_mode)); | 804 unsigned int byte, num_bytes; |
805 | |
806 num_bytes = GET_MODE_SIZE (GET_MODE (x)); | |
807 for (byte = 0; byte < num_bytes; byte += UNITS_PER_WORD) | |
808 if (simplify_subreg_regno (regno, GET_MODE (x), byte, word_mode) < 0) | |
809 return false; | |
810 return true; | |
811 } | |
639 else | 812 else |
640 return !bitmap_bit_p (subreg_context, regno); | 813 return !bitmap_bit_p (subreg_context, regno); |
641 } | 814 } |
642 | 815 |
643 return true; | 816 return true; |
645 | 818 |
646 /* Decompose the registers used in a simple move SET within INSN. If | 819 /* Decompose the registers used in a simple move SET within INSN. If |
647 we don't change anything, return INSN, otherwise return the start | 820 we don't change anything, return INSN, otherwise return the start |
648 of the sequence of moves. */ | 821 of the sequence of moves. */ |
649 | 822 |
650 static rtx | 823 static rtx_insn * |
651 resolve_simple_move (rtx set, rtx insn) | 824 resolve_simple_move (rtx set, rtx_insn *insn) |
652 { | 825 { |
653 rtx src, dest, real_dest, insns; | 826 rtx src, dest, real_dest; |
654 enum machine_mode orig_mode, dest_mode; | 827 rtx_insn *insns; |
828 machine_mode orig_mode, dest_mode; | |
655 unsigned int words; | 829 unsigned int words; |
656 bool pushing; | 830 bool pushing; |
657 | 831 |
658 src = SET_SRC (set); | 832 src = SET_SRC (set); |
659 dest = SET_DEST (set); | 833 dest = SET_DEST (set); |
660 orig_mode = GET_MODE (dest); | 834 orig_mode = GET_MODE (dest); |
661 | 835 |
662 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; | 836 words = (GET_MODE_SIZE (orig_mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD; |
663 if (words <= 1) | 837 gcc_assert (words > 1); |
664 return insn; | |
665 | 838 |
666 start_sequence (); | 839 start_sequence (); |
667 | 840 |
668 /* We have to handle copying from a SUBREG of a decomposed reg where | 841 /* We have to handle copying from a SUBREG of a decomposed reg where |
669 the SUBREG is larger than word size. Rather than assume that we | 842 the SUBREG is larger than word size. Rather than assume that we |
691 && resolve_reg_p (SUBREG_REG (dest)) | 864 && resolve_reg_p (SUBREG_REG (dest)) |
692 && (SUBREG_BYTE (dest) != 0 | 865 && (SUBREG_BYTE (dest) != 0 |
693 || (GET_MODE_SIZE (orig_mode) | 866 || (GET_MODE_SIZE (orig_mode) |
694 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest)))))) | 867 != GET_MODE_SIZE (GET_MODE (SUBREG_REG (dest)))))) |
695 { | 868 { |
696 rtx reg, minsn, smove; | 869 rtx reg, smove; |
870 rtx_insn *minsn; | |
697 | 871 |
698 reg = gen_reg_rtx (orig_mode); | 872 reg = gen_reg_rtx (orig_mode); |
699 minsn = emit_move_insn (reg, src); | 873 minsn = emit_move_insn (reg, src); |
700 smove = single_set (minsn); | 874 smove = single_set (minsn); |
701 gcc_assert (smove != NULL_RTX); | 875 gcc_assert (smove != NULL_RTX); |
726 if (MEM_P (src) || MEM_P (dest)) | 900 if (MEM_P (src) || MEM_P (dest)) |
727 { | 901 { |
728 int acg; | 902 int acg; |
729 | 903 |
730 if (MEM_P (src)) | 904 if (MEM_P (src)) |
731 for_each_rtx (&XEXP (src, 0), resolve_subreg_use, NULL_RTX); | 905 resolve_subreg_use (&XEXP (src, 0), NULL_RTX); |
732 if (MEM_P (dest)) | 906 if (MEM_P (dest)) |
733 for_each_rtx (&XEXP (dest, 0), resolve_subreg_use, NULL_RTX); | 907 resolve_subreg_use (&XEXP (dest, 0), NULL_RTX); |
734 acg = apply_change_group (); | 908 acg = apply_change_group (); |
735 gcc_assert (acg); | 909 gcc_assert (acg); |
736 } | 910 } |
737 | 911 |
738 /* If SRC is a register which we can't decompose, or has side | 912 /* If SRC is a register which we can't decompose, or has side |
743 || GET_CODE (src) == ASM_OPERANDS) | 917 || GET_CODE (src) == ASM_OPERANDS) |
744 { | 918 { |
745 rtx reg; | 919 rtx reg; |
746 | 920 |
747 reg = gen_reg_rtx (orig_mode); | 921 reg = gen_reg_rtx (orig_mode); |
748 emit_move_insn (reg, src); | 922 |
923 if (AUTO_INC_DEC) | |
924 { | |
925 rtx_insn *move = emit_move_insn (reg, src); | |
926 if (MEM_P (src)) | |
927 { | |
928 rtx note = find_reg_note (insn, REG_INC, NULL_RTX); | |
929 if (note) | |
930 add_reg_note (move, REG_INC, XEXP (note, 0)); | |
931 } | |
932 } | |
933 else | |
934 emit_move_insn (reg, src); | |
935 | |
749 src = reg; | 936 src = reg; |
750 } | 937 } |
751 | 938 |
752 /* If DEST is a register which we can't decompose, or has side | 939 /* If DEST is a register which we can't decompose, or has side |
753 effects, we need to first move to a temporary register. We | 940 effects, we need to first move to a temporary register. We |
765 && !resolve_subreg_p (dest))) | 952 && !resolve_subreg_p (dest))) |
766 { | 953 { |
767 if (real_dest == NULL_RTX) | 954 if (real_dest == NULL_RTX) |
768 real_dest = dest; | 955 real_dest = dest; |
769 if (!SCALAR_INT_MODE_P (dest_mode)) | 956 if (!SCALAR_INT_MODE_P (dest_mode)) |
770 { | 957 dest_mode = int_mode_for_mode (dest_mode).require (); |
771 dest_mode = mode_for_size (GET_MODE_SIZE (dest_mode) * BITS_PER_UNIT, | |
772 MODE_INT, 0); | |
773 gcc_assert (dest_mode != BLKmode); | |
774 } | |
775 dest = gen_reg_rtx (dest_mode); | 958 dest = gen_reg_rtx (dest_mode); |
776 if (REG_P (real_dest)) | 959 if (REG_P (real_dest)) |
777 REG_ATTRS (dest) = REG_ATTRS (real_dest); | 960 REG_ATTRS (dest) = REG_ATTRS (real_dest); |
778 } | 961 } |
779 | 962 |
825 i * UNITS_PER_WORD)); | 1008 i * UNITS_PER_WORD)); |
826 } | 1009 } |
827 | 1010 |
828 if (real_dest != NULL_RTX) | 1011 if (real_dest != NULL_RTX) |
829 { | 1012 { |
830 rtx mdest, minsn, smove; | 1013 rtx mdest, smove; |
1014 rtx_insn *minsn; | |
831 | 1015 |
832 if (dest_mode == orig_mode) | 1016 if (dest_mode == orig_mode) |
833 mdest = dest; | 1017 mdest = dest; |
834 else | 1018 else |
835 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0); | 1019 mdest = simplify_gen_subreg (orig_mode, dest, GET_MODE (dest), 0); |
836 minsn = emit_move_insn (real_dest, mdest); | 1020 minsn = emit_move_insn (real_dest, mdest); |
837 | 1021 |
1022 if (AUTO_INC_DEC && MEM_P (real_dest) | |
1023 && !(resolve_reg_p (real_dest) || resolve_subreg_p (real_dest))) | |
1024 { | |
1025 rtx note = find_reg_note (insn, REG_INC, NULL_RTX); | |
1026 if (note) | |
1027 add_reg_note (minsn, REG_INC, XEXP (note, 0)); | |
1028 } | |
1029 | |
838 smove = single_set (minsn); | 1030 smove = single_set (minsn); |
839 gcc_assert (smove != NULL_RTX); | 1031 gcc_assert (smove != NULL_RTX); |
840 | 1032 |
841 resolve_simple_move (smove, minsn); | 1033 resolve_simple_move (smove, minsn); |
842 } | 1034 } |
846 | 1038 |
847 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX); | 1039 copy_reg_eh_region_note_forward (insn, insns, NULL_RTX); |
848 | 1040 |
849 emit_insn_before (insns, insn); | 1041 emit_insn_before (insns, insn); |
850 | 1042 |
851 delete_insn (insn); | 1043 /* If we get here via self-recursion, then INSN is not yet in the insns |
1044 chain and delete_insn will fail. We only want to remove INSN from the | |
1045 current sequence. See PR56738. */ | |
1046 if (in_sequence_p ()) | |
1047 remove_insn (insn); | |
1048 else | |
1049 delete_insn (insn); | |
852 | 1050 |
853 return insns; | 1051 return insns; |
854 } | 1052 } |
855 | 1053 |
856 /* Change a CLOBBER of a decomposed register into a CLOBBER of the | 1054 /* Change a CLOBBER of a decomposed register into a CLOBBER of the |
857 component registers. Return whether we changed something. */ | 1055 component registers. Return whether we changed something. */ |
858 | 1056 |
859 static bool | 1057 static bool |
860 resolve_clobber (rtx pat, rtx insn) | 1058 resolve_clobber (rtx pat, rtx_insn *insn) |
861 { | 1059 { |
862 rtx reg; | 1060 rtx reg; |
863 enum machine_mode orig_mode; | 1061 machine_mode orig_mode; |
864 unsigned int words, i; | 1062 unsigned int words, i; |
865 int ret; | 1063 int ret; |
866 | 1064 |
867 reg = XEXP (pat, 0); | 1065 reg = XEXP (pat, 0); |
868 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg)) | 1066 if (!resolve_reg_p (reg) && !resolve_subreg_p (reg)) |
896 | 1094 |
897 /* A USE of a decomposed register is no longer meaningful. Return | 1095 /* A USE of a decomposed register is no longer meaningful. Return |
898 whether we changed something. */ | 1096 whether we changed something. */ |
899 | 1097 |
900 static bool | 1098 static bool |
901 resolve_use (rtx pat, rtx insn) | 1099 resolve_use (rtx pat, rtx_insn *insn) |
902 { | 1100 { |
903 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0))) | 1101 if (resolve_reg_p (XEXP (pat, 0)) || resolve_subreg_p (XEXP (pat, 0))) |
904 { | 1102 { |
905 delete_insn (insn); | 1103 delete_insn (insn); |
906 return true; | 1104 return true; |
912 } | 1110 } |
913 | 1111 |
914 /* A VAR_LOCATION can be simplified. */ | 1112 /* A VAR_LOCATION can be simplified. */ |
915 | 1113 |
916 static void | 1114 static void |
917 resolve_debug (rtx insn) | 1115 resolve_debug (rtx_insn *insn) |
918 { | 1116 { |
919 for_each_rtx (&PATTERN (insn), adjust_decomposed_uses, NULL_RTX); | 1117 subrtx_ptr_iterator::array_type array; |
1118 FOR_EACH_SUBRTX_PTR (iter, array, &PATTERN (insn), NONCONST) | |
1119 { | |
1120 rtx *loc = *iter; | |
1121 rtx x = *loc; | |
1122 if (resolve_subreg_p (x)) | |
1123 { | |
1124 x = simplify_subreg_concatn (GET_MODE (x), SUBREG_REG (x), | |
1125 SUBREG_BYTE (x)); | |
1126 | |
1127 if (x) | |
1128 *loc = x; | |
1129 else | |
1130 x = copy_rtx (*loc); | |
1131 } | |
1132 if (resolve_reg_p (x)) | |
1133 *loc = copy_rtx (x); | |
1134 } | |
920 | 1135 |
921 df_insn_rescan (insn); | 1136 df_insn_rescan (insn); |
922 | 1137 |
923 resolve_reg_notes (insn); | 1138 resolve_reg_notes (insn); |
924 } | 1139 } |
925 | 1140 |
926 /* Checks if INSN is a decomposable multiword-shift or zero-extend and | 1141 /* Check if INSN is a decomposable multiword-shift or zero-extend and |
927 sets the decomposable_context bitmap accordingly. A non-zero value | 1142 set the decomposable_context bitmap accordingly. SPEED_P is true |
928 is returned if a decomposable insn has been found. */ | 1143 if we are optimizing INSN for speed rather than size. Return true |
929 | 1144 if INSN is decomposable. */ |
930 static int | 1145 |
931 find_decomposable_shift_zext (rtx insn) | 1146 static bool |
1147 find_decomposable_shift_zext (rtx_insn *insn, bool speed_p) | |
932 { | 1148 { |
933 rtx set; | 1149 rtx set; |
934 rtx op; | 1150 rtx op; |
935 rtx op_operand; | 1151 rtx op_operand; |
936 | 1152 |
937 set = single_set (insn); | 1153 set = single_set (insn); |
938 if (!set) | 1154 if (!set) |
939 return 0; | 1155 return false; |
940 | 1156 |
941 op = SET_SRC (set); | 1157 op = SET_SRC (set); |
942 if (GET_CODE (op) != ASHIFT | 1158 if (GET_CODE (op) != ASHIFT |
943 && GET_CODE (op) != LSHIFTRT | 1159 && GET_CODE (op) != LSHIFTRT |
1160 && GET_CODE (op) != ASHIFTRT | |
944 && GET_CODE (op) != ZERO_EXTEND) | 1161 && GET_CODE (op) != ZERO_EXTEND) |
945 return 0; | 1162 return false; |
946 | 1163 |
947 op_operand = XEXP (op, 0); | 1164 op_operand = XEXP (op, 0); |
948 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand) | 1165 if (!REG_P (SET_DEST (set)) || !REG_P (op_operand) |
949 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set))) | 1166 || HARD_REGISTER_NUM_P (REGNO (SET_DEST (set))) |
950 || HARD_REGISTER_NUM_P (REGNO (op_operand)) | 1167 || HARD_REGISTER_NUM_P (REGNO (op_operand)) |
951 || !SCALAR_INT_MODE_P (GET_MODE (op))) | 1168 || GET_MODE (op) != twice_word_mode) |
952 return 0; | 1169 return false; |
953 | 1170 |
954 if (GET_CODE (op) == ZERO_EXTEND) | 1171 if (GET_CODE (op) == ZERO_EXTEND) |
955 { | 1172 { |
956 if (GET_MODE (op_operand) != word_mode | 1173 if (GET_MODE (op_operand) != word_mode |
957 || GET_MODE_BITSIZE (GET_MODE (op)) != 2 * BITS_PER_WORD) | 1174 || !choices[speed_p].splitting_zext) |
958 return 0; | 1175 return false; |
959 } | 1176 } |
960 else /* left or right shift */ | 1177 else /* left or right shift */ |
961 { | 1178 { |
1179 bool *splitting = (GET_CODE (op) == ASHIFT | |
1180 ? choices[speed_p].splitting_ashift | |
1181 : GET_CODE (op) == ASHIFTRT | |
1182 ? choices[speed_p].splitting_ashiftrt | |
1183 : choices[speed_p].splitting_lshiftrt); | |
962 if (!CONST_INT_P (XEXP (op, 1)) | 1184 if (!CONST_INT_P (XEXP (op, 1)) |
963 || INTVAL (XEXP (op, 1)) < BITS_PER_WORD | 1185 || !IN_RANGE (INTVAL (XEXP (op, 1)), BITS_PER_WORD, |
964 || GET_MODE_BITSIZE (GET_MODE (op_operand)) != 2 * BITS_PER_WORD) | 1186 2 * BITS_PER_WORD - 1) |
965 return 0; | 1187 || !splitting[INTVAL (XEXP (op, 1)) - BITS_PER_WORD]) |
1188 return false; | |
1189 | |
1190 bitmap_set_bit (decomposable_context, REGNO (op_operand)); | |
966 } | 1191 } |
967 | 1192 |
968 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set))); | 1193 bitmap_set_bit (decomposable_context, REGNO (SET_DEST (set))); |
969 | 1194 |
970 if (GET_CODE (op) != ZERO_EXTEND) | 1195 return true; |
971 bitmap_set_bit (decomposable_context, REGNO (op_operand)); | |
972 | |
973 return 1; | |
974 } | 1196 } |
975 | 1197 |
976 /* Decompose a more than word wide shift (in INSN) of a multiword | 1198 /* Decompose a more than word wide shift (in INSN) of a multiword |
977 pseudo or a multiword zero-extend of a wordmode pseudo into a move | 1199 pseudo or a multiword zero-extend of a wordmode pseudo into a move |
978 and 'set to zero' insn. Return a pointer to the new insn when a | 1200 and 'set to zero' insn. Return a pointer to the new insn when a |
979 replacement was done. */ | 1201 replacement was done. */ |
980 | 1202 |
981 static rtx | 1203 static rtx_insn * |
982 resolve_shift_zext (rtx insn) | 1204 resolve_shift_zext (rtx_insn *insn) |
983 { | 1205 { |
984 rtx set; | 1206 rtx set; |
985 rtx op; | 1207 rtx op; |
986 rtx op_operand; | 1208 rtx op_operand; |
987 rtx insns; | 1209 rtx_insn *insns; |
988 rtx src_reg, dest_reg, dest_zero; | 1210 rtx src_reg, dest_reg, dest_upper, upper_src = NULL_RTX; |
989 int src_reg_num, dest_reg_num, offset1, offset2, src_offset; | 1211 int src_reg_num, dest_reg_num, offset1, offset2, src_offset; |
1212 scalar_int_mode inner_mode; | |
990 | 1213 |
991 set = single_set (insn); | 1214 set = single_set (insn); |
992 if (!set) | 1215 if (!set) |
993 return NULL_RTX; | 1216 return NULL; |
994 | 1217 |
995 op = SET_SRC (set); | 1218 op = SET_SRC (set); |
996 if (GET_CODE (op) != ASHIFT | 1219 if (GET_CODE (op) != ASHIFT |
997 && GET_CODE (op) != LSHIFTRT | 1220 && GET_CODE (op) != LSHIFTRT |
1221 && GET_CODE (op) != ASHIFTRT | |
998 && GET_CODE (op) != ZERO_EXTEND) | 1222 && GET_CODE (op) != ZERO_EXTEND) |
999 return NULL_RTX; | 1223 return NULL; |
1000 | 1224 |
1001 op_operand = XEXP (op, 0); | 1225 op_operand = XEXP (op, 0); |
1002 | 1226 if (!is_a <scalar_int_mode> (GET_MODE (op_operand), &inner_mode)) |
1227 return NULL; | |
1228 | |
1229 /* We can tear this operation apart only if the regs were already | |
1230 torn apart. */ | |
1003 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand)) | 1231 if (!resolve_reg_p (SET_DEST (set)) && !resolve_reg_p (op_operand)) |
1004 return NULL_RTX; | 1232 return NULL; |
1005 | 1233 |
1006 /* src_reg_num is the number of the word mode register which we | 1234 /* src_reg_num is the number of the word mode register which we |
1007 are operating on. For a left shift and a zero_extend on little | 1235 are operating on. For a left shift and a zero_extend on little |
1008 endian machines this is register 0. */ | 1236 endian machines this is register 0. */ |
1009 src_reg_num = GET_CODE (op) == LSHIFTRT ? 1 : 0; | 1237 src_reg_num = (GET_CODE (op) == LSHIFTRT || GET_CODE (op) == ASHIFTRT) |
1010 | 1238 ? 1 : 0; |
1011 if (WORDS_BIG_ENDIAN | 1239 |
1012 && GET_MODE_SIZE (GET_MODE (op_operand)) > UNITS_PER_WORD) | 1240 if (WORDS_BIG_ENDIAN && GET_MODE_SIZE (inner_mode) > UNITS_PER_WORD) |
1013 src_reg_num = 1 - src_reg_num; | 1241 src_reg_num = 1 - src_reg_num; |
1014 | 1242 |
1015 if (GET_CODE (op) == ZERO_EXTEND) | 1243 if (GET_CODE (op) == ZERO_EXTEND) |
1016 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0; | 1244 dest_reg_num = WORDS_BIG_ENDIAN ? 1 : 0; |
1017 else | 1245 else |
1024 start_sequence (); | 1252 start_sequence (); |
1025 | 1253 |
1026 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set), | 1254 dest_reg = simplify_gen_subreg_concatn (word_mode, SET_DEST (set), |
1027 GET_MODE (SET_DEST (set)), | 1255 GET_MODE (SET_DEST (set)), |
1028 offset1); | 1256 offset1); |
1029 dest_zero = simplify_gen_subreg_concatn (word_mode, SET_DEST (set), | 1257 dest_upper = simplify_gen_subreg_concatn (word_mode, SET_DEST (set), |
1030 GET_MODE (SET_DEST (set)), | 1258 GET_MODE (SET_DEST (set)), |
1031 offset2); | 1259 offset2); |
1032 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand, | 1260 src_reg = simplify_gen_subreg_concatn (word_mode, op_operand, |
1033 GET_MODE (op_operand), | 1261 GET_MODE (op_operand), |
1034 src_offset); | 1262 src_offset); |
1263 if (GET_CODE (op) == ASHIFTRT | |
1264 && INTVAL (XEXP (op, 1)) != 2 * BITS_PER_WORD - 1) | |
1265 upper_src = expand_shift (RSHIFT_EXPR, word_mode, copy_rtx (src_reg), | |
1266 BITS_PER_WORD - 1, NULL_RTX, 0); | |
1267 | |
1035 if (GET_CODE (op) != ZERO_EXTEND) | 1268 if (GET_CODE (op) != ZERO_EXTEND) |
1036 { | 1269 { |
1037 int shift_count = INTVAL (XEXP (op, 1)); | 1270 int shift_count = INTVAL (XEXP (op, 1)); |
1038 if (shift_count > BITS_PER_WORD) | 1271 if (shift_count > BITS_PER_WORD) |
1039 src_reg = expand_shift (GET_CODE (op) == ASHIFT ? | 1272 src_reg = expand_shift (GET_CODE (op) == ASHIFT ? |
1040 LSHIFT_EXPR : RSHIFT_EXPR, | 1273 LSHIFT_EXPR : RSHIFT_EXPR, |
1041 word_mode, src_reg, | 1274 word_mode, src_reg, |
1042 build_int_cst (NULL_TREE, | 1275 shift_count - BITS_PER_WORD, |
1043 shift_count - BITS_PER_WORD), | 1276 dest_reg, GET_CODE (op) != ASHIFTRT); |
1044 dest_reg, 1); | |
1045 } | 1277 } |
1046 | 1278 |
1047 if (dest_reg != src_reg) | 1279 if (dest_reg != src_reg) |
1048 emit_move_insn (dest_reg, src_reg); | 1280 emit_move_insn (dest_reg, src_reg); |
1049 emit_move_insn (dest_zero, CONST0_RTX (word_mode)); | 1281 if (GET_CODE (op) != ASHIFTRT) |
1282 emit_move_insn (dest_upper, CONST0_RTX (word_mode)); | |
1283 else if (INTVAL (XEXP (op, 1)) == 2 * BITS_PER_WORD - 1) | |
1284 emit_move_insn (dest_upper, copy_rtx (src_reg)); | |
1285 else | |
1286 emit_move_insn (dest_upper, upper_src); | |
1050 insns = get_insns (); | 1287 insns = get_insns (); |
1051 | 1288 |
1052 end_sequence (); | 1289 end_sequence (); |
1053 | 1290 |
1054 emit_insn_before (insns, insn); | 1291 emit_insn_before (insns, insn); |
1055 | 1292 |
1056 if (dump_file) | 1293 if (dump_file) |
1057 { | 1294 { |
1058 rtx in; | 1295 rtx_insn *in; |
1059 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn)); | 1296 fprintf (dump_file, "; Replacing insn: %d with insns: ", INSN_UID (insn)); |
1060 for (in = insns; in != insn; in = NEXT_INSN (in)) | 1297 for (in = insns; in != insn; in = NEXT_INSN (in)) |
1061 fprintf (dump_file, "%d ", INSN_UID (in)); | 1298 fprintf (dump_file, "%d ", INSN_UID (in)); |
1062 fprintf (dump_file, "\n"); | 1299 fprintf (dump_file, "\n"); |
1063 } | 1300 } |
1064 | 1301 |
1065 delete_insn (insn); | 1302 delete_insn (insn); |
1066 return insns; | 1303 return insns; |
1067 } | 1304 } |
1068 | 1305 |
1306 /* Print to dump_file a description of what we're doing with shift code CODE. | |
1307 SPLITTING[X] is true if we are splitting shifts by X + BITS_PER_WORD. */ | |
1308 | |
1309 static void | |
1310 dump_shift_choices (enum rtx_code code, bool *splitting) | |
1311 { | |
1312 int i; | |
1313 const char *sep; | |
1314 | |
1315 fprintf (dump_file, | |
1316 " Splitting mode %s for %s lowering with shift amounts = ", | |
1317 GET_MODE_NAME (twice_word_mode), GET_RTX_NAME (code)); | |
1318 sep = ""; | |
1319 for (i = 0; i < BITS_PER_WORD; i++) | |
1320 if (splitting[i]) | |
1321 { | |
1322 fprintf (dump_file, "%s%d", sep, i + BITS_PER_WORD); | |
1323 sep = ","; | |
1324 } | |
1325 fprintf (dump_file, "\n"); | |
1326 } | |
1327 | |
1328 /* Print to dump_file a description of what we're doing when optimizing | |
1329 for speed or size; SPEED_P says which. DESCRIPTION is a description | |
1330 of the SPEED_P choice. */ | |
1331 | |
1332 static void | |
1333 dump_choices (bool speed_p, const char *description) | |
1334 { | |
1335 unsigned int i; | |
1336 | |
1337 fprintf (dump_file, "Choices when optimizing for %s:\n", description); | |
1338 | |
1339 for (i = 0; i < MAX_MACHINE_MODE; i++) | |
1340 if (GET_MODE_SIZE ((machine_mode) i) > UNITS_PER_WORD) | |
1341 fprintf (dump_file, " %s mode %s for copy lowering.\n", | |
1342 choices[speed_p].move_modes_to_split[i] | |
1343 ? "Splitting" | |
1344 : "Skipping", | |
1345 GET_MODE_NAME ((machine_mode) i)); | |
1346 | |
1347 fprintf (dump_file, " %s mode %s for zero_extend lowering.\n", | |
1348 choices[speed_p].splitting_zext ? "Splitting" : "Skipping", | |
1349 GET_MODE_NAME (twice_word_mode)); | |
1350 | |
1351 dump_shift_choices (ASHIFT, choices[speed_p].splitting_ashift); | |
1352 dump_shift_choices (LSHIFTRT, choices[speed_p].splitting_lshiftrt); | |
1353 dump_shift_choices (ASHIFTRT, choices[speed_p].splitting_ashiftrt); | |
1354 fprintf (dump_file, "\n"); | |
1355 } | |
1356 | |
1069 /* Look for registers which are always accessed via word-sized SUBREGs | 1357 /* Look for registers which are always accessed via word-sized SUBREGs |
1070 or via copies. Decompose these registers into several word-sized | 1358 or -if DECOMPOSE_COPIES is true- via copies. Decompose these |
1071 pseudo-registers. */ | 1359 registers into several word-sized pseudo-registers. */ |
1072 | 1360 |
1073 static void | 1361 static void |
1074 decompose_multiword_subregs (void) | 1362 decompose_multiword_subregs (bool decompose_copies) |
1075 { | 1363 { |
1076 unsigned int max; | 1364 unsigned int max; |
1077 basic_block bb; | 1365 basic_block bb; |
1078 | 1366 bool speed_p; |
1079 if (df) | 1367 |
1080 df_set_flags (DF_DEFER_INSN_RESCAN); | 1368 if (dump_file) |
1369 { | |
1370 dump_choices (false, "size"); | |
1371 dump_choices (true, "speed"); | |
1372 } | |
1373 | |
1374 /* Check if this target even has any modes to consider lowering. */ | |
1375 if (!choices[false].something_to_do && !choices[true].something_to_do) | |
1376 { | |
1377 if (dump_file) | |
1378 fprintf (dump_file, "Nothing to do!\n"); | |
1379 return; | |
1380 } | |
1081 | 1381 |
1082 max = max_reg_num (); | 1382 max = max_reg_num (); |
1083 | 1383 |
1084 /* First see if there are any multi-word pseudo-registers. If there | 1384 /* First see if there are any multi-word pseudo-registers. If there |
1085 aren't, there is nothing we can do. This should speed up this | 1385 aren't, there is nothing we can do. This should speed up this |
1086 pass in the normal case, since it should be faster than scanning | 1386 pass in the normal case, since it should be faster than scanning |
1087 all the insns. */ | 1387 all the insns. */ |
1088 { | 1388 { |
1089 unsigned int i; | 1389 unsigned int i; |
1390 bool useful_modes_seen = false; | |
1090 | 1391 |
1091 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i) | 1392 for (i = FIRST_PSEUDO_REGISTER; i < max; ++i) |
1393 if (regno_reg_rtx[i] != NULL) | |
1394 { | |
1395 machine_mode mode = GET_MODE (regno_reg_rtx[i]); | |
1396 if (choices[false].move_modes_to_split[(int) mode] | |
1397 || choices[true].move_modes_to_split[(int) mode]) | |
1398 { | |
1399 useful_modes_seen = true; | |
1400 break; | |
1401 } | |
1402 } | |
1403 | |
1404 if (!useful_modes_seen) | |
1092 { | 1405 { |
1093 if (regno_reg_rtx[i] != NULL | 1406 if (dump_file) |
1094 && GET_MODE_SIZE (GET_MODE (regno_reg_rtx[i])) > UNITS_PER_WORD) | 1407 fprintf (dump_file, "Nothing to lower in this function.\n"); |
1095 break; | 1408 return; |
1096 } | 1409 } |
1097 if (i == max) | |
1098 return; | |
1099 } | 1410 } |
1100 | 1411 |
1101 if (df) | 1412 if (df) |
1102 run_word_dce (); | 1413 { |
1103 | 1414 df_set_flags (DF_DEFER_INSN_RESCAN); |
1104 /* FIXME: When the dataflow branch is merged, we can change this | 1415 run_word_dce (); |
1105 code to look for each multi-word pseudo-register and to find each | 1416 } |
1106 insn which sets or uses that register. That should be faster | 1417 |
1107 than scanning all the insns. */ | 1418 /* FIXME: It may be possible to change this code to look for each |
1419 multi-word pseudo-register and to find each insn which sets or | |
1420 uses that register. That should be faster than scanning all the | |
1421 insns. */ | |
1108 | 1422 |
1109 decomposable_context = BITMAP_ALLOC (NULL); | 1423 decomposable_context = BITMAP_ALLOC (NULL); |
1110 non_decomposable_context = BITMAP_ALLOC (NULL); | 1424 non_decomposable_context = BITMAP_ALLOC (NULL); |
1111 subreg_context = BITMAP_ALLOC (NULL); | 1425 subreg_context = BITMAP_ALLOC (NULL); |
1112 | 1426 |
1113 reg_copy_graph = VEC_alloc (bitmap, heap, max); | 1427 reg_copy_graph.create (max); |
1114 VEC_safe_grow (bitmap, heap, reg_copy_graph, max); | 1428 reg_copy_graph.safe_grow_cleared (max); |
1115 memset (VEC_address (bitmap, reg_copy_graph), 0, sizeof (bitmap) * max); | 1429 memset (reg_copy_graph.address (), 0, sizeof (bitmap) * max); |
1116 | 1430 |
1117 FOR_EACH_BB (bb) | 1431 speed_p = optimize_function_for_speed_p (cfun); |
1118 { | 1432 FOR_EACH_BB_FN (bb, cfun) |
1119 rtx insn; | 1433 { |
1434 rtx_insn *insn; | |
1120 | 1435 |
1121 FOR_BB_INSNS (bb, insn) | 1436 FOR_BB_INSNS (bb, insn) |
1122 { | 1437 { |
1123 rtx set; | 1438 rtx set; |
1124 enum classify_move_insn cmi; | 1439 enum classify_move_insn cmi; |
1127 if (!INSN_P (insn) | 1442 if (!INSN_P (insn) |
1128 || GET_CODE (PATTERN (insn)) == CLOBBER | 1443 || GET_CODE (PATTERN (insn)) == CLOBBER |
1129 || GET_CODE (PATTERN (insn)) == USE) | 1444 || GET_CODE (PATTERN (insn)) == USE) |
1130 continue; | 1445 continue; |
1131 | 1446 |
1132 if (find_decomposable_shift_zext (insn)) | 1447 recog_memoized (insn); |
1448 | |
1449 if (find_decomposable_shift_zext (insn, speed_p)) | |
1133 continue; | 1450 continue; |
1134 | 1451 |
1135 recog_memoized (insn); | |
1136 extract_insn (insn); | 1452 extract_insn (insn); |
1137 | 1453 |
1138 set = simple_move (insn); | 1454 set = simple_move (insn, speed_p); |
1139 | 1455 |
1140 if (!set) | 1456 if (!set) |
1141 cmi = NOT_SIMPLE_MOVE; | 1457 cmi = NOT_SIMPLE_MOVE; |
1142 else | 1458 else |
1143 { | 1459 { |
1460 /* We mark pseudo-to-pseudo copies as decomposable during the | |
1461 second pass only. The first pass is so early that there is | |
1462 good chance such moves will be optimized away completely by | |
1463 subsequent optimizations anyway. | |
1464 | |
1465 However, we call find_pseudo_copy even during the first pass | |
1466 so as to properly set up the reg_copy_graph. */ | |
1144 if (find_pseudo_copy (set)) | 1467 if (find_pseudo_copy (set)) |
1145 cmi = SIMPLE_PSEUDO_REG_MOVE; | 1468 cmi = decompose_copies? DECOMPOSABLE_SIMPLE_MOVE : SIMPLE_MOVE; |
1146 else | 1469 else |
1147 cmi = SIMPLE_MOVE; | 1470 cmi = SIMPLE_MOVE; |
1148 } | 1471 } |
1149 | 1472 |
1150 n = recog_data.n_operands; | 1473 n = recog_data.n_operands; |
1151 for (i = 0; i < n; ++i) | 1474 for (i = 0; i < n; ++i) |
1152 { | 1475 { |
1153 for_each_rtx (&recog_data.operand[i], | 1476 find_decomposable_subregs (&recog_data.operand[i], &cmi); |
1154 find_decomposable_subregs, | |
1155 &cmi); | |
1156 | 1477 |
1157 /* We handle ASM_OPERANDS as a special case to support | 1478 /* We handle ASM_OPERANDS as a special case to support |
1158 things like x86 rdtsc which returns a DImode value. | 1479 things like x86 rdtsc which returns a DImode value. |
1159 We can decompose the output, which will certainly be | 1480 We can decompose the output, which will certainly be |
1160 operand 0, but not the inputs. */ | 1481 operand 0, but not the inputs. */ |
1170 } | 1491 } |
1171 | 1492 |
1172 bitmap_and_compl_into (decomposable_context, non_decomposable_context); | 1493 bitmap_and_compl_into (decomposable_context, non_decomposable_context); |
1173 if (!bitmap_empty_p (decomposable_context)) | 1494 if (!bitmap_empty_p (decomposable_context)) |
1174 { | 1495 { |
1175 sbitmap sub_blocks; | |
1176 unsigned int i; | 1496 unsigned int i; |
1177 sbitmap_iterator sbi; | 1497 sbitmap_iterator sbi; |
1178 bitmap_iterator iter; | 1498 bitmap_iterator iter; |
1179 unsigned int regno; | 1499 unsigned int regno; |
1180 | 1500 |
1181 propagate_pseudo_copies (); | 1501 propagate_pseudo_copies (); |
1182 | 1502 |
1183 sub_blocks = sbitmap_alloc (last_basic_block); | 1503 auto_sbitmap sub_blocks (last_basic_block_for_fn (cfun)); |
1184 sbitmap_zero (sub_blocks); | 1504 bitmap_clear (sub_blocks); |
1185 | 1505 |
1186 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter) | 1506 EXECUTE_IF_SET_IN_BITMAP (decomposable_context, 0, regno, iter) |
1187 decompose_register (regno); | 1507 decompose_register (regno); |
1188 | 1508 |
1189 FOR_EACH_BB (bb) | 1509 FOR_EACH_BB_FN (bb, cfun) |
1190 { | 1510 { |
1191 rtx insn; | 1511 rtx_insn *insn; |
1192 | 1512 |
1193 FOR_BB_INSNS (bb, insn) | 1513 FOR_BB_INSNS (bb, insn) |
1194 { | 1514 { |
1195 rtx pat; | 1515 rtx pat; |
1196 | 1516 |
1210 int i; | 1530 int i; |
1211 | 1531 |
1212 recog_memoized (insn); | 1532 recog_memoized (insn); |
1213 extract_insn (insn); | 1533 extract_insn (insn); |
1214 | 1534 |
1215 set = simple_move (insn); | 1535 set = simple_move (insn, speed_p); |
1216 if (set) | 1536 if (set) |
1217 { | 1537 { |
1218 rtx orig_insn = insn; | 1538 rtx_insn *orig_insn = insn; |
1219 bool cfi = control_flow_insn_p (insn); | 1539 bool cfi = control_flow_insn_p (insn); |
1220 | 1540 |
1221 /* We can end up splitting loads to multi-word pseudos | 1541 /* We can end up splitting loads to multi-word pseudos |
1222 into separate loads to machine word size pseudos. | 1542 into separate loads to machine word size pseudos. |
1223 When this happens, we first had one load that can | 1543 When this happens, we first had one load that can |
1238 { | 1558 { |
1239 recog_memoized (insn); | 1559 recog_memoized (insn); |
1240 extract_insn (insn); | 1560 extract_insn (insn); |
1241 | 1561 |
1242 if (cfi) | 1562 if (cfi) |
1243 SET_BIT (sub_blocks, bb->index); | 1563 bitmap_set_bit (sub_blocks, bb->index); |
1244 } | 1564 } |
1245 } | 1565 } |
1246 else | 1566 else |
1247 { | 1567 { |
1248 rtx decomposed_shift; | 1568 rtx_insn *decomposed_shift; |
1249 | 1569 |
1250 decomposed_shift = resolve_shift_zext (insn); | 1570 decomposed_shift = resolve_shift_zext (insn); |
1251 if (decomposed_shift != NULL_RTX) | 1571 if (decomposed_shift != NULL_RTX) |
1252 { | 1572 { |
1253 insn = decomposed_shift; | 1573 insn = decomposed_shift; |
1255 extract_insn (insn); | 1575 extract_insn (insn); |
1256 } | 1576 } |
1257 } | 1577 } |
1258 | 1578 |
1259 for (i = recog_data.n_operands - 1; i >= 0; --i) | 1579 for (i = recog_data.n_operands - 1; i >= 0; --i) |
1260 for_each_rtx (recog_data.operand_loc[i], | 1580 resolve_subreg_use (recog_data.operand_loc[i], insn); |
1261 resolve_subreg_use, | |
1262 insn); | |
1263 | 1581 |
1264 resolve_reg_notes (insn); | 1582 resolve_reg_notes (insn); |
1265 | 1583 |
1266 if (num_validated_changes () > 0) | 1584 if (num_validated_changes () > 0) |
1267 { | 1585 { |
1283 | 1601 |
1284 /* If we had insns to split that caused control flow insns in the middle | 1602 /* If we had insns to split that caused control flow insns in the middle |
1285 of a basic block, split those blocks now. Note that we only handle | 1603 of a basic block, split those blocks now. Note that we only handle |
1286 the case where splitting a load has caused multiple possibly trapping | 1604 the case where splitting a load has caused multiple possibly trapping |
1287 loads to appear. */ | 1605 loads to appear. */ |
1288 EXECUTE_IF_SET_IN_SBITMAP (sub_blocks, 0, i, sbi) | 1606 EXECUTE_IF_SET_IN_BITMAP (sub_blocks, 0, i, sbi) |
1289 { | 1607 { |
1290 rtx insn, end; | 1608 rtx_insn *insn, *end; |
1291 edge fallthru; | 1609 edge fallthru; |
1292 | 1610 |
1293 bb = BASIC_BLOCK (i); | 1611 bb = BASIC_BLOCK_FOR_FN (cfun, i); |
1294 insn = BB_HEAD (bb); | 1612 insn = BB_HEAD (bb); |
1295 end = BB_END (bb); | 1613 end = BB_END (bb); |
1296 | 1614 |
1297 while (insn != end) | 1615 while (insn != end) |
1298 { | 1616 { |
1308 } | 1626 } |
1309 else | 1627 else |
1310 insn = NEXT_INSN (insn); | 1628 insn = NEXT_INSN (insn); |
1311 } | 1629 } |
1312 } | 1630 } |
1313 | |
1314 sbitmap_free (sub_blocks); | |
1315 } | 1631 } |
1316 | 1632 |
1317 { | 1633 { |
1318 unsigned int i; | 1634 unsigned int i; |
1319 bitmap b; | 1635 bitmap b; |
1320 | 1636 |
1321 FOR_EACH_VEC_ELT (bitmap, reg_copy_graph, i, b) | 1637 FOR_EACH_VEC_ELT (reg_copy_graph, i, b) |
1322 if (b) | 1638 if (b) |
1323 BITMAP_FREE (b); | 1639 BITMAP_FREE (b); |
1324 } | 1640 } |
1325 | 1641 |
1326 VEC_free (bitmap, heap, reg_copy_graph); | 1642 reg_copy_graph.release (); |
1327 | 1643 |
1328 BITMAP_FREE (decomposable_context); | 1644 BITMAP_FREE (decomposable_context); |
1329 BITMAP_FREE (non_decomposable_context); | 1645 BITMAP_FREE (non_decomposable_context); |
1330 BITMAP_FREE (subreg_context); | 1646 BITMAP_FREE (subreg_context); |
1331 } | 1647 } |
1332 | 1648 |
1333 /* Gate function for lower subreg pass. */ | |
1334 | |
1335 static bool | |
1336 gate_handle_lower_subreg (void) | |
1337 { | |
1338 return flag_split_wide_types != 0; | |
1339 } | |
1340 | |
1341 /* Implement first lower subreg pass. */ | 1649 /* Implement first lower subreg pass. */ |
1342 | 1650 |
1343 static unsigned int | 1651 namespace { |
1344 rest_of_handle_lower_subreg (void) | 1652 |
1345 { | 1653 const pass_data pass_data_lower_subreg = |
1346 decompose_multiword_subregs (); | 1654 { |
1347 return 0; | 1655 RTL_PASS, /* type */ |
1656 "subreg1", /* name */ | |
1657 OPTGROUP_NONE, /* optinfo_flags */ | |
1658 TV_LOWER_SUBREG, /* tv_id */ | |
1659 0, /* properties_required */ | |
1660 0, /* properties_provided */ | |
1661 0, /* properties_destroyed */ | |
1662 0, /* todo_flags_start */ | |
1663 0, /* todo_flags_finish */ | |
1664 }; | |
1665 | |
1666 class pass_lower_subreg : public rtl_opt_pass | |
1667 { | |
1668 public: | |
1669 pass_lower_subreg (gcc::context *ctxt) | |
1670 : rtl_opt_pass (pass_data_lower_subreg, ctxt) | |
1671 {} | |
1672 | |
1673 /* opt_pass methods: */ | |
1674 virtual bool gate (function *) { return flag_split_wide_types != 0; } | |
1675 virtual unsigned int execute (function *) | |
1676 { | |
1677 decompose_multiword_subregs (false); | |
1678 return 0; | |
1679 } | |
1680 | |
1681 }; // class pass_lower_subreg | |
1682 | |
1683 } // anon namespace | |
1684 | |
1685 rtl_opt_pass * | |
1686 make_pass_lower_subreg (gcc::context *ctxt) | |
1687 { | |
1688 return new pass_lower_subreg (ctxt); | |
1348 } | 1689 } |
1349 | 1690 |
1350 /* Implement second lower subreg pass. */ | 1691 /* Implement second lower subreg pass. */ |
1351 | 1692 |
1352 static unsigned int | 1693 namespace { |
1353 rest_of_handle_lower_subreg2 (void) | 1694 |
1354 { | 1695 const pass_data pass_data_lower_subreg2 = |
1355 decompose_multiword_subregs (); | 1696 { |
1356 return 0; | 1697 RTL_PASS, /* type */ |
1357 } | 1698 "subreg2", /* name */ |
1358 | 1699 OPTGROUP_NONE, /* optinfo_flags */ |
1359 struct rtl_opt_pass pass_lower_subreg = | 1700 TV_LOWER_SUBREG, /* tv_id */ |
1360 { | 1701 0, /* properties_required */ |
1361 { | 1702 0, /* properties_provided */ |
1362 RTL_PASS, | 1703 0, /* properties_destroyed */ |
1363 "subreg1", /* name */ | 1704 0, /* todo_flags_start */ |
1364 gate_handle_lower_subreg, /* gate */ | 1705 TODO_df_finish, /* todo_flags_finish */ |
1365 rest_of_handle_lower_subreg, /* execute */ | |
1366 NULL, /* sub */ | |
1367 NULL, /* next */ | |
1368 0, /* static_pass_number */ | |
1369 TV_LOWER_SUBREG, /* tv_id */ | |
1370 0, /* properties_required */ | |
1371 0, /* properties_provided */ | |
1372 0, /* properties_destroyed */ | |
1373 0, /* todo_flags_start */ | |
1374 TODO_dump_func | | |
1375 TODO_ggc_collect | | |
1376 TODO_verify_flow /* todo_flags_finish */ | |
1377 } | |
1378 }; | 1706 }; |
1379 | 1707 |
1380 struct rtl_opt_pass pass_lower_subreg2 = | 1708 class pass_lower_subreg2 : public rtl_opt_pass |
1381 { | 1709 { |
1382 { | 1710 public: |
1383 RTL_PASS, | 1711 pass_lower_subreg2 (gcc::context *ctxt) |
1384 "subreg2", /* name */ | 1712 : rtl_opt_pass (pass_data_lower_subreg2, ctxt) |
1385 gate_handle_lower_subreg, /* gate */ | 1713 {} |
1386 rest_of_handle_lower_subreg2, /* execute */ | 1714 |
1387 NULL, /* sub */ | 1715 /* opt_pass methods: */ |
1388 NULL, /* next */ | 1716 virtual bool gate (function *) { return flag_split_wide_types != 0; } |
1389 0, /* static_pass_number */ | 1717 virtual unsigned int execute (function *) |
1390 TV_LOWER_SUBREG, /* tv_id */ | 1718 { |
1391 0, /* properties_required */ | 1719 decompose_multiword_subregs (true); |
1392 0, /* properties_provided */ | 1720 return 0; |
1393 0, /* properties_destroyed */ | 1721 } |
1394 0, /* todo_flags_start */ | 1722 |
1395 TODO_df_finish | TODO_verify_rtl_sharing | | 1723 }; // class pass_lower_subreg2 |
1396 TODO_dump_func | | 1724 |
1397 TODO_ggc_collect | | 1725 } // anon namespace |
1398 TODO_verify_flow /* todo_flags_finish */ | 1726 |
1399 } | 1727 rtl_opt_pass * |
1400 }; | 1728 make_pass_lower_subreg2 (gcc::context *ctxt) |
1729 { | |
1730 return new pass_lower_subreg2 (ctxt); | |
1731 } |