Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/arm/arm.c @ 67:f6334be47118
update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author | nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 22 Mar 2011 17:18:12 +0900 |
parents | b7f97abdc517 |
children | 04ced10e8804 |
comparison
equal
deleted
inserted
replaced
65:65488c3d617d | 67:f6334be47118 |
---|---|
1 /* Output routines for GCC for ARM. | 1 /* Output routines for GCC for ARM. |
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, | 2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, |
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 | 3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 |
4 Free Software Foundation, Inc. | 4 Free Software Foundation, Inc. |
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) | 5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) |
6 and Martin Simmons (@harleqn.co.uk). | 6 and Martin Simmons (@harleqn.co.uk). |
7 More major hacks by Richard Earnshaw (rearnsha@arm.com). | 7 More major hacks by Richard Earnshaw (rearnsha@arm.com). |
8 | 8 |
38 #include "flags.h" | 38 #include "flags.h" |
39 #include "reload.h" | 39 #include "reload.h" |
40 #include "function.h" | 40 #include "function.h" |
41 #include "expr.h" | 41 #include "expr.h" |
42 #include "optabs.h" | 42 #include "optabs.h" |
43 #include "toplev.h" | 43 #include "diagnostic-core.h" |
44 #include "recog.h" | 44 #include "recog.h" |
45 #include "cgraph.h" | 45 #include "cgraph.h" |
46 #include "ggc.h" | 46 #include "ggc.h" |
47 #include "except.h" | 47 #include "except.h" |
48 #include "c-pragma.h" | 48 #include "c-family/c-pragma.h" /* ??? */ |
49 #include "integrate.h" | 49 #include "integrate.h" |
50 #include "tm_p.h" | 50 #include "tm_p.h" |
51 #include "target.h" | 51 #include "target.h" |
52 #include "target-def.h" | 52 #include "target-def.h" |
53 #include "debug.h" | 53 #include "debug.h" |
54 #include "langhooks.h" | 54 #include "langhooks.h" |
55 #include "df.h" | 55 #include "df.h" |
56 #include "intl.h" | 56 #include "intl.h" |
57 #include "libfuncs.h" | 57 #include "libfuncs.h" |
58 #include "params.h" | |
58 | 59 |
59 /* Forward definitions of types. */ | 60 /* Forward definitions of types. */ |
60 typedef struct minipool_node Mnode; | 61 typedef struct minipool_node Mnode; |
61 typedef struct minipool_fixup Mfix; | 62 typedef struct minipool_fixup Mfix; |
62 | 63 |
63 void (*arm_lang_output_object_attributes_hook)(void); | 64 void (*arm_lang_output_object_attributes_hook)(void); |
64 | 65 |
65 /* Forward function declarations. */ | 66 /* Forward function declarations. */ |
67 static bool arm_needs_doubleword_align (enum machine_mode, const_tree); | |
66 static int arm_compute_static_chain_stack_bytes (void); | 68 static int arm_compute_static_chain_stack_bytes (void); |
67 static arm_stack_offsets *arm_get_frame_offsets (void); | 69 static arm_stack_offsets *arm_get_frame_offsets (void); |
68 static void arm_add_gc_roots (void); | 70 static void arm_add_gc_roots (void); |
69 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx, | 71 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx, |
70 HOST_WIDE_INT, rtx, rtx, int, int); | 72 HOST_WIDE_INT, rtx, rtx, int, int); |
81 static bool thumb_force_lr_save (void); | 83 static bool thumb_force_lr_save (void); |
82 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); | 84 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); |
83 static rtx emit_sfm (int, int); | 85 static rtx emit_sfm (int, int); |
84 static unsigned arm_size_return_regs (void); | 86 static unsigned arm_size_return_regs (void); |
85 static bool arm_assemble_integer (rtx, unsigned int, int); | 87 static bool arm_assemble_integer (rtx, unsigned int, int); |
88 static void arm_print_operand (FILE *, rtx, int); | |
89 static void arm_print_operand_address (FILE *, rtx); | |
90 static bool arm_print_operand_punct_valid_p (unsigned char code); | |
86 static const char *fp_const_from_val (REAL_VALUE_TYPE *); | 91 static const char *fp_const_from_val (REAL_VALUE_TYPE *); |
87 static arm_cc get_arm_condition_code (rtx); | 92 static arm_cc get_arm_condition_code (rtx); |
88 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT); | 93 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT); |
89 static rtx is_jump_table (rtx); | 94 static rtx is_jump_table (rtx); |
90 static const char *output_multi_immediate (rtx *, const char *, const char *, | 95 static const char *output_multi_immediate (rtx *, const char *, const char *, |
157 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); | 162 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); |
158 static void emit_constant_insn (rtx cond, rtx pattern); | 163 static void emit_constant_insn (rtx cond, rtx pattern); |
159 static rtx emit_set_insn (rtx, rtx); | 164 static rtx emit_set_insn (rtx, rtx); |
160 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, | 165 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, |
161 tree, bool); | 166 tree, bool); |
167 static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode, | |
168 const_tree, bool); | |
169 static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode, | |
170 const_tree, bool); | |
171 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree); | |
162 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree, | 172 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree, |
163 const_tree); | 173 const_tree); |
164 static int aapcs_select_return_coproc (const_tree, const_tree); | 174 static int aapcs_select_return_coproc (const_tree, const_tree); |
165 | 175 |
166 #ifdef OBJECT_FORMAT_ELF | 176 #ifdef OBJECT_FORMAT_ELF |
182 static bool arm_default_short_enums (void); | 192 static bool arm_default_short_enums (void); |
183 static bool arm_align_anon_bitfield (void); | 193 static bool arm_align_anon_bitfield (void); |
184 static bool arm_return_in_msb (const_tree); | 194 static bool arm_return_in_msb (const_tree); |
185 static bool arm_must_pass_in_stack (enum machine_mode, const_tree); | 195 static bool arm_must_pass_in_stack (enum machine_mode, const_tree); |
186 static bool arm_return_in_memory (const_tree, const_tree); | 196 static bool arm_return_in_memory (const_tree, const_tree); |
187 #ifdef TARGET_UNWIND_INFO | 197 #if ARM_UNWIND_INFO |
188 static void arm_unwind_emit (FILE *, rtx); | 198 static void arm_unwind_emit (FILE *, rtx); |
189 static bool arm_output_ttype (rtx); | 199 static bool arm_output_ttype (rtx); |
200 static void arm_asm_emit_except_personality (rtx); | |
201 static void arm_asm_init_sections (void); | |
190 #endif | 202 #endif |
203 static enum unwind_info_type arm_except_unwind_info (struct gcc_options *); | |
191 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int); | 204 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int); |
192 static rtx arm_dwarf_register_span (rtx); | 205 static rtx arm_dwarf_register_span (rtx); |
193 | 206 |
194 static tree arm_cxx_guard_type (void); | 207 static tree arm_cxx_guard_type (void); |
195 static bool arm_cxx_guard_mask_bit (void); | 208 static bool arm_cxx_guard_mask_bit (void); |
202 static bool arm_cxx_use_aeabi_atexit (void); | 215 static bool arm_cxx_use_aeabi_atexit (void); |
203 static void arm_init_libfuncs (void); | 216 static void arm_init_libfuncs (void); |
204 static tree arm_build_builtin_va_list (void); | 217 static tree arm_build_builtin_va_list (void); |
205 static void arm_expand_builtin_va_start (tree, rtx); | 218 static void arm_expand_builtin_va_start (tree, rtx); |
206 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); | 219 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); |
220 static void arm_option_override (void); | |
207 static bool arm_handle_option (size_t, const char *, int); | 221 static bool arm_handle_option (size_t, const char *, int); |
208 static void arm_target_help (void); | 222 static void arm_target_help (void); |
209 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode); | 223 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode); |
210 static bool arm_cannot_copy_insn_p (rtx); | 224 static bool arm_cannot_copy_insn_p (rtx); |
211 static bool arm_tls_symbol_p (rtx x); | 225 static bool arm_tls_symbol_p (rtx x); |
212 static int arm_issue_rate (void); | 226 static int arm_issue_rate (void); |
213 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; | 227 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; |
228 static bool arm_output_addr_const_extra (FILE *, rtx); | |
214 static bool arm_allocate_stack_slots_for_args (void); | 229 static bool arm_allocate_stack_slots_for_args (void); |
215 static const char *arm_invalid_parameter_type (const_tree t); | 230 static const char *arm_invalid_parameter_type (const_tree t); |
216 static const char *arm_invalid_return_type (const_tree t); | 231 static const char *arm_invalid_return_type (const_tree t); |
217 static tree arm_promoted_type (const_tree t); | 232 static tree arm_promoted_type (const_tree t); |
218 static tree arm_convert_to_type (tree type, tree expr); | 233 static tree arm_convert_to_type (tree type, tree expr); |
221 static bool arm_can_eliminate (const int, const int); | 236 static bool arm_can_eliminate (const int, const int); |
222 static void arm_asm_trampoline_template (FILE *); | 237 static void arm_asm_trampoline_template (FILE *); |
223 static void arm_trampoline_init (rtx, tree, rtx); | 238 static void arm_trampoline_init (rtx, tree, rtx); |
224 static rtx arm_trampoline_adjust_address (rtx); | 239 static rtx arm_trampoline_adjust_address (rtx); |
225 static rtx arm_pic_static_addr (rtx orig, rtx reg); | 240 static rtx arm_pic_static_addr (rtx orig, rtx reg); |
241 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *); | |
242 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *); | |
243 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *); | |
244 static enum machine_mode arm_preferred_simd_mode (enum machine_mode); | |
245 static bool arm_class_likely_spilled_p (reg_class_t); | |
246 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed); | |
247 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode, | |
248 const_tree type, | |
249 int misalignment, | |
250 bool is_packed); | |
251 static void arm_conditional_register_usage (void); | |
252 static reg_class_t arm_preferred_rename_class (reg_class_t rclass); | |
226 | 253 |
227 | 254 |
228 /* Table of machine attributes. */ | 255 /* Table of machine attributes. */ |
229 static const struct attribute_spec arm_attribute_table[] = | 256 static const struct attribute_spec arm_attribute_table[] = |
230 { | 257 { |
260 { "dllexport", 0, 0, false, false, false, handle_dll_attribute }, | 287 { "dllexport", 0, 0, false, false, false, handle_dll_attribute }, |
261 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute }, | 288 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute }, |
262 #endif | 289 #endif |
263 { NULL, 0, 0, false, false, false, NULL } | 290 { NULL, 0, 0, false, false, false, NULL } |
264 }; | 291 }; |
292 | |
293 /* Set default optimization options. */ | |
294 static const struct default_options arm_option_optimization_table[] = | |
295 { | |
296 /* Enable section anchors by default at -O1 or higher. */ | |
297 { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 }, | |
298 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 }, | |
299 { OPT_LEVELS_NONE, 0, NULL, 0 } | |
300 }; | |
265 | 301 |
266 /* Initialize the GCC target structure. */ | 302 /* Initialize the GCC target structure. */ |
267 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES | 303 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES |
268 #undef TARGET_MERGE_DECL_ATTRIBUTES | 304 #undef TARGET_MERGE_DECL_ATTRIBUTES |
269 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes | 305 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes |
283 #undef TARGET_ASM_ALIGNED_SI_OP | 319 #undef TARGET_ASM_ALIGNED_SI_OP |
284 #define TARGET_ASM_ALIGNED_SI_OP NULL | 320 #define TARGET_ASM_ALIGNED_SI_OP NULL |
285 #undef TARGET_ASM_INTEGER | 321 #undef TARGET_ASM_INTEGER |
286 #define TARGET_ASM_INTEGER arm_assemble_integer | 322 #define TARGET_ASM_INTEGER arm_assemble_integer |
287 | 323 |
324 #undef TARGET_PRINT_OPERAND | |
325 #define TARGET_PRINT_OPERAND arm_print_operand | |
326 #undef TARGET_PRINT_OPERAND_ADDRESS | |
327 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address | |
328 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P | |
329 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p | |
330 | |
331 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA | |
332 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra | |
333 | |
288 #undef TARGET_ASM_FUNCTION_PROLOGUE | 334 #undef TARGET_ASM_FUNCTION_PROLOGUE |
289 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue | 335 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue |
290 | 336 |
291 #undef TARGET_ASM_FUNCTION_EPILOGUE | 337 #undef TARGET_ASM_FUNCTION_EPILOGUE |
292 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue | 338 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue |
295 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG) | 341 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG) |
296 #undef TARGET_HANDLE_OPTION | 342 #undef TARGET_HANDLE_OPTION |
297 #define TARGET_HANDLE_OPTION arm_handle_option | 343 #define TARGET_HANDLE_OPTION arm_handle_option |
298 #undef TARGET_HELP | 344 #undef TARGET_HELP |
299 #define TARGET_HELP arm_target_help | 345 #define TARGET_HELP arm_target_help |
346 #undef TARGET_OPTION_OVERRIDE | |
347 #define TARGET_OPTION_OVERRIDE arm_option_override | |
348 #undef TARGET_OPTION_OPTIMIZATION_TABLE | |
349 #define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table | |
300 | 350 |
301 #undef TARGET_COMP_TYPE_ATTRIBUTES | 351 #undef TARGET_COMP_TYPE_ATTRIBUTES |
302 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes | 352 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes |
303 | 353 |
304 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES | 354 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES |
341 | 391 |
342 #undef TARGET_SHIFT_TRUNCATION_MASK | 392 #undef TARGET_SHIFT_TRUNCATION_MASK |
343 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask | 393 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask |
344 #undef TARGET_VECTOR_MODE_SUPPORTED_P | 394 #undef TARGET_VECTOR_MODE_SUPPORTED_P |
345 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p | 395 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p |
396 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE | |
397 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode | |
346 | 398 |
347 #undef TARGET_MACHINE_DEPENDENT_REORG | 399 #undef TARGET_MACHINE_DEPENDENT_REORG |
348 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg | 400 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg |
349 | 401 |
350 #undef TARGET_INIT_BUILTINS | 402 #undef TARGET_INIT_BUILTINS |
361 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes | 413 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes |
362 #undef TARGET_PASS_BY_REFERENCE | 414 #undef TARGET_PASS_BY_REFERENCE |
363 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference | 415 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference |
364 #undef TARGET_ARG_PARTIAL_BYTES | 416 #undef TARGET_ARG_PARTIAL_BYTES |
365 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes | 417 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes |
418 #undef TARGET_FUNCTION_ARG | |
419 #define TARGET_FUNCTION_ARG arm_function_arg | |
420 #undef TARGET_FUNCTION_ARG_ADVANCE | |
421 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance | |
422 #undef TARGET_FUNCTION_ARG_BOUNDARY | |
423 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary | |
366 | 424 |
367 #undef TARGET_SETUP_INCOMING_VARARGS | 425 #undef TARGET_SETUP_INCOMING_VARARGS |
368 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs | 426 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs |
369 | 427 |
370 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS | 428 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS |
421 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory | 479 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory |
422 | 480 |
423 #undef TARGET_MUST_PASS_IN_STACK | 481 #undef TARGET_MUST_PASS_IN_STACK |
424 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack | 482 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack |
425 | 483 |
426 #ifdef TARGET_UNWIND_INFO | 484 #if ARM_UNWIND_INFO |
427 #undef TARGET_UNWIND_EMIT | 485 #undef TARGET_ASM_UNWIND_EMIT |
428 #define TARGET_UNWIND_EMIT arm_unwind_emit | 486 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit |
429 | 487 |
430 /* EABI unwinding tables use a different format for the typeinfo tables. */ | 488 /* EABI unwinding tables use a different format for the typeinfo tables. */ |
431 #undef TARGET_ASM_TTYPE | 489 #undef TARGET_ASM_TTYPE |
432 #define TARGET_ASM_TTYPE arm_output_ttype | 490 #define TARGET_ASM_TTYPE arm_output_ttype |
433 | 491 |
434 #undef TARGET_ARM_EABI_UNWINDER | 492 #undef TARGET_ARM_EABI_UNWINDER |
435 #define TARGET_ARM_EABI_UNWINDER true | 493 #define TARGET_ARM_EABI_UNWINDER true |
436 #endif /* TARGET_UNWIND_INFO */ | 494 |
495 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY | |
496 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality | |
497 | |
498 #undef TARGET_ASM_INIT_SECTIONS | |
499 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections | |
500 #endif /* ARM_UNWIND_INFO */ | |
501 | |
502 #undef TARGET_EXCEPT_UNWIND_INFO | |
503 #define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info | |
437 | 504 |
438 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC | 505 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC |
439 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec | 506 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec |
440 | 507 |
441 #undef TARGET_DWARF_REGISTER_SPAN | 508 #undef TARGET_DWARF_REGISTER_SPAN |
504 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required | 571 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required |
505 | 572 |
506 #undef TARGET_CAN_ELIMINATE | 573 #undef TARGET_CAN_ELIMINATE |
507 #define TARGET_CAN_ELIMINATE arm_can_eliminate | 574 #define TARGET_CAN_ELIMINATE arm_can_eliminate |
508 | 575 |
576 #undef TARGET_CONDITIONAL_REGISTER_USAGE | |
577 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage | |
578 | |
579 #undef TARGET_CLASS_LIKELY_SPILLED_P | |
580 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p | |
581 | |
582 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE | |
583 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \ | |
584 arm_vector_alignment_reachable | |
585 | |
586 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT | |
587 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ | |
588 arm_builtin_support_vector_misalignment | |
589 | |
590 #undef TARGET_PREFERRED_RENAME_CLASS | |
591 #define TARGET_PREFERRED_RENAME_CLASS \ | |
592 arm_preferred_rename_class | |
593 | |
509 struct gcc_target targetm = TARGET_INITIALIZER; | 594 struct gcc_target targetm = TARGET_INITIALIZER; |
510 | 595 |
511 /* Obstack for minipool constant handling. */ | 596 /* Obstack for minipool constant handling. */ |
512 static struct obstack minipool_obstack; | 597 static struct obstack minipool_obstack; |
513 static char * minipool_startobj; | 598 static char * minipool_startobj; |
524 /* The processor for which instructions should be scheduled. */ | 609 /* The processor for which instructions should be scheduled. */ |
525 enum processor_type arm_tune = arm_none; | 610 enum processor_type arm_tune = arm_none; |
526 | 611 |
527 /* The current tuning set. */ | 612 /* The current tuning set. */ |
528 const struct tune_params *current_tune; | 613 const struct tune_params *current_tune; |
529 | |
530 /* The default processor used if not overridden by commandline. */ | |
531 static enum processor_type arm_default_cpu = arm_none; | |
532 | 614 |
533 /* Which floating point hardware to schedule for. */ | 615 /* Which floating point hardware to schedule for. */ |
534 int arm_fpu_attr; | 616 int arm_fpu_attr; |
535 | 617 |
536 /* Which floating popint hardware to use. */ | 618 /* Which floating popint hardware to use. */ |
580 #define FL_DIV (1 << 18) /* Hardware divide. */ | 662 #define FL_DIV (1 << 18) /* Hardware divide. */ |
581 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */ | 663 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */ |
582 #define FL_NEON (1 << 20) /* Neon instructions. */ | 664 #define FL_NEON (1 << 20) /* Neon instructions. */ |
583 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M | 665 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M |
584 architecture. */ | 666 architecture. */ |
667 #define FL_ARCH7 (1 << 22) /* Architecture 7. */ | |
585 | 668 |
586 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ | 669 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ |
670 | |
671 /* Flags that only effect tuning, not available instructions. */ | |
672 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \ | |
673 | FL_CO_PROC) | |
587 | 674 |
588 #define FL_FOR_ARCH2 FL_NOTM | 675 #define FL_FOR_ARCH2 FL_NOTM |
589 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32) | 676 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32) |
590 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M) | 677 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M) |
591 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4) | 678 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4) |
600 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K) | 687 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K) |
601 #define FL_FOR_ARCH6Z FL_FOR_ARCH6 | 688 #define FL_FOR_ARCH6Z FL_FOR_ARCH6 |
602 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K | 689 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K |
603 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) | 690 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) |
604 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) | 691 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) |
605 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM) | 692 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7) |
606 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) | 693 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) |
607 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) | 694 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) |
608 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV) | 695 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV) |
609 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) | 696 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) |
610 | 697 |
638 int arm_arch6 = 0; | 725 int arm_arch6 = 0; |
639 | 726 |
640 /* Nonzero if this chip supports the ARM 6K extensions. */ | 727 /* Nonzero if this chip supports the ARM 6K extensions. */ |
641 int arm_arch6k = 0; | 728 int arm_arch6k = 0; |
642 | 729 |
730 /* Nonzero if this chip supports the ARM 7 extensions. */ | |
731 int arm_arch7 = 0; | |
732 | |
643 /* Nonzero if instructions not present in the 'M' profile can be used. */ | 733 /* Nonzero if instructions not present in the 'M' profile can be used. */ |
644 int arm_arch_notm = 0; | 734 int arm_arch_notm = 0; |
645 | 735 |
646 /* Nonzero if instructions present in ARMv7E-M can be used. */ | 736 /* Nonzero if instructions present in ARMv7E-M can be used. */ |
647 int arm_arch7em = 0; | 737 int arm_arch7em = 0; |
671 /* Nonzero if tuning for Cortex-A9. */ | 761 /* Nonzero if tuning for Cortex-A9. */ |
672 int arm_tune_cortex_a9 = 0; | 762 int arm_tune_cortex_a9 = 0; |
673 | 763 |
674 /* Nonzero if generating Thumb instructions. */ | 764 /* Nonzero if generating Thumb instructions. */ |
675 int thumb_code = 0; | 765 int thumb_code = 0; |
766 | |
767 /* Nonzero if generating Thumb-1 instructions. */ | |
768 int thumb1_code = 0; | |
676 | 769 |
677 /* Nonzero if we should define __THUMB_INTERWORK__ in the | 770 /* Nonzero if we should define __THUMB_INTERWORK__ in the |
678 preprocessor. | 771 preprocessor. |
679 XXX This is a bit of a hack, it's intended to help work around | 772 XXX This is a bit of a hack, it's intended to help work around |
680 problems in GLD which doesn't understand that armv5t code is | 773 problems in GLD which doesn't understand that armv5t code is |
685 int arm_arch_thumb2; | 778 int arm_arch_thumb2; |
686 | 779 |
687 /* Nonzero if chip supports integer division instruction. */ | 780 /* Nonzero if chip supports integer division instruction. */ |
688 int arm_arch_hwdiv; | 781 int arm_arch_hwdiv; |
689 | 782 |
690 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we | 783 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, |
691 must report the mode of the memory reference from PRINT_OPERAND to | 784 we must report the mode of the memory reference from |
692 PRINT_OPERAND_ADDRESS. */ | 785 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */ |
693 enum machine_mode output_memory_reference_mode; | 786 enum machine_mode output_memory_reference_mode; |
694 | 787 |
695 /* The register number to be used for the PIC offset register. */ | 788 /* The register number to be used for the PIC offset register. */ |
696 unsigned arm_pic_register = INVALID_REGNUM; | 789 unsigned arm_pic_register = INVALID_REGNUM; |
697 | 790 |
698 /* Set to 1 after arm_reorg has started. Reset to start at the start of | 791 /* Set to 1 after arm_reorg has started. Reset to start at the start of |
699 the next function. */ | 792 the next function. */ |
700 static int after_arm_reorg = 0; | 793 static int after_arm_reorg = 0; |
701 | 794 |
702 static enum arm_pcs arm_pcs_default; | 795 enum arm_pcs arm_pcs_default; |
703 | 796 |
704 /* For an explanation of these variables, see final_prescan_insn below. */ | 797 /* For an explanation of these variables, see final_prescan_insn below. */ |
705 int arm_ccfsm_state; | 798 int arm_ccfsm_state; |
706 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */ | 799 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */ |
707 enum arm_cond_code arm_current_cc; | 800 enum arm_cond_code arm_current_cc; |
801 | |
708 rtx arm_target_insn; | 802 rtx arm_target_insn; |
709 int arm_target_label; | 803 int arm_target_label; |
710 /* The number of conditionally executed insns, including the current insn. */ | 804 /* The number of conditionally executed insns, including the current insn. */ |
711 int arm_condexec_count = 0; | 805 int arm_condexec_count = 0; |
712 /* A bitmask specifying the patterns for the IT block. | 806 /* A bitmask specifying the patterns for the IT block. |
720 { | 814 { |
721 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", | 815 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", |
722 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" | 816 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" |
723 }; | 817 }; |
724 | 818 |
819 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */ | |
820 int arm_regs_in_sequence[] = | |
821 { | |
822 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 | |
823 }; | |
824 | |
725 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl") | 825 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl") |
726 #define streq(string1, string2) (strcmp (string1, string2) == 0) | 826 #define streq(string1, string2) (strcmp (string1, string2) == 0) |
727 | 827 |
728 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \ | 828 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \ |
729 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \ | 829 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \ |
738 const char *arch; | 838 const char *arch; |
739 const unsigned long flags; | 839 const unsigned long flags; |
740 const struct tune_params *const tune; | 840 const struct tune_params *const tune; |
741 }; | 841 }; |
742 | 842 |
843 | |
844 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1 | |
845 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \ | |
846 prefetch_slots, \ | |
847 l1_size, \ | |
848 l1_line_size | |
849 | |
743 const struct tune_params arm_slowmul_tune = | 850 const struct tune_params arm_slowmul_tune = |
744 { | 851 { |
745 arm_slowmul_rtx_costs, | 852 arm_slowmul_rtx_costs, |
746 3 | 853 NULL, |
854 3, | |
855 ARM_PREFETCH_NOT_BENEFICIAL | |
747 }; | 856 }; |
748 | 857 |
749 const struct tune_params arm_fastmul_tune = | 858 const struct tune_params arm_fastmul_tune = |
750 { | 859 { |
751 arm_fastmul_rtx_costs, | 860 arm_fastmul_rtx_costs, |
752 1 | 861 NULL, |
862 1, | |
863 ARM_PREFETCH_NOT_BENEFICIAL | |
753 }; | 864 }; |
754 | 865 |
755 const struct tune_params arm_xscale_tune = | 866 const struct tune_params arm_xscale_tune = |
756 { | 867 { |
757 arm_xscale_rtx_costs, | 868 arm_xscale_rtx_costs, |
758 2 | 869 xscale_sched_adjust_cost, |
870 2, | |
871 ARM_PREFETCH_NOT_BENEFICIAL | |
759 }; | 872 }; |
760 | 873 |
761 const struct tune_params arm_9e_tune = | 874 const struct tune_params arm_9e_tune = |
762 { | 875 { |
763 arm_9e_rtx_costs, | 876 arm_9e_rtx_costs, |
764 1 | 877 NULL, |
878 1, | |
879 ARM_PREFETCH_NOT_BENEFICIAL | |
765 }; | 880 }; |
881 | |
882 const struct tune_params arm_cortex_a9_tune = | |
883 { | |
884 arm_9e_rtx_costs, | |
885 cortex_a9_sched_adjust_cost, | |
886 1, | |
887 ARM_PREFETCH_BENEFICIAL(4,32,32) | |
888 }; | |
889 | |
890 const struct tune_params arm_fa726te_tune = | |
891 { | |
892 arm_9e_rtx_costs, | |
893 fa726te_sched_adjust_cost, | |
894 1, | |
895 ARM_PREFETCH_NOT_BENEFICIAL | |
896 }; | |
897 | |
766 | 898 |
767 /* Not all of these give usefully different compilation alternatives, | 899 /* Not all of these give usefully different compilation alternatives, |
768 but there is no simple way of generalizing them. */ | 900 but there is no simple way of generalizing them. */ |
769 static const struct processors all_cores[] = | 901 static const struct processors all_cores[] = |
770 { | 902 { |
771 /* ARM Cores */ | 903 /* ARM Cores */ |
772 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \ | 904 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \ |
773 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune}, | 905 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune}, |
774 #include "arm-cores.def" | 906 #include "arm-cores.def" |
775 #undef ARM_CORE | 907 #undef ARM_CORE |
776 {NULL, arm_none, NULL, 0, NULL} | 908 {NULL, arm_none, NULL, 0, NULL} |
777 }; | 909 }; |
778 | 910 |
803 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL}, | 935 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL}, |
804 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL}, | 936 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL}, |
805 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL}, | 937 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL}, |
806 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL}, | 938 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL}, |
807 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL}, | 939 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL}, |
808 {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL}, | 940 {"armv7e-m", cortexm4, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL}, |
809 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL}, | 941 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL}, |
810 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, | 942 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, |
811 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, | 943 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, |
812 {NULL, arm_none, NULL, 0 , NULL} | 944 {NULL, arm_none, NULL, 0 , NULL} |
813 }; | 945 }; |
814 | 946 |
815 struct arm_cpu_select | 947 |
816 { | 948 /* These are populated as commandline arguments are processed, or NULL |
817 const char * string; | 949 if not specified. */ |
818 const char * name; | 950 static const struct processors *arm_selected_arch; |
819 const struct processors * processors; | 951 static const struct processors *arm_selected_cpu; |
820 }; | 952 static const struct processors *arm_selected_tune; |
821 | |
822 /* This is a magic structure. The 'string' field is magically filled in | |
823 with a pointer to the value specified by the user on the command line | |
824 assuming that the user has specified such a value. */ | |
825 | |
826 static struct arm_cpu_select arm_select[] = | |
827 { | |
828 /* string name processors */ | |
829 { NULL, "-mcpu=", all_cores }, | |
830 { NULL, "-march=", all_architectures }, | |
831 { NULL, "-mtune=", all_cores } | |
832 }; | |
833 | |
834 /* Defines representing the indexes into the above table. */ | |
835 #define ARM_OPT_SET_CPU 0 | |
836 #define ARM_OPT_SET_ARCH 1 | |
837 #define ARM_OPT_SET_TUNE 2 | |
838 | 953 |
839 /* The name of the preprocessor macro to define for this architecture. */ | 954 /* The name of the preprocessor macro to define for this architecture. */ |
840 | 955 |
841 char arm_arch_name[] = "__ARM_ARCH_0UNK__"; | 956 char arm_arch_name[] = "__ARM_ARCH_0UNK__"; |
842 | 957 |
1142 TYPE_DECL, | 1257 TYPE_DECL, |
1143 get_identifier ("__va_list"), | 1258 get_identifier ("__va_list"), |
1144 va_list_type); | 1259 va_list_type); |
1145 DECL_ARTIFICIAL (va_list_name) = 1; | 1260 DECL_ARTIFICIAL (va_list_name) = 1; |
1146 TYPE_NAME (va_list_type) = va_list_name; | 1261 TYPE_NAME (va_list_type) = va_list_name; |
1262 TYPE_STUB_DECL (va_list_type) = va_list_name; | |
1147 /* Create the __ap field. */ | 1263 /* Create the __ap field. */ |
1148 ap_field = build_decl (BUILTINS_LOCATION, | 1264 ap_field = build_decl (BUILTINS_LOCATION, |
1149 FIELD_DECL, | 1265 FIELD_DECL, |
1150 get_identifier ("__ap"), | 1266 get_identifier ("__ap"), |
1151 ptr_type_node); | 1267 ptr_type_node); |
1194 { | 1310 { |
1195 valist = arm_extract_valist_ptr (valist); | 1311 valist = arm_extract_valist_ptr (valist); |
1196 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); | 1312 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); |
1197 } | 1313 } |
1198 | 1314 |
1315 /* Lookup NAME in SEL. */ | |
1316 | |
1317 static const struct processors * | |
1318 arm_find_cpu (const char *name, const struct processors *sel, const char *desc) | |
1319 { | |
1320 if (!(name && *name)) | |
1321 return NULL; | |
1322 | |
1323 for (; sel->name != NULL; sel++) | |
1324 { | |
1325 if (streq (name, sel->name)) | |
1326 return sel; | |
1327 } | |
1328 | |
1329 error ("bad value (%s) for %s switch", name, desc); | |
1330 return NULL; | |
1331 } | |
1332 | |
1199 /* Implement TARGET_HANDLE_OPTION. */ | 1333 /* Implement TARGET_HANDLE_OPTION. */ |
1200 | 1334 |
1201 static bool | 1335 static bool |
1202 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED) | 1336 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED) |
1203 { | 1337 { |
1204 switch (code) | 1338 switch (code) |
1205 { | 1339 { |
1206 case OPT_march_: | 1340 case OPT_march_: |
1207 arm_select[1].string = arg; | 1341 arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march"); |
1208 return true; | 1342 return true; |
1209 | 1343 |
1210 case OPT_mcpu_: | 1344 case OPT_mcpu_: |
1211 arm_select[0].string = arg; | 1345 arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu"); |
1212 return true; | 1346 return true; |
1213 | 1347 |
1214 case OPT_mhard_float: | 1348 case OPT_mhard_float: |
1215 target_float_abi_name = "hard"; | 1349 target_float_abi_name = "hard"; |
1216 return true; | 1350 return true; |
1218 case OPT_msoft_float: | 1352 case OPT_msoft_float: |
1219 target_float_abi_name = "soft"; | 1353 target_float_abi_name = "soft"; |
1220 return true; | 1354 return true; |
1221 | 1355 |
1222 case OPT_mtune_: | 1356 case OPT_mtune_: |
1223 arm_select[2].string = arg; | 1357 arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune"); |
1224 return true; | 1358 return true; |
1225 | 1359 |
1226 default: | 1360 default: |
1227 return true; | 1361 return true; |
1228 } | 1362 } |
1241 replaced by a single function. */ | 1375 replaced by a single function. */ |
1242 if (columns == 0) | 1376 if (columns == 0) |
1243 { | 1377 { |
1244 const char *p; | 1378 const char *p; |
1245 | 1379 |
1246 GET_ENVIRONMENT (p, "COLUMNS"); | 1380 p = getenv ("COLUMNS"); |
1247 if (p != NULL) | 1381 if (p != NULL) |
1248 { | 1382 { |
1249 int value = atoi (p); | 1383 int value = atoi (p); |
1250 | 1384 |
1251 if (value > 0) | 1385 if (value > 0) |
1312 } | 1446 } |
1313 printf ("\n"); | 1447 printf ("\n"); |
1314 | 1448 |
1315 } | 1449 } |
1316 | 1450 |
1317 /* Fix up any incompatible options that the user has specified. | 1451 /* Fix up any incompatible options that the user has specified. */ |
1318 This has now turned into a maze. */ | 1452 static void |
1319 void | 1453 arm_option_override (void) |
1320 arm_override_options (void) | |
1321 { | 1454 { |
1322 unsigned i; | 1455 unsigned i; |
1323 enum processor_type target_arch_cpu = arm_none; | 1456 |
1324 enum processor_type selected_cpu = arm_none; | 1457 #ifdef SUBTARGET_OVERRIDE_OPTIONS |
1325 | 1458 SUBTARGET_OVERRIDE_OPTIONS; |
1326 /* Set up the flags based on the cpu/architecture selected by the user. */ | 1459 #endif |
1327 for (i = ARRAY_SIZE (arm_select); i--;) | 1460 |
1328 { | 1461 if (arm_selected_arch) |
1329 struct arm_cpu_select * ptr = arm_select + i; | 1462 { |
1330 | 1463 if (arm_selected_cpu) |
1331 if (ptr->string != NULL && ptr->string[0] != '\0') | 1464 { |
1332 { | 1465 /* Check for conflict between mcpu and march. */ |
1333 const struct processors * sel; | 1466 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE) |
1334 | 1467 { |
1335 for (sel = ptr->processors; sel->name != NULL; sel++) | 1468 warning (0, "switch -mcpu=%s conflicts with -march=%s switch", |
1336 if (streq (ptr->string, sel->name)) | 1469 arm_selected_cpu->name, arm_selected_arch->name); |
1337 { | 1470 /* -march wins for code generation. |
1338 /* Set the architecture define. */ | 1471 -mcpu wins for default tuning. */ |
1339 if (i != ARM_OPT_SET_TUNE) | 1472 if (!arm_selected_tune) |
1340 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch); | 1473 arm_selected_tune = arm_selected_cpu; |
1341 | 1474 |
1342 /* Determine the processor core for which we should | 1475 arm_selected_cpu = arm_selected_arch; |
1343 tune code-generation. */ | 1476 } |
1344 if (/* -mcpu= is a sensible default. */ | 1477 else |
1345 i == ARM_OPT_SET_CPU | 1478 /* -mcpu wins. */ |
1346 /* -mtune= overrides -mcpu= and -march=. */ | 1479 arm_selected_arch = NULL; |
1347 || i == ARM_OPT_SET_TUNE) | 1480 } |
1348 arm_tune = (enum processor_type) (sel - ptr->processors); | 1481 else |
1349 | 1482 /* Pick a CPU based on the architecture. */ |
1350 /* Remember the CPU associated with this architecture. | 1483 arm_selected_cpu = arm_selected_arch; |
1351 If no other option is used to set the CPU type, | 1484 } |
1352 we'll use this to guess the most suitable tuning | |
1353 options. */ | |
1354 if (i == ARM_OPT_SET_ARCH) | |
1355 target_arch_cpu = sel->core; | |
1356 | |
1357 if (i == ARM_OPT_SET_CPU) | |
1358 selected_cpu = (enum processor_type) (sel - ptr->processors); | |
1359 | |
1360 if (i != ARM_OPT_SET_TUNE) | |
1361 { | |
1362 /* If we have been given an architecture and a processor | |
1363 make sure that they are compatible. We only generate | |
1364 a warning though, and we prefer the CPU over the | |
1365 architecture. */ | |
1366 if (insn_flags != 0 && (insn_flags ^ sel->flags)) | |
1367 warning (0, "switch -mcpu=%s conflicts with -march= switch", | |
1368 ptr->string); | |
1369 | |
1370 insn_flags = sel->flags; | |
1371 } | |
1372 | |
1373 break; | |
1374 } | |
1375 | |
1376 if (sel->name == NULL) | |
1377 error ("bad value (%s) for %s switch", ptr->string, ptr->name); | |
1378 } | |
1379 } | |
1380 | |
1381 /* Guess the tuning options from the architecture if necessary. */ | |
1382 if (arm_tune == arm_none) | |
1383 arm_tune = target_arch_cpu; | |
1384 | 1485 |
1385 /* If the user did not specify a processor, choose one for them. */ | 1486 /* If the user did not specify a processor, choose one for them. */ |
1386 if (insn_flags == 0) | 1487 if (!arm_selected_cpu) |
1387 { | 1488 { |
1388 const struct processors * sel; | 1489 const struct processors * sel; |
1389 unsigned int sought; | 1490 unsigned int sought; |
1390 | 1491 |
1391 selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT; | 1492 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT]; |
1392 if (selected_cpu == arm_none) | 1493 if (!arm_selected_cpu->name) |
1393 { | 1494 { |
1394 #ifdef SUBTARGET_CPU_DEFAULT | 1495 #ifdef SUBTARGET_CPU_DEFAULT |
1395 /* Use the subtarget default CPU if none was specified by | 1496 /* Use the subtarget default CPU if none was specified by |
1396 configure. */ | 1497 configure. */ |
1397 selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT; | 1498 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT]; |
1398 #endif | 1499 #endif |
1399 /* Default to ARM6. */ | 1500 /* Default to ARM6. */ |
1400 if (selected_cpu == arm_none) | 1501 if (!arm_selected_cpu->name) |
1401 selected_cpu = arm6; | 1502 arm_selected_cpu = &all_cores[arm6]; |
1402 } | 1503 } |
1403 sel = &all_cores[selected_cpu]; | 1504 |
1404 | 1505 sel = arm_selected_cpu; |
1405 insn_flags = sel->flags; | 1506 insn_flags = sel->flags; |
1406 | 1507 |
1407 /* Now check to see if the user has specified some command line | 1508 /* Now check to see if the user has specified some command line |
1408 switch that require certain abilities from the cpu. */ | 1509 switch that require certain abilities from the cpu. */ |
1409 sought = 0; | 1510 sought = 0; |
1460 | 1561 |
1461 gcc_assert (best_fit); | 1562 gcc_assert (best_fit); |
1462 sel = best_fit; | 1563 sel = best_fit; |
1463 } | 1564 } |
1464 | 1565 |
1465 insn_flags = sel->flags; | 1566 arm_selected_cpu = sel; |
1466 } | 1567 } |
1467 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch); | 1568 } |
1468 arm_default_cpu = (enum processor_type) (sel - all_cores); | 1569 |
1469 if (arm_tune == arm_none) | 1570 gcc_assert (arm_selected_cpu); |
1470 arm_tune = arm_default_cpu; | 1571 /* The selected cpu may be an architecture, so lookup tuning by core ID. */ |
1471 } | 1572 if (!arm_selected_tune) |
1472 | 1573 arm_selected_tune = &all_cores[arm_selected_cpu->core]; |
1473 /* The processor for which we should tune should now have been | 1574 |
1474 chosen. */ | 1575 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch); |
1475 gcc_assert (arm_tune != arm_none); | 1576 insn_flags = arm_selected_cpu->flags; |
1476 | 1577 |
1477 tune_flags = all_cores[(int)arm_tune].flags; | 1578 arm_tune = arm_selected_tune->core; |
1478 current_tune = all_cores[(int)arm_tune].tune; | 1579 tune_flags = arm_selected_tune->flags; |
1580 current_tune = arm_selected_tune->tune; | |
1479 | 1581 |
1480 if (target_fp16_format_name) | 1582 if (target_fp16_format_name) |
1481 { | 1583 { |
1482 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++) | 1584 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++) |
1483 { | 1585 { |
1536 } | 1638 } |
1537 | 1639 |
1538 /* Callee super interworking implies thumb interworking. Adding | 1640 /* Callee super interworking implies thumb interworking. Adding |
1539 this to the flags here simplifies the logic elsewhere. */ | 1641 this to the flags here simplifies the logic elsewhere. */ |
1540 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING) | 1642 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING) |
1541 target_flags |= MASK_INTERWORK; | 1643 target_flags |= MASK_INTERWORK; |
1542 | 1644 |
1543 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done | 1645 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done |
1544 from here where no function is being compiled currently. */ | 1646 from here where no function is being compiled currently. */ |
1545 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM) | 1647 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM) |
1546 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb"); | 1648 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb"); |
1547 | 1649 |
1548 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING) | 1650 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING) |
1549 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb"); | 1651 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb"); |
1550 | |
1551 if (TARGET_ARM && TARGET_CALLER_INTERWORKING) | |
1552 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb"); | |
1553 | 1652 |
1554 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME) | 1653 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME) |
1555 { | 1654 { |
1556 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame"); | 1655 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame"); |
1557 target_flags |= MASK_APCS_FRAME; | 1656 target_flags |= MASK_APCS_FRAME; |
1584 arm_arch5 = (insn_flags & FL_ARCH5) != 0; | 1683 arm_arch5 = (insn_flags & FL_ARCH5) != 0; |
1585 arm_arch5e = (insn_flags & FL_ARCH5E) != 0; | 1684 arm_arch5e = (insn_flags & FL_ARCH5E) != 0; |
1586 arm_arch6 = (insn_flags & FL_ARCH6) != 0; | 1685 arm_arch6 = (insn_flags & FL_ARCH6) != 0; |
1587 arm_arch6k = (insn_flags & FL_ARCH6K) != 0; | 1686 arm_arch6k = (insn_flags & FL_ARCH6K) != 0; |
1588 arm_arch_notm = (insn_flags & FL_NOTM) != 0; | 1687 arm_arch_notm = (insn_flags & FL_NOTM) != 0; |
1688 arm_arch7 = (insn_flags & FL_ARCH7) != 0; | |
1589 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0; | 1689 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0; |
1590 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0; | 1690 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0; |
1591 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0; | 1691 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0; |
1592 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0; | 1692 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0; |
1593 | 1693 |
1594 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0; | 1694 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0; |
1595 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0; | 1695 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0; |
1596 thumb_code = (TARGET_ARM == 0); | 1696 thumb_code = TARGET_ARM == 0; |
1697 thumb1_code = TARGET_THUMB1 != 0; | |
1597 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; | 1698 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; |
1598 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; | 1699 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; |
1599 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; | 1700 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; |
1600 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0; | 1701 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0; |
1601 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; | 1702 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; |
1856 } | 1957 } |
1857 | 1958 |
1858 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */ | 1959 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */ |
1859 if (fix_cm3_ldrd == 2) | 1960 if (fix_cm3_ldrd == 2) |
1860 { | 1961 { |
1861 if (selected_cpu == cortexm3) | 1962 if (arm_selected_cpu->core == cortexm3) |
1862 fix_cm3_ldrd = 1; | 1963 fix_cm3_ldrd = 1; |
1863 else | 1964 else |
1864 fix_cm3_ldrd = 0; | 1965 fix_cm3_ldrd = 0; |
1865 } | 1966 } |
1866 | 1967 |
1891 inform (input_location, | 1992 inform (input_location, |
1892 "-freorder-blocks-and-partition not supported on this architecture"); | 1993 "-freorder-blocks-and-partition not supported on this architecture"); |
1893 flag_reorder_blocks_and_partition = 0; | 1994 flag_reorder_blocks_and_partition = 0; |
1894 flag_reorder_blocks = 1; | 1995 flag_reorder_blocks = 1; |
1895 } | 1996 } |
1997 | |
1998 if (flag_pic) | |
1999 /* Hoisting PIC address calculations more aggressively provides a small, | |
2000 but measurable, size reduction for PIC code. Therefore, we decrease | |
2001 the bar for unrestricted expression hoisting to the cost of PIC address | |
2002 calculation, which is 2 instructions. */ | |
2003 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2, | |
2004 global_options.x_param_values, | |
2005 global_options_set.x_param_values); | |
2006 | |
2007 /* ARM EABI defaults to strict volatile bitfields. */ | |
2008 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0) | |
2009 flag_strict_volatile_bitfields = 1; | |
2010 | |
2011 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed | |
2012 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */ | |
2013 if (flag_prefetch_loop_arrays < 0 | |
2014 && HAVE_prefetch | |
2015 && optimize >= 3 | |
2016 && current_tune->num_prefetch_slots > 0) | |
2017 flag_prefetch_loop_arrays = 1; | |
2018 | |
2019 /* Set up parameters to be used in prefetching algorithm. Do not override the | |
2020 defaults unless we are tuning for a core we have researched values for. */ | |
2021 if (current_tune->num_prefetch_slots > 0) | |
2022 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, | |
2023 current_tune->num_prefetch_slots, | |
2024 global_options.x_param_values, | |
2025 global_options_set.x_param_values); | |
2026 if (current_tune->l1_cache_line_size >= 0) | |
2027 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, | |
2028 current_tune->l1_cache_line_size, | |
2029 global_options.x_param_values, | |
2030 global_options_set.x_param_values); | |
2031 if (current_tune->l1_cache_size >= 0) | |
2032 maybe_set_param_value (PARAM_L1_CACHE_SIZE, | |
2033 current_tune->l1_cache_size, | |
2034 global_options.x_param_values, | |
2035 global_options_set.x_param_values); | |
1896 | 2036 |
1897 /* Register global variables with the garbage collector. */ | 2037 /* Register global variables with the garbage collector. */ |
1898 arm_add_gc_roots (); | 2038 arm_add_gc_roots (); |
1899 } | 2039 } |
1900 | 2040 |
1980 register values that will never be needed again. This optimization | 2120 register values that will never be needed again. This optimization |
1981 was added to speed up context switching in a kernel application. */ | 2121 was added to speed up context switching in a kernel application. */ |
1982 if (optimize > 0 | 2122 if (optimize > 0 |
1983 && (TREE_NOTHROW (current_function_decl) | 2123 && (TREE_NOTHROW (current_function_decl) |
1984 || !(flag_unwind_tables | 2124 || !(flag_unwind_tables |
1985 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))) | 2125 || (flag_exceptions |
2126 && arm_except_unwind_info (&global_options) != UI_SJLJ))) | |
1986 && TREE_THIS_VOLATILE (current_function_decl)) | 2127 && TREE_THIS_VOLATILE (current_function_decl)) |
1987 type |= ARM_FT_VOLATILE; | 2128 type |= ARM_FT_VOLATILE; |
1988 | 2129 |
1989 if (cfun->static_chain_decl != NULL) | 2130 if (cfun->static_chain_decl != NULL) |
1990 type |= ARM_FT_NESTED; | 2131 type |= ARM_FT_NESTED; |
2280 } | 2421 } |
2281 else | 2422 else |
2282 { | 2423 { |
2283 HOST_WIDE_INT v; | 2424 HOST_WIDE_INT v; |
2284 | 2425 |
2285 /* Allow repeated pattern. */ | 2426 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */ |
2286 v = i & 0xff; | 2427 v = i & 0xff; |
2287 v |= v << 16; | 2428 v |= v << 16; |
2288 if (i == v || i == (v | (v << 8))) | 2429 if (i == v || i == (v | (v << 8))) |
2430 return TRUE; | |
2431 | |
2432 /* Allow repeated pattern 0xXY00XY00. */ | |
2433 v = i & 0xff00; | |
2434 v |= v << 16; | |
2435 if (i == v) | |
2289 return TRUE; | 2436 return TRUE; |
2290 } | 2437 } |
2291 | 2438 |
2292 return FALSE; | 2439 return FALSE; |
2293 } | 2440 } |
3204 /* Canonicalize a comparison so that we are more likely to recognize it. | 3351 /* Canonicalize a comparison so that we are more likely to recognize it. |
3205 This can be done for a few constant compares, where we can make the | 3352 This can be done for a few constant compares, where we can make the |
3206 immediate value easier to load. */ | 3353 immediate value easier to load. */ |
3207 | 3354 |
3208 enum rtx_code | 3355 enum rtx_code |
3209 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode, | 3356 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1) |
3210 rtx * op1) | 3357 { |
3211 { | 3358 enum machine_mode mode; |
3212 unsigned HOST_WIDE_INT i = INTVAL (*op1); | 3359 unsigned HOST_WIDE_INT i, maxval; |
3213 unsigned HOST_WIDE_INT maxval; | 3360 |
3361 mode = GET_MODE (*op0); | |
3362 if (mode == VOIDmode) | |
3363 mode = GET_MODE (*op1); | |
3364 | |
3214 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1; | 3365 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1; |
3366 | |
3367 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode | |
3368 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either | |
3369 reversed or (for constant OP1) adjusted to GE/LT. Similarly | |
3370 for GTU/LEU in Thumb mode. */ | |
3371 if (mode == DImode) | |
3372 { | |
3373 rtx tem; | |
3374 | |
3375 /* To keep things simple, always use the Cirrus cfcmp64 if it is | |
3376 available. */ | |
3377 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK) | |
3378 return code; | |
3379 | |
3380 if (code == GT || code == LE | |
3381 || (!TARGET_ARM && (code == GTU || code == LEU))) | |
3382 { | |
3383 /* Missing comparison. First try to use an available | |
3384 comparison. */ | |
3385 if (GET_CODE (*op1) == CONST_INT) | |
3386 { | |
3387 i = INTVAL (*op1); | |
3388 switch (code) | |
3389 { | |
3390 case GT: | |
3391 case LE: | |
3392 if (i != maxval | |
3393 && arm_const_double_by_immediates (GEN_INT (i + 1))) | |
3394 { | |
3395 *op1 = GEN_INT (i + 1); | |
3396 return code == GT ? GE : LT; | |
3397 } | |
3398 break; | |
3399 case GTU: | |
3400 case LEU: | |
3401 if (i != ~((unsigned HOST_WIDE_INT) 0) | |
3402 && arm_const_double_by_immediates (GEN_INT (i + 1))) | |
3403 { | |
3404 *op1 = GEN_INT (i + 1); | |
3405 return code == GTU ? GEU : LTU; | |
3406 } | |
3407 break; | |
3408 default: | |
3409 gcc_unreachable (); | |
3410 } | |
3411 } | |
3412 | |
3413 /* If that did not work, reverse the condition. */ | |
3414 tem = *op0; | |
3415 *op0 = *op1; | |
3416 *op1 = tem; | |
3417 return swap_condition (code); | |
3418 } | |
3419 | |
3420 return code; | |
3421 } | |
3422 | |
3423 /* Comparisons smaller than DImode. Only adjust comparisons against | |
3424 an out-of-range constant. */ | |
3425 if (GET_CODE (*op1) != CONST_INT | |
3426 || const_ok_for_arm (INTVAL (*op1)) | |
3427 || const_ok_for_arm (- INTVAL (*op1))) | |
3428 return code; | |
3429 | |
3430 i = INTVAL (*op1); | |
3215 | 3431 |
3216 switch (code) | 3432 switch (code) |
3217 { | 3433 { |
3218 case EQ: | 3434 case EQ: |
3219 case NE: | 3435 case NE: |
3487 | 3703 |
3488 /* Find the first field, ignoring non FIELD_DECL things which will | 3704 /* Find the first field, ignoring non FIELD_DECL things which will |
3489 have been created by C++. */ | 3705 have been created by C++. */ |
3490 for (field = TYPE_FIELDS (type); | 3706 for (field = TYPE_FIELDS (type); |
3491 field && TREE_CODE (field) != FIELD_DECL; | 3707 field && TREE_CODE (field) != FIELD_DECL; |
3492 field = TREE_CHAIN (field)) | 3708 field = DECL_CHAIN (field)) |
3493 continue; | 3709 continue; |
3494 | 3710 |
3495 if (field == NULL) | 3711 if (field == NULL) |
3496 return false; /* An empty structure. Allowed by an extension to ANSI C. */ | 3712 return false; /* An empty structure. Allowed by an extension to ANSI C. */ |
3497 | 3713 |
3506 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) | 3722 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) |
3507 return true; | 3723 return true; |
3508 | 3724 |
3509 /* Now check the remaining fields, if any. Only bitfields are allowed, | 3725 /* Now check the remaining fields, if any. Only bitfields are allowed, |
3510 since they are not addressable. */ | 3726 since they are not addressable. */ |
3511 for (field = TREE_CHAIN (field); | 3727 for (field = DECL_CHAIN (field); |
3512 field; | 3728 field; |
3513 field = TREE_CHAIN (field)) | 3729 field = DECL_CHAIN (field)) |
3514 { | 3730 { |
3515 if (TREE_CODE (field) != FIELD_DECL) | 3731 if (TREE_CODE (field) != FIELD_DECL) |
3516 continue; | 3732 continue; |
3517 | 3733 |
3518 if (!DECL_BIT_FIELD_TYPE (field)) | 3734 if (!DECL_BIT_FIELD_TYPE (field)) |
3528 | 3744 |
3529 /* Unions can be returned in registers if every element is | 3745 /* Unions can be returned in registers if every element is |
3530 integral, or can be returned in an integer register. */ | 3746 integral, or can be returned in an integer register. */ |
3531 for (field = TYPE_FIELDS (type); | 3747 for (field = TYPE_FIELDS (type); |
3532 field; | 3748 field; |
3533 field = TREE_CHAIN (field)) | 3749 field = DECL_CHAIN (field)) |
3534 { | 3750 { |
3535 if (TREE_CODE (field) != FIELD_DECL) | 3751 if (TREE_CODE (field) != FIELD_DECL) |
3536 continue; | 3752 continue; |
3537 | 3753 |
3538 if (FLOAT_TYPE_P (TREE_TYPE (field))) | 3754 if (FLOAT_TYPE_P (TREE_TYPE (field))) |
3634 if (TARGET_AAPCS_BASED) | 3850 if (TARGET_AAPCS_BASED) |
3635 { | 3851 { |
3636 /* Detect varargs functions. These always use the base rules | 3852 /* Detect varargs functions. These always use the base rules |
3637 (no argument is ever a candidate for a co-processor | 3853 (no argument is ever a candidate for a co-processor |
3638 register). */ | 3854 register). */ |
3639 bool base_rules = (TYPE_ARG_TYPES (type) != 0 | 3855 bool base_rules = stdarg_p (type); |
3640 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type))) | |
3641 != void_type_node)); | |
3642 | 3856 |
3643 if (user_convention) | 3857 if (user_convention) |
3644 { | 3858 { |
3645 if (user_pcs > ARM_PCS_AAPCS_LOCAL) | 3859 if (user_pcs > ARM_PCS_AAPCS_LOCAL) |
3646 sorry ("Non-AAPCS derived PCS variant"); | 3860 sorry ("non-AAPCS derived PCS variant"); |
3647 else if (base_rules && user_pcs != ARM_PCS_AAPCS) | 3861 else if (base_rules && user_pcs != ARM_PCS_AAPCS) |
3648 error ("Variadic functions must use the base AAPCS variant"); | 3862 error ("variadic functions must use the base AAPCS variant"); |
3649 } | 3863 } |
3650 | 3864 |
3651 if (base_rules) | 3865 if (base_rules) |
3652 return ARM_PCS_AAPCS; | 3866 return ARM_PCS_AAPCS; |
3653 else if (user_convention) | 3867 else if (user_convention) |
3788 | 4002 |
3789 /* Can't handle incomplete types. */ | 4003 /* Can't handle incomplete types. */ |
3790 if (!COMPLETE_TYPE_P(type)) | 4004 if (!COMPLETE_TYPE_P(type)) |
3791 return -1; | 4005 return -1; |
3792 | 4006 |
3793 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) | 4007 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
3794 { | 4008 { |
3795 if (TREE_CODE (field) != FIELD_DECL) | 4009 if (TREE_CODE (field) != FIELD_DECL) |
3796 continue; | 4010 continue; |
3797 | 4011 |
3798 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); | 4012 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); |
3820 | 4034 |
3821 /* Can't handle incomplete types. */ | 4035 /* Can't handle incomplete types. */ |
3822 if (!COMPLETE_TYPE_P(type)) | 4036 if (!COMPLETE_TYPE_P(type)) |
3823 return -1; | 4037 return -1; |
3824 | 4038 |
3825 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) | 4039 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field)) |
3826 { | 4040 { |
3827 if (TREE_CODE (field) != FIELD_DECL) | 4041 if (TREE_CODE (field) != FIELD_DECL) |
3828 continue; | 4042 continue; |
3829 | 4043 |
3830 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); | 4044 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); |
3852 /* Return true if PCS_VARIANT should use VFP registers. */ | 4066 /* Return true if PCS_VARIANT should use VFP registers. */ |
3853 static bool | 4067 static bool |
3854 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double) | 4068 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double) |
3855 { | 4069 { |
3856 if (pcs_variant == ARM_PCS_AAPCS_VFP) | 4070 if (pcs_variant == ARM_PCS_AAPCS_VFP) |
3857 return true; | 4071 { |
4072 static bool seen_thumb1_vfp = false; | |
4073 | |
4074 if (TARGET_THUMB1 && !seen_thumb1_vfp) | |
4075 { | |
4076 sorry ("Thumb-1 hard-float VFP ABI"); | |
4077 /* sorry() is not immediately fatal, so only display this once. */ | |
4078 seen_thumb1_vfp = true; | |
4079 } | |
4080 | |
4081 return true; | |
4082 } | |
3858 | 4083 |
3859 if (pcs_variant != ARM_PCS_AAPCS_LOCAL) | 4084 if (pcs_variant != ARM_PCS_AAPCS_LOCAL) |
3860 return false; | 4085 return false; |
3861 | 4086 |
3862 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT && | 4087 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT && |
4084 | 4309 |
4085 #undef AAPCS_CP | 4310 #undef AAPCS_CP |
4086 | 4311 |
4087 static int | 4312 static int |
4088 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode, | 4313 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode, |
4089 tree type) | 4314 const_tree type) |
4090 { | 4315 { |
4091 int i; | 4316 int i; |
4092 | 4317 |
4093 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) | 4318 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) |
4094 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type)) | 4319 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type)) |
4196 | 4421 |
4197 /* Lay out a function argument using the AAPCS rules. The rule | 4422 /* Lay out a function argument using the AAPCS rules. The rule |
4198 numbers referred to here are those in the AAPCS. */ | 4423 numbers referred to here are those in the AAPCS. */ |
4199 static void | 4424 static void |
4200 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, | 4425 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, |
4201 tree type, int named) | 4426 const_tree type, bool named) |
4202 { | 4427 { |
4203 int nregs, nregs2; | 4428 int nregs, nregs2; |
4204 int ncrn; | 4429 int ncrn; |
4205 | 4430 |
4206 /* We only need to do this once per argument. */ | 4431 /* We only need to do this once per argument. */ |
4360 } | 4585 } |
4361 } | 4586 } |
4362 | 4587 |
4363 | 4588 |
4364 /* Return true if mode/type need doubleword alignment. */ | 4589 /* Return true if mode/type need doubleword alignment. */ |
4365 bool | 4590 static bool |
4366 arm_needs_doubleword_align (enum machine_mode mode, tree type) | 4591 arm_needs_doubleword_align (enum machine_mode mode, const_tree type) |
4367 { | 4592 { |
4368 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY | 4593 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY |
4369 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY)); | 4594 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY)); |
4370 } | 4595 } |
4371 | 4596 |
4379 This is null for libcalls where that information may | 4604 This is null for libcalls where that information may |
4380 not be available. | 4605 not be available. |
4381 CUM is a variable of type CUMULATIVE_ARGS which gives info about | 4606 CUM is a variable of type CUMULATIVE_ARGS which gives info about |
4382 the preceding args and about the function being called. | 4607 the preceding args and about the function being called. |
4383 NAMED is nonzero if this argument is a named parameter | 4608 NAMED is nonzero if this argument is a named parameter |
4384 (otherwise it is an extra parameter matching an ellipsis). */ | 4609 (otherwise it is an extra parameter matching an ellipsis). |
4385 | 4610 |
4386 rtx | 4611 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all |
4612 other arguments are passed on the stack. If (NAMED == 0) (which happens | |
4613 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is | |
4614 defined), say it is passed in the stack (function_prologue will | |
4615 indeed make it pass in the stack if necessary). */ | |
4616 | |
4617 static rtx | |
4387 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, | 4618 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, |
4388 tree type, int named) | 4619 const_tree type, bool named) |
4389 { | 4620 { |
4390 int nregs; | 4621 int nregs; |
4391 | 4622 |
4392 /* Handle the special case quickly. Pick an arbitrary value for op2 of | 4623 /* Handle the special case quickly. Pick an arbitrary value for op2 of |
4393 a call insn (op3 of a call_value insn). */ | 4624 a call insn (op3 of a call_value insn). */ |
4418 /* Put doubleword aligned quantities in even register pairs. */ | 4649 /* Put doubleword aligned quantities in even register pairs. */ |
4419 if (pcum->nregs & 1 | 4650 if (pcum->nregs & 1 |
4420 && ARM_DOUBLEWORD_ALIGN | 4651 && ARM_DOUBLEWORD_ALIGN |
4421 && arm_needs_doubleword_align (mode, type)) | 4652 && arm_needs_doubleword_align (mode, type)) |
4422 pcum->nregs++; | 4653 pcum->nregs++; |
4423 | |
4424 if (mode == VOIDmode) | |
4425 /* Pick an arbitrary value for operand 2 of the call insn. */ | |
4426 return const0_rtx; | |
4427 | 4654 |
4428 /* Only allow splitting an arg between regs and memory if all preceding | 4655 /* Only allow splitting an arg between regs and memory if all preceding |
4429 args were allocated to regs. For args passed by reference we only count | 4656 args were allocated to regs. For args passed by reference we only count |
4430 the reference pointer. */ | 4657 the reference pointer. */ |
4431 if (pcum->can_split) | 4658 if (pcum->can_split) |
4437 return NULL_RTX; | 4664 return NULL_RTX; |
4438 | 4665 |
4439 return gen_rtx_REG (mode, pcum->nregs); | 4666 return gen_rtx_REG (mode, pcum->nregs); |
4440 } | 4667 } |
4441 | 4668 |
4669 static unsigned int | |
4670 arm_function_arg_boundary (enum machine_mode mode, const_tree type) | |
4671 { | |
4672 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type) | |
4673 ? DOUBLEWORD_ALIGNMENT | |
4674 : PARM_BOUNDARY); | |
4675 } | |
4676 | |
4442 static int | 4677 static int |
4443 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode, | 4678 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode, |
4444 tree type, bool named) | 4679 tree type, bool named) |
4445 { | 4680 { |
4446 int nregs = pcum->nregs; | 4681 int nregs = pcum->nregs; |
4460 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; | 4695 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; |
4461 | 4696 |
4462 return 0; | 4697 return 0; |
4463 } | 4698 } |
4464 | 4699 |
4465 void | 4700 /* Update the data in PCUM to advance over an argument |
4701 of mode MODE and data type TYPE. | |
4702 (TYPE is null for libcalls where that information may not be available.) */ | |
4703 | |
4704 static void | |
4466 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode, | 4705 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode, |
4467 tree type, bool named) | 4706 const_tree type, bool named) |
4468 { | 4707 { |
4469 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) | 4708 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) |
4470 { | 4709 { |
4471 aapcs_layout_arg (pcum, mode, type, named); | 4710 aapcs_layout_arg (pcum, mode, type, named); |
4472 | 4711 |
4884 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) | 5123 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) |
4885 crtl->uses_pic_offset_table = 1; | 5124 crtl->uses_pic_offset_table = 1; |
4886 } | 5125 } |
4887 else | 5126 else |
4888 { | 5127 { |
4889 rtx seq; | 5128 rtx seq, insn; |
4890 | 5129 |
4891 if (!cfun->machine->pic_reg) | 5130 if (!cfun->machine->pic_reg) |
4892 cfun->machine->pic_reg = gen_reg_rtx (Pmode); | 5131 cfun->machine->pic_reg = gen_reg_rtx (Pmode); |
4893 | 5132 |
4894 /* Play games to avoid marking the function as needing pic | 5133 /* Play games to avoid marking the function as needing pic |
4901 | 5140 |
4902 arm_load_pic_register (0UL); | 5141 arm_load_pic_register (0UL); |
4903 | 5142 |
4904 seq = get_insns (); | 5143 seq = get_insns (); |
4905 end_sequence (); | 5144 end_sequence (); |
5145 | |
5146 for (insn = seq; insn; insn = NEXT_INSN (insn)) | |
5147 if (INSN_P (insn)) | |
5148 INSN_LOCATOR (insn) = prologue_locator; | |
5149 | |
4906 /* We can be called during expansion of PHI nodes, where | 5150 /* We can be called during expansion of PHI nodes, where |
4907 we can't yet emit instructions directly in the final | 5151 we can't yet emit instructions directly in the final |
4908 insn stream. Queue the insns on the entry edge, they will | 5152 insn stream. Queue the insns on the entry edge, they will |
4909 be committed after everything else is expanded. */ | 5153 be committed after everything else is expanded. */ |
4910 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR)); | 5154 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR)); |
4917 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) | 5161 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) |
4918 { | 5162 { |
4919 if (GET_CODE (orig) == SYMBOL_REF | 5163 if (GET_CODE (orig) == SYMBOL_REF |
4920 || GET_CODE (orig) == LABEL_REF) | 5164 || GET_CODE (orig) == LABEL_REF) |
4921 { | 5165 { |
4922 rtx pic_ref, address; | |
4923 rtx insn; | 5166 rtx insn; |
4924 | 5167 |
4925 if (reg == 0) | 5168 if (reg == 0) |
4926 { | 5169 { |
4927 gcc_assert (can_create_pseudo_p ()); | 5170 gcc_assert (can_create_pseudo_p ()); |
4928 reg = gen_reg_rtx (Pmode); | 5171 reg = gen_reg_rtx (Pmode); |
4929 address = gen_reg_rtx (Pmode); | 5172 } |
4930 } | |
4931 else | |
4932 address = reg; | |
4933 | 5173 |
4934 /* VxWorks does not impose a fixed gap between segments; the run-time | 5174 /* VxWorks does not impose a fixed gap between segments; the run-time |
4935 gap can be different from the object-file gap. We therefore can't | 5175 gap can be different from the object-file gap. We therefore can't |
4936 use GOTOFF unless we are absolutely sure that the symbol is in the | 5176 use GOTOFF unless we are absolutely sure that the symbol is in the |
4937 same segment as the GOT. Unfortunately, the flexibility of linker | 5177 same segment as the GOT. Unfortunately, the flexibility of linker |
4943 && NEED_GOT_RELOC | 5183 && NEED_GOT_RELOC |
4944 && !TARGET_VXWORKS_RTP) | 5184 && !TARGET_VXWORKS_RTP) |
4945 insn = arm_pic_static_addr (orig, reg); | 5185 insn = arm_pic_static_addr (orig, reg); |
4946 else | 5186 else |
4947 { | 5187 { |
5188 rtx pat; | |
5189 rtx mem; | |
5190 | |
4948 /* If this function doesn't have a pic register, create one now. */ | 5191 /* If this function doesn't have a pic register, create one now. */ |
4949 require_pic_register (); | 5192 require_pic_register (); |
4950 | 5193 |
4951 if (TARGET_32BIT) | 5194 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig); |
4952 emit_insn (gen_pic_load_addr_32bit (address, orig)); | 5195 |
4953 else /* TARGET_THUMB1 */ | 5196 /* Make the MEM as close to a constant as possible. */ |
4954 emit_insn (gen_pic_load_addr_thumb1 (address, orig)); | 5197 mem = SET_SRC (pat); |
4955 | 5198 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem)); |
4956 pic_ref = gen_const_mem (Pmode, | 5199 MEM_READONLY_P (mem) = 1; |
4957 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, | 5200 MEM_NOTRAP_P (mem) = 1; |
4958 address)); | 5201 |
4959 insn = emit_move_insn (reg, pic_ref); | 5202 insn = emit_insn (pat); |
4960 } | 5203 } |
4961 | 5204 |
4962 /* Put a REG_EQUAL note on this insn, so that it can be optimized | 5205 /* Put a REG_EQUAL note on this insn, so that it can be optimized |
4963 by loop. */ | 5206 by loop. */ |
4964 set_unique_reg_note (insn, REG_EQUAL, orig); | 5207 set_unique_reg_note (insn, REG_EQUAL, orig); |
5234 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1)); | 5477 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1)); |
5235 | 5478 |
5236 return FALSE; | 5479 return FALSE; |
5237 } | 5480 } |
5238 | 5481 |
5482 /* Return true if X will surely end up in an index register after next | |
5483 splitting pass. */ | |
5484 static bool | |
5485 will_be_in_index_register (const_rtx x) | |
5486 { | |
5487 /* arm.md: calculate_pic_address will split this into a register. */ | |
5488 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM; | |
5489 } | |
5490 | |
5239 /* Return nonzero if X is a valid ARM state address operand. */ | 5491 /* Return nonzero if X is a valid ARM state address operand. */ |
5240 int | 5492 int |
5241 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer, | 5493 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer, |
5242 int strict_p) | 5494 int strict_p) |
5243 { | 5495 { |
5291 { | 5543 { |
5292 rtx xop0 = XEXP (x, 0); | 5544 rtx xop0 = XEXP (x, 0); |
5293 rtx xop1 = XEXP (x, 1); | 5545 rtx xop1 = XEXP (x, 1); |
5294 | 5546 |
5295 return ((arm_address_register_rtx_p (xop0, strict_p) | 5547 return ((arm_address_register_rtx_p (xop0, strict_p) |
5296 && GET_CODE(xop1) == CONST_INT | 5548 && ((GET_CODE(xop1) == CONST_INT |
5297 && arm_legitimate_index_p (mode, xop1, outer, strict_p)) | 5549 && arm_legitimate_index_p (mode, xop1, outer, strict_p)) |
5550 || (!strict_p && will_be_in_index_register (xop1)))) | |
5298 || (arm_address_register_rtx_p (xop1, strict_p) | 5551 || (arm_address_register_rtx_p (xop1, strict_p) |
5299 && arm_legitimate_index_p (mode, xop0, outer, strict_p))); | 5552 && arm_legitimate_index_p (mode, xop0, outer, strict_p))); |
5300 } | 5553 } |
5301 | 5554 |
5302 #if 0 | 5555 #if 0 |
5378 { | 5631 { |
5379 rtx xop0 = XEXP (x, 0); | 5632 rtx xop0 = XEXP (x, 0); |
5380 rtx xop1 = XEXP (x, 1); | 5633 rtx xop1 = XEXP (x, 1); |
5381 | 5634 |
5382 return ((arm_address_register_rtx_p (xop0, strict_p) | 5635 return ((arm_address_register_rtx_p (xop0, strict_p) |
5383 && thumb2_legitimate_index_p (mode, xop1, strict_p)) | 5636 && (thumb2_legitimate_index_p (mode, xop1, strict_p) |
5637 || (!strict_p && will_be_in_index_register (xop1)))) | |
5384 || (arm_address_register_rtx_p (xop1, strict_p) | 5638 || (arm_address_register_rtx_p (xop1, strict_p) |
5385 && thumb2_legitimate_index_p (mode, xop0, strict_p))); | 5639 && thumb2_legitimate_index_p (mode, xop0, strict_p))); |
5386 } | 5640 } |
5387 | 5641 |
5388 else if (GET_MODE_CLASS (mode) != MODE_FLOAT | 5642 else if (GET_MODE_CLASS (mode) != MODE_FLOAT |
5405 HOST_WIDE_INT range; | 5659 HOST_WIDE_INT range; |
5406 enum rtx_code code = GET_CODE (index); | 5660 enum rtx_code code = GET_CODE (index); |
5407 | 5661 |
5408 /* Standard coprocessor addressing modes. */ | 5662 /* Standard coprocessor addressing modes. */ |
5409 if (TARGET_HARD_FLOAT | 5663 if (TARGET_HARD_FLOAT |
5410 && (TARGET_FPA || TARGET_MAVERICK) | 5664 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK) |
5411 && (GET_MODE_CLASS (mode) == MODE_FLOAT | 5665 && (mode == SFmode || mode == DFmode |
5412 || (TARGET_MAVERICK && mode == DImode))) | 5666 || (TARGET_MAVERICK && mode == DImode))) |
5413 return (code == CONST_INT && INTVAL (index) < 1024 | 5667 return (code == CONST_INT && INTVAL (index) < 1024 |
5414 && INTVAL (index) > -1024 | 5668 && INTVAL (index) > -1024 |
5415 && (INTVAL (index) & 3) == 0); | 5669 && (INTVAL (index) & 3) == 0); |
5416 | 5670 |
5417 if (TARGET_NEON | 5671 /* For quad modes, we restrict the constant offset to be slightly less |
5418 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))) | 5672 than what the instruction format permits. We do this because for |
5673 quad mode moves, we will actually decompose them into two separate | |
5674 double-mode reads or writes. INDEX must therefore be a valid | |
5675 (double-mode) offset and so should INDEX+8. */ | |
5676 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode)) | |
5419 return (code == CONST_INT | 5677 return (code == CONST_INT |
5420 && INTVAL (index) < 1016 | 5678 && INTVAL (index) < 1016 |
5679 && INTVAL (index) > -1024 | |
5680 && (INTVAL (index) & 3) == 0); | |
5681 | |
5682 /* We have no such constraint on double mode offsets, so we permit the | |
5683 full range of the instruction format. */ | |
5684 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) | |
5685 return (code == CONST_INT | |
5686 && INTVAL (index) < 1024 | |
5421 && INTVAL (index) > -1024 | 5687 && INTVAL (index) > -1024 |
5422 && (INTVAL (index) & 3) == 0); | 5688 && (INTVAL (index) & 3) == 0); |
5423 | 5689 |
5424 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) | 5690 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) |
5425 return (code == CONST_INT | 5691 return (code == CONST_INT |
5514 enum rtx_code code = GET_CODE (index); | 5780 enum rtx_code code = GET_CODE (index); |
5515 | 5781 |
5516 /* ??? Combine arm and thumb2 coprocessor addressing modes. */ | 5782 /* ??? Combine arm and thumb2 coprocessor addressing modes. */ |
5517 /* Standard coprocessor addressing modes. */ | 5783 /* Standard coprocessor addressing modes. */ |
5518 if (TARGET_HARD_FLOAT | 5784 if (TARGET_HARD_FLOAT |
5519 && (TARGET_FPA || TARGET_MAVERICK) | 5785 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK) |
5520 && (GET_MODE_CLASS (mode) == MODE_FLOAT | 5786 && (mode == SFmode || mode == DFmode |
5521 || (TARGET_MAVERICK && mode == DImode))) | 5787 || (TARGET_MAVERICK && mode == DImode))) |
5522 return (code == CONST_INT && INTVAL (index) < 1024 | 5788 return (code == CONST_INT && INTVAL (index) < 1024 |
5523 && INTVAL (index) > -1024 | 5789 /* Thumb-2 allows only > -256 index range for it's core register |
5790 load/stores. Since we allow SF/DF in core registers, we have | |
5791 to use the intersection between -256~4096 (core) and -1024~1024 | |
5792 (coprocessor). */ | |
5793 && INTVAL (index) > -256 | |
5524 && (INTVAL (index) & 3) == 0); | 5794 && (INTVAL (index) & 3) == 0); |
5525 | 5795 |
5526 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) | 5796 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) |
5527 { | 5797 { |
5528 /* For DImode assume values will usually live in core regs | 5798 /* For DImode assume values will usually live in core regs |
5532 && INTVAL (index) < 1024 | 5802 && INTVAL (index) < 1024 |
5533 && INTVAL (index) > -1024 | 5803 && INTVAL (index) > -1024 |
5534 && (INTVAL (index) & 3) == 0); | 5804 && (INTVAL (index) & 3) == 0); |
5535 } | 5805 } |
5536 | 5806 |
5537 if (TARGET_NEON | 5807 /* For quad modes, we restrict the constant offset to be slightly less |
5538 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))) | 5808 than what the instruction format permits. We do this because for |
5809 quad mode moves, we will actually decompose them into two separate | |
5810 double-mode reads or writes. INDEX must therefore be a valid | |
5811 (double-mode) offset and so should INDEX+8. */ | |
5812 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode)) | |
5539 return (code == CONST_INT | 5813 return (code == CONST_INT |
5540 && INTVAL (index) < 1016 | 5814 && INTVAL (index) < 1016 |
5815 && INTVAL (index) > -1024 | |
5816 && (INTVAL (index) & 3) == 0); | |
5817 | |
5818 /* We have no such constraint on double mode offsets, so we permit the | |
5819 full range of the instruction format. */ | |
5820 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) | |
5821 return (code == CONST_INT | |
5822 && INTVAL (index) < 1024 | |
5541 && INTVAL (index) > -1024 | 5823 && INTVAL (index) > -1024 |
5542 && (INTVAL (index) & 3) == 0); | 5824 && (INTVAL (index) & 3) == 0); |
5543 | 5825 |
5544 if (arm_address_register_rtx_p (index, strict_p) | 5826 if (arm_address_register_rtx_p (index, strict_p) |
5545 && (GET_MODE_SIZE (mode) <= 4)) | 5827 && (GET_MODE_SIZE (mode) <= 4)) |
5681 permits SP+OFFSET. */ | 5963 permits SP+OFFSET. */ |
5682 if (GET_MODE_SIZE (mode) <= 4 | 5964 if (GET_MODE_SIZE (mode) <= 4 |
5683 && XEXP (x, 0) != frame_pointer_rtx | 5965 && XEXP (x, 0) != frame_pointer_rtx |
5684 && XEXP (x, 1) != frame_pointer_rtx | 5966 && XEXP (x, 1) != frame_pointer_rtx |
5685 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) | 5967 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) |
5686 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)) | 5968 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p) |
5969 || (!strict_p && will_be_in_index_register (XEXP (x, 1))))) | |
5687 return 1; | 5970 return 1; |
5688 | 5971 |
5689 /* REG+const has 5-7 bit offset for non-SP registers. */ | 5972 /* REG+const has 5-7 bit offset for non-SP registers. */ |
5690 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) | 5973 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) |
5691 || XEXP (x, 0) == arg_pointer_rtx) | 5974 || XEXP (x, 0) == arg_pointer_rtx) |
5708 | 5991 |
5709 else if (GET_CODE (XEXP (x, 0)) == REG | 5992 else if (GET_CODE (XEXP (x, 0)) == REG |
5710 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM | 5993 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM |
5711 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM | 5994 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM |
5712 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER | 5995 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER |
5713 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER)) | 5996 && REGNO (XEXP (x, 0)) |
5997 <= LAST_VIRTUAL_POINTER_REGISTER)) | |
5714 && GET_MODE_SIZE (mode) >= 4 | 5998 && GET_MODE_SIZE (mode) >= 4 |
5715 && GET_CODE (XEXP (x, 1)) == CONST_INT | 5999 && GET_CODE (XEXP (x, 1)) == CONST_INT |
5716 && (INTVAL (XEXP (x, 1)) & 3) == 0) | 6000 && (INTVAL (XEXP (x, 1)) & 3) == 0) |
5717 return 1; | 6001 return 1; |
5718 } | 6002 } |
6106 } | 6390 } |
6107 | 6391 |
6108 return x; | 6392 return x; |
6109 } | 6393 } |
6110 | 6394 |
6395 bool | |
6396 arm_legitimize_reload_address (rtx *p, | |
6397 enum machine_mode mode, | |
6398 int opnum, int type, | |
6399 int ind_levels ATTRIBUTE_UNUSED) | |
6400 { | |
6401 if (GET_CODE (*p) == PLUS | |
6402 && GET_CODE (XEXP (*p, 0)) == REG | |
6403 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0))) | |
6404 && GET_CODE (XEXP (*p, 1)) == CONST_INT) | |
6405 { | |
6406 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1)); | |
6407 HOST_WIDE_INT low, high; | |
6408 | |
6409 if (mode == DImode || (mode == DFmode && TARGET_SOFT_FLOAT)) | |
6410 low = ((val & 0xf) ^ 0x8) - 0x8; | |
6411 else if (TARGET_MAVERICK && TARGET_HARD_FLOAT) | |
6412 /* Need to be careful, -256 is not a valid offset. */ | |
6413 low = val >= 0 ? (val & 0xff) : -((-val) & 0xff); | |
6414 else if (mode == SImode | |
6415 || (mode == SFmode && TARGET_SOFT_FLOAT) | |
6416 || ((mode == HImode || mode == QImode) && ! arm_arch4)) | |
6417 /* Need to be careful, -4096 is not a valid offset. */ | |
6418 low = val >= 0 ? (val & 0xfff) : -((-val) & 0xfff); | |
6419 else if ((mode == HImode || mode == QImode) && arm_arch4) | |
6420 /* Need to be careful, -256 is not a valid offset. */ | |
6421 low = val >= 0 ? (val & 0xff) : -((-val) & 0xff); | |
6422 else if (GET_MODE_CLASS (mode) == MODE_FLOAT | |
6423 && TARGET_HARD_FLOAT && TARGET_FPA) | |
6424 /* Need to be careful, -1024 is not a valid offset. */ | |
6425 low = val >= 0 ? (val & 0x3ff) : -((-val) & 0x3ff); | |
6426 else | |
6427 return false; | |
6428 | |
6429 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff) | |
6430 ^ (unsigned HOST_WIDE_INT) 0x80000000) | |
6431 - (unsigned HOST_WIDE_INT) 0x80000000); | |
6432 /* Check for overflow or zero */ | |
6433 if (low == 0 || high == 0 || (high + low != val)) | |
6434 return false; | |
6435 | |
6436 /* Reload the high part into a base reg; leave the low part | |
6437 in the mem. */ | |
6438 *p = gen_rtx_PLUS (GET_MODE (*p), | |
6439 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0), | |
6440 GEN_INT (high)), | |
6441 GEN_INT (low)); | |
6442 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL, | |
6443 MODE_BASE_REG_CLASS (mode), GET_MODE (*p), | |
6444 VOIDmode, 0, 0, opnum, (enum reload_type) type); | |
6445 return true; | |
6446 } | |
6447 | |
6448 return false; | |
6449 } | |
6450 | |
6111 rtx | 6451 rtx |
6112 thumb_legitimize_reload_address (rtx *x_p, | 6452 thumb_legitimize_reload_address (rtx *x_p, |
6113 enum machine_mode mode, | 6453 enum machine_mode mode, |
6114 int opnum, int type, | 6454 int opnum, int type, |
6115 int ind_levels ATTRIBUTE_UNUSED) | 6455 int ind_levels ATTRIBUTE_UNUSED) |
6217 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG)) | 6557 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG)) |
6218 | 6558 |
6219 #define REG_OR_SUBREG_RTX(X) \ | 6559 #define REG_OR_SUBREG_RTX(X) \ |
6220 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X)) | 6560 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X)) |
6221 | 6561 |
6222 #ifndef COSTS_N_INSNS | |
6223 #define COSTS_N_INSNS(N) ((N) * 4 - 2) | |
6224 #endif | |
6225 static inline int | 6562 static inline int |
6226 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) | 6563 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) |
6227 { | 6564 { |
6228 enum machine_mode mode = GET_MODE (x); | 6565 enum machine_mode mode = GET_MODE (x); |
6566 int total; | |
6229 | 6567 |
6230 switch (code) | 6568 switch (code) |
6231 { | 6569 { |
6232 case ASHIFT: | 6570 case ASHIFT: |
6233 case ASHIFTRT: | 6571 case ASHIFTRT: |
6322 /* XXX a guess. */ | 6660 /* XXX a guess. */ |
6323 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) | 6661 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) |
6324 return 14; | 6662 return 14; |
6325 return 2; | 6663 return 2; |
6326 | 6664 |
6665 case SIGN_EXTEND: | |
6327 case ZERO_EXTEND: | 6666 case ZERO_EXTEND: |
6328 /* XXX still guessing. */ | 6667 total = mode == DImode ? COSTS_N_INSNS (1) : 0; |
6329 switch (GET_MODE (XEXP (x, 0))) | 6668 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code); |
6330 { | 6669 |
6331 case QImode: | 6670 if (mode == SImode) |
6332 return (1 + (mode == DImode ? 4 : 0) | 6671 return total; |
6333 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); | 6672 |
6334 | 6673 if (arm_arch6) |
6335 case HImode: | 6674 return total + COSTS_N_INSNS (1); |
6336 return (4 + (mode == DImode ? 4 : 0) | 6675 |
6337 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); | 6676 /* Assume a two-shift sequence. Increase the cost slightly so |
6338 | 6677 we prefer actual shifts over an extend operation. */ |
6339 case SImode: | 6678 return total + 1 + COSTS_N_INSNS (2); |
6340 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); | |
6341 | |
6342 default: | |
6343 return 99; | |
6344 } | |
6345 | 6679 |
6346 default: | 6680 default: |
6347 return 99; | 6681 return 99; |
6348 } | 6682 } |
6349 } | 6683 } |
6408 ++*total; | 6742 ++*total; |
6409 | 6743 |
6410 return true; | 6744 return true; |
6411 | 6745 |
6412 case MINUS: | 6746 case MINUS: |
6413 if (TARGET_THUMB2) | |
6414 { | |
6415 if (GET_MODE_CLASS (mode) == MODE_FLOAT) | |
6416 { | |
6417 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) | |
6418 *total = COSTS_N_INSNS (1); | |
6419 else | |
6420 *total = COSTS_N_INSNS (20); | |
6421 } | |
6422 else | |
6423 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); | |
6424 /* Thumb2 does not have RSB, so all arguments must be | |
6425 registers (subtracting a constant is canonicalized as | |
6426 addition of the negated constant). */ | |
6427 return false; | |
6428 } | |
6429 | |
6430 if (mode == DImode) | 6747 if (mode == DImode) |
6431 { | 6748 { |
6432 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); | 6749 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); |
6433 if (GET_CODE (XEXP (x, 0)) == CONST_INT | 6750 if (GET_CODE (XEXP (x, 0)) == CONST_INT |
6434 && const_ok_for_arm (INTVAL (XEXP (x, 0)))) | 6751 && const_ok_for_arm (INTVAL (XEXP (x, 0)))) |
6582 /* Normally the frame registers will be spilt into reg+const during | 6899 /* Normally the frame registers will be spilt into reg+const during |
6583 reload, so it is a bad idea to combine them with other instructions, | 6900 reload, so it is a bad idea to combine them with other instructions, |
6584 since then they might not be moved outside of loops. As a compromise | 6901 since then they might not be moved outside of loops. As a compromise |
6585 we allow integration with ops that have a constant as their second | 6902 we allow integration with ops that have a constant as their second |
6586 operand. */ | 6903 operand. */ |
6587 if ((REG_OR_SUBREG_REG (XEXP (x, 0)) | 6904 if (REG_OR_SUBREG_REG (XEXP (x, 0)) |
6588 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0))) | 6905 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0))) |
6589 && GET_CODE (XEXP (x, 1)) != CONST_INT) | 6906 && GET_CODE (XEXP (x, 1)) != CONST_INT) |
6590 || (REG_OR_SUBREG_REG (XEXP (x, 0)) | 6907 *total = COSTS_N_INSNS (1); |
6591 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0))))) | |
6592 *total = 4; | |
6593 | 6908 |
6594 if (mode == DImode) | 6909 if (mode == DImode) |
6595 { | 6910 { |
6596 *total += COSTS_N_INSNS (2); | 6911 *total += COSTS_N_INSNS (2); |
6597 if (GET_CODE (XEXP (x, 1)) == CONST_INT | 6912 if (GET_CODE (XEXP (x, 1)) == CONST_INT |
6825 if (mode == DImode) | 7140 if (mode == DImode) |
6826 *total += COSTS_N_INSNS (3); | 7141 *total += COSTS_N_INSNS (3); |
6827 return false; | 7142 return false; |
6828 | 7143 |
6829 case SIGN_EXTEND: | 7144 case SIGN_EXTEND: |
6830 if (GET_MODE_CLASS (mode) == MODE_INT) | |
6831 { | |
6832 *total = 0; | |
6833 if (mode == DImode) | |
6834 *total += COSTS_N_INSNS (1); | |
6835 | |
6836 if (GET_MODE (XEXP (x, 0)) != SImode) | |
6837 { | |
6838 if (arm_arch6) | |
6839 { | |
6840 if (GET_CODE (XEXP (x, 0)) != MEM) | |
6841 *total += COSTS_N_INSNS (1); | |
6842 } | |
6843 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM) | |
6844 *total += COSTS_N_INSNS (2); | |
6845 } | |
6846 | |
6847 return false; | |
6848 } | |
6849 | |
6850 /* Fall through */ | |
6851 case ZERO_EXTEND: | 7145 case ZERO_EXTEND: |
6852 *total = 0; | 7146 *total = 0; |
6853 if (GET_MODE_CLASS (mode) == MODE_INT) | 7147 if (GET_MODE_CLASS (mode) == MODE_INT) |
6854 { | 7148 { |
7149 rtx op = XEXP (x, 0); | |
7150 enum machine_mode opmode = GET_MODE (op); | |
7151 | |
6855 if (mode == DImode) | 7152 if (mode == DImode) |
6856 *total += COSTS_N_INSNS (1); | 7153 *total += COSTS_N_INSNS (1); |
6857 | 7154 |
6858 if (GET_MODE (XEXP (x, 0)) != SImode) | 7155 if (opmode != SImode) |
6859 { | 7156 { |
6860 if (arm_arch6) | 7157 if (MEM_P (op)) |
6861 { | 7158 { |
6862 if (GET_CODE (XEXP (x, 0)) != MEM) | 7159 /* If !arm_arch4, we use one of the extendhisi2_mem |
6863 *total += COSTS_N_INSNS (1); | 7160 or movhi_bytes patterns for HImode. For a QImode |
7161 sign extension, we first zero-extend from memory | |
7162 and then perform a shift sequence. */ | |
7163 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND)) | |
7164 *total += COSTS_N_INSNS (2); | |
6864 } | 7165 } |
6865 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM) | 7166 else if (arm_arch6) |
6866 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ? | 7167 *total += COSTS_N_INSNS (1); |
6867 1 : 2); | 7168 |
7169 /* We don't have the necessary insn, so we need to perform some | |
7170 other operation. */ | |
7171 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode) | |
7172 /* An and with constant 255. */ | |
7173 *total += COSTS_N_INSNS (1); | |
7174 else | |
7175 /* A shift sequence. Increase costs slightly to avoid | |
7176 combining two shifts into an extend operation. */ | |
7177 *total += COSTS_N_INSNS (2) + 1; | |
6868 } | 7178 } |
6869 | 7179 |
6870 return false; | 7180 return false; |
6871 } | 7181 } |
6872 | 7182 |
6967 | 7277 |
6968 case CONST_INT: | 7278 case CONST_INT: |
6969 if (outer == SET) | 7279 if (outer == SET) |
6970 { | 7280 { |
6971 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) | 7281 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) |
6972 return 0; | 7282 return COSTS_N_INSNS (1); |
7283 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */ | |
7284 if (INTVAL (x) >= -255 && INTVAL (x) <= -1) | |
7285 return COSTS_N_INSNS (2); | |
7286 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */ | |
6973 if (thumb_shiftable_const (INTVAL (x))) | 7287 if (thumb_shiftable_const (INTVAL (x))) |
6974 return COSTS_N_INSNS (2); | 7288 return COSTS_N_INSNS (2); |
6975 return COSTS_N_INSNS (3); | 7289 return COSTS_N_INSNS (3); |
6976 } | 7290 } |
6977 else if ((outer == PLUS || outer == COMPARE) | 7291 else if ((outer == PLUS || outer == COMPARE) |
7071 case MEM: | 7385 case MEM: |
7072 /* A memory access costs 1 insn if the mode is small, or the address is | 7386 /* A memory access costs 1 insn if the mode is small, or the address is |
7073 a single register, otherwise it costs one insn per word. */ | 7387 a single register, otherwise it costs one insn per word. */ |
7074 if (REG_P (XEXP (x, 0))) | 7388 if (REG_P (XEXP (x, 0))) |
7075 *total = COSTS_N_INSNS (1); | 7389 *total = COSTS_N_INSNS (1); |
7390 else if (flag_pic | |
7391 && GET_CODE (XEXP (x, 0)) == PLUS | |
7392 && will_be_in_index_register (XEXP (XEXP (x, 0), 1))) | |
7393 /* This will be split into two instructions. | |
7394 See arm.md:calculate_pic_address. */ | |
7395 *total = COSTS_N_INSNS (2); | |
7076 else | 7396 else |
7077 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); | 7397 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); |
7078 return true; | 7398 return true; |
7079 | 7399 |
7080 case DIV: | 7400 case DIV: |
7218 else | 7538 else |
7219 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode)); | 7539 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode)); |
7220 return false; | 7540 return false; |
7221 | 7541 |
7222 case SIGN_EXTEND: | 7542 case SIGN_EXTEND: |
7223 *total = 0; | |
7224 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4) | |
7225 { | |
7226 if (!(arm_arch4 && MEM_P (XEXP (x, 0)))) | |
7227 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2); | |
7228 } | |
7229 if (mode == DImode) | |
7230 *total += COSTS_N_INSNS (1); | |
7231 return false; | |
7232 | |
7233 case ZERO_EXTEND: | 7543 case ZERO_EXTEND: |
7234 *total = 0; | 7544 return arm_rtx_costs_1 (x, outer_code, total, 0); |
7235 if (!(arm_arch4 && MEM_P (XEXP (x, 0)))) | |
7236 { | |
7237 switch (GET_MODE (XEXP (x, 0))) | |
7238 { | |
7239 case QImode: | |
7240 *total += COSTS_N_INSNS (1); | |
7241 break; | |
7242 | |
7243 case HImode: | |
7244 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2); | |
7245 | |
7246 case SImode: | |
7247 break; | |
7248 | |
7249 default: | |
7250 *total += COSTS_N_INSNS (2); | |
7251 } | |
7252 } | |
7253 | |
7254 if (mode == DImode) | |
7255 *total += COSTS_N_INSNS (1); | |
7256 | |
7257 return false; | |
7258 | 7545 |
7259 case CONST_INT: | 7546 case CONST_INT: |
7260 if (const_ok_for_arm (INTVAL (x))) | 7547 if (const_ok_for_arm (INTVAL (x))) |
7261 /* A multiplication by a constant requires another instruction | 7548 /* A multiplication by a constant requires another instruction |
7262 to load the constant to a register. */ | 7549 to load the constant to a register. */ |
7658 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) | 7945 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) |
7659 { | 7946 { |
7660 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x); | 7947 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x); |
7661 } | 7948 } |
7662 | 7949 |
7663 static int | 7950 /* Adjust cost hook for XScale. */ |
7664 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost) | 7951 static bool |
7665 { | 7952 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) |
7666 rtx i_pat, d_pat; | 7953 { |
7667 | |
7668 /* Some true dependencies can have a higher cost depending | 7954 /* Some true dependencies can have a higher cost depending |
7669 on precisely how certain input operands are used. */ | 7955 on precisely how certain input operands are used. */ |
7670 if (arm_tune_xscale | 7956 if (REG_NOTE_KIND(link) == 0 |
7671 && REG_NOTE_KIND (link) == 0 | |
7672 && recog_memoized (insn) >= 0 | 7957 && recog_memoized (insn) >= 0 |
7673 && recog_memoized (dep) >= 0) | 7958 && recog_memoized (dep) >= 0) |
7674 { | 7959 { |
7675 int shift_opnum = get_attr_shift (insn); | 7960 int shift_opnum = get_attr_shift (insn); |
7676 enum attr_type attr_type = get_attr_type (dep); | 7961 enum attr_type attr_type = get_attr_type (dep); |
7700 if (recog_data.operand_type[opno] == OP_IN) | 7985 if (recog_data.operand_type[opno] == OP_IN) |
7701 continue; | 7986 continue; |
7702 | 7987 |
7703 if (reg_overlap_mentioned_p (recog_data.operand[opno], | 7988 if (reg_overlap_mentioned_p (recog_data.operand[opno], |
7704 shifted_operand)) | 7989 shifted_operand)) |
7705 return 2; | 7990 { |
7991 *cost = 2; | |
7992 return false; | |
7993 } | |
7706 } | 7994 } |
7707 } | 7995 } |
7996 } | |
7997 return true; | |
7998 } | |
7999 | |
8000 /* Adjust cost hook for Cortex A9. */ | |
8001 static bool | |
8002 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) | |
8003 { | |
8004 switch (REG_NOTE_KIND (link)) | |
8005 { | |
8006 case REG_DEP_ANTI: | |
8007 *cost = 0; | |
8008 return false; | |
8009 | |
8010 case REG_DEP_TRUE: | |
8011 case REG_DEP_OUTPUT: | |
8012 if (recog_memoized (insn) >= 0 | |
8013 && recog_memoized (dep) >= 0) | |
8014 { | |
8015 if (GET_CODE (PATTERN (insn)) == SET) | |
8016 { | |
8017 if (GET_MODE_CLASS | |
8018 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT | |
8019 || GET_MODE_CLASS | |
8020 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT) | |
8021 { | |
8022 enum attr_type attr_type_insn = get_attr_type (insn); | |
8023 enum attr_type attr_type_dep = get_attr_type (dep); | |
8024 | |
8025 /* By default all dependencies of the form | |
8026 s0 = s0 <op> s1 | |
8027 s0 = s0 <op> s2 | |
8028 have an extra latency of 1 cycle because | |
8029 of the input and output dependency in this | |
8030 case. However this gets modeled as an true | |
8031 dependency and hence all these checks. */ | |
8032 if (REG_P (SET_DEST (PATTERN (insn))) | |
8033 && REG_P (SET_DEST (PATTERN (dep))) | |
8034 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)), | |
8035 SET_DEST (PATTERN (dep)))) | |
8036 { | |
8037 /* FMACS is a special case where the dependant | |
8038 instruction can be issued 3 cycles before | |
8039 the normal latency in case of an output | |
8040 dependency. */ | |
8041 if ((attr_type_insn == TYPE_FMACS | |
8042 || attr_type_insn == TYPE_FMACD) | |
8043 && (attr_type_dep == TYPE_FMACS | |
8044 || attr_type_dep == TYPE_FMACD)) | |
8045 { | |
8046 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT) | |
8047 *cost = insn_default_latency (dep) - 3; | |
8048 else | |
8049 *cost = insn_default_latency (dep); | |
8050 return false; | |
8051 } | |
8052 else | |
8053 { | |
8054 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT) | |
8055 *cost = insn_default_latency (dep) + 1; | |
8056 else | |
8057 *cost = insn_default_latency (dep); | |
8058 } | |
8059 return false; | |
8060 } | |
8061 } | |
8062 } | |
8063 } | |
8064 break; | |
8065 | |
8066 default: | |
8067 gcc_unreachable (); | |
8068 } | |
8069 | |
8070 return true; | |
8071 } | |
8072 | |
8073 /* Adjust cost hook for FA726TE. */ | |
8074 static bool | |
8075 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost) | |
8076 { | |
8077 /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated) | |
8078 have penalty of 3. */ | |
8079 if (REG_NOTE_KIND (link) == REG_DEP_TRUE | |
8080 && recog_memoized (insn) >= 0 | |
8081 && recog_memoized (dep) >= 0 | |
8082 && get_attr_conds (dep) == CONDS_SET) | |
8083 { | |
8084 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */ | |
8085 if (get_attr_conds (insn) == CONDS_USE | |
8086 && get_attr_type (insn) != TYPE_BRANCH) | |
8087 { | |
8088 *cost = 3; | |
8089 return false; | |
8090 } | |
8091 | |
8092 if (GET_CODE (PATTERN (insn)) == COND_EXEC | |
8093 || get_attr_conds (insn) == CONDS_USE) | |
8094 { | |
8095 *cost = 0; | |
8096 return false; | |
8097 } | |
8098 } | |
8099 | |
8100 return true; | |
8101 } | |
8102 | |
8103 /* This function implements the target macro TARGET_SCHED_ADJUST_COST. | |
8104 It corrects the value of COST based on the relationship between | |
8105 INSN and DEP through the dependence LINK. It returns the new | |
8106 value. There is a per-core adjust_cost hook to adjust scheduler costs | |
8107 and the per-core hook can choose to completely override the generic | |
8108 adjust_cost function. Only put bits of code into arm_adjust_cost that | |
8109 are common across all cores. */ | |
8110 static int | |
8111 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost) | |
8112 { | |
8113 rtx i_pat, d_pat; | |
8114 | |
8115 /* When generating Thumb-1 code, we want to place flag-setting operations | |
8116 close to a conditional branch which depends on them, so that we can | |
8117 omit the comparison. */ | |
8118 if (TARGET_THUMB1 | |
8119 && REG_NOTE_KIND (link) == 0 | |
8120 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn | |
8121 && recog_memoized (dep) >= 0 | |
8122 && get_attr_conds (dep) == CONDS_SET) | |
8123 return 0; | |
8124 | |
8125 if (current_tune->sched_adjust_cost != NULL) | |
8126 { | |
8127 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost)) | |
8128 return cost; | |
7708 } | 8129 } |
7709 | 8130 |
7710 /* XXX This is not strictly true for the FPA. */ | 8131 /* XXX This is not strictly true for the FPA. */ |
7711 if (REG_NOTE_KIND (link) == REG_DEP_ANTI | 8132 if (REG_NOTE_KIND (link) == REG_DEP_ANTI |
7712 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT) | 8133 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT) |
7726 /* This is a load after a store, there is no conflict if the load reads | 8147 /* This is a load after a store, there is no conflict if the load reads |
7727 from a cached area. Assume that loads from the stack, and from the | 8148 from a cached area. Assume that loads from the stack, and from the |
7728 constant pool are cached, and that others will miss. This is a | 8149 constant pool are cached, and that others will miss. This is a |
7729 hack. */ | 8150 hack. */ |
7730 | 8151 |
7731 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem)) | 8152 if ((GET_CODE (src_mem) == SYMBOL_REF |
8153 && CONSTANT_POOL_ADDRESS_P (src_mem)) | |
7732 || reg_mentioned_p (stack_pointer_rtx, src_mem) | 8154 || reg_mentioned_p (stack_pointer_rtx, src_mem) |
7733 || reg_mentioned_p (frame_pointer_rtx, src_mem) | 8155 || reg_mentioned_p (frame_pointer_rtx, src_mem) |
7734 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem)) | 8156 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem)) |
7735 return 1; | 8157 return 1; |
7736 } | 8158 } |
8277 /* We can load this constant by using VDUP and a constant in a | 8699 /* We can load this constant by using VDUP and a constant in a |
8278 single ARM register. This will be cheaper than a vector | 8700 single ARM register. This will be cheaper than a vector |
8279 load. */ | 8701 load. */ |
8280 | 8702 |
8281 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); | 8703 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); |
8282 return gen_rtx_UNSPEC (mode, gen_rtvec (1, x), | 8704 return gen_rtx_VEC_DUPLICATE (mode, x); |
8283 UNSPEC_VDUP_N); | |
8284 } | 8705 } |
8285 | 8706 |
8286 /* Generate code to load VALS, which is a PARALLEL containing only | 8707 /* Generate code to load VALS, which is a PARALLEL containing only |
8287 constants (for vec_init) or CONST_VECTOR, efficiently into a | 8708 constants (for vec_init) or CONST_VECTOR, efficiently into a |
8288 register. Returns an RTX to copy into the register, or NULL_RTX | 8709 register. Returns an RTX to copy into the register, or NULL_RTX |
8374 /* Splat a single non-constant element if we can. */ | 8795 /* Splat a single non-constant element if we can. */ |
8375 if (all_same && GET_MODE_SIZE (inner_mode) <= 4) | 8796 if (all_same && GET_MODE_SIZE (inner_mode) <= 4) |
8376 { | 8797 { |
8377 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); | 8798 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); |
8378 emit_insn (gen_rtx_SET (VOIDmode, target, | 8799 emit_insn (gen_rtx_SET (VOIDmode, target, |
8379 gen_rtx_UNSPEC (mode, gen_rtvec (1, x), | 8800 gen_rtx_VEC_DUPLICATE (mode, x))); |
8380 UNSPEC_VDUP_N))); | |
8381 return; | 8801 return; |
8382 } | 8802 } |
8383 | 8803 |
8384 /* One field is non-constant. Load constant then overwrite varying | 8804 /* One field is non-constant. Load constant then overwrite varying |
8385 field. This is more efficient than using the stack. */ | 8805 field. This is more efficient than using the stack. */ |
8386 if (n_var == 1) | 8806 if (n_var == 1) |
8387 { | 8807 { |
8388 rtx copy = copy_rtx (vals); | 8808 rtx copy = copy_rtx (vals); |
8389 rtvec ops; | 8809 rtx index = GEN_INT (one_var); |
8390 | 8810 |
8391 /* Load constant part of vector, substitute neighboring value for | 8811 /* Load constant part of vector, substitute neighboring value for |
8392 varying element. */ | 8812 varying element. */ |
8393 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts); | 8813 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts); |
8394 neon_expand_vector_init (target, copy); | 8814 neon_expand_vector_init (target, copy); |
8395 | 8815 |
8396 /* Insert variable. */ | 8816 /* Insert variable. */ |
8397 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); | 8817 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); |
8398 ops = gen_rtvec (3, x, target, GEN_INT (one_var)); | 8818 switch (mode) |
8399 emit_insn (gen_rtx_SET (VOIDmode, target, | 8819 { |
8400 gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE))); | 8820 case V8QImode: |
8821 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index)); | |
8822 break; | |
8823 case V16QImode: | |
8824 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index)); | |
8825 break; | |
8826 case V4HImode: | |
8827 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index)); | |
8828 break; | |
8829 case V8HImode: | |
8830 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index)); | |
8831 break; | |
8832 case V2SImode: | |
8833 emit_insn (gen_neon_vset_lanev2si (target, x, target, index)); | |
8834 break; | |
8835 case V4SImode: | |
8836 emit_insn (gen_neon_vset_lanev4si (target, x, target, index)); | |
8837 break; | |
8838 case V2SFmode: | |
8839 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index)); | |
8840 break; | |
8841 case V4SFmode: | |
8842 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index)); | |
8843 break; | |
8844 case V2DImode: | |
8845 emit_insn (gen_neon_vset_lanev2di (target, x, target, index)); | |
8846 break; | |
8847 default: | |
8848 gcc_unreachable (); | |
8849 } | |
8401 return; | 8850 return; |
8402 } | 8851 } |
8403 | 8852 |
8404 /* Construct the vector in memory one field at a time | 8853 /* Construct the vector in memory one field at a time |
8405 and load the whole vector. */ | 8854 and load the whole vector. */ |
8603 /* Match: (mem (reg)). */ | 9052 /* Match: (mem (reg)). */ |
8604 if (GET_CODE (ind) == REG) | 9053 if (GET_CODE (ind) == REG) |
8605 return arm_address_register_rtx_p (ind, 0); | 9054 return arm_address_register_rtx_p (ind, 0); |
8606 | 9055 |
8607 /* Allow post-increment with Neon registers. */ | 9056 /* Allow post-increment with Neon registers. */ |
8608 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC)) | 9057 if ((type != 1 && GET_CODE (ind) == POST_INC) |
9058 || (type == 0 && GET_CODE (ind) == PRE_DEC)) | |
8609 return arm_address_register_rtx_p (XEXP (ind, 0), 0); | 9059 return arm_address_register_rtx_p (XEXP (ind, 0), 0); |
8610 | 9060 |
8611 /* FIXME: vld1 allows register post-modify. */ | 9061 /* FIXME: vld1 allows register post-modify. */ |
8612 | 9062 |
8613 /* Match: | 9063 /* Match: |
9111 only be true for the ARM8, ARM9 and StrongARM. If this ever | 9561 only be true for the ARM8, ARM9 and StrongARM. If this ever |
9112 changes, then the test below needs to be reworked. */ | 9562 changes, then the test below needs to be reworked. */ |
9113 if (nops == 2 && arm_ld_sched && add_offset != 0) | 9563 if (nops == 2 && arm_ld_sched && add_offset != 0) |
9114 return false; | 9564 return false; |
9115 | 9565 |
9566 /* XScale has load-store double instructions, but they have stricter | |
9567 alignment requirements than load-store multiple, so we cannot | |
9568 use them. | |
9569 | |
9570 For XScale ldm requires 2 + NREGS cycles to complete and blocks | |
9571 the pipeline until completion. | |
9572 | |
9573 NREGS CYCLES | |
9574 1 3 | |
9575 2 4 | |
9576 3 5 | |
9577 4 6 | |
9578 | |
9579 An ldr instruction takes 1-3 cycles, but does not block the | |
9580 pipeline. | |
9581 | |
9582 NREGS CYCLES | |
9583 1 1-3 | |
9584 2 2-6 | |
9585 3 3-9 | |
9586 4 4-12 | |
9587 | |
9588 Best case ldr will always win. However, the more ldr instructions | |
9589 we issue, the less likely we are to be able to schedule them well. | |
9590 Using ldr instructions also increases code size. | |
9591 | |
9592 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm | |
9593 for counts of 3 or 4 regs. */ | |
9594 if (nops <= 2 && arm_tune_xscale && !optimize_size) | |
9595 return false; | |
9116 return true; | 9596 return true; |
9117 } | 9597 } |
9118 | 9598 |
9119 /* Subroutine of load_multiple_sequence and store_multiple_sequence. | 9599 /* Subroutine of load_multiple_sequence and store_multiple_sequence. |
9120 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute | 9600 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute |
9154 return false; | 9634 return false; |
9155 } | 9635 } |
9156 return true; | 9636 return true; |
9157 } | 9637 } |
9158 | 9638 |
9159 int | 9639 /* Used to determine in a peephole whether a sequence of load |
9160 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base, | 9640 instructions can be changed into a load-multiple instruction. |
9161 HOST_WIDE_INT *load_offset) | 9641 NOPS is the number of separate load instructions we are examining. The |
9642 first NOPS entries in OPERANDS are the destination registers, the | |
9643 next NOPS entries are memory operands. If this function is | |
9644 successful, *BASE is set to the common base register of the memory | |
9645 accesses; *LOAD_OFFSET is set to the first memory location's offset | |
9646 from that base register. | |
9647 REGS is an array filled in with the destination register numbers. | |
9648 SAVED_ORDER (if nonnull), is an array filled in with an order that maps | |
9649 insn numbers to to an ascending order of stores. If CHECK_REGS is true, | |
9650 the sequence of registers in REGS matches the loads from ascending memory | |
9651 locations, and the function verifies that the register numbers are | |
9652 themselves ascending. If CHECK_REGS is false, the register numbers | |
9653 are stored in the order they are found in the operands. */ | |
9654 static int | |
9655 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order, | |
9656 int *base, HOST_WIDE_INT *load_offset, bool check_regs) | |
9162 { | 9657 { |
9163 int unsorted_regs[MAX_LDM_STM_OPS]; | 9658 int unsorted_regs[MAX_LDM_STM_OPS]; |
9164 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; | 9659 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; |
9165 int order[MAX_LDM_STM_OPS]; | 9660 int order[MAX_LDM_STM_OPS]; |
9661 rtx base_reg_rtx = NULL; | |
9166 int base_reg = -1; | 9662 int base_reg = -1; |
9167 int i, ldm_case; | 9663 int i, ldm_case; |
9168 | 9664 |
9169 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be | 9665 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be |
9170 easily extended if required. */ | 9666 easily extended if required. */ |
9204 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) | 9700 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) |
9205 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) | 9701 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) |
9206 == CONST_INT))) | 9702 == CONST_INT))) |
9207 { | 9703 { |
9208 if (i == 0) | 9704 if (i == 0) |
9209 base_reg = REGNO (reg); | |
9210 else | |
9211 { | 9705 { |
9212 if (base_reg != (int) REGNO (reg)) | 9706 base_reg = REGNO (reg); |
9213 /* Not addressed from the same base register. */ | 9707 base_reg_rtx = reg; |
9708 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM) | |
9214 return 0; | 9709 return 0; |
9215 } | 9710 } |
9711 else if (base_reg != (int) REGNO (reg)) | |
9712 /* Not addressed from the same base register. */ | |
9713 return 0; | |
9714 | |
9216 unsorted_regs[i] = (GET_CODE (operands[i]) == REG | 9715 unsorted_regs[i] = (GET_CODE (operands[i]) == REG |
9217 ? REGNO (operands[i]) | 9716 ? REGNO (operands[i]) |
9218 : REGNO (SUBREG_REG (operands[i]))); | 9717 : REGNO (SUBREG_REG (operands[i]))); |
9219 | 9718 |
9220 /* If it isn't an integer register, or if it overwrites the | 9719 /* If it isn't an integer register, or if it overwrites the |
9221 base register but isn't the last insn in the list, then | 9720 base register but isn't the last insn in the list, then |
9222 we can't do this. */ | 9721 we can't do this. */ |
9223 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14 | 9722 if (unsorted_regs[i] < 0 |
9723 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM) | |
9724 || unsorted_regs[i] > 14 | |
9224 || (i != nops - 1 && unsorted_regs[i] == base_reg)) | 9725 || (i != nops - 1 && unsorted_regs[i] == base_reg)) |
9225 return 0; | 9726 return 0; |
9226 | 9727 |
9227 unsorted_offsets[i] = INTVAL (offset); | 9728 unsorted_offsets[i] = INTVAL (offset); |
9228 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) | 9729 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) |
9236 /* All the useful information has now been extracted from the | 9737 /* All the useful information has now been extracted from the |
9237 operands into unsorted_regs and unsorted_offsets; additionally, | 9738 operands into unsorted_regs and unsorted_offsets; additionally, |
9238 order[0] has been set to the lowest offset in the list. Sort | 9739 order[0] has been set to the lowest offset in the list. Sort |
9239 the offsets into order, verifying that they are adjacent, and | 9740 the offsets into order, verifying that they are adjacent, and |
9240 check that the register numbers are ascending. */ | 9741 check that the register numbers are ascending. */ |
9241 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs)) | 9742 if (!compute_offset_order (nops, unsorted_offsets, order, |
9743 check_regs ? unsorted_regs : NULL)) | |
9242 return 0; | 9744 return 0; |
9243 | 9745 |
9746 if (saved_order) | |
9747 memcpy (saved_order, order, sizeof order); | |
9748 | |
9244 if (base) | 9749 if (base) |
9245 { | 9750 { |
9246 *base = base_reg; | 9751 *base = base_reg; |
9247 | 9752 |
9248 for (i = 0; i < nops; i++) | 9753 for (i = 0; i < nops; i++) |
9249 regs[i] = unsorted_regs[order[i]]; | 9754 regs[i] = unsorted_regs[check_regs ? order[i] : i]; |
9250 | 9755 |
9251 *load_offset = unsorted_offsets[order[0]]; | 9756 *load_offset = unsorted_offsets[order[0]]; |
9252 } | 9757 } |
9758 | |
9759 if (TARGET_THUMB1 | |
9760 && !peep2_reg_dead_p (nops, base_reg_rtx)) | |
9761 return 0; | |
9253 | 9762 |
9254 if (unsorted_offsets[order[0]] == 0) | 9763 if (unsorted_offsets[order[0]] == 0) |
9255 ldm_case = 1; /* ldmia */ | 9764 ldm_case = 1; /* ldmia */ |
9256 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) | 9765 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) |
9257 ldm_case = 2; /* ldmib */ | 9766 ldm_case = 2; /* ldmib */ |
9258 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) | 9767 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) |
9259 ldm_case = 3; /* ldmda */ | 9768 ldm_case = 3; /* ldmda */ |
9260 else if (unsorted_offsets[order[nops - 1]] == -4) | 9769 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4) |
9261 ldm_case = 4; /* ldmdb */ | 9770 ldm_case = 4; /* ldmdb */ |
9262 else if (const_ok_for_arm (unsorted_offsets[order[0]]) | 9771 else if (const_ok_for_arm (unsorted_offsets[order[0]]) |
9263 || const_ok_for_arm (-unsorted_offsets[order[0]])) | 9772 || const_ok_for_arm (-unsorted_offsets[order[0]])) |
9264 ldm_case = 5; | 9773 ldm_case = 5; |
9265 else | 9774 else |
9271 return 0; | 9780 return 0; |
9272 | 9781 |
9273 return ldm_case; | 9782 return ldm_case; |
9274 } | 9783 } |
9275 | 9784 |
9276 const char * | 9785 /* Used to determine in a peephole whether a sequence of store instructions can |
9277 emit_ldm_seq (rtx *operands, int nops) | 9786 be changed into a store-multiple instruction. |
9278 { | 9787 NOPS is the number of separate store instructions we are examining. |
9279 int regs[MAX_LDM_STM_OPS]; | 9788 NOPS_TOTAL is the total number of instructions recognized by the peephole |
9280 int base_reg; | 9789 pattern. |
9281 HOST_WIDE_INT offset; | 9790 The first NOPS entries in OPERANDS are the source registers, the next |
9282 char buf[100]; | 9791 NOPS entries are memory operands. If this function is successful, *BASE is |
9283 int i; | 9792 set to the common base register of the memory accesses; *LOAD_OFFSET is set |
9284 | 9793 to the first memory location's offset from that base register. REGS is an |
9285 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset)) | 9794 array filled in with the source register numbers, REG_RTXS (if nonnull) is |
9286 { | 9795 likewise filled with the corresponding rtx's. |
9287 case 1: | 9796 SAVED_ORDER (if nonnull), is an array filled in with an order that maps insn |
9288 strcpy (buf, "ldm%(ia%)\t"); | 9797 numbers to to an ascending order of stores. |
9289 break; | 9798 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores |
9290 | 9799 from ascending memory locations, and the function verifies that the register |
9291 case 2: | 9800 numbers are themselves ascending. If CHECK_REGS is false, the register |
9292 strcpy (buf, "ldm%(ib%)\t"); | 9801 numbers are stored in the order they are found in the operands. */ |
9293 break; | 9802 static int |
9294 | 9803 store_multiple_sequence (rtx *operands, int nops, int nops_total, |
9295 case 3: | 9804 int *regs, rtx *reg_rtxs, int *saved_order, int *base, |
9296 strcpy (buf, "ldm%(da%)\t"); | 9805 HOST_WIDE_INT *load_offset, bool check_regs) |
9297 break; | |
9298 | |
9299 case 4: | |
9300 strcpy (buf, "ldm%(db%)\t"); | |
9301 break; | |
9302 | |
9303 case 5: | |
9304 if (offset >= 0) | |
9305 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX, | |
9306 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg], | |
9307 (long) offset); | |
9308 else | |
9309 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX, | |
9310 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg], | |
9311 (long) -offset); | |
9312 output_asm_insn (buf, operands); | |
9313 base_reg = regs[0]; | |
9314 strcpy (buf, "ldm%(ia%)\t"); | |
9315 break; | |
9316 | |
9317 default: | |
9318 gcc_unreachable (); | |
9319 } | |
9320 | |
9321 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX, | |
9322 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]); | |
9323 | |
9324 for (i = 1; i < nops; i++) | |
9325 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX, | |
9326 reg_names[regs[i]]); | |
9327 | |
9328 strcat (buf, "}\t%@ phole ldm"); | |
9329 | |
9330 output_asm_insn (buf, operands); | |
9331 return ""; | |
9332 } | |
9333 | |
9334 int | |
9335 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base, | |
9336 HOST_WIDE_INT * load_offset) | |
9337 { | 9806 { |
9338 int unsorted_regs[MAX_LDM_STM_OPS]; | 9807 int unsorted_regs[MAX_LDM_STM_OPS]; |
9808 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS]; | |
9339 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; | 9809 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; |
9340 int order[MAX_LDM_STM_OPS]; | 9810 int order[MAX_LDM_STM_OPS]; |
9341 int base_reg = -1; | 9811 int base_reg = -1; |
9812 rtx base_reg_rtx = NULL; | |
9342 int i, stm_case; | 9813 int i, stm_case; |
9343 | 9814 |
9344 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be | 9815 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be |
9345 easily extended if required. */ | 9816 easily extended if required. */ |
9346 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS); | 9817 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS); |
9378 || (GET_CODE (reg) == SUBREG | 9849 || (GET_CODE (reg) == SUBREG |
9379 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) | 9850 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) |
9380 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) | 9851 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) |
9381 == CONST_INT))) | 9852 == CONST_INT))) |
9382 { | 9853 { |
9383 unsorted_regs[i] = (GET_CODE (operands[i]) == REG | 9854 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG |
9384 ? REGNO (operands[i]) | 9855 ? operands[i] : SUBREG_REG (operands[i])); |
9385 : REGNO (SUBREG_REG (operands[i]))); | 9856 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]); |
9857 | |
9386 if (i == 0) | 9858 if (i == 0) |
9387 base_reg = REGNO (reg); | 9859 { |
9860 base_reg = REGNO (reg); | |
9861 base_reg_rtx = reg; | |
9862 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM) | |
9863 return 0; | |
9864 } | |
9388 else if (base_reg != (int) REGNO (reg)) | 9865 else if (base_reg != (int) REGNO (reg)) |
9389 /* Not addressed from the same base register. */ | 9866 /* Not addressed from the same base register. */ |
9390 return 0; | 9867 return 0; |
9391 | 9868 |
9392 /* If it isn't an integer register, then we can't do this. */ | 9869 /* If it isn't an integer register, then we can't do this. */ |
9393 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14) | 9870 if (unsorted_regs[i] < 0 |
9871 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM) | |
9872 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg) | |
9873 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM) | |
9874 || unsorted_regs[i] > 14) | |
9394 return 0; | 9875 return 0; |
9395 | 9876 |
9396 unsorted_offsets[i] = INTVAL (offset); | 9877 unsorted_offsets[i] = INTVAL (offset); |
9397 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) | 9878 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) |
9398 order[0] = i; | 9879 order[0] = i; |
9405 /* All the useful information has now been extracted from the | 9886 /* All the useful information has now been extracted from the |
9406 operands into unsorted_regs and unsorted_offsets; additionally, | 9887 operands into unsorted_regs and unsorted_offsets; additionally, |
9407 order[0] has been set to the lowest offset in the list. Sort | 9888 order[0] has been set to the lowest offset in the list. Sort |
9408 the offsets into order, verifying that they are adjacent, and | 9889 the offsets into order, verifying that they are adjacent, and |
9409 check that the register numbers are ascending. */ | 9890 check that the register numbers are ascending. */ |
9410 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs)) | 9891 if (!compute_offset_order (nops, unsorted_offsets, order, |
9892 check_regs ? unsorted_regs : NULL)) | |
9411 return 0; | 9893 return 0; |
9412 | 9894 |
9895 if (saved_order) | |
9896 memcpy (saved_order, order, sizeof order); | |
9897 | |
9413 if (base) | 9898 if (base) |
9414 { | 9899 { |
9415 *base = base_reg; | 9900 *base = base_reg; |
9416 | 9901 |
9417 for (i = 0; i < nops; i++) | 9902 for (i = 0; i < nops; i++) |
9418 regs[i] = unsorted_regs[order[i]]; | 9903 { |
9904 regs[i] = unsorted_regs[check_regs ? order[i] : i]; | |
9905 if (reg_rtxs) | |
9906 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i]; | |
9907 } | |
9419 | 9908 |
9420 *load_offset = unsorted_offsets[order[0]]; | 9909 *load_offset = unsorted_offsets[order[0]]; |
9421 } | 9910 } |
9911 | |
9912 if (TARGET_THUMB1 | |
9913 && !peep2_reg_dead_p (nops_total, base_reg_rtx)) | |
9914 return 0; | |
9422 | 9915 |
9423 if (unsorted_offsets[order[0]] == 0) | 9916 if (unsorted_offsets[order[0]] == 0) |
9424 stm_case = 1; /* stmia */ | 9917 stm_case = 1; /* stmia */ |
9425 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) | 9918 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) |
9426 stm_case = 2; /* stmib */ | 9919 stm_case = 2; /* stmib */ |
9427 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) | 9920 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) |
9428 stm_case = 3; /* stmda */ | 9921 stm_case = 3; /* stmda */ |
9429 else if (unsorted_offsets[order[nops - 1]] == -4) | 9922 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4) |
9430 stm_case = 4; /* stmdb */ | 9923 stm_case = 4; /* stmdb */ |
9431 else | 9924 else |
9432 return 0; | 9925 return 0; |
9433 | 9926 |
9434 if (!multiple_operation_profitable_p (false, nops, 0)) | 9927 if (!multiple_operation_profitable_p (false, nops, 0)) |
9435 return 0; | 9928 return 0; |
9436 | 9929 |
9437 return stm_case; | 9930 return stm_case; |
9438 } | 9931 } |
9439 | |
9440 const char * | |
9441 emit_stm_seq (rtx *operands, int nops) | |
9442 { | |
9443 int regs[MAX_LDM_STM_OPS]; | |
9444 int base_reg; | |
9445 HOST_WIDE_INT offset; | |
9446 char buf[100]; | |
9447 int i; | |
9448 | |
9449 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset)) | |
9450 { | |
9451 case 1: | |
9452 strcpy (buf, "stm%(ia%)\t"); | |
9453 break; | |
9454 | |
9455 case 2: | |
9456 strcpy (buf, "stm%(ib%)\t"); | |
9457 break; | |
9458 | |
9459 case 3: | |
9460 strcpy (buf, "stm%(da%)\t"); | |
9461 break; | |
9462 | |
9463 case 4: | |
9464 strcpy (buf, "stm%(db%)\t"); | |
9465 break; | |
9466 | |
9467 default: | |
9468 gcc_unreachable (); | |
9469 } | |
9470 | |
9471 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX, | |
9472 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]); | |
9473 | |
9474 for (i = 1; i < nops; i++) | |
9475 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX, | |
9476 reg_names[regs[i]]); | |
9477 | |
9478 strcat (buf, "}\t%@ phole stm"); | |
9479 | |
9480 output_asm_insn (buf, operands); | |
9481 return ""; | |
9482 } | |
9483 | 9932 |
9484 /* Routines for use in generating RTL. */ | 9933 /* Routines for use in generating RTL. */ |
9485 | 9934 |
9486 rtx | 9935 /* Generate a load-multiple instruction. COUNT is the number of loads in |
9487 arm_gen_load_multiple (int base_regno, int count, rtx from, int up, | 9936 the instruction; REGS and MEMS are arrays containing the operands. |
9488 int write_back, rtx basemem, HOST_WIDE_INT *offsetp) | 9937 BASEREG is the base register to be used in addressing the memory operands. |
9489 { | 9938 WBACK_OFFSET is nonzero if the instruction should update the base |
9490 HOST_WIDE_INT offset = *offsetp; | 9939 register. */ |
9940 | |
9941 static rtx | |
9942 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg, | |
9943 HOST_WIDE_INT wback_offset) | |
9944 { | |
9491 int i = 0, j; | 9945 int i = 0, j; |
9492 rtx result; | 9946 rtx result; |
9493 int sign = up ? 1 : -1; | 9947 |
9494 rtx mem, addr; | 9948 if (!multiple_operation_profitable_p (false, count, 0)) |
9495 | |
9496 /* XScale has load-store double instructions, but they have stricter | |
9497 alignment requirements than load-store multiple, so we cannot | |
9498 use them. | |
9499 | |
9500 For XScale ldm requires 2 + NREGS cycles to complete and blocks | |
9501 the pipeline until completion. | |
9502 | |
9503 NREGS CYCLES | |
9504 1 3 | |
9505 2 4 | |
9506 3 5 | |
9507 4 6 | |
9508 | |
9509 An ldr instruction takes 1-3 cycles, but does not block the | |
9510 pipeline. | |
9511 | |
9512 NREGS CYCLES | |
9513 1 1-3 | |
9514 2 2-6 | |
9515 3 3-9 | |
9516 4 4-12 | |
9517 | |
9518 Best case ldr will always win. However, the more ldr instructions | |
9519 we issue, the less likely we are to be able to schedule them well. | |
9520 Using ldr instructions also increases code size. | |
9521 | |
9522 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm | |
9523 for counts of 3 or 4 regs. */ | |
9524 if (arm_tune_xscale && count <= 2 && ! optimize_size) | |
9525 { | 9949 { |
9526 rtx seq; | 9950 rtx seq; |
9527 | 9951 |
9528 start_sequence (); | 9952 start_sequence (); |
9529 | 9953 |
9530 for (i = 0; i < count; i++) | 9954 for (i = 0; i < count; i++) |
9531 { | 9955 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]); |
9532 addr = plus_constant (from, i * 4 * sign); | 9956 |
9533 mem = adjust_automodify_address (basemem, SImode, addr, offset); | 9957 if (wback_offset != 0) |
9534 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem); | 9958 emit_move_insn (basereg, plus_constant (basereg, wback_offset)); |
9535 offset += 4 * sign; | |
9536 } | |
9537 | |
9538 if (write_back) | |
9539 { | |
9540 emit_move_insn (from, plus_constant (from, count * 4 * sign)); | |
9541 *offsetp = offset; | |
9542 } | |
9543 | 9959 |
9544 seq = get_insns (); | 9960 seq = get_insns (); |
9545 end_sequence (); | 9961 end_sequence (); |
9546 | 9962 |
9547 return seq; | 9963 return seq; |
9548 } | 9964 } |
9549 | 9965 |
9550 result = gen_rtx_PARALLEL (VOIDmode, | 9966 result = gen_rtx_PARALLEL (VOIDmode, |
9551 rtvec_alloc (count + (write_back ? 1 : 0))); | 9967 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0))); |
9552 if (write_back) | 9968 if (wback_offset != 0) |
9553 { | 9969 { |
9554 XVECEXP (result, 0, 0) | 9970 XVECEXP (result, 0, 0) |
9555 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign)); | 9971 = gen_rtx_SET (VOIDmode, basereg, |
9972 plus_constant (basereg, wback_offset)); | |
9556 i = 1; | 9973 i = 1; |
9557 count++; | 9974 count++; |
9558 } | 9975 } |
9559 | 9976 |
9560 for (j = 0; i < count; i++, j++) | 9977 for (j = 0; i < count; i++, j++) |
9561 { | 9978 XVECEXP (result, 0, i) |
9562 addr = plus_constant (from, j * 4 * sign); | 9979 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]); |
9563 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset); | 9980 |
9564 XVECEXP (result, 0, i) | 9981 return result; |
9565 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem); | 9982 } |
9566 offset += 4 * sign; | 9983 |
9984 /* Generate a store-multiple instruction. COUNT is the number of stores in | |
9985 the instruction; REGS and MEMS are arrays containing the operands. | |
9986 BASEREG is the base register to be used in addressing the memory operands. | |
9987 WBACK_OFFSET is nonzero if the instruction should update the base | |
9988 register. */ | |
9989 | |
9990 static rtx | |
9991 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg, | |
9992 HOST_WIDE_INT wback_offset) | |
9993 { | |
9994 int i = 0, j; | |
9995 rtx result; | |
9996 | |
9997 if (GET_CODE (basereg) == PLUS) | |
9998 basereg = XEXP (basereg, 0); | |
9999 | |
10000 if (!multiple_operation_profitable_p (false, count, 0)) | |
10001 { | |
10002 rtx seq; | |
10003 | |
10004 start_sequence (); | |
10005 | |
10006 for (i = 0; i < count; i++) | |
10007 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i])); | |
10008 | |
10009 if (wback_offset != 0) | |
10010 emit_move_insn (basereg, plus_constant (basereg, wback_offset)); | |
10011 | |
10012 seq = get_insns (); | |
10013 end_sequence (); | |
10014 | |
10015 return seq; | |
10016 } | |
10017 | |
10018 result = gen_rtx_PARALLEL (VOIDmode, | |
10019 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0))); | |
10020 if (wback_offset != 0) | |
10021 { | |
10022 XVECEXP (result, 0, 0) | |
10023 = gen_rtx_SET (VOIDmode, basereg, | |
10024 plus_constant (basereg, wback_offset)); | |
10025 i = 1; | |
10026 count++; | |
10027 } | |
10028 | |
10029 for (j = 0; i < count; i++, j++) | |
10030 XVECEXP (result, 0, i) | |
10031 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j])); | |
10032 | |
10033 return result; | |
10034 } | |
10035 | |
10036 /* Generate either a load-multiple or a store-multiple instruction. This | |
10037 function can be used in situations where we can start with a single MEM | |
10038 rtx and adjust its address upwards. | |
10039 COUNT is the number of operations in the instruction, not counting a | |
10040 possible update of the base register. REGS is an array containing the | |
10041 register operands. | |
10042 BASEREG is the base register to be used in addressing the memory operands, | |
10043 which are constructed from BASEMEM. | |
10044 WRITE_BACK specifies whether the generated instruction should include an | |
10045 update of the base register. | |
10046 OFFSETP is used to pass an offset to and from this function; this offset | |
10047 is not used when constructing the address (instead BASEMEM should have an | |
10048 appropriate offset in its address), it is used only for setting | |
10049 MEM_OFFSET. It is updated only if WRITE_BACK is true.*/ | |
10050 | |
10051 static rtx | |
10052 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg, | |
10053 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp) | |
10054 { | |
10055 rtx mems[MAX_LDM_STM_OPS]; | |
10056 HOST_WIDE_INT offset = *offsetp; | |
10057 int i; | |
10058 | |
10059 gcc_assert (count <= MAX_LDM_STM_OPS); | |
10060 | |
10061 if (GET_CODE (basereg) == PLUS) | |
10062 basereg = XEXP (basereg, 0); | |
10063 | |
10064 for (i = 0; i < count; i++) | |
10065 { | |
10066 rtx addr = plus_constant (basereg, i * 4); | |
10067 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset); | |
10068 offset += 4; | |
9567 } | 10069 } |
9568 | 10070 |
9569 if (write_back) | 10071 if (write_back) |
9570 *offsetp = offset; | 10072 *offsetp = offset; |
9571 | 10073 |
9572 return result; | 10074 if (is_load) |
10075 return arm_gen_load_multiple_1 (count, regs, mems, basereg, | |
10076 write_back ? 4 * count : 0); | |
10077 else | |
10078 return arm_gen_store_multiple_1 (count, regs, mems, basereg, | |
10079 write_back ? 4 * count : 0); | |
9573 } | 10080 } |
9574 | 10081 |
9575 rtx | 10082 rtx |
9576 arm_gen_store_multiple (int base_regno, int count, rtx to, int up, | 10083 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back, |
9577 int write_back, rtx basemem, HOST_WIDE_INT *offsetp) | 10084 rtx basemem, HOST_WIDE_INT *offsetp) |
9578 { | 10085 { |
9579 HOST_WIDE_INT offset = *offsetp; | 10086 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem, |
9580 int i = 0, j; | 10087 offsetp); |
9581 rtx result; | 10088 } |
9582 int sign = up ? 1 : -1; | 10089 |
9583 rtx mem, addr; | 10090 rtx |
9584 | 10091 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back, |
9585 /* See arm_gen_load_multiple for discussion of | 10092 rtx basemem, HOST_WIDE_INT *offsetp) |
9586 the pros/cons of ldm/stm usage for XScale. */ | 10093 { |
9587 if (arm_tune_xscale && count <= 2 && ! optimize_size) | 10094 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem, |
9588 { | 10095 offsetp); |
9589 rtx seq; | 10096 } |
9590 | 10097 |
9591 start_sequence (); | 10098 /* Called from a peephole2 expander to turn a sequence of loads into an |
9592 | 10099 LDM instruction. OPERANDS are the operands found by the peephole matcher; |
9593 for (i = 0; i < count; i++) | 10100 NOPS indicates how many separate loads we are trying to combine. SORT_REGS |
9594 { | 10101 is true if we can reorder the registers because they are used commutatively |
9595 addr = plus_constant (to, i * 4 * sign); | 10102 subsequently. |
9596 mem = adjust_automodify_address (basemem, SImode, addr, offset); | 10103 Returns true iff we could generate a new instruction. */ |
9597 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i)); | 10104 |
9598 offset += 4 * sign; | 10105 bool |
9599 } | 10106 gen_ldm_seq (rtx *operands, int nops, bool sort_regs) |
9600 | 10107 { |
9601 if (write_back) | 10108 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS]; |
9602 { | 10109 rtx mems[MAX_LDM_STM_OPS]; |
9603 emit_move_insn (to, plus_constant (to, count * 4 * sign)); | 10110 int i, j, base_reg; |
9604 *offsetp = offset; | 10111 rtx base_reg_rtx; |
9605 } | 10112 HOST_WIDE_INT offset; |
9606 | 10113 int write_back = FALSE; |
9607 seq = get_insns (); | 10114 int ldm_case; |
9608 end_sequence (); | 10115 rtx addr; |
9609 | 10116 |
9610 return seq; | 10117 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order, |
9611 } | 10118 &base_reg, &offset, !sort_regs); |
9612 | 10119 |
9613 result = gen_rtx_PARALLEL (VOIDmode, | 10120 if (ldm_case == 0) |
9614 rtvec_alloc (count + (write_back ? 1 : 0))); | 10121 return false; |
9615 if (write_back) | 10122 |
9616 { | 10123 if (sort_regs) |
9617 XVECEXP (result, 0, 0) | 10124 for (i = 0; i < nops - 1; i++) |
9618 = gen_rtx_SET (VOIDmode, to, | 10125 for (j = i + 1; j < nops; j++) |
9619 plus_constant (to, count * 4 * sign)); | 10126 if (regs[i] > regs[j]) |
9620 i = 1; | 10127 { |
9621 count++; | 10128 int t = regs[i]; |
9622 } | 10129 regs[i] = regs[j]; |
9623 | 10130 regs[j] = t; |
9624 for (j = 0; i < count; i++, j++) | 10131 } |
9625 { | 10132 base_reg_rtx = gen_rtx_REG (Pmode, base_reg); |
9626 addr = plus_constant (to, j * 4 * sign); | 10133 |
9627 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset); | 10134 if (TARGET_THUMB1) |
9628 XVECEXP (result, 0, i) | 10135 { |
9629 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j)); | 10136 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx)); |
9630 offset += 4 * sign; | 10137 gcc_assert (ldm_case == 1 || ldm_case == 5); |
9631 } | 10138 write_back = TRUE; |
9632 | 10139 } |
9633 if (write_back) | 10140 |
9634 *offsetp = offset; | 10141 if (ldm_case == 5) |
9635 | 10142 { |
9636 return result; | 10143 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]); |
10144 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset))); | |
10145 offset = 0; | |
10146 if (!TARGET_THUMB1) | |
10147 { | |
10148 base_reg = regs[0]; | |
10149 base_reg_rtx = newbase; | |
10150 } | |
10151 } | |
10152 | |
10153 for (i = 0; i < nops; i++) | |
10154 { | |
10155 addr = plus_constant (base_reg_rtx, offset + i * 4); | |
10156 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]], | |
10157 SImode, addr, 0); | |
10158 } | |
10159 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx, | |
10160 write_back ? offset + i * 4 : 0)); | |
10161 return true; | |
10162 } | |
10163 | |
10164 /* Called from a peephole2 expander to turn a sequence of stores into an | |
10165 STM instruction. OPERANDS are the operands found by the peephole matcher; | |
10166 NOPS indicates how many separate stores we are trying to combine. | |
10167 Returns true iff we could generate a new instruction. */ | |
10168 | |
10169 bool | |
10170 gen_stm_seq (rtx *operands, int nops) | |
10171 { | |
10172 int i; | |
10173 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS]; | |
10174 rtx mems[MAX_LDM_STM_OPS]; | |
10175 int base_reg; | |
10176 rtx base_reg_rtx; | |
10177 HOST_WIDE_INT offset; | |
10178 int write_back = FALSE; | |
10179 int stm_case; | |
10180 rtx addr; | |
10181 bool base_reg_dies; | |
10182 | |
10183 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL, | |
10184 mem_order, &base_reg, &offset, true); | |
10185 | |
10186 if (stm_case == 0) | |
10187 return false; | |
10188 | |
10189 base_reg_rtx = gen_rtx_REG (Pmode, base_reg); | |
10190 | |
10191 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx); | |
10192 if (TARGET_THUMB1) | |
10193 { | |
10194 gcc_assert (base_reg_dies); | |
10195 write_back = TRUE; | |
10196 } | |
10197 | |
10198 if (stm_case == 5) | |
10199 { | |
10200 gcc_assert (base_reg_dies); | |
10201 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset))); | |
10202 offset = 0; | |
10203 } | |
10204 | |
10205 addr = plus_constant (base_reg_rtx, offset); | |
10206 | |
10207 for (i = 0; i < nops; i++) | |
10208 { | |
10209 addr = plus_constant (base_reg_rtx, offset + i * 4); | |
10210 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]], | |
10211 SImode, addr, 0); | |
10212 } | |
10213 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx, | |
10214 write_back ? offset + i * 4 : 0)); | |
10215 return true; | |
10216 } | |
10217 | |
10218 /* Called from a peephole2 expander to turn a sequence of stores that are | |
10219 preceded by constant loads into an STM instruction. OPERANDS are the | |
10220 operands found by the peephole matcher; NOPS indicates how many | |
10221 separate stores we are trying to combine; there are 2 * NOPS | |
10222 instructions in the peephole. | |
10223 Returns true iff we could generate a new instruction. */ | |
10224 | |
10225 bool | |
10226 gen_const_stm_seq (rtx *operands, int nops) | |
10227 { | |
10228 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS]; | |
10229 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS]; | |
10230 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS]; | |
10231 rtx mems[MAX_LDM_STM_OPS]; | |
10232 int base_reg; | |
10233 rtx base_reg_rtx; | |
10234 HOST_WIDE_INT offset; | |
10235 int write_back = FALSE; | |
10236 int stm_case; | |
10237 rtx addr; | |
10238 bool base_reg_dies; | |
10239 int i, j; | |
10240 HARD_REG_SET allocated; | |
10241 | |
10242 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs, | |
10243 mem_order, &base_reg, &offset, false); | |
10244 | |
10245 if (stm_case == 0) | |
10246 return false; | |
10247 | |
10248 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs); | |
10249 | |
10250 /* If the same register is used more than once, try to find a free | |
10251 register. */ | |
10252 CLEAR_HARD_REG_SET (allocated); | |
10253 for (i = 0; i < nops; i++) | |
10254 { | |
10255 for (j = i + 1; j < nops; j++) | |
10256 if (regs[i] == regs[j]) | |
10257 { | |
10258 rtx t = peep2_find_free_register (0, nops * 2, | |
10259 TARGET_THUMB1 ? "l" : "r", | |
10260 SImode, &allocated); | |
10261 if (t == NULL_RTX) | |
10262 return false; | |
10263 reg_rtxs[i] = t; | |
10264 regs[i] = REGNO (t); | |
10265 } | |
10266 } | |
10267 | |
10268 /* Compute an ordering that maps the register numbers to an ascending | |
10269 sequence. */ | |
10270 reg_order[0] = 0; | |
10271 for (i = 0; i < nops; i++) | |
10272 if (regs[i] < regs[reg_order[0]]) | |
10273 reg_order[0] = i; | |
10274 | |
10275 for (i = 1; i < nops; i++) | |
10276 { | |
10277 int this_order = reg_order[i - 1]; | |
10278 for (j = 0; j < nops; j++) | |
10279 if (regs[j] > regs[reg_order[i - 1]] | |
10280 && (this_order == reg_order[i - 1] | |
10281 || regs[j] < regs[this_order])) | |
10282 this_order = j; | |
10283 reg_order[i] = this_order; | |
10284 } | |
10285 | |
10286 /* Ensure that registers that must be live after the instruction end | |
10287 up with the correct value. */ | |
10288 for (i = 0; i < nops; i++) | |
10289 { | |
10290 int this_order = reg_order[i]; | |
10291 if ((this_order != mem_order[i] | |
10292 || orig_reg_rtxs[this_order] != reg_rtxs[this_order]) | |
10293 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order])) | |
10294 return false; | |
10295 } | |
10296 | |
10297 /* Load the constants. */ | |
10298 for (i = 0; i < nops; i++) | |
10299 { | |
10300 rtx op = operands[2 * nops + mem_order[i]]; | |
10301 sorted_regs[i] = regs[reg_order[i]]; | |
10302 emit_move_insn (reg_rtxs[reg_order[i]], op); | |
10303 } | |
10304 | |
10305 base_reg_rtx = gen_rtx_REG (Pmode, base_reg); | |
10306 | |
10307 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx); | |
10308 if (TARGET_THUMB1) | |
10309 { | |
10310 gcc_assert (base_reg_dies); | |
10311 write_back = TRUE; | |
10312 } | |
10313 | |
10314 if (stm_case == 5) | |
10315 { | |
10316 gcc_assert (base_reg_dies); | |
10317 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset))); | |
10318 offset = 0; | |
10319 } | |
10320 | |
10321 addr = plus_constant (base_reg_rtx, offset); | |
10322 | |
10323 for (i = 0; i < nops; i++) | |
10324 { | |
10325 addr = plus_constant (base_reg_rtx, offset + i * 4); | |
10326 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]], | |
10327 SImode, addr, 0); | |
10328 } | |
10329 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx, | |
10330 write_back ? offset + i * 4 : 0)); | |
10331 return true; | |
9637 } | 10332 } |
9638 | 10333 |
9639 int | 10334 int |
9640 arm_gen_movmemqi (rtx *operands) | 10335 arm_gen_movmemqi (rtx *operands) |
9641 { | 10336 { |
9667 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3); | 10362 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3); |
9668 | 10363 |
9669 for (i = 0; in_words_to_go >= 2; i+=4) | 10364 for (i = 0; in_words_to_go >= 2; i+=4) |
9670 { | 10365 { |
9671 if (in_words_to_go > 4) | 10366 if (in_words_to_go > 4) |
9672 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE, | 10367 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src, |
9673 srcbase, &srcoffset)); | 10368 TRUE, srcbase, &srcoffset)); |
9674 else | 10369 else |
9675 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE, | 10370 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go, |
9676 FALSE, srcbase, &srcoffset)); | 10371 src, FALSE, srcbase, |
10372 &srcoffset)); | |
9677 | 10373 |
9678 if (out_words_to_go) | 10374 if (out_words_to_go) |
9679 { | 10375 { |
9680 if (out_words_to_go > 4) | 10376 if (out_words_to_go > 4) |
9681 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE, | 10377 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst, |
9682 dstbase, &dstoffset)); | 10378 TRUE, dstbase, &dstoffset)); |
9683 else if (out_words_to_go != 1) | 10379 else if (out_words_to_go != 1) |
9684 emit_insn (arm_gen_store_multiple (0, out_words_to_go, | 10380 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, |
9685 dst, TRUE, | 10381 out_words_to_go, dst, |
9686 (last_bytes == 0 | 10382 (last_bytes == 0 |
9687 ? FALSE : TRUE), | 10383 ? FALSE : TRUE), |
9688 dstbase, &dstoffset)); | 10384 dstbase, &dstoffset)); |
9689 else | 10385 else |
9690 { | 10386 { |
10008 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), | 10704 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), |
10009 INTVAL (XEXP (x, 2))); | 10705 INTVAL (XEXP (x, 2))); |
10010 | 10706 |
10011 /* Alternate canonicalizations of the above. These are somewhat cleaner. */ | 10707 /* Alternate canonicalizations of the above. These are somewhat cleaner. */ |
10012 if (GET_CODE (x) == AND | 10708 if (GET_CODE (x) == AND |
10709 && (op == EQ || op == NE) | |
10013 && COMPARISON_P (XEXP (x, 0)) | 10710 && COMPARISON_P (XEXP (x, 0)) |
10014 && COMPARISON_P (XEXP (x, 1))) | 10711 && COMPARISON_P (XEXP (x, 1))) |
10015 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), | 10712 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), |
10016 DOM_CC_X_AND_Y); | 10713 DOM_CC_X_AND_Y); |
10017 | 10714 |
10018 if (GET_CODE (x) == IOR | 10715 if (GET_CODE (x) == IOR |
10716 && (op == EQ || op == NE) | |
10019 && COMPARISON_P (XEXP (x, 0)) | 10717 && COMPARISON_P (XEXP (x, 0)) |
10020 && COMPARISON_P (XEXP (x, 1))) | 10718 && COMPARISON_P (XEXP (x, 1))) |
10021 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), | 10719 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), |
10022 DOM_CC_X_OR_Y); | 10720 DOM_CC_X_OR_Y); |
10023 | 10721 |
10055 if (GET_MODE (x) == SImode && (op == LTU || op == GEU) | 10753 if (GET_MODE (x) == SImode && (op == LTU || op == GEU) |
10056 && GET_CODE (x) == PLUS | 10754 && GET_CODE (x) == PLUS |
10057 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y))) | 10755 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y))) |
10058 return CC_Cmode; | 10756 return CC_Cmode; |
10059 | 10757 |
10758 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode) | |
10759 { | |
10760 /* To keep things simple, always use the Cirrus cfcmp64 if it is | |
10761 available. */ | |
10762 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK) | |
10763 return CCmode; | |
10764 | |
10765 switch (op) | |
10766 { | |
10767 case EQ: | |
10768 case NE: | |
10769 /* A DImode comparison against zero can be implemented by | |
10770 or'ing the two halves together. */ | |
10771 if (y == const0_rtx) | |
10772 return CC_Zmode; | |
10773 | |
10774 /* We can do an equality test in three Thumb instructions. */ | |
10775 if (!TARGET_ARM) | |
10776 return CC_Zmode; | |
10777 | |
10778 /* FALLTHROUGH */ | |
10779 | |
10780 case LTU: | |
10781 case LEU: | |
10782 case GTU: | |
10783 case GEU: | |
10784 /* DImode unsigned comparisons can be implemented by cmp + | |
10785 cmpeq without a scratch register. Not worth doing in | |
10786 Thumb-2. */ | |
10787 if (TARGET_ARM) | |
10788 return CC_CZmode; | |
10789 | |
10790 /* FALLTHROUGH */ | |
10791 | |
10792 case LT: | |
10793 case LE: | |
10794 case GT: | |
10795 case GE: | |
10796 /* DImode signed and unsigned comparisons can be implemented | |
10797 by cmp + sbcs with a scratch register, but that does not | |
10798 set the Z flag - we must reverse GT/LE/GTU/LEU. */ | |
10799 gcc_assert (op != EQ && op != NE); | |
10800 return CC_NCVmode; | |
10801 | |
10802 default: | |
10803 gcc_unreachable (); | |
10804 } | |
10805 } | |
10806 | |
10060 return CCmode; | 10807 return CCmode; |
10061 } | 10808 } |
10062 | 10809 |
10063 /* X and Y are two things to compare using CODE. Emit the compare insn and | 10810 /* X and Y are two things to compare using CODE. Emit the compare insn and |
10064 return the rtx for register 0 in the proper mode. FP means this is a | 10811 return the rtx for register 0 in the proper mode. FP means this is a |
10065 floating point compare: I don't think that it is needed on the arm. */ | 10812 floating point compare: I don't think that it is needed on the arm. */ |
10066 rtx | 10813 rtx |
10067 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y) | 10814 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y) |
10068 { | 10815 { |
10069 enum machine_mode mode = SELECT_CC_MODE (code, x, y); | 10816 enum machine_mode mode; |
10070 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM); | 10817 rtx cc_reg; |
10071 | 10818 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode; |
10072 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); | 10819 |
10820 /* We might have X as a constant, Y as a register because of the predicates | |
10821 used for cmpdi. If so, force X to a register here. */ | |
10822 if (dimode_comparison && !REG_P (x)) | |
10823 x = force_reg (DImode, x); | |
10824 | |
10825 mode = SELECT_CC_MODE (code, x, y); | |
10826 cc_reg = gen_rtx_REG (mode, CC_REGNUM); | |
10827 | |
10828 if (dimode_comparison | |
10829 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK) | |
10830 && mode != CC_CZmode) | |
10831 { | |
10832 rtx clobber, set; | |
10833 | |
10834 /* To compare two non-zero values for equality, XOR them and | |
10835 then compare against zero. Not used for ARM mode; there | |
10836 CC_CZmode is cheaper. */ | |
10837 if (mode == CC_Zmode && y != const0_rtx) | |
10838 { | |
10839 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN); | |
10840 y = const0_rtx; | |
10841 } | |
10842 /* A scratch register is required. */ | |
10843 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode)); | |
10844 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y)); | |
10845 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber))); | |
10846 } | |
10847 else | |
10848 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); | |
10073 | 10849 |
10074 return cc_reg; | 10850 return cc_reg; |
10075 } | 10851 } |
10076 | 10852 |
10077 /* Generate a sequence of insns that will generate the correct return | 10853 /* Generate a sequence of insns that will generate the correct return |
11396 return true; | 12172 return true; |
11397 | 12173 |
11398 return false; | 12174 return false; |
11399 } | 12175 } |
11400 | 12176 |
12177 /* Return true if it is possible to inline both the high and low parts | |
12178 of a 64-bit constant into 32-bit data processing instructions. */ | |
12179 bool | |
12180 arm_const_double_by_immediates (rtx val) | |
12181 { | |
12182 enum machine_mode mode = GET_MODE (val); | |
12183 rtx part; | |
12184 | |
12185 if (mode == VOIDmode) | |
12186 mode = DImode; | |
12187 | |
12188 part = gen_highpart_mode (SImode, mode, val); | |
12189 | |
12190 gcc_assert (GET_CODE (part) == CONST_INT); | |
12191 | |
12192 if (!const_ok_for_arm (INTVAL (part))) | |
12193 return false; | |
12194 | |
12195 part = gen_lowpart (SImode, val); | |
12196 | |
12197 gcc_assert (GET_CODE (part) == CONST_INT); | |
12198 | |
12199 if (!const_ok_for_arm (INTVAL (part))) | |
12200 return false; | |
12201 | |
12202 return true; | |
12203 } | |
12204 | |
11401 /* Scan INSN and note any of its operands that need fixing. | 12205 /* Scan INSN and note any of its operands that need fixing. |
11402 If DO_PUSHES is false we do not actually push any of the fixups | 12206 If DO_PUSHES is false we do not actually push any of the fixups |
11403 needed. The function returns TRUE if any fixups were needed/pushed. | 12207 needed. The function returns TRUE if any fixups were needed/pushed. |
11404 This is used by arm_memory_load_p() which needs to know about loads | 12208 This is used by arm_memory_load_p() which needs to know about loads |
11405 of constants that will be converted into minipool loads. */ | 12209 of constants that will be converted into minipool loads. */ |
11469 } | 12273 } |
11470 | 12274 |
11471 return result; | 12275 return result; |
11472 } | 12276 } |
11473 | 12277 |
12278 /* Convert instructions to their cc-clobbering variant if possible, since | |
12279 that allows us to use smaller encodings. */ | |
12280 | |
12281 static void | |
12282 thumb2_reorg (void) | |
12283 { | |
12284 basic_block bb; | |
12285 regset_head live; | |
12286 | |
12287 INIT_REG_SET (&live); | |
12288 | |
12289 /* We are freeing block_for_insn in the toplev to keep compatibility | |
12290 with old MDEP_REORGS that are not CFG based. Recompute it now. */ | |
12291 compute_bb_for_insn (); | |
12292 df_analyze (); | |
12293 | |
12294 FOR_EACH_BB (bb) | |
12295 { | |
12296 rtx insn; | |
12297 | |
12298 COPY_REG_SET (&live, DF_LR_OUT (bb)); | |
12299 df_simulate_initialize_backwards (bb, &live); | |
12300 FOR_BB_INSNS_REVERSE (bb, insn) | |
12301 { | |
12302 if (NONJUMP_INSN_P (insn) | |
12303 && !REGNO_REG_SET_P (&live, CC_REGNUM)) | |
12304 { | |
12305 rtx pat = PATTERN (insn); | |
12306 if (GET_CODE (pat) == SET | |
12307 && low_register_operand (XEXP (pat, 0), SImode) | |
12308 && thumb_16bit_operator (XEXP (pat, 1), SImode) | |
12309 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode) | |
12310 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode)) | |
12311 { | |
12312 rtx dst = XEXP (pat, 0); | |
12313 rtx src = XEXP (pat, 1); | |
12314 rtx op0 = XEXP (src, 0); | |
12315 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH | |
12316 ? XEXP (src, 1) : NULL); | |
12317 | |
12318 if (rtx_equal_p (dst, op0) | |
12319 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS) | |
12320 { | |
12321 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM); | |
12322 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg); | |
12323 rtvec vec = gen_rtvec (2, pat, clobber); | |
12324 | |
12325 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec); | |
12326 INSN_CODE (insn) = -1; | |
12327 } | |
12328 /* We can also handle a commutative operation where the | |
12329 second operand matches the destination. */ | |
12330 else if (op1 && rtx_equal_p (dst, op1)) | |
12331 { | |
12332 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM); | |
12333 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg); | |
12334 rtvec vec; | |
12335 | |
12336 src = copy_rtx (src); | |
12337 XEXP (src, 0) = op1; | |
12338 XEXP (src, 1) = op0; | |
12339 pat = gen_rtx_SET (VOIDmode, dst, src); | |
12340 vec = gen_rtvec (2, pat, clobber); | |
12341 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec); | |
12342 INSN_CODE (insn) = -1; | |
12343 } | |
12344 } | |
12345 } | |
12346 | |
12347 if (NONDEBUG_INSN_P (insn)) | |
12348 df_simulate_one_insn_backwards (bb, insn, &live); | |
12349 } | |
12350 } | |
12351 | |
12352 CLEAR_REG_SET (&live); | |
12353 } | |
12354 | |
11474 /* Gcc puts the pool in the wrong place for ARM, since we can only | 12355 /* Gcc puts the pool in the wrong place for ARM, since we can only |
11475 load addresses a limited distance around the pc. We do some | 12356 load addresses a limited distance around the pc. We do some |
11476 special munging to move the constant pool values to the correct | 12357 special munging to move the constant pool values to the correct |
11477 point in the code. */ | 12358 point in the code. */ |
11478 static void | 12359 static void |
11480 { | 12361 { |
11481 rtx insn; | 12362 rtx insn; |
11482 HOST_WIDE_INT address = 0; | 12363 HOST_WIDE_INT address = 0; |
11483 Mfix * fix; | 12364 Mfix * fix; |
11484 | 12365 |
12366 if (TARGET_THUMB2) | |
12367 thumb2_reorg (); | |
12368 | |
11485 minipool_fix_head = minipool_fix_tail = NULL; | 12369 minipool_fix_head = minipool_fix_tail = NULL; |
11486 | 12370 |
11487 /* The first insn must always be a note, or the code below won't | 12371 /* The first insn must always be a note, or the code below won't |
11488 scan it properly. */ | 12372 scan it properly. */ |
11489 insn = get_insns (); | 12373 insn = get_insns (); |
12077 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands); | 12961 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands); |
12078 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops); | 12962 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops); |
12079 return ""; | 12963 return ""; |
12080 } | 12964 } |
12081 | 12965 |
12082 /* Output a move between double words. | 12966 /* Output a move between double words. It must be REG<-MEM |
12083 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM | 12967 or MEM<-REG. */ |
12084 or MEM<-REG and all MEMs must be offsettable addresses. */ | |
12085 const char * | 12968 const char * |
12086 output_move_double (rtx *operands) | 12969 output_move_double (rtx *operands) |
12087 { | 12970 { |
12088 enum rtx_code code0 = GET_CODE (operands[0]); | 12971 enum rtx_code code0 = GET_CODE (operands[0]); |
12089 enum rtx_code code1 = GET_CODE (operands[1]); | 12972 enum rtx_code code1 = GET_CODE (operands[1]); |
12352 && (INTVAL(otherops[2]) <= -256 | 13235 && (INTVAL(otherops[2]) <= -256 |
12353 || INTVAL(otherops[2]) >= 256)) | 13236 || INTVAL(otherops[2]) >= 256)) |
12354 { | 13237 { |
12355 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY) | 13238 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY) |
12356 { | 13239 { |
12357 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops); | 13240 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops); |
12358 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops); | 13241 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops); |
12359 } | 13242 } |
12360 else | 13243 else |
12361 { | 13244 { |
12362 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops); | 13245 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops); |
12363 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops); | 13246 output_asm_insn ("str%?\t%0, [%1], %2", otherops); |
12364 } | 13247 } |
12365 } | 13248 } |
12366 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY) | 13249 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY) |
12367 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops); | 13250 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops); |
12368 else | 13251 else |
12668 output_asm_insn (buff, ops); | 13551 output_asm_insn (buff, ops); |
12669 | 13552 |
12670 return ""; | 13553 return ""; |
12671 } | 13554 } |
12672 | 13555 |
13556 /* Compute and return the length of neon_mov<mode>, where <mode> is | |
13557 one of VSTRUCT modes: EI, OI, CI or XI. */ | |
13558 int | |
13559 arm_attr_length_move_neon (rtx insn) | |
13560 { | |
13561 rtx reg, mem, addr; | |
13562 int load; | |
13563 enum machine_mode mode; | |
13564 | |
13565 extract_insn_cached (insn); | |
13566 | |
13567 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1])) | |
13568 { | |
13569 mode = GET_MODE (recog_data.operand[0]); | |
13570 switch (mode) | |
13571 { | |
13572 case EImode: | |
13573 case OImode: | |
13574 return 8; | |
13575 case CImode: | |
13576 return 12; | |
13577 case XImode: | |
13578 return 16; | |
13579 default: | |
13580 gcc_unreachable (); | |
13581 } | |
13582 } | |
13583 | |
13584 load = REG_P (recog_data.operand[0]); | |
13585 reg = recog_data.operand[!load]; | |
13586 mem = recog_data.operand[load]; | |
13587 | |
13588 gcc_assert (MEM_P (mem)); | |
13589 | |
13590 mode = GET_MODE (reg); | |
13591 addr = XEXP (mem, 0); | |
13592 | |
13593 /* Strip off const from addresses like (const (plus (...))). */ | |
13594 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS) | |
13595 addr = XEXP (addr, 0); | |
13596 | |
13597 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS) | |
13598 { | |
13599 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2; | |
13600 return insns * 4; | |
13601 } | |
13602 else | |
13603 return 4; | |
13604 } | |
13605 | |
13606 /* Return nonzero if the offset in the address is an immediate. Otherwise, | |
13607 return zero. */ | |
13608 | |
13609 int | |
13610 arm_address_offset_is_imm (rtx insn) | |
13611 { | |
13612 rtx mem, addr; | |
13613 | |
13614 extract_insn_cached (insn); | |
13615 | |
13616 if (REG_P (recog_data.operand[0])) | |
13617 return 0; | |
13618 | |
13619 mem = recog_data.operand[0]; | |
13620 | |
13621 gcc_assert (MEM_P (mem)); | |
13622 | |
13623 addr = XEXP (mem, 0); | |
13624 | |
13625 if (GET_CODE (addr) == REG | |
13626 || (GET_CODE (addr) == PLUS | |
13627 && GET_CODE (XEXP (addr, 0)) == REG | |
13628 && GET_CODE (XEXP (addr, 1)) == CONST_INT)) | |
13629 return 1; | |
13630 else | |
13631 return 0; | |
13632 } | |
13633 | |
12673 /* Output an ADD r, s, #n where n may be too big for one instruction. | 13634 /* Output an ADD r, s, #n where n may be too big for one instruction. |
12674 If adding zero to one register, output nothing. */ | 13635 If adding zero to one register, output nothing. */ |
12675 const char * | 13636 const char * |
12676 output_add_immediate (rtx *operands) | 13637 output_add_immediate (rtx *operands) |
12677 { | 13638 { |
13825 && bit_count(saved_regs_mask) * 4 == count | 14786 && bit_count(saved_regs_mask) * 4 == count |
13826 && !IS_INTERRUPT (func_type) | 14787 && !IS_INTERRUPT (func_type) |
13827 && !crtl->tail_call_emit) | 14788 && !crtl->tail_call_emit) |
13828 { | 14789 { |
13829 unsigned long mask; | 14790 unsigned long mask; |
13830 mask = (1 << (arm_size_return_regs() / 4)) - 1; | 14791 /* Preserve return values, of any size. */ |
14792 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1; | |
13831 mask ^= 0xf; | 14793 mask ^= 0xf; |
13832 mask &= ~saved_regs_mask; | 14794 mask &= ~saved_regs_mask; |
13833 reg = 0; | 14795 reg = 0; |
13834 while (bit_count (mask) * 4 > amount) | 14796 while (bit_count (mask) * 4 > amount) |
13835 { | 14797 { |
14281 { | 15243 { |
14282 return !cfun->machine->lr_save_eliminated | 15244 return !cfun->machine->lr_save_eliminated |
14283 && (!leaf_function_p () | 15245 && (!leaf_function_p () |
14284 || thumb_far_jump_used_p () | 15246 || thumb_far_jump_used_p () |
14285 || df_regs_ever_live_p (LR_REGNUM)); | 15247 || df_regs_ever_live_p (LR_REGNUM)); |
15248 } | |
15249 | |
15250 | |
15251 /* Return true if r3 is used by any of the tail call insns in the | |
15252 current function. */ | |
15253 | |
15254 static bool | |
15255 any_sibcall_uses_r3 (void) | |
15256 { | |
15257 edge_iterator ei; | |
15258 edge e; | |
15259 | |
15260 if (!crtl->tail_call_emit) | |
15261 return false; | |
15262 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) | |
15263 if (e->flags & EDGE_SIBCALL) | |
15264 { | |
15265 rtx call = BB_END (e->src); | |
15266 if (!CALL_P (call)) | |
15267 call = prev_nonnote_nondebug_insn (call); | |
15268 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call)); | |
15269 if (find_regno_fusage (call, USE, 3)) | |
15270 return true; | |
15271 } | |
15272 return false; | |
14286 } | 15273 } |
14287 | 15274 |
14288 | 15275 |
14289 /* Compute the distance from register FROM to register TO. | 15276 /* Compute the distance from register FROM to register TO. |
14290 These can be the arg pointer (26), the soft frame pointer (25), | 15277 These can be the arg pointer (26), the soft frame pointer (25), |
14448 int reg = -1; | 15435 int reg = -1; |
14449 | 15436 |
14450 /* If it is safe to use r3, then do so. This sometimes | 15437 /* If it is safe to use r3, then do so. This sometimes |
14451 generates better code on Thumb-2 by avoiding the need to | 15438 generates better code on Thumb-2 by avoiding the need to |
14452 use 32-bit push/pop instructions. */ | 15439 use 32-bit push/pop instructions. */ |
14453 if (!crtl->tail_call_emit | 15440 if (! any_sibcall_uses_r3 () |
14454 && arm_size_return_regs () <= 12) | 15441 && arm_size_return_regs () <= 12 |
15442 && (offsets->saved_regs_mask & (1 << 3)) == 0) | |
14455 { | 15443 { |
14456 reg = 3; | 15444 reg = 3; |
14457 } | 15445 } |
14458 else | 15446 else |
14459 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++) | 15447 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++) |
14956 stack_pointer_rtx, insn)); | 15944 stack_pointer_rtx, insn)); |
14957 RTX_FRAME_RELATED_P (insn) = 1; | 15945 RTX_FRAME_RELATED_P (insn) = 1; |
14958 } | 15946 } |
14959 } | 15947 } |
14960 | 15948 |
15949 if (flag_stack_usage) | |
15950 current_function_static_stack_size | |
15951 = offsets->outgoing_args - offsets->saved_args; | |
15952 | |
14961 if (offsets->outgoing_args != offsets->saved_args + saved_regs) | 15953 if (offsets->outgoing_args != offsets->saved_args + saved_regs) |
14962 { | 15954 { |
14963 /* This add can produce multiple insns for a large constant, so we | 15955 /* This add can produce multiple insns for a large constant, so we |
14964 need to get tricky. */ | 15956 need to get tricky. */ |
14965 rtx last = get_last_insn (); | 15957 rtx last = get_last_insn (); |
15003 the call to mcount. Similarly if the user has requested no | 15995 the call to mcount. Similarly if the user has requested no |
15004 scheduling in the prolog. Similarly if we want non-call exceptions | 15996 scheduling in the prolog. Similarly if we want non-call exceptions |
15005 using the EABI unwinder, to prevent faulting instructions from being | 15997 using the EABI unwinder, to prevent faulting instructions from being |
15006 swapped with a stack adjustment. */ | 15998 swapped with a stack adjustment. */ |
15007 if (crtl->profile || !TARGET_SCHED_PROLOG | 15999 if (crtl->profile || !TARGET_SCHED_PROLOG |
15008 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions)) | 16000 || (arm_except_unwind_info (&global_options) == UI_TARGET |
16001 && cfun->can_throw_non_call_exceptions)) | |
15009 emit_insn (gen_blockage ()); | 16002 emit_insn (gen_blockage ()); |
15010 | 16003 |
15011 /* If the link register is being kept alive, with the return address in it, | 16004 /* If the link register is being kept alive, with the return address in it, |
15012 then make sure that it does not get reused by the ce2 pass. */ | 16005 then make sure that it does not get reused by the ce2 pass. */ |
15013 if ((live_regs_mask & (1 << LR_REGNUM)) == 0) | 16006 if ((live_regs_mask & (1 << LR_REGNUM)) == 0) |
15062 doing this instruction unconditionally. | 16055 doing this instruction unconditionally. |
15063 If CODE is 'N' then X is a floating point operand that must be negated | 16056 If CODE is 'N' then X is a floating point operand that must be negated |
15064 before output. | 16057 before output. |
15065 If CODE is 'B' then output a bitwise inverted value of X (a const int). | 16058 If CODE is 'B' then output a bitwise inverted value of X (a const int). |
15066 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */ | 16059 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */ |
15067 void | 16060 static void |
15068 arm_print_operand (FILE *stream, rtx x, int code) | 16061 arm_print_operand (FILE *stream, rtx x, int code) |
15069 { | 16062 { |
15070 switch (code) | 16063 switch (code) |
15071 { | 16064 { |
15072 case '@': | 16065 case '@': |
15240 register, and the value from the higher address is put into the | 16233 register, and the value from the higher address is put into the |
15241 higher numbered register, the load will work regardless of whether | 16234 higher numbered register, the load will work regardless of whether |
15242 the value being loaded is big-wordian or little-wordian. The | 16235 the value being loaded is big-wordian or little-wordian. The |
15243 order of the two register loads can matter however, if the address | 16236 order of the two register loads can matter however, if the address |
15244 of the memory location is actually held in one of the registers | 16237 of the memory location is actually held in one of the registers |
15245 being overwritten by the load. */ | 16238 being overwritten by the load. |
16239 | |
16240 The 'Q' and 'R' constraints are also available for 64-bit | |
16241 constants. */ | |
15246 case 'Q': | 16242 case 'Q': |
16243 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) | |
16244 { | |
16245 rtx part = gen_lowpart (SImode, x); | |
16246 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part)); | |
16247 return; | |
16248 } | |
16249 | |
15247 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) | 16250 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) |
15248 { | 16251 { |
15249 output_operand_lossage ("invalid operand for code '%c'", code); | 16252 output_operand_lossage ("invalid operand for code '%c'", code); |
15250 return; | 16253 return; |
15251 } | 16254 } |
15252 | 16255 |
15253 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)); | 16256 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)); |
15254 return; | 16257 return; |
15255 | 16258 |
15256 case 'R': | 16259 case 'R': |
16260 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE) | |
16261 { | |
16262 enum machine_mode mode = GET_MODE (x); | |
16263 rtx part; | |
16264 | |
16265 if (mode == VOIDmode) | |
16266 mode = DImode; | |
16267 part = gen_highpart_mode (SImode, mode, x); | |
16268 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part)); | |
16269 return; | |
16270 } | |
16271 | |
15257 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) | 16272 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) |
15258 { | 16273 { |
15259 output_operand_lossage ("invalid operand for code '%c'", code); | 16274 output_operand_lossage ("invalid operand for code '%c'", code); |
15260 return; | 16275 return; |
15261 } | 16276 } |
15574 /* Memory operand for vld1/vst1 instruction. */ | 16589 /* Memory operand for vld1/vst1 instruction. */ |
15575 case 'A': | 16590 case 'A': |
15576 { | 16591 { |
15577 rtx addr; | 16592 rtx addr; |
15578 bool postinc = FALSE; | 16593 bool postinc = FALSE; |
16594 unsigned align, modesize, align_bits; | |
16595 | |
15579 gcc_assert (GET_CODE (x) == MEM); | 16596 gcc_assert (GET_CODE (x) == MEM); |
15580 addr = XEXP (x, 0); | 16597 addr = XEXP (x, 0); |
15581 if (GET_CODE (addr) == POST_INC) | 16598 if (GET_CODE (addr) == POST_INC) |
15582 { | 16599 { |
15583 postinc = 1; | 16600 postinc = 1; |
15584 addr = XEXP (addr, 0); | 16601 addr = XEXP (addr, 0); |
15585 } | 16602 } |
15586 asm_fprintf (stream, "[%r]", REGNO (addr)); | 16603 asm_fprintf (stream, "[%r", REGNO (addr)); |
16604 | |
16605 /* We know the alignment of this access, so we can emit a hint in the | |
16606 instruction (for some alignments) as an aid to the memory subsystem | |
16607 of the target. */ | |
16608 align = MEM_ALIGN (x) >> 3; | |
16609 modesize = GET_MODE_SIZE (GET_MODE (x)); | |
16610 | |
16611 /* Only certain alignment specifiers are supported by the hardware. */ | |
16612 if (modesize == 16 && (align % 32) == 0) | |
16613 align_bits = 256; | |
16614 else if ((modesize == 8 || modesize == 16) && (align % 16) == 0) | |
16615 align_bits = 128; | |
16616 else if ((align % 8) == 0) | |
16617 align_bits = 64; | |
16618 else | |
16619 align_bits = 0; | |
16620 | |
16621 if (align_bits != 0) | |
16622 asm_fprintf (stream, ":%d", align_bits); | |
16623 | |
16624 asm_fprintf (stream, "]"); | |
16625 | |
15587 if (postinc) | 16626 if (postinc) |
15588 fputs("!", stream); | 16627 fputs("!", stream); |
16628 } | |
16629 return; | |
16630 | |
16631 case 'C': | |
16632 { | |
16633 rtx addr; | |
16634 | |
16635 gcc_assert (GET_CODE (x) == MEM); | |
16636 addr = XEXP (x, 0); | |
16637 gcc_assert (GET_CODE (addr) == REG); | |
16638 asm_fprintf (stream, "[%r]", REGNO (addr)); | |
15589 } | 16639 } |
15590 return; | 16640 return; |
15591 | 16641 |
15592 /* Translate an S register number into a D register number and element index. */ | 16642 /* Translate an S register number into a D register number and element index. */ |
15593 case 'y': | 16643 case 'y': |
15679 | 16729 |
15680 output_addr_const (stream, x); | 16730 output_addr_const (stream, x); |
15681 break; | 16731 break; |
15682 } | 16732 } |
15683 } | 16733 } |
16734 } | |
16735 | |
16736 /* Target hook for printing a memory address. */ | |
16737 static void | |
16738 arm_print_operand_address (FILE *stream, rtx x) | |
16739 { | |
16740 if (TARGET_32BIT) | |
16741 { | |
16742 int is_minus = GET_CODE (x) == MINUS; | |
16743 | |
16744 if (GET_CODE (x) == REG) | |
16745 asm_fprintf (stream, "[%r, #0]", REGNO (x)); | |
16746 else if (GET_CODE (x) == PLUS || is_minus) | |
16747 { | |
16748 rtx base = XEXP (x, 0); | |
16749 rtx index = XEXP (x, 1); | |
16750 HOST_WIDE_INT offset = 0; | |
16751 if (GET_CODE (base) != REG | |
16752 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM)) | |
16753 { | |
16754 /* Ensure that BASE is a register. */ | |
16755 /* (one of them must be). */ | |
16756 /* Also ensure the SP is not used as in index register. */ | |
16757 rtx temp = base; | |
16758 base = index; | |
16759 index = temp; | |
16760 } | |
16761 switch (GET_CODE (index)) | |
16762 { | |
16763 case CONST_INT: | |
16764 offset = INTVAL (index); | |
16765 if (is_minus) | |
16766 offset = -offset; | |
16767 asm_fprintf (stream, "[%r, #%wd]", | |
16768 REGNO (base), offset); | |
16769 break; | |
16770 | |
16771 case REG: | |
16772 asm_fprintf (stream, "[%r, %s%r]", | |
16773 REGNO (base), is_minus ? "-" : "", | |
16774 REGNO (index)); | |
16775 break; | |
16776 | |
16777 case MULT: | |
16778 case ASHIFTRT: | |
16779 case LSHIFTRT: | |
16780 case ASHIFT: | |
16781 case ROTATERT: | |
16782 { | |
16783 asm_fprintf (stream, "[%r, %s%r", | |
16784 REGNO (base), is_minus ? "-" : "", | |
16785 REGNO (XEXP (index, 0))); | |
16786 arm_print_operand (stream, index, 'S'); | |
16787 fputs ("]", stream); | |
16788 break; | |
16789 } | |
16790 | |
16791 default: | |
16792 gcc_unreachable (); | |
16793 } | |
16794 } | |
16795 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC | |
16796 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC) | |
16797 { | |
16798 extern enum machine_mode output_memory_reference_mode; | |
16799 | |
16800 gcc_assert (GET_CODE (XEXP (x, 0)) == REG); | |
16801 | |
16802 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC) | |
16803 asm_fprintf (stream, "[%r, #%s%d]!", | |
16804 REGNO (XEXP (x, 0)), | |
16805 GET_CODE (x) == PRE_DEC ? "-" : "", | |
16806 GET_MODE_SIZE (output_memory_reference_mode)); | |
16807 else | |
16808 asm_fprintf (stream, "[%r], #%s%d", | |
16809 REGNO (XEXP (x, 0)), | |
16810 GET_CODE (x) == POST_DEC ? "-" : "", | |
16811 GET_MODE_SIZE (output_memory_reference_mode)); | |
16812 } | |
16813 else if (GET_CODE (x) == PRE_MODIFY) | |
16814 { | |
16815 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0))); | |
16816 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT) | |
16817 asm_fprintf (stream, "#%wd]!", | |
16818 INTVAL (XEXP (XEXP (x, 1), 1))); | |
16819 else | |
16820 asm_fprintf (stream, "%r]!", | |
16821 REGNO (XEXP (XEXP (x, 1), 1))); | |
16822 } | |
16823 else if (GET_CODE (x) == POST_MODIFY) | |
16824 { | |
16825 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0))); | |
16826 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT) | |
16827 asm_fprintf (stream, "#%wd", | |
16828 INTVAL (XEXP (XEXP (x, 1), 1))); | |
16829 else | |
16830 asm_fprintf (stream, "%r", | |
16831 REGNO (XEXP (XEXP (x, 1), 1))); | |
16832 } | |
16833 else output_addr_const (stream, x); | |
16834 } | |
16835 else | |
16836 { | |
16837 if (GET_CODE (x) == REG) | |
16838 asm_fprintf (stream, "[%r]", REGNO (x)); | |
16839 else if (GET_CODE (x) == POST_INC) | |
16840 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0))); | |
16841 else if (GET_CODE (x) == PLUS) | |
16842 { | |
16843 gcc_assert (GET_CODE (XEXP (x, 0)) == REG); | |
16844 if (GET_CODE (XEXP (x, 1)) == CONST_INT) | |
16845 asm_fprintf (stream, "[%r, #%wd]", | |
16846 REGNO (XEXP (x, 0)), | |
16847 INTVAL (XEXP (x, 1))); | |
16848 else | |
16849 asm_fprintf (stream, "[%r, %r]", | |
16850 REGNO (XEXP (x, 0)), | |
16851 REGNO (XEXP (x, 1))); | |
16852 } | |
16853 else | |
16854 output_addr_const (stream, x); | |
16855 } | |
16856 } | |
16857 | |
16858 /* Target hook for indicating whether a punctuation character for | |
16859 TARGET_PRINT_OPERAND is valid. */ | |
16860 static bool | |
16861 arm_print_operand_punct_valid_p (unsigned char code) | |
16862 { | |
16863 return (code == '@' || code == '|' || code == '.' | |
16864 || code == '(' || code == ')' || code == '#' | |
16865 || (TARGET_32BIT && (code == '?')) | |
16866 || (TARGET_THUMB2 && (code == '!')) | |
16867 || (TARGET_THUMB && (code == '_'))); | |
15684 } | 16868 } |
15685 | 16869 |
15686 /* Target hook for assembling integer objects. The ARM version needs to | 16870 /* Target hook for assembling integer objects. The ARM version needs to |
15687 handle word-sized values specially. */ | 16871 handle word-sized values specially. */ |
15688 static bool | 16872 static bool |
15938 default: gcc_unreachable (); | 17122 default: gcc_unreachable (); |
15939 } | 17123 } |
15940 | 17124 |
15941 case CC_Cmode: | 17125 case CC_Cmode: |
15942 switch (comp_code) | 17126 switch (comp_code) |
15943 { | 17127 { |
15944 case LTU: return ARM_CS; | 17128 case LTU: return ARM_CS; |
15945 case GEU: return ARM_CC; | 17129 case GEU: return ARM_CC; |
15946 default: gcc_unreachable (); | 17130 default: gcc_unreachable (); |
15947 } | 17131 } |
17132 | |
17133 case CC_CZmode: | |
17134 switch (comp_code) | |
17135 { | |
17136 case NE: return ARM_NE; | |
17137 case EQ: return ARM_EQ; | |
17138 case GEU: return ARM_CS; | |
17139 case GTU: return ARM_HI; | |
17140 case LEU: return ARM_LS; | |
17141 case LTU: return ARM_CC; | |
17142 default: gcc_unreachable (); | |
17143 } | |
17144 | |
17145 case CC_NCVmode: | |
17146 switch (comp_code) | |
17147 { | |
17148 case GE: return ARM_GE; | |
17149 case LT: return ARM_LT; | |
17150 case GEU: return ARM_CS; | |
17151 case LTU: return ARM_CC; | |
17152 default: gcc_unreachable (); | |
17153 } | |
15948 | 17154 |
15949 case CCmode: | 17155 case CCmode: |
15950 switch (comp_code) | 17156 switch (comp_code) |
15951 { | 17157 { |
15952 case NE: return ARM_NE; | 17158 case NE: return ARM_NE; |
18077 } | 19283 } |
18078 | 19284 |
18079 static enum insn_code | 19285 static enum insn_code |
18080 locate_neon_builtin_icode (int fcode, neon_itype *itype) | 19286 locate_neon_builtin_icode (int fcode, neon_itype *itype) |
18081 { | 19287 { |
18082 neon_builtin_datum key, *found; | 19288 neon_builtin_datum key |
19289 = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 }; | |
19290 neon_builtin_datum *found; | |
18083 int idx; | 19291 int idx; |
18084 | 19292 |
18085 key.base_fcode = fcode; | 19293 key.base_fcode = fcode; |
18086 found = (neon_builtin_datum *) | 19294 found = (neon_builtin_datum *) |
18087 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data), | 19295 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data), |
18670 thumb_exit() */ | 19878 thumb_exit() */ |
18671 thumb_exit (f, -1); | 19879 thumb_exit (f, -1); |
18672 return; | 19880 return; |
18673 } | 19881 } |
18674 | 19882 |
18675 if (ARM_EABI_UNWIND_TABLES && push) | 19883 if (push && arm_except_unwind_info (&global_options) == UI_TARGET) |
18676 { | 19884 { |
18677 fprintf (f, "\t.save\t{"); | 19885 fprintf (f, "\t.save\t{"); |
18678 for (regno = 0; regno < 15; regno++) | 19886 for (regno = 0; regno < 15; regno++) |
18679 { | 19887 { |
18680 if (real_regs & (1 << regno)) | 19888 if (real_regs & (1 << regno)) |
19030 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM); | 20238 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM); |
19031 | 20239 |
19032 /* Return to caller. */ | 20240 /* Return to caller. */ |
19033 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr); | 20241 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr); |
19034 } | 20242 } |
19035 | |
19036 | 20243 |
20244 /* Scan INSN just before assembler is output for it. | |
20245 For Thumb-1, we track the status of the condition codes; this | |
20246 information is used in the cbranchsi4_insn pattern. */ | |
19037 void | 20247 void |
19038 thumb1_final_prescan_insn (rtx insn) | 20248 thumb1_final_prescan_insn (rtx insn) |
19039 { | 20249 { |
19040 if (flag_print_asm_name) | 20250 if (flag_print_asm_name) |
19041 asm_fprintf (asm_out_file, "%@ 0x%04x\n", | 20251 asm_fprintf (asm_out_file, "%@ 0x%04x\n", |
19042 INSN_ADDRESSES (INSN_UID (insn))); | 20252 INSN_ADDRESSES (INSN_UID (insn))); |
20253 /* Don't overwrite the previous setter when we get to a cbranch. */ | |
20254 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn) | |
20255 { | |
20256 enum attr_conds conds; | |
20257 | |
20258 if (cfun->machine->thumb1_cc_insn) | |
20259 { | |
20260 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn) | |
20261 || modified_in_p (cfun->machine->thumb1_cc_op1, insn)) | |
20262 CC_STATUS_INIT; | |
20263 } | |
20264 conds = get_attr_conds (insn); | |
20265 if (conds == CONDS_SET) | |
20266 { | |
20267 rtx set = single_set (insn); | |
20268 cfun->machine->thumb1_cc_insn = insn; | |
20269 cfun->machine->thumb1_cc_op0 = SET_DEST (set); | |
20270 cfun->machine->thumb1_cc_op1 = const0_rtx; | |
20271 cfun->machine->thumb1_cc_mode = CC_NOOVmode; | |
20272 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn) | |
20273 { | |
20274 rtx src1 = XEXP (SET_SRC (set), 1); | |
20275 if (src1 == const0_rtx) | |
20276 cfun->machine->thumb1_cc_mode = CCmode; | |
20277 } | |
20278 } | |
20279 else if (conds != CONDS_NOCOND) | |
20280 cfun->machine->thumb1_cc_insn = NULL_RTX; | |
20281 } | |
19043 } | 20282 } |
19044 | 20283 |
19045 int | 20284 int |
19046 thumb_shiftable_const (unsigned HOST_WIDE_INT val) | 20285 thumb_shiftable_const (unsigned HOST_WIDE_INT val) |
19047 { | 20286 { |
19145 #else | 20384 #else |
19146 return FALSE; | 20385 return FALSE; |
19147 #endif | 20386 #endif |
19148 } | 20387 } |
19149 | 20388 |
20389 /* Given the stack offsets and register mask in OFFSETS, decide how | |
20390 many additional registers to push instead of subtracting a constant | |
20391 from SP. For epilogues the principle is the same except we use pop. | |
20392 FOR_PROLOGUE indicates which we're generating. */ | |
20393 static int | |
20394 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue) | |
20395 { | |
20396 HOST_WIDE_INT amount; | |
20397 unsigned long live_regs_mask = offsets->saved_regs_mask; | |
20398 /* Extract a mask of the ones we can give to the Thumb's push/pop | |
20399 instruction. */ | |
20400 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff); | |
20401 /* Then count how many other high registers will need to be pushed. */ | |
20402 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00); | |
20403 int n_free, reg_base; | |
20404 | |
20405 if (!for_prologue && frame_pointer_needed) | |
20406 amount = offsets->locals_base - offsets->saved_regs; | |
20407 else | |
20408 amount = offsets->outgoing_args - offsets->saved_regs; | |
20409 | |
20410 /* If the stack frame size is 512 exactly, we can save one load | |
20411 instruction, which should make this a win even when optimizing | |
20412 for speed. */ | |
20413 if (!optimize_size && amount != 512) | |
20414 return 0; | |
20415 | |
20416 /* Can't do this if there are high registers to push. */ | |
20417 if (high_regs_pushed != 0) | |
20418 return 0; | |
20419 | |
20420 /* Shouldn't do it in the prologue if no registers would normally | |
20421 be pushed at all. In the epilogue, also allow it if we'll have | |
20422 a pop insn for the PC. */ | |
20423 if (l_mask == 0 | |
20424 && (for_prologue | |
20425 || TARGET_BACKTRACE | |
20426 || (live_regs_mask & 1 << LR_REGNUM) == 0 | |
20427 || TARGET_INTERWORK | |
20428 || crtl->args.pretend_args_size != 0)) | |
20429 return 0; | |
20430 | |
20431 /* Don't do this if thumb_expand_prologue wants to emit instructions | |
20432 between the push and the stack frame allocation. */ | |
20433 if (for_prologue | |
20434 && ((flag_pic && arm_pic_register != INVALID_REGNUM) | |
20435 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0))) | |
20436 return 0; | |
20437 | |
20438 reg_base = 0; | |
20439 n_free = 0; | |
20440 if (!for_prologue) | |
20441 { | |
20442 reg_base = arm_size_return_regs () / UNITS_PER_WORD; | |
20443 live_regs_mask >>= reg_base; | |
20444 } | |
20445 | |
20446 while (reg_base + n_free < 8 && !(live_regs_mask & 1) | |
20447 && (for_prologue || call_used_regs[reg_base + n_free])) | |
20448 { | |
20449 live_regs_mask >>= 1; | |
20450 n_free++; | |
20451 } | |
20452 | |
20453 if (n_free == 0) | |
20454 return 0; | |
20455 gcc_assert (amount / 4 * 4 == amount); | |
20456 | |
20457 if (amount >= 512 && (amount - n_free * 4) < 512) | |
20458 return (amount - 508) / 4; | |
20459 if (amount <= n_free * 4) | |
20460 return amount / 4; | |
20461 return 0; | |
20462 } | |
20463 | |
19150 /* The bits which aren't usefully expanded as rtl. */ | 20464 /* The bits which aren't usefully expanded as rtl. */ |
19151 const char * | 20465 const char * |
19152 thumb_unexpanded_epilogue (void) | 20466 thumb_unexpanded_epilogue (void) |
19153 { | 20467 { |
19154 arm_stack_offsets *offsets; | 20468 arm_stack_offsets *offsets; |
19155 int regno; | 20469 int regno; |
19156 unsigned long live_regs_mask = 0; | 20470 unsigned long live_regs_mask = 0; |
19157 int high_regs_pushed = 0; | 20471 int high_regs_pushed = 0; |
20472 int extra_pop; | |
19158 int had_to_push_lr; | 20473 int had_to_push_lr; |
19159 int size; | 20474 int size; |
19160 | 20475 |
19161 if (cfun->machine->return_used_this_function != 0) | 20476 if (cfun->machine->return_used_this_function != 0) |
19162 return ""; | 20477 return ""; |
19171 /* If we can deduce the registers used from the function's return value. | 20486 /* If we can deduce the registers used from the function's return value. |
19172 This is more reliable that examining df_regs_ever_live_p () because that | 20487 This is more reliable that examining df_regs_ever_live_p () because that |
19173 will be set if the register is ever used in the function, not just if | 20488 will be set if the register is ever used in the function, not just if |
19174 the register is used to hold a return value. */ | 20489 the register is used to hold a return value. */ |
19175 size = arm_size_return_regs (); | 20490 size = arm_size_return_regs (); |
20491 | |
20492 extra_pop = thumb1_extra_regs_pushed (offsets, false); | |
20493 if (extra_pop > 0) | |
20494 { | |
20495 unsigned long extra_mask = (1 << extra_pop) - 1; | |
20496 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD); | |
20497 } | |
19176 | 20498 |
19177 /* The prolog may have pushed some high registers to use as | 20499 /* The prolog may have pushed some high registers to use as |
19178 work registers. e.g. the testsuite file: | 20500 work registers. e.g. the testsuite file: |
19179 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c | 20501 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c |
19180 compiles to produce: | 20502 compiles to produce: |
19255 if (live_regs_mask) | 20577 if (live_regs_mask) |
19256 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL, | 20578 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL, |
19257 live_regs_mask); | 20579 live_regs_mask); |
19258 | 20580 |
19259 /* We have either just popped the return address into the | 20581 /* We have either just popped the return address into the |
19260 PC or it is was kept in LR for the entire function. */ | 20582 PC or it is was kept in LR for the entire function. |
20583 Note that thumb_pushpop has already called thumb_exit if the | |
20584 PC was in the list. */ | |
19261 if (!had_to_push_lr) | 20585 if (!had_to_push_lr) |
19262 thumb_exit (asm_out_file, LR_REGNUM); | 20586 thumb_exit (asm_out_file, LR_REGNUM); |
19263 } | 20587 } |
19264 else | 20588 else |
19265 { | 20589 { |
19311 /* Functions to save and restore machine-specific function data. */ | 20635 /* Functions to save and restore machine-specific function data. */ |
19312 static struct machine_function * | 20636 static struct machine_function * |
19313 arm_init_machine_status (void) | 20637 arm_init_machine_status (void) |
19314 { | 20638 { |
19315 struct machine_function *machine; | 20639 struct machine_function *machine; |
19316 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function)); | 20640 machine = ggc_alloc_cleared_machine_function (); |
19317 | 20641 |
19318 #if ARM_FT_UNKNOWN != 0 | 20642 #if ARM_FT_UNKNOWN != 0 |
19319 machine->func_type = ARM_FT_UNKNOWN; | 20643 machine->func_type = ARM_FT_UNKNOWN; |
19320 #endif | 20644 #endif |
19321 return machine; | 20645 return machine; |
19399 break; | 20723 break; |
19400 | 20724 |
19401 default: | 20725 default: |
19402 gcc_unreachable (); | 20726 gcc_unreachable (); |
19403 } | 20727 } |
19404 } | |
19405 | |
19406 /* Given the stack offsets and register mask in OFFSETS, decide | |
19407 how many additional registers to push instead of subtracting | |
19408 a constant from SP. */ | |
19409 static int | |
19410 thumb1_extra_regs_pushed (arm_stack_offsets *offsets) | |
19411 { | |
19412 HOST_WIDE_INT amount = offsets->outgoing_args - offsets->saved_regs; | |
19413 unsigned long live_regs_mask = offsets->saved_regs_mask; | |
19414 /* Extract a mask of the ones we can give to the Thumb's push instruction. */ | |
19415 unsigned long l_mask = live_regs_mask & 0x40ff; | |
19416 /* Then count how many other high registers will need to be pushed. */ | |
19417 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00); | |
19418 int n_free; | |
19419 | |
19420 /* If the stack frame size is 512 exactly, we can save one load | |
19421 instruction, which should make this a win even when optimizing | |
19422 for speed. */ | |
19423 if (!optimize_size && amount != 512) | |
19424 return 0; | |
19425 | |
19426 /* Can't do this if there are high registers to push, or if we | |
19427 are not going to do a push at all. */ | |
19428 if (high_regs_pushed != 0 || l_mask == 0) | |
19429 return 0; | |
19430 | |
19431 /* Don't do this if thumb1_expand_prologue wants to emit instructions | |
19432 between the push and the stack frame allocation. */ | |
19433 if ((flag_pic && arm_pic_register != INVALID_REGNUM) | |
19434 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)) | |
19435 return 0; | |
19436 | |
19437 for (n_free = 0; n_free < 8 && !(live_regs_mask & 1); live_regs_mask >>= 1) | |
19438 n_free++; | |
19439 | |
19440 if (n_free == 0) | |
19441 return 0; | |
19442 gcc_assert (amount / 4 * 4 == amount); | |
19443 | |
19444 if (amount >= 512 && (amount - n_free * 4) < 512) | |
19445 return (amount - 508) / 4; | |
19446 if (amount <= n_free * 4) | |
19447 return amount / 4; | |
19448 return 0; | |
19449 } | 20728 } |
19450 | 20729 |
19451 /* Generate the rest of a function's prologue. */ | 20730 /* Generate the rest of a function's prologue. */ |
19452 void | 20731 void |
19453 thumb1_expand_prologue (void) | 20732 thumb1_expand_prologue (void) |
19481 | 20760 |
19482 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0) | 20761 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0) |
19483 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM), | 20762 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM), |
19484 stack_pointer_rtx); | 20763 stack_pointer_rtx); |
19485 | 20764 |
20765 if (flag_stack_usage) | |
20766 current_function_static_stack_size | |
20767 = offsets->outgoing_args - offsets->saved_args; | |
20768 | |
19486 amount = offsets->outgoing_args - offsets->saved_regs; | 20769 amount = offsets->outgoing_args - offsets->saved_regs; |
19487 amount -= 4 * thumb1_extra_regs_pushed (offsets); | 20770 amount -= 4 * thumb1_extra_regs_pushed (offsets, true); |
19488 if (amount) | 20771 if (amount) |
19489 { | 20772 { |
19490 if (amount < 512) | 20773 if (amount < 512) |
19491 { | 20774 { |
19492 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, | 20775 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, |
19539 the call to mcount. Similarly if the user has requested no | 20822 the call to mcount. Similarly if the user has requested no |
19540 scheduling in the prolog. Similarly if we want non-call exceptions | 20823 scheduling in the prolog. Similarly if we want non-call exceptions |
19541 using the EABI unwinder, to prevent faulting instructions from being | 20824 using the EABI unwinder, to prevent faulting instructions from being |
19542 swapped with a stack adjustment. */ | 20825 swapped with a stack adjustment. */ |
19543 if (crtl->profile || !TARGET_SCHED_PROLOG | 20826 if (crtl->profile || !TARGET_SCHED_PROLOG |
19544 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions)) | 20827 || (arm_except_unwind_info (&global_options) == UI_TARGET |
20828 && cfun->can_throw_non_call_exceptions)) | |
19545 emit_insn (gen_blockage ()); | 20829 emit_insn (gen_blockage ()); |
19546 | 20830 |
19547 cfun->machine->lr_save_eliminated = !thumb_force_lr_save (); | 20831 cfun->machine->lr_save_eliminated = !thumb_force_lr_save (); |
19548 if (live_regs_mask & 0xff) | 20832 if (live_regs_mask & 0xff) |
19549 cfun->machine->lr_save_eliminated = 0; | 20833 cfun->machine->lr_save_eliminated = 0; |
19567 if (frame_pointer_needed) | 20851 if (frame_pointer_needed) |
19568 { | 20852 { |
19569 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx)); | 20853 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx)); |
19570 amount = offsets->locals_base - offsets->saved_regs; | 20854 amount = offsets->locals_base - offsets->saved_regs; |
19571 } | 20855 } |
20856 amount -= 4 * thumb1_extra_regs_pushed (offsets, false); | |
19572 | 20857 |
19573 gcc_assert (amount >= 0); | 20858 gcc_assert (amount >= 0); |
19574 if (amount) | 20859 if (amount) |
19575 { | 20860 { |
19576 if (amount < 512) | 20861 if (amount < 512) |
19651 } | 20936 } |
19652 | 20937 |
19653 if (crtl->args.pretend_args_size) | 20938 if (crtl->args.pretend_args_size) |
19654 { | 20939 { |
19655 /* Output unwind directive for the stack adjustment. */ | 20940 /* Output unwind directive for the stack adjustment. */ |
19656 if (ARM_EABI_UNWIND_TABLES) | 20941 if (arm_except_unwind_info (&global_options) == UI_TARGET) |
19657 fprintf (f, "\t.pad #%d\n", | 20942 fprintf (f, "\t.pad #%d\n", |
19658 crtl->args.pretend_args_size); | 20943 crtl->args.pretend_args_size); |
19659 | 20944 |
19660 if (cfun->machine->uses_anonymous_args) | 20945 if (cfun->machine->uses_anonymous_args) |
19661 { | 20946 { |
19721 20 add R7, SP, #16 Point at the start of the backtrace structure. | 21006 20 add R7, SP, #16 Point at the start of the backtrace structure. |
19722 22 mov FP, R7 Put this value into the frame pointer. */ | 21007 22 mov FP, R7 Put this value into the frame pointer. */ |
19723 | 21008 |
19724 work_register = thumb_find_work_register (live_regs_mask); | 21009 work_register = thumb_find_work_register (live_regs_mask); |
19725 | 21010 |
19726 if (ARM_EABI_UNWIND_TABLES) | 21011 if (arm_except_unwind_info (&global_options) == UI_TARGET) |
19727 asm_fprintf (f, "\t.pad #16\n"); | 21012 asm_fprintf (f, "\t.pad #16\n"); |
19728 | 21013 |
19729 asm_fprintf | 21014 asm_fprintf |
19730 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n", | 21015 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n", |
19731 SP_REGNUM, SP_REGNUM); | 21016 SP_REGNUM, SP_REGNUM); |
19789 register. */ | 21074 register. */ |
19790 else if ((l_mask & 0xff) != 0 | 21075 else if ((l_mask & 0xff) != 0 |
19791 || (high_regs_pushed == 0 && l_mask)) | 21076 || (high_regs_pushed == 0 && l_mask)) |
19792 { | 21077 { |
19793 unsigned long mask = l_mask; | 21078 unsigned long mask = l_mask; |
19794 mask |= (1 << thumb1_extra_regs_pushed (offsets)) - 1; | 21079 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1; |
19795 thumb_pushpop (f, mask, 1, &cfa_offset, mask); | 21080 thumb_pushpop (f, mask, 1, &cfa_offset, mask); |
19796 } | 21081 } |
19797 | 21082 |
19798 if (high_regs_pushed) | 21083 if (high_regs_pushed) |
19799 { | 21084 { |
20165 asm_fprintf (asm_out_file, "\t.syntax unified\n"); | 21450 asm_fprintf (asm_out_file, "\t.syntax unified\n"); |
20166 | 21451 |
20167 if (TARGET_BPABI) | 21452 if (TARGET_BPABI) |
20168 { | 21453 { |
20169 const char *fpu_name; | 21454 const char *fpu_name; |
20170 if (arm_select[0].string) | 21455 if (arm_selected_arch) |
20171 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string); | 21456 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name); |
20172 else if (arm_select[1].string) | |
20173 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string); | |
20174 else | 21457 else |
20175 asm_fprintf (asm_out_file, "\t.cpu %s\n", | 21458 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name); |
20176 all_cores[arm_default_cpu].name); | |
20177 | 21459 |
20178 if (TARGET_SOFT_FLOAT) | 21460 if (TARGET_SOFT_FLOAT) |
20179 { | 21461 { |
20180 if (TARGET_VFP) | 21462 if (TARGET_VFP) |
20181 fpu_name = "softvfp"; | 21463 fpu_name = "softvfp"; |
20566 addr = XEXP (addr, 0); | 21848 addr = XEXP (addr, 0); |
20567 | 21849 |
20568 return !reg_overlap_mentioned_p (value, addr); | 21850 return !reg_overlap_mentioned_p (value, addr); |
20569 } | 21851 } |
20570 | 21852 |
21853 /* Return nonzero if the CONSUMER instruction (a store) does need | |
21854 PRODUCER's value to calculate the address. */ | |
21855 | |
21856 int | |
21857 arm_early_store_addr_dep (rtx producer, rtx consumer) | |
21858 { | |
21859 return !arm_no_early_store_addr_dep (producer, consumer); | |
21860 } | |
21861 | |
21862 /* Return nonzero if the CONSUMER instruction (a load) does need | |
21863 PRODUCER's value to calculate the address. */ | |
21864 | |
21865 int | |
21866 arm_early_load_addr_dep (rtx producer, rtx consumer) | |
21867 { | |
21868 rtx value = PATTERN (producer); | |
21869 rtx addr = PATTERN (consumer); | |
21870 | |
21871 if (GET_CODE (value) == COND_EXEC) | |
21872 value = COND_EXEC_CODE (value); | |
21873 if (GET_CODE (value) == PARALLEL) | |
21874 value = XVECEXP (value, 0, 0); | |
21875 value = XEXP (value, 0); | |
21876 if (GET_CODE (addr) == COND_EXEC) | |
21877 addr = COND_EXEC_CODE (addr); | |
21878 if (GET_CODE (addr) == PARALLEL) | |
21879 addr = XVECEXP (addr, 0, 0); | |
21880 addr = XEXP (addr, 1); | |
21881 | |
21882 return reg_overlap_mentioned_p (value, addr); | |
21883 } | |
21884 | |
20571 /* Return nonzero if the CONSUMER instruction (an ALU op) does not | 21885 /* Return nonzero if the CONSUMER instruction (an ALU op) does not |
20572 have an early register shift value or amount dependency on the | 21886 have an early register shift value or amount dependency on the |
20573 result of PRODUCER. */ | 21887 result of PRODUCER. */ |
20574 | 21888 |
20575 int | 21889 int |
20944 return true; | 22258 return true; |
20945 | 22259 |
20946 return false; | 22260 return false; |
20947 } | 22261 } |
20948 | 22262 |
22263 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword | |
22264 registers when autovectorizing for Neon, at least until multiple vector | |
22265 widths are supported properly by the middle-end. */ | |
22266 | |
22267 static enum machine_mode | |
22268 arm_preferred_simd_mode (enum machine_mode mode) | |
22269 { | |
22270 if (TARGET_NEON) | |
22271 switch (mode) | |
22272 { | |
22273 case SFmode: | |
22274 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode; | |
22275 case SImode: | |
22276 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode; | |
22277 case HImode: | |
22278 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode; | |
22279 case QImode: | |
22280 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode; | |
22281 case DImode: | |
22282 if (TARGET_NEON_VECTORIZE_QUAD) | |
22283 return V2DImode; | |
22284 break; | |
22285 | |
22286 default:; | |
22287 } | |
22288 | |
22289 if (TARGET_REALLY_IWMMXT) | |
22290 switch (mode) | |
22291 { | |
22292 case SImode: | |
22293 return V2SImode; | |
22294 case HImode: | |
22295 return V4HImode; | |
22296 case QImode: | |
22297 return V8QImode; | |
22298 | |
22299 default:; | |
22300 } | |
22301 | |
22302 return word_mode; | |
22303 } | |
22304 | |
22305 /* Implement TARGET_CLASS_LIKELY_SPILLED_P. | |
22306 | |
22307 We need to define this for LO_REGS on thumb. Otherwise we can end up | |
22308 using r0-r4 for function arguments, r7 for the stack frame and don't | |
22309 have enough left over to do doubleword arithmetic. */ | |
22310 | |
22311 static bool | |
22312 arm_class_likely_spilled_p (reg_class_t rclass) | |
22313 { | |
22314 if ((TARGET_THUMB && rclass == LO_REGS) | |
22315 || rclass == CC_REG) | |
22316 return true; | |
22317 | |
22318 return false; | |
22319 } | |
22320 | |
20949 /* Implements target hook small_register_classes_for_mode_p. */ | 22321 /* Implements target hook small_register_classes_for_mode_p. */ |
20950 bool | 22322 bool |
20951 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED) | 22323 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED) |
20952 { | 22324 { |
20953 return TARGET_THUMB1; | 22325 return TARGET_THUMB1; |
21028 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i); | 22400 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i); |
21029 | 22401 |
21030 return p; | 22402 return p; |
21031 } | 22403 } |
21032 | 22404 |
21033 #ifdef TARGET_UNWIND_INFO | 22405 #if ARM_UNWIND_INFO |
21034 /* Emit unwind directives for a store-multiple instruction or stack pointer | 22406 /* Emit unwind directives for a store-multiple instruction or stack pointer |
21035 push during alignment. | 22407 push during alignment. |
21036 These should only ever be generated by the function prologue code, so | 22408 These should only ever be generated by the function prologue code, so |
21037 expect them to have a particular form. */ | 22409 expect them to have a particular form. */ |
21038 | 22410 |
21242 static void | 22614 static void |
21243 arm_unwind_emit (FILE * asm_out_file, rtx insn) | 22615 arm_unwind_emit (FILE * asm_out_file, rtx insn) |
21244 { | 22616 { |
21245 rtx pat; | 22617 rtx pat; |
21246 | 22618 |
21247 if (!ARM_EABI_UNWIND_TABLES) | 22619 if (arm_except_unwind_info (&global_options) != UI_TARGET) |
21248 return; | 22620 return; |
21249 | 22621 |
21250 if (!(flag_unwind_tables || crtl->uses_eh_lsda) | 22622 if (!(flag_unwind_tables || crtl->uses_eh_lsda) |
21251 && (TREE_NOTHROW (current_function_decl) | 22623 && (TREE_NOTHROW (current_function_decl) |
21252 || crtl->all_throwers_are_sibcalls)) | 22624 || crtl->all_throwers_are_sibcalls)) |
21292 fputs ("(TARGET2)", asm_out_file); | 22664 fputs ("(TARGET2)", asm_out_file); |
21293 fputc ('\n', asm_out_file); | 22665 fputc ('\n', asm_out_file); |
21294 | 22666 |
21295 return TRUE; | 22667 return TRUE; |
21296 } | 22668 } |
21297 #endif /* TARGET_UNWIND_INFO */ | 22669 |
22670 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */ | |
22671 | |
22672 static void | |
22673 arm_asm_emit_except_personality (rtx personality) | |
22674 { | |
22675 fputs ("\t.personality\t", asm_out_file); | |
22676 output_addr_const (asm_out_file, personality); | |
22677 fputc ('\n', asm_out_file); | |
22678 } | |
22679 | |
22680 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */ | |
22681 | |
22682 static void | |
22683 arm_asm_init_sections (void) | |
22684 { | |
22685 exception_section = get_unnamed_section (0, output_section_asm_op, | |
22686 "\t.handlerdata"); | |
22687 } | |
22688 #endif /* ARM_UNWIND_INFO */ | |
22689 | |
22690 /* Implement TARGET_EXCEPT_UNWIND_INFO. */ | |
22691 | |
22692 static enum unwind_info_type | |
22693 arm_except_unwind_info (struct gcc_options *opts) | |
22694 { | |
22695 /* Honor the --enable-sjlj-exceptions configure switch. */ | |
22696 #ifdef CONFIG_SJLJ_EXCEPTIONS | |
22697 if (CONFIG_SJLJ_EXCEPTIONS) | |
22698 return UI_SJLJ; | |
22699 #endif | |
22700 | |
22701 /* If not using ARM EABI unwind tables... */ | |
22702 if (ARM_UNWIND_INFO) | |
22703 { | |
22704 /* For simplicity elsewhere in this file, indicate that all unwind | |
22705 info is disabled if we're not emitting unwind tables. */ | |
22706 if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables) | |
22707 return UI_NONE; | |
22708 else | |
22709 return UI_TARGET; | |
22710 } | |
22711 | |
22712 /* ... we use sjlj exceptions for backwards compatibility. */ | |
22713 return UI_SJLJ; | |
22714 } | |
21298 | 22715 |
21299 | 22716 |
21300 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic | 22717 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic |
21301 stack alignment. */ | 22718 stack alignment. */ |
21302 | 22719 |
21324 /* Output unwind directives for the start/end of a function. */ | 22741 /* Output unwind directives for the start/end of a function. */ |
21325 | 22742 |
21326 void | 22743 void |
21327 arm_output_fn_unwind (FILE * f, bool prologue) | 22744 arm_output_fn_unwind (FILE * f, bool prologue) |
21328 { | 22745 { |
21329 if (!ARM_EABI_UNWIND_TABLES) | 22746 if (arm_except_unwind_info (&global_options) != UI_TARGET) |
21330 return; | 22747 return; |
21331 | 22748 |
21332 if (prologue) | 22749 if (prologue) |
21333 fputs ("\t.fnstart\n", f); | 22750 fputs ("\t.fnstart\n", f); |
21334 else | 22751 else |
21404 fputs ("\t.word\t", file); | 22821 fputs ("\t.word\t", file); |
21405 output_addr_const (file, x); | 22822 output_addr_const (file, x); |
21406 fputs ("(tlsldo)", file); | 22823 fputs ("(tlsldo)", file); |
21407 } | 22824 } |
21408 | 22825 |
21409 bool | 22826 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */ |
22827 | |
22828 static bool | |
21410 arm_output_addr_const_extra (FILE *fp, rtx x) | 22829 arm_output_addr_const_extra (FILE *fp, rtx x) |
21411 { | 22830 { |
21412 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) | 22831 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) |
21413 return arm_emit_tls_decoration (fp, x); | 22832 return arm_emit_tls_decoration (fp, x); |
21414 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL) | 22833 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL) |
21544 { | 22963 { |
21545 switch (arm_tune) | 22964 switch (arm_tune) |
21546 { | 22965 { |
21547 case cortexr4: | 22966 case cortexr4: |
21548 case cortexr4f: | 22967 case cortexr4f: |
22968 case cortexa5: | |
21549 case cortexa8: | 22969 case cortexa8: |
21550 case cortexa9: | 22970 case cortexa9: |
22971 case fa726te: | |
21551 return 2; | 22972 return 2; |
21552 | 22973 |
21553 default: | 22974 default: |
21554 return 1; | 22975 return 1; |
21555 } | 22976 } |
21661 if (TARGET_THUMB) | 23082 if (TARGET_THUMB) |
21662 memcpy (reg_alloc_order, thumb_core_reg_alloc_order, | 23083 memcpy (reg_alloc_order, thumb_core_reg_alloc_order, |
21663 sizeof (thumb_core_reg_alloc_order)); | 23084 sizeof (thumb_core_reg_alloc_order)); |
21664 } | 23085 } |
21665 | 23086 |
21666 /* Set default optimization options. */ | |
21667 void | |
21668 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED) | |
21669 { | |
21670 /* Enable section anchors by default at -O1 or higher. | |
21671 Use 2 to distinguish from an explicit -fsection-anchors | |
21672 given on the command line. */ | |
21673 if (level > 0) | |
21674 flag_section_anchors = 2; | |
21675 } | |
21676 | |
21677 /* Implement TARGET_FRAME_POINTER_REQUIRED. */ | 23087 /* Implement TARGET_FRAME_POINTER_REQUIRED. */ |
21678 | 23088 |
21679 bool | 23089 bool |
21680 arm_frame_pointer_required (void) | 23090 arm_frame_pointer_required (void) |
21681 { | 23091 { |
21690 arm_have_conditional_execution (void) | 23100 arm_have_conditional_execution (void) |
21691 { | 23101 { |
21692 return !TARGET_THUMB1; | 23102 return !TARGET_THUMB1; |
21693 } | 23103 } |
21694 | 23104 |
23105 /* Legitimize a memory reference for sync primitive implemented using | |
23106 ldrex / strex. We currently force the form of the reference to be | |
23107 indirect without offset. We do not yet support the indirect offset | |
23108 addressing supported by some ARM targets for these | |
23109 instructions. */ | |
23110 static rtx | |
23111 arm_legitimize_sync_memory (rtx memory) | |
23112 { | |
23113 rtx addr = force_reg (Pmode, XEXP (memory, 0)); | |
23114 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr); | |
23115 | |
23116 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER); | |
23117 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory); | |
23118 return legitimate_memory; | |
23119 } | |
23120 | |
23121 /* An instruction emitter. */ | |
23122 typedef void (* emit_f) (int label, const char *, rtx *); | |
23123 | |
23124 /* An instruction emitter that emits via the conventional | |
23125 output_asm_insn. */ | |
23126 static void | |
23127 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands) | |
23128 { | |
23129 output_asm_insn (pattern, operands); | |
23130 } | |
23131 | |
23132 /* Count the number of emitted synchronization instructions. */ | |
23133 static unsigned arm_insn_count; | |
23134 | |
23135 /* An emitter that counts emitted instructions but does not actually | |
23136 emit instruction into the the instruction stream. */ | |
23137 static void | |
23138 arm_count (int label, | |
23139 const char *pattern ATTRIBUTE_UNUSED, | |
23140 rtx *operands ATTRIBUTE_UNUSED) | |
23141 { | |
23142 if (! label) | |
23143 ++ arm_insn_count; | |
23144 } | |
23145 | |
23146 /* Construct a pattern using conventional output formatting and feed | |
23147 it to output_asm_insn. Provides a mechanism to construct the | |
23148 output pattern on the fly. Note the hard limit on the pattern | |
23149 buffer size. */ | |
23150 static void ATTRIBUTE_PRINTF_4 | |
23151 arm_output_asm_insn (emit_f emit, int label, rtx *operands, | |
23152 const char *pattern, ...) | |
23153 { | |
23154 va_list ap; | |
23155 char buffer[256]; | |
23156 | |
23157 va_start (ap, pattern); | |
23158 vsprintf (buffer, pattern, ap); | |
23159 va_end (ap); | |
23160 emit (label, buffer, operands); | |
23161 } | |
23162 | |
23163 /* Emit the memory barrier instruction, if any, provided by this | |
23164 target to a specified emitter. */ | |
23165 static void | |
23166 arm_process_output_memory_barrier (emit_f emit, rtx *operands) | |
23167 { | |
23168 if (TARGET_HAVE_DMB) | |
23169 { | |
23170 /* Note we issue a system level barrier. We should consider | |
23171 issuing a inner shareabilty zone barrier here instead, ie. | |
23172 "DMB ISH". */ | |
23173 emit (0, "dmb\tsy", operands); | |
23174 return; | |
23175 } | |
23176 | |
23177 if (TARGET_HAVE_DMB_MCR) | |
23178 { | |
23179 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands); | |
23180 return; | |
23181 } | |
23182 | |
23183 gcc_unreachable (); | |
23184 } | |
23185 | |
23186 /* Emit the memory barrier instruction, if any, provided by this | |
23187 target. */ | |
23188 const char * | |
23189 arm_output_memory_barrier (rtx *operands) | |
23190 { | |
23191 arm_process_output_memory_barrier (arm_emit, operands); | |
23192 return ""; | |
23193 } | |
23194 | |
23195 /* Helper to figure out the instruction suffix required on ldrex/strex | |
23196 for operations on an object of the specified mode. */ | |
23197 static const char * | |
23198 arm_ldrex_suffix (enum machine_mode mode) | |
23199 { | |
23200 switch (mode) | |
23201 { | |
23202 case QImode: return "b"; | |
23203 case HImode: return "h"; | |
23204 case SImode: return ""; | |
23205 case DImode: return "d"; | |
23206 default: | |
23207 gcc_unreachable (); | |
23208 } | |
23209 return ""; | |
23210 } | |
23211 | |
23212 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified | |
23213 mode. */ | |
23214 static void | |
23215 arm_output_ldrex (emit_f emit, | |
23216 enum machine_mode mode, | |
23217 rtx target, | |
23218 rtx memory) | |
23219 { | |
23220 const char *suffix = arm_ldrex_suffix (mode); | |
23221 rtx operands[2]; | |
23222 | |
23223 operands[0] = target; | |
23224 operands[1] = memory; | |
23225 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix); | |
23226 } | |
23227 | |
23228 /* Emit a strex{b,h,d, } instruction appropriate for the specified | |
23229 mode. */ | |
23230 static void | |
23231 arm_output_strex (emit_f emit, | |
23232 enum machine_mode mode, | |
23233 const char *cc, | |
23234 rtx result, | |
23235 rtx value, | |
23236 rtx memory) | |
23237 { | |
23238 const char *suffix = arm_ldrex_suffix (mode); | |
23239 rtx operands[3]; | |
23240 | |
23241 operands[0] = result; | |
23242 operands[1] = value; | |
23243 operands[2] = memory; | |
23244 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix, | |
23245 cc); | |
23246 } | |
23247 | |
23248 /* Helper to emit a two operand instruction. */ | |
23249 static void | |
23250 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s) | |
23251 { | |
23252 rtx operands[2]; | |
23253 | |
23254 operands[0] = d; | |
23255 operands[1] = s; | |
23256 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic); | |
23257 } | |
23258 | |
23259 /* Helper to emit a three operand instruction. */ | |
23260 static void | |
23261 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b) | |
23262 { | |
23263 rtx operands[3]; | |
23264 | |
23265 operands[0] = d; | |
23266 operands[1] = a; | |
23267 operands[2] = b; | |
23268 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic); | |
23269 } | |
23270 | |
23271 /* Emit a load store exclusive synchronization loop. | |
23272 | |
23273 do | |
23274 old_value = [mem] | |
23275 if old_value != required_value | |
23276 break; | |
23277 t1 = sync_op (old_value, new_value) | |
23278 [mem] = t1, t2 = [0|1] | |
23279 while ! t2 | |
23280 | |
23281 Note: | |
23282 t1 == t2 is not permitted | |
23283 t1 == old_value is permitted | |
23284 | |
23285 required_value: | |
23286 | |
23287 RTX register or const_int representing the required old_value for | |
23288 the modify to continue, if NULL no comparsion is performed. */ | |
23289 static void | |
23290 arm_output_sync_loop (emit_f emit, | |
23291 enum machine_mode mode, | |
23292 rtx old_value, | |
23293 rtx memory, | |
23294 rtx required_value, | |
23295 rtx new_value, | |
23296 rtx t1, | |
23297 rtx t2, | |
23298 enum attr_sync_op sync_op, | |
23299 int early_barrier_required) | |
23300 { | |
23301 rtx operands[1]; | |
23302 | |
23303 gcc_assert (t1 != t2); | |
23304 | |
23305 if (early_barrier_required) | |
23306 arm_process_output_memory_barrier (emit, NULL); | |
23307 | |
23308 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX); | |
23309 | |
23310 arm_output_ldrex (emit, mode, old_value, memory); | |
23311 | |
23312 if (required_value) | |
23313 { | |
23314 rtx operands[2]; | |
23315 | |
23316 operands[0] = old_value; | |
23317 operands[1] = required_value; | |
23318 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1"); | |
23319 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX); | |
23320 } | |
23321 | |
23322 switch (sync_op) | |
23323 { | |
23324 case SYNC_OP_ADD: | |
23325 arm_output_op3 (emit, "add", t1, old_value, new_value); | |
23326 break; | |
23327 | |
23328 case SYNC_OP_SUB: | |
23329 arm_output_op3 (emit, "sub", t1, old_value, new_value); | |
23330 break; | |
23331 | |
23332 case SYNC_OP_IOR: | |
23333 arm_output_op3 (emit, "orr", t1, old_value, new_value); | |
23334 break; | |
23335 | |
23336 case SYNC_OP_XOR: | |
23337 arm_output_op3 (emit, "eor", t1, old_value, new_value); | |
23338 break; | |
23339 | |
23340 case SYNC_OP_AND: | |
23341 arm_output_op3 (emit,"and", t1, old_value, new_value); | |
23342 break; | |
23343 | |
23344 case SYNC_OP_NAND: | |
23345 arm_output_op3 (emit, "and", t1, old_value, new_value); | |
23346 arm_output_op2 (emit, "mvn", t1, t1); | |
23347 break; | |
23348 | |
23349 case SYNC_OP_NONE: | |
23350 t1 = new_value; | |
23351 break; | |
23352 } | |
23353 | |
23354 if (t2) | |
23355 { | |
23356 arm_output_strex (emit, mode, "", t2, t1, memory); | |
23357 operands[0] = t2; | |
23358 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); | |
23359 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", | |
23360 LOCAL_LABEL_PREFIX); | |
23361 } | |
23362 else | |
23363 { | |
23364 /* Use old_value for the return value because for some operations | |
23365 the old_value can easily be restored. This saves one register. */ | |
23366 arm_output_strex (emit, mode, "", old_value, t1, memory); | |
23367 operands[0] = old_value; | |
23368 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0"); | |
23369 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=", | |
23370 LOCAL_LABEL_PREFIX); | |
23371 | |
23372 switch (sync_op) | |
23373 { | |
23374 case SYNC_OP_ADD: | |
23375 arm_output_op3 (emit, "sub", old_value, t1, new_value); | |
23376 break; | |
23377 | |
23378 case SYNC_OP_SUB: | |
23379 arm_output_op3 (emit, "add", old_value, t1, new_value); | |
23380 break; | |
23381 | |
23382 case SYNC_OP_XOR: | |
23383 arm_output_op3 (emit, "eor", old_value, t1, new_value); | |
23384 break; | |
23385 | |
23386 case SYNC_OP_NONE: | |
23387 arm_output_op2 (emit, "mov", old_value, required_value); | |
23388 break; | |
23389 | |
23390 default: | |
23391 gcc_unreachable (); | |
23392 } | |
23393 } | |
23394 | |
23395 arm_process_output_memory_barrier (emit, NULL); | |
23396 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX); | |
23397 } | |
23398 | |
23399 static rtx | |
23400 arm_get_sync_operand (rtx *operands, int index, rtx default_value) | |
23401 { | |
23402 if (index > 0) | |
23403 default_value = operands[index - 1]; | |
23404 | |
23405 return default_value; | |
23406 } | |
23407 | |
23408 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \ | |
23409 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT); | |
23410 | |
23411 /* Extract the operands for a synchroniztion instruction from the | |
23412 instructions attributes and emit the instruction. */ | |
23413 static void | |
23414 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands) | |
23415 { | |
23416 rtx result, memory, required_value, new_value, t1, t2; | |
23417 int early_barrier; | |
23418 enum machine_mode mode; | |
23419 enum attr_sync_op sync_op; | |
23420 | |
23421 result = FETCH_SYNC_OPERAND(result, 0); | |
23422 memory = FETCH_SYNC_OPERAND(memory, 0); | |
23423 required_value = FETCH_SYNC_OPERAND(required_value, 0); | |
23424 new_value = FETCH_SYNC_OPERAND(new_value, 0); | |
23425 t1 = FETCH_SYNC_OPERAND(t1, 0); | |
23426 t2 = FETCH_SYNC_OPERAND(t2, 0); | |
23427 early_barrier = | |
23428 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES; | |
23429 sync_op = get_attr_sync_op (insn); | |
23430 mode = GET_MODE (memory); | |
23431 | |
23432 arm_output_sync_loop (emit, mode, result, memory, required_value, | |
23433 new_value, t1, t2, sync_op, early_barrier); | |
23434 } | |
23435 | |
23436 /* Emit a synchronization instruction loop. */ | |
23437 const char * | |
23438 arm_output_sync_insn (rtx insn, rtx *operands) | |
23439 { | |
23440 arm_process_output_sync_insn (arm_emit, insn, operands); | |
23441 return ""; | |
23442 } | |
23443 | |
23444 /* Count the number of machine instruction that will be emitted for a | |
23445 synchronization instruction. Note that the emitter used does not | |
23446 emit instructions, it just counts instructions being carefull not | |
23447 to count labels. */ | |
23448 unsigned int | |
23449 arm_sync_loop_insns (rtx insn, rtx *operands) | |
23450 { | |
23451 arm_insn_count = 0; | |
23452 arm_process_output_sync_insn (arm_count, insn, operands); | |
23453 return arm_insn_count; | |
23454 } | |
23455 | |
23456 /* Helper to call a target sync instruction generator, dealing with | |
23457 the variation in operands required by the different generators. */ | |
23458 static rtx | |
23459 arm_call_generator (struct arm_sync_generator *generator, rtx old_value, | |
23460 rtx memory, rtx required_value, rtx new_value) | |
23461 { | |
23462 switch (generator->op) | |
23463 { | |
23464 case arm_sync_generator_omn: | |
23465 gcc_assert (! required_value); | |
23466 return generator->u.omn (old_value, memory, new_value); | |
23467 | |
23468 case arm_sync_generator_omrn: | |
23469 gcc_assert (required_value); | |
23470 return generator->u.omrn (old_value, memory, required_value, new_value); | |
23471 } | |
23472 | |
23473 return NULL; | |
23474 } | |
23475 | |
23476 /* Expand a synchronization loop. The synchronization loop is expanded | |
23477 as an opaque block of instructions in order to ensure that we do | |
23478 not subsequently get extraneous memory accesses inserted within the | |
23479 critical region. The exclusive access property of ldrex/strex is | |
23480 only guaranteed in there are no intervening memory accesses. */ | |
23481 void | |
23482 arm_expand_sync (enum machine_mode mode, | |
23483 struct arm_sync_generator *generator, | |
23484 rtx target, rtx memory, rtx required_value, rtx new_value) | |
23485 { | |
23486 if (target == NULL) | |
23487 target = gen_reg_rtx (mode); | |
23488 | |
23489 memory = arm_legitimize_sync_memory (memory); | |
23490 if (mode != SImode) | |
23491 { | |
23492 rtx load_temp = gen_reg_rtx (SImode); | |
23493 | |
23494 if (required_value) | |
23495 required_value = convert_modes (SImode, mode, required_value, true); | |
23496 | |
23497 new_value = convert_modes (SImode, mode, new_value, true); | |
23498 emit_insn (arm_call_generator (generator, load_temp, memory, | |
23499 required_value, new_value)); | |
23500 emit_move_insn (target, gen_lowpart (mode, load_temp)); | |
23501 } | |
23502 else | |
23503 { | |
23504 emit_insn (arm_call_generator (generator, target, memory, required_value, | |
23505 new_value)); | |
23506 } | |
23507 } | |
23508 | |
23509 static bool | |
23510 arm_vector_alignment_reachable (const_tree type, bool is_packed) | |
23511 { | |
23512 /* Vectors which aren't in packed structures will not be less aligned than | |
23513 the natural alignment of their element type, so this is safe. */ | |
23514 if (TARGET_NEON && !BYTES_BIG_ENDIAN) | |
23515 return !is_packed; | |
23516 | |
23517 return default_builtin_vector_alignment_reachable (type, is_packed); | |
23518 } | |
23519 | |
23520 static bool | |
23521 arm_builtin_support_vector_misalignment (enum machine_mode mode, | |
23522 const_tree type, int misalignment, | |
23523 bool is_packed) | |
23524 { | |
23525 if (TARGET_NEON && !BYTES_BIG_ENDIAN) | |
23526 { | |
23527 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type); | |
23528 | |
23529 if (is_packed) | |
23530 return align == 1; | |
23531 | |
23532 /* If the misalignment is unknown, we should be able to handle the access | |
23533 so long as it is not to a member of a packed data structure. */ | |
23534 if (misalignment == -1) | |
23535 return true; | |
23536 | |
23537 /* Return true if the misalignment is a multiple of the natural alignment | |
23538 of the vector's element type. This is probably always going to be | |
23539 true in practice, since we've already established that this isn't a | |
23540 packed access. */ | |
23541 return ((misalignment % align) == 0); | |
23542 } | |
23543 | |
23544 return default_builtin_support_vector_misalignment (mode, type, misalignment, | |
23545 is_packed); | |
23546 } | |
23547 | |
/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.

   After option processing, adjust the global fixed_regs[],
   call_used_regs[] and global_regs[] arrays to reflect which registers
   are actually usable under the selected architecture, FPU and ABI
   options.  */
static void
arm_conditional_register_usage (void)
{
  int regno;

  /* Without usable FPA hardware (soft-float, Thumb-1, or a non-FPA
     FPU), the FPA registers cannot be allocated.  */
  if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
    {
      for (regno = FIRST_FPA_REGNUM;
	   regno <= LAST_FPA_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  if (TARGET_THUMB1 && optimize_size)
    {
      /* When optimizing for size on Thumb-1, it's better not
        to use the HI regs, because of the overhead of
        stacking them.  */
      for (regno = FIRST_HI_REGNUM;
	   regno <= LAST_HI_REGNUM; ++regno)
	fixed_regs[regno] = call_used_regs[regno] = 1;
    }

  /* The link register can be clobbered by any branch insn,
     but we have no way to track that at present, so mark
     it as unavailable.  */
  if (TARGET_THUMB1)
    fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;

  if (TARGET_32BIT && TARGET_HARD_FLOAT)
    {
      /* Maverick (Cirrus) FP: the FPA registers are disabled and the
	 Cirrus registers enabled, with the first four call-clobbered.  */
      if (TARGET_MAVERICK)
	{
	  for (regno = FIRST_FPA_REGNUM;
	       regno <= LAST_FPA_REGNUM; ++ regno)
	    fixed_regs[regno] = call_used_regs[regno] = 1;
	  for (regno = FIRST_CIRRUS_FP_REGNUM;
	       regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
	    {
	      fixed_regs[regno] = 0;
	      call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
	    }
	}
      if (TARGET_VFP)
	{
	  /* VFPv3 registers are disabled when earlier VFP
	     versions are selected due to the definition of
	     LAST_VFP_REGNUM.  */
	  for (regno = FIRST_VFP_REGNUM;
	       regno <= LAST_VFP_REGNUM; ++ regno)
	    {
	      fixed_regs[regno] = 0;
	      /* s0-s31 (d0-d15) and d32 upwards are call-clobbered;
		 d16-d31 are call-saved.  */
	      call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
		|| regno >= FIRST_VFP_REGNUM + 32;
	    }
	}
    }

  if (TARGET_REALLY_IWMMXT)
    {
      regno = FIRST_IWMMXT_GR_REGNUM;
      /* The 2002/10/09 revision of the XScale ABI has wCG0
         and wCG1 as call-preserved registers.  The 2002/11/21
         revision changed this so that all wCG registers are
         scratch registers.  */
      for (regno = FIRST_IWMMXT_GR_REGNUM;
	   regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
	fixed_regs[regno] = 0;
      /* The XScale ABI has wR0 - wR9 as scratch registers,
	 the rest as call-preserved registers.  */
      for (regno = FIRST_IWMMXT_REGNUM;
	   regno <= LAST_IWMMXT_REGNUM; ++ regno)
	{
	  fixed_regs[regno] = 0;
	  call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
	}
    }

  /* Reserve the PIC register when one is in use; otherwise r10 may
     still need reserving for the APCS stack-checking convention.  */
  if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
  else if (TARGET_APCS_STACK)
    {
      fixed_regs[10] = 1;
      call_used_regs[10] = 1;
    }
  /* -mcaller-super-interworking reserves r11 for calls to
     _interwork_r11_call_via_rN().  Making the register global
     is an easy way of ensuring that it remains valid for all
     calls.  */
  if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
      || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
    {
      fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
      if (TARGET_CALLER_INTERWORKING)
	global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
    }
  /* Allow the subtarget to apply further adjustments.  */
  SUBTARGET_CONDITIONAL_REGISTER_USAGE
}
23649 | |
23650 static reg_class_t | |
23651 arm_preferred_rename_class (reg_class_t rclass) | |
23652 { | |
23653 /* Thumb-2 instructions using LO_REGS may be smaller than instructions | |
23654 using GENERIC_REGS. During register rename pass, we prefer LO_REGS, | |
23655 and code size can be reduced. */ | |
23656 if (TARGET_THUMB2 && rclass == GENERAL_REGS) | |
23657 return LO_REGS; | |
23658 else | |
23659 return NO_REGS; | |
23660 } | |
23661 | |
21695 #include "gt-arm.h" | 23662 #include "gt-arm.h" |