comparison gcc/config/arm/arm.c @ 67:f6334be47118

update gcc from gcc-4.6-20100522 to gcc-4.6-20110318
author nobuyasu <dimolto@cr.ie.u-ryukyu.ac.jp>
date Tue, 22 Mar 2011 17:18:12 +0900
parents b7f97abdc517
children 04ced10e8804
65:65488c3d617d 67:f6334be47118
1 /* Output routines for GCC for ARM. 1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
4 Free Software Foundation, Inc. 4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) 5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk). 6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com). 7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8 8
38 #include "flags.h" 38 #include "flags.h"
39 #include "reload.h" 39 #include "reload.h"
40 #include "function.h" 40 #include "function.h"
41 #include "expr.h" 41 #include "expr.h"
42 #include "optabs.h" 42 #include "optabs.h"
43 #include "toplev.h" 43 #include "diagnostic-core.h"
44 #include "recog.h" 44 #include "recog.h"
45 #include "cgraph.h" 45 #include "cgraph.h"
46 #include "ggc.h" 46 #include "ggc.h"
47 #include "except.h" 47 #include "except.h"
48 #include "c-pragma.h" 48 #include "c-family/c-pragma.h" /* ??? */
49 #include "integrate.h" 49 #include "integrate.h"
50 #include "tm_p.h" 50 #include "tm_p.h"
51 #include "target.h" 51 #include "target.h"
52 #include "target-def.h" 52 #include "target-def.h"
53 #include "debug.h" 53 #include "debug.h"
54 #include "langhooks.h" 54 #include "langhooks.h"
55 #include "df.h" 55 #include "df.h"
56 #include "intl.h" 56 #include "intl.h"
57 #include "libfuncs.h" 57 #include "libfuncs.h"
58 #include "params.h"
58 59
59 /* Forward definitions of types. */ 60 /* Forward definitions of types. */
60 typedef struct minipool_node Mnode; 61 typedef struct minipool_node Mnode;
61 typedef struct minipool_fixup Mfix; 62 typedef struct minipool_fixup Mfix;
62 63
63 void (*arm_lang_output_object_attributes_hook)(void); 64 void (*arm_lang_output_object_attributes_hook)(void);
64 65
65 /* Forward function declarations. */ 66 /* Forward function declarations. */
67 static bool arm_needs_doubleword_align (enum machine_mode, const_tree);
66 static int arm_compute_static_chain_stack_bytes (void); 68 static int arm_compute_static_chain_stack_bytes (void);
67 static arm_stack_offsets *arm_get_frame_offsets (void); 69 static arm_stack_offsets *arm_get_frame_offsets (void);
68 static void arm_add_gc_roots (void); 70 static void arm_add_gc_roots (void);
69 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx, 71 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
70 HOST_WIDE_INT, rtx, rtx, int, int); 72 HOST_WIDE_INT, rtx, rtx, int, int);
81 static bool thumb_force_lr_save (void); 83 static bool thumb_force_lr_save (void);
82 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code); 84 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
83 static rtx emit_sfm (int, int); 85 static rtx emit_sfm (int, int);
84 static unsigned arm_size_return_regs (void); 86 static unsigned arm_size_return_regs (void);
85 static bool arm_assemble_integer (rtx, unsigned int, int); 87 static bool arm_assemble_integer (rtx, unsigned int, int);
88 static void arm_print_operand (FILE *, rtx, int);
89 static void arm_print_operand_address (FILE *, rtx);
90 static bool arm_print_operand_punct_valid_p (unsigned char code);
86 static const char *fp_const_from_val (REAL_VALUE_TYPE *); 91 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
87 static arm_cc get_arm_condition_code (rtx); 92 static arm_cc get_arm_condition_code (rtx);
88 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT); 93 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
89 static rtx is_jump_table (rtx); 94 static rtx is_jump_table (rtx);
90 static const char *output_multi_immediate (rtx *, const char *, const char *, 95 static const char *output_multi_immediate (rtx *, const char *, const char *,
157 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); 162 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
158 static void emit_constant_insn (rtx cond, rtx pattern); 163 static void emit_constant_insn (rtx cond, rtx pattern);
159 static rtx emit_set_insn (rtx, rtx); 164 static rtx emit_set_insn (rtx, rtx);
160 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode, 165 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
161 tree, bool); 166 tree, bool);
167 static rtx arm_function_arg (CUMULATIVE_ARGS *, enum machine_mode,
168 const_tree, bool);
169 static void arm_function_arg_advance (CUMULATIVE_ARGS *, enum machine_mode,
170 const_tree, bool);
171 static unsigned int arm_function_arg_boundary (enum machine_mode, const_tree);
162 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree, 172 static rtx aapcs_allocate_return_reg (enum machine_mode, const_tree,
163 const_tree); 173 const_tree);
164 static int aapcs_select_return_coproc (const_tree, const_tree); 174 static int aapcs_select_return_coproc (const_tree, const_tree);
165 175
166 #ifdef OBJECT_FORMAT_ELF 176 #ifdef OBJECT_FORMAT_ELF
182 static bool arm_default_short_enums (void); 192 static bool arm_default_short_enums (void);
183 static bool arm_align_anon_bitfield (void); 193 static bool arm_align_anon_bitfield (void);
184 static bool arm_return_in_msb (const_tree); 194 static bool arm_return_in_msb (const_tree);
185 static bool arm_must_pass_in_stack (enum machine_mode, const_tree); 195 static bool arm_must_pass_in_stack (enum machine_mode, const_tree);
186 static bool arm_return_in_memory (const_tree, const_tree); 196 static bool arm_return_in_memory (const_tree, const_tree);
187 #ifdef TARGET_UNWIND_INFO 197 #if ARM_UNWIND_INFO
188 static void arm_unwind_emit (FILE *, rtx); 198 static void arm_unwind_emit (FILE *, rtx);
189 static bool arm_output_ttype (rtx); 199 static bool arm_output_ttype (rtx);
200 static void arm_asm_emit_except_personality (rtx);
201 static void arm_asm_init_sections (void);
190 #endif 202 #endif
203 static enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
191 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int); 204 static void arm_dwarf_handle_frame_unspec (const char *, rtx, int);
192 static rtx arm_dwarf_register_span (rtx); 205 static rtx arm_dwarf_register_span (rtx);
193 206
194 static tree arm_cxx_guard_type (void); 207 static tree arm_cxx_guard_type (void);
195 static bool arm_cxx_guard_mask_bit (void); 208 static bool arm_cxx_guard_mask_bit (void);
202 static bool arm_cxx_use_aeabi_atexit (void); 215 static bool arm_cxx_use_aeabi_atexit (void);
203 static void arm_init_libfuncs (void); 216 static void arm_init_libfuncs (void);
204 static tree arm_build_builtin_va_list (void); 217 static tree arm_build_builtin_va_list (void);
205 static void arm_expand_builtin_va_start (tree, rtx); 218 static void arm_expand_builtin_va_start (tree, rtx);
206 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); 219 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
220 static void arm_option_override (void);
207 static bool arm_handle_option (size_t, const char *, int); 221 static bool arm_handle_option (size_t, const char *, int);
208 static void arm_target_help (void); 222 static void arm_target_help (void);
209 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode); 223 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
210 static bool arm_cannot_copy_insn_p (rtx); 224 static bool arm_cannot_copy_insn_p (rtx);
211 static bool arm_tls_symbol_p (rtx x); 225 static bool arm_tls_symbol_p (rtx x);
212 static int arm_issue_rate (void); 226 static int arm_issue_rate (void);
213 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; 227 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
228 static bool arm_output_addr_const_extra (FILE *, rtx);
214 static bool arm_allocate_stack_slots_for_args (void); 229 static bool arm_allocate_stack_slots_for_args (void);
215 static const char *arm_invalid_parameter_type (const_tree t); 230 static const char *arm_invalid_parameter_type (const_tree t);
216 static const char *arm_invalid_return_type (const_tree t); 231 static const char *arm_invalid_return_type (const_tree t);
217 static tree arm_promoted_type (const_tree t); 232 static tree arm_promoted_type (const_tree t);
218 static tree arm_convert_to_type (tree type, tree expr); 233 static tree arm_convert_to_type (tree type, tree expr);
221 static bool arm_can_eliminate (const int, const int); 236 static bool arm_can_eliminate (const int, const int);
222 static void arm_asm_trampoline_template (FILE *); 237 static void arm_asm_trampoline_template (FILE *);
223 static void arm_trampoline_init (rtx, tree, rtx); 238 static void arm_trampoline_init (rtx, tree, rtx);
224 static rtx arm_trampoline_adjust_address (rtx); 239 static rtx arm_trampoline_adjust_address (rtx);
225 static rtx arm_pic_static_addr (rtx orig, rtx reg); 240 static rtx arm_pic_static_addr (rtx orig, rtx reg);
241 static bool cortex_a9_sched_adjust_cost (rtx, rtx, rtx, int *);
242 static bool xscale_sched_adjust_cost (rtx, rtx, rtx, int *);
243 static bool fa726te_sched_adjust_cost (rtx, rtx, rtx, int *);
244 static enum machine_mode arm_preferred_simd_mode (enum machine_mode);
245 static bool arm_class_likely_spilled_p (reg_class_t);
246 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
247 static bool arm_builtin_support_vector_misalignment (enum machine_mode mode,
248 const_tree type,
249 int misalignment,
250 bool is_packed);
251 static void arm_conditional_register_usage (void);
252 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
226 253
227 254
228 /* Table of machine attributes. */ 255 /* Table of machine attributes. */
229 static const struct attribute_spec arm_attribute_table[] = 256 static const struct attribute_spec arm_attribute_table[] =
230 { 257 {
260 { "dllexport", 0, 0, false, false, false, handle_dll_attribute }, 287 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
261 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute }, 288 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
262 #endif 289 #endif
263 { NULL, 0, 0, false, false, false, NULL } 290 { NULL, 0, 0, false, false, false, NULL }
264 }; 291 };
292
293 /* Set default optimization options. */
294 static const struct default_options arm_option_optimization_table[] =
295 {
296 /* Enable section anchors by default at -O1 or higher. */
297 { OPT_LEVELS_1_PLUS, OPT_fsection_anchors, NULL, 1 },
298 { OPT_LEVELS_1_PLUS, OPT_fomit_frame_pointer, NULL, 1 },
299 { OPT_LEVELS_NONE, 0, NULL, 0 }
300 };
265 301
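(Illustration, not part of the changed file: once this table is registered through TARGET_OPTION_OPTIMIZATION_TABLE further down, compiling at -O1 or higher on ARM behaves as if -fsection-anchors and -fomit-frame-pointer had been given on the command line, while an explicit -fno-section-anchors or -fno-omit-frame-pointer from the user still takes precedence.)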
266 /* Initialize the GCC target structure. */ 302 /* Initialize the GCC target structure. */
267 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES 303 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
268 #undef TARGET_MERGE_DECL_ATTRIBUTES 304 #undef TARGET_MERGE_DECL_ATTRIBUTES
269 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes 305 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
283 #undef TARGET_ASM_ALIGNED_SI_OP 319 #undef TARGET_ASM_ALIGNED_SI_OP
284 #define TARGET_ASM_ALIGNED_SI_OP NULL 320 #define TARGET_ASM_ALIGNED_SI_OP NULL
285 #undef TARGET_ASM_INTEGER 321 #undef TARGET_ASM_INTEGER
286 #define TARGET_ASM_INTEGER arm_assemble_integer 322 #define TARGET_ASM_INTEGER arm_assemble_integer
287 323
324 #undef TARGET_PRINT_OPERAND
325 #define TARGET_PRINT_OPERAND arm_print_operand
326 #undef TARGET_PRINT_OPERAND_ADDRESS
327 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
328 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
329 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
330
331 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
332 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
333
288 #undef TARGET_ASM_FUNCTION_PROLOGUE 334 #undef TARGET_ASM_FUNCTION_PROLOGUE
289 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue 335 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
290 336
291 #undef TARGET_ASM_FUNCTION_EPILOGUE 337 #undef TARGET_ASM_FUNCTION_EPILOGUE
292 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue 338 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
295 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG) 341 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
296 #undef TARGET_HANDLE_OPTION 342 #undef TARGET_HANDLE_OPTION
297 #define TARGET_HANDLE_OPTION arm_handle_option 343 #define TARGET_HANDLE_OPTION arm_handle_option
298 #undef TARGET_HELP 344 #undef TARGET_HELP
299 #define TARGET_HELP arm_target_help 345 #define TARGET_HELP arm_target_help
346 #undef TARGET_OPTION_OVERRIDE
347 #define TARGET_OPTION_OVERRIDE arm_option_override
348 #undef TARGET_OPTION_OPTIMIZATION_TABLE
349 #define TARGET_OPTION_OPTIMIZATION_TABLE arm_option_optimization_table
300 350
301 #undef TARGET_COMP_TYPE_ATTRIBUTES 351 #undef TARGET_COMP_TYPE_ATTRIBUTES
302 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes 352 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
303 353
304 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES 354 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
341 391
342 #undef TARGET_SHIFT_TRUNCATION_MASK 392 #undef TARGET_SHIFT_TRUNCATION_MASK
343 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask 393 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
344 #undef TARGET_VECTOR_MODE_SUPPORTED_P 394 #undef TARGET_VECTOR_MODE_SUPPORTED_P
345 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p 395 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
396 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
397 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
346 398
347 #undef TARGET_MACHINE_DEPENDENT_REORG 399 #undef TARGET_MACHINE_DEPENDENT_REORG
348 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg 400 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
349 401
350 #undef TARGET_INIT_BUILTINS 402 #undef TARGET_INIT_BUILTINS
361 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes 413 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
362 #undef TARGET_PASS_BY_REFERENCE 414 #undef TARGET_PASS_BY_REFERENCE
363 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference 415 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
364 #undef TARGET_ARG_PARTIAL_BYTES 416 #undef TARGET_ARG_PARTIAL_BYTES
365 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes 417 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
418 #undef TARGET_FUNCTION_ARG
419 #define TARGET_FUNCTION_ARG arm_function_arg
420 #undef TARGET_FUNCTION_ARG_ADVANCE
421 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
422 #undef TARGET_FUNCTION_ARG_BOUNDARY
423 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
366 424
367 #undef TARGET_SETUP_INCOMING_VARARGS 425 #undef TARGET_SETUP_INCOMING_VARARGS
368 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs 426 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
369 427
370 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS 428 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
421 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory 479 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
422 480
423 #undef TARGET_MUST_PASS_IN_STACK 481 #undef TARGET_MUST_PASS_IN_STACK
424 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack 482 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
425 483
426 #ifdef TARGET_UNWIND_INFO 484 #if ARM_UNWIND_INFO
427 #undef TARGET_UNWIND_EMIT 485 #undef TARGET_ASM_UNWIND_EMIT
428 #define TARGET_UNWIND_EMIT arm_unwind_emit 486 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
429 487
430 /* EABI unwinding tables use a different format for the typeinfo tables. */ 488 /* EABI unwinding tables use a different format for the typeinfo tables. */
431 #undef TARGET_ASM_TTYPE 489 #undef TARGET_ASM_TTYPE
432 #define TARGET_ASM_TTYPE arm_output_ttype 490 #define TARGET_ASM_TTYPE arm_output_ttype
433 491
434 #undef TARGET_ARM_EABI_UNWINDER 492 #undef TARGET_ARM_EABI_UNWINDER
435 #define TARGET_ARM_EABI_UNWINDER true 493 #define TARGET_ARM_EABI_UNWINDER true
436 #endif /* TARGET_UNWIND_INFO */ 494
495 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
496 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
497
498 #undef TARGET_ASM_INIT_SECTIONS
499 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
500 #endif /* ARM_UNWIND_INFO */
501
502 #undef TARGET_EXCEPT_UNWIND_INFO
503 #define TARGET_EXCEPT_UNWIND_INFO arm_except_unwind_info
437 504
438 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC 505 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
439 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec 506 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC arm_dwarf_handle_frame_unspec
440 507
441 #undef TARGET_DWARF_REGISTER_SPAN 508 #undef TARGET_DWARF_REGISTER_SPAN
504 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required 571 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
505 572
506 #undef TARGET_CAN_ELIMINATE 573 #undef TARGET_CAN_ELIMINATE
507 #define TARGET_CAN_ELIMINATE arm_can_eliminate 574 #define TARGET_CAN_ELIMINATE arm_can_eliminate
508 575
576 #undef TARGET_CONDITIONAL_REGISTER_USAGE
577 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
578
579 #undef TARGET_CLASS_LIKELY_SPILLED_P
580 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
581
582 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
583 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
584 arm_vector_alignment_reachable
585
586 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
587 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
588 arm_builtin_support_vector_misalignment
589
590 #undef TARGET_PREFERRED_RENAME_CLASS
591 #define TARGET_PREFERRED_RENAME_CLASS \
592 arm_preferred_rename_class
593
509 struct gcc_target targetm = TARGET_INITIALIZER; 594 struct gcc_target targetm = TARGET_INITIALIZER;
510 595
511 /* Obstack for minipool constant handling. */ 596 /* Obstack for minipool constant handling. */
512 static struct obstack minipool_obstack; 597 static struct obstack minipool_obstack;
513 static char * minipool_startobj; 598 static char * minipool_startobj;
524 /* The processor for which instructions should be scheduled. */ 609 /* The processor for which instructions should be scheduled. */
525 enum processor_type arm_tune = arm_none; 610 enum processor_type arm_tune = arm_none;
526 611
527 /* The current tuning set. */ 612 /* The current tuning set. */
528 const struct tune_params *current_tune; 613 const struct tune_params *current_tune;
529
530 /* The default processor used if not overridden by commandline. */
531 static enum processor_type arm_default_cpu = arm_none;
532 614
533 /* Which floating point hardware to schedule for. */ 615 /* Which floating point hardware to schedule for. */
534 int arm_fpu_attr; 616 int arm_fpu_attr;
535 617
536 /* Which floating popint hardware to use. */ 618 /* Which floating popint hardware to use. */
580 #define FL_DIV (1 << 18) /* Hardware divide. */ 662 #define FL_DIV (1 << 18) /* Hardware divide. */
581 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */ 663 #define FL_VFPV3 (1 << 19) /* Vector Floating Point V3. */
582 #define FL_NEON (1 << 20) /* Neon instructions. */ 664 #define FL_NEON (1 << 20) /* Neon instructions. */
583 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M 665 #define FL_ARCH7EM (1 << 21) /* Instructions present in the ARMv7E-M
584 architecture. */ 666 architecture. */
667 #define FL_ARCH7 (1 << 22) /* Architecture 7. */
585 668
586 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */ 669 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
670
 671 /* Flags that only affect tuning, not available instructions. */
672 #define FL_TUNE (FL_WBUF | FL_VFPV2 | FL_STRONG | FL_LDSCHED \
673 | FL_CO_PROC)
587 674
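(Sketch, not part of the changed file: FL_TUNE collects the flags that influence only tuning, so the -mcpu/-march consistency check introduced in arm_option_override below can ignore them. Assuming just the flag macros shown here, that test amounts to:)

/* Report a conflict only when the CPU and the architecture differ in
   bits outside the tuning-only set.  */
static int
cpu_arch_flags_conflict_p (unsigned long cpu_flags, unsigned long arch_flags)
{
  return ((cpu_flags ^ arch_flags) & ~FL_TUNE) != 0;
}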
588 #define FL_FOR_ARCH2 FL_NOTM 675 #define FL_FOR_ARCH2 FL_NOTM
589 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32) 676 #define FL_FOR_ARCH3 (FL_FOR_ARCH2 | FL_MODE32)
590 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M) 677 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
591 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4) 678 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
600 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K) 687 #define FL_FOR_ARCH6K (FL_FOR_ARCH6 | FL_ARCH6K)
601 #define FL_FOR_ARCH6Z FL_FOR_ARCH6 688 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
602 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K 689 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
603 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) 690 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
604 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) 691 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
605 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM) 692 #define FL_FOR_ARCH7 ((FL_FOR_ARCH6T2 & ~FL_NOTM) | FL_ARCH7)
606 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) 693 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
607 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) 694 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
608 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV) 695 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
609 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) 696 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
610 697
638 int arm_arch6 = 0; 725 int arm_arch6 = 0;
639 726
640 /* Nonzero if this chip supports the ARM 6K extensions. */ 727 /* Nonzero if this chip supports the ARM 6K extensions. */
641 int arm_arch6k = 0; 728 int arm_arch6k = 0;
642 729
730 /* Nonzero if this chip supports the ARM 7 extensions. */
731 int arm_arch7 = 0;
732
643 /* Nonzero if instructions not present in the 'M' profile can be used. */ 733 /* Nonzero if instructions not present in the 'M' profile can be used. */
644 int arm_arch_notm = 0; 734 int arm_arch_notm = 0;
645 735
646 /* Nonzero if instructions present in ARMv7E-M can be used. */ 736 /* Nonzero if instructions present in ARMv7E-M can be used. */
647 int arm_arch7em = 0; 737 int arm_arch7em = 0;
671 /* Nonzero if tuning for Cortex-A9. */ 761 /* Nonzero if tuning for Cortex-A9. */
672 int arm_tune_cortex_a9 = 0; 762 int arm_tune_cortex_a9 = 0;
673 763
674 /* Nonzero if generating Thumb instructions. */ 764 /* Nonzero if generating Thumb instructions. */
675 int thumb_code = 0; 765 int thumb_code = 0;
766
767 /* Nonzero if generating Thumb-1 instructions. */
768 int thumb1_code = 0;
676 769
677 /* Nonzero if we should define __THUMB_INTERWORK__ in the 770 /* Nonzero if we should define __THUMB_INTERWORK__ in the
678 preprocessor. 771 preprocessor.
679 XXX This is a bit of a hack, it's intended to help work around 772 XXX This is a bit of a hack, it's intended to help work around
680 problems in GLD which doesn't understand that armv5t code is 773 problems in GLD which doesn't understand that armv5t code is
685 int arm_arch_thumb2; 778 int arm_arch_thumb2;
686 779
687 /* Nonzero if chip supports integer division instruction. */ 780 /* Nonzero if chip supports integer division instruction. */
688 int arm_arch_hwdiv; 781 int arm_arch_hwdiv;
689 782
690 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we 783 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
691 must report the mode of the memory reference from PRINT_OPERAND to 784 we must report the mode of the memory reference from
692 PRINT_OPERAND_ADDRESS. */ 785 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
693 enum machine_mode output_memory_reference_mode; 786 enum machine_mode output_memory_reference_mode;
694 787
695 /* The register number to be used for the PIC offset register. */ 788 /* The register number to be used for the PIC offset register. */
696 unsigned arm_pic_register = INVALID_REGNUM; 789 unsigned arm_pic_register = INVALID_REGNUM;
697 790
698 /* Set to 1 after arm_reorg has started. Reset to start at the start of 791 /* Set to 1 after arm_reorg has started. Reset to start at the start of
699 the next function. */ 792 the next function. */
700 static int after_arm_reorg = 0; 793 static int after_arm_reorg = 0;
701 794
702 static enum arm_pcs arm_pcs_default; 795 enum arm_pcs arm_pcs_default;
703 796
704 /* For an explanation of these variables, see final_prescan_insn below. */ 797 /* For an explanation of these variables, see final_prescan_insn below. */
705 int arm_ccfsm_state; 798 int arm_ccfsm_state;
706 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */ 799 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
707 enum arm_cond_code arm_current_cc; 800 enum arm_cond_code arm_current_cc;
801
708 rtx arm_target_insn; 802 rtx arm_target_insn;
709 int arm_target_label; 803 int arm_target_label;
710 /* The number of conditionally executed insns, including the current insn. */ 804 /* The number of conditionally executed insns, including the current insn. */
711 int arm_condexec_count = 0; 805 int arm_condexec_count = 0;
712 /* A bitmask specifying the patterns for the IT block. 806 /* A bitmask specifying the patterns for the IT block.
720 { 814 {
721 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc", 815 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
722 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv" 816 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
723 }; 817 };
724 818
819 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
820 int arm_regs_in_sequence[] =
821 {
822 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
823 };
824
725 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl") 825 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
726 #define streq(string1, string2) (strcmp (string1, string2) == 0) 826 #define streq(string1, string2) (strcmp (string1, string2) == 0)
727 827
728 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \ 828 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
729 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \ 829 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
738 const char *arch; 838 const char *arch;
739 const unsigned long flags; 839 const unsigned long flags;
740 const struct tune_params *const tune; 840 const struct tune_params *const tune;
741 }; 841 };
742 842
843
844 #define ARM_PREFETCH_NOT_BENEFICIAL 0, -1, -1
845 #define ARM_PREFETCH_BENEFICIAL(prefetch_slots,l1_size,l1_line_size) \
846 prefetch_slots, \
847 l1_size, \
848 l1_line_size
849
743 const struct tune_params arm_slowmul_tune = 850 const struct tune_params arm_slowmul_tune =
744 { 851 {
745 arm_slowmul_rtx_costs, 852 arm_slowmul_rtx_costs,
746 3 853 NULL,
854 3,
855 ARM_PREFETCH_NOT_BENEFICIAL
747 }; 856 };
748 857
749 const struct tune_params arm_fastmul_tune = 858 const struct tune_params arm_fastmul_tune =
750 { 859 {
751 arm_fastmul_rtx_costs, 860 arm_fastmul_rtx_costs,
752 1 861 NULL,
862 1,
863 ARM_PREFETCH_NOT_BENEFICIAL
753 }; 864 };
754 865
755 const struct tune_params arm_xscale_tune = 866 const struct tune_params arm_xscale_tune =
756 { 867 {
757 arm_xscale_rtx_costs, 868 arm_xscale_rtx_costs,
758 2 869 xscale_sched_adjust_cost,
870 2,
871 ARM_PREFETCH_NOT_BENEFICIAL
759 }; 872 };
760 873
761 const struct tune_params arm_9e_tune = 874 const struct tune_params arm_9e_tune =
762 { 875 {
763 arm_9e_rtx_costs, 876 arm_9e_rtx_costs,
764 1 877 NULL,
878 1,
879 ARM_PREFETCH_NOT_BENEFICIAL
765 }; 880 };
881
882 const struct tune_params arm_cortex_a9_tune =
883 {
884 arm_9e_rtx_costs,
885 cortex_a9_sched_adjust_cost,
886 1,
887 ARM_PREFETCH_BENEFICIAL(4,32,32)
888 };
889
890 const struct tune_params arm_fa726te_tune =
891 {
892 arm_9e_rtx_costs,
893 fa726te_sched_adjust_cost,
894 1,
895 ARM_PREFETCH_NOT_BENEFICIAL
896 };
897
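(Illustration only; the tune_params field names below are assumptions, since the structure definition is not part of this hunk. With the ARM_PREFETCH_* macros above, the Cortex-A9 entry is equivalent to writing the three trailing prefetch fields out by hand:)

/* Hypothetical expansion of arm_cortex_a9_tune.  The last three values
   come from ARM_PREFETCH_BENEFICIAL (4, 32, 32).  */
const struct tune_params arm_cortex_a9_tune_expanded =
{
  arm_9e_rtx_costs,             /* RTX cost function.  */
  cortex_a9_sched_adjust_cost,  /* Scheduler cost-adjustment hook.  */
  1,                            /* Pre-existing field (constant limit).  */
  4,                            /* num_prefetch_slots.  */
  32,                           /* l1_cache_size.  */
  32                            /* l1_cache_line_size.  */
};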
766 898
767 /* Not all of these give usefully different compilation alternatives, 899 /* Not all of these give usefully different compilation alternatives,
768 but there is no simple way of generalizing them. */ 900 but there is no simple way of generalizing them. */
769 static const struct processors all_cores[] = 901 static const struct processors all_cores[] =
770 { 902 {
771 /* ARM Cores */ 903 /* ARM Cores */
772 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \ 904 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
773 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune}, 905 {NAME, IDENT, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
774 #include "arm-cores.def" 906 #include "arm-cores.def"
775 #undef ARM_CORE 907 #undef ARM_CORE
776 {NULL, arm_none, NULL, 0, NULL} 908 {NULL, arm_none, NULL, 0, NULL}
777 }; 909 };
778 910
803 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL}, 935 {"armv6-m", cortexm1, "6M", FL_FOR_ARCH6M, NULL},
804 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL}, 936 {"armv7", cortexa8, "7", FL_CO_PROC | FL_FOR_ARCH7, NULL},
805 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL}, 937 {"armv7-a", cortexa8, "7A", FL_CO_PROC | FL_FOR_ARCH7A, NULL},
806 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL}, 938 {"armv7-r", cortexr4, "7R", FL_CO_PROC | FL_FOR_ARCH7R, NULL},
807 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL}, 939 {"armv7-m", cortexm3, "7M", FL_CO_PROC | FL_FOR_ARCH7M, NULL},
808 {"armv7e-m", cortexm3, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL}, 940 {"armv7e-m", cortexm4, "7EM", FL_CO_PROC | FL_FOR_ARCH7EM, NULL},
809 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL}, 941 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
810 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, 942 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
811 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL}, 943 {"iwmmxt2", iwmmxt2, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
812 {NULL, arm_none, NULL, 0 , NULL} 944 {NULL, arm_none, NULL, 0 , NULL}
813 }; 945 };
814 946
815 struct arm_cpu_select 947
816 { 948 /* These are populated as commandline arguments are processed, or NULL
817 const char * string; 949 if not specified. */
818 const char * name; 950 static const struct processors *arm_selected_arch;
819 const struct processors * processors; 951 static const struct processors *arm_selected_cpu;
820 }; 952 static const struct processors *arm_selected_tune;
821
822 /* This is a magic structure. The 'string' field is magically filled in
823 with a pointer to the value specified by the user on the command line
824 assuming that the user has specified such a value. */
825
826 static struct arm_cpu_select arm_select[] =
827 {
828 /* string name processors */
829 { NULL, "-mcpu=", all_cores },
830 { NULL, "-march=", all_architectures },
831 { NULL, "-mtune=", all_cores }
832 };
833
834 /* Defines representing the indexes into the above table. */
835 #define ARM_OPT_SET_CPU 0
836 #define ARM_OPT_SET_ARCH 1
837 #define ARM_OPT_SET_TUNE 2
838 953
839 /* The name of the preprocessor macro to define for this architecture. */ 954 /* The name of the preprocessor macro to define for this architecture. */
840 955
841 char arm_arch_name[] = "__ARM_ARCH_0UNK__"; 956 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
842 957
1142 TYPE_DECL, 1257 TYPE_DECL,
1143 get_identifier ("__va_list"), 1258 get_identifier ("__va_list"),
1144 va_list_type); 1259 va_list_type);
1145 DECL_ARTIFICIAL (va_list_name) = 1; 1260 DECL_ARTIFICIAL (va_list_name) = 1;
1146 TYPE_NAME (va_list_type) = va_list_name; 1261 TYPE_NAME (va_list_type) = va_list_name;
1262 TYPE_STUB_DECL (va_list_type) = va_list_name;
1147 /* Create the __ap field. */ 1263 /* Create the __ap field. */
1148 ap_field = build_decl (BUILTINS_LOCATION, 1264 ap_field = build_decl (BUILTINS_LOCATION,
1149 FIELD_DECL, 1265 FIELD_DECL,
1150 get_identifier ("__ap"), 1266 get_identifier ("__ap"),
1151 ptr_type_node); 1267 ptr_type_node);
1194 { 1310 {
1195 valist = arm_extract_valist_ptr (valist); 1311 valist = arm_extract_valist_ptr (valist);
1196 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); 1312 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
1197 } 1313 }
1198 1314
1315 /* Lookup NAME in SEL. */
1316
1317 static const struct processors *
1318 arm_find_cpu (const char *name, const struct processors *sel, const char *desc)
1319 {
1320 if (!(name && *name))
1321 return NULL;
1322
1323 for (; sel->name != NULL; sel++)
1324 {
1325 if (streq (name, sel->name))
1326 return sel;
1327 }
1328
1329 error ("bad value (%s) for %s switch", name, desc);
1330 return NULL;
1331 }
1332
1199 /* Implement TARGET_HANDLE_OPTION. */ 1333 /* Implement TARGET_HANDLE_OPTION. */
1200 1334
1201 static bool 1335 static bool
1202 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED) 1336 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
1203 { 1337 {
1204 switch (code) 1338 switch (code)
1205 { 1339 {
1206 case OPT_march_: 1340 case OPT_march_:
1207 arm_select[1].string = arg; 1341 arm_selected_arch = arm_find_cpu(arg, all_architectures, "-march");
1208 return true; 1342 return true;
1209 1343
1210 case OPT_mcpu_: 1344 case OPT_mcpu_:
1211 arm_select[0].string = arg; 1345 arm_selected_cpu = arm_find_cpu(arg, all_cores, "-mcpu");
1212 return true; 1346 return true;
1213 1347
1214 case OPT_mhard_float: 1348 case OPT_mhard_float:
1215 target_float_abi_name = "hard"; 1349 target_float_abi_name = "hard";
1216 return true; 1350 return true;
1218 case OPT_msoft_float: 1352 case OPT_msoft_float:
1219 target_float_abi_name = "soft"; 1353 target_float_abi_name = "soft";
1220 return true; 1354 return true;
1221 1355
1222 case OPT_mtune_: 1356 case OPT_mtune_:
1223 arm_select[2].string = arg; 1357 arm_selected_tune = arm_find_cpu(arg, all_cores, "-mtune");
1224 return true; 1358 return true;
1225 1359
1226 default: 1360 default:
1227 return true; 1361 return true;
1228 } 1362 }
1241 replaced by a single function. */ 1375 replaced by a single function. */
1242 if (columns == 0) 1376 if (columns == 0)
1243 { 1377 {
1244 const char *p; 1378 const char *p;
1245 1379
1246 GET_ENVIRONMENT (p, "COLUMNS"); 1380 p = getenv ("COLUMNS");
1247 if (p != NULL) 1381 if (p != NULL)
1248 { 1382 {
1249 int value = atoi (p); 1383 int value = atoi (p);
1250 1384
1251 if (value > 0) 1385 if (value > 0)
1312 } 1446 }
1313 printf ("\n"); 1447 printf ("\n");
1314 1448
1315 } 1449 }
1316 1450
1317 /* Fix up any incompatible options that the user has specified. 1451 /* Fix up any incompatible options that the user has specified. */
1318 This has now turned into a maze. */ 1452 static void
1319 void 1453 arm_option_override (void)
1320 arm_override_options (void)
1321 { 1454 {
1322 unsigned i; 1455 unsigned i;
1323 enum processor_type target_arch_cpu = arm_none; 1456
1324 enum processor_type selected_cpu = arm_none; 1457 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1325 1458 SUBTARGET_OVERRIDE_OPTIONS;
1326 /* Set up the flags based on the cpu/architecture selected by the user. */ 1459 #endif
1327 for (i = ARRAY_SIZE (arm_select); i--;) 1460
1328 { 1461 if (arm_selected_arch)
1329 struct arm_cpu_select * ptr = arm_select + i; 1462 {
1330 1463 if (arm_selected_cpu)
1331 if (ptr->string != NULL && ptr->string[0] != '\0') 1464 {
1332 { 1465 /* Check for conflict between mcpu and march. */
1333 const struct processors * sel; 1466 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
1334 1467 {
1335 for (sel = ptr->processors; sel->name != NULL; sel++) 1468 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
1336 if (streq (ptr->string, sel->name)) 1469 arm_selected_cpu->name, arm_selected_arch->name);
1337 { 1470 /* -march wins for code generation.
1338 /* Set the architecture define. */ 1471 -mcpu wins for default tuning. */
1339 if (i != ARM_OPT_SET_TUNE) 1472 if (!arm_selected_tune)
1340 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch); 1473 arm_selected_tune = arm_selected_cpu;
1341 1474
1342 /* Determine the processor core for which we should 1475 arm_selected_cpu = arm_selected_arch;
1343 tune code-generation. */ 1476 }
1344 if (/* -mcpu= is a sensible default. */ 1477 else
1345 i == ARM_OPT_SET_CPU 1478 /* -mcpu wins. */
1346 /* -mtune= overrides -mcpu= and -march=. */ 1479 arm_selected_arch = NULL;
1347 || i == ARM_OPT_SET_TUNE) 1480 }
1348 arm_tune = (enum processor_type) (sel - ptr->processors); 1481 else
1349 1482 /* Pick a CPU based on the architecture. */
1350 /* Remember the CPU associated with this architecture. 1483 arm_selected_cpu = arm_selected_arch;
1351 If no other option is used to set the CPU type, 1484 }
1352 we'll use this to guess the most suitable tuning
1353 options. */
1354 if (i == ARM_OPT_SET_ARCH)
1355 target_arch_cpu = sel->core;
1356
1357 if (i == ARM_OPT_SET_CPU)
1358 selected_cpu = (enum processor_type) (sel - ptr->processors);
1359
1360 if (i != ARM_OPT_SET_TUNE)
1361 {
1362 /* If we have been given an architecture and a processor
1363 make sure that they are compatible. We only generate
1364 a warning though, and we prefer the CPU over the
1365 architecture. */
1366 if (insn_flags != 0 && (insn_flags ^ sel->flags))
1367 warning (0, "switch -mcpu=%s conflicts with -march= switch",
1368 ptr->string);
1369
1370 insn_flags = sel->flags;
1371 }
1372
1373 break;
1374 }
1375
1376 if (sel->name == NULL)
1377 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
1378 }
1379 }
1380
1381 /* Guess the tuning options from the architecture if necessary. */
1382 if (arm_tune == arm_none)
1383 arm_tune = target_arch_cpu;
1384 1485
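(Worked example of the new selection logic; the option values are illustrative. With -mcpu=arm926ej-s -march=armv7-a the two flag sets differ outside FL_TUNE, so the compiler warns about the conflict, generates code for armv7-a because -march wins, and, since no -mtune was given, keeps arm926ej-s as the default tuning target because -mcpu wins there. With -march alone, the architecture entry itself stands in as the selected CPU, and its representative core from the table above, cortexa8 for armv7-a, later supplies the default tuning.)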
1385 /* If the user did not specify a processor, choose one for them. */ 1486 /* If the user did not specify a processor, choose one for them. */
1386 if (insn_flags == 0) 1487 if (!arm_selected_cpu)
1387 { 1488 {
1388 const struct processors * sel; 1489 const struct processors * sel;
1389 unsigned int sought; 1490 unsigned int sought;
1390 1491
1391 selected_cpu = (enum processor_type) TARGET_CPU_DEFAULT; 1492 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
1392 if (selected_cpu == arm_none) 1493 if (!arm_selected_cpu->name)
1393 { 1494 {
1394 #ifdef SUBTARGET_CPU_DEFAULT 1495 #ifdef SUBTARGET_CPU_DEFAULT
1395 /* Use the subtarget default CPU if none was specified by 1496 /* Use the subtarget default CPU if none was specified by
1396 configure. */ 1497 configure. */
1397 selected_cpu = (enum processor_type) SUBTARGET_CPU_DEFAULT; 1498 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
1398 #endif 1499 #endif
1399 /* Default to ARM6. */ 1500 /* Default to ARM6. */
1400 if (selected_cpu == arm_none) 1501 if (!arm_selected_cpu->name)
1401 selected_cpu = arm6; 1502 arm_selected_cpu = &all_cores[arm6];
1402 } 1503 }
1403 sel = &all_cores[selected_cpu]; 1504
1404 1505 sel = arm_selected_cpu;
1405 insn_flags = sel->flags; 1506 insn_flags = sel->flags;
1406 1507
1407 /* Now check to see if the user has specified some command line 1508 /* Now check to see if the user has specified some command line
1408 switch that require certain abilities from the cpu. */ 1509 switch that require certain abilities from the cpu. */
1409 sought = 0; 1510 sought = 0;
1460 1561
1461 gcc_assert (best_fit); 1562 gcc_assert (best_fit);
1462 sel = best_fit; 1563 sel = best_fit;
1463 } 1564 }
1464 1565
1465 insn_flags = sel->flags; 1566 arm_selected_cpu = sel;
1466 } 1567 }
1467 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch); 1568 }
1468 arm_default_cpu = (enum processor_type) (sel - all_cores); 1569
1469 if (arm_tune == arm_none) 1570 gcc_assert (arm_selected_cpu);
1470 arm_tune = arm_default_cpu; 1571 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
1471 } 1572 if (!arm_selected_tune)
1472 1573 arm_selected_tune = &all_cores[arm_selected_cpu->core];
1473 /* The processor for which we should tune should now have been 1574
1474 chosen. */ 1575 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
1475 gcc_assert (arm_tune != arm_none); 1576 insn_flags = arm_selected_cpu->flags;
1476 1577
1477 tune_flags = all_cores[(int)arm_tune].flags; 1578 arm_tune = arm_selected_tune->core;
1478 current_tune = all_cores[(int)arm_tune].tune; 1579 tune_flags = arm_selected_tune->flags;
1580 current_tune = arm_selected_tune->tune;
1479 1581
1480 if (target_fp16_format_name) 1582 if (target_fp16_format_name)
1481 { 1583 {
1482 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++) 1584 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1483 { 1585 {
1536 } 1638 }
1537 1639
1538 /* Callee super interworking implies thumb interworking. Adding 1640 /* Callee super interworking implies thumb interworking. Adding
1539 this to the flags here simplifies the logic elsewhere. */ 1641 this to the flags here simplifies the logic elsewhere. */
1540 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING) 1642 if (TARGET_THUMB && TARGET_CALLEE_INTERWORKING)
1541 target_flags |= MASK_INTERWORK; 1643 target_flags |= MASK_INTERWORK;
1542 1644
1543 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done 1645 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1544 from here where no function is being compiled currently. */ 1646 from here where no function is being compiled currently. */
1545 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM) 1647 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1546 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb"); 1648 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1547 1649
1548 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING) 1650 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1549 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb"); 1651 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1550
1551 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1552 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1553 1652
1554 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME) 1653 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1555 { 1654 {
1556 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame"); 1655 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1557 target_flags |= MASK_APCS_FRAME; 1656 target_flags |= MASK_APCS_FRAME;
1584 arm_arch5 = (insn_flags & FL_ARCH5) != 0; 1683 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1585 arm_arch5e = (insn_flags & FL_ARCH5E) != 0; 1684 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1586 arm_arch6 = (insn_flags & FL_ARCH6) != 0; 1685 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1587 arm_arch6k = (insn_flags & FL_ARCH6K) != 0; 1686 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
1588 arm_arch_notm = (insn_flags & FL_NOTM) != 0; 1687 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
1688 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
1589 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0; 1689 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
1590 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0; 1690 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
1591 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0; 1691 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1592 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0; 1692 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1593 1693
1594 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0; 1694 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1595 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0; 1695 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1596 thumb_code = (TARGET_ARM == 0); 1696 thumb_code = TARGET_ARM == 0;
1697 thumb1_code = TARGET_THUMB1 != 0;
1597 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0; 1698 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1598 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0; 1699 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1599 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0; 1700 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1600 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0; 1701 arm_arch_hwdiv = (insn_flags & FL_DIV) != 0;
1601 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0; 1702 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
1856 } 1957 }
1857 1958
1858 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */ 1959 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
1859 if (fix_cm3_ldrd == 2) 1960 if (fix_cm3_ldrd == 2)
1860 { 1961 {
1861 if (selected_cpu == cortexm3) 1962 if (arm_selected_cpu->core == cortexm3)
1862 fix_cm3_ldrd = 1; 1963 fix_cm3_ldrd = 1;
1863 else 1964 else
1864 fix_cm3_ldrd = 0; 1965 fix_cm3_ldrd = 0;
1865 } 1966 }
1866 1967
1891 inform (input_location, 1992 inform (input_location,
1892 "-freorder-blocks-and-partition not supported on this architecture"); 1993 "-freorder-blocks-and-partition not supported on this architecture");
1893 flag_reorder_blocks_and_partition = 0; 1994 flag_reorder_blocks_and_partition = 0;
1894 flag_reorder_blocks = 1; 1995 flag_reorder_blocks = 1;
1895 } 1996 }
1997
1998 if (flag_pic)
1999 /* Hoisting PIC address calculations more aggressively provides a small,
2000 but measurable, size reduction for PIC code. Therefore, we decrease
2001 the bar for unrestricted expression hoisting to the cost of PIC address
2002 calculation, which is 2 instructions. */
2003 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
2004 global_options.x_param_values,
2005 global_options_set.x_param_values);
2006
2007 /* ARM EABI defaults to strict volatile bitfields. */
2008 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0)
2009 flag_strict_volatile_bitfields = 1;
2010
2011 /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we have deemed
2012 it beneficial (signified by setting num_prefetch_slots to 1 or more.) */
2013 if (flag_prefetch_loop_arrays < 0
2014 && HAVE_prefetch
2015 && optimize >= 3
2016 && current_tune->num_prefetch_slots > 0)
2017 flag_prefetch_loop_arrays = 1;
2018
2019 /* Set up parameters to be used in prefetching algorithm. Do not override the
2020 defaults unless we are tuning for a core we have researched values for. */
2021 if (current_tune->num_prefetch_slots > 0)
2022 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2023 current_tune->num_prefetch_slots,
2024 global_options.x_param_values,
2025 global_options_set.x_param_values);
2026 if (current_tune->l1_cache_line_size >= 0)
2027 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2028 current_tune->l1_cache_line_size,
2029 global_options.x_param_values,
2030 global_options_set.x_param_values);
2031 if (current_tune->l1_cache_size >= 0)
2032 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2033 current_tune->l1_cache_size,
2034 global_options.x_param_values,
2035 global_options_set.x_param_values);
1896 2036
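(Behavioural note, for illustration: when tuning for Cortex-A9, whose tune entry supplies (4, 32, 32), this block acts roughly like passing --param simultaneous-prefetches=4 --param l1-cache-size=32 --param l1-cache-line-size=32, and, at -O3 on a target with a prefetch instruction, like enabling -fprefetch-loop-arrays; values the user sets explicitly on the command line are left untouched.)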
1897 /* Register global variables with the garbage collector. */ 2037 /* Register global variables with the garbage collector. */
1898 arm_add_gc_roots (); 2038 arm_add_gc_roots ();
1899 } 2039 }
1900 2040
1980 register values that will never be needed again. This optimization 2120 register values that will never be needed again. This optimization
1981 was added to speed up context switching in a kernel application. */ 2121 was added to speed up context switching in a kernel application. */
1982 if (optimize > 0 2122 if (optimize > 0
1983 && (TREE_NOTHROW (current_function_decl) 2123 && (TREE_NOTHROW (current_function_decl)
1984 || !(flag_unwind_tables 2124 || !(flag_unwind_tables
1985 || (flag_exceptions && !USING_SJLJ_EXCEPTIONS))) 2125 || (flag_exceptions
2126 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
1986 && TREE_THIS_VOLATILE (current_function_decl)) 2127 && TREE_THIS_VOLATILE (current_function_decl))
1987 type |= ARM_FT_VOLATILE; 2128 type |= ARM_FT_VOLATILE;
1988 2129
1989 if (cfun->static_chain_decl != NULL) 2130 if (cfun->static_chain_decl != NULL)
1990 type |= ARM_FT_NESTED; 2131 type |= ARM_FT_NESTED;
2280 } 2421 }
2281 else 2422 else
2282 { 2423 {
2283 HOST_WIDE_INT v; 2424 HOST_WIDE_INT v;
2284 2425
2285 /* Allow repeated pattern. */ 2426 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
2286 v = i & 0xff; 2427 v = i & 0xff;
2287 v |= v << 16; 2428 v |= v << 16;
2288 if (i == v || i == (v | (v << 8))) 2429 if (i == v || i == (v | (v << 8)))
2430 return TRUE;
2431
2432 /* Allow repeated pattern 0xXY00XY00. */
2433 v = i & 0xff00;
2434 v |= v << 16;
2435 if (i == v)
2289 return TRUE; 2436 return TRUE;
2290 } 2437 }
2291 2438
2292 return FALSE; 2439 return FALSE;
2293 } 2440 }
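(Worked example of the replicated-byte tests above, with XY = 0x4d: 0x004d004d and 0x4d4d4d4d were already accepted by the first pattern check, and the newly added check also accepts 0x4d004d00; a value such as 0x004d0000, whose bytes do not repeat in any of these forms, still falls through and the function returns FALSE.)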
3204 /* Canonicalize a comparison so that we are more likely to recognize it. 3351 /* Canonicalize a comparison so that we are more likely to recognize it.
3205 This can be done for a few constant compares, where we can make the 3352 This can be done for a few constant compares, where we can make the
3206 immediate value easier to load. */ 3353 immediate value easier to load. */
3207 3354
3208 enum rtx_code 3355 enum rtx_code
3209 arm_canonicalize_comparison (enum rtx_code code, enum machine_mode mode, 3356 arm_canonicalize_comparison (enum rtx_code code, rtx *op0, rtx *op1)
3210 rtx * op1) 3357 {
3211 { 3358 enum machine_mode mode;
3212 unsigned HOST_WIDE_INT i = INTVAL (*op1); 3359 unsigned HOST_WIDE_INT i, maxval;
3213 unsigned HOST_WIDE_INT maxval; 3360
3361 mode = GET_MODE (*op0);
3362 if (mode == VOIDmode)
3363 mode = GET_MODE (*op1);
3364
3214 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1; 3365 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
3366
3367 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
3368 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
3369 reversed or (for constant OP1) adjusted to GE/LT. Similarly
3370 for GTU/LEU in Thumb mode. */
3371 if (mode == DImode)
3372 {
3373 rtx tem;
3374
3375 /* To keep things simple, always use the Cirrus cfcmp64 if it is
3376 available. */
3377 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
3378 return code;
3379
3380 if (code == GT || code == LE
3381 || (!TARGET_ARM && (code == GTU || code == LEU)))
3382 {
3383 /* Missing comparison. First try to use an available
3384 comparison. */
3385 if (GET_CODE (*op1) == CONST_INT)
3386 {
3387 i = INTVAL (*op1);
3388 switch (code)
3389 {
3390 case GT:
3391 case LE:
3392 if (i != maxval
3393 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3394 {
3395 *op1 = GEN_INT (i + 1);
3396 return code == GT ? GE : LT;
3397 }
3398 break;
3399 case GTU:
3400 case LEU:
3401 if (i != ~((unsigned HOST_WIDE_INT) 0)
3402 && arm_const_double_by_immediates (GEN_INT (i + 1)))
3403 {
3404 *op1 = GEN_INT (i + 1);
3405 return code == GTU ? GEU : LTU;
3406 }
3407 break;
3408 default:
3409 gcc_unreachable ();
3410 }
3411 }
3412
3413 /* If that did not work, reverse the condition. */
3414 tem = *op0;
3415 *op0 = *op1;
3416 *op1 = tem;
3417 return swap_condition (code);
3418 }
3419
3420 return code;
3421 }
3422
3423 /* Comparisons smaller than DImode. Only adjust comparisons against
3424 an out-of-range constant. */
3425 if (GET_CODE (*op1) != CONST_INT
3426 || const_ok_for_arm (INTVAL (*op1))
3427 || const_ok_for_arm (- INTVAL (*op1)))
3428 return code;
3429
3430 i = INTVAL (*op1);
3215 3431
3216 switch (code) 3432 switch (code)
3217 { 3433 {
3218 case EQ: 3434 case EQ:
3219 case NE: 3435 case NE:
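(Illustration of the new DImode handling above, with operand names used only for exposition: for a constant c that is not the type's maximum and where c + 1 is still encodable as an immediate pair, x > c is rewritten as x >= c + 1 (GT becomes GE) and x <= c as x < c + 1 (LE becomes LT); GTU and LEU are adjusted the same way. When no such adjustment applies, the operands are swapped and swap_condition reverses the code, e.g. x > y becomes y < x, so the DImode compare patterns only need to handle the codes listed in the comment at the top of this block.)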
3487 3703
3488 /* Find the first field, ignoring non FIELD_DECL things which will 3704 /* Find the first field, ignoring non FIELD_DECL things which will
3489 have been created by C++. */ 3705 have been created by C++. */
3490 for (field = TYPE_FIELDS (type); 3706 for (field = TYPE_FIELDS (type);
3491 field && TREE_CODE (field) != FIELD_DECL; 3707 field && TREE_CODE (field) != FIELD_DECL;
3492 field = TREE_CHAIN (field)) 3708 field = DECL_CHAIN (field))
3493 continue; 3709 continue;
3494 3710
3495 if (field == NULL) 3711 if (field == NULL)
3496 return false; /* An empty structure. Allowed by an extension to ANSI C. */ 3712 return false; /* An empty structure. Allowed by an extension to ANSI C. */
3497 3713
3506 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE)) 3722 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
3507 return true; 3723 return true;
3508 3724
3509 /* Now check the remaining fields, if any. Only bitfields are allowed, 3725 /* Now check the remaining fields, if any. Only bitfields are allowed,
3510 since they are not addressable. */ 3726 since they are not addressable. */
3511 for (field = TREE_CHAIN (field); 3727 for (field = DECL_CHAIN (field);
3512 field; 3728 field;
3513 field = TREE_CHAIN (field)) 3729 field = DECL_CHAIN (field))
3514 { 3730 {
3515 if (TREE_CODE (field) != FIELD_DECL) 3731 if (TREE_CODE (field) != FIELD_DECL)
3516 continue; 3732 continue;
3517 3733
3518 if (!DECL_BIT_FIELD_TYPE (field)) 3734 if (!DECL_BIT_FIELD_TYPE (field))
3528 3744
3529 /* Unions can be returned in registers if every element is 3745 /* Unions can be returned in registers if every element is
3530 integral, or can be returned in an integer register. */ 3746 integral, or can be returned in an integer register. */
3531 for (field = TYPE_FIELDS (type); 3747 for (field = TYPE_FIELDS (type);
3532 field; 3748 field;
3533 field = TREE_CHAIN (field)) 3749 field = DECL_CHAIN (field))
3534 { 3750 {
3535 if (TREE_CODE (field) != FIELD_DECL) 3751 if (TREE_CODE (field) != FIELD_DECL)
3536 continue; 3752 continue;
3537 3753
3538 if (FLOAT_TYPE_P (TREE_TYPE (field))) 3754 if (FLOAT_TYPE_P (TREE_TYPE (field)))
3634 if (TARGET_AAPCS_BASED) 3850 if (TARGET_AAPCS_BASED)
3635 { 3851 {
3636 /* Detect varargs functions. These always use the base rules 3852 /* Detect varargs functions. These always use the base rules
3637 (no argument is ever a candidate for a co-processor 3853 (no argument is ever a candidate for a co-processor
3638 register). */ 3854 register). */
3639 bool base_rules = (TYPE_ARG_TYPES (type) != 0 3855 bool base_rules = stdarg_p (type);
3640 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (type)))
3641 != void_type_node));
3642 3856
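(For instance, given a declaration such as int f (const char *, ...), stdarg_p (type) is true and the base AAPCS rules are forced, which is exactly the condition the removed TYPE_ARG_TYPES / tree_last test used to compute.)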
3643 if (user_convention) 3857 if (user_convention)
3644 { 3858 {
3645 if (user_pcs > ARM_PCS_AAPCS_LOCAL) 3859 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
3646 sorry ("Non-AAPCS derived PCS variant"); 3860 sorry ("non-AAPCS derived PCS variant");
3647 else if (base_rules && user_pcs != ARM_PCS_AAPCS) 3861 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
3648 error ("Variadic functions must use the base AAPCS variant"); 3862 error ("variadic functions must use the base AAPCS variant");
3649 } 3863 }
3650 3864
3651 if (base_rules) 3865 if (base_rules)
3652 return ARM_PCS_AAPCS; 3866 return ARM_PCS_AAPCS;
3653 else if (user_convention) 3867 else if (user_convention)
3788 4002
3789 /* Can't handle incomplete types. */ 4003 /* Can't handle incomplete types. */
3790 if (!COMPLETE_TYPE_P(type)) 4004 if (!COMPLETE_TYPE_P(type))
3791 return -1; 4005 return -1;
3792 4006
3793 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 4007 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3794 { 4008 {
3795 if (TREE_CODE (field) != FIELD_DECL) 4009 if (TREE_CODE (field) != FIELD_DECL)
3796 continue; 4010 continue;
3797 4011
3798 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); 4012 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3820 4034
3821 /* Can't handle incomplete types. */ 4035 /* Can't handle incomplete types. */
3822 if (!COMPLETE_TYPE_P(type)) 4036 if (!COMPLETE_TYPE_P(type))
3823 return -1; 4037 return -1;
3824 4038
3825 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 4039 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
3826 { 4040 {
3827 if (TREE_CODE (field) != FIELD_DECL) 4041 if (TREE_CODE (field) != FIELD_DECL)
3828 continue; 4042 continue;
3829 4043
3830 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep); 4044 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
3852 /* Return true if PCS_VARIANT should use VFP registers. */ 4066 /* Return true if PCS_VARIANT should use VFP registers. */
3853 static bool 4067 static bool
3854 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double) 4068 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
3855 { 4069 {
3856 if (pcs_variant == ARM_PCS_AAPCS_VFP) 4070 if (pcs_variant == ARM_PCS_AAPCS_VFP)
3857 return true; 4071 {
4072 static bool seen_thumb1_vfp = false;
4073
4074 if (TARGET_THUMB1 && !seen_thumb1_vfp)
4075 {
4076 sorry ("Thumb-1 hard-float VFP ABI");
4077 /* sorry() is not immediately fatal, so only display this once. */
4078 seen_thumb1_vfp = true;
4079 }
4080
4081 return true;
4082 }
3858 4083
3859 if (pcs_variant != ARM_PCS_AAPCS_LOCAL) 4084 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
3860 return false; 4085 return false;
3861 4086
3862 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT && 4087 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
4084 4309
4085 #undef AAPCS_CP 4310 #undef AAPCS_CP
4086 4311
4087 static int 4312 static int
4088 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode, 4313 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4089 tree type) 4314 const_tree type)
4090 { 4315 {
4091 int i; 4316 int i;
4092 4317
4093 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) 4318 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
4094 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type)) 4319 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
4196 4421
4197 /* Lay out a function argument using the AAPCS rules. The rule 4422 /* Lay out a function argument using the AAPCS rules. The rule
4198 numbers referred to here are those in the AAPCS. */ 4423 numbers referred to here are those in the AAPCS. */
4199 static void 4424 static void
4200 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, 4425 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4201 tree type, int named) 4426 const_tree type, bool named)
4202 { 4427 {
4203 int nregs, nregs2; 4428 int nregs, nregs2;
4204 int ncrn; 4429 int ncrn;
4205 4430
4206 /* We only need to do this once per argument. */ 4431 /* We only need to do this once per argument. */
4360 } 4585 }
4361 } 4586 }
4362 4587
4363 4588
4364 /* Return true if mode/type need doubleword alignment. */ 4589 /* Return true if mode/type need doubleword alignment. */
4365 bool 4590 static bool
4366 arm_needs_doubleword_align (enum machine_mode mode, tree type) 4591 arm_needs_doubleword_align (enum machine_mode mode, const_tree type)
4367 { 4592 {
4368 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY 4593 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
4369 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY)); 4594 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
4370 } 4595 }
4371 4596
4379 This is null for libcalls where that information may 4604 This is null for libcalls where that information may
4380 not be available. 4605 not be available.
4381 CUM is a variable of type CUMULATIVE_ARGS which gives info about 4606 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4382 the preceding args and about the function being called. 4607 the preceding args and about the function being called.
4383 NAMED is nonzero if this argument is a named parameter 4608 NAMED is nonzero if this argument is a named parameter
4384 (otherwise it is an extra parameter matching an ellipsis). */ 4609 (otherwise it is an extra parameter matching an ellipsis).
4385 4610
4386 rtx 4611 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
4612 other arguments are passed on the stack. If (NAMED == 0) (which happens
4613 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
4614 defined), say it is passed on the stack (function_prologue will
4615 indeed make it be passed on the stack if necessary). */
4616
4617 static rtx
4387 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode, 4618 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4388 tree type, int named) 4619 const_tree type, bool named)
4389 { 4620 {
4390 int nregs; 4621 int nregs;
4391 4622
4392 /* Handle the special case quickly. Pick an arbitrary value for op2 of 4623 /* Handle the special case quickly. Pick an arbitrary value for op2 of
4393 a call insn (op3 of a call_value insn). */ 4624 a call insn (op3 of a call_value insn). */
4418 /* Put doubleword aligned quantities in even register pairs. */ 4649 /* Put doubleword aligned quantities in even register pairs. */
4419 if (pcum->nregs & 1 4650 if (pcum->nregs & 1
4420 && ARM_DOUBLEWORD_ALIGN 4651 && ARM_DOUBLEWORD_ALIGN
4421 && arm_needs_doubleword_align (mode, type)) 4652 && arm_needs_doubleword_align (mode, type))
4422 pcum->nregs++; 4653 pcum->nregs++;
4423
4424 if (mode == VOIDmode)
4425 /* Pick an arbitrary value for operand 2 of the call insn. */
4426 return const0_rtx;
4427 4654
4428 /* Only allow splitting an arg between regs and memory if all preceding 4655 /* Only allow splitting an arg between regs and memory if all preceding
4429 args were allocated to regs. For args passed by reference we only count 4656 args were allocated to regs. For args passed by reference we only count
4430 the reference pointer. */ 4657 the reference pointer. */
4431 if (pcum->can_split) 4658 if (pcum->can_split)
4437 return NULL_RTX; 4664 return NULL_RTX;
4438 4665
4439 return gen_rtx_REG (mode, pcum->nregs); 4666 return gen_rtx_REG (mode, pcum->nregs);
4440 } 4667 }
4441 4668
4669 static unsigned int
4670 arm_function_arg_boundary (enum machine_mode mode, const_tree type)
4671 {
4672 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
4673 ? DOUBLEWORD_ALIGNMENT
4674 : PARM_BOUNDARY);
4675 }
4676
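A brief worked illustration of the boundary hook above (editorial note, not part of the patch; the example declaration is hypothetical):

/* Editorial sketch: with ARM_DOUBLEWORD_ALIGN, a DImode argument has a
   64-bit mode alignment, which exceeds PARM_BOUNDARY (32 on ARM), so the
   hook reports DOUBLEWORD_ALIGNMENT.  Together with the even-register bump
   in arm_function_arg, a call such as
       void f (int a, long long b);
   places A in r0 and B in the pair r2:r3, leaving r1 unused rather than
   letting B straddle r1:r2.  */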
4442 static int 4677 static int
4443 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode, 4678 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4444 tree type, bool named) 4679 tree type, bool named)
4445 { 4680 {
4446 int nregs = pcum->nregs; 4681 int nregs = pcum->nregs;
4460 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD; 4695 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
4461 4696
4462 return 0; 4697 return 0;
4463 } 4698 }
4464 4699
4465 void 4700 /* Update the data in PCUM to advance over an argument
4701 of mode MODE and data type TYPE.
4702 (TYPE is null for libcalls where that information may not be available.) */
4703
4704 static void
4466 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode, 4705 arm_function_arg_advance (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
4467 tree type, bool named) 4706 const_tree type, bool named)
4468 { 4707 {
4469 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL) 4708 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
4470 { 4709 {
4471 aapcs_layout_arg (pcum, mode, type, named); 4710 aapcs_layout_arg (pcum, mode, type, named);
4472 4711
4884 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl) 5123 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
4885 crtl->uses_pic_offset_table = 1; 5124 crtl->uses_pic_offset_table = 1;
4886 } 5125 }
4887 else 5126 else
4888 { 5127 {
4889 rtx seq; 5128 rtx seq, insn;
4890 5129
4891 if (!cfun->machine->pic_reg) 5130 if (!cfun->machine->pic_reg)
4892 cfun->machine->pic_reg = gen_reg_rtx (Pmode); 5131 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
4893 5132
4894 /* Play games to avoid marking the function as needing pic 5133 /* Play games to avoid marking the function as needing pic
4901 5140
4902 arm_load_pic_register (0UL); 5141 arm_load_pic_register (0UL);
4903 5142
4904 seq = get_insns (); 5143 seq = get_insns ();
4905 end_sequence (); 5144 end_sequence ();
5145
5146 for (insn = seq; insn; insn = NEXT_INSN (insn))
5147 if (INSN_P (insn))
5148 INSN_LOCATOR (insn) = prologue_locator;
5149
4906 /* We can be called during expansion of PHI nodes, where 5150 /* We can be called during expansion of PHI nodes, where
4907 we can't yet emit instructions directly in the final 5151 we can't yet emit instructions directly in the final
4908 insn stream. Queue the insns on the entry edge, they will 5152 insn stream. Queue the insns on the entry edge, they will
4909 be committed after everything else is expanded. */ 5153 be committed after everything else is expanded. */
4910 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR)); 5154 insert_insn_on_edge (seq, single_succ_edge (ENTRY_BLOCK_PTR));
4917 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg) 5161 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
4918 { 5162 {
4919 if (GET_CODE (orig) == SYMBOL_REF 5163 if (GET_CODE (orig) == SYMBOL_REF
4920 || GET_CODE (orig) == LABEL_REF) 5164 || GET_CODE (orig) == LABEL_REF)
4921 { 5165 {
4922 rtx pic_ref, address;
4923 rtx insn; 5166 rtx insn;
4924 5167
4925 if (reg == 0) 5168 if (reg == 0)
4926 { 5169 {
4927 gcc_assert (can_create_pseudo_p ()); 5170 gcc_assert (can_create_pseudo_p ());
4928 reg = gen_reg_rtx (Pmode); 5171 reg = gen_reg_rtx (Pmode);
4929 address = gen_reg_rtx (Pmode); 5172 }
4930 }
4931 else
4932 address = reg;
4933 5173
4934 /* VxWorks does not impose a fixed gap between segments; the run-time 5174 /* VxWorks does not impose a fixed gap between segments; the run-time
4935 gap can be different from the object-file gap. We therefore can't 5175 gap can be different from the object-file gap. We therefore can't
4936 use GOTOFF unless we are absolutely sure that the symbol is in the 5176 use GOTOFF unless we are absolutely sure that the symbol is in the
4937 same segment as the GOT. Unfortunately, the flexibility of linker 5177 same segment as the GOT. Unfortunately, the flexibility of linker
4943 && NEED_GOT_RELOC 5183 && NEED_GOT_RELOC
4944 && !TARGET_VXWORKS_RTP) 5184 && !TARGET_VXWORKS_RTP)
4945 insn = arm_pic_static_addr (orig, reg); 5185 insn = arm_pic_static_addr (orig, reg);
4946 else 5186 else
4947 { 5187 {
5188 rtx pat;
5189 rtx mem;
5190
4948 /* If this function doesn't have a pic register, create one now. */ 5191 /* If this function doesn't have a pic register, create one now. */
4949 require_pic_register (); 5192 require_pic_register ();
4950 5193
4951 if (TARGET_32BIT) 5194 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
4952 emit_insn (gen_pic_load_addr_32bit (address, orig)); 5195
4953 else /* TARGET_THUMB1 */ 5196 /* Make the MEM as close to a constant as possible. */
4954 emit_insn (gen_pic_load_addr_thumb1 (address, orig)); 5197 mem = SET_SRC (pat);
4955 5198 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
4956 pic_ref = gen_const_mem (Pmode, 5199 MEM_READONLY_P (mem) = 1;
4957 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, 5200 MEM_NOTRAP_P (mem) = 1;
4958 address)); 5201
4959 insn = emit_move_insn (reg, pic_ref); 5202 insn = emit_insn (pat);
4960 } 5203 }
4961 5204
4962 /* Put a REG_EQUAL note on this insn, so that it can be optimized 5205 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4963 by loop. */ 5206 by loop. */
4964 set_unique_reg_note (insn, REG_EQUAL, orig); 5207 set_unique_reg_note (insn, REG_EQUAL, orig);
5234 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1)); 5477 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
5235 5478
5236 return FALSE; 5479 return FALSE;
5237 } 5480 }
5238 5481
5482 /* Return true if X will surely end up in an index register after next
5483 splitting pass. */
5484 static bool
5485 will_be_in_index_register (const_rtx x)
5486 {
5487 /* arm.md: calculate_pic_address will split this into a register. */
5488 return GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_SYM;
5489 }
5490
5239 /* Return nonzero if X is a valid ARM state address operand. */ 5491 /* Return nonzero if X is a valid ARM state address operand. */
5240 int 5492 int
5241 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer, 5493 arm_legitimate_address_outer_p (enum machine_mode mode, rtx x, RTX_CODE outer,
5242 int strict_p) 5494 int strict_p)
5243 { 5495 {
5291 { 5543 {
5292 rtx xop0 = XEXP (x, 0); 5544 rtx xop0 = XEXP (x, 0);
5293 rtx xop1 = XEXP (x, 1); 5545 rtx xop1 = XEXP (x, 1);
5294 5546
5295 return ((arm_address_register_rtx_p (xop0, strict_p) 5547 return ((arm_address_register_rtx_p (xop0, strict_p)
5296 && GET_CODE(xop1) == CONST_INT 5548 && ((GET_CODE(xop1) == CONST_INT
5297 && arm_legitimate_index_p (mode, xop1, outer, strict_p)) 5549 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
5550 || (!strict_p && will_be_in_index_register (xop1))))
5298 || (arm_address_register_rtx_p (xop1, strict_p) 5551 || (arm_address_register_rtx_p (xop1, strict_p)
5299 && arm_legitimate_index_p (mode, xop0, outer, strict_p))); 5552 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
5300 } 5553 }
5301 5554
5302 #if 0 5555 #if 0
5378 { 5631 {
5379 rtx xop0 = XEXP (x, 0); 5632 rtx xop0 = XEXP (x, 0);
5380 rtx xop1 = XEXP (x, 1); 5633 rtx xop1 = XEXP (x, 1);
5381 5634
5382 return ((arm_address_register_rtx_p (xop0, strict_p) 5635 return ((arm_address_register_rtx_p (xop0, strict_p)
5383 && thumb2_legitimate_index_p (mode, xop1, strict_p)) 5636 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
5637 || (!strict_p && will_be_in_index_register (xop1))))
5384 || (arm_address_register_rtx_p (xop1, strict_p) 5638 || (arm_address_register_rtx_p (xop1, strict_p)
5385 && thumb2_legitimate_index_p (mode, xop0, strict_p))); 5639 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
5386 } 5640 }
5387 5641
5388 else if (GET_MODE_CLASS (mode) != MODE_FLOAT 5642 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
5405 HOST_WIDE_INT range; 5659 HOST_WIDE_INT range;
5406 enum rtx_code code = GET_CODE (index); 5660 enum rtx_code code = GET_CODE (index);
5407 5661
5408 /* Standard coprocessor addressing modes. */ 5662 /* Standard coprocessor addressing modes. */
5409 if (TARGET_HARD_FLOAT 5663 if (TARGET_HARD_FLOAT
5410 && (TARGET_FPA || TARGET_MAVERICK) 5664 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5411 && (GET_MODE_CLASS (mode) == MODE_FLOAT 5665 && (mode == SFmode || mode == DFmode
5412 || (TARGET_MAVERICK && mode == DImode))) 5666 || (TARGET_MAVERICK && mode == DImode)))
5413 return (code == CONST_INT && INTVAL (index) < 1024 5667 return (code == CONST_INT && INTVAL (index) < 1024
5414 && INTVAL (index) > -1024 5668 && INTVAL (index) > -1024
5415 && (INTVAL (index) & 3) == 0); 5669 && (INTVAL (index) & 3) == 0);
5416 5670
5417 if (TARGET_NEON 5671 /* For quad modes, we restrict the constant offset to be slightly less
5418 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))) 5672 than what the instruction format permits. We do this because for
5673 quad mode moves, we will actually decompose them into two separate
5674 double-mode reads or writes. INDEX must therefore be a valid
5675 (double-mode) offset and so should INDEX+8. */
5676 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5419 return (code == CONST_INT 5677 return (code == CONST_INT
5420 && INTVAL (index) < 1016 5678 && INTVAL (index) < 1016
5679 && INTVAL (index) > -1024
5680 && (INTVAL (index) & 3) == 0);
5681
5682 /* We have no such constraint on double mode offsets, so we permit the
5683 full range of the instruction format. */
5684 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5685 return (code == CONST_INT
5686 && INTVAL (index) < 1024
5421 && INTVAL (index) > -1024 5687 && INTVAL (index) > -1024
5422 && (INTVAL (index) & 3) == 0); 5688 && (INTVAL (index) & 3) == 0);
5423 5689
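A quick arithmetic check of the quad-mode bound above (editorial note, not from the patch):

/* Editorial sketch: the largest quad-mode offset accepted is 1012
   (INTVAL < 1016 and a multiple of 4).  Splitting the access into two
   double-mode moves uses offsets 1012 and 1012 + 8 = 1020, and 1020 is
   still within the double-mode range (< 1024), as the comment requires.  */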
5424 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) 5690 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5425 return (code == CONST_INT 5691 return (code == CONST_INT
5514 enum rtx_code code = GET_CODE (index); 5780 enum rtx_code code = GET_CODE (index);
5515 5781
5516 /* ??? Combine arm and thumb2 coprocessor addressing modes. */ 5782 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
5517 /* Standard coprocessor addressing modes. */ 5783 /* Standard coprocessor addressing modes. */
5518 if (TARGET_HARD_FLOAT 5784 if (TARGET_HARD_FLOAT
5519 && (TARGET_FPA || TARGET_MAVERICK) 5785 && (TARGET_VFP || TARGET_FPA || TARGET_MAVERICK)
5520 && (GET_MODE_CLASS (mode) == MODE_FLOAT 5786 && (mode == SFmode || mode == DFmode
5521 || (TARGET_MAVERICK && mode == DImode))) 5787 || (TARGET_MAVERICK && mode == DImode)))
5522 return (code == CONST_INT && INTVAL (index) < 1024 5788 return (code == CONST_INT && INTVAL (index) < 1024
5523 && INTVAL (index) > -1024 5789 /* Thumb-2 allows only > -256 index range for its core register
5790 load/stores. Since we allow SF/DF in core registers, we have
5791 to use the intersection between -256~4096 (core) and -1024~1024
5792 (coprocessor). */
5793 && INTVAL (index) > -256
5524 && (INTVAL (index) & 3) == 0); 5794 && (INTVAL (index) & 3) == 0);
5525 5795
5526 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) 5796 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
5527 { 5797 {
5528 /* For DImode assume values will usually live in core regs 5798 /* For DImode assume values will usually live in core regs
5532 && INTVAL (index) < 1024 5802 && INTVAL (index) < 1024
5533 && INTVAL (index) > -1024 5803 && INTVAL (index) > -1024
5534 && (INTVAL (index) & 3) == 0); 5804 && (INTVAL (index) & 3) == 0);
5535 } 5805 }
5536 5806
5537 if (TARGET_NEON 5807 /* For quad modes, we restrict the constant offset to be slightly less
5538 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))) 5808 than what the instruction format permits. We do this because for
5809 quad mode moves, we will actually decompose them into two separate
5810 double-mode reads or writes. INDEX must therefore be a valid
5811 (double-mode) offset and so should INDEX+8. */
5812 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
5539 return (code == CONST_INT 5813 return (code == CONST_INT
5540 && INTVAL (index) < 1016 5814 && INTVAL (index) < 1016
5815 && INTVAL (index) > -1024
5816 && (INTVAL (index) & 3) == 0);
5817
5818 /* We have no such constraint on double mode offsets, so we permit the
5819 full range of the instruction format. */
5820 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
5821 return (code == CONST_INT
5822 && INTVAL (index) < 1024
5541 && INTVAL (index) > -1024 5823 && INTVAL (index) > -1024
5542 && (INTVAL (index) & 3) == 0); 5824 && (INTVAL (index) & 3) == 0);
5543 5825
5544 if (arm_address_register_rtx_p (index, strict_p) 5826 if (arm_address_register_rtx_p (index, strict_p)
5545 && (GET_MODE_SIZE (mode) <= 4)) 5827 && (GET_MODE_SIZE (mode) <= 4))
5681 permits SP+OFFSET. */ 5963 permits SP+OFFSET. */
5682 if (GET_MODE_SIZE (mode) <= 4 5964 if (GET_MODE_SIZE (mode) <= 4
5683 && XEXP (x, 0) != frame_pointer_rtx 5965 && XEXP (x, 0) != frame_pointer_rtx
5684 && XEXP (x, 1) != frame_pointer_rtx 5966 && XEXP (x, 1) != frame_pointer_rtx
5685 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) 5967 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5686 && thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)) 5968 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
5969 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
5687 return 1; 5970 return 1;
5688 5971
5689 /* REG+const has 5-7 bit offset for non-SP registers. */ 5972 /* REG+const has 5-7 bit offset for non-SP registers. */
5690 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p) 5973 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
5691 || XEXP (x, 0) == arg_pointer_rtx) 5974 || XEXP (x, 0) == arg_pointer_rtx)
5708 5991
5709 else if (GET_CODE (XEXP (x, 0)) == REG 5992 else if (GET_CODE (XEXP (x, 0)) == REG
5710 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM 5993 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
5711 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM 5994 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
5712 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER 5995 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
5713 && REGNO (XEXP (x, 0)) <= LAST_VIRTUAL_REGISTER)) 5996 && REGNO (XEXP (x, 0))
5997 <= LAST_VIRTUAL_POINTER_REGISTER))
5714 && GET_MODE_SIZE (mode) >= 4 5998 && GET_MODE_SIZE (mode) >= 4
5715 && GET_CODE (XEXP (x, 1)) == CONST_INT 5999 && GET_CODE (XEXP (x, 1)) == CONST_INT
5716 && (INTVAL (XEXP (x, 1)) & 3) == 0) 6000 && (INTVAL (XEXP (x, 1)) & 3) == 0)
5717 return 1; 6001 return 1;
5718 } 6002 }
6106 } 6390 }
6107 6391
6108 return x; 6392 return x;
6109 } 6393 }
6110 6394
6395 bool
6396 arm_legitimize_reload_address (rtx *p,
6397 enum machine_mode mode,
6398 int opnum, int type,
6399 int ind_levels ATTRIBUTE_UNUSED)
6400 {
6401 if (GET_CODE (*p) == PLUS
6402 && GET_CODE (XEXP (*p, 0)) == REG
6403 && ARM_REGNO_OK_FOR_BASE_P (REGNO (XEXP (*p, 0)))
6404 && GET_CODE (XEXP (*p, 1)) == CONST_INT)
6405 {
6406 HOST_WIDE_INT val = INTVAL (XEXP (*p, 1));
6407 HOST_WIDE_INT low, high;
6408
6409 if (mode == DImode || (mode == DFmode && TARGET_SOFT_FLOAT))
6410 low = ((val & 0xf) ^ 0x8) - 0x8;
6411 else if (TARGET_MAVERICK && TARGET_HARD_FLOAT)
6412 /* Need to be careful, -256 is not a valid offset. */
6413 low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
6414 else if (mode == SImode
6415 || (mode == SFmode && TARGET_SOFT_FLOAT)
6416 || ((mode == HImode || mode == QImode) && ! arm_arch4))
6417 /* Need to be careful, -4096 is not a valid offset. */
6418 low = val >= 0 ? (val & 0xfff) : -((-val) & 0xfff);
6419 else if ((mode == HImode || mode == QImode) && arm_arch4)
6420 /* Need to be careful, -256 is not a valid offset. */
6421 low = val >= 0 ? (val & 0xff) : -((-val) & 0xff);
6422 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6423 && TARGET_HARD_FLOAT && TARGET_FPA)
6424 /* Need to be careful, -1024 is not a valid offset. */
6425 low = val >= 0 ? (val & 0x3ff) : -((-val) & 0x3ff);
6426 else
6427 return false;
6428
6429 high = ((((val - low) & (unsigned HOST_WIDE_INT) 0xffffffff)
6430 ^ (unsigned HOST_WIDE_INT) 0x80000000)
6431 - (unsigned HOST_WIDE_INT) 0x80000000);
6432 /* Check for overflow or zero */
6433 if (low == 0 || high == 0 || (high + low != val))
6434 return false;
6435
6436 /* Reload the high part into a base reg; leave the low part
6437 in the mem. */
6438 *p = gen_rtx_PLUS (GET_MODE (*p),
6439 gen_rtx_PLUS (GET_MODE (*p), XEXP (*p, 0),
6440 GEN_INT (high)),
6441 GEN_INT (low));
6442 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
6443 MODE_BASE_REG_CLASS (mode), GET_MODE (*p),
6444 VOIDmode, 0, 0, opnum, (enum reload_type) type);
6445 return true;
6446 }
6447
6448 return false;
6449 }
6450
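The high/low split above can be checked in isolation; here is a minimal standalone sketch (editorial, using a hypothetical out-of-range offset, not part of the patch):

#include <assert.h>
#include <stdint.h>

/* Editorial sketch of the SImode case of the split performed above.  */
int
main (void)
{
  int64_t val = 0x1234;                 /* hypothetical out-of-range offset */
  int64_t low = val >= 0 ? (val & 0xfff) : -((-val) & 0xfff);
  int64_t high = ((((val - low) & 0xffffffff)
                   ^ 0x80000000) - 0x80000000); /* sign-extend to 32 bits */

  assert (low == 0x234 && high == 0x1000 && high + low == val);
  /* The address is rewritten as (reg + 0x1000) + 0x234: reload puts
     reg + 0x1000 into a base register and the load keeps the legal
     12-bit offset 0x234.  */
  return 0;
}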
6111 rtx 6451 rtx
6112 thumb_legitimize_reload_address (rtx *x_p, 6452 thumb_legitimize_reload_address (rtx *x_p,
6113 enum machine_mode mode, 6453 enum machine_mode mode,
6114 int opnum, int type, 6454 int opnum, int type,
6115 int ind_levels ATTRIBUTE_UNUSED) 6455 int ind_levels ATTRIBUTE_UNUSED)
6217 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG)) 6557 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
6218 6558
6219 #define REG_OR_SUBREG_RTX(X) \ 6559 #define REG_OR_SUBREG_RTX(X) \
6220 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X)) 6560 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
6221 6561
6222 #ifndef COSTS_N_INSNS
6223 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
6224 #endif
6225 static inline int 6562 static inline int
6226 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) 6563 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6227 { 6564 {
6228 enum machine_mode mode = GET_MODE (x); 6565 enum machine_mode mode = GET_MODE (x);
6566 int total;
6229 6567
6230 switch (code) 6568 switch (code)
6231 { 6569 {
6232 case ASHIFT: 6570 case ASHIFT:
6233 case ASHIFTRT: 6571 case ASHIFTRT:
6322 /* XXX a guess. */ 6660 /* XXX a guess. */
6323 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) 6661 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
6324 return 14; 6662 return 14;
6325 return 2; 6663 return 2;
6326 6664
6665 case SIGN_EXTEND:
6327 case ZERO_EXTEND: 6666 case ZERO_EXTEND:
6328 /* XXX still guessing. */ 6667 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
6329 switch (GET_MODE (XEXP (x, 0))) 6668 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
6330 { 6669
6331 case QImode: 6670 if (mode == SImode)
6332 return (1 + (mode == DImode ? 4 : 0) 6671 return total;
6333 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); 6672
6334 6673 if (arm_arch6)
6335 case HImode: 6674 return total + COSTS_N_INSNS (1);
6336 return (4 + (mode == DImode ? 4 : 0) 6675
6337 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); 6676 /* Assume a two-shift sequence. Increase the cost slightly so
6338 6677 we prefer actual shifts over an extend operation. */
6339 case SImode: 6678 return total + 1 + COSTS_N_INSNS (2);
6340 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
6341
6342 default:
6343 return 99;
6344 }
6345 6679
6346 default: 6680 default:
6347 return 99; 6681 return 99;
6348 } 6682 }
6349 } 6683 }
6408 ++*total; 6742 ++*total;
6409 6743
6410 return true; 6744 return true;
6411 6745
6412 case MINUS: 6746 case MINUS:
6413 if (TARGET_THUMB2)
6414 {
6415 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
6416 {
6417 if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode))
6418 *total = COSTS_N_INSNS (1);
6419 else
6420 *total = COSTS_N_INSNS (20);
6421 }
6422 else
6423 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6424 /* Thumb2 does not have RSB, so all arguments must be
6425 registers (subtracting a constant is canonicalized as
6426 addition of the negated constant). */
6427 return false;
6428 }
6429
6430 if (mode == DImode) 6747 if (mode == DImode)
6431 { 6748 {
6432 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); 6749 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
6433 if (GET_CODE (XEXP (x, 0)) == CONST_INT 6750 if (GET_CODE (XEXP (x, 0)) == CONST_INT
6434 && const_ok_for_arm (INTVAL (XEXP (x, 0)))) 6751 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
6582 /* Normally the frame registers will be spilt into reg+const during 6899 /* Normally the frame registers will be spilt into reg+const during
6583 reload, so it is a bad idea to combine them with other instructions, 6900 reload, so it is a bad idea to combine them with other instructions,
6584 since then they might not be moved outside of loops. As a compromise 6901 since then they might not be moved outside of loops. As a compromise
6585 we allow integration with ops that have a constant as their second 6902 we allow integration with ops that have a constant as their second
6586 operand. */ 6903 operand. */
6587 if ((REG_OR_SUBREG_REG (XEXP (x, 0)) 6904 if (REG_OR_SUBREG_REG (XEXP (x, 0))
6588 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0))) 6905 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
6589 && GET_CODE (XEXP (x, 1)) != CONST_INT) 6906 && GET_CODE (XEXP (x, 1)) != CONST_INT)
6590 || (REG_OR_SUBREG_REG (XEXP (x, 0)) 6907 *total = COSTS_N_INSNS (1);
6591 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
6592 *total = 4;
6593 6908
6594 if (mode == DImode) 6909 if (mode == DImode)
6595 { 6910 {
6596 *total += COSTS_N_INSNS (2); 6911 *total += COSTS_N_INSNS (2);
6597 if (GET_CODE (XEXP (x, 1)) == CONST_INT 6912 if (GET_CODE (XEXP (x, 1)) == CONST_INT
6825 if (mode == DImode) 7140 if (mode == DImode)
6826 *total += COSTS_N_INSNS (3); 7141 *total += COSTS_N_INSNS (3);
6827 return false; 7142 return false;
6828 7143
6829 case SIGN_EXTEND: 7144 case SIGN_EXTEND:
6830 if (GET_MODE_CLASS (mode) == MODE_INT)
6831 {
6832 *total = 0;
6833 if (mode == DImode)
6834 *total += COSTS_N_INSNS (1);
6835
6836 if (GET_MODE (XEXP (x, 0)) != SImode)
6837 {
6838 if (arm_arch6)
6839 {
6840 if (GET_CODE (XEXP (x, 0)) != MEM)
6841 *total += COSTS_N_INSNS (1);
6842 }
6843 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM)
6844 *total += COSTS_N_INSNS (2);
6845 }
6846
6847 return false;
6848 }
6849
6850 /* Fall through */
6851 case ZERO_EXTEND: 7145 case ZERO_EXTEND:
6852 *total = 0; 7146 *total = 0;
6853 if (GET_MODE_CLASS (mode) == MODE_INT) 7147 if (GET_MODE_CLASS (mode) == MODE_INT)
6854 { 7148 {
7149 rtx op = XEXP (x, 0);
7150 enum machine_mode opmode = GET_MODE (op);
7151
6855 if (mode == DImode) 7152 if (mode == DImode)
6856 *total += COSTS_N_INSNS (1); 7153 *total += COSTS_N_INSNS (1);
6857 7154
6858 if (GET_MODE (XEXP (x, 0)) != SImode) 7155 if (opmode != SImode)
6859 { 7156 {
6860 if (arm_arch6) 7157 if (MEM_P (op))
6861 { 7158 {
6862 if (GET_CODE (XEXP (x, 0)) != MEM) 7159 /* If !arm_arch4, we use one of the extendhisi2_mem
6863 *total += COSTS_N_INSNS (1); 7160 or movhi_bytes patterns for HImode. For a QImode
7161 sign extension, we first zero-extend from memory
7162 and then perform a shift sequence. */
7163 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
7164 *total += COSTS_N_INSNS (2);
6864 } 7165 }
6865 else if (!arm_arch4 || GET_CODE (XEXP (x, 0)) != MEM) 7166 else if (arm_arch6)
6866 *total += COSTS_N_INSNS (GET_MODE (XEXP (x, 0)) == QImode ? 7167 *total += COSTS_N_INSNS (1);
6867 1 : 2); 7168
7169 /* We don't have the necessary insn, so we need to perform some
7170 other operation. */
7171 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
7172 /* An and with constant 255. */
7173 *total += COSTS_N_INSNS (1);
7174 else
7175 /* A shift sequence. Increase costs slightly to avoid
7176 combining two shifts into an extend operation. */
7177 *total += COSTS_N_INSNS (2) + 1;
6868 } 7178 }
6869 7179
6870 return false; 7180 return false;
6871 } 7181 }
6872 7182
6967 7277
6968 case CONST_INT: 7278 case CONST_INT:
6969 if (outer == SET) 7279 if (outer == SET)
6970 { 7280 {
6971 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) 7281 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6972 return 0; 7282 return COSTS_N_INSNS (1);
7283 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
7284 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
7285 return COSTS_N_INSNS (2);
7286 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
6973 if (thumb_shiftable_const (INTVAL (x))) 7287 if (thumb_shiftable_const (INTVAL (x)))
6974 return COSTS_N_INSNS (2); 7288 return COSTS_N_INSNS (2);
6975 return COSTS_N_INSNS (3); 7289 return COSTS_N_INSNS (3);
6976 } 7290 }
6977 else if ((outer == PLUS || outer == COMPARE) 7291 else if ((outer == PLUS || outer == COMPARE)
7071 case MEM: 7385 case MEM:
7072 /* A memory access costs 1 insn if the mode is small, or the address is 7386 /* A memory access costs 1 insn if the mode is small, or the address is
7073 a single register, otherwise it costs one insn per word. */ 7387 a single register, otherwise it costs one insn per word. */
7074 if (REG_P (XEXP (x, 0))) 7388 if (REG_P (XEXP (x, 0)))
7075 *total = COSTS_N_INSNS (1); 7389 *total = COSTS_N_INSNS (1);
7390 else if (flag_pic
7391 && GET_CODE (XEXP (x, 0)) == PLUS
7392 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
7393 /* This will be split into two instructions.
7394 See arm.md:calculate_pic_address. */
7395 *total = COSTS_N_INSNS (2);
7076 else 7396 else
7077 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode)); 7397 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
7078 return true; 7398 return true;
7079 7399
7080 case DIV: 7400 case DIV:
7218 else 7538 else
7219 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode)); 7539 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
7220 return false; 7540 return false;
7221 7541
7222 case SIGN_EXTEND: 7542 case SIGN_EXTEND:
7223 *total = 0;
7224 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
7225 {
7226 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7227 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7228 }
7229 if (mode == DImode)
7230 *total += COSTS_N_INSNS (1);
7231 return false;
7232
7233 case ZERO_EXTEND: 7543 case ZERO_EXTEND:
7234 *total = 0; 7544 return arm_rtx_costs_1 (x, outer_code, total, 0);
7235 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
7236 {
7237 switch (GET_MODE (XEXP (x, 0)))
7238 {
7239 case QImode:
7240 *total += COSTS_N_INSNS (1);
7241 break;
7242
7243 case HImode:
7244 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
7245
7246 case SImode:
7247 break;
7248
7249 default:
7250 *total += COSTS_N_INSNS (2);
7251 }
7252 }
7253
7254 if (mode == DImode)
7255 *total += COSTS_N_INSNS (1);
7256
7257 return false;
7258 7545
7259 case CONST_INT: 7546 case CONST_INT:
7260 if (const_ok_for_arm (INTVAL (x))) 7547 if (const_ok_for_arm (INTVAL (x)))
7261 /* A multiplication by a constant requires another instruction 7548 /* A multiplication by a constant requires another instruction
7262 to load the constant to a register. */ 7549 to load the constant to a register. */
7658 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) 7945 arm_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
7659 { 7946 {
7660 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x); 7947 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
7661 } 7948 }
7662 7949
7663 static int 7950 /* Adjust cost hook for XScale. */
7664 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost) 7951 static bool
7665 { 7952 xscale_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
7666 rtx i_pat, d_pat; 7953 {
7667
7668 /* Some true dependencies can have a higher cost depending 7954 /* Some true dependencies can have a higher cost depending
7669 on precisely how certain input operands are used. */ 7955 on precisely how certain input operands are used. */
7670 if (arm_tune_xscale 7956 if (REG_NOTE_KIND(link) == 0
7671 && REG_NOTE_KIND (link) == 0
7672 && recog_memoized (insn) >= 0 7957 && recog_memoized (insn) >= 0
7673 && recog_memoized (dep) >= 0) 7958 && recog_memoized (dep) >= 0)
7674 { 7959 {
7675 int shift_opnum = get_attr_shift (insn); 7960 int shift_opnum = get_attr_shift (insn);
7676 enum attr_type attr_type = get_attr_type (dep); 7961 enum attr_type attr_type = get_attr_type (dep);
7700 if (recog_data.operand_type[opno] == OP_IN) 7985 if (recog_data.operand_type[opno] == OP_IN)
7701 continue; 7986 continue;
7702 7987
7703 if (reg_overlap_mentioned_p (recog_data.operand[opno], 7988 if (reg_overlap_mentioned_p (recog_data.operand[opno],
7704 shifted_operand)) 7989 shifted_operand))
7705 return 2; 7990 {
7991 *cost = 2;
7992 return false;
7993 }
7706 } 7994 }
7707 } 7995 }
7996 }
7997 return true;
7998 }
7999
8000 /* Adjust cost hook for Cortex A9. */
8001 static bool
8002 cortex_a9_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8003 {
8004 switch (REG_NOTE_KIND (link))
8005 {
8006 case REG_DEP_ANTI:
8007 *cost = 0;
8008 return false;
8009
8010 case REG_DEP_TRUE:
8011 case REG_DEP_OUTPUT:
8012 if (recog_memoized (insn) >= 0
8013 && recog_memoized (dep) >= 0)
8014 {
8015 if (GET_CODE (PATTERN (insn)) == SET)
8016 {
8017 if (GET_MODE_CLASS
8018 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
8019 || GET_MODE_CLASS
8020 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
8021 {
8022 enum attr_type attr_type_insn = get_attr_type (insn);
8023 enum attr_type attr_type_dep = get_attr_type (dep);
8024
8025 /* By default all dependencies of the form
8026 s0 = s0 <op> s1
8027 s0 = s0 <op> s2
8028 have an extra latency of 1 cycle because
8029 of the input and output dependency in this
8030 case. However, this gets modeled as a true
8031 dependency and hence all these checks. */
8032 if (REG_P (SET_DEST (PATTERN (insn)))
8033 && REG_P (SET_DEST (PATTERN (dep)))
8034 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
8035 SET_DEST (PATTERN (dep))))
8036 {
8037 /* FMACS is a special case where the dependent
8038 instruction can be issued 3 cycles before
8039 the normal latency in case of an output
8040 dependency. */
8041 if ((attr_type_insn == TYPE_FMACS
8042 || attr_type_insn == TYPE_FMACD)
8043 && (attr_type_dep == TYPE_FMACS
8044 || attr_type_dep == TYPE_FMACD))
8045 {
8046 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8047 *cost = insn_default_latency (dep) - 3;
8048 else
8049 *cost = insn_default_latency (dep);
8050 return false;
8051 }
8052 else
8053 {
8054 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
8055 *cost = insn_default_latency (dep) + 1;
8056 else
8057 *cost = insn_default_latency (dep);
8058 }
8059 return false;
8060 }
8061 }
8062 }
8063 }
8064 break;
8065
8066 default:
8067 gcc_unreachable ();
8068 }
8069
8070 return true;
8071 }
8072
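A hedged example of the adjustment above (editorial note; the latency figure is hypothetical):

/* Editorial sketch: suppose insn_default_latency (dep) is 4.  For an
   output dependence between two FMACS/FMACD instructions whose destination
   registers overlap, the cost becomes 4 - 3 = 1 cycle; for any other
   overlapping floating-point pair with an output dependence it becomes
   4 + 1 = 5, reflecting the extra cycle for the combined input/output
   dependency described in the comment.  */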
8073 /* Adjust cost hook for FA726TE. */
8074 static bool
8075 fa726te_sched_adjust_cost (rtx insn, rtx link, rtx dep, int * cost)
8076 {
8077 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
8078 has a penalty of 3. */
8079 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
8080 && recog_memoized (insn) >= 0
8081 && recog_memoized (dep) >= 0
8082 && get_attr_conds (dep) == CONDS_SET)
8083 {
8084 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
8085 if (get_attr_conds (insn) == CONDS_USE
8086 && get_attr_type (insn) != TYPE_BRANCH)
8087 {
8088 *cost = 3;
8089 return false;
8090 }
8091
8092 if (GET_CODE (PATTERN (insn)) == COND_EXEC
8093 || get_attr_conds (insn) == CONDS_USE)
8094 {
8095 *cost = 0;
8096 return false;
8097 }
8098 }
8099
8100 return true;
8101 }
8102
8103 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
8104 It corrects the value of COST based on the relationship between
8105 INSN and DEP through the dependence LINK. It returns the new
8106 value. There is a per-core adjust_cost hook to adjust scheduler costs
8107 and the per-core hook can choose to completely override the generic
8108 adjust_cost function. Only put bits of code into arm_adjust_cost that
8109 are common across all cores. */
8110 static int
8111 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
8112 {
8113 rtx i_pat, d_pat;
8114
8115 /* When generating Thumb-1 code, we want to place flag-setting operations
8116 close to a conditional branch which depends on them, so that we can
8117 omit the comparison. */
8118 if (TARGET_THUMB1
8119 && REG_NOTE_KIND (link) == 0
8120 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
8121 && recog_memoized (dep) >= 0
8122 && get_attr_conds (dep) == CONDS_SET)
8123 return 0;
8124
8125 if (current_tune->sched_adjust_cost != NULL)
8126 {
8127 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
8128 return cost;
7708 } 8129 }
7709 8130
7710 /* XXX This is not strictly true for the FPA. */ 8131 /* XXX This is not strictly true for the FPA. */
7711 if (REG_NOTE_KIND (link) == REG_DEP_ANTI 8132 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
7712 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT) 8133 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
7726 /* This is a load after a store, there is no conflict if the load reads 8147 /* This is a load after a store, there is no conflict if the load reads
7727 from a cached area. Assume that loads from the stack, and from the 8148 from a cached area. Assume that loads from the stack, and from the
7728 constant pool are cached, and that others will miss. This is a 8149 constant pool are cached, and that others will miss. This is a
7729 hack. */ 8150 hack. */
7730 8151
7731 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem)) 8152 if ((GET_CODE (src_mem) == SYMBOL_REF
8153 && CONSTANT_POOL_ADDRESS_P (src_mem))
7732 || reg_mentioned_p (stack_pointer_rtx, src_mem) 8154 || reg_mentioned_p (stack_pointer_rtx, src_mem)
7733 || reg_mentioned_p (frame_pointer_rtx, src_mem) 8155 || reg_mentioned_p (frame_pointer_rtx, src_mem)
7734 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem)) 8156 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
7735 return 1; 8157 return 1;
7736 } 8158 }
8277 /* We can load this constant by using VDUP and a constant in a 8699 /* We can load this constant by using VDUP and a constant in a
8278 single ARM register. This will be cheaper than a vector 8700 single ARM register. This will be cheaper than a vector
8279 load. */ 8701 load. */
8280 8702
8281 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); 8703 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8282 return gen_rtx_UNSPEC (mode, gen_rtvec (1, x), 8704 return gen_rtx_VEC_DUPLICATE (mode, x);
8283 UNSPEC_VDUP_N);
8284 } 8705 }
8285 8706
8286 /* Generate code to load VALS, which is a PARALLEL containing only 8707 /* Generate code to load VALS, which is a PARALLEL containing only
8287 constants (for vec_init) or CONST_VECTOR, efficiently into a 8708 constants (for vec_init) or CONST_VECTOR, efficiently into a
8288 register. Returns an RTX to copy into the register, or NULL_RTX 8709 register. Returns an RTX to copy into the register, or NULL_RTX
8374 /* Splat a single non-constant element if we can. */ 8795 /* Splat a single non-constant element if we can. */
8375 if (all_same && GET_MODE_SIZE (inner_mode) <= 4) 8796 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
8376 { 8797 {
8377 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); 8798 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
8378 emit_insn (gen_rtx_SET (VOIDmode, target, 8799 emit_insn (gen_rtx_SET (VOIDmode, target,
8379 gen_rtx_UNSPEC (mode, gen_rtvec (1, x), 8800 gen_rtx_VEC_DUPLICATE (mode, x)));
8380 UNSPEC_VDUP_N)));
8381 return; 8801 return;
8382 } 8802 }
8383 8803
8384 /* One field is non-constant. Load constant then overwrite varying 8804 /* One field is non-constant. Load constant then overwrite varying
8385 field. This is more efficient than using the stack. */ 8805 field. This is more efficient than using the stack. */
8386 if (n_var == 1) 8806 if (n_var == 1)
8387 { 8807 {
8388 rtx copy = copy_rtx (vals); 8808 rtx copy = copy_rtx (vals);
8389 rtvec ops; 8809 rtx index = GEN_INT (one_var);
8390 8810
8391 /* Load constant part of vector, substitute neighboring value for 8811 /* Load constant part of vector, substitute neighboring value for
8392 varying element. */ 8812 varying element. */
8393 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts); 8813 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
8394 neon_expand_vector_init (target, copy); 8814 neon_expand_vector_init (target, copy);
8395 8815
8396 /* Insert variable. */ 8816 /* Insert variable. */
8397 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); 8817 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
8398 ops = gen_rtvec (3, x, target, GEN_INT (one_var)); 8818 switch (mode)
8399 emit_insn (gen_rtx_SET (VOIDmode, target, 8819 {
8400 gen_rtx_UNSPEC (mode, ops, UNSPEC_VSET_LANE))); 8820 case V8QImode:
8821 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
8822 break;
8823 case V16QImode:
8824 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
8825 break;
8826 case V4HImode:
8827 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
8828 break;
8829 case V8HImode:
8830 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
8831 break;
8832 case V2SImode:
8833 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
8834 break;
8835 case V4SImode:
8836 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
8837 break;
8838 case V2SFmode:
8839 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
8840 break;
8841 case V4SFmode:
8842 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
8843 break;
8844 case V2DImode:
8845 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
8846 break;
8847 default:
8848 gcc_unreachable ();
8849 }
8401 return; 8850 return;
8402 } 8851 }
8403 8852
8404 /* Construct the vector in memory one field at a time 8853 /* Construct the vector in memory one field at a time
8405 and load the whole vector. */ 8854 and load the whole vector. */
8603 /* Match: (mem (reg)). */ 9052 /* Match: (mem (reg)). */
8604 if (GET_CODE (ind) == REG) 9053 if (GET_CODE (ind) == REG)
8605 return arm_address_register_rtx_p (ind, 0); 9054 return arm_address_register_rtx_p (ind, 0);
8606 9055
8607 /* Allow post-increment with Neon registers. */ 9056 /* Allow post-increment with Neon registers. */
8608 if (type != 1 && (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC)) 9057 if ((type != 1 && GET_CODE (ind) == POST_INC)
9058 || (type == 0 && GET_CODE (ind) == PRE_DEC))
8609 return arm_address_register_rtx_p (XEXP (ind, 0), 0); 9059 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
8610 9060
8611 /* FIXME: vld1 allows register post-modify. */ 9061 /* FIXME: vld1 allows register post-modify. */
8612 9062
8613 /* Match: 9063 /* Match:
9111 only be true for the ARM8, ARM9 and StrongARM. If this ever 9561 only be true for the ARM8, ARM9 and StrongARM. If this ever
9112 changes, then the test below needs to be reworked. */ 9562 changes, then the test below needs to be reworked. */
9113 if (nops == 2 && arm_ld_sched && add_offset != 0) 9563 if (nops == 2 && arm_ld_sched && add_offset != 0)
9114 return false; 9564 return false;
9115 9565
9566 /* XScale has load-store double instructions, but they have stricter
9567 alignment requirements than load-store multiple, so we cannot
9568 use them.
9569
9570 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9571 the pipeline until completion.
9572
9573 NREGS CYCLES
9574 1 3
9575 2 4
9576 3 5
9577 4 6
9578
9579 An ldr instruction takes 1-3 cycles, but does not block the
9580 pipeline.
9581
9582 NREGS CYCLES
9583 1 1-3
9584 2 2-6
9585 3 3-9
9586 4 4-12
9587
9588 Best case ldr will always win. However, the more ldr instructions
9589 we issue, the less likely we are to be able to schedule them well.
9590 Using ldr instructions also increases code size.
9591
9592 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9593 for counts of 3 or 4 regs. */
9594 if (nops <= 2 && arm_tune_xscale && !optimize_size)
9595 return false;
9116 return true; 9596 return true;
9117 } 9597 }
9118 9598
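Putting numbers on the compromise described above (editorial note, not from the patch):

/* Editorial sketch: for 2 registers, an XScale ldm takes 2 + 2 = 4 cycles
   and stalls the pipeline, whereas two ldr instructions take 2-6 cycles
   without blocking, so separate loads are kept unless optimizing for size.
   From 3 registers up (ldm: 5 cycles vs. ldr: 3-9), the multiple-load
   form is allowed again.  */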
9119 /* Subroutine of load_multiple_sequence and store_multiple_sequence. 9599 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9120 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute 9600 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9154 return false; 9634 return false;
9155 } 9635 }
9156 return true; 9636 return true;
9157 } 9637 }
9158 9638
9159 int 9639 /* Used to determine in a peephole whether a sequence of load
9160 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base, 9640 instructions can be changed into a load-multiple instruction.
9161 HOST_WIDE_INT *load_offset) 9641 NOPS is the number of separate load instructions we are examining. The
9642 first NOPS entries in OPERANDS are the destination registers, the
9643 next NOPS entries are memory operands. If this function is
9644 successful, *BASE is set to the common base register of the memory
9645 accesses; *LOAD_OFFSET is set to the first memory location's offset
9646 from that base register.
9647 REGS is an array filled in with the destination register numbers.
9648 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
9649 insn numbers to an ascending order of loads. If CHECK_REGS is true,
9650 the sequence of registers in REGS matches the loads from ascending memory
9651 locations, and the function verifies that the register numbers are
9652 themselves ascending. If CHECK_REGS is false, the register numbers
9653 are stored in the order they are found in the operands. */
9654 static int
9655 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
9656 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
9162 { 9657 {
9163 int unsorted_regs[MAX_LDM_STM_OPS]; 9658 int unsorted_regs[MAX_LDM_STM_OPS];
9164 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; 9659 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9165 int order[MAX_LDM_STM_OPS]; 9660 int order[MAX_LDM_STM_OPS];
9661 rtx base_reg_rtx = NULL;
9166 int base_reg = -1; 9662 int base_reg = -1;
9167 int i, ldm_case; 9663 int i, ldm_case;
9168 9664
9169 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be 9665 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9170 easily extended if required. */ 9666 easily extended if required. */
9204 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) 9700 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9205 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) 9701 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9206 == CONST_INT))) 9702 == CONST_INT)))
9207 { 9703 {
9208 if (i == 0) 9704 if (i == 0)
9209 base_reg = REGNO (reg);
9210 else
9211 { 9705 {
9212 if (base_reg != (int) REGNO (reg)) 9706 base_reg = REGNO (reg);
9213 /* Not addressed from the same base register. */ 9707 base_reg_rtx = reg;
9708 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9214 return 0; 9709 return 0;
9215 } 9710 }
9711 else if (base_reg != (int) REGNO (reg))
9712 /* Not addressed from the same base register. */
9713 return 0;
9714
9216 unsorted_regs[i] = (GET_CODE (operands[i]) == REG 9715 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9217 ? REGNO (operands[i]) 9716 ? REGNO (operands[i])
9218 : REGNO (SUBREG_REG (operands[i]))); 9717 : REGNO (SUBREG_REG (operands[i])));
9219 9718
9220 /* If it isn't an integer register, or if it overwrites the 9719 /* If it isn't an integer register, or if it overwrites the
9221 base register but isn't the last insn in the list, then 9720 base register but isn't the last insn in the list, then
9222 we can't do this. */ 9721 we can't do this. */
9223 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14 9722 if (unsorted_regs[i] < 0
9723 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9724 || unsorted_regs[i] > 14
9224 || (i != nops - 1 && unsorted_regs[i] == base_reg)) 9725 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9225 return 0; 9726 return 0;
9226 9727
9227 unsorted_offsets[i] = INTVAL (offset); 9728 unsorted_offsets[i] = INTVAL (offset);
9228 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) 9729 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9236 /* All the useful information has now been extracted from the 9737 /* All the useful information has now been extracted from the
9237 operands into unsorted_regs and unsorted_offsets; additionally, 9738 operands into unsorted_regs and unsorted_offsets; additionally,
9238 order[0] has been set to the lowest offset in the list. Sort 9739 order[0] has been set to the lowest offset in the list. Sort
9239 the offsets into order, verifying that they are adjacent, and 9740 the offsets into order, verifying that they are adjacent, and
9240 check that the register numbers are ascending. */ 9741 check that the register numbers are ascending. */
9241 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs)) 9742 if (!compute_offset_order (nops, unsorted_offsets, order,
9743 check_regs ? unsorted_regs : NULL))
9242 return 0; 9744 return 0;
9243 9745
9746 if (saved_order)
9747 memcpy (saved_order, order, sizeof order);
9748
9244 if (base) 9749 if (base)
9245 { 9750 {
9246 *base = base_reg; 9751 *base = base_reg;
9247 9752
9248 for (i = 0; i < nops; i++) 9753 for (i = 0; i < nops; i++)
9249 regs[i] = unsorted_regs[order[i]]; 9754 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9250 9755
9251 *load_offset = unsorted_offsets[order[0]]; 9756 *load_offset = unsorted_offsets[order[0]];
9252 } 9757 }
9758
9759 if (TARGET_THUMB1
9760 && !peep2_reg_dead_p (nops, base_reg_rtx))
9761 return 0;
9253 9762
9254 if (unsorted_offsets[order[0]] == 0) 9763 if (unsorted_offsets[order[0]] == 0)
9255 ldm_case = 1; /* ldmia */ 9764 ldm_case = 1; /* ldmia */
9256 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) 9765 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9257 ldm_case = 2; /* ldmib */ 9766 ldm_case = 2; /* ldmib */
9258 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) 9767 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9259 ldm_case = 3; /* ldmda */ 9768 ldm_case = 3; /* ldmda */
9260 else if (unsorted_offsets[order[nops - 1]] == -4) 9769 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9261 ldm_case = 4; /* ldmdb */ 9770 ldm_case = 4; /* ldmdb */
9262 else if (const_ok_for_arm (unsorted_offsets[order[0]]) 9771 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9263 || const_ok_for_arm (-unsorted_offsets[order[0]])) 9772 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9264 ldm_case = 5; 9773 ldm_case = 5;
9265 else 9774 else
9271 return 0; 9780 return 0;
9272 9781
9273 return ldm_case; 9782 return ldm_case;
9274 } 9783 }
9275 9784
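A small illustration of the kind of sequence the function above recognizes (editorial note, not from the patch; the register choices are hypothetical):

/* Editorial sketch: given peephole operands corresponding to
       ldr r4, [r1]
       ldr r5, [r1, #4]
       ldr r6, [r1, #8]
   the function reports base register r1 and load offset 0, returns
   ldm_case 1 (ldmia), and fills regs[] with {4, 5, 6}; with CHECK_REGS it
   also verifies that the destination registers ascend along with the
   memory addresses.  */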
9276 const char * 9785 /* Used to determine in a peephole whether a sequence of store instructions can
9277 emit_ldm_seq (rtx *operands, int nops) 9786 be changed into a store-multiple instruction.
9278 { 9787 NOPS is the number of separate store instructions we are examining.
9279 int regs[MAX_LDM_STM_OPS]; 9788 NOPS_TOTAL is the total number of instructions recognized by the peephole
9280 int base_reg; 9789 pattern.
9281 HOST_WIDE_INT offset; 9790 The first NOPS entries in OPERANDS are the source registers, the next
9282 char buf[100]; 9791 NOPS entries are memory operands. If this function is successful, *BASE is
9283 int i; 9792 set to the common base register of the memory accesses; *LOAD_OFFSET is set
9284 9793 to the first memory location's offset from that base register. REGS is an
9285 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset)) 9794 array filled in with the source register numbers, REG_RTXS (if nonnull) is
9286 { 9795 likewise filled with the corresponding rtx's.
9287 case 1: 9796 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
9288 strcpy (buf, "ldm%(ia%)\t"); 9797 numbers to an ascending order of stores.
9289 break; 9798 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
9290 9799 from ascending memory locations, and the function verifies that the register
9291 case 2: 9800 numbers are themselves ascending. If CHECK_REGS is false, the register
9292 strcpy (buf, "ldm%(ib%)\t"); 9801 numbers are stored in the order they are found in the operands. */
9293 break; 9802 static int
9294 9803 store_multiple_sequence (rtx *operands, int nops, int nops_total,
9295 case 3: 9804 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
9296 strcpy (buf, "ldm%(da%)\t"); 9805 HOST_WIDE_INT *load_offset, bool check_regs)
9297 break;
9298
9299 case 4:
9300 strcpy (buf, "ldm%(db%)\t");
9301 break;
9302
9303 case 5:
9304 if (offset >= 0)
9305 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9306 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9307 (long) offset);
9308 else
9309 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
9310 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
9311 (long) -offset);
9312 output_asm_insn (buf, operands);
9313 base_reg = regs[0];
9314 strcpy (buf, "ldm%(ia%)\t");
9315 break;
9316
9317 default:
9318 gcc_unreachable ();
9319 }
9320
9321 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9322 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9323
9324 for (i = 1; i < nops; i++)
9325 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9326 reg_names[regs[i]]);
9327
9328 strcat (buf, "}\t%@ phole ldm");
9329
9330 output_asm_insn (buf, operands);
9331 return "";
9332 }
9333
9334 int
9335 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9336 HOST_WIDE_INT * load_offset)
9337 { 9806 {
9338 int unsorted_regs[MAX_LDM_STM_OPS]; 9807 int unsorted_regs[MAX_LDM_STM_OPS];
9808 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
9339 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; 9809 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9340 int order[MAX_LDM_STM_OPS]; 9810 int order[MAX_LDM_STM_OPS];
9341 int base_reg = -1; 9811 int base_reg = -1;
9812 rtx base_reg_rtx = NULL;
9342 int i, stm_case; 9813 int i, stm_case;
9343 9814
9344 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be 9815 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9345 easily extended if required. */ 9816 easily extended if required. */
9346 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS); 9817 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9378 || (GET_CODE (reg) == SUBREG 9849 || (GET_CODE (reg) == SUBREG
9379 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) 9850 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9380 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) 9851 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9381 == CONST_INT))) 9852 == CONST_INT)))
9382 { 9853 {
9383 unsorted_regs[i] = (GET_CODE (operands[i]) == REG 9854 unsorted_reg_rtxs[i] = (GET_CODE (operands[i]) == REG
9384 ? REGNO (operands[i]) 9855 ? operands[i] : SUBREG_REG (operands[i]));
9385 : REGNO (SUBREG_REG (operands[i]))); 9856 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
9857
9386 if (i == 0) 9858 if (i == 0)
9387 base_reg = REGNO (reg); 9859 {
9860 base_reg = REGNO (reg);
9861 base_reg_rtx = reg;
9862 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
9863 return 0;
9864 }
9388 else if (base_reg != (int) REGNO (reg)) 9865 else if (base_reg != (int) REGNO (reg))
9389 /* Not addressed from the same base register. */ 9866 /* Not addressed from the same base register. */
9390 return 0; 9867 return 0;
9391 9868
9392 /* If it isn't an integer register, then we can't do this. */ 9869 /* If it isn't an integer register, then we can't do this. */
9393 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14) 9870 if (unsorted_regs[i] < 0
9871 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
9872 || (TARGET_THUMB2 && unsorted_regs[i] == base_reg)
9873 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
9874 || unsorted_regs[i] > 14)
9394 return 0; 9875 return 0;
9395 9876
9396 unsorted_offsets[i] = INTVAL (offset); 9877 unsorted_offsets[i] = INTVAL (offset);
9397 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) 9878 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9398 order[0] = i; 9879 order[0] = i;
9405 /* All the useful information has now been extracted from the 9886 /* All the useful information has now been extracted from the
9406 operands into unsorted_regs and unsorted_offsets; additionally, 9887 operands into unsorted_regs and unsorted_offsets; additionally,
9407 order[0] has been set to the lowest offset in the list. Sort 9888 order[0] has been set to the lowest offset in the list. Sort
9408 the offsets into order, verifying that they are adjacent, and 9889 the offsets into order, verifying that they are adjacent, and
9409 check that the register numbers are ascending. */ 9890 check that the register numbers are ascending. */
9410 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs)) 9891 if (!compute_offset_order (nops, unsorted_offsets, order,
9892 check_regs ? unsorted_regs : NULL))
9411 return 0; 9893 return 0;
9412 9894
9895 if (saved_order)
9896 memcpy (saved_order, order, sizeof order);
9897
9413 if (base) 9898 if (base)
9414 { 9899 {
9415 *base = base_reg; 9900 *base = base_reg;
9416 9901
9417 for (i = 0; i < nops; i++) 9902 for (i = 0; i < nops; i++)
9418 regs[i] = unsorted_regs[order[i]]; 9903 {
9904 regs[i] = unsorted_regs[check_regs ? order[i] : i];
9905 if (reg_rtxs)
9906 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
9907 }
9419 9908
9420 *load_offset = unsorted_offsets[order[0]]; 9909 *load_offset = unsorted_offsets[order[0]];
9421 } 9910 }
9911
9912 if (TARGET_THUMB1
9913 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
9914 return 0;
9422 9915
9423 if (unsorted_offsets[order[0]] == 0) 9916 if (unsorted_offsets[order[0]] == 0)
9424 stm_case = 1; /* stmia */ 9917 stm_case = 1; /* stmia */
9425 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) 9918 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9426 stm_case = 2; /* stmib */ 9919 stm_case = 2; /* stmib */
9427 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) 9920 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9428 stm_case = 3; /* stmda */ 9921 stm_case = 3; /* stmda */
9429 else if (unsorted_offsets[order[nops - 1]] == -4) 9922 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
9430 stm_case = 4; /* stmdb */ 9923 stm_case = 4; /* stmdb */
9431 else 9924 else
9432 return 0; 9925 return 0;
9433 9926
9434 if (!multiple_operation_profitable_p (false, nops, 0)) 9927 if (!multiple_operation_profitable_p (false, nops, 0))
9435 return 0; 9928 return 0;
9436 9929
9437 return stm_case; 9930 return stm_case;
9438 } 9931 }
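/* Worked example (illustrative, not from this change): three stores through
   the same base register

       str r5, [r4]
       str r6, [r4, #4]
       str r7, [r4, #8]

   have adjacent offsets whose lowest value is 0, so stm_case 1 (stmia) is
   returned.  Offsets 4, 8, 12 would give stm_case 2 (stmib, ARM only);
   -8, -4, 0 give stm_case 3 (stmda, ARM only); -12, -8, -4 give stm_case 4
   (stmdb).  Any other layout, a mismatched base register, or (with
   CHECK_REGS) non-ascending register numbers makes the function return 0
   and the caller keeps the separate stores.  */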
9439
9440 const char *
9441 emit_stm_seq (rtx *operands, int nops)
9442 {
9443 int regs[MAX_LDM_STM_OPS];
9444 int base_reg;
9445 HOST_WIDE_INT offset;
9446 char buf[100];
9447 int i;
9448
9449 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
9450 {
9451 case 1:
9452 strcpy (buf, "stm%(ia%)\t");
9453 break;
9454
9455 case 2:
9456 strcpy (buf, "stm%(ib%)\t");
9457 break;
9458
9459 case 3:
9460 strcpy (buf, "stm%(da%)\t");
9461 break;
9462
9463 case 4:
9464 strcpy (buf, "stm%(db%)\t");
9465 break;
9466
9467 default:
9468 gcc_unreachable ();
9469 }
9470
9471 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
9472 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
9473
9474 for (i = 1; i < nops; i++)
9475 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
9476 reg_names[regs[i]]);
9477
9478 strcat (buf, "}\t%@ phole stm");
9479
9480 output_asm_insn (buf, operands);
9481 return "";
9482 }
9483 9932
9484 /* Routines for use in generating RTL. */ 9933 /* Routines for use in generating RTL. */
9485 9934
9486 rtx 9935 /* Generate a load-multiple instruction. COUNT is the number of loads in
9487 arm_gen_load_multiple (int base_regno, int count, rtx from, int up, 9936 the instruction; REGS and MEMS are arrays containing the operands.
9488 int write_back, rtx basemem, HOST_WIDE_INT *offsetp) 9937 BASEREG is the base register to be used in addressing the memory operands.
9489 { 9938 WBACK_OFFSET is nonzero if the instruction should update the base
9490 HOST_WIDE_INT offset = *offsetp; 9939 register. */
9940
9941 static rtx
9942 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9943 HOST_WIDE_INT wback_offset)
9944 {
9491 int i = 0, j; 9945 int i = 0, j;
9492 rtx result; 9946 rtx result;
9493 int sign = up ? 1 : -1; 9947
9494 rtx mem, addr; 9948 if (!multiple_operation_profitable_p (false, count, 0))
9495
9496 /* XScale has load-store double instructions, but they have stricter
9497 alignment requirements than load-store multiple, so we cannot
9498 use them.
9499
9500 For XScale ldm requires 2 + NREGS cycles to complete and blocks
9501 the pipeline until completion.
9502
9503 NREGS CYCLES
9504 1 3
9505 2 4
9506 3 5
9507 4 6
9508
9509 An ldr instruction takes 1-3 cycles, but does not block the
9510 pipeline.
9511
9512 NREGS CYCLES
9513 1 1-3
9514 2 2-6
9515 3 3-9
9516 4 4-12
9517
9518 Best case ldr will always win. However, the more ldr instructions
9519 we issue, the less likely we are to be able to schedule them well.
9520 Using ldr instructions also increases code size.
9521
9522 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
9523 for counts of 3 or 4 regs. */
9524 if (arm_tune_xscale && count <= 2 && ! optimize_size)
9525 { 9949 {
9526 rtx seq; 9950 rtx seq;
9527 9951
9528 start_sequence (); 9952 start_sequence ();
9529 9953
9530 for (i = 0; i < count; i++) 9954 for (i = 0; i < count; i++)
9531 { 9955 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
9532 addr = plus_constant (from, i * 4 * sign); 9956
9533 mem = adjust_automodify_address (basemem, SImode, addr, offset); 9957 if (wback_offset != 0)
9534 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem); 9958 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
9535 offset += 4 * sign;
9536 }
9537
9538 if (write_back)
9539 {
9540 emit_move_insn (from, plus_constant (from, count * 4 * sign));
9541 *offsetp = offset;
9542 }
9543 9959
9544 seq = get_insns (); 9960 seq = get_insns ();
9545 end_sequence (); 9961 end_sequence ();
9546 9962
9547 return seq; 9963 return seq;
9548 } 9964 }
9549 9965
9550 result = gen_rtx_PARALLEL (VOIDmode, 9966 result = gen_rtx_PARALLEL (VOIDmode,
9551 rtvec_alloc (count + (write_back ? 1 : 0))); 9967 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
9552 if (write_back) 9968 if (wback_offset != 0)
9553 { 9969 {
9554 XVECEXP (result, 0, 0) 9970 XVECEXP (result, 0, 0)
9555 = gen_rtx_SET (VOIDmode, from, plus_constant (from, count * 4 * sign)); 9971 = gen_rtx_SET (VOIDmode, basereg,
9972 plus_constant (basereg, wback_offset));
9556 i = 1; 9973 i = 1;
9557 count++; 9974 count++;
9558 } 9975 }
9559 9976
9560 for (j = 0; i < count; i++, j++) 9977 for (j = 0; i < count; i++, j++)
9561 { 9978 XVECEXP (result, 0, i)
9562 addr = plus_constant (from, j * 4 * sign); 9979 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, regs[j]), mems[j]);
9563 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset); 9980
9564 XVECEXP (result, 0, i) 9981 return result;
9565 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem); 9982 }
9566 offset += 4 * sign; 9983
9984 /* Generate a store-multiple instruction. COUNT is the number of stores in
9985 the instruction; REGS and MEMS are arrays containing the operands.
9986 BASEREG is the base register to be used in addressing the memory operands.
9987 WBACK_OFFSET is nonzero if the instruction should update the base
9988 register. */
9989
9990 static rtx
9991 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
9992 HOST_WIDE_INT wback_offset)
9993 {
9994 int i = 0, j;
9995 rtx result;
9996
9997 if (GET_CODE (basereg) == PLUS)
9998 basereg = XEXP (basereg, 0);
9999
10000 if (!multiple_operation_profitable_p (false, count, 0))
10001 {
10002 rtx seq;
10003
10004 start_sequence ();
10005
10006 for (i = 0; i < count; i++)
10007 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
10008
10009 if (wback_offset != 0)
10010 emit_move_insn (basereg, plus_constant (basereg, wback_offset));
10011
10012 seq = get_insns ();
10013 end_sequence ();
10014
10015 return seq;
10016 }
10017
10018 result = gen_rtx_PARALLEL (VOIDmode,
10019 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
10020 if (wback_offset != 0)
10021 {
10022 XVECEXP (result, 0, 0)
10023 = gen_rtx_SET (VOIDmode, basereg,
10024 plus_constant (basereg, wback_offset));
10025 i = 1;
10026 count++;
10027 }
10028
10029 for (j = 0; i < count; i++, j++)
10030 XVECEXP (result, 0, i)
10031 = gen_rtx_SET (VOIDmode, mems[j], gen_rtx_REG (SImode, regs[j]));
10032
10033 return result;
10034 }
10035
10036 /* Generate either a load-multiple or a store-multiple instruction. This
10037 function can be used in situations where we can start with a single MEM
10038 rtx and adjust its address upwards.
10039 COUNT is the number of operations in the instruction, not counting a
10040 possible update of the base register. REGS is an array containing the
10041 register operands.
10042 BASEREG is the base register to be used in addressing the memory operands,
10043 which are constructed from BASEMEM.
10044 WRITE_BACK specifies whether the generated instruction should include an
10045 update of the base register.
10046 OFFSETP is used to pass an offset to and from this function; this offset
10047 is not used when constructing the address (instead BASEMEM should have an
10048 appropriate offset in its address), it is used only for setting
10049 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
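/* Rough sketch (illustrative) of what arm_gen_load_multiple_1 returns for
   three registers and a write-back offset of 12, with MEMS built at
   offsets 0, 4 and 8 from the base as done just above:

     (parallel
       [(set (reg:SI base) (plus:SI (reg:SI base) (const_int 12)))
        (set (reg:SI r4) (mem:SI (reg:SI base)))
        (set (reg:SI r5) (mem:SI (plus:SI (reg:SI base) (const_int 4))))
        (set (reg:SI r6) (mem:SI (plus:SI (reg:SI base) (const_int 8))))])

   which the ldm patterns print as "ldmia base!, {r4, r5, r6}".  When
   multiple_operation_profitable_p rejects the group, the same helpers fall
   back to emitting the individual moves and the base update separately.  */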
10050
10051 static rtx
10052 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
10053 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
10054 {
10055 rtx mems[MAX_LDM_STM_OPS];
10056 HOST_WIDE_INT offset = *offsetp;
10057 int i;
10058
10059 gcc_assert (count <= MAX_LDM_STM_OPS);
10060
10061 if (GET_CODE (basereg) == PLUS)
10062 basereg = XEXP (basereg, 0);
10063
10064 for (i = 0; i < count; i++)
10065 {
10066 rtx addr = plus_constant (basereg, i * 4);
10067 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
10068 offset += 4;
9567 } 10069 }
9568 10070
9569 if (write_back) 10071 if (write_back)
9570 *offsetp = offset; 10072 *offsetp = offset;
9571 10073
9572 return result; 10074 if (is_load)
10075 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
10076 write_back ? 4 * count : 0);
10077 else
10078 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
10079 write_back ? 4 * count : 0);
9573 } 10080 }
9574 10081
9575 rtx 10082 rtx
9576 arm_gen_store_multiple (int base_regno, int count, rtx to, int up, 10083 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
9577 int write_back, rtx basemem, HOST_WIDE_INT *offsetp) 10084 rtx basemem, HOST_WIDE_INT *offsetp)
9578 { 10085 {
9579 HOST_WIDE_INT offset = *offsetp; 10086 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
9580 int i = 0, j; 10087 offsetp);
9581 rtx result; 10088 }
9582 int sign = up ? 1 : -1; 10089
9583 rtx mem, addr; 10090 rtx
9584 10091 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
9585 /* See arm_gen_load_multiple for discussion of 10092 rtx basemem, HOST_WIDE_INT *offsetp)
9586 the pros/cons of ldm/stm usage for XScale. */ 10093 {
9587 if (arm_tune_xscale && count <= 2 && ! optimize_size) 10094 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
9588 { 10095 offsetp);
9589 rtx seq; 10096 }
9590 10097
9591 start_sequence (); 10098 /* Called from a peephole2 expander to turn a sequence of loads into an
9592 10099 LDM instruction. OPERANDS are the operands found by the peephole matcher;
9593 for (i = 0; i < count; i++) 10100 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
9594 { 10101 is true if we can reorder the registers because they are used commutatively
9595 addr = plus_constant (to, i * 4 * sign); 10102 subsequently.
9596 mem = adjust_automodify_address (basemem, SImode, addr, offset); 10103 Returns true iff we could generate a new instruction. */
9597 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i)); 10104
9598 offset += 4 * sign; 10105 bool
9599 } 10106 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
9600 10107 {
9601 if (write_back) 10108 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
9602 { 10109 rtx mems[MAX_LDM_STM_OPS];
9603 emit_move_insn (to, plus_constant (to, count * 4 * sign)); 10110 int i, j, base_reg;
9604 *offsetp = offset; 10111 rtx base_reg_rtx;
9605 } 10112 HOST_WIDE_INT offset;
9606 10113 int write_back = FALSE;
9607 seq = get_insns (); 10114 int ldm_case;
9608 end_sequence (); 10115 rtx addr;
9609 10116
9610 return seq; 10117 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
9611 } 10118 &base_reg, &offset, !sort_regs);
9612 10119
9613 result = gen_rtx_PARALLEL (VOIDmode, 10120 if (ldm_case == 0)
9614 rtvec_alloc (count + (write_back ? 1 : 0))); 10121 return false;
9615 if (write_back) 10122
9616 { 10123 if (sort_regs)
9617 XVECEXP (result, 0, 0) 10124 for (i = 0; i < nops - 1; i++)
9618 = gen_rtx_SET (VOIDmode, to, 10125 for (j = i + 1; j < nops; j++)
9619 plus_constant (to, count * 4 * sign)); 10126 if (regs[i] > regs[j])
9620 i = 1; 10127 {
9621 count++; 10128 int t = regs[i];
9622 } 10129 regs[i] = regs[j];
9623 10130 regs[j] = t;
9624 for (j = 0; i < count; i++, j++) 10131 }
9625 { 10132 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
9626 addr = plus_constant (to, j * 4 * sign); 10133
9627 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset); 10134 if (TARGET_THUMB1)
9628 XVECEXP (result, 0, i) 10135 {
9629 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j)); 10136 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
9630 offset += 4 * sign; 10137 gcc_assert (ldm_case == 1 || ldm_case == 5);
9631 } 10138 write_back = TRUE;
9632 10139 }
9633 if (write_back) 10140
9634 *offsetp = offset; 10141 if (ldm_case == 5)
9635 10142 {
9636 return result; 10143 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
10144 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
10145 offset = 0;
10146 if (!TARGET_THUMB1)
10147 {
10148 base_reg = regs[0];
10149 base_reg_rtx = newbase;
10150 }
10151 }
10152
10153 for (i = 0; i < nops; i++)
10154 {
10155 addr = plus_constant (base_reg_rtx, offset + i * 4);
10156 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10157 SImode, addr, 0);
10158 }
10159 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
10160 write_back ? offset + i * 4 : 0));
10161 return true;
10162 }
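/* Example of the transformation gen_ldm_seq enables (register numbers
   invented for illustration).  A matched peephole2 sequence

       ldr r1, [r0]
       ldr r2, [r0, #4]
       ldr r3, [r0, #8]

   is rebuilt as a single load-multiple, "ldm r0, {r1, r2, r3}".  For
   ldm_case 5 (non-zero starting offset) an add of the offset into the
   first destination register (or, on Thumb-1, into the dead base register,
   together with write-back) is emitted first.  */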
10163
10164 /* Called from a peephole2 expander to turn a sequence of stores into an
10165 STM instruction. OPERANDS are the operands found by the peephole matcher;
10166 NOPS indicates how many separate stores we are trying to combine.
10167 Returns true iff we could generate a new instruction. */
10168
10169 bool
10170 gen_stm_seq (rtx *operands, int nops)
10171 {
10172 int i;
10173 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10174 rtx mems[MAX_LDM_STM_OPS];
10175 int base_reg;
10176 rtx base_reg_rtx;
10177 HOST_WIDE_INT offset;
10178 int write_back = FALSE;
10179 int stm_case;
10180 rtx addr;
10181 bool base_reg_dies;
10182
10183 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
10184 mem_order, &base_reg, &offset, true);
10185
10186 if (stm_case == 0)
10187 return false;
10188
10189 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10190
10191 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
10192 if (TARGET_THUMB1)
10193 {
10194 gcc_assert (base_reg_dies);
10195 write_back = TRUE;
10196 }
10197
10198 if (stm_case == 5)
10199 {
10200 gcc_assert (base_reg_dies);
10201 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10202 offset = 0;
10203 }
10204
10205 addr = plus_constant (base_reg_rtx, offset);
10206
10207 for (i = 0; i < nops; i++)
10208 {
10209 addr = plus_constant (base_reg_rtx, offset + i * 4);
10210 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10211 SImode, addr, 0);
10212 }
10213 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
10214 write_back ? offset + i * 4 : 0));
10215 return true;
10216 }
10217
10218 /* Called from a peephole2 expander to turn a sequence of stores that are
10219 preceded by constant loads into an STM instruction. OPERANDS are the
10220 operands found by the peephole matcher; NOPS indicates how many
10221 separate stores we are trying to combine; there are 2 * NOPS
10222 instructions in the peephole.
10223 Returns true iff we could generate a new instruction. */
10224
10225 bool
10226 gen_const_stm_seq (rtx *operands, int nops)
10227 {
10228 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
10229 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
10230 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
10231 rtx mems[MAX_LDM_STM_OPS];
10232 int base_reg;
10233 rtx base_reg_rtx;
10234 HOST_WIDE_INT offset;
10235 int write_back = FALSE;
10236 int stm_case;
10237 rtx addr;
10238 bool base_reg_dies;
10239 int i, j;
10240 HARD_REG_SET allocated;
10241
10242 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
10243 mem_order, &base_reg, &offset, false);
10244
10245 if (stm_case == 0)
10246 return false;
10247
10248 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
10249
10250 /* If the same register is used more than once, try to find a free
10251 register. */
10252 CLEAR_HARD_REG_SET (allocated);
10253 for (i = 0; i < nops; i++)
10254 {
10255 for (j = i + 1; j < nops; j++)
10256 if (regs[i] == regs[j])
10257 {
10258 rtx t = peep2_find_free_register (0, nops * 2,
10259 TARGET_THUMB1 ? "l" : "r",
10260 SImode, &allocated);
10261 if (t == NULL_RTX)
10262 return false;
10263 reg_rtxs[i] = t;
10264 regs[i] = REGNO (t);
10265 }
10266 }
10267
10268 /* Compute an ordering that maps the register numbers to an ascending
10269 sequence. */
10270 reg_order[0] = 0;
10271 for (i = 0; i < nops; i++)
10272 if (regs[i] < regs[reg_order[0]])
10273 reg_order[0] = i;
10274
10275 for (i = 1; i < nops; i++)
10276 {
10277 int this_order = reg_order[i - 1];
10278 for (j = 0; j < nops; j++)
10279 if (regs[j] > regs[reg_order[i - 1]]
10280 && (this_order == reg_order[i - 1]
10281 || regs[j] < regs[this_order]))
10282 this_order = j;
10283 reg_order[i] = this_order;
10284 }
10285
10286 /* Ensure that registers that must be live after the instruction end
10287 up with the correct value. */
10288 for (i = 0; i < nops; i++)
10289 {
10290 int this_order = reg_order[i];
10291 if ((this_order != mem_order[i]
10292 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
10293 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
10294 return false;
10295 }
10296
10297 /* Load the constants. */
10298 for (i = 0; i < nops; i++)
10299 {
10300 rtx op = operands[2 * nops + mem_order[i]];
10301 sorted_regs[i] = regs[reg_order[i]];
10302 emit_move_insn (reg_rtxs[reg_order[i]], op);
10303 }
10304
10305 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
10306
10307 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
10308 if (TARGET_THUMB1)
10309 {
10310 gcc_assert (base_reg_dies);
10311 write_back = TRUE;
10312 }
10313
10314 if (stm_case == 5)
10315 {
10316 gcc_assert (base_reg_dies);
10317 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
10318 offset = 0;
10319 }
10320
10321 addr = plus_constant (base_reg_rtx, offset);
10322
10323 for (i = 0; i < nops; i++)
10324 {
10325 addr = plus_constant (base_reg_rtx, offset + i * 4);
10326 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
10327 SImode, addr, 0);
10328 }
10329 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
10330 write_back ? offset + i * 4 : 0));
10331 return true;
9637 } 10332 }
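/* Illustrative example for gen_const_stm_seq (registers invented; r2 and
   r3 assumed dead afterwards).  The matched sequence

       mov r3, #1
       str r3, [r4]
       mov r2, #2
       str r2, [r4, #4]

   stores ascending addresses from descending register numbers, so the
   constants are re-issued into an ascending register assignment and the
   stores collapse:

       mov r2, #1
       mov r3, #2
       stm r4, {r2, r3}

   If a register whose contents would change is still live after the
   sequence the routine gives up, and a register used for more than one
   constant is replaced by one found with peep2_find_free_register.  */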
9638 10333
9639 int 10334 int
9640 arm_gen_movmemqi (rtx *operands) 10335 arm_gen_movmemqi (rtx *operands)
9641 { 10336 {
9667 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3); 10362 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
9668 10363
9669 for (i = 0; in_words_to_go >= 2; i+=4) 10364 for (i = 0; in_words_to_go >= 2; i+=4)
9670 { 10365 {
9671 if (in_words_to_go > 4) 10366 if (in_words_to_go > 4)
9672 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE, 10367 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
9673 srcbase, &srcoffset)); 10368 TRUE, srcbase, &srcoffset));
9674 else 10369 else
9675 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE, 10370 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
9676 FALSE, srcbase, &srcoffset)); 10371 src, FALSE, srcbase,
10372 &srcoffset));
9677 10373
9678 if (out_words_to_go) 10374 if (out_words_to_go)
9679 { 10375 {
9680 if (out_words_to_go > 4) 10376 if (out_words_to_go > 4)
9681 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE, 10377 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
9682 dstbase, &dstoffset)); 10378 TRUE, dstbase, &dstoffset));
9683 else if (out_words_to_go != 1) 10379 else if (out_words_to_go != 1)
9684 emit_insn (arm_gen_store_multiple (0, out_words_to_go, 10380 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
9685 dst, TRUE, 10381 out_words_to_go, dst,
9686 (last_bytes == 0 10382 (last_bytes == 0
9687 ? FALSE : TRUE), 10383 ? FALSE : TRUE),
9688 dstbase, &dstoffset)); 10384 dstbase, &dstoffset));
9689 else 10385 else
9690 { 10386 {
10008 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), 10704 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10009 INTVAL (XEXP (x, 2))); 10705 INTVAL (XEXP (x, 2)));
10010 10706
10011 /* Alternate canonicalizations of the above. These are somewhat cleaner. */ 10707 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
10012 if (GET_CODE (x) == AND 10708 if (GET_CODE (x) == AND
10709 && (op == EQ || op == NE)
10013 && COMPARISON_P (XEXP (x, 0)) 10710 && COMPARISON_P (XEXP (x, 0))
10014 && COMPARISON_P (XEXP (x, 1))) 10711 && COMPARISON_P (XEXP (x, 1)))
10015 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), 10712 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10016 DOM_CC_X_AND_Y); 10713 DOM_CC_X_AND_Y);
10017 10714
10018 if (GET_CODE (x) == IOR 10715 if (GET_CODE (x) == IOR
10716 && (op == EQ || op == NE)
10019 && COMPARISON_P (XEXP (x, 0)) 10717 && COMPARISON_P (XEXP (x, 0))
10020 && COMPARISON_P (XEXP (x, 1))) 10718 && COMPARISON_P (XEXP (x, 1)))
10021 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1), 10719 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
10022 DOM_CC_X_OR_Y); 10720 DOM_CC_X_OR_Y);
10023 10721
10055 if (GET_MODE (x) == SImode && (op == LTU || op == GEU) 10753 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
10056 && GET_CODE (x) == PLUS 10754 && GET_CODE (x) == PLUS
10057 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y))) 10755 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
10058 return CC_Cmode; 10756 return CC_Cmode;
10059 10757
10758 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
10759 {
10760 /* To keep things simple, always use the Cirrus cfcmp64 if it is
10761 available. */
10762 if (TARGET_ARM && TARGET_HARD_FLOAT && TARGET_MAVERICK)
10763 return CCmode;
10764
10765 switch (op)
10766 {
10767 case EQ:
10768 case NE:
10769 /* A DImode comparison against zero can be implemented by
10770 or'ing the two halves together. */
10771 if (y == const0_rtx)
10772 return CC_Zmode;
10773
10774 /* We can do an equality test in three Thumb instructions. */
10775 if (!TARGET_ARM)
10776 return CC_Zmode;
10777
10778 /* FALLTHROUGH */
10779
10780 case LTU:
10781 case LEU:
10782 case GTU:
10783 case GEU:
10784 /* DImode unsigned comparisons can be implemented by cmp +
10785 cmpeq without a scratch register. Not worth doing in
10786 Thumb-2. */
10787 if (TARGET_ARM)
10788 return CC_CZmode;
10789
10790 /* FALLTHROUGH */
10791
10792 case LT:
10793 case LE:
10794 case GT:
10795 case GE:
10796 /* DImode signed and unsigned comparisons can be implemented
10797 by cmp + sbcs with a scratch register, but that does not
10798 set the Z flag - we must reverse GT/LE/GTU/LEU. */
10799 gcc_assert (op != EQ && op != NE);
10800 return CC_NCVmode;
10801
10802 default:
10803 gcc_unreachable ();
10804 }
10805 }
10806
10060 return CCmode; 10807 return CCmode;
10061 } 10808 }
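/* Summary sketch (illustrative) of the instruction sequences the new
   DImode CC modes stand for, comparing x = xh:xl against y = yh:yl:

     CC_Zmode   (x == 0, x != 0):      orr   tmp, xl, xh   @ then test tmp
     CC_CZmode  (unsigned, ARM only):  cmp   xh, yh
                                       cmpeq xl, yl
     CC_NCVmode (signed, and unsigned
                 outside ARM mode):    cmp   xl, yl
                                       sbcs  tmp, xh, yh   @ needs a scratch

   The sbcs form leaves Z meaningless, which is why EQ/NE never reach
   CC_NCVmode and why GT/LE/GTU/LEU must be handled by reversing the
   comparison, as the comment above notes.  */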
10062 10809
10063 /* X and Y are two things to compare using CODE. Emit the compare insn and 10810 /* X and Y are two things to compare using CODE. Emit the compare insn and
10064 return the rtx for register 0 in the proper mode. FP means this is a 10811 return the rtx for register 0 in the proper mode. FP means this is a
10065 floating point compare: I don't think that it is needed on the arm. */ 10812 floating point compare: I don't think that it is needed on the arm. */
10066 rtx 10813 rtx
10067 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y) 10814 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
10068 { 10815 {
10069 enum machine_mode mode = SELECT_CC_MODE (code, x, y); 10816 enum machine_mode mode;
10070 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM); 10817 rtx cc_reg;
10071 10818 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
10072 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); 10819
10820 /* We might have X as a constant, Y as a register because of the predicates
10821 used for cmpdi. If so, force X to a register here. */
10822 if (dimode_comparison && !REG_P (x))
10823 x = force_reg (DImode, x);
10824
10825 mode = SELECT_CC_MODE (code, x, y);
10826 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
10827
10828 if (dimode_comparison
10829 && !(TARGET_HARD_FLOAT && TARGET_MAVERICK)
10830 && mode != CC_CZmode)
10831 {
10832 rtx clobber, set;
10833
10834 /* To compare two non-zero values for equality, XOR them and
10835 then compare against zero. Not used for ARM mode; there
10836 CC_CZmode is cheaper. */
10837 if (mode == CC_Zmode && y != const0_rtx)
10838 {
10839 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
10840 y = const0_rtx;
10841 }
10842 /* A scratch register is required. */
10843 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (SImode));
10844 set = gen_rtx_SET (VOIDmode, cc_reg, gen_rtx_COMPARE (mode, x, y));
10845 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
10846 }
10847 else
10848 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
10073 10849
10074 return cc_reg; 10850 return cc_reg;
10075 } 10851 }
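/* Rough sketch of the equality path above: for two live DImode registers,
   "x == y" is first rewritten as "(x ^ y) == 0", so the eventual code is
   along the lines of

       eor   tl, xl, yl
       eor   th, xh, yh
       orrs  tmp, tl, th    @ Z now reflects x == y

   while the SCRATCH clobber added to the PARALLEL reserves the temporary
   register the later splitters need in the non-CC_CZmode cases.  */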
10076 10852
10077 /* Generate a sequence of insns that will generate the correct return 10853 /* Generate a sequence of insns that will generate the correct return
11396 return true; 12172 return true;
11397 12173
11398 return false; 12174 return false;
11399 } 12175 }
11400 12176
12177 /* Return true if it is possible to inline both the high and low parts
12178 of a 64-bit constant into 32-bit data processing instructions. */
12179 bool
12180 arm_const_double_by_immediates (rtx val)
12181 {
12182 enum machine_mode mode = GET_MODE (val);
12183 rtx part;
12184
12185 if (mode == VOIDmode)
12186 mode = DImode;
12187
12188 part = gen_highpart_mode (SImode, mode, val);
12189
12190 gcc_assert (GET_CODE (part) == CONST_INT);
12191
12192 if (!const_ok_for_arm (INTVAL (part)))
12193 return false;
12194
12195 part = gen_lowpart (SImode, val);
12196
12197 gcc_assert (GET_CODE (part) == CONST_INT);
12198
12199 if (!const_ok_for_arm (INTVAL (part)))
12200 return false;
12201
12202 return true;
12203 }
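/* A worked example: 0x00ff00000000ff00 has high word 0x00ff0000 and low
   word 0x0000ff00, both a rotated 8-bit value, so the function returns
   true; 0x0000010100000000 fails because 0x101 cannot be formed from one
   rotated 8-bit immediate.  Below is a minimal standalone sketch of the
   same check, assuming the ARM-mode immediate rule (an 8-bit constant
   rotated right by an even amount); the helper names are hypothetical and
   not part of GCC, and Thumb-2's extra immediate forms are ignored.  */

static int
sketch_valid_arm_immediate (unsigned int x)
{
  int rot;
  for (rot = 0; rot < 32; rot += 2)
    {
      /* Rotating left by ROT undoes a rotate-right of ROT bits.  */
      unsigned int v = rot ? (x << rot) | (x >> (32 - rot)) : x;
      if ((v & ~0xffu) == 0)
	return 1;
    }
  return 0;
}

static int
sketch_const_double_by_immediates (unsigned long long val)
{
  return sketch_valid_arm_immediate ((unsigned int) (val >> 32))
	 && sketch_valid_arm_immediate ((unsigned int) val);
}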
12204
11401 /* Scan INSN and note any of its operands that need fixing. 12205 /* Scan INSN and note any of its operands that need fixing.
11402 If DO_PUSHES is false we do not actually push any of the fixups 12206 If DO_PUSHES is false we do not actually push any of the fixups
11403 needed. The function returns TRUE if any fixups were needed/pushed. 12207 needed. The function returns TRUE if any fixups were needed/pushed.
11404 This is used by arm_memory_load_p() which needs to know about loads 12208 This is used by arm_memory_load_p() which needs to know about loads
11405 of constants that will be converted into minipool loads. */ 12209 of constants that will be converted into minipool loads. */
11469 } 12273 }
11470 12274
11471 return result; 12275 return result;
11472 } 12276 }
11473 12277
12278 /* Convert instructions to their cc-clobbering variant if possible, since
12279 that allows us to use smaller encodings. */
12280
12281 static void
12282 thumb2_reorg (void)
12283 {
12284 basic_block bb;
12285 regset_head live;
12286
12287 INIT_REG_SET (&live);
12288
12289 /* We are freeing block_for_insn in the toplev to keep compatibility
12290 with old MDEP_REORGS that are not CFG based. Recompute it now. */
12291 compute_bb_for_insn ();
12292 df_analyze ();
12293
12294 FOR_EACH_BB (bb)
12295 {
12296 rtx insn;
12297
12298 COPY_REG_SET (&live, DF_LR_OUT (bb));
12299 df_simulate_initialize_backwards (bb, &live);
12300 FOR_BB_INSNS_REVERSE (bb, insn)
12301 {
12302 if (NONJUMP_INSN_P (insn)
12303 && !REGNO_REG_SET_P (&live, CC_REGNUM))
12304 {
12305 rtx pat = PATTERN (insn);
12306 if (GET_CODE (pat) == SET
12307 && low_register_operand (XEXP (pat, 0), SImode)
12308 && thumb_16bit_operator (XEXP (pat, 1), SImode)
12309 && low_register_operand (XEXP (XEXP (pat, 1), 0), SImode)
12310 && low_register_operand (XEXP (XEXP (pat, 1), 1), SImode))
12311 {
12312 rtx dst = XEXP (pat, 0);
12313 rtx src = XEXP (pat, 1);
12314 rtx op0 = XEXP (src, 0);
12315 rtx op1 = (GET_RTX_CLASS (GET_CODE (src)) == RTX_COMM_ARITH
12316 ? XEXP (src, 1) : NULL);
12317
12318 if (rtx_equal_p (dst, op0)
12319 || GET_CODE (src) == PLUS || GET_CODE (src) == MINUS)
12320 {
12321 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12322 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12323 rtvec vec = gen_rtvec (2, pat, clobber);
12324
12325 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12326 INSN_CODE (insn) = -1;
12327 }
12328 /* We can also handle a commutative operation where the
12329 second operand matches the destination. */
12330 else if (op1 && rtx_equal_p (dst, op1))
12331 {
12332 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
12333 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
12334 rtvec vec;
12335
12336 src = copy_rtx (src);
12337 XEXP (src, 0) = op1;
12338 XEXP (src, 1) = op0;
12339 pat = gen_rtx_SET (VOIDmode, dst, src);
12340 vec = gen_rtvec (2, pat, clobber);
12341 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
12342 INSN_CODE (insn) = -1;
12343 }
12344 }
12345 }
12346
12347 if (NONDEBUG_INSN_P (insn))
12348 df_simulate_one_insn_backwards (bb, insn, &live);
12349 }
12350 }
12351
12352 CLEAR_REG_SET (&live);
12353 }
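/* Illustrative before/after of the rewrite above (insn invented for the
   example).  With the condition codes dead, a plain

     (set (reg:SI r0) (plus:SI (reg:SI r0) (reg:SI r1)))

   becomes

     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r0) (reg:SI r1)))
                (clobber (reg:CC CC_REGNUM))])

   so the flag-setting 16-bit encoding "adds r0, r0, r1" can be chosen
   instead of the 32-bit "add.w r0, r0, r1".  For a commutative operation
   whose second operand matches the destination, the operands are swapped
   first so the pattern still sees the destination as operand 0.  */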
12354
11474 /* Gcc puts the pool in the wrong place for ARM, since we can only 12355 /* Gcc puts the pool in the wrong place for ARM, since we can only
11475 load addresses a limited distance around the pc. We do some 12356 load addresses a limited distance around the pc. We do some
11476 special munging to move the constant pool values to the correct 12357 special munging to move the constant pool values to the correct
11477 point in the code. */ 12358 point in the code. */
11478 static void 12359 static void
11480 { 12361 {
11481 rtx insn; 12362 rtx insn;
11482 HOST_WIDE_INT address = 0; 12363 HOST_WIDE_INT address = 0;
11483 Mfix * fix; 12364 Mfix * fix;
11484 12365
12366 if (TARGET_THUMB2)
12367 thumb2_reorg ();
12368
11485 minipool_fix_head = minipool_fix_tail = NULL; 12369 minipool_fix_head = minipool_fix_tail = NULL;
11486 12370
11487 /* The first insn must always be a note, or the code below won't 12371 /* The first insn must always be a note, or the code below won't
11488 scan it properly. */ 12372 scan it properly. */
11489 insn = get_insns (); 12373 insn = get_insns ();
12077 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands); 12961 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
12078 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops); 12962 output_asm_insn ("ldm%(fd%)\t%|sp!, {%0, %1}", ops);
12079 return ""; 12963 return "";
12080 } 12964 }
12081 12965
12082 /* Output a move between double words. 12966 /* Output a move between double words. It must be REG<-MEM
12083 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM 12967 or MEM<-REG. */
12084 or MEM<-REG and all MEMs must be offsettable addresses. */
12085 const char * 12968 const char *
12086 output_move_double (rtx *operands) 12969 output_move_double (rtx *operands)
12087 { 12970 {
12088 enum rtx_code code0 = GET_CODE (operands[0]); 12971 enum rtx_code code0 = GET_CODE (operands[0]);
12089 enum rtx_code code1 = GET_CODE (operands[1]); 12972 enum rtx_code code1 = GET_CODE (operands[1]);
12352 && (INTVAL(otherops[2]) <= -256 13235 && (INTVAL(otherops[2]) <= -256
12353 || INTVAL(otherops[2]) >= 256)) 13236 || INTVAL(otherops[2]) >= 256))
12354 { 13237 {
12355 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY) 13238 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12356 { 13239 {
12357 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops); 13240 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
12358 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops); 13241 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
12359 } 13242 }
12360 else 13243 else
12361 { 13244 {
12362 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops); 13245 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
12363 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops); 13246 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
12364 } 13247 }
12365 } 13248 }
12366 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY) 13249 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
12367 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops); 13250 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
12368 else 13251 else
12668 output_asm_insn (buff, ops); 13551 output_asm_insn (buff, ops);
12669 13552
12670 return ""; 13553 return "";
12671 } 13554 }
12672 13555
13556 /* Compute and return the length of neon_mov<mode>, where <mode> is
13557 one of VSTRUCT modes: EI, OI, CI or XI. */
13558 int
13559 arm_attr_length_move_neon (rtx insn)
13560 {
13561 rtx reg, mem, addr;
13562 int load;
13563 enum machine_mode mode;
13564
13565 extract_insn_cached (insn);
13566
13567 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
13568 {
13569 mode = GET_MODE (recog_data.operand[0]);
13570 switch (mode)
13571 {
13572 case EImode:
13573 case OImode:
13574 return 8;
13575 case CImode:
13576 return 12;
13577 case XImode:
13578 return 16;
13579 default:
13580 gcc_unreachable ();
13581 }
13582 }
13583
13584 load = REG_P (recog_data.operand[0]);
13585 reg = recog_data.operand[!load];
13586 mem = recog_data.operand[load];
13587
13588 gcc_assert (MEM_P (mem));
13589
13590 mode = GET_MODE (reg);
13591 addr = XEXP (mem, 0);
13592
13593 /* Strip off const from addresses like (const (plus (...))). */
13594 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
13595 addr = XEXP (addr, 0);
13596
13597 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
13598 {
13599 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
13600 return insns * 4;
13601 }
13602 else
13603 return 4;
13604 }
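/* Example lengths (illustrative): a register-to-register move of an
   OImode value is 8 bytes, CImode 12, XImode 16.  A memory operand
   addressed by a bare register costs a single 4-byte instruction, while a
   reg+offset or label address costs 4 bytes for every two hard registers
   covered by the mode, since each part must then be moved separately.  */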
13605
13606 /* Return nonzero if the offset in the address is an immediate. Otherwise,
13607 return zero. */
13608
13609 int
13610 arm_address_offset_is_imm (rtx insn)
13611 {
13612 rtx mem, addr;
13613
13614 extract_insn_cached (insn);
13615
13616 if (REG_P (recog_data.operand[0]))
13617 return 0;
13618
13619 mem = recog_data.operand[0];
13620
13621 gcc_assert (MEM_P (mem));
13622
13623 addr = XEXP (mem, 0);
13624
13625 if (GET_CODE (addr) == REG
13626 || (GET_CODE (addr) == PLUS
13627 && GET_CODE (XEXP (addr, 0)) == REG
13628 && GET_CODE (XEXP (addr, 1)) == CONST_INT))
13629 return 1;
13630 else
13631 return 0;
13632 }
13633
12673 /* Output an ADD r, s, #n where n may be too big for one instruction. 13634 /* Output an ADD r, s, #n where n may be too big for one instruction.
12674 If adding zero to one register, output nothing. */ 13635 If adding zero to one register, output nothing. */
12675 const char * 13636 const char *
12676 output_add_immediate (rtx *operands) 13637 output_add_immediate (rtx *operands)
12677 { 13638 {
13825 && bit_count(saved_regs_mask) * 4 == count 14786 && bit_count(saved_regs_mask) * 4 == count
13826 && !IS_INTERRUPT (func_type) 14787 && !IS_INTERRUPT (func_type)
13827 && !crtl->tail_call_emit) 14788 && !crtl->tail_call_emit)
13828 { 14789 {
13829 unsigned long mask; 14790 unsigned long mask;
13830 mask = (1 << (arm_size_return_regs() / 4)) - 1; 14791 /* Preserve return values, of any size. */
14792 mask = (1 << ((arm_size_return_regs() + 3) / 4)) - 1;
13831 mask ^= 0xf; 14793 mask ^= 0xf;
13832 mask &= ~saved_regs_mask; 14794 mask &= ~saved_regs_mask;
13833 reg = 0; 14795 reg = 0;
13834 while (bit_count (mask) * 4 > amount) 14796 while (bit_count (mask) * 4 > amount)
13835 { 14797 {
14281 { 15243 {
14282 return !cfun->machine->lr_save_eliminated 15244 return !cfun->machine->lr_save_eliminated
14283 && (!leaf_function_p () 15245 && (!leaf_function_p ()
14284 || thumb_far_jump_used_p () 15246 || thumb_far_jump_used_p ()
14285 || df_regs_ever_live_p (LR_REGNUM)); 15247 || df_regs_ever_live_p (LR_REGNUM));
15248 }
15249
15250
15251 /* Return true if r3 is used by any of the tail call insns in the
15252 current function. */
15253
15254 static bool
15255 any_sibcall_uses_r3 (void)
15256 {
15257 edge_iterator ei;
15258 edge e;
15259
15260 if (!crtl->tail_call_emit)
15261 return false;
15262 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
15263 if (e->flags & EDGE_SIBCALL)
15264 {
15265 rtx call = BB_END (e->src);
15266 if (!CALL_P (call))
15267 call = prev_nonnote_nondebug_insn (call);
15268 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
15269 if (find_regno_fusage (call, USE, 3))
15270 return true;
15271 }
15272 return false;
14286 } 15273 }
14287 15274
14288 15275
14289 /* Compute the distance from register FROM to register TO. 15276 /* Compute the distance from register FROM to register TO.
14290 These can be the arg pointer (26), the soft frame pointer (25), 15277 These can be the arg pointer (26), the soft frame pointer (25),
14448 int reg = -1; 15435 int reg = -1;
14449 15436
14450 /* If it is safe to use r3, then do so. This sometimes 15437 /* If it is safe to use r3, then do so. This sometimes
14451 generates better code on Thumb-2 by avoiding the need to 15438 generates better code on Thumb-2 by avoiding the need to
14452 use 32-bit push/pop instructions. */ 15439 use 32-bit push/pop instructions. */
14453 if (!crtl->tail_call_emit 15440 if (! any_sibcall_uses_r3 ()
14454 && arm_size_return_regs () <= 12) 15441 && arm_size_return_regs () <= 12
15442 && (offsets->saved_regs_mask & (1 << 3)) == 0)
14455 { 15443 {
14456 reg = 3; 15444 reg = 3;
14457 } 15445 }
14458 else 15446 else
14459 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++) 15447 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
14956 stack_pointer_rtx, insn)); 15944 stack_pointer_rtx, insn));
14957 RTX_FRAME_RELATED_P (insn) = 1; 15945 RTX_FRAME_RELATED_P (insn) = 1;
14958 } 15946 }
14959 } 15947 }
14960 15948
15949 if (flag_stack_usage)
15950 current_function_static_stack_size
15951 = offsets->outgoing_args - offsets->saved_args;
15952
14961 if (offsets->outgoing_args != offsets->saved_args + saved_regs) 15953 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
14962 { 15954 {
14963 /* This add can produce multiple insns for a large constant, so we 15955 /* This add can produce multiple insns for a large constant, so we
14964 need to get tricky. */ 15956 need to get tricky. */
14965 rtx last = get_last_insn (); 15957 rtx last = get_last_insn ();
15003 the call to mcount. Similarly if the user has requested no 15995 the call to mcount. Similarly if the user has requested no
15004 scheduling in the prolog. Similarly if we want non-call exceptions 15996 scheduling in the prolog. Similarly if we want non-call exceptions
15005 using the EABI unwinder, to prevent faulting instructions from being 15997 using the EABI unwinder, to prevent faulting instructions from being
15006 swapped with a stack adjustment. */ 15998 swapped with a stack adjustment. */
15007 if (crtl->profile || !TARGET_SCHED_PROLOG 15999 if (crtl->profile || !TARGET_SCHED_PROLOG
15008 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions)) 16000 || (arm_except_unwind_info (&global_options) == UI_TARGET
16001 && cfun->can_throw_non_call_exceptions))
15009 emit_insn (gen_blockage ()); 16002 emit_insn (gen_blockage ());
15010 16003
15011 /* If the link register is being kept alive, with the return address in it, 16004 /* If the link register is being kept alive, with the return address in it,
15012 then make sure that it does not get reused by the ce2 pass. */ 16005 then make sure that it does not get reused by the ce2 pass. */
15013 if ((live_regs_mask & (1 << LR_REGNUM)) == 0) 16006 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
15062 doing this instruction unconditionally. 16055 doing this instruction unconditionally.
15063 If CODE is 'N' then X is a floating point operand that must be negated 16056 If CODE is 'N' then X is a floating point operand that must be negated
15064 before output. 16057 before output.
15065 If CODE is 'B' then output a bitwise inverted value of X (a const int). 16058 If CODE is 'B' then output a bitwise inverted value of X (a const int).
15066 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */ 16059 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
15067 void 16060 static void
15068 arm_print_operand (FILE *stream, rtx x, int code) 16061 arm_print_operand (FILE *stream, rtx x, int code)
15069 { 16062 {
15070 switch (code) 16063 switch (code)
15071 { 16064 {
15072 case '@': 16065 case '@':
15240 register, and the value from the higher address is put into the 16233 register, and the value from the higher address is put into the
15241 higher numbered register, the load will work regardless of whether 16234 higher numbered register, the load will work regardless of whether
15242 the value being loaded is big-wordian or little-wordian. The 16235 the value being loaded is big-wordian or little-wordian. The
15243 order of the two register loads can matter however, if the address 16236 order of the two register loads can matter however, if the address
15244 of the memory location is actually held in one of the registers 16237 of the memory location is actually held in one of the registers
15245 being overwritten by the load. */ 16238 being overwritten by the load.
16239
16240 The 'Q' and 'R' constraints are also available for 64-bit
16241 constants. */
15246 case 'Q': 16242 case 'Q':
16243 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16244 {
16245 rtx part = gen_lowpart (SImode, x);
16246 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16247 return;
16248 }
16249
15247 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) 16250 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15248 { 16251 {
15249 output_operand_lossage ("invalid operand for code '%c'", code); 16252 output_operand_lossage ("invalid operand for code '%c'", code);
15250 return; 16253 return;
15251 } 16254 }
15252 16255
15253 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0)); 16256 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
15254 return; 16257 return;
15255 16258
15256 case 'R': 16259 case 'R':
16260 if (GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST_DOUBLE)
16261 {
16262 enum machine_mode mode = GET_MODE (x);
16263 rtx part;
16264
16265 if (mode == VOIDmode)
16266 mode = DImode;
16267 part = gen_highpart_mode (SImode, mode, x);
16268 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
16269 return;
16270 }
16271
15257 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM) 16272 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
15258 { 16273 {
15259 output_operand_lossage ("invalid operand for code '%c'", code); 16274 output_operand_lossage ("invalid operand for code '%c'", code);
15260 return; 16275 return;
15261 } 16276 }
15574 /* Memory operand for vld1/vst1 instruction. */ 16589 /* Memory operand for vld1/vst1 instruction. */
15575 case 'A': 16590 case 'A':
15576 { 16591 {
15577 rtx addr; 16592 rtx addr;
15578 bool postinc = FALSE; 16593 bool postinc = FALSE;
16594 unsigned align, modesize, align_bits;
16595
15579 gcc_assert (GET_CODE (x) == MEM); 16596 gcc_assert (GET_CODE (x) == MEM);
15580 addr = XEXP (x, 0); 16597 addr = XEXP (x, 0);
15581 if (GET_CODE (addr) == POST_INC) 16598 if (GET_CODE (addr) == POST_INC)
15582 { 16599 {
15583 postinc = 1; 16600 postinc = 1;
15584 addr = XEXP (addr, 0); 16601 addr = XEXP (addr, 0);
15585 } 16602 }
15586 asm_fprintf (stream, "[%r]", REGNO (addr)); 16603 asm_fprintf (stream, "[%r", REGNO (addr));
16604
16605 /* We know the alignment of this access, so we can emit a hint in the
16606 instruction (for some alignments) as an aid to the memory subsystem
16607 of the target. */
16608 align = MEM_ALIGN (x) >> 3;
16609 modesize = GET_MODE_SIZE (GET_MODE (x));
16610
16611 /* Only certain alignment specifiers are supported by the hardware. */
16612 if (modesize == 16 && (align % 32) == 0)
16613 align_bits = 256;
16614 else if ((modesize == 8 || modesize == 16) && (align % 16) == 0)
16615 align_bits = 128;
16616 else if ((align % 8) == 0)
16617 align_bits = 64;
16618 else
16619 align_bits = 0;
16620
16621 if (align_bits != 0)
16622 asm_fprintf (stream, ":%d", align_bits);
16623
16624 asm_fprintf (stream, "]");
16625
15587 if (postinc) 16626 if (postinc)
15588 fputs("!", stream); 16627 fputs("!", stream);
16628 }
16629 return;
16630
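	/* Illustrative outputs for the hint above: a 16-byte access on a
	   32-byte-aligned MEM prints "[r0:256]", an 8- or 16-byte access on
	   a 16-byte-aligned MEM prints "[r0:128]", any 8-byte-aligned access
	   prints "[r0:64]", and anything less aligned falls back to a plain
	   "[r0]" (with "!" still appended for post-increment).  */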
16631 case 'C':
16632 {
16633 rtx addr;
16634
16635 gcc_assert (GET_CODE (x) == MEM);
16636 addr = XEXP (x, 0);
16637 gcc_assert (GET_CODE (addr) == REG);
16638 asm_fprintf (stream, "[%r]", REGNO (addr));
15589 } 16639 }
15590 return; 16640 return;
15591 16641
15592 /* Translate an S register number into a D register number and element index. */ 16642 /* Translate an S register number into a D register number and element index. */
15593 case 'y': 16643 case 'y':
15679 16729
15680 output_addr_const (stream, x); 16730 output_addr_const (stream, x);
15681 break; 16731 break;
15682 } 16732 }
15683 } 16733 }
16734 }
16735
16736 /* Target hook for printing a memory address. */
16737 static void
16738 arm_print_operand_address (FILE *stream, rtx x)
16739 {
16740 if (TARGET_32BIT)
16741 {
16742 int is_minus = GET_CODE (x) == MINUS;
16743
16744 if (GET_CODE (x) == REG)
16745 asm_fprintf (stream, "[%r, #0]", REGNO (x));
16746 else if (GET_CODE (x) == PLUS || is_minus)
16747 {
16748 rtx base = XEXP (x, 0);
16749 rtx index = XEXP (x, 1);
16750 HOST_WIDE_INT offset = 0;
16751 if (GET_CODE (base) != REG
16752 || (GET_CODE (index) == REG && REGNO (index) == SP_REGNUM))
16753 {
16754 /* Ensure that BASE is a register. */
16755 /* (one of them must be). */
16756 /* Also ensure the SP is not used as an index register. */
16757 rtx temp = base;
16758 base = index;
16759 index = temp;
16760 }
16761 switch (GET_CODE (index))
16762 {
16763 case CONST_INT:
16764 offset = INTVAL (index);
16765 if (is_minus)
16766 offset = -offset;
16767 asm_fprintf (stream, "[%r, #%wd]",
16768 REGNO (base), offset);
16769 break;
16770
16771 case REG:
16772 asm_fprintf (stream, "[%r, %s%r]",
16773 REGNO (base), is_minus ? "-" : "",
16774 REGNO (index));
16775 break;
16776
16777 case MULT:
16778 case ASHIFTRT:
16779 case LSHIFTRT:
16780 case ASHIFT:
16781 case ROTATERT:
16782 {
16783 asm_fprintf (stream, "[%r, %s%r",
16784 REGNO (base), is_minus ? "-" : "",
16785 REGNO (XEXP (index, 0)));
16786 arm_print_operand (stream, index, 'S');
16787 fputs ("]", stream);
16788 break;
16789 }
16790
16791 default:
16792 gcc_unreachable ();
16793 }
16794 }
16795 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
16796 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
16797 {
16798 extern enum machine_mode output_memory_reference_mode;
16799
16800 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16801
16802 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
16803 asm_fprintf (stream, "[%r, #%s%d]!",
16804 REGNO (XEXP (x, 0)),
16805 GET_CODE (x) == PRE_DEC ? "-" : "",
16806 GET_MODE_SIZE (output_memory_reference_mode));
16807 else
16808 asm_fprintf (stream, "[%r], #%s%d",
16809 REGNO (XEXP (x, 0)),
16810 GET_CODE (x) == POST_DEC ? "-" : "",
16811 GET_MODE_SIZE (output_memory_reference_mode));
16812 }
16813 else if (GET_CODE (x) == PRE_MODIFY)
16814 {
16815 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
16816 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16817 asm_fprintf (stream, "#%wd]!",
16818 INTVAL (XEXP (XEXP (x, 1), 1)));
16819 else
16820 asm_fprintf (stream, "%r]!",
16821 REGNO (XEXP (XEXP (x, 1), 1)));
16822 }
16823 else if (GET_CODE (x) == POST_MODIFY)
16824 {
16825 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
16826 if (GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
16827 asm_fprintf (stream, "#%wd",
16828 INTVAL (XEXP (XEXP (x, 1), 1)));
16829 else
16830 asm_fprintf (stream, "%r",
16831 REGNO (XEXP (XEXP (x, 1), 1)));
16832 }
16833 else output_addr_const (stream, x);
16834 }
16835 else
16836 {
16837 if (GET_CODE (x) == REG)
16838 asm_fprintf (stream, "[%r]", REGNO (x));
16839 else if (GET_CODE (x) == POST_INC)
16840 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
16841 else if (GET_CODE (x) == PLUS)
16842 {
16843 gcc_assert (GET_CODE (XEXP (x, 0)) == REG);
16844 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
16845 asm_fprintf (stream, "[%r, #%wd]",
16846 REGNO (XEXP (x, 0)),
16847 INTVAL (XEXP (x, 1)));
16848 else
16849 asm_fprintf (stream, "[%r, %r]",
16850 REGNO (XEXP (x, 0)),
16851 REGNO (XEXP (x, 1)));
16852 }
16853 else
16854 output_addr_const (stream, x);
16855 }
16856 }
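/* Examples of the 32-bit forms printed above (illustrative): (reg r0)
   prints as "[r0, #0]"; (plus r0 (const_int 4)) as "[r0, #4]";
   (minus r0 r1) as "[r0, -r1]"; (plus r0 (mult r1 4)) as
   "[r0, r1, lsl #2]"; pre- and post-modify addresses print as
   "[r0, #8]!" and "[r0], #8".  The Thumb-1 branch uses the simpler
   "[r0]", "[r0, #4]" and "[r0, r1]" forms instead.  */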
16857
16858 /* Target hook for indicating whether a punctuation character for
16859 TARGET_PRINT_OPERAND is valid. */
16860 static bool
16861 arm_print_operand_punct_valid_p (unsigned char code)
16862 {
16863 return (code == '@' || code == '|' || code == '.'
16864 || code == '(' || code == ')' || code == '#'
16865 || (TARGET_32BIT && (code == '?'))
16866 || (TARGET_THUMB2 && (code == '!'))
16867 || (TARGET_THUMB && (code == '_')));
15684 } 16868 }
15685 16869
15686 /* Target hook for assembling integer objects. The ARM version needs to 16870 /* Target hook for assembling integer objects. The ARM version needs to
15687 handle word-sized values specially. */ 16871 handle word-sized values specially. */
15688 static bool 16872 static bool
15938 default: gcc_unreachable (); 17122 default: gcc_unreachable ();
15939 } 17123 }
15940 17124
15941 case CC_Cmode: 17125 case CC_Cmode:
15942 switch (comp_code) 17126 switch (comp_code)
15943 { 17127 {
15944 case LTU: return ARM_CS; 17128 case LTU: return ARM_CS;
15945 case GEU: return ARM_CC; 17129 case GEU: return ARM_CC;
15946 default: gcc_unreachable (); 17130 default: gcc_unreachable ();
15947 } 17131 }
17132
17133 case CC_CZmode:
17134 switch (comp_code)
17135 {
17136 case NE: return ARM_NE;
17137 case EQ: return ARM_EQ;
17138 case GEU: return ARM_CS;
17139 case GTU: return ARM_HI;
17140 case LEU: return ARM_LS;
17141 case LTU: return ARM_CC;
17142 default: gcc_unreachable ();
17143 }
17144
17145 case CC_NCVmode:
17146 switch (comp_code)
17147 {
17148 case GE: return ARM_GE;
17149 case LT: return ARM_LT;
17150 case GEU: return ARM_CS;
17151 case LTU: return ARM_CC;
17152 default: gcc_unreachable ();
17153 }
15948 17154
15949 case CCmode: 17155 case CCmode:
15950 switch (comp_code) 17156 switch (comp_code)
15951 { 17157 {
15952 case NE: return ARM_NE; 17158 case NE: return ARM_NE;
18077 } 19283 }
18078 19284
18079 static enum insn_code 19285 static enum insn_code
18080 locate_neon_builtin_icode (int fcode, neon_itype *itype) 19286 locate_neon_builtin_icode (int fcode, neon_itype *itype)
18081 { 19287 {
18082 neon_builtin_datum key, *found; 19288 neon_builtin_datum key
19289 = { NULL, (neon_itype) 0, 0, { CODE_FOR_nothing }, 0, 0 };
19290 neon_builtin_datum *found;
18083 int idx; 19291 int idx;
18084 19292
18085 key.base_fcode = fcode; 19293 key.base_fcode = fcode;
18086 found = (neon_builtin_datum *) 19294 found = (neon_builtin_datum *)
18087 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data), 19295 bsearch (&key, &neon_builtin_data[0], ARRAY_SIZE (neon_builtin_data),
18670 thumb_exit() */ 19878 thumb_exit() */
18671 thumb_exit (f, -1); 19879 thumb_exit (f, -1);
18672 return; 19880 return;
18673 } 19881 }
18674 19882
18675 if (ARM_EABI_UNWIND_TABLES && push) 19883 if (push && arm_except_unwind_info (&global_options) == UI_TARGET)
18676 { 19884 {
18677 fprintf (f, "\t.save\t{"); 19885 fprintf (f, "\t.save\t{");
18678 for (regno = 0; regno < 15; regno++) 19886 for (regno = 0; regno < 15; regno++)
18679 { 19887 {
18680 if (real_regs & (1 << regno)) 19888 if (real_regs & (1 << regno))
19030 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM); 20238 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
19031 20239
19032 /* Return to caller. */ 20240 /* Return to caller. */
19033 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr); 20241 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
19034 } 20242 }
19035
19036 20243
20244 /* Scan INSN just before assembler is output for it.
20245 For Thumb-1, we track the status of the condition codes; this
20246 information is used in the cbranchsi4_insn pattern. */
19037 void 20247 void
19038 thumb1_final_prescan_insn (rtx insn) 20248 thumb1_final_prescan_insn (rtx insn)
19039 { 20249 {
19040 if (flag_print_asm_name) 20250 if (flag_print_asm_name)
19041 asm_fprintf (asm_out_file, "%@ 0x%04x\n", 20251 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
19042 INSN_ADDRESSES (INSN_UID (insn))); 20252 INSN_ADDRESSES (INSN_UID (insn)));
20253 /* Don't overwrite the previous setter when we get to a cbranch. */
20254 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
20255 {
20256 enum attr_conds conds;
20257
20258 if (cfun->machine->thumb1_cc_insn)
20259 {
20260 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
20261 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
20262 CC_STATUS_INIT;
20263 }
20264 conds = get_attr_conds (insn);
20265 if (conds == CONDS_SET)
20266 {
20267 rtx set = single_set (insn);
20268 cfun->machine->thumb1_cc_insn = insn;
20269 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
20270 cfun->machine->thumb1_cc_op1 = const0_rtx;
20271 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
20272 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
20273 {
20274 rtx src1 = XEXP (SET_SRC (set), 1);
20275 if (src1 == const0_rtx)
20276 cfun->machine->thumb1_cc_mode = CCmode;
20277 }
20278 }
20279 else if (conds != CONDS_NOCOND)
20280 cfun->machine->thumb1_cc_insn = NULL_RTX;
20281 }
19043 } 20282 }
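/* Illustrative example of the tracking above: after a flag-setting
   Thumb-1 insn such as "adds r3, r3, r2", the fields record that the
   condition codes currently describe a CC_NOOVmode comparison of r3 with
   zero, so a following cbranchsi4_insn that tests r3 against 0 for EQ/NE
   can branch on the existing flags rather than emit a fresh compare; any
   intervening insn that modifies r3 (or the recorded second operand)
   clears the state through CC_STATUS_INIT.  */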
19044 20283
19045 int 20284 int
19046 thumb_shiftable_const (unsigned HOST_WIDE_INT val) 20285 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
19047 { 20286 {
19145 #else 20384 #else
19146 return FALSE; 20385 return FALSE;
19147 #endif 20386 #endif
19148 } 20387 }
19149 20388
20389 /* Given the stack offsets and register mask in OFFSETS, decide how
20390 many additional registers to push instead of subtracting a constant
20391 from SP. For epilogues the principle is the same except we use pop.
20392 FOR_PROLOGUE indicates which we're generating. */
20393 static int
20394 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
20395 {
20396 HOST_WIDE_INT amount;
20397 unsigned long live_regs_mask = offsets->saved_regs_mask;
20398 /* Extract a mask of the ones we can give to the Thumb's push/pop
20399 instruction. */
20400 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
20401 /* Then count how many other high registers will need to be pushed. */
20402 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
20403 int n_free, reg_base;
20404
20405 if (!for_prologue && frame_pointer_needed)
20406 amount = offsets->locals_base - offsets->saved_regs;
20407 else
20408 amount = offsets->outgoing_args - offsets->saved_regs;
20409
20410 /* If the stack frame size is 512 exactly, we can save one load
20411 instruction, which should make this a win even when optimizing
20412 for speed. */
20413 if (!optimize_size && amount != 512)
20414 return 0;
20415
20416 /* Can't do this if there are high registers to push. */
20417 if (high_regs_pushed != 0)
20418 return 0;
20419
20420 /* Shouldn't do it in the prologue if no registers would normally
20421 be pushed at all. In the epilogue, also allow it if we'll have
20422 a pop insn for the PC. */
20423 if (l_mask == 0
20424 && (for_prologue
20425 || TARGET_BACKTRACE
20426 || (live_regs_mask & 1 << LR_REGNUM) == 0
20427 || TARGET_INTERWORK
20428 || crtl->args.pretend_args_size != 0))
20429 return 0;
20430
20431 /* Don't do this if thumb1_expand_prologue wants to emit instructions
20432 between the push and the stack frame allocation. */
20433 if (for_prologue
20434 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
20435 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
20436 return 0;
20437
20438 reg_base = 0;
20439 n_free = 0;
20440 if (!for_prologue)
20441 {
20442 reg_base = arm_size_return_regs () / UNITS_PER_WORD;
20443 live_regs_mask >>= reg_base;
20444 }
20445
20446 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
20447 && (for_prologue || call_used_regs[reg_base + n_free]))
20448 {
20449 live_regs_mask >>= 1;
20450 n_free++;
20451 }
20452
20453 if (n_free == 0)
20454 return 0;
20455 gcc_assert (amount / 4 * 4 == amount);
20456
20457 if (amount >= 512 && (amount - n_free * 4) < 512)
20458 return (amount - 508) / 4;
20459 if (amount <= n_free * 4)
20460 return amount / 4;
20461 return 0;
20462 }
20463
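A small worked example of the return value above, using illustrative numbers: when optimizing for speed only a frame of exactly 512 bytes qualifies, so suppose

    amount              = 512
    n_free              = 2       (two free low registers)
    amount - n_free * 4 = 504   < 512
    return value        = (512 - 508) / 4 = 1

One extra register is pushed, the remaining stack adjustment shrinks to 508 bytes, and that fits the amount < 512 fast path used by thumb1_expand_prologue, avoiding the extra instructions otherwise needed to load a larger constant.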
19150 /* The bits which aren't usefully expanded as rtl. */ 20464 /* The bits which aren't usefully expanded as rtl. */
19151 const char * 20465 const char *
19152 thumb_unexpanded_epilogue (void) 20466 thumb_unexpanded_epilogue (void)
19153 { 20467 {
19154 arm_stack_offsets *offsets; 20468 arm_stack_offsets *offsets;
19155 int regno; 20469 int regno;
19156 unsigned long live_regs_mask = 0; 20470 unsigned long live_regs_mask = 0;
19157 int high_regs_pushed = 0; 20471 int high_regs_pushed = 0;
20472 int extra_pop;
19158 int had_to_push_lr; 20473 int had_to_push_lr;
19159 int size; 20474 int size;
19160 20475
19161 if (cfun->machine->return_used_this_function != 0) 20476 if (cfun->machine->return_used_this_function != 0)
19162 return ""; 20477 return "";
19171 /* Where possible, deduce the registers used from the function's return value. 20486 /* Where possible, deduce the registers used from the function's return value.
19172 This is more reliable than examining df_regs_ever_live_p () because that 20487 This is more reliable than examining df_regs_ever_live_p () because that
19173 will be set if the register is ever used in the function, not just if 20488 will be set if the register is ever used in the function, not just if
19174 the register is used to hold a return value. */ 20489 the register is used to hold a return value. */
19175 size = arm_size_return_regs (); 20490 size = arm_size_return_regs ();
20491
20492 extra_pop = thumb1_extra_regs_pushed (offsets, false);
20493 if (extra_pop > 0)
20494 {
20495 unsigned long extra_mask = (1 << extra_pop) - 1;
20496 live_regs_mask |= extra_mask << (size / UNITS_PER_WORD);
20497 }
19176 20498
19177 /* The prolog may have pushed some high registers to use as 20499 /* The prolog may have pushed some high registers to use as
19178 work registers. e.g. the testsuite file: 20500 work registers. e.g. the testsuite file:
19179 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c 20501 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
19180 compiles to produce: 20502 compiles to produce:
19255 if (live_regs_mask) 20577 if (live_regs_mask)
19256 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL, 20578 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
19257 live_regs_mask); 20579 live_regs_mask);
19258 20580
19259 /* We have either just popped the return address into the 20581 /* We have either just popped the return address into the
19260 PC or it was kept in LR for the entire function. 20582 PC or it was kept in LR for the entire function.
20583 Note that thumb_pushpop has already called thumb_exit if the
20584 PC was in the list. */
19261 if (!had_to_push_lr) 20585 if (!had_to_push_lr)
19262 thumb_exit (asm_out_file, LR_REGNUM); 20586 thumb_exit (asm_out_file, LR_REGNUM);
19263 } 20587 }
19264 else 20588 else
19265 { 20589 {
19311 /* Functions to save and restore machine-specific function data. */ 20635 /* Functions to save and restore machine-specific function data. */
19312 static struct machine_function * 20636 static struct machine_function *
19313 arm_init_machine_status (void) 20637 arm_init_machine_status (void)
19314 { 20638 {
19315 struct machine_function *machine; 20639 struct machine_function *machine;
19316 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function)); 20640 machine = ggc_alloc_cleared_machine_function ();
19317 20641
19318 #if ARM_FT_UNKNOWN != 0 20642 #if ARM_FT_UNKNOWN != 0
19319 machine->func_type = ARM_FT_UNKNOWN; 20643 machine->func_type = ARM_FT_UNKNOWN;
19320 #endif 20644 #endif
19321 return machine; 20645 return machine;
19399 break; 20723 break;
19400 20724
19401 default: 20725 default:
19402 gcc_unreachable (); 20726 gcc_unreachable ();
19403 } 20727 }
19404 }
19405
19406 /* Given the stack offsets and register mask in OFFSETS, decide
19407 how many additional registers to push instead of subtracting
19408 a constant from SP. */
19409 static int
19410 thumb1_extra_regs_pushed (arm_stack_offsets *offsets)
19411 {
19412 HOST_WIDE_INT amount = offsets->outgoing_args - offsets->saved_regs;
19413 unsigned long live_regs_mask = offsets->saved_regs_mask;
19414 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19415 unsigned long l_mask = live_regs_mask & 0x40ff;
19416 /* Then count how many other high registers will need to be pushed. */
19417 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19418 int n_free;
19419
19420 /* If the stack frame size is 512 exactly, we can save one load
19421 instruction, which should make this a win even when optimizing
19422 for speed. */
19423 if (!optimize_size && amount != 512)
19424 return 0;
19425
19426 /* Can't do this if there are high registers to push, or if we
19427 are not going to do a push at all. */
19428 if (high_regs_pushed != 0 || l_mask == 0)
19429 return 0;
19430
19431 /* Don't do this if thumb1_expand_prologue wants to emit instructions
19432 between the push and the stack frame allocation. */
19433 if ((flag_pic && arm_pic_register != INVALID_REGNUM)
19434 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0))
19435 return 0;
19436
19437 for (n_free = 0; n_free < 8 && !(live_regs_mask & 1); live_regs_mask >>= 1)
19438 n_free++;
19439
19440 if (n_free == 0)
19441 return 0;
19442 gcc_assert (amount / 4 * 4 == amount);
19443
19444 if (amount >= 512 && (amount - n_free * 4) < 512)
19445 return (amount - 508) / 4;
19446 if (amount <= n_free * 4)
19447 return amount / 4;
19448 return 0;
19449 } 20728 }
19450 20729
19451 /* Generate the rest of a function's prologue. */ 20730 /* Generate the rest of a function's prologue. */
19452 void 20731 void
19453 thumb1_expand_prologue (void) 20732 thumb1_expand_prologue (void)
19481 20760
19482 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0) 20761 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19483 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM), 20762 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
19484 stack_pointer_rtx); 20763 stack_pointer_rtx);
19485 20764
20765 if (flag_stack_usage)
20766 current_function_static_stack_size
20767 = offsets->outgoing_args - offsets->saved_args;
20768
19486 amount = offsets->outgoing_args - offsets->saved_regs; 20769 amount = offsets->outgoing_args - offsets->saved_regs;
19487 amount -= 4 * thumb1_extra_regs_pushed (offsets); 20770 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
19488 if (amount) 20771 if (amount)
19489 { 20772 {
19490 if (amount < 512) 20773 if (amount < 512)
19491 { 20774 {
19492 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, 20775 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19539 the call to mcount. Similarly if the user has requested no 20822 the call to mcount. Similarly if the user has requested no
19540 scheduling in the prolog. Similarly if we want non-call exceptions 20823 scheduling in the prolog. Similarly if we want non-call exceptions
19541 using the EABI unwinder, to prevent faulting instructions from being 20824 using the EABI unwinder, to prevent faulting instructions from being
19542 swapped with a stack adjustment. */ 20825 swapped with a stack adjustment. */
19543 if (crtl->profile || !TARGET_SCHED_PROLOG 20826 if (crtl->profile || !TARGET_SCHED_PROLOG
19544 || (ARM_EABI_UNWIND_TABLES && flag_non_call_exceptions)) 20827 || (arm_except_unwind_info (&global_options) == UI_TARGET
20828 && cfun->can_throw_non_call_exceptions))
19545 emit_insn (gen_blockage ()); 20829 emit_insn (gen_blockage ());
19546 20830
19547 cfun->machine->lr_save_eliminated = !thumb_force_lr_save (); 20831 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
19548 if (live_regs_mask & 0xff) 20832 if (live_regs_mask & 0xff)
19549 cfun->machine->lr_save_eliminated = 0; 20833 cfun->machine->lr_save_eliminated = 0;
19567 if (frame_pointer_needed) 20851 if (frame_pointer_needed)
19568 { 20852 {
19569 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx)); 20853 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
19570 amount = offsets->locals_base - offsets->saved_regs; 20854 amount = offsets->locals_base - offsets->saved_regs;
19571 } 20855 }
20856 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
19572 20857
19573 gcc_assert (amount >= 0); 20858 gcc_assert (amount >= 0);
19574 if (amount) 20859 if (amount)
19575 { 20860 {
19576 if (amount < 512) 20861 if (amount < 512)
19651 } 20936 }
19652 20937
19653 if (crtl->args.pretend_args_size) 20938 if (crtl->args.pretend_args_size)
19654 { 20939 {
19655 /* Output unwind directive for the stack adjustment. */ 20940 /* Output unwind directive for the stack adjustment. */
19656 if (ARM_EABI_UNWIND_TABLES) 20941 if (arm_except_unwind_info (&global_options) == UI_TARGET)
19657 fprintf (f, "\t.pad #%d\n", 20942 fprintf (f, "\t.pad #%d\n",
19658 crtl->args.pretend_args_size); 20943 crtl->args.pretend_args_size);
19659 20944
19660 if (cfun->machine->uses_anonymous_args) 20945 if (cfun->machine->uses_anonymous_args)
19661 { 20946 {
19721 20 add R7, SP, #16 Point at the start of the backtrace structure. 21006 20 add R7, SP, #16 Point at the start of the backtrace structure.
19722 22 mov FP, R7 Put this value into the frame pointer. */ 21007 22 mov FP, R7 Put this value into the frame pointer. */
19723 21008
19724 work_register = thumb_find_work_register (live_regs_mask); 21009 work_register = thumb_find_work_register (live_regs_mask);
19725 21010
19726 if (ARM_EABI_UNWIND_TABLES) 21011 if (arm_except_unwind_info (&global_options) == UI_TARGET)
19727 asm_fprintf (f, "\t.pad #16\n"); 21012 asm_fprintf (f, "\t.pad #16\n");
19728 21013
19729 asm_fprintf 21014 asm_fprintf
19730 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n", 21015 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
19731 SP_REGNUM, SP_REGNUM); 21016 SP_REGNUM, SP_REGNUM);
19789 register. */ 21074 register. */
19790 else if ((l_mask & 0xff) != 0 21075 else if ((l_mask & 0xff) != 0
19791 || (high_regs_pushed == 0 && l_mask)) 21076 || (high_regs_pushed == 0 && l_mask))
19792 { 21077 {
19793 unsigned long mask = l_mask; 21078 unsigned long mask = l_mask;
19794 mask |= (1 << thumb1_extra_regs_pushed (offsets)) - 1; 21079 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
19795 thumb_pushpop (f, mask, 1, &cfa_offset, mask); 21080 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
19796 } 21081 }
19797 21082
19798 if (high_regs_pushed) 21083 if (high_regs_pushed)
19799 { 21084 {
20165 asm_fprintf (asm_out_file, "\t.syntax unified\n"); 21450 asm_fprintf (asm_out_file, "\t.syntax unified\n");
20166 21451
20167 if (TARGET_BPABI) 21452 if (TARGET_BPABI)
20168 { 21453 {
20169 const char *fpu_name; 21454 const char *fpu_name;
20170 if (arm_select[0].string) 21455 if (arm_selected_arch)
20171 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_select[0].string); 21456 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
20172 else if (arm_select[1].string)
20173 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_select[1].string);
20174 else 21457 else
20175 asm_fprintf (asm_out_file, "\t.cpu %s\n", 21458 asm_fprintf (asm_out_file, "\t.cpu %s\n", arm_selected_cpu->name);
20176 all_cores[arm_default_cpu].name);
20177 21459
20178 if (TARGET_SOFT_FLOAT) 21460 if (TARGET_SOFT_FLOAT)
20179 { 21461 {
20180 if (TARGET_VFP) 21462 if (TARGET_VFP)
20181 fpu_name = "softvfp"; 21463 fpu_name = "softvfp";
20566 addr = XEXP (addr, 0); 21848 addr = XEXP (addr, 0);
20567 21849
20568 return !reg_overlap_mentioned_p (value, addr); 21850 return !reg_overlap_mentioned_p (value, addr);
20569 } 21851 }
20570 21852
21853 /* Return nonzero if the CONSUMER instruction (a store) does need
21854 PRODUCER's value to calculate the address. */
21855
21856 int
21857 arm_early_store_addr_dep (rtx producer, rtx consumer)
21858 {
21859 return !arm_no_early_store_addr_dep (producer, consumer);
21860 }
21861
21862 /* Return nonzero if the CONSUMER instruction (a load) does need
21863 PRODUCER's value to calculate the address. */
21864
21865 int
21866 arm_early_load_addr_dep (rtx producer, rtx consumer)
21867 {
21868 rtx value = PATTERN (producer);
21869 rtx addr = PATTERN (consumer);
21870
21871 if (GET_CODE (value) == COND_EXEC)
21872 value = COND_EXEC_CODE (value);
21873 if (GET_CODE (value) == PARALLEL)
21874 value = XVECEXP (value, 0, 0);
21875 value = XEXP (value, 0);
21876 if (GET_CODE (addr) == COND_EXEC)
21877 addr = COND_EXEC_CODE (addr);
21878 if (GET_CODE (addr) == PARALLEL)
21879 addr = XVECEXP (addr, 0, 0);
21880 addr = XEXP (addr, 1);
21881
21882 return reg_overlap_mentioned_p (value, addr);
21883 }
21884
20571 /* Return nonzero if the CONSUMER instruction (an ALU op) does not 21885 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
20572 have an early register shift value or amount dependency on the 21886 have an early register shift value or amount dependency on the
20573 result of PRODUCER. */ 21887 result of PRODUCER. */
20574 21888
20575 int 21889 int
20944 return true; 22258 return true;
20945 22259
20946 return false; 22260 return false;
20947 } 22261 }
20948 22262
22263 /* Use the option -mvectorize-with-neon-quad to override the use of doubleword
22264 registers when autovectorizing for Neon, at least until multiple vector
22265 widths are supported properly by the middle-end. */
22266
22267 static enum machine_mode
22268 arm_preferred_simd_mode (enum machine_mode mode)
22269 {
22270 if (TARGET_NEON)
22271 switch (mode)
22272 {
22273 case SFmode:
22274 return TARGET_NEON_VECTORIZE_QUAD ? V4SFmode : V2SFmode;
22275 case SImode:
22276 return TARGET_NEON_VECTORIZE_QUAD ? V4SImode : V2SImode;
22277 case HImode:
22278 return TARGET_NEON_VECTORIZE_QUAD ? V8HImode : V4HImode;
22279 case QImode:
22280 return TARGET_NEON_VECTORIZE_QUAD ? V16QImode : V8QImode;
22281 case DImode:
22282 if (TARGET_NEON_VECTORIZE_QUAD)
22283 return V2DImode;
22284 break;
22285
22286 default:;
22287 }
22288
22289 if (TARGET_REALLY_IWMMXT)
22290 switch (mode)
22291 {
22292 case SImode:
22293 return V2SImode;
22294 case HImode:
22295 return V4HImode;
22296 case QImode:
22297 return V8QImode;
22298
22299 default:;
22300 }
22301
22302 return word_mode;
22303 }
22304
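As a hypothetical illustration of the mapping above (the loop is an example, not taken from this file): a plain integer loop autovectorized for Neon uses doubleword vectors by default and quadword vectors only when -mvectorize-with-neon-quad is given.

    /* With -mfpu=neon, SImode elements map to V2SImode (two ints per
       vector operation) by default, or to V4SImode (four ints) under
       -mvectorize-with-neon-quad; iWMMXt similarly gets V2SImode.  */
    void
    add_arrays (int *a, const int *b, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        a[i] += b[i];
    }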
22305 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
22306
22307 We need to define this for LO_REGS on thumb. Otherwise we can end up
22308 using r0-r4 for function arguments, r7 for the stack frame, and not
22309 have enough left over to do doubleword arithmetic. */
22310
22311 static bool
22312 arm_class_likely_spilled_p (reg_class_t rclass)
22313 {
22314 if ((TARGET_THUMB && rclass == LO_REGS)
22315 || rclass == CC_REG)
22316 return true;
22317
22318 return false;
22319 }
22320
20949 /* Implements target hook small_register_classes_for_mode_p. */ 22321 /* Implements target hook small_register_classes_for_mode_p. */
20950 bool 22322 bool
20951 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED) 22323 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
20952 { 22324 {
20953 return TARGET_THUMB1; 22325 return TARGET_THUMB1;
21028 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i); 22400 XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
21029 22401
21030 return p; 22402 return p;
21031 } 22403 }
21032 22404
21033 #ifdef TARGET_UNWIND_INFO 22405 #if ARM_UNWIND_INFO
21034 /* Emit unwind directives for a store-multiple instruction or stack pointer 22406 /* Emit unwind directives for a store-multiple instruction or stack pointer
21035 push during alignment. 22407 push during alignment.
21036 These should only ever be generated by the function prologue code, so 22408 These should only ever be generated by the function prologue code, so
21037 expect them to have a particular form. */ 22409 expect them to have a particular form. */
21038 22410
21242 static void 22614 static void
21243 arm_unwind_emit (FILE * asm_out_file, rtx insn) 22615 arm_unwind_emit (FILE * asm_out_file, rtx insn)
21244 { 22616 {
21245 rtx pat; 22617 rtx pat;
21246 22618
21247 if (!ARM_EABI_UNWIND_TABLES) 22619 if (arm_except_unwind_info (&global_options) != UI_TARGET)
21248 return; 22620 return;
21249 22621
21250 if (!(flag_unwind_tables || crtl->uses_eh_lsda) 22622 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
21251 && (TREE_NOTHROW (current_function_decl) 22623 && (TREE_NOTHROW (current_function_decl)
21252 || crtl->all_throwers_are_sibcalls)) 22624 || crtl->all_throwers_are_sibcalls))
21292 fputs ("(TARGET2)", asm_out_file); 22664 fputs ("(TARGET2)", asm_out_file);
21293 fputc ('\n', asm_out_file); 22665 fputc ('\n', asm_out_file);
21294 22666
21295 return TRUE; 22667 return TRUE;
21296 } 22668 }
21297 #endif /* TARGET_UNWIND_INFO */ 22669
22670 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
22671
22672 static void
22673 arm_asm_emit_except_personality (rtx personality)
22674 {
22675 fputs ("\t.personality\t", asm_out_file);
22676 output_addr_const (asm_out_file, personality);
22677 fputc ('\n', asm_out_file);
22678 }
22679
22680 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
22681
22682 static void
22683 arm_asm_init_sections (void)
22684 {
22685 exception_section = get_unnamed_section (0, output_section_asm_op,
22686 "\t.handlerdata");
22687 }
22688 #endif /* ARM_UNWIND_INFO */
22689
22690 /* Implement TARGET_EXCEPT_UNWIND_INFO. */
22691
22692 static enum unwind_info_type
22693 arm_except_unwind_info (struct gcc_options *opts)
22694 {
22695 /* Honor the --enable-sjlj-exceptions configure switch. */
22696 #ifdef CONFIG_SJLJ_EXCEPTIONS
22697 if (CONFIG_SJLJ_EXCEPTIONS)
22698 return UI_SJLJ;
22699 #endif
22700
22701 /* If not using ARM EABI unwind tables... */
22702 if (ARM_UNWIND_INFO)
22703 {
22704 /* For simplicity elsewhere in this file, indicate that all unwind
22705 info is disabled if we're not emitting unwind tables. */
22706 if (!opts->x_flag_exceptions && !opts->x_flag_unwind_tables)
22707 return UI_NONE;
22708 else
22709 return UI_TARGET;
22710 }
22711
22712 /* ... we use sjlj exceptions for backwards compatibility. */
22713 return UI_SJLJ;
22714 }
21298 22715
21299 22716
21300 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic 22717 /* Handle UNSPEC DWARF call frame instructions. These are needed for dynamic
21301 stack alignment. */ 22718 stack alignment. */
21302 22719
21324 /* Output unwind directives for the start/end of a function. */ 22741 /* Output unwind directives for the start/end of a function. */
21325 22742
21326 void 22743 void
21327 arm_output_fn_unwind (FILE * f, bool prologue) 22744 arm_output_fn_unwind (FILE * f, bool prologue)
21328 { 22745 {
21329 if (!ARM_EABI_UNWIND_TABLES) 22746 if (arm_except_unwind_info (&global_options) != UI_TARGET)
21330 return; 22747 return;
21331 22748
21332 if (prologue) 22749 if (prologue)
21333 fputs ("\t.fnstart\n", f); 22750 fputs ("\t.fnstart\n", f);
21334 else 22751 else
21404 fputs ("\t.word\t", file); 22821 fputs ("\t.word\t", file);
21405 output_addr_const (file, x); 22822 output_addr_const (file, x);
21406 fputs ("(tlsldo)", file); 22823 fputs ("(tlsldo)", file);
21407 } 22824 }
21408 22825
21409 bool 22826 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
22827
22828 static bool
21410 arm_output_addr_const_extra (FILE *fp, rtx x) 22829 arm_output_addr_const_extra (FILE *fp, rtx x)
21411 { 22830 {
21412 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) 22831 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
21413 return arm_emit_tls_decoration (fp, x); 22832 return arm_emit_tls_decoration (fp, x);
21414 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL) 22833 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
21544 { 22963 {
21545 switch (arm_tune) 22964 switch (arm_tune)
21546 { 22965 {
21547 case cortexr4: 22966 case cortexr4:
21548 case cortexr4f: 22967 case cortexr4f:
22968 case cortexa5:
21549 case cortexa8: 22969 case cortexa8:
21550 case cortexa9: 22970 case cortexa9:
22971 case fa726te:
21551 return 2; 22972 return 2;
21552 22973
21553 default: 22974 default:
21554 return 1; 22975 return 1;
21555 } 22976 }
21661 if (TARGET_THUMB) 23082 if (TARGET_THUMB)
21662 memcpy (reg_alloc_order, thumb_core_reg_alloc_order, 23083 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
21663 sizeof (thumb_core_reg_alloc_order)); 23084 sizeof (thumb_core_reg_alloc_order));
21664 } 23085 }
21665 23086
21666 /* Set default optimization options. */
21667 void
21668 arm_optimization_options (int level, int size ATTRIBUTE_UNUSED)
21669 {
21670 /* Enable section anchors by default at -O1 or higher.
21671 Use 2 to distinguish from an explicit -fsection-anchors
21672 given on the command line. */
21673 if (level > 0)
21674 flag_section_anchors = 2;
21675 }
21676
21677 /* Implement TARGET_FRAME_POINTER_REQUIRED. */ 23087 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
21678 23088
21679 bool 23089 bool
21680 arm_frame_pointer_required (void) 23090 arm_frame_pointer_required (void)
21681 { 23091 {
21690 arm_have_conditional_execution (void) 23100 arm_have_conditional_execution (void)
21691 { 23101 {
21692 return !TARGET_THUMB1; 23102 return !TARGET_THUMB1;
21693 } 23103 }
21694 23104
23105 /* Legitimize a memory reference for a sync primitive implemented using
23106 ldrex / strex. We currently force the form of the reference to be
23107 indirect without offset. We do not yet support the indirect offset
23108 addressing supported by some ARM targets for these
23109 instructions. */
23110 static rtx
23111 arm_legitimize_sync_memory (rtx memory)
23112 {
23113 rtx addr = force_reg (Pmode, XEXP (memory, 0));
23114 rtx legitimate_memory = gen_rtx_MEM (GET_MODE (memory), addr);
23115
23116 set_mem_alias_set (legitimate_memory, ALIAS_SET_MEMORY_BARRIER);
23117 MEM_VOLATILE_P (legitimate_memory) = MEM_VOLATILE_P (memory);
23118 return legitimate_memory;
23119 }
23120
23121 /* An instruction emitter. */
23122 typedef void (* emit_f) (int label, const char *, rtx *);
23123
23124 /* An instruction emitter that emits via the conventional
23125 output_asm_insn. */
23126 static void
23127 arm_emit (int label ATTRIBUTE_UNUSED, const char *pattern, rtx *operands)
23128 {
23129 output_asm_insn (pattern, operands);
23130 }
23131
23132 /* Count the number of emitted synchronization instructions. */
23133 static unsigned arm_insn_count;
23134
23135 /* An emitter that counts emitted instructions but does not actually
23136 emit instructions into the instruction stream. */
23137 static void
23138 arm_count (int label,
23139 const char *pattern ATTRIBUTE_UNUSED,
23140 rtx *operands ATTRIBUTE_UNUSED)
23141 {
23142 if (! label)
23143 ++ arm_insn_count;
23144 }
23145
23146 /* Construct a pattern using conventional output formatting and feed
23147 it to output_asm_insn. Provides a mechanism to construct the
23148 output pattern on the fly. Note the hard limit on the pattern
23149 buffer size. */
23150 static void ATTRIBUTE_PRINTF_4
23151 arm_output_asm_insn (emit_f emit, int label, rtx *operands,
23152 const char *pattern, ...)
23153 {
23154 va_list ap;
23155 char buffer[256];
23156
23157 va_start (ap, pattern);
23158 vsprintf (buffer, pattern, ap);
23159 va_end (ap);
23160 emit (label, buffer, operands);
23161 }
23162
23163 /* Emit the memory barrier instruction, if any, provided by this
23164 target to a specified emitter. */
23165 static void
23166 arm_process_output_memory_barrier (emit_f emit, rtx *operands)
23167 {
23168 if (TARGET_HAVE_DMB)
23169 {
23170 /* Note that we issue a system-level barrier. We should consider
23171 issuing an inner shareability zone barrier here instead, i.e.
23172 "DMB ISH". */
23173 emit (0, "dmb\tsy", operands);
23174 return;
23175 }
23176
23177 if (TARGET_HAVE_DMB_MCR)
23178 {
23179 emit (0, "mcr\tp15, 0, r0, c7, c10, 5", operands);
23180 return;
23181 }
23182
23183 gcc_unreachable ();
23184 }
23185
23186 /* Emit the memory barrier instruction, if any, provided by this
23187 target. */
23188 const char *
23189 arm_output_memory_barrier (rtx *operands)
23190 {
23191 arm_process_output_memory_barrier (arm_emit, operands);
23192 return "";
23193 }
23194
23195 /* Helper to figure out the instruction suffix required on ldrex/strex
23196 for operations on an object of the specified mode. */
23197 static const char *
23198 arm_ldrex_suffix (enum machine_mode mode)
23199 {
23200 switch (mode)
23201 {
23202 case QImode: return "b";
23203 case HImode: return "h";
23204 case SImode: return "";
23205 case DImode: return "d";
23206 default:
23207 gcc_unreachable ();
23208 }
23209 return "";
23210 }
23211
23212 /* Emit an ldrex{b,h,d, } instruction appropriate for the specified
23213 mode. */
23214 static void
23215 arm_output_ldrex (emit_f emit,
23216 enum machine_mode mode,
23217 rtx target,
23218 rtx memory)
23219 {
23220 const char *suffix = arm_ldrex_suffix (mode);
23221 rtx operands[2];
23222
23223 operands[0] = target;
23224 operands[1] = memory;
23225 arm_output_asm_insn (emit, 0, operands, "ldrex%s\t%%0, %%C1", suffix);
23226 }
23227
23228 /* Emit a strex{b,h,d, } instruction appropriate for the specified
23229 mode. */
23230 static void
23231 arm_output_strex (emit_f emit,
23232 enum machine_mode mode,
23233 const char *cc,
23234 rtx result,
23235 rtx value,
23236 rtx memory)
23237 {
23238 const char *suffix = arm_ldrex_suffix (mode);
23239 rtx operands[3];
23240
23241 operands[0] = result;
23242 operands[1] = value;
23243 operands[2] = memory;
23244 arm_output_asm_insn (emit, 0, operands, "strex%s%s\t%%0, %%1, %%C2", suffix,
23245 cc);
23246 }
23247
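For illustration only (the registers are placeholders), with HImode the two templates above expand to something like:

    ldrexh  r0, [r1]
    strexh  r2, r0, [r1]

where the strex result register (r2 here) receives 0 on success and 1 if the exclusive store failed.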
23248 /* Helper to emit a two operand instruction. */
23249 static void
23250 arm_output_op2 (emit_f emit, const char *mnemonic, rtx d, rtx s)
23251 {
23252 rtx operands[2];
23253
23254 operands[0] = d;
23255 operands[1] = s;
23256 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1", mnemonic);
23257 }
23258
23259 /* Helper to emit a three operand instruction. */
23260 static void
23261 arm_output_op3 (emit_f emit, const char *mnemonic, rtx d, rtx a, rtx b)
23262 {
23263 rtx operands[3];
23264
23265 operands[0] = d;
23266 operands[1] = a;
23267 operands[2] = b;
23268 arm_output_asm_insn (emit, 0, operands, "%s\t%%0, %%1, %%2", mnemonic);
23269 }
23270
23271 /* Emit a load store exclusive synchronization loop.
23272
23273 do
23274 old_value = [mem]
23275 if old_value != required_value
23276 break;
23277 t1 = sync_op (old_value, new_value)
23278 [mem] = t1, t2 = [0|1]
23279 while ! t2
23280
23281 Note:
23282 t1 == t2 is not permitted
23283 t1 == old_value is permitted
23284
23285 required_value:
23286
23287 RTX register or const_int representing the required old_value for
23288 the modify to continue; if NULL, no comparison is performed. */
23289 static void
23290 arm_output_sync_loop (emit_f emit,
23291 enum machine_mode mode,
23292 rtx old_value,
23293 rtx memory,
23294 rtx required_value,
23295 rtx new_value,
23296 rtx t1,
23297 rtx t2,
23298 enum attr_sync_op sync_op,
23299 int early_barrier_required)
23300 {
23301 rtx operands[1];
23302
23303 gcc_assert (t1 != t2);
23304
23305 if (early_barrier_required)
23306 arm_process_output_memory_barrier (emit, NULL);
23307
23308 arm_output_asm_insn (emit, 1, operands, "%sLSYT%%=:", LOCAL_LABEL_PREFIX);
23309
23310 arm_output_ldrex (emit, mode, old_value, memory);
23311
23312 if (required_value)
23313 {
23314 rtx operands[2];
23315
23316 operands[0] = old_value;
23317 operands[1] = required_value;
23318 arm_output_asm_insn (emit, 0, operands, "cmp\t%%0, %%1");
23319 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYB%%=", LOCAL_LABEL_PREFIX);
23320 }
23321
23322 switch (sync_op)
23323 {
23324 case SYNC_OP_ADD:
23325 arm_output_op3 (emit, "add", t1, old_value, new_value);
23326 break;
23327
23328 case SYNC_OP_SUB:
23329 arm_output_op3 (emit, "sub", t1, old_value, new_value);
23330 break;
23331
23332 case SYNC_OP_IOR:
23333 arm_output_op3 (emit, "orr", t1, old_value, new_value);
23334 break;
23335
23336 case SYNC_OP_XOR:
23337 arm_output_op3 (emit, "eor", t1, old_value, new_value);
23338 break;
23339
23340 case SYNC_OP_AND:
23341 arm_output_op3 (emit,"and", t1, old_value, new_value);
23342 break;
23343
23344 case SYNC_OP_NAND:
23345 arm_output_op3 (emit, "and", t1, old_value, new_value);
23346 arm_output_op2 (emit, "mvn", t1, t1);
23347 break;
23348
23349 case SYNC_OP_NONE:
23350 t1 = new_value;
23351 break;
23352 }
23353
23354 if (t2)
23355 {
23356 arm_output_strex (emit, mode, "", t2, t1, memory);
23357 operands[0] = t2;
23358 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23359 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23360 LOCAL_LABEL_PREFIX);
23361 }
23362 else
23363 {
23364 /* Use old_value for the return value because for some operations
23365 the old_value can easily be restored. This saves one register. */
23366 arm_output_strex (emit, mode, "", old_value, t1, memory);
23367 operands[0] = old_value;
23368 arm_output_asm_insn (emit, 0, operands, "teq\t%%0, #0");
23369 arm_output_asm_insn (emit, 0, operands, "bne\t%sLSYT%%=",
23370 LOCAL_LABEL_PREFIX);
23371
23372 switch (sync_op)
23373 {
23374 case SYNC_OP_ADD:
23375 arm_output_op3 (emit, "sub", old_value, t1, new_value);
23376 break;
23377
23378 case SYNC_OP_SUB:
23379 arm_output_op3 (emit, "add", old_value, t1, new_value);
23380 break;
23381
23382 case SYNC_OP_XOR:
23383 arm_output_op3 (emit, "eor", old_value, t1, new_value);
23384 break;
23385
23386 case SYNC_OP_NONE:
23387 arm_output_op2 (emit, "mov", old_value, required_value);
23388 break;
23389
23390 default:
23391 gcc_unreachable ();
23392 }
23393 }
23394
23395 arm_process_output_memory_barrier (emit, NULL);
23396 arm_output_asm_insn (emit, 1, operands, "%sLSYB%%=:", LOCAL_LABEL_PREFIX);
23397 }
23398
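Putting the helpers together, a hedged sketch of what this loop emits for an SImode compare-and-swap style operation (sync_op NONE, a required value, and a separate t2 result register) on a core with DMB; the register numbers and label suffix are illustrative only:

        dmb     sy              @ early barrier
    .LSYT8:
        ldrex   r0, [r1]        @ old_value = [mem]
        cmp     r0, r2          @ compare with required_value
        bne     .LSYB8          @ stop if it does not match
        strex   r5, r3, [r1]    @ try to store new_value (t1)
        teq     r5, #0
        bne     .LSYT8          @ retry if the exclusive store failed
        dmb     sy              @ final barrier
    .LSYB8: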
23399 static rtx
23400 arm_get_sync_operand (rtx *operands, int index, rtx default_value)
23401 {
23402 if (index > 0)
23403 default_value = operands[index - 1];
23404
23405 return default_value;
23406 }
23407
23408 #define FETCH_SYNC_OPERAND(NAME, DEFAULT) \
23409 arm_get_sync_operand (operands, (int) get_attr_sync_##NAME (insn), DEFAULT);
23410
23411 /* Extract the operands for a synchronization instruction from the
23412 instruction's attributes and emit the instruction. */
23413 static void
23414 arm_process_output_sync_insn (emit_f emit, rtx insn, rtx *operands)
23415 {
23416 rtx result, memory, required_value, new_value, t1, t2;
23417 int early_barrier;
23418 enum machine_mode mode;
23419 enum attr_sync_op sync_op;
23420
23421 result = FETCH_SYNC_OPERAND(result, 0);
23422 memory = FETCH_SYNC_OPERAND(memory, 0);
23423 required_value = FETCH_SYNC_OPERAND(required_value, 0);
23424 new_value = FETCH_SYNC_OPERAND(new_value, 0);
23425 t1 = FETCH_SYNC_OPERAND(t1, 0);
23426 t2 = FETCH_SYNC_OPERAND(t2, 0);
23427 early_barrier =
23428 get_attr_sync_release_barrier (insn) == SYNC_RELEASE_BARRIER_YES;
23429 sync_op = get_attr_sync_op (insn);
23430 mode = GET_MODE (memory);
23431
23432 arm_output_sync_loop (emit, mode, result, memory, required_value,
23433 new_value, t1, t2, sync_op, early_barrier);
23434 }
23435
23436 /* Emit a synchronization instruction loop. */
23437 const char *
23438 arm_output_sync_insn (rtx insn, rtx *operands)
23439 {
23440 arm_process_output_sync_insn (arm_emit, insn, operands);
23441 return "";
23442 }
23443
23444 /* Count the number of machine instructions that will be emitted for a
23445 synchronization instruction. Note that the emitter used does not
23446 emit instructions; it just counts them, being careful not
23447 to count labels. */
23448 unsigned int
23449 arm_sync_loop_insns (rtx insn, rtx *operands)
23450 {
23451 arm_insn_count = 0;
23452 arm_process_output_sync_insn (arm_count, insn, operands);
23453 return arm_insn_count;
23454 }
23455
23456 /* Helper to call a target sync instruction generator, dealing with
23457 the variation in operands required by the different generators. */
23458 static rtx
23459 arm_call_generator (struct arm_sync_generator *generator, rtx old_value,
23460 rtx memory, rtx required_value, rtx new_value)
23461 {
23462 switch (generator->op)
23463 {
23464 case arm_sync_generator_omn:
23465 gcc_assert (! required_value);
23466 return generator->u.omn (old_value, memory, new_value);
23467
23468 case arm_sync_generator_omrn:
23469 gcc_assert (required_value);
23470 return generator->u.omrn (old_value, memory, required_value, new_value);
23471 }
23472
23473 return NULL;
23474 }
23475
23476 /* Expand a synchronization loop. The synchronization loop is expanded
23477 as an opaque block of instructions in order to ensure that we do
23478 not subsequently get extraneous memory accesses inserted within the
23479 critical region. The exclusive access property of ldrex/strex is
23480 only guaranteed if there are no intervening memory accesses. */
23481 void
23482 arm_expand_sync (enum machine_mode mode,
23483 struct arm_sync_generator *generator,
23484 rtx target, rtx memory, rtx required_value, rtx new_value)
23485 {
23486 if (target == NULL)
23487 target = gen_reg_rtx (mode);
23488
23489 memory = arm_legitimize_sync_memory (memory);
23490 if (mode != SImode)
23491 {
23492 rtx load_temp = gen_reg_rtx (SImode);
23493
23494 if (required_value)
23495 required_value = convert_modes (SImode, mode, required_value, true);
23496
23497 new_value = convert_modes (SImode, mode, new_value, true);
23498 emit_insn (arm_call_generator (generator, load_temp, memory,
23499 required_value, new_value));
23500 emit_move_insn (target, gen_lowpart (mode, load_temp));
23501 }
23502 else
23503 {
23504 emit_insn (arm_call_generator (generator, target, memory, required_value,
23505 new_value));
23506 }
23507 }
23508
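For context, a sketch of how this expander is reached from user source, under the usual assumption that the __sync builtins map onto these patterns for ldrex/strex-capable targets such as ARMv7:

    /* Illustrative only: on such a target this builtin is expanded
       through arm_expand_sync into the ldrex/strex loop sketched
       earlier, with 0 as the required value.  */
    int
    swap_if_zero (int *p, int newval)
    {
      return __sync_val_compare_and_swap (p, 0, newval);
    }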
23509 static bool
23510 arm_vector_alignment_reachable (const_tree type, bool is_packed)
23511 {
23512 /* Vectors which aren't in packed structures will not be less aligned than
23513 the natural alignment of their element type, so this is safe. */
23514 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23515 return !is_packed;
23516
23517 return default_builtin_vector_alignment_reachable (type, is_packed);
23518 }
23519
23520 static bool
23521 arm_builtin_support_vector_misalignment (enum machine_mode mode,
23522 const_tree type, int misalignment,
23523 bool is_packed)
23524 {
23525 if (TARGET_NEON && !BYTES_BIG_ENDIAN)
23526 {
23527 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
23528
23529 if (is_packed)
23530 return align == 1;
23531
23532 /* If the misalignment is unknown, we should be able to handle the access
23533 so long as it is not to a member of a packed data structure. */
23534 if (misalignment == -1)
23535 return true;
23536
23537 /* Return true if the misalignment is a multiple of the natural alignment
23538 of the vector's element type. This is probably always going to be
23539 true in practice, since we've already established that this isn't a
23540 packed access. */
23541 return ((misalignment % align) == 0);
23542 }
23543
23544 return default_builtin_support_vector_misalignment (mode, type, misalignment,
23545 is_packed);
23546 }
23547
23548 static void
23549 arm_conditional_register_usage (void)
23550 {
23551 int regno;
23552
23553 if (TARGET_SOFT_FLOAT || TARGET_THUMB1 || !TARGET_FPA)
23554 {
23555 for (regno = FIRST_FPA_REGNUM;
23556 regno <= LAST_FPA_REGNUM; ++regno)
23557 fixed_regs[regno] = call_used_regs[regno] = 1;
23558 }
23559
23560 if (TARGET_THUMB1 && optimize_size)
23561 {
23562 /* When optimizing for size on Thumb-1, it's better not
23563 to use the HI regs, because of the overhead of
23564 stacking them. */
23565 for (regno = FIRST_HI_REGNUM;
23566 regno <= LAST_HI_REGNUM; ++regno)
23567 fixed_regs[regno] = call_used_regs[regno] = 1;
23568 }
23569
23570 /* The link register can be clobbered by any branch insn,
23571 but we have no way to track that at present, so mark
23572 it as unavailable. */
23573 if (TARGET_THUMB1)
23574 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
23575
23576 if (TARGET_32BIT && TARGET_HARD_FLOAT)
23577 {
23578 if (TARGET_MAVERICK)
23579 {
23580 for (regno = FIRST_FPA_REGNUM;
23581 regno <= LAST_FPA_REGNUM; ++ regno)
23582 fixed_regs[regno] = call_used_regs[regno] = 1;
23583 for (regno = FIRST_CIRRUS_FP_REGNUM;
23584 regno <= LAST_CIRRUS_FP_REGNUM; ++ regno)
23585 {
23586 fixed_regs[regno] = 0;
23587 call_used_regs[regno] = regno < FIRST_CIRRUS_FP_REGNUM + 4;
23588 }
23589 }
23590 if (TARGET_VFP)
23591 {
23592 /* VFPv3 registers are disabled when earlier VFP
23593 versions are selected due to the definition of
23594 LAST_VFP_REGNUM. */
23595 for (regno = FIRST_VFP_REGNUM;
23596 regno <= LAST_VFP_REGNUM; ++ regno)
23597 {
23598 fixed_regs[regno] = 0;
23599 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
23600 || regno >= FIRST_VFP_REGNUM + 32;
23601 }
23602 }
23603 }
23604
23605 if (TARGET_REALLY_IWMMXT)
23606 {
23607 regno = FIRST_IWMMXT_GR_REGNUM;
23608 /* The 2002/10/09 revision of the XScale ABI has wCG0
23609 and wCG1 as call-preserved registers. The 2002/11/21
23610 revision changed this so that all wCG registers are
23611 scratch registers. */
23612 for (regno = FIRST_IWMMXT_GR_REGNUM;
23613 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
23614 fixed_regs[regno] = 0;
23615 /* The XScale ABI has wR0 - wR9 as scratch registers,
23616 the rest as call-preserved registers. */
23617 for (regno = FIRST_IWMMXT_REGNUM;
23618 regno <= LAST_IWMMXT_REGNUM; ++ regno)
23619 {
23620 fixed_regs[regno] = 0;
23621 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
23622 }
23623 }
23624
23625 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
23626 {
23627 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23628 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
23629 }
23630 else if (TARGET_APCS_STACK)
23631 {
23632 fixed_regs[10] = 1;
23633 call_used_regs[10] = 1;
23634 }
23635 /* -mcaller-super-interworking reserves r11 for calls to
23636 _interwork_r11_call_via_rN(). Making the register global
23637 is an easy way of ensuring that it remains valid for all
23638 calls. */
23639 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
23640 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
23641 {
23642 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23643 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23644 if (TARGET_CALLER_INTERWORKING)
23645 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
23646 }
23647 SUBTARGET_CONDITIONAL_REGISTER_USAGE
23648 }
23649
23650 static reg_class_t
23651 arm_preferred_rename_class (reg_class_t rclass)
23652 {
23653 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
23654 using GENERAL_REGS. During the register renaming pass, we prefer LO_REGS,
23655 and code size can be reduced. */
23656 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
23657 return LO_REGS;
23658 else
23659 return NO_REGS;
23660 }
23661
21695 #include "gt-arm.h" 23662 #include "gt-arm.h"