Mercurial > hg > CbC > CbC_gcc
view gcc/config/arm/arm.c @ 158:494b0b89df80 default tip
...
author | Shinji KONO <kono@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 25 May 2020 18:13:55 +0900 |
parents | 1830386684a0 |
children |
line wrap: on
line source
/* Output routines for GCC for ARM. Copyright (C) 1991-2020 Free Software Foundation, Inc. Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) and Martin Simmons (@harleqn.co.uk). More major hacks by Richard Earnshaw (rearnsha@arm.com). This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ #define IN_TARGET_CODE 1 #include "config.h" #define INCLUDE_STRING #include "system.h" #include "coretypes.h" #include "backend.h" #include "target.h" #include "rtl.h" #include "tree.h" #include "memmodel.h" #include "cfghooks.h" #include "df.h" #include "tm_p.h" #include "stringpool.h" #include "attribs.h" #include "optabs.h" #include "regs.h" #include "emit-rtl.h" #include "recog.h" #include "cgraph.h" #include "diagnostic-core.h" #include "alias.h" #include "fold-const.h" #include "stor-layout.h" #include "calls.h" #include "varasm.h" #include "output.h" #include "insn-attr.h" #include "flags.h" #include "reload.h" #include "explow.h" #include "expr.h" #include "cfgrtl.h" #include "sched-int.h" #include "common/common-target.h" #include "langhooks.h" #include "intl.h" #include "libfuncs.h" #include "opts.h" #include "dumpfile.h" #include "target-globals.h" #include "builtins.h" #include "tm-constrs.h" #include "rtl-iter.h" #include "optabs-libfuncs.h" #include "gimplify.h" #include "gimple.h" #include "selftest.h" /* This file should be included last. */ #include "target-def.h" /* Forward definitions of types. 
*/
typedef struct minipool_node Mnode;
typedef struct minipool_fixup Mfix;

/* The last .arch and .fpu assembly strings that we printed.  */
static std::string arm_last_printed_arch_string;
static std::string arm_last_printed_fpu_string;

/* Global hook: a pointer to a function that takes no arguments and returns
   nothing.  It is given no initializer here.
   NOTE(review): judging by the name it is presumably installed elsewhere to
   emit extra object attributes -- confirm against its users.  */
void (*arm_lang_output_object_attributes_hook)(void);

/* A fixed group of four ints.  The prototypes of optimal_immediate_sequence
   and optimal_immediate_sequence_1 later in this file take a pointer to one
   of these as their return_sequence parameter.  */
struct four_ints
{
  int i[4];
};

/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
                             unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *,
const char *, const char *, int, HOST_WIDE_INT); static const char *shift_op (rtx, HOST_WIDE_INT *); static struct machine_function *arm_init_machine_status (void); static void thumb_exit (FILE *, int); static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *); static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT); static Mnode *add_minipool_forward_ref (Mfix *); static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT); static Mnode *add_minipool_backward_ref (Mfix *); static void assign_minipool_offsets (Mfix *); static void arm_print_value (FILE *, rtx); static void dump_minipool (rtx_insn *); static int arm_barrier_cost (rtx_insn *); static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT); static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT); static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *, machine_mode, rtx); static void arm_reorg (void); static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int); static unsigned long arm_compute_save_reg0_reg12_mask (void); static unsigned long arm_compute_save_core_reg_mask (void); static unsigned long arm_isr_value (tree); static unsigned long arm_compute_func_type (void); static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *); static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *); static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *); #if TARGET_DLLIMPORT_DECL_ATTRIBUTES static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *); #endif static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *); static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *); static void arm_output_function_epilogue (FILE *); static void arm_output_function_prologue (FILE *); static int arm_comp_type_attributes (const_tree, const_tree); static void arm_set_default_type_attributes (tree); static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, 
unsigned int); static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int); static int optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val, struct four_ints *return_sequence); static int optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val, struct four_ints *return_sequence, int i); static int arm_get_strip_length (int); static bool arm_function_ok_for_sibcall (tree, tree); static machine_mode arm_promote_function_mode (const_tree, machine_mode, int *, const_tree, int); static bool arm_return_in_memory (const_tree, const_tree); static rtx arm_function_value (const_tree, const_tree, bool); static rtx arm_libcall_value_1 (machine_mode); static rtx arm_libcall_value (machine_mode, const_rtx); static bool arm_function_value_regno_p (const unsigned int); static void arm_internal_label (FILE *, const char *, unsigned long); static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree); static bool arm_have_conditional_execution (void); static bool arm_cannot_force_const_mem (machine_mode, rtx); static bool arm_legitimate_constant_p (machine_mode, rtx); static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool); static int arm_insn_cost (rtx_insn *, bool); static int arm_address_cost (rtx, machine_mode, addr_space_t, bool); static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t); static int arm_memory_move_cost (machine_mode, reg_class_t, bool); static void emit_constant_insn (rtx cond, rtx pattern); static rtx_insn *emit_set_insn (rtx, rtx); static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx); static rtx emit_multi_reg_push (unsigned long, unsigned long); static void arm_emit_multi_reg_pop (unsigned long); static int vfp_emit_fstmd (int, int); static void arm_emit_vfp_multi_reg_pop (int, int, rtx); static int arm_arg_partial_bytes (cumulative_args_t, const function_arg_info &); static rtx arm_function_arg (cumulative_args_t, const function_arg_info &); static 
void arm_function_arg_advance (cumulative_args_t, const function_arg_info &); static pad_direction arm_function_arg_padding (machine_mode, const_tree); static unsigned int arm_function_arg_boundary (machine_mode, const_tree); static rtx aapcs_allocate_return_reg (machine_mode, const_tree, const_tree); static rtx aapcs_libcall_value (machine_mode); static int aapcs_select_return_coproc (const_tree, const_tree); #ifdef OBJECT_FORMAT_ELF static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED; static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED; #endif #ifndef ARM_PE static void arm_encode_section_info (tree, rtx, int); #endif static void arm_file_end (void); static void arm_file_start (void); static void arm_insert_attributes (tree, tree *); static void arm_setup_incoming_varargs (cumulative_args_t, const function_arg_info &, int *, int); static bool arm_pass_by_reference (cumulative_args_t, const function_arg_info &); static bool arm_promote_prototypes (const_tree); static bool arm_default_short_enums (void); static bool arm_align_anon_bitfield (void); static bool arm_return_in_msb (const_tree); static bool arm_must_pass_in_stack (const function_arg_info &); static bool arm_return_in_memory (const_tree, const_tree); #if ARM_UNWIND_INFO static void arm_unwind_emit (FILE *, rtx_insn *); static bool arm_output_ttype (rtx); static void arm_asm_emit_except_personality (rtx); #endif static void arm_asm_init_sections (void); static rtx arm_dwarf_register_span (rtx); static tree arm_cxx_guard_type (void); static bool arm_cxx_guard_mask_bit (void); static tree arm_get_cookie_size (tree); static bool arm_cookie_has_size (void); static bool arm_cxx_cdtor_returns_this (void); static bool arm_cxx_key_method_may_be_inline (void); static void arm_cxx_determine_class_data_visibility (tree); static bool arm_cxx_class_data_always_comdat (void); static bool arm_cxx_use_aeabi_atexit (void); static void arm_init_libfuncs (void); static tree arm_build_builtin_va_list 
(void); static void arm_expand_builtin_va_start (tree, rtx); static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *); static void arm_option_override (void); static void arm_option_save (struct cl_target_option *, struct gcc_options *); static void arm_option_restore (struct gcc_options *, struct cl_target_option *); static void arm_override_options_after_change (void); static void arm_option_print (FILE *, int, struct cl_target_option *); static void arm_set_current_function (tree); static bool arm_can_inline_p (tree, tree); static void arm_relayout_function (tree); static bool arm_valid_target_attribute_p (tree, tree, tree, int); static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode); static bool arm_sched_can_speculate_insn (rtx_insn *); static bool arm_macro_fusion_p (void); static bool arm_cannot_copy_insn_p (rtx_insn *); static int arm_issue_rate (void); static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int); static int arm_first_cycle_multipass_dfa_lookahead (void); static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int); static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED; static bool arm_output_addr_const_extra (FILE *, rtx); static bool arm_allocate_stack_slots_for_args (void); static bool arm_warn_func_return (tree); static tree arm_promoted_type (const_tree t); static bool arm_scalar_mode_supported_p (scalar_mode); static bool arm_frame_pointer_required (void); static bool arm_can_eliminate (const int, const int); static void arm_asm_trampoline_template (FILE *); static void arm_trampoline_init (rtx, tree, rtx); static rtx arm_trampoline_adjust_address (rtx); static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg); static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *); static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *); static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *); static bool 
arm_array_mode_supported_p (machine_mode, unsigned HOST_WIDE_INT); static machine_mode arm_preferred_simd_mode (scalar_mode); static bool arm_class_likely_spilled_p (reg_class_t); static HOST_WIDE_INT arm_vector_alignment (const_tree type); static bool arm_vector_alignment_reachable (const_tree type, bool is_packed); static bool arm_builtin_support_vector_misalignment (machine_mode mode, const_tree type, int misalignment, bool is_packed); static void arm_conditional_register_usage (void); static enum flt_eval_method arm_excess_precision (enum excess_precision_type); static reg_class_t arm_preferred_rename_class (reg_class_t rclass); static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool); static int arm_default_branch_cost (bool, bool); static int arm_cortex_a5_branch_cost (bool, bool); static int arm_cortex_m_branch_cost (bool, bool); static int arm_cortex_m7_branch_cost (bool, bool); static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx, const vec_perm_indices &); static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*); static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, tree vectype, int misalign ATTRIBUTE_UNUSED); static unsigned arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, struct _stmt_vec_info *stmt_info, int misalign, enum vect_cost_model_location where); static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1, bool op0_preserve_value); static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void); static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*); static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT, const_tree); static section *arm_function_section (tree, enum node_frequency, bool, bool); static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num); static unsigned int arm_elf_section_type_flags (tree decl, const char *name, int reloc); static void arm_expand_divmod_libfunc (rtx, 
machine_mode, rtx, rtx, rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
/* NOTE(review): TARGET_MD_ASM_ADJUST is bound to arm_md_asm_adjust further
   down in this file, not to the function declared here -- confirm that this
   prototype is still needed.  */
static rtx_insn * thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
                                        vec<const char *> &, vec<rtx> &,
                                        HARD_REG_SET &);

/* Table of machine attributes.

   Every initializer follows the field order given in the legend comment at
   the top of the table, and the array is terminated by a sentinel entry
   whose name is NULL.  The set of entries depends on the configuration:
   with ARM_PE the table has handler-less dllimport/dllexport entries plus
   interfacearm; otherwise, with TARGET_DLLIMPORT_DECL_ATTRIBUTES, it has
   dllimport/dllexport entries using handle_dll_attribute plus notshared.
   The table is installed as TARGET_ATTRIBUTE_TABLE just below.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call", 0, 0, false, true, true, false, NULL, NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call", 0, 0, false, true, true, false, NULL, NULL },
  /* Specify the procedure call conventions for a function.  This is the
     only entry with min_len and max_len both 1.  */
  { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
    NULL },
  /* Interrupt Service Routines have special prologue and epilogue
     requirements.  The two spellings below share one handler and both have
     min_len 0 and max_len 1.  */
  { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "naked", 0, 0, true, false, false, false,
    arm_handle_fndecl_attribute, NULL },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.
  */
  { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
  { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
  { "interfacearm", 0, 0, true, false, false, false,
    arm_handle_fndecl_attribute, NULL },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "notshared", 0, 0, false, true, false, false,
    arm_handle_notshared_attribute, NULL },
#endif
  /* ARMv8-M Security Extensions support.  cmse_nonsecure_call is the only
     entry in this table with affects_type_identity set to true.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
    arm_handle_cmse_nonsecure_entry, NULL },
  { "cmse_nonsecure_call", 0, 0, true, false, false, true,
    arm_handle_cmse_nonsecure_call, NULL },
  /* Sentinel: a NULL name marks the end of the table.  */
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_CHECK_BUILTIN_CALL
#define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define
TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra #undef TARGET_ASM_FUNCTION_PROLOGUE #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue #undef TARGET_ASM_FUNCTION_EPILOGUE #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue #undef TARGET_CAN_INLINE_P #define TARGET_CAN_INLINE_P arm_can_inline_p #undef TARGET_RELAYOUT_FUNCTION #define TARGET_RELAYOUT_FUNCTION arm_relayout_function #undef TARGET_OPTION_OVERRIDE #define TARGET_OPTION_OVERRIDE arm_option_override #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change #undef TARGET_OPTION_SAVE #define TARGET_OPTION_SAVE arm_option_save #undef TARGET_OPTION_RESTORE #define TARGET_OPTION_RESTORE arm_option_restore #undef TARGET_OPTION_PRINT #define TARGET_OPTION_PRINT arm_option_print #undef TARGET_COMP_TYPE_ATTRIBUTES #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes #undef TARGET_SCHED_CAN_SPECULATE_INSN #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn #undef TARGET_SCHED_MACRO_FUSION_P #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p #undef TARGET_SCHED_MACRO_FUSION_PAIR_P #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes #undef TARGET_SCHED_ADJUST_COST #define TARGET_SCHED_ADJUST_COST arm_adjust_cost #undef TARGET_SET_CURRENT_FUNCTION #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function #undef TARGET_OPTION_VALID_ATTRIBUTE_P #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p #undef TARGET_SCHED_REORDER #define TARGET_SCHED_REORDER arm_sched_reorder #undef TARGET_REGISTER_MOVE_COST #define TARGET_REGISTER_MOVE_COST arm_register_move_cost #undef TARGET_MEMORY_MOVE_COST #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost #undef TARGET_ENCODE_SECTION_INFO #ifdef ARM_PE #define TARGET_ENCODE_SECTION_INFO 
arm_pe_encode_section_info #else #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info #endif #undef TARGET_STRIP_NAME_ENCODING #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding #undef TARGET_ASM_INTERNAL_LABEL #define TARGET_ASM_INTERNAL_LABEL arm_internal_label #undef TARGET_FLOATN_MODE #define TARGET_FLOATN_MODE arm_floatn_mode #undef TARGET_FUNCTION_OK_FOR_SIBCALL #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall #undef TARGET_FUNCTION_VALUE #define TARGET_FUNCTION_VALUE arm_function_value #undef TARGET_LIBCALL_VALUE #define TARGET_LIBCALL_VALUE arm_libcall_value #undef TARGET_FUNCTION_VALUE_REGNO_P #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p #undef TARGET_ASM_OUTPUT_MI_THUNK #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk #undef TARGET_RTX_COSTS #define TARGET_RTX_COSTS arm_rtx_costs #undef TARGET_ADDRESS_COST #define TARGET_ADDRESS_COST arm_address_cost #undef TARGET_INSN_COST #define TARGET_INSN_COST arm_insn_cost #undef TARGET_SHIFT_TRUNCATION_MASK #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask #undef TARGET_VECTOR_MODE_SUPPORTED_P #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p #undef TARGET_ARRAY_MODE_SUPPORTED_P #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \ arm_autovectorize_vector_modes #undef TARGET_MACHINE_DEPENDENT_REORG #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg #undef TARGET_INIT_BUILTINS #define TARGET_INIT_BUILTINS arm_init_builtins #undef TARGET_EXPAND_BUILTIN #define TARGET_EXPAND_BUILTIN arm_expand_builtin #undef TARGET_BUILTIN_DECL #define TARGET_BUILTIN_DECL arm_builtin_decl #undef TARGET_INIT_LIBFUNCS #define 
TARGET_INIT_LIBFUNCS arm_init_libfuncs #undef TARGET_PROMOTE_FUNCTION_MODE #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode #undef TARGET_PROMOTE_PROTOTYPES #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes #undef TARGET_PASS_BY_REFERENCE #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference #undef TARGET_ARG_PARTIAL_BYTES #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes #undef TARGET_FUNCTION_ARG #define TARGET_FUNCTION_ARG arm_function_arg #undef TARGET_FUNCTION_ARG_ADVANCE #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance #undef TARGET_FUNCTION_ARG_PADDING #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding #undef TARGET_FUNCTION_ARG_BOUNDARY #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary #undef TARGET_SETUP_INCOMING_VARARGS #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args #undef TARGET_ASM_TRAMPOLINE_TEMPLATE #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template #undef TARGET_TRAMPOLINE_INIT #define TARGET_TRAMPOLINE_INIT arm_trampoline_init #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address #undef TARGET_WARN_FUNC_RETURN #define TARGET_WARN_FUNC_RETURN arm_warn_func_return #undef TARGET_DEFAULT_SHORT_ENUMS #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums #undef TARGET_ALIGN_ANON_BITFIELD #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield #undef TARGET_NARROW_VOLATILE_BITFIELD #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false #undef TARGET_CXX_GUARD_TYPE #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type #undef TARGET_CXX_GUARD_MASK_BIT #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit #undef TARGET_CXX_GET_COOKIE_SIZE #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size #undef TARGET_CXX_COOKIE_HAS_SIZE #define TARGET_CXX_COOKIE_HAS_SIZE 
arm_cookie_has_size #undef TARGET_CXX_CDTOR_RETURNS_THIS #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline #undef TARGET_CXX_USE_AEABI_ATEXIT #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \ arm_cxx_determine_class_data_visibility #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat #undef TARGET_RETURN_IN_MSB #define TARGET_RETURN_IN_MSB arm_return_in_msb #undef TARGET_RETURN_IN_MEMORY #define TARGET_RETURN_IN_MEMORY arm_return_in_memory #undef TARGET_MUST_PASS_IN_STACK #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack #if ARM_UNWIND_INFO #undef TARGET_ASM_UNWIND_EMIT #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit /* EABI unwinding tables use a different format for the typeinfo tables. 
*/
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is 4088 + 1 + 4095 = 8184 bytes (offsets -4088
   through 4095 inclusive), which is divisible by eight, ensuring natural
   spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION arm_invalid_conversion

#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP arm_invalid_unary_op

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP arm_invalid_binary_op

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED
arm_frame_pointer_required #undef TARGET_CAN_ELIMINATE #define TARGET_CAN_ELIMINATE arm_can_eliminate #undef TARGET_CONDITIONAL_REGISTER_USAGE #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage #undef TARGET_CLASS_LIKELY_SPILLED_P #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p #undef TARGET_VECTORIZE_BUILTINS #define TARGET_VECTORIZE_BUILTINS #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ arm_builtin_vectorized_function #undef TARGET_VECTOR_ALIGNMENT #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \ arm_vector_alignment_reachable #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \ arm_builtin_support_vector_misalignment #undef TARGET_PREFERRED_RENAME_CLASS #define TARGET_PREFERRED_RENAME_CLASS \ arm_preferred_rename_class #undef TARGET_VECTORIZE_VEC_PERM_CONST #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ arm_builtin_vectorization_cost #undef TARGET_VECTORIZE_ADD_STMT_COST #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost #undef TARGET_CANONICALIZE_COMPARISON #define TARGET_CANONICALIZE_COMPARISON \ arm_canonicalize_comparison #undef TARGET_ASAN_SHADOW_OFFSET #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset #undef MAX_INSN_PER_IT_BLOCK #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 
1 : 4) #undef TARGET_CAN_USE_DOLOOP_P #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true #undef TARGET_SCHED_FUSION_PRIORITY #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority #undef TARGET_ASM_FUNCTION_SECTION #define TARGET_ASM_FUNCTION_SECTION arm_function_section #undef TARGET_ASM_ELF_FLAGS_NUMERIC #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric #undef TARGET_SECTION_TYPE_FLAGS #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags #undef TARGET_EXPAND_DIVMOD_LIBFUNC #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc #undef TARGET_C_EXCESS_PRECISION #define TARGET_C_EXCESS_PRECISION arm_excess_precision /* Although the architecture reserves bits 0 and 1, only the former is used for ARM/Thumb ISA selection in v7 and earlier versions. */ #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2 #undef TARGET_FIXED_CONDITION_CODE_REGS #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs #undef TARGET_HARD_REGNO_NREGS #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs #undef TARGET_HARD_REGNO_MODE_OK #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok #undef TARGET_MODES_TIEABLE_P #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p #undef TARGET_CAN_CHANGE_MODE_CLASS #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class #undef TARGET_CONSTANT_ALIGNMENT #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment #undef TARGET_MD_ASM_ADJUST #define TARGET_MD_ASM_ADJUST arm_md_asm_adjust /* Obstack for minipool constant handling. */ static struct obstack minipool_obstack; static char * minipool_startobj; /* The maximum number of insns skipped which will be conditionalised if possible. 
*/ static int max_insns_skipped = 5; extern FILE * asm_out_file; /* True if we are currently building a constant table. */ int making_const_table; /* The processor for which instructions should be scheduled. */ enum processor_type arm_tune = TARGET_CPU_arm_none; /* The current tuning set. */ const struct tune_params *current_tune; /* Which floating point hardware to schedule for. */ int arm_fpu_attr; /* Used for Thumb call_via trampolines. */ rtx thumb_call_via_label[14]; static int thumb_call_reg_needed; /* The bits in this mask specify which instruction scheduling options should be used. */ unsigned int tune_flags = 0; /* The highest ARM architecture version supported by the target. */ enum base_architecture arm_base_arch = BASE_ARCH_0; /* Active target architecture and tuning. */ struct arm_build_target arm_active_target; /* The following are used in the arm.md file as equivalents to bits in the above two flag variables. */ /* Nonzero if this chip supports the ARM Architecture 4 extensions. */ int arm_arch4 = 0; /* Nonzero if this chip supports the ARM Architecture 4t extensions. */ int arm_arch4t = 0; /* Nonzero if this chip supports the ARM Architecture 5T extensions. */ int arm_arch5t = 0; /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */ int arm_arch5te = 0; /* Nonzero if this chip supports the ARM Architecture 6 extensions. */ int arm_arch6 = 0; /* Nonzero if this chip supports the ARM 6K extensions. */ int arm_arch6k = 0; /* Nonzero if this chip supports the ARM 6KZ extensions. */ int arm_arch6kz = 0; /* Nonzero if instructions present in ARMv6-M can be used. */ int arm_arch6m = 0; /* Nonzero if this chip supports the ARM 7 extensions. */ int arm_arch7 = 0; /* Nonzero if this chip supports the Large Physical Address Extension. */ int arm_arch_lpae = 0; /* Nonzero if instructions not present in the 'M' profile can be used. */ int arm_arch_notm = 0; /* Nonzero if instructions present in ARMv7E-M can be used. 
*/
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
int arm_arch8_3 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
int arm_arch8_4 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
   extensions.  */
int arm_arch8_1m_main = 0;

/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* The flags from here to the end of this group have no explicit
   initializer; as file-scope objects they start out as zero.  */

/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  This comment
   covers both flags below.  NOTE(review): presumably one flag per
   instruction set (ARM and Thumb), going by the names; confirm.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we shouldn't use literal pools.
*/
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

/* NOTE(review): presumably the default procedure call standard variant,
   going by the type name; confirm against its users.  */
enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn.  */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
int arm_arch_dotprod = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;

/* Nonzero if chip supports the AdvSIMD I8MM instructions.  */
int arm_arch_i8mm = 0;

/* Nonzero if chip supports the BFloat16 instructions.  */
int arm_arch_bf16 = 0;

/* The condition codes of the ARM, and the inverse function.
   The table holds sixteen mnemonic strings.  NOTE(review): presumably
   indexed by enum arm_cond_code; confirm against its users.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.
*/
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

/* DEF_FP_SYSREG stringizes its argument and appends a comma, so that
   FP_SYSREGS can serve as the initializer list of fp_sysreg_names.
   NOTE(review): FP_SYSREGS is defined outside this chunk; presumably it
   expands to one DEF_FP_SYSREG (...) use per register.  */
#define DEF_FP_SYSREG(reg) #reg,
const char *fp_sysreg_names[NB_FP_SYSREGS] =
{
  FP_SYSREGS
};
#undef DEF_FP_SYSREG

#define ARM_LSL_NAME "lsl"
/* True when the two C strings compare equal with strcmp.  */
#define streq(string1, string2) (strcmp (string1, string2) == 0)

/* Bitmask over register numbers 0-7 (0xff) with the bits for the Thumb
   hard frame pointer, SP, PC and the PIC offset table register
   cleared.  */
#define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))

/* Initialization code.  */

/* Groups a scheduler processor type, a tune_flags mask and a pointer
   to a tune_params table.  */
struct cpu_tune
{
  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
};

/* Brace-enclosed initializers for the prefetch slot of the tune_params
   tables below: { num_slots, l1_size, l1_line_size }.  The
   "not beneficial" form is the fixed triple { 0, -1, -1 }.  */
#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  { \
    num_slots, \
    l1_size, \
    l1_line_size \
  }

/* ARM generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1, /* scalar_stmt_cost. */
  1, /* scalar_load_cost. */
  1, /* scalar_store_cost. */
  1, /* vec_stmt_cost. */
  1, /* vec_to_scalar_cost. */
  1, /* scalar_to_vec_cost. */
  1, /* vec_align_load_cost. */
  1, /* vec_unalign_load_cost. */
  1, /* vec_unalign_store_cost. */
  1, /* vec_store_cost. */
  3, /* cond_taken_branch_cost. */
  1, /* cond_not_taken_branch_cost. */
};

/* Cost tables for AArch32 + AArch64 cores should go in
   aarch-cost-tables.h.  */
#include "aarch-cost-tables.h"

/* Insn extra costs table referenced by arm_cortex_a9_tune.  */
const struct cpu_cost_table cortexa9_extra_costs =
{
  /* ALU */
  {
    0, /* arith. */
    0, /* logical. */
    0, /* shift. */
    COSTS_N_INSNS (1), /* shift_reg. */
    COSTS_N_INSNS (1), /* arith_shift. */
    COSTS_N_INSNS (2), /* arith_shift_reg. */
    0, /* log_shift. */
    COSTS_N_INSNS (1), /* log_shift_reg. */
    COSTS_N_INSNS (1), /* extend. */
    COSTS_N_INSNS (2), /* extend_arith. */
    COSTS_N_INSNS (1), /* bfi. */
    COSTS_N_INSNS (1), /* bfx. */
    0, /* clz. */
    0, /* rev. */
    0, /* non_exec. */
    true /* non_exec_costs_exec. */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (3), /* simple. */
      COSTS_N_INSNS (3), /* flag_setting. */
      COSTS_N_INSNS (2), /* extend. */
      COSTS_N_INSNS (3), /* add.
*/
      COSTS_N_INSNS (2), /* extend_add. */
      COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
    },
    /* MULT DImode */
    {
      0, /* simple (N/A). */
      0, /* flag_setting (N/A). */
      COSTS_N_INSNS (4), /* extend. */
      0, /* add (N/A). */
      COSTS_N_INSNS (4), /* extend_add. */
      0 /* idiv (N/A). */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2), /* load. */
    COSTS_N_INSNS (2), /* load_sign_extend. */
    COSTS_N_INSNS (2), /* ldrd. */
    COSTS_N_INSNS (2), /* ldm_1st. */
    1, /* ldm_regs_per_insn_1st. */
    2, /* ldm_regs_per_insn_subsequent. */
    COSTS_N_INSNS (5), /* loadf. */
    COSTS_N_INSNS (5), /* loadd. */
    COSTS_N_INSNS (1), /* load_unaligned. */
    COSTS_N_INSNS (2), /* store. */
    COSTS_N_INSNS (2), /* strd. */
    COSTS_N_INSNS (2), /* stm_1st. */
    1, /* stm_regs_per_insn_1st. */
    2, /* stm_regs_per_insn_subsequent. */
    COSTS_N_INSNS (1), /* storef. */
    COSTS_N_INSNS (1), /* stored. */
    COSTS_N_INSNS (1), /* store_unaligned. */
    COSTS_N_INSNS (1), /* loadv. */
    COSTS_N_INSNS (1) /* storev. */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (14), /* div. */
      COSTS_N_INSNS (4), /* mult. */
      COSTS_N_INSNS (7), /* mult_addsub. */
      COSTS_N_INSNS (30), /* fma. */
      COSTS_N_INSNS (3), /* addsub. */
      COSTS_N_INSNS (1), /* fpconst. */
      COSTS_N_INSNS (1), /* neg. */
      COSTS_N_INSNS (3), /* compare. */
      COSTS_N_INSNS (3), /* widen. */
      COSTS_N_INSNS (3), /* narrow. */
      COSTS_N_INSNS (3), /* toint. */
      COSTS_N_INSNS (3), /* fromint. */
      COSTS_N_INSNS (3) /* roundint. */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (24), /* div. */
      COSTS_N_INSNS (5), /* mult. */
      COSTS_N_INSNS (8), /* mult_addsub. */
      COSTS_N_INSNS (30), /* fma. */
      COSTS_N_INSNS (3), /* addsub. */
      COSTS_N_INSNS (1), /* fpconst. */
      COSTS_N_INSNS (1), /* neg. */
      COSTS_N_INSNS (3), /* compare. */
      COSTS_N_INSNS (3), /* widen. */
      COSTS_N_INSNS (3), /* narrow. */
      COSTS_N_INSNS (3), /* toint. */
      COSTS_N_INSNS (3), /* fromint. */
      COSTS_N_INSNS (3) /* roundint. */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1) /* alu. */
  }
};

/* Insn extra costs table referenced by arm_cortex_a8_tune.  */
const struct cpu_cost_table cortexa8_extra_costs =
{
  /* ALU */
  {
    0, /* arith. */
    0, /* logical. */
    COSTS_N_INSNS (1), /* shift. */
    0, /* shift_reg. */
    COSTS_N_INSNS (1), /* arith_shift. */
    0, /* arith_shift_reg. */
    COSTS_N_INSNS (1), /* log_shift. */
    0, /* log_shift_reg. */
    0, /* extend. */
    0, /* extend_arith. */
    0, /* bfi. */
    0, /* bfx. */
    0, /* clz. */
    0, /* rev. */
    0, /* non_exec. */
    true /* non_exec_costs_exec. */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1), /* simple. */
      COSTS_N_INSNS (1), /* flag_setting. */
      COSTS_N_INSNS (1), /* extend. */
      COSTS_N_INSNS (1), /* add. */
      COSTS_N_INSNS (1), /* extend_add. */
      COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
    },
    /* MULT DImode */
    {
      0, /* simple (N/A). */
      0, /* flag_setting (N/A). */
      COSTS_N_INSNS (2), /* extend. */
      0, /* add (N/A). */
      COSTS_N_INSNS (2), /* extend_add. */
      0 /* idiv (N/A). */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1), /* load. */
    COSTS_N_INSNS (1), /* load_sign_extend. */
    COSTS_N_INSNS (1), /* ldrd. */
    COSTS_N_INSNS (1), /* ldm_1st. */
    1, /* ldm_regs_per_insn_1st. */
    2, /* ldm_regs_per_insn_subsequent. */
    COSTS_N_INSNS (1), /* loadf. */
    COSTS_N_INSNS (1), /* loadd. */
    COSTS_N_INSNS (1), /* load_unaligned. */
    COSTS_N_INSNS (1), /* store. */
    COSTS_N_INSNS (1), /* strd. */
    COSTS_N_INSNS (1), /* stm_1st. */
    1, /* stm_regs_per_insn_1st. */
    2, /* stm_regs_per_insn_subsequent. */
    COSTS_N_INSNS (1), /* storef. */
    COSTS_N_INSNS (1), /* stored. */
    COSTS_N_INSNS (1), /* store_unaligned. */
    COSTS_N_INSNS (1), /* loadv. */
    COSTS_N_INSNS (1) /* storev. */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (36), /* div. */
      COSTS_N_INSNS (11), /* mult. */
      COSTS_N_INSNS (20), /* mult_addsub. */
      COSTS_N_INSNS (30), /* fma. */
      COSTS_N_INSNS (9), /* addsub. */
      COSTS_N_INSNS (3), /* fpconst. */
      COSTS_N_INSNS (3), /* neg. */
      COSTS_N_INSNS (6), /* compare. */
      COSTS_N_INSNS (4), /* widen. */
      COSTS_N_INSNS (4), /* narrow. */
      COSTS_N_INSNS (8), /* toint. */
      COSTS_N_INSNS (8), /* fromint. */
      COSTS_N_INSNS (8) /* roundint. */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (64), /* div. */
      COSTS_N_INSNS (16), /* mult.
*/
      COSTS_N_INSNS (25), /* mult_addsub. */
      COSTS_N_INSNS (30), /* fma. */
      COSTS_N_INSNS (9), /* addsub. */
      COSTS_N_INSNS (3), /* fpconst. */
      COSTS_N_INSNS (3), /* neg. */
      COSTS_N_INSNS (6), /* compare. */
      COSTS_N_INSNS (6), /* widen. */
      COSTS_N_INSNS (6), /* narrow. */
      COSTS_N_INSNS (8), /* toint. */
      COSTS_N_INSNS (8), /* fromint. */
      COSTS_N_INSNS (8) /* roundint. */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1) /* alu. */
  }
};

/* Insn extra costs table referenced by arm_cortex_a5_tune.  */
const struct cpu_cost_table cortexa5_extra_costs =
{
  /* ALU */
  {
    0, /* arith. */
    0, /* logical. */
    COSTS_N_INSNS (1), /* shift. */
    COSTS_N_INSNS (1), /* shift_reg. */
    COSTS_N_INSNS (1), /* arith_shift. */
    COSTS_N_INSNS (1), /* arith_shift_reg. */
    COSTS_N_INSNS (1), /* log_shift. */
    COSTS_N_INSNS (1), /* log_shift_reg. */
    COSTS_N_INSNS (1), /* extend. */
    COSTS_N_INSNS (1), /* extend_arith. */
    COSTS_N_INSNS (1), /* bfi. */
    COSTS_N_INSNS (1), /* bfx. */
    COSTS_N_INSNS (1), /* clz. */
    COSTS_N_INSNS (1), /* rev. */
    0, /* non_exec. */
    true /* non_exec_costs_exec. */
  },

  {
    /* MULT SImode */
    {
      0, /* simple. */
      COSTS_N_INSNS (1), /* flag_setting. */
      COSTS_N_INSNS (1), /* extend. */
      COSTS_N_INSNS (1), /* add. */
      COSTS_N_INSNS (1), /* extend_add. */
      COSTS_N_INSNS (7) /* idiv. */
    },
    /* MULT DImode */
    {
      0, /* simple (N/A). */
      0, /* flag_setting (N/A). */
      COSTS_N_INSNS (1), /* extend. */
      0, /* add. */
      COSTS_N_INSNS (2), /* extend_add. */
      0 /* idiv (N/A). */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1), /* load. */
    COSTS_N_INSNS (1), /* load_sign_extend. */
    COSTS_N_INSNS (6), /* ldrd. */
    COSTS_N_INSNS (1), /* ldm_1st. */
    1, /* ldm_regs_per_insn_1st. */
    2, /* ldm_regs_per_insn_subsequent. */
    COSTS_N_INSNS (2), /* loadf. */
    COSTS_N_INSNS (4), /* loadd. */
    COSTS_N_INSNS (1), /* load_unaligned. */
    COSTS_N_INSNS (1), /* store. */
    COSTS_N_INSNS (3), /* strd. */
    COSTS_N_INSNS (1), /* stm_1st. */
    1, /* stm_regs_per_insn_1st. */
    2, /* stm_regs_per_insn_subsequent. */
    COSTS_N_INSNS (2), /* storef. */
    COSTS_N_INSNS (2), /* stored. */
    COSTS_N_INSNS (1), /* store_unaligned.
*/
    COSTS_N_INSNS (1), /* loadv. */
    COSTS_N_INSNS (1) /* storev. */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15), /* div. */
      COSTS_N_INSNS (3), /* mult. */
      COSTS_N_INSNS (7), /* mult_addsub. */
      COSTS_N_INSNS (7), /* fma. */
      COSTS_N_INSNS (3), /* addsub. */
      COSTS_N_INSNS (3), /* fpconst. */
      COSTS_N_INSNS (3), /* neg. */
      COSTS_N_INSNS (3), /* compare. */
      COSTS_N_INSNS (3), /* widen. */
      COSTS_N_INSNS (3), /* narrow. */
      COSTS_N_INSNS (3), /* toint. */
      COSTS_N_INSNS (3), /* fromint. */
      COSTS_N_INSNS (3) /* roundint. */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30), /* div. */
      COSTS_N_INSNS (6), /* mult. */
      COSTS_N_INSNS (10), /* mult_addsub. */
      COSTS_N_INSNS (7), /* fma. */
      COSTS_N_INSNS (3), /* addsub. */
      COSTS_N_INSNS (3), /* fpconst. */
      COSTS_N_INSNS (3), /* neg. */
      COSTS_N_INSNS (3), /* compare. */
      COSTS_N_INSNS (3), /* widen. */
      COSTS_N_INSNS (3), /* narrow. */
      COSTS_N_INSNS (3), /* toint. */
      COSTS_N_INSNS (3), /* fromint. */
      COSTS_N_INSNS (3) /* roundint. */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1) /* alu. */
  }
};

/* Insn extra costs table referenced by arm_cortex_a7_tune.  */
const struct cpu_cost_table cortexa7_extra_costs =
{
  /* ALU */
  {
    0, /* arith. */
    0, /* logical. */
    COSTS_N_INSNS (1), /* shift. */
    COSTS_N_INSNS (1), /* shift_reg. */
    COSTS_N_INSNS (1), /* arith_shift. */
    COSTS_N_INSNS (1), /* arith_shift_reg. */
    COSTS_N_INSNS (1), /* log_shift. */
    COSTS_N_INSNS (1), /* log_shift_reg. */
    COSTS_N_INSNS (1), /* extend. */
    COSTS_N_INSNS (1), /* extend_arith. */
    COSTS_N_INSNS (1), /* bfi. */
    COSTS_N_INSNS (1), /* bfx. */
    COSTS_N_INSNS (1), /* clz. */
    COSTS_N_INSNS (1), /* rev. */
    0, /* non_exec. */
    true /* non_exec_costs_exec. */
  },

  {
    /* MULT SImode */
    {
      0, /* simple. */
      COSTS_N_INSNS (1), /* flag_setting. */
      COSTS_N_INSNS (1), /* extend. */
      COSTS_N_INSNS (1), /* add. */
      COSTS_N_INSNS (1), /* extend_add. */
      COSTS_N_INSNS (7) /* idiv. */
    },
    /* MULT DImode */
    {
      0, /* simple (N/A). */
      0, /* flag_setting (N/A). */
      COSTS_N_INSNS (1), /* extend. */
      0, /* add. */
      COSTS_N_INSNS (2), /* extend_add. */
      0 /* idiv (N/A).
*/
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1), /* load. */
    COSTS_N_INSNS (1), /* load_sign_extend. */
    COSTS_N_INSNS (3), /* ldrd. */
    COSTS_N_INSNS (1), /* ldm_1st. */
    1, /* ldm_regs_per_insn_1st. */
    2, /* ldm_regs_per_insn_subsequent. */
    COSTS_N_INSNS (2), /* loadf. */
    COSTS_N_INSNS (2), /* loadd. */
    COSTS_N_INSNS (1), /* load_unaligned. */
    COSTS_N_INSNS (1), /* store. */
    COSTS_N_INSNS (3), /* strd. */
    COSTS_N_INSNS (1), /* stm_1st. */
    1, /* stm_regs_per_insn_1st. */
    2, /* stm_regs_per_insn_subsequent. */
    COSTS_N_INSNS (2), /* storef. */
    COSTS_N_INSNS (2), /* stored. */
    COSTS_N_INSNS (1), /* store_unaligned. */
    COSTS_N_INSNS (1), /* loadv. */
    COSTS_N_INSNS (1) /* storev. */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15), /* div. */
      COSTS_N_INSNS (3), /* mult. */
      COSTS_N_INSNS (7), /* mult_addsub. */
      COSTS_N_INSNS (7), /* fma. */
      COSTS_N_INSNS (3), /* addsub. */
      COSTS_N_INSNS (3), /* fpconst. */
      COSTS_N_INSNS (3), /* neg. */
      COSTS_N_INSNS (3), /* compare. */
      COSTS_N_INSNS (3), /* widen. */
      COSTS_N_INSNS (3), /* narrow. */
      COSTS_N_INSNS (3), /* toint. */
      COSTS_N_INSNS (3), /* fromint. */
      COSTS_N_INSNS (3) /* roundint. */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30), /* div. */
      COSTS_N_INSNS (6), /* mult. */
      COSTS_N_INSNS (10), /* mult_addsub. */
      COSTS_N_INSNS (7), /* fma. */
      COSTS_N_INSNS (3), /* addsub. */
      COSTS_N_INSNS (3), /* fpconst. */
      COSTS_N_INSNS (3), /* neg. */
      COSTS_N_INSNS (3), /* compare. */
      COSTS_N_INSNS (3), /* widen. */
      COSTS_N_INSNS (3), /* narrow. */
      COSTS_N_INSNS (3), /* toint. */
      COSTS_N_INSNS (3), /* fromint. */
      COSTS_N_INSNS (3) /* roundint. */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1) /* alu. */
  }
};

/* Insn extra costs table referenced by arm_cortex_a12_tune.  */
const struct cpu_cost_table cortexa12_extra_costs =
{
  /* ALU */
  {
    0, /* arith. */
    0, /* logical. */
    0, /* shift. */
    COSTS_N_INSNS (1), /* shift_reg. */
    COSTS_N_INSNS (1), /* arith_shift. */
    COSTS_N_INSNS (1), /* arith_shift_reg. */
    COSTS_N_INSNS (1), /* log_shift. */
    COSTS_N_INSNS (1), /* log_shift_reg. */
    0, /* extend. */
    COSTS_N_INSNS (1), /* extend_arith. */
    0, /* bfi. */
    COSTS_N_INSNS (1), /* bfx. */
    COSTS_N_INSNS (1), /* clz. */
    COSTS_N_INSNS (1), /* rev. */
    0, /* non_exec. */
    true /* non_exec_costs_exec. */
  },
  /* MULT SImode */
  {
    {
      COSTS_N_INSNS (2), /* simple. */
      COSTS_N_INSNS (3), /* flag_setting. */
      COSTS_N_INSNS (2), /* extend. */
      COSTS_N_INSNS (3), /* add. */
      COSTS_N_INSNS (2), /* extend_add. */
      COSTS_N_INSNS (18) /* idiv. */
    },
    /* MULT DImode */
    {
      0, /* simple (N/A). */
      0, /* flag_setting (N/A). */
      COSTS_N_INSNS (3), /* extend. */
      0, /* add (N/A). */
      COSTS_N_INSNS (3), /* extend_add. */
      0 /* idiv (N/A). */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3), /* load. */
    COSTS_N_INSNS (3), /* load_sign_extend. */
    COSTS_N_INSNS (3), /* ldrd. */
    COSTS_N_INSNS (3), /* ldm_1st. */
    1, /* ldm_regs_per_insn_1st. */
    2, /* ldm_regs_per_insn_subsequent. */
    COSTS_N_INSNS (3), /* loadf. */
    COSTS_N_INSNS (3), /* loadd. */
    0, /* load_unaligned. */
    0, /* store. */
    0, /* strd. */
    0, /* stm_1st. */
    1, /* stm_regs_per_insn_1st. */
    2, /* stm_regs_per_insn_subsequent. */
    COSTS_N_INSNS (2), /* storef. */
    COSTS_N_INSNS (2), /* stored. */
    0, /* store_unaligned. */
    COSTS_N_INSNS (1), /* loadv. */
    COSTS_N_INSNS (1) /* storev. */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17), /* div. */
      COSTS_N_INSNS (4), /* mult. */
      COSTS_N_INSNS (8), /* mult_addsub. */
      COSTS_N_INSNS (8), /* fma. */
      COSTS_N_INSNS (4), /* addsub. */
      COSTS_N_INSNS (2), /* fpconst. */
      COSTS_N_INSNS (2), /* neg. */
      COSTS_N_INSNS (2), /* compare. */
      COSTS_N_INSNS (4), /* widen. */
      COSTS_N_INSNS (4), /* narrow. */
      COSTS_N_INSNS (4), /* toint. */
      COSTS_N_INSNS (4), /* fromint. */
      COSTS_N_INSNS (4) /* roundint. */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31), /* div. */
      COSTS_N_INSNS (4), /* mult. */
      COSTS_N_INSNS (8), /* mult_addsub. */
      COSTS_N_INSNS (8), /* fma. */
      COSTS_N_INSNS (4), /* addsub. */
      COSTS_N_INSNS (2), /* fpconst. */
      COSTS_N_INSNS (2), /* neg. */
      COSTS_N_INSNS (2), /* compare. */
      COSTS_N_INSNS (4), /* widen. */
      COSTS_N_INSNS (4), /* narrow. */
      COSTS_N_INSNS (4), /* toint.
*/
      COSTS_N_INSNS (4), /* fromint. */
      COSTS_N_INSNS (4) /* roundint. */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1) /* alu. */
  }
};

/* Insn extra costs table referenced by arm_cortex_a15_tune.  */
const struct cpu_cost_table cortexa15_extra_costs =
{
  /* ALU */
  {
    0, /* arith. */
    0, /* logical. */
    0, /* shift. */
    0, /* shift_reg. */
    COSTS_N_INSNS (1), /* arith_shift. */
    COSTS_N_INSNS (1), /* arith_shift_reg. */
    COSTS_N_INSNS (1), /* log_shift. */
    COSTS_N_INSNS (1), /* log_shift_reg. */
    0, /* extend. */
    COSTS_N_INSNS (1), /* extend_arith. */
    COSTS_N_INSNS (1), /* bfi. */
    0, /* bfx. */
    0, /* clz. */
    0, /* rev. */
    0, /* non_exec. */
    true /* non_exec_costs_exec. */
  },
  /* MULT SImode */
  {
    {
      COSTS_N_INSNS (2), /* simple. */
      COSTS_N_INSNS (3), /* flag_setting. */
      COSTS_N_INSNS (2), /* extend. */
      COSTS_N_INSNS (2), /* add. */
      COSTS_N_INSNS (2), /* extend_add. */
      COSTS_N_INSNS (18) /* idiv. */
    },
    /* MULT DImode */
    {
      0, /* simple (N/A). */
      0, /* flag_setting (N/A). */
      COSTS_N_INSNS (3), /* extend. */
      0, /* add (N/A). */
      COSTS_N_INSNS (3), /* extend_add. */
      0 /* idiv (N/A). */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3), /* load. */
    COSTS_N_INSNS (3), /* load_sign_extend. */
    COSTS_N_INSNS (3), /* ldrd. */
    COSTS_N_INSNS (4), /* ldm_1st. */
    1, /* ldm_regs_per_insn_1st. */
    2, /* ldm_regs_per_insn_subsequent. */
    COSTS_N_INSNS (4), /* loadf. */
    COSTS_N_INSNS (4), /* loadd. */
    0, /* load_unaligned. */
    0, /* store. */
    0, /* strd. */
    COSTS_N_INSNS (1), /* stm_1st. */
    1, /* stm_regs_per_insn_1st. */
    2, /* stm_regs_per_insn_subsequent. */
    0, /* storef. */
    0, /* stored. */
    0, /* store_unaligned. */
    COSTS_N_INSNS (1), /* loadv. */
    COSTS_N_INSNS (1) /* storev. */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17), /* div. */
      COSTS_N_INSNS (4), /* mult. */
      COSTS_N_INSNS (8), /* mult_addsub. */
      COSTS_N_INSNS (8), /* fma. */
      COSTS_N_INSNS (4), /* addsub. */
      COSTS_N_INSNS (2), /* fpconst. */
      COSTS_N_INSNS (2), /* neg. */
      COSTS_N_INSNS (5), /* compare. */
      COSTS_N_INSNS (4), /* widen. */
      COSTS_N_INSNS (4), /* narrow. */
      COSTS_N_INSNS (4), /* toint. */
      COSTS_N_INSNS (4), /* fromint.
*/
      COSTS_N_INSNS (4) /* roundint. */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31), /* div. */
      COSTS_N_INSNS (4), /* mult. */
      COSTS_N_INSNS (8), /* mult_addsub. */
      COSTS_N_INSNS (8), /* fma. */
      COSTS_N_INSNS (4), /* addsub. */
      COSTS_N_INSNS (2), /* fpconst. */
      COSTS_N_INSNS (2), /* neg. */
      COSTS_N_INSNS (2), /* compare. */
      COSTS_N_INSNS (4), /* widen. */
      COSTS_N_INSNS (4), /* narrow. */
      COSTS_N_INSNS (4), /* toint. */
      COSTS_N_INSNS (4), /* fromint. */
      COSTS_N_INSNS (4) /* roundint. */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1) /* alu. */
  }
};

/* Insn extra costs table referenced by arm_v7m_tune.  */
const struct cpu_cost_table v7m_extra_costs =
{
  /* ALU */
  {
    0, /* arith. */
    0, /* logical. */
    0, /* shift. */
    0, /* shift_reg. */
    0, /* arith_shift. */
    COSTS_N_INSNS (1), /* arith_shift_reg. */
    0, /* log_shift. */
    COSTS_N_INSNS (1), /* log_shift_reg. */
    0, /* extend. */
    COSTS_N_INSNS (1), /* extend_arith. */
    0, /* bfi. */
    0, /* bfx. */
    0, /* clz. */
    0, /* rev. */
    COSTS_N_INSNS (1), /* non_exec. */
    false /* non_exec_costs_exec. */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1), /* simple. */
      COSTS_N_INSNS (1), /* flag_setting. */
      COSTS_N_INSNS (2), /* extend. */
      COSTS_N_INSNS (1), /* add. */
      COSTS_N_INSNS (3), /* extend_add. */
      COSTS_N_INSNS (8) /* idiv. */
    },
    /* MULT DImode */
    {
      0, /* simple (N/A). */
      0, /* flag_setting (N/A). */
      COSTS_N_INSNS (2), /* extend. */
      0, /* add (N/A). */
      COSTS_N_INSNS (3), /* extend_add. */
      0 /* idiv (N/A). */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2), /* load. */
    0, /* load_sign_extend. */
    COSTS_N_INSNS (3), /* ldrd. */
    COSTS_N_INSNS (2), /* ldm_1st. */
    1, /* ldm_regs_per_insn_1st. */
    1, /* ldm_regs_per_insn_subsequent. */
    COSTS_N_INSNS (2), /* loadf. */
    COSTS_N_INSNS (3), /* loadd. */
    COSTS_N_INSNS (1), /* load_unaligned. */
    COSTS_N_INSNS (2), /* store. */
    COSTS_N_INSNS (3), /* strd. */
    COSTS_N_INSNS (2), /* stm_1st. */
    1, /* stm_regs_per_insn_1st. */
    1, /* stm_regs_per_insn_subsequent. */
    COSTS_N_INSNS (2), /* storef. */
    COSTS_N_INSNS (3), /* stored. */
    COSTS_N_INSNS (1), /* store_unaligned.
*/
    COSTS_N_INSNS (1), /* loadv. */
    COSTS_N_INSNS (1) /* storev. */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (7), /* div. */
      COSTS_N_INSNS (2), /* mult. */
      COSTS_N_INSNS (5), /* mult_addsub. */
      COSTS_N_INSNS (3), /* fma. */
      COSTS_N_INSNS (1), /* addsub. */
      0, /* fpconst. */
      0, /* neg. */
      0, /* compare. */
      0, /* widen. */
      0, /* narrow. */
      0, /* toint. */
      0, /* fromint. */
      0 /* roundint. */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (15), /* div. */
      COSTS_N_INSNS (5), /* mult. */
      COSTS_N_INSNS (7), /* mult_addsub. */
      COSTS_N_INSNS (7), /* fma. */
      COSTS_N_INSNS (3), /* addsub. */
      0, /* fpconst. */
      0, /* neg. */
      0, /* compare. */
      0, /* widen. */
      0, /* narrow. */
      0, /* toint. */
      0, /* fromint. */
      0 /* roundint. */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1) /* alu. */
  }
};

/* Addressing mode costs table used by the tune_params tables below.
   Every entry is COSTS_N_INSNS (0).  */
const struct addr_mode_cost_table generic_addr_mode_costs =
{
  /* int. */
  {
    COSTS_N_INSNS (0), /* AMO_DEFAULT. */
    COSTS_N_INSNS (0), /* AMO_NO_WB. */
    COSTS_N_INSNS (0) /* AMO_WB. */
  },
  /* float. */
  {
    COSTS_N_INSNS (0), /* AMO_DEFAULT. */
    COSTS_N_INSNS (0), /* AMO_NO_WB. */
    COSTS_N_INSNS (0) /* AMO_WB. */
  },
  /* vector. */
  {
    COSTS_N_INSNS (0), /* AMO_DEFAULT. */
    COSTS_N_INSNS (0), /* AMO_NO_WB. */
    COSTS_N_INSNS (0) /* AMO_WB. */
  }
};

/* The tune_params tables that follow are positional initializers; the
   trailing comment on each entry names the slot it fills.  */
const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs, /* Insn extra costs. */
  &generic_addr_mode_costs, /* Addressing mode costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  3, /* Constant limit. */
  5, /* Max cond insns. */
  8, /* Memset max inline. */
  1, /* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs, /* Insn extra costs.
*/
  &generic_addr_mode_costs, /* Addressing mode costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  1, /* Constant limit. */
  5, /* Max cond insns. */
  8, /* Memset max inline. */
  1, /* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.
   In this table that is the "Max cond insns" entry: 3 here, against 5
   in arm_slowmul_tune and arm_fastmul_tune.  */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs, /* Insn extra costs. */
  &generic_addr_mode_costs, /* Addressing mode costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  1, /* Constant limit. */
  3, /* Max cond insns. */
  8, /* Memset max inline. */
  1, /* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xscale_tune =
{
  &generic_extra_costs, /* Insn extra costs. */
  &generic_addr_mode_costs, /* Addressing mode costs. */
  xscale_sched_adjust_cost, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  2, /* Constant limit. */
  3, /* Max cond insns. */
  8, /* Memset max inline. */
  1, /* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM.
*/
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_9e_tune =
{
  &generic_extra_costs, /* Insn extra costs. */
  &generic_addr_mode_costs, /* Addressing mode costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  1, /* Constant limit. */
  5, /* Max cond insns. */
  8, /* Memset max inline. */
  1, /* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs, /* Insn extra costs. */
  &generic_addr_mode_costs, /* Addressing mode costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  1, /* Constant limit. */
  5, /* Max cond insns. */
  8, /* Memset max inline. */
  2, /* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs, /* Insn extra costs. */
  &generic_addr_mode_costs, /* Addressing mode costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  1, /* Constant limit. */
  5, /* Max cond insns. */
  8, /* Memset max inline. */
  1, /* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb.
*/
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs, /* Insn extra costs. */
  &generic_addr_mode_costs, /* Addressing mode costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  1, /* Constant limit. */
  5, /* Max cond insns. */
  8, /* Memset max inline. */
  2, /* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs, /* Insn extra costs. */
  &generic_addr_mode_costs, /* Addressing mode costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  1, /* Constant limit. */
  5, /* Max cond insns. */
  8, /* Memset max inline. */
  2, /* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs, /* Insn extra costs. */
  &generic_addr_mode_costs, /* Addressing mode costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  1, /* Constant limit. */
  5, /* Max cond insns. */
  8, /* Memset max inline. */
  2, /* Issue rate.
*/
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs, /* Insn extra costs. */
  &generic_addr_mode_costs, /* Addressing mode costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  1, /* Constant limit. */
  2, /* Max cond insns. */
  8, /* Memset max inline. */
  3, /* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};

/* Note: this table points at cortexa53_extra_costs, the same cost
   table that arm_cortex_a53_tune uses.  */
const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs, /* Insn extra costs. */
  &generic_addr_mode_costs, /* Addressing mode costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  1, /* Constant limit. */
  5, /* Max cond insns. */
  8, /* Memset max inline. */
  1, /* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs, /* Insn extra costs. */
  &generic_addr_mode_costs, /* Addressing mode costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  1, /* Constant limit. */
  5, /* Max cond insns. */
  8, /* Memset max inline. */
  2, /* Issue rate.
*/
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs, /* Insn extra costs. */
  &generic_addr_mode_costs, /* Addressing mode costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  1, /* Constant limit. */
  2, /* Max cond insns. */
  8, /* Memset max inline. */
  3, /* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs, /* Insn extra costs. */
  &generic_addr_mode_costs, /* Addressing mode costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  1, /* Constant limit. */
  2, /* Max cond insns. */
  8, /* Memset max inline. */
  3, /* Issue rate. */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs, /* Insn extra costs. */
  &generic_addr_mode_costs, /* Addressing mode costs. */
  NULL, /* Sched adj cost. */
  arm_default_branch_cost, /* Branch cost. */
  &arm_default_vec_cost, /* Vectorizer costs. */
  1, /* Constant limit. */
  2, /* Max cond insns. */
  32, /* Memset max inline.
*/ 4, /* Issue rate. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_TRUE, tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ tune_params::DISPARAGE_FLAGS_ALL, tune_params::PREF_NEON_STRINGOPS_FALSE, tune_params::FUSE_NOTHING, tune_params::SCHED_AUTOPREF_OFF }; /* Branches can be dual-issued on Cortex-A5, so conditional execution is less appealing. Set max_insns_skipped to a low value. */ const struct tune_params arm_cortex_a5_tune = { &cortexa5_extra_costs, &generic_addr_mode_costs, /* Addressing mode costs. */ NULL, /* Sched adj cost. */ arm_cortex_a5_branch_cost, &arm_default_vec_cost, 1, /* Constant limit. */ 1, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */ tune_params::DISPARAGE_FLAGS_NEITHER, tune_params::PREF_NEON_STRINGOPS_TRUE, tune_params::FUSE_NOTHING, tune_params::SCHED_AUTOPREF_OFF }; const struct tune_params arm_cortex_a9_tune = { &cortexa9_extra_costs, &generic_addr_mode_costs, /* Addressing mode costs. */ cortex_a9_sched_adjust_cost, arm_default_branch_cost, &arm_default_vec_cost, 1, /* Constant limit. */ 5, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ ARM_PREFETCH_BENEFICIAL(4,32,32), tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ tune_params::DISPARAGE_FLAGS_NEITHER, tune_params::PREF_NEON_STRINGOPS_FALSE, tune_params::FUSE_NOTHING, tune_params::SCHED_AUTOPREF_OFF }; const struct tune_params arm_cortex_a12_tune = { &cortexa12_extra_costs, &generic_addr_mode_costs, /* Addressing mode costs. */ NULL, /* Sched adj cost. 
*/ arm_default_branch_cost, &arm_default_vec_cost, /* Vectorizer costs. */ 1, /* Constant limit. */ 2, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_TRUE, tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ tune_params::DISPARAGE_FLAGS_ALL, tune_params::PREF_NEON_STRINGOPS_TRUE, FUSE_OPS (tune_params::FUSE_MOVW_MOVT), tune_params::SCHED_AUTOPREF_OFF }; const struct tune_params arm_cortex_a73_tune = { &cortexa57_extra_costs, &generic_addr_mode_costs, /* Addressing mode costs. */ NULL, /* Sched adj cost. */ arm_default_branch_cost, &arm_default_vec_cost, /* Vectorizer costs. */ 1, /* Constant limit. */ 2, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_TRUE, tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ tune_params::DISPARAGE_FLAGS_ALL, tune_params::PREF_NEON_STRINGOPS_TRUE, FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT), tune_params::SCHED_AUTOPREF_FULL }; /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single cycle to execute each. An LDR from the constant pool also takes two cycles to execute, but mildly increases pipelining opportunity (consecutive loads/stores can be pipelined together, saving one cycle), and may also improve icache utilisation. Hence we prefer the constant pool for such processors. */ const struct tune_params arm_v7m_tune = { &v7m_extra_costs, &generic_addr_mode_costs, /* Addressing mode costs. */ NULL, /* Sched adj cost. */ arm_cortex_m_branch_cost, &arm_default_vec_cost, 1, /* Constant limit. */ 2, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. 
*/ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */ tune_params::DISPARAGE_FLAGS_NEITHER, tune_params::PREF_NEON_STRINGOPS_FALSE, tune_params::FUSE_NOTHING, tune_params::SCHED_AUTOPREF_OFF }; /* Cortex-M7 tuning. */ const struct tune_params arm_cortex_m7_tune = { &v7m_extra_costs, &generic_addr_mode_costs, /* Addressing mode costs. */ NULL, /* Sched adj cost. */ arm_cortex_m7_branch_cost, &arm_default_vec_cost, 0, /* Constant limit. */ 1, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ tune_params::DISPARAGE_FLAGS_NEITHER, tune_params::PREF_NEON_STRINGOPS_FALSE, tune_params::FUSE_NOTHING, tune_params::SCHED_AUTOPREF_OFF }; /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and cortex-m23. */ const struct tune_params arm_v6m_tune = { &generic_extra_costs, /* Insn extra costs. */ &generic_addr_mode_costs, /* Addressing mode costs. */ NULL, /* Sched adj cost. */ arm_default_branch_cost, &arm_default_vec_cost, /* Vectorizer costs. */ 1, /* Constant limit. */ 5, /* Max cond insns. */ 8, /* Memset max inline. */ 1, /* Issue rate. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_FALSE, tune_params::PREF_LDRD_FALSE, tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */ tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */ tune_params::DISPARAGE_FLAGS_NEITHER, tune_params::PREF_NEON_STRINGOPS_FALSE, tune_params::FUSE_NOTHING, tune_params::SCHED_AUTOPREF_OFF }; const struct tune_params arm_fa726te_tune = { &generic_extra_costs, /* Insn extra costs. 
*/ &generic_addr_mode_costs, /* Addressing mode costs. */ fa726te_sched_adjust_cost, arm_default_branch_cost, &arm_default_vec_cost, 1, /* Constant limit. */ 5, /* Max cond insns. */ 8, /* Memset max inline. */ 2, /* Issue rate. */ ARM_PREFETCH_NOT_BENEFICIAL, tune_params::PREF_CONST_POOL_TRUE, tune_params::PREF_LDRD_FALSE, tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */ tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */ tune_params::DISPARAGE_FLAGS_NEITHER, tune_params::PREF_NEON_STRINGOPS_FALSE, tune_params::FUSE_NOTHING, tune_params::SCHED_AUTOPREF_OFF }; /* Auto-generated CPU, FPU and architecture tables. */ #include "arm-cpu-data.h" /* The name of the preprocessor macro to define for this architecture. PROFILE is replaced by the architecture name (eg. 8A) in arm_option_override () and is thus chosen to be big enough to hold the longest architecture name. */ char arm_arch_name[] = "__ARM_ARCH_PROFILE__"; /* Supported TLS relocations. */ enum tls_reloc { TLS_GD32, TLS_GD32_FDPIC, TLS_LDM32, TLS_LDM32_FDPIC, TLS_LDO32, TLS_IE32, TLS_IE32_FDPIC, TLS_LE32, TLS_DESCSEQ /* GNU scheme */ }; /* The maximum number of insns to be used when loading a constant. */ inline static int arm_constant_limit (bool size_p) { return size_p ? 1 : current_tune->constant_limit; } /* Emit an insn that's a simple single-set. Both the operands must be known to be valid. */ inline static rtx_insn * emit_set_insn (rtx x, rtx y) { return emit_insn (gen_rtx_SET (x, y)); } /* Return the number of bits set in VALUE. */ static unsigned bit_count (unsigned long value) { unsigned long count = 0; while (value) { count++; value &= value - 1; /* Clear the least-significant set bit. */ } return count; } /* Return the number of bits set in BMAP. 
*/ static unsigned bitmap_popcount (const sbitmap bmap) { unsigned int count = 0; unsigned int n = 0; sbitmap_iterator sbi; EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi) count++; return count; } typedef struct { machine_mode mode; const char *name; } arm_fixed_mode_set; /* A small helper for setting fixed-point library libfuncs. */ static void arm_set_fixed_optab_libfunc (optab optable, machine_mode mode, const char *funcname, const char *modename, int num_suffix) { char buffer[50]; if (num_suffix == 0) sprintf (buffer, "__gnu_%s%s", funcname, modename); else sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix); set_optab_libfunc (optable, mode, buffer); } static void arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to, machine_mode from, const char *funcname, const char *toname, const char *fromname) { char buffer[50]; const char *maybe_suffix_2 = ""; /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */ if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to) && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to) && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to)) maybe_suffix_2 = "2"; sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname, maybe_suffix_2); set_conv_libfunc (optable, to, from, buffer); } static GTY(()) rtx speculation_barrier_libfunc; /* Record that we have no arithmetic or comparison libfuncs for machine mode MODE. */ static void arm_block_arith_comp_libfuncs_for_mode (machine_mode mode) { /* Arithmetic. */ set_optab_libfunc (add_optab, mode, NULL); set_optab_libfunc (sdiv_optab, mode, NULL); set_optab_libfunc (smul_optab, mode, NULL); set_optab_libfunc (neg_optab, mode, NULL); set_optab_libfunc (sub_optab, mode, NULL); /* Comparisons. 
*/ set_optab_libfunc (eq_optab, mode, NULL); set_optab_libfunc (ne_optab, mode, NULL); set_optab_libfunc (lt_optab, mode, NULL); set_optab_libfunc (le_optab, mode, NULL); set_optab_libfunc (ge_optab, mode, NULL); set_optab_libfunc (gt_optab, mode, NULL); set_optab_libfunc (unord_optab, mode, NULL); } /* Set up library functions unique to ARM. */ static void arm_init_libfuncs (void) { machine_mode mode_iter; /* For Linux, we have access to kernel support for atomic operations. */ if (arm_abi == ARM_ABI_AAPCS_LINUX) init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE); /* There are no special library functions unless we are using the ARM BPABI. */ if (!TARGET_BPABI) return; /* The functions below are described in Section 4 of the "Run-Time ABI for the ARM architecture", Version 1.0. */ /* Double-precision floating-point arithmetic. Table 2. */ set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd"); set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv"); set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul"); set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg"); set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub"); /* Double-precision comparisons. Table 3. */ set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq"); set_optab_libfunc (ne_optab, DFmode, NULL); set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt"); set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple"); set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge"); set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt"); set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun"); /* Single-precision floating-point arithmetic. Table 4. */ set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd"); set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv"); set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul"); set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg"); set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub"); /* Single-precision comparisons. Table 5. 
*/ set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq"); set_optab_libfunc (ne_optab, SFmode, NULL); set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt"); set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple"); set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge"); set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt"); set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun"); /* Floating-point to integer conversions. Table 6. */ set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz"); set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz"); set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz"); set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz"); set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz"); set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz"); set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz"); set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz"); /* Conversions between floating types. Table 7. */ set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f"); set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d"); /* Integer to floating-point conversions. Table 8. */ set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d"); set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d"); set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d"); set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d"); set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f"); set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f"); set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f"); set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f"); /* Long long. Table 9. 
*/ set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul"); set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod"); set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod"); set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl"); set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr"); set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr"); set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp"); set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp"); /* Integer (32/32->32) division. \S 4.3.1. */ set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod"); set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod"); /* The divmod functions are designed so that they can be used for plain division, even though they return both the quotient and the remainder. The quotient is returned in the usual location (i.e., r0 for SImode, {r0, r1} for DImode), just as would be expected for an ordinary division routine. Because the AAPCS calling conventions specify that all of { r0, r1, r2, r3 } are callee-saved registers, there is no need to tell the compiler explicitly that those registers are clobbered by these routines. */ set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod"); set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod"); /* For SImode division the ABI provides div-without-mod routines, which are faster. */ set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv"); set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv"); /* We don't have mod libcalls. Fortunately gcc knows how to use the divmod libcalls instead. */ set_optab_libfunc (smod_optab, DImode, NULL); set_optab_libfunc (umod_optab, DImode, NULL); set_optab_libfunc (smod_optab, SImode, NULL); set_optab_libfunc (umod_optab, SImode, NULL); /* Half-precision float operations. The compiler handles all operations with NULL libfuncs by converting the SFmode. */ switch (arm_fp16_format) { case ARM_FP16_FORMAT_IEEE: case ARM_FP16_FORMAT_ALTERNATIVE: /* Conversions. 
*/ set_conv_libfunc (trunc_optab, HFmode, SFmode, (arm_fp16_format == ARM_FP16_FORMAT_IEEE ? "__gnu_f2h_ieee" : "__gnu_f2h_alternative")); set_conv_libfunc (sext_optab, SFmode, HFmode, (arm_fp16_format == ARM_FP16_FORMAT_IEEE ? "__gnu_h2f_ieee" : "__gnu_h2f_alternative")); set_conv_libfunc (trunc_optab, HFmode, DFmode, (arm_fp16_format == ARM_FP16_FORMAT_IEEE ? "__gnu_d2h_ieee" : "__gnu_d2h_alternative")); arm_block_arith_comp_libfuncs_for_mode (HFmode); break; default: break; } /* For all possible libcalls in BFmode, record NULL. */ FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT) { set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL); set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL); set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL); set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL); } arm_block_arith_comp_libfuncs_for_mode (BFmode); /* Use names prefixed with __gnu_ for fixed-point helper functions. */ { const arm_fixed_mode_set fixed_arith_modes[] = { { E_QQmode, "qq" }, { E_UQQmode, "uqq" }, { E_HQmode, "hq" }, { E_UHQmode, "uhq" }, { E_SQmode, "sq" }, { E_USQmode, "usq" }, { E_DQmode, "dq" }, { E_UDQmode, "udq" }, { E_TQmode, "tq" }, { E_UTQmode, "utq" }, { E_HAmode, "ha" }, { E_UHAmode, "uha" }, { E_SAmode, "sa" }, { E_USAmode, "usa" }, { E_DAmode, "da" }, { E_UDAmode, "uda" }, { E_TAmode, "ta" }, { E_UTAmode, "uta" } }; const arm_fixed_mode_set fixed_conv_modes[] = { { E_QQmode, "qq" }, { E_UQQmode, "uqq" }, { E_HQmode, "hq" }, { E_UHQmode, "uhq" }, { E_SQmode, "sq" }, { E_USQmode, "usq" }, { E_DQmode, "dq" }, { E_UDQmode, "udq" }, { E_TQmode, "tq" }, { E_UTQmode, "utq" }, { E_HAmode, "ha" }, { E_UHAmode, "uha" }, { E_SAmode, "sa" }, { E_USAmode, "usa" }, { E_DAmode, "da" }, { E_UDAmode, "uda" }, { E_TAmode, "ta" }, { E_UTAmode, "uta" }, { E_QImode, "qi" }, { E_HImode, "hi" }, { E_SImode, "si" }, { E_DImode, "di" }, { E_TImode, "ti" }, { E_SFmode, "sf" }, { E_DFmode, "df" } }; unsigned int i, j; for (i = 0; i < ARRAY_SIZE 
(fixed_arith_modes); i++) { arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode, "add", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode, "ssadd", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode, "usadd", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode, "sub", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode, "sssub", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode, "ussub", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode, "mul", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode, "ssmul", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode, "usmul", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode, "div", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode, "udiv", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode, "ssdiv", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode, "usdiv", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode, "neg", fixed_arith_modes[i].name, 2); arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode, "ssneg", fixed_arith_modes[i].name, 2); arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode, "usneg", fixed_arith_modes[i].name, 2); arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode, "ashl", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode, "ashr", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc 
(lshr_optab, fixed_arith_modes[i].mode, "lshr", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode, "ssashl", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode, "usashl", fixed_arith_modes[i].name, 3); arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode, "cmp", fixed_arith_modes[i].name, 2); } for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++) for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++) { if (i == j || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode) && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode))) continue; arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode, fixed_conv_modes[j].mode, "fract", fixed_conv_modes[i].name, fixed_conv_modes[j].name); arm_set_fixed_conv_libfunc (satfract_optab, fixed_conv_modes[i].mode, fixed_conv_modes[j].mode, "satfract", fixed_conv_modes[i].name, fixed_conv_modes[j].name); arm_set_fixed_conv_libfunc (fractuns_optab, fixed_conv_modes[i].mode, fixed_conv_modes[j].mode, "fractuns", fixed_conv_modes[i].name, fixed_conv_modes[j].name); arm_set_fixed_conv_libfunc (satfractuns_optab, fixed_conv_modes[i].mode, fixed_conv_modes[j].mode, "satfractuns", fixed_conv_modes[i].name, fixed_conv_modes[j].name); } } if (TARGET_AAPCS_BASED) synchronize_libfunc = init_one_libfunc ("__sync_synchronize"); speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier"); } /* On AAPCS systems, this is the "struct __va_list". */ static GTY(()) tree va_list_type; /* Return the type to use as __builtin_va_list. */ static tree arm_build_builtin_va_list (void) { tree va_list_name; tree ap_field; if (!TARGET_AAPCS_BASED) return std_build_builtin_va_list (); /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type defined as: struct __va_list { void *__ap; }; The C Library ABI further reinforces this definition in \S 4.1. We must follow this definition exactly. 
The structure tag name is visible in C++ mangled names, and thus forms a part of the ABI. The field name may be used by people who #include <stdarg.h>. */ /* Create the type. */ va_list_type = lang_hooks.types.make_type (RECORD_TYPE); /* Give it the required name. */ va_list_name = build_decl (BUILTINS_LOCATION, TYPE_DECL, get_identifier ("__va_list"), va_list_type); DECL_ARTIFICIAL (va_list_name) = 1; TYPE_NAME (va_list_type) = va_list_name; TYPE_STUB_DECL (va_list_type) = va_list_name; /* Create the __ap field. */ ap_field = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("__ap"), ptr_type_node); DECL_ARTIFICIAL (ap_field) = 1; DECL_FIELD_CONTEXT (ap_field) = va_list_type; TYPE_FIELDS (va_list_type) = ap_field; /* Compute its layout. */ layout_type (va_list_type); return va_list_type; } /* Return an expression of type "void *" pointing to the next available argument in a variable-argument list. VALIST is the user-level va_list object, of type __builtin_va_list. */ static tree arm_extract_valist_ptr (tree valist) { if (TREE_TYPE (valist) == error_mark_node) return error_mark_node; /* On an AAPCS target, the pointer is stored within "struct va_list". */ if (TARGET_AAPCS_BASED) { tree ap_field = TYPE_FIELDS (TREE_TYPE (valist)); valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field), valist, ap_field, NULL_TREE); } return valist; } /* Implement TARGET_EXPAND_BUILTIN_VA_START. */ static void arm_expand_builtin_va_start (tree valist, rtx nextarg) { valist = arm_extract_valist_ptr (valist); std_expand_builtin_va_start (valist, nextarg); } /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */ static tree arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p, gimple_seq *post_p) { valist = arm_extract_valist_ptr (valist); return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); } /* Check any incompatible options that the user has specified. 
*/
/* Only the target flags are read from OPTS; every other condition is
   tested through target macros and globals.  Problems are reported with
   error () or warning ().  The one state change made here is clearing
   flag_pic after the RTP PIC/Thumb error, which the later -mpure-code /
   -mslow-flash-data checks then observe.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
             "debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  if (target_pure_code || target_slow_flash_data)
    {
      /* When both options are given, diagnostics name -mpure-code.  */
      const char *flag = (target_pure_code ? "-mpure-code" :
                                             "-mslow-flash-data");
      bool common_unsupported_modes = arm_arch_notm || flag_pic
                                      || TARGET_NEON;

      /* We only support -mslow-flash-data on M-profile targets with
         MOVT.  */
      if (target_slow_flash_data && (!TARGET_HAVE_MOVT
                                     || common_unsupported_modes))
        error ("%s only supports non-pic code on M-profile targets with the "
               "MOVT instruction", flag);

      /* We only support -mpure-code on M-profile targets.  */
      if (target_pure_code && common_unsupported_modes)
        error ("%s only supports non-pic code on M-profile targets", flag);

      /* Cannot load addresses: -mslow-flash-data forbids literal pool and
         -mword-relocations forbids relocation of MOVT/MOVW.  */
      if (target_word_relocations)
        error ("%s incompatible with %<-mword-relocations%>", flag);
    }
}

/* Recompute the global settings depending on target attribute options.
   This sets the section anchor offset range and the md_asm_adjust hook in
   targetm, and the max_insns_skipped global.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  /* Increase the number of conditional instructions with -Os.  */
  max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;

  /* For THUMB2, we limit the conditional sequence to one IT block.  */
  if (TARGET_THUMB2)
    max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);

  /* Thumb-1 has its own asm-adjust hook; everything else shares the ARM
     one.  */
  if (TARGET_THUMB1)
    targetm.md_asm_adjust = thumb1_md_asm_adjust;
  else
    targetm.md_asm_adjust = arm_md_asm_adjust;
}

/* True if -mflip-thumb should next add an attribute for the default mode,
   false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;

/* Helper for the option-override hooks.  If -falign-functions is enabled
   in OPTS without an explicit value, supply one: "2" when compiling Thumb
   code for size, "4" otherwise.  */
static void
arm_override_options_after_change_1 (struct gcc_options *opts)
{
  /* -falign-functions without argument: supply one.  Note that && binds
     tighter than ?:, so the condition is (Thumb && optimize_size).  */
  if (opts->x_flag_align_functions && !opts->x_str_align_functions)
    opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
      && opts->x_optimize_size ? "2" : "4";
}

/* Implement targetm.override_options_after_change.  Reconfigures
   arm_active_target from the default target option node and then reapplies
   the alignment default to the global options.  */

static void
arm_override_options_after_change (void)
{
  arm_configure_build_target (&arm_active_target,
                              TREE_TARGET_OPTION (target_option_default_node),
                              &global_options_set, false);

  arm_override_options_after_change_1 (&global_options);
}

/* Implement TARGET_OPTION_SAVE.  Copies the architecture, CPU and tune
   strings from OPTS into PTR.  */
static void
arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
{
  ptr->x_arm_arch_string = opts->x_arm_arch_string;
  ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
  ptr->x_arm_tune_string = opts->x_arm_tune_string;
}

/* Implement TARGET_OPTION_RESTORE.  Copies the architecture, CPU and tune
   strings from PTR back into OPTS, then reconfigures arm_active_target from
   PTR, passing false for the warn_compatible argument.  */
static void
arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
{
  opts->x_arm_arch_string = ptr->x_arm_arch_string;
  opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
  opts->x_arm_tune_string = ptr->x_arm_tune_string;
  arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
                              false);
}

/* Reset options between modes that the user has specified.
*/
/* OPTS is the option set that is adjusted in place.  OPTS_SET is consulted
   to tell whether -mrestrict-it, -munaligned-access, -mcpu and -mtune have
   a value recorded (presumably meaning they were given explicitly -- confirm
   against the option machinery).  The scheduling, shrink-wrap and ipa-ra
   flags are re-derived on every call from the optimization node saved in
   init_optimize, so the order of the steps below matters.  */
static void
arm_option_override_internal (struct gcc_options *opts,
                              struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts);

  if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
         be confusing to users who have just compiled with
         eg, -march=armv4.  */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* We need to remember the initial values so that combinations of options
     like -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT.  */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Use the IT size from CPU specific tuning unless -mrestrict-it is used.  */
  if (!opts_set->x_arm_restrict_it
      && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
       i.e. Thumb2 and ARM state only.
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-base processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors
     - ARMv8-M Baseline processors.  */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
                                  && arm_arch6
                                  && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
           && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      /* An explicit request that the architecture cannot honour is
         diagnosed and then dropped.  */
      warning (0, "target CPU does not support unaligned accesses");
      opts->x_unaligned_access = 0;
    }

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  if (TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_inline_asm_unified = true;

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}

static sbitmap isa_all_fpubits_internal;
static sbitmap isa_all_fpbits;
static sbitmap isa_quirkbits;

/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical.  */
void
arm_configure_build_target (struct arm_build_target *target,
                            struct cl_target_option *opts,
                            struct gcc_options *opts_set,
                            bool warn_compatible)
{
  const cpu_option *arm_selected_tune = NULL;
  const arch_option *arm_selected_arch = NULL;
  const cpu_option *arm_selected_cpu = NULL;
  const arm_fpu_desc *arm_selected_fpu = NULL;
  const char *tune_opts = NULL;
  const char *arch_opts = NULL;
  const char *cpu_opts = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;

  if (opts_set->x_arm_arch_string)
    {
      arm_selected_arch = arm_parse_arch_option_name (all_architectures,
                                                      "-march",
                                                      opts->x_arm_arch_string);
      arch_opts = strchr (opts->x_arm_arch_string, '+');
    }

  if (opts_set->x_arm_cpu_string)
    {
      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
                                                    opts->x_arm_cpu_string);
      cpu_opts = strchr (opts->x_arm_cpu_string, '+');
      arm_selected_tune = arm_selected_cpu;
      /* If taking the tuning from -mcpu, we don't need to rescan the
         options for tuning.
*/ } if (opts_set->x_arm_tune_string) { arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune", opts->x_arm_tune_string); tune_opts = strchr (opts->x_arm_tune_string, '+'); } if (arm_selected_arch) { arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits); arm_parse_option_features (target->isa, &arm_selected_arch->common, arch_opts); if (arm_selected_cpu) { auto_sbitmap cpu_isa (isa_num_bits); auto_sbitmap isa_delta (isa_num_bits); arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits); arm_parse_option_features (cpu_isa, &arm_selected_cpu->common, cpu_opts); bitmap_xor (isa_delta, cpu_isa, target->isa); /* Ignore any bits that are quirk bits. */ bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits); /* Ignore (for now) any bits that might be set by -mfpu. */ bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits_internal); /* And if the target ISA lacks floating point, ignore any extensions that depend on that. */ if (!bitmap_bit_p (target->isa, isa_bit_vfpv2)) bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits); if (!bitmap_empty_p (isa_delta)) { if (warn_compatible) warning (0, "switch %<-mcpu=%s%> conflicts " "with %<-march=%s%> switch", arm_selected_cpu->common.name, arm_selected_arch->common.name); /* -march wins for code generation. -mcpu wins for default tuning. */ if (!arm_selected_tune) arm_selected_tune = arm_selected_cpu; arm_selected_cpu = all_cores + arm_selected_arch->tune_id; target->arch_name = arm_selected_arch->common.name; } else { /* Architecture and CPU are essentially the same. Prefer the CPU setting. */ arm_selected_arch = all_architectures + arm_selected_cpu->arch; target->core_name = arm_selected_cpu->common.name; /* Copy the CPU's capabilities, so that we inherit the appropriate extensions and quirks. */ bitmap_copy (target->isa, cpu_isa); } } else { /* Pick a CPU based on the architecture. 
*/ arm_selected_cpu = all_cores + arm_selected_arch->tune_id; target->arch_name = arm_selected_arch->common.name; /* Note: target->core_name is left unset in this path. */ } } else if (arm_selected_cpu) { target->core_name = arm_selected_cpu->common.name; arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits); arm_parse_option_features (target->isa, &arm_selected_cpu->common, cpu_opts); arm_selected_arch = all_architectures + arm_selected_cpu->arch; } /* If the user did not specify a processor or architecture, choose one for them. */ else { const cpu_option *sel; auto_sbitmap sought_isa (isa_num_bits); bitmap_clear (sought_isa); auto_sbitmap default_isa (isa_num_bits); arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU", TARGET_CPU_DEFAULT); cpu_opts = strchr (TARGET_CPU_DEFAULT, '+'); gcc_assert (arm_selected_cpu->common.name); /* RWE: All of the selection logic below (to the end of this 'if' clause) looks somewhat suspect. It appears to be mostly there to support forcing thumb support when the default CPU does not have thumb (somewhat dubious in terms of what the user might be expecting). I think it should be removed once support for the pre-thumb era cores is removed. */ sel = arm_selected_cpu; arm_initialize_isa (default_isa, sel->common.isa_bits); arm_parse_option_features (default_isa, &arm_selected_cpu->common, cpu_opts); /* Now check to see if the user has specified any command line switches that require certain abilities from the cpu. */ if (TARGET_INTERWORK || TARGET_THUMB) bitmap_set_bit (sought_isa, isa_bit_thumb); /* If there are such requirements and the default CPU does not satisfy them, we need to run over the complete list of cores looking for one that is satisfactory. */ if (!bitmap_empty_p (sought_isa) && !bitmap_subset_p (sought_isa, default_isa)) { auto_sbitmap candidate_isa (isa_num_bits); /* We're only interested in a CPU with at least the capabilities of the default CPU and the required additional features. 
*/ bitmap_ior (default_isa, default_isa, sought_isa); /* Try to locate a CPU type that supports all of the abilities of the default CPU, plus the extra abilities requested by the user. */ for (sel = all_cores; sel->common.name != NULL; sel++) { arm_initialize_isa (candidate_isa, sel->common.isa_bits); /* An exact match? */ if (bitmap_equal_p (default_isa, candidate_isa)) break; } if (sel->common.name == NULL) { unsigned current_bit_count = isa_num_bits; const cpu_option *best_fit = NULL; /* Ideally we would like to issue an error message here saying that it was not possible to find a CPU compatible with the default CPU, but which also supports the command line options specified by the programmer, and so they ought to use the -mcpu=<name> command line option to override the default CPU type. If we cannot find a CPU that has exactly the characteristics of the default CPU and the given command line options we scan the array again looking for a best match. The best match must have at least the capabilities of the perfect match. */ for (sel = all_cores; sel->common.name != NULL; sel++) { arm_initialize_isa (candidate_isa, sel->common.isa_bits); if (bitmap_subset_p (default_isa, candidate_isa)) { unsigned count; bitmap_and_compl (candidate_isa, candidate_isa, default_isa); count = bitmap_popcount (candidate_isa); if (count < current_bit_count) { best_fit = sel; current_bit_count = count; } } gcc_assert (best_fit); sel = best_fit; } } arm_selected_cpu = sel; } /* Now we know the CPU, we can finally initialize the target structure. 
*/ target->core_name = arm_selected_cpu->common.name; arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits); arm_parse_option_features (target->isa, &arm_selected_cpu->common, cpu_opts); arm_selected_arch = all_architectures + arm_selected_cpu->arch; } gcc_assert (arm_selected_cpu); gcc_assert (arm_selected_arch); if (opts->x_arm_fpu_index != TARGET_FPU_auto) { arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index]; auto_sbitmap fpu_bits (isa_num_bits); arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits); bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal); bitmap_ior (target->isa, target->isa, fpu_bits); } if (!arm_selected_tune) arm_selected_tune = arm_selected_cpu; else /* Validate the features passed to -mtune. */ arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts); const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores]; /* Finish initializing the target structure. */ target->arch_pp_name = arm_selected_arch->arch; target->base_arch = arm_selected_arch->base_arch; target->profile = arm_selected_arch->profile; target->tune_flags = tune_data->tune_flags; target->tune = tune_data->tune; target->tune_core = tune_data->scheduler; arm_option_reconfigure_globals (); } /* Fix up any incompatible options that the user has specified. 
*/
/* In order, this function: allocates and fills the file-scope ISA bitmaps
   (isa_quirkbits, isa_all_fpubits_internal, isa_all_fpbits) and
   arm_active_target.isa; configures arm_active_target from the global
   options; validates and adjusts the APCS, structure-size-boundary, PIC,
   prefetch and scheduling settings; and finally records the resulting
   optimization and target option nodes.  */
static void
arm_option_override (void)
{
  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  static const enum isa_feature fp_bitlist[]
    = { ISA_ALL_FP, isa_nobit };
  static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
  cl_target_option opts;

  isa_quirkbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_quirkbits, quirk_bitlist);

  isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
  isa_all_fpbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
  arm_initialize_isa (isa_all_fpbits, fp_bitlist);

  arm_active_target.isa = sbitmap_alloc (isa_num_bits);

  /* No explicit -mfpu: use the value that the FPUTYPE_AUTO string maps to
     for that option.  */
  if (!global_options_set.x_arm_fpu_index)
    {
      bool ok;
      int fpu_index;

      ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
                                  CL_TARGET);
      gcc_assert (ok);
      arm_fpu_index = (enum fpu_type) fpu_index;
    }

  cl_target_option_save (&opts, &global_options);
  arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
                              true);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file and for enabling feature flags.  */
  arm_option_reconfigure_globals ();

  arm_tune = arm_active_target.tune_core;
  tune_flags = arm_active_target.tune_flags;
  current_tune = arm_active_target.tune;

  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "%<-mapcs-stack-check%> incompatible with "
               "%<-mno-apcs-frame%>");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported. Ignored");

  /* Set up some tuning parameters.  */
  arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
  arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
  arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
  arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
  arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
        arm_structure_size_boundary = 8;
    }
  else
    {
      warning (0, "option %<-mstructure-size-boundary%> is deprecated");

      /* Accept 8 and 32 always, 64 only with ARM_DOUBLEWORD_ALIGN; any
         other value is diagnosed and replaced by the ABI default.  */
      if (arm_structure_size_boundary != 8
          && arm_structure_size_boundary != 32
          && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
        {
          if (ARM_DOUBLEWORD_ALIGN)
            warning (0,
                     "structure size boundary can only be set to 8, 32 or 64");
          else
            warning (0, "structure size boundary can only be set to 8 or 32");
          arm_structure_size_boundary
            = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
        }
    }

  if (TARGET_VXWORKS_RTP)
    {
      if (!global_options_set.x_arm_pic_data_is_text_relative)
        arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
           && !arm_pic_data_is_text_relative
           && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
        warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  /* If in FDPIC mode then force arm_pic_register to be r9.  */
  if (TARGET_FDPIC)
    {
      arm_pic_register = FDPIC_REGNUM;
      if (TARGET_THUMB1)
        sorry ("FDPIC mode is not supported in Thumb-1 mode");
    }

  /* An explicit -mpic-register= overrides the choices made above, provided
     PIC is enabled and the named register passes the checks below.  */
  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
        warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
               || pic_register == HARD_FRAME_POINTER_REGNUM
               || pic_register == STACK_POINTER_REGNUM
               || pic_register >= PC_REGNUM
               || (TARGET_VXWORKS_RTP
                   && (unsigned int) pic_register != arm_pic_register))
        error ("unable to use %qs for PIC register", arm_pic_register_string);
      else
        arm_pic_register = pic_register;
    }

  if (flag_pic)
    target_word_relocations = 1;

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  A value
     of 2 is resolved here from the isa_bit_quirk_cm3_ldrd bit.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
        fix_cm3_ldrd = 1;
      else
        fix_cm3_ldrd = 0;
    }

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
              "%<-freorder-blocks-and-partition%> not supported "
              "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                         param_gcse_unrestricted_cost, 2);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;

  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;

  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                         param_simultaneous_prefetches,
                         current_tune->prefetch.num_slots);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                         param_l1_cache_line_size,
                         current_tune->prefetch.l1_cache_line_size);
  if (current_tune->prefetch.l1_cache_size >= 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                         param_l1_cache_size,
                         current_tune->prefetch.l1_cache_size);

  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
                       param_sched_autopref_queue_depth,
                       sched_autopref_queue_depth);

  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;

  /* Need to remember initial options before they are overridden.  */
  init_optimize = build_optimization_node (&global_options);

  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Create the default target_options structure.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}


/* Reconfigure global status flags from the active_target.isa.

   Besides the arm_arch* booleans this also rewrites arm_arch_name and
   arm_base_arch, may change arm_fp16_format, and resolves a
   target_thread_pointer of TP_AUTO.  */

void
arm_option_reconfigure_globals (void)
{
  /* NOTE(review): unbounded sprintf; assumes arm_arch_name is large enough
     for the longest arch_pp_name - confirm against its declaration.  */
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
  arm_base_arch = arm_active_target.base_arch;

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch5t =  bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
  arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  /* Derived flag: ARMv6 with isa_bit_notm clear.  */
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
  arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
  arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
  arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
                                    isa_bit_armv8_1m_main);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
  arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
  arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);

  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  /* With fp16 instructions the alternative format is rejected and the
     format is set to IEEE.  */
  if (arm_fp16_inst)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
        error ("selected fp16 options are incompatible");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }

  /* And finally, set up some quirks.  */
  arm_arch_no_volatile_ce
    = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
  arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
                                            isa_bit_quirk_armv6kz);

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
        target_thread_pointer = TP_CP15;
      else
        target_thread_pointer = TP_SOFT;
    }
}

/* Perform some validation between the desired architecture and the rest of the
   options.

   Besides issuing diagnostics this sets arm_cpp_interwork, arm_fpu_attr and
   arm_pcs_default, and may clear MASK_INTERWORK in target_flags.  */
void
arm_options_perform_arch_sanity_checks (void)
{
  /* V5T code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5t)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !TARGET_BPABI
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
        error ("AAPCS does not support %<-mcaller-super-interworking%>");
      else
        if (TARGET_CALLEE_INTERWORKING)
          error ("AAPCS does not support %<-mcallee-super-interworking%>");
    }

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");

  /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
     and ARMv8-M Baseline and Mainline do not allow such configuration.  */
  if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
    error ("ARMv8-M Security Extensions incompatible with selected FPU");

  /* Select the default procedure call standard from the ABI and float
     ABI.  */
  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
        arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (TARGET_HARD_FLOAT_ABI)
        {
          arm_pcs_default = ARM_PCS_AAPCS_VFP;
          if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
            error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
        }
      else
        arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
        sorry ("%<-mfloat-abi=hard%> and VFP");

      if (arm_abi == ARM_ABI_APCS)
        arm_pcs_default = ARM_PCS_APCS;
      else
        arm_pcs_default = ARM_PCS_ATPCS;
    }
}

/* Test whether a local function descriptor is canonical, i.e.,
   whether we can use GOTOFFFUNCDESC to compute the address of the
   function.

   FNX is the symbol reference for the function.  Always returns true when
   not in FDPIC mode; returns false when FNX is not local or has no
   associated decl.  */
static bool
arm_fdpic_local_funcdesc_p (rtx fnx)
{
  tree fn;
  enum symbol_visibility vis;
  bool ret;

  if (!TARGET_FDPIC)
    return true;

  if (! SYMBOL_REF_LOCAL_P (fnx))
    return false;

  fn = SYMBOL_REF_DECL (fnx);

  if (! fn)
    return false;

  vis = DECL_VISIBILITY (fn);

  if (vis == VISIBILITY_PROTECTED)
    /* Private function descriptors for protected functions are not
       canonical.  Temporarily change the visibility to global so that
       we can ensure uniqueness of funcdesc pointers.  */
    DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;

  ret = default_binds_local_p_1 (fn, flag_pic);

  /* Restore the visibility saved above (a no-op unless it was changed).  */
  DECL_VISIBILITY (fn) = vis;

  return ret;
}

/* Initialize minipool_obstack and record its initial (zero-sized)
   allocation in minipool_startobj.  Despite the name, the body shown here
   only sets up that obstack.  */
static void
arm_add_gc_roots (void)
{
  gcc_obstack_init(&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}

/* A table of known ARM exception types.
   For use with the interrupt function attribute.
*/ typedef struct { const char *const arg; const unsigned long return_value; } isr_attribute_arg; static const isr_attribute_arg isr_attribute_args [] = { { "IRQ", ARM_FT_ISR }, { "irq", ARM_FT_ISR }, { "FIQ", ARM_FT_FIQ }, { "fiq", ARM_FT_FIQ }, { "ABORT", ARM_FT_ISR }, { "abort", ARM_FT_ISR }, { "ABORT", ARM_FT_ISR }, { "abort", ARM_FT_ISR }, { "UNDEF", ARM_FT_EXCEPTION }, { "undef", ARM_FT_EXCEPTION }, { "SWI", ARM_FT_EXCEPTION }, { "swi", ARM_FT_EXCEPTION }, { NULL, ARM_FT_NORMAL } }; /* Returns the (interrupt) function type of the current function, or ARM_FT_UNKNOWN if the type cannot be determined. */ static unsigned long arm_isr_value (tree argument) { const isr_attribute_arg * ptr; const char * arg; if (!arm_arch_notm) return ARM_FT_NORMAL | ARM_FT_STACKALIGN; /* No argument - default to IRQ. */ if (argument == NULL_TREE) return ARM_FT_ISR; /* Get the value of the argument. */ if (TREE_VALUE (argument) == NULL_TREE || TREE_CODE (TREE_VALUE (argument)) != STRING_CST) return ARM_FT_UNKNOWN; arg = TREE_STRING_POINTER (TREE_VALUE (argument)); /* Check it against the list of known arguments. */ for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++) if (streq (arg, ptr->arg)) return ptr->return_value; /* An unrecognized interrupt type. */ return ARM_FT_UNKNOWN; } /* Computes the type of the current function. */ static unsigned long arm_compute_func_type (void) { unsigned long type = ARM_FT_UNKNOWN; tree a; tree attr; gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL); /* Decide if the current function is volatile. Such functions never return, and many memory cycles can be saved by not storing register values that will never be needed again. This optimization was added to speed up context switching in a kernel application. 
*/ if (optimize > 0 && (TREE_NOTHROW (current_function_decl) || !(flag_unwind_tables || (flag_exceptions && arm_except_unwind_info (&global_options) != UI_SJLJ))) && TREE_THIS_VOLATILE (current_function_decl)) type |= ARM_FT_VOLATILE; if (cfun->static_chain_decl != NULL) type |= ARM_FT_NESTED; attr = DECL_ATTRIBUTES (current_function_decl); a = lookup_attribute ("naked", attr); if (a != NULL_TREE) type |= ARM_FT_NAKED; a = lookup_attribute ("isr", attr); if (a == NULL_TREE) a = lookup_attribute ("interrupt", attr); if (a == NULL_TREE) type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL; else type |= arm_isr_value (TREE_VALUE (a)); if (lookup_attribute ("cmse_nonsecure_entry", attr)) type |= ARM_FT_CMSE_ENTRY; return type; } /* Returns the type of the current function. */ unsigned long arm_current_func_type (void) { if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN) cfun->machine->func_type = arm_compute_func_type (); return cfun->machine->func_type; } bool arm_allocate_stack_slots_for_args (void) { /* Naked functions should not allocate stack slots for arguments. */ return !IS_NAKED (arm_current_func_type ()); } static bool arm_warn_func_return (tree decl) { /* Naked functions are implemented entirely in assembly, including the return sequence, so suppress warnings about this. */ return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE; } /* Output assembler code for a block containing the constant parts of a trampoline, leaving space for the variable parts. On the ARM, (if r8 is the static chain regnum, and remembering that referencing pc adds an offset of 8) the trampoline looks like: ldr r8, [pc, #0] ldr pc, [pc] .word static chain value .word function's address XXX FIXME: When the trampoline returns, r8 will be clobbered. 
In FDPIC mode, the trampoline looks like: .word trampoline address .word trampoline GOT address ldr r12, [pc, #8] ; #4 for Arm mode ldr r9, [pc, #8] ; #4 for Arm mode ldr pc, [pc, #8] ; #4 for Arm mode .word static chain value .word GOT address .word function's address */ static void arm_asm_trampoline_template (FILE *f) { fprintf (f, "\t.syntax unified\n"); if (TARGET_FDPIC) { /* The first two words are a function descriptor pointing to the trampoline code just below. */ if (TARGET_ARM) fprintf (f, "\t.arm\n"); else if (TARGET_THUMB2) fprintf (f, "\t.thumb\n"); else /* Only ARM and Thumb-2 are supported. */ gcc_unreachable (); assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); /* Trampoline code which sets the static chain register but also PIC register before jumping into real code. */ asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n", STATIC_CHAIN_REGNUM, PC_REGNUM, TARGET_THUMB2 ? 8 : 4); asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n", PIC_OFFSET_TABLE_REGNUM, PC_REGNUM, TARGET_THUMB2 ? 8 : 4); asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n", PC_REGNUM, PC_REGNUM, TARGET_THUMB2 ? 8 : 4); assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); } else if (TARGET_ARM) { fprintf (f, "\t.arm\n"); asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM); asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM); } else if (TARGET_THUMB2) { fprintf (f, "\t.thumb\n"); /* The Thumb-2 trampoline is similar to the arm implementation. Unlike 16-bit Thumb, we enter the stub in thumb mode. 
*/ asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", STATIC_CHAIN_REGNUM, PC_REGNUM); asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM); } else { ASM_OUTPUT_ALIGN (f, 2); fprintf (f, "\t.code\t16\n"); fprintf (f, ".Ltrampoline_start:\n"); asm_fprintf (f, "\tpush\t{r0, r1}\n"); asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM); asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM); asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM); asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM); asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM); } assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); assemble_aligned_integer (UNITS_PER_WORD, const0_rtx); } /* Emit RTL insns to initialize the variable parts of a trampoline. */ static void arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value) { rtx fnaddr, mem, a_tramp; emit_block_move (m_tramp, assemble_trampoline_template (), GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL); if (TARGET_FDPIC) { rtx funcdesc = XEXP (DECL_RTL (fndecl), 0); rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc); rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4)); /* The function start address is at offset 8, but in Thumb mode we want bit 0 set to 1 to indicate Thumb-ness, hence 9 below. */ rtx trampoline_code_start = plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8); /* Write initial funcdesc which points to the trampoline. */ mem = adjust_address (m_tramp, SImode, 0); emit_move_insn (mem, trampoline_code_start); mem = adjust_address (m_tramp, SImode, 4); emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM)); /* Setup static chain. */ mem = adjust_address (m_tramp, SImode, 20); emit_move_insn (mem, chain_value); /* GOT + real function entry point. */ mem = adjust_address (m_tramp, SImode, 24); emit_move_insn (mem, gotaddr); mem = adjust_address (m_tramp, SImode, 28); emit_move_insn (mem, fnaddr); } else { mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 
8 : 12); emit_move_insn (mem, chain_value); mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16); fnaddr = XEXP (DECL_RTL (fndecl), 0); emit_move_insn (mem, fnaddr); } a_tramp = XEXP (m_tramp, 0); emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"), LCT_NORMAL, VOIDmode, a_tramp, Pmode, plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode); } /* Thumb trampolines should be entered in thumb mode, so set the bottom bit of the address. */ static rtx arm_trampoline_adjust_address (rtx addr) { /* For FDPIC don't fix trampoline address since it's a function descriptor and not a function address. */ if (TARGET_THUMB && !TARGET_FDPIC) addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx, NULL, 0, OPTAB_LIB_WIDEN); return addr; } /* Return 1 if it is possible to return using a single instruction. If SIBLING is non-null, this is a test for a return before a sibling call. SIBLING is the call insn, so we can examine its register usage. */ int use_return_insn (int iscond, rtx sibling) { int regno; unsigned int func_type; unsigned long saved_int_regs; unsigned HOST_WIDE_INT stack_adjust; arm_stack_offsets *offsets; /* Never use a return instruction before reload has run. */ if (!reload_completed) return 0; func_type = arm_current_func_type (); /* Naked, volatile and stack alignment functions need special consideration. */ if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN)) return 0; /* So do interrupt functions that use the frame pointer and Thumb interrupt functions. */ if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB)) return 0; if (TARGET_LDRD && current_tune->prefer_ldrd_strd && !optimize_function_for_size_p (cfun)) return 0; offsets = arm_get_frame_offsets (); stack_adjust = offsets->outgoing_args - offsets->saved_regs; /* As do variadic functions. 
*/ if (crtl->args.pretend_args_size || cfun->machine->uses_anonymous_args /* Or if the function calls __builtin_eh_return () */ || crtl->calls_eh_return /* Or if the function calls alloca */ || cfun->calls_alloca /* Or if there is a stack adjustment. However, if the stack pointer is saved on the stack, we can use a pre-incrementing stack load. */ || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed && stack_adjust == 4)) /* Or if the static chain register was saved above the frame, under the assumption that the stack pointer isn't saved on the stack. */ || (!(TARGET_APCS_FRAME && frame_pointer_needed) && arm_compute_static_chain_stack_bytes() != 0)) return 0; saved_int_regs = offsets->saved_regs_mask; /* Unfortunately, the insn ldmib sp, {..., sp, ...} triggers a bug on most SA-110 based devices, such that the stack pointer won't be correctly restored if the instruction takes a page fault. We work around this problem by popping r3 along with the other registers, since that is never slower than executing another instruction. We test for !arm_arch5t here, because code for any architecture less than this could potentially be run on one of the buggy chips. */ if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM) { /* Validate that r3 is a call-clobbered register (always true in the default abi) ... */ if (!call_used_or_fixed_reg_p (3)) return 0; /* ... that it isn't being used for a return value ... */ if (arm_size_return_regs () >= (4 * UNITS_PER_WORD)) return 0; /* ... or for a tail-call argument ... */ if (sibling) { gcc_assert (CALL_P (sibling)); if (find_regno_fusage (sibling, USE, 3)) return 0; } /* ... and that there are no call-saved registers in r0-r2 (always true in the default ABI). */ if (saved_int_regs & 0x7) return 0; } /* Can't be done if interworking with Thumb, and any registers have been stacked. 
*/
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
         conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
        return 0;

      /* Also reject it when PIC is in use with a valid PIC register and
         PIC_OFFSET_TABLE_REGNUM is ever live.  */
      if (flag_pic
          && arm_pic_register != INVALID_REGNUM
          && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
        return 0;
    }

  /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
     several instructions if anything needs to be popped.  Armv8.1-M Mainline
     also needs several instructions to save and restore FP context.  */
  if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
        return 0;

  /* The same test for the iWMMXt register range.  */
  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
        return 0;

  return 1;
}

/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  The value
   returned is whether the outgoing_args frame offset is nonzero.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  /* Note this function can be called before or after reload.  Before
     reload the frame layout is recomputed before it is read.  */
  if (!reload_completed)
    arm_compute_frame_layout ();

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}

/* Return TRUE if int I is a valid immediate ARM constant.
*/

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
          != ((~(unsigned HOST_WIDE_INT) 0)
              & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  /* From here on only the low 32 bits of I are examined.  */
  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  /* Accept I if every set bit lies in the 8-bit window starting at
     LOWBIT.  */
  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  The three masks are the
         8-bit windows that wrap from the top of the word round to
         bit 0 (2+6, 4+4 and 6+2 bits).  */
      if (lowbit <= 4
           && ((i & ~0xc000003f) == 0
               || (i & ~0xf000000f) == 0
               || (i & ~0xfc000003) == 0))
        return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
        return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
        return TRUE;
    }
  else if (TARGET_HAVE_MOVT)
    {
      /* Thumb-1 Targets with MOVT.  Anything that fits in 16 bits is
         accepted.  */
      if (i > 0xffff)
        return FALSE;
      else
        return TRUE;
    }

  return FALSE;
}

/* Return true if I is a valid constant for the operation CODE.
   Besides I itself, each case tries the variant of I (inverted or
   negated) that the case's own test shows.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
        return 1;
      else
        /* Otherwise, try mvn.  */
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
          && ((i & 0xfffff000) == 0
              || ((-i) & 0xfffff000) == 0))
        return 1;
      /* Fall through.
*/
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      /* For these codes the negated constant is tried.  */
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:         /* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      /* Only Thumb-2 tries the inverted constant for IOR.  */
      if (TARGET_THUMB2)
        return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}

/* Return true if I is a valid di mode constant for the operation CODE.
   Always 0 for TARGET_THUMB1 and for any CODE other than AND, IOR, XOR
   or PLUS.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  /* Split I into its upper and lower 32-bit halves.  */
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  if (TARGET_THUMB1)
    return 0;

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      /* Note the tests are OR-ed together: one acceptable half (or a
         half that is all ones) is enough.  */
      return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
             || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}

/* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
   Avoid generating useless code when one of the bytes is zero.
   OP0 is the destination of every emitted set; OP1 is the constant.  */
void
thumb1_gen_const_int (rtx op0, HOST_WIDE_INT op1)
{
  /* Becomes true once the first non-zero byte has been emitted.  */
  bool mov_done_p = false;
  int i;

  /* Emit upper 3 bytes if needed.  */
  for (i = 0; i < 3; i++)
    {
      int byte = (op1 >> (8 * (3 - i))) & 0xff;

      if (byte)
        {
          /* The first non-zero byte is set directly; later ones are
             added to the value accumulated so far.  */
          emit_set_insn (op0, mov_done_p
                         ? gen_rtx_PLUS (SImode,op0, GEN_INT (byte))
                         : GEN_INT (byte));
          mov_done_p = true;
        }

      /* Make room for the next byte.  */
      if (mov_done_p)
        emit_set_insn (op0, gen_rtx_ASHIFT (SImode, op0, GEN_INT (8)));
    }

  /* Emit lower byte if needed.  */
  if (!mov_done_p)
    emit_set_insn (op0, GEN_INT (op1 & 0xff));
  else if (op1 & 0xff)
    emit_set_insn (op0, gen_rtx_PLUS (SImode, op0, GEN_INT (op1 & 0xff)));
}

/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
/* INSN, when non-null with a COND_EXEC pattern, supplies the condition
   handed to arm_gen_constant; TARGET is the register that is set.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
                    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
          && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
         constants by pushing them into memory so we must synthesize
         them in-line, regardless of the cost.  This is only likely to
         be more costly on chips that have load delay slots and we are
         compiling without running the scheduler (so no splitting
         occurred before the final instruction emission).

         Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      /* The arm_gen_constant call below passes GENERATE == 0, so it only
         counts insns; that count is compared with the limit.  */
      if (!cfun->machine->after_arm_reorg
          && !cond
          && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
                                1, 0)
              > (arm_constant_limit (optimize_function_for_size_p (cfun))
                 + (code != SET))))
        {
          if (code == SET)
            {
              /* Currently SET is the only monadic value for CODE, all
                 the rest are diadic.  */
              if (TARGET_USE_MOVT)
                arm_emit_movpair (target, GEN_INT (val));
              else
                emit_set_insn (target, GEN_INT (val));

              return 1;
            }
          else
            {
              /* Load VAL into a temporary (TARGET itself when no
                 scratch registers may be created).  */
              rtx temp = subtargets ? gen_reg_rtx (mode) : target;

              if (TARGET_USE_MOVT)
                arm_emit_movpair (temp, GEN_INT (val));
              else
                emit_set_insn (temp, GEN_INT (val));

              /* For MINUS, the value is subtracted from, since we never
                 have subtraction of a constant.
*/ if (code == MINUS) emit_set_insn (target, gen_rtx_MINUS (mode, temp, source)); else emit_set_insn (target, gen_rtx_fmt_ee (code, mode, source, temp)); return 2; } } } return arm_gen_constant (code, mode, cond, val, target, source, subtargets, 1); } /* Return a sequence of integers, in RETURN_SEQUENCE that fit into ARM/THUMB2 immediates, and add up to VAL. Thr function return value gives the number of insns required. */ static int optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val, struct four_ints *return_sequence) { int best_consecutive_zeros = 0; int i; int best_start = 0; int insns1, insns2; struct four_ints tmp_sequence; /* If we aren't targeting ARM, the best place to start is always at the bottom, otherwise look more closely. */ if (TARGET_ARM) { for (i = 0; i < 32; i += 2) { int consecutive_zeros = 0; if (!(val & (3 << i))) { while ((i < 32) && !(val & (3 << i))) { consecutive_zeros += 2; i += 2; } if (consecutive_zeros > best_consecutive_zeros) { best_consecutive_zeros = consecutive_zeros; best_start = i - consecutive_zeros; } i -= 2; } } } /* So long as it won't require any more insns to do so, it's desirable to emit a small constant (in bits 0...9) in the last insn. This way there is more chance that it can be combined with a later addressing insn to form a pre-indexed load or store operation. Consider: *((volatile int *)0xe0000100) = 1; *((volatile int *)0xe0000110) = 2; We want this to wind up as: mov rA, #0xe0000000 mov rB, #1 str rB, [rA, #0x100] mov rB, #2 str rB, [rA, #0x110] rather than having to synthesize both large constants from scratch. Therefore, we calculate how many insns would be required to emit the constant starting from `best_start', and also starting from zero (i.e. with bit 31 first to be output). If `best_start' doesn't yield a shorter sequence, we may as well use zero. 
*/
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      /* Ties go to the sequence that starts from zero.  */
      if (insns2 <= insns1)
        {
          *return_sequence = tmp_sequence;
          insns1 = insns2;
        }
    }

  return insns1;
}

/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
                              struct four_ints *return_sequence, int i)
{
  /* Bits of VAL (low 32 only) not yet covered by an emitted immediate.  */
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimal_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary.  We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      /* Wrap the bit position round to the top of the word.  */
      if (i <= 0)
        i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
        {
          loc = i;
          if (i <= 12 && TARGET_THUMB2 && code == PLUS)
            /* We can use addw/subw for the last 12 bits.  */
            result = remainder;
          else
            {
              /* Use an 8-bit shifted/rotated immediate.  */
              end = i - 8;
              if (end < 0)
                end += 32;
              /* The second operand of the OR supplies the low-order bits
                 when the 8-bit window wraps past bit 0 (I < END).  */
              result = remainder & ((0x0ff << end)
                                   | ((i < end) ? (0xff >> (32 - end))
                                                : 0));
              i -= 8;
            }
        }
      else
        {
          /* Arm allows rotates by a multiple of two. Thumb-2 allows
             arbitrary shifts.  */
          i -= TARGET_ARM ? 2 : 1;
          continue;
        }

      /* Next, see if we can do a better job with a thumb2 replicated
         constant.
         We do it this way around to catch the cases like 0x01F001E0 where
         two 8-bit immediates would work, but a replicated constant would
         make it worse.

         TODO: 16-bit constants that don't clear all the bits, but still win.
         TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
        {
          /* The four bytes of REMAINDER, most significant first.  */
          b1 = (remainder & 0xff000000) >> 24;
          b2 = (remainder & 0x00ff0000) >> 16;
          b3 = (remainder & 0x0000ff00) >> 8;
          b4 = remainder & 0xff;

          if (loc > 24)
            {
              /* The 8-bit immediate already found clears b1 (and maybe b2),
                 but must leave b3 and b4 alone.  */

              /* First try to find a 32-bit replicated constant that clears
                 almost everything.  We can assume that we can't do it in one,
                 or else we wouldn't be here.  */
              /* TMP holds the bits common to all four bytes, TMP2 is TMP
                 replicated into every byte.  */
              unsigned int tmp = b1 & b2 & b3 & b4;
              unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
                                  + (tmp << 24);
              unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
                                            + (tmp == b3) + (tmp == b4);
              if (tmp
                  && (matching_bytes >= 3
                      || (matching_bytes == 2
                          && const_ok_for_op (remainder & ~tmp2, code))))
                {
                  /* At least 3 of the bytes match, and the fourth has at
                     least as many bits set, or two of the bytes match
                     and it will only require one more insn to finish.  */
                  result = tmp2;
                  /* Continue from the first byte that TMP does not fully
                     cover.  */
                  i = tmp != b1 ? 32
                      : tmp != b2 ? 24
                      : tmp != b3 ? 16
                      : 8;
                }

              /* Second, try to find a 16-bit replicated constant that can
                 leave three of the bytes clear.  If b2 or b4 is already
                 zero, then we can.  If the 8-bit from above would not
                 clear b2 anyway, then we still win.  */
              else if (b1 == b3
                       && (!b2 || !b4
                           || (remainder & 0x00ff0000 & ~result)))
                {
                  result = remainder & 0xff00ff00;
                  i = 24;
                }
            }
          else if (loc > 16)
            {
              /* The 8-bit immediate already found clears b2 (and maybe b3)
                 and we don't get here unless b1 is already clear, but it will
                 leave b4 unchanged.  */

              /* If we can clear b2 and b4 at once, then we win, since the
                 8-bits couldn't possibly reach that far.
*/
              if (b2 == b4)
                {
                  result = remainder & 0x00ff00ff;
                  i = 16;
                }
            }
        }

      /* Record this immediate and drop its bits from REMAINDER.  */
      return_sequence->i[insns++] = result;
      remainder &= ~result;

      /* After the first insn of a SET or MINUS the rest are treated as
         PLUS.  */
      if (code == SET || code == MINUS)
        code = PLUS;
    }
  while (remainder);

  return insns;
}

/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  COND is copied before it is wrapped round PATTERN.  */
static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}

/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */
/* COND, when non-null, is passed to emit_constant_insn for the insns
   emitted here.  The return value is the insn count; it is computed
   whether or not GENERATE is set.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
                  unsigned HOST_WIDE_INT val, rtx target, rtx source,
                  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  /* Lengths of the runs of leading ones, leading zeros, trailing zeros
     and trailing ones in REMAINDER; filled in further down.  */
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  /* The low 32 bits of VAL.  */
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.
*/
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      /* OR with all ones: the result is just the constant.  */
      if (remainder == 0xffffffff)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (target,
                                             GEN_INT (ARM_SIGN_EXTEND (val))));
          return 1;
        }

      /* OR with zero: a plain copy, or nothing at all when TARGET and
         SOURCE are already equal after reload.  */
      if (remainder == 0)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;

          if (generate)
            emit_constant_insn (cond, gen_rtx_SET (target, source));
          return 1;
        }
      break;

    case AND:
      /* AND with zero: the result is zero.  */
      if (remainder == 0)
        {
          if (generate)
            emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
          return 1;
        }
      /* AND with all ones: a plain copy, or nothing.  */
      if (remainder == 0xffffffff)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;
          if (generate)
            emit_constant_insn (cond, gen_rtx_SET (target, source));
          return 1;
        }
      can_invert = 1;
      break;

    case XOR:
      /* XOR with zero: a plain copy, or nothing.  */
      if (remainder == 0)
        {
          if (reload_completed && rtx_equal_p (target, source))
            return 0;
          if (generate)
            emit_constant_insn (cond, gen_rtx_SET (target, source));
          return 1;
        }

      /* XOR with all ones: a single NOT.  */
      if (remainder == 0xffffffff)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (target,
                                             gen_rtx_NOT (mode, source)));
          return 1;
        }
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
         passed as (source + (-val)).  */
      if (remainder == 0)
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (target,
                                             gen_rtx_NEG (mode, source)));
          return 1;
        }
      if (const_ok_for_arm (val))
        {
          if (generate)
            emit_constant_insn (cond,
                                gen_rtx_SET (target,
                                             gen_rtx_MINUS (mode, GEN_INT (val),
                                                            source)));
          return 1;
        }

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
        emit_constant_insn (cond,
                            gen_rtx_SET (target,
                                         (source
                                          ? gen_rtx_fmt_ee (code, mode, source,
                                                            GEN_INT (val))
                                          : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.
*/ if (code == AND && (i = exact_log2 (remainder + 1)) > 0 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode))) { if (generate) { if (mode == SImode && i == 16) /* Use UXTH in preference to UBFX, since on Thumb2 it's a smaller insn. */ emit_constant_insn (cond, gen_zero_extendhisi2 (target, gen_lowpart (HImode, source))); else /* Extz only supports SImode, but we can coerce the operands into that mode. */ emit_constant_insn (cond, gen_extzv_t2 (gen_lowpart (SImode, target), gen_lowpart (SImode, source), GEN_INT (i), const0_rtx)); } return 1; } /* Calculate a few attributes that may be useful for specific optimizations. */ /* Count number of leading zeros. */ for (i = 31; i >= 0; i--) { if ((remainder & (1 << i)) == 0) clear_sign_bit_copies++; else break; } /* Count number of leading 1's. */ for (i = 31; i >= 0; i--) { if ((remainder & (1 << i)) != 0) set_sign_bit_copies++; else break; } /* Count number of trailing zero's. */ for (i = 0; i <= 31; i++) { if ((remainder & (1 << i)) == 0) clear_zero_bit_copies++; else break; } /* Count number of trailing 1's. */ for (i = 0; i <= 31; i++) { if ((remainder & (1 << i)) != 0) set_zero_bit_copies++; else break; } switch (code) { case SET: /* See if we can do this by sign_extending a constant that is known to be negative. This is a good, way of doing it, since the shift may well merge into a subsequent insn. */ if (set_sign_bit_copies > 1) { if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (remainder << (set_sign_bit_copies - 1)))) { if (generate) { rtx new_src = subtargets ? gen_reg_rtx (mode) : target; emit_constant_insn (cond, gen_rtx_SET (new_src, GEN_INT (temp1))); emit_constant_insn (cond, gen_ashrsi3 (target, new_src, GEN_INT (set_sign_bit_copies - 1))); } return 2; } /* For an inverted constant, we will need to set the low bits, these will be shifted out of harm's way. */ temp1 |= (1 << (set_sign_bit_copies - 1)) - 1; if (const_ok_for_arm (~temp1)) { if (generate) { rtx new_src = subtargets ? 
gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (new_src, GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_ashrsi3 (target, new_src,
                                                   GEN_INT (set_sign_bit_copies - 1)));
                }
              return 2;
            }
        }

      /* See if we can calculate the value as the difference between two
         valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
        {
          /* Round the leading-zero count down to an even number.  */
          int topshift = clear_sign_bit_copies & ~1;

          /* TEMP1 is REMAINDER rounded to the nearest value representable
             in the 8 bits that start TOPSHIFT bits below bit 31.  */
          temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
                                   & (0xff000000 >> topshift));

          /* If temp1 is zero, then that means the 9 most significant
             bits of remainder were 1 and we've caused it to overflow.
             When topshift is 0 we don't need to do anything since we
             can borrow from 'bit 32'.  */
          if (temp1 == 0 && topshift != 0)
            temp1 = 0x80000000 >> (topshift - 1);

          /* TEMP2 is the amount to subtract from TEMP1 to get back to
             REMAINDER.  */
          temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

          if (const_ok_for_arm (temp2))
            {
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
                  emit_constant_insn (cond,
                                      gen_rtx_SET (new_src, GEN_INT (temp1)));
                  emit_constant_insn (cond,
                                      gen_addsi3 (target, new_src,
                                                  GEN_INT (-temp2)));
                }

              return 2;
            }
        }

      /* See if we can generate this by setting the bottom (or the top)
         16 bits, and then shifting these into the other half of the
         word.  We only look for the simplest cases, to do more would cost
         too much.  Be careful, however, not to generate this when the
         alternative would take fewer insns.  */
      if (val & 0xffff0000)
        {
          /* TEMP1 is the top half of REMAINDER, TEMP2 the bottom half.  */
          temp1 = remainder & 0xffff0000;
          temp2 = remainder & 0x0000ffff;

          /* Overlaps outside this range are best done using other methods.  */
          for (i = 9; i < 24; i++)
            {
              if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
                  && !const_ok_for_arm (temp2))
                {
                  rtx new_src = (subtargets
                                 ? (generate ?
gen_reg_rtx (mode) : NULL_RTX)
                                 : target);
                  /* Build the bottom half recursively, then OR it with a
                     copy of itself shifted left by I.  */
                  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
                                            source, subtargets, generate);
                  source = new_src;
                  if (generate)
                    emit_constant_insn
                      (cond,
                       gen_rtx_SET
                       (target,
                        gen_rtx_IOR (mode,
                                     gen_rtx_ASHIFT (mode, source,
                                                     GEN_INT (i)),
                                     source)));
                  return insns + 1;
                }
            }

          /* Don't duplicate cases already considered.  */
          for (i = 17; i < 24; i++)
            {
              if (((temp1 | (temp1 >> i)) == remainder)
                  && !const_ok_for_arm (temp1))
                {
                  rtx new_src = (subtargets
                                 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
                                 : target);
                  /* Build the top half recursively, then OR it with a
                     copy of itself shifted right by I.  */
                  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
                                            source, subtargets, generate);
                  source = new_src;
                  if (generate)
                    emit_constant_insn
                      (cond,
                       gen_rtx_SET (target,
                                    gen_rtx_IOR
                                    (mode,
                                     gen_rtx_LSHIFTRT (mode, source,
                                                       GEN_INT (i)),
                                     source)));
                  return insns + 1;
                }
            }
        }
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
         single instruction, and we can find a temporary to put it in,
         then this can be done in two instructions instead of 3-4.  */
      if (subtargets
          /* TARGET can't be NULL if SUBTARGETS is 0 */
          || (reload_completed && !reg_mentioned_p (target, source)))
        {
          if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
            {
              if (generate)
                {
                  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

                  emit_constant_insn (cond,
                                      gen_rtx_SET (sub, GEN_INT (val)));
                  emit_constant_insn (cond,
                                      gen_rtx_SET (target,
                                                   gen_rtx_fmt_ee (code, mode,
                                                                   source, sub)));
                }
              return 2;
            }
        }

      /* The remaining strategies in this case apply to IOR only.  */
      if (code == XOR)
        break;

      /*  Convert.
          x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
                             and the remainder 0s for e.g. 0xfff00000)
          x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

          This can be done in 2 instructions by using shifts with mov or mvn.
          e.g. for
          x = x | 0xfff00000;
          we generate.
          mvn   r0, r0, asl #12
          mvn   r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
          && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
        {
          if (generate)
            {
              rtx sub = subtargets ?
gen_reg_rtx (mode) : target; rtx shift = GEN_INT (set_sign_bit_copies); emit_constant_insn (cond, gen_rtx_SET (sub, gen_rtx_NOT (mode, gen_rtx_ASHIFT (mode, source, shift)))); emit_constant_insn (cond, gen_rtx_SET (target, gen_rtx_NOT (mode, gen_rtx_LSHIFTRT (mode, sub, shift)))); } return 2; } /* Convert x = y | constant (which has set_zero_bit_copies number of trailing ones). to x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies). For eg. r0 = r0 | 0xfff mvn r0, r0, lsr #12 mvn r0, r0, asl #12 */ if (set_zero_bit_copies > 8 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder) { if (generate) { rtx sub = subtargets ? gen_reg_rtx (mode) : target; rtx shift = GEN_INT (set_zero_bit_copies); emit_constant_insn (cond, gen_rtx_SET (sub, gen_rtx_NOT (mode, gen_rtx_LSHIFTRT (mode, source, shift)))); emit_constant_insn (cond, gen_rtx_SET (target, gen_rtx_NOT (mode, gen_rtx_ASHIFT (mode, sub, shift)))); } return 2; } /* This will never be reached for Thumb2 because orn is a valid instruction. This is for Thumb1 and the ARM 32 bit cases. x = y | constant (such that ~constant is a valid constant) Transform this to x = ~(~y & ~constant). */ if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val))) { if (generate) { rtx sub = subtargets ? gen_reg_rtx (mode) : target; emit_constant_insn (cond, gen_rtx_SET (sub, gen_rtx_NOT (mode, source))); source = sub; if (subtargets) sub = gen_reg_rtx (mode); emit_constant_insn (cond, gen_rtx_SET (sub, gen_rtx_AND (mode, source, GEN_INT (temp1)))); emit_constant_insn (cond, gen_rtx_SET (target, gen_rtx_NOT (mode, sub))); } return 3; } break; case AND: /* See if two shifts will do 2 or more insn's worth of work. */ if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24) { HOST_WIDE_INT shift_mask = ((0xffffffff << (32 - clear_sign_bit_copies)) & 0xffffffff); if ((remainder | shift_mask) != 0xffffffff) { HOST_WIDE_INT new_val = ARM_SIGN_EXTEND (remainder | shift_mask); if (generate) { rtx new_src = subtargets ? 
gen_reg_rtx (mode) : target;
                  /* NOTE(review): this call passes SImode where the three
                     sibling calls below pass MODE -- confirm intended.  */
                  insns = arm_gen_constant (AND, SImode, cond, new_val,
                                            new_src, source, subtargets, 1);
                  source = new_src;
                }
              else
                {
                  /* Count-only pass (GENERATE == 0).  */
                  rtx targ = subtargets ? NULL_RTX : target;
                  insns = arm_gen_constant (AND, mode, cond, new_val,
                                            targ, source, subtargets, 0);
                }
            }

          if (generate)
            {
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_sign_bit_copies);

              /* Shift left then logically right to clear the top bits.
                 NOTE(review): these use emit_insn directly, so COND is
                 not applied to them -- confirm intended.  */
              emit_insn (gen_ashlsi3 (new_src, source, shift));
              emit_insn (gen_lshrsi3 (target, new_src, shift));
            }

          return insns + 2;
        }

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
        {
          /* Mask covering the trailing zero bits of REMAINDER.  */
          HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

          if ((remainder | shift_mask) != 0xffffffff)
            {
              HOST_WIDE_INT new_val
                = ARM_SIGN_EXTEND (remainder | shift_mask);
              if (generate)
                {
                  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

                  insns = arm_gen_constant (AND, mode, cond, new_val,
                                            new_src, source, subtargets, 1);
                  source = new_src;
                }
              else
                {
                  /* Count-only pass (GENERATE == 0).  */
                  rtx targ = subtargets ? NULL_RTX : target;

                  insns = arm_gen_constant (AND, mode, cond, new_val,
                                            targ, source, subtargets, 0);
                }
            }

          if (generate)
            {
              rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
              rtx shift = GEN_INT (clear_zero_bit_copies);

              /* Shift right then left to clear the bottom bits; as
                 above, emitted without COND.  */
              emit_insn (gen_lshrsi3 (new_src, source, shift));
              emit_insn (gen_ashlsi3 (target, new_src, shift));
            }

          return insns + 2;
        }

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  A count of 99 marks a form that is
     not available.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
                                            &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
                                            &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?
*/
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
        {
          rtx new_src, temp1_rtx;

          temp1 = immediates->i[i];

          /* Pick the destination: a fresh scratch register where
             SUBTARGETS allows it, otherwise TARGET.  */
          if (code == SET || code == MINUS)
            new_src = (subtargets ? gen_reg_rtx (mode) : target);
          else if ((final_invert || i < (insns - 1)) && subtargets)
            new_src = gen_reg_rtx (mode);
          else
            new_src = target;

          /* Undo the inversion or negation applied when the sequence
             was computed.  */
          if (can_invert)
            temp1 = ~temp1;
          else if (can_negate)
            temp1 = -temp1;

          temp1 = trunc_int_for_mode (temp1, mode);
          temp1_rtx = GEN_INT (temp1);

          if (code == SET)
            ;
          else if (code == MINUS)
            temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
          else
            temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

          emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
          source = new_src;

          /* After the first insn of a SET or MINUS the remaining
             immediates are applied with PLUS; for SET an inverted
             sequence continues as a negated one.  */
          if (code == SET)
            {
              can_negate = can_invert;
              can_invert = 0;
              code = PLUS;
            }
          else if (code == MINUS)
            code = PLUS;
        }
    }

  /* The extra NOT is counted even when nothing is emitted.  */
  if (final_invert)
    {
      if (generate)
        emit_constant_insn (cond, gen_rtx_SET (target,
                                               gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}

/* Return TRUE if op is a constant where both the low and top words are
   suitable for RSB/RSC instructions.  This is never true for Thumb, since
   we do not have RSC in that case.  */
static bool
arm_const_double_prefer_rsbs_rsc (rtx op)
{
  /* Thumb lacks RSC, so we never prefer that sequence.
*/
  if (TARGET_THUMB || !CONST_INT_P (op))
    return false;
  /* Split OP into 32-bit halves and test each one.  */
  HOST_WIDE_INT hi, lo;
  lo = UINTVAL (op) & 0xffffffffULL;
  hi = UINTVAL (op) >> 32;
  return const_ok_for_arm (lo) && const_ok_for_arm (hi);
}

/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  CODE, OP0 and OP1 are updated in
   place.  OP0_PRESERVE_VALUE, when true, prevents the operands from
   being swapped in the DImode case.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                             bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  /* All bits of MODE set except the top one.  */
  maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc).  In
     ARM mode we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be
     either reversed or (for constant OP1) adjusted to GE/LT.
     Similarly for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {

      if (*code == GT || *code == LE
          || *code == GTU || *code == LEU)
        {
          /* Missing comparison.  First try to use an available
             comparison.  */
          if (CONST_INT_P (*op1))
            {
              i = INTVAL (*op1);
              switch (*code)
                {
                case GT:
                case LE:
                  /* I + 1 would overflow when I is MAXVAL.  */
                  if (i != maxval)
                    {
                      /* Try to convert to GE/LT, unless that would be more
                         expensive.  */
                      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
                          && arm_const_double_prefer_rsbs_rsc (*op1))
                        return;
                      *op1 = GEN_INT (i + 1);
                      *code = *code == GT ? GE : LT;
                      return;
                    }
                  break;

                case GTU:
                case LEU:
                  /* I + 1 would wrap when I is all ones.  */
                  if (i != ~((unsigned HOST_WIDE_INT) 0))
                    {
                      /* Try to convert to GEU/LTU, unless that would
                         be more expensive.  */
                      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
                          && arm_const_double_prefer_rsbs_rsc (*op1))
                        return;
                      *op1 = GEN_INT (i + 1);
                      *code = *code == GTU ?
GEU : LTU;
                      return;
                    }
                  break;

                default:
                  gcc_unreachable ();
                }
            }

          /* No constant adjustment was made: reverse the comparison by
             swapping the operands, if the caller allows it.  */
          if (!op0_preserve_value)
            {
              std::swap (*op0, *op1);
              *code = (int)swap_condition ((enum rtx_code)*code);
            }
        }
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
                        GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  /* Each case moves the constant by one and adjusts the comparison to
     match, provided the new constant (or its negation) is a valid
     immediate and the move cannot wrap.  */
  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
          *code = *code == GT ? GE : LT;
          return;
        }
      break;

    case GE:
    case LT:
      if (i != ~maxval
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          *code = *code == GE ? GT : LE;
          return;
        }
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
          && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
        {
          *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
          *code = *code == GTU ? GEU : LTU;
          return;
        }
      break;

    case GEU:
    case LTU:
      if (i != 0
          && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
        {
          *op1 = GEN_INT (i - 1);
          *code = *code == GEU ? GTU : LEU;
          return;
        }
      break;

    default:
      gcc_unreachable ();
    }
}


/* Define how to find the value returned by a function.
*/

static rtx
arm_function_value(const_tree type, const_tree func,
                   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  /* AAPCS-based targets are handled entirely by
     aapcs_allocate_return_reg.  */
  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
        {
          /* Round SIZE up to a whole number of words.  */
          size += UNITS_PER_WORD - size % UNITS_PER_WORD;
          mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
        }
    }

  return arm_libcall_value_1 (mode);
}

/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

/* Two entries are equal when rtx_equal_p says so.  */
inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

/* Hash an entry with hash_rtx.  */
inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;

/* Insert LIBCALL into HTAB.  */
static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}

/* Return true if LIBCALL is non-null and is found in the table of
   libcalls registered below.  The table is built on the first call and
   kept in a function-local static.  */
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      /* Conversions registered under sfloat_optab.  */
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
                   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
                   convert_optab_libfunc
(ufloat_optab, SFmode, SImode)); add_libcall (libcall_htab, convert_optab_libfunc (ufloat_optab, DFmode, SImode)); add_libcall (libcall_htab, convert_optab_libfunc (ufloat_optab, SFmode, DImode)); add_libcall (libcall_htab, convert_optab_libfunc (ufloat_optab, DFmode, DImode)); add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, SFmode, HFmode)); add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, HFmode, SFmode)); add_libcall (libcall_htab, convert_optab_libfunc (sfix_optab, SImode, DFmode)); add_libcall (libcall_htab, convert_optab_libfunc (ufix_optab, SImode, DFmode)); add_libcall (libcall_htab, convert_optab_libfunc (sfix_optab, DImode, DFmode)); add_libcall (libcall_htab, convert_optab_libfunc (ufix_optab, DImode, DFmode)); add_libcall (libcall_htab, convert_optab_libfunc (sfix_optab, DImode, SFmode)); add_libcall (libcall_htab, convert_optab_libfunc (ufix_optab, DImode, SFmode)); /* Values from double-precision helper functions are returned in core registers if the selected core only supports single-precision arithmetic, even if we are using the hard-float ABI. The same is true for single-precision helpers, but we will never be using the hard-float ABI on a CPU which doesn't support single-precision operations in hardware. 
*/ add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode)); add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode)); add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode)); add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode)); add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode)); add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode)); add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode)); add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode)); add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode)); add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode)); add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode)); add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode, SFmode)); add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode, DFmode)); add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, HFmode, DFmode)); } return libcall && libcall_htab->find (libcall) != NULL; } static rtx arm_libcall_value_1 (machine_mode mode) { if (TARGET_AAPCS_BASED) return aapcs_libcall_value (mode); else if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode)) return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM); else return gen_rtx_REG (mode, ARG_REGISTER (1)); } /* Define how to find the value returned by a library function assuming the value has mode MODE. */ static rtx arm_libcall_value (machine_mode mode, const_rtx libcall) { if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS && GET_MODE_CLASS (mode) == MODE_FLOAT) { /* The following libcalls return their result in integer registers, even though they return a floating point value. */ if (arm_libcall_uses_aapcs_base (libcall)) return gen_rtx_REG (mode, ARG_REGISTER(1)); } return arm_libcall_value_1 (mode); } /* Implement TARGET_FUNCTION_VALUE_REGNO_P. 
*/

/* Return true if REGNO is one of the registers tested below:
   ARG_REGISTER (1), FIRST_VFP_REGNUM (32-bit, AAPCS-based, hard-float
   only) or FIRST_IWMMXT_REGNUM (iWMMXt ABI only).  */
static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
          && TARGET_AAPCS_BASED
          && TARGET_HARD_FLOAT
          && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
          && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}

/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  /* Base size; the 32-bit-only additions below are cumulative.  */
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
        size += 32;
      if (TARGET_IWMMXT_ABI)
        size += 8;
    }

  return size;
}

/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
         complex) are always returned in a register (or registers).
         We don't care about which register here, so we can short-cut
         some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
          && TREE_CODE (type) != VECTOR_TYPE
          && TREE_CODE (type) != COMPLEX_TYPE)
        return false;

      /* Any return value that is no larger than one word can be
         returned in r0.  The unsigned cast makes a negative
         (variable) SIZE compare as a very large value.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
        return false;

      /* Check any available co-processors to see if they accept the
         type as a register candidate (VFP, for example, can return
         some aggregates in consecutive registers).  These aren't
         available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
        return false;

      /* Vector values should be returned using ARM registers, not
         memory (unless they're over 16 bytes, which will break since
         we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
        return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
         larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
         if the type is 'integer like' and every addressable element
         has an offset of zero.  For practical purposes this means
         that the structure can have at most one non bit-field element
         and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
         have been created by C++.  */
      for (field = TYPE_FIELDS (type);
           field && TREE_CODE (field) != FIELD_DECL;
           field = DECL_CHAIN (field))
        continue;

      if (field == NULL)
        return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
        return true;

      /* ... Aggregates that are not themselves valid for returning in
         a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
        return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
         since they are not addressable.  */
      for (field = DECL_CHAIN (field);
           field;
           field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (!DECL_BIT_FIELD_TYPE (field))
            return true;
        }

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
         integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
           field;
           field = DECL_CHAIN (field))
        {
          if (TREE_CODE (field) != FIELD_DECL)
            continue;

          if (FLOAT_TYPE_P (TREE_TYPE (field)))
            return true;

          if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
            return true;
        }

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}

/* Table mapping the string argument of the "pcs" attribute to the
   corresponding arm_pcs value.  The table is terminated by an entry
   whose ARG is NULL.  */
const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
       to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

/* Return the arm_pcs value named by the string argument of ATTR, or
   ARM_PCS_UNKNOWN if the argument is missing, is not a string constant,
   or does not match any entry of pcs_attribute_args.  */
static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized PCS name.  */
  return ARM_PCS_UNKNOWN;
}

/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declaration.  DECL may be null if
   the call could be indirect or if this is a library call.
*/ static enum arm_pcs arm_get_pcs_model (const_tree type, const_tree decl) { bool user_convention = false; enum arm_pcs user_pcs = arm_pcs_default; tree attr; gcc_assert (type); attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type)); if (attr) { user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr)); user_convention = true; } if (TARGET_AAPCS_BASED) { /* Detect varargs functions. These always use the base rules (no argument is ever a candidate for a co-processor register). */ bool base_rules = stdarg_p (type); if (user_convention) { if (user_pcs > ARM_PCS_AAPCS_LOCAL) sorry ("non-AAPCS derived PCS variant"); else if (base_rules && user_pcs != ARM_PCS_AAPCS) error ("variadic functions must use the base AAPCS variant"); } if (base_rules) return ARM_PCS_AAPCS; else if (user_convention) return user_pcs; else if (decl && flag_unit_at_a_time) { /* Local functions never leak outside this compilation unit, so we are free to use whatever conventions are appropriate. */ /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */ cgraph_node *local_info_node = cgraph_node::local_info_node (CONST_CAST_TREE (decl)); if (local_info_node && local_info_node->local) return ARM_PCS_AAPCS_LOCAL; } } else if (user_convention && user_pcs != arm_pcs_default) sorry ("PCS variant"); /* For everything else we use the target's default. */ return arm_pcs_default; } static void aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED, const_tree fntype ATTRIBUTE_UNUSED, rtx libcall ATTRIBUTE_UNUSED, const_tree fndecl ATTRIBUTE_UNUSED) { /* Record the unallocated VFP registers. */ pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1; pcum->aapcs_vfp_reg_alloc = 0; } /* Walk down the type tree of TYPE counting consecutive base elements. If *MODEP is VOIDmode, then set it to the first valid floating point type. If a non-floating point type is found, or if a floating point type that doesn't match a non-VOIDmode *MODEP is found, then return -1, otherwise return the count in the sub-tree. 
*/
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      /* A scalar float counts as one element, provided its mode is one
         of the four accepted here and agrees with *MODEP.  */
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode
          && mode != HFmode && mode != BFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 1;

      break;

    case COMPLEX_TYPE:
      /* A complex value counts as two elements of its component mode.  */
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
        return -1;

      if (*modep == VOIDmode)
        *modep = mode;

      if (*modep == mode)
        return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
         and 128-bit vector types, whether or not those modes are
         supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
        {
        case 8:
          mode = V2SImode;
          break;
        case 16:
          mode = V4SImode;
          break;
        default:
          return -1;
        }

      if (*modep == VOIDmode)
        *modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
         equivalent for the purposes of being homogeneous aggregates
         if they are the same size.  */
      if (*modep == mode)
        return 1;

      break;

    case ARRAY_TYPE:
      {
        int count;
        tree index = TYPE_DOMAIN (type);

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        /* Element count of one array element; scaled by the number of
           array elements below.  */
        count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
        if (count == -1
            || !index
            || !TYPE_MAX_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
            || !TYPE_MIN_VALUE (index)
            || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
            || count < 0)
          return -1;

        count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
                      - tree_to_uhwi (TYPE_MIN_VALUE (index)));

        /* There must be no padding.  */
        if (wi::to_wide (TYPE_SIZE (type))
            != count * GET_MODE_BITSIZE (*modep))
          return -1;

        return count;
      }

    case RECORD_TYPE:
      {
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        /* The element counts of all FIELD_DECLs are summed.  */
        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count += sub_count;
          }

        /* There must be no padding.  */
        if (wi::to_wide (TYPE_SIZE (type))
            != count * GET_MODE_BITSIZE (*modep))
          return -1;

        return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
        /* These aren't very interesting except in a degenerate
           case.  */
        int count = 0;
        int sub_count;
        tree field;

        /* Can't handle incomplete types nor sizes that are not
           fixed.  */
        if (!COMPLETE_TYPE_P (type)
            || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
          return -1;

        /* The result is the largest element count of any FIELD_DECL.  */
        for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
          {
            if (TREE_CODE (field) != FIELD_DECL)
              continue;

            sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
            if (sub_count < 0)
              return -1;
            count = count > sub_count ? count : sub_count;
          }

        /* There must be no padding.  */
        if (wi::to_wide (TYPE_SIZE (type))
            != count * GET_MODE_BITSIZE (*modep))
          return -1;

        return count;
      }

    default:
      break;
    }

  return -1;
}

/* Return true if PCS_VARIANT should use VFP registers.  IS_DOUBLE is
   consulted only for ARM_PCS_AAPCS_LOCAL, where VFP registers are used
   on 32-bit hard-float targets if TARGET_VFP_DOUBLE holds or IS_DOUBLE
   is false.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
        {
          sorry ("Thumb-1 hard-float VFP ABI");
          /* sorry() is not immediately fatal, so only display this once.  */
          seen_thumb1_vfp = true;
        }

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT &&
          (TARGET_VFP_DOUBLE || !is_double));
}

/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.
If it is, then *BASE_MODE is updated to contain a machine mode describing each element of the argument's type and *COUNT to hold the number of such elements. */ static bool aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant, machine_mode mode, const_tree type, machine_mode *base_mode, int *count) { machine_mode new_mode = VOIDmode; /* If we have the type information, prefer that to working things out from the mode. */ if (type) { int ag_count = aapcs_vfp_sub_candidate (type, &new_mode); if (ag_count > 0 && ag_count <= 4) *count = ag_count; else return false; } else if (GET_MODE_CLASS (mode) == MODE_FLOAT || GET_MODE_CLASS (mode) == MODE_VECTOR_INT || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) { *count = 1; new_mode = mode; } else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT) { *count = 2; new_mode = (mode == DCmode ? DFmode : SFmode); } else return false; if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1)) return false; *base_mode = new_mode; if (TARGET_GENERAL_REGS_ONLY) error ("argument of type %qT not permitted with -mgeneral-regs-only", type); return true; } static bool aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant, machine_mode mode, const_tree type) { int count ATTRIBUTE_UNUSED; machine_mode ag_mode ATTRIBUTE_UNUSED; if (!use_vfp_abi (pcs_variant, false)) return false; return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, &ag_mode, &count); } static bool aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode, const_tree type) { if (!use_vfp_abi (pcum->pcs_variant, false)) return false; return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type, &pcum->aapcs_vfp_rmode, &pcum->aapcs_vfp_rcount); } /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there for the behaviour of this function. 
*/
static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
                    const_tree type  ATTRIBUTE_UNUSED)
{
  /* SHIFT is the element size in SFmode-sized units (at least one), and
     MASK has one bit per such unit needed by the whole argument.  */
  int rmode_size = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode),
                        GET_MODE_SIZE (SFmode));
  int shift = rmode_size / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;

  for (int regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    {
      /* Skip this position unless every register covered by MASK is
         still free.  */
      if (((pcum->aapcs_vfp_regs_free >> regno) & mask) != mask)
        continue;

      pcum->aapcs_vfp_reg_alloc = mask << regno;

      const bool needs_parallel
        = (mode == BLKmode
           || (mode == TImode && ! TARGET_NEON)
           || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode));

      if (!needs_parallel)
        {
          /* The whole value fits a single hard register of mode MODE.  */
          pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
          return true;
        }

      int n_pieces = pcum->aapcs_vfp_rcount;
      int piece_shift = shift;
      machine_mode piece_mode = pcum->aapcs_vfp_rmode;

      if (!TARGET_NEON)
        {
          /* Avoid using unsupported vector modes.  */
          if (piece_mode == V2SImode)
            piece_mode = DImode;
          else if (piece_mode == V4SImode)
            {
              piece_mode = DImode;
              n_pieces *= 2;
              piece_shift /= 2;
            }
        }

      /* Describe the value as a PARALLEL of (register, byte offset)
         pairs, one per piece.  */
      rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_pieces));
      for (int i = 0; i < n_pieces; i++)
        {
          rtx piece = gen_rtx_REG (piece_mode,
                                   FIRST_VFP_REGNUM + regno + i * piece_shift);
          piece = gen_rtx_EXPR_LIST
            (VOIDmode, piece,
             GEN_INT (i * GET_MODE_SIZE (piece_mode)));
          XVECEXP (par, 0, i) = piece;
        }

      pcum->aapcs_reg = par;
      return true;
    }

  /* No run of free registers was found.  */
  return false;
}

/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.
*/ static rtx aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED, machine_mode mode, const_tree type ATTRIBUTE_UNUSED) { if (!use_vfp_abi (pcs_variant, false)) return NULL; if (mode == BLKmode || (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode) && !TARGET_NEON)) { int count; machine_mode ag_mode; int i; rtx par; int shift; aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type, &ag_mode, &count); if (!TARGET_NEON) { if (ag_mode == V2SImode) ag_mode = DImode; else if (ag_mode == V4SImode) { ag_mode = DImode; count *= 2; } } shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode); par = gen_rtx_PARALLEL (mode, rtvec_alloc (count)); for (i = 0; i < count; i++) { rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift); tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, GEN_INT (i * GET_MODE_SIZE (ag_mode))); XVECEXP (par, 0, i) = tmp; } return par; } return gen_rtx_REG (mode, FIRST_VFP_REGNUM); } static void aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED, machine_mode mode ATTRIBUTE_UNUSED, const_tree type ATTRIBUTE_UNUSED) { pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc; pcum->aapcs_vfp_reg_alloc = 0; return; } #define AAPCS_CP(X) \ { \ aapcs_ ## X ## _cum_init, \ aapcs_ ## X ## _is_call_candidate, \ aapcs_ ## X ## _allocate, \ aapcs_ ## X ## _is_return_candidate, \ aapcs_ ## X ## _allocate_return_reg, \ aapcs_ ## X ## _advance \ } /* Table of co-processors that can be used to pass arguments in registers. Idealy no arugment should be a candidate for more than one co-processor table entry, but the table is processed in order and stops after the first match. If that entry then fails to put the argument into a co-processor register, the argument will go on the stack. */ static struct { /* Initialize co-processor related state in CUMULATIVE_ARGS structure. 
*/ void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree); /* Return true if an argument of mode MODE (or type TYPE if MODE is BLKmode) is a candidate for this co-processor's registers; this function should ignore any position-dependent state in CUMULATIVE_ARGS and only use call-type dependent information. */ bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree); /* Return true if the argument does get a co-processor register; it should set aapcs_reg to an RTX of the register allocated as is required for a return from FUNCTION_ARG. */ bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree); /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can be returned in this co-processor's registers. */ bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree); /* Allocate and return an RTX element to hold the return type of a call. This routine must not fail and will only be called if is_return_candidate returned true with the same parameters. */ rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree); /* Finish processing this argument and prepare to start processing the next one. */ void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree); } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] = { AAPCS_CP(vfp) }; #undef AAPCS_CP static int aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode, const_tree type) { int i; for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type)) return i; return -1; } static int aapcs_select_return_coproc (const_tree type, const_tree fntype) { /* We aren't passed a decl, so we can't check that a call is local. However, it isn't clear that that would be a win anyway, since it might limit some tail-calling opportunities. 
*/ enum arm_pcs pcs_variant; if (fntype) { const_tree fndecl = NULL_TREE; if (TREE_CODE (fntype) == FUNCTION_DECL) { fndecl = fntype; fntype = TREE_TYPE (fntype); } pcs_variant = arm_get_pcs_model (fntype, fndecl); } else pcs_variant = arm_pcs_default; if (pcs_variant != ARM_PCS_AAPCS) { int i; for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, TYPE_MODE (type), type)) return i; } return -1; } static rtx aapcs_allocate_return_reg (machine_mode mode, const_tree type, const_tree fntype) { /* We aren't passed a decl, so we can't check that a call is local. However, it isn't clear that that would be a win anyway, since it might limit some tail-calling opportunities. */ enum arm_pcs pcs_variant; int unsignedp ATTRIBUTE_UNUSED; if (fntype) { const_tree fndecl = NULL_TREE; if (TREE_CODE (fntype) == FUNCTION_DECL) { fndecl = fntype; fntype = TREE_TYPE (fntype); } pcs_variant = arm_get_pcs_model (fntype, fndecl); } else pcs_variant = arm_pcs_default; /* Promote integer types. */ if (type && INTEGRAL_TYPE_P (type)) mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1); if (pcs_variant != ARM_PCS_AAPCS) { int i; for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++) if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode, type)) return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant, mode, type); } /* Promotes small structs returned in a register to full-word size for big-endian AAPCS. 
*/ if (type && arm_return_in_msb (type)) { HOST_WIDE_INT size = int_size_in_bytes (type); if (size % UNITS_PER_WORD != 0) { size += UNITS_PER_WORD - size % UNITS_PER_WORD; mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require (); } } return gen_rtx_REG (mode, R0_REGNUM); } static rtx aapcs_libcall_value (machine_mode mode) { if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode) && GET_MODE_SIZE (mode) <= 4) mode = SImode; return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE); } /* Lay out a function argument using the AAPCS rules. The rule numbers referred to here are those in the AAPCS. */ static void aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode, const_tree type, bool named) { int nregs, nregs2; int ncrn; /* We only need to do this once per argument. */ if (pcum->aapcs_arg_processed) return; pcum->aapcs_arg_processed = true; /* Special case: if named is false then we are handling an incoming anonymous argument which is on the stack. */ if (!named) return; /* Is this a potential co-processor register candidate? */ if (pcum->pcs_variant != ARM_PCS_AAPCS) { int slot = aapcs_select_call_coproc (pcum, mode, type); pcum->aapcs_cprc_slot = slot; /* We don't have to apply any of the rules from part B of the preparation phase, these are handled elsewhere in the compiler. */ if (slot >= 0) { /* A Co-processor register candidate goes either in its own class of registers or on the stack. */ if (!pcum->aapcs_cprc_failed[slot]) { /* C1.cp - Try to allocate the argument to co-processor registers. */ if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type)) return; /* C2.cp - Put the argument on the stack and note that we can't assign any more candidates in this slot. We also need to note that we have allocated stack space, so that we won't later try to split a non-cprc candidate between core registers and the stack. */ pcum->aapcs_cprc_failed[slot] = true; pcum->can_split = false; } /* We didn't get a register, so this argument goes on the stack. 
*/ gcc_assert (pcum->can_split == false); return; } } /* C3 - For double-word aligned arguments, round the NCRN up to the next even number. */ ncrn = pcum->aapcs_ncrn; if (ncrn & 1) { int res = arm_needs_doubleword_align (mode, type); /* Only warn during RTL expansion of call stmts, otherwise we would warn e.g. during gimplification even on functions that will be always inlined, and we'd warn multiple times. Don't warn when called in expand_function_start either, as we warn instead in arm_function_arg_boundary in that case. */ if (res < 0 && warn_psabi && currently_expanding_gimple_stmt) inform (input_location, "parameter passing for argument of type " "%qT changed in GCC 7.1", type); else if (res > 0) ncrn++; } nregs = ARM_NUM_REGS2(mode, type); /* Sigh, this test should really assert that nregs > 0, but a GCC extension allows empty structs and then gives them empty size; it then allows such a structure to be passed by value. For some of the code below we have to pretend that such an argument has non-zero size so that we 'locate' it correctly either in registers or on the stack. */ gcc_assert (nregs >= 0); nregs2 = nregs ? nregs : 1; /* C4 - Argument fits entirely in core registers. */ if (ncrn + nregs2 <= NUM_ARG_REGS) { pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); pcum->aapcs_next_ncrn = ncrn + nregs; return; } /* C5 - Some core registers left and there are no arguments already on the stack: split this argument between the remaining core registers and the stack. */ if (ncrn < NUM_ARG_REGS && pcum->can_split) { pcum->aapcs_reg = gen_rtx_REG (mode, ncrn); pcum->aapcs_next_ncrn = NUM_ARG_REGS; pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD; return; } /* C6 - NCRN is set to 4. */ pcum->aapcs_next_ncrn = NUM_ARG_REGS; /* C7,C8 - arugment goes on the stack. We have nothing to do here. */ return; } /* Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function whose data type is FNTYPE. For a library call, FNTYPE is NULL. 
*/
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
                          rtx libname,
                          tree fndecl ATTRIBUTE_UNUSED)
{
  /* Select the PCS variant: from the function type when there is one,
     otherwise the target default.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      /* Libcalls registered in arm_libcall_uses_aapcs_base are forced to
         the base variant.  */
      if (arm_libcall_uses_aapcs_base (libname))
        pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
        {
          int i;

          /* Reset the per-co-processor state.  */
          for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
            {
              pcum->aapcs_cprc_failed[i] = false;
              aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
            }
        }
      return;
    }

  /* Legacy ABIs */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      /* Count the entries of the function's argument type list.  */
      for (fn_arg = TYPE_ARG_TYPES (fntype);
           fn_arg;
           fn_arg = TREE_CHAIN (fn_arg))
        pcum->named_count += 1;

      /* An empty list is treated as "every argument is named".  */
      if (! pcum->named_count)
        pcum->named_count = INT_MAX;
    }
}

/* Return 2 if double word alignment is required for argument passing,
   but wasn't required before the fix for PR88469.
   Return 1 if double word alignment is required for argument passing.
   Return -1 if double word alignment used to be required for argument
   passing before PR77728 ABI fix, but is not required anymore.
   Return 0 if double word alignment is not required and wasn't required
   before either.  */
static int
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  /* Without a type, decide from the alignment of MODE alone.  */
  if (!type)
    return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  int ret = 0;
  int ret2 = 0;
  /* Record/aggregate types: Use greatest member alignment of any member.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      {
        if (TREE_CODE (field) == FIELD_DECL)
          return 1;
        else
          /* Before PR77728 fix, we were incorrectly considering also
             other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
             Make sure we can warn about that with -Wpsabi.  */
          ret = -1;
      }
    else if (TREE_CODE (field) == FIELD_DECL
             && DECL_BIT_FIELD_TYPE (field)
             && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
      /* A bit-field whose declared type is over-aligned.  */
      ret2 = 1;

  if (ret2)
    return 2;

  return ret;
}

/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   ARG is a description of the argument.

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (arg.end_marker_p ())
    return const0_rtx;

  /* AAPCS variants are laid out by aapcs_layout_arg.  */
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (arg.mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
        return gen_rtx_REG (arg.mode,
                            pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
        {
          pcum->can_split = false;
          return NULL_RTX;
        }
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
    {
      int res = arm_needs_doubleword_align (arg.mode, arg.type);
      if (res < 0 && warn_psabi)
        inform (input_location, "parameter passing for argument of type "
                "%qT changed in GCC 7.1", arg.type);
      else if (res > 0)
        {
          pcum->nregs++;
          if (res > 1 && warn_psabi)
            inform (input_location, "parameter passing for argument of type "
                    "%qT changed in GCC 9.1", arg.type);
        }
    }

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (arg.mode, arg.type);

  if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (arg.mode, pcum->nregs);
}

/* Return the alignment boundary for an argument of mode MODE and type
   TYPE: DOUBLEWORD_ALIGNMENT when arm_needs_doubleword_align reports a
   positive result, PARM_BOUNDARY otherwise.  Emits -Wpsabi notes for
   cases whose passing changed in GCC 7.1 or 9.1.  */
static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (!ARM_DOUBLEWORD_ALIGN)
    return PARM_BOUNDARY;

  int res = arm_needs_doubleword_align (mode, type);
  if (res < 0 && warn_psabi)
    inform (input_location, "parameter passing for argument of type %qT "
            "changed in GCC 7.1", type);
  if (res > 1 && warn_psabi)
    inform (input_location, "parameter passing for argument of type "
            "%qT changed in GCC 9.1", type);

  return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}

/* Return the number of bytes of ARG that are passed in registers when
   the argument is split between registers and the stack, or zero if it
   is not split.  */
static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  /* AAPCS variants: the layout routine has already computed the answer.  */
  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
    return 0;

  /* Some registers remain, but not enough for the whole argument.  */
  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}

/* Update the data in PCUM to advance over argument ARG.  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v,
                          const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);

      /* Let the co-processor that claimed the argument commit its
         allocation.  */
      if (pcum->aapcs_cprc_slot >= 0)
        {
          aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
                                                              arg.type);
          pcum->aapcs_cprc_slot = -1;
        }

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (arg.mode)
          && pcum->named_count > pcum->nargs
          && TARGET_IWMMXT_ABI)
        pcum->iwmmxt_nregs += 1;
      else
        pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
    }
}

/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
}

/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,          /* No #pragma [no_]long_calls is in effect.  */
  LONG,         /* #pragma long_calls is in effect.  */
  SHORT         /* #pragma no_long_calls is in effect.
*/ } arm_pragma_enum; static arm_pragma_enum arm_pragma_long_calls = OFF; void arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) { arm_pragma_long_calls = LONG; } void arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED) { arm_pragma_long_calls = SHORT; } void arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED) { arm_pragma_long_calls = OFF; } /* Handle an attribute requiring a FUNCTION_DECL; arguments as in struct attribute_spec.handler. */ static tree arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) { if (TREE_CODE (*node) != FUNCTION_DECL) { warning (OPT_Wattributes, "%qE attribute only applies to functions", name); *no_add_attrs = true; } return NULL_TREE; } /* Handle an "interrupt" or "isr" attribute; arguments as in struct attribute_spec.handler. */ static tree arm_handle_isr_attribute (tree *node, tree name, tree args, int flags, bool *no_add_attrs) { if (DECL_P (*node)) { if (TREE_CODE (*node) != FUNCTION_DECL) { warning (OPT_Wattributes, "%qE attribute only applies to functions", name); *no_add_attrs = true; } /* FIXME: the argument if any is checked for type attributes; should it be checked for decl ones? */ } else { if (TREE_CODE (*node) == FUNCTION_TYPE || TREE_CODE (*node) == METHOD_TYPE) { if (arm_isr_value (args) == ARM_FT_UNKNOWN) { warning (OPT_Wattributes, "%qE attribute ignored", name); *no_add_attrs = true; } } else if (TREE_CODE (*node) == POINTER_TYPE && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE) && arm_isr_value (args) != ARM_FT_UNKNOWN) { *node = build_variant_type_copy (*node); TREE_TYPE (*node) = build_type_attribute_variant (TREE_TYPE (*node), tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node)))); *no_add_attrs = true; } else { /* Possibly pass this attribute on from the type to a decl. 
*/ if (flags & ((int) ATTR_FLAG_DECL_NEXT | (int) ATTR_FLAG_FUNCTION_NEXT | (int) ATTR_FLAG_ARRAY_NEXT)) { *no_add_attrs = true; return tree_cons (name, args, NULL_TREE); } else { warning (OPT_Wattributes, "%qE attribute ignored", name); } } } return NULL_TREE; } /* Handle a "pcs" attribute; arguments as in struct attribute_spec.handler. */ static tree arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args, int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) { if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN) { warning (OPT_Wattributes, "%qE attribute ignored", name); *no_add_attrs = true; } return NULL_TREE; } #if TARGET_DLLIMPORT_DECL_ATTRIBUTES /* Handle the "notshared" attribute. This attribute is another way of requesting hidden visibility. ARM's compiler supports "__declspec(notshared)"; we support the same thing via an attribute. */ static tree arm_handle_notshared_attribute (tree *node, tree name ATTRIBUTE_UNUSED, tree args ATTRIBUTE_UNUSED, int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) { tree decl = TYPE_NAME (*node); if (decl) { DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN; DECL_VISIBILITY_SPECIFIED (decl) = 1; *no_add_attrs = false; } return NULL_TREE; } #endif /* This function returns true if a function with declaration FNDECL and type FNTYPE uses the stack to pass arguments or return variables and false otherwise. This is used for functions with the attributes 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue diagnostic messages if the stack is used. NAME is the name of the attribute used. */ static bool cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype) { function_args_iterator args_iter; CUMULATIVE_ARGS args_so_far_v; cumulative_args_t args_so_far; bool first_param = true; tree arg_type, prev_arg_type = NULL_TREE, ret_type; /* Error out if any argument is passed on the stack. 
*/ arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl); args_so_far = pack_cumulative_args (&args_so_far_v); FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter) { rtx arg_rtx; prev_arg_type = arg_type; if (VOID_TYPE_P (arg_type)) continue; function_arg_info arg (arg_type, /*named=*/true); if (!first_param) /* ??? We should advance after processing the argument and pass the argument we're advancing past. */ arm_function_arg_advance (args_so_far, arg); arg_rtx = arm_function_arg (args_so_far, arg); if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg)) { error ("%qE attribute not available to functions with arguments " "passed on the stack", name); return true; } first_param = false; } /* Error out for variadic functions since we cannot control how many arguments will be passed and thus stack could be used. stdarg_p () is not used for the checking to avoid browsing arguments twice. */ if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type)) { error ("%qE attribute not available to functions with variable number " "of arguments", name); return true; } /* Error out if return value is passed on the stack. */ ret_type = TREE_TYPE (fntype); if (arm_return_in_memory (ret_type, fntype)) { error ("%qE attribute not available to functions that return value on " "the stack", name); return true; } return false; } /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this function will check whether the attribute is allowed here and will add the attribute to the function declaration tree or otherwise issue a warning. */ static tree arm_handle_cmse_nonsecure_entry (tree *node, tree name, tree /* args */, int /* flags */, bool *no_add_attrs) { tree fndecl; if (!use_cmse) { *no_add_attrs = true; warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> " "option.", name); return NULL_TREE; } /* Ignore attribute for function types. 
*/ if (TREE_CODE (*node) != FUNCTION_DECL) { warning (OPT_Wattributes, "%qE attribute only applies to functions", name); *no_add_attrs = true; return NULL_TREE; } fndecl = *node; /* Warn for static linkage functions. */ if (!TREE_PUBLIC (fndecl)) { warning (OPT_Wattributes, "%qE attribute has no effect on functions " "with static linkage", name); *no_add_attrs = true; return NULL_TREE; } *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name, TREE_TYPE (fndecl)); return NULL_TREE; } /* Called upon detection of the use of the cmse_nonsecure_call attribute, this function will check whether the attribute is allowed here and will add the attribute to the function type tree or otherwise issue a diagnostic. The reason we check this at declaration time is to only allow the use of the attribute with declarations of function pointers and not function declarations. This function checks NODE is of the expected type and issues diagnostics otherwise using NAME. If it is not of the expected type *NO_ADD_ATTRS will be set to true. */ static tree arm_handle_cmse_nonsecure_call (tree *node, tree name, tree /* args */, int /* flags */, bool *no_add_attrs) { tree decl = NULL_TREE, fntype = NULL_TREE; tree type; if (!use_cmse) { *no_add_attrs = true; warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> " "option.", name); return NULL_TREE; } if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL) { decl = *node; fntype = TREE_TYPE (decl); } while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE) fntype = TREE_TYPE (fntype); if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE) { warning (OPT_Wattributes, "%qE attribute only applies to base type of a " "function pointer", name); *no_add_attrs = true; return NULL_TREE; } *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype); if (*no_add_attrs) return NULL_TREE; /* Prevent trees being shared among function types with and without cmse_nonsecure_call attribute. 
*/ type = TREE_TYPE (decl); type = build_distinct_type_copy (type); TREE_TYPE (decl) = type; fntype = type; while (TREE_CODE (fntype) != FUNCTION_TYPE) { type = fntype; fntype = TREE_TYPE (fntype); fntype = build_distinct_type_copy (fntype); TREE_TYPE (type) = fntype; } /* Construct a type attribute and add it to the function type. */ tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE, TYPE_ATTRIBUTES (fntype)); TYPE_ATTRIBUTES (fntype) = attrs; return NULL_TREE; } /* Return 0 if the attributes for two types are incompatible, 1 if they are compatible, and 2 if they are nearly compatible (which causes a warning to be generated). */ static int arm_comp_type_attributes (const_tree type1, const_tree type2) { int l1, l2, s1, s2; /* Check for mismatch of non-default calling convention. */ if (TREE_CODE (type1) != FUNCTION_TYPE) return 1; /* Check for mismatched call attributes. */ l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL; l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL; s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL; s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL; /* Only bother to check if an attribute is defined. */ if (l1 | l2 | s1 | s2) { /* If one type has an attribute, the other must have the same attribute. */ if ((l1 != l2) || (s1 != s2)) return 0; /* Disallow mixed attributes. */ if ((l1 & s2) || (l2 & s1)) return 0; } /* Check for mismatched ISR attribute. */ l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL; if (! l1) l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL; l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL; if (! 
l2) l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL; if (l1 != l2) return 0; l1 = lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (type1)) != NULL; l2 = lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (type2)) != NULL; if (l1 != l2) return 0; return 1; } /* Assigns default attributes to newly defined type. This is used to set short_call/long_call attributes for function types of functions defined inside corresponding #pragma scopes. */ static void arm_set_default_type_attributes (tree type) { /* Add __attribute__ ((long_call)) to all functions, when inside #pragma long_calls or __attribute__ ((short_call)), when inside #pragma no_long_calls. */ if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE) { tree type_attr_list, attr_name; type_attr_list = TYPE_ATTRIBUTES (type); if (arm_pragma_long_calls == LONG) attr_name = get_identifier ("long_call"); else if (arm_pragma_long_calls == SHORT) attr_name = get_identifier ("short_call"); else return; type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list); TYPE_ATTRIBUTES (type) = type_attr_list; } } /* Return true if DECL is known to be linked into section SECTION. */ static bool arm_function_in_section_p (tree decl, section *section) { /* We can only be certain about the prevailing symbol definition. */ if (!decl_binds_to_current_def_p (decl)) return false; /* If DECL_SECTION_NAME is set, assume it is trustworthy. */ if (!DECL_SECTION_NAME (decl)) { /* Make sure that we will not create a unique section for DECL. */ if (flag_function_sections || DECL_COMDAT_GROUP (decl)) return false; } return function_section (decl) == section; } /* Return nonzero if a 32-bit "long_call" should be generated for a call from the current function to DECL. We generate a long_call if the function: a. has an __attribute__((long call)) or b. is within the scope of a #pragma long_calls or c. 
the -mlong-calls command line switch has been specified However we do not generate a long call if the function: d. has an __attribute__ ((short_call)) or e. is inside the scope of a #pragma no_long_calls or f. is defined in the same section as the current function. */ bool arm_is_long_call_p (tree decl) { tree attrs; if (!decl) return TARGET_LONG_CALLS; attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl)); if (lookup_attribute ("short_call", attrs)) return false; /* For "f", be conservative, and only cater for cases in which the whole of the current function is placed in the same section. */ if (!flag_reorder_blocks_and_partition && TREE_CODE (decl) == FUNCTION_DECL && arm_function_in_section_p (decl, current_function_section ())) return false; if (lookup_attribute ("long_call", attrs)) return true; return TARGET_LONG_CALLS; } /* Return nonzero if it is ok to make a tail-call to DECL. */ static bool arm_function_ok_for_sibcall (tree decl, tree exp) { unsigned long func_type; if (cfun->machine->sibcall_blocked) return false; if (TARGET_FDPIC) { /* In FDPIC, never tailcall something for which we have no decl: the target function could be in a different module, requiring a different FDPIC register value. */ if (decl == NULL) return false; } /* Never tailcall something if we are generating code for Thumb-1. */ if (TARGET_THUMB1) return false; /* The PIC register is live on entry to VxWorks PLT entries, so we must make the call before restoring the PIC register. */ if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl)) return false; /* ??? 
Cannot tail-call to long calls with APCS frame and VFP, because IP may be used both as target of the call and base register for restoring the VFP registers */ if (TARGET_APCS_FRAME && TARGET_ARM && TARGET_HARD_FLOAT && decl && arm_is_long_call_p (decl)) return false; /* If we are interworking and the function is not declared static then we can't tail-call it unless we know that it exists in this compilation unit (since it might be a Thumb routine). */ if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl)) return false; func_type = arm_current_func_type (); /* Never tailcall from an ISR routine - it needs a special exit sequence. */ if (IS_INTERRUPT (func_type)) return false; /* ARMv8-M non-secure entry functions need to return with bxns which is only generated for entry functions themselves. */ if (IS_CMSE_ENTRY (arm_current_func_type ())) return false; /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls, this would complicate matters for later code generation. */ if (TREE_CODE (exp) == CALL_EXPR) { tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp))); if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype))) return false; } if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) { /* Check that the return value locations are the same. For example that we aren't returning a value from the sibling in a VFP register but then need to transfer it to a core register. */ rtx a, b; tree decl_or_type = decl; /* If it is an indirect function pointer, get the function type. */ if (!decl) decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp))); a = arm_function_value (TREE_TYPE (exp), decl_or_type, false); b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), cfun->decl, false); if (!rtx_equal_p (a, b)) return false; } /* Never tailcall if function may be called with a misaligned SP. 
*/ if (IS_STACKALIGN (func_type)) return false; /* The AAPCS says that, on bare-metal, calls to unresolved weak references should become a NOP. Don't convert such calls into sibling calls. */ if (TARGET_AAPCS_BASED && arm_abi == ARM_ABI_AAPCS && decl && DECL_WEAK (decl)) return false; /* We cannot do a tailcall for an indirect call by descriptor if all the argument registers are used because the only register left to load the address is IP and it will already contain the static chain. */ if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines) { tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp))); CUMULATIVE_ARGS cum; cumulative_args_t cum_v; arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE); cum_v = pack_cumulative_args (&cum); for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t)) { tree type = TREE_VALUE (t); if (!VOID_TYPE_P (type)) { function_arg_info arg (type, /*named=*/true); arm_function_arg_advance (cum_v, arg); } } function_arg_info arg (integer_type_node, /*named=*/true); if (!arm_function_arg (cum_v, arg)) return false; } /* Everything else is ok. */ return true; } /* Addressing mode support functions. */ /* Return nonzero if X is a legitimate immediate operand when compiling for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */ int legitimate_pic_operand_p (rtx x) { if (GET_CODE (x) == SYMBOL_REF || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)) return 0; return 1; } /* Record that the current function needs a PIC register. If PIC_REG is null, a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In both case cfun->machine->pic_reg is initialized if we have not already done so. COMPUTE_NOW decide whether and where to set the PIC register. If true, PIC register is reloaded in the current position of the instruction stream irregardless of whether it was loaded before. 
Otherwise, it is only loaded
   if not already done so (crtl->uses_pic_offset_table is null).  Note that
   nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
   is only supported iff COMPUTE_NOW is false.  */

static void
require_pic_register (rtx pic_reg, bool compute_now)
{
  /* Enforce the PIC_REG / COMPUTE_NOW pairing described above.  */
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table || compute_now)
    {
      /* Either a pseudo can still be created, or the caller must have
         supplied a Pmode register to use.  */
      gcc_assert (can_create_pseudo_p ()
                  || (pic_reg != NULL_RTX
                      && REG_P (pic_reg)
                      && GET_MODE (pic_reg) == Pmode));
      /* A fixed PIC register is used directly, unless we were asked to
         compute the value now or the register is a high register on
         Thumb-1.  */
      if (arm_pic_register != INVALID_REGNUM
          && !compute_now
          && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
        {
          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            crtl->uses_pic_offset_table = 1;
        }
      else
        {
          rtx_insn *seq, *insn;

          if (pic_reg == NULL_RTX)
            pic_reg = gen_reg_rtx (Pmode);
          if (!cfun->machine->pic_reg)
            cfun->machine->pic_reg = pic_reg;

          /* Play games to avoid marking the function as needing pic
             if we are being called as part of the cost-estimation
             process.  */
          if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
            {
              crtl->uses_pic_offset_table = 1;
              /* Build the load in a detached sequence so it can be placed
                 either here or on the function's entry edge.  */
              start_sequence ();

              if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
                  && arm_pic_register > LAST_LO_REGNUM
                  && !compute_now)
                /* Copy the fixed high PIC register into the register
                   recorded in cfun->machine->pic_reg.  */
                emit_move_insn (cfun->machine->pic_reg,
                                gen_rtx_REG (Pmode, arm_pic_register));
              else
                arm_load_pic_register (0UL, pic_reg);

              seq = get_insns ();
              end_sequence ();

              /* Attribute every generated insn to the prologue.  */
              for (insn = seq; insn; insn = NEXT_INSN (insn))
                if (INSN_P (insn))
                  INSN_LOCATION (insn) = prologue_location;

              /* We can be called during expansion of PHI nodes, where
                 we can't yet emit instructions directly in the final
                 insn stream.  Queue the insns on the entry edge, they will
                 be committed after everything else is expanded.  */
              if (currently_expanding_to_rtl)
                insert_insn_on_edge (seq,
                                     single_succ_edge
                                     (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
              else
                emit_insn (seq);
            }
        }
    }
}

/* Generate insns to calculate the address of ORIG in pic mode.  REG
   receives the result and PIC_REG is the PIC base register.  Returns the
   insn emitted for the calculate_pic_address pattern.  */
static rtx_insn *
calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
{
  rtx pat;
  rtx mem;

  pat = gen_calculate_pic_address (reg, pic_reg, orig);

  /* Make the MEM as close to a constant as possible.  */
  mem = SET_SRC (pat);
  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
  MEM_READONLY_P (mem) = 1;
  MEM_NOTRAP_P (mem) = 1;

  return emit_insn (pat);
}

/* Legitimize PIC load to ORIG into REG.  If REG is NULL, a new pseudo is
   created to hold the result of the load.  If not NULL, PIC_REG indicates
   which register to use as PIC register, otherwise it is decided by register
   allocator.  COMPUTE_NOW forces the PIC register to be loaded at the current
   location in the instruction stream, regardless of whether it was loaded
   previously.  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
   true and null PIC_REG is only supported iff COMPUTE_NOW is false.

   Returns the register REG into which the PIC load is performed.
*/

rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
                        bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      /* VxWorks does not impose a fixed gap between segments; the run-time
         gap can be different from the object-file gap.  We therefore can't
         use GOTOFF unless we are absolutely sure that the symbol is in the
         same segment as the GOT.  Unfortunately, the flexibility of linker
         scripts means that we can't be sure of that in general, so assume
         that GOTOFF is never valid on VxWorks.  */
      /* References to weak symbols cannot be resolved locally: they
         may be overridden by a non-weak definition at link time.  */
      rtx_insn *insn;
      if ((GET_CODE (orig) == LABEL_REF
           || (GET_CODE (orig) == SYMBOL_REF
               && SYMBOL_REF_LOCAL_P (orig)
               && (SYMBOL_REF_DECL (orig)
                   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
               && (!SYMBOL_REF_FUNCTION_P (orig)
                   || arm_fdpic_local_funcdesc_p (orig))))
          && NEED_GOT_RELOC
          && arm_pic_data_is_text_relative)
        insn = arm_pic_static_addr (orig, reg);
      else
        {
          /* If this function doesn't have a pic register, create one now.  */
          require_pic_register (pic_reg, compute_now);

          if (pic_reg == NULL_RTX)
            pic_reg = cfun->machine->pic_reg;

          insn = calculate_pic_address_constant (reg, pic_reg, orig);
        }

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
         by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      /* Already expressed relative to the PIC register: nothing to do.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
        return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
          && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
        return orig;

      /* Handle the case where we have:
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
         CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
        {
          gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
          return orig;
        }

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      /* Legitimize both operands of the PLUS.  REG is offered to the
         second call only if the first did not use it for the base.  */
      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
                                     pic_reg, compute_now);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
                                       base == reg ? 0 : reg, pic_reg,
                                       compute_now);

      if (CONST_INT_P (offset))
        {
          /* The base register doesn't really matter, we only want to
             test the index for the appropriate mode.  */
          if (!arm_legitimate_index_p (mode, offset, SET, 0))
            {
              gcc_assert (can_create_pseudo_p ());
              offset = force_reg (Pmode, offset);
            }

          /* Still a constant, i.e. it was a legitimate index: fold it
             into the base.  */
          if (CONST_INT_P (offset))
            return plus_constant (Pmode, base, INTVAL (offset));
        }

      /* For these wider modes the sum is computed into REG explicitly
         rather than returned as a PLUS expression.  */
      if (GET_MODE_SIZE (mode) > 4
          && (GET_MODE_CLASS (mode) == MODE_INT
              || TARGET_SOFT_FLOAT))
        {
          emit_insn (gen_addsi3 (reg, base, offset));
          return reg;
        }

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  /* Anything else is returned unchanged.  */
  return orig;
}


/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.
   Note that REG is expanded unparenthesized and more than once, so pass a
   plain variable or constant.  */
#define callee_saved_reg_p(reg) \
  (!call_used_or_fixed_reg_p (reg) \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))

/* Return a mask for the call-clobbered low registers that are unused
   at the end of the prologue.  Bit N of the mask corresponds to register
   FIRST_LO_REGNUM + N.  */
static unsigned long
thumb1_prologue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  /* Registers live out of the entry block.  */
  bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}

/* Similarly for the start of the epilogue.
*/ static unsigned long thumb1_epilogue_unused_call_clobbered_lo_regs (void) { unsigned long mask = 0; bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun)); for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++) if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg)) mask |= 1 << (reg - FIRST_LO_REGNUM); return mask; } /* Find a spare register to use during the prolog of a function. */ static int thumb_find_work_register (unsigned long pushed_regs_mask) { int reg; unsigned long unused_regs = thumb1_prologue_unused_call_clobbered_lo_regs (); /* Check the argument registers first as these are call-used. The register allocation order means that sometimes r3 might be used but earlier argument registers might not, so check them all. */ for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--) if (unused_regs & (1 << (reg - FIRST_LO_REGNUM))) return reg; /* Otherwise look for a call-saved register that is going to be pushed. */ for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --) if (pushed_regs_mask & (1 << reg)) return reg; if (TARGET_THUMB2) { /* Thumb-2 can use high regs. */ for (reg = FIRST_HI_REGNUM; reg < 15; reg ++) if (pushed_regs_mask & (1 << reg)) return reg; } /* Something went wrong - thumb_compute_save_reg_mask() should have arranged for a suitable register to be pushed. */ gcc_unreachable (); } static GTY(()) int pic_labelno; /* Generate code to load the PIC register. In thumb mode SCRATCH is a low register. 
*/

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
{
  rtx l1, labelno, pic_tmp, pic_rtx;

  /* Nothing to load when the function does not use the PIC register, or
     when a single PIC base or FDPIC is in use.  */
  if (crtl->uses_pic_offset_table == 0
      || TARGET_SINGLE_PIC_BASE
      || TARGET_FDPIC)
    return;

  gcc_assert (flag_pic);

  if (pic_reg == NULL_RTX)
    pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      /* Load the address in VXWORKS_GOTT_BASE, dereference it, then apply
         VXWORKS_GOTT_INDEX via the pic_offset_arm pattern.  */
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
         never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
         addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
                                UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
        {
          emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
      else /* TARGET_THUMB1 */
        {
          if (arm_pic_register != INVALID_REGNUM
              && REGNO (pic_reg) > LAST_LO_REGNUM)
            {
              /* We will have pushed the pic register, so we should always be
                 able to find a work register.  */
              pic_tmp = gen_rtx_REG (SImode,
                                     thumb_find_work_register (saved_regs));
              emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
              emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
              emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
            }
          else if (arm_pic_register != INVALID_REGNUM
                   && arm_pic_register > LAST_LO_REGNUM
                   && REGNO (pic_reg) <= LAST_LO_REGNUM)
            {
              /* PIC_REG is a low register but the fixed PIC register is a
                 high one: load into PIC_REG, then copy to the fixed
                 register and mark it used.  */
              emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
              emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
              emit_use (gen_rtx_REG (Pmode, arm_pic_register));
            }
          else
            emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
        }
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}

/* Try to determine whether an object, referenced via ORIG, will be
   placed in the text or data segment.  This is used in FDPIC mode, to
   decide which relocations to use when accessing ORIG.  *IS_READONLY
   is set to true if ORIG is a read-only location, false otherwise.
   Return true if we could determine the location of ORIG, false
   otherwise.  *IS_READONLY is valid only when we return true.
   ORIG must be a LABEL_REF or a SYMBOL_REF; anything else reaches
   gcc_unreachable.  */
static bool
arm_is_segment_info_known (rtx orig, bool *is_readonly)
{
  *is_readonly = false;

  /* Labels are treated as read-only.  */
  if (GET_CODE (orig) == LABEL_REF)
    {
      *is_readonly = true;
      return true;
    }

  if (SYMBOL_REF_P (orig))
    {
      /* Constant-pool entries are treated as read-only.  */
      if (CONSTANT_POOL_ADDRESS_P (orig))
        {
          *is_readonly = true;
          return true;
        }
      if (SYMBOL_REF_LOCAL_P (orig)
          && !SYMBOL_REF_EXTERNAL_P (orig)
          && SYMBOL_REF_DECL (orig)
          && (!DECL_P (SYMBOL_REF_DECL (orig))
              || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
        {
          tree decl = SYMBOL_REF_DECL (orig);
          /* The initializer, if any, is used to compute the relocation
             kind passed to decl_readonly_section.  */
          tree init = (TREE_CODE (decl) == VAR_DECL)
            ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
            ? decl : 0;
          int reloc = 0;
          bool named_section, readonly;

          if (init && init != error_mark_node)
            reloc = compute_reloc_for_constant (init);

          named_section = TREE_CODE (decl) == VAR_DECL
            && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
          readonly = decl_readonly_section (decl, reloc);

          /* We don't know where the link script will put a named
             section, so return false in such a case.  */
          if (named_section)
            return false;

          *is_readonly = readonly;
          return true;
        }

      /* We don't know.  */
      return false;
    }

  gcc_unreachable ();
}

/* Generate code to load the address of a static var when flag_pic is set.
   ORIG is the symbol or label and REG receives its address.  Returns the
   last insn emitted.  Under FDPIC a non-function symbol whose segment is
   unknown goes through the GOT, and a function symbol or a data symbol
   not known to be read-only uses GOTOFF; every other case uses a
   PC-relative load.  */
static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;
  rtx_insn *insn;

  gcc_assert (flag_pic);

  bool is_readonly = false;
  bool info_known = false;

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig))
    info_known = arm_is_segment_info_known (orig, &is_readonly);

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig)
      && !info_known)
    {
      /* We don't know where orig is stored, so we have be pessimistic and
         use a GOT relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      insn = calculate_pic_address_constant (reg, pic_reg, orig);
    }
  else if (TARGET_FDPIC
           && SYMBOL_REF_P (orig)
           && (SYMBOL_REF_FUNCTION_P (orig)
               || !is_readonly))
    {
      /* We use the GOTOFF relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      /* Note: this L1 shadows the function-scope L1 declared above.  */
      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
      emit_insn (gen_movsi (reg, l1));
      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
    }
  else
    {
      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
         PC-relative access.  */
      /* We use an UNSPEC rather than a LABEL_REF because this label
         never appears in the code stream.  */
      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
         addition, on the Thumb it is 'dot + 4'.  */
      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
                                   UNSPEC_SYMBOL_OFFSET);
      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
                                                   labelno));
    }

  return insn;
}

/* Return nonzero if X is valid as an ARM state addressing register.
   With STRICT_P nonzero the register number must satisfy
   ARM_REGNO_OK_FOR_BASE_P; otherwise any ARM core register, any pseudo,
   the frame pointer and the arg pointer are accepted.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
          || regno >= FIRST_PSEUDO_REGISTER
          || regno == FRAME_POINTER_REGNUM
          || regno == ARG_POINTER_REGNUM);
}

/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  /* (minus A B) where A mentions a symbol and B mentions a label.  */
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}

/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}

/* Return nonzero if X is a valid ARM state address operand.
*/
/* MODE is the mode of the access, OUTER the rtx code that wraps the
   memory reference (forwarded to arm_legitimate_index_p), and STRICT_P
   selects strict register checking.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
                                int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  /* A bare base register is always a valid address.  */
  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD && (mode == DImode || mode == DFmode));

  /* Auto-increment/decrement forms: only the base register needs
     checking.  PRE_INC/POST_DEC additionally need LDRD or a size of at
     most 4 bytes.  */
  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
         to fixup invalid register choices.  */
      if (use_ldrd
          && GET_CODE (x) == POST_MODIFY
          && REG_P (addend))
        return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
              && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* TImode and NEON structure modes accept nothing beyond the forms
     already handled above.  */
  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      /* Either operand may be the base.  With the base first, the other
         operand must be a valid constant index or (non-strict only)
         something that will become an index register.  */
      return ((arm_address_register_rtx_p (xop0, strict_p)
               && ((CONST_INT_P (xop1)
                    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
              && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  /* Non-float constant pool references, unless under PIC the pooled
     constant mentions a symbol that is not a PC-relative difference.  */
  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}

/* Return true if we can avoid creating a constant pool entry for x.  */
static bool
can_avoid_literal_pool_for_label_p (rtx x)
{
  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend a 8bit value to 32bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (x))
    return 1;
  return 0;
}


/* Return nonzero if X is a valid Thumb-2 address operand.  MODE is the
   mode of the access and STRICT_P selects strict register checking.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  /* A bare base register is always a valid address.  */
  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
          && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
           && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
           && GET_CODE (XEXP (x, 1)) == PLUS
           && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
        return 0;

      offset = INTVAL(addend);
      /* Accesses of up to 4 bytes take an offset in (-256, 256).  */
      if (GET_MODE_SIZE (mode) <= 4)
        return (offset > -256 && offset < 256);

      /* Larger accesses need LDRD and a multiple of 4 in (-1024, 1024).  */
      return (use_ldrd && offset > -1024 && offset < 1024
              && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
           && (code == LABEL_REF
               || (code == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
               && (thumb2_legitimate_index_p (mode, xop1, strict_p)
                   || (!strict_p && will_be_in_index_register (xop1))))
              || (arm_address_register_rtx_p (xop1, strict_p)
                  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  /* Must be tested before the constant pool case below, which would
     otherwise accept the same SYMBOL_REF.  */
  else if (can_avoid_literal_pool_for_label_p (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && code == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}

/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  MODE is the mode of the access, OUTER the rtx code
   wrapping the memory reference, and STRICT_P selects strict register
   checking.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
                        int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1016
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
            && INTVAL (index) < 1024
            && INTVAL (index) > -1024
            && (INTVAL (index) & 3) == 0);

  /* A register index is fine for accesses of at most 4 bytes.  */
  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
        {
          HOST_WIDE_INT val = INTVAL (index);

          /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
             If vldr is selected it uses arm_coproc_mem_operand.  */
          if (TARGET_LDRD)
            return val > -256 && val < 256;
          else
            return val > -4096 && val < 4092;
        }

      /* Register index for a doubleword access requires LDRD.  */
      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  /* Scaled-register indexes; excluded on arm_arch4 for halfword modes
     and for sign-extended byte loads.  */
  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
            && (mode == HImode
                || mode == HFmode
                || (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
        {
          rtx xiop0 = XEXP (index, 0);
          rtx xiop1 = XEXP (index, 1);

          return ((arm_address_register_rtx_p (xiop0, strict_p)
                   && power_of_two_operand (xiop1, SImode))
                  || (arm_address_register_rtx_p (xiop1, strict_p)
                      && power_of_two_operand (xiop0, SImode)));
        }
      else if (code == LSHIFTRT || code == ASHIFTRT
               || code == ASHIFT || code == ROTATERT)
        {
          rtx op = XEXP (index, 1);

          /* Shift amount must be a constant in [1, 31].  */
          return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
                  && CONST_INT_P (op)
                  && INTVAL (op) > 0
                  && INTVAL (op) <= 31);
        }
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
          || mode == HFmode
          || (outer == SIGN_EXTEND && mode == QImode))
        range = 256;
      else
        range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  /* Remaining case: a constant strictly inside (-RANGE, RANGE).  */
  return (code == CONST_INT
          && INTVAL (index) < range
          && INTVAL (index) > -range);
}

/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.
*/ static bool thumb2_index_mul_operand (rtx op) { HOST_WIDE_INT val; if (!CONST_INT_P (op)) return false; val = INTVAL(op); return (val == 1 || val == 2 || val == 4 || val == 8); } /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */ static int thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p) { enum rtx_code code = GET_CODE (index); /* ??? Combine arm and thumb2 coprocessor addressing modes. */ /* Standard coprocessor addressing modes. */ if (TARGET_HARD_FLOAT && (mode == SFmode || mode == DFmode)) return (code == CONST_INT && INTVAL (index) < 1024 /* Thumb-2 allows only > -256 index range for it's core register load/stores. Since we allow SF/DF in core registers, we have to use the intersection between -256~4096 (core) and -1024~1024 (coprocessor). */ && INTVAL (index) > -256 && (INTVAL (index) & 3) == 0); if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode)) { /* For DImode assume values will usually live in core regs and only allow LDRD addressing modes. */ if (!TARGET_LDRD || mode != DImode) return (code == CONST_INT && INTVAL (index) < 1024 && INTVAL (index) > -1024 && (INTVAL (index) & 3) == 0); } /* For quad modes, we restrict the constant offset to be slightly less than what the instruction format permits. We do this because for quad mode moves, we will actually decompose them into two separate double-mode reads or writes. INDEX must therefore be a valid (double-mode) offset and so should INDEX+8. */ if (TARGET_NEON && VALID_NEON_QREG_MODE (mode)) return (code == CONST_INT && INTVAL (index) < 1016 && INTVAL (index) > -1024 && (INTVAL (index) & 3) == 0); /* We have no such constraint on double mode offsets, so we permit the full range of the instruction format. 
*/ if (TARGET_NEON && VALID_NEON_DREG_MODE (mode)) return (code == CONST_INT && INTVAL (index) < 1024 && INTVAL (index) > -1024 && (INTVAL (index) & 3) == 0); if (arm_address_register_rtx_p (index, strict_p) && (GET_MODE_SIZE (mode) <= 4)) return 1; if (mode == DImode || mode == DFmode) { if (code == CONST_INT) { HOST_WIDE_INT val = INTVAL (index); /* Thumb-2 ldrd only has reg+const addressing modes. Assume we emit ldrd or 2x ldr if !TARGET_LDRD. If vldr is selected it uses arm_coproc_mem_operand. */ if (TARGET_LDRD) return IN_RANGE (val, -1020, 1020) && (val & 3) == 0; else return IN_RANGE (val, -255, 4095 - 4); } else return 0; } if (code == MULT) { rtx xiop0 = XEXP (index, 0); rtx xiop1 = XEXP (index, 1); return ((arm_address_register_rtx_p (xiop0, strict_p) && thumb2_index_mul_operand (xiop1)) || (arm_address_register_rtx_p (xiop1, strict_p) && thumb2_index_mul_operand (xiop0))); } else if (code == ASHIFT) { rtx op = XEXP (index, 1); return (arm_address_register_rtx_p (XEXP (index, 0), strict_p) && CONST_INT_P (op) && INTVAL (op) > 0 && INTVAL (op) <= 3); } return (code == CONST_INT && INTVAL (index) < 4096 && INTVAL (index) > -256); } /* Return nonzero if X is valid as a 16-bit Thumb state base register. */ static int thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p) { int regno; if (!REG_P (x)) return 0; regno = REGNO (x); if (strict_p) return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode); return (regno <= LAST_LO_REGNUM || regno > LAST_VIRTUAL_REGISTER || regno == FRAME_POINTER_REGNUM || (GET_MODE_SIZE (mode) >= 4 && (regno == STACK_POINTER_REGNUM || regno >= FIRST_PSEUDO_REGISTER || x == hard_frame_pointer_rtx || x == arg_pointer_rtx))); } /* Return nonzero if x is a legitimate index register. This is the case for any base register that can access a QImode object. 
*/
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  /* Delegates to the base-register check using QImode.  */
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}

/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.

   MODE is the mode of the access and STRICT_P selects strict register
   checking.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  /* With MOVT available, reject constant pool symbols when the literal
     pool is disabled (see can_avoid_literal_pool_for_label_p).  */
  if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
          || reg_mentioned_p (arg_pointer_rtx, x)
          || reg_mentioned_p (virtual_incoming_args_rtx, x)
          || reg_mentioned_p (virtual_outgoing_args_rtx, x)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
          || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
           && !arm_disable_literal_pool)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
           && reload_completed
           && (GET_CODE (x) == LABEL_REF
               || (GET_CODE (x) == CONST
                   && GET_CODE (XEXP (x, 0)) == PLUS
                   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
                   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
           && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
         will be replaced with STACK, and SP relative addressing only
         permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
          && XEXP (x, 0) != frame_pointer_rtx
          && XEXP (x, 1) != frame_pointer_rtx
          && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
          && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
              || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
        return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
                || XEXP (x, 0) == arg_pointer_rtx)
               && CONST_INT_P (XEXP (x, 1))
               && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
        return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
         larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
         just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
               && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && INTVAL (XEXP (x, 1)) >= 0
               && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;

      /* Frame pointer, arg pointer and virtual pointer registers plus a
         word-aligned constant, for word-sized or larger accesses; no
         range check is applied to the offset here.  */
      else if (REG_P (XEXP (x, 0))
               && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
                   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
                   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
                       && REGNO (XEXP (x, 0))
                          <= LAST_VIRTUAL_POINTER_REGISTER))
               && GET_MODE_SIZE (mode) >= 4
               && CONST_INT_P (XEXP (x, 1))
               && (INTVAL (XEXP (x, 1)) & 3) == 0)
        return 1;
    }

  /* Word-sized non-float constant pool references, unless the literal
     pool is disabled or, under PIC, the pooled constant mentions a
     symbol that is not a PC-relative difference.  */
  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
           && GET_MODE_SIZE (mode) == 4
           && GET_CODE (x) == SYMBOL_REF
           && CONSTANT_POOL_ADDRESS_P (x)
           && !arm_disable_literal_pool
           && ! (flag_pic
                 && symbol_mentioned_p (get_pool_constant (x))
                 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}

/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      /* Bytes: 0..31.  */
      return val >= 0 && val < 32;

    case 2:
      /* Halfwords: even offsets 0..62.  */
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      /* Everything else: word-aligned, and the whole object must end
         within the first 128 bytes.  */
      return (val >= 0
              && (val + GET_MODE_SIZE (mode)) <= 128
              && (val & 3) == 0);
    }
}

/* Dispatch the address legitimacy check for X in MODE to the ARM,
   Thumb-2 or Thumb-1 implementation according to the current target
   state.  ARM state is checked with SET as the outer code.  */
bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}

/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.
*/
static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  /* 32-bit targets keep the requested class unchanged.  */
  if (TARGET_32BIT)
    return rclass;
  else
    {
      /* Otherwise narrow GENERAL_REGS to LO_REGS.  */
      if (rclass == GENERAL_REGS)
        return LO_REGS;
      else
        return rclass;
    }
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

/* Return the libfunc rtx for __tls_get_addr, creating and caching it in
   tls_get_addr_libfunc on first use.  */
static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

/* Emit code that loads the thread pointer into TARGET, allocating a
   fresh SImode pseudo when TARGET is null.  Return the register that
   holds the result.  */
rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
         otherwise other uses of r0 (e.g. setting up function arguments) may
         clobber the value.  */

      rtx tmp;

      if (TARGET_FDPIC)
        {
          rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
          rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);

          emit_insn (gen_load_tp_soft_fdpic ());

          /* Restore r9.  */
          emit_insn (gen_restore_pic_register_after_call(fdpic_reg, initial_fdpic_reg));
        }
      else
        emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}

/* Wrap X in an SImode CONST and move it into REG, allocating a fresh
   SImode pseudo when REG is NULL_RTX.  Return the register used.  */
static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}

/* Build, as a detached insn sequence, a call to __tls_get_addr for the
   TLS symbol X using relocation RELOC (which must not be TLS_DESCSEQ).
   REG, if non-null, holds the argument.  The rtx for the call's result
   is stored in *VALUEP and the generated insns are returned.  */
static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno = NULL_RTX, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  if (TARGET_FDPIC)
    {
      /* FDPIC: the operand is added to the FDPIC register below, so no
         PC label is needed.  */
      sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (2, x, GEN_INT (reloc)),
                            UNSPEC_TLS);
    }
  else
    {
      /* Otherwise the operand is PC-relative: record a fresh label and
         the PC read-ahead (8 in ARM state, 4 otherwise).  */
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);

      sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (reloc), label,
                                       GEN_INT (TARGET_ARM ? 8 : 4)),
                            UNSPEC_TLS);
    }
  reg = load_tls_operand (sum, reg);

  if (TARGET_FDPIC)
      emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
  else if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
                                     LCT_PURE, /* LCT_CONST?  */
                                     Pmode, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}

/* Emit the TLS descriptor sequence for symbol X: load the TLS_DESCSEQ
   operand into r0, emit the tlscall insn, and copy r0 into REG (a new
   SImode pseudo when REG is null; REG must not itself be r0).  Return
   the register holding the result.  */
static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
                                       gen_rtx_CONST (VOIDmode, label),
                                       GEN_INT (!TARGET_ARM)),
                            UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}


/* Expand a reference to the TLS symbol X according to its TLS model,
   emitting whatever insns are required.  REG, if non-null, is a register
   that may be used for intermediate values.  Return an rtx for the
   address of X.  */
rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          gcc_assert (!TARGET_FDPIC);

          /* Descriptor scheme: thread pointer plus descriptor result.  */
          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          /* Original scheme */
          if (TARGET_FDPIC)
            insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
          else
            insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, x);
        }
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
        {
          gcc_assert (!TARGET_FDPIC);

          reg = arm_tls_descseq_addr (x, reg);

          tp = arm_load_tp (NULL_RTX);

          dest = gen_rtx_PLUS (Pmode, tp, reg);
        }
      else
        {
          if (TARGET_FDPIC)
            insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
          else
            insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

          /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
             share the LDM result with other LD model accesses.  */
          eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
                                UNSPEC_TLS);
          dest = gen_reg_rtx (Pmode);
          emit_libcall_block (insns, dest, ret, eqv);

          /* Load the addend.  */
          addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
                                                     GEN_INT (TLS_LDO32)),
                                   UNSPEC_TLS);
          addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
          dest = gen_rtx_PLUS (Pmode, dest, addend);
        }
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_FDPIC)
        {
          /* FDPIC: add the operand to the FDPIC register, then load the
             offset through the resulting address.  */
          sum = gen_rtx_UNSPEC (Pmode,
                                gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
                                UNSPEC_TLS);
          reg = load_tls_operand (sum, reg);
          emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
          emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
        }
      else
        {
          labelno = GEN_INT (pic_labelno++);
          label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
          label = gen_rtx_CONST (VOIDmode, label);
          sum = gen_rtx_UNSPEC (Pmode,
                                gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
                                           GEN_INT (TARGET_ARM ? 8 : 4)),
                                UNSPEC_TLS);
          reg = load_tls_operand (sum, reg);

          if (TARGET_ARM)
            emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
          else if (TARGET_THUMB2)
            emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
          else
            {
              /* Neither ARM nor Thumb-2: do the PC-relative add and the
                 load as two separate insns.  */
              emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
              emit_move_insn (reg, gen_const_mem (SImode, reg));
            }
        }

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      /* The TLS_LE32 offset is forced into a register and added to the
         thread pointer.  */
      reg = gen_rtx_UNSPEC (Pmode,
                            gen_rtvec (2, x, GEN_INT (TLS_LE32)),
                            UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.
*/
/* X is the address to fix up, ORIG_X the original address expression
   (used again for the PIC pass at the end) and MODE the mode of the
   memory access.  */
rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      /* Peel a constant addend off (const (plus SYM ADDEND)).  */
      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
        {
          addend = XEXP (XEXP (x, 0), 1);
          x = XEXP (XEXP (x, 0), 0);
        }

      if (GET_CODE (x) != SYMBOL_REF)
        return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      /* With an addend, re-attach it and carry on legitimizing the sum;
         otherwise the TLS address is the answer.  */
      if (addend)
        {
          x = gen_rtx_PLUS (SImode, x, addend);
          orig_x = x;
        }
      else
        return x;
    }

  if (TARGET_THUMB1)
    return thumb_legitimize_address (x, orig_x, mode);

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      /* Force symbol-free constants into registers; a CONST_INT in
         operand 1 is kept so it can be split below.  */
      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
          && !symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
          && CONST_INT_P (xop1))
        {
          HOST_WIDE_INT n, low_n;
          rtx base_reg, val;
          n = INTVAL (xop1);

          /* Split N into a high part, folded into a new base register,
             and a low part LOW_N kept as the offset.  */
          /* VFP addressing modes actually allow greater offsets, but for
             now we just stick with the lowest common denominator.  */
          if (mode == DImode || mode == DFmode)
            {
              low_n = n & 0x0f;
              n &= ~0x0f;
              /* Keep the residual offset within [-11, 4].  */
              if (low_n > 4)
                {
                  n += 16;
                  low_n -= 16;
                }
            }
          else
            {
              /* TImode gets no residual offset; other modes keep the
                 low 12 bits of the magnitude with N's sign.  */
              low_n = ((mode) == TImode ? 0
                       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
              n -= low_n;
            }

          base_reg = gen_reg_rtx (SImode);
          val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
          emit_move_insn (base_reg, val);
          x = plus_constant (Pmode, base_reg, low_n);
        }
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
        xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
        xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
        x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow the mini pool to be avoided entirely.  */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
         only use a 8-bit index.  So let's use a 12-bit index for
         SImode only and hope that arm_gen_constant will enable LDRB
         to use more bits.  */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
        {
          /* It'll most probably be more efficient to generate the
             base with more bits set and use a negative index instead.
             Don't do this for Thumb as negative offsets are much more
             limited.  */
          base |= mask;
          index -= mask;
        }
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
                                          false /*compute_now*/);

      /* Only adopt the PIC result if it actually changed something.  */
      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}


/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.
*/
/* X is the address to fix up, ORIG_X the original address expression
   (used again for the PIC pass at the end) and MODE the mode of the
   memory access.  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  /* REG+const where the constant is negative or at least 32 times the
     access size.  */
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
          || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
         then offsetting that.  Don't do this when optimizing for space
         since it can cause too many CSEs.  */
      /* NOTE(review): the sentence above says not to do this when
         optimizing for space, yet the condition below requires
         optimize_size to be set -- confirm which is intended.  */
      if (optimize_size && offset >= 0
          && offset < 256 + 31 * GET_MODE_SIZE (mode))
        {
          HOST_WIDE_INT delta;

          /* DELTA is the part kept as the address offset; the rest
             (OFFSET - DELTA) is added to the base.  */
          if (offset >= 256)
            delta = offset - (256 - GET_MODE_SIZE (mode));
          else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
            delta = 31 * GET_MODE_SIZE (mode);
          else
            delta = offset & (~31 * GET_MODE_SIZE (mode));

          xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
                                NULL_RTX);
          x = plus_constant (Pmode, xop0, delta);
        }
      else if (offset < 0 && offset > -256)
        /* Small negative offsets are best done with a subtract before the
           dereference, forcing these into a register normally takes two
           instructions.  */
        x = force_operand (x, NULL_RTX);
      else
        {
          /* For the remaining cases, force the constant into a register.  */
          xop1 = force_reg (SImode, xop1);
          x = gen_rtx_PLUS (SImode, xop0, xop1);
        }
    }
  /* Operand 1 is already a register but operand 0 is not: compute
     operand 0 into a register.  */
  else if (GET_CODE (x) == PLUS
           && s_register_operand (XEXP (x, 1), SImode)
           && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
         references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
                                          false /*compute_now*/);

      /* Only adopt the PIC result if it actually changed something.  */
      if (new_x != orig_x)
        x = new_x;
    }

  return x;
}

/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      /* Note: this X shadows the function parameter and names the
         sub-rtx currently being visited.  */
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
        {
          /* ARM currently does not provide relocations to encode TLS variables
             into AArch32 instructions, only data, so there is no way to
             currently implement these if a literal pool is disabled.  */
          if (arm_disable_literal_pool)
            sorry ("accessing thread-local storage is not currently supported "
                   "with %<-mpure-code%> or %<-mslow-flash-data%>");

          return true;
        }

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
         TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
        iter.skip_subrtxes ();
    }
  return false;
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool XXX.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  /* Without PIC, only constants that mention no label are accepted.  */
  return flag_pic || !label_mentioned_p (x);
}

/* Thumb counterpart of arm_legitimate_constant_p_1; selected by
   arm_legitimate_constant_p when TARGET_32BIT is false.  */
static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (CONST_INT_P (x)
          || CONST_DOUBLE_P (x)
          || CONSTANT_ADDRESS_P (x)
          || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
          || flag_pic);
}

/* Return true if X is a legitimate constant for MODE: it must be
   forceable into the constant pool and pass the ARM or Thumb specific
   check for the current target.  */
static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
          && (TARGET_32BIT
              ? arm_legitimate_constant_p_1 (mode, x)
              : thumb_legitimate_constant_p (mode, x)));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.
*/
/* Return true if X must not be placed in the constant pool.  */
static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;

  /* Separate X into a base and a constant offset.  */
  split_const (x, &base, &offset);

  if (SYMBOL_REF_P (base))
    {
      /* Function symbols cannot have an offset due to the Thumb bit.  */
      if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
          && INTVAL (offset) != 0)
        return true;

      /* On targets that require it, the offset must stay inside the
         symbol's own block.  */
      if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
          && !offset_within_block_p (base, INTVAL (offset)))
        return true;
    }
  /* Anything referring to TLS symbols cannot be pooled either.  */
  return arm_tls_referenced_p (x);
}

/* True if X is a REG or a SUBREG of a REG.  */
#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)							\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

/* The REG underlying X, where X satisfies REG_OR_SUBREG_REG.  */
#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))

/* Estimate the cost of rtx X for Thumb-1.  CODE selects the case to
   cost (presumably GET_CODE (X); verify against callers) and OUTER is
   the code of the enclosing expression.  Most results are expressed
   with COSTS_N_INSNS.  */
static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
        return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
        {
          int cycles = 0;
          unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

          /* One extra unit of cost per two bits of the multiplier.  */
          while (i)
            {
              i >>= 2;
              cycles++;
            }
          return COSTS_N_INSNS (2) + cycles;
        }
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
         the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      /* One insn per word, plus 4 for each side that is a MEM.  */
      return (COSTS_N_INSNS (words)
              + 4 * ((MEM_P (SET_SRC (x)))
                     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
        {
          if (UINTVAL (x) < 256
              /* 16-bit constant.  */
              || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
            return 0;
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
          /* Remaining constants cost more when the literal pool is
             disabled.  */
          return arm_disable_literal_pool
            ? COSTS_N_INSNS (8)
            : COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
                || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
         load at the equivalent of a single insn each.  */
      /* NOTE(review): assuming CODE is GET_CODE (x), X is a MEM here, so
         the GET_CODE (x) == SYMBOL_REF test below looks like it can never
         hold and the extra 4 is never added.  It was presumably meant to
         inspect the address, XEXP (x, 0) -- confirm before changing, as
         that would alter cost tuning.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      /* Start from the cost of the operand, plus one insn for DImode.  */
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
        return total;

      if (arm_arch6)
        return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
         we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}

/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs. We need more
   fine grain tuning when we have more related test cases.
*/

/* X is the expression, CODE the rtx code to cost it as, and OUTER the code
   of the containing expression.  This is the routine
   arm_rtx_costs_internal selects for Thumb-1 when SPEED_P is clear.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      if (mode == SImode)
        return COSTS_N_INSNS (1);
      return COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
         defined by RTL expansion, especially for the expansion of
         multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
           && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
          || (GET_CODE (XEXP (x, 1)) == MULT
              && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
        return COSTS_N_INSNS (2);
      return COSTS_N_INSNS (1);

    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      {
        if (!CONST_INT_P (XEXP (x, 1)))
          return COSTS_N_INSNS (1);

        /* Thumb1 mul instruction can't operate on const.  We must load it
           into a register first.  */
        const int const_size
          = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);

        /* For the targets which have a very small and high-latency multiply
           unit, we prefer to synthesize the mult with up to 5 instructions,
           giving a good balance between size and performance.  */
        if (arm_arch6m && arm_m_profile_small_mul)
          return COSTS_N_INSNS (5);

        return COSTS_N_INSNS (1) + const_size;
      }

    case SET:
      {
        /* A SET doesn't have a mode, so let's look at the SET_DEST to get
           the mode.  */
        const int words
          = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
        rtx src = SET_SRC (x);

        const bool extra_insn
          = (satisfies_constraint_J (src)
             || satisfies_constraint_K (src)
             /* Too big an immediate for a 2-byte mov, using MOVT.  */
             || (CONST_INT_P (src)
                 && UINTVAL (src) >= 256
                 && TARGET_HAVE_MOVT
                 && satisfies_constraint_j (src))
             /* thumb1_movdi_insn.  */
             || (words > 1 && MEM_P (src)));

        return COSTS_N_INSNS (words) + (extra_insn ? COSTS_N_INSNS (1) : 0);
      }

    case CONST_INT:
      if (outer == SET)
        {
          const HOST_WIDE_INT val = INTVAL (x);

          if ((unsigned HOST_WIDE_INT) val < 256)
            return COSTS_N_INSNS (1);

          /* movw is 4byte long.  */
          if (TARGET_HAVE_MOVT && (val & 0xffff0000) == 0)
            return COSTS_N_INSNS (2);

          /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
          if (val >= -255 && val <= -1)
            return COSTS_N_INSNS (2);

          /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
          if (thumb_shiftable_const (val))
            return COSTS_N_INSNS (2);

          if (arm_disable_literal_pool)
            return COSTS_N_INSNS (8);
          return COSTS_N_INSNS (3);
        }

      if ((outer == PLUS || outer == COMPARE)
          && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;

      if ((outer == IOR || outer == XOR || outer == AND)
          && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);

      if (outer == AND)
        {
          /* This duplicates the tests in the andsi3 expander.  Note that
             a match and a miss currently yield the same cost.  */
          const HOST_WIDE_INT val = INTVAL (x);

          for (int bit = 9; bit <= 31; bit++)
            {
              const HOST_WIDE_INT low_mask = (HOST_WIDE_INT_1 << bit) - 1;

              if (low_mask == val || low_mask == ~val)
                return COSTS_N_INSNS (2);
            }
        }
      else if (outer == ASHIFT || outer == ASHIFTRT || outer == LSHIFTRT)
        return 0;

      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      /* One insn, plus one more per additional word.  */
      /* NOTE(review): the SYMBOL_REF test is applied to X itself rather
         than to its address, so it looks unreachable whenever CODE is
         GET_CODE (X) -- confirm intent before changing.  */
      return (COSTS_N_INSNS (1)
              + COSTS_N_INSNS (1)
                * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  An arm that is PC is costed much higher.  */
      return (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
             ? 14 : 2;

    case ZERO_EXTEND:
      {
        /* XXX still guessing.  The base figure depends on the mode being
           extended; widening to DImode and a memory operand each add a
           fixed surcharge.  */
        rtx inner = XEXP (x, 0);
        const int mem_extra = MEM_P (inner) ? 10 : 0;
        const int wide_extra = (mode == DImode) ? 4 : 0;

        switch (GET_MODE (inner))
          {
          case E_QImode:
            return 1 + wide_extra + mem_extra;

          case E_HImode:
            return 4 + wide_extra + mem_extra;

          case E_SImode:
            return 1 + mem_extra;

          default:
            return 99;
          }
      }

    default:
      return 99;
    }
}

/* Helper function for arm_rtx_costs.  If one operand of the OP, a
   PLUS, adds the carry flag, then return the other operand.
If neither is a carry, return OP unchanged. */ static rtx strip_carry_operation (rtx op) { gcc_assert (GET_CODE (op) == PLUS); if (arm_carry_operation (XEXP (op, 0), GET_MODE (op))) return XEXP (op, 1); else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op))) return XEXP (op, 0); return op; } /* Helper function for arm_rtx_costs. If the operand is a valid shift operand, then return the operand that is being shifted. If the shift is not by a constant, then set SHIFT_REG to point to the operand. Return NULL if OP is not a shifter operand. */ static rtx shifter_op_p (rtx op, rtx *shift_reg) { enum rtx_code code = GET_CODE (op); if (code == MULT && CONST_INT_P (XEXP (op, 1)) && exact_log2 (INTVAL (XEXP (op, 1))) > 0) return XEXP (op, 0); else if (code == ROTATE && CONST_INT_P (XEXP (op, 1))) return XEXP (op, 0); else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT || code == ASHIFTRT) { if (!CONST_INT_P (XEXP (op, 1))) *shift_reg = XEXP (op, 1); return XEXP (op, 0); } return NULL; } static bool arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost) { const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost; rtx_code code = GET_CODE (x); gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE); switch (XINT (x, 1)) { case UNSPEC_UNALIGNED_LOAD: /* We can only do unaligned loads into the integer unit, and we can't use LDM or LDRD. 
*/ *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x))); if (speed_p) *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load + extra_cost->ldst.load_unaligned); #ifdef NOT_YET *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x), ADDR_SPACE_GENERIC, speed_p); #endif return true; case UNSPEC_UNALIGNED_STORE: *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x))); if (speed_p) *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store + extra_cost->ldst.store_unaligned); *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p); #ifdef NOT_YET *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x), ADDR_SPACE_GENERIC, speed_p); #endif return true; case UNSPEC_VRINTZ: case UNSPEC_VRINTP: case UNSPEC_VRINTM: case UNSPEC_VRINTR: case UNSPEC_VRINTX: case UNSPEC_VRINTA: if (speed_p) *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint; return true; default: *cost = COSTS_N_INSNS (2); break; } return true; } /* Cost of a libcall. We assume one insn per argument, an amount for the call (one insn for -Os) and then one for processing the result. */ #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2)) #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \ do \ { \ shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \ if (shift_op != NULL \ && arm_rtx_shift_left_p (XEXP (x, IDX))) \ { \ if (shift_reg) \ { \ if (speed_p) \ *cost += extra_cost->alu.arith_shift_reg; \ *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \ ASHIFT, 1, speed_p); \ } \ else if (speed_p) \ *cost += extra_cost->alu.arith_shift; \ \ *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \ ASHIFT, 0, speed_p) \ + rtx_cost (XEXP (x, 1 - IDX), \ GET_MODE (shift_op), \ OP, 1, speed_p)); \ return true; \ } \ } \ while (0) /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM, considering the costs of the addressing mode and memory access separately. 
*/

/* X is the MEM, EXTRA_COST the tuning cost table, and the result is
   written to *COST.  When SPEED_P is false only the base instruction
   count (plus the PIC adjustment) is reported.  Always returns true.
   NOTE(review): the access cost always uses the load entries of the
   table; whether stores should be costed separately cannot be told from
   here.  */
static bool
arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
               int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  /* This will be split into two instructions.  Add the cost of the
     additional instruction here.  The cost of the memory access is computed
     below.  See arm.md:calculate_pic_address.  */
  if (flag_pic
      && GET_CODE (XEXP (x, 0)) == PLUS
      && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
    *cost += COSTS_N_INSNS (1);

  /* Everything below refines the estimate for speed only.  */
  if (!speed_p)
    return true;

  /* Calculate cost of the addressing mode.  REG and any address form not
     listed below use the default entry.  */
  arm_addr_mode_op op_type = AMO_DEFAULT;

  switch (GET_CODE (XEXP (x, 0)))
    {
    case MINUS:
      /* MINUS does not appear in RTL, but the architecture supports it,
         so handle this case defensively.  */
      /* fall through */
    case PLUS:
      op_type = AMO_NO_WB;
      break;

    case PRE_INC:
    case PRE_DEC:
    case POST_INC:
    case POST_DEC:
    case PRE_MODIFY:
    case POST_MODIFY:
      op_type = AMO_WB;
      break;

    default:
      break;
    }

  /* Vector modes are tested before float modes here...  */
  if (VECTOR_MODE_P (mode))
    *cost += current_tune->addr_mode_costs->vector[op_type];
  else if (FLOAT_MODE_P (mode))
    *cost += current_tune->addr_mode_costs->fp[op_type];
  else
    *cost += current_tune->addr_mode_costs->integer[op_type];

  /* Calculate cost of memory access.  ...whereas float modes are tested
     before vector modes here; keep the two orders as they are.  */
  if (FLOAT_MODE_P (mode))
    *cost += (GET_MODE_SIZE (mode) == 8
              ? extra_cost->ldst.loadd
              : extra_cost->ldst.loadf);
  else if (VECTOR_MODE_P (mode))
    *cost += extra_cost->ldst.loadv;
  else
    /* Integer modes */
    *cost += (GET_MODE_SIZE (mode) == 8
              ? extra_cost->ldst.ldrd
              : extra_cost->ldst.load);

  return true;
}

/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

We currently make no attempt to model the size savings of Thumb-2 16-bit instructions. At the normal points in compilation where this code is called we have no measure of whether the condition flags are live or not, and thus no realistic way to determine what the size will eventually be. */ static bool arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code, const struct cpu_cost_table *extra_cost, int *cost, bool speed_p) { machine_mode mode = GET_MODE (x); *cost = COSTS_N_INSNS (1); if (TARGET_THUMB1) { if (speed_p) *cost = thumb1_rtx_costs (x, code, outer_code); else *cost = thumb1_size_rtx_costs (x, code, outer_code); return true; } switch (code) { case SET: *cost = 0; /* SET RTXs don't have a mode so we get it from the destination. */ mode = GET_MODE (SET_DEST (x)); if (REG_P (SET_SRC (x)) && REG_P (SET_DEST (x))) { /* Assume that most copies can be done with a single insn, unless we don't have HW FP, in which case everything larger than word mode will require two insns. */ *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT && GET_MODE_SIZE (mode) > 4) || mode == DImode) ? 2 : 1); /* Conditional register moves can be encoded in 16 bits in Thumb mode. */ if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC) *cost >>= 1; return true; } if (CONST_INT_P (SET_SRC (x))) { /* Handle CONST_INT here, since the value doesn't have a mode and we would otherwise be unable to work out the true cost. */ *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET, 0, speed_p); outer_code = SET; /* Slightly lower the cost of setting a core reg to a constant. This helps break up chains and allows for better scheduling. */ if (REG_P (SET_DEST (x)) && REGNO (SET_DEST (x)) <= LR_REGNUM) *cost -= 1; x = SET_SRC (x); /* Immediate moves with an immediate in the range [0, 255] can be encoded in 16 bits in Thumb mode. 
*/ if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode && INTVAL (x) >= 0 && INTVAL (x) <=255) *cost >>= 1; goto const_int_cost; } return false; case MEM: return arm_mem_costs (x, extra_cost, cost, speed_p); case PARALLEL: { /* Calculations of LDM costs are complex. We assume an initial cost (ldm_1st) which will load the number of registers mentioned in ldm_regs_per_insn_1st registers; then each additional ldm_regs_per_insn_subsequent registers cost one more insn. The formula for N regs is thus: ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0) + ldm_regs_per_insn_subsequent - 1) / ldm_regs_per_insn_subsequent). Additional costs may also be added for addressing. A similar formula is used for STM. */ bool is_ldm = load_multiple_operation (x, SImode); bool is_stm = store_multiple_operation (x, SImode); if (is_ldm || is_stm) { if (speed_p) { HOST_WIDE_INT nregs = XVECLEN (x, 0); HOST_WIDE_INT regs_per_insn_1st = is_ldm ? extra_cost->ldst.ldm_regs_per_insn_1st : extra_cost->ldst.stm_regs_per_insn_1st; HOST_WIDE_INT regs_per_insn_sub = is_ldm ? extra_cost->ldst.ldm_regs_per_insn_subsequent : extra_cost->ldst.stm_regs_per_insn_subsequent; *cost += regs_per_insn_1st + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0)) + regs_per_insn_sub - 1) / regs_per_insn_sub); return true; } } return false; } case DIV: case UDIV: if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT && (mode == SFmode || !TARGET_VFP_SINGLE)) *cost += COSTS_N_INSNS (speed_p ? extra_cost->fp[mode != SFmode].div : 0); else if (mode == SImode && TARGET_IDIV) *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0); else *cost = LIBCALL_COST (2); /* Make the cost of sdiv more expensive so when both sdiv and udiv are possible udiv is prefered. */ *cost += (code == DIV ? COSTS_N_INSNS (1) : 0); return false; /* All arguments must be in registers. */ case MOD: /* MOD by a power of 2 can be expanded as: rsbs r1, r0, #0 and r0, r0, #(n - 1) and r1, r1, #(n - 1) rsbpl r0, r1, #0. 
*/ if (CONST_INT_P (XEXP (x, 1)) && exact_log2 (INTVAL (XEXP (x, 1))) > 0 && mode == SImode) { *cost += COSTS_N_INSNS (3); if (speed_p) *cost += 2 * extra_cost->alu.logical + extra_cost->alu.arith; return true; } /* Fall-through. */ case UMOD: /* Make the cost of sdiv more expensive so when both sdiv and udiv are possible udiv is prefered. */ *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0); return false; /* All arguments must be in registers. */ case ROTATE: if (mode == SImode && REG_P (XEXP (x, 1))) { *cost += (COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p)); if (speed_p) *cost += extra_cost->alu.shift_reg; return true; } /* Fall through */ case ROTATERT: case ASHIFT: case LSHIFTRT: case ASHIFTRT: if (mode == DImode && CONST_INT_P (XEXP (x, 1))) { *cost += (COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p)); if (speed_p) *cost += 2 * extra_cost->alu.shift; /* Slightly disparage left shift by 1 at so we prefer adddi3. */ if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode)) *cost += 1; return true; } else if (mode == SImode) { *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p); /* Slightly disparage register shifts at -Os, but not by much. */ if (!CONST_INT_P (XEXP (x, 1))) *cost += (speed_p ? extra_cost->alu.shift_reg : 1 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p)); return true; } else if (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) < 4) { if (code == ASHIFT) { *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p); /* Slightly disparage register shifts at -Os, but not by much. */ if (!CONST_INT_P (XEXP (x, 1))) *cost += (speed_p ? extra_cost->alu.shift_reg : 1 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p)); } else if (code == LSHIFTRT || code == ASHIFTRT) { if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1))) { /* Can use SBFX/UBFX. 
*/ if (speed_p) *cost += extra_cost->alu.bfx; *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p); } else { *cost += COSTS_N_INSNS (1); *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p); if (speed_p) { if (CONST_INT_P (XEXP (x, 1))) *cost += 2 * extra_cost->alu.shift; else *cost += (extra_cost->alu.shift + extra_cost->alu.shift_reg); } else /* Slightly disparage register shifts. */ *cost += !CONST_INT_P (XEXP (x, 1)); } } else /* Rotates. */ { *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1))); *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p); if (speed_p) { if (CONST_INT_P (XEXP (x, 1))) *cost += (2 * extra_cost->alu.shift + extra_cost->alu.log_shift); else *cost += (extra_cost->alu.shift + extra_cost->alu.shift_reg + extra_cost->alu.log_shift_reg); } } return true; } *cost = LIBCALL_COST (2); return false; case BSWAP: if (arm_arch6) { if (mode == SImode) { if (speed_p) *cost += extra_cost->alu.rev; return false; } } else { /* No rev instruction available. Look at arm_legacy_rev and thumb_legacy_rev for the form of RTL used then. */ if (TARGET_THUMB) { *cost += COSTS_N_INSNS (9); if (speed_p) { *cost += 6 * extra_cost->alu.shift; *cost += 3 * extra_cost->alu.logical; } } else { *cost += COSTS_N_INSNS (4); if (speed_p) { *cost += 2 * extra_cost->alu.shift; *cost += extra_cost->alu.arith_shift; *cost += 2 * extra_cost->alu.logical; } } return true; } return false; case MINUS: if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT && (mode == SFmode || !TARGET_VFP_SINGLE)) { if (GET_CODE (XEXP (x, 0)) == MULT || GET_CODE (XEXP (x, 1)) == MULT) { rtx mul_op0, mul_op1, sub_op; if (speed_p) *cost += extra_cost->fp[mode != SFmode].mult_addsub; if (GET_CODE (XEXP (x, 0)) == MULT) { mul_op0 = XEXP (XEXP (x, 0), 0); mul_op1 = XEXP (XEXP (x, 0), 1); sub_op = XEXP (x, 1); } else { mul_op0 = XEXP (XEXP (x, 1), 0); mul_op1 = XEXP (XEXP (x, 1), 1); sub_op = XEXP (x, 0); } /* The first operand of the multiply may be optionally negated. 
*/ if (GET_CODE (mul_op0) == NEG) mul_op0 = XEXP (mul_op0, 0); *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p) + rtx_cost (mul_op1, mode, code, 0, speed_p) + rtx_cost (sub_op, mode, code, 0, speed_p)); return true; } if (speed_p) *cost += extra_cost->fp[mode != SFmode].addsub; return false; } if (mode == SImode) { rtx shift_by_reg = NULL; rtx shift_op; rtx non_shift_op; rtx op0 = XEXP (x, 0); rtx op1 = XEXP (x, 1); /* Factor out any borrow operation. There's more than one way of expressing this; try to recognize them all. */ if (GET_CODE (op0) == MINUS) { if (arm_borrow_operation (op1, SImode)) { op1 = XEXP (op0, 1); op0 = XEXP (op0, 0); } else if (arm_borrow_operation (XEXP (op0, 1), SImode)) op0 = XEXP (op0, 0); } else if (GET_CODE (op1) == PLUS && arm_borrow_operation (XEXP (op1, 0), SImode)) op1 = XEXP (op1, 0); else if (GET_CODE (op0) == NEG && arm_borrow_operation (op1, SImode)) { /* Negate with carry-in. For Thumb2 this is done with SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the RSC instruction that exists in Arm mode. */ if (speed_p) *cost += (TARGET_THUMB2 ? extra_cost->alu.arith_shift : extra_cost->alu.arith); *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p); return true; } /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm. Note we do mean ~borrow here. 
*/ else if (TARGET_ARM && arm_carry_operation (op0, SImode)) { *cost += rtx_cost (op1, mode, code, 1, speed_p); return true; } shift_op = shifter_op_p (op0, &shift_by_reg); if (shift_op == NULL) { shift_op = shifter_op_p (op1, &shift_by_reg); non_shift_op = op0; } else non_shift_op = op1; if (shift_op != NULL) { if (shift_by_reg != NULL) { if (speed_p) *cost += extra_cost->alu.arith_shift_reg; *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p); } else if (speed_p) *cost += extra_cost->alu.arith_shift; *cost += rtx_cost (shift_op, mode, code, 0, speed_p); *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p); return true; } if (arm_arch_thumb2 && GET_CODE (XEXP (x, 1)) == MULT) { /* MLS. */ if (speed_p) *cost += extra_cost->mult[0].add; *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p); *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p); *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p); return true; } if (CONST_INT_P (op0)) { int insns = arm_gen_constant (MINUS, SImode, NULL_RTX, INTVAL (op0), NULL_RTX, NULL_RTX, 1, 0); *cost = COSTS_N_INSNS (insns); if (speed_p) *cost += insns * extra_cost->alu.arith; *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p); return true; } else if (speed_p) *cost += extra_cost->alu.arith; /* Don't recurse as we don't want to cost any borrow that we've stripped. */ *cost += rtx_cost (op0, mode, MINUS, 0, speed_p); *cost += rtx_cost (op1, mode, MINUS, 1, speed_p); return true; } if (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) < 4) { rtx shift_op, shift_reg; shift_reg = NULL; /* We check both sides of the MINUS for shifter operands since, unlike PLUS, it's not commutative. */ HANDLE_NARROW_SHIFT_ARITH (MINUS, 0); HANDLE_NARROW_SHIFT_ARITH (MINUS, 1); /* Slightly disparage, as we might need to widen the result. 
*/ *cost += 1; if (speed_p) *cost += extra_cost->alu.arith; if (CONST_INT_P (XEXP (x, 0))) { *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p); return true; } return false; } if (mode == DImode) { *cost += COSTS_N_INSNS (1); if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND) { rtx op1 = XEXP (x, 1); if (speed_p) *cost += 2 * extra_cost->alu.arith; if (GET_CODE (op1) == ZERO_EXTEND) *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND, 0, speed_p); else *cost += rtx_cost (op1, mode, MINUS, 1, speed_p); *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND, 0, speed_p); return true; } else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) { if (speed_p) *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift; *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND, 0, speed_p) + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p)); return true; } else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND) { if (speed_p) *cost += (extra_cost->alu.arith + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND ? extra_cost->alu.arith : extra_cost->alu.arith_shift)); *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p) + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode, GET_CODE (XEXP (x, 1)), 0, speed_p)); return true; } if (speed_p) *cost += 2 * extra_cost->alu.arith; return false; } /* Vector mode? 
*/ *cost = LIBCALL_COST (2); return false; case PLUS: if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT && (mode == SFmode || !TARGET_VFP_SINGLE)) { if (GET_CODE (XEXP (x, 0)) == MULT) { rtx mul_op0, mul_op1, add_op; if (speed_p) *cost += extra_cost->fp[mode != SFmode].mult_addsub; mul_op0 = XEXP (XEXP (x, 0), 0); mul_op1 = XEXP (XEXP (x, 0), 1); add_op = XEXP (x, 1); *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p) + rtx_cost (mul_op1, mode, code, 0, speed_p) + rtx_cost (add_op, mode, code, 0, speed_p)); return true; } if (speed_p) *cost += extra_cost->fp[mode != SFmode].addsub; return false; } else if (GET_MODE_CLASS (mode) == MODE_FLOAT) { *cost = LIBCALL_COST (2); return false; } /* Narrow modes can be synthesized in SImode, but the range of useful sub-operations is limited. Check for shift operations on one of the operands. Only left shifts can be used in the narrow modes. */ if (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) < 4) { rtx shift_op, shift_reg; shift_reg = NULL; HANDLE_NARROW_SHIFT_ARITH (PLUS, 0); if (CONST_INT_P (XEXP (x, 1))) { int insns = arm_gen_constant (PLUS, SImode, NULL_RTX, INTVAL (XEXP (x, 1)), NULL_RTX, NULL_RTX, 1, 0); *cost = COSTS_N_INSNS (insns); if (speed_p) *cost += insns * extra_cost->alu.arith; /* Slightly penalize a narrow operation as the result may need widening. */ *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p); return true; } /* Slightly penalize a narrow operation as the result may need widening. */ *cost += 1; if (speed_p) *cost += extra_cost->alu.arith; return false; } if (mode == SImode) { rtx shift_op, shift_reg; if (TARGET_INT_SIMD && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)) { /* UXTA[BH] or SXTA[BH]. 
*/ if (speed_p) *cost += extra_cost->alu.extend_arith; *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND, 0, speed_p) + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p)); return true; } rtx op0 = XEXP (x, 0); rtx op1 = XEXP (x, 1); /* Handle a side effect of adding in the carry to an addition. */ if (GET_CODE (op0) == PLUS && arm_carry_operation (op1, mode)) { op1 = XEXP (op0, 1); op0 = XEXP (op0, 0); } else if (GET_CODE (op1) == PLUS && arm_carry_operation (op0, mode)) { op0 = XEXP (op1, 0); op1 = XEXP (op1, 1); } else if (GET_CODE (op0) == PLUS) { op0 = strip_carry_operation (op0); if (swap_commutative_operands_p (op0, op1)) std::swap (op0, op1); } if (arm_carry_operation (op0, mode)) { /* Adding the carry to a register is a canonicalization of adding 0 to the register plus the carry. */ if (speed_p) *cost += extra_cost->alu.arith; *cost += rtx_cost (op1, mode, PLUS, 1, speed_p); return true; } shift_reg = NULL; shift_op = shifter_op_p (op0, &shift_reg); if (shift_op != NULL) { if (shift_reg) { if (speed_p) *cost += extra_cost->alu.arith_shift_reg; *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p); } else if (speed_p) *cost += extra_cost->alu.arith_shift; *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p) + rtx_cost (op1, mode, PLUS, 1, speed_p)); return true; } if (GET_CODE (op0) == MULT) { rtx mul_op = op0; if (TARGET_DSP_MULTIPLY && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1)) && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16))) || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1)) && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1)) && (INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))))) { /* SMLA[BT][BT]. 
*/ if (speed_p) *cost += extra_cost->mult[0].extend_add; *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode, SIGN_EXTEND, 0, speed_p) + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode, SIGN_EXTEND, 0, speed_p) + rtx_cost (op1, mode, PLUS, 1, speed_p)); return true; } if (speed_p) *cost += extra_cost->mult[0].add; *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p) + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p) + rtx_cost (op1, mode, PLUS, 1, speed_p)); return true; } if (CONST_INT_P (op1)) { int insns = arm_gen_constant (PLUS, SImode, NULL_RTX, INTVAL (op1), NULL_RTX, NULL_RTX, 1, 0); *cost = COSTS_N_INSNS (insns); if (speed_p) *cost += insns * extra_cost->alu.arith; *cost += rtx_cost (op0, mode, PLUS, 0, speed_p); return true; } if (speed_p) *cost += extra_cost->alu.arith; /* Don't recurse here because we want to test the operands without any carry operation. */ *cost += rtx_cost (op0, mode, PLUS, 0, speed_p); *cost += rtx_cost (op1, mode, PLUS, 1, speed_p); return true; } if (mode == DImode) { if (GET_CODE (XEXP (x, 0)) == MULT && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND) || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND))) { if (speed_p) *cost += extra_cost->mult[1].extend_add; *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode, ZERO_EXTEND, 0, speed_p) + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode, ZERO_EXTEND, 0, speed_p) + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p)); return true; } *cost += COSTS_N_INSNS (1); if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND) { if (speed_p) *cost += (extra_cost->alu.arith + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND ? 
extra_cost->alu.arith : extra_cost->alu.arith_shift)); *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND, 0, speed_p) + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p)); return true; } if (speed_p) *cost += 2 * extra_cost->alu.arith; return false; } /* Vector mode? */ *cost = LIBCALL_COST (2); return false; case IOR: if (mode == SImode && arm_arch6 && aarch_rev16_p (x)) { if (speed_p) *cost += extra_cost->alu.rev; return true; } /* Fall through. */ case AND: case XOR: if (mode == SImode) { enum rtx_code subcode = GET_CODE (XEXP (x, 0)); rtx op0 = XEXP (x, 0); rtx shift_op, shift_reg; if (subcode == NOT && (code == AND || (code == IOR && TARGET_THUMB2))) op0 = XEXP (op0, 0); shift_reg = NULL; shift_op = shifter_op_p (op0, &shift_reg); if (shift_op != NULL) { if (shift_reg) { if (speed_p) *cost += extra_cost->alu.log_shift_reg; *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p); } else if (speed_p) *cost += extra_cost->alu.log_shift; *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p) + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p)); return true; } if (CONST_INT_P (XEXP (x, 1))) { int insns = arm_gen_constant (code, SImode, NULL_RTX, INTVAL (XEXP (x, 1)), NULL_RTX, NULL_RTX, 1, 0); *cost = COSTS_N_INSNS (insns); if (speed_p) *cost += insns * extra_cost->alu.logical; *cost += rtx_cost (op0, mode, code, 0, speed_p); return true; } if (speed_p) *cost += extra_cost->alu.logical; *cost += (rtx_cost (op0, mode, code, 0, speed_p) + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p)); return true; } if (mode == DImode) { rtx op0 = XEXP (x, 0); enum rtx_code subcode = GET_CODE (op0); *cost += COSTS_N_INSNS (1); if (subcode == NOT && (code == AND || (code == IOR && TARGET_THUMB2))) op0 = XEXP (op0, 0); if (GET_CODE (op0) == ZERO_EXTEND) { if (speed_p) *cost += 2 * extra_cost->alu.logical; *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND, 0, speed_p) + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p)); return true; } else if (GET_CODE (op0) == 
SIGN_EXTEND) { if (speed_p) *cost += extra_cost->alu.logical + extra_cost->alu.log_shift; *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND, 0, speed_p) + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p)); return true; } if (speed_p) *cost += 2 * extra_cost->alu.logical; return true; } /* Vector mode? */ *cost = LIBCALL_COST (2); return false; case MULT: if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT && (mode == SFmode || !TARGET_VFP_SINGLE)) { rtx op0 = XEXP (x, 0); if (GET_CODE (op0) == NEG && !flag_rounding_math) op0 = XEXP (op0, 0); if (speed_p) *cost += extra_cost->fp[mode != SFmode].mult; *cost += (rtx_cost (op0, mode, MULT, 0, speed_p) + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p)); return true; } else if (GET_MODE_CLASS (mode) == MODE_FLOAT) { *cost = LIBCALL_COST (2); return false; } if (mode == SImode) { if (TARGET_DSP_MULTIPLY && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND || (GET_CODE (XEXP (x, 1)) == ASHIFTRT && CONST_INT_P (XEXP (XEXP (x, 1), 1)) && INTVAL (XEXP (XEXP (x, 1), 1)) == 16))) || (GET_CODE (XEXP (x, 0)) == ASHIFTRT && CONST_INT_P (XEXP (XEXP (x, 0), 1)) && INTVAL (XEXP (XEXP (x, 0), 1)) == 16 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND || (GET_CODE (XEXP (x, 1)) == ASHIFTRT && CONST_INT_P (XEXP (XEXP (x, 1), 1)) && (INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))))) { /* SMUL[TB][TB]. 
*/ if (speed_p) *cost += extra_cost->mult[0].extend; *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, SIGN_EXTEND, 0, speed_p); *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, SIGN_EXTEND, 1, speed_p); return true; } if (speed_p) *cost += extra_cost->mult[0].simple; return false; } if (mode == DImode) { if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND) || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)) { if (speed_p) *cost += extra_cost->mult[1].extend; *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND, 0, speed_p) + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode, ZERO_EXTEND, 0, speed_p)); return true; } *cost = LIBCALL_COST (2); return false; } /* Vector mode? */ *cost = LIBCALL_COST (2); return false; case NEG: if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT && (mode == SFmode || !TARGET_VFP_SINGLE)) { if (GET_CODE (XEXP (x, 0)) == MULT) { /* VNMUL. */ *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p); return true; } if (speed_p) *cost += extra_cost->fp[mode != SFmode].neg; return false; } else if (GET_MODE_CLASS (mode) == MODE_FLOAT) { *cost = LIBCALL_COST (1); return false; } if (mode == SImode) { if (GET_CODE (XEXP (x, 0)) == ABS) { *cost += COSTS_N_INSNS (1); /* Assume the non-flag-changing variant. */ if (speed_p) *cost += (extra_cost->alu.log_shift + extra_cost->alu.arith_shift); *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p); return true; } if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE) { *cost += COSTS_N_INSNS (1); /* No extra cost for MOV imm and MVN imm. */ /* If the comparison op is using the flags, there's no further cost, otherwise we need to add the cost of the comparison. 
*/ if (!(REG_P (XEXP (XEXP (x, 0), 0)) && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM && XEXP (XEXP (x, 0), 1) == const0_rtx)) { mode = GET_MODE (XEXP (XEXP (x, 0), 0)); *cost += (COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE, 0, speed_p) + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE, 1, speed_p)); if (speed_p) *cost += extra_cost->alu.arith; } return true; } if (speed_p) *cost += extra_cost->alu.arith; return false; } if (GET_MODE_CLASS (mode) == MODE_INT && GET_MODE_SIZE (mode) < 4) { /* Slightly disparage, as we might need an extend operation. */ *cost += 1; if (speed_p) *cost += extra_cost->alu.arith; return false; } if (mode == DImode) { *cost += COSTS_N_INSNS (1); if (speed_p) *cost += 2 * extra_cost->alu.arith; return false; } /* Vector mode? */ *cost = LIBCALL_COST (1); return false; case NOT: if (mode == SImode) { rtx shift_op; rtx shift_reg = NULL; shift_op = shifter_op_p (XEXP (x, 0), &shift_reg); if (shift_op) { if (shift_reg != NULL) { if (speed_p) *cost += extra_cost->alu.log_shift_reg; *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p); } else if (speed_p) *cost += extra_cost->alu.log_shift; *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p); return true; } if (speed_p) *cost += extra_cost->alu.logical; return false; } if (mode == DImode) { *cost += COSTS_N_INSNS (1); return false; } /* Vector mode? */ *cost += LIBCALL_COST (1); return false; case IF_THEN_ELSE: { if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) { *cost += COSTS_N_INSNS (3); return true; } int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p); int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p); *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p); /* Assume that if one arm of the if_then_else is a register, that it will be tied with the result and eliminate the conditional insn. 
*/ if (REG_P (XEXP (x, 1))) *cost += op2cost; else if (REG_P (XEXP (x, 2))) *cost += op1cost; else { if (speed_p) { if (extra_cost->alu.non_exec_costs_exec) *cost += op1cost + op2cost + extra_cost->alu.non_exec; else *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec; } else *cost += op1cost + op2cost; } } return true; case COMPARE: if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx) *cost = 0; else { machine_mode op0mode; /* We'll mostly assume that the cost of a compare is the cost of the LHS. However, there are some notable exceptions. */ /* Floating point compares are never done as side-effects. */ op0mode = GET_MODE (XEXP (x, 0)); if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT && (op0mode == SFmode || !TARGET_VFP_SINGLE)) { if (speed_p) *cost += extra_cost->fp[op0mode != SFmode].compare; if (XEXP (x, 1) == CONST0_RTX (op0mode)) { *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p); return true; } return false; } else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT) { *cost = LIBCALL_COST (2); return false; } /* DImode compares normally take two insns. */ if (op0mode == DImode) { *cost += COSTS_N_INSNS (1); if (speed_p) *cost += 2 * extra_cost->alu.arith; return false; } if (op0mode == SImode) { rtx shift_op; rtx shift_reg; if (XEXP (x, 1) == const0_rtx && !(REG_P (XEXP (x, 0)) || (GET_CODE (XEXP (x, 0)) == SUBREG && REG_P (SUBREG_REG (XEXP (x, 0)))))) { *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p); /* Multiply operations that set the flags are often significantly more expensive. 
*/ if (speed_p && GET_CODE (XEXP (x, 0)) == MULT && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode)) *cost += extra_cost->mult[0].flag_setting; if (speed_p && GET_CODE (XEXP (x, 0)) == PLUS && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0), 0), 1), mode)) *cost += extra_cost->mult[0].flag_setting; return true; } shift_reg = NULL; shift_op = shifter_op_p (XEXP (x, 0), &shift_reg); if (shift_op != NULL) { if (shift_reg != NULL) { *cost += rtx_cost (shift_reg, op0mode, ASHIFT, 1, speed_p); if (speed_p) *cost += extra_cost->alu.arith_shift_reg; } else if (speed_p) *cost += extra_cost->alu.arith_shift; *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p); *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p); return true; } if (speed_p) *cost += extra_cost->alu.arith; if (CONST_INT_P (XEXP (x, 1)) && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE)) { *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p); return true; } return false; } /* Vector mode? */ *cost = LIBCALL_COST (2); return false; } return true; case EQ: case NE: case LT: case LE: case GT: case GE: case LTU: case LEU: case GEU: case GTU: case ORDERED: case UNORDERED: case UNEQ: case UNLE: case UNLT: case UNGE: case UNGT: case LTGT: if (outer_code == SET) { /* Is it a store-flag operation? */ if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM && XEXP (x, 1) == const0_rtx) { /* Thumb also needs an IT insn. */ *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1); return true; } if (XEXP (x, 1) == const0_rtx) { switch (code) { case LT: /* LSR Rd, Rn, #31. */ if (speed_p) *cost += extra_cost->alu.shift; break; case EQ: /* RSBS T1, Rn, #0 ADC Rd, Rn, T1. */ case NE: /* SUBS T1, Rn, #1 SBC Rd, Rn, T1. */ *cost += COSTS_N_INSNS (1); break; case LE: /* RSBS T1, Rn, Rn, LSR #31 ADC Rd, Rn, T1. */ *cost += COSTS_N_INSNS (1); if (speed_p) *cost += extra_cost->alu.arith_shift; break; case GT: /* RSB Rd, Rn, Rn, ASR #1 LSR Rd, Rd, #31. 
*/ *cost += COSTS_N_INSNS (1); if (speed_p) *cost += (extra_cost->alu.arith_shift + extra_cost->alu.shift); break; case GE: /* ASR Rd, Rn, #31 ADD Rd, Rn, #1. */ *cost += COSTS_N_INSNS (1); if (speed_p) *cost += extra_cost->alu.shift; break; default: /* Remaining cases are either meaningless or would take three insns anyway. */ *cost = COSTS_N_INSNS (3); break; } *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p); return true; } else { *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2); if (CONST_INT_P (XEXP (x, 1)) && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE)) { *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p); return true; } return false; } } /* Not directly inside a set. If it involves the condition code register it must be the condition for a branch, cond_exec or I_T_E operation. Since the comparison is performed elsewhere this is just the control part which has no additional cost. */ else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM && XEXP (x, 1) == const0_rtx) { *cost = 0; return true; } return false; case ABS: if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT && (mode == SFmode || !TARGET_VFP_SINGLE)) { if (speed_p) *cost += extra_cost->fp[mode != SFmode].neg; return false; } else if (GET_MODE_CLASS (mode) == MODE_FLOAT) { *cost = LIBCALL_COST (1); return false; } if (mode == SImode) { if (speed_p) *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift; return false; } /* Vector mode? */ *cost = LIBCALL_COST (1); return false; case SIGN_EXTEND: if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode) && MEM_P (XEXP (x, 0))) { if (mode == DImode) *cost += COSTS_N_INSNS (1); if (!speed_p) return true; if (GET_MODE (XEXP (x, 0)) == SImode) *cost += extra_cost->ldst.load; else *cost += extra_cost->ldst.load_sign_extend; if (mode == DImode) *cost += extra_cost->alu.shift; return true; } /* Widening from less than 32-bits requires an extend operation. 
*/ if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6) { /* We have SXTB/SXTH. */ *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p); if (speed_p) *cost += extra_cost->alu.extend; } else if (GET_MODE (XEXP (x, 0)) != SImode) { /* Needs two shifts. */ *cost += COSTS_N_INSNS (1); *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p); if (speed_p) *cost += 2 * extra_cost->alu.shift; } /* Widening beyond 32-bits requires one more insn. */ if (mode == DImode) { *cost += COSTS_N_INSNS (1); if (speed_p) *cost += extra_cost->alu.shift; } return true; case ZERO_EXTEND: if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode || GET_MODE (XEXP (x, 0)) == QImode) && MEM_P (XEXP (x, 0))) { *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p); if (mode == DImode) *cost += COSTS_N_INSNS (1); /* No speed penalty. */ return true; } /* Widening from less than 32-bits requires an extend operation. */ if (GET_MODE (XEXP (x, 0)) == QImode) { /* UXTB can be a shorter instruction in Thumb2, but it might be slower than the AND Rd, Rn, #255 alternative. When optimizing for speed it should never be slower to use AND, and we don't really model 16-bit vs 32-bit insns here. */ if (speed_p) *cost += extra_cost->alu.logical; } else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6) { /* We have UXTB/UXTH. */ *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p); if (speed_p) *cost += extra_cost->alu.extend; } else if (GET_MODE (XEXP (x, 0)) != SImode) { /* Needs two shifts. It's marginally preferable to use shifts rather than two BIC instructions as the second shift may merge with a subsequent insn as a shifter op. */ *cost = COSTS_N_INSNS (2); *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p); if (speed_p) *cost += 2 * extra_cost->alu.shift; } /* Widening beyond 32-bits requires one more insn. */ if (mode == DImode) { *cost += COSTS_N_INSNS (1); /* No speed penalty. 
*/ } return true; case CONST_INT: *cost = 0; /* CONST_INT has no mode, so we cannot tell for sure how many insns are really going to be needed. The best we can do is look at the value passed. If it fits in SImode, then assume that's the mode it will be used for. Otherwise assume it will be used in DImode. */ if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode)) mode = SImode; else mode = DImode; /* Avoid blowing up in arm_gen_constant (). */ if (!(outer_code == PLUS || outer_code == AND || outer_code == IOR || outer_code == XOR || outer_code == MINUS)) outer_code = SET; const_int_cost: if (mode == SImode) { *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL, INTVAL (x), NULL, NULL, 0, 0)); /* Extra costs? */ } else { *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL, trunc_int_for_mode (INTVAL (x), SImode), NULL, NULL, 0, 0) + arm_gen_constant (outer_code, SImode, NULL, INTVAL (x) >> 32, NULL, NULL, 0, 0)); /* Extra costs? */ } return true; case CONST: case LABEL_REF: case SYMBOL_REF: if (speed_p) { if (arm_arch_thumb2 && !flag_pic) *cost += COSTS_N_INSNS (1); else *cost += extra_cost->ldst.load; } else *cost += COSTS_N_INSNS (1); if (flag_pic) { *cost += COSTS_N_INSNS (1); if (speed_p) *cost += extra_cost->alu.arith; } return true; case CONST_FIXED: *cost = COSTS_N_INSNS (4); /* Fixme. */ return true; case CONST_DOUBLE: if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT && (mode == SFmode || !TARGET_VFP_SINGLE)) { if (vfp3_const_double_rtx (x)) { if (speed_p) *cost += extra_cost->fp[mode == DFmode].fpconst; return true; } if (speed_p) { if (mode == DFmode) *cost += extra_cost->ldst.loadd; else *cost += extra_cost->ldst.loadf; } else *cost += COSTS_N_INSNS (1 + (mode == DFmode)); return true; } *cost = COSTS_N_INSNS (4); return true; case CONST_VECTOR: /* Fixme. 
*/ if (TARGET_NEON && TARGET_HARD_FLOAT && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)) && neon_immediate_valid_for_move (x, mode, NULL, NULL)) *cost = COSTS_N_INSNS (1); else *cost = COSTS_N_INSNS (4); return true; case HIGH: case LO_SUM: /* When optimizing for size, we prefer constant pool entries to MOVW/MOVT pairs, so bump the cost of these slightly. */ if (!speed_p) *cost += 1; return true; case CLZ: if (speed_p) *cost += extra_cost->alu.clz; return false; case SMIN: if (XEXP (x, 1) == const0_rtx) { if (speed_p) *cost += extra_cost->alu.log_shift; *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p); return true; } /* Fall through. */ case SMAX: case UMIN: case UMAX: *cost += COSTS_N_INSNS (1); return false; case TRUNCATE: if (GET_CODE (XEXP (x, 0)) == ASHIFTRT && CONST_INT_P (XEXP (XEXP (x, 0), 1)) && INTVAL (XEXP (XEXP (x, 0), 1)) == 32 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND) || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == ZERO_EXTEND)))) { if (speed_p) *cost += extra_cost->mult[1].extend; *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode, ZERO_EXTEND, 0, speed_p) + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode, ZERO_EXTEND, 0, speed_p)); return true; } *cost = LIBCALL_COST (1); return false; case UNSPEC_VOLATILE: case UNSPEC: return arm_unspec_cost (x, outer_code, speed_p, cost); case PC: /* Reading the PC is like reading any other register. Writing it is more expensive, but we take that into account elsewhere. */ *cost = 0; return true; case ZERO_EXTRACT: /* TODO: Simple zero_extract of bottom bits using AND. */ /* Fall through. 
*/ case SIGN_EXTRACT: if (arm_arch6 && mode == SImode && CONST_INT_P (XEXP (x, 1)) && CONST_INT_P (XEXP (x, 2))) { if (speed_p) *cost += extra_cost->alu.bfx; *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p); return true; } /* Without UBFX/SBFX, need to resort to shift operations. */ *cost += COSTS_N_INSNS (1); if (speed_p) *cost += 2 * extra_cost->alu.shift; *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p); return true; case FLOAT_EXTEND: if (TARGET_HARD_FLOAT) { if (speed_p) *cost += extra_cost->fp[mode == DFmode].widen; if (!TARGET_VFP5 && GET_MODE (XEXP (x, 0)) == HFmode) { /* Pre v8, widening HF->DF is a two-step process, first widening to SFmode. */ *cost += COSTS_N_INSNS (1); if (speed_p) *cost += extra_cost->fp[0].widen; } *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p); return true; } *cost = LIBCALL_COST (1); return false; case FLOAT_TRUNCATE: if (TARGET_HARD_FLOAT) { if (speed_p) *cost += extra_cost->fp[mode == DFmode].narrow; *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p); return true; /* Vector modes? */ } *cost = LIBCALL_COST (1); return false; case FMA: if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA) { rtx op0 = XEXP (x, 0); rtx op1 = XEXP (x, 1); rtx op2 = XEXP (x, 2); /* vfms or vfnma. */ if (GET_CODE (op0) == NEG) op0 = XEXP (op0, 0); /* vfnms or vfnma. */ if (GET_CODE (op2) == NEG) op2 = XEXP (op2, 0); *cost += rtx_cost (op0, mode, FMA, 0, speed_p); *cost += rtx_cost (op1, mode, FMA, 1, speed_p); *cost += rtx_cost (op2, mode, FMA, 2, speed_p); if (speed_p) *cost += extra_cost->fp[mode ==DFmode].fma; return true; } *cost = LIBCALL_COST (3); return false; case FIX: case UNSIGNED_FIX: if (TARGET_HARD_FLOAT) { /* The *combine_vcvtf2i reduces a vmul+vcvt into a vcvt fixed-point conversion. 
*/ if (code == FIX && mode == SImode && GET_CODE (XEXP (x, 0)) == FIX && GET_MODE (XEXP (x, 0)) == SFmode && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1)) > 0) { if (speed_p) *cost += extra_cost->fp[0].toint; *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode, code, 0, speed_p); return true; } if (GET_MODE_CLASS (mode) == MODE_INT) { mode = GET_MODE (XEXP (x, 0)); if (speed_p) *cost += extra_cost->fp[mode == DFmode].toint; /* Strip of the 'cost' of rounding towards zero. */ if (GET_CODE (XEXP (x, 0)) == FIX) *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code, 0, speed_p); else *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p); /* ??? Increase the cost to deal with transferring from FP -> CORE registers? */ return true; } else if (GET_MODE_CLASS (mode) == MODE_FLOAT && TARGET_VFP5) { if (speed_p) *cost += extra_cost->fp[mode == DFmode].roundint; return false; } /* Vector costs? */ } *cost = LIBCALL_COST (1); return false; case FLOAT: case UNSIGNED_FLOAT: if (TARGET_HARD_FLOAT) { /* ??? Increase the cost to deal with transferring from CORE -> FP registers? */ if (speed_p) *cost += extra_cost->fp[mode == DFmode].fromint; return false; } *cost = LIBCALL_COST (1); return false; case CALL: return true; case ASM_OPERANDS: { /* Just a guess. Guess number of instructions in the asm plus one insn per input. Always a minimum of COSTS_N_INSNS (1) though (see PR60663). */ int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x))); int num_operands = ASM_OPERANDS_INPUT_LENGTH (x); *cost = COSTS_N_INSNS (asm_length + num_operands); return true; } default: if (mode != VOIDmode) *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode)); else *cost = COSTS_N_INSNS (4); /* Who knows? */ return false; } } #undef HANDLE_NARROW_SHIFT_ARITH /* RTX costs entry point. 
*/
/* Cost X by delegating to arm_rtx_costs_internal with the extra-cost
   table of the core currently being tuned for.  The cost is stored in
   *TOTAL and the helper's result is returned unchanged.  MODE and OPNO
   are not used.  When a dump file is open and arm_verbose_cost is set,
   the rtx and its computed cost are also written to the dump file.  */
static bool
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
               int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  /* The tuning structure must provide an extra-cost table.  */
  gcc_assert (current_tune->insn_extra_cost);

  const bool is_final
    = arm_rtx_costs_internal (x, GET_CODE (x), (enum rtx_code) outer_code,
                              current_tune->insn_extra_cost, total, speed);

  if (dump_file && arm_verbose_cost)
    {
      const char *temperature = speed ? "Hot" : "Cold";
      const char *completeness = is_final ? "final" : "partial";

      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n",
               temperature, *total, completeness);
    }

  return is_final;
}

/* Return the cost of INSN; SPEED is forwarded to pattern_cost.  */
static int
arm_insn_cost (rtx_insn *insn, bool speed)
{
  rtx pat = PATTERN (insn);

  /* Before reload, a plain register-to-register copy is costed at less
     than a full insn: such moves will likely disappear during register
     allocation.  */
  if (!reload_completed
      && GET_CODE (pat) == SET
      && REG_P (SET_DEST (pat))
      && REG_P (SET_SRC (pat)))
    return 2;

  int cost = pattern_cost (pat, speed);

  /* A zero cost most likely denotes a complex insn.  Do not let it look
     cheaper than the things we do know how to cost.  */
  if (cost == 0)
    return COSTS_N_INSNS (2);

  return cost;
}

/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.
*/ static inline int arm_arm_address_cost (rtx x) { enum rtx_code c = GET_CODE (x); if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC) return 0; if (c == MEM || c == LABEL_REF || c == SYMBOL_REF) return 10; if (c == PLUS) { if (CONST_INT_P (XEXP (x, 1))) return 2; if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1))) return 3; return 4; } return 6; } static inline int arm_thumb_address_cost (rtx x) { enum rtx_code c = GET_CODE (x); if (c == REG) return 1; if (c == PLUS && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1))) return 1; return 2; } static int arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED, addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED) { return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x); } /* Adjust cost hook for XScale. */ static bool xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int * cost) { /* Some true dependencies can have a higher cost depending on precisely how certain input operands are used. */ if (dep_type == 0 && recog_memoized (insn) >= 0 && recog_memoized (dep) >= 0) { int shift_opnum = get_attr_shift (insn); enum attr_type attr_type = get_attr_type (dep); /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted operand for INSN. If we have a shifted input operand and the instruction we depend on is another ALU instruction, then we may have to account for an additional stall. */ if (shift_opnum != 0 && (attr_type == TYPE_ALU_SHIFT_IMM || attr_type == TYPE_ALUS_SHIFT_IMM || attr_type == TYPE_LOGIC_SHIFT_IMM || attr_type == TYPE_LOGICS_SHIFT_IMM || attr_type == TYPE_ALU_SHIFT_REG || attr_type == TYPE_ALUS_SHIFT_REG || attr_type == TYPE_LOGIC_SHIFT_REG || attr_type == TYPE_LOGICS_SHIFT_REG || attr_type == TYPE_MOV_SHIFT || attr_type == TYPE_MVN_SHIFT || attr_type == TYPE_MOV_SHIFT_REG || attr_type == TYPE_MVN_SHIFT_REG)) { rtx shifted_operand; int opno; /* Get the shifted operand. 
*/ extract_insn (insn); shifted_operand = recog_data.operand[shift_opnum]; /* Iterate over all the operands in DEP. If we write an operand that overlaps with SHIFTED_OPERAND, then we have increase the cost of this dependency. */ extract_insn (dep); preprocess_constraints (dep); for (opno = 0; opno < recog_data.n_operands; opno++) { /* We can ignore strict inputs. */ if (recog_data.operand_type[opno] == OP_IN) continue; if (reg_overlap_mentioned_p (recog_data.operand[opno], shifted_operand)) { *cost = 2; return false; } } } } return true; } /* Adjust cost hook for Cortex A9. */ static bool cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int * cost) { switch (dep_type) { case REG_DEP_ANTI: *cost = 0; return false; case REG_DEP_TRUE: case REG_DEP_OUTPUT: if (recog_memoized (insn) >= 0 && recog_memoized (dep) >= 0) { if (GET_CODE (PATTERN (insn)) == SET) { if (GET_MODE_CLASS (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT || GET_MODE_CLASS (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT) { enum attr_type attr_type_insn = get_attr_type (insn); enum attr_type attr_type_dep = get_attr_type (dep); /* By default all dependencies of the form s0 = s0 <op> s1 s0 = s0 <op> s2 have an extra latency of 1 cycle because of the input and output dependency in this case. However this gets modeled as an true dependency and hence all these checks. */ if (REG_P (SET_DEST (PATTERN (insn))) && reg_set_p (SET_DEST (PATTERN (insn)), dep)) { /* FMACS is a special case where the dependent instruction can be issued 3 cycles before the normal latency in case of an output dependency. 
*/ if ((attr_type_insn == TYPE_FMACS || attr_type_insn == TYPE_FMACD) && (attr_type_dep == TYPE_FMACS || attr_type_dep == TYPE_FMACD)) { if (dep_type == REG_DEP_OUTPUT) *cost = insn_default_latency (dep) - 3; else *cost = insn_default_latency (dep); return false; } else { if (dep_type == REG_DEP_OUTPUT) *cost = insn_default_latency (dep) + 1; else *cost = insn_default_latency (dep); } return false; } } } } break; default: gcc_unreachable (); } return true; } /* Adjust cost hook for FA726TE. */ static bool fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int * cost) { /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated) have penalty of 3. */ if (dep_type == REG_DEP_TRUE && recog_memoized (insn) >= 0 && recog_memoized (dep) >= 0 && get_attr_conds (dep) == CONDS_SET) { /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */ if (get_attr_conds (insn) == CONDS_USE && get_attr_type (insn) != TYPE_BRANCH) { *cost = 3; return false; } if (GET_CODE (PATTERN (insn)) == COND_EXEC || get_attr_conds (insn) == CONDS_USE) { *cost = 0; return false; } } return true; } /* Implement TARGET_REGISTER_MOVE_COST. Moves between VFP_REGS and GENERAL_REGS are a single insn, but it is typically more expensive than a single memory access. We set the cost to less than two memory accesses so that floating point to integer conversion does not go through memory. */ int arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED, reg_class_t from, reg_class_t to) { if (TARGET_32BIT) { if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to)) || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to))) return 15; else if ((from == IWMMXT_REGS && to != IWMMXT_REGS) || (from != IWMMXT_REGS && to == IWMMXT_REGS)) return 4; else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS) return 20; else return 2; } else { if (from == HI_REGS || to == HI_REGS) return 4; else return 2; } } /* Implement TARGET_MEMORY_MOVE_COST. 
*/ int arm_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in ATTRIBUTE_UNUSED) { if (TARGET_32BIT) return 10; else { if (GET_MODE_SIZE (mode) < 4) return 8; else return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2)); } } /* Vectorizer cost model implementation. */ /* Implement targetm.vectorize.builtin_vectorization_cost. */ static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, tree vectype, int misalign ATTRIBUTE_UNUSED) { unsigned elements; switch (type_of_cost) { case scalar_stmt: return current_tune->vec_costs->scalar_stmt_cost; case scalar_load: return current_tune->vec_costs->scalar_load_cost; case scalar_store: return current_tune->vec_costs->scalar_store_cost; case vector_stmt: return current_tune->vec_costs->vec_stmt_cost; case vector_load: return current_tune->vec_costs->vec_align_load_cost; case vector_store: return current_tune->vec_costs->vec_store_cost; case vec_to_scalar: return current_tune->vec_costs->vec_to_scalar_cost; case scalar_to_vec: return current_tune->vec_costs->scalar_to_vec_cost; case unaligned_load: case vector_gather_load: return current_tune->vec_costs->vec_unalign_load_cost; case unaligned_store: case vector_scatter_store: return current_tune->vec_costs->vec_unalign_store_cost; case cond_branch_taken: return current_tune->vec_costs->cond_taken_branch_cost; case cond_branch_not_taken: return current_tune->vec_costs->cond_not_taken_branch_cost; case vec_perm: case vec_promote_demote: return current_tune->vec_costs->vec_stmt_cost; case vec_construct: elements = TYPE_VECTOR_SUBPARTS (vectype); return elements / 2 + 1; default: gcc_unreachable (); } } /* Implement targetm.vectorize.add_stmt_cost. */ static unsigned arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind, struct _stmt_vec_info *stmt_info, int misalign, enum vect_cost_model_location where) { unsigned *cost = (unsigned *) data; unsigned retval = 0; if (flag_vect_cost_model) { tree vectype = stmt_info ? 
stmt_vectype (stmt_info) : NULL_TREE; int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign); /* Statements in an inner loop relative to the loop being vectorized are weighted more heavily. The value here is arbitrary and could potentially be improved with analysis. */ if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info)) count *= 50; /* FIXME. */ retval = (unsigned) (count * stmt_cost); cost[where] += retval; } return retval; } /* Return true if and only if this insn can dual-issue only as older. */ static bool cortexa7_older_only (rtx_insn *insn) { if (recog_memoized (insn) < 0) return false; switch (get_attr_type (insn)) { case TYPE_ALU_DSP_REG: case TYPE_ALU_SREG: case TYPE_ALUS_SREG: case TYPE_LOGIC_REG: case TYPE_LOGICS_REG: case TYPE_ADC_REG: case TYPE_ADCS_REG: case TYPE_ADR: case TYPE_BFM: case TYPE_REV: case TYPE_MVN_REG: case TYPE_SHIFT_IMM: case TYPE_SHIFT_REG: case TYPE_LOAD_BYTE: case TYPE_LOAD_4: case TYPE_STORE_4: case TYPE_FFARITHS: case TYPE_FADDS: case TYPE_FFARITHD: case TYPE_FADDD: case TYPE_FMOV: case TYPE_F_CVT: case TYPE_FCMPS: case TYPE_FCMPD: case TYPE_FCONSTS: case TYPE_FCONSTD: case TYPE_FMULS: case TYPE_FMACS: case TYPE_FMULD: case TYPE_FMACD: case TYPE_FDIVS: case TYPE_FDIVD: case TYPE_F_MRC: case TYPE_F_MRRC: case TYPE_F_FLAG: case TYPE_F_LOADS: case TYPE_F_STORES: return true; default: return false; } } /* Return true if and only if this insn can dual-issue as younger. 
*/ static bool cortexa7_younger (FILE *file, int verbose, rtx_insn *insn) { if (recog_memoized (insn) < 0) { if (verbose > 5) fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn)); return false; } switch (get_attr_type (insn)) { case TYPE_ALU_IMM: case TYPE_ALUS_IMM: case TYPE_LOGIC_IMM: case TYPE_LOGICS_IMM: case TYPE_EXTEND: case TYPE_MVN_IMM: case TYPE_MOV_IMM: case TYPE_MOV_REG: case TYPE_MOV_SHIFT: case TYPE_MOV_SHIFT_REG: case TYPE_BRANCH: case TYPE_CALL: return true; default: return false; } } /* Look for an instruction that can dual issue only as an older instruction, and move it in front of any instructions that can dual-issue as younger, while preserving the relative order of all other instructions in the ready list. This is a hueuristic to help dual-issue in later cycles, by postponing issue of more flexible instructions. This heuristic may affect dual issue opportunities in the current cycle. */ static void cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp, int clock) { int i; int first_older_only = -1, first_younger = -1; if (verbose > 5) fprintf (file, ";; sched_reorder for cycle %d with %d insns in ready list\n", clock, *n_readyp); /* Traverse the ready list from the head (the instruction to issue first), and looking for the first instruction that can issue as younger and the first instruction that can dual-issue only as older. */ for (i = *n_readyp - 1; i >= 0; i--) { rtx_insn *insn = ready[i]; if (cortexa7_older_only (insn)) { first_older_only = i; if (verbose > 5) fprintf (file, ";; reorder older found %d\n", INSN_UID (insn)); break; } else if (cortexa7_younger (file, verbose, insn) && first_younger == -1) first_younger = i; } /* Nothing to reorder because either no younger insn found or insn that can dual-issue only as older appears before any insn that can dual-issue as younger. 
*/ if (first_younger == -1) { if (verbose > 5) fprintf (file, ";; sched_reorder nothing to reorder as no younger\n"); return; } /* Nothing to reorder because no older-only insn in the ready list. */ if (first_older_only == -1) { if (verbose > 5) fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n"); return; } /* Move first_older_only insn before first_younger. */ if (verbose > 5) fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n", INSN_UID(ready [first_older_only]), INSN_UID(ready [first_younger])); rtx_insn *first_older_only_insn = ready [first_older_only]; for (i = first_older_only; i < first_younger; i++) { ready[i] = ready[i+1]; } ready[i] = first_older_only_insn; return; } /* Implement TARGET_SCHED_REORDER. */ static int arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp, int clock) { switch (arm_tune) { case TARGET_CPU_cortexa7: cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock); break; default: /* Do nothing for other cores. */ break; } return arm_issue_rate (); } /* This function implements the target macro TARGET_SCHED_ADJUST_COST. It corrects the value of COST based on the relationship between INSN and DEP through the dependence LINK. It returns the new value. There is a per-core adjust_cost hook to adjust scheduler costs and the per-core hook can choose to completely override the generic adjust_cost function. Only put bits of code into arm_adjust_cost that are common across all cores. */ static int arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost, unsigned int) { rtx i_pat, d_pat; /* When generating Thumb-1 code, we want to place flag-setting operations close to a conditional branch which depends on them, so that we can omit the comparison. 
*/ if (TARGET_THUMB1 && dep_type == 0 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn && recog_memoized (dep) >= 0 && get_attr_conds (dep) == CONDS_SET) return 0; if (current_tune->sched_adjust_cost != NULL) { if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost)) return cost; } /* XXX Is this strictly true? */ if (dep_type == REG_DEP_ANTI || dep_type == REG_DEP_OUTPUT) return 0; /* Call insns don't incur a stall, even if they follow a load. */ if (dep_type == 0 && CALL_P (insn)) return 1; if ((i_pat = single_set (insn)) != NULL && MEM_P (SET_SRC (i_pat)) && (d_pat = single_set (dep)) != NULL && MEM_P (SET_DEST (d_pat))) { rtx src_mem = XEXP (SET_SRC (i_pat), 0); /* This is a load after a store, there is no conflict if the load reads from a cached area. Assume that loads from the stack, and from the constant pool are cached, and that others will miss. This is a hack. */ if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem)) || reg_mentioned_p (stack_pointer_rtx, src_mem) || reg_mentioned_p (frame_pointer_rtx, src_mem) || reg_mentioned_p (hard_frame_pointer_rtx, src_mem)) return 1; } return cost; } int arm_max_conditional_execute (void) { return max_insns_skipped; } static int arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED) { if (TARGET_32BIT) return (TARGET_THUMB2 && !speed_p) ? 1 : 4; else return (optimize > 0) ? 2 : 0; } static int arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p) { return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p); } /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles" on Cortex-M4, where P varies from 1 to 3 according to some criteria), since sequences of non-executed instructions in IT blocks probably take the same amount of time as executed instructions (and the IT instruction itself takes space in icache). This function was experimentally determined to give good results on a popular embedded benchmark. 
*/
static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  /* Cost 1 only for 32-bit targets when optimizing for speed; everything
     else uses the default cost.  */
  return (TARGET_32BIT && speed_p)
         ? 1
         : arm_default_branch_cost (speed_p, predictable_p);
}

/* Branch cost for Cortex-M7 tuning: 0 when optimizing for speed, the
   default cost otherwise.  */
static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* True once init_fp_table has filled in value_fp0.  */
static bool fp_consts_inited = false;

/* The floating-point constant 0, set up by init_fp_table.  */
static REAL_VALUE_TYPE value_fp0;

/* Initialize value_fp0 to the DFmode value of "0" and record that the
   table has been initialized.  */
static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  Only positive
   zero is accepted: minus zero is rejected explicitly and every other
   value fails the comparison against value_fp0.  */
int
arm_const_double_rtx (rtx x)
{
  const REAL_VALUE_TYPE *r;

  /* Set up value_fp0 on first use.  */
  if (!fp_consts_inited)
    init_fp_table ();

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))
    return 0;

  if (real_equal (r, &value_fp0))
    return 1;

  return 0;
}

/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values.  These can be evaluated using
   this formula (with ^ for exponentiation):

     (-1)^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

     - A (most-significant) is the sign bit.
     - BCD are the exponent (encoded as r XOR 3).
     - EFGH are the mantissa (encoded as n - 16).
*/

/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  /* NOTE(review): FAIL is written by real_to_integer but never read here.  */
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero.  Disallow that case.  (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2).  (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}

/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}

/* Recognize immediates which can be used in various Neon instructions.  Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized.  In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler).  The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b.  Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

The INVERSE argument causes the bitwise inverse of the given operand to be recognized instead (used for recognizing legal immediates for the VAND/VORN pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be output, rather than the real insns vbic/vorr). INVERSE makes no difference to the recognition of float vectors. The return value is the variant of immediate as shown in the above table, or -1 if the given value doesn't match any of the listed patterns. */ static int neon_valid_immediate (rtx op, machine_mode mode, int inverse, rtx *modconst, int *elementwidth) { #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \ matches = 1; \ for (i = 0; i < idx; i += (STRIDE)) \ if (!(TEST)) \ matches = 0; \ if (matches) \ { \ immtype = (CLASS); \ elsize = (ELSIZE); \ break; \ } unsigned int i, elsize = 0, idx = 0, n_elts; unsigned int innersize; unsigned char bytes[16] = {}; int immtype = -1, matches; unsigned int invmask = inverse ? 0xff : 0; bool vector = GET_CODE (op) == CONST_VECTOR; if (vector) n_elts = CONST_VECTOR_NUNITS (op); else { n_elts = 1; gcc_assert (mode != VOIDmode); } innersize = GET_MODE_UNIT_SIZE (mode); /* Vectors of float constants. */ if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT) { rtx el0 = CONST_VECTOR_ELT (op, 0); if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0))) return -1; /* FP16 vectors cannot be represented. */ if (GET_MODE_INNER (mode) == HFmode) return -1; /* All elements in the vector must be the same. Note that 0.0 and -0.0 are distinct in this context. */ if (!const_vec_duplicate_p (op)) return -1; if (modconst) *modconst = CONST_VECTOR_ELT (op, 0); if (elementwidth) *elementwidth = 0; if (el0 == CONST0_RTX (GET_MODE (el0))) return 19; else return 18; } /* The tricks done in the code below apply for little-endian vector layout. For big-endian vectors only allow vectors of the form { a, a, a..., a }. 
FIXME: Implement logic for big-endian vectors. */ if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op)) return -1; /* Splat vector constant out into a byte vector. */ for (i = 0; i < n_elts; i++) { rtx el = vector ? CONST_VECTOR_ELT (op, i) : op; unsigned HOST_WIDE_INT elpart; gcc_assert (CONST_INT_P (el)); elpart = INTVAL (el); for (unsigned int byte = 0; byte < innersize; byte++) { bytes[idx++] = (elpart & 0xff) ^ invmask; elpart >>= BITS_PER_UNIT; } } /* Sanity check. */ gcc_assert (idx == GET_MODE_SIZE (mode)); do { CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0 && bytes[i + 2] == 0 && bytes[i + 3] == 0); CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1] && bytes[i + 2] == 0 && bytes[i + 3] == 0); CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0); CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]); CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0); CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]); CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1] && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff); CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]); CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff); CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]); CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1] && bytes[i + 2] == 0 && bytes[i + 3] == 0); CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1] && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff); CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0); CHECK (4, 32, 15, 
bytes[i] == 0 && bytes[i + 1] == 0 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff); CHECK (1, 8, 16, bytes[i] == bytes[0]); CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff) && bytes[i] == bytes[(i + 8) % idx]); } while (0); if (immtype == -1) return -1; if (elementwidth) *elementwidth = elsize; if (modconst) { unsigned HOST_WIDE_INT imm = 0; /* Un-invert bytes of recognized vector, if necessary. */ if (invmask != 0) for (i = 0; i < idx; i++) bytes[i] ^= invmask; if (immtype == 17) { /* FIXME: Broken on 32-bit H_W_I hosts. */ gcc_assert (sizeof (HOST_WIDE_INT) == 8); for (i = 0; i < 8; i++) imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0) << (i * BITS_PER_UNIT); *modconst = GEN_INT (imm); } else { unsigned HOST_WIDE_INT imm = 0; for (i = 0; i < elsize / BITS_PER_UNIT; i++) imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT); *modconst = GEN_INT (imm); } } return immtype; #undef CHECK } /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly, VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for float elements), and a modified constant (whatever should be output for a VMOV) in *MODCONST. */ int neon_immediate_valid_for_move (rtx op, machine_mode mode, rtx *modconst, int *elementwidth) { rtx tmpconst; int tmpwidth; int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth); if (retval == -1) return 0; if (modconst) *modconst = tmpconst; if (elementwidth) *elementwidth = tmpwidth; return 1; } /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If the immediate is valid, write a constant suitable for using as an operand to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. 
*/ int neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse, rtx *modconst, int *elementwidth) { rtx tmpconst; int tmpwidth; int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth); if (retval < 0 || retval > 5) return 0; if (modconst) *modconst = tmpconst; if (elementwidth) *elementwidth = tmpwidth; return 1; } /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If the immediate is valid, write a constant suitable for using as an operand to VSHR/VSHL to *MODCONST and the corresponding element width to *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift, because they have different limitations. */ int neon_immediate_valid_for_shift (rtx op, machine_mode mode, rtx *modconst, int *elementwidth, bool isleftshift) { unsigned int innersize = GET_MODE_UNIT_SIZE (mode); unsigned int n_elts = CONST_VECTOR_NUNITS (op), i; unsigned HOST_WIDE_INT last_elt = 0; unsigned HOST_WIDE_INT maxshift; /* Split vector constant out into a byte vector. */ for (i = 0; i < n_elts; i++) { rtx el = CONST_VECTOR_ELT (op, i); unsigned HOST_WIDE_INT elpart; if (CONST_INT_P (el)) elpart = INTVAL (el); else if (CONST_DOUBLE_P (el)) return 0; else gcc_unreachable (); if (i != 0 && elpart != last_elt) return 0; last_elt = elpart; } /* Shift less than element size. */ maxshift = innersize * 8; if (isleftshift) { /* Left shift immediate value can be from 0 to <size>-1. */ if (last_elt >= maxshift) return 0; } else { /* Right shift immediate value can be from 1 to <size>. */ if (last_elt == 0 || last_elt > maxshift) return 0; } if (elementwidth) *elementwidth = innersize * 8; if (modconst) *modconst = CONST_VECTOR_ELT (op, 0); return 1; } /* Return a string suitable for output of Neon immediate logic operation MNEM. 
*/ char * neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode, int inverse, int quad) { int width, is_valid; static char templ[40]; is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width); gcc_assert (is_valid != 0); if (quad) sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width); else sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width); return templ; } /* Return a string suitable for output of Neon immediate shift operation (VSHR or VSHL) MNEM. */ char * neon_output_shift_immediate (const char *mnem, char sign, rtx *op2, machine_mode mode, int quad, bool isleftshift) { int width, is_valid; static char templ[40]; is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift); gcc_assert (is_valid != 0); if (quad) sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width); else sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width); return templ; } /* Output a sequence of pairwise operations to implement a reduction. NOTE: We do "too much work" here, because pairwise operations work on two registers-worth of operands in one go. Unfortunately we can't exploit those extra calculations to do the full operation in fewer steps, I don't think. Although all vector elements of the result but the first are ignored, we actually calculate the same result in each of the elements. An alternative such as initially loading a vector with zero to use as each of the second operands would use up an additional register and take an extra instruction, for no particular gain. */ void neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode, rtx (*reduc) (rtx, rtx, rtx)) { unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode); rtx tmpsum = op1; for (i = parts / 2; i >= 1; i /= 2) { rtx dest = (i == 1) ? 
op0 : gen_reg_rtx (mode); emit_insn (reduc (dest, tmpsum, tmpsum)); tmpsum = dest; } } /* If VALS is a vector constant that can be loaded into a register using VDUP, generate instructions to do so and return an RTX to assign to the register. Otherwise return NULL_RTX. */ static rtx neon_vdup_constant (rtx vals) { machine_mode mode = GET_MODE (vals); machine_mode inner_mode = GET_MODE_INNER (mode); rtx x; if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4) return NULL_RTX; if (!const_vec_duplicate_p (vals, &x)) /* The elements are not all the same. We could handle repeating patterns of a mode larger than INNER_MODE here (e.g. int8x8_t {0, C, 0, C, 0, C, 0, C} which can be loaded using vdup.i16). */ return NULL_RTX; /* We can load this constant by using VDUP and a constant in a single ARM register. This will be cheaper than a vector load. */ x = copy_to_mode_reg (inner_mode, x); return gen_vec_duplicate (mode, x); } /* Generate code to load VALS, which is a PARALLEL containing only constants (for vec_init) or CONST_VECTOR, efficiently into a register. Returns an RTX to copy into the register, or NULL_RTX for a PARALLEL that cannot be converted into a CONST_VECTOR. */ rtx neon_make_constant (rtx vals) { machine_mode mode = GET_MODE (vals); rtx target; rtx const_vec = NULL_RTX; int n_elts = GET_MODE_NUNITS (mode); int n_const = 0; int i; if (GET_CODE (vals) == CONST_VECTOR) const_vec = vals; else if (GET_CODE (vals) == PARALLEL) { /* A CONST_VECTOR must contain only CONST_INTs and CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF). Only store valid constants in a CONST_VECTOR. */ for (i = 0; i < n_elts; ++i) { rtx x = XVECEXP (vals, 0, i); if (CONST_INT_P (x) || CONST_DOUBLE_P (x)) n_const++; } if (n_const == n_elts) const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)); } else gcc_unreachable (); if (const_vec != NULL && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL)) /* Load using VMOV. On Cortex-A8 this takes one cycle. 
*/ return const_vec; else if ((target = neon_vdup_constant (vals)) != NULL_RTX) /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON pipeline cycle; creating the constant takes one or two ARM pipeline cycles. */ return target; else if (const_vec != NULL_RTX) /* Load from constant pool. On Cortex-A8 this takes two cycles (for either double or quad vectors). We cannot take advantage of single-cycle VLD1 because we need a PC-relative addressing mode. */ return const_vec; else /* A PARALLEL containing something not valid inside CONST_VECTOR. We cannot construct an initializer. */ return NULL_RTX; } /* Initialize vector TARGET to VALS. */ void neon_expand_vector_init (rtx target, rtx vals) { machine_mode mode = GET_MODE (target); machine_mode inner_mode = GET_MODE_INNER (mode); int n_elts = GET_MODE_NUNITS (mode); int n_var = 0, one_var = -1; bool all_same = true; rtx x, mem; int i; for (i = 0; i < n_elts; ++i) { x = XVECEXP (vals, 0, i); if (!CONSTANT_P (x)) ++n_var, one_var = i; if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) all_same = false; } if (n_var == 0) { rtx constant = neon_make_constant (vals); if (constant != NULL_RTX) { emit_move_insn (target, constant); return; } } /* Splat a single non-constant element if we can. */ if (all_same && GET_MODE_SIZE (inner_mode) <= 4) { x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0)); emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x))); return; } /* One field is non-constant. Load constant then overwrite varying field. This is more efficient than using the stack. */ if (n_var == 1) { rtx copy = copy_rtx (vals); rtx merge_mask = GEN_INT (1 << one_var); /* Load constant part of vector, substitute neighboring value for varying element. */ XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts); neon_expand_vector_init (target, copy); /* Insert variable. 
*/ x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var)); emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target)); return; } /* Construct the vector in memory one field at a time and load the whole vector. */ mem = assign_stack_temp (mode, GET_MODE_SIZE (mode)); for (i = 0; i < n_elts; i++) emit_move_insn (adjust_address_nv (mem, inner_mode, i * GET_MODE_SIZE (inner_mode)), XVECEXP (vals, 0, i)); emit_move_insn (target, mem); } /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise ERR if it doesn't. EXP indicates the source location, which includes the inlining history for intrinsics. */ static void bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, const_tree exp, const char *desc) { HOST_WIDE_INT lane; gcc_assert (CONST_INT_P (operand)); lane = INTVAL (operand); if (lane < low || lane >= high) { if (exp) error ("%K%s %wd out of range %wd - %wd", exp, desc, lane, low, high - 1); else error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1); } } /* Bounds-check lanes. */ void neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high, const_tree exp) { bounds_check (operand, low, high, exp, "lane"); } /* Bounds-check constants. */ void arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high) { bounds_check (operand, low, high, NULL_TREE, "constant"); } HOST_WIDE_INT neon_element_bits (machine_mode mode) { return GET_MODE_UNIT_BITSIZE (mode); } /* Predicates for `match_operand' and `match_operator'. */ /* Return TRUE if OP is a valid coprocessor memory address pattern. WB is true if full writeback address modes are allowed and is false if limited writeback address modes (POST_INC and PRE_DEC) are allowed. */ int arm_coproc_mem_operand (rtx op, bool wb) { rtx ind; /* Reject eliminable registers. */ if (! 
(reload_in_progress || reload_completed || lra_in_progress) && ( reg_mentioned_p (frame_pointer_rtx, op) || reg_mentioned_p (arg_pointer_rtx, op) || reg_mentioned_p (virtual_incoming_args_rtx, op) || reg_mentioned_p (virtual_outgoing_args_rtx, op) || reg_mentioned_p (virtual_stack_dynamic_rtx, op) || reg_mentioned_p (virtual_stack_vars_rtx, op))) return FALSE; /* Constants are converted into offsets from labels. */ if (!MEM_P (op)) return FALSE; ind = XEXP (op, 0); if (reload_completed && (GET_CODE (ind) == LABEL_REF || (GET_CODE (ind) == CONST && GET_CODE (XEXP (ind, 0)) == PLUS && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF && CONST_INT_P (XEXP (XEXP (ind, 0), 1))))) return TRUE; /* Match: (mem (reg)). */ if (REG_P (ind)) return arm_address_register_rtx_p (ind, 0); /* Autoincremment addressing modes. POST_INC and PRE_DEC are acceptable in any case (subject to verification by arm_address_register_rtx_p). We need WB to be true to accept PRE_INC and POST_DEC. */ if (GET_CODE (ind) == POST_INC || GET_CODE (ind) == PRE_DEC || (wb && (GET_CODE (ind) == PRE_INC || GET_CODE (ind) == POST_DEC))) return arm_address_register_rtx_p (XEXP (ind, 0), 0); if (wb && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY) && arm_address_register_rtx_p (XEXP (ind, 0), 0) && GET_CODE (XEXP (ind, 1)) == PLUS && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0))) ind = XEXP (ind, 1); /* Match: (plus (reg) (const)). */ if (GET_CODE (ind) == PLUS && REG_P (XEXP (ind, 0)) && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode) && CONST_INT_P (XEXP (ind, 1)) && INTVAL (XEXP (ind, 1)) > -1024 && INTVAL (XEXP (ind, 1)) < 1024 && (INTVAL (XEXP (ind, 1)) & 3) == 0) return TRUE; return FALSE; } /* Return TRUE if OP is a memory operand which we can load or store a vector to/from. 
TYPE is one of the following values:
    0 - Vector load/store (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
   STRICT enables the rejection of eliminable registers before reload.  */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  /* After reload, accept a label or a label plus a constant offset.  */
  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers (any TYPE except 1), and
     pre-decrement for TYPE 0 only.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))
     return true;

  /* Match:
     (plus (reg)
          (const)).
     Only for TYPE 0; the constant must be a multiple of 4 above -1024.  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
         than what the instruction format permits.  We have no such constraint
         on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
          < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}

/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
          || reg_mentioned_p (arg_pointer_rtx, op)
          || reg_mentioned_p (virtual_incoming_args_rtx, op)
          || reg_mentioned_p (virtual_outgoing_args_rtx, op)
          || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
          || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  /* After reload, accept a label or a label plus a constant offset.  */
  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
          || (GET_CODE (ind) == CONST
              && GET_CODE (XEXP (ind, 0)) == PLUS
              && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
              && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}

/* Prepares the operands for the VCMLA by lane instruction such that the right
   register number is selected.  This instruction is special in that it always
   requires a D register, however there is a choice to be made between Dn[0],
   Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.

   The VCMLA by lane function always selects two values.  For instance given D0
   and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
   used by the instruction.  However given V4SF then index 0 and 1 are valid as
   D0[0] or D1[0] are both valid.

   This function centralizes that information based on OPERANDS, OPERANDS[3]
   will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
   updated to contain the right index.
*/ rtx * neon_vcmla_lane_prepare_operands (rtx *operands) { int lane = INTVAL (operands[4]); machine_mode constmode = SImode; machine_mode mode = GET_MODE (operands[3]); int regno = REGNO (operands[3]); regno = ((regno - FIRST_VFP_REGNUM) >> 1); if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4) { operands[3] = gen_int_mode (regno + 1, constmode); operands[4] = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode); } else { operands[3] = gen_int_mode (regno, constmode); operands[4] = gen_int_mode (lane, constmode); } return operands; } /* Return true if X is a register that will be eliminated later on. */ int arm_eliminable_register (rtx x) { return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM || REGNO (x) == ARG_POINTER_REGNUM || (REGNO (x) >= FIRST_VIRTUAL_REGISTER && REGNO (x) <= LAST_VIRTUAL_REGISTER)); } /* Return GENERAL_REGS if a scratch register required to reload x to/from coprocessor registers. Otherwise return NO_REGS. */ enum reg_class coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb) { if (mode == HFmode) { if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST) return GENERAL_REGS; if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true)) return NO_REGS; return GENERAL_REGS; } /* The neon move patterns handle all legitimate vector and struct addresses. */ if (TARGET_NEON && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR) && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT || VALID_NEON_STRUCT_MODE (mode))) return NO_REGS; if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode)) return NO_REGS; return GENERAL_REGS; } /* Values which must be returned in the most-significant end of the return register. */ static bool arm_return_in_msb (const_tree valtype) { return (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN && (AGGREGATE_TYPE_P (valtype) || TREE_CODE (valtype) == COMPLEX_TYPE || FIXED_POINT_TYPE_P (valtype))); } /* Return TRUE if X references a SYMBOL_REF. 
*/ int symbol_mentioned_p (rtx x) { const char * fmt; int i; if (GET_CODE (x) == SYMBOL_REF) return 1; /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they are constant offsets, not symbols. */ if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) return 0; fmt = GET_RTX_FORMAT (GET_CODE (x)); for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) { if (fmt[i] == 'E') { int j; for (j = XVECLEN (x, i) - 1; j >= 0; j--) if (symbol_mentioned_p (XVECEXP (x, i, j))) return 1; } else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i))) return 1; } return 0; } /* Return TRUE if X references a LABEL_REF. */ int label_mentioned_p (rtx x) { const char * fmt; int i; if (GET_CODE (x) == LABEL_REF) return 1; /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing instruction, but they are constant offsets, not symbols. */ if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS) return 0; fmt = GET_RTX_FORMAT (GET_CODE (x)); for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) { if (fmt[i] == 'E') { int j; for (j = XVECLEN (x, i) - 1; j >= 0; j--) if (label_mentioned_p (XVECEXP (x, i, j))) return 1; } else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i))) return 1; } return 0; } int tls_mentioned_p (rtx x) { switch (GET_CODE (x)) { case CONST: return tls_mentioned_p (XEXP (x, 0)); case UNSPEC: if (XINT (x, 1) == UNSPEC_TLS) return 1; /* Fall through. */ default: return 0; } } /* Must not copy any rtx that uses a pc-relative address. Also, disallow copying of load-exclusive instructions that may appear after splitting of compare-and-swap-style operations so as to prevent those loops from being transformed away from their canonical forms (see PR 69904). */ static bool arm_cannot_copy_insn_p (rtx_insn *insn) { /* The tls call insn cannot be copied, as it is paired with a data word. 
*/ if (recog_memoized (insn) == CODE_FOR_tlscall) return true; subrtx_iterator::array_type array; FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL) { const_rtx x = *iter; if (GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_BASE || XINT (x, 1) == UNSPEC_PIC_UNIFIED)) return true; } rtx set = single_set (insn); if (set) { rtx src = SET_SRC (set); if (GET_CODE (src) == ZERO_EXTEND) src = XEXP (src, 0); /* Catch the load-exclusive and load-acquire operations. */ if (GET_CODE (src) == UNSPEC_VOLATILE && (XINT (src, 1) == VUNSPEC_LL || XINT (src, 1) == VUNSPEC_LAX)) return true; } return false; } enum rtx_code minmax_code (rtx x) { enum rtx_code code = GET_CODE (x); switch (code) { case SMAX: return GE; case SMIN: return LE; case UMIN: return LEU; case UMAX: return GEU; default: gcc_unreachable (); } } /* Match pair of min/max operators that can be implemented via usat/ssat. */ bool arm_sat_operator_match (rtx lo_bound, rtx hi_bound, int *mask, bool *signed_sat) { /* The high bound must be a power of two minus one. */ int log = exact_log2 (INTVAL (hi_bound) + 1); if (log == -1) return false; /* The low bound is either zero (for usat) or one less than the negation of the high bound (for ssat). */ if (INTVAL (lo_bound) == 0) { if (mask) *mask = log; if (signed_sat) *signed_sat = false; return true; } if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1) { if (mask) *mask = log + 1; if (signed_sat) *signed_sat = true; return true; } return false; } /* Return 1 if memory locations are adjacent. */ int adjacent_mem_locations (rtx a, rtx b) { /* We don't guarantee to preserve the order of these memory refs. 
*/ if (volatile_refs_p (a) || volatile_refs_p (b)) return 0; if ((REG_P (XEXP (a, 0)) || (GET_CODE (XEXP (a, 0)) == PLUS && CONST_INT_P (XEXP (XEXP (a, 0), 1)))) && (REG_P (XEXP (b, 0)) || (GET_CODE (XEXP (b, 0)) == PLUS && CONST_INT_P (XEXP (XEXP (b, 0), 1))))) { HOST_WIDE_INT val0 = 0, val1 = 0; rtx reg0, reg1; int val_diff; if (GET_CODE (XEXP (a, 0)) == PLUS) { reg0 = XEXP (XEXP (a, 0), 0); val0 = INTVAL (XEXP (XEXP (a, 0), 1)); } else reg0 = XEXP (a, 0); if (GET_CODE (XEXP (b, 0)) == PLUS) { reg1 = XEXP (XEXP (b, 0), 0); val1 = INTVAL (XEXP (XEXP (b, 0), 1)); } else reg1 = XEXP (b, 0); /* Don't accept any offset that will require multiple instructions to handle, since this would cause the arith_adjacentmem pattern to output an overlong sequence. */ if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS)) return 0; /* Don't allow an eliminable register: register elimination can make the offset too large. */ if (arm_eliminable_register (reg0)) return 0; val_diff = val1 - val0; if (arm_ld_sched) { /* If the target has load delay slots, then there's no benefit to using an ldm instruction unless the offset is zero and we are optimizing for size. */ return (optimize_size && (REGNO (reg0) == REGNO (reg1)) && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4) && (val_diff == 4 || val_diff == -4)); } return ((REGNO (reg0) == REGNO (reg1)) && (val_diff == 4 || val_diff == -4)); } return 0; } /* Return true if OP is a valid load or store multiple operation. LOAD is true for load operations, false for store operations. CONSECUTIVE is true if the register numbers in the operation must be consecutive in the register bank. RETURN_PC is true if value is to be loaded in PC. The pattern we are trying to match for load is: [(SET (R_d0) (MEM (PLUS (addr) (offset)))) (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>)))) : : (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>)))) ] where 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))). 
2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn). 3. If consecutive is TRUE, then for kth register being loaded, REGNO (R_dk) = REGNO (R_d0) + k. The pattern for store is similar. */ bool ldm_stm_operation_p (rtx op, bool load, machine_mode mode, bool consecutive, bool return_pc) { HOST_WIDE_INT count = XVECLEN (op, 0); rtx reg, mem, addr; unsigned regno; unsigned first_regno; HOST_WIDE_INT i = 1, base = 0, offset = 0; rtx elt; bool addr_reg_in_reglist = false; bool update = false; int reg_increment; int offset_adj; int regs_per_val; /* If not in SImode, then registers must be consecutive (e.g., VLDM instructions for DFmode). */ gcc_assert ((mode == SImode) || consecutive); /* Setting return_pc for stores is illegal. */ gcc_assert (!return_pc || load); /* Set up the increments and the regs per val based on the mode. */ reg_increment = GET_MODE_SIZE (mode); regs_per_val = reg_increment / 4; offset_adj = return_pc ? 1 : 0; if (count <= 1 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj))))) return false; /* Check if this is a write-back. */ elt = XVECEXP (op, 0, offset_adj); if (GET_CODE (SET_SRC (elt)) == PLUS) { i++; base = 1; update = true; /* The offset adjustment must be the number of registers being popped times the size of a single register. */ if (!REG_P (SET_DEST (elt)) || !REG_P (XEXP (SET_SRC (elt), 0)) || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0))) || !CONST_INT_P (XEXP (SET_SRC (elt), 1)) || INTVAL (XEXP (SET_SRC (elt), 1)) != ((count - 1 - offset_adj) * reg_increment)) return false; } i = i + offset_adj; base = base + offset_adj; /* Perform a quick check so we don't blow up below. If only one reg is loaded, success depends on the type: VLDM can do just one reg, LDM must do at least two. 
*/ if ((count <= i) && (mode == SImode)) return false; elt = XVECEXP (op, 0, i - 1); if (GET_CODE (elt) != SET) return false; if (load) { reg = SET_DEST (elt); mem = SET_SRC (elt); } else { reg = SET_SRC (elt); mem = SET_DEST (elt); } if (!REG_P (reg) || !MEM_P (mem)) return false; regno = REGNO (reg); first_regno = regno; addr = XEXP (mem, 0); if (GET_CODE (addr) == PLUS) {