view gcc/config/arm/arm.c @ 158:494b0b89df80 default tip

...
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 25 May 2020 18:13:55 +0900
parents 1830386684a0
children
line wrap: on
line source

/* Output routines for GCC for ARM.
   Copyright (C) 1991-2020 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "cfghooks.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "explow.h"
#include "expr.h"
#include "cfgrtl.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "intl.h"
#include "libfuncs.h"
#include "opts.h"
#include "dumpfile.h"
#include "target-globals.h"
#include "builtins.h"
#include "tm-constrs.h"
#include "rtl-iter.h"
#include "optabs-libfuncs.h"
#include "gimplify.h"
#include "gimple.h"
#include "selftest.h"

/* This file should be included last.  */
#include "target-def.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

/* The last .arch and .fpu assembly strings that we printed.  */
static std::string arm_last_printed_arch_string;
static std::string arm_last_printed_fpu_string;

void (*arm_lang_output_object_attributes_hook)(void);

struct four_ints
{
  int i[4];
};

/* Forward function declarations.  */
static bool arm_const_not_ok_for_debug_p (rtx);
static int arm_needs_doubleword_align (machine_mode, const_tree);
static int arm_compute_static_chain_stack_bytes (void);
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_compute_frame_layout (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
			     unsigned HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static unsigned bitmap_popcount (const sbitmap);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
static bool is_called_in_ARM_mode (tree);
static int thumb2_legitimate_index_p (machine_mode, rtx, int);
static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
static rtx arm_legitimize_address (rtx, rtx, machine_mode);
static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
inline static int thumb1_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static unsigned arm_size_return_regs (void);
static bool arm_assemble_integer (rtx, unsigned int, int);
static void arm_print_operand (FILE *, rtx, int);
static void arm_print_operand_address (FILE *, machine_mode, rtx);
static bool arm_print_operand_punct_valid_p (unsigned char code);
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx_insn *);
static int arm_barrier_cost (rtx_insn *);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
			       machine_mode, rtx);
static void arm_reorg (void);
static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_core_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
static void arm_output_function_epilogue (FILE *);
static void arm_output_function_prologue (FILE *);
static int arm_comp_type_attributes (const_tree, const_tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
static int optimal_immediate_sequence (enum rtx_code code,
				       unsigned HOST_WIDE_INT val,
				       struct four_ints *return_sequence);
static int optimal_immediate_sequence_1 (enum rtx_code code,
					 unsigned HOST_WIDE_INT val,
					 struct four_ints *return_sequence,
					 int i);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static machine_mode arm_promote_function_mode (const_tree,
						    machine_mode, int *,
						    const_tree, int);
static bool arm_return_in_memory (const_tree, const_tree);
static rtx arm_function_value (const_tree, const_tree, bool);
static rtx arm_libcall_value_1 (machine_mode);
static rtx arm_libcall_value (machine_mode, const_rtx);
static bool arm_function_value_regno_p (const unsigned int);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static bool arm_have_conditional_execution (void);
static bool arm_cannot_force_const_mem (machine_mode, rtx);
static bool arm_legitimate_constant_p (machine_mode, rtx);
static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int arm_insn_cost (rtx_insn *, bool);
static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
static void emit_constant_insn (rtx cond, rtx pattern);
static rtx_insn *emit_set_insn (rtx, rtx);
static void arm_add_cfa_adjust_cfa_note (rtx, int, rtx, rtx);
static rtx emit_multi_reg_push (unsigned long, unsigned long);
static void arm_emit_multi_reg_pop (unsigned long);
static int vfp_emit_fstmd (int, int);
static void arm_emit_vfp_multi_reg_pop (int, int, rtx);
static int arm_arg_partial_bytes (cumulative_args_t,
				  const function_arg_info &);
static rtx arm_function_arg (cumulative_args_t, const function_arg_info &);
static void arm_function_arg_advance (cumulative_args_t,
				      const function_arg_info &);
static pad_direction arm_function_arg_padding (machine_mode, const_tree);
static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
				      const_tree);
static rtx aapcs_libcall_value (machine_mode);
static int aapcs_select_return_coproc (const_tree, const_tree);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);
static void arm_file_start (void);
static void arm_insert_attributes (tree, tree *);

static void arm_setup_incoming_varargs (cumulative_args_t,
					const function_arg_info &, int *, int);
static bool arm_pass_by_reference (cumulative_args_t,
				   const function_arg_info &);
static bool arm_promote_prototypes (const_tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (const_tree);
static bool arm_must_pass_in_stack (const function_arg_info &);
static bool arm_return_in_memory (const_tree, const_tree);
#if ARM_UNWIND_INFO
static void arm_unwind_emit (FILE *, rtx_insn *);
static bool arm_output_ttype (rtx);
static void arm_asm_emit_except_personality (rtx);
#endif
static void arm_asm_init_sections (void);
static rtx arm_dwarf_register_span (rtx);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static tree arm_build_builtin_va_list (void);
static void arm_expand_builtin_va_start (tree, rtx);
static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
static void arm_option_override (void);
static void arm_option_save (struct cl_target_option *, struct gcc_options *);
static void arm_option_restore (struct gcc_options *,
				struct cl_target_option *);
static void arm_override_options_after_change (void);
static void arm_option_print (FILE *, int, struct cl_target_option *);
static void arm_set_current_function (tree);
static bool arm_can_inline_p (tree, tree);
static void arm_relayout_function (tree);
static bool arm_valid_target_attribute_p (tree, tree, tree, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
static bool arm_sched_can_speculate_insn (rtx_insn *);
static bool arm_macro_fusion_p (void);
static bool arm_cannot_copy_insn_p (rtx_insn *);
static int arm_issue_rate (void);
static int arm_sched_variable_issue (FILE *, int, rtx_insn *, int);
static int arm_first_cycle_multipass_dfa_lookahead (void);
static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool arm_output_addr_const_extra (FILE *, rtx);
static bool arm_allocate_stack_slots_for_args (void);
static bool arm_warn_func_return (tree);
static tree arm_promoted_type (const_tree t);
static bool arm_scalar_mode_supported_p (scalar_mode);
static bool arm_frame_pointer_required (void);
static bool arm_can_eliminate (const int, const int);
static void arm_asm_trampoline_template (FILE *);
static void arm_trampoline_init (rtx, tree, rtx);
static rtx arm_trampoline_adjust_address (rtx);
static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
static bool arm_array_mode_supported_p (machine_mode,
					unsigned HOST_WIDE_INT);
static machine_mode arm_preferred_simd_mode (scalar_mode);
static bool arm_class_likely_spilled_p (reg_class_t);
static HOST_WIDE_INT arm_vector_alignment (const_tree type);
static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
static bool arm_builtin_support_vector_misalignment (machine_mode mode,
						     const_tree type,
						     int misalignment,
						     bool is_packed);
static void arm_conditional_register_usage (void);
static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
static unsigned int arm_autovectorize_vector_modes (vector_modes *, bool);
static int arm_default_branch_cost (bool, bool);
static int arm_cortex_a5_branch_cost (bool, bool);
static int arm_cortex_m_branch_cost (bool, bool);
static int arm_cortex_m7_branch_cost (bool, bool);

static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
					  const vec_perm_indices &);

static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);

static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
					   tree vectype,
					   int misalign ATTRIBUTE_UNUSED);
static unsigned arm_add_stmt_cost (void *data, int count,
				   enum vect_cost_for_stmt kind,
				   struct _stmt_vec_info *stmt_info,
				   int misalign,
				   enum vect_cost_model_location where);

static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
					 bool op0_preserve_value);
static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);

static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
				     const_tree);
static section *arm_function_section (tree, enum node_frequency, bool, bool);
static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
						int reloc);
static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
static opt_scalar_float_mode arm_floatn_mode (int, bool);
static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
static bool arm_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
static rtx_insn * thumb1_md_asm_adjust (vec<rtx> &, vec<rtx> &,
					vec<const char *> &, vec<rtx> &,
					HARD_REG_SET &);

/* Table of machine attributes.  */
static const struct attribute_spec arm_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  /* Function calls made to this symbol must be done indirectly, because
     it may lie outside of the 26 bit addressing range of a normal function
     call.  */
  { "long_call",    0, 0, false, true,  true,  false, NULL, NULL },
  /* Whereas these functions are always known to reside within the 26 bit
     addressing range.  */
  { "short_call",   0, 0, false, true,  true,  false, NULL, NULL },
  /* Specify the procedure call conventions for a function.  */
  { "pcs",          1, 1, false, true,  true,  false, arm_handle_pcs_attribute,
    NULL },
  /* Interrupt Service Routines have special prologue and epilogue requirements.  */
  { "isr",          0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "interrupt",    0, 1, false, false, false, false, arm_handle_isr_attribute,
    NULL },
  { "naked",        0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#ifdef ARM_PE
  /* ARM/PE has three new attributes:
     interfacearm - ?
     dllexport - for exporting a function/variable that will live in a dll
     dllimport - for importing a function/variable from a dll

     Microsoft allows multiple declspecs in one __declspec, separating
     them with spaces.  We do NOT support this.  Instead, use __declspec
     multiple times.
  */
  { "dllimport",    0, 0, true,  false, false, false, NULL, NULL },
  { "dllexport",    0, 0, true,  false, false, false, NULL, NULL },
  { "interfacearm", 0, 0, true,  false, false, false,
    arm_handle_fndecl_attribute, NULL },
#elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
  { "dllimport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "dllexport",    0, 0, false, false, false, false, handle_dll_attribute,
    NULL },
  { "notshared",    0, 0, false, true, false, false,
    arm_handle_notshared_attribute, NULL },
#endif
  /* ARMv8-M Security Extensions support.  */
  { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
    arm_handle_cmse_nonsecure_entry, NULL },
  { "cmse_nonsecure_call", 0, 0, true, false, false, true,
    arm_handle_cmse_nonsecure_call, NULL },
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef TARGET_CHECK_BUILTIN_CALL
#define TARGET_CHECK_BUILTIN_CALL arm_check_builtin_call

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef  TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES arm_insert_attributes

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START arm_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer

#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND arm_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P arm_can_inline_p

#undef TARGET_RELAYOUT_FUNCTION
#define TARGET_RELAYOUT_FUNCTION arm_relayout_function

#undef  TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE arm_option_override

#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE arm_option_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE arm_option_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT arm_option_print

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef TARGET_SCHED_CAN_SPECULATE_INSN
#define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn

#undef TARGET_SCHED_MACRO_FUSION_P
#define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p

#undef TARGET_SCHED_MACRO_FUSION_PAIR_P
#define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION arm_set_current_function

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER arm_sched_reorder

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST arm_register_move_cost

#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST arm_memory_move_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef TARGET_FLOATN_MODE
#define TARGET_FLOATN_MODE arm_floatn_mode

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE arm_function_value

#undef  TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE arm_libcall_value

#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk

#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost
#undef TARGET_INSN_COST
#define TARGET_INSN_COST arm_insn_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
#undef TARGET_ARRAY_MODE_SUPPORTED_P
#define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
#undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES
#define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_MODES \
  arm_autovectorize_vector_modes

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS  arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin
#undef  TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL arm_builtin_decl

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG arm_function_arg
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args

#undef TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT arm_trampoline_init
#undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
#define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN arm_warn_func_return

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY arm_return_in_memory

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

#if ARM_UNWIND_INFO
#undef TARGET_ASM_UNWIND_EMIT
#define TARGET_ASM_UNWIND_EMIT arm_unwind_emit

/* EABI unwinding tables use a different format for the typeinfo tables.  */
#undef TARGET_ASM_TTYPE
#define TARGET_ASM_TTYPE arm_output_ttype

#undef TARGET_ARM_EABI_UNWINDER
#define TARGET_ARM_EABI_UNWINDER true

#undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
#define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality

#endif /* ARM_UNWIND_INFO */

#undef TARGET_ASM_INIT_SECTIONS
#define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span

#undef  TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p

#ifdef HAVE_AS_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_HAVE_CONDITIONAL_EXECUTION
#define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem

#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 4095

/* The minimum is set such that the total size of the block
   for a particular anchor is -4088 + 1 + 4095 bytes, which is
   divisible by eight, ensuring natural spacing of anchors.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -4088

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE arm_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE arm_sched_variable_issue

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
  arm_first_cycle_multipass_dfa_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
  arm_first_cycle_multipass_dfa_lookahead_guard

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE arm_mangle_type

#undef TARGET_INVALID_CONVERSION
#define TARGET_INVALID_CONVERSION arm_invalid_conversion

#undef TARGET_INVALID_UNARY_OP
#define TARGET_INVALID_UNARY_OP arm_invalid_unary_op

#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP arm_invalid_binary_op

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P	arm_legitimate_address_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class

#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE arm_promoted_type

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p

#undef TARGET_COMPUTE_FRAME_LAYOUT
#define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE arm_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p

#undef TARGET_VECTORIZE_BUILTINS
#define TARGET_VECTORIZE_BUILTINS

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  arm_builtin_vectorized_function

#undef TARGET_VECTOR_ALIGNMENT
#define TARGET_VECTOR_ALIGNMENT arm_vector_alignment

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
  arm_vector_alignment_reachable

#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  arm_builtin_support_vector_misalignment

#undef TARGET_PREFERRED_RENAME_CLASS
#define TARGET_PREFERRED_RENAME_CLASS \
  arm_preferred_rename_class

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  arm_builtin_vectorization_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON \
  arm_canonicalize_comparison

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset

#undef MAX_INSN_PER_IT_BLOCK
#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p

#undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
#define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true

#undef TARGET_SCHED_FUSION_PRIORITY
#define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority

#undef  TARGET_ASM_FUNCTION_SECTION
#define TARGET_ASM_FUNCTION_SECTION arm_function_section

#undef TARGET_ASM_ELF_FLAGS_NUMERIC
#define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags

#undef TARGET_EXPAND_DIVMOD_LIBFUNC
#define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc

#undef TARGET_C_EXCESS_PRECISION
#define TARGET_C_EXCESS_PRECISION arm_excess_precision

/* Although the architecture reserves bits 0 and 1, only the former is
   used for ARM/Thumb ISA selection in v7 and earlier versions.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST arm_md_asm_adjust

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = TARGET_CPU_arm_none;

/* The current tuning set.  */
const struct tune_params *current_tune;

/* Which floating point hardware to schedule for.  */
int arm_fpu_attr;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
unsigned int tune_flags = 0;

/* The highest ARM architecture version supported by the
   target.  */
enum base_architecture arm_base_arch = BASE_ARCH_0;

/* Active target architecture and tuning.  */

struct arm_build_target arm_active_target;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5T extensions.  */
int arm_arch5t = 0;

/* Nonzero if this chip supports the ARM Architecture 5TE extensions.  */
int arm_arch5te = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip supports the ARM 6K extensions.  */
int arm_arch6k = 0;

/* Nonzero if this chip supports the ARM 6KZ extensions.  */
int arm_arch6kz = 0;

/* Nonzero if instructions present in ARMv6-M can be used.  */
int arm_arch6m = 0;

/* Nonzero if this chip supports the ARM 7 extensions.  */
int arm_arch7 = 0;

/* Nonzero if this chip supports the Large Physical Address Extension.  */
int arm_arch_lpae = 0;

/* Nonzero if instructions not present in the 'M' profile can be used.  */
int arm_arch_notm = 0;

/* Nonzero if instructions present in ARMv7E-M can be used.  */
int arm_arch7em = 0;

/* Nonzero if instructions present in ARMv8 can be used.  */
int arm_arch8 = 0;

/* Nonzero if this chip supports the ARMv8.1 extensions.  */
int arm_arch8_1 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.2 extensions.  */
int arm_arch8_2 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.3 extensions.  */
int arm_arch8_3 = 0;

/* Nonzero if this chip supports the ARM Architecture 8.4 extensions.  */
int arm_arch8_4 = 0;
/* Nonzero if this chip supports the ARM Architecture 8.1-M Mainline
   extensions.  */
int arm_arch8_1m_main = 0;

/* Nonzero if this chip supports the FP16 instructions extension of ARM
   Architecture 8.2.  */
int arm_fp16_inst = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip supports Intel Wireless MMX2 technology.  */
int arm_arch_iwmmxt2 = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if tuning for Cortex-A9.  */
int arm_tune_cortex_a9 = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* Nonzero if chip supports Thumb 1.  */
int arm_arch_thumb1;

/* Nonzero if chip supports Thumb 2.  */
int arm_arch_thumb2;

/* Nonzero if chip supports integer division instruction.  */
int arm_arch_arm_hwdiv;
int arm_arch_thumb_hwdiv;

/* Nonzero if chip disallows volatile memory access in IT block.  */
int arm_arch_no_volatile_ce;

/* Nonzero if we shouldn't use literal pools.  */
bool arm_disable_literal_pool = false;

/* The register number to be used for the PIC offset register.  */
unsigned arm_pic_register = INVALID_REGNUM;

enum arm_pcs arm_pcs_default;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
/* arm_current_cc is also used for Thumb-2 cond_exec blocks.  */
enum arm_cond_code arm_current_cc;

rtx arm_target_insn;
int arm_target_label;
/* The number of conditionally executed insns, including the current insn.  */
int arm_condexec_count = 0;
/* A bitmask specifying the patterns for the IT block.
   Zero means do not output an IT block before this insn. */
int arm_condexec_mask = 0;
/* The number of bits used in arm_condexec_mask.  */
int arm_condexec_masklen = 0;

/* Nonzero if chip supports the ARMv8 CRC instructions.  */
int arm_arch_crc = 0;

/* Nonzero if chip supports the AdvSIMD Dot Product instructions.  */
int arm_arch_dotprod = 0;

/* Nonzero if chip supports the ARMv8-M security extensions.  */
int arm_arch_cmse = 0;

/* Nonzero if the core has a very small, high-latency, multiply unit.  */
int arm_m_profile_small_mul = 0;

/* Nonzero if chip supports the AdvSIMD I8MM instructions.  */
int arm_arch_i8mm = 0;

/* Nonzero if chip supports the BFloat16 instructions.  */
int arm_arch_bf16 = 0;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

/* The register numbers in sequence, for passing to arm_gen_load_multiple.  */
int arm_regs_in_sequence[] =
{
  0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
};

#define DEF_FP_SYSREG(reg) #reg,
const char *fp_sysreg_names[NB_FP_SYSREGS] = {
  FP_SYSREGS
};
#undef DEF_FP_SYSREG

#define ARM_LSL_NAME "lsl"
#define streq(string1, string2) (strcmp (string1, string2) == 0)

#define THUMB2_WORK_REGS (0xff & ~(  (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
				   | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
				   | (1 << PIC_OFFSET_TABLE_REGNUM)))

/* Initialization code.  */

struct cpu_tune
{
  enum processor_type scheduler;
  unsigned int tune_flags;
  const struct tune_params *tune;
};

#define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
#define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
  {								\
    num_slots,							\
    l1_size,							\
    l1_line_size						\
  }

/* arm generic vectorizer costs.  */
static const
struct cpu_vec_costs arm_default_vec_cost = {
  1,					/* scalar_stmt_cost.  */
  1,					/* scalar load_cost.  */
  1,					/* scalar_store_cost.  */
  1,					/* vec_stmt_cost.  */
  1,					/* vec_to_scalar_cost.  */
  1,					/* scalar_to_vec_cost.  */
  1,					/* vec_align_load_cost.  */
  1,					/* vec_unalign_load_cost.  */
  1,					/* vec_unalign_store_cost.  */
  1,					/* vec_store_cost.  */
  3,					/* cond_taken_branch_cost.  */
  1,					/* cond_not_taken_branch_cost.  */
};

/* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h  */
#include "aarch-cost-tables.h"



const struct cpu_cost_table cortexa9_extra_costs =
{
  /* ALU */
  {
    0,			/* arith.  */
    0,			/* logical.  */
    0,			/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (2),	/* arith_shift_reg.  */
    0,			/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (2),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    0,			/* clz.  */
    0,			/* rev.  */
    0,			/* non_exec.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (3),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (3),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A9.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (4),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (4),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2),	/* load.  */
    COSTS_N_INSNS (2),	/* load_sign_extend.  */
    COSTS_N_INSNS (2),	/* ldrd.  */
    COSTS_N_INSNS (2),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (5),	/* loadf.  */
    COSTS_N_INSNS (5),	/* loadd.  */
    COSTS_N_INSNS (1),  /* load_unaligned.  */
    COSTS_N_INSNS (2),	/* store.  */
    COSTS_N_INSNS (2),	/* strd.  */
    COSTS_N_INSNS (2),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* storef.  */
    COSTS_N_INSNS (1),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (14),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub. */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (1),	/* fpconst.  */
      COSTS_N_INSNS (1),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (24),	/* div.  */
      COSTS_N_INSNS (5),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (1),	/* fpconst.  */
      COSTS_N_INSNS (1),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};

const struct cpu_cost_table cortexa8_extra_costs =
{
  /* ALU */
  {
    0,			/* arith.  */
    0,			/* logical.  */
    COSTS_N_INSNS (1),	/* shift.  */
    0,			/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    0,			/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    0,			/* log_shift_reg.  */
    0,			/* extend.  */
    0,			/* extend_arith.  */
    0,			/* bfi.  */
    0,			/* bfx.  */
    0,			/* clz.  */
    0,			/* rev.  */
    0,			/* non_exec.  */
    true		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (30)	/* idiv.  No HW div on Cortex A8.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (2),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (1),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* loadf.  */
    COSTS_N_INSNS (1),	/* loadd.  */
    COSTS_N_INSNS (1),  /* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (1),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (1),	/* storef.  */
    COSTS_N_INSNS (1),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (36),	/* div.  */
      COSTS_N_INSNS (11),	/* mult.  */
      COSTS_N_INSNS (20),	/* mult_addsub. */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (9),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (6),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (8),	/* toint.  */
      COSTS_N_INSNS (8),	/* fromint.  */
      COSTS_N_INSNS (8)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (64),	/* div.  */
      COSTS_N_INSNS (16),	/* mult.  */
      COSTS_N_INSNS (25),	/* mult_addsub.  */
      COSTS_N_INSNS (30),	/* fma.  */
      COSTS_N_INSNS (9),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (6),	/* compare.  */
      COSTS_N_INSNS (6),	/* widen.  */
      COSTS_N_INSNS (6),	/* narrow.  */
      COSTS_N_INSNS (8),	/* toint.  */
      COSTS_N_INSNS (8),	/* fromint.  */
      COSTS_N_INSNS (8)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};

const struct cpu_cost_table cortexa5_extra_costs =
{
  /* ALU */
  {
    0,			/* arith.  */
    0,			/* logical.  */
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    0,			/* non_exec.  */
    true		/* non_exec_costs_exec.  */
  },

  {
    /* MULT SImode */
    {
      0,			/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (7)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (1),	/* extend.  */
      0,			/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (6),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (4),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (3),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub. */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30),	/* div.  */
      COSTS_N_INSNS (6),	/* mult.  */
      COSTS_N_INSNS (10),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};


const struct cpu_cost_table cortexa7_extra_costs =
{
  /* ALU */
  {
    0,			/* arith.  */
    0,			/* logical.  */
    COSTS_N_INSNS (1),	/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    COSTS_N_INSNS (1),	/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    0,			/* non_exec.  */
    true		/* non_exec_costs_exec.  */
  },

  {
    /* MULT SImode */
    {
      0,			/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (1),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (1),	/* extend_add.  */
      COSTS_N_INSNS (7)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (1),	/* extend.  */
      0,			/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (1),	/* load.  */
    COSTS_N_INSNS (1),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (1),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (2),	/* loadd.  */
    COSTS_N_INSNS (1),	/* load_unaligned.  */
    COSTS_N_INSNS (1),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (3),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub. */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (30),	/* div.  */
      COSTS_N_INSNS (6),	/* mult.  */
      COSTS_N_INSNS (10),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      COSTS_N_INSNS (3),	/* fpconst.  */
      COSTS_N_INSNS (3),	/* neg.  */
      COSTS_N_INSNS (3),	/* compare.  */
      COSTS_N_INSNS (3),	/* widen.  */
      COSTS_N_INSNS (3),	/* narrow.  */
      COSTS_N_INSNS (3),	/* toint.  */
      COSTS_N_INSNS (3),	/* fromint.  */
      COSTS_N_INSNS (3)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};

const struct cpu_cost_table cortexa12_extra_costs =
{
  /* ALU */
  {
    0,			/* arith.  */
    0,			/* logical.  */
    0,			/* shift.  */
    COSTS_N_INSNS (1),	/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    0,			/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    0,			/* bfi.  */
    COSTS_N_INSNS (1),	/* bfx.  */
    COSTS_N_INSNS (1),	/* clz.  */
    COSTS_N_INSNS (1),	/* rev.  */
    0,			/* non_exec.  */
    true		/* non_exec_costs_exec.  */
  },
  /* MULT SImode */
  {
    {
      COSTS_N_INSNS (2),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (3),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (18)	/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (3),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (3),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3),	/* load.  */
    COSTS_N_INSNS (3),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (3),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (3),	/* loadf.  */
    COSTS_N_INSNS (3),	/* loadd.  */
    0,			/* load_unaligned.  */
    0,			/* store.  */
    0,			/* strd.  */
    0,			/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (2),	/* stored.  */
    0,			/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub. */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst. */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};

const struct cpu_cost_table cortexa15_extra_costs =
{
  /* ALU */
  {
    0,			/* arith.  */
    0,			/* logical.  */
    0,			/* shift.  */
    0,			/* shift_reg.  */
    COSTS_N_INSNS (1),	/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    COSTS_N_INSNS (1),	/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    0,			/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    COSTS_N_INSNS (1),	/* bfi.  */
    0,			/* bfx.  */
    0,			/* clz.  */
    0,			/* rev.  */
    0,			/* non_exec.  */
    true		/* non_exec_costs_exec.  */
  },
  /* MULT SImode */
  {
    {
      COSTS_N_INSNS (2),	/* simple.  */
      COSTS_N_INSNS (3),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (2),	/* add.  */
      COSTS_N_INSNS (2),	/* extend_add.  */
      COSTS_N_INSNS (18)	/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (3),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (3),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (3),	/* load.  */
    COSTS_N_INSNS (3),	/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (4),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    2,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (4),	/* loadf.  */
    COSTS_N_INSNS (4),	/* loadd.  */
    0,			/* load_unaligned.  */
    0,			/* store.  */
    0,			/* strd.  */
    COSTS_N_INSNS (1),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    2,			/* stm_regs_per_insn_subsequent.  */
    0,			/* storef.  */
    0,			/* stored.  */
    0,			/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (17),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub. */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst. */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (5),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (31),	/* div.  */
      COSTS_N_INSNS (4),	/* mult.  */
      COSTS_N_INSNS (8),	/* mult_addsub.  */
      COSTS_N_INSNS (8),	/* fma.  */
      COSTS_N_INSNS (4),	/* addsub.  */
      COSTS_N_INSNS (2),	/* fpconst.  */
      COSTS_N_INSNS (2),	/* neg.  */
      COSTS_N_INSNS (2),	/* compare.  */
      COSTS_N_INSNS (4),	/* widen.  */
      COSTS_N_INSNS (4),	/* narrow.  */
      COSTS_N_INSNS (4),	/* toint.  */
      COSTS_N_INSNS (4),	/* fromint.  */
      COSTS_N_INSNS (4)		/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};

const struct cpu_cost_table v7m_extra_costs =
{
  /* ALU */
  {
    0,			/* arith.  */
    0,			/* logical.  */
    0,			/* shift.  */
    0,			/* shift_reg.  */
    0,			/* arith_shift.  */
    COSTS_N_INSNS (1),	/* arith_shift_reg.  */
    0,			/* log_shift.  */
    COSTS_N_INSNS (1),	/* log_shift_reg.  */
    0,			/* extend.  */
    COSTS_N_INSNS (1),	/* extend_arith.  */
    0,			/* bfi.  */
    0,			/* bfx.  */
    0,			/* clz.  */
    0,			/* rev.  */
    COSTS_N_INSNS (1),	/* non_exec.  */
    false		/* non_exec_costs_exec.  */
  },
  {
    /* MULT SImode */
    {
      COSTS_N_INSNS (1),	/* simple.  */
      COSTS_N_INSNS (1),	/* flag_setting.  */
      COSTS_N_INSNS (2),	/* extend.  */
      COSTS_N_INSNS (1),	/* add.  */
      COSTS_N_INSNS (3),	/* extend_add.  */
      COSTS_N_INSNS (8)		/* idiv.  */
    },
    /* MULT DImode */
    {
      0,			/* simple (N/A).  */
      0,			/* flag_setting (N/A).  */
      COSTS_N_INSNS (2),	/* extend.  */
      0,			/* add (N/A).  */
      COSTS_N_INSNS (3),	/* extend_add.  */
      0				/* idiv (N/A).  */
    }
  },
  /* LD/ST */
  {
    COSTS_N_INSNS (2),	/* load.  */
    0,			/* load_sign_extend.  */
    COSTS_N_INSNS (3),	/* ldrd.  */
    COSTS_N_INSNS (2),	/* ldm_1st.  */
    1,			/* ldm_regs_per_insn_1st.  */
    1,			/* ldm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* loadf.  */
    COSTS_N_INSNS (3),	/* loadd.  */
    COSTS_N_INSNS (1),  /* load_unaligned.  */
    COSTS_N_INSNS (2),	/* store.  */
    COSTS_N_INSNS (3),	/* strd.  */
    COSTS_N_INSNS (2),	/* stm_1st.  */
    1,			/* stm_regs_per_insn_1st.  */
    1,			/* stm_regs_per_insn_subsequent.  */
    COSTS_N_INSNS (2),	/* storef.  */
    COSTS_N_INSNS (3),	/* stored.  */
    COSTS_N_INSNS (1),	/* store_unaligned.  */
    COSTS_N_INSNS (1),	/* loadv.  */
    COSTS_N_INSNS (1)	/* storev.  */
  },
  {
    /* FP SFmode */
    {
      COSTS_N_INSNS (7),	/* div.  */
      COSTS_N_INSNS (2),	/* mult.  */
      COSTS_N_INSNS (5),	/* mult_addsub.  */
      COSTS_N_INSNS (3),	/* fma.  */
      COSTS_N_INSNS (1),	/* addsub.  */
      0,			/* fpconst.  */
      0,			/* neg.  */
      0,			/* compare.  */
      0,			/* widen.  */
      0,			/* narrow.  */
      0,			/* toint.  */
      0,			/* fromint.  */
      0				/* roundint.  */
    },
    /* FP DFmode */
    {
      COSTS_N_INSNS (15),	/* div.  */
      COSTS_N_INSNS (5),	/* mult.  */
      COSTS_N_INSNS (7),	/* mult_addsub.  */
      COSTS_N_INSNS (7),	/* fma.  */
      COSTS_N_INSNS (3),	/* addsub.  */
      0,			/* fpconst.  */
      0,			/* neg.  */
      0,			/* compare.  */
      0,			/* widen.  */
      0,			/* narrow.  */
      0,			/* toint.  */
      0,			/* fromint.  */
      0				/* roundint.  */
    }
  },
  /* Vector */
  {
    COSTS_N_INSNS (1)	/* alu.  */
  }
};

const struct addr_mode_cost_table generic_addr_mode_costs =
{
  /* int.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  },
  /* float.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  },
  /* vector.  */
  {
    COSTS_N_INSNS (0),	/* AMO_DEFAULT.  */
    COSTS_N_INSNS (0),	/* AMO_NO_WB.  */
    COSTS_N_INSNS (0)	/* AMO_WB.  */
  }
};

const struct tune_params arm_slowmul_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  3,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  1,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fastmul_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  1,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* StrongARM has early execution of branches, so a sequence that is worth
   skipping is shorter.  Set max_insns_skipped to a lower value.  */

const struct tune_params arm_strongarm_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  3,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  1,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xscale_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  xscale_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  2,						/* Constant limit.  */
  3,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  1,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_9e_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  1,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_marvell_pj4_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_v6t2_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  1,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};


/* Generic Cortex tuning.  Use more specific tunings if appropriate.  */
const struct tune_params arm_cortex_tune =
{
  &generic_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a8_tune =
{
  &cortexa8_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a7_tune =
{
  &cortexa7_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a15_tune =
{
  &cortexa15_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  3,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_cortex_a35_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  1,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a53_tune =
{
  &cortexa53_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a57_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,		/* addressing mode costs */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  3,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
  tune_params::SCHED_AUTOPREF_FULL
};

const struct tune_params arm_exynosm1_tune =
{
  &exynosm1_extra_costs,
  &generic_addr_mode_costs,			/* Addressing mode costs.  */
  NULL,						/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  3,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,	/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_xgene1_tune =
{
  &xgene1_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  32,						/* Memset max inline.  */
  4,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Branches can be dual-issued on Cortex-A5, so conditional execution is
   less appealing.  Set max_insns_skipped to a low value.  */

const struct tune_params arm_cortex_a5_tune =
{
  &cortexa5_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_a5_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  1,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a9_tune =
{
  &cortexa9_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  cortex_a9_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_BENEFICIAL(4,32,32),
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a12_tune =
{
  &cortexa12_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,                        /* Vectorizer costs.  */
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_cortex_a73_tune =
{
  &cortexa57_extra_costs,
  &generic_addr_mode_costs,			/* Addressing mode costs.  */
  NULL,						/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,			/* Vectorizer costs.  */
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_TRUE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_ALL,
  tune_params::PREF_NEON_STRINGOPS_TRUE,
  FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
  tune_params::SCHED_AUTOPREF_FULL
};

/* armv7m tuning.  On Cortex-M4 cores for example, MOVW/MOVT take a single
   cycle to execute each.  An LDR from the constant pool also takes two cycles
   to execute, but mildly increases pipelining opportunity (consecutive
   loads/stores can be pipelined together, saving one cycle), and may also
   improve icache utilisation.  Hence we prefer the constant pool for such
   processors.  */

const struct tune_params arm_v7m_tune =
{
  &v7m_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_m_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  2,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  1,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Cortex-M7 tuning.  */

const struct tune_params arm_cortex_m7_tune =
{
  &v7m_extra_costs,
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_cortex_m7_branch_cost,
  &arm_default_vec_cost,
  0,						/* Constant limit.  */
  1,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
   arm_v6t2_tune.  It is used for cortex-m0, cortex-m1, cortex-m0plus and
   cortex-m23.  */
const struct tune_params arm_v6m_tune =
{
  &generic_extra_costs,			/* Insn extra costs.  */
  &generic_addr_mode_costs,		/* Addressing mode costs.  */
  NULL,					/* Sched adj cost.  */
  arm_default_branch_cost,
  &arm_default_vec_cost,                        /* Vectorizer costs.  */
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  1,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_FALSE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

const struct tune_params arm_fa726te_tune =
{
  &generic_extra_costs,				/* Insn extra costs.  */
  &generic_addr_mode_costs,			/* Addressing mode costs.  */
  fa726te_sched_adjust_cost,
  arm_default_branch_cost,
  &arm_default_vec_cost,
  1,						/* Constant limit.  */
  5,						/* Max cond insns.  */
  8,						/* Memset max inline.  */
  2,						/* Issue rate.  */
  ARM_PREFETCH_NOT_BENEFICIAL,
  tune_params::PREF_CONST_POOL_TRUE,
  tune_params::PREF_LDRD_FALSE,
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* Thumb.  */
  tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE,		/* ARM.  */
  tune_params::DISPARAGE_FLAGS_NEITHER,
  tune_params::PREF_NEON_STRINGOPS_FALSE,
  tune_params::FUSE_NOTHING,
  tune_params::SCHED_AUTOPREF_OFF
};

/* Auto-generated CPU, FPU and architecture tables.  */
#include "arm-cpu-data.h"

/* The name of the preprocessor macro to define for this architecture.  PROFILE
   is replaced by the architecture name (eg. 8A) in arm_option_override () and
   is thus chosen to be big enough to hold the longest architecture name.  */

char arm_arch_name[] = "__ARM_ARCH_PROFILE__";

/* Supported TLS relocations.  */

enum tls_reloc {
  TLS_GD32,
  TLS_GD32_FDPIC,
  TLS_LDM32,
  TLS_LDM32_FDPIC,
  TLS_LDO32,
  TLS_IE32,
  TLS_IE32_FDPIC,
  TLS_LE32,
  TLS_DESCSEQ	/* GNU scheme */
};

/* The maximum number of insns to be used when loading a constant.  */
inline static int
arm_constant_limit (bool size_p)
{
  return size_p ? 1 : current_tune->constant_limit;
}

/* Emit an insn that's a simple single-set.  Both the operands must be known
   to be valid.  */
inline static rtx_insn *
emit_set_insn (rtx x, rtx y)
{
  return emit_insn (gen_rtx_SET (x, y));
}

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}

/* Return the number of bits set in BMAP.  */
static unsigned
bitmap_popcount (const sbitmap bmap)
{
  unsigned int count = 0;
  unsigned int n = 0;
  sbitmap_iterator sbi;

  EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
    count++;
  return count;
}

typedef struct
{
  machine_mode mode;
  const char *name;
} arm_fixed_mode_set;

/* A small helper for setting fixed-point library libfuncs.  */

static void
arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
			     const char *funcname, const char *modename,
			     int num_suffix)
{
  char buffer[50];

  if (num_suffix == 0)
    sprintf (buffer, "__gnu_%s%s", funcname, modename);
  else
    sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);

  set_optab_libfunc (optable, mode, buffer);
}

static void
arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
			    machine_mode from, const char *funcname,
			    const char *toname, const char *fromname)
{
  char buffer[50];
  const char *maybe_suffix_2 = "";

  /* Follow the logic for selecting a "2" suffix in fixed-bit.h.  */
  if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
      && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
      && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
    maybe_suffix_2 = "2";

  sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
	   maybe_suffix_2);

  set_conv_libfunc (optable, to, from, buffer);
}

static GTY(()) rtx speculation_barrier_libfunc;

/* Record that we have no arithmetic or comparison libfuncs for
   machine mode MODE.  */

static void
arm_block_arith_comp_libfuncs_for_mode (machine_mode mode)
{
  /* Arithmetic.  */
  set_optab_libfunc (add_optab, mode, NULL);
  set_optab_libfunc (sdiv_optab, mode, NULL);
  set_optab_libfunc (smul_optab, mode, NULL);
  set_optab_libfunc (neg_optab, mode, NULL);
  set_optab_libfunc (sub_optab, mode, NULL);

  /* Comparisons.  */
  set_optab_libfunc (eq_optab, mode, NULL);
  set_optab_libfunc (ne_optab, mode, NULL);
  set_optab_libfunc (lt_optab, mode, NULL);
  set_optab_libfunc (le_optab, mode, NULL);
  set_optab_libfunc (ge_optab, mode, NULL);
  set_optab_libfunc (gt_optab, mode, NULL);
  set_optab_libfunc (unord_optab, mode, NULL);
}

/* Set up library functions unique to ARM.  */
static void
arm_init_libfuncs (void)
{
  machine_mode mode_iter;

  /* For Linux, we have access to kernel support for atomic operations.  */
  if (arm_abi == ARM_ABI_AAPCS_LINUX)
    init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);

  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     callee-saved registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");

  /* For SImode division the ABI provides div-without-mod routines,
     which are faster.  */
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);

  /* Half-precision float operations.  The compiler handles all operations
     with NULL libfuncs by converting the SFmode.  */
  switch (arm_fp16_format)
    {
    case ARM_FP16_FORMAT_IEEE:
    case ARM_FP16_FORMAT_ALTERNATIVE:

      /* Conversions.  */
      set_conv_libfunc (trunc_optab, HFmode, SFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_f2h_ieee"
			 : "__gnu_f2h_alternative"));
      set_conv_libfunc (sext_optab, SFmode, HFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_h2f_ieee"
			 : "__gnu_h2f_alternative"));

      set_conv_libfunc (trunc_optab, HFmode, DFmode,
			(arm_fp16_format == ARM_FP16_FORMAT_IEEE
			 ? "__gnu_d2h_ieee"
			 : "__gnu_d2h_alternative"));

      arm_block_arith_comp_libfuncs_for_mode (HFmode);
      break;

    default:
      break;
    }

  /* For all possible libcalls in BFmode, record NULL.  */
  FOR_EACH_MODE_IN_CLASS (mode_iter, MODE_FLOAT)
    {
      set_conv_libfunc (trunc_optab, BFmode, mode_iter, NULL);
      set_conv_libfunc (trunc_optab, mode_iter, BFmode, NULL);
      set_conv_libfunc (sext_optab, mode_iter, BFmode, NULL);
      set_conv_libfunc (sext_optab, BFmode, mode_iter, NULL);
    }
  arm_block_arith_comp_libfuncs_for_mode (BFmode);

  /* Use names prefixed with __gnu_ for fixed-point helper functions.  */
  {
    const arm_fixed_mode_set fixed_arith_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" }
      };
    const arm_fixed_mode_set fixed_conv_modes[] =
      {
	{ E_QQmode, "qq" },
	{ E_UQQmode, "uqq" },
	{ E_HQmode, "hq" },
	{ E_UHQmode, "uhq" },
	{ E_SQmode, "sq" },
	{ E_USQmode, "usq" },
	{ E_DQmode, "dq" },
	{ E_UDQmode, "udq" },
	{ E_TQmode, "tq" },
	{ E_UTQmode, "utq" },
	{ E_HAmode, "ha" },
	{ E_UHAmode, "uha" },
	{ E_SAmode, "sa" },
	{ E_USAmode, "usa" },
	{ E_DAmode, "da" },
	{ E_UDAmode, "uda" },
	{ E_TAmode, "ta" },
	{ E_UTAmode, "uta" },
	{ E_QImode, "qi" },
	{ E_HImode, "hi" },
	{ E_SImode, "si" },
	{ E_DImode, "di" },
	{ E_TImode, "ti" },
	{ E_SFmode, "sf" },
	{ E_DFmode, "df" }
      };
    unsigned int i, j;

    for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
      {
	arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
				     "add", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
				     "ssadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
				     "usadd", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
				     "sub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
				     "sssub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
				     "ussub", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
				     "mul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
				     "ssmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
				     "usmul", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
				     "div", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
				     "udiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
				     "ssdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
				     "usdiv", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
				     "neg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
				     "ssneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
				     "usneg", fixed_arith_modes[i].name, 2);
	arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
				     "ashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
				     "ashr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
				     "lshr", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
				     "ssashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
				     "usashl", fixed_arith_modes[i].name, 3);
	arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
				     "cmp", fixed_arith_modes[i].name, 2);
      }

    for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
      for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
	{
	  if (i == j
	      || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
		  && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
	    continue;

	  arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfract_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfract",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (fractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "fractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	  arm_set_fixed_conv_libfunc (satfractuns_optab,
				      fixed_conv_modes[i].mode,
				      fixed_conv_modes[j].mode, "satfractuns",
				      fixed_conv_modes[i].name,
				      fixed_conv_modes[j].name);
	}
  }

  if (TARGET_AAPCS_BASED)
    synchronize_libfunc = init_one_libfunc ("__sync_synchronize");

  speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
}

/* On AAPCS systems, this is the "struct __va_list".  */
static GTY(()) tree va_list_type;

/* Return the type to use as __builtin_va_list.  */
static tree
arm_build_builtin_va_list (void)
{
  tree va_list_name;
  tree ap_field;

  if (!TARGET_AAPCS_BASED)
    return std_build_builtin_va_list ();

  /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
     defined as:

       struct __va_list
       {
	 void *__ap;
       };

     The C Library ABI further reinforces this definition in \S
     4.1.

     We must follow this definition exactly.  The structure tag
     name is visible in C++ mangled names, and thus forms a part
     of the ABI.  The field name may be used by people who
     #include <stdarg.h>.  */
  /* Create the type.  */
  va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
  /* Give it the required name.  */
  va_list_name = build_decl (BUILTINS_LOCATION,
			     TYPE_DECL,
			     get_identifier ("__va_list"),
			     va_list_type);
  DECL_ARTIFICIAL (va_list_name) = 1;
  TYPE_NAME (va_list_type) = va_list_name;
  TYPE_STUB_DECL (va_list_type) = va_list_name;
  /* Create the __ap field.  */
  ap_field = build_decl (BUILTINS_LOCATION,
			 FIELD_DECL,
			 get_identifier ("__ap"),
			 ptr_type_node);
  DECL_ARTIFICIAL (ap_field) = 1;
  DECL_FIELD_CONTEXT (ap_field) = va_list_type;
  TYPE_FIELDS (va_list_type) = ap_field;
  /* Compute its layout.  */
  layout_type (va_list_type);

  return va_list_type;
}

/* Return an expression of type "void *" pointing to the next
   available argument in a variable-argument list.  VALIST is the
   user-level va_list object, of type __builtin_va_list.  */
static tree
arm_extract_valist_ptr (tree valist)
{
  if (TREE_TYPE (valist) == error_mark_node)
    return error_mark_node;

  /* On an AAPCS target, the pointer is stored within "struct
     va_list".  */
  if (TARGET_AAPCS_BASED)
    {
      tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
      valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
		       valist, ap_field, NULL_TREE);
    }

  return valist;
}

/* Implement TARGET_EXPAND_BUILTIN_VA_START.  */
static void
arm_expand_builtin_va_start (tree valist, rtx nextarg)
{
  valist = arm_extract_valist_ptr (valist);
  std_expand_builtin_va_start (valist, nextarg);
}

/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.  */
static tree
arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
			  gimple_seq *post_p)
{
  valist = arm_extract_valist_ptr (valist);
  return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
}

/* Check any incompatible options that the user has specified.  */
static void
arm_option_check_internal (struct gcc_options *opts)
{
  int flags = opts->x_target_flags;

  /* iWMMXt and NEON are incompatible.  */
  if (TARGET_IWMMXT
      && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
    error ("iWMMXt and NEON are incompatible");

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_ARM_P (flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
    error ("target CPU does not support ARM mode");

  /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM_P (flags)
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
	     "debugging");

  /* iWMMXt unsupported under Thumb mode.  */
  if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
    error ("iWMMXt unsupported under Thumb mode");

  if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
    error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");

  if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
    {
      error ("RTP PIC is incompatible with Thumb");
      flag_pic = 0;
    }

  if (target_pure_code || target_slow_flash_data)
    {
      const char *flag = (target_pure_code ? "-mpure-code" :
					     "-mslow-flash-data");
      bool common_unsupported_modes = arm_arch_notm || flag_pic || TARGET_NEON;

      /* We only support -mslow-flash-data on M-profile targets with
	 MOVT.  */
      if (target_slow_flash_data && (!TARGET_HAVE_MOVT || common_unsupported_modes))
	error ("%s only supports non-pic code on M-profile targets with the "
	       "MOVT instruction", flag);

      /* We only support -mpure-code on M-profile targets.  */
      if (target_pure_code && common_unsupported_modes)
	error ("%s only supports non-pic code on M-profile targets", flag);

      /* Cannot load addresses: -mslow-flash-data forbids literal pool and
	 -mword-relocations forbids relocation of MOVT/MOVW.  */
      if (target_word_relocations)
	error ("%s incompatible with %<-mword-relocations%>", flag);
    }
}

/* Recompute the global settings depending on target attribute options.  */

static void
arm_option_params_internal (void)
{
  /* If we are not using the default (ARM mode) section anchor offset
     ranges, then set the correct ranges now.  */
  if (TARGET_THUMB1)
    {
      /* Thumb-1 LDR instructions cannot have negative offsets.
         Permissible positive offset ranges are 5-bit (for byte loads),
         6-bit (for halfword loads), or 7-bit (for word loads).
         Empirical results suggest a 7-bit anchor range gives the best
         overall code size.  */
      targetm.min_anchor_offset = 0;
      targetm.max_anchor_offset = 127;
    }
  else if (TARGET_THUMB2)
    {
      /* The minimum is set such that the total size of the block
         for a particular anchor is 248 + 1 + 4095 bytes, which is
         divisible by eight, ensuring natural spacing of anchors.  */
      targetm.min_anchor_offset = -248;
      targetm.max_anchor_offset = 4095;
    }
  else
    {
      targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
      targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
    }

  /* Increase the number of conditional instructions with -Os.  */
  max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;

  /* For THUMB2, we limit the conditional sequence to one IT block.  */
  if (TARGET_THUMB2)
    max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);

  if (TARGET_THUMB1)
    targetm.md_asm_adjust = thumb1_md_asm_adjust;
  else
    targetm.md_asm_adjust = arm_md_asm_adjust;
}

/* True if -mflip-thumb should next add an attribute for the default
   mode, false if it should next add an attribute for the opposite mode.  */
static GTY(()) bool thumb_flipper;

/* Options after initial target override.  */
static GTY(()) tree init_optimize;

static void
arm_override_options_after_change_1 (struct gcc_options *opts)
{
  /* -falign-functions without argument: supply one.  */
  if (opts->x_flag_align_functions && !opts->x_str_align_functions)
    opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
      && opts->x_optimize_size ? "2" : "4";
}

/* Implement targetm.override_options_after_change.  */

static void
arm_override_options_after_change (void)
{
  arm_configure_build_target (&arm_active_target,
			      TREE_TARGET_OPTION (target_option_default_node),
			      &global_options_set, false);

  arm_override_options_after_change_1 (&global_options);
}

/* Implement TARGET_OPTION_SAVE.  */
static void
arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
{
  ptr->x_arm_arch_string = opts->x_arm_arch_string;
  ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
  ptr->x_arm_tune_string = opts->x_arm_tune_string;
}

/* Implement TARGET_OPTION_RESTORE.  */
static void
arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
{
  opts->x_arm_arch_string = ptr->x_arm_arch_string;
  opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
  opts->x_arm_tune_string = ptr->x_arm_tune_string;
  arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
			      false);
}

/* Reset options between modes that the user has specified.  */
static void
arm_option_override_internal (struct gcc_options *opts,
			      struct gcc_options *opts_set)
{
  arm_override_options_after_change_1 (opts);

  if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      /* The default is to enable interworking, so this warning message would
	 be confusing to users who have just compiled with
	 eg, -march=armv4.  */
      /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
      opts->x_target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB_P (opts->x_target_flags)
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support THUMB instructions");
      opts->x_target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      opts->x_target_flags &= ~MASK_APCS_FRAME;
    }

  /* Callee super interworking implies thumb interworking.  Adding
     this to the flags here simplifies the logic elsewhere.  */
  if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
    opts->x_target_flags |= MASK_INTERWORK;

  /* need to remember initial values so combinaisons of options like
     -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
  cl_optimization *to = TREE_OPTIMIZATION (init_optimize);

  if (! opts_set->x_arm_restrict_it)
    opts->x_arm_restrict_it = arm_arch8;

  /* ARM execution state and M profile don't have [restrict] IT.  */
  if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
    opts->x_arm_restrict_it = 0;

  /* Use the IT size from CPU specific tuning unless -mrestrict-it is used.  */
  if (!opts_set->x_arm_restrict_it
      && (opts_set->x_arm_cpu_string || opts_set->x_arm_tune_string))
    opts->x_arm_restrict_it = 0;

  /* Enable -munaligned-access by default for
     - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
     i.e. Thumb2 and ARM state only.
     - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
     - ARMv8 architecture-base processors.

     Disable -munaligned-access by default for
     - all pre-ARMv6 architecture-based processors
     - ARMv6-M architecture-based processors
     - ARMv8-M Baseline processors.  */

  if (! opts_set->x_unaligned_access)
    {
      opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
			  && arm_arch6 && (arm_arch_notm || arm_arch7));
    }
  else if (opts->x_unaligned_access == 1
	   && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
    {
      warning (0, "target CPU does not support unaligned accesses");
     opts->x_unaligned_access = 0;
    }

  /* Don't warn since it's on by default in -O2.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_schedule_insns = 0;
  else
    opts->x_flag_schedule_insns = to->x_flag_schedule_insns;

  /* Disable shrink-wrap when optimizing function for size, since it tends to
     generate additional returns.  */
  if (optimize_function_for_size_p (cfun)
      && TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_flag_shrink_wrap = false;
  else
    opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;

  /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
     - epilogue_insns - does not accurately model the corresponding insns
     emitted in the asm file.  In particular, see the comment in thumb_exit
     'Find out how many of the (return) argument registers we can corrupt'.
     As a consequence, the epilogue may clobber registers without fipa-ra
     finding out about it.  Therefore, disable fipa-ra in Thumb1 mode.
     TODO: Accurately model clobbers for epilogue_insns and reenable
     fipa-ra.  */
  if (TARGET_THUMB1_P (opts->x_target_flags))
    opts->x_flag_ipa_ra = 0;
  else
    opts->x_flag_ipa_ra = to->x_flag_ipa_ra;

  /* Thumb2 inline assembly code should always use unified syntax.
     This will apply to ARM and Thumb1 eventually.  */
  if (TARGET_THUMB2_P (opts->x_target_flags))
    opts->x_inline_asm_unified = true;

#ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
  SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
#endif
}

static sbitmap isa_all_fpubits_internal;
static sbitmap isa_all_fpbits;
static sbitmap isa_quirkbits;

/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
   architecture have been specified, but the two are not identical.  */
void
arm_configure_build_target (struct arm_build_target *target,
			    struct cl_target_option *opts,
			    struct gcc_options *opts_set,
			    bool warn_compatible)
{
  const cpu_option *arm_selected_tune = NULL;
  const arch_option *arm_selected_arch = NULL;
  const cpu_option *arm_selected_cpu = NULL;
  const arm_fpu_desc *arm_selected_fpu = NULL;
  const char *tune_opts = NULL;
  const char *arch_opts = NULL;
  const char *cpu_opts = NULL;

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;

  if (opts_set->x_arm_arch_string)
    {
      arm_selected_arch = arm_parse_arch_option_name (all_architectures,
						      "-march",
						      opts->x_arm_arch_string);
      arch_opts = strchr (opts->x_arm_arch_string, '+');
    }

  if (opts_set->x_arm_cpu_string)
    {
      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
						    opts->x_arm_cpu_string);
      cpu_opts = strchr (opts->x_arm_cpu_string, '+');
      arm_selected_tune = arm_selected_cpu;
      /* If taking the tuning from -mcpu, we don't need to rescan the
	 options for tuning.  */
    }

  if (opts_set->x_arm_tune_string)
    {
      arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
						     opts->x_arm_tune_string);
      tune_opts = strchr (opts->x_arm_tune_string, '+');
    }

  if (arm_selected_arch)
    {
      arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_arch->common,
				 arch_opts);

      if (arm_selected_cpu)
	{
	  auto_sbitmap cpu_isa (isa_num_bits);
	  auto_sbitmap isa_delta (isa_num_bits);

	  arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
	  arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
				     cpu_opts);
	  bitmap_xor (isa_delta, cpu_isa, target->isa);
	  /* Ignore any bits that are quirk bits.  */
	  bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
	  /* Ignore (for now) any bits that might be set by -mfpu.  */
	  bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits_internal);

	  /* And if the target ISA lacks floating point, ignore any
	     extensions that depend on that.  */
	  if (!bitmap_bit_p (target->isa, isa_bit_vfpv2))
	    bitmap_and_compl (isa_delta, isa_delta, isa_all_fpbits);

	  if (!bitmap_empty_p (isa_delta))
	    {
	      if (warn_compatible)
		warning (0, "switch %<-mcpu=%s%> conflicts "
			 "with %<-march=%s%> switch",
			 arm_selected_cpu->common.name,
			 arm_selected_arch->common.name);
	      /* -march wins for code generation.
		 -mcpu wins for default tuning.  */
	      if (!arm_selected_tune)
		arm_selected_tune = arm_selected_cpu;

	      arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	      target->arch_name = arm_selected_arch->common.name;
	    }
	  else
	    {
	      /* Architecture and CPU are essentially the same.
		 Prefer the CPU setting.  */
	      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
	      target->core_name = arm_selected_cpu->common.name;
	      /* Copy the CPU's capabilities, so that we inherit the
		 appropriate extensions and quirks.  */
	      bitmap_copy (target->isa, cpu_isa);
	    }
	}
      else
	{
	  /* Pick a CPU based on the architecture.  */
	  arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
	  target->arch_name = arm_selected_arch->common.name;
	  /* Note: target->core_name is left unset in this path.  */
	}
    }
  else if (arm_selected_cpu)
    {
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }
  /* If the user did not specify a processor or architecture, choose
     one for them.  */
  else
    {
      const cpu_option *sel;
      auto_sbitmap sought_isa (isa_num_bits);
      bitmap_clear (sought_isa);
      auto_sbitmap default_isa (isa_num_bits);

      arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
						    TARGET_CPU_DEFAULT);
      cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
      gcc_assert (arm_selected_cpu->common.name);

      /* RWE: All of the selection logic below (to the end of this
	 'if' clause) looks somewhat suspect.  It appears to be mostly
	 there to support forcing thumb support when the default CPU
	 does not have thumb (somewhat dubious in terms of what the
	 user might be expecting).  I think it should be removed once
	 support for the pre-thumb era cores is removed.  */
      sel = arm_selected_cpu;
      arm_initialize_isa (default_isa, sel->common.isa_bits);
      arm_parse_option_features (default_isa, &arm_selected_cpu->common,
				 cpu_opts);

      /* Now check to see if the user has specified any command line
	 switches that require certain abilities from the cpu.  */

      if (TARGET_INTERWORK || TARGET_THUMB)
	bitmap_set_bit (sought_isa, isa_bit_thumb);

      /* If there are such requirements and the default CPU does not
	 satisfy them, we need to run over the complete list of
	 cores looking for one that is satisfactory.  */
      if (!bitmap_empty_p (sought_isa)
	  && !bitmap_subset_p (sought_isa, default_isa))
	{
	  auto_sbitmap candidate_isa (isa_num_bits);
	  /* We're only interested in a CPU with at least the
	     capabilities of the default CPU and the required
	     additional features.  */
	  bitmap_ior (default_isa, default_isa, sought_isa);

	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->common.name != NULL; sel++)
	    {
	      arm_initialize_isa (candidate_isa, sel->common.isa_bits);
	      /* An exact match?  */
	      if (bitmap_equal_p (default_isa, candidate_isa))
		break;
	    }

	  if (sel->common.name == NULL)
	    {
	      unsigned current_bit_count = isa_num_bits;
	      const cpu_option *best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a CPU that has exactly the
		 characteristics of the default CPU and the given
		 command line options we scan the array again looking
		 for a best match.  The best match must have at least
		 the capabilities of the perfect match.  */
	      for (sel = all_cores; sel->common.name != NULL; sel++)
		{
		  arm_initialize_isa (candidate_isa, sel->common.isa_bits);

		  if (bitmap_subset_p (default_isa, candidate_isa))
		    {
		      unsigned count;

		      bitmap_and_compl (candidate_isa, candidate_isa,
					default_isa);
		      count = bitmap_popcount (candidate_isa);

		      if (count < current_bit_count)
			{
			  best_fit = sel;
			  current_bit_count = count;
			}
		    }

		  gcc_assert (best_fit);
		  sel = best_fit;
		}
	    }
	  arm_selected_cpu = sel;
	}

      /* Now we know the CPU, we can finally initialize the target
	 structure.  */
      target->core_name = arm_selected_cpu->common.name;
      arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
      arm_parse_option_features (target->isa, &arm_selected_cpu->common,
				 cpu_opts);
      arm_selected_arch = all_architectures + arm_selected_cpu->arch;
    }

  gcc_assert (arm_selected_cpu);
  gcc_assert (arm_selected_arch);

  if (opts->x_arm_fpu_index != TARGET_FPU_auto)
    {
      arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
      auto_sbitmap fpu_bits (isa_num_bits);

      arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
      bitmap_and_compl (target->isa, target->isa, isa_all_fpubits_internal);
      bitmap_ior (target->isa, target->isa, fpu_bits);
    }

  if (!arm_selected_tune)
    arm_selected_tune = arm_selected_cpu;
  else /* Validate the features passed to -mtune.  */
    arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);

  const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];

  /* Finish initializing the target structure.  */
  target->arch_pp_name = arm_selected_arch->arch;
  target->base_arch = arm_selected_arch->base_arch;
  target->profile = arm_selected_arch->profile;

  target->tune_flags = tune_data->tune_flags;
  target->tune = tune_data->tune;
  target->tune_core = tune_data->scheduler;
  arm_option_reconfigure_globals ();
}

/* Fix up any incompatible options that the user has specified.  */
static void
arm_option_override (void)
{
  static const enum isa_feature fpu_bitlist_internal[]
    = { ISA_ALL_FPU_INTERNAL, isa_nobit };
  static const enum isa_feature fp_bitlist[]
    = { ISA_ALL_FP, isa_nobit };
  static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
  cl_target_option opts;

  isa_quirkbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_quirkbits, quirk_bitlist);

  isa_all_fpubits_internal = sbitmap_alloc (isa_num_bits);
  isa_all_fpbits = sbitmap_alloc (isa_num_bits);
  arm_initialize_isa (isa_all_fpubits_internal, fpu_bitlist_internal);
  arm_initialize_isa (isa_all_fpbits, fp_bitlist);

  arm_active_target.isa = sbitmap_alloc (isa_num_bits);

  if (!global_options_set.x_arm_fpu_index)
    {
      bool ok;
      int fpu_index;

      ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
				  CL_TARGET);
      gcc_assert (ok);
      arm_fpu_index = (enum fpu_type) fpu_index;
    }

  cl_target_option_save (&opts, &global_options);
  arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
			      true);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file and for enabling feature flags.  */
  arm_option_reconfigure_globals ();

  arm_tune = arm_active_target.tune_core;
  tune_flags = arm_active_target.tune_flags;
  current_tune = arm_active_target.tune;

  /* TBD: Dwarf info for apcs frame is not handled yet.  */
  if (TARGET_APCS_FRAME)
    flag_shrink_wrap = false;

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "%<-mapcs-stack-check%> incompatible with "
	       "%<-mno-apcs-frame%>");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* Set up some tuning parameters.  */
  arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
  arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
  arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
  arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
  arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;

  /* For arm2/3 there is no need to do any scheduling if we are doing
     software floating-point.  */
  if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Override the default structure alignment for AAPCS ABI.  */
  if (!global_options_set.x_arm_structure_size_boundary)
    {
      if (TARGET_AAPCS_BASED)
	arm_structure_size_boundary = 8;
    }
  else
    {
      warning (0, "option %<-mstructure-size-boundary%> is deprecated");

      if (arm_structure_size_boundary != 8
	  && arm_structure_size_boundary != 32
	  && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
	{
	  if (ARM_DOUBLEWORD_ALIGN)
	    warning (0,
		     "structure size boundary can only be set to 8, 32 or 64");
	  else
	    warning (0, "structure size boundary can only be set to 8 or 32");
	  arm_structure_size_boundary
	    = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
	}
    }

  if (TARGET_VXWORKS_RTP)
    {
      if (!global_options_set.x_arm_pic_data_is_text_relative)
	arm_pic_data_is_text_relative = 0;
    }
  else if (flag_pic
	   && !arm_pic_data_is_text_relative
	   && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
    /* When text & data segments don't have a fixed displacement, the
       intended use is with a single, read only, pic base register.
       Unless the user explicitly requested not to do that, set
       it.  */
    target_flags |= MASK_SINGLE_PIC_BASE;

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  The EABI specifies r9 as the PIC register.  */
  if (flag_pic && TARGET_SINGLE_PIC_BASE)
    {
      if (TARGET_VXWORKS_RTP)
	warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
      arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
    }

  if (flag_pic && TARGET_VXWORKS_RTP)
    arm_pic_register = 9;

  /* If in FDPIC mode then force arm_pic_register to be r9.  */
  if (TARGET_FDPIC)
    {
      arm_pic_register = FDPIC_REGNUM;
      if (TARGET_THUMB1)
	sorry ("FDPIC mode is not supported in Thumb-1 mode");
    }

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_or_fixed_reg_p (pic_register)
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM
	       || (TARGET_VXWORKS_RTP
		   && (unsigned int) pic_register != arm_pic_register))
	error ("unable to use %qs for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  if (flag_pic)
    target_word_relocations = 1;

  /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores.  */
  if (fix_cm3_ldrd == 2)
    {
      if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
	fix_cm3_ldrd = 1;
      else
	fix_cm3_ldrd = 0;
    }

  /* Hot/Cold partitioning is not currently supported, since we can't
     handle literal pool placement in that case.  */
  if (flag_reorder_blocks_and_partition)
    {
      inform (input_location,
	      "%<-freorder-blocks-and-partition%> not supported "
	      "on this architecture");
      flag_reorder_blocks_and_partition = 0;
      flag_reorder_blocks = 1;
    }

  if (flag_pic)
    /* Hoisting PIC address calculations more aggressively provides a small,
       but measurable, size reduction for PIC code.  Therefore, we decrease
       the bar for unrestricted expression hoisting to the cost of PIC address
       calculation, which is 2 instructions.  */
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_gcse_unrestricted_cost, 2);

  /* ARM EABI defaults to strict volatile bitfields.  */
  if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
      && abi_version_at_least(2))
    flag_strict_volatile_bitfields = 1;

  /* Enable sw prefetching at -O3 for CPUS that have prefetch, and we
     have deemed it beneficial (signified by setting
     prefetch.num_slots to 1 or more).  */
  if (flag_prefetch_loop_arrays < 0
      && HAVE_prefetch
      && optimize >= 3
      && current_tune->prefetch.num_slots > 0)
    flag_prefetch_loop_arrays = 1;

  /* Set up parameters to be used in prefetching algorithm.  Do not
     override the defaults unless we are tuning for a core we have
     researched values for.  */
  if (current_tune->prefetch.num_slots > 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_simultaneous_prefetches,
			 current_tune->prefetch.num_slots);
  if (current_tune->prefetch.l1_cache_line_size >= 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_l1_cache_line_size,
			 current_tune->prefetch.l1_cache_line_size);
  if (current_tune->prefetch.l1_cache_size >= 0)
    SET_OPTION_IF_UNSET (&global_options, &global_options_set,
			 param_l1_cache_size,
			 current_tune->prefetch.l1_cache_size);

  /* Look through ready list and all of queue for instructions
     relevant for L2 auto-prefetcher.  */
  int sched_autopref_queue_depth;

  switch (current_tune->sched_autopref)
    {
    case tune_params::SCHED_AUTOPREF_OFF:
      sched_autopref_queue_depth = -1;
      break;

    case tune_params::SCHED_AUTOPREF_RANK:
      sched_autopref_queue_depth = 0;
      break;

    case tune_params::SCHED_AUTOPREF_FULL:
      sched_autopref_queue_depth = max_insn_queue_index + 1;
      break;

    default:
      gcc_unreachable ();
    }

  SET_OPTION_IF_UNSET (&global_options, &global_options_set,
		       param_sched_autopref_queue_depth,
		       sched_autopref_queue_depth);

  /* Currently, for slow flash data, we just disable literal pools.  We also
     disable it for pure-code.  */
  if (target_slow_flash_data || target_pure_code)
    arm_disable_literal_pool = true;

  /* Disable scheduling fusion by default if it's not armv7 processor
     or doesn't prefer ldrd/strd.  */
  if (flag_schedule_fusion == 2
      && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
    flag_schedule_fusion = 0;

  /* Need to remember initial options before they are overriden.  */
  init_optimize = build_optimization_node (&global_options);

  arm_options_perform_arch_sanity_checks ();
  arm_option_override_internal (&global_options, &global_options_set);
  arm_option_check_internal (&global_options);
  arm_option_params_internal ();

  /* Create the default target_options structure.  */
  target_option_default_node = target_option_current_node
    = build_target_option_node (&global_options);

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();

  /* Init initial mode for testing.  */
  thumb_flipper = TARGET_THUMB;
}


/* Reconfigure global status flags from the active_target.isa.  */
void
arm_option_reconfigure_globals (void)
{
  sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
  arm_base_arch = arm_active_target.base_arch;

  /* Initialize boolean versions of the architectural flags, for use
     in the arm.md file.  */
  arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
  arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch5t =  bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
  arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
  arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
  arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
  arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
  arm_arch6m = arm_arch6 && !arm_arch_notm;
  arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
  arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
  arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
  arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
  arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
  arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
  arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
  arm_arch8_1m_main = bitmap_bit_p (arm_active_target.isa,
				    isa_bit_armv8_1m_main);
  arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
  arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
  arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
  arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
  arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
  arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
  arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
  arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
  arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
  arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
  arm_arch_i8mm = bitmap_bit_p (arm_active_target.isa, isa_bit_i8mm);
  arm_arch_bf16 = bitmap_bit_p (arm_active_target.isa, isa_bit_bf16);

  arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
  if (arm_fp16_inst)
    {
      if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
	error ("selected fp16 options are incompatible");
      arm_fp16_format = ARM_FP16_FORMAT_IEEE;
    }

  /* And finally, set up some quirks.  */
  arm_arch_no_volatile_ce
    = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
  arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
					    isa_bit_quirk_armv6kz);

  /* Use the cp15 method if it is available.  */
  if (target_thread_pointer == TP_AUTO)
    {
      if (arm_arch6k && !TARGET_THUMB1)
	target_thread_pointer = TP_CP15;
      else
	target_thread_pointer = TP_SOFT;
    }
}

/* Perform some validation between the desired architecture and the rest of the
   options.  */
void
arm_options_perform_arch_sanity_checks (void)
{
  /* V5T code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5t)
    target_flags &= ~MASK_INTERWORK;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  /* BPABI targets use linker tricks to allow interworking on cores
     without thumb support.  */
  if (TARGET_INTERWORK
      && !TARGET_BPABI
      && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_attr = FPU_NONE;
  else
    arm_fpu_attr = FPU_VFP;

  if (TARGET_AAPCS_BASED)
    {
      if (TARGET_CALLER_INTERWORKING)
	error ("AAPCS does not support %<-mcaller-super-interworking%>");
      else
	if (TARGET_CALLEE_INTERWORKING)
	  error ("AAPCS does not support %<-mcallee-super-interworking%>");
    }

  /* __fp16 support currently assumes the core has ldrh.  */
  if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
    sorry ("__fp16 and no ldrh");

  if (use_cmse && !arm_arch_cmse)
    error ("target CPU does not support ARMv8-M Security Extensions");

  /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
     and ARMv8-M Baseline and Mainline do not allow such configuration.  */
  if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
    error ("ARMv8-M Security Extensions incompatible with selected FPU");


  if (TARGET_AAPCS_BASED)
    {
      if (arm_abi == ARM_ABI_IWMMXT)
	arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
      else if (TARGET_HARD_FLOAT_ABI)
	{
	  arm_pcs_default = ARM_PCS_AAPCS_VFP;
	  if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
	    error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
	}
      else
	arm_pcs_default = ARM_PCS_AAPCS;
    }
  else
    {
      if (arm_float_abi == ARM_FLOAT_ABI_HARD)
	sorry ("%<-mfloat-abi=hard%> and VFP");

      if (arm_abi == ARM_ABI_APCS)
	arm_pcs_default = ARM_PCS_APCS;
      else
	arm_pcs_default = ARM_PCS_ATPCS;
    }
}

/* Test whether a local function descriptor is canonical, i.e.,
   whether we can use GOTOFFFUNCDESC to compute the address of the
   function.  */
static bool
arm_fdpic_local_funcdesc_p (rtx fnx)
{
  tree fn;
  enum symbol_visibility vis;
  bool ret;

  if (!TARGET_FDPIC)
    return true;

  if (! SYMBOL_REF_LOCAL_P (fnx))
    return false;

  fn = SYMBOL_REF_DECL (fnx);

  if (! fn)
    return false;

  vis = DECL_VISIBILITY (fn);

  if (vis == VISIBILITY_PROTECTED)
    /* Private function descriptors for protected functions are not
       canonical.  Temporarily change the visibility to global so that
       we can ensure uniqueness of funcdesc pointers.  */
    DECL_VISIBILITY (fn) = VISIBILITY_DEFAULT;

  ret = default_binds_local_p_1 (fn, flag_pic);

  DECL_VISIBILITY (fn) = vis;

  return ret;
}

static void
arm_add_gc_roots (void)
{
  gcc_obstack_init(&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}

/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};

/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *              arg;

  if (!arm_arch_notm)
    return ARM_FT_NORMAL | ARM_FT_STACKALIGN;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}

/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && (TREE_NOTHROW (current_function_decl)
          || !(flag_unwind_tables
               || (flag_exceptions
		   && arm_except_unwind_info (&global_options) != UI_SJLJ)))
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  if (lookup_attribute ("cmse_nonsecure_entry", attr))
    type |= ARM_FT_CMSE_ENTRY;

  return type;
}

/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}

bool
arm_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !IS_NAKED (arm_current_func_type ());
}

static bool
arm_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}


/* Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.

   On the ARM, (if r8 is the static chain regnum, and remembering that
   referencing pc adds an offset of 8) the trampoline looks like:
	   ldr 		r8, [pc, #0]
	   ldr		pc, [pc]
	   .word	static chain value
	   .word	function's address
   XXX FIXME: When the trampoline returns, r8 will be clobbered.

   In FDPIC mode, the trampoline looks like:
	   .word	trampoline address
	   .word	trampoline GOT address
	   ldr 		r12, [pc, #8] ; #4 for Arm mode
	   ldr 		r9,  [pc, #8] ; #4 for Arm mode
	   ldr		pc,  [pc, #8] ; #4 for Arm mode
	   .word	static chain value
	   .word	GOT address
	   .word	function's address
*/

static void
arm_asm_trampoline_template (FILE *f)
{
  fprintf (f, "\t.syntax unified\n");

  if (TARGET_FDPIC)
    {
      /* The first two words are a function descriptor pointing to the
	 trampoline code just below.  */
      if (TARGET_ARM)
	fprintf (f, "\t.arm\n");
      else if (TARGET_THUMB2)
	fprintf (f, "\t.thumb\n");
      else
	/* Only ARM and Thumb-2 are supported.  */
	gcc_unreachable ();

      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
      /* Trampoline code which sets the static chain register but also
	 PIC register before jumping into real code.  */
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PIC_OFFSET_TABLE_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      asm_fprintf (f, "\tldr\t%r, [%r, #%d]\n",
		   PC_REGNUM, PC_REGNUM,
		   TARGET_THUMB2 ? 8 : 4);
      assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
    }
  else if (TARGET_ARM)
    {
      fprintf (f, "\t.arm\n");
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
    }
  else if (TARGET_THUMB2)
    {
      fprintf (f, "\t.thumb\n");
      /* The Thumb-2 trampoline is similar to the arm implementation.
	 Unlike 16-bit Thumb, we enter the stub in thumb mode.  */
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
		   STATIC_CHAIN_REGNUM, PC_REGNUM);
      asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
    }
  else
    {
      ASM_OUTPUT_ALIGN (f, 2);
      fprintf (f, "\t.code\t16\n");
      fprintf (f, ".Ltrampoline_start:\n");
      asm_fprintf (f, "\tpush\t{r0, r1}\n");
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
      asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
      asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
      asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
    }
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
  assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
}

/* Emit RTL insns to initialize the variable parts of a trampoline.  */

static void
arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr, mem, a_tramp;

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  if (TARGET_FDPIC)
    {
      rtx funcdesc = XEXP (DECL_RTL (fndecl), 0);
      rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
      rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
      /* The function start address is at offset 8, but in Thumb mode
	 we want bit 0 set to 1 to indicate Thumb-ness, hence 9
	 below.  */
      rtx trampoline_code_start
	= plus_constant (Pmode, XEXP (m_tramp, 0), TARGET_THUMB2 ? 9 : 8);

      /* Write initial funcdesc which points to the trampoline.  */
      mem = adjust_address (m_tramp, SImode, 0);
      emit_move_insn (mem, trampoline_code_start);
      mem = adjust_address (m_tramp, SImode, 4);
      emit_move_insn (mem, gen_rtx_REG (Pmode, PIC_OFFSET_TABLE_REGNUM));
      /* Setup static chain.  */
      mem = adjust_address (m_tramp, SImode, 20);
      emit_move_insn (mem, chain_value);
      /* GOT + real function entry point.  */
      mem = adjust_address (m_tramp, SImode, 24);
      emit_move_insn (mem, gotaddr);
      mem = adjust_address (m_tramp, SImode, 28);
      emit_move_insn (mem, fnaddr);
    }
  else
    {
      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
      emit_move_insn (mem, chain_value);

      mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
      fnaddr = XEXP (DECL_RTL (fndecl), 0);
      emit_move_insn (mem, fnaddr);
    }

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}

/* Thumb trampolines should be entered in thumb mode, so set
   the bottom bit of the address.  */

static rtx
arm_trampoline_adjust_address (rtx addr)
{
  /* For FDPIC don't fix trampoline address since it's a function
     descriptor and not a function address.  */
  if (TARGET_THUMB && !TARGET_FDPIC)
    addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
				NULL, 0, OPTAB_LIB_WIDEN);
  return addr;
}

/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked, volatile and stack alignment functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
    return 0;

  /* So do interrupt functions that use the frame pointer and Thumb
     interrupt functions.  */
  if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
    return 0;

  if (TARGET_LDRD && current_tune->prefer_ldrd_strd
      && !optimize_function_for_size_p (cfun))
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (crtl->args.pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || crtl->calls_eh_return
      /* Or if the function calls alloca */
      || cfun->calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
				 && stack_adjust == 4))
      /* Or if the static chain register was saved above the frame, under the
	 assumption that the stack pointer isn't saved on the stack.  */
      || (!(TARGET_APCS_FRAME && frame_pointer_needed)
          && arm_compute_static_chain_stack_bytes() != 0))
    return 0;

  saved_int_regs = offsets->saved_regs_mask;

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5t here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_or_fixed_reg_p (3))
	return 0;

      /* ... that it isn't being used for a return value ... */
      if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
	return 0;

      /* ... or for a tail-call argument ...  */
      if (sibling)
	{
	  gcc_assert (CALL_P (sibling));

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic
	  && arm_pic_register != INVALID_REGNUM
	  && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
	return 0;
    }

  /* ARMv8-M nonsecure entry function need to use bxns to return and thus need
     several instructions if anything needs to be popped.  Armv8.1-M Mainline
     also needs several instructions to save and restore FP context.  */
  if (IS_CMSE_ENTRY (func_type) && (saved_int_regs || TARGET_HAVE_FPCXT_CMSE))
    return 0;

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the VFP regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (df_regs_ever_live_p (regno) && ! call_used_or_fixed_reg_p (regno))
	return 0;

  return 1;
}

/* Return TRUE if we should try to use a simple_return insn, i.e. perform
   shrink-wrapping if possible.  This is the case if we need to emit a
   prologue, which we can test by looking at the offsets.  */
bool
use_simple_return_p (void)
{
  arm_stack_offsets *offsets;

  /* Note this function can be called before or after reload.  */
  if (!reload_completed)
    arm_compute_frame_layout ();

  offsets = arm_get_frame_offsets ();
  return offsets->outgoing_args != 0;
}

/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  int lowbit;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  i &= (unsigned HOST_WIDE_INT) 0xffffffff;

  /* Fast return for 0 and small values.  We must do this for zero, since
     the code below can't handle that one case.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
    return TRUE;

  /* Get the number of trailing zeros.  */
  lowbit = ffs((int) i) - 1;

  /* Only even shifts are allowed in ARM mode so round down to the
     nearest even number.  */
  if (TARGET_ARM)
    lowbit &= ~1;

  if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
    return TRUE;

  if (TARGET_ARM)
    {
      /* Allow rotated constants in ARM mode.  */
      if (lowbit <= 4
	   && ((i & ~0xc000003f) == 0
	       || (i & ~0xf000000f) == 0
	       || (i & ~0xfc000003) == 0))
	return TRUE;
    }
  else if (TARGET_THUMB2)
    {
      HOST_WIDE_INT v;

      /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY.  */
      v = i & 0xff;
      v |= v << 16;
      if (i == v || i == (v | (v << 8)))
	return TRUE;

      /* Allow repeated pattern 0xXY00XY00.  */
      v = i & 0xff00;
      v |= v << 16;
      if (i == v)
	return TRUE;
    }
  else if (TARGET_HAVE_MOVT)
    {
      /* Thumb-1 Targets with MOVT.  */
      if (i > 0xffff)
	return FALSE;
      else
	return TRUE;
    }

  return FALSE;
}

/* Return true if I is a valid constant for the operation CODE.  */
int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case SET:
      /* See if we can use movw.  */
      if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
	return 1;
      else
	/* Otherwise, try mvn.  */
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    case PLUS:
      /* See if we can use addw or subw.  */
      if (TARGET_THUMB2
	  && ((i & 0xfffff000) == 0
	      || ((-i) & 0xfffff000) == 0))
	return 1;
      /* Fall through.  */
    case COMPARE:
    case EQ:
    case NE:
    case GT:
    case LE:
    case LT:
    case GE:
    case GEU:
    case LTU:
    case GTU:
    case LEU:
    case UNORDERED:
    case ORDERED:
    case UNEQ:
    case UNGE:
    case UNLT:
    case UNGT:
    case UNLE:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
      return 0;

    case IOR:
      if (TARGET_THUMB2)
	return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}

/* Return true if I is a valid di mode constant for the operation CODE.  */
int
const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
{
  HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
  HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
  rtx hi = GEN_INT (hi_val);
  rtx lo = GEN_INT (lo_val);

  if (TARGET_THUMB1)
    return 0;

  switch (code)
    {
    case AND:
    case IOR:
    case XOR:
      return const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF
	     || const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF;
    case PLUS:
      return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);

    default:
      return 0;
    }
}

/* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
   Avoid generating useless code when one of the bytes is zero.  */
void
thumb1_gen_const_int (rtx op0, HOST_WIDE_INT op1)
{
  bool mov_done_p = false;
  int i;

  /* Emit upper 3 bytes if needed.  */
  for (i = 0; i < 3; i++)
    {
      int byte = (op1 >> (8 * (3 - i))) & 0xff;

      if (byte)
	{
	  emit_set_insn (op0, mov_done_p
			 ? gen_rtx_PLUS (SImode,op0, GEN_INT (byte))
			 : GEN_INT (byte));
	  mov_done_p = true;
	}

      if (mov_done_p)
	emit_set_insn (op0, gen_rtx_ASHIFT (SImode, op0, GEN_INT (8)));
    }

  /* Emit lower byte if needed.  */
  if (!mov_done_p)
    emit_set_insn (op0, GEN_INT (op1 & 0xff));
  else if (op1 & 0xff)
    emit_set_insn (op0, gen_rtx_PLUS (SImode, op0, GEN_INT (op1 & 0xff)));
}

/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

/* ??? Tweak this for thumb2.  */
int
arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (REG_P (target) && REG_P (source)
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!cfun->machine->after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > (arm_constant_limit (optimize_function_for_size_p (cfun))
		 + (code != SET))))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are diadic.  */
	      if (TARGET_USE_MOVT)
		arm_emit_movpair (target, GEN_INT (val));
	      else
		emit_set_insn (target, GEN_INT (val));

	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      if (TARGET_USE_MOVT)
		arm_emit_movpair (temp, GEN_INT (val));
	      else
		emit_set_insn (temp, GEN_INT (val));

	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
	      else
		emit_set_insn (target,
			       gen_rtx_fmt_ee (code, mode, source, temp));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}

/* Return a sequence of integers, in RETURN_SEQUENCE that fit into
   ARM/THUMB2 immediates, and add up to VAL.
   Thr function return value gives the number of insns required.  */
static int
optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
			    struct four_ints *return_sequence)
{
  int best_consecutive_zeros = 0;
  int i;
  int best_start = 0;
  int insns1, insns2;
  struct four_ints tmp_sequence;

  /* If we aren't targeting ARM, the best place to start is always at
     the bottom, otherwise look more closely.  */
  if (TARGET_ARM)
    {
      for (i = 0; i < 32; i += 2)
	{
	  int consecutive_zeros = 0;

	  if (!(val & (3 << i)))
	    {
	      while ((i < 32) && !(val & (3 << i)))
		{
		  consecutive_zeros += 2;
		  i += 2;
		}
	      if (consecutive_zeros > best_consecutive_zeros)
		{
		  best_consecutive_zeros = consecutive_zeros;
		  best_start = i - consecutive_zeros;
		}
	      i -= 2;
	    }
	}
    }

  /* So long as it won't require any more insns to do so, it's
     desirable to emit a small constant (in bits 0...9) in the last
     insn.  This way there is more chance that it can be combined with
     a later addressing insn to form a pre-indexed load or store
     operation.  Consider:

	   *((volatile int *)0xe0000100) = 1;
	   *((volatile int *)0xe0000110) = 2;

     We want this to wind up as:

	    mov rA, #0xe0000000
	    mov rB, #1
	    str rB, [rA, #0x100]
	    mov rB, #2
	    str rB, [rA, #0x110]

     rather than having to synthesize both large constants from scratch.

     Therefore, we calculate how many insns would be required to emit
     the constant starting from `best_start', and also starting from
     zero (i.e. with bit 31 first to be output).  If `best_start' doesn't
     yield a shorter sequence, we may as well use zero.  */
  insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
  if (best_start != 0
      && ((HOST_WIDE_INT_1U << best_start) < val))
    {
      insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
      if (insns2 <= insns1)
	{
	  *return_sequence = tmp_sequence;
	  insns1 = insns2;
	}
    }

  return insns1;
}

/* As for optimal_immediate_sequence, but starting at bit-position I.  */
static int
optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
			     struct four_ints *return_sequence, int i)
{
  int remainder = val & 0xffffffff;
  int insns = 0;

  /* Try and find a way of doing the job in either two or three
     instructions.

     In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
     location.  We start at position I.  This may be the MSB, or
     optimial_immediate_sequence may have positioned it at the largest block
     of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
     wrapping around to the top of the word when we drop off the bottom.
     In the worst case this code should produce no more than four insns.

     In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
     constants, shifted to any arbitrary location.  We should always start
     at the MSB.  */
  do
    {
      int end;
      unsigned int b1, b2, b3, b4;
      unsigned HOST_WIDE_INT result;
      int loc;

      gcc_assert (insns < 4);

      if (i <= 0)
	i += 32;

      /* First, find the next normal 12/8-bit shifted/rotated immediate.  */
      if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
	{
	  loc = i;
	  if (i <= 12 && TARGET_THUMB2 && code == PLUS)
	    /* We can use addw/subw for the last 12 bits.  */
	    result = remainder;
	  else
	    {
	      /* Use an 8-bit shifted/rotated immediate.  */
	      end = i - 8;
	      if (end < 0)
		end += 32;
	      result = remainder & ((0x0ff << end)
				   | ((i < end) ? (0xff >> (32 - end))
						: 0));
	      i -= 8;
	    }
	}
      else
	{
	  /* Arm allows rotates by a multiple of two. Thumb-2 allows
	     arbitrary shifts.  */
	  i -= TARGET_ARM ? 2 : 1;
	  continue;
	}

      /* Next, see if we can do a better job with a thumb2 replicated
	 constant.

         We do it this way around to catch the cases like 0x01F001E0 where
	 two 8-bit immediates would work, but a replicated constant would
	 make it worse.

         TODO: 16-bit constants that don't clear all the bits, but still win.
         TODO: Arithmetic splitting for set/add/sub, rather than bitwise.  */
      if (TARGET_THUMB2)
	{
	  b1 = (remainder & 0xff000000) >> 24;
	  b2 = (remainder & 0x00ff0000) >> 16;
	  b3 = (remainder & 0x0000ff00) >> 8;
	  b4 = remainder & 0xff;

	  if (loc > 24)
	    {
	      /* The 8-bit immediate already found clears b1 (and maybe b2),
		 but must leave b3 and b4 alone.  */

	      /* First try to find a 32-bit replicated constant that clears
		 almost everything.  We can assume that we can't do it in one,
		 or else we wouldn't be here.  */
	      unsigned int tmp = b1 & b2 & b3 & b4;
	      unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
				  + (tmp << 24);
	      unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
					    + (tmp == b3) + (tmp == b4);
	      if (tmp
		  && (matching_bytes >= 3
		      || (matching_bytes == 2
			  && const_ok_for_op (remainder & ~tmp2, code))))
		{
		  /* At least 3 of the bytes match, and the fourth has at
		     least as many bits set, or two of the bytes match
		     and it will only require one more insn to finish.  */
		  result = tmp2;
		  i = tmp != b1 ? 32
		      : tmp != b2 ? 24
		      : tmp != b3 ? 16
		      : 8;
		}

	      /* Second, try to find a 16-bit replicated constant that can
		 leave three of the bytes clear.  If b2 or b4 is already
		 zero, then we can.  If the 8-bit from above would not
		 clear b2 anyway, then we still win.  */
	      else if (b1 == b3 && (!b2 || !b4
			       || (remainder & 0x00ff0000 & ~result)))
		{
		  result = remainder & 0xff00ff00;
		  i = 24;
		}
	    }
	  else if (loc > 16)
	    {
	      /* The 8-bit immediate already found clears b2 (and maybe b3)
		 and we don't get here unless b1 is alredy clear, but it will
		 leave b4 unchanged.  */

	      /* If we can clear b2 and b4 at once, then we win, since the
		 8-bits couldn't possibly reach that far.  */
	      if (b2 == b4)
		{
		  result = remainder & 0x00ff00ff;
		  i = 16;
		}
	    }
	}

      return_sequence->i[insns++] = result;
      remainder &= ~result;

      if (code == SET || code == MINUS)
	code = PLUS;
    }
  while (remainder);

  return insns;
}

/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}

/* As above, but extra parameter GENERATE which, if clear, suppresses
   RTL generation.  */

static int
arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
		  unsigned HOST_WIDE_INT val, rtx target, rtx source,
		  int subtargets, int generate)
{
  int can_invert = 0;
  int can_negate = 0;
  int final_invert = 0;
  int i;
  int set_sign_bit_copies = 0;
  int clear_sign_bit_copies = 0;
  int clear_zero_bit_copies = 0;
  int set_zero_bit_copies = 0;
  int insns = 0, neg_insns, inv_insns;
  unsigned HOST_WIDE_INT temp1, temp2;
  unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
  struct four_ints *immediates;
  struct four_ints pos_immediates, neg_immediates, inv_immediates;

  /* Find out which operations are safe for a given CODE.  Also do a quick
     check for degenerate cases; these can occur when DImode operations
     are split.  */
  switch (code)
    {
    case SET:
      can_invert = 1;
      break;

    case PLUS:
      can_negate = 1;
      break;

    case IOR:
      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     GEN_INT (ARM_SIGN_EXTEND (val))));
	  return 1;
	}

      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;

	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      break;

    case AND:
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
	  return 1;
	}
      if (remainder == 0xffffffff)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}
      can_invert = 1;
      break;

    case XOR:
      if (remainder == 0)
	{
	  if (reload_completed && rtx_equal_p (target, source))
	    return 0;
	  if (generate)
	    emit_constant_insn (cond, gen_rtx_SET (target, source));
	  return 1;
	}

      if (remainder == 0xffffffff)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NOT (mode, source)));
	  return 1;
	}
      final_invert = 1;
      break;

    case MINUS:
      /* We treat MINUS as (val - source), since (source - val) is always
	 passed as (source + (-val)).  */
      if (remainder == 0)
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_NEG (mode, source)));
	  return 1;
	}
      if (const_ok_for_arm (val))
	{
	  if (generate)
	    emit_constant_insn (cond,
				gen_rtx_SET (target,
					     gen_rtx_MINUS (mode, GEN_INT (val),
							    source)));
	  return 1;
	}

      break;

    default:
      gcc_unreachable ();
    }

  /* If we can do it in one insn get out quickly.  */
  if (const_ok_for_op (val, code))
    {
      if (generate)
	emit_constant_insn (cond,
			    gen_rtx_SET (target,
					 (source
					  ? gen_rtx_fmt_ee (code, mode, source,
							    GEN_INT (val))
					  : GEN_INT (val))));
      return 1;
    }

  /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
     insn.  */
  if (code == AND && (i = exact_log2 (remainder + 1)) > 0
      && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
    {
      if (generate)
	{
	  if (mode == SImode && i == 16)
	    /* Use UXTH in preference to UBFX, since on Thumb2 it's a
	       smaller insn.  */
	    emit_constant_insn (cond,
				gen_zero_extendhisi2
				(target, gen_lowpart (HImode, source)));
	  else
	    /* Extz only supports SImode, but we can coerce the operands
	       into that mode.  */
	    emit_constant_insn (cond,
				gen_extzv_t2 (gen_lowpart (SImode, target),
					      gen_lowpart (SImode, source),
					      GEN_INT (i), const0_rtx));
	}

      return 1;
    }

  /* Calculate a few attributes that may be useful for specific
     optimizations.  */
  /* Count number of leading zeros.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) == 0)
	clear_sign_bit_copies++;
      else
	break;
    }

  /* Count number of leading 1's.  */
  for (i = 31; i >= 0; i--)
    {
      if ((remainder & (1 << i)) != 0)
	set_sign_bit_copies++;
      else
	break;
    }

  /* Count number of trailing zero's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) == 0)
	clear_zero_bit_copies++;
      else
	break;
    }

  /* Count number of trailing 1's.  */
  for (i = 0; i <= 31; i++)
    {
      if ((remainder & (1 << i)) != 0)
	set_zero_bit_copies++;
      else
	break;
    }

  switch (code)
    {
    case SET:
      /* See if we can do this by sign_extending a constant that is known
	 to be negative.  This is a good, way of doing it, since the shift
	 may well merge into a subsequent insn.  */
      if (set_sign_bit_copies > 1)
	{
	  if (const_ok_for_arm
	      (temp1 = ARM_SIGN_EXTEND (remainder
					<< (set_sign_bit_copies - 1))))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	  /* For an inverted constant, we will need to set the low bits,
	     these will be shifted out of harm's way.  */
	  temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
	  if (const_ok_for_arm (~temp1))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_ashrsi3 (target, new_src,
						   GEN_INT (set_sign_bit_copies - 1)));
		}
	      return 2;
	    }
	}

      /* See if we can calculate the value as the difference between two
	 valid immediates.  */
      if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
	{
	  int topshift = clear_sign_bit_copies & ~1;

	  temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
				   & (0xff000000 >> topshift));

	  /* If temp1 is zero, then that means the 9 most significant
	     bits of remainder were 1 and we've caused it to overflow.
	     When topshift is 0 we don't need to do anything since we
	     can borrow from 'bit 32'.  */
	  if (temp1 == 0 && topshift != 0)
	    temp1 = 0x80000000 >> (topshift - 1);

	  temp2 = ARM_SIGN_EXTEND (temp1 - remainder);

	  if (const_ok_for_arm (temp2))
	    {
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  emit_constant_insn (cond,
				      gen_rtx_SET (new_src, GEN_INT (temp1)));
		  emit_constant_insn (cond,
				      gen_addsi3 (target, new_src,
						  GEN_INT (-temp2)));
		}

	      return 2;
	    }
	}

      /* See if we can generate this by setting the bottom (or the top)
	 16 bits, and then shifting these into the other half of the
	 word.  We only look for the simplest cases, to do more would cost
	 too much.  Be careful, however, not to generate this when the
	 alternative would take fewer insns.  */
      if (val & 0xffff0000)
	{
	  temp1 = remainder & 0xffff0000;
	  temp2 = remainder & 0x0000ffff;

	  /* Overlaps outside this range are best done using other methods.  */
	  for (i = 9; i < 24; i++)
	    {
	      if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
		  && !const_ok_for_arm (temp2))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp2, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET
		       (target,
			gen_rtx_IOR (mode,
				     gen_rtx_ASHIFT (mode, source,
						     GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }

	  /* Don't duplicate cases already considered.  */
	  for (i = 17; i < 24; i++)
	    {
	      if (((temp1 | (temp1 >> i)) == remainder)
		  && !const_ok_for_arm (temp1))
		{
		  rtx new_src = (subtargets
				 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
				 : target);
		  insns = arm_gen_constant (code, mode, cond, temp1, new_src,
					    source, subtargets, generate);
		  source = new_src;
		  if (generate)
		    emit_constant_insn
		      (cond,
		       gen_rtx_SET (target,
				    gen_rtx_IOR
				    (mode,
				     gen_rtx_LSHIFTRT (mode, source,
						       GEN_INT (i)),
				     source)));
		  return insns + 1;
		}
	    }
	}
      break;

    case IOR:
    case XOR:
      /* If we have IOR or XOR, and the constant can be loaded in a
	 single instruction, and we can find a temporary to put it in,
	 then this can be done in two instructions instead of 3-4.  */
      if (subtargets
	  /* TARGET can't be NULL if SUBTARGETS is 0 */
	  || (reload_completed && !reg_mentioned_p (target, source)))
	{
	  if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
	    {
	      if (generate)
		{
		  rtx sub = subtargets ? gen_reg_rtx (mode) : target;

		  emit_constant_insn (cond,
				      gen_rtx_SET (sub, GEN_INT (val)));
		  emit_constant_insn (cond,
				      gen_rtx_SET (target,
						   gen_rtx_fmt_ee (code, mode,
								   source, sub)));
		}
	      return 2;
	    }
	}

      if (code == XOR)
	break;

      /*  Convert.
	  x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
	                     and the remainder 0s for e.g. 0xfff00000)
	  x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)

	  This can be done in 2 instructions by using shifts with mov or mvn.
	  e.g. for
	  x = x | 0xfff00000;
	  we generate.
	  mvn	r0, r0, asl #12
	  mvn	r0, r0, lsr #12  */
      if (set_sign_bit_copies > 8
	  && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_sign_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode,
							   source,
							   shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode, sub,
							     shift))));
	    }
	  return 2;
	}

      /* Convert
	  x = y | constant (which has set_zero_bit_copies number of trailing ones).
	   to
	  x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).

	  For eg. r0 = r0 | 0xfff
	       mvn	r0, r0, lsr #12
	       mvn	r0, r0, asl #12

      */
      if (set_zero_bit_copies > 8
	  && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (set_zero_bit_copies);

	      emit_constant_insn
		(cond,
		 gen_rtx_SET (sub,
			      gen_rtx_NOT (mode,
					   gen_rtx_LSHIFTRT (mode,
							     source,
							     shift))));
	      emit_constant_insn
		(cond,
		 gen_rtx_SET (target,
			      gen_rtx_NOT (mode,
					   gen_rtx_ASHIFT (mode, sub,
							   shift))));
	    }
	  return 2;
	}

      /* This will never be reached for Thumb2 because orn is a valid
	 instruction. This is for Thumb1 and the ARM 32 bit cases.

	 x = y | constant (such that ~constant is a valid constant)
	 Transform this to
	 x = ~(~y & ~constant).
      */
      if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
	{
	  if (generate)
	    {
	      rtx sub = subtargets ? gen_reg_rtx (mode) : target;
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_NOT (mode, source)));
	      source = sub;
	      if (subtargets)
		sub = gen_reg_rtx (mode);
	      emit_constant_insn (cond,
				  gen_rtx_SET (sub,
					       gen_rtx_AND (mode, source,
							    GEN_INT (temp1))));
	      emit_constant_insn (cond,
				  gen_rtx_SET (target,
					       gen_rtx_NOT (mode, sub)));
	    }
	  return 3;
	}
      break;

    case AND:
      /* See if two shifts will do 2 or more insn's worth of work.  */
      if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = ((0xffffffff
				       << (32 - clear_sign_bit_copies))
				      & 0xffffffff);

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
	        = ARM_SIGN_EXTEND (remainder | shift_mask);

	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
		  insns = arm_gen_constant (AND, SImode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;
		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_sign_bit_copies);

	      emit_insn (gen_ashlsi3 (new_src, source, shift));
	      emit_insn (gen_lshrsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
	{
	  HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;

	  if ((remainder | shift_mask) != 0xffffffff)
	    {
	      HOST_WIDE_INT new_val
	        = ARM_SIGN_EXTEND (remainder | shift_mask);
	      if (generate)
		{
		  rtx new_src = subtargets ? gen_reg_rtx (mode) : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    new_src, source, subtargets, 1);
		  source = new_src;
		}
	      else
		{
		  rtx targ = subtargets ? NULL_RTX : target;

		  insns = arm_gen_constant (AND, mode, cond, new_val,
					    targ, source, subtargets, 0);
		}
	    }

	  if (generate)
	    {
	      rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
	      rtx shift = GEN_INT (clear_zero_bit_copies);

	      emit_insn (gen_lshrsi3 (new_src, source, shift));
	      emit_insn (gen_ashlsi3 (target, new_src, shift));
	    }

	  return insns + 2;
	}

      break;

    default:
      break;
    }

  /* Calculate what the instruction sequences would be if we generated it
     normally, negated, or inverted.  */
  if (code == AND)
    /* AND cannot be split into multiple insns, so invert and use BIC.  */
    insns = 99;
  else
    insns = optimal_immediate_sequence (code, remainder, &pos_immediates);

  if (can_negate)
    neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
					    &neg_immediates);
  else
    neg_insns = 99;

  if (can_invert || final_invert)
    inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
					    &inv_immediates);
  else
    inv_insns = 99;

  immediates = &pos_immediates;

  /* Is the negated immediate sequence more efficient?  */
  if (neg_insns < insns && neg_insns <= inv_insns)
    {
      insns = neg_insns;
      immediates = &neg_immediates;
    }
  else
    can_negate = 0;

  /* Is the inverted immediate sequence more efficient?
     We must allow for an extra NOT instruction for XOR operations, although
     there is some chance that the final 'mvn' will get optimized later.  */
  if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
    {
      insns = inv_insns;
      immediates = &inv_immediates;
    }
  else
    {
      can_invert = 0;
      final_invert = 0;
    }

  /* Now output the chosen sequence as instructions.  */
  if (generate)
    {
      for (i = 0; i < insns; i++)
	{
	  rtx new_src, temp1_rtx;

	  temp1 = immediates->i[i];

	  if (code == SET || code == MINUS)
	    new_src = (subtargets ? gen_reg_rtx (mode) : target);
	  else if ((final_invert || i < (insns - 1)) && subtargets)
	    new_src = gen_reg_rtx (mode);
	  else
	    new_src = target;

	  if (can_invert)
	    temp1 = ~temp1;
	  else if (can_negate)
	    temp1 = -temp1;

	  temp1 = trunc_int_for_mode (temp1, mode);
	  temp1_rtx = GEN_INT (temp1);

	  if (code == SET)
	    ;
	  else if (code == MINUS)
	    temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
	  else
	    temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);

	  emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
	  source = new_src;

	  if (code == SET)
	    {
	      can_negate = can_invert;
	      can_invert = 0;
	      code = PLUS;
	    }
	  else if (code == MINUS)
	    code = PLUS;
	}
    }

  if (final_invert)
    {
      if (generate)
	emit_constant_insn (cond, gen_rtx_SET (target,
					       gen_rtx_NOT (mode, source)));
      insns++;
    }

  return insns;
}

/* Return TRUE if op is a constant where both the low and top words are
   suitable for RSB/RSC instructions.  This is never true for Thumb, since
   we do not have RSC in that case.  */
static bool
arm_const_double_prefer_rsbs_rsc (rtx op)
{
  /* Thumb lacks RSC, so we never prefer that sequence.  */
  if (TARGET_THUMB || !CONST_INT_P (op))
    return false;
  HOST_WIDE_INT hi, lo;
  lo = UINTVAL (op) & 0xffffffffULL;
  hi = UINTVAL (op) >> 32;
  return const_ok_for_arm (lo) && const_ok_for_arm (hi);
}

/* Canonicalize a comparison so that we are more likely to recognize it.
   This can be done for a few constant compares, where we can make the
   immediate value easier to load.  */

static void
arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
			     bool op0_preserve_value)
{
  machine_mode mode;
  unsigned HOST_WIDE_INT i, maxval;

  mode = GET_MODE (*op0);
  if (mode == VOIDmode)
    mode = GET_MODE (*op1);

  maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;

  /* For DImode, we have GE/LT/GEU/LTU comparisons (with cmp/sbc).  In
     ARM mode we can also use cmp/cmpeq for GTU/LEU.  GT/LE must be
     either reversed or (for constant OP1) adjusted to GE/LT.
     Similarly for GTU/LEU in Thumb mode.  */
  if (mode == DImode)
    {

      if (*code == GT || *code == LE
	  || *code == GTU || *code == LEU)
	{
	  /* Missing comparison.  First try to use an available
	     comparison.  */
	  if (CONST_INT_P (*op1))
	    {
	      i = INTVAL (*op1);
	      switch (*code)
		{
		case GT:
		case LE:
		  if (i != maxval)
		    {
		      /* Try to convert to GE/LT, unless that would be more
			 expensive.  */
		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
			  && arm_const_double_prefer_rsbs_rsc (*op1))
			return;
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GT ? GE : LT;
		      return;
		    }
		  break;

		case GTU:
		case LEU:
		  if (i != ~((unsigned HOST_WIDE_INT) 0))
		    {
		      /* Try to convert to GEU/LTU, unless that would
			 be more expensive.  */
		      if (!arm_const_double_by_immediates (GEN_INT (i + 1))
			  && arm_const_double_prefer_rsbs_rsc (*op1))
			return;
		      *op1 = GEN_INT (i + 1);
		      *code = *code == GTU ? GEU : LTU;
		      return;
		    }
		  break;

		default:
		  gcc_unreachable ();
		}
	    }

	  if (!op0_preserve_value)
	    {
	      std::swap (*op0, *op1);
	      *code = (int)swap_condition ((enum rtx_code)*code);
	    }
	}
      return;
    }

  /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
     with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
     to facilitate possible combining with a cmp into 'ands'.  */
  if (mode == SImode
      && GET_CODE (*op0) == ZERO_EXTEND
      && GET_CODE (XEXP (*op0, 0)) == SUBREG
      && GET_MODE (XEXP (*op0, 0)) == QImode
      && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
      && subreg_lowpart_p (XEXP (*op0, 0))
      && *op1 == const0_rtx)
    *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
			GEN_INT (255));

  /* Comparisons smaller than DImode.  Only adjust comparisons against
     an out-of-range constant.  */
  if (!CONST_INT_P (*op1)
      || const_ok_for_arm (INTVAL (*op1))
      || const_ok_for_arm (- INTVAL (*op1)))
    return;

  i = INTVAL (*op1);

  switch (*code)
    {
    case EQ:
    case NE:
      return;

    case GT:
    case LE:
      if (i != maxval
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GT ? GE : LT;
	  return;
	}
      break;

    case GE:
    case LT:
      if (i != ~maxval
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GE ? GT : LE;
	  return;
	}
      break;

    case GTU:
    case LEU:
      if (i != ~((unsigned HOST_WIDE_INT) 0)
	  && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
	{
	  *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
	  *code = *code == GTU ? GEU : LTU;
	  return;
	}
      break;

    case GEU:
    case LTU:
      if (i != 0
	  && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
	{
	  *op1 = GEN_INT (i - 1);
	  *code = *code == GEU ? GTU : LEU;
	  return;
	}
      break;

    default:
      gcc_unreachable ();
    }
}


/* Define how to find the value returned by a function.  */

static rtx
arm_function_value(const_tree type, const_tree func,
		   bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  rtx r ATTRIBUTE_UNUSED;

  mode = TYPE_MODE (type);

  if (TARGET_AAPCS_BASED)
    return aapcs_allocate_return_reg (mode, type, func);

  /* Promote integer types.  */
  if (INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return arm_libcall_value_1 (mode);
}

/* libcall hashtable helpers.  */

struct libcall_hasher : nofree_ptr_hash <const rtx_def>
{
  static inline hashval_t hash (const rtx_def *);
  static inline bool equal (const rtx_def *, const rtx_def *);
  static inline void remove (rtx_def *);
};

inline bool
libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
{
  return rtx_equal_p (p1, p2);
}

inline hashval_t
libcall_hasher::hash (const rtx_def *p1)
{
  return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}

typedef hash_table<libcall_hasher> libcall_table_type;

static void
add_libcall (libcall_table_type *htab, rtx libcall)
{
  *htab->find_slot (libcall, INSERT) = libcall;
}

static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
  static bool init_done = false;
  static libcall_table_type *libcall_htab = NULL;

  if (!init_done)
    {
      init_done = true;

      libcall_htab = new libcall_table_type (31);
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, SImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, SFmode, DImode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufloat_optab, DFmode, DImode));

      add_libcall (libcall_htab,
		   convert_optab_libfunc (sext_optab, SFmode, HFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, SImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (sfix_optab, DImode, SFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (ufix_optab, DImode, SFmode));

      /* Values from double-precision helper functions are returned in core
	 registers if the selected core only supports single-precision
	 arithmetic, even if we are using the hard-float ABI.  The same is
	 true for single-precision helpers, but we will never be using the
	 hard-float ABI on a CPU which doesn't support single-precision
	 operations in hardware.  */
      add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
      add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
							SFmode));
      add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
							DFmode));
      add_libcall (libcall_htab,
		   convert_optab_libfunc (trunc_optab, HFmode, DFmode));
    }

  return libcall && libcall_htab->find (libcall) != NULL;
}

static rtx
arm_libcall_value_1 (machine_mode mode)
{
  if (TARGET_AAPCS_BASED)
    return aapcs_libcall_value (mode);
  else if (TARGET_IWMMXT_ABI
	   && arm_vector_mode_supported_p (mode))
    return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
  else
    return gen_rtx_REG (mode, ARG_REGISTER (1));
}

/* Define how to find the value returned by a library function
   assuming the value has mode MODE.  */

static rtx
arm_libcall_value (machine_mode mode, const_rtx libcall)
{
  if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
      && GET_MODE_CLASS (mode) == MODE_FLOAT)
    {
      /* The following libcalls return their result in integer registers,
	 even though they return a floating point value.  */
      if (arm_libcall_uses_aapcs_base (libcall))
	return gen_rtx_REG (mode, ARG_REGISTER(1));

    }

  return arm_libcall_value_1 (mode);
}

/* Implement TARGET_FUNCTION_VALUE_REGNO_P.  */

static bool
arm_function_value_regno_p (const unsigned int regno)
{
  if (regno == ARG_REGISTER (1)
      || (TARGET_32BIT
	  && TARGET_AAPCS_BASED
	  && TARGET_HARD_FLOAT
	  && regno == FIRST_VFP_REGNUM)
      || (TARGET_IWMMXT_ABI
	  && regno == FIRST_IWMMXT_REGNUM))
    return true;

  return false;
}

/* Determine the amount of memory needed to store the possible return
   registers of an untyped call.  */
int
arm_apply_result_size (void)
{
  int size = 16;

  if (TARGET_32BIT)
    {
      if (TARGET_HARD_FLOAT_ABI)
	size += 32;
      if (TARGET_IWMMXT_ABI)
	size += 8;
    }

  return size;
}

/* Decide whether TYPE should be returned in memory (true)
   or in a register (false).  FNTYPE is the type of the function making
   the call.  */
static bool
arm_return_in_memory (const_tree type, const_tree fntype)
{
  HOST_WIDE_INT size;

  size = int_size_in_bytes (type);  /* Negative if not fixed size.  */

  if (TARGET_AAPCS_BASED)
    {
      /* Simple, non-aggregate types (ie not including vectors and
	 complex) are always returned in a register (or registers).
	 We don't care about which register here, so we can short-cut
	 some of the detail.  */
      if (!AGGREGATE_TYPE_P (type)
	  && TREE_CODE (type) != VECTOR_TYPE
	  && TREE_CODE (type) != COMPLEX_TYPE)
	return false;

      /* Any return value that is no larger than one word can be
	 returned in r0.  */
      if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
	return false;

      /* Check any available co-processors to see if they accept the
	 type as a register candidate (VFP, for example, can return
	 some aggregates in consecutive registers).  These aren't
	 available if the call is variadic.  */
      if (aapcs_select_return_coproc (type, fntype) >= 0)
	return false;

      /* Vector values should be returned using ARM registers, not
	 memory (unless they're over 16 bytes, which will break since
	 we only have four call-clobbered registers to play with).  */
      if (TREE_CODE (type) == VECTOR_TYPE)
	return (size < 0 || size > (4 * UNITS_PER_WORD));

      /* The rest go in memory.  */
      return true;
    }

  if (TREE_CODE (type) == VECTOR_TYPE)
    return (size < 0 || size > (4 * UNITS_PER_WORD));

  if (!AGGREGATE_TYPE_P (type) &&
      (TREE_CODE (type) != VECTOR_TYPE))
    /* All simple types are returned in registers.  */
    return false;

  if (arm_abi != ARM_ABI_APCS)
    {
      /* ATPCS and later return aggregate types in memory only if they are
	 larger than a word (or are variable size).  */
      return (size < 0 || size > UNITS_PER_WORD);
    }

  /* For the arm-wince targets we choose to be compatible with Microsoft's
     ARM and Thumb compilers, which always return aggregates in memory.  */
#ifndef ARM_WINCE
  /* All structures/unions bigger than one word are returned in memory.
     Also catch the case where int_size_in_bytes returns -1.  In this case
     the aggregate is either huge or of variable size, and in either case
     we will want to return it via memory and not in a register.  */
  if (size < 0 || size > UNITS_PER_WORD)
    return true;

  if (TREE_CODE (type) == RECORD_TYPE)
    {
      tree field;

      /* For a struct the APCS says that we only return in a register
	 if the type is 'integer like' and every addressable element
	 has an offset of zero.  For practical purposes this means
	 that the structure can have at most one non bit-field element
	 and that this element must be the first one in the structure.  */

      /* Find the first field, ignoring non FIELD_DECL things which will
	 have been created by C++.  */
      for (field = TYPE_FIELDS (type);
	   field && TREE_CODE (field) != FIELD_DECL;
	   field = DECL_CHAIN (field))
	continue;

      if (field == NULL)
	return false; /* An empty structure.  Allowed by an extension to ANSI C.  */

      /* Check that the first field is valid for returning in a register.  */

      /* ... Floats are not allowed */
      if (FLOAT_TYPE_P (TREE_TYPE (field)))
	return true;

      /* ... Aggregates that are not themselves valid for returning in
	 a register are not allowed.  */
      if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	return true;

      /* Now check the remaining fields, if any.  Only bitfields are allowed,
	 since they are not addressable.  */
      for (field = DECL_CHAIN (field);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (!DECL_BIT_FIELD_TYPE (field))
	    return true;
	}

      return false;
    }

  if (TREE_CODE (type) == UNION_TYPE)
    {
      tree field;

      /* Unions can be returned in registers if every element is
	 integral, or can be returned in an integer register.  */
      for (field = TYPE_FIELDS (type);
	   field;
	   field = DECL_CHAIN (field))
	{
	  if (TREE_CODE (field) != FIELD_DECL)
	    continue;

	  if (FLOAT_TYPE_P (TREE_TYPE (field)))
	    return true;

	  if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
	    return true;
	}

      return false;
    }
#endif /* not ARM_WINCE */

  /* Return all other types in memory.  */
  return true;
}

const struct pcs_attribute_arg
{
  const char *arg;
  enum arm_pcs value;
} pcs_attribute_args[] =
  {
    {"aapcs", ARM_PCS_AAPCS},
    {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
#if 0
    /* We could recognize these, but changes would be needed elsewhere
     * to implement them.  */
    {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
    {"atpcs", ARM_PCS_ATPCS},
    {"apcs", ARM_PCS_APCS},
#endif
    {NULL, ARM_PCS_UNKNOWN}
  };

static enum arm_pcs
arm_pcs_from_attribute (tree attr)
{
  const struct pcs_attribute_arg *ptr;
  const char *arg;

  /* Get the value of the argument.  */
  if (TREE_VALUE (attr) == NULL_TREE
      || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
    return ARM_PCS_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (attr));

  /* Check it against the list of known arguments.  */
  for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->value;

  /* An unrecognized interrupt type.  */
  return ARM_PCS_UNKNOWN;
}

/* Get the PCS variant to use for this call.  TYPE is the function's type
   specification, DECL is the specific declartion.  DECL may be null if
   the call could be indirect or if this is a library call.  */
static enum arm_pcs
arm_get_pcs_model (const_tree type, const_tree decl)
{
  bool user_convention = false;
  enum arm_pcs user_pcs = arm_pcs_default;
  tree attr;

  gcc_assert (type);

  attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
  if (attr)
    {
      user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
      user_convention = true;
    }

  if (TARGET_AAPCS_BASED)
    {
      /* Detect varargs functions.  These always use the base rules
	 (no argument is ever a candidate for a co-processor
	 register).  */
      bool base_rules = stdarg_p (type);

      if (user_convention)
	{
	  if (user_pcs > ARM_PCS_AAPCS_LOCAL)
	    sorry ("non-AAPCS derived PCS variant");
	  else if (base_rules && user_pcs != ARM_PCS_AAPCS)
	    error ("variadic functions must use the base AAPCS variant");
	}

      if (base_rules)
	return ARM_PCS_AAPCS;
      else if (user_convention)
	return user_pcs;
      else if (decl && flag_unit_at_a_time)
	{
	  /* Local functions never leak outside this compilation unit,
	     so we are free to use whatever conventions are
	     appropriate.  */
	  /* FIXME: remove CONST_CAST_TREE when cgraph is constified.  */
	  cgraph_node *local_info_node
	    = cgraph_node::local_info_node (CONST_CAST_TREE (decl));
	  if (local_info_node && local_info_node->local)
	    return ARM_PCS_AAPCS_LOCAL;
	}
    }
  else if (user_convention && user_pcs != arm_pcs_default)
    sorry ("PCS variant");

  /* For everything else we use the target's default.  */
  return arm_pcs_default;
}


static void
aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
		    const_tree fntype ATTRIBUTE_UNUSED,
		    rtx libcall ATTRIBUTE_UNUSED,
		    const_tree fndecl ATTRIBUTE_UNUSED)
{
  /* Record the unallocated VFP registers.  */
  pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
  pcum->aapcs_vfp_reg_alloc = 0;
}

/* Walk down the type tree of TYPE counting consecutive base elements.
   If *MODEP is VOIDmode, then set it to the first valid floating point
   type.  If a non-floating point type is found, or if a floating point
   type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
   otherwise return the count in the sub-tree.  */
static int
aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
{
  machine_mode mode;
  HOST_WIDE_INT size;

  switch (TREE_CODE (type))
    {
    case REAL_TYPE:
      mode = TYPE_MODE (type);
      if (mode != DFmode && mode != SFmode && mode != HFmode && mode != BFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 1;

      break;

    case COMPLEX_TYPE:
      mode = TYPE_MODE (TREE_TYPE (type));
      if (mode != DFmode && mode != SFmode)
	return -1;

      if (*modep == VOIDmode)
	*modep = mode;

      if (*modep == mode)
	return 2;

      break;

    case VECTOR_TYPE:
      /* Use V2SImode and V4SImode as representatives of all 64-bit
	 and 128-bit vector types, whether or not those modes are
	 supported with the present options.  */
      size = int_size_in_bytes (type);
      switch (size)
	{
	case 8:
	  mode = V2SImode;
	  break;
	case 16:
	  mode = V4SImode;
	  break;
	default:
	  return -1;
	}

      if (*modep == VOIDmode)
	*modep = mode;

      /* Vector modes are considered to be opaque: two vectors are
	 equivalent for the purposes of being homogeneous aggregates
	 if they are the same size.  */
      if (*modep == mode)
	return 1;

      break;

    case ARRAY_TYPE:
      {
	int count;
	tree index = TYPE_DOMAIN (type);

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
	if (count == -1
	    || !index
	    || !TYPE_MAX_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
	    || !TYPE_MIN_VALUE (index)
	    || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
	    || count < 0)
	  return -1;

	count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
		      - tree_to_uhwi (TYPE_MIN_VALUE (index)));

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case RECORD_TYPE:
      {
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count += sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      {
	/* These aren't very interesting except in a degenerate case.  */
	int count = 0;
	int sub_count;
	tree field;

	/* Can't handle incomplete types nor sizes that are not
	   fixed.  */
	if (!COMPLETE_TYPE_P (type)
	    || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
	  return -1;

	for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
	  {
	    if (TREE_CODE (field) != FIELD_DECL)
	      continue;

	    sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
	    if (sub_count < 0)
	      return -1;
	    count = count > sub_count ? count : sub_count;
	  }

	/* There must be no padding.  */
	if (wi::to_wide (TYPE_SIZE (type))
	    != count * GET_MODE_BITSIZE (*modep))
	  return -1;

	return count;
      }

    default:
      break;
    }

  return -1;
}

/* Return true if PCS_VARIANT should use VFP registers.  */
static bool
use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
{
  if (pcs_variant == ARM_PCS_AAPCS_VFP)
    {
      static bool seen_thumb1_vfp = false;

      if (TARGET_THUMB1 && !seen_thumb1_vfp)
	{
	  sorry ("Thumb-1 hard-float VFP ABI");
	  /* sorry() is not immediately fatal, so only display this once.  */
	  seen_thumb1_vfp = true;
	}

      return true;
    }

  if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
    return false;

  return (TARGET_32BIT && TARGET_HARD_FLOAT &&
	  (TARGET_VFP_DOUBLE || !is_double));
}

/* Return true if an argument whose type is TYPE, or mode is MODE, is
   suitable for passing or returning in VFP registers for the PCS
   variant selected.  If it is, then *BASE_MODE is updated to contain
   a machine mode describing each element of the argument's type and
   *COUNT to hold the number of such elements.  */
static bool
aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
				       machine_mode mode, const_tree type,
				       machine_mode *base_mode, int *count)
{
  machine_mode new_mode = VOIDmode;

  /* If we have the type information, prefer that to working things
     out from the mode.  */
  if (type)
    {
      int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);

      if (ag_count > 0 && ag_count <= 4)
	*count = ag_count;
      else
	return false;
    }
  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	   || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      *count = 1;
      new_mode = mode;
    }
  else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
    {
      *count = 2;
      new_mode = (mode == DCmode ? DFmode : SFmode);
    }
  else
    return false;


  if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
    return false;

  *base_mode = new_mode;

  if (TARGET_GENERAL_REGS_ONLY)
    error ("argument of type %qT not permitted with -mgeneral-regs-only",
	   type);

  return true;
}

static bool
aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
			       machine_mode mode, const_tree type)
{
  int count ATTRIBUTE_UNUSED;
  machine_mode ag_mode ATTRIBUTE_UNUSED;

  if (!use_vfp_abi (pcs_variant, false))
    return false;
  return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
						&ag_mode, &count);
}

static bool
aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
			     const_tree type)
{
  if (!use_vfp_abi (pcum->pcs_variant, false))
    return false;

  return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
						&pcum->aapcs_vfp_rmode,
						&pcum->aapcs_vfp_rcount);
}

/* Implement the allocate field in aapcs_cp_arg_layout.  See the comment there
   for the behaviour of this function.  */

static bool
aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
		    const_tree type  ATTRIBUTE_UNUSED)
{
  int rmode_size
    = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
  int shift = rmode_size / GET_MODE_SIZE (SFmode);
  unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
  int regno;

  for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
    if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
      {
	pcum->aapcs_vfp_reg_alloc = mask << regno;
	if (mode == BLKmode
	    || (mode == TImode && ! TARGET_NEON)
	    || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
	  {
	    int i;
	    int rcount = pcum->aapcs_vfp_rcount;
	    int rshift = shift;
	    machine_mode rmode = pcum->aapcs_vfp_rmode;
	    rtx par;
	    if (!TARGET_NEON)
	      {
		/* Avoid using unsupported vector modes.  */
		if (rmode == V2SImode)
		  rmode = DImode;
		else if (rmode == V4SImode)
		  {
		    rmode = DImode;
		    rcount *= 2;
		    rshift /= 2;
		  }
	      }
	    par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
	    for (i = 0; i < rcount; i++)
	      {
		rtx tmp = gen_rtx_REG (rmode,
				       FIRST_VFP_REGNUM + regno + i * rshift);
		tmp = gen_rtx_EXPR_LIST
		  (VOIDmode, tmp,
		   GEN_INT (i * GET_MODE_SIZE (rmode)));
		XVECEXP (par, 0, i) = tmp;
	      }

	    pcum->aapcs_reg = par;
	  }
	else
	  pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
	return true;
      }
  return false;
}

/* Implement the allocate_return_reg field in aapcs_cp_arg_layout.  See the
   comment there for the behaviour of this function.  */

static rtx
aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
			       machine_mode mode,
			       const_tree type ATTRIBUTE_UNUSED)
{
  if (!use_vfp_abi (pcs_variant, false))
    return NULL;

  if (mode == BLKmode
      || (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
	  && !TARGET_NEON))
    {
      int count;
      machine_mode ag_mode;
      int i;
      rtx par;
      int shift;

      aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
					     &ag_mode, &count);

      if (!TARGET_NEON)
	{
	  if (ag_mode == V2SImode)
	    ag_mode = DImode;
	  else if (ag_mode == V4SImode)
	    {
	      ag_mode = DImode;
	      count *= 2;
	    }
	}
      shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
      par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
      for (i = 0; i < count; i++)
	{
	  rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
	  tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
				   GEN_INT (i * GET_MODE_SIZE (ag_mode)));
	  XVECEXP (par, 0, i) = tmp;
	}

      return par;
    }

  return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
}

static void
aapcs_vfp_advance (CUMULATIVE_ARGS *pcum  ATTRIBUTE_UNUSED,
		   machine_mode mode  ATTRIBUTE_UNUSED,
		   const_tree type  ATTRIBUTE_UNUSED)
{
  pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
  pcum->aapcs_vfp_reg_alloc = 0;
  return;
}

#define AAPCS_CP(X)				\
  {						\
    aapcs_ ## X ## _cum_init,			\
    aapcs_ ## X ## _is_call_candidate,		\
    aapcs_ ## X ## _allocate,			\
    aapcs_ ## X ## _is_return_candidate,	\
    aapcs_ ## X ## _allocate_return_reg,	\
    aapcs_ ## X ## _advance			\
  }

/* Table of co-processors that can be used to pass arguments in
   registers.  Idealy no arugment should be a candidate for more than
   one co-processor table entry, but the table is processed in order
   and stops after the first match.  If that entry then fails to put
   the argument into a co-processor register, the argument will go on
   the stack.  */
static struct
{
  /* Initialize co-processor related state in CUMULATIVE_ARGS structure.  */
  void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);

  /* Return true if an argument of mode MODE (or type TYPE if MODE is
     BLKmode) is a candidate for this co-processor's registers; this
     function should ignore any position-dependent state in
     CUMULATIVE_ARGS and only use call-type dependent information.  */
  bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if the argument does get a co-processor register; it
     should set aapcs_reg to an RTX of the register allocated as is
     required for a return from FUNCTION_ARG.  */
  bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);

  /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
     be returned in this co-processor's registers.  */
  bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);

  /* Allocate and return an RTX element to hold the return type of a call.  This
     routine must not fail and will only be called if is_return_candidate
     returned true with the same parameters.  */
  rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);

  /* Finish processing this argument and prepare to start processing
     the next one.  */
  void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
} aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
  {
    AAPCS_CP(vfp)
  };

#undef AAPCS_CP

static int
aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
			  const_tree type)
{
  int i;

  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
    if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
      return i;

  return -1;
}

static int
aapcs_select_return_coproc (const_tree type, const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
							TYPE_MODE (type),
							type))
	  return i;
    }
  return -1;
}

static rtx
aapcs_allocate_return_reg (machine_mode mode, const_tree type,
			   const_tree fntype)
{
  /* We aren't passed a decl, so we can't check that a call is local.
     However, it isn't clear that that would be a win anyway, since it
     might limit some tail-calling opportunities.  */
  enum arm_pcs pcs_variant;
  int unsignedp ATTRIBUTE_UNUSED;

  if (fntype)
    {
      const_tree fndecl = NULL_TREE;

      if (TREE_CODE (fntype) == FUNCTION_DECL)
	{
	  fndecl = fntype;
	  fntype = TREE_TYPE (fntype);
	}

      pcs_variant = arm_get_pcs_model (fntype, fndecl);
    }
  else
    pcs_variant = arm_pcs_default;

  /* Promote integer types.  */
  if (type && INTEGRAL_TYPE_P (type))
    mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);

  if (pcs_variant != ARM_PCS_AAPCS)
    {
      int i;

      for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
							type))
	  return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
							     mode, type);
    }

  /* Promotes small structs returned in a register to full-word size
     for big-endian AAPCS.  */
  if (type && arm_return_in_msb (type))
    {
      HOST_WIDE_INT size = int_size_in_bytes (type);
      if (size % UNITS_PER_WORD != 0)
	{
	  size += UNITS_PER_WORD - size % UNITS_PER_WORD;
	  mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
	}
    }

  return gen_rtx_REG (mode, R0_REGNUM);
}

static rtx
aapcs_libcall_value (machine_mode mode)
{
  if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
      && GET_MODE_SIZE (mode) <= 4)
    mode = SImode;

  return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
}

/* Lay out a function argument using the AAPCS rules.  The rule
   numbers referred to here are those in the AAPCS.  */
static void
aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
		  const_tree type, bool named)
{
  int nregs, nregs2;
  int ncrn;

  /* We only need to do this once per argument.  */
  if (pcum->aapcs_arg_processed)
    return;

  pcum->aapcs_arg_processed = true;

  /* Special case: if named is false then we are handling an incoming
     anonymous argument which is on the stack.  */
  if (!named)
    return;

  /* Is this a potential co-processor register candidate?  */
  if (pcum->pcs_variant != ARM_PCS_AAPCS)
    {
      int slot = aapcs_select_call_coproc (pcum, mode, type);
      pcum->aapcs_cprc_slot = slot;

      /* We don't have to apply any of the rules from part B of the
	 preparation phase, these are handled elsewhere in the
	 compiler.  */

      if (slot >= 0)
	{
	  /* A Co-processor register candidate goes either in its own
	     class of registers or on the stack.  */
	  if (!pcum->aapcs_cprc_failed[slot])
	    {
	      /* C1.cp - Try to allocate the argument to co-processor
		 registers.  */
	      if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
		return;

	      /* C2.cp - Put the argument on the stack and note that we
		 can't assign any more candidates in this slot.  We also
		 need to note that we have allocated stack space, so that
		 we won't later try to split a non-cprc candidate between
		 core registers and the stack.  */
	      pcum->aapcs_cprc_failed[slot] = true;
	      pcum->can_split = false;
	    }

	  /* We didn't get a register, so this argument goes on the
	     stack.  */
	  gcc_assert (pcum->can_split == false);
	  return;
	}
    }

  /* C3 - For double-word aligned arguments, round the NCRN up to the
     next even number.  */
  ncrn = pcum->aapcs_ncrn;
  if (ncrn & 1)
    {
      int res = arm_needs_doubleword_align (mode, type);
      /* Only warn during RTL expansion of call stmts, otherwise we would
	 warn e.g. during gimplification even on functions that will be
	 always inlined, and we'd warn multiple times.  Don't warn when
	 called in expand_function_start either, as we warn instead in
	 arm_function_arg_boundary in that case.  */
      if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", type);
      else if (res > 0)
	ncrn++;
    }

  nregs = ARM_NUM_REGS2(mode, type);

  /* Sigh, this test should really assert that nregs > 0, but a GCC
     extension allows empty structs and then gives them empty size; it
     then allows such a structure to be passed by value.  For some of
     the code below we have to pretend that such an argument has
     non-zero size so that we 'locate' it correctly either in
     registers or on the stack.  */
  gcc_assert (nregs >= 0);

  nregs2 = nregs ? nregs : 1;

  /* C4 - Argument fits entirely in core registers.  */
  if (ncrn + nregs2 <= NUM_ARG_REGS)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = ncrn + nregs;
      return;
    }

  /* C5 - Some core registers left and there are no arguments already
     on the stack: split this argument between the remaining core
     registers and the stack.  */
  if (ncrn < NUM_ARG_REGS && pcum->can_split)
    {
      pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
      pcum->aapcs_next_ncrn = NUM_ARG_REGS;
      pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
      return;
    }

  /* C6 - NCRN is set to 4.  */
  pcum->aapcs_next_ncrn = NUM_ARG_REGS;

  /* C7,C8 - arugment goes on the stack.  We have nothing to do here.  */
  return;
}

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is NULL.  */
void
arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
			  rtx libname,
			  tree fndecl ATTRIBUTE_UNUSED)
{
  /* Long call handling.  */
  if (fntype)
    pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
  else
    pcum->pcs_variant = arm_pcs_default;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      if (arm_libcall_uses_aapcs_base (libname))
	pcum->pcs_variant = ARM_PCS_AAPCS;

      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_cprc_slot = -1;
      pcum->can_split = true;

      if (pcum->pcs_variant != ARM_PCS_AAPCS)
	{
	  int i;

	  for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
	    {
	      pcum->aapcs_cprc_failed[i] = false;
	      aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
	    }
	}
      return;
    }

  /* Legacy ABIs */

  /* On the ARM, the offset starts at 0.  */
  pcum->nregs = 0;
  pcum->iwmmxt_nregs = 0;
  pcum->can_split = true;

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  pcum->named_count = 0;
  pcum->nargs = 0;

  if (TARGET_REALLY_IWMMXT && fntype)
    {
      tree fn_arg;

      for (fn_arg = TYPE_ARG_TYPES (fntype);
	   fn_arg;
	   fn_arg = TREE_CHAIN (fn_arg))
	pcum->named_count += 1;

      if (! pcum->named_count)
	pcum->named_count = INT_MAX;
    }
}

/* Return 2 if double word alignment is required for argument passing,
   but wasn't required before the fix for PR88469.
   Return 1 if double word alignment is required for argument passing.
   Return -1 if double word alignment used to be required for argument
   passing before PR77728 ABI fix, but is not required anymore.
   Return 0 if double word alignment is not required and wasn't requried
   before either.  */
static int
arm_needs_doubleword_align (machine_mode mode, const_tree type)
{
  if (!type)
    return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;

  /* Scalar and vector types: Use natural alignment, i.e. of base type.  */
  if (!AGGREGATE_TYPE_P (type))
    return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;

  /* Array types: Use member alignment of element type.  */
  if (TREE_CODE (type) == ARRAY_TYPE)
    return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;

  int ret = 0;
  int ret2 = 0;
  /* Record/aggregate types: Use greatest member alignment of any member.  */
  for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
    if (DECL_ALIGN (field) > PARM_BOUNDARY)
      {
	if (TREE_CODE (field) == FIELD_DECL)
	  return 1;
	else
	  /* Before PR77728 fix, we were incorrectly considering also
	     other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
	     Make sure we can warn about that with -Wpsabi.  */
	  ret = -1;
      }
    else if (TREE_CODE (field) == FIELD_DECL
	     && DECL_BIT_FIELD_TYPE (field)
	     && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
      ret2 = 1;

  if (ret2)
    return 2;

  return ret;
}


/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   ARG is a description of the argument.

   On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
   other arguments are passed on the stack.  If (NAMED == 0) (which happens
   only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
   defined), say it is passed in the stack (function_prologue will
   indeed make it pass in the stack if necessary).  */

static rtx
arm_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs;

  /* Handle the special case quickly.  Pick an arbitrary value for op2 of
     a call insn (op3 of a call_value insn).  */
  if (arg.end_marker_p ())
    return const0_rtx;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_reg;
    }

  /* Varargs vectors are treated the same as long long.
     named_count avoids having to change the way arm handles 'named' */
  if (TARGET_IWMMXT_ABI
      && arm_vector_mode_supported_p (arg.mode)
      && pcum->named_count > pcum->nargs + 1)
    {
      if (pcum->iwmmxt_nregs <= 9)
	return gen_rtx_REG (arg.mode,
			    pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
      else
	{
	  pcum->can_split = false;
	  return NULL_RTX;
	}
    }

  /* Put doubleword aligned quantities in even register pairs.  */
  if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
    {
      int res = arm_needs_doubleword_align (arg.mode, arg.type);
      if (res < 0 && warn_psabi)
	inform (input_location, "parameter passing for argument of type "
		"%qT changed in GCC 7.1", arg.type);
      else if (res > 0)
	{
	  pcum->nregs++;
	  if (res > 1 && warn_psabi)
	    inform (input_location, "parameter passing for argument of type "
		    "%qT changed in GCC 9.1", arg.type);
	}
    }

  /* Only allow splitting an arg between regs and memory if all preceding
     args were allocated to regs.  For args passed by reference we only count
     the reference pointer.  */
  if (pcum->can_split)
    nregs = 1;
  else
    nregs = ARM_NUM_REGS2 (arg.mode, arg.type);

  if (!arg.named || pcum->nregs + nregs > NUM_ARG_REGS)
    return NULL_RTX;

  return gen_rtx_REG (arg.mode, pcum->nregs);
}

static unsigned int
arm_function_arg_boundary (machine_mode mode, const_tree type)
{
  if (!ARM_DOUBLEWORD_ALIGN)
    return PARM_BOUNDARY;

  int res = arm_needs_doubleword_align (mode, type);
  if (res < 0 && warn_psabi)
    inform (input_location, "parameter passing for argument of type %qT "
	    "changed in GCC 7.1", type);
  if (res > 1 && warn_psabi)
    inform (input_location, "parameter passing for argument of type "
	    "%qT changed in GCC 9.1", type);

  return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
}

static int
arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int nregs = pcum->nregs;

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
      return pcum->aapcs_partial;
    }

  if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
    return 0;

  if (NUM_ARG_REGS > nregs
      && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
      && pcum->can_split)
    return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;

  return 0;
}

/* Update the data in PCUM to advance over argument ARG.  */

static void
arm_function_arg_advance (cumulative_args_t pcum_v,
			  const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
    {
      aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);

      if (pcum->aapcs_cprc_slot >= 0)
	{
	  aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, arg.mode,
							      arg.type);
	  pcum->aapcs_cprc_slot = -1;
	}

      /* Generic stuff.  */
      pcum->aapcs_arg_processed = false;
      pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
      pcum->aapcs_reg = NULL_RTX;
      pcum->aapcs_partial = 0;
    }
  else
    {
      pcum->nargs += 1;
      if (arm_vector_mode_supported_p (arg.mode)
	  && pcum->named_count > pcum->nargs
	  && TARGET_IWMMXT_ABI)
	pcum->iwmmxt_nregs += 1;
      else
	pcum->nregs += ARM_NUM_REGS2 (arg.mode, arg.type);
    }
}

/* Variable sized types are passed by reference.  This is a GCC
   extension to the ARM ABI.  */

static bool
arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
{
  return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
}

/* Encode the current state of the #pragma [no_]long_calls.  */
typedef enum
{
  OFF,		/* No #pragma [no_]long_calls is in effect.  */
  LONG,		/* #pragma long_calls is in effect.  */
  SHORT		/* #pragma no_long_calls is in effect.  */
} arm_pragma_enum;

static arm_pragma_enum arm_pragma_long_calls = OFF;

void
arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = LONG;
}

void
arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = SHORT;
}

void
arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
{
  arm_pragma_long_calls = OFF;
}

/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */
static tree
arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			  bool *no_add_attrs)
{
  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
      /* FIXME: the argument if any is checked for type attributes;
	 should it be checked for decl ones?  */
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
	  || TREE_CODE (*node) == METHOD_TYPE)
	{
	  if (arm_isr_value (args) == ARM_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
	       && arm_isr_value (args) != ARM_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant
	    (TREE_TYPE (*node),
	     tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else
	{
	  /* Possibly pass this attribute on from the type to a decl.  */
	  if (flags & ((int) ATTR_FLAG_DECL_NEXT
		       | (int) ATTR_FLAG_FUNCTION_NEXT
		       | (int) ATTR_FLAG_ARRAY_NEXT))
	    {
	      *no_add_attrs = true;
	      return tree_cons (name, args, NULL_TREE);
	    }
	  else
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored",
		       name);
	    }
	}
    }

  return NULL_TREE;
}

/* Handle a "pcs" attribute; arguments as in struct
   attribute_spec.handler.  */
static tree
arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
			  int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
    {
      warning (OPT_Wattributes, "%qE attribute ignored", name);
      *no_add_attrs = true;
    }
  return NULL_TREE;
}

#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
/* Handle the "notshared" attribute.  This attribute is another way of
   requesting hidden visibility.  ARM's compiler supports
   "__declspec(notshared)"; we support the same thing via an
   attribute.  */

static tree
arm_handle_notshared_attribute (tree *node,
				tree name ATTRIBUTE_UNUSED,
				tree args ATTRIBUTE_UNUSED,
				int flags ATTRIBUTE_UNUSED,
				bool *no_add_attrs)
{
  tree decl = TYPE_NAME (*node);

  if (decl)
    {
      DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
      DECL_VISIBILITY_SPECIFIED (decl) = 1;
      *no_add_attrs = false;
    }
  return NULL_TREE;
}
#endif

/* This function returns true if a function with declaration FNDECL and type
   FNTYPE uses the stack to pass arguments or return variables and false
   otherwise.  This is used for functions with the attributes
   'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
   diagnostic messages if the stack is used.  NAME is the name of the attribute
   used.  */

static bool
cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
{
  function_args_iterator args_iter;
  CUMULATIVE_ARGS args_so_far_v;
  cumulative_args_t args_so_far;
  bool first_param = true;
  tree arg_type, prev_arg_type = NULL_TREE, ret_type;

  /* Error out if any argument is passed on the stack.  */
  arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
  args_so_far = pack_cumulative_args (&args_so_far_v);
  FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
    {
      rtx arg_rtx;

      prev_arg_type = arg_type;
      if (VOID_TYPE_P (arg_type))
	continue;

      function_arg_info arg (arg_type, /*named=*/true);
      if (!first_param)
	/* ??? We should advance after processing the argument and pass
	   the argument we're advancing past.  */
	arm_function_arg_advance (args_so_far, arg);
      arg_rtx = arm_function_arg (args_so_far, arg);
      if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
	{
	  error ("%qE attribute not available to functions with arguments "
		 "passed on the stack", name);
	  return true;
	}
      first_param = false;
    }

  /* Error out for variadic functions since we cannot control how many
     arguments will be passed and thus stack could be used.  stdarg_p () is not
     used for the checking to avoid browsing arguments twice.  */
  if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
    {
      error ("%qE attribute not available to functions with variable number "
	     "of arguments", name);
      return true;
    }

  /* Error out if return value is passed on the stack.  */
  ret_type = TREE_TYPE (fntype);
  if (arm_return_in_memory (ret_type, fntype))
    {
      error ("%qE attribute not available to functions that return value on "
	     "the stack", name);
      return true;
    }
  return false;
}

/* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function declaration tree or otherwise issue a warning.  */

static tree
arm_handle_cmse_nonsecure_entry (tree *node, tree name,
				 tree /* args */,
				 int /* flags */,
				 bool *no_add_attrs)
{
  tree fndecl;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option.", name);
      return NULL_TREE;
    }

  /* Ignore attribute for function types.  */
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  fndecl = *node;

  /* Warn for static linkage functions.  */
  if (!TREE_PUBLIC (fndecl))
    {
      warning (OPT_Wattributes, "%qE attribute has no effect on functions "
	       "with static linkage", name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
						TREE_TYPE (fndecl));
  return NULL_TREE;
}


/* Called upon detection of the use of the cmse_nonsecure_call attribute, this
   function will check whether the attribute is allowed here and will add the
   attribute to the function type tree or otherwise issue a diagnostic.  The
   reason we check this at declaration time is to only allow the use of the
   attribute with declarations of function pointers and not function
   declarations.  This function checks NODE is of the expected type and issues
   diagnostics otherwise using NAME.  If it is not of the expected type
   *NO_ADD_ATTRS will be set to true.  */

static tree
arm_handle_cmse_nonsecure_call (tree *node, tree name,
				 tree /* args */,
				 int /* flags */,
				 bool *no_add_attrs)
{
  tree decl = NULL_TREE, fntype = NULL_TREE;
  tree type;

  if (!use_cmse)
    {
      *no_add_attrs = true;
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
	       "option.", name);
      return NULL_TREE;
    }

  if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
    {
      decl = *node;
      fntype = TREE_TYPE (decl);
    }

  while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
    fntype = TREE_TYPE (fntype);

  if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
    {
	warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
		 "function pointer", name);
	*no_add_attrs = true;
	return NULL_TREE;
    }

  *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);

  if (*no_add_attrs)
    return NULL_TREE;

  /* Prevent trees being shared among function types with and without
     cmse_nonsecure_call attribute.  */
  type = TREE_TYPE (decl);

  type = build_distinct_type_copy (type);
  TREE_TYPE (decl) = type;
  fntype = type;

  while (TREE_CODE (fntype) != FUNCTION_TYPE)
    {
      type = fntype;
      fntype = TREE_TYPE (fntype);
      fntype = build_distinct_type_copy (fntype);
      TREE_TYPE (type) = fntype;
    }

  /* Construct a type attribute and add it to the function type.  */
  tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
			  TYPE_ATTRIBUTES (fntype));
  TYPE_ATTRIBUTES (fntype) = attrs;
  return NULL_TREE;
}

/* Return 0 if the attributes for two types are incompatible, 1 if they
   are compatible, and 2 if they are nearly compatible (which causes a
   warning to be generated).  */
static int
arm_comp_type_attributes (const_tree type1, const_tree type2)
{
  int l1, l2, s1, s2;

  /* Check for mismatch of non-default calling convention.  */
  if (TREE_CODE (type1) != FUNCTION_TYPE)
    return 1;

  /* Check for mismatched call attributes.  */
  l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
  s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
  s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;

  /* Only bother to check if an attribute is defined.  */
  if (l1 | l2 | s1 | s2)
    {
      /* If one type has an attribute, the other must have the same attribute.  */
      if ((l1 != l2) || (s1 != s2))
	return 0;

      /* Disallow mixed attributes.  */
      if ((l1 & s2) || (l2 & s1))
	return 0;
    }

  /* Check for mismatched ISR attribute.  */
  l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
  if (! l1)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
  if (! l2)
    l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
  if (l1 != l2)
    return 0;

  l1 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type1)) != NULL;
  l2 = lookup_attribute ("cmse_nonsecure_call",
			 TYPE_ATTRIBUTES (type2)) != NULL;

  if (l1 != l2)
    return 0;

  return 1;
}

/*  Assigns default attributes to newly defined type.  This is used to
    set short_call/long_call attributes for function types of
    functions defined inside corresponding #pragma scopes.  */
static void
arm_set_default_type_attributes (tree type)
{
  /* Add __attribute__ ((long_call)) to all functions, when
     inside #pragma long_calls or __attribute__ ((short_call)),
     when inside #pragma no_long_calls.  */
  if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
    {
      tree type_attr_list, attr_name;
      type_attr_list = TYPE_ATTRIBUTES (type);

      if (arm_pragma_long_calls == LONG)
 	attr_name = get_identifier ("long_call");
      else if (arm_pragma_long_calls == SHORT)
 	attr_name = get_identifier ("short_call");
      else
 	return;

      type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
      TYPE_ATTRIBUTES (type) = type_attr_list;
    }
}

/* Return true if DECL is known to be linked into section SECTION.  */

static bool
arm_function_in_section_p (tree decl, section *section)
{
  /* We can only be certain about the prevailing symbol definition.  */
  if (!decl_binds_to_current_def_p (decl))
    return false;

  /* If DECL_SECTION_NAME is set, assume it is trustworthy.  */
  if (!DECL_SECTION_NAME (decl))
    {
      /* Make sure that we will not create a unique section for DECL.  */
      if (flag_function_sections || DECL_COMDAT_GROUP (decl))
	return false;
    }

  return function_section (decl) == section;
}

/* Return nonzero if a 32-bit "long_call" should be generated for
   a call from the current function to DECL.  We generate a long_call
   if the function:

        a.  has an __attribute__((long call))
     or b.  is within the scope of a #pragma long_calls
     or c.  the -mlong-calls command line switch has been specified

   However we do not generate a long call if the function:

        d.  has an __attribute__ ((short_call))
     or e.  is inside the scope of a #pragma no_long_calls
     or f.  is defined in the same section as the current function.  */

bool
arm_is_long_call_p (tree decl)
{
  tree attrs;

  if (!decl)
    return TARGET_LONG_CALLS;

  attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
  if (lookup_attribute ("short_call", attrs))
    return false;

  /* For "f", be conservative, and only cater for cases in which the
     whole of the current function is placed in the same section.  */
  if (!flag_reorder_blocks_and_partition
      && TREE_CODE (decl) == FUNCTION_DECL
      && arm_function_in_section_p (decl, current_function_section ()))
    return false;

  if (lookup_attribute ("long_call", attrs))
    return true;

  return TARGET_LONG_CALLS;
}

/* Return nonzero if it is ok to make a tail-call to DECL.  */
static bool
arm_function_ok_for_sibcall (tree decl, tree exp)
{
  unsigned long func_type;

  if (cfun->machine->sibcall_blocked)
    return false;

  if (TARGET_FDPIC)
    {
      /* In FDPIC, never tailcall something for which we have no decl:
	 the target function could be in a different module, requiring
	 a different FDPIC register value.  */
      if (decl == NULL)
	return false;
    }

  /* Never tailcall something if we are generating code for Thumb-1.  */
  if (TARGET_THUMB1)
    return false;

  /* The PIC register is live on entry to VxWorks PLT entries, so we
     must make the call before restoring the PIC register.  */
  if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
    return false;

  /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
     may be used both as target of the call and base register for restoring
     the VFP registers  */
  if (TARGET_APCS_FRAME && TARGET_ARM
      && TARGET_HARD_FLOAT
      && decl && arm_is_long_call_p (decl))
    return false;

  /* If we are interworking and the function is not declared static
     then we can't tail-call it unless we know that it exists in this
     compilation unit (since it might be a Thumb routine).  */
  if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
      && !TREE_ASM_WRITTEN (decl))
    return false;

  func_type = arm_current_func_type ();
  /* Never tailcall from an ISR routine - it needs a special exit sequence.  */
  if (IS_INTERRUPT (func_type))
    return false;

  /* ARMv8-M non-secure entry functions need to return with bxns which is only
     generated for entry functions themselves.  */
  if (IS_CMSE_ENTRY (arm_current_func_type ()))
    return false;

  /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
     this would complicate matters for later code generation.  */
  if (TREE_CODE (exp) == CALL_EXPR)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
	return false;
    }

  if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
    {
      /* Check that the return value locations are the same.  For
	 example that we aren't returning a value from the sibling in
	 a VFP register but then need to transfer it to a core
	 register.  */
      rtx a, b;
      tree decl_or_type = decl;

      /* If it is an indirect function pointer, get the function type.  */
      if (!decl)
	decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));

      a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
      b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
			      cfun->decl, false);
      if (!rtx_equal_p (a, b))
	return false;
    }

  /* Never tailcall if function may be called with a misaligned SP.  */
  if (IS_STACKALIGN (func_type))
    return false;

  /* The AAPCS says that, on bare-metal, calls to unresolved weak
     references should become a NOP.  Don't convert such calls into
     sibling calls.  */
  if (TARGET_AAPCS_BASED
      && arm_abi == ARM_ABI_AAPCS
      && decl
      && DECL_WEAK (decl))
    return false;

  /* We cannot do a tailcall for an indirect call by descriptor if all the
     argument registers are used because the only register left to load the
     address is IP and it will already contain the static chain.  */
  if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
    {
      tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
      CUMULATIVE_ARGS cum;
      cumulative_args_t cum_v;

      arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
      cum_v = pack_cumulative_args (&cum);

      for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
	{
	  tree type = TREE_VALUE (t);
	  if (!VOID_TYPE_P (type))
	    {
	      function_arg_info arg (type, /*named=*/true);
	      arm_function_arg_advance (cum_v, arg);
	    }
	}

      function_arg_info arg (integer_type_node, /*named=*/true);
      if (!arm_function_arg (cum_v, arg))
	return false;
    }

  /* Everything else is ok.  */
  return true;
}


/* Addressing mode support functions.  */

/* Return nonzero if X is a legitimate immediate operand when compiling
   for PIC.  We know that X satisfies CONSTANT_P and flag_pic is true.  */
int
legitimate_pic_operand_p (rtx x)
{
  if (GET_CODE (x) == SYMBOL_REF
      || (GET_CODE (x) == CONST
	  && GET_CODE (XEXP (x, 0)) == PLUS
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
    return 0;

  return 1;
}

/* Record that the current function needs a PIC register.  If PIC_REG is null,
   a new pseudo is allocated as PIC register, otherwise PIC_REG is used.  In
   both case cfun->machine->pic_reg is initialized if we have not already done
   so.  COMPUTE_NOW decide whether and where to set the PIC register.  If true,
   PIC register is reloaded in the current position of the instruction stream
   irregardless of whether it was loaded before.  Otherwise, it is only loaded
   if not already done so (crtl->uses_pic_offset_table is null).  Note that
   nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
   is only supported iff COMPUTE_NOW is false.  */

static void
require_pic_register (rtx pic_reg, bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  /* A lot of the logic here is made obscure by the fact that this
     routine gets called as part of the rtx cost estimation process.
     We don't want those calls to affect any assumptions about the real
     function; and further, we can't call entry_of_function() until we
     start the real expansion process.  */
  if (!crtl->uses_pic_offset_table || compute_now)
    {
      gcc_assert (can_create_pseudo_p ()
		  || (pic_reg != NULL_RTX
		      && REG_P (pic_reg)
		      && GET_MODE (pic_reg) == Pmode));
      if (arm_pic_register != INVALID_REGNUM
	  && !compute_now
	  && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
	{
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    crtl->uses_pic_offset_table = 1;
	}
      else
	{
	  rtx_insn *seq, *insn;

	  if (pic_reg == NULL_RTX)
	    pic_reg = gen_reg_rtx (Pmode);
	  if (!cfun->machine->pic_reg)
	    cfun->machine->pic_reg = pic_reg;

	  /* Play games to avoid marking the function as needing pic
	     if we are being called as part of the cost-estimation
	     process.  */
	  if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
	    {
	      crtl->uses_pic_offset_table = 1;
	      start_sequence ();

	      if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
		  && arm_pic_register > LAST_LO_REGNUM
		  && !compute_now)
		emit_move_insn (cfun->machine->pic_reg,
				gen_rtx_REG (Pmode, arm_pic_register));
	      else
		arm_load_pic_register (0UL, pic_reg);

	      seq = get_insns ();
	      end_sequence ();

	      for (insn = seq; insn; insn = NEXT_INSN (insn))
		if (INSN_P (insn))
		  INSN_LOCATION (insn) = prologue_location;

	      /* We can be called during expansion of PHI nodes, where
	         we can't yet emit instructions directly in the final
		 insn stream.  Queue the insns on the entry edge, they will
		 be committed after everything else is expanded.  */
	      if (currently_expanding_to_rtl)
		insert_insn_on_edge (seq,
				     single_succ_edge
				     (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
	      else
		emit_insn (seq);
	    }
	}
    }
}

/* Generate insns to calculate the address of ORIG in pic mode.  */
static rtx_insn *
calculate_pic_address_constant (rtx reg, rtx pic_reg, rtx orig)
{
  rtx pat;
  rtx mem;

  pat = gen_calculate_pic_address (reg, pic_reg, orig);

  /* Make the MEM as close to a constant as possible.  */
  mem = SET_SRC (pat);
  gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
  MEM_READONLY_P (mem) = 1;
  MEM_NOTRAP_P (mem) = 1;

  return emit_insn (pat);
}

/* Legitimize PIC load to ORIG into REG.  If REG is NULL, a new pseudo is
   created to hold the result of the load.  If not NULL, PIC_REG indicates
   which register to use as PIC register, otherwise it is decided by register
   allocator.  COMPUTE_NOW forces the PIC register to be loaded at the current
   location in the instruction stream, irregardless of whether it was loaded
   previously.  Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
   true and null PIC_REG is only supported iff COMPUTE_NOW is false.

   Returns the register REG into which the PIC load is performed.  */

rtx
legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
			bool compute_now)
{
  gcc_assert (compute_now == (pic_reg != NULL_RTX));

  if (GET_CODE (orig) == SYMBOL_REF
      || GET_CODE (orig) == LABEL_REF)
    {
      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      /* VxWorks does not impose a fixed gap between segments; the run-time
	 gap can be different from the object-file gap.  We therefore can't
	 use GOTOFF unless we are absolutely sure that the symbol is in the
	 same segment as the GOT.  Unfortunately, the flexibility of linker
	 scripts means that we can't be sure of that in general, so assume
	 that GOTOFF is never valid on VxWorks.  */
      /* References to weak symbols cannot be resolved locally: they
	 may be overridden by a non-weak definition at link time.  */
      rtx_insn *insn;
      if ((GET_CODE (orig) == LABEL_REF
	   || (GET_CODE (orig) == SYMBOL_REF
	       && SYMBOL_REF_LOCAL_P (orig)
	       && (SYMBOL_REF_DECL (orig)
		   ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)
	       && (!SYMBOL_REF_FUNCTION_P (orig)
		   || arm_fdpic_local_funcdesc_p (orig))))
	  && NEED_GOT_RELOC
	  && arm_pic_data_is_text_relative)
	insn = arm_pic_static_addr (orig, reg);
      else
	{
	  /* If this function doesn't have a pic register, create one now.  */
	  require_pic_register (pic_reg, compute_now);

	  if (pic_reg == NULL_RTX)
	    pic_reg = cfun->machine->pic_reg;

	  insn = calculate_pic_address_constant (reg, pic_reg, orig);
	}

      /* Put a REG_EQUAL note on this insn, so that it can be optimized
	 by loop.  */
      set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
	return orig;

      /* Handle the case where we have: const (UNSPEC_TLS).  */
      if (GET_CODE (XEXP (orig, 0)) == UNSPEC
	  && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
	return orig;

      /* Handle the case where we have:
         const (plus (UNSPEC_TLS) (ADDEND)).  The ADDEND must be a
         CONST_INT.  */
      if (GET_CODE (XEXP (orig, 0)) == PLUS
          && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
          && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
        {
	  gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
	  return orig;
	}

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
				     pic_reg, compute_now);
      offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
				       base == reg ? 0 : reg, pic_reg,
				       compute_now);

      if (CONST_INT_P (offset))
	{
	  /* The base register doesn't really matter, we only want to
	     test the index for the appropriate mode.  */
	  if (!arm_legitimate_index_p (mode, offset, SET, 0))
	    {
	      gcc_assert (can_create_pseudo_p ());
	      offset = force_reg (Pmode, offset);
	    }

	  if (CONST_INT_P (offset))
	    return plus_constant (Pmode, base, INTVAL (offset));
	}

      if (GET_MODE_SIZE (mode) > 4
	  && (GET_MODE_CLASS (mode) == MODE_INT
	      || TARGET_SOFT_FLOAT))
	{
	  emit_insn (gen_addsi3 (reg, base, offset));
	  return reg;
	}

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}


/* Whether a register is callee saved or not.  This is necessary because high
   registers are marked as caller saved when optimizing for size on Thumb-1
   targets despite being callee saved in order to avoid using them.  */
#define callee_saved_reg_p(reg) \
  (!call_used_or_fixed_reg_p (reg) \
   || (TARGET_THUMB1 && optimize_size \
       && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))

/* Return a mask for the call-clobbered low registers that are unused
   at the end of the prologue.  */
static unsigned long
thumb1_prologue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}

/* Similarly for the start of the epilogue.  */
static unsigned long
thumb1_epilogue_unused_call_clobbered_lo_regs (void)
{
  unsigned long mask = 0;
  bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));

  for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
    if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
      mask |= 1 << (reg - FIRST_LO_REGNUM);
  return mask;
}

/* Find a spare register to use during the prolog of a function.  */

static int
thumb_find_work_register (unsigned long pushed_regs_mask)
{
  int reg;

  unsigned long unused_regs
    = thumb1_prologue_unused_call_clobbered_lo_regs ();

  /* Check the argument registers first as these are call-used.  The
     register allocation order means that sometimes r3 might be used
     but earlier argument registers might not, so check them all.  */
  for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
    if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
      return reg;

  /* Otherwise look for a call-saved register that is going to be pushed.  */
  for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
    if (pushed_regs_mask & (1 << reg))
      return reg;

  if (TARGET_THUMB2)
    {
      /* Thumb-2 can use high regs.  */
      for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
	if (pushed_regs_mask & (1 << reg))
	  return reg;
    }
  /* Something went wrong - thumb_compute_save_reg_mask()
     should have arranged for a suitable register to be pushed.  */
  gcc_unreachable ();
}

static GTY(()) int pic_labelno;

/* Generate code to load the PIC register.  In thumb mode SCRATCH is a
   low register.  */

void
arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
{
  rtx l1, labelno, pic_tmp, pic_rtx;

  if (crtl->uses_pic_offset_table == 0
      || TARGET_SINGLE_PIC_BASE
      || TARGET_FDPIC)
    return;

  gcc_assert (flag_pic);

  if (pic_reg == NULL_RTX)
    pic_reg = cfun->machine->pic_reg;
  if (TARGET_VXWORKS_RTP)
    {
      pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
      emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));

      emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));

      pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
      emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
    }
  else
    {
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */

      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
				UNSPEC_GOTSYM_OFF);
      pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);

      if (TARGET_32BIT)
	{
	  emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
      else /* TARGET_THUMB1 */
	{
	  if (arm_pic_register != INVALID_REGNUM
	      && REGNO (pic_reg) > LAST_LO_REGNUM)
	    {
	      /* We will have pushed the pic register, so we should always be
		 able to find a work register.  */
	      pic_tmp = gen_rtx_REG (SImode,
				     thumb_find_work_register (saved_regs));
	      emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
	      emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
	      emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
	    }
	  else if (arm_pic_register != INVALID_REGNUM
		   && arm_pic_register > LAST_LO_REGNUM
		   && REGNO (pic_reg) <= LAST_LO_REGNUM)
	    {
	      emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	      emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
	      emit_use (gen_rtx_REG (Pmode, arm_pic_register));
	    }
	  else
	    emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
	}
    }

  /* Need to emit this whether or not we obey regdecls,
     since setjmp/longjmp can cause life info to screw up.  */
  emit_use (pic_reg);
}

/* Try to determine whether an object, referenced via ORIG, will be
   placed in the text or data segment.  This is used in FDPIC mode, to
   decide which relocations to use when accessing ORIG.  *IS_READONLY
   is set to true if ORIG is a read-only location, false otherwise.
   Return true if we could determine the location of ORIG, false
   otherwise.  *IS_READONLY is valid only when we return true.  */
static bool
arm_is_segment_info_known (rtx orig, bool *is_readonly)
{
  *is_readonly = false;

  if (GET_CODE (orig) == LABEL_REF)
    {
      *is_readonly = true;
      return true;
    }

  if (SYMBOL_REF_P (orig))
    {
      if (CONSTANT_POOL_ADDRESS_P (orig))
	{
	  *is_readonly = true;
	  return true;
	}
      if (SYMBOL_REF_LOCAL_P (orig)
	  && !SYMBOL_REF_EXTERNAL_P (orig)
	  && SYMBOL_REF_DECL (orig)
	  && (!DECL_P (SYMBOL_REF_DECL (orig))
	      || !DECL_COMMON (SYMBOL_REF_DECL (orig))))
	{
	  tree decl = SYMBOL_REF_DECL (orig);
	  tree init = (TREE_CODE (decl) == VAR_DECL)
	    ? DECL_INITIAL (decl) : (TREE_CODE (decl) == CONSTRUCTOR)
	    ? decl : 0;
	  int reloc = 0;
	  bool named_section, readonly;

	  if (init && init != error_mark_node)
	    reloc = compute_reloc_for_constant (init);

	  named_section = TREE_CODE (decl) == VAR_DECL
	    && lookup_attribute ("section", DECL_ATTRIBUTES (decl));
	  readonly = decl_readonly_section (decl, reloc);

	  /* We don't know where the link script will put a named
	     section, so return false in such a case.  */
	  if (named_section)
	    return false;

	  *is_readonly = readonly;
	  return true;
	}

      /* We don't know.  */
      return false;
    }

  gcc_unreachable ();
}

/* Generate code to load the address of a static var when flag_pic is set.  */
static rtx_insn *
arm_pic_static_addr (rtx orig, rtx reg)
{
  rtx l1, labelno, offset_rtx;
  rtx_insn *insn;

  gcc_assert (flag_pic);

  bool is_readonly = false;
  bool info_known = false;

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig))
    info_known = arm_is_segment_info_known (orig, &is_readonly);

  if (TARGET_FDPIC
      && SYMBOL_REF_P (orig)
      && !SYMBOL_REF_FUNCTION_P (orig)
      && !info_known)
    {
      /* We don't know where orig is stored, so we have be
	 pessimistic and use a GOT relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      insn = calculate_pic_address_constant (reg, pic_reg, orig);
    }
  else if (TARGET_FDPIC
	   && SYMBOL_REF_P (orig)
	   && (SYMBOL_REF_FUNCTION_P (orig)
	       || !is_readonly))
    {
      /* We use the GOTOFF relocation.  */
      rtx pic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);

      rtx l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig), UNSPEC_PIC_SYM);
      emit_insn (gen_movsi (reg, l1));
      insn = emit_insn (gen_addsi3 (reg, reg, pic_reg));
    }
  else
    {
      /* Not FDPIC, not SYMBOL_REF_P or readonly: we can use
	 PC-relative access.  */
      /* We use an UNSPEC rather than a LABEL_REF because this label
	 never appears in the code stream.  */
      labelno = GEN_INT (pic_labelno++);
      l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      l1 = gen_rtx_CONST (VOIDmode, l1);

      /* On the ARM the PC register contains 'dot + 8' at the time of the
	 addition, on the Thumb it is 'dot + 4'.  */
      offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
      offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
				   UNSPEC_SYMBOL_OFFSET);
      offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);

      insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx,
						   labelno));
    }

  return insn;
}

/* Return nonzero if X is valid as an ARM state addressing register.  */
static int
arm_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return ARM_REGNO_OK_FOR_BASE_P (regno);

  return (regno <= LAST_ARM_REGNUM
	  || regno >= FIRST_PSEUDO_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || regno == ARG_POINTER_REGNUM);
}

/* Return TRUE if this rtx is the difference of a symbol and a label,
   and will reduce to a PC-relative relocation in the object file.
   Expressions like this can be left alone when generating PIC, rather
   than forced through the GOT.  */
static int
pcrel_constant_p (rtx x)
{
  if (GET_CODE (x) == MINUS)
    return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));

  return FALSE;
}

/* Return true if X will surely end up in an index register after next
   splitting pass.  */
static bool
will_be_in_index_register (const_rtx x)
{
  /* arm.md: calculate_pic_address will split this into a register.  */
  return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
}

/* Return nonzero if X is a valid ARM state address operand.  */
int
arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
			        int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      rtx addend = XEXP (XEXP (x, 1), 1);

      /* Don't allow ldrd post increment by register because it's hard
	 to fixup invalid register choices.  */
      if (use_ldrd
	  && GET_CODE (x) == POST_MODIFY
	  && REG_P (addend))
	return 0;

      return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
	      && arm_legitimate_index_p (mode, addend, outer, strict_p));
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && ((CONST_INT_P (xop1)
		    && arm_legitimate_index_p (mode, xop1, outer, strict_p))
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
    }

#if 0
  /* Reload currently can't handle MINUS, so disable this for now */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return (arm_address_register_rtx_p (xop0, strict_p)
	      && arm_legitimate_index_p (mode, xop1, outer, strict_p));
    }
#endif

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}

/* Return true if we can avoid creating a constant pool entry for x.  */
static bool
can_avoid_literal_pool_for_label_p (rtx x)
{
  /* Normally we can assign constant values to target registers without
     the help of constant pool.  But there are cases we have to use constant
     pool like:
     1) assign a label to register.
     2) sign-extend a 8bit value to 32bit and then assign to register.

     Constant pool access in format:
     (set (reg r0) (mem (symbol_ref (".LC0"))))
     will cause the use of literal pool (later in function arm_reorg).
     So here we mark such format as an invalid format, then the compiler
     will adjust it into:
     (set (reg r0) (symbol_ref (".LC0")))
     (set (reg r0) (mem (reg r0))).
     No extra register is required, and (mem (reg r0)) won't cause the use
     of literal pools.  */
  if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
      && CONSTANT_POOL_ADDRESS_P (x))
    return 1;
  return 0;
}


/* Return nonzero if X is a valid Thumb-2 address operand.  */
static int
thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  bool use_ldrd;
  enum rtx_code code = GET_CODE (x);

  if (arm_address_register_rtx_p (x, strict_p))
    return 1;

  use_ldrd = (TARGET_LDRD
	      && (mode == DImode || mode == DFmode));

  if (code == POST_INC || code == PRE_DEC
      || ((code == PRE_INC || code == POST_DEC)
	  && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
    return arm_address_register_rtx_p (XEXP (x, 0), strict_p);

  else if ((code == POST_MODIFY || code == PRE_MODIFY)
	   && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
	   && GET_CODE (XEXP (x, 1)) == PLUS
	   && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
    {
      /* Thumb-2 only has autoincrement by constant.  */
      rtx addend = XEXP (XEXP (x, 1), 1);
      HOST_WIDE_INT offset;

      if (!CONST_INT_P (addend))
	return 0;

      offset = INTVAL(addend);
      if (GET_MODE_SIZE (mode) <= 4)
	return (offset > -256 && offset < 256);

      return (use_ldrd && offset > -1024 && offset < 1024
	      && (offset & 3) == 0);
    }

  /* After reload constants split into minipools will have addresses
     from a LABEL_REF.  */
  else if (reload_completed
	   && (code == LABEL_REF
	       || (code == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
    return 0;

  else if (code == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      return ((arm_address_register_rtx_p (xop0, strict_p)
	       && (thumb2_legitimate_index_p (mode, xop1, strict_p)
		   || (!strict_p && will_be_in_index_register (xop1))))
	      || (arm_address_register_rtx_p (xop1, strict_p)
		  && thumb2_legitimate_index_p (mode, xop0, strict_p)));
    }

  else if (can_avoid_literal_pool_for_label_p (x))
    return 0;

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && code == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}

/* Return nonzero if INDEX is valid for an address index operand in
   ARM state.  */
static int
arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
			int strict_p)
{
  HOST_WIDE_INT range;
  enum rtx_code code = GET_CODE (index);

  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);

	  /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return val > -256 && val < 256;
	  else
	    return val > -4096 && val < 4092;
	}

      return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
    }

  if (GET_MODE_SIZE (mode) <= 4
      && ! (arm_arch4
	    && (mode == HImode
		|| mode == HFmode
		|| (mode == QImode && outer == SIGN_EXTEND))))
    {
      if (code == MULT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  return ((arm_address_register_rtx_p (xiop0, strict_p)
		   && power_of_two_operand (xiop1, SImode))
		  || (arm_address_register_rtx_p (xiop1, strict_p)
		      && power_of_two_operand (xiop0, SImode)));
	}
      else if (code == LSHIFTRT || code == ASHIFTRT
	       || code == ASHIFT || code == ROTATERT)
	{
	  rtx op = XEXP (index, 1);

	  return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
		  && CONST_INT_P (op)
		  && INTVAL (op) > 0
		  && INTVAL (op) <= 31);
	}
    }

  /* For ARM v4 we may be doing a sign-extend operation during the
     load.  */
  if (arm_arch4)
    {
      if (mode == HImode
	  || mode == HFmode
	  || (outer == SIGN_EXTEND && mode == QImode))
	range = 256;
      else
	range = 4096;
    }
  else
    range = (mode == HImode || mode == HFmode) ? 4095 : 4096;

  return (code == CONST_INT
	  && INTVAL (index) < range
	  && INTVAL (index) > -range);
}

/* Return true if OP is a valid index scaling factor for Thumb-2 address
   index operand.  i.e. 1, 2, 4 or 8.  */
static bool
thumb2_index_mul_operand (rtx op)
{
  HOST_WIDE_INT val;

  if (!CONST_INT_P (op))
    return false;

  val = INTVAL(op);
  return (val == 1 || val == 2 || val == 4 || val == 8);
}

/* Return nonzero if INDEX is a valid Thumb-2 address index operand.  */
static int
thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  /* ??? Combine arm and thumb2 coprocessor addressing modes.  */
  /* Standard coprocessor addressing modes.  */
  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    /* Thumb-2 allows only > -256 index range for it's core register
	       load/stores. Since we allow SF/DF in core registers, we have
	       to use the intersection between -256~4096 (core) and -1024~1024
	       (coprocessor).  */
	    && INTVAL (index) > -256
	    && (INTVAL (index) & 3) == 0);

  if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
    {
      /* For DImode assume values will usually live in core regs
	 and only allow LDRD addressing modes.  */
      if (!TARGET_LDRD || mode != DImode)
	return (code == CONST_INT
		&& INTVAL (index) < 1024
		&& INTVAL (index) > -1024
		&& (INTVAL (index) & 3) == 0);
    }

  /* For quad modes, we restrict the constant offset to be slightly less
     than what the instruction format permits.  We do this because for
     quad mode moves, we will actually decompose them into two separate
     double-mode reads or writes.  INDEX must therefore be a valid
     (double-mode) offset and so should INDEX+8.  */
  if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1016
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  /* We have no such constraint on double mode offsets, so we permit the
     full range of the instruction format.  */
  if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
    return (code == CONST_INT
	    && INTVAL (index) < 1024
	    && INTVAL (index) > -1024
	    && (INTVAL (index) & 3) == 0);

  if (arm_address_register_rtx_p (index, strict_p)
      && (GET_MODE_SIZE (mode) <= 4))
    return 1;

  if (mode == DImode || mode == DFmode)
    {
      if (code == CONST_INT)
	{
	  HOST_WIDE_INT val = INTVAL (index);
	  /* Thumb-2 ldrd only has reg+const addressing modes.
	     Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
	     If vldr is selected it uses arm_coproc_mem_operand.  */
	  if (TARGET_LDRD)
	    return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
	  else
	    return IN_RANGE (val, -255, 4095 - 4);
	}
      else
	return 0;
    }

  if (code == MULT)
    {
      rtx xiop0 = XEXP (index, 0);
      rtx xiop1 = XEXP (index, 1);

      return ((arm_address_register_rtx_p (xiop0, strict_p)
	       && thumb2_index_mul_operand (xiop1))
	      || (arm_address_register_rtx_p (xiop1, strict_p)
		  && thumb2_index_mul_operand (xiop0)));
    }
  else if (code == ASHIFT)
    {
      rtx op = XEXP (index, 1);

      return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
	      && CONST_INT_P (op)
	      && INTVAL (op) > 0
	      && INTVAL (op) <= 3);
    }

  return (code == CONST_INT
	  && INTVAL (index) < 4096
	  && INTVAL (index) > -256);
}

/* Return nonzero if X is valid as a 16-bit Thumb state base register.  */
static int
thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
{
  int regno;

  if (!REG_P (x))
    return 0;

  regno = REGNO (x);

  if (strict_p)
    return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);

  return (regno <= LAST_LO_REGNUM
	  || regno > LAST_VIRTUAL_REGISTER
	  || regno == FRAME_POINTER_REGNUM
	  || (GET_MODE_SIZE (mode) >= 4
	      && (regno == STACK_POINTER_REGNUM
		  || regno >= FIRST_PSEUDO_REGISTER
		  || x == hard_frame_pointer_rtx
		  || x == arg_pointer_rtx)));
}

/* Return nonzero if x is a legitimate index register.  This is the case
   for any base register that can access a QImode object.  */
inline static int
thumb1_index_register_rtx_p (rtx x, int strict_p)
{
  return thumb1_base_register_rtx_p (x, QImode, strict_p);
}

/* Return nonzero if x is a legitimate 16-bit Thumb-state address.

   The AP may be eliminated to either the SP or the FP, so we use the
   least common denominator, e.g. SImode, and offsets from 0 to 64.

   ??? Verify whether the above is the right approach.

   ??? Also, the FP may be eliminated to the SP, so perhaps that
   needs special handling also.

   ??? Look at how the mips16 port solves this problem.  It probably uses
   better ways to solve some of these problems.

   Although it is not incorrect, we don't accept QImode and HImode
   addresses based on the frame pointer or arg pointer until the
   reload pass starts.  This is so that eliminating such addresses
   into stack based ones won't produce impossible code.  */
int
thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
{
  if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
    return 0;

  /* ??? Not clear if this is right.  Experiment.  */
  if (GET_MODE_SIZE (mode) < 4
      && !(reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, x)
	  || reg_mentioned_p (arg_pointer_rtx, x)
	  || reg_mentioned_p (virtual_incoming_args_rtx, x)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, x)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
	  || reg_mentioned_p (virtual_stack_vars_rtx, x)))
    return 0;

  /* Accept any base register.  SP only in SImode or larger.  */
  else if (thumb1_base_register_rtx_p (x, mode, strict_p))
    return 1;

  /* This is PC relative data before arm_reorg runs.  */
  else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic
	   && !arm_disable_literal_pool)
    return 1;

  /* This is PC relative data after arm_reorg runs.  */
  else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
	   && reload_completed
	   && (GET_CODE (x) == LABEL_REF
	       || (GET_CODE (x) == CONST
		   && GET_CODE (XEXP (x, 0)) == PLUS
		   && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
		   && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
    return 1;

  /* Post-inc indexing only supported for SImode and larger.  */
  else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
	   && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
    return 1;

  else if (GET_CODE (x) == PLUS)
    {
      /* REG+REG address can be any two index registers.  */
      /* We disallow FRAME+REG addressing since we know that FRAME
	 will be replaced with STACK, and SP relative addressing only
	 permits SP+OFFSET.  */
      if (GET_MODE_SIZE (mode) <= 4
	  && XEXP (x, 0) != frame_pointer_rtx
	  && XEXP (x, 1) != frame_pointer_rtx
	  && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
	  && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
	      || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
	return 1;

      /* REG+const has 5-7 bit offset for non-SP registers.  */
      else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
		|| XEXP (x, 0) == arg_pointer_rtx)
	       && CONST_INT_P (XEXP (x, 1))
	       && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
	return 1;

      /* REG+const has 10-bit offset for SP, but only SImode and
	 larger is supported.  */
      /* ??? Should probably check for DI/DFmode overflow here
	 just like GO_IF_LEGITIMATE_OFFSET does.  */
      else if (REG_P (XEXP (x, 0))
	       && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && INTVAL (XEXP (x, 1)) >= 0
	       && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;

      else if (REG_P (XEXP (x, 0))
	       && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
		   || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
		   || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
		       && REGNO (XEXP (x, 0))
			  <= LAST_VIRTUAL_POINTER_REGISTER))
	       && GET_MODE_SIZE (mode) >= 4
	       && CONST_INT_P (XEXP (x, 1))
	       && (INTVAL (XEXP (x, 1)) & 3) == 0)
	return 1;
    }

  else if (GET_MODE_CLASS (mode) != MODE_FLOAT
	   && GET_MODE_SIZE (mode) == 4
	   && GET_CODE (x) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (x)
	   && !arm_disable_literal_pool
	   && ! (flag_pic
		 && symbol_mentioned_p (get_pool_constant (x))
		 && ! pcrel_constant_p (get_pool_constant (x))))
    return 1;

  return 0;
}

/* Return nonzero if VAL can be used as an offset in a Thumb-state address
   instruction of mode MODE.  */
int
thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
{
  switch (GET_MODE_SIZE (mode))
    {
    case 1:
      return val >= 0 && val < 32;

    case 2:
      return val >= 0 && val < 64 && (val & 1) == 0;

    default:
      return (val >= 0
	      && (val + GET_MODE_SIZE (mode)) <= 128
	      && (val & 3) == 0);
    }
}

bool
arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
{
  if (TARGET_ARM)
    return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
  else if (TARGET_THUMB2)
    return thumb2_legitimate_address_p (mode, x, strict_p);
  else /* if (TARGET_THUMB1) */
    return thumb1_legitimate_address_p (mode, x, strict_p);
}

/* Worker function for TARGET_PREFERRED_RELOAD_CLASS.

   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS, but for the Thumb core registers and
   immediate constants we prefer a LO_REGS class or a subset.  */

static reg_class_t
arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
{
  if (TARGET_32BIT)
    return rclass;
  else
    {
      if (rclass == GENERAL_REGS)
	return LO_REGS;
      else
	return rclass;
    }
}

/* Build the SYMBOL_REF for __tls_get_addr.  */

static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}

rtx
arm_load_tp (rtx target)
{
  if (!target)
    target = gen_reg_rtx (SImode);

  if (TARGET_HARD_TP)
    {
      /* Can return in any reg.  */
      emit_insn (gen_load_tp_hard (target));
    }
  else
    {
      /* Always returned in r0.  Immediately copy the result into a pseudo,
	 otherwise other uses of r0 (e.g. setting up function arguments) may
	 clobber the value.  */

      rtx tmp;

      if (TARGET_FDPIC)
	{
	  rtx fdpic_reg = gen_rtx_REG (Pmode, FDPIC_REGNUM);
	  rtx initial_fdpic_reg = get_hard_reg_initial_val (Pmode, FDPIC_REGNUM);

	  emit_insn (gen_load_tp_soft_fdpic ());

	  /* Restore r9.  */
	  emit_insn (gen_restore_pic_register_after_call(fdpic_reg, initial_fdpic_reg));
	}
      else
	emit_insn (gen_load_tp_soft ());

      tmp = gen_rtx_REG (SImode, R0_REGNUM);
      emit_move_insn (target, tmp);
    }
  return target;
}

static rtx
load_tls_operand (rtx x, rtx reg)
{
  rtx tmp;

  if (reg == NULL_RTX)
    reg = gen_reg_rtx (SImode);

  tmp = gen_rtx_CONST (SImode, x);

  emit_move_insn (reg, tmp);

  return reg;
}

static rtx_insn *
arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno = NULL_RTX, sum;

  gcc_assert (reloc != TLS_DESCSEQ);
  start_sequence ();

  if (TARGET_FDPIC)
    {
      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (reloc)),
			    UNSPEC_TLS);
    }
  else
    {
      labelno = GEN_INT (pic_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
      label = gen_rtx_CONST (VOIDmode, label);

      sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (reloc), label,
				       GEN_INT (TARGET_ARM ? 8 : 4)),
			    UNSPEC_TLS);
    }
  reg = load_tls_operand (sum, reg);

  if (TARGET_FDPIC)
      emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
  else if (TARGET_ARM)
    emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
  else
    emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));

  *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
				     LCT_PURE, /* LCT_CONST?  */
				     Pmode, reg, Pmode);

  rtx_insn *insns = get_insns ();
  end_sequence ();

  return insns;
}

static rtx
arm_tls_descseq_addr (rtx x, rtx reg)
{
  rtx labelno = GEN_INT (pic_labelno++);
  rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
  rtx sum = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
				       gen_rtx_CONST (VOIDmode, label),
				       GEN_INT (!TARGET_ARM)),
			    UNSPEC_TLS);
  rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));

  emit_insn (gen_tlscall (x, labelno));
  if (!reg)
    reg = gen_reg_rtx (SImode);
  else
    gcc_assert (REGNO (reg) != R0_REGNUM);

  emit_move_insn (reg, reg0);

  return reg;
}


rtx
legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, sum, ret, eqv, addend;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  /* Original scheme */
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, x);
	}
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      if (TARGET_GNU2_TLS)
	{
	  gcc_assert (!TARGET_FDPIC);

	  reg = arm_tls_descseq_addr (x, reg);

	  tp = arm_load_tp (NULL_RTX);

	  dest = gen_rtx_PLUS (Pmode, tp, reg);
	}
      else
	{
	  if (TARGET_FDPIC)
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32_FDPIC);
	  else
	    insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

	  /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	     share the LDM result with other LD model accesses.  */
	  eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
				UNSPEC_TLS);
	  dest = gen_reg_rtx (Pmode);
	  emit_libcall_block (insns, dest, ret, eqv);

	  /* Load the addend.  */
	  addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
						     GEN_INT (TLS_LDO32)),
				   UNSPEC_TLS);
	  addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
	  dest = gen_rtx_PLUS (Pmode, dest, addend);
	}
      return dest;

    case TLS_MODEL_INITIAL_EXEC:
      if (TARGET_FDPIC)
	{
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (2, x, GEN_INT (TLS_IE32_FDPIC)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);
	  emit_insn (gen_addsi3 (reg, reg, gen_rtx_REG (Pmode, FDPIC_REGNUM)));
	  emit_move_insn (reg, gen_rtx_MEM (Pmode, reg));
	}
      else
	{
	  labelno = GEN_INT (pic_labelno++);
	  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
	  label = gen_rtx_CONST (VOIDmode, label);
	  sum = gen_rtx_UNSPEC (Pmode,
				gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
					   GEN_INT (TARGET_ARM ? 8 : 4)),
				UNSPEC_TLS);
	  reg = load_tls_operand (sum, reg);

	  if (TARGET_ARM)
	    emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
	  else if (TARGET_THUMB2)
	    emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
	  else
	    {
	      emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
	      emit_move_insn (reg, gen_const_mem (SImode, reg));
	    }
	}

      tp = arm_load_tp (NULL_RTX);

      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      tp = arm_load_tp (NULL_RTX);

      reg = gen_rtx_UNSPEC (Pmode,
			    gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			    UNSPEC_TLS);
      reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));

      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}

/* Try machine-dependent ways of modifying an illegitimate address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (arm_tls_referenced_p (x))
    {
      rtx addend = NULL;

      if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  addend = XEXP (XEXP (x, 0), 1);
	  x = XEXP (XEXP (x, 0), 0);
	}

      if (GET_CODE (x) != SYMBOL_REF)
	return x;

      gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);

      x = legitimize_tls_address (x, NULL_RTX);

      if (addend)
	{
	  x = gen_rtx_PLUS (SImode, x, addend);
	  orig_x = x;
	}
      else
	return x;
    }

  if (TARGET_THUMB1)
    return thumb_legitimize_address (x, orig_x, mode);

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
	  && !symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (ARM_BASE_REGISTER_RTX_P (xop0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT n, low_n;
	  rtx base_reg, val;
	  n = INTVAL (xop1);

	  /* VFP addressing modes actually allow greater offsets, but for
	     now we just stick with the lowest common denominator.  */
	  if (mode == DImode || mode == DFmode)
	    {
	      low_n = n & 0x0f;
	      n &= ~0x0f;
	      if (low_n > 4)
		{
		  n += 16;
		  low_n -= 16;
		}
	    }
	  else
	    {
	      low_n = ((mode) == TImode ? 0
		       : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
	      n -= low_n;
	    }

	  base_reg = gen_reg_rtx (SImode);
	  val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
	  emit_move_insn (base_reg, val);
	  x = plus_constant (Pmode, base_reg, low_n);
	}
      else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_PLUS (SImode, xop0, xop1);
    }

  /* XXX We don't allow MINUS any more -- see comment in
     arm_legitimate_address_outer_p ().  */
  else if (GET_CODE (x) == MINUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (CONSTANT_P (xop0))
	xop0 = force_reg (SImode, xop0);

      if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
	xop1 = force_reg (SImode, xop1);

      if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
	x = gen_rtx_MINUS (SImode, xop0, xop1);
    }

  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses which often allows for the base register to
     be factorized for multiple adjacent memory references, and it might
     even allows for the mini pool to be avoided entirely. */
  else if (CONST_INT_P (x) && optimize > 0)
    {
      unsigned int bits;
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      /* LDR and LDRB can use a 12-bit index, ldrsb and the rest can
	 only use a 8-bit index. So let's use a 12-bit index for
	 SImode only and hope that arm_gen_constant will enable LDRB
	 to use more bits. */
      bits = (mode == SImode) ? 12 : 8;
      mask = (1 << bits) - 1;
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      if (TARGET_ARM && bit_count (base & 0xffffffff) > (32 - bits)/2)
	{
	  /* It'll most probably be more efficient to generate the
	     base with more bits set and use a negative index instead.
	     Don't do this for Thumb as negative offsets are much more
	     limited.  */
	  base |= mask;
	  index -= mask;
	}
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}


/* Try machine-dependent ways of modifying an illegitimate Thumb address
   to be legitimate.  If we find one, return the new, valid address.  */
rtx
thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
{
  if (GET_CODE (x) == PLUS
      && CONST_INT_P (XEXP (x, 1))
      && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
	  || INTVAL (XEXP (x, 1)) < 0))
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);
      HOST_WIDE_INT offset = INTVAL (xop1);

      /* Try and fold the offset into a biasing of the base register and
	 then offsetting that.  Don't do this when optimizing for space
	 since it can cause too many CSEs.  */
      if (optimize_size && offset >= 0
	  && offset < 256 + 31 * GET_MODE_SIZE (mode))
	{
	  HOST_WIDE_INT delta;

	  if (offset >= 256)
	    delta = offset - (256 - GET_MODE_SIZE (mode));
	  else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
	    delta = 31 * GET_MODE_SIZE (mode);
	  else
	    delta = offset & (~31 * GET_MODE_SIZE (mode));

	  xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
				NULL_RTX);
	  x = plus_constant (Pmode, xop0, delta);
	}
      else if (offset < 0 && offset > -256)
	/* Small negative offsets are best done with a subtract before the
	   dereference, forcing these into a register normally takes two
	   instructions.  */
	x = force_operand (x, NULL_RTX);
      else
	{
	  /* For the remaining cases, force the constant into a register.  */
	  xop1 = force_reg (SImode, xop1);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  else if (GET_CODE (x) == PLUS
	   && s_register_operand (XEXP (x, 1), SImode)
	   && !s_register_operand (XEXP (x, 0), SImode))
    {
      rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);

      x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
    }

  if (flag_pic)
    {
      /* We need to find and carefully transform any SYMBOL and LABEL
	 references; so go back to the original address expression.  */
      rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
					  false /*compute_now*/);

      if (new_x != orig_x)
	x = new_x;
    }

  return x;
}

/* Return TRUE if X contains any TLS symbol references.  */

bool
arm_tls_referenced_p (rtx x)
{
  if (! TARGET_HAVE_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	{
	  /* ARM currently does not provide relocations to encode TLS variables
	     into AArch32 instructions, only data, so there is no way to
	     currently implement these if a literal pool is disabled.  */
	  if (arm_disable_literal_pool)
	    sorry ("accessing thread-local storage is not currently supported "
		   "with %<-mpure-code%> or %<-mslow-flash-data%>");

	  return true;
	}

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}

/* Implement TARGET_LEGITIMATE_CONSTANT_P.

   On the ARM, allow any integer (invalid ones are removed later by insn
   patterns), nice doubles and symbol_refs which refer to the function's
   constant pool XXX.

   When generating pic allow anything.  */

static bool
arm_legitimate_constant_p_1 (machine_mode, rtx x)
{
  return flag_pic || !label_mentioned_p (x);
}

static bool
thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  /* Splitters for TARGET_USE_MOVT call arm_emit_movpair which creates high
     RTX.  These RTX must therefore be allowed for Thumb-1 so that when run
     for ARMv8-M Baseline or later the result is valid.  */
  if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  return (CONST_INT_P (x)
	  || CONST_DOUBLE_P (x)
	  || CONSTANT_ADDRESS_P (x)
	  || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
	  || flag_pic);
}

static bool
arm_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!arm_cannot_force_const_mem (mode, x)
	  && (TARGET_32BIT
	      ? arm_legitimate_constant_p_1 (mode, x)
	      : thumb_legitimate_constant_p (mode, x)));
}

/* Implement TARGET_CANNOT_FORCE_CONST_MEM.  */

static bool
arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
  rtx base, offset;
  split_const (x, &base, &offset);

  if (SYMBOL_REF_P (base))
    {
      /* Function symbols cannot have an offset due to the Thumb bit.  */
      if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
	  && INTVAL (offset) != 0)
	return true;

      if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
	  && !offset_within_block_p (base, INTVAL (offset)))
	return true;
    }
  return arm_tls_referenced_p (x);
}

#define REG_OR_SUBREG_REG(X)						\
  (REG_P (X)							\
   || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))

#define REG_OR_SUBREG_RTX(X)			\
   (REG_P (X) ? (X) : SUBREG_REG (X))

static inline int
thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int total, words;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (arm_arch6m && arm_m_profile_small_mul)
	return COSTS_N_INSNS (32);

      if (CONST_INT_P (XEXP (x, 1)))
	{
	  int cycles = 0;
	  unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));

	  while (i)
	    {
	      i >>= 2;
	      cycles++;
	    }
	  return COSTS_N_INSNS (2) + cycles;
	}
      return COSTS_N_INSNS (1) + 16;

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      return (COSTS_N_INSNS (words)
	      + 4 * ((MEM_P (SET_SRC (x)))
		     + MEM_P (SET_DEST (x))));

    case CONST_INT:
      if (outer == SET)
	{
	  if (UINTVAL (x) < 256
	      /* 16-bit constant.  */
	      || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
	    return 0;
	  if (thumb_shiftable_const (INTVAL (x)))
	    return COSTS_N_INSNS (2);
	  return arm_disable_literal_pool
	    ? COSTS_N_INSNS (8)
	    : COSTS_N_INSNS (3);
	}
      else if ((outer == PLUS || outer == COMPARE)
	       && INTVAL (x) < 256 && INTVAL (x) > -256)
	return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
	       && INTVAL (x) < 256 && INTVAL (x) >= -256)
	return COSTS_N_INSNS (1);
      else if (outer == AND)
	{
	  int i;
	  /* This duplicates the tests in the andsi3 expander.  */
	  for (i = 9; i <= 31; i++)
	    if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
		|| (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
	      return COSTS_N_INSNS (2);
	}
      else if (outer == ASHIFT || outer == ASHIFTRT
	       || outer == LSHIFTRT)
	return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      /* XXX guess.  */
      return 8;

    case MEM:
      /* XXX another guess.  */
      /* Memory costs quite a lot for the first word, but subsequent words
	 load at the equivalent of a single insn each.  */
      return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
	      + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
		 ? 4 : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	return 14;
      return 2;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      total = mode == DImode ? COSTS_N_INSNS (1) : 0;
      total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);

      if (mode == SImode)
	return total;

      if (arm_arch6)
	return total + COSTS_N_INSNS (1);

      /* Assume a two-shift sequence.  Increase the cost slightly so
	 we prefer actual shifts over an extend operation.  */
      return total + 1 + COSTS_N_INSNS (2);

    default:
      return 99;
    }
}

/* Estimates the size cost of thumb1 instructions.
   For now most of the code is copied from thumb1_rtx_costs. We need more
   fine grain tuning when we have more related test cases.  */
static inline int
thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
{
  machine_mode mode = GET_MODE (x);
  int words, cost;

  switch (code)
    {
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
    case ROTATERT:
      return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);

    case PLUS:
    case MINUS:
      /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
	 defined by RTL expansion, especially for the expansion of
	 multiplication.  */
      if ((GET_CODE (XEXP (x, 0)) == MULT
	   && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
	  || (GET_CODE (XEXP (x, 1)) == MULT
	      && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
	return COSTS_N_INSNS (2);
      /* Fall through.  */
    case COMPARE:
    case NEG:
    case NOT:
      return COSTS_N_INSNS (1);

    case MULT:
      if (CONST_INT_P (XEXP (x, 1)))
        {
          /* Thumb1 mul instruction can't operate on const. We must Load it
             into a register first.  */
          int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
	  /* For the targets which have a very small and high-latency multiply
	     unit, we prefer to synthesize the mult with up to 5 instructions,
	     giving a good balance between size and performance.  */
	  if (arm_arch6m && arm_m_profile_small_mul)
	    return COSTS_N_INSNS (5);
	  else
	    return COSTS_N_INSNS (1) + const_size;
        }
      return COSTS_N_INSNS (1);

    case SET:
      /* A SET doesn't have a mode, so let's look at the SET_DEST to get
	 the mode.  */
      words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
      cost = COSTS_N_INSNS (words);
      if (satisfies_constraint_J (SET_SRC (x))
	  || satisfies_constraint_K (SET_SRC (x))
	     /* Too big an immediate for a 2-byte mov, using MOVT.  */
	  || (CONST_INT_P (SET_SRC (x))
	      && UINTVAL (SET_SRC (x)) >= 256
	      && TARGET_HAVE_MOVT
	      && satisfies_constraint_j (SET_SRC (x)))
	     /* thumb1_movdi_insn.  */
	  || ((words > 1) && MEM_P (SET_SRC (x))))
	cost += COSTS_N_INSNS (1);
      return cost;

    case CONST_INT:
      if (outer == SET)
        {
          if (UINTVAL (x) < 256)
            return COSTS_N_INSNS (1);
	  /* movw is 4byte long.  */
	  if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
	    return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_J".  */
	  if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
            return COSTS_N_INSNS (2);
	  /* See split "TARGET_THUMB1 && satisfies_constraint_K".  */
          if (thumb_shiftable_const (INTVAL (x)))
            return COSTS_N_INSNS (2);
	  return arm_disable_literal_pool
	    ? COSTS_N_INSNS (8)
	    : COSTS_N_INSNS (3);
        }
      else if ((outer == PLUS || outer == COMPARE)
               && INTVAL (x) < 256 && INTVAL (x) > -256)
        return 0;
      else if ((outer == IOR || outer == XOR || outer == AND)
               && INTVAL (x) < 256 && INTVAL (x) >= -256)
        return COSTS_N_INSNS (1);
      else if (outer == AND)
        {
          int i;
          /* This duplicates the tests in the andsi3 expander.  */
          for (i = 9; i <= 31; i++)
            if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
                || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
              return COSTS_N_INSNS (2);
        }
      else if (outer == ASHIFT || outer == ASHIFTRT
               || outer == LSHIFTRT)
        return 0;
      return COSTS_N_INSNS (2);

    case CONST:
    case CONST_DOUBLE:
    case LABEL_REF:
    case SYMBOL_REF:
      return COSTS_N_INSNS (3);

    case UDIV:
    case UMOD:
    case DIV:
    case MOD:
      return 100;

    case TRUNCATE:
      return 99;

    case AND:
    case XOR:
    case IOR:
      return COSTS_N_INSNS (1);

    case MEM:
      return (COSTS_N_INSNS (1)
	      + COSTS_N_INSNS (1)
		* ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
              + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
                 ? COSTS_N_INSNS (1) : 0));

    case IF_THEN_ELSE:
      /* XXX a guess.  */
      if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
        return 14;
      return 2;

    case ZERO_EXTEND:
      /* XXX still guessing.  */
      switch (GET_MODE (XEXP (x, 0)))
        {
          case E_QImode:
            return (1 + (mode == DImode ? 4 : 0)
                    + (MEM_P (XEXP (x, 0)) ? 10 : 0));

          case E_HImode:
            return (4 + (mode == DImode ? 4 : 0)
                    + (MEM_P (XEXP (x, 0)) ? 10 : 0));

          case E_SImode:
            return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));

          default:
            return 99;
        }

    default:
      return 99;
    }
}

/* Helper function for arm_rtx_costs.  If one operand of the OP, a
   PLUS, adds the carry flag, then return the other operand.  If
   neither is a carry, return OP unchanged.  */
static rtx
strip_carry_operation (rtx op)
{
  gcc_assert (GET_CODE (op) == PLUS);
  if (arm_carry_operation (XEXP (op, 0), GET_MODE (op)))
    return XEXP (op, 1);
  else if (arm_carry_operation (XEXP (op, 1), GET_MODE (op)))
    return XEXP (op, 0);
  return op;
}

/* Helper function for arm_rtx_costs.  If the operand is a valid shift
   operand, then return the operand that is being shifted.  If the shift
   is not by a constant, then set SHIFT_REG to point to the operand.
   Return NULL if OP is not a shifter operand.  */
static rtx
shifter_op_p (rtx op, rtx *shift_reg)
{
  enum rtx_code code = GET_CODE (op);

  if (code == MULT && CONST_INT_P (XEXP (op, 1))
      && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
    return XEXP (op, 0);
  else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
    return XEXP (op, 0);
  else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
	   || code == ASHIFTRT)
    {
      if (!CONST_INT_P (XEXP (op, 1)))
	*shift_reg = XEXP (op, 1);
      return XEXP (op, 0);
    }

  return NULL;
}

static bool
arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
{
  const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
  rtx_code code = GET_CODE (x);
  gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);

  switch (XINT (x, 1))
    {
    case UNSPEC_UNALIGNED_LOAD:
      /* We can only do unaligned loads into the integer unit, and we can't
	 use LDM or LDRD.  */
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
		  + extra_cost->ldst.load_unaligned);

#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_UNALIGNED_STORE:
      *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
      if (speed_p)
	*cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
		  + extra_cost->ldst.store_unaligned);

      *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
#ifdef NOT_YET
      *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
				 ADDR_SPACE_GENERIC, speed_p);
#endif
      return true;

    case UNSPEC_VRINTZ:
    case UNSPEC_VRINTP:
    case UNSPEC_VRINTM:
    case UNSPEC_VRINTR:
    case UNSPEC_VRINTX:
    case UNSPEC_VRINTA:
      if (speed_p)
        *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;

      return true;
    default:
      *cost = COSTS_N_INSNS (2);
      break;
    }
  return true;
}

/* Cost of a libcall.  We assume one insn per argument, an amount for the
   call (one insn for -Os) and then one for processing the result.  */
#define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))

#define HANDLE_NARROW_SHIFT_ARITH(OP, IDX)				\
	do								\
	  {								\
	    shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg);	\
	    if (shift_op != NULL					\
	        && arm_rtx_shift_left_p (XEXP (x, IDX)))		\
	      {								\
	        if (shift_reg)						\
		  {							\
		    if (speed_p)					\
		      *cost += extra_cost->alu.arith_shift_reg;		\
		    *cost += rtx_cost (shift_reg, GET_MODE (shift_reg),	\
				       ASHIFT, 1, speed_p);		\
		  }							\
	        else if (speed_p)					\
		  *cost += extra_cost->alu.arith_shift;			\
									\
		*cost += (rtx_cost (shift_op, GET_MODE (shift_op),	\
				    ASHIFT, 0, speed_p)			\
			  + rtx_cost (XEXP (x, 1 - IDX),		\
				      GET_MODE (shift_op),		\
			              OP, 1, speed_p));			\
	        return true;						\
	      }								\
	  }								\
	while (0)

/* Helper function for arm_rtx_costs_internal.  Calculates the cost of a MEM,
   considering the costs of the addressing mode and memory access
   separately.  */
static bool
arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
	       int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (flag_pic
      && GET_CODE (XEXP (x, 0)) == PLUS
      && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
    /* This will be split into two instructions.  Add the cost of the
       additional instruction here.  The cost of the memory access is computed
       below.  See arm.md:calculate_pic_address.  */
    *cost += COSTS_N_INSNS (1);

  /* Calculate cost of the addressing mode.  */
  if (speed_p)
    {
      arm_addr_mode_op op_type;
      switch (GET_CODE (XEXP (x, 0)))
	{
	default:
	case REG:
	  op_type = AMO_DEFAULT;
	  break;
	case MINUS:
	  /* MINUS does not appear in RTL, but the architecture supports it,
	     so handle this case defensively.  */
	  /* fall through */
	case PLUS:
	  op_type = AMO_NO_WB;
	  break;
	case PRE_INC:
	case PRE_DEC:
	case POST_INC:
	case POST_DEC:
	case PRE_MODIFY:
	case POST_MODIFY:
	  op_type = AMO_WB;
	  break;
	}

      if (VECTOR_MODE_P (mode))
	  *cost += current_tune->addr_mode_costs->vector[op_type];
      else if (FLOAT_MODE_P (mode))
	  *cost += current_tune->addr_mode_costs->fp[op_type];
      else
	  *cost += current_tune->addr_mode_costs->integer[op_type];
    }

  /* Calculate cost of memory access.  */
  if (speed_p)
    {
      if (FLOAT_MODE_P (mode))
	{
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.loadd;
	  else
	    *cost += extra_cost->ldst.loadf;
	}
      else if (VECTOR_MODE_P (mode))
	*cost += extra_cost->ldst.loadv;
      else
	{
	  /* Integer modes */
	  if (GET_MODE_SIZE (mode) == 8)
	    *cost += extra_cost->ldst.ldrd;
	  else
	    *cost += extra_cost->ldst.load;
	}
    }

  return true;
}

/* RTX costs.  Make an estimate of the cost of executing the operation
   X, which is contained within an operation with code OUTER_CODE.
   SPEED_P indicates whether the cost desired is the performance cost,
   or the size cost.  The estimate is stored in COST and the return
   value is TRUE if the cost calculation is final, or FALSE if the
   caller should recurse through the operands of X to add additional
   costs.

   We currently make no attempt to model the size savings of Thumb-2
   16-bit instructions.  At the normal points in compilation where
   this code is called we have no measure of whether the condition
   flags are live or not, and thus no realistic way to determine what
   the size will eventually be.  */
static bool
arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
		   const struct cpu_cost_table *extra_cost,
		   int *cost, bool speed_p)
{
  machine_mode mode = GET_MODE (x);

  *cost = COSTS_N_INSNS (1);

  if (TARGET_THUMB1)
    {
      if (speed_p)
	*cost = thumb1_rtx_costs (x, code, outer_code);
      else
	*cost = thumb1_size_rtx_costs (x, code, outer_code);
      return true;
    }

  switch (code)
    {
    case SET:
      *cost = 0;
      /* SET RTXs don't have a mode so we get it from the destination.  */
      mode = GET_MODE (SET_DEST (x));

      if (REG_P (SET_SRC (x))
	  && REG_P (SET_DEST (x)))
	{
	  /* Assume that most copies can be done with a single insn,
	     unless we don't have HW FP, in which case everything
	     larger than word mode will require two insns.  */
	  *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
				   && GET_MODE_SIZE (mode) > 4)
				  || mode == DImode)
				 ? 2 : 1);
	  /* Conditional register moves can be encoded
	     in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
	    *cost >>= 1;

	  return true;
	}

      if (CONST_INT_P (SET_SRC (x)))
	{
	  /* Handle CONST_INT here, since the value doesn't have a mode
	     and we would otherwise be unable to work out the true cost.  */
	  *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
			    0, speed_p);
	  outer_code = SET;
	  /* Slightly lower the cost of setting a core reg to a constant.
	     This helps break up chains and allows for better scheduling.  */
	  if (REG_P (SET_DEST (x))
	      && REGNO (SET_DEST (x)) <= LR_REGNUM)
	    *cost -= 1;
	  x = SET_SRC (x);
	  /* Immediate moves with an immediate in the range [0, 255] can be
	     encoded in 16 bits in Thumb mode.  */
	  if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
	      && INTVAL (x) >= 0 && INTVAL (x) <=255)
	    *cost >>= 1;
	  goto const_int_cost;
	}

      return false;

    case MEM:
      return arm_mem_costs (x, extra_cost, cost, speed_p);

    case PARALLEL:
    {
   /* Calculations of LDM costs are complex.  We assume an initial cost
   (ldm_1st) which will load the number of registers mentioned in
   ldm_regs_per_insn_1st registers; then each additional
   ldm_regs_per_insn_subsequent registers cost one more insn.  The
   formula for N regs is thus:

   ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
			     + ldm_regs_per_insn_subsequent - 1)
			    / ldm_regs_per_insn_subsequent).

   Additional costs may also be added for addressing.  A similar
   formula is used for STM.  */

      bool is_ldm = load_multiple_operation (x, SImode);
      bool is_stm = store_multiple_operation (x, SImode);

      if (is_ldm || is_stm)
        {
	  if (speed_p)
	    {
	      HOST_WIDE_INT nregs = XVECLEN (x, 0);
	      HOST_WIDE_INT regs_per_insn_1st = is_ldm
	                              ? extra_cost->ldst.ldm_regs_per_insn_1st
	                              : extra_cost->ldst.stm_regs_per_insn_1st;
	      HOST_WIDE_INT regs_per_insn_sub = is_ldm
	                       ? extra_cost->ldst.ldm_regs_per_insn_subsequent
	                       : extra_cost->ldst.stm_regs_per_insn_subsequent;

	      *cost += regs_per_insn_1st
	               + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
					    + regs_per_insn_sub - 1)
					  / regs_per_insn_sub);
	      return true;
	    }

        }
      return false;
    }
    case DIV:
    case UDIV:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	*cost += COSTS_N_INSNS (speed_p
			       ? extra_cost->fp[mode != SFmode].div : 0);
      else if (mode == SImode && TARGET_IDIV)
	*cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
      else
	*cost = LIBCALL_COST (2);

      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is prefered.  */
      *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */

    case MOD:
      /* MOD by a power of 2 can be expanded as:
	 rsbs    r1, r0, #0
	 and     r0, r0, #(n - 1)
	 and     r1, r1, #(n - 1)
	 rsbpl   r0, r1, #0.  */
      if (CONST_INT_P (XEXP (x, 1))
	  && exact_log2 (INTVAL (XEXP (x, 1))) > 0
	  && mode == SImode)
	{
	  *cost += COSTS_N_INSNS (3);

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical
		     + extra_cost->alu.arith;
	  return true;
	}

    /* Fall-through.  */
    case UMOD:
      /* Make the cost of sdiv more expensive so when both sdiv and udiv are
	 possible udiv is prefered.  */
      *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
      return false;	/* All arguments must be in registers.  */

    case ROTATE:
      if (mode == SImode && REG_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (1)
		   + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += extra_cost->alu.shift_reg;
	  return true;
	}
      /* Fall through */
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
	{
	  *cost += (COSTS_N_INSNS (2)
		   + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	  /* Slightly disparage left shift by 1 at so we prefer adddi3.  */
	  if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
	    *cost += 1;
	  return true;
	}
      else if (mode == SImode)
	{
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  /* Slightly disparage register shifts at -Os, but not by much.  */
	  if (!CONST_INT_P (XEXP (x, 1)))
	    *cost += (speed_p ? extra_cost->alu.shift_reg : 1
		      + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_INT
	       && GET_MODE_SIZE (mode) < 4)
	{
	  if (code == ASHIFT)
	    {
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* Slightly disparage register shifts at -Os, but not by
	         much.  */
	      if (!CONST_INT_P (XEXP (x, 1)))
		*cost += (speed_p ? extra_cost->alu.shift_reg : 1
			  + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	    }
	  else if (code == LSHIFTRT || code == ASHIFTRT)
	    {
	      if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
		{
		  /* Can use SBFX/UBFX.  */
		  if (speed_p)
		    *cost += extra_cost->alu.bfx;
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		}
	      else
		{
		  *cost += COSTS_N_INSNS (1);
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  if (speed_p)
		    {
		      if (CONST_INT_P (XEXP (x, 1)))
			*cost += 2 * extra_cost->alu.shift;
		      else
			*cost += (extra_cost->alu.shift
				  + extra_cost->alu.shift_reg);
		    }
		  else
		    /* Slightly disparage register shifts.  */
		    *cost += !CONST_INT_P (XEXP (x, 1));
		}
	    }
	  else /* Rotates.  */
	    {
	      *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      if (speed_p)
		{
		  if (CONST_INT_P (XEXP (x, 1)))
		    *cost += (2 * extra_cost->alu.shift
			      + extra_cost->alu.log_shift);
		  else
		    *cost += (extra_cost->alu.shift
			      + extra_cost->alu.shift_reg
			      + extra_cost->alu.log_shift_reg);
		}
	    }
	  return true;
	}

      *cost = LIBCALL_COST (2);
      return false;

    case BSWAP:
      if (arm_arch6)
        {
          if (mode == SImode)
            {
              if (speed_p)
                *cost += extra_cost->alu.rev;

              return false;
            }
        }
      else
        {
        /* No rev instruction available.  Look at arm_legacy_rev
           and thumb_legacy_rev for the form of RTL used then.  */
          if (TARGET_THUMB)
            {
              *cost += COSTS_N_INSNS (9);

              if (speed_p)
                {
                  *cost += 6 * extra_cost->alu.shift;
                  *cost += 3 * extra_cost->alu.logical;
                }
            }
          else
            {
              *cost += COSTS_N_INSNS (4);

              if (speed_p)
                {
                  *cost += 2 * extra_cost->alu.shift;
                  *cost += extra_cost->alu.arith_shift;
                  *cost += 2 * extra_cost->alu.logical;
                }
            }
          return true;
        }
      return false;

    case MINUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      || GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      rtx mul_op0, mul_op1, sub_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      if (GET_CODE (XEXP (x, 0)) == MULT)
		{
		  mul_op0 = XEXP (XEXP (x, 0), 0);
		  mul_op1 = XEXP (XEXP (x, 0), 1);
		  sub_op = XEXP (x, 1);
		}
	      else
		{
		  mul_op0 = XEXP (XEXP (x, 1), 0);
		  mul_op1 = XEXP (XEXP (x, 1), 1);
		  sub_op = XEXP (x, 0);
		}

	      /* The first operand of the multiply may be optionally
		 negated.  */
	      if (GET_CODE (mul_op0) == NEG)
		mul_op0 = XEXP (mul_op0, 0);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (sub_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_by_reg = NULL;
	  rtx shift_op;
	  rtx non_shift_op;
	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);

	  /* Factor out any borrow operation.  There's more than one way
	     of expressing this; try to recognize them all.  */
	  if (GET_CODE (op0) == MINUS)
	    {
	      if (arm_borrow_operation (op1, SImode))
		{
		  op1 = XEXP (op0, 1);
		  op0 = XEXP (op0, 0);
		}
	      else if (arm_borrow_operation (XEXP (op0, 1), SImode))
		op0 = XEXP (op0, 0);
	    }
	  else if (GET_CODE (op1) == PLUS
		   && arm_borrow_operation (XEXP (op1, 0), SImode))
	    op1 = XEXP (op1, 0);
	  else if (GET_CODE (op0) == NEG
		   && arm_borrow_operation (op1, SImode))
	    {
	      /* Negate with carry-in.  For Thumb2 this is done with
		 SBC R, X, X lsl #1 (ie X - 2X - C) as Thumb lacks the
		 RSC instruction that exists in Arm mode.  */
	      if (speed_p)
		*cost += (TARGET_THUMB2
			  ? extra_cost->alu.arith_shift
			  : extra_cost->alu.arith);
	      *cost += rtx_cost (XEXP (op0, 0), mode, MINUS, 0, speed_p);
	      return true;
	    }
	  /* (Carry_op - reg) can be done as RSC Rd, Rn, #1 on Arm.
	     Note we do mean ~borrow here.  */
	  else if (TARGET_ARM && arm_carry_operation (op0, SImode))
	    {
	      *cost += rtx_cost (op1, mode, code, 1, speed_p);
	      return true;
	    }

	  shift_op = shifter_op_p (op0, &shift_by_reg);
	  if (shift_op == NULL)
	    {
	      shift_op = shifter_op_p (op1, &shift_by_reg);
	      non_shift_op = op0;
	    }
	  else
	    non_shift_op = op1;

	  if (shift_op != NULL)
	    {
	      if (shift_by_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
	      *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
	      return true;
	    }

	  if (arm_arch_thumb2
	      && GET_CODE (XEXP (x, 1)) == MULT)
	    {
	      /* MLS.  */
	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
	      return true;
	    }

	  if (CONST_INT_P (op0))
	    {
	      int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
					    INTVAL (op0), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }
	  else if (speed_p)
	    *cost += extra_cost->alu.arith;

	  /* Don't recurse as we don't want to cost any borrow that
	     we've stripped.  */
	  *cost += rtx_cost (op0, mode, MINUS, 0, speed_p);
	  *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	  return true;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  /* We check both sides of the MINUS for shifter operands since,
	     unlike PLUS, it's not commutative.  */

	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
	  HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);

	  /* Slightly disparage, as we might need to widen the result.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  if (CONST_INT_P (XEXP (x, 0)))
	    {
	      *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
	      return true;
	    }

	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
	    {
	      rtx op1 = XEXP (x, 1);

	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;

	      if (GET_CODE (op1) == ZERO_EXTEND)
		*cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
				   0, speed_p);
	      else
		*cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				 0, speed_p);
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
	      return true;
	    }
	  else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
		   || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));
	      *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    GET_CODE (XEXP (x, 1)), 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;

    case PLUS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      rtx mul_op0, mul_op1, add_op;

	      if (speed_p)
		*cost += extra_cost->fp[mode != SFmode].mult_addsub;

	      mul_op0 = XEXP (XEXP (x, 0), 0);
	      mul_op1 = XEXP (XEXP (x, 0), 1);
	      add_op = XEXP (x, 1);

	      *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
			+ rtx_cost (mul_op1, mode, code, 0, speed_p)
			+ rtx_cost (add_op, mode, code, 0, speed_p));

	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].addsub;
	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

	/* Narrow modes can be synthesized in SImode, but the range
	   of useful sub-operations is limited.  Check for shift operations
	   on one of the operands.  Only left shifts can be used in the
	   narrow modes.  */
      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  rtx shift_op, shift_reg;
	  shift_reg = NULL;

	  HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      /* Slightly penalize a narrow operation as the result may
		 need widening.  */
	      *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
	      return true;
	    }

	  /* Slightly penalize a narrow operation as the result may
	     need widening.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  return false;
	}

      if (mode == SImode)
	{
	  rtx shift_op, shift_reg;

	  if (TARGET_INT_SIMD
	      && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		  || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
	    {
	      /* UXTA[BH] or SXTA[BH].  */
	      if (speed_p)
		*cost += extra_cost->alu.extend_arith;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
	      return true;
	    }

	  rtx op0 = XEXP (x, 0);
	  rtx op1 = XEXP (x, 1);

	  /* Handle a side effect of adding in the carry to an addition.  */
	  if (GET_CODE (op0) == PLUS
	      && arm_carry_operation (op1, mode))
	    {
	      op1 = XEXP (op0, 1);
	      op0 = XEXP (op0, 0);
	    }
	  else if (GET_CODE (op1) == PLUS
		   && arm_carry_operation (op0, mode))
	    {
	      op0 = XEXP (op1, 0);
	      op1 = XEXP (op1, 1);
	    }
	  else if (GET_CODE (op0) == PLUS)
	    {
	      op0 = strip_carry_operation (op0);
	      if (swap_commutative_operands_p (op0, op1))
		std::swap (op0, op1);
	    }

	  if (arm_carry_operation (op0, mode))
	    {
	      /* Adding the carry to a register is a canonicalization of
		 adding 0 to the register plus the carry.  */
	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
	      return true;
	    }

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.arith_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (GET_CODE (op0) == MULT)
	    {
	      rtx mul_op = op0;

	      if (TARGET_DSP_MULTIPLY
		  && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
		       && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			   || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
			       && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
			       && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
		      || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
			  && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
			  && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
			  && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
			      || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
				  && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
				  && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
				      == 16))))))
		{
		  /* SMLA[BT][BT].  */
		  if (speed_p)
		    *cost += extra_cost->mult[0].extend_add;
		  *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
				      SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
					SIGN_EXTEND, 0, speed_p)
			    + rtx_cost (op1, mode, PLUS, 1, speed_p));
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->mult[0].add;
	      *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
			+ rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
			+ rtx_cost (op1, mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (op1))
	    {
	      int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
					    INTVAL (op1), NULL_RTX,
					    NULL_RTX, 1, 0);
	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.arith;
	      *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;

	  /* Don't recurse here because we want to test the operands
	     without any carry operation.  */
	  *cost += rtx_cost (op0, mode, PLUS, 0, speed_p);
	  *cost += rtx_cost (op1, mode, PLUS, 1, speed_p);
	  return true;
	}

      if (mode == DImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT
	      && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
		   && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
		  || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
		      && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend_add;
	      *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
				    ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  *cost += COSTS_N_INSNS (1);

	  if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
	      || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += (extra_cost->alu.arith
			  + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
			     ? extra_cost->alu.arith
			     : extra_cost->alu.arith_shift));

	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;
    case IOR:
      if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
        {
          if (speed_p)
            *cost += extra_cost->alu.rev;

          return true;
        }
    /* Fall through.  */
    case AND: case XOR:
      if (mode == SImode)
	{
	  enum rtx_code subcode = GET_CODE (XEXP (x, 0));
	  rtx op0 = XEXP (x, 0);
	  rtx shift_op, shift_reg;

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  shift_reg = NULL;
	  shift_op = shifter_op_p (op0, &shift_reg);
	  if (shift_op != NULL)
	    {
	      if (shift_reg)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;

	      *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	      return true;
	    }

	  if (CONST_INT_P (XEXP (x, 1)))
	    {
	      int insns = arm_gen_constant (code, SImode, NULL_RTX,
					    INTVAL (XEXP (x, 1)), NULL_RTX,
					    NULL_RTX, 1, 0);

	      *cost = COSTS_N_INSNS (insns);
	      if (speed_p)
		*cost += insns * extra_cost->alu.logical;
	      *cost += rtx_cost (op0, mode, code, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  *cost += (rtx_cost (op0, mode, code, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
	  return true;
	}

      if (mode == DImode)
	{
	  rtx op0 = XEXP (x, 0);
	  enum rtx_code subcode = GET_CODE (op0);

	  *cost += COSTS_N_INSNS (1);

	  if (subcode == NOT
	      && (code == AND
		  || (code == IOR && TARGET_THUMB2)))
	    op0 = XEXP (op0, 0);

	  if (GET_CODE (op0) == ZERO_EXTEND)
	    {
	      if (speed_p)
		*cost += 2 * extra_cost->alu.logical;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }
	  else if (GET_CODE (op0) == SIGN_EXTEND)
	    {
	      if (speed_p)
		*cost += extra_cost->alu.logical + extra_cost->alu.log_shift;

	      *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
				  0, speed_p)
			+ rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
	      return true;
	    }

	  if (speed_p)
	    *cost += 2 * extra_cost->alu.logical;

	  return true;
	}
      /* Vector mode?  */

      *cost = LIBCALL_COST (2);
      return false;

    case MULT:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  rtx op0 = XEXP (x, 0);

	  if (GET_CODE (op0) == NEG && !flag_rounding_math)
	    op0 = XEXP (op0, 0);

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].mult;

	  *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
		    + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
	  return true;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (2);
	  return false;
	}

      if (mode == SImode)
	{
	  if (TARGET_DSP_MULTIPLY
	      && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		   && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
		       || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			   && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			   && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
		  || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
		      && CONST_INT_P (XEXP (XEXP (x, 0), 1))
		      && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
		      && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
			  || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
			      && CONST_INT_P (XEXP (XEXP (x, 1), 1))
			      && (INTVAL (XEXP (XEXP (x, 1), 1))
				  == 16))))))
	    {
	      /* SMUL[TB][TB].  */
	      if (speed_p)
		*cost += extra_cost->mult[0].extend;
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
				 SIGN_EXTEND, 0, speed_p);
	      *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
				 SIGN_EXTEND, 1, speed_p);
	      return true;
	    }
	  if (speed_p)
	    *cost += extra_cost->mult[0].simple;
	  return false;
	}

      if (mode == DImode)
	{
	  if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
		&& GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
	       || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
		   && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
	    {
	      if (speed_p)
		*cost += extra_cost->mult[1].extend;
	      *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
				  ZERO_EXTEND, 0, speed_p)
			+ rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
				    ZERO_EXTEND, 0, speed_p));
	      return true;
	    }

	  *cost = LIBCALL_COST (2);
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (2);
      return false;

    case NEG:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (GET_CODE (XEXP (x, 0)) == MULT)
	    {
	      /* VNMUL.  */
	      *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (GET_CODE (XEXP (x, 0)) == ABS)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* Assume the non-flag-changing variant.  */
	      if (speed_p)
		*cost += (extra_cost->alu.log_shift
			  + extra_cost->alu.arith_shift);
	      *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
	      return true;
	    }

	  if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
	      || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
	    {
	      *cost += COSTS_N_INSNS (1);
	      /* No extra cost for MOV imm and MVN imm.  */
	      /* If the comparison op is using the flags, there's no further
		 cost, otherwise we need to add the cost of the comparison.  */
	      if (!(REG_P (XEXP (XEXP (x, 0), 0))
		    && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
		    && XEXP (XEXP (x, 0), 1) == const0_rtx))
		{
		  mode = GET_MODE (XEXP (XEXP (x, 0), 0));
		  *cost += (COSTS_N_INSNS (1)
			    + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
					0, speed_p)
			    + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
					1, speed_p));
		  if (speed_p)
		    *cost += extra_cost->alu.arith;
		}
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (GET_MODE_CLASS (mode) == MODE_INT
	  && GET_MODE_SIZE (mode) < 4)
	{
	  /* Slightly disparage, as we might need an extend operation.  */
	  *cost += 1;
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	  return false;
	}

      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.arith;
	  return false;
	}

      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;

    case NOT:
      if (mode == SImode)
	{
	  rtx shift_op;
	  rtx shift_reg = NULL;

	  shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);

	  if (shift_op)
	    {
	      if (shift_reg != NULL)
		{
		  if (speed_p)
		    *cost += extra_cost->alu.log_shift_reg;
		  *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
		}
	      else if (speed_p)
		*cost += extra_cost->alu.log_shift;
	      *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
	      return true;
	    }

	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	  return false;
	}
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  return false;
	}

      /* Vector mode?  */

      *cost += LIBCALL_COST (1);
      return false;

    case IF_THEN_ELSE:
      {
        if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
	  {
	    *cost += COSTS_N_INSNS (3);
	    return true;
	  }
	int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
	int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);

	*cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
	/* Assume that if one arm of the if_then_else is a register,
	   that it will be tied with the result and eliminate the
	   conditional insn.  */
	if (REG_P (XEXP (x, 1)))
	  *cost += op2cost;
	else if (REG_P (XEXP (x, 2)))
	  *cost += op1cost;
	else
	  {
	    if (speed_p)
	      {
		if (extra_cost->alu.non_exec_costs_exec)
		  *cost += op1cost + op2cost + extra_cost->alu.non_exec;
		else
		  *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
	      }
	    else
	      *cost += op1cost + op2cost;
	  }
      }
      return true;

    case COMPARE:
      if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
	*cost = 0;
      else
	{
	  machine_mode op0mode;
	  /* We'll mostly assume that the cost of a compare is the cost of the
	     LHS.  However, there are some notable exceptions.  */

	  /* Floating point compares are never done as side-effects.  */
	  op0mode = GET_MODE (XEXP (x, 0));
	  if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
	      && (op0mode == SFmode || !TARGET_VFP_SINGLE))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[op0mode != SFmode].compare;

	      if (XEXP (x, 1) == CONST0_RTX (op0mode))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	  else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
	    {
	      *cost = LIBCALL_COST (2);
	      return false;
	    }

	  /* DImode compares normally take two insns.  */
	  if (op0mode == DImode)
	    {
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += 2 * extra_cost->alu.arith;
	      return false;
	    }

	  if (op0mode == SImode)
	    {
	      rtx shift_op;
	      rtx shift_reg;

	      if (XEXP (x, 1) == const0_rtx
		  && !(REG_P (XEXP (x, 0))
		       || (GET_CODE (XEXP (x, 0)) == SUBREG
			   && REG_P (SUBREG_REG (XEXP (x, 0))))))
		{
		  *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);

		  /* Multiply operations that set the flags are often
		     significantly more expensive.  */
		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;

		  if (speed_p
		      && GET_CODE (XEXP (x, 0)) == PLUS
		      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
		      && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
							    0), 1), mode))
		    *cost += extra_cost->mult[0].flag_setting;
		  return true;
		}

	      shift_reg = NULL;
	      shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
	      if (shift_op != NULL)
		{
		  if (shift_reg != NULL)
		    {
		      *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
					 1, speed_p);
		      if (speed_p)
			*cost += extra_cost->alu.arith_shift_reg;
		    }
		  else if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
		  *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
		  return true;
		}

	      if (speed_p)
		*cost += extra_cost->alu.arith;
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
		  return true;
		}
	      return false;
	    }

	  /* Vector mode?  */

	  *cost = LIBCALL_COST (2);
	  return false;
	}
      return true;

    case EQ:
    case NE:
    case LT:
    case LE:
    case GT:
    case GE:
    case LTU:
    case LEU:
    case GEU:
    case GTU:
    case ORDERED:
    case UNORDERED:
    case UNEQ:
    case UNLE:
    case UNLT:
    case UNGE:
    case UNGT:
    case LTGT:
      if (outer_code == SET)
	{
	  /* Is it a store-flag operation?  */
	  if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	      && XEXP (x, 1) == const0_rtx)
	    {
	      /* Thumb also needs an IT insn.  */
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
	      return true;
	    }
	  if (XEXP (x, 1) == const0_rtx)
	    {
	      switch (code)
		{
		case LT:
		  /* LSR Rd, Rn, #31.  */
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		case EQ:
		  /* RSBS T1, Rn, #0
		     ADC  Rd, Rn, T1.  */

		case NE:
		  /* SUBS T1, Rn, #1
		     SBC  Rd, Rn, T1.  */
		  *cost += COSTS_N_INSNS (1);
		  break;

		case LE:
		  /* RSBS T1, Rn, Rn, LSR #31
		     ADC  Rd, Rn, T1. */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.arith_shift;
		  break;

		case GT:
		  /* RSB  Rd, Rn, Rn, ASR #1
		     LSR  Rd, Rd, #31.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += (extra_cost->alu.arith_shift
			      + extra_cost->alu.shift);
		  break;

		case GE:
		  /* ASR  Rd, Rn, #31
		     ADD  Rd, Rn, #1.  */
		  *cost += COSTS_N_INSNS (1);
		  if (speed_p)
		    *cost += extra_cost->alu.shift;
		  break;

		default:
		  /* Remaining cases are either meaningless or would take
		     three insns anyway.  */
		  *cost = COSTS_N_INSNS (3);
		  break;
		}
	      *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      return true;
	    }
	  else
	    {
	      *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
	      if (CONST_INT_P (XEXP (x, 1))
		  && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
		{
		  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
		  return true;
		}

	      return false;
	    }
	}
      /* Not directly inside a set.  If it involves the condition code
	 register it must be the condition for a branch, cond_exec or
	 I_T_E operation.  Since the comparison is performed elsewhere
	 this is just the control part which has no additional
	 cost.  */
      else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
	       && XEXP (x, 1) == const0_rtx)
	{
	  *cost = 0;
	  return true;
	}
      return false;

    case ABS:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode != SFmode].neg;

	  return false;
	}
      else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
	{
	  *cost = LIBCALL_COST (1);
	  return false;
	}

      if (mode == SImode)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
	  return false;
	}
      /* Vector mode?  */
      *cost = LIBCALL_COST (1);
      return false;

    case SIGN_EXTEND:
      if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);

	  if (!speed_p)
	    return true;

	  if (GET_MODE (XEXP (x, 0)) == SImode)
	    *cost += extra_cost->ldst.load;
	  else
	    *cost += extra_cost->ldst.load_sign_extend;

	  if (mode == DImode)
	    *cost += extra_cost->alu.shift;

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have SXTB/SXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  */
	  *cost += COSTS_N_INSNS (1);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.shift;
	}

      return true;

    case ZERO_EXTEND:
      if ((arm_arch4
	   || GET_MODE (XEXP (x, 0)) == SImode
	   || GET_MODE (XEXP (x, 0)) == QImode)
	  && MEM_P (XEXP (x, 0)))
	{
	  *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);

	  if (mode == DImode)
	    *cost += COSTS_N_INSNS (1);  /* No speed penalty.  */

	  return true;
	}

      /* Widening from less than 32-bits requires an extend operation.  */
      if (GET_MODE (XEXP (x, 0)) == QImode)
	{
	  /* UXTB can be a shorter instruction in Thumb2, but it might
	     be slower than the AND Rd, Rn, #255 alternative.  When
	     optimizing for speed it should never be slower to use
	     AND, and we don't really model 16-bit vs 32-bit insns
	     here.  */
	  if (speed_p)
	    *cost += extra_cost->alu.logical;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
	{
	  /* We have UXTB/UXTH.  */
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += extra_cost->alu.extend;
	}
      else if (GET_MODE (XEXP (x, 0)) != SImode)
	{
	  /* Needs two shifts.  It's marginally preferable to use
	     shifts rather than two BIC instructions as the second
	     shift may merge with a subsequent insn as a shifter
	     op.  */
	  *cost = COSTS_N_INSNS (2);
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  if (speed_p)
	    *cost += 2 * extra_cost->alu.shift;
	}

      /* Widening beyond 32-bits requires one more insn.  */
      if (mode == DImode)
	{
	  *cost += COSTS_N_INSNS (1);	/* No speed penalty.  */
	}

      return true;

    case CONST_INT:
      *cost = 0;
      /* CONST_INT has no mode, so we cannot tell for sure how many
	 insns are really going to be needed.  The best we can do is
	 look at the value passed.  If it fits in SImode, then assume
	 that's the mode it will be used for.  Otherwise assume it
	 will be used in DImode.  */
      if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
	mode = SImode;
      else
	mode = DImode;

      /* Avoid blowing up in arm_gen_constant ().  */
      if (!(outer_code == PLUS
	    || outer_code == AND
	    || outer_code == IOR
	    || outer_code == XOR
	    || outer_code == MINUS))
	outer_code = SET;

    const_int_cost:
      if (mode == SImode)
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
						    INTVAL (x), NULL, NULL,
						    0, 0));
	  /* Extra costs?  */
	}
      else
	{
	  *cost += COSTS_N_INSNS (arm_gen_constant
				  (outer_code, SImode, NULL,
				   trunc_int_for_mode (INTVAL (x), SImode),
				   NULL, NULL, 0, 0)
				  + arm_gen_constant (outer_code, SImode, NULL,
						      INTVAL (x) >> 32, NULL,
						      NULL, 0, 0));
	  /* Extra costs?  */
	}

      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      if (speed_p)
	{
	  if (arm_arch_thumb2 && !flag_pic)
	    *cost += COSTS_N_INSNS (1);
	  else
	    *cost += extra_cost->ldst.load;
	}
      else
	*cost += COSTS_N_INSNS (1);

      if (flag_pic)
	{
	  *cost += COSTS_N_INSNS (1);
	  if (speed_p)
	    *cost += extra_cost->alu.arith;
	}

      return true;

    case CONST_FIXED:
      *cost = COSTS_N_INSNS (4);
      /* Fixme.  */
      return true;

    case CONST_DOUBLE:
      if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
	  && (mode == SFmode || !TARGET_VFP_SINGLE))
	{
	  if (vfp3_const_double_rtx (x))
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].fpconst;
	      return true;
	    }

	  if (speed_p)
	    {
	      if (mode == DFmode)
		*cost += extra_cost->ldst.loadd;
	      else
		*cost += extra_cost->ldst.loadf;
	    }
	  else
	    *cost += COSTS_N_INSNS (1 + (mode == DFmode));

	  return true;
	}
      *cost = COSTS_N_INSNS (4);
      return true;

    case CONST_VECTOR:
      /* Fixme.  */
      if (TARGET_NEON
	  && TARGET_HARD_FLOAT
	  && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
	  && neon_immediate_valid_for_move (x, mode, NULL, NULL))
	*cost = COSTS_N_INSNS (1);
      else
	*cost = COSTS_N_INSNS (4);
      return true;

    case HIGH:
    case LO_SUM:
      /* When optimizing for size, we prefer constant pool entries to
	 MOVW/MOVT pairs, so bump the cost of these slightly.  */
      if (!speed_p)
	*cost += 1;
      return true;

    case CLZ:
      if (speed_p)
	*cost += extra_cost->alu.clz;
      return false;

    case SMIN:
      if (XEXP (x, 1) == const0_rtx)
	{
	  if (speed_p)
	    *cost += extra_cost->alu.log_shift;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Fall through.  */
    case SMAX:
    case UMIN:
    case UMAX:
      *cost += COSTS_N_INSNS (1);
      return false;

    case TRUNCATE:
      if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
	  && CONST_INT_P (XEXP (XEXP (x, 0), 1))
	  && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
	  && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	  && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
	       && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
	      || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
		  && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
		      == ZERO_EXTEND))))
	{
	  if (speed_p)
	    *cost += extra_cost->mult[1].extend;
	  *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
			      ZERO_EXTEND, 0, speed_p)
		    + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
				ZERO_EXTEND, 0, speed_p));
	  return true;
	}
      *cost = LIBCALL_COST (1);
      return false;

    case UNSPEC_VOLATILE:
    case UNSPEC:
      return arm_unspec_cost (x, outer_code, speed_p, cost);

    case PC:
      /* Reading the PC is like reading any other register.  Writing it
	 is more expensive, but we take that into account elsewhere.  */
      *cost = 0;
      return true;

    case ZERO_EXTRACT:
      /* TODO: Simple zero_extract of bottom bits using AND.  */
      /* Fall through.  */
    case SIGN_EXTRACT:
      if (arm_arch6
	  && mode == SImode
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2)))
	{
	  if (speed_p)
	    *cost += extra_cost->alu.bfx;
	  *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	  return true;
	}
      /* Without UBFX/SBFX, need to resort to shift operations.  */
      *cost += COSTS_N_INSNS (1);
      if (speed_p)
	*cost += 2 * extra_cost->alu.shift;
      *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
      return true;

    case FLOAT_EXTEND:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].widen;
	  if (!TARGET_VFP5
	      && GET_MODE (XEXP (x, 0)) == HFmode)
	    {
	      /* Pre v8, widening HF->DF is a two-step process, first
	         widening to SFmode.  */
	      *cost += COSTS_N_INSNS (1);
	      if (speed_p)
		*cost += extra_cost->fp[0].widen;
	    }
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	}

      *cost = LIBCALL_COST (1);
      return false;

    case FLOAT_TRUNCATE:
      if (TARGET_HARD_FLOAT)
	{
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].narrow;
	  *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
	  return true;
	  /* Vector modes?  */
	}
      *cost = LIBCALL_COST (1);
      return false;

    case FMA:
      if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
        {
          rtx op0 = XEXP (x, 0);
          rtx op1 = XEXP (x, 1);
          rtx op2 = XEXP (x, 2);


          /* vfms or vfnma.  */
          if (GET_CODE (op0) == NEG)
            op0 = XEXP (op0, 0);

          /* vfnms or vfnma.  */
          if (GET_CODE (op2) == NEG)
            op2 = XEXP (op2, 0);

          *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
          *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
          *cost += rtx_cost (op2, mode, FMA, 2, speed_p);

          if (speed_p)
            *cost += extra_cost->fp[mode ==DFmode].fma;

          return true;
        }

      *cost = LIBCALL_COST (3);
      return false;

    case FIX:
    case UNSIGNED_FIX:
      if (TARGET_HARD_FLOAT)
	{
	  /* The *combine_vcvtf2i reduces a vmul+vcvt into
	     a vcvt fixed-point conversion.  */
	  if (code == FIX && mode == SImode
	      && GET_CODE (XEXP (x, 0)) == FIX
	      && GET_MODE (XEXP (x, 0)) == SFmode
	      && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
	      && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
		 > 0)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[0].toint;

	      *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
				 code, 0, speed_p);
	      return true;
	    }

	  if (GET_MODE_CLASS (mode) == MODE_INT)
	    {
	      mode = GET_MODE (XEXP (x, 0));
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].toint;
	      /* Strip of the 'cost' of rounding towards zero.  */
	      if (GET_CODE (XEXP (x, 0)) == FIX)
		*cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
				   0, speed_p);
	      else
		*cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
	      /* ??? Increase the cost to deal with transferring from
		 FP -> CORE registers?  */
	      return true;
	    }
	  else if (GET_MODE_CLASS (mode) == MODE_FLOAT
		   && TARGET_VFP5)
	    {
	      if (speed_p)
		*cost += extra_cost->fp[mode == DFmode].roundint;
	      return false;
	    }
	  /* Vector costs? */
	}
      *cost = LIBCALL_COST (1);
      return false;

    case FLOAT:
    case UNSIGNED_FLOAT:
      if (TARGET_HARD_FLOAT)
	{
	  /* ??? Increase the cost to deal with transferring from CORE
	     -> FP registers?  */
	  if (speed_p)
	    *cost += extra_cost->fp[mode == DFmode].fromint;
	  return false;
	}
      *cost = LIBCALL_COST (1);
      return false;

    case CALL:
      return true;

    case ASM_OPERANDS:
      {
      /* Just a guess.  Guess number of instructions in the asm
         plus one insn per input.  Always a minimum of COSTS_N_INSNS (1)
         though (see PR60663).  */
        int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
        int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);

        *cost = COSTS_N_INSNS (asm_length + num_operands);
        return true;
      }
    default:
      if (mode != VOIDmode)
	*cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
      else
	*cost = COSTS_N_INSNS (4); /* Who knows?  */
      return false;
    }
}

#undef HANDLE_NARROW_SHIFT_ARITH

/* RTX costs entry point.  */

static bool
arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
	       int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  bool result;
  int code = GET_CODE (x);
  gcc_assert (current_tune->insn_extra_cost);

  result =  arm_rtx_costs_internal (x, (enum rtx_code) code,
				(enum rtx_code) outer_code,
				current_tune->insn_extra_cost,
				total, speed);

  if (dump_file && arm_verbose_cost)
    {
      print_rtl_single (dump_file, x);
      fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
	       *total, result ? "final" : "partial");
    }
  return result;
}

static int
arm_insn_cost (rtx_insn *insn, bool speed)
{
  int cost;

  /* Don't cost a simple reg-reg move at a full insn cost: such moves
     will likely disappear during register allocation.  */
  if (!reload_completed
      && GET_CODE (PATTERN (insn)) == SET
      && REG_P (SET_DEST (PATTERN (insn)))
      && REG_P (SET_SRC (PATTERN (insn))))
    return 2;
  cost = pattern_cost (PATTERN (insn), speed);
  /* If the cost is zero, then it's likely a complex insn.  We don't want the
     cost of these to be less than something we know about.  */
  return cost ? cost : COSTS_N_INSNS (2);
}

/* All address computations that can be done are free, but rtx cost returns
   the same for practically all of them.  So we weight the different types
   of address here in the order (most pref first):
   PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL.  */
static inline int
arm_arm_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
    return 0;
  if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
    return 10;

  if (c == PLUS)
    {
      if (CONST_INT_P (XEXP (x, 1)))
	return 2;

      if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
	return 3;

      return 4;
    }

  return 6;
}

static inline int
arm_thumb_address_cost (rtx x)
{
  enum rtx_code c  = GET_CODE (x);

  if (c == REG)
    return 1;
  if (c == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return 1;

  return 2;
}

static int
arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		  addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
}

/* Adjust cost hook for XScale.  */
static bool
xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			  int * cost)
{
  /* Some true dependencies can have a higher cost depending
     on precisely how certain input operands are used.  */
  if (dep_type == 0
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0)
    {
      int shift_opnum = get_attr_shift (insn);
      enum attr_type attr_type = get_attr_type (dep);

      /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
	 operand for INSN.  If we have a shifted input operand and the
	 instruction we depend on is another ALU instruction, then we may
	 have to account for an additional stall.  */
      if (shift_opnum != 0
	  && (attr_type == TYPE_ALU_SHIFT_IMM
	      || attr_type == TYPE_ALUS_SHIFT_IMM
	      || attr_type == TYPE_LOGIC_SHIFT_IMM
	      || attr_type == TYPE_LOGICS_SHIFT_IMM
	      || attr_type == TYPE_ALU_SHIFT_REG
	      || attr_type == TYPE_ALUS_SHIFT_REG
	      || attr_type == TYPE_LOGIC_SHIFT_REG
	      || attr_type == TYPE_LOGICS_SHIFT_REG
	      || attr_type == TYPE_MOV_SHIFT
	      || attr_type == TYPE_MVN_SHIFT
	      || attr_type == TYPE_MOV_SHIFT_REG
	      || attr_type == TYPE_MVN_SHIFT_REG))
	{
	  rtx shifted_operand;
	  int opno;

	  /* Get the shifted operand.  */
	  extract_insn (insn);
	  shifted_operand = recog_data.operand[shift_opnum];

	  /* Iterate over all the operands in DEP.  If we write an operand
	     that overlaps with SHIFTED_OPERAND, then we have increase the
	     cost of this dependency.  */
	  extract_insn (dep);
	  preprocess_constraints (dep);
	  for (opno = 0; opno < recog_data.n_operands; opno++)
	    {
	      /* We can ignore strict inputs.  */
	      if (recog_data.operand_type[opno] == OP_IN)
		continue;

	      if (reg_overlap_mentioned_p (recog_data.operand[opno],
					   shifted_operand))
		{
		  *cost = 2;
		  return false;
		}
	    }
	}
    }
  return true;
}

/* Adjust cost hook for Cortex A9.  */
static bool
cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			     int * cost)
{
  switch (dep_type)
    {
    case REG_DEP_ANTI:
      *cost = 0;
      return false;

    case REG_DEP_TRUE:
    case REG_DEP_OUTPUT:
	if (recog_memoized (insn) >= 0
	    && recog_memoized (dep) >= 0)
	  {
	    if (GET_CODE (PATTERN (insn)) == SET)
	      {
		if (GET_MODE_CLASS
		    (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
		  || GET_MODE_CLASS
		    (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
		  {
		    enum attr_type attr_type_insn = get_attr_type (insn);
		    enum attr_type attr_type_dep = get_attr_type (dep);

		    /* By default all dependencies of the form
		       s0 = s0 <op> s1
		       s0 = s0 <op> s2
		       have an extra latency of 1 cycle because
		       of the input and output dependency in this
		       case. However this gets modeled as an true
		       dependency and hence all these checks.  */
		    if (REG_P (SET_DEST (PATTERN (insn)))
			&& reg_set_p (SET_DEST (PATTERN (insn)), dep))
		      {
			/* FMACS is a special case where the dependent
			   instruction can be issued 3 cycles before
			   the normal latency in case of an output
			   dependency.  */
			if ((attr_type_insn == TYPE_FMACS
			     || attr_type_insn == TYPE_FMACD)
			    && (attr_type_dep == TYPE_FMACS
				|| attr_type_dep == TYPE_FMACD))
			  {
			    if (dep_type == REG_DEP_OUTPUT)
			      *cost = insn_default_latency (dep) - 3;
			    else
			      *cost = insn_default_latency (dep);
			    return false;
			  }
			else
			  {
			    if (dep_type == REG_DEP_OUTPUT)
			      *cost = insn_default_latency (dep) + 1;
			    else
			      *cost = insn_default_latency (dep);
			  }
			return false;
		      }
		  }
	      }
	  }
	break;

    default:
      gcc_unreachable ();
    }

  return true;
}

/* Adjust cost hook for FA726TE.  */
static bool
fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
			   int * cost)
{
  /* For FA726TE, true dependency on CPSR (i.e. set cond followed by predicated)
     have penalty of 3.  */
  if (dep_type == REG_DEP_TRUE
      && recog_memoized (insn) >= 0
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    {
      /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency.  */
      if (get_attr_conds (insn) == CONDS_USE
          && get_attr_type (insn) != TYPE_BRANCH)
        {
          *cost = 3;
          return false;
        }

      if (GET_CODE (PATTERN (insn)) == COND_EXEC
          || get_attr_conds (insn) == CONDS_USE)
        {
          *cost = 0;
          return false;
        }
    }

  return true;
}

/* Implement TARGET_REGISTER_MOVE_COST.

   Moves between VFP_REGS and GENERAL_REGS are a single insn, but
   it is typically more expensive than a single memory access.  We set
   the cost to less than two memory accesses so that floating
   point to integer conversion does not go through memory.  */

int
arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			reg_class_t from, reg_class_t to)
{
  if (TARGET_32BIT)
    {
      if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
	  || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
	return 15;
      else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
	       || (from != IWMMXT_REGS && to == IWMMXT_REGS))
	return 4;
      else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
	return 20;
      else
	return 2;
    }
  else
    {
      if (from == HI_REGS || to == HI_REGS)
	return 4;
      else
	return 2;
    }
}

/* Implement TARGET_MEMORY_MOVE_COST.  */

int
arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
		      bool in ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return 10;
  else
    {
      if (GET_MODE_SIZE (mode) < 4)
	return 8;
      else
	return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
    }
}

/* Vectorizer cost model implementation.  */

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
				tree vectype,
				int misalign ATTRIBUTE_UNUSED)
{
  unsigned elements;

  switch (type_of_cost)
    {
      case scalar_stmt:
        return current_tune->vec_costs->scalar_stmt_cost;

      case scalar_load:
        return current_tune->vec_costs->scalar_load_cost;

      case scalar_store:
        return current_tune->vec_costs->scalar_store_cost;

      case vector_stmt:
        return current_tune->vec_costs->vec_stmt_cost;

      case vector_load:
        return current_tune->vec_costs->vec_align_load_cost;

      case vector_store:
        return current_tune->vec_costs->vec_store_cost;

      case vec_to_scalar:
        return current_tune->vec_costs->vec_to_scalar_cost;

      case scalar_to_vec:
        return current_tune->vec_costs->scalar_to_vec_cost;

      case unaligned_load:
      case vector_gather_load:
        return current_tune->vec_costs->vec_unalign_load_cost;

      case unaligned_store:
      case vector_scatter_store:
        return current_tune->vec_costs->vec_unalign_store_cost;

      case cond_branch_taken:
        return current_tune->vec_costs->cond_taken_branch_cost;

      case cond_branch_not_taken:
        return current_tune->vec_costs->cond_not_taken_branch_cost;

      case vec_perm:
      case vec_promote_demote:
        return current_tune->vec_costs->vec_stmt_cost;

      case vec_construct:
	elements = TYPE_VECTOR_SUBPARTS (vectype);
	return elements / 2 + 1;

      default:
        gcc_unreachable ();
    }
}

/* Implement targetm.vectorize.add_stmt_cost.  */

static unsigned
arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
		   struct _stmt_vec_info *stmt_info, int misalign,
		   enum vect_cost_model_location where)
{
  unsigned *cost = (unsigned *) data;
  unsigned retval = 0;

  if (flag_vect_cost_model)
    {
      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
      int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);

      /* Statements in an inner loop relative to the loop being
	 vectorized are weighted more heavily.  The value here is
	 arbitrary and could potentially be improved with analysis.  */
      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
	count *= 50;  /* FIXME.  */

      retval = (unsigned) (count * stmt_cost);
      cost[where] += retval;
    }

  return retval;
}

/* Return true if and only if this insn can dual-issue only as older.  */
static bool
cortexa7_older_only (rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_DSP_REG:
    case TYPE_ALU_SREG:
    case TYPE_ALUS_SREG:
    case TYPE_LOGIC_REG:
    case TYPE_LOGICS_REG:
    case TYPE_ADC_REG:
    case TYPE_ADCS_REG:
    case TYPE_ADR:
    case TYPE_BFM:
    case TYPE_REV:
    case TYPE_MVN_REG:
    case TYPE_SHIFT_IMM:
    case TYPE_SHIFT_REG:
    case TYPE_LOAD_BYTE:
    case TYPE_LOAD_4:
    case TYPE_STORE_4:
    case TYPE_FFARITHS:
    case TYPE_FADDS:
    case TYPE_FFARITHD:
    case TYPE_FADDD:
    case TYPE_FMOV:
    case TYPE_F_CVT:
    case TYPE_FCMPS:
    case TYPE_FCMPD:
    case TYPE_FCONSTS:
    case TYPE_FCONSTD:
    case TYPE_FMULS:
    case TYPE_FMACS:
    case TYPE_FMULD:
    case TYPE_FMACD:
    case TYPE_FDIVS:
    case TYPE_FDIVD:
    case TYPE_F_MRC:
    case TYPE_F_MRRC:
    case TYPE_F_FLAG:
    case TYPE_F_LOADS:
    case TYPE_F_STORES:
      return true;
    default:
      return false;
    }
}

/* Return true if and only if this insn can dual-issue as younger.  */
static bool
cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
{
  if (recog_memoized (insn) < 0)
    {
      if (verbose > 5)
        fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
      return false;
    }

  switch (get_attr_type (insn))
    {
    case TYPE_ALU_IMM:
    case TYPE_ALUS_IMM:
    case TYPE_LOGIC_IMM:
    case TYPE_LOGICS_IMM:
    case TYPE_EXTEND:
    case TYPE_MVN_IMM:
    case TYPE_MOV_IMM:
    case TYPE_MOV_REG:
    case TYPE_MOV_SHIFT:
    case TYPE_MOV_SHIFT_REG:
    case TYPE_BRANCH:
    case TYPE_CALL:
      return true;
    default:
      return false;
    }
}


/* Look for an instruction that can dual issue only as an older
   instruction, and move it in front of any instructions that can
   dual-issue as younger, while preserving the relative order of all
   other instructions in the ready list.  This is a hueuristic to help
   dual-issue in later cycles, by postponing issue of more flexible
   instructions.  This heuristic may affect dual issue opportunities
   in the current cycle.  */
static void
cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
			int *n_readyp, int clock)
{
  int i;
  int first_older_only = -1, first_younger = -1;

  if (verbose > 5)
    fprintf (file,
             ";; sched_reorder for cycle %d with %d insns in ready list\n",
             clock,
             *n_readyp);

  /* Traverse the ready list from the head (the instruction to issue
     first), and looking for the first instruction that can issue as
     younger and the first instruction that can dual-issue only as
     older.  */
  for (i = *n_readyp - 1; i >= 0; i--)
    {
      rtx_insn *insn = ready[i];
      if (cortexa7_older_only (insn))
        {
          first_older_only = i;
          if (verbose > 5)
            fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
          break;
        }
      else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
        first_younger = i;
    }

  /* Nothing to reorder because either no younger insn found or insn
     that can dual-issue only as older appears before any insn that
     can dual-issue as younger.  */
  if (first_younger == -1)
    {
      if (verbose > 5)
        fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
      return;
    }

  /* Nothing to reorder because no older-only insn in the ready list.  */
  if (first_older_only == -1)
    {
      if (verbose > 5)
        fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
      return;
    }

  /* Move first_older_only insn before first_younger.  */
  if (verbose > 5)
    fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
             INSN_UID(ready [first_older_only]),
             INSN_UID(ready [first_younger]));
  rtx_insn *first_older_only_insn = ready [first_older_only];
  for (i = first_older_only; i < first_younger; i++)
    {
      ready[i] = ready[i+1];
    }

  ready[i] = first_older_only_insn;
  return;
}

/* Implement TARGET_SCHED_REORDER. */
static int
arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
                   int clock)
{
  switch (arm_tune)
    {
    case TARGET_CPU_cortexa7:
      cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
      break;
    default:
      /* Do nothing for other cores.  */
      break;
    }

  return arm_issue_rate ();
}

/* This function implements the target macro TARGET_SCHED_ADJUST_COST.
   It corrects the value of COST based on the relationship between
   INSN and DEP through the dependence LINK.  It returns the new
   value. There is a per-core adjust_cost hook to adjust scheduler costs
   and the per-core hook can choose to completely override the generic
   adjust_cost function. Only put bits of code into arm_adjust_cost that
   are common across all cores.  */
static int
arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
		 unsigned int)
{
  rtx i_pat, d_pat;

 /* When generating Thumb-1 code, we want to place flag-setting operations
    close to a conditional branch which depends on them, so that we can
    omit the comparison. */
  if (TARGET_THUMB1
      && dep_type == 0
      && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
      && recog_memoized (dep) >= 0
      && get_attr_conds (dep) == CONDS_SET)
    return 0;

  if (current_tune->sched_adjust_cost != NULL)
    {
      if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
	return cost;
    }

  /* XXX Is this strictly true?  */
  if (dep_type == REG_DEP_ANTI
      || dep_type == REG_DEP_OUTPUT)
    return 0;

  /* Call insns don't incur a stall, even if they follow a load.  */
  if (dep_type == 0
      && CALL_P (insn))
    return 1;

  if ((i_pat = single_set (insn)) != NULL
      && MEM_P (SET_SRC (i_pat))
      && (d_pat = single_set (dep)) != NULL
      && MEM_P (SET_DEST (d_pat)))
    {
      rtx src_mem = XEXP (SET_SRC (i_pat), 0);
      /* This is a load after a store, there is no conflict if the load reads
	 from a cached area.  Assume that loads from the stack, and from the
	 constant pool are cached, and that others will miss.  This is a
	 hack.  */

      if ((GET_CODE (src_mem) == SYMBOL_REF
	   && CONSTANT_POOL_ADDRESS_P (src_mem))
	  || reg_mentioned_p (stack_pointer_rtx, src_mem)
	  || reg_mentioned_p (frame_pointer_rtx, src_mem)
	  || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
	return 1;
    }

  return cost;
}

int
arm_max_conditional_execute (void)
{
  return max_insns_skipped;
}

static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
  if (TARGET_32BIT)
    return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
  else
    return (optimize > 0) ? 2 : 0;
}

static int
arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

/* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
   on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
   sequences of non-executed instructions in IT blocks probably take the same
   amount of time as executed instructions (and the IT instruction itself takes
   space in icache).  This function was experimentally determined to give good
   results on a popular embedded benchmark.  */

static int
arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
{
  return (TARGET_32BIT && speed_p) ? 1
         : arm_default_branch_cost (speed_p, predictable_p);
}

static int
arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
{
  return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
}

static bool fp_consts_inited = false;

static REAL_VALUE_TYPE value_fp0;

static void
init_fp_table (void)
{
  REAL_VALUE_TYPE r;

  r = REAL_VALUE_ATOF ("0", DFmode);
  value_fp0 = r;
  fp_consts_inited = true;
}

/* Return TRUE if rtx X is a valid immediate FP constant.  */
int
arm_const_double_rtx (rtx x)
{
  const REAL_VALUE_TYPE *r;

  if (!fp_consts_inited)
    init_fp_table ();

  r = CONST_DOUBLE_REAL_VALUE (x);
  if (REAL_VALUE_MINUS_ZERO (*r))
    return 0;

  if (real_equal (r, &value_fp0))
    return 1;

  return 0;
}

/* VFPv3 has a fairly wide range of representable immediates, formed from
   "quarter-precision" floating-point values. These can be evaluated using this
   formula (with ^ for exponentiation):

     -1^s * n * 2^-r

   Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
   16 <= n <= 31 and 0 <= r <= 7.

   These values are mapped onto an 8-bit integer ABCDEFGH s.t.

     - A (most-significant) is the sign bit.
     - BCD are the exponent (encoded as r XOR 3).
     - EFGH are the mantissa (encoded as n - 16).
*/

/* Return an integer index for a VFPv3 immediate operand X suitable for the
   fconst[sd] instruction, or -1 if X isn't suitable.  */
static int
vfp3_const_double_index (rtx x)
{
  REAL_VALUE_TYPE r, m;
  int sign, exponent;
  unsigned HOST_WIDE_INT mantissa, mant_hi;
  unsigned HOST_WIDE_INT mask;
  int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
  bool fail;

  if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
    return -1;

  r = *CONST_DOUBLE_REAL_VALUE (x);

  /* We can't represent these things, so detect them first.  */
  if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
    return -1;

  /* Extract sign, exponent and mantissa.  */
  sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
  r = real_value_abs (&r);
  exponent = REAL_EXP (&r);
  /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
     highest (sign) bit, with a fixed binary point at bit point_pos.
     WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
     bits for the mantissa, this may fail (low bits would be lost).  */
  real_ldexp (&m, &r, point_pos - exponent);
  wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
  mantissa = w.elt (0);
  mant_hi = w.elt (1);

  /* If there are bits set in the low part of the mantissa, we can't
     represent this value.  */
  if (mantissa != 0)
    return -1;

  /* Now make it so that mantissa contains the most-significant bits, and move
     the point_pos to indicate that the least-significant bits have been
     discarded.  */
  point_pos -= HOST_BITS_PER_WIDE_INT;
  mantissa = mant_hi;

  /* We can permit four significant bits of mantissa only, plus a high bit
     which is always 1.  */
  mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
  if ((mantissa & mask) != 0)
    return -1;

  /* Now we know the mantissa is in range, chop off the unneeded bits.  */
  mantissa >>= point_pos - 5;

  /* The mantissa may be zero. Disallow that case. (It's possible to load the
     floating-point immediate zero with Neon using an integer-zero load, but
     that case is handled elsewhere.)  */
  if (mantissa == 0)
    return -1;

  gcc_assert (mantissa >= 16 && mantissa <= 31);

  /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
     normalized significands are in the range [1, 2). (Our mantissa is shifted
     left 4 places at this point relative to normalized IEEE754 values).  GCC
     internally uses [0.5, 1) (see real.c), so the exponent returned from
     REAL_EXP must be altered.  */
  exponent = 5 - exponent;

  if (exponent < 0 || exponent > 7)
    return -1;

  /* Sign, mantissa and exponent are now in the correct form to plug into the
     formula described in the comment above.  */
  return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
}

/* Return TRUE if rtx X is a valid immediate VFPv3 constant.  */
int
vfp3_const_double_rtx (rtx x)
{
  if (!TARGET_VFP3)
    return 0;

  return vfp3_const_double_index (x) != -1;
}

/* Recognize immediates which can be used in various Neon instructions. Legal
   immediates are described by the following table (for VMVN variants, the
   bitwise inverse of the constant shown is recognized. In either case, VMOV
   is output and the correct instruction to use for a given constant is chosen
   by the assembler). The constant shown is replicated across all elements of
   the destination vector.

   insn elems variant constant (binary)
   ---- ----- ------- -----------------
   vmov  i32     0    00000000 00000000 00000000 abcdefgh
   vmov  i32     1    00000000 00000000 abcdefgh 00000000
   vmov  i32     2    00000000 abcdefgh 00000000 00000000
   vmov  i32     3    abcdefgh 00000000 00000000 00000000
   vmov  i16     4    00000000 abcdefgh
   vmov  i16     5    abcdefgh 00000000
   vmvn  i32     6    00000000 00000000 00000000 abcdefgh
   vmvn  i32     7    00000000 00000000 abcdefgh 00000000
   vmvn  i32     8    00000000 abcdefgh 00000000 00000000
   vmvn  i32     9    abcdefgh 00000000 00000000 00000000
   vmvn  i16    10    00000000 abcdefgh
   vmvn  i16    11    abcdefgh 00000000
   vmov  i32    12    00000000 00000000 abcdefgh 11111111
   vmvn  i32    13    00000000 00000000 abcdefgh 11111111
   vmov  i32    14    00000000 abcdefgh 11111111 11111111
   vmvn  i32    15    00000000 abcdefgh 11111111 11111111
   vmov   i8    16    abcdefgh
   vmov  i64    17    aaaaaaaa bbbbbbbb cccccccc dddddddd
                      eeeeeeee ffffffff gggggggg hhhhhhhh
   vmov  f32    18    aBbbbbbc defgh000 00000000 00000000
   vmov  f32    19    00000000 00000000 00000000 00000000

   For case 18, B = !b. Representable values are exactly those accepted by
   vfp3_const_double_index, but are output as floating-point numbers rather
   than indices.

   For case 19, we will change it to vmov.i32 when assembling.

   Variants 0-5 (inclusive) may also be used as immediates for the second
   operand of VORR/VBIC instructions.

   The INVERSE argument causes the bitwise inverse of the given operand to be
   recognized instead (used for recognizing legal immediates for the VAND/VORN
   pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
   *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
   output, rather than the real insns vbic/vorr).

   INVERSE makes no difference to the recognition of float vectors.

   The return value is the variant of immediate as shown in the above table, or
   -1 if the given value doesn't match any of the listed patterns.
*/
static int
neon_valid_immediate (rtx op, machine_mode mode, int inverse,
		      rtx *modconst, int *elementwidth)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST)	\
  matches = 1;					\
  for (i = 0; i < idx; i += (STRIDE))		\
    if (!(TEST))				\
      matches = 0;				\
  if (matches)					\
    {						\
      immtype = (CLASS);			\
      elsize = (ELSIZE);			\
      break;					\
    }

  unsigned int i, elsize = 0, idx = 0, n_elts;
  unsigned int innersize;
  unsigned char bytes[16] = {};
  int immtype = -1, matches;
  unsigned int invmask = inverse ? 0xff : 0;
  bool vector = GET_CODE (op) == CONST_VECTOR;

  if (vector)
    n_elts = CONST_VECTOR_NUNITS (op);
  else
    {
      n_elts = 1;
      gcc_assert (mode != VOIDmode);
    }

  innersize = GET_MODE_UNIT_SIZE (mode);

  /* Vectors of float constants.  */
  if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
    {
      rtx el0 = CONST_VECTOR_ELT (op, 0);

      if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
        return -1;

      /* FP16 vectors cannot be represented.  */
      if (GET_MODE_INNER (mode) == HFmode)
	return -1;

      /* All elements in the vector must be the same.  Note that 0.0 and -0.0
	 are distinct in this context.  */
      if (!const_vec_duplicate_p (op))
	return -1;

      if (modconst)
        *modconst = CONST_VECTOR_ELT (op, 0);

      if (elementwidth)
        *elementwidth = 0;

      if (el0 == CONST0_RTX (GET_MODE (el0)))
	return 19;
      else
	return 18;
    }

  /* The tricks done in the code below apply for little-endian vector layout.
     For big-endian vectors only allow vectors of the form { a, a, a..., a }.
     FIXME: Implement logic for big-endian vectors.  */
  if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
    return -1;

  /* Splat vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
      unsigned HOST_WIDE_INT elpart;

      gcc_assert (CONST_INT_P (el));
      elpart = INTVAL (el);

      for (unsigned int byte = 0; byte < innersize; byte++)
	{
	  bytes[idx++] = (elpart & 0xff) ^ invmask;
	  elpart >>= BITS_PER_UNIT;
	}
    }

  /* Sanity check.  */
  gcc_assert (idx == GET_MODE_SIZE (mode));

  do
    {
      CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
		       && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);

      CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
		       && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
		       && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);

      CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);

      CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);

      CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0 && bytes[i + 3] == 0);

      CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
			&& bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);

      CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);

      CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
			&& bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);

      CHECK (1, 8, 16, bytes[i] == bytes[0]);

      CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
			&& bytes[i] == bytes[(i + 8) % idx]);
    }
  while (0);

  if (immtype == -1)
    return -1;

  if (elementwidth)
    *elementwidth = elsize;

  if (modconst)
    {
      unsigned HOST_WIDE_INT imm = 0;

      /* Un-invert bytes of recognized vector, if necessary.  */
      if (invmask != 0)
        for (i = 0; i < idx; i++)
          bytes[i] ^= invmask;

      if (immtype == 17)
        {
          /* FIXME: Broken on 32-bit H_W_I hosts.  */
          gcc_assert (sizeof (HOST_WIDE_INT) == 8);

          for (i = 0; i < 8; i++)
            imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
                   << (i * BITS_PER_UNIT);

          *modconst = GEN_INT (imm);
        }
      else
        {
          unsigned HOST_WIDE_INT imm = 0;

          for (i = 0; i < elsize / BITS_PER_UNIT; i++)
            imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);

          *modconst = GEN_INT (imm);
        }
    }

  return immtype;
#undef CHECK
}

/* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
   VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
   float elements), and a modified constant (whatever should be output for a
   VMOV) in *MODCONST.  */

int
neon_immediate_valid_for_move (rtx op, machine_mode mode,
			       rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);

  if (retval == -1)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}

/* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE.  */

int
neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
				rtx *modconst, int *elementwidth)
{
  rtx tmpconst;
  int tmpwidth;
  int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);

  if (retval < 0 || retval > 5)
    return 0;

  if (modconst)
    *modconst = tmpconst;

  if (elementwidth)
    *elementwidth = tmpwidth;

  return 1;
}

/* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction.  If
   the immediate is valid, write a constant suitable for using as an operand
   to VSHR/VSHL to *MODCONST and the corresponding element width to
   *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
   because they have different limitations.  */

int
neon_immediate_valid_for_shift (rtx op, machine_mode mode,
				rtx *modconst, int *elementwidth,
				bool isleftshift)
{
  unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
  unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
  unsigned HOST_WIDE_INT last_elt = 0;
  unsigned HOST_WIDE_INT maxshift;

  /* Split vector constant out into a byte vector.  */
  for (i = 0; i < n_elts; i++)
    {
      rtx el = CONST_VECTOR_ELT (op, i);
      unsigned HOST_WIDE_INT elpart;

      if (CONST_INT_P (el))
        elpart = INTVAL (el);
      else if (CONST_DOUBLE_P (el))
        return 0;
      else
        gcc_unreachable ();

      if (i != 0 && elpart != last_elt)
        return 0;

      last_elt = elpart;
    }

  /* Shift less than element size.  */
  maxshift = innersize * 8;

  if (isleftshift)
    {
      /* Left shift immediate value can be from 0 to <size>-1.  */
      if (last_elt >= maxshift)
        return 0;
    }
  else
    {
      /* Right shift immediate value can be from 1 to <size>.  */
      if (last_elt == 0 || last_elt > maxshift)
	return 0;
    }

  if (elementwidth)
    *elementwidth = innersize * 8;

  if (modconst)
    *modconst = CONST_VECTOR_ELT (op, 0);

  return 1;
}

/* Return a string suitable for output of Neon immediate logic operation
   MNEM.  */

char *
neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
			     int inverse, int quad)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);

  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
  else
    sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);

  return templ;
}

/* Return a string suitable for output of Neon immediate shift operation
   (VSHR or VSHL) MNEM.  */

char *
neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
			     machine_mode mode, int quad,
			     bool isleftshift)
{
  int width, is_valid;
  static char templ[40];

  is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
  gcc_assert (is_valid != 0);

  if (quad)
    sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
  else
    sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);

  return templ;
}

/* Output a sequence of pairwise operations to implement a reduction.
   NOTE: We do "too much work" here, because pairwise operations work on two
   registers-worth of operands in one go. Unfortunately we can't exploit those
   extra calculations to do the full operation in fewer steps, I don't think.
   Although all vector elements of the result but the first are ignored, we
   actually calculate the same result in each of the elements. An alternative
   such as initially loading a vector with zero to use as each of the second
   operands would use up an additional register and take an extra instruction,
   for no particular gain.  */

void
neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
		      rtx (*reduc) (rtx, rtx, rtx))
{
  unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
  rtx tmpsum = op1;

  for (i = parts / 2; i >= 1; i /= 2)
    {
      rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
      emit_insn (reduc (dest, tmpsum, tmpsum));
      tmpsum = dest;
    }
}

/* If VALS is a vector constant that can be loaded into a register
   using VDUP, generate instructions to do so and return an RTX to
   assign to the register.  Otherwise return NULL_RTX.  */

static rtx
neon_vdup_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  rtx x;

  if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
    return NULL_RTX;

  if (!const_vec_duplicate_p (vals, &x))
    /* The elements are not all the same.  We could handle repeating
       patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
       {0, C, 0, C, 0, C, 0, C} which can be loaded using
       vdup.i16).  */
    return NULL_RTX;

  /* We can load this constant by using VDUP and a constant in a
     single ARM register.  This will be cheaper than a vector
     load.  */

  x = copy_to_mode_reg (inner_mode, x);
  return gen_vec_duplicate (mode, x);
}

/* Generate code to load VALS, which is a PARALLEL containing only
   constants (for vec_init) or CONST_VECTOR, efficiently into a
   register.  Returns an RTX to copy into the register, or NULL_RTX
   for a PARALLEL that cannot be converted into a CONST_VECTOR.  */

rtx
neon_make_constant (rtx vals)
{
  machine_mode mode = GET_MODE (vals);
  rtx target;
  rtx const_vec = NULL_RTX;
  int n_elts = GET_MODE_NUNITS (mode);
  int n_const = 0;
  int i;

  if (GET_CODE (vals) == CONST_VECTOR)
    const_vec = vals;
  else if (GET_CODE (vals) == PARALLEL)
    {
      /* A CONST_VECTOR must contain only CONST_INTs and
	 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
	 Only store valid constants in a CONST_VECTOR.  */
      for (i = 0; i < n_elts; ++i)
	{
	  rtx x = XVECEXP (vals, 0, i);
	  if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
	    n_const++;
	}
      if (n_const == n_elts)
	const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
    }
  else
    gcc_unreachable ();

  if (const_vec != NULL
      && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
    /* Load using VMOV.  On Cortex-A8 this takes one cycle.  */
    return const_vec;
  else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
    /* Loaded using VDUP.  On Cortex-A8 the VDUP takes one NEON
       pipeline cycle; creating the constant takes one or two ARM
       pipeline cycles.  */
    return target;
  else if (const_vec != NULL_RTX)
    /* Load from constant pool.  On Cortex-A8 this takes two cycles
       (for either double or quad vectors).  We cannot take advantage
       of single-cycle VLD1 because we need a PC-relative addressing
       mode.  */
    return const_vec;
  else
    /* A PARALLEL containing something not valid inside CONST_VECTOR.
       We cannot construct an initializer.  */
    return NULL_RTX;
}

/* Initialize vector TARGET to VALS.  */

void
neon_expand_vector_init (rtx target, rtx vals)
{
  machine_mode mode = GET_MODE (target);
  machine_mode inner_mode = GET_MODE_INNER (mode);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0, one_var = -1;
  bool all_same = true;
  rtx x, mem;
  int i;

  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var, one_var = i;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      rtx constant = neon_make_constant (vals);
      if (constant != NULL_RTX)
	{
	  emit_move_insn (target, constant);
	  return;
	}
    }

  /* Splat a single non-constant element if we can.  */
  if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
    {
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
      emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
      return;
    }

  /* One field is non-constant.  Load constant then overwrite varying
     field.  This is more efficient than using the stack.  */
  if (n_var == 1)
    {
      rtx copy = copy_rtx (vals);
      rtx merge_mask = GEN_INT (1 << one_var);

      /* Load constant part of vector, substitute neighboring value for
	 varying element.  */
      XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
      neon_expand_vector_init (target, copy);

      /* Insert variable.  */
      x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
      emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
      return;
    }

  /* Construct the vector in memory one field at a time
     and load the whole vector.  */
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				    i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}

/* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
   ERR if it doesn't.  EXP indicates the source location, which includes the
   inlining history for intrinsics.  */

static void
bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
	      const_tree exp, const char *desc)
{
  HOST_WIDE_INT lane;

  gcc_assert (CONST_INT_P (operand));

  lane = INTVAL (operand);

  if (lane < low || lane >= high)
    {
      if (exp)
	error ("%K%s %wd out of range %wd - %wd",
	       exp, desc, lane, low, high - 1);
      else
	error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
    }
}

/* Bounds-check lanes.  */

void
neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
		  const_tree exp)
{
  bounds_check (operand, low, high, exp, "lane");
}

/* Bounds-check constants.  */

void
arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  bounds_check (operand, low, high, NULL_TREE, "constant");
}

HOST_WIDE_INT
neon_element_bits (machine_mode mode)
{
  return GET_MODE_UNIT_BITSIZE (mode);
}


/* Predicates for `match_operand' and `match_operator'.  */

/* Return TRUE if OP is a valid coprocessor memory address pattern.
   WB is true if full writeback address modes are allowed and is false
   if limited writeback address modes (POST_INC and PRE_DEC) are
   allowed.  */

int
arm_coproc_mem_operand (rtx op, bool wb)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed || lra_in_progress)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Autoincremment addressing modes.  POST_INC and PRE_DEC are
     acceptable in any case (subject to verification by
     arm_address_register_rtx_p).  We need WB to be true to accept
     PRE_INC and POST_DEC.  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC
      || (wb
	  && (GET_CODE (ind) == PRE_INC
	      || GET_CODE (ind) == POST_DEC)))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  if (wb
      && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
      && arm_address_register_rtx_p (XEXP (ind, 0), 0)
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
    ind = XEXP (ind, 1);

  /* Match:
     (plus (reg)
	   (const)).  */
  if (GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      && INTVAL (XEXP (ind, 1)) <  1024
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}

/* Return TRUE if OP is a memory operand which we can load or store a vector
   to/from. TYPE is one of the following values:
    0 - Vector load/stor (vldr)
    1 - Core registers (ldm)
    2 - Element/structure loads (vld1)
 */
int
neon_vector_mem_operand (rtx op, int type, bool strict)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (strict && ! (reload_in_progress || reload_completed)
      && (reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* Allow post-increment with Neon registers.  */
  if ((type != 1 && GET_CODE (ind) == POST_INC)
      || (type == 0 && GET_CODE (ind) == PRE_DEC))
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  /* Allow post-increment by register for VLDn */
  if (type == 2 && GET_CODE (ind) == POST_MODIFY
      && GET_CODE (XEXP (ind, 1)) == PLUS
      && REG_P (XEXP (XEXP (ind, 1), 1)))
     return true;

  /* Match:
     (plus (reg)
          (const)).  */
  if (type == 0
      && GET_CODE (ind) == PLUS
      && REG_P (XEXP (ind, 0))
      && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
      && CONST_INT_P (XEXP (ind, 1))
      && INTVAL (XEXP (ind, 1)) > -1024
      /* For quad modes, we restrict the constant offset to be slightly less
	 than what the instruction format permits.  We have no such constraint
	 on double mode offsets.  (This must match arm_legitimate_index_p.)  */
      && (INTVAL (XEXP (ind, 1))
	  < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
      && (INTVAL (XEXP (ind, 1)) & 3) == 0)
    return TRUE;

  return FALSE;
}

/* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
   type.  */
int
neon_struct_mem_operand (rtx op)
{
  rtx ind;

  /* Reject eliminable registers.  */
  if (! (reload_in_progress || reload_completed)
      && (   reg_mentioned_p (frame_pointer_rtx, op)
	  || reg_mentioned_p (arg_pointer_rtx, op)
	  || reg_mentioned_p (virtual_incoming_args_rtx, op)
	  || reg_mentioned_p (virtual_outgoing_args_rtx, op)
	  || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
	  || reg_mentioned_p (virtual_stack_vars_rtx, op)))
    return FALSE;

  /* Constants are converted into offsets from labels.  */
  if (!MEM_P (op))
    return FALSE;

  ind = XEXP (op, 0);

  if (reload_completed
      && (GET_CODE (ind) == LABEL_REF
	  || (GET_CODE (ind) == CONST
	      && GET_CODE (XEXP (ind, 0)) == PLUS
	      && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
	      && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
    return TRUE;

  /* Match: (mem (reg)).  */
  if (REG_P (ind))
    return arm_address_register_rtx_p (ind, 0);

  /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db).  */
  if (GET_CODE (ind) == POST_INC
      || GET_CODE (ind) == PRE_DEC)
    return arm_address_register_rtx_p (XEXP (ind, 0), 0);

  return FALSE;
}

/* Prepares the operands for the VCMLA by lane instruction such that the right
   register number is selected.  This instruction is special in that it always
   requires a D register, however there is a choice to be made between Dn[0],
   Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.

   The VCMLA by lane function always selects two values. For instance given D0
   and a V2SF, the only valid index is 0 as the values in S0 and S1 will be
   used by the instruction.  However given V4SF then index 0 and 1 are valid as
   D0[0] or D1[0] are both valid.

   This function centralizes that information based on OPERANDS, OPERANDS[3]
   will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
   updated to contain the right index.  */

rtx *
neon_vcmla_lane_prepare_operands (rtx *operands)
{
  int lane = INTVAL (operands[4]);
  machine_mode constmode = SImode;
  machine_mode mode = GET_MODE (operands[3]);
  int regno = REGNO (operands[3]);
  regno = ((regno - FIRST_VFP_REGNUM) >> 1);
  if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
    {
      operands[3] = gen_int_mode (regno + 1, constmode);
      operands[4]
	= gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
    }
  else
    {
      operands[3] = gen_int_mode (regno, constmode);
      operands[4] = gen_int_mode (lane, constmode);
    }
  return operands;
}


/* Return true if X is a register that will be eliminated later on.  */
int
arm_eliminable_register (rtx x)
{
  return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
		       || REGNO (x) == ARG_POINTER_REGNUM
		       || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
			   && REGNO (x) <= LAST_VIRTUAL_REGISTER));
}

/* Return GENERAL_REGS if a scratch register required to reload x to/from
   coprocessor registers.  Otherwise return NO_REGS.  */

enum reg_class
coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
{
  if (mode == HFmode)
    {
      if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
	return GENERAL_REGS;
      if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
	return NO_REGS;
      return GENERAL_REGS;
    }

  /* The neon move patterns handle all legitimate vector and struct
     addresses.  */
  if (TARGET_NEON
      && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
      && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	  || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
	  || VALID_NEON_STRUCT_MODE (mode)))
    return NO_REGS;

  if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
    return NO_REGS;

  return GENERAL_REGS;
}

/* Values which must be returned in the most-significant end of the return
   register.  */

static bool
arm_return_in_msb (const_tree valtype)
{
  return (TARGET_AAPCS_BASED
          && BYTES_BIG_ENDIAN
	  && (AGGREGATE_TYPE_P (valtype)
	      || TREE_CODE (valtype) == COMPLEX_TYPE
	      || FIXED_POINT_TYPE_P (valtype)));
}

/* Return TRUE if X references a SYMBOL_REF.  */
int
symbol_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
     are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));

  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (symbol_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}

/* Return TRUE if X references a LABEL_REF.  */
int
label_mentioned_p (rtx x)
{
  const char * fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return 1;

  /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
     instruction, but they are constant offsets, not symbols.  */
  if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
    return 0;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (label_mentioned_p (XVECEXP (x, i, j)))
	      return 1;
	}
      else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
	return 1;
    }

  return 0;
}

int
tls_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return 1;

    /* Fall through.  */
    default:
      return 0;
    }
}

/* Must not copy any rtx that uses a pc-relative address.
   Also, disallow copying of load-exclusive instructions that
   may appear after splitting of compare-and-swap-style operations
   so as to prevent those loops from being transformed away from their
   canonical forms (see PR 69904).  */

static bool
arm_cannot_copy_insn_p (rtx_insn *insn)
{
  /* The tls call insn cannot be copied, as it is paired with a data
     word.  */
  if (recog_memoized (insn) == CODE_FOR_tlscall)
    return true;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_PIC_BASE
	      || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
	return true;
    }

  rtx set = single_set (insn);
  if (set)
    {
      rtx src = SET_SRC (set);
      if (GET_CODE (src) == ZERO_EXTEND)
	src = XEXP (src, 0);

      /* Catch the load-exclusive and load-acquire operations.  */
      if (GET_CODE (src) == UNSPEC_VOLATILE
	  && (XINT (src, 1) == VUNSPEC_LL
	      || XINT (src, 1) == VUNSPEC_LAX))
	return true;
    }
  return false;
}

enum rtx_code
minmax_code (rtx x)
{
  enum rtx_code code = GET_CODE (x);

  switch (code)
    {
    case SMAX:
      return GE;
    case SMIN:
      return LE;
    case UMIN:
      return LEU;
    case UMAX:
      return GEU;
    default:
      gcc_unreachable ();
    }
}

/* Match pair of min/max operators that can be implemented via usat/ssat.  */

bool
arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
			int *mask, bool *signed_sat)
{
  /* The high bound must be a power of two minus one.  */
  int log = exact_log2 (INTVAL (hi_bound) + 1);
  if (log == -1)
    return false;

  /* The low bound is either zero (for usat) or one less than the
     negation of the high bound (for ssat).  */
  if (INTVAL (lo_bound) == 0)
    {
      if (mask)
        *mask = log;
      if (signed_sat)
        *signed_sat = false;

      return true;
    }

  if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
    {
      if (mask)
        *mask = log + 1;
      if (signed_sat)
        *signed_sat = true;

      return true;
    }

  return false;
}

/* Return 1 if memory locations are adjacent.  */
int
adjacent_mem_locations (rtx a, rtx b)
{
  /* We don't guarantee to preserve the order of these memory refs.  */
  if (volatile_refs_p (a) || volatile_refs_p (b))
    return 0;

  if ((REG_P (XEXP (a, 0))
       || (GET_CODE (XEXP (a, 0)) == PLUS
	   && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
      && (REG_P (XEXP (b, 0))
	  || (GET_CODE (XEXP (b, 0)) == PLUS
	      && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
    {
      HOST_WIDE_INT val0 = 0, val1 = 0;
      rtx reg0, reg1;
      int val_diff;

      if (GET_CODE (XEXP (a, 0)) == PLUS)
        {
	  reg0 = XEXP (XEXP (a, 0), 0);
	  val0 = INTVAL (XEXP (XEXP (a, 0), 1));
        }
      else
	reg0 = XEXP (a, 0);

      if (GET_CODE (XEXP (b, 0)) == PLUS)
        {
	  reg1 = XEXP (XEXP (b, 0), 0);
	  val1 = INTVAL (XEXP (XEXP (b, 0), 1));
        }
      else
	reg1 = XEXP (b, 0);

      /* Don't accept any offset that will require multiple
	 instructions to handle, since this would cause the
	 arith_adjacentmem pattern to output an overlong sequence.  */
      if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
	return 0;

      /* Don't allow an eliminable register: register elimination can make
	 the offset too large.  */
      if (arm_eliminable_register (reg0))
	return 0;

      val_diff = val1 - val0;

      if (arm_ld_sched)
	{
	  /* If the target has load delay slots, then there's no benefit
	     to using an ldm instruction unless the offset is zero and
	     we are optimizing for size.  */
	  return (optimize_size && (REGNO (reg0) == REGNO (reg1))
		  && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
		  && (val_diff == 4 || val_diff == -4));
	}

      return ((REGNO (reg0) == REGNO (reg1))
	      && (val_diff == 4 || val_diff == -4));
    }

  return 0;
}

/* Return true if OP is a valid load or store multiple operation.  LOAD is true
   for load operations, false for store operations.  CONSECUTIVE is true
   if the register numbers in the operation must be consecutive in the register
   bank. RETURN_PC is true if value is to be loaded in PC.
   The pattern we are trying to match for load is:
     [(SET (R_d0) (MEM (PLUS (addr) (offset))))
      (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
       :
       :
      (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
     ]
     where
     1.  If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
     2.  REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
     3.  If consecutive is TRUE, then for kth register being loaded,
         REGNO (R_dk) = REGNO (R_d0) + k.
   The pattern for store is similar.  */
bool
ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
                     bool consecutive, bool return_pc)
{
  HOST_WIDE_INT count = XVECLEN (op, 0);
  rtx reg, mem, addr;
  unsigned regno;
  unsigned first_regno;
  HOST_WIDE_INT i = 1, base = 0, offset = 0;
  rtx elt;
  bool addr_reg_in_reglist = false;
  bool update = false;
  int reg_increment;
  int offset_adj;
  int regs_per_val;

  /* If not in SImode, then registers must be consecutive
     (e.g., VLDM instructions for DFmode).  */
  gcc_assert ((mode == SImode) || consecutive);
  /* Setting return_pc for stores is illegal.  */
  gcc_assert (!return_pc || load);

  /* Set up the increments and the regs per val based on the mode.  */
  reg_increment = GET_MODE_SIZE (mode);
  regs_per_val = reg_increment / 4;
  offset_adj = return_pc ? 1 : 0;

  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
      || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
    return false;

  /* Check if this is a write-back.  */
  elt = XVECEXP (op, 0, offset_adj);
  if (GET_CODE (SET_SRC (elt)) == PLUS)
    {
      i++;
      base = 1;
      update = true;

      /* The offset adjustment must be the number of registers being
         popped times the size of a single register.  */
      if (!REG_P (SET_DEST (elt))
          || !REG_P (XEXP (SET_SRC (elt), 0))
          || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
          || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
          || INTVAL (XEXP (SET_SRC (elt), 1)) !=
             ((count - 1 - offset_adj) * reg_increment))
        return false;
    }

  i = i + offset_adj;
  base = base + offset_adj;
  /* Perform a quick check so we don't blow up below. If only one reg is loaded,
     success depends on the type: VLDM can do just one reg,
     LDM must do at least two.  */
  if ((count <= i) && (mode == SImode))
      return false;

  elt = XVECEXP (op, 0, i - 1);
  if (GET_CODE (elt) != SET)
    return false;

  if (load)
    {
      reg = SET_DEST (elt);
      mem = SET_SRC (elt);
    }
  else
    {
      reg = SET_SRC (elt);
      mem = SET_DEST (elt);
    }

  if (!REG_P (reg) || !MEM_P (mem))
    return false;

  regno = REGNO (reg);
  first_regno = regno;
  addr = XEXP (mem, 0);
  if (GET_CODE (addr) == PLUS)
    {