/* GCC backend functions for C-SKY targets.
   Copyright (C) 2018-2020 Free Software Foundation, Inc.
   Contributed by C-SKY Microsystems and Mentor Graphics.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "memmodel.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "cfghooks.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "c-family/c-common.h"
#include "cpplib.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "explow.h"
#include "expr.h"
#include "cfgrtl.h"
#include "sched-int.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "intl.h"
#include "libfuncs.h"
#include "opts.h"
#include "dumpfile.h"
#include "target-globals.h"
#include "builtins.h"
#include "tm-constrs.h"
#include "rtl-iter.h"
#include "pass_manager.h"
#include "tree-pass.h"
#include "context.h"

/* This file should be included last.  */
#include "target-def.h"

/* Stack and register size macros.  */

#define CSKY_NUM_WORDS(SIZE) \
  (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
#define CSKY_NUM_REGS(MODE) \
  CSKY_NUM_WORDS (GET_MODE_SIZE (MODE))
#define CSKY_STACK_ALIGN(SIZE) \
  (CSKY_NUM_WORDS (SIZE) * UNITS_PER_WORD)
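
/* For illustration (assuming UNITS_PER_WORD == 4, as on C-SKY):
   CSKY_NUM_WORDS (5) == 2, so a 5-byte object occupies two words;
   CSKY_NUM_REGS (DImode) == 2, since DImode is 8 bytes; and
   CSKY_STACK_ALIGN (5) == 8, rounding the size up to a word multiple.  */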

/* Offsets and range macros.  */

#define CSKY_LD16_MAX_OFFSET(MODE)		\
  (31 * GET_MODE_SIZE (MODE))
#define CSKY_LD32_MAX_OFFSET(MODE) \
  (4095 * GET_MODE_SIZE (MODE))
#define CSKY_LD16_OFFSET_MASK(MODE) \
  (CSKY_LD16_MAX_OFFSET (MODE) + GET_MODE_SIZE (MODE) - 1)
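
/* For example, for SImode (4 bytes) these evaluate to
   CSKY_LD16_MAX_OFFSET == 124, CSKY_LD32_MAX_OFFSET == 16380, and
   CSKY_LD16_OFFSET_MASK == 127, consistent with 5-bit (16-bit insns)
   and 12-bit (32-bit insns) immediate fields scaled by the access
   size.  */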

#define CSKY_ADDI16_MAX_IMM	      256
#define CSKY_SUBI16_MAX_IMM	      256

#define CSKY_CONSTPOOL_LABEL_PREFIX   "LCP"

/* Array of the smallest class containing reg number REGNO, indexed by
   REGNO.  Used by REGNO_REG_CLASS.  */
enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
{
  /* Registers r0-r7.  */
  MINI_REGS,	 MINI_REGS,	MINI_REGS,     MINI_REGS,
  MINI_REGS,	 MINI_REGS,	MINI_REGS,     MINI_REGS,
  /* Registers r8-r15.  */
  LOW_REGS,	 LOW_REGS,	LOW_REGS,      LOW_REGS,
  LOW_REGS,	 LOW_REGS,	SP_REGS,       LOW_REGS,
  /* Registers r16-r31.  */
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
  /* Reserved.  */
  RESERVE_REGS,
  /* CC,HI,LO registers.  */
  C_REGS,      HI_REGS,	     LO_REGS,
  /* Reserved.  */
  RESERVE_REGS, RESERVE_REGS, RESERVE_REGS, RESERVE_REGS,
  RESERVE_REGS, RESERVE_REGS, RESERVE_REGS, RESERVE_REGS,
  RESERVE_REGS, RESERVE_REGS, RESERVE_REGS, RESERVE_REGS,
  RESERVE_REGS, RESERVE_REGS, RESERVE_REGS, RESERVE_REGS,
  /* Vec registers.  */
  V_REGS,	V_REGS,	      V_REGS,	    V_REGS,
  V_REGS,	V_REGS,	      V_REGS,	    V_REGS,
  V_REGS,	V_REGS,	      V_REGS,	    V_REGS,
  V_REGS,	V_REGS,	      V_REGS,	    V_REGS,
  /* Reserved.  */
  RESERVE_REGS, RESERVE_REGS,
  /* Register epc.  */
  OTHER_REGS
};

/* Array mapping GCC register numbers to debugger register numbers;
   '-1' means INVALID_REGNUM.
   TODO: document which numbering convention this follows.  */
const int csky_dbx_regno[FIRST_PSEUDO_REGISTER] =
{
  0,  1,  2,  3,  4,  5,  6,  7,
  8,  9,  10, 11, 12, 13, 14, 15,
  16, 17, 18, 19, 20, 21, 22, 23,
  24, 25, 26, 27, 28, 29, 30, 31,
  -1, -1, 36, 37, -1, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  -1, -1, -1, -1, 56, 57, 58, 59,
  60, 61, 62, 63, 64, 65, 66, 67,
  68, 69, 70, 71, -1, -1, 72
};

/* Table of machine attributes.  */
static tree csky_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree csky_handle_isr_attribute (tree *, tree, tree, int, bool *);
static const struct attribute_spec csky_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       affects_type_identity, handler, exclude } */
  { "naked",	 0, 0, true,  false, false, false, csky_handle_fndecl_attribute, NULL },
  /* Interrupt Service Routines have special prologue and epilogue requirements.  */
  { "interrupt", 0, 1, false, false, false, false, csky_handle_isr_attribute,	 NULL },
  { "isr",	 0, 1, false, false, false, false, csky_handle_isr_attribute,	 NULL },
  { NULL,	 0, 0, false, false, false, false, NULL,			 NULL }
};
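
/* Illustrative user-level usage of these attributes (not part of this
   file); the argument strings accepted by "interrupt"/"isr" are those
   listed in isr_attribute_map below:

     void handler (void) __attribute__ ((interrupt ("irq")));
     void trampoline (void) __attribute__ ((naked));  */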

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Records if LR has to be saved for far jumps.  */
  int far_jump_used;
  /* Records the type of the current function.  */
  unsigned long func_type;
  /* Record if the function has a variable argument list.  */
  int uses_anonymous_args;

  /* Stack frame layout information.  If frame_init_p is true,
     these fields have been initialized and don't need to be
     recomputed.  */
  unsigned int reg_mask;	/* non-volatile reg saves */
  int arg_size;			/* stdarg spills (bytes) */
  int reg_size;			/* non-volatile reg saves (bytes) */
  int local_size;		/* locals */
  int outbound_size;		/* arg overflow on calls out */
  int frame_size;		/* total static size of stack frame */
  int local_offset;
  int reg_offset;
  int arg_offset;
  int frame_init_p;

} machine_function;

/* These macros are for the func_type values above.  */
#define CSKY_FT_TYPE_MASK   ((1 << 3) - 1)
#define CSKY_FT_UNKNOWN	    0		    /* Type has not been determined */
#define CSKY_FT_NORMAL	    1		    /* Normal function */
#define CSKY_FT_ISR	    4		    /* Interrupt service routine */
#define CSKY_FT_FIQ	    5		    /* Fast interrupt service routine */
#define CSKY_FT_EXCEPTION   6		    /* Exception handler */
#define CSKY_FT_INTERRUPT   (1 << 2)	    /* Bit 2; overlaps the ISR/FIQ/EXCEPTION codes */
#define CSKY_FT_NAKED	    (1 << 3)	    /* No prologue and epilogue */
#define CSKY_FUNCTION_TYPE(t)	      ((t) & CSKY_FT_TYPE_MASK)
#define CSKY_FUNCTION_IS_INTERRUPT(t) ((t) & CSKY_FT_INTERRUPT)
#define CSKY_FUNCTION_IS_NAKED(t)     ((t) & CSKY_FT_NAKED)
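
/* For example, a naked IRQ handler carries
   func_type == (CSKY_FT_ISR | CSKY_FT_NAKED), for which
   CSKY_FUNCTION_TYPE yields CSKY_FT_ISR and both
   CSKY_FUNCTION_IS_INTERRUPT and CSKY_FUNCTION_IS_NAKED are nonzero.
   The CSKY_FT_INTERRUPT test works because the ISR (4), FIQ (5), and
   EXCEPTION (6) codes all have bit 2 set.  */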

struct csky_processors
{
  const char *const name;
  enum csky_processor_type core;
  const char *arch;
  enum csky_base_architecture base_arch;
  enum csky_isa_feature isa_bits[CSKY_ISA_FEATURE_GET (max)];
};

static struct csky_processors all_cores[] =
{
#undef CSKY_CORE
#define CSKY_CORE(NAME, CORE, X, ARCH, ISA)  \
  {NAME, TARGET_CPU_##CORE, #ARCH, CSKY_BASE_ARCH_##ARCH, \
  {ISA CSKY_ISA_FEATURE_GET (none)}},
#include "csky_cores.def"
#undef CSKY_CORE
  {NULL, TARGET_CPU_csky_none, NULL, CSKY_BASE_ARCH_NONE, \
  {CSKY_ISA_FEATURE_GET (none)}}
};

static struct csky_processors all_architectures[] =
{
#undef CSKY_ARCH
#define CSKY_ARCH(NAME, CORE, ARCH, ISA)     \
  {NAME, TARGET_CPU_##CORE, #ARCH, CSKY_BASE_ARCH_##ARCH,  \
  {ISA CSKY_ISA_FEATURE_GET (none)}},
#include "csky_cores.def"
#undef CSKY_ARCH
  {NULL, TARGET_CPU_csky_none, NULL, CSKY_BASE_ARCH_NONE, \
  {CSKY_ISA_FEATURE_GET (none)}}
};

struct csky_fpu_desc
{
  const char *name;
  enum csky_isa_feature isa_bits[CSKY_ISA_FEATURE_GET (max)];
};

static const struct csky_fpu_desc all_fpus[] =
{
#undef CSKY_FPU
#define CSKY_FPU(NAME, CNAME, ISA) \
  {NAME, {ISA CSKY_ISA_FEATURE_GET (none)}},
#include "csky_cores.def"
#undef CSKY_FPU
};

/* Active target architecture.  */
struct csky_build_target
{
  /* Name of the target CPU, if known, or NULL if the target CPU was
     not specified by the user (in which case it is inferred from the
     -march option).  */
  const char *core_name;
  /* Name of the target ARCH.  NULL if there is a selected CPU.  */
  const char *arch_name;
  /* Preprocessor substring (never NULL).  */
  const char *arch_pp_name;
  /* CPU identifier for the core we're compiling for (architecturally).  */
  enum csky_processor_type arch_core;
  /* The base architecture value.  */
  enum csky_base_architecture base_arch;
  /* Bitmap encapsulating the isa_bits for the target environment.  */
  sbitmap isa;
};

struct csky_build_target csky_active_target;

/* The following are used in the .md file as equivalents to the ISA
   feature bits.  */
int csky_arch_isa_features[CSKY_ISA_FEATURE_GET (max)] = {0};

/* The highest CSKY architecture version supported by the target.  */
enum csky_base_architecture csky_base_arch = CSKY_TARGET_ARCH_GET (NONE);

/* Forward definitions of types.  */
typedef struct minipool_node	Mnode;
typedef struct minipool_fixup	Mfix;

static GTY(()) int tls_labelno;


/* Maximum constant offset that can be added/subtracted from SP in a
   single instruction.  For ck801, this is for addsp/subsp, otherwise
   it is the range of addi/subi.  */
#define CSKY_MAX_SP_ADJUST \
  (CSKY_TARGET_ARCH (CK801) ? 508 : 4096)


/* Implement TARGET_CPU_CPP_BUILTINS.  */

#define builtin_define(MACRO) cpp_define (pfile, MACRO)

void
csky_cpu_cpp_builtins (cpp_reader *pfile)
{
  const char *arch_name = csky_active_target.arch_pp_name;
  char *pp_name = (char *) alloca (1 + strlen (arch_name) + 4);
  sprintf (pp_name, "__%s__", arch_name);
  builtin_define (pp_name);

  builtin_define ("__csky__=2");
  builtin_define ("__CSKY__=2");
  builtin_define ("__ckcore__=2");
  builtin_define ("__CKCORE__=2");

  builtin_define ("__CSKYABIV2__");
  builtin_define ("__cskyabiv2__");
  builtin_define ("__CSKYABI__=2");
  builtin_define ("__cskyabi__=2");

  if (TARGET_BIG_ENDIAN)
    {
      builtin_define ("__ckcoreBE__");
      builtin_define ("__cskyBE__");
      builtin_define ("__cskybe__");
      builtin_define ("__CSKYBE__");
    }
  else
    {
      builtin_define ("__ckcoreLE__");
      builtin_define ("__cskyLE__");
      builtin_define ("__cskyle__");
      builtin_define ("__CSKYLE__");
    }

  if (TARGET_HARD_FLOAT)
    {
      builtin_define ("__csky_hard_float__");
      builtin_define ("__CSKY_HARD_FLOAT__");
    }
  else
    {
      builtin_define ("__csky_soft_float__");
      builtin_define ("__CSKY_SOFT_FLOAT__");
    }

  if (CSKY_ISA_FEATURE (fpv2_sf))
    {
      builtin_define ("__csky_fpuv2__");
      builtin_define ("__CSKY_FPUV2__");
    }

  if (TARGET_ELRW)
    {
      builtin_define ("__csky_elrw__");
      builtin_define ("__CSKY_ELRW__");
    }
  if (TARGET_ISTACK)
    {
      builtin_define ("__csky_istack__");
      builtin_define ("__CSKY_ISTACK__");
    }
  if (TARGET_MP)
    {
      builtin_define ("__csky_mp__");
      builtin_define ("__CSKY_MP__");
    }
  if (TARGET_CP)
    {
      builtin_define ("__csky_cp__");
      builtin_define ("__CSKY_CP__");
    }
  if (TARGET_CACHE)
    {
      builtin_define ("__csky_cache__");
      builtin_define ("__CSKY_CACHE__");
    }
  if (TARGET_SECURITY)
    {
      builtin_define ("__csky_security__");
      builtin_define ("__CSKY_SECURITY__");
    }
  if (TARGET_TRUST)
    {
      builtin_define ("__csky_trust__");
      builtin_define ("__CSKY_TRUST__");
    }
  if (TARGET_DSP)
    {
      builtin_define ("__csky_dsp__");
      builtin_define ("__CSKY_DSP__");
    }
  if (TARGET_EDSP)
    {
      builtin_define ("__csky_edsp__");
      builtin_define ("__CSKY_EDSP__");
    }
  if (TARGET_VDSP)
    {
      builtin_define ("__csky_vdsp__");
      builtin_define ("__CSKY_VDSP__");
    }
}
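
/* A minimal sketch of how user code might test these predefines
   (illustrative only):

     #if defined (__CSKY__) && defined (__csky_hard_float__)
      ... hard-float C-SKY-specific code ...
     #endif  */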


/******************************************************************
 *			   Storage Layout			  *
 ******************************************************************/


#undef	TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE \
  default_promote_function_mode_always_promote

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT csky_constant_alignment


/******************************************************************
 *		Stack Layout and Calling Conventions		  *
 ******************************************************************/

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE csky_can_eliminate

#undef	TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG csky_function_arg

#undef	TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE csky_function_arg_advance

#undef	TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE csky_function_value

#undef	TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE csky_libcall_value

#undef	TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P csky_function_value_regno_p

#undef	TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true

#undef	TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true

#undef	TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef	TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES csky_arg_partial_bytes

#undef	TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE hook_pass_by_reference_must_pass_in_stack

#undef	TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK csky_output_mi_thunk

#undef	TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
  hook_bool_const_tree_hwi_hwi_const_tree_true

#undef	TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE csky_output_function_prologue

#undef	TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE csky_output_function_epilogue

#undef	TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN csky_warn_func_return

#undef	TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY csky_return_in_memory


/******************************************************************
 *		  Implementing the Varargs Macros		  *
 ******************************************************************/


#undef	TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS csky_setup_incoming_varargs


/******************************************************************
 *		 Implicit Calls to Library Routines		  *
 ******************************************************************/


#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS csky_init_libfuncs


/******************************************************************
 *    Dividing the Output into Sections (Texts, Data, . . . )	  *
 ******************************************************************/


#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS TARGET_CSKY_LINUX


/******************************************************************
 *	   Defining target-specific uses of __attribute__	  *
 ******************************************************************/


#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE csky_attribute_table

#undef	TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE csky_option_override


/* Implement the BRANCH_COST target macro.  */

int
csky_default_branch_cost (bool speed_p ATTRIBUTE_UNUSED,
			  bool predictable_p ATTRIBUTE_UNUSED)
{
  return csky_branch_cost;
}

bool
csky_default_logical_op_non_short_circuit (void)
{
  return BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2;
}

/******************************************************************
 *			   Register Usage			  *
 ******************************************************************/

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS csky_hard_regno_nregs

#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK csky_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P csky_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS csky_can_change_mode_class

#undef	TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE csky_conditional_register_usage

#undef TARGET_CLASS_LIKELY_SPILLED_P
#define TARGET_CLASS_LIKELY_SPILLED_P csky_class_likely_spilled_p

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS csky_preferred_reload_class

#undef TARGET_CLASS_MAX_NREGS
#define TARGET_CLASS_MAX_NREGS csky_class_max_nregs

#undef	TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD	 csky_secondary_reload

#undef TARGET_SPILL_CLASS
#define TARGET_SPILL_CLASS csky_spill_class


/******************************************************************
 *			  Addressing Modes			  *
 ******************************************************************/


#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM csky_cannot_force_const_mem

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P csky_legitimate_constant_p

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS csky_legitimize_address

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P csky_legitimate_address_p


/******************************************************************
 *			       Others				  *
 ******************************************************************/


#undef	TARGET_CANNOT_COPY_INSN_P
#define TARGET_CANNOT_COPY_INSN_P csky_cannot_copy_insn_p


/******************************************************************
 *			Assembler Format			  *
 ******************************************************************/


#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND csky_print_operand

#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS csky_print_operand_address

#undef	TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"

#undef	TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"

#undef	TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN csky_dwarf_register_span


/******************************************************************
 *		      Miscellaneous Parameters			  *
 ******************************************************************/


#undef	TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG csky_reorg

#undef	TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS csky_allocate_stack_slots_for_args

#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed


/******************************************************************
 *		  Trampolines for Nested Functions		  *
 ******************************************************************/


#undef	TARGET_ASM_TRAMPOLINE_TEMPLATE
#define TARGET_ASM_TRAMPOLINE_TEMPLATE	csky_asm_trampoline_template
#undef	TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT		csky_trampoline_init

/* The low bit is ignored by jsr and jmp instructions, so it is safe
   to use.  */
#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

/******************************************************************
 *	      Describing Relative Costs of Operations		  *
 ******************************************************************/


#undef	TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST csky_register_move_cost

#undef	TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST	  csky_memory_move_cost

#undef	TARGET_RTX_COSTS
#define TARGET_RTX_COSTS	  csky_rtx_costs

#undef	TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST	  csky_address_cost


/******************************************************************
 *			  Anchor address			  *
 ******************************************************************/


/* FIXME: the maximum offset depends on the mode size; the value below
   is defined for SImode.  How should HImode and QImode be handled,
   and should a minimum offset be defined too?  */
#undef	TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET \
  ((TARGET_MINI_REGISTERS && optimize_size) ? 127 : 4095)


/******************************************************************
 *		       Condition Code Status			  *
 ******************************************************************/


#undef	TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS csky_fixed_condition_code_regs


/******************************************************************
 *	     Adjusting the Instruction Scheduler		  *
 ******************************************************************/


#undef	TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE csky_sched_issue_rate

#undef	TARGET_SCHED_ADJUST_COST
#define	 TARGET_SCHED_ADJUST_COST csky_sched_adjust_cost


/* Forward declarations of functions.  */
static void push_csky_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
				    machine_mode, rtx);
static void csky_print_operand (FILE *stream, rtx x, int code);


/* Define a table to map ISR attribute arguments onto function type
   modifiers.  */

typedef struct
{
  const char *const arg;
  const unsigned long return_value;
} isr_attribute_entry;

static const isr_attribute_entry isr_attribute_map[] =
{
  {"irq", CSKY_FT_ISR },
  {"IRQ", CSKY_FT_ISR },
  {"fiq", CSKY_FT_FIQ },
  {"FIQ", CSKY_FT_FIQ },
  {NULL, CSKY_FT_NORMAL }
};


/* Map an ISR attribute argument to the corresponding function type;
   return CSKY_FT_UNKNOWN if it cannot be determined.  */

static unsigned long
get_csky_isr_type (tree argument)
{
  const isr_attribute_entry *ptr;
  const char *arg;

  /* If the argument is NULL, default to CSKY_FT_ISR.  */
  if (argument == NULL_TREE)
    return CSKY_FT_ISR;

  if (TREE_VALUE (argument) == NULL_TREE
     || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return CSKY_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  for (ptr = isr_attribute_map; ptr->arg != NULL; ptr++)
    if (strcmp (arg, ptr->arg) == 0)
      return ptr->return_value;

  return CSKY_FT_UNKNOWN;
}

/* Classify cfun as a normal function or some sort of interrupt
   handler, and set the corresponding bits in cfun->machine->func_type.  */

static unsigned long
get_csky_current_func_type (void)
{
  if (CSKY_FUNCTION_TYPE (cfun->machine->func_type) == CSKY_FT_UNKNOWN)
    {
      unsigned long type = CSKY_FT_UNKNOWN;
      tree a;
      tree attr;

      gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

      attr = DECL_ATTRIBUTES (current_function_decl);
      a = lookup_attribute ("naked", attr);
      if (a != NULL_TREE)
	type |= CSKY_FT_NAKED;
      a = lookup_attribute ("isr", attr);
      if (a == NULL_TREE)
	a = lookup_attribute ("interrupt", attr);
      if (a == NULL_TREE)
	type |= CSKY_FT_NORMAL;
      else
	type |= get_csky_isr_type (TREE_VALUE (a));

      cfun->machine->func_type = type;
    }

  return cfun->machine->func_type;
}

/* These typedefs are located at the start of this file, so that
   they can be used in the prototypes there.  This comment is to
   remind readers of that fact so that the following structures
   can be understood more easily.

     typedef struct minipool_node    Mnode;
     typedef struct minipool_fixup   Mfix;  */

struct minipool_node
{
  /* Doubly linked chain of entries.  */
  Mnode *next;
  Mnode *prev;
  /* The maximum offset into the code that this entry can be placed.  While
     pushing fixes for forward references, all entries are sorted in order
     of increasing max_address.  */
  HOST_WIDE_INT max_address;
  /* Similarly for an entry inserted for a backwards ref.  */
  HOST_WIDE_INT min_address;
  /* The number of fixes referencing this entry.  This can become zero
     if we "unpush" an entry.  In this case we ignore the entry when we
     come to emit the code.  */
  int refcount;
  /* The offset from the start of the minipool.  */
  HOST_WIDE_INT offset;
  /* The value in table.  */
  rtx value;
  /* The mode of value.  */
  machine_mode mode;
  /* The size of the value.  */
  int fix_size;
};

struct minipool_fixup
{
  Mfix *next;
  rtx_insn *insn;
  HOST_WIDE_INT address;
  rtx *loc;
  machine_mode mode;
  int fix_size;
  rtx value;
  Mnode *minipool;
  HOST_WIDE_INT forwards;
  HOST_WIDE_INT backwards;
};

static Mnode *minipool_vector_head;
static Mnode *minipool_vector_tail;
static rtx  minipool_vector_label;
static HOST_WIDE_INT constpool_label_no = 0;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *minipool_startobj;
/* The linked list of all minipool fixes required for this function.  */
Mfix *minipool_fix_head;
Mfix *minipool_fix_tail;
/* The fix entry for the current minipool, once it has been placed.  */
Mfix *minipool_barrier;

/* Allow GC scanning of the minipool obstack.  */
static void
csky_add_gc_roots (void)
{
  gcc_obstack_init (&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}

/* Implement TARGET_CONSTANT_ALIGNMENT.
   Make strings word-aligned so strcpy from constants will be faster.  */
static HOST_WIDE_INT
csky_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST
      && !optimize_size
      && align < BITS_PER_WORD)
    return BITS_PER_WORD;
  return align;
}

/* Record that there is a natural barrier in the insn stream at
   ADDRESS.  */

static void
push_csky_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
{
  Mfix *fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (*fix));

  fix->insn = insn;
  fix->address = address;

  fix->next = NULL;
  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}

/* Compute the size of a vector jump table.  */

static HOST_WIDE_INT
get_csky_jump_table_size (rtx insn)
{
  /* ADDR_VECs only take room if read-only data goes into the text
     section.  */
  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
    {
      rtx body = PATTERN (insn);
      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
      HOST_WIDE_INT size;
      HOST_WIDE_INT modesize;

      modesize = GET_MODE_SIZE (GET_MODE (body));
      size = modesize * XVECLEN (body, elt);
      switch (modesize)
	{
	case 1:
	  /* Round up the size of a TBB table to a halfword boundary.  */
	  size = (size + 1) & ~(HOST_WIDE_INT)1;
	  break;
	case 2:
	  /* No padding necessary for TBH.  */
	  break;
	case 4:
	  break;
	default:
	  gcc_unreachable ();
	}
      return size;
    }

  return 0;
}
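
/* As an illustration: an ADDR_DIFF_VEC in QImode with five entries
   yields size 5, rounded up to 6 for halfword alignment; the same
   table in HImode yields 10 with no padding.  */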


/* Scan INSN and note any of its operands that need fixing.
   If DO_PUSHES is false we do not actually push any of the fixups
   needed.  The function returns TRUE if any fixups were needed/pushed.  */

static bool
note_csky_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address,
			     int do_pushes)
{
  bool result = false;
  int opno;

  extract_constrain_insn (insn);

  if (recog_data.n_alternatives == 0)
    return false;

  /* Fill in recog_op_alt with information about the constraints of
     this insn.  */
  preprocess_constraints (insn);

  const operand_alternative *op_alt = which_op_alt ();
  for (opno = 0; opno < recog_data.n_operands; opno++)
    {
      /* Things we need to fix can only occur in inputs.  */
      if (recog_data.operand_type[opno] != OP_IN)
	continue;

      /* If this alternative is a memory reference, then any mention
	 of constants in this alternative is really to fool reload
	 into allowing us to accept one there.  We need to fix them up
	 now so that we output the right code.  */
      if (op_alt[opno].memory_ok)
	{
	  rtx op = recog_data.operand[opno];

	  if (CONSTANT_P (op))
	    {
	      if (do_pushes)
		push_csky_minipool_fix (insn, address,
					recog_data.operand_loc[opno],
					recog_data.operand_mode[opno], op);
	      result = true;
	    }
	}
    }

  return result;
}


/* Add a constant to the minipool for a forward reference.  Returns the
   node added or NULL if the constant will not fit in this pool.  */

static Mnode *
add_csky_minipool_forward_ref (Mfix *fix)
{
  /* If set, max_mp is the first pool_entry that has a lower
     constraint than the one we are trying to add.  */
  Mnode *max_mp = NULL;
  HOST_WIDE_INT max_address = fix->address + fix->forwards;
  Mnode *mp;

  /* If the minipool starts before the end of FIX->INSN then this FIX
     cannot be placed into the current pool.  Furthermore, adding the
     new constant pool entry may cause the pool to start FIX_SIZE bytes
     earlier.  */
  if (minipool_vector_head
      && (fix->address + get_attr_length (fix->insn)
	  >= minipool_vector_head->max_address - fix->fix_size))
    return NULL;

  /* Scan the pool to see if a constant with the same value has
     already been added.  While we are doing this, also note the
     location where we must insert the constant if it doesn't already
     exist.  */
  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      if (GET_CODE (fix->value) == GET_CODE (mp->value)
	  && fix->mode == mp->mode
	  && (GET_CODE (fix->value) != CODE_LABEL
	      || (CODE_LABEL_NUMBER (fix->value)
		  == CODE_LABEL_NUMBER (mp->value)))
	  && rtx_equal_p (fix->value, mp->value))
	{
	  /* More than one fix references this entry.  */
	  mp->refcount++;
	  return mp;
	}

      /* Note the insertion point if necessary.  */
      if (max_mp == NULL && mp->max_address > max_address)
	max_mp = mp;
    }

  /* The value is not currently in the minipool, so we need to create
     a new entry for it.  If MAX_MP is NULL, the entry will be put on
     the end of the list since the placement is less constrained than
     any existing entry.  Otherwise, we insert the new fix before
     MAX_MP and, if necessary, adjust the constraints on the other
     entries.  */
  mp = XNEW (Mnode);
  mp->fix_size = fix->fix_size;
  mp->mode = fix->mode;
  mp->value = fix->value;
  mp->refcount = 1;
  /* Not yet required for a backwards ref.  */
  mp->min_address = -65536;

  if (max_mp == NULL)
    {
      mp->max_address = max_address;
      mp->next = NULL;
      mp->prev = minipool_vector_tail;

      if (mp->prev == NULL)
	{
	  minipool_vector_head = mp;
	  minipool_vector_label
	    = gen_csky_constpool_label (gen_rtx_CONST_INT (VOIDmode,
							   constpool_label_no++));
	}
      else
	mp->prev->next = mp;

      minipool_vector_tail = mp;
    }
  else
    {
      if (max_address > max_mp->max_address - mp->fix_size)
	mp->max_address = max_mp->max_address - mp->fix_size;
      else
	mp->max_address = max_address;

      mp->next = max_mp;
      mp->prev = max_mp->prev;
      max_mp->prev = mp;
      if (mp->prev != NULL)
	mp->prev->next = mp;
      else
	minipool_vector_head = mp;
    }

  /* Save the new entry.  */
  max_mp = mp;

  /* Scan over the preceding entries and adjust their addresses as
     required.  */
  while (mp->prev != NULL
	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
    {
      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
      mp = mp->prev;
    }

  return max_mp;
}


/* Return the cost of forcibly inserting a barrier after INSN.  */

static int
get_csky_barrier_cost (rtx_insn *insn)
{
  /* Basing the location of the pool on the loop depth is preferable,
     but at the moment, the basic block information seems to be
     corrupted by this stage of the compilation.  */
  int base_cost = 50;
  rtx next = next_nonnote_insn (insn);

  if (next != NULL && GET_CODE (next) == CODE_LABEL)
    base_cost -= 20;

  switch (GET_CODE (insn))
    {
    case CODE_LABEL:
      /* It will always be better to place the table before the label,
	 rather than after it.  */
      return 50;

    case INSN:
    case CALL_INSN:
      return base_cost;

    case JUMP_INSN:
      return base_cost - 10;

    default:
      return base_cost + 10;
    }
}


/* Find the best place in the insn stream in the range
   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
   Create the barrier by inserting a jump and add a new fix entry for
   it.  */
static Mfix *
create_csky_fix_barrier (Mfix *fix, Mfix *fix_next,
			 HOST_WIDE_INT max_address)
{
  rtx_barrier *barrier;
  rtx_insn *from = (fix ? fix->insn : get_insns ());
  /* The instruction after which we will insert the jump.  */
  rtx_insn *selected = NULL;
  int selected_cost;
  /* The address at which the jump instruction will be placed.  */
  HOST_WIDE_INT selected_address = 0;
  Mfix *new_fix;
  HOST_WIDE_INT count = (fix ? fix->address : 0);
  HOST_WIDE_INT max_count = max_address;
  rtx_code_label *label = gen_label_rtx ();

  selected_cost = get_csky_barrier_cost (from);

  while (from && count < max_count)
    {
      int new_cost;
      rtx_jump_table_data *table;

      /* Count the length of this insn.  */
      count += get_attr_length (from);

      /* If there is a jump table, add its length.  */
      if (tablejump_p (from, NULL, &table))
	{
	  count += get_csky_jump_table_size (table);

	  /* Jump tables aren't in a basic block, so base the cost on
	     the dispatch insn.  If we select this location, we will
	     still put the pool after the table.  */
	  new_cost = get_csky_barrier_cost (from);

	  if (count < max_count
	      && (!selected || new_cost <= selected_cost))
	    {
	      selected = table;
	      selected_cost = new_cost;
	      selected_address = count;
	    }

	  /* Continue after the dispatch table.  */
	  from = NEXT_INSN (table);
	  continue;
	}

      new_cost = get_csky_barrier_cost (from);

      if (count < max_count
	  && (!selected || new_cost <= selected_cost))
	{
	  selected = from;
	  selected_cost = new_cost;
	  selected_address = count;
	}

      from = NEXT_INSN (from);
    }

  /* Make sure that we found a place to insert the jump.  */
  gcc_assert (selected);

  /* Create a new JUMP_INSN that branches around a barrier.  */
  from = emit_jump_insn_after (gen_jump (label), selected);
  JUMP_LABEL (from) = label;
  barrier = emit_barrier_after (from);
  emit_label_after (label, barrier);

  /* Create a minipool barrier entry for the new barrier.  */
  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
  new_fix->insn = barrier;
  new_fix->address = selected_address;
  if (fix)
    {
      new_fix->next = fix->next;
      fix->next = new_fix;
    }
  else
    new_fix->next = fix_next;

  return new_fix;
}


/* Print a symbolic form of the constant X to the dump file F.
   This is used for dump output for -mconstpool in the target-dependent
   reorg pass.  */

static void
print_csky_value (FILE *f, rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST_INT:
      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
      return;

    case CONST_DOUBLE:
      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
      return;

    case CONST_VECTOR:
      {
	int i;

	fprintf (f, "<");
	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
	  {
	    fprintf (f, HOST_WIDE_INT_PRINT_HEX,
		     INTVAL (CONST_VECTOR_ELT (x, i)));
	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
	      fputc (',', f);
	  }
	fprintf (f, ">");
      }
      return;

    case CONST_STRING:
      fprintf (f, "\"%s\"", XSTR (x, 0));
      return;

    case SYMBOL_REF:
      fprintf (f, "`%s'", XSTR (x, 0));
      return;

    case LABEL_REF:
      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
      return;

    case CONST:
      print_csky_value (f, XEXP (x, 0));
      return;

    case PLUS:
      print_csky_value (f, XEXP (x, 0));
      fprintf (f, "+");
      print_csky_value (f, XEXP (x, 1));
      return;

    case PC:
      fprintf (f, "pc");
      return;

    default:
      fprintf (f, "????");
      return;
    }
}


/* Record INSN, which will need fixing up to load a value from the
   minipool.  ADDRESS is the offset of the insn since the start of the
   function; LOC is a pointer to the part of the insn which requires
   fixing; VALUE is the constant that must be loaded, which is of type
   MODE.  */

static void
push_csky_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
			machine_mode mode, rtx value)
{
  #define CSKY_ELRW16_RANGE  1400
  #define CSKY_LRW16_RANGE   700
  #define CSKY_CONSTANT_POOL_RANGE (TARGET_ELRW ? CSKY_ELRW16_RANGE \
						: CSKY_LRW16_RANGE)

  /* Fixes less than a word need padding out to a word boundary.  */
  #define CSKY_MINIPOOL_FIX_SIZE(mode) \
    (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)

  Mfix *fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (*fix));

  fix->insn = insn;
  fix->address = address;
  fix->loc = loc;
  fix->mode = mode;
  fix->fix_size = CSKY_MINIPOOL_FIX_SIZE (mode);
  fix->value = value;
  fix->forwards = CSKY_CONSTANT_POOL_RANGE;
  fix->backwards = 0;
  fix->minipool = NULL;

  /* If an insn doesn't have a range defined for it, then it isn't
     expecting to be reworked by this code.  Better to stop now than
     to generate duff assembly code.  */
  gcc_assert (fix->forwards || fix->backwards);

  if (dump_file)
    {
      fprintf (dump_file,
	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
	       GET_MODE_NAME (mode),
	       INSN_UID (insn), (unsigned long) address,
	       -1 * (long)fix->backwards, (long)fix->forwards);
      print_csky_value (dump_file, fix->value);
      fprintf (dump_file, "\n");
    }

  /* Add it to the chain of fixes.  */
  fix->next = NULL;

  if (minipool_fix_head != NULL)
    minipool_fix_tail->next = fix;
  else
    minipool_fix_head = fix;

  minipool_fix_tail = fix;
}


/* Fill in the offsets for minipool entries.  */

static void
assign_csky_minipool_offsets (Mfix *barrier)
{
  HOST_WIDE_INT offset = 0;
  Mnode *mp;

  minipool_barrier = barrier;

  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
    {
      mp->offset = offset;

      if (mp->refcount > 0)
	offset += mp->fix_size;
    }
}


/* Output the literal table.  */

static HOST_WIDE_INT
dump_csky_minipool (rtx_insn *scan)
{
  Mnode *mp;
  Mnode *nmp;
  HOST_WIDE_INT pool_length = 0;

  if (dump_file)
    fprintf (dump_file,
	     ";; Emitting minipool after insn %u; address %lu; "
	     "align %d (bytes)\n",
	     INSN_UID (scan), (unsigned long) minipool_barrier->address, 4);

  scan = emit_insn_after (gen_align_4 (), scan);
  scan = emit_insn_after (minipool_vector_label, scan);

  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
    {
      if (mp->refcount > 0)
	{
	  if (dump_file)
	    {
	      fprintf (dump_file, ";;  Offset %u, min %ld, max %ld ",
		       (unsigned) mp->offset, (unsigned long) mp->min_address,
		       (unsigned long) mp->max_address);
	      print_csky_value (dump_file, mp->value);
	      fputc ('\n', dump_file);
	    }

	  switch (mp->fix_size)
	    {
	    case 4:
	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
	      pool_length += 4;
	      break;
	    case 8:
	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
	      pool_length += 8;
	      break;
	    default:
	      gcc_unreachable ();
	    }
	}

      nmp = mp->next;
      free (mp);
    }

  minipool_vector_head = minipool_vector_tail = NULL;
  scan = emit_barrier_after (scan);

  return pool_length;
}

/* Return true if INSN is a minipool load or instruction that will be
   converted to one.  It is assumed that INSN has type attribute "load".  */

bool
csky_minipool_load_p (rtx_insn *insn)
{
  rtx op1, addr;

  extract_insn_cached (insn);

  op1 = recog_data.operand[1];

  /* This is a constant that has not yet been turned into
     a minipool load.  */
  if (CONSTANT_P (op1))
    return true;

  /* Constant pool loads are label_refs.  */
  if (GET_CODE (op1) == ZERO_EXTEND || GET_CODE (op1) == SIGN_EXTEND)
    op1 = XEXP (op1, 0);
  if (GET_CODE (op1) != MEM)
    return false;
  addr = XEXP (op1, 0);
  if (GET_CODE (addr) == PLUS && CONST_INT_P (XEXP (addr, 1)))
    addr = XEXP (addr, 0);
  return GET_CODE (addr) == LABEL_REF;
}


/* Compute the attribute "length" of a push or pop insn, according to
   the registers it uses.  */

int
csky_compute_pushpop_length (rtx *operands)
{
  rtx parallel_op = operands[2];
  /* Index of the last element of the PARALLEL.  */
  unsigned indx = XVECLEN (parallel_op, 0) - 1;
  unsigned first_indx = 0;
  unsigned regno = REGNO (operands[1]);

  if (regno > CSKY_LR_REGNUM)
    return 4;

  /* Check each register in the list.  */
  for (; indx > first_indx; indx--)
    {
      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
      /* If a register number higher than 15 is included, a 32-bit insn
	 is used.  */
      if (regno > CSKY_LR_REGNUM)
	return 4;
    }

  return 2;
}
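
/* Illustration: for a push saving only registers at or below
   CSKY_LR_REGNUM (e.g. r4-r11 and lr), the 16-bit form suffices and
   the length is 2; if any higher register appears in the PARALLEL,
   the 32-bit form (length 4) is needed.  */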

/* Emit constant pools for -mconstpool.  */
static void
csky_emit_constant_pools (void)
{
    rtx_insn *insn;
    HOST_WIDE_INT address = 0;
    Mfix *fix;

    minipool_fix_head = minipool_fix_tail = NULL;

    /* The first insn must always be a note, or the code below won't
       scan it properly.  */
    insn = get_insns ();
    gcc_assert (NOTE_P (insn));

    /* Scan the insns and record the operands that need fixing.  */
    for (insn = next_nonnote_insn (insn); insn;
	 insn = next_nonnote_insn (insn))
      {
	if (BARRIER_P (insn))
	  push_csky_minipool_barrier (insn, address);
	else if (INSN_P (insn))
	  {
	    rtx_jump_table_data *table;

	    note_csky_invalid_constants (insn, address, true);
	    address += get_attr_length (insn);

	    /* If the insn is a vector jump, add the size of the table
	       and skip the table.  */
	    if (tablejump_p (insn, NULL, &table))
	      {
		address += get_csky_jump_table_size (table);
		insn = table;
	      }
	  }
      }

    fix = minipool_fix_head;

    /* Now scan the fixups and perform the required changes.  */
    while (fix)
      {
	Mfix *ftmp;
	Mfix *last_added_fix;
	Mfix *last_barrier = NULL;
	Mfix *this_fix;
	Mnode *mp;
	bool has_pending_const = false;

	/* Check if there is any pending constant not processed.  */
	for (mp = minipool_vector_head; mp; mp = mp->next)
	  if (mp->refcount > 0)
	    {
	      has_pending_const = true;
	      break;
	    }

	/* If no pending constant, skip over barrier insns.  */
	if (has_pending_const == false)
	  {
	    while (fix && BARRIER_P (fix->insn))
	      fix = fix->next;
	    if (fix == NULL)
	      break;
	  }

	last_added_fix = NULL;

	for (ftmp = fix; ftmp; ftmp = ftmp->next)
	  {
	    if (BARRIER_P (ftmp->insn))
	      {
		if (minipool_vector_head
		    && ftmp->address >= minipool_vector_head->max_address)
		  break;

		last_barrier = ftmp;
	      }
	    else
	      {
		ftmp->minipool = add_csky_minipool_forward_ref (ftmp);
		if (ftmp->minipool == NULL)
		  break;
	      }
	    last_added_fix = ftmp;  /* Keep track of the last fix added.  */
	  }

	/* If the last added fix is a barrier, dump minipool after it.  */
	if (last_added_fix && BARRIER_P (last_added_fix->insn))
	  ftmp = last_barrier;
	else
	  {
	    /* FTMP is the first fix that we can't fit into this pool.
	       Insert a new barrier in the code somewhere between the
	       previous fix and this one, and arrange to jump around it.  */
	    HOST_WIDE_INT max_address;

	    /* The last item on the list of fixes must be a barrier, so
	       we can never run off the end of the list of fixes without
	       last_barrier being set.  */
	    gcc_assert (ftmp);

	    /* Check that there isn't another fix that is in range that
	       we couldn't fit into this pool because the pool was
	       already too large: we need to put the pool before such an
	       instruction.  The pool itself may come just after the
	       fix because create_csky_fix_barrier also allows space for a
	       jump instruction.  */
	    max_address = minipool_vector_head->max_address;
	    if (ftmp->address < max_address)
	      max_address = ftmp->address + 1;
	    last_barrier = create_csky_fix_barrier (last_added_fix, ftmp,
						    max_address);
	  }

	assign_csky_minipool_offsets (last_barrier);

	/* Scan over the fixes we have identified for this pool, fixing them
	   up and adding the constants to the pool itself.  */
	for (this_fix = fix; this_fix && ftmp != this_fix;
	     this_fix = this_fix->next)
	  {
	    if (GET_CODE (this_fix->insn) != BARRIER)
	      {
		rtx addr
		  = plus_constant (Pmode,
				   gen_rtx_LABEL_REF (VOIDmode,
						      minipool_vector_label),
				   this_fix->minipool->offset);
		rtx insn_body = PATTERN (this_fix->insn);
		rtx src = XEXP (insn_body, 1);
		*this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
		if (GET_CODE (this_fix->value) == SYMBOL_REF)
		  emit_insn_after (gen_rtx_UNSPEC_VOLATILE (VOIDmode,
							    gen_rtvec (1, src),
							    VUNSPEC_SYMBOL_REF),
				   this_fix->insn);
	      }
	  }
	dump_csky_minipool (last_barrier->insn);
	fix = ftmp;
	if (fix->next == NULL)
	  break;
      }

    /* Free the minipool memory.  */
    obstack_free (&minipool_obstack, minipool_startobj);
}


/* Implement TARGET_MACHINE_DEPENDENT_REORG.  This handles
   -mconstpool output.  */

static void
csky_reorg (void)
{
  if (TARGET_CONSTANT_POOL)
    csky_emit_constant_pools ();
}


/* Check to see if the current function contains a branch insn with the
   far jump attribute set.  Such a function uses the LR register.  */

static bool
csky_far_jump_used_p (void)
{
  rtx_insn *insn;
  if (cfun->machine->far_jump_used)
    return true;

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (GET_CODE (insn) == JUMP_INSN
	/* Ignore tablejump patterns.  */
	&& GET_CODE (PATTERN (insn)) != ADDR_VEC
	&& GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
	&& get_attr_far_jump (insn) == FAR_JUMP_YES)
      {
	cfun->machine->far_jump_used = 1;
	return true;
      }
  return false;
}


/* Return the mask of registers used by the current function.  Set
   COUNT to the number of registers used.  */

static unsigned int
get_csky_live_regs (int *count)
{
  int reg;
  unsigned int live_regs_mask = 0;

  *count = 0;
  for (reg = 0; reg < CSKY_NGPR_REGS; reg++)
    {
      bool save = false;

      /* Ignore unsupported registers.  */
      if (CSKY_TARGET_ARCH (CK801) && reg > 8 && reg < 13)
	continue;
      if ((CSKY_TARGET_ARCH (CK801)
	   || CSKY_TARGET_ARCH (CK802)
	   || CSKY_TARGET_ARCH (CK803))
	  && reg > 15)
	break;

      /* Call-preserved registers that are live must be saved.  */
      if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
	save = true;

      /* Frame pointer marked used.  */
      else if (frame_pointer_needed && reg == FRAME_POINTER_REGNUM)
	save = true;

      /* This is required for CK801/802 where FP is a fixed reg, otherwise
	 we end up with no FP value available to the DWARF-2 unwinder.  */
      else if (crtl->calls_eh_return && reg == FRAME_POINTER_REGNUM)
	save = true;

      /* CK801/802 also need special handling for LR because it's clobbered
	 by far jumps.  */
      else if ((CSKY_TARGET_ARCH (CK801) || CSKY_TARGET_ARCH (CK802))
	       && reg == CSKY_LR_REGNUM
	       && (!crtl->is_leaf || csky_far_jump_used_p ()))
	save = true;

      /* Register is used for EH data return.  */
      else if (crtl->calls_eh_return
	       && reg >= CSKY_FIRST_EH_RETDATA_REGNUM
	       && reg <= CSKY_LAST_EH_RETDATA_REGNUM)
	save = true;

      /* We need a temporary reg to hold the offset for adjusting the SP
	 for a large stack frame.  */
      if (reg == CSKY_STACKADJUST_REGNUM
	  && cfun->machine->reg_offset > CSKY_MAX_SP_ADJUST * 2)
	save = true;

      /* Add reg to the mask.  */
      if (save)
	{
	  (*count)++;
	  live_regs_mask |= (1 << reg);
	}
    }
  return live_regs_mask;
}

/* Compute the stack frame layout, storing sizes of the various pieces
   in cfun->machine.

   Stack frames constructed in the prologue look like:
			... caller's frame ...
	incoming SP ->	caller's outbound argument overflow
			argument spill
	optional FP ->	register save
			local variables
			alloca() space
	adjusted SP ->	outbound argument overflow

   with SP/FP pointing at the base (low address) of the respective area,
   and each area aligned to a word boundary.  */

static void
csky_layout_stack_frame (void)
{
  machine_function *infp = cfun->machine;
  int reg_count;

  if (infp->frame_init_p)
    return;

  /* Get sizes of local variables & outbound arguments.  */
  infp->outbound_size = CSKY_STACK_ALIGN (crtl->outgoing_args_size);
  infp->local_offset = infp->outbound_size;
  infp->local_size = CSKY_STACK_ALIGN (get_frame_size ());
  infp->reg_offset = infp->local_offset + infp->local_size;

  /* Now compute size of argument spill + saved regs.  These do not
     need explicit alignment since they are already word-sized.  */
  infp->reg_mask = get_csky_live_regs (&reg_count);
  infp->reg_size = reg_count * UNITS_PER_WORD;
  infp->arg_offset = infp->reg_offset + infp->reg_size;
  infp->arg_size = crtl->args.pretend_args_size;
  infp->frame_size = infp->arg_offset + infp->arg_size;
  infp->frame_init_p = reload_completed;
}
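
/* A worked example, assuming UNITS_PER_WORD == 4: a function with
   8 bytes of outgoing arguments, 10 bytes of locals, two saved
   registers, and no stdarg spills gets outbound_size == 8,
   local_offset == 8, local_size == 12 (rounded up), reg_offset == 20,
   reg_size == 8, arg_offset == 28, and frame_size == 28.  */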

/* Implement TARGET_CAN_ELIMINATE.  */
static bool
csky_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  if (to == STACK_POINTER_REGNUM)
    return !frame_pointer_needed;
  return true;
}

/* Worker function for INITIAL_ELIMINATION_OFFSET macro.
   Define the offset between two registers, one to be eliminated, and
   the other its replacement, at the start of a routine.  */

HOST_WIDE_INT
csky_initial_elimination_offset (int from, int to)
{
  int offset;

  csky_layout_stack_frame ();

  /* Set OFFSET to the offset to the initial stack pointer.  */
  switch (from)
    {
    case FRAME_POINTER_REGNUM:
      offset = cfun->machine->reg_offset;
      break;

    case ARG_POINTER_REGNUM:
      offset = cfun->machine->arg_offset;
      break;

    default:
      gcc_unreachable ();
    }

  /* If we are asked for the offset to the frame pointer instead,
     then subtract the difference between the frame pointer and stack
     pointer.  */
  if (to == FRAME_POINTER_REGNUM)
    offset -= cfun->machine->reg_offset;
  return offset;
}
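
/* Continuing the worked example above (reg_offset == 20,
   arg_offset == 28): eliminating FRAME_POINTER_REGNUM to
   STACK_POINTER_REGNUM gives 20, ARG_POINTER_REGNUM to
   STACK_POINTER_REGNUM gives 28, and ARG_POINTER_REGNUM to
   FRAME_POINTER_REGNUM gives 28 - 20 == 8.  */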


/* Determine where to put an argument to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   ARG is a description of the argument.  */
static rtx
csky_function_arg (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);

  if (*pcum < CSKY_NPARM_REGS)
    return gen_rtx_REG (arg.mode, CSKY_FIRST_PARM_REGNUM + *pcum);

  return NULL_RTX;
}


/* Return the number of registers (words) needed to pass an argument of
   MODE and TYPE.  */

static int
csky_num_arg_regs (machine_mode mode, const_tree type)
{
  int size;

  if (type && mode == BLKmode)
    size = int_size_in_bytes (type);
  else
    size = GET_MODE_SIZE (mode);

  return CSKY_NUM_WORDS (size);
}


/* Implement TARGET_FUNCTION_ARG_ADVANCE.  */

static void
csky_function_arg_advance (cumulative_args_t pcum_v,
			   const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int param_size = csky_num_arg_regs (arg.mode, arg.type);

  if (*pcum + param_size > CSKY_NPARM_REGS)
    *pcum = CSKY_NPARM_REGS;
  else
    *pcum += param_size;
}


/* Implement TARGET_FUNCTION_VALUE.  */
static rtx
csky_function_value (const_tree type, const_tree func,
		     bool outgoing ATTRIBUTE_UNUSED)
{
  machine_mode mode;
  int unsignedp ATTRIBUTE_UNUSED;
  int size;

  mode = TYPE_MODE (type);
  size = int_size_in_bytes (type);

  /* Since we promote return types, we must promote the mode here too.  */
  if (INTEGRAL_TYPE_P (type))
    {
      mode = promote_function_mode (type, mode, &unsignedp, func, 1);
      return gen_rtx_REG (mode, CSKY_FIRST_RET_REGNUM);
    }

  if (mode == BLKmode && size > UNITS_PER_WORD
      && size <= UNITS_PER_WORD * 2)
    {
      rtx ret_regs[2];
      ret_regs[0] = gen_rtx_EXPR_LIST (SImode,
				       gen_rtx_REG (SImode,
						    CSKY_FIRST_RET_REGNUM),
				       GEN_INT (0 * UNITS_PER_WORD));
      ret_regs[1] = gen_rtx_EXPR_LIST (SImode,
				       gen_rtx_REG (SImode,
						    CSKY_FIRST_RET_REGNUM + 1),
				       GEN_INT (1 * UNITS_PER_WORD));

      rtvec vec = gen_rtvec (2, ret_regs[0], ret_regs[1]);

      return gen_rtx_PARALLEL (mode, vec);
    }

  return gen_rtx_REG (mode, CSKY_FIRST_RET_REGNUM);
}


/* Implement TARGET_LIBCALL_VALUE.  */
static rtx
csky_libcall_value (machine_mode mode,
		    const_rtx libcall ATTRIBUTE_UNUSED)
{
  return gen_rtx_REG (mode, CSKY_FIRST_RET_REGNUM);
}


/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
   On C-SKY, only r0 can return results.  */

static bool
csky_function_value_regno_p (const unsigned int regno)
{
  return (regno == CSKY_FIRST_RET_REGNUM);
}


/* Return an RTX indicating where the return address to the
   calling function can be found.  */
rtx
csky_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return NULL_RTX;

  return get_hard_reg_initial_val (Pmode, CSKY_LR_REGNUM);
}
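
/* This is what __builtin_return_address (0) resolves to: the entry
   value of LR.  Deeper frames (count != 0) are not supported and
   yield NULL.  */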


/* Implement TARGET_ARG_PARTIAL_BYTES.
   Return the number of bytes at the beginning of an argument
   that must be put in registers. The value must be zero for arguments
   that are passed entirely in registers or
   that are entirely pushed on the stack.  */
static int
csky_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  int param_size = csky_num_arg_regs (arg.mode, arg.type);

  if (*pcum < CSKY_NPARM_REGS
      && *pcum + param_size > CSKY_NPARM_REGS)
    return (CSKY_NPARM_REGS - *pcum) * UNITS_PER_WORD;

  return 0;
}
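
/* Worked example, assuming CSKY_NPARM_REGS == 4 and
   UNITS_PER_WORD == 4: a DImode argument (two words) arriving with
   *pcum == 3 gets 4 bytes in the last parameter register and the
   remaining 4 bytes on the stack, so this returns 4.  */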


/* Implement TARGET_SETUP_INCOMING_VARARGS.
   On C-SKY the copy from the argument registers to the stack is
   emitted by the prologue hooks, so here we just have to note how
   much stack space to save.  */

static void
csky_setup_incoming_varargs (cumulative_args_t pcum_v,
			     const function_arg_info &arg,
			     int *pretend_size,
			     int second_time ATTRIBUTE_UNUSED)
{
  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
  CUMULATIVE_ARGS local_cum;
  cumulative_args_t local_cum_v = pack_cumulative_args (&local_cum);
  int regs_to_push;

  cfun->machine->uses_anonymous_args = 1;
  local_cum = *pcum;
  csky_function_arg_advance (local_cum_v, arg);
  regs_to_push = CSKY_NPARM_REGS - local_cum;
  if (regs_to_push)
    *pretend_size  = regs_to_push * UNITS_PER_WORD;
}


/* Implement TARGET_ASM_OUTPUT_MI_THUNK.
   Output code to add DELTA to the first argument, and then jump
   to FUNCTION.  Used for C++ multiple inheritance.  */

static void
csky_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
		      HOST_WIDE_INT delta,
		      HOST_WIDE_INT vcall_offset,
		      tree function)
{
  const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
  const char *thiz = "a0";
  const char *reg0 = "t0";
  const char *reg1 = "t1";
  int maxoff = 4096;		/* Constant range for addi/subi.  */

  assemble_start_function (thunk, fnname);
  final_start_function (emit_barrier (), file, 1);

  rtx fnaddr = XEXP (DECL_RTL (function), 0);

  if (CSKY_TARGET_ARCH (CK801))
    {
      /* CK801 can't use t registers and has only 16-bit addi/subi.  */
      reg0 = "l0";
      reg1 = "l1";
      maxoff = 256;
      if (vcall_offset > maxoff || vcall_offset < -maxoff)
	fprintf (file, "\tpush\tl0, l1\n");
      else if (delta > maxoff || delta < -maxoff)
	fprintf (file, "\tpush\tl0\n");
    }

  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    thiz = "a1";

  /* Add delta to this_rtx.  */
  if (delta != 0)
    {
      if (delta > maxoff || delta < -maxoff)
	{
	  fprintf (file, "\tlrw\t%s, %ld\n", reg0, (long)delta);
	  fprintf (file, "\taddu\t%s, %s, %s\n", thiz, thiz, reg0);
	}
      else
	fprintf (file, "\t%s\t%s, %s, %ld\n",
		 (delta > 0 ? "addi" : "subi"), thiz, thiz,
		 (long)(delta > 0 ? delta : -delta));
    }

  /* If needed, add *(*this_rtx + vcall_offset) to this_rtx.  */
  if (vcall_offset != 0)
    {
      fprintf (file, "\tld.w\t%s, (%s, 0)\n", reg0, thiz);

      if (vcall_offset > maxoff || vcall_offset < -maxoff)
	{
	  fprintf (file, "\tlrw\t%s, %ld\n", reg1, (long)vcall_offset);
	  fprintf (file, "\taddu\t%s, %s, %s\n", reg0, reg0, reg1);
	}
      else
	fprintf (file, "\t%s\t%s, %s, %ld\n",
		 (vcall_offset > 0 ? "addi" : "subi"), reg0, reg0,
		 (long)(vcall_offset > 0 ? vcall_offset : -vcall_offset));

      /* Load the offset and add it to this_rtx.  */
      fprintf (file, "\tld.w\t%s, (%s, 0)\n", reg0, reg0);
      fprintf (file, "\taddu\t%s, %s, %s\n", thiz, thiz, reg0);
    }

  /* We must pop the scratch regs individually instead of using the
     "pop" insn, which also does a return.  */
  if (CSKY_TARGET_ARCH (CK801))
    {
      if (vcall_offset > maxoff || vcall_offset < -maxoff)
	{
	  fprintf (file, "\tld.w\tl0, (sp, 0)\n");
	  fprintf (file, "\tld.w\tl1, (sp, 4)\n");
	  fprintf (file, "\taddi\t sp, sp, 8\n");
	}
      else if (delta > maxoff || delta < -maxoff)
	{
	  fprintf (file, "\tld.w\tl0, (sp, 0)\n");
	  fprintf (file, "\taddi\tsp, sp, 4\n");
	}
    }

  fprintf (file, "\tjbr\t");
  output_addr_const (file, fnaddr);
  fprintf (file, "\n");

  final_end_function ();
  assemble_end_function (thunk, fnname);
}
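
/* As a rough sketch of the output (assuming a non-CK801 target, a
   non-aggregate return, DELTA == 4, and VCALL_OFFSET == 0), the thunk
   body emitted above reduces to:

	addi	a0, a0, 4
	jbr	function  */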


/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.
   Conditionally modify five variables fixed_regs, call_used_regs, global_regs,
   reg_names, and reg_class_contents, to take into account any dependence of
   these register sets on target flags.

   CK801 has registers r0-r8 and r13-r15.  CK802 and CK803 have registers
   r0-r15 (the "low" registers).  Other CPUs use registers r0-r31 when
   -mhigh-registers is given, otherwise only r0-r15.

   CK801 only has 16-bit instructions, most of which can only reference
   r0-r7 (the "mini" registers).  So we mark regs outside that range as
   fixed.  -msmart can be used on other arch variants to force the same
   behavior because it results in smaller code size.

   TODO: investigate whether it's beneficial to use r8-r13 as a spill
   class when TARGET_MINI_REGISTERS instead of making them unusable by
   the register allocator.  */

static void
csky_conditional_register_usage (void)
{
  /* Only use the mini registers in smart mode or on CK801.  */
  if (TARGET_MINI_REGISTERS)
    {
      int i;

      for (i = (CSKY_LAST_MINI_REGNUM + 1); i < 32; i++)
	{
	  fixed_regs[i] = 1;
	  call_used_regs[i] = 1;
	}
    }
  /* For some targets, the high registers are not supported.
     CPUs other than CK801/CK802/CK803 use the high registers
     depending on the -mhigh-registers option.  */
  else if (CSKY_TARGET_ARCH (CK802)
	   || CSKY_TARGET_ARCH (CK803)
	   || !TARGET_HIGH_REGISTERS)
    {
      int i;

      for (i = CSKY_FIRST_HIGH_REGNUM; i <= CSKY_LAST_HIGH_REGNUM; i++)
	{
	  fixed_regs[i] = 1;
	  call_used_regs[i] = 1;
	}
    }

  /* On CK801/CK802 we must mark lr as a fixed register because it is
     used to implement far jumps.
     FIXME: perhaps there should be a command-line option controlling
     use of lr for far jumps on ck802 when !TARGET_MINI_REGS, when
     you really want lr to be available to the register allocator and
     you know there are no far jumps in the code.  */
  if (CSKY_TARGET_ARCH (CK801) || CSKY_TARGET_ARCH (CK802))
    {
      fixed_regs[CSKY_LR_REGNUM] = 1;
      call_used_regs[CSKY_LR_REGNUM] = 0;
    }

  /* The hi/lo registers are only supported in DSP mode.  */
  if (!TARGET_DSP)
    {
      fixed_regs[CSKY_HI_REGNUM] = 1;
      call_used_regs[CSKY_HI_REGNUM] = 1;

      fixed_regs[CSKY_LO_REGNUM] = 1;
      call_used_regs[CSKY_LO_REGNUM] = 1;
    }

  /* The V_REGS are only supported in hard float mode.  */
  if (!TARGET_HARD_FLOAT)
    {
      int regno;

      for (regno = CSKY_FIRST_VFP_REGNUM;
	   regno <= CSKY_LAST_VFP_REGNUM; regno++)
	{
	  fixed_regs[regno] = 1;
	  call_used_regs[regno] = 1;
	}
    }

  /* In PIC mode, the gb register is not available for register
     allocation.  Since gb is not clobbered by function
     calls, set its call_used_regs to 0.  */
  if (flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 0;
    }
}

/* Implement TARGET_HARD_REGNO_NREGS.  */
static unsigned int
csky_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (regno >= CSKY_FIRST_VFP_REGNUM && !CSKY_TARGET_ARCH (CK803))
    return 1;
  else
    return CSKY_NUM_REGS (mode);
}

/* Implement TARGET_HARD_REGNO_MODE_OK.  Return true if REGNO is a
   valid register for holding a quantity of type MODE.  */

static bool
csky_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  int nregs = CSKY_NUM_REGS (mode);

  /* We can't handle more than doubleword sizes for any register.  */
  if (nregs > 2)
    return false;

  /* For general registers, return true if MODE is one word in size.
     When the size is larger than one word, two successive hard
     registers are needed to hold the data.  */
  if (regno < CSKY_NGPR_REGS)
    {
      if (nregs < 2)
	return true;
      else if (TARGET_MINI_REGISTERS)
	return (regno < CSKY_LAST_MINI_REGNUM);
      else if (CSKY_TARGET_ARCH (CK802)
	       || CSKY_TARGET_ARCH (CK803)
	       || !TARGET_HIGH_REGISTERS)
	/* Without high register, r15 cannot hold doubleword data.  */
	return (regno < (CSKY_SP_REGNUM - 1));
      else
	return (regno < (CSKY_SP_REGNUM - 1)
		|| (regno >= CSKY_LR_REGNUM
		    && regno < CSKY_LAST_HIGH_UNFIXED_REGNUM));
    }
  else if (regno == CSKY_CC_REGNUM)
    return (mode == CCmode);
  else if (regno == CSKY_HI_REGNUM || regno == CSKY_LO_REGNUM)
    {
      /* Don't allocate hi,lo register for float data even
	 if in dsp mode, because it will cause high cost
	 to reload data from hi,lo register.  */
      if (!TARGET_DSP || mode == SFmode || mode == DFmode)
	return false;
      else if (nregs == 2)
	return (regno == CSKY_HI_REGNUM);
      else
	return true;
    }
  else if (CSKY_VREG_P (regno) && TARGET_HARD_FLOAT)
    return true;

  return false;
}

/* Implement TARGET_MODES_TIEABLE_P.  We can't tie DFmode with other modes
   when V_REGs might be in use because those registers mess with the stored
   bits.  */
static bool
csky_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  return !(TARGET_HARD_FLOAT
	   && mode1 != mode2
	   && (mode1 == DFmode || mode2 == DFmode));
}

/* Implement TARGET_CAN_CHANGE_MODE_CLASS.
   V_REG registers can't do subreg as all values are reformatted to
   internal precision.  */
static bool
csky_can_change_mode_class (machine_mode from,
			    machine_mode to,
			    reg_class_t rclass)
{
  return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
	  || !reg_classes_intersect_p (V_REGS, rclass));
}

/* Implement TARGET_CLASS_LIKELY_SPILLED_P.
   We need to define this for MINI_REGS when we only use r0-r7.
   Otherwise we can end up using r0-r4 for function arguments, and then
   don't have enough left over to do doubleword arithmetic.  */

static bool
csky_class_likely_spilled_p (reg_class_t rclass)
{
  if ((TARGET_MINI_REGISTERS && rclass == MINI_REGS)
      || rclass == C_REGS)
    return true;

  return false;
}


/* Implement TARGET_PREFERRED_RELOAD_CLASS.
   Given an rtx X being reloaded into a reg required to be
   in class CLASS, return the class of reg to actually use.
   In general this is just CLASS.  */

static reg_class_t
csky_preferred_reload_class (rtx x, reg_class_t rclass)
{
  if (TARGET_HARD_FLOAT
      && CONST_DOUBLE_P (x)
      && (GET_MODE (x) == DFmode || GET_MODE (x) == SFmode)
      && rclass == NO_REGS)
    return GENERAL_REGS;
  return rclass;
}


/* Implement TARGET_CLASS_MAX_NREGS.
   Return the maximum number of consecutive registers of class rclass needed
   to hold a value of mode mode.
   On C-SKY, this is the size of MODE in words,
   except in the FP regs, where a single reg is always enough.  */

static unsigned char
csky_class_max_nregs (reg_class_t rclass, machine_mode mode)
{
  if (rclass == V_REGS)
    return 1;
  else
    return CSKY_NUM_REGS (mode);
}


/* Implement TARGET_SECONDARY_RELOAD.
   If copying a register of RCLASS from/to X requires an intermediate
   register, the hook should return the REGISTER_CLASS required for this
   intermediate register.
   If no intermediate register is required, it should return NO_REGS.
   If more than one intermediate register is required, describe the one
   that is closest in the copy chain to the reload register.  */

reg_class_t
csky_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
		       reg_class_t rclass,
		       machine_mode mode,
		       secondary_reload_info *sri ATTRIBUTE_UNUSED)
{
  int regno = -1;

  /* Extract the real regno from X.  */
  if (GET_CODE (x) == SIGN_EXTEND)
    {
      int off = 0;

      x = XEXP (x, 0);

      if (reg_renumber)
	regno = true_regnum (x);
      else
	{
	  while (GET_CODE (x) == SUBREG)
	    {
	      off += subreg_regno_offset (REGNO (SUBREG_REG (x)),
					  GET_MODE (SUBREG_REG (x)),
					  SUBREG_BYTE (x), GET_MODE (x));
	      x = SUBREG_REG (x);
	    }

	  if (GET_CODE (x) == REG)
	    regno = REGNO (x) + off;
	}
    }
  else if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  /* We always require a general register when copying anything to
     HI/LO_REGNUM, except when copying an SImode value from HI/LO_REGNUM
     to a general register, or when copying from register 0.  */
  if ((rclass == HILO_REGS || rclass == LO_REGS || rclass == HI_REGS)
      && !CSKY_GENERAL_REGNO_P (regno))
    return GENERAL_REGS;

  if (rclass == V_REGS && !CSKY_GENERAL_REGNO_P (regno))
    {
      /* Reload between vector reg and memory does not need an
	 intermediate register.  */
      if (MEM_P (x) && (mode == SFmode || mode == DFmode))
	return NO_REGS;
      else
	return GENERAL_REGS;
    }

  return NO_REGS;
}
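
/* For example, copying from a vector register into HILO_REGS requires
   a GENERAL_REGS intermediate, since the source regno is not a general
   register, while a reload between a vector register and an SFmode or
   DFmode memory location needs no intermediate at all.  */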

/* Implement TARGET_SPILL_CLASS.
   Try spilling to a larger register class before spilling to memory.  */

static reg_class_t
csky_spill_class (reg_class_t rclass, machine_mode mode ATTRIBUTE_UNUSED)
{
  if ((rclass == MINI_REGS && !TARGET_MINI_REGISTERS)
      || (rclass == LOW_REGS && TARGET_HIGH_REGISTERS))
    return GENERAL_REGS;
  return NO_REGS;
}

/* Convert a static initializer array of feature bits to sbitmap
   representation.  */
static void
csky_initialize_isa (sbitmap isa, const enum csky_isa_feature *isa_bits)
{
  bitmap_clear (isa);
  while (*isa_bits != CSKY_ISA_FEATURE_GET (none))
    bitmap_set_bit (isa, *(isa_bits++));
}


/* Configure a build target TARGET from the user-specified options OPTS and
   OPTS_SET.  */
static void
csky_configure_build_target (struct csky_build_target *target,
			     struct cl_target_option *opts,
			     struct gcc_options *opts_set)
{
  const struct csky_processors *csky_selected_tune = NULL;
  struct csky_processors *csky_selected_cpu = NULL;
  struct csky_processors *csky_selected_arch = NULL;
  sbitmap all_sbits = sbitmap_alloc (CSKY_ISA_FEATURE_GET (max));
  bitmap_clear (all_sbits);

  bitmap_clear (target->isa);
  target->core_name = NULL;
  target->arch_name = NULL;

  if (opts_set->x_csky_arch_option)
    csky_selected_arch = &all_architectures[opts->x_csky_arch_option];

  if (opts_set->x_csky_cpu_option)
    {
      csky_selected_cpu = &all_cores[opts->x_csky_cpu_option];
      csky_selected_tune = &all_cores[opts->x_csky_cpu_option];
    }

  if (csky_selected_cpu)
    {
      /* TODO: support combinations of features
	 between different cpu & arch; this should be based on the arch.  */
      if (csky_selected_arch
	  && (csky_selected_cpu->base_arch != csky_selected_arch->base_arch))
	warning (0, "cpu %s is not based on arch %s, ignoring the arch",
		 csky_selected_cpu->name, csky_selected_arch->name);
      if (!csky_selected_arch)
	csky_selected_arch = &all_architectures[csky_selected_cpu->base_arch];
      csky_initialize_isa (all_sbits, csky_selected_arch->isa_bits);
      target->core_name = csky_selected_cpu->name;
    }
  else if (csky_selected_arch)
    {
      csky_selected_cpu = csky_selected_arch;
      target->arch_name = csky_selected_arch->name;
    }
  else /* If the user did not specify a processor, choose one for them.  */
    {
      csky_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
      csky_selected_arch = &all_architectures[csky_selected_cpu->base_arch];
      csky_initialize_isa (all_sbits, csky_selected_arch->isa_bits);
      target->core_name = csky_selected_cpu->name;
    }

  /* The selected cpu may be an architecture, so look up tuning by core ID.  */
  if (!csky_selected_tune)
    csky_selected_tune = &all_cores[csky_selected_cpu->core];
  gcc_assert (csky_selected_tune);

  gcc_assert (csky_selected_arch);
  gcc_assert (csky_selected_cpu);
  csky_initialize_isa (target->isa, csky_selected_cpu->isa_bits);
  bitmap_ior (target->isa, target->isa, all_sbits);

  /* Finish initializing the target structure.  */
  target->arch_pp_name = csky_selected_cpu->arch;
  target->base_arch = csky_selected_cpu->base_arch;
  target->arch_core = csky_selected_cpu->core;

  sbitmap_free (all_sbits);
}


/* Implement TARGET_OPTION_OVERRIDE.  */

static void
csky_option_override (void)
{
  csky_active_target.isa = sbitmap_alloc (CSKY_ISA_FEATURE_GET (max));

  /* Create the default target_options structure.  We need this early
     to configure the overall build target.  */
  target_option_default_node = target_option_current_node
			     = build_target_option_node (&global_options);

  csky_configure_build_target (&csky_active_target,
			      TREE_TARGET_OPTION (target_option_default_node),
			      &global_options_set);

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

  csky_base_arch = csky_active_target.base_arch;

  if (flag_pic && !(CSKY_TARGET_ARCH (CK810) || CSKY_TARGET_ARCH (CK807)))
    {
      flag_pic = 0;
      warning (0, "%qs is not supported by arch %s",
	       "-fPIC", csky_active_target.arch_pp_name);
    }

  /* Check floating-point options for consistency.  */
  if (TARGET_HARD_FLOAT)
    {
      const struct csky_fpu_desc *csky_selected_fpu = NULL;

      if (csky_fpu_index == TARGET_FPU_auto)
	{
	  const char *target_fpu_name;
	  bool ok;
	  int fpu_index;

#ifdef CSKY_FPUTYPE_DEFAULT
	  target_fpu_name = CSKY_FPUTYPE_DEFAULT;
#else
	  target_fpu_name = "fpv2";
#endif

	  if (csky_active_target.core_name != NULL
	      && !strchr (csky_active_target.core_name, 'f'))
	    target_fpu_name = "auto";
	  else if (CSKY_TARGET_ARCH (CK803) || !TARGET_DOUBLE_FLOAT)
	    target_fpu_name = "fpv2_sf";
	  else if (TARGET_DOUBLE_FLOAT && TARGET_FDIVDU)
	    target_fpu_name = "fpv2_divd";

	  ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &fpu_index,
				      CL_TARGET);
	  gcc_assert (ok);
	  csky_fpu_index = (enum csky_fpu_type) fpu_index;
	}

      if (CSKY_TARGET_ARCH (CK801) || CSKY_TARGET_ARCH (CK802))
	error ("%qs is not supported by arch %s",
	       "-mhard-float", csky_active_target.arch_pp_name);
      else if (csky_fpu_index == TARGET_FPU_auto)
	error ("%<-mhard-float%> is not supported by the selected CPU");
      else
	{
	  csky_selected_fpu = &all_fpus[csky_fpu_index];
	  sbitmap fpu_bits = sbitmap_alloc (CSKY_ISA_FEATURE_GET (max));
	  csky_initialize_isa (fpu_bits, csky_selected_fpu->isa_bits);

	  bitmap_ior (csky_active_target.isa, csky_active_target.isa,
		      fpu_bits);

	  sbitmap_free (fpu_bits);
	}
    }
  else
    {
      if (TARGET_DOUBLE_FLOAT > 0)
	warning (0, "%<-mdouble-float%> ignored without %<-mhard-float%>");
      TARGET_DOUBLE_FLOAT = 0;
      if (TARGET_FDIVDU > 0)
	warning (0, "%<-mfdivdu%> ignored without %<-mhard-float%>");
      TARGET_FDIVDU = 0;
    }

  /* Extended LRW instructions are enabled by default on CK801, disabled
     otherwise.  */
  if (TARGET_ELRW == -1)
    TARGET_ELRW = CSKY_TARGET_ARCH (CK801);

  /* DSP is enabled either by the processor feature or -mdsp
     command-line option.  There is no -mno-dsp option as the assembler
     doesn't take one.  */
  if (!TARGET_DSP)
    TARGET_DSP = CSKY_ISA_FEATURE (dsp);

  /* There's both -mdiv and -mno-div.  Take default from processor if
     neither is specified explicitly.  */
  if (TARGET_DIV == -1)
    TARGET_DIV = CSKY_ISA_FEATURE (div);

  /* TARGET_CONSTANT_POOL is mandatory for CK801 and CK802 and optional
     for other CPUs.
     The reason why the compiler has to generate constant pools for CK801/2
     instead of deferring to the assembler is that these cores don't have a
     long branch instruction other than jbsr, which clobbers lr.  So for
     the compiler to correctly save/restore lr it has to know whether there
     are long branches, which depends on having accurate branch length
     counts, which in turn depends on having control over where constant
     pools are placed.  */
  if ((CSKY_TARGET_ARCH (CK801) || CSKY_TARGET_ARCH (CK802))
      && !TARGET_CONSTANT_POOL)
    error ("%qs is not supported by arch %s",
	   "-mno-constpool", csky_active_target.arch_pp_name);
  else if (TARGET_CONSTANT_POOL == -1)
    TARGET_CONSTANT_POOL = (CSKY_TARGET_ARCH (CK801)
			    || CSKY_TARGET_ARCH (CK802));

  /* TARGET_MINI_REGISTERS is mandatory for CK801, the default for CK802,
     and optional for other CPUs.  TARGET_HIGH_REGISTERS is incompatible
     with TARGET_MINI_REGISTERS, is not supported by CK801/802/803,
     and is the default for other processors.
     See csky_conditional_register_usage.  */
  if (TARGET_MINI_REGISTERS > 0 && TARGET_HIGH_REGISTERS > 0)
    error ("%<-msmart%> is incompatible with %<-mhigh-registers%>");
  else if (CSKY_TARGET_ARCH (CK801)
	   || CSKY_TARGET_ARCH (CK802)
	   || CSKY_TARGET_ARCH (CK803))
    {
      if (CSKY_TARGET_ARCH (CK801)
	  || (CSKY_TARGET_ARCH (CK802) && TARGET_MINI_REGISTERS == -1))
	TARGET_MINI_REGISTERS = 1;
      else if (TARGET_MINI_REGISTERS == -1)
	TARGET_MINI_REGISTERS = 0;
      if (TARGET_HIGH_REGISTERS > 0)
	warning (0, "%qs is not supported by arch %s",
		 "-mhigh-registers", csky_active_target.arch_pp_name);
      TARGET_HIGH_REGISTERS = 0;
    }
  else
    {
      if (TARGET_MINI_REGISTERS == -1)
	TARGET_MINI_REGISTERS = 0;
      if (TARGET_HIGH_REGISTERS == -1)
	TARGET_HIGH_REGISTERS = !TARGET_MINI_REGISTERS;
    }

  /* -mmultiple-stld is the default for everything but CK801, which
     doesn't support it.  */
  if (CSKY_TARGET_ARCH (CK801))
    {
      if (TARGET_MULTIPLE_STLD > 0)
	warning (0, "%qs is not supported by arch %s",
		 "-mmultiple-stld", csky_active_target.arch_pp_name);
      TARGET_MULTIPLE_STLD = 0;
    }

  /* Initialize boolean versions of the architectural flags, for use
     in the .md file.  */

#undef	CSKY_ISA
#define CSKY_ISA(IDENT, DESC)						  \
  {									  \
    csky_arch_isa_features[CSKY_ISA_FEATURE_GET (IDENT)] =		   \
      bitmap_bit_p (csky_active_target.isa, CSKY_ISA_FEATURE_GET (IDENT)); \
  }
#include "csky_isa.def"
#undef	CSKY_ISA

  /* TODO  */

  /* Resynchronize the saved target options.  */
  cl_target_option_save (TREE_TARGET_OPTION (target_option_default_node),
			 &global_options);

#ifdef ENABLE_TPF_DEBUG
  /* Don't emit DWARF4 unless specifically selected.  The TPF
     debuggers do not yet support DWARF 3/4.  */
  if (!global_options_set.x_dwarf_strict)
    dwarf_strict = 1;
  if (!global_options_set.x_dwarf_version)
    dwarf_version = 3;
#endif

  /* Don't run the scheduler before reload by default,
     since it tends to increase register pressure.  */
  if (!global_options_set.x_flag_schedule_insns)
    flag_schedule_insns = 0;

  csky_add_gc_roots ();
}


/* Return TRUE if X contains any references to TLS symbols.  */

bool
csky_tls_referenced_p (rtx x)
{
  if (!TARGET_TLS)
    return false;

  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, x, ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
	return true;

      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
	 TLS offsets, not real symbol references.  */
      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
	iter.skip_subrtxes ();
    }
  return false;
}


/* Implement TARGET_CANNOT_FORCE_CONST_MEM.
   Determine if it's legal to put X into the constant pool.  This
   is not possible for the address of thread-local symbols, which
   is checked above.  */

static bool
csky_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED,
			     rtx x)
{
  return csky_tls_referenced_p (x);
}


/* Implement TARGET_LEGITIMATE_CONSTANT_P.  Returns nonzero if the
   constant value X is a legitimate general operand.
   It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */

static bool
csky_legitimate_constant_p (machine_mode mode, rtx x)
{
  return (!csky_cannot_force_const_mem (mode, x)
	  && CONSTANT_P (x));
}


/* Return true if X is valid as a CSKY addressing register.  */

static bool
is_csky_address_register_rtx_p (rtx x, int strict_p)
{
  int regno;

  if (!x)
    return false;
  if (!REG_P (x))
    return false;

  regno = REGNO (x);

  if (strict_p)
    return (CSKY_GENERAL_REGNO_P (regno)
	    || CSKY_GENERAL_REGNO_P (reg_renumber[regno]));
  else
    return CSKY_GENERAL_REGNO_P (regno) || regno >= FIRST_PSEUDO_REGISTER;
}


/* Return TRUE if X is a thread-local symbol.  */

static bool
csky_tls_symbol_p (rtx x)
{
  if (!TARGET_TLS)
    return false;

  if (GET_CODE (x) != SYMBOL_REF)
    return false;

  return SYMBOL_REF_TLS_MODEL (x) != 0;
}


/* Handle lazy initialization of __tls_get_addr libfunc.  */
static GTY(()) rtx tls_get_addr_libfunc;

static rtx
get_tls_get_addr (void)
{
  if (!tls_get_addr_libfunc)
    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
  return tls_get_addr_libfunc;
}


/* Emit a call to __tls_get_addr.  */

static rtx_insn *
csky_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
{
  rtx label, labelno, unspec, tmp;
  rtx_insn *insns;

  start_sequence ();

  labelno = GEN_INT (tls_labelno++);
  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_TLS_LABEL);
  unspec = gen_rtx_UNSPEC (Pmode,
			   gen_rtvec (3, x, GEN_INT (reloc), label),
			   UNSPEC_TLS);
  tmp = gen_reg_rtx (SImode);
  emit_move_insn (reg, unspec);
  emit_move_insn (tmp, label);
  emit_insn (gen_addsi3 (reg, reg, tmp));
  *valuep = emit_library_call_value (get_tls_get_addr (),
				     NULL_RTX, LCT_PURE, /* LCT_CONST?	*/
				     Pmode, reg, Pmode);
  insns = get_insns ();
  end_sequence ();
  return insns;
}

/* Helper function for csky_legitimize_address, to handle the TLS cases.
   REG is a scratch register and may be null.  */

rtx
csky_legitimize_tls_address (rtx x, rtx reg)
{
  rtx dest, tp, label, labelno, unspec, ret, eqv, addend, tmp;
  rtx_insn *insns;
  unsigned int model = SYMBOL_REF_TLS_MODEL (x);

  if (!reg)
    reg = gen_reg_rtx (SImode);

  switch (model)
    {
    case TLS_MODEL_GLOBAL_DYNAMIC:
      insns = csky_call_tls_get_addr (x, reg, &ret, TLS_GD32);
      dest = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, dest, ret, x);
      return dest;

    case TLS_MODEL_LOCAL_DYNAMIC:
      insns = csky_call_tls_get_addr (x, reg, &ret, TLS_LDM32);

      /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
	 share the LDM result with other LD model accesses.  */
      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx), UNSPEC_TLS);
      dest = gen_reg_rtx (Pmode);
      emit_libcall_block (insns, dest, ret, eqv);

      /* Load the addend.  */
      addend = gen_rtx_UNSPEC (Pmode,
			       gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
			       UNSPEC_TLS);
      addend = force_reg (SImode, addend);
      return gen_rtx_PLUS (Pmode, dest, addend);

    case TLS_MODEL_INITIAL_EXEC:
      labelno = GEN_INT (tls_labelno++);
      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_TLS_LABEL);
      unspec = gen_rtx_UNSPEC (Pmode,
			       gen_rtvec (3, x, GEN_INT (TLS_IE32), label),
			       UNSPEC_TLS);
      tmp = gen_reg_rtx (SImode);
      emit_move_insn (reg, unspec);
      emit_move_insn (tmp, label);
      emit_insn (gen_addsi3 (reg, reg, tmp));
      emit_move_insn (reg, gen_const_mem (Pmode, reg));
      tp = gen_rtx_REG (SImode, CSKY_TLS_REGNUM);
      return gen_rtx_PLUS (Pmode, tp, reg);

    case TLS_MODEL_LOCAL_EXEC:
      unspec = gen_rtx_UNSPEC (Pmode,
			       gen_rtvec (2, x, GEN_INT (TLS_LE32)),
			       UNSPEC_TLS);
      emit_move_insn (reg, unspec);
      tp = gen_rtx_REG (SImode, CSKY_TLS_REGNUM);
      return gen_rtx_PLUS (Pmode, tp, reg);

    default:
      abort ();
    }
}


/* Implement TARGET_LEGITIMIZE_ADDRESS.  */

static rtx
csky_legitimize_address (rtx x, rtx orig_x ATTRIBUTE_UNUSED,
			 machine_mode mode)
{
  if (csky_tls_symbol_p (x))
    return csky_legitimize_tls_address (x, NULL_RTX);

  if (GET_CODE (x) == PLUS)
    {
      rtx xop0 = XEXP (x, 0);
      rtx xop1 = XEXP (x, 1);

      if (is_csky_address_register_rtx_p (xop0, 0)
	  && CONST_INT_P (xop1))
	{
	  HOST_WIDE_INT offset = INTVAL (xop1);

	  /* Try to replace ld32 rx,(ry, offset) with addi16 rz, oimm8
	     plus ld16 rx,(rz, new_ld_offset) to avoid emitting a
	     32-bit ld, noting that the addi has a limited range.  */
	  if (optimize_size
	      && offset > CSKY_LD16_MAX_OFFSET (mode)
	      && offset <= (CSKY_ADDI16_MAX_IMM
			   + CSKY_LD16_MAX_OFFSET (mode)))
	    {
	      HOST_WIDE_INT new_ld_offset
		= offset & CSKY_LD16_OFFSET_MASK (mode);

	      xop0 = force_operand (plus_constant (Pmode, xop0,
						   offset - new_ld_offset),
				    NULL_RTX);
	      x = plus_constant (Pmode, xop0, new_ld_offset);
	    }
	  else if (offset < 0 && offset >= (-CSKY_SUBI16_MAX_IMM))
	    x = force_operand (x, NULL_RTX);
	  else if (offset > CSKY_LD16_MAX_OFFSET (mode)
		   || offset < 0)
	    {
	      /* For the remaining cases, force the constant into a
		 register.  */
	      xop1 = force_reg (SImode, xop1);
	      x = gen_rtx_PLUS (SImode, xop0, xop1);
	    }
	}

      /* If the index is stored in a register, force the
	 base into a register as well.  */
      if (is_csky_address_register_rtx_p (xop1, 0)
	  && !is_csky_address_register_rtx_p (xop0, 0))
	{
	  xop0 = force_operand (xop0, NULL_RTX);
	  x = gen_rtx_PLUS (SImode, xop0, xop1);
	}
    }
  /* Make sure to take full advantage of the pre-indexed addressing mode
     with absolute addresses, which often allows the base register to
     be factorized for multiple adjacent memory references, and it might
     even allow the minipool to be avoided entirely.  */
  else if (CONST_INT_P (x)  && optimize > 0)
    {
      HOST_WIDE_INT mask, base, index;
      rtx base_reg;

      mask = CSKY_LD16_OFFSET_MASK (mode);
      base = INTVAL (x) & ~mask;
      index = INTVAL (x) & mask;
      base_reg = force_reg (SImode, GEN_INT (base));
      x = plus_constant (Pmode, base_reg, index);
    }

  return x;
}
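
/* As a worked example of the optimize_size transformation above
   (purely illustrative): an SImode load from (r2 + D), where D lies
   just beyond the 16-bit ld range, becomes roughly

	addi	rz, r2, D - D'
	ld16	rx, (rz, D')

   with D' = D & CSKY_LD16_OFFSET_MASK (SImode), so both instructions
   keep their 16-bit encodings.  */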


/* Return nonzero if INDEX is valid for an address index operand.
   CK801 supports only the 16-bit ld;
   CK802 supports the 16- and 32-bit ld;
   others support ld and ldr.  */

static int
ck801_legitimate_index_p (machine_mode mode, rtx index,
			  int strict_p ATTRIBUTE_UNUSED)
{
  enum rtx_code code = GET_CODE (index);

  /* When the mode size is 8 or larger, we may need two ld instructions
     to fetch the data, so both the index and (index+1) must be valid.  */
  if (GET_MODE_SIZE (mode) >= 8)
    return (code == CONST_INT
	    && INTVAL (index) < CSKY_LD16_MAX_OFFSET (SImode)
	    && INTVAL (index) >= 0 && (INTVAL (index) & 3) == 0);

  if (code == CONST_INT && GET_MODE_SIZE (mode) > 0
      && INTVAL (index) <= CSKY_LD16_MAX_OFFSET (mode)
      && INTVAL (index) >= 0)
    return ((INTVAL (index) % GET_MODE_SIZE (mode)) == 0);

  return 0;
}
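
/* For example, for an SImode load on CK801 a constant index must be
   word-aligned and within the 16-bit ld range: (rx, 4) is accepted,
   while (rx, 6) fails the alignment check and a large offset fails
   the range check.  */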


static int
ck802_legitimate_index_p (machine_mode mode, rtx index,
			  int strict_p ATTRIBUTE_UNUSED)
{
  enum rtx_code code = GET_CODE (index);

  /* When the mode size is 8 or larger, we may need two ld instructions
     to fetch the data, so both the index and (index+1) must be valid.  */
  if (GET_MODE_SIZE (mode) >= 8)
    return (code == CONST_INT
	    && INTVAL (index) < CSKY_LD32_MAX_OFFSET (SImode)
	    && INTVAL (index) >= 0 && (INTVAL (index) & 3) == 0);

  if (code == CONST_INT && GET_MODE_SIZE (mode) > 0
      && INTVAL (index) <= CSKY_LD32_MAX_OFFSET (mode)
      && INTVAL (index) >= 0)
    return ((INTVAL (index) % GET_MODE_SIZE (mode)) == 0);

  return 0;
}


/* The ldr instruction has the form ldr rz, (rx, ry << i), where i can
   be 0, 1, 2, or 3.  Check that SHIFT is valid for CODE: a shift count
   of 0-3 for ASHIFT, or a power-of-2 multiplier up to 8 for MULT.  */

static bool
is_ldr_shift_p (HOST_WIDE_INT shift, enum rtx_code code)
{
  if (code == ASHIFT)
    return (shift >= 0 && shift <= 3);
  else if (code == MULT)
    return (shift == 1
	    || shift == 2
	    || shift == 4
	    || shift == 8);
  else
    return false;
}
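
/* E.g. (ry << 2) is accepted as an ASHIFT by 0-3, and (ry * 4) as the
   equivalent MULT by a power of 2; (ry * 3) is rejected since 3 is
   not among 1, 2, 4, 8.  */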


static int
ck810_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  enum rtx_code code = GET_CODE (index);

  if (TARGET_HARD_FLOAT
      && (mode == SFmode || mode == DFmode))
    return (code == CONST_INT && INTVAL (index) < 1024
	    && INTVAL (index) >= 0
	    && (INTVAL (index) & 3) == 0);

  if (code == CONST_INT)
    {
      /* When the mode size is 8 or larger, we may need two ld
	 instructions to fetch the data, so both the index and
	 (index+1) must be valid.  */
      if (GET_MODE_SIZE (mode) >= 8)
	return (INTVAL (index) < CSKY_LD32_MAX_OFFSET (SImode)
		&& INTVAL (index) >= 0 && (INTVAL (index) & 3) == 0);

      if (GET_MODE_SIZE (mode) > 0
	  && INTVAL (index) <= CSKY_LD32_MAX_OFFSET (mode)
	  && INTVAL (index) >= 0)
	return ((INTVAL (index) % GET_MODE_SIZE (mode)) == 0);
    }
  /* Allow ld.w rx, (gb, sym@got) when -fpic specially.  */
  else if (code == UNSPEC)
    return (flag_pic == 1
	    && (XINT (index, 1) == UNSPEC_PIC_SYMBOL_PLT
		|| XINT (index, 1) == UNSPEC_PIC_SYMBOL_GOT));
  /* The following index forms are for the ldr instruction.  Since ldr
     cannot load doubleword data, the mode size must not be larger
     than 4.  */
  else if (GET_MODE_SIZE (mode) <= 4)
    {
      if (is_csky_address_register_rtx_p (index, strict_p))
	return 1;
      else if (code == MULT || code == ASHIFT)
	{
	  rtx xiop0 = XEXP (index, 0);
	  rtx xiop1 = XEXP (index, 1);

	  /* FIXME can the xiop1 be the reg and xiop0 be the int when mult?  */
	  return (is_csky_address_register_rtx_p (xiop0, strict_p)
		  && CONST_INT_P (xiop1)
		  && is_ldr_shift_p (INTVAL (xiop1), code));
	}
    }

  return 0;
}


static int
csky_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
{
  if (CSKY_TARGET_ARCH (CK801))
    return ck801_legitimate_index_p (mode, index, strict_p);
  else if (CSKY_TARGET_ARCH (CK802))
    return ck802_legitimate_index_p (mode, index, strict_p);
  else
    return ck810_legitimate_index_p (mode, index, strict_p);
}


/* Implement TARGET_LEGITIMATE_ADDRESS_P.
   Recognizes RTL expressions that are valid memory addresses for an
   instruction.  The MODE argument is the machine mode for the MEM
   expression that wants to use this address.

   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS
   should convert common non-canonical forms to canonical form so that
   they will be recognized.  */

static bool
csky_legitimate_address_p (machine_mode mode, rtx addr, bool strict_p)
{
  enum rtx_code code = GET_CODE (addr);

  /* Match the RTX form emitted for constant pool references.
     After reload, constants split into minipools will have addresses
     derived from a LABEL_REF.  */
  if (reload_completed
      && ((code == LABEL_REF)
	   || (code == CONST
	       && GET_CODE (XEXP (addr, 0)) == PLUS
	       && GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF
	       && CONST_INT_P (XEXP (XEXP (addr, 0), 1)))))
    return 1;

  if (is_csky_address_register_rtx_p (addr, strict_p))
    return 1;
  /* This is a PC-relative load, possibly generated for a constant pool
     reference.  */
  else if (GET_CODE (addr) == LABEL_REF)
    return 1;

  if (code == PLUS)
    {
      rtx xop0 = XEXP (addr, 0);
      rtx xop1 = XEXP (addr, 1);

      return ((is_csky_address_register_rtx_p (xop0, strict_p)
	       && csky_legitimate_index_p (mode, xop1, strict_p))
	      || (is_csky_address_register_rtx_p (xop1, strict_p)
		  && csky_legitimate_index_p (mode, xop0, strict_p)));
    }

  return 0;
}
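
/* So, for SImode say, addresses like (reg r2), (plus (reg r2)
   (const_int 8)), and a post-reload constant pool LABEL_REF are all
   accepted, while a PLUS whose constant term is not a legitimate
   index for the mode is rejected.  */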


/* Functions to save and restore machine-specific function data.  */

static struct machine_function *
csky_init_machine_status (void)
{
  struct machine_function *machine;

  machine = ggc_cleared_alloc<machine_function> ();

#if CSKY_FT_UNKNOWN != 0
  machine->func_type = CSKY_FT_UNKNOWN;
#endif
  return machine;
}


/* Implement INIT_EXPANDERS.  */

void
csky_init_expanders (void)
{
  /* Arrange to initialize and mark the machine per-function status.  */
  init_machine_status = csky_init_machine_status;
}


/* Implement TARGET_CANNOT_COPY_INSN_P.
   We must not copy any rtx that uses a pc-relative address.  */

static bool
csky_cannot_copy_insn_p (rtx_insn *insn)
{
  subrtx_iterator::array_type array;
  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
    {
      const_rtx x = *iter;
      if (GET_CODE (x) == UNSPEC
	  && (XINT (x, 1) == UNSPEC_TLS_LABEL
	      || XINT (x, 1) == UNSPEC_PIC_SYMBOL_GOTPC_GRS))
	return true;
    }
  return false;
}


/* Extract the parts of an RTL expression that is a valid memory address
   for an instruction.  Return FALSE if it is an invalid memory address.  */

struct csky_address
{
  rtx base, index, symbol, label, disp;
  HOST_WIDE_INT scale;
};

static bool
decompose_csky_address (rtx addr, struct csky_address *out)
{
  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
  HOST_WIDE_INT scale = 1;
  rtx scale_rtx = NULL_RTX;
  int i;

  out->base = out->index = out->symbol = out->label = out->disp = NULL_RTX;
  out->scale = 0;

  if (REG_P (addr))
    {
      out->base = addr;
      return true;
    }

  if (GET_CODE (addr) == LABEL_REF)
    {
      out->label = addr;
      return true;
    }

  if (GET_CODE (addr) == CONST)
    addr = XEXP (addr, 0);

  if (GET_CODE (addr) == PLUS)
    {
      rtx addends[2], op;

      addends[0] = XEXP (addr, 0);
      addends[1] = XEXP (addr, 1);

      if (GET_CODE (addends[0]) == LABEL_REF && CONST_INT_P (addends[1]))
	{
	  out->label = addends[0];
	  out->disp = addends[1];
	  return true;
	}

      if (!REG_P (addends[0]))
	std::swap (addends[0], addends[1]);

      for (i = 0; i < 2; ++i)
	{
	  op = addends[i];
	  switch (GET_CODE (op))
	    {
	    case REG:
	      if (!base)
		base = op;
	      else if (!index)
		index = op;
	      else
		return false;
	      break;
	    case CONST_INT:
	    case UNSPEC:
	      if (disp)
		return false;
	      disp = op;
	      break;
	    case MULT:
	      if (index)
		return false;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      if (!CONST_INT_P (index) && !CONST_INT_P (scale_rtx))
		return false;
	      else if (CONST_INT_P (index))
		std::swap (index, scale_rtx);
	      scale = INTVAL (scale_rtx);
	      break;
	    case ASHIFT:
	      if (index)
		return false;
	      index = XEXP (op, 0);
	      scale_rtx = XEXP (op, 1);
	      if (!CONST_INT_P (scale_rtx))
		return false;
	      scale = scale << INTVAL (scale_rtx);
	      break;
	    default:
	      return false;
	    }
	}
    }

  if (!base)
    return false;

  out->base = base;
  out->index = index;
  out->disp = disp;
  out->scale = scale;

  return true;
}
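
/* Two illustrative decompositions: (plus (reg r2) (const_int 8))
   gives base = r2 and disp = 8, while (plus (reg r2) (mult (reg r3)
   (const_int 4))) gives base = r2, index = r3, and scale = 4.  */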

/* Helper function for the csky_simple_mem_operand predicate.  Returns
   true if OP is an address of the form reg + displacement.  */

bool
csky_simple_addr_operand_p (rtx op)
{
  struct csky_address addr;

  if (!decompose_csky_address (op, &addr))
    return false;

  /* FIXME: PIC-related code.  Check whether the symbol address is
     loaded from the GOT.  */
  if (addr.disp && GET_CODE (addr.disp) == UNSPEC)
    return false;
  if (!addr.index && !addr.symbol)
    return true;
  return false;
}


/* Print the UNSPEC operand X to STREAM.  */

static void
csky_output_pic_addr_const (FILE *stream, rtx x, int code)
{

  if (GET_CODE (x) != UNSPEC)
    return;

  if (UNSPEC_TLS == XINT (x, 1))
    {
      /* FIXME: this case is never reached.  */
      return;
    }

  csky_print_operand (stream, XVECEXP (x, 0, 0), code);

  switch (XINT (x, 1))
    {
    case UNSPEC_PIC_SYMBOL_GOTOFF:
      fputs ("@GOTOFF", stream);
      break;
    case UNSPEC_PIC_SYMBOL_PLT:
      fputs ("@PLT", stream);
      break;
    case UNSPEC_PIC_SYMBOL_GOT:
      fputs ("@GOT", stream);
      break;
    case UNSPEC_PIC_SYMBOL_GOTPC:
      fputs ("@GOTPC", stream);
      break;
    case UNSPEC_PIC_SYMBOL_BSR:
      break;
    default:
      break;
    }
}


/* Output the constpool label according to the rtx expression X.  */

static void
csky_output_constpool_label (FILE *stream, rtx x)
{
  char buf[15];

  gcc_assert (GET_CODE (x) == LABEL_REF);
  x = XEXP (x, 0);

  if (GET_CODE (x) == UNSPEC_VOLATILE && XINT (x, 1) == VUNSPEC_POOL_LABEL)
    {
      ASM_GENERATE_INTERNAL_LABEL (buf, CSKY_CONSTPOOL_LABEL_PREFIX,
				   INTVAL (XVECEXP (x, 0, 0)));
      assemble_name (stream, buf);
    }
}


/* Implement TARGET_PRINT_OPERAND_ADDRESS.  */

static void
csky_print_operand_address (FILE *stream,
			    machine_mode mode ATTRIBUTE_UNUSED,
			    rtx x)
{

  struct csky_address addr;

  decompose_csky_address (x, &addr);

  if (addr.label && addr.disp && GET_CODE (addr.disp) == CONST_INT)
    {
      fprintf (stream, "[");
      csky_output_constpool_label (stream, addr.label);
      fprintf (stream, "+%d]", (int) INTVAL (addr.disp));
    }
  else if (addr.label)
    {
      fprintf (stream, "[");
      csky_output_constpool_label (stream, addr.label);
      fprintf (stream, "]");
    }
  else if (addr.symbol && addr.disp && GET_CODE (addr.disp) == CONST_INT)
    {
      fprintf (stream, "[");
      output_addr_const (stream, addr.symbol);
      fprintf (stream, "+%d]", (int) INTVAL (addr.disp));
    }
  else if (addr.symbol)
    {
      fprintf (stream, "[");
      output_addr_const (stream, addr.symbol);
      fprintf (stream, "]");
    }
  else if (addr.disp && GET_CODE (addr.disp) == CONST_INT)
    fprintf (stream, "(%s, %d)",
	     reg_names[REGNO (addr.base)], (int) INTVAL (addr.disp));
  else if (addr.disp && GET_CODE (addr.disp) == UNSPEC)
    {
      if (REGNO (addr.base) != CSKY_GB_REGNUM)
	fprintf (stream, "(%s, ", reg_names[REGNO (addr.base)]);
      else
	fprintf (stream, "[");
      csky_output_pic_addr_const (stream, addr.disp, 0);
      fprintf (stream, "%s", (REGNO (addr.base) != CSKY_GB_REGNUM)
	       ? ")" : "]");
    }
  else if (addr.index)
    fprintf (stream, "(%s, %s << %d)",
	     reg_names[REGNO (addr.base)], reg_names[REGNO (addr.index)],
	     exact_log2 ((int) (addr.scale)));
  else
    fprintf (stream, "(%s, 0)", reg_names[REGNO (addr.base)]);
}
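
/* Sample outputs from the cases above: "(r2, 8)" for base plus
   displacement, "(r2, 0)" for a bare base register, "(r2, r3 << 2)"
   for a scaled index with scale == 4, and a bracketed constant pool
   label such as "[.LCP0]" or "[.LCP0+4]" (the exact label spelling
   comes from ASM_GENERATE_INTERNAL_LABEL).  */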


/* Implement TARGET_PRINT_OPERAND.
   Print operand X (an rtx) in assembler syntax to file STREAM
   according to modifier CODE.

   'N'	print the log2(X+1), mainly used for bmaski
   'P'	print the log2(X)
   'Q'	print the log2(~X)
   'O'	print a decimal number
   'M'	print a decimal number as its negative
   'R'	print the next register or memory location along, i.e. the lsw
	in a doubleword value
   'H'	print the high 16 bits of a constant.  */

static void
csky_print_operand (FILE *stream, rtx x, int code)
{
  switch (code)
    {
    case 'N':
      if ((INTVAL (x) & 0xffffffff) == 0xffffffff)
	fprintf (stream, "0");
      else
	fprintf (stream, "%d",
		 (int) exact_log2 ((INTVAL (x) & 0xffffffff) + 1) % 32);
      break;
    case 'P':
      fprintf (stream, "%d",
	       (int) exact_log2 (INTVAL (x) & 0xffffffff));
      break;
    case 'Q':
      fprintf (stream, "%d",
	       (int) exact_log2 (~INTVAL (x) & 0xffffffff));
      break;
    case 'O':
      fprintf (stream, "%d", (int) INTVAL (x));
      break;
    case 'M':
      fprintf (stream, "%d", (int) (-INTVAL (x)));
      break;
    case 'R':
      /* Next location along in memory or register.  */
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x) + 1], stream);
	  break;
	case MEM:
	  csky_print_operand_address
	    (stream, GET_MODE (x), XEXP (adjust_address (x, SImode, 4), 0));
	  break;
	default:
	  gcc_unreachable ();
	}
      break;
    case 'H':
      fprintf (stream, "%ld", (long)((INTVAL (x) & 0xFFFF0000) >> 16));
      break;
    default:
      switch (GET_CODE (x))
	{
	case REG:
	  fputs (reg_names[REGNO (x)], stream);
	  break;
	case MEM:
	  output_address (GET_MODE (x), XEXP (x, 0));
	  break;
	case UNSPEC:
	  csky_output_pic_addr_const (stream, x, code);
	  break;
	default:
	  output_addr_const (stream, x);
	  break;
	}
      break;
    }
}
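
/* A few examples of the modifiers above: for X == 7, %N prints 3
   (log2 (7+1)); for X == 8, %P prints 3; for X == 5, %M prints -5;
   and for X == 0x12340000, %H prints the high halfword 0x1234 (in
   decimal).  */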



/* Implement TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS.  */

static bool
csky_allocate_stack_slots_for_args (void)
{
  /* Naked functions should not allocate stack slots for arguments.  */
  return !CSKY_FUNCTION_IS_NAKED (get_csky_current_func_type ());
}


/* Can we generate a constant with a single instruction, without
   using lrw?  */

static int
const_ok_for_cskyv2 (HOST_WIDE_INT value)
{
  /* Try exact power of two. It can be generated by bgeni.  */
  if (CSKY_CONST_OK_FOR_Ub (value))
    return 1;

  /* Try exact power of two - 1.  It can be generated by bmaski.  */
  if (CSKY_CONST_OK_FOR_Uc (value) && value != -1)
    return 1;

  /* Try if it can be generated by movi.  */
  if (CSKY_CONST_OK_FOR_I (value))
    return 1;

  /* The constant can be generated by movih.
     Notice that movih is a 32-bit instruction.  */
  if (CSKY_CONST_OK_FOR_MOVIH (value))
    return 1;

  return 0;
}


/* Tricks for synthesizing constants from values that can be directly
   manipulated by machine instructions.  */

enum csky_inline_const_type
{
  IC_UNINLINABLE = 0, /* Not inlineable */
  IC_SINGLE,	      /* Single instruction */
  IC_APPEND_NOT,      /* Single instruction followed by a not */
  IC_APPEND_ADDI,     /* Single insn followed by an addi */
  IC_APPEND_SUBI,     /* Single insn followed by a subi */
  IC_BGENI_ADDI,      /* Single insn(bgeni) followed by an addi */
  IC_BGENI_SUBI,      /* Single insn(bgeni) followed by a subi */
  IC_APPEND_BSETI,    /* Single insn followed by bseti */
  IC_APPEND_MOVI,     /* Single insn followed by movi */
  IC_APPEND_BCLRI,    /* Single insn followed by bclri */
  IC_APPEND_ROTLI,    /* Single insn followed by rotli */
  IC_APPEND_LSLI,     /* Single insn followed by lsli */
  IC_APPEND_IXH,      /* Single insn followed by ixh */
  IC_APPEND_IXW	      /* Single insn followed by ixw */
};


/* Try tricks to load a constant inline and return the trick number if
   successful, or IC_UNINLINABLE.  */

static enum csky_inline_const_type
try_csky_constant_tricks (HOST_WIDE_INT value, HOST_WIDE_INT *x,
			  HOST_WIDE_INT *y)
{
  HOST_WIDE_INT i, value_invert;
  unsigned HOST_WIDE_INT bit, shf, rot, lobits, hibits;

  value &= 0xffffffff;
  value_invert = ~value & 0xffffffff;

  if (const_ok_for_cskyv2 (value))
    {
      *x = value;
      return IC_SINGLE;
    }

  /* Since movih is a 32-bit instruction, do not use it here; better
     code may be generated later.  */
  if (const_ok_for_cskyv2 (value_invert)
      && !CSKY_CONST_OK_FOR_MOVIH (value_invert))
    {
      *x = value_invert;
      return IC_APPEND_NOT;
    }

  /* One immediate-generating instruction plus one 16-bit addi or subi.  */
  for (i = 1; i <= 32; i++)
    {
      if (const_ok_for_cskyv2 (value - i)
	  && !CSKY_CONST_OK_FOR_MOVIH (value - i))
	{
	  *x = value - i;
	  *y = i;
	  return IC_APPEND_ADDI;
	}

      if (const_ok_for_cskyv2 (value + i)
	  && !CSKY_CONST_OK_FOR_MOVIH (value + i))
	{
	  *x = value + i;
	  *y = i;
	  return IC_APPEND_SUBI;
	}
    }

  /* Generate bgeni + addi.  */
  if (CSKY_CONST_OK_FOR_Ub (value & 0xfffff000))
    {
      *x = (value & 0xfffff000);
      *y = (value & 0xfff);
      return IC_BGENI_ADDI;
    }

  /* Generate bgeni + subi.  */
  lobits = value & 0xfff;
  hibits = (unsigned HOST_WIDE_INT)(value & 0xfffff000) + (1 << 12);
  if (exact_log2 (hibits) >= 1
      && exact_log2 (hibits) <= 30
      && lobits != 0)
    {
      *x = hibits;
      *y = (0x1000 - lobits);
      return IC_BGENI_SUBI;
    }

  /* One immediate-generating instruction plus one bseti or bclri.  */
  bit = 0x80000000ULL;
  for (i = 0; i <= 31; i++)
    {
      if (const_ok_for_cskyv2 (value & ~bit)
	  && !CSKY_CONST_OK_FOR_MOVIH (value & ~bit))
	{
	  *y = bit;
	  *x = (value & ~bit);
	  return IC_APPEND_BSETI;
	}

      if (const_ok_for_cskyv2 (value | bit)
	  && !CSKY_CONST_OK_FOR_MOVIH (value | bit))
	{
	  *y = ~bit & 0xffffffff;
	  *x = value | bit;
	  return IC_APPEND_BCLRI;
	}

      bit >>= 1;
    }

  /* One immediate-generating instruction plus one rotli or lsli.  */
  shf = value;
  rot = value;
  for (i = 1; i < 31; i++)
    {
      int c;

      /* Rotate left.  */
      c = rot << 31;
      rot >>= 1;
      rot &= 0x7FFFFFFF;
      rot |= c;

      if (const_ok_for_cskyv2 (rot) && !CSKY_CONST_OK_FOR_MOVIH (rot))
	{
	  *y = i;
	  *x = rot;
	  return IC_APPEND_ROTLI;
	}

      /* Can't use logical shift when low order bit is one.  */
      if (shf & 1)
	shf = 0;
      else
	shf >>= 1;

      if (shf != 0 && const_ok_for_cskyv2 (shf)
	  && !CSKY_CONST_OK_FOR_MOVIH (shf))
	{
	  *y = i;
	  *x = shf;
	  return IC_APPEND_LSLI;
	}
    }

  /* One immediate-generating instruction plus one ixh.  */
  if (CSKY_ISA_FEATURE (E2)
      && (value % 3) == 0
      && const_ok_for_cskyv2 (value / 3)
      && !CSKY_CONST_OK_FOR_MOVIH (value / 3))
    {
      *x = value / 3;
      return IC_APPEND_IXH;
    }

  /* One immediate-generating instruction plus one ixw.  */
  if (CSKY_ISA_FEATURE (E2)
      && (value % 5) == 0
      && const_ok_for_cskyv2 (value / 5)
      && !CSKY_CONST_OK_FOR_MOVIH (value / 5))
    {
      *x = value / 5;
      return IC_APPEND_IXW;
    }

  /* Generate movih + bseti.  */
  if (CSKY_CONST_OK_FOR_Ub (value & 0xffff))
    {
      *x = value & 0xffff0000;
      *y = value & 0xffff;
      return IC_APPEND_BSETI;
    }

  /* Generate movih + not.  */
  if (CSKY_CONST_OK_FOR_MOVIH (value_invert))
    {
      *x = value_invert;
      return IC_APPEND_NOT;
    }

  /* One movih plus one 16-bit addi or subi.  */
  for (i = 1; i <= 32; i++)
    {
      if (CSKY_CONST_OK_FOR_MOVIH (value - i))
	{
	  *x = value - i;
	  *y = i;
	  return IC_APPEND_ADDI;
	}

      if (CSKY_CONST_OK_FOR_MOVIH (value + i))
	{
	  *x = value + i;
	  *y = i;
	  return IC_APPEND_SUBI;
	}
    }

  /* One movih, and one bseti or bclri.  */
  bit = 0x80000000ULL;
  for (i = 0; i <= 31; i++)
    {
      if (CSKY_CONST_OK_FOR_MOVIH (value & ~bit))
	{
	  *y = bit;
	  *x = value & ~bit;
	  return IC_APPEND_BSETI;
	}

      if (CSKY_CONST_OK_FOR_MOVIH (value | bit))
	{
	  *y = ~bit & 0xffffffff;
	  *x = value | bit;
	  return IC_APPEND_BCLRI;
	}

       bit >>= 1;
    }

  /* One movih, and one rotli or lsli.  */
  shf = value;
  rot = value;
  for (i = 1; i < 31; i++)
    {
      int c;

      /* Rotate left.  */
      c = rot << 31;
      rot >>= 1;
      rot &= 0x7FFFFFFF;
      rot |= c;

      if (CSKY_CONST_OK_FOR_MOVIH (rot))
	{
	  *y = i;
	  *x = rot;
	  return IC_APPEND_ROTLI;
	}

      /* Can't use logical shift when low order bit is one.  */
      if (shf & 1)
	shf = 0;
      else
	shf >>= 1;

      if (shf != 0 && CSKY_CONST_OK_FOR_MOVIH (shf))
	{
	  *y = i;
	  *x = shf;
	  return IC_APPEND_LSLI;
	}
    }

  return IC_UNINLINABLE;
}
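
/* Worked example: for value == 0xfffffff0, value_invert is 0xf, which
   (assuming movi accepts 15 and movih does not) selects IC_APPEND_NOT
   with *x == 0xf -- i.e. load 15 and invert it.  */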


/* Actually output a constant using a trick.
   FIXME: I think this would be better handled by a splitter than at the
   asm output level.  */

static const char *
csky_output_inline_const (machine_mode mode, rtx operands[])
{
  HOST_WIDE_INT x = 0, y = 0;
  enum csky_inline_const_type trick_type;
  rtx out_operands[3];
  char buf[256];
  char load_op[128];
  const char *dst_fmt;
  HOST_WIDE_INT value = INTVAL (operands[1]);
  int ivalue = (int) value;
  unsigned int uvalue = (unsigned int) value;

  trick_type = try_csky_constant_tricks (value, &x, &y);
  /* lrw's are handled separately: Large inlinable constants never get
     turned into lrw's.  Our caller uses try_csky_constant_tricks to back
     off to an lrw rather than calling this routine.  */
  gcc_assert (trick_type != IC_UNINLINABLE);

  /* Operands: 0 = dst, 1 = load immediate, 2 = adjust immediate.  */
  out_operands[0] = operands[0];
  out_operands[1] = GEN_INT (x);
  if (trick_type != IC_SINGLE && trick_type != IC_APPEND_NOT)
    out_operands[2] = GEN_INT (y);

  /* Select dst format based on mode.  */
  if (mode == DImode && TARGET_BIG_ENDIAN)
    dst_fmt = "%R0";
  else
    dst_fmt = "%0";

  /* Try movi16: 0~31, movi32: 0~65535.  */
  if (CSKY_CONST_OK_FOR_I (x))
    sprintf (load_op, "movi\t%s, %%1", dst_fmt);
  /* Try exact power of two - 1.  */
  else if (CSKY_CONST_OK_FOR_Uc (x))
    sprintf (load_op, "bmaski\t%s, %%N1", dst_fmt);
  /* Try movih.  */
  else if (CSKY_CONST_OK_FOR_MOVIH (x))
    sprintf (load_op, "movih\t%s, %%H1", dst_fmt);
  else
    {
      sprintf (load_op, "BADMOVI-inline_const %s, %%1", dst_fmt);
      gcc_unreachable ();
    }

  switch (trick_type)
    {
    case IC_SINGLE:
      strcpy (buf, load_op);
      break;
    /* Add instruction 'not'.  */
    case IC_APPEND_NOT:
      sprintf (buf, "%s\n\tnot\t%s, %s\t// %d 0x%x", load_op, dst_fmt,
	       dst_fmt, ivalue, uvalue);
      break;
    /* Add instruction 'addi'.  */
    case IC_APPEND_ADDI:
      sprintf (buf, "%s\n\taddi\t%s, %s, %%2\t// %d 0x%x", load_op,
	       dst_fmt, dst_fmt, ivalue, uvalue);
      break;
    /* Add instruction 'subi'.  */
    case IC_APPEND_SUBI:
      sprintf (buf, "%s\n\tsubi\t%s, %s, %%2\t// %d 0x%x", load_op,
	       dst_fmt, dst_fmt, ivalue, uvalue);
      break;
    /* Add instruction 'addi', the last instruction is bgeni.  */
    case IC_BGENI_ADDI:
      sprintf (buf, "%s\n\taddi\t%s, %s, %%2\t// %d 0x%x", load_op,
	       dst_fmt, dst_fmt, ivalue, uvalue);
      break;
    /* Add instruction 'subi', the last instruction is bgeni.  */
    case IC_BGENI_SUBI:
      sprintf (buf, "%s\n\tsubi\t%s, %s, %%2\t// %d 0x%x", load_op,
	       dst_fmt, dst_fmt, ivalue, uvalue);
      break;
    /* Add instruction 'bseti'.  */
    case IC_APPEND_BSETI:
      sprintf (buf, "%s\n\tbseti\t%s, %s, %%P2\t// %d 0x%x", load_op,
	       dst_fmt, dst_fmt, ivalue, uvalue);
      break;
    /* Add instruction 'movi'.  */
    case IC_APPEND_MOVI:
      sprintf (buf, "%s\n\tmovi\t%s, %%2\t// %d 0x%x", load_op, dst_fmt,
	       ivalue, uvalue);
      break;
    /* Add instruction 'bclri'.  */
    case IC_APPEND_BCLRI:
      sprintf (buf, "%s\n\tbclri\t%s, %s, %%Q2\t// %d 0x%x", load_op,
	       dst_fmt, dst_fmt, ivalue, uvalue);
      break;
    /* Add instruction 'rotli'.  */
    case IC_APPEND_ROTLI:
      sprintf (buf, "%s\n\trotli\t%s, %s, %%2\t// %d 0x%x", load_op,
	       dst_fmt, dst_fmt, ivalue, uvalue);
      break;
    /* Add instruction 'lsli'.  */
    case IC_APPEND_LSLI:
      sprintf (buf, "%s\n\tlsli\t%s, %s, %%2\t// %d 0x%x", load_op,
	       dst_fmt, dst_fmt, ivalue, uvalue);
      break;
    /* Add instruction 'ixh'.  */
    case IC_APPEND_IXH:
      sprintf (buf, "%s\n\tixh\t%s, %s, %s\t// %d 0x%x", load_op,
	       dst_fmt, dst_fmt, dst_fmt, ivalue, uvalue);
      break;
    /* Add instruction 'ixw'.  */
    case IC_APPEND_IXW:
      sprintf (buf, "%s\n\tixw\t%s, %s, %s\t// %d 0x%x", load_op,
	       dst_fmt, dst_fmt, dst_fmt, ivalue, uvalue);
      break;
    default:
      return "";
    }

  output_asm_insn (buf, out_operands);

  return "";
}
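
/* Continuing the IC_APPEND_NOT example, the text emitted for
   0xfffffff0 would be roughly:

	movi	r3, 15
	not	r3, r3	// -16 0xfffffff0

   (the register choice is illustrative only).  */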

/* This is a helper function for the Uo constraint for movsi patterns.  */

bool
csky_inlinable_constant (HOST_WIDE_INT value)
{
  HOST_WIDE_INT x, y;
  return (!(CSKY_TARGET_ARCH (CK802) || CSKY_TARGET_ARCH (CK801))
	  && try_csky_constant_tricks (value, &x, &y));
}


/* Return true if the constant VAL can be expressed by an 8-bit constant
   with a shift value, filling in *BASE and *SHIFT.  */

bool
csky_shifted_imm8_constant (unsigned HOST_WIDE_INT val,
			    unsigned int *base, unsigned int *shift)
{
  unsigned HOST_WIDE_INT mask = 0xff;
  int i;
  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
  if (val == 0)
    return false;

  for (i = 0; i < 25; i++)
    if ((val & (mask << i)) == val)
      {
	if (base)
	  *base = (unsigned int) (val >> i);
	if (shift)
	  *shift = (unsigned int) i;
	return true;
      }

  return false;
}
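
/* E.g. VAL == 0x3fc00 is 0xff shifted left by 10, so the loop above
   finds it at i == 10 and returns *base == 0xff, *shift == 10.  */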


/* Output a move of a value that is one word or smaller.  */

const char *
csky_output_move (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
		  machine_mode mode ATTRIBUTE_UNUSED)
{
  rtx dst = operands[0];
  rtx src = operands[1];
  struct csky_address op0, op1;

  if (REG_P (dst))
    {
      /* Register-to-register move.  */
      if (REG_P (src))
	{
	  int dstreg = REGNO (dst);
	  int srcreg = REGNO (src);

	  /* The hi/lo registers exchange places in little-endian mode,
	     so that their DImode ordering matches that of the other
	     general registers.  */
	  if (TARGET_BIG_ENDIAN)
	    {
	      if (dstreg == CSKY_HI_REGNUM)
		return "mthi\t%1";
	      else if (dstreg == CSKY_LO_REGNUM)
		return "mtlo\t%1";
	      else if (srcreg == CSKY_HI_REGNUM)
		return "mfhi\t%0";
	      else if (srcreg == CSKY_LO_REGNUM)
		return "mflo\t%0";
	    }
	  else
	    {
	      if (dstreg == CSKY_HI_REGNUM)
		return "mtlo\t%1";
	      else if (dstreg == CSKY_LO_REGNUM)
		return "mthi\t%1";
	      else if (srcreg == CSKY_HI_REGNUM)
		return "mflo\t%0";
	      else if (srcreg == CSKY_LO_REGNUM)
		return "mfhi\t%0";
	    }

	  if (CSKY_VREG_P (dstreg) && CSKY_VREG_P (srcreg))
	    return "fmovs\t%0, %1";
	  if (CSKY_VREG_P (dstreg))
	    return "fmtvrl\t%0, %1";
	  if (CSKY_VREG_P (srcreg))
	    return "fmfvrl\t%0, %1";

	  if (REGNO (src) == CSKY_CC_REGNUM)
	    return "mvc\t%0";
	  else
	    return "mov\t%0, %1";
	}
      /* Memory-to-register move.  */
      else if (GET_CODE (src) == MEM)
	{
	  decompose_csky_address (XEXP (src, 0), &op1);

	  if (op1.index)
	    switch (GET_MODE (src))
	      {
	      case E_HImode:
		return "ldr.h\t%0, %1";
	      case E_QImode:
		return "ldr.b\t%0, %1";
	      case E_SImode:
	      case E_SFmode:
		if (CSKY_VREG_P (REGNO (dst)))
		  return "fldrs\t%0, %1";
		else
		  return "ldr.w\t%0, %1";
	      default:
		gcc_unreachable ();
	      }
	  /* Generate lrw rx, [LABEL].  This happens when the compiler
	     generates constant pool references and uses lrw to load the
	     constant from the pool.  */
	  else if (op1.label)
	    return "lrw\t%0, %1";
	  /* Generate lrs.w rx, [symbol@GOT/PLT].  */
	  else if (flag_pic == 1 && op1.disp && GET_CODE (op1.disp) == UNSPEC)
	    return "lrs.w\t%0, %1";
	  else
	    switch (GET_MODE (src))
	      {
	      case E_HImode:
		return "ld.h\t%0, %1";
	      case E_QImode:
		return "ld.b\t%0, %1";
	      case E_SFmode:
	      case E_SImode:
		if (CSKY_VREG_P (REGNO (dst)))
		  return "flds\t%0, %1";
		else
		  return "ld.w\t%0, %1";
	      default:
		gcc_unreachable ();
	      }
	}
      /* Moving an integer constant, or an SFmode constant, to a reg.  */
      else if (GET_CODE (src) == CONST_INT
	       || (GET_CODE (src) == CONST_DOUBLE && GET_MODE (src) == SFmode))
	{
	  HOST_WIDE_INT x, y;
	  const REAL_VALUE_TYPE *d;
	  long l;

	  if (GET_CODE (src) == CONST_DOUBLE && GET_MODE (src) == SFmode)
	    {
	      d = CONST_DOUBLE_REAL_VALUE (src);
	      REAL_VALUE_TO_TARGET_SINGLE (*d, l);
	      operands[1] = GEN_INT (l);
	      src = operands[1];
	    }

	  if (try_csky_constant_tricks (INTVAL (src), &x, &y))
	    return csky_output_inline_const (SImode, operands);
	  /* Return '#' to split it.  */
	  else if (CSKY_CONST_OK_FOR_T (INTVAL (src)))
	    return "#";
	  else
	    return "lrw\t%0, %x1\t";
	}
      else if (TARGET_ANCHOR && GET_CODE (src) == SYMBOL_REF)
	{
	  if (SYMBOL_REF_FUNCTION_P (src))
	    return "lrw\t%0, %1@BTEXT";
	  else
	    return "lrw\t%0, %1@BDATA";
	}
      else if (GET_CODE (src) == UNSPEC
	       && XINT (src, 1) == UNSPEC_PIC_SYMBOL_GRS)
	return "grs\t%0, %1";
      else
	return "lrw\t%0, %1";
    }
  else if (GET_CODE (dst) == MEM)
    {
      decompose_csky_address (XEXP (dst, 0), &op0);

      if (op0.index)
	switch (GET_MODE (src))
	  {
	  case E_HImode:
	    return "str.h\t%1, %0";
	  case E_QImode:
	    return "str.b\t%1, %0";
	  case E_SFmode:
	  case E_SImode:
	    if (CSKY_VREG_P (REGNO (src)))
	      return "fstrs\t%1, %0";
	    else
	      return "str.w\t%1, %0";
	  default:
	    gcc_unreachable ();
	  }
      else
	switch (GET_MODE (dst))
	  {
	  case E_HImode:
	    return "st.h\t%1, %0";
	  case E_QImode:
	    return "st.b\t%1, %0";
	  case E_SImode:
	  case E_SFmode:
	    if (CSKY_VREG_P (REGNO (src)))
	      return "fsts\t%1, %0";
	    else
	      return "st.w\t%1, %0";
	  default:
	    gcc_unreachable ();
	  }
    }

  gcc_unreachable ();
}


/* Output a move of a value that is one word or smaller.  Specific to CK801.  */

const char *
csky_output_ck801_move (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
			machine_mode mode ATTRIBUTE_UNUSED)
{
  rtx dst = operands[0];
  rtx src = operands[1];
  struct csky_address op1;

  if (REG_P (dst))
    {
      if (REG_P (src))
	return "mov\t%0, %1";
      else if (GET_CODE (src) == MEM)
	{
	  decompose_csky_address (XEXP (src, 0), &op1);

	  /* Generate lrw rx, [LABEL].  This happens when the compiler
	     generates constant pool references and uses lrw to load the
	     constant from the pool.  */
	  if (op1.label)
	    return "lrw\t%0, %1";
	  else
	    switch (GET_MODE (src))
	      {
	      case E_HImode:
		return "ld.h\t%0, %1";
	      case E_QImode:
		return "ld.b\t%0, %1";
	      case E_SFmode:
	      case E_SImode:
		return "ld.w\t%0, %1";
	      default:
		gcc_unreachable ();
	      }
	}
      else if (GET_CODE (src) == CONST_INT)
	{
	  if (REGNO (dst) > 7)
	    return "lrw\t%0, %x1\t";
	  else if (CSKY_CONST_OK_FOR_N (INTVAL (src) + 1))
	    return "movi\t%0, %1";
	  /* Return '#' to split it.  */
	  else if (CSKY_CONST_OK_FOR_T (INTVAL (src)))
	    return "#";
	  else if (csky_shifted_imm8_constant (INTVAL (src), NULL, NULL))
	    return "#";
	  else
	    return "lrw\t%0, %x1\t";
	}
      else if (GET_CODE (src) == CONST_DOUBLE && GET_MODE (src) == SFmode)
	{
	  const REAL_VALUE_TYPE *d;
	  long l;

	  d = CONST_DOUBLE_REAL_VALUE (src);
	  REAL_VALUE_TO_TARGET_SINGLE (*d, l);
	  operands[1] = GEN_INT (l);
	  src = operands[1];

	  if (CSKY_CONST_OK_FOR_N (INTVAL (src) + 1))
	    return "movi\t%0, %1";
	  else
	    return "lrw\t%0, %x1\t";
	}
      else if (TARGET_ANCHOR && GET_CODE (src) == SYMBOL_REF)
	{
	  if (SYMBOL_REF_FUNCTION_P (src))
	    return "lrw\t%0, %1@BTEXT";
	  else
	    return "lrw\t%0, %1@BDATA";
	}
      else
	return "lrw\t%0, %1";
    }
  else if (GET_CODE (dst) == MEM)
    switch (GET_MODE (dst))
      {
      case E_HImode:
	return "st.h\t%1, %0";
      case E_QImode:
	return "st.b\t%1, %0";
      case E_SImode:
      case E_SFmode:
	return "st.w\t%1, %0";
      default:
	gcc_unreachable ();
      }

  gcc_unreachable ();
}


/* Return a sequence of instructions to perform DI or DF move.
   Since the CSKY cannot move a DI or DF in one instruction, we have
   to take care when we see overlapping source and dest registers.  */

const char *
csky_output_movedouble (rtx operands[],
			machine_mode mode ATTRIBUTE_UNUSED)
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (REG_P (dst))
    {
      if (REG_P (src))
	{
	  int dstreg = REGNO (dst);
	  int srcreg = REGNO (src);

	  if (CSKY_HILO_REG_P (srcreg))
	    {
	      if (TARGET_BIG_ENDIAN)
		return "mfhi\t%0\n\tmflo\t%R0";
	      else
		return "mfhi\t%R0\n\tmflo\t%0";
	    }
	  else if (CSKY_HILO_REG_P (dstreg))
	    {
	      if (TARGET_BIG_ENDIAN)
		return "mthi\t%1\n\tmtlo\t%R1";
	      else
		return "mthi\t%R1\n\tmtlo\t%1";
	    }
	  else if (CSKY_VREG_P (srcreg) && CSKY_VREG_P (dstreg))
	    return "fmovd\t%0, %1";
	  else if (CSKY_VREG_P (srcreg))
	    {
	      /* Since the vector registers in fpuv2_soft processors
		 like ck803f are 32 bits wide, just one insn is needed
		 to complete the move operation.  */
	      if (TARGET_SOFT_FPU)
		  return "fmfvrl\t%0, %1";
	      else if (TARGET_BIG_ENDIAN)
		return "fmfvrh\t%0, %1\n\tfmfvrl\t%R0, %1";
	      else
		return "fmfvrh\t%R0, %1\n\tfmfvrl\t%0, %1";
	    }
	  else if (CSKY_VREG_P (dstreg))
	    {
	      if (TARGET_SOFT_FPU)
		return "fmtvrl\t%0, %1";
	      else if (TARGET_BIG_ENDIAN)
		return "fmtvrh\t%0, %1\n\tfmtvrl\t%0, %R1";
	      else
		return "fmtvrh\t%0, %R1\n\tfmtvrl\t%0, %1";
	    }

	  /* Ensure the second source word is not overwritten: if the
	     destination's first register equals the source's second
	     register, copy the second word first.  */
	  if (srcreg + 1 == dstreg)
	    return "mov\t%R0, %R1\n\tmov\t%0, %1";
	  else
	    return "mov\t%0, %1\n\tmov\t%R0, %R1";
	}
      else if (GET_CODE (src) == MEM)
	{
	  rtx memexp = XEXP (src, 0);
	  int dstreg = REGNO (dst);
	  int basereg = -1;
	  struct csky_address op0;

	  decompose_csky_address (XEXP (src, 0), &op0);

	  if (GET_CODE (memexp) == LABEL_REF
	      || (GET_CODE (memexp) == CONST
		  && GET_CODE (XEXP (memexp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (memexp, 0), 0)) == LABEL_REF))
	    return "lrw\t%0, [%1]\n\tlrw\t%R0, [%R1]";
	  else if (GET_CODE (memexp) == REG)
	    basereg = REGNO (memexp);
	  else if (GET_CODE (memexp) == PLUS)
	    {
	      if (GET_CODE (XEXP (memexp, 0)) == REG)
		basereg = REGNO (XEXP (memexp, 0));
	      else if (GET_CODE (XEXP (memexp, 1)) == REG)
		basereg = REGNO (XEXP (memexp, 1));
	      else
		gcc_unreachable ();
	    }
	  else
	    gcc_unreachable ();


	  /* The destination is an FPU (FPUV2) register.  */
	  if (CSKY_VREG_P (dstreg))
	    {
	      if (op0.index)
		return "fldrd\t%0, %1";
	      else
		return "fldd\t%0, %1";
	    }
	  /* FIXME length attribute is wrong here.  */
	  if (dstreg == basereg)
	    /* Just load them in reverse order.  */
	    return "ld.w\t%R0, %R1\n\tld.w\t%0, %1";
	  else
	    return "ld.w\t%0, %1\n\tld.w\t%R0, %R1";
	}
      else if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE)
	{
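	  /* Emit each 32-bit half independently: movi for small
	     values, bmaski/bgeni for values matching constraints Uc
	     and Ub (presumably low-bit masks and single set bits,
	     respectively), otherwise lrw from the constant pool.  */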
	  split_double (src, operands + 2, operands + 3);

	  if (CSKY_CONST_OK_FOR_I (INTVAL (operands[2])))
	    output_asm_insn ("movi\t%0, %2", operands);
	  else if (CSKY_CONST_OK_FOR_Uc (INTVAL (operands[2])))
	    output_asm_insn ("bmaski\t%0, %N2", operands);
	  else if (CSKY_CONST_OK_FOR_Ub (INTVAL (operands[2])))
	    output_asm_insn ("bgeni\t%0, %P2", operands);
	  else
	    output_asm_insn ("lrw\t%0, %2", operands);

	  if (CSKY_CONST_OK_FOR_I (INTVAL (operands[3])))
	    output_asm_insn ("movi\t%R0, %3", operands);
	  else if (CSKY_CONST_OK_FOR_Uc (INTVAL (operands[3])))
	    output_asm_insn ("bmaski\t%R0, %N3", operands);

	  else if (CSKY_CONST_OK_FOR_Ub (INTVAL (operands[3])))
	    output_asm_insn ("bgeni\t%R0, %P3", operands);
	  else
	    output_asm_insn ("lrw\t%R0, %3", operands);

	  return "";
	}
      else
	gcc_unreachable ();
    }
  else if (GET_CODE (dst) == MEM && GET_CODE (src) == REG)
    {
      rtx memexp = XEXP (dst, 0);
      int srcreg = REGNO (src);
      int basereg = -1;
      struct csky_address op0;

      decompose_csky_address (XEXP (dst, 0), &op0);

      if (GET_CODE (memexp) == REG)
	basereg = REGNO (memexp);
      else if (GET_CODE (memexp) == PLUS)
	{
	  if (GET_CODE (XEXP (memexp, 0)) == REG)
	    basereg = REGNO (XEXP (memexp, 0));
	  else if (GET_CODE (XEXP (memexp, 1)) == REG)
	    basereg = REGNO (XEXP (memexp, 1));
	  else
	    gcc_unreachable ();
	}
      else
	gcc_unreachable ();

      /* The source is an FPU (FPUV2) register.  */
      if (CSKY_VREG_P (srcreg))
	{
	  if (op0.index)
	    return "fstrd\t%1, %0";
	  else
	    return "fstd\t%1, %0";
	}
      /* FIXME length attribute is wrong here.  */
      if (srcreg == basereg)
	/* Just store them in reverse order.  */
	return "st.w\t%R1, %R0\n\tst.w\t%1, %0";
      else
	return "st.w\t%1, %0\n\tst.w\t%R1, %R0";
    }
  else
    gcc_unreachable ();
}


const char *
csky_output_ck801_movedouble (rtx operands[],
			      machine_mode mode ATTRIBUTE_UNUSED)
{
  rtx dst = operands[0];
  rtx src = operands[1];

  if (REG_P (dst))
    {
      if (REG_P (src))
	{
	  int dstreg = REGNO (dst);
	  int srcreg = REGNO (src);

	  /* Ensure the second source word is not overwritten: if the
	     destination's first register equals the source's second
	     register, copy the second word first.  */
	  if (srcreg + 1 == dstreg)
	    return "mov\t%R0, %R1\n\tmov\t%0, %1";
	  else
	    return "mov\t%0, %1\n\tmov\t%R0, %R1";
	}
      else if (GET_CODE (src) == MEM)
	{
	  rtx memexp = XEXP (src, 0);
	  int dstreg = REGNO (dst);
	  int basereg = -1;
	  struct csky_address op0;

	  decompose_csky_address (XEXP (src, 0), &op0);

	  if (GET_CODE (memexp) == LABEL_REF
	      || (GET_CODE (memexp) == CONST
		  && GET_CODE (XEXP (memexp, 0)) == PLUS
		  && GET_CODE (XEXP (XEXP (memexp, 0), 0)) == LABEL_REF))
	    return "lrw\t%0, [%1]\n\tlrw\t%R0, [%R1]";
	  else if (GET_CODE (memexp) == REG)
	    basereg = REGNO (memexp);
	  else if (GET_CODE (memexp) == PLUS)
	    {
	      if (GET_CODE (XEXP (memexp, 0)) == REG)
		basereg = REGNO (XEXP (memexp, 0));
	      else if (GET_CODE (XEXP (memexp, 1)) == REG)
		basereg = REGNO (XEXP (memexp, 1));
	      else
		gcc_unreachable ();
	    }
	  else
	    gcc_unreachable ();

	  /* FIXME length attribute is wrong here.  */
	  if (dstreg == basereg)
	    /* Just load them in reverse order.  */
	    return "ld.w\t%R0, %R1\n\tld.w\t%0, %1";
	  else
	    return "ld.w\t%0, %1\n\tld.w\t%R0, %R1";
	}
      else if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE)
	{
	  split_double (src, operands + 2, operands + 3);
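	  /* The 16-bit movi apparently only targets r0-r7 (see the
	     REGNO checks in csky_output_ck801_move); the high word
	     lands in REGNO (dst) + 1, hence the stricter <= 6 test on
	     the second half below.  */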

	  if (REGNO (dst) <= 7
	      && CSKY_CONST_OK_FOR_N (INTVAL (operands[2]) + 1))
	    output_asm_insn ("movi\t%0, %2", operands);
	  else
	    output_asm_insn ("lrw\t%0, %2", operands);


	  if (REGNO (dst) <= 6
	      && CSKY_CONST_OK_FOR_N (INTVAL (operands[3]) + 1))
	    output_asm_insn ("movi\t%R0, %3", operands);
	  else
	    output_asm_insn ("lrw\t%R0, %3", operands);

	  return "";


	}
      else
	gcc_unreachable ();
    }
  else if (GET_CODE (dst) == MEM && GET_CODE (src) == REG)
    {
      rtx memexp = XEXP (dst, 0);
      int srcreg = REGNO (src);
      int basereg = -1;
      struct csky_address op0;

      decompose_csky_address (XEXP (dst, 0), &op0);

      if (GET_CODE (memexp) == REG)
	basereg = REGNO (memexp);
      else if (GET_CODE (memexp) == PLUS)
	{
	  if (GET_CODE (XEXP (memexp, 0)) == REG)
	    basereg = REGNO (XEXP (memexp, 0));
	  else if (GET_CODE (XEXP (memexp, 1)) == REG)
	    basereg = REGNO (XEXP (memexp, 1));
	  else
	    gcc_unreachable ();
	}
      else
	gcc_unreachable ();

      /* FIXME length attribute is wrong here.  */
      if (srcreg == basereg)
	/* Just store them in reverse order.  */
	return "st.w\t%R1, %R0\n\tst.w\t%1, %0";
      else
	return "st.w\t%1, %0\n\tst.w\t%R1, %R0";
    }
  else
    gcc_unreachable ();
}

/* Split operands for an AND expression when OPERANDS[2] is a constant.
   Note operands[0] is marked earlyclobber in this case and can be
   overwritten.  Return true if "DONE", false otherwise.  */
bool
csky_split_and (rtx *operands)
{
  HOST_WIDE_INT mask = INTVAL (operands[2]);
  rtx not_value = GEN_INT (~mask);
  int i;

  /* All zeros or all ones can be handled by a move instruction.  */
  if (mask == 0)
    {
      emit_move_insn (operands[0], const0_rtx);
      return true;
    }
  if (mask == -1)
    {
      emit_move_insn (operands[0], operands[1]);
      return true;
    }

  /* Check for constants that can be handled directly by the 32-bit andi
     instruction.  */
  if (CSKY_ISA_FEATURE (E2) && csky_arith_O_operand (operands[2], SImode))
    return false;

  /* Try to transform to andni instruction.  */
  if (CSKY_ISA_FEATURE (E2) && csky_arith_O_operand (not_value, SImode))
    {
      emit_insn (gen_cskyv2_andnsi3 (operands[0], not_value, operands[1]));
      return true;
    }

  /* If there are only one or two 0 bits in the constant, we can
     replace the operation with bclri instructions on those bits.
     Note CK801 has only the 16-bit bclri that operates on a single
     register, so we must count a move if we are post-reload.  */
  if (popcount_hwi (~mask & 0xffffffff)
      <= (reload_completed && !CSKY_ISA_FEATURE (E2) ? 1 : 2))
    {
      rtx input = operands[1];

      if (!CSKY_ISA_FEATURE (E2))
	{
	  emit_move_insn (operands[0], input);
	  input = operands[0];
	}

      for (i = 0; i < 32; i++)
	if ((mask & (1 << i)) == 0x0)
	  {
	    emit_insn (gen_bclri (operands[0], input, GEN_INT (i)));
	    input = operands[0];
	  }
      return true;
    }

  /* If the constant mask is outside the [0, 4095] range for
     constraint O, or if constraint O is not allowed (ck801),
     maybe the constant is a contiguous bit range that we can
     handle by bit extract (low bits) or shifts (high bits).  */
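  /* For example, mask 0x00ffffff (the low 24 bits) matches the first
     test below: on 2E3 it becomes a single 24-bit zero-extract,
     otherwise a left shift by 8 followed by a logical right shift by 8.
     Conversely, mask 0xffffff00 matches the second test and becomes a
     right shift by 8 followed by a left shift by 8.  */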
  for (i = (CSKY_ISA_FEATURE (E2) ? 13 : 1); i < 32; i++)
    {
      if ((((HOST_WIDE_INT) 1) << i) - 1 == mask)
	{
	  if (CSKY_ISA_FEATURE (2E3))
	    emit_insn (gen_cskyv2_extzv (operands[0], operands[1],
					 GEN_INT (i), const0_rtx));
	  else
	    {
	      rtx shift = GEN_INT (32 - i);
	      rtx reg = (reload_completed
			 ? operands[0] : gen_reg_rtx (SImode));

	      emit_insn (gen_ashlsi3 (reg, operands[1], shift));
	      emit_insn (gen_lshrsi3 (operands[0], reg, shift));
	    }
	  return true;
	}
      else if ((((HOST_WIDE_INT) 1) << i) - 1 == ~mask)
	{
	  rtx shift = GEN_INT (i);
	  rtx reg = (reload_completed
		     ? operands[0] : gen_reg_rtx (SImode));

	  emit_insn (gen_lshrsi3 (reg, operands[1], shift));
	  emit_insn (gen_ashlsi3 (operands[0], reg, shift));
	  return true;
	}
    }

  /* If the constant is a negative number, it seems better to use
     andn and copy the NOT_VALUE to a register instead of the
     original value, since the NOT_VALUE is always smaller and thus
     more likely to be representable as a small constant.
     This transformation can only be done before reload because
     it requires a temporary.  Hopefully register allocation can get
     rid of the extra move required for CK801.  */
  if (!reload_completed && INTVAL (operands[2]) < 0)
    {
      rtx reg = copy_to_mode_reg (SImode, not_value);

      if (CSKY_ISA_FEATURE (E2))
	emit_insn (gen_cskyv2_andnsi3 (operands[0], reg, operands[1]));
      else
	{
	  emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_ck801_andnsi3 (operands[0], reg, operands[0]));
	}
      return true;
    }

  /* If none of the above approaches apply, move the constant
     to a register.  We can clobber operands[0] as it is
     marked earlyclobber in the insn constraints, but then we have to
     swap operands 1 and 2 to match the constraints on the 2-operand
     16-bit and instruction.  */
  if (reload_completed)
    {
      emit_move_insn (operands[0], operands[2]);
      operands[2] = operands[1];
      operands[1] = operands[0];
    }
  else
    operands[2] = copy_to_mode_reg (SImode, operands[2]);
  return false;
}

/* Split operands for an IOR expression when OPERANDS[2] is a constant.
   Note operands[0] is marked earlyclobber in this case and can be
   overwritten.  Return true if "DONE", false otherwise.  */
bool
csky_split_ior (rtx *operands)
{
  HOST_WIDE_INT mask = INTVAL (operands[2]);
  int i;

  /* All zeros or all ones can be handled by a move instruction.  */
  if (mask == 0)
    {
      emit_move_insn (operands[0], operands[1]);
      return true;
    }
  if (mask == -1)
    {
      emit_move_insn (operands[0], gen_int_mode (-1, SImode));
      return true;
    }

  /* Check for constants that can be handled directly by the 32-bit ori
     instruction.  */
  if (CSKY_ISA_FEATURE (E2) && csky_literal_I_operand (operands[2], SImode))
    return false;

  /* If there are only one or two 1 bits in the value, we can replace
     the operation with bseti instructions to set those bits.
     Note CK801 has only the 16-bit bseti that operates on a single
     register, so we must count a move if we are post-reload.  */
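  /* For example, (ior rx 0x21) has two 1 bits and becomes
     bseti rx, 0 followed by bseti rx, 5.  */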
  if (popcount_hwi (mask & 0xffffffff)
      <= (reload_completed && !CSKY_ISA_FEATURE (E2) ? 1 : 2))
    {
      rtx input = operands[1];

      if (!CSKY_ISA_FEATURE (E2))
	{
	  emit_move_insn (operands[0], input);
	  input = operands[0];
	}

      for (i = 0; i < 32; i++)
	if (mask & (1 << i))
	  {
	    emit_insn (gen_bseti (operands[0], input, GEN_INT (i)));
	    input = operands[0];
	  }
      return true;
    }

  /* If none of the above approaches apply, move the constant
     to a register.  We can clobber operands[0] as it is
     marked earlyclobber in the insn constraints, but then we have to
     swap operands 1 and 2 to match the constraints on the 2-operand
     16-bit ior instruction.  */
  if (reload_completed)
    {
      emit_move_insn (operands[0], operands[2]);
      operands[2] = operands[1];
      operands[1] = operands[0];
    }
  else
    operands[2] = copy_to_mode_reg (SImode, operands[2]);
  return false;
}


/* Split operands for an XOR expression when OPERANDS[2] is a constant.
   Note operands[0] is marked earlyclobber in this case and can be
   overwritten.  Return true if "DONE", false otherwise.  */
bool
csky_split_xor (rtx *operands)
{
  HOST_WIDE_INT mask = INTVAL (operands[2]);

  /* A mask of all zeros turns the XOR into a plain move.  */
  if (mask == 0)
    {
      emit_move_insn (operands[0], operands[1]);
      return true;
    }

  /* All ones can be turned into a bitwise not.  */
  if (mask == -1)
    {
      if (CSKY_ISA_FEATURE (E2))
	emit_insn (gen_cskyv2_one_cmplsi2 (operands[0], operands[1]));
      else
	{
	  emit_move_insn (operands[0], operands[1]);
	  emit_insn (gen_ck801_one_cmplsi2 (operands[0], operands[0]));
	}
      return true;
    }

  /* Check for constants that can be handled directly by the 32-bit xori
     instruction.  */
  if (CSKY_ISA_FEATURE (E2) && csky_arith_O_operand (operands[2], SImode))
    return false;

  /* If none of the above approaches apply, move the constant
     to a register.  We can clobber operands[0] as it is
     marked earlyclobber in the insn constraints, but then we have to
     swap operands 1 and 2 to match the constraints on the 2-operand
     16-bit xor instruction.  */
  if (reload_completed)
    {
      emit_move_insn (operands[0], operands[2]);
      operands[2] = operands[1];
      operands[1] = operands[0];
    }
  else
    operands[2] = copy_to_mode_reg (SImode, operands[2]);
  return false;
}


/* Return true if X is an address form involving a symbol or label ref.  */
bool
csky_symbolic_address_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return true;
    case CONST:
      x = XEXP (x, 0);
      return ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
	       || GET_CODE (XEXP (x, 0)) == LABEL_REF)
	      && GET_CODE (XEXP (x, 1)) == CONST_INT);
    default:
      return false;
    }
}


/* Emit a comparison instruction.
   Return true if an inverted comparison is generated.  */

bool
csky_emit_compare (enum rtx_code code, rtx op0, rtx op1)
{
  bool invert;
  rtx cc_reg = gen_rtx_REG (CCmode, CSKY_CC_REGNUM);

  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (op1);

      switch (code)
	{
	case GTU:
	  /* Unsigned (GTU 0) is the same as (NE 0); everything else is
	     converted below to LEU (reversed cmphs).  */
	  if (val == 0)
	    code = NE;
	  /* Check whether (GTU A imm) can become (GEU A imm + 1).  */
	  else if (TARGET_MINI_REGISTERS
		   ? CSKY_CONST_OK_FOR_J (val + 1)
		   : CSKY_CONST_OK_FOR_Uk (val + 1))
	    {
	      op1 = GEN_INT (val + 1);
	      code = GEU;
	    }
	  break;
	/* Check whether (LE A imm) can become (LT A imm + 1),
	   or (GT A imm) can become (GE A imm + 1).  */
	case GT:
	case LE:
	  if (TARGET_MINI_REGISTERS
	      ? CSKY_CONST_OK_FOR_J (val + 1)
	      : CSKY_CONST_OK_FOR_Uk (val + 1))
	    {
	      op1 = GEN_INT (val + 1);
	      code = code == LE ? LT : GE;
	    }
	  break;

	default:
	  break;
	}
    }
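  /* For example, the rewriting above turns (GTU r3 7) into
     (GEU r3 8), which the GEU case below emits as a cmphs against
     the immediate 8 when that value is in range.  */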

  if (CONSTANT_P (op1) && GET_CODE (op1) != CONST_INT)
    op1 = force_reg (GET_MODE (op1), op1);

  /* Immediate ranges: cmpnei takes 0-31 (K immediate);
     cmplti takes 1-32 (J immediate, with 0 handled via btsti x,31).  */
  invert = false;
  switch (code)
    {
      /* Use inverted condition, cmpne.  */
      case EQ:
	code = NE;
	invert = true;
      /* Fall through.  */
      /* Use normal condition, cmpne.  */
      case NE:
	if (GET_CODE (op1) == CONST_INT
	    && (TARGET_MINI_REGISTERS
		? !csky_literal_K_operand (op1, SImode)
		: !csky_literal_I_operand (op1, SImode)))
	  op1 = force_reg (SImode, op1);
      break;

      /* Use inverted condition, reversed cmplt.  */
      case LE:
	code = GT;
	invert = true;
      /* Fall through.  */
      /* Use normal condition, reversed cmplt.  */
      case GT:
	if (GET_CODE (op1) == CONST_INT)
	  op1 = force_reg (SImode, op1);
      break;

      /* Use inverted condition, cmplt.  */
      case GE:
	code = LT;
	invert = true;
      /* Fall through.  */
      /* Use normal condition, cmplt.  */
      case LT:
	/* Comparison against 0 is covered by btsti x,31.  */
	if (GET_CODE (op1) == CONST_INT && INTVAL (op1) != 0
	    && (TARGET_MINI_REGISTERS
		? !csky_literal_J_operand (op1, SImode)
		: !csky_literal_Uk_operand (op1, SImode)))
	  op1 = force_reg (SImode, op1);
	break;

      /* Use inverted condition, cmple.  */
      case GTU:
	/* Unsigned (GTU x 0) was already converted to NE above.  */
	gcc_assert (GET_CODE (op1) != CONST_INT || INTVAL (op1) != 0);
	code = LEU;
	invert = true;
      /* Fall through.  */
      /* Use normal condition, reversed cmphs.  */
      case LEU:
	if (GET_CODE (op1) == CONST_INT && INTVAL (op1) != 0)
	  op1 = force_reg (SImode, op1);
	break;

      /* Use inverted condition, cmphs.  */
      case LTU:
	code = GEU;
	invert = true;
      /* Fall through.  */
      /* Use normal condition, cmphs.  */
      case GEU:
	if (GET_CODE (op1) == CONST_INT && INTVAL (op1) != 0
	    && (TARGET_MINI_REGISTERS
		? !csky_literal_J_operand (op1, SImode)
		: !csky_literal_Uk_operand (op1, SImode)))
	  op1 = force_reg (SImode, op1);
      break;

    default:
      break;
    }

  emit_insn (gen_rtx_SET (cc_reg,
			  gen_rtx_fmt_ee (code, CCmode, op0, op1)));
  return invert;
}

/* Return true if push/pop can be used to save/restore all the registers
   indicated by MASK.  We currently don't attempt to handle situations where
   some of the registers could be handled by push/pop and others saved and
   restored individually.  */

static bool
csky_can_use_pushpop (unsigned int mask)
{
  int i;
  int end_reg;

  if (!TARGET_PUSHPOP)
    return false;

  if (mask == 0)
    return false;

  /* Regs 0-3, 12-14, 18-27, 29-31 cannot be in the mask.  */
  if (mask & 0xeffc700f)
    return false;

  /* Regs in the range r4-r11 must be contiguous.  */
  for (end_reg = 0, i = 11; i >= 4; i--)
    {
      if (!end_reg && (mask & (1 << i)))
	end_reg = i;
      if (end_reg && !(mask & (1 << i)))
	return false;
    }
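  /* Since the scan runs downward, the contiguous range must extend
     all the way to r4: e.g., r4-r7 is accepted, but r5-r7 without
     r4 is rejected.  */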

  /* Likewise for regs in the range r16-r17.  */
  for (end_reg = 0, i = 17; i >= 16; i--)
    {
      if (!end_reg && (mask & (1 << i)))
	end_reg = i;
      if (end_reg && !(mask & (1 << i)))
	return false;
    }

  return true;
}


/* Return true if store/load multiple instructions can be used to
   save/restore at least some of the registers indicated by MASK.
   Unlike the push/pop case, this does handle partial ranges.
   Set *BR and *ER to the beginning and end (respectively) of the
   register range that can be handled.  */

static bool
csky_can_use_ldstm (int mask, int *br, int *er)
{
  int regno;
  int begin_reg = 0, end_reg = 0;
  int count = 0;

  if (!TARGET_MULTIPLE_STLD)
    return false;

  /* We'll only handle registers in the range 4-11, the contiguous range
     of callee-saved registers.  Higher-numbered registers are handled
     individually in addition to this, but we'll give up on doing ldstm
     entirely if we need to save/restore the low-numbered EH registers.  */
  if (mask & 0xf)
    return false;

  for (regno = 4; regno <= 11; regno++)
    {
      if (mask & 1 << regno)
	{
	  if (!begin_reg)
	    begin_reg = regno;
	  end_reg = regno;
	  count++;
	}
      else if (begin_reg)
	break;
    }

  if (count >= CSKY_MIN_MULTIPLE_STLD && count <= CSKY_MAX_MULTIPLE_STLD)
    {
      if (br)
	*br = begin_reg;
      if (er)
	*er = end_reg;
      return true;
    }
  return false;
}


const char *
csky_output_return_instruction (void)
{
  unsigned long func_type = get_csky_current_func_type ();

  if (CSKY_FUNCTION_IS_NAKED (func_type))
    return "";
  if (CSKY_FUNCTION_IS_INTERRUPT (func_type))
    return "ipop\n\tnir\n";
  else
    return "rts\n";
}


/* Adjust the stack pointer by OFFSET bytes.  OFFSET is negative if this
   is in the prologue, positive if in the epilogue.  This may require
   multiple instructions and/or use of CSKY_STACKADJUST_REGNUM as
   a scratch register.  Emit CFA notes as appropriate.  */
static void
expand_csky_stack_adjust (int offset)
{
  rtx set;
  rtx_insn *insn;
  int size = (offset > 0 ? offset : -offset);

  if (offset == 0)
    return;

  /* If OFFSET is too large for addi/subi, load it into
     CSKY_STACKADJUST_REGNUM and use a register add/sub instead.
     This case is not mentioned in the ABI documentation, but it is
     supported by GDB prologue analysis provided that the instruction(s)
     to initialize CSKY_STACKADJUST_REGNUM appear directly before
     the sub.  Depending on the value of OFFSET, this might be a
     lrw instruction or the "tricks" used by csky_output_inline_const to
     encode special-case integer constants.  */
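  /* As an illustration, if CSKY_MAX_SP_ADJUST were 508, an adjustment
     of 4096 bytes would take the scratch-register path below, while
     700 bytes would be emitted as two addi/subi insns by the loop at
     the end of this function.  */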
  if (size > CSKY_MAX_SP_ADJUST * 2)
    {
      rtx tmp, dwarf;

      /* We should have reserved the scratch register already in
	 csky_layout_stack_frame.  */
      gcc_assert (cfun->machine->reg_size != 0
		  && (cfun->machine->reg_mask
		      & (1 << CSKY_STACKADJUST_REGNUM)));

      /* Prevent the optimizer from reordering these instructions to
	 keep GDB happy.  */
      if (!flag_sched_prolog)
	emit_insn (gen_blockage ());

      tmp = gen_rtx_REG (SImode, CSKY_STACKADJUST_REGNUM);
      emit_move_insn (tmp, GEN_INT (size));

      if (offset > 0)
	set = gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp);
      else
	set = gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp);
      insn = emit_insn (set);
      RTX_FRAME_RELATED_P (insn) = 1;
      dwarf = gen_rtx_SET (stack_pointer_rtx,
			   plus_constant (Pmode, stack_pointer_rtx, offset));
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);

      /* Again, keep GDB happy.  */
      if (!flag_sched_prolog)
	emit_insn (gen_blockage ());
    }

  /* Use one or two addi or subi insns to adjust stack.  */
  else
    while (size)
      {
	int delta = (size > CSKY_MAX_SP_ADJUST
		     ? CSKY_MAX_SP_ADJUST : size);

	if (offset > 0)
	  set = gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (delta));
	else
	  set = gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx,
			    GEN_INT (delta));
	insn = emit_insn (set);
	RTX_FRAME_RELATED_P (insn) = 1;
	size -= delta;
      }
}


/* Generate and emit an insn that we will recognize as a push_multi.
   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */

static rtx
emit_csky_regs_push (unsigned long mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;
  rtx dwarf;
  rtx tmp;
  int dwarf_par_index;

  for (i = 0; i < CSKY_NGPR_REGS; i++)
    {
      if (mask & (1 << i))
	num_regs++;
    }

  /* The register range for push is r4-r11, r15-r17, r28.  */
  gcc_assert (num_regs && num_regs <= 12);

  /* For the body of the insn we are going to generate an UNSPEC in
     parallel with several USEs.  This allows the insn to be recognized
     by the push_multi pattern in the csky.md file.

     The body of the insn looks something like this:

       (parallel [
	   (set (mem:BLK (pre_modify:SI (reg:SI sp)
					(const_int:SI <num>)))
		(unspec:BLK [(reg:SI r4)] UNSPEC_PUSHPOP_MULT))
	   (use (reg:SI XX))
	   (use (reg:SI YY))
	   ...
	])

     For the frame note however, we try to be more explicit and actually
     show each register being stored into the stack frame, plus a (single)
     decrement of the stack pointer.  We do it this way in order to be
     friendly to the stack unwinding code, which only wants to see a single
     stack decrement per instruction.  The RTL we generate for the note looks
     something like this:

      (sequence [
	   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
	   (set (mem:SI (reg:SI sp)) (reg:SI r4))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
	   ...
	])

     FIXME: In an ideal world the PRE_MODIFY would not exist and
     instead we'd have a parallel expression detailing all
     the stores to the various memory addresses so that debug
     information is more up-to-date. Remember however while writing
     this to take care of the constraints with the push instruction.

     Note also that this has to be taken care of for the VFP registers.

     For more see PR43399.  */

  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
  dwarf_par_index = 1;

  for (i = 0; i < CSKY_NGPR_REGS; i++)
    if (mask & (1 << i))
      {
	rtx reg = gen_rtx_REG (SImode, i);
	rtx addr = plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs);
	tmp = gen_frame_mem (BLKmode,
			     gen_rtx_PRE_MODIFY (Pmode,
						 stack_pointer_rtx, addr));
	XVECEXP (par, 0, 0)
	  = gen_rtx_SET (tmp,
			 gen_rtx_UNSPEC (BLKmode,
					 gen_rtvec (1, reg),
					 UNSPEC_PUSHPOP_MULT));
	tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
			   reg);
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;

	break;
      }

  for (j = 1, i++; j < num_regs; i++)
    if (mask & (1 << i))
      {
	rtx reg = gen_rtx_REG (SImode, i);
	rtx addr = plus_constant (Pmode, stack_pointer_rtx, 4 * j);
	tmp = gen_rtx_SET (gen_frame_mem (SImode, addr), reg);
	RTX_FRAME_RELATED_P (tmp) = 1;
	XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
	XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
	j++;
      }

  par = emit_insn (par);

  tmp = gen_rtx_SET (stack_pointer_rtx,
		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
  RTX_FRAME_RELATED_P (tmp) = 1;
  XVECEXP (dwarf, 0, 0) = tmp;

  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
  RTX_FRAME_RELATED_P (par) = 1;

  return par;
}


/* Generate and emit an insn pattern that we will recognize as a pop_multi.
   MASK shows which registers need to be restored.

   Unfortunately, since this insn does not reflect very well the actual
   semantics of the operation, we need to annotate the insn for the benefit
   of DWARF2 frame unwind information.  */

static void
emit_csky_regs_pop (unsigned long mask)
{
  int num_regs = 0;
  int i, j;
  rtx par;

  for (i = 0; i < CSKY_NGPR_REGS; i++)
    if (mask & (1 << i))
      num_regs++;

  /* The register range for pop is r4-r11, r15-r17, r28.  */
  gcc_assert (num_regs && num_regs <= 12);

  /* The first element is (return); the second element is
       (set (reg:SI 'first reg number')
	    (unspec:SI [(mem)] UNSPEC_PUSHPOP_MULT));
     the remaining elements are (use (reg:SI 'next reg number')),
     so the length is the number of registers to be popped
     plus one.  */
  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));

  XVECEXP (par, 0, 0) = ret_rtx;

  for (i = 0; i < CSKY_NGPR_REGS; i++)
    if (mask & (1 << i))
      {
	rtx reg = gen_rtx_REG (SImode, i);
	rtx addr = plus_constant (Pmode, stack_pointer_rtx, 4 * num_regs);
	rtx tmp = gen_frame_mem (SImode,
				 gen_rtx_POST_MODIFY (Pmode,
						      stack_pointer_rtx, addr));
	XVECEXP (par, 0, 1)
	  = gen_rtx_SET (reg,
			 gen_rtx_UNSPEC (SImode,
					 gen_rtvec (1, tmp),
					 UNSPEC_PUSHPOP_MULT));
	break;
      }

  for (j = 2, i++; j < (num_regs + 1); i++)
    if (mask & (1 << i))
      {
	rtx reg = gen_rtx_REG (SImode, i);
	XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
	j++;
      }

  par = emit_jump_insn (par);
}


/* Generate the function prologue.  */

void
csky_expand_prologue (void)
{
  rtx_insn *insn;
  unsigned long func_type = get_csky_current_func_type ();
  unsigned int reg_mask;
  int reg_size;

  if (CSKY_FUNCTION_IS_NAKED (func_type))
    {
      if (flag_stack_usage_info)
	current_function_static_stack_size = 0;
      return;
    }

  csky_layout_stack_frame ();
  reg_mask = cfun->machine->reg_mask;
  reg_size = cfun->machine->reg_size;

  /* Adjust stack pointer past argument overflow area.  */
  if (cfun->machine->arg_size != 0)
    {
      int offset = cfun->machine->arg_size;
      expand_csky_stack_adjust (- offset);

      /* If we have a parameter passed partially in regs and partially
	 in memory, the registers will have been stored to memory already
	 in function.c.  So we only need to copy varargs from registers
	 to stack.  */
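      /* For example, with arg_size == 8 the loop below stores the
	 highest-numbered argument register at sp+4 and the next
	 lower one at sp+0.  */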
      if (cfun->machine->uses_anonymous_args)
	{
	  int rn = CSKY_FIRST_PARM_REGNUM + CSKY_NPARM_REGS - 1;
	  for (offset -= 4; offset >= 0; offset -= 4, rn--)
	    {
	      rtx dst = gen_frame_mem (SImode,
				       plus_constant (Pmode,
						      stack_pointer_rtx,
						      offset));
	      insn = emit_move_insn (dst, gen_rtx_REG (SImode, rn));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
    }

  /* Push callee-saved registers to stack.  */
  if (csky_can_use_pushpop (reg_mask))
    emit_csky_regs_push (reg_mask);
  else if (reg_size)
    {
      int sreg = -1, ereg = -1;
      bool stm_p = csky_can_use_ldstm (reg_mask, &sreg, &ereg);
      int stm_regs = stm_p ? ereg - sreg + 1 : 0;
      int stm_size = stm_regs * 4;

      /* First adjust the SP to the low end of the register save area.  */
      expand_csky_stack_adjust (- reg_size);

      /* Emit individual register saves.  Even if we are going to emit an
	 stm, we may need to save individual registers above that too.  */
      if (reg_size > stm_size)
	{
	  int offset = reg_size - 4;
	  int regno = 31;
	  for ( ; regno > ereg; regno--)
	    if (reg_mask & (1 << regno))
	      {
		rtx dst = gen_rtx_MEM (SImode,
				       plus_constant (Pmode,
						      stack_pointer_rtx,
						      offset));
		rtx insn = emit_insn (gen_movsi (dst,
						 gen_rtx_REG (SImode, regno)));
		RTX_FRAME_RELATED_P (insn) = 1;
		if (offset == stm_size)
		  break;
		offset -= 4;
	      }
	}

      /* If possible, emit a stm to do a bulk store of sequential
	 registers to the stack.  Note the ABI documentation erroneously
	 omits stm from its list of valid prologue instructions.  */
      if (stm_p)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (stm_regs));
	  int regno, slot;
	  for (regno = sreg, slot = 0; regno <= ereg; regno++, slot++)
	    {
	      rtx reg = gen_rtx_REG (SImode, regno);
	      rtx addr = plus_constant (Pmode, stack_pointer_rtx, slot * 4);
	      rtx set = gen_rtx_SET (gen_frame_mem (SImode, addr), reg);
	      RTX_FRAME_RELATED_P (set) = 1;
	      XVECEXP (par, 0, slot) = set;
	    }
	  insn = emit_insn (par);
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
    }

  /* Initialize hard frame pointer, if necessary.  It points at the base
     of the register save area.  */
  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Reserve stack space for locals and outgoing args.  */
  expand_csky_stack_adjust (- cfun->machine->reg_offset);

  /* Put the GOT address in reg_gb for PIC, using R13 as a scratch.
     See section 4.7.1 in the ABI documentation,
     "Function Prologue for PIC".  */
  if (flag_pic && (reg_mask & (1 << PIC_OFFSET_TABLE_REGNUM)))
    {
      rtx l1 = gen_label_rtx ();
      rtx grs_label = gen_rtx_LABEL_REF (SImode, l1);
      rtx reg_gb = gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM);
      rtx reg_temp = gen_rtx_REG (SImode, 13);

      rtx tmp0_unspec = gen_rtx_UNSPEC (Pmode,
					gen_rtvec (1, grs_label),
					UNSPEC_PIC_SYMBOL_GOTPC_GRS);
      rtx tmp1_unspec = gen_rtx_UNSPEC (Pmode,
					gen_rtvec (1, grs_label),
					UNSPEC_PIC_SYMBOL_GOTPC);

      emit_insn (gen_prologue_get_pc (tmp0_unspec));
      emit_move_insn (reg_temp, tmp1_unspec);
      emit_insn (gen_addsi3 (reg_gb, reg_gb, reg_temp));
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size = cfun->machine->frame_size;

  if (!flag_sched_prolog)
    emit_insn (gen_blockage ());
}

void
csky_expand_epilogue (void)
{
  unsigned long func_type = get_csky_current_func_type ();
  unsigned int reg_mask;
  int reg_size;
  int adjust;
  rtx_insn *insn;

  if (!flag_sched_prolog)
    emit_insn (gen_blockage ());

  if (CSKY_FUNCTION_IS_NAKED (func_type))
    {
      emit_jump_insn (gen_simple_return ());
      return;
    }

  /* Get the frame information.  */
  csky_layout_stack_frame ();
  reg_mask = cfun->machine->reg_mask;
  reg_size = cfun->machine->reg_size;
  adjust = reg_size + cfun->machine->arg_size;

  /* Restore the SP to the base of the register save area.  */
  if (frame_pointer_needed)
    {
      insn = emit_move_insn (stack_pointer_rtx, frame_pointer_rtx);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  else
    expand_csky_stack_adjust (cfun->machine->reg_offset);

  /* Restore the callee-saved registers.  */
  if (csky_can_use_pushpop (reg_mask)
      && cfun->machine->arg_size == 0
      && !CSKY_FUNCTION_IS_INTERRUPT (func_type)
      && !crtl->calls_eh_return)
    {
      /* Pop includes an implicit return, so we are done.  */
      emit_csky_regs_pop (reg_mask);
      return;
    }
  else if (reg_size)
    {
      int sreg = -1, ereg = -1;
      bool ldm_p = csky_can_use_ldstm (reg_mask, &sreg, &ereg);
      int ldm_regs = ldm_p ? ereg - sreg + 1 : 0;
      int ldm_size = ldm_regs * 4;

      /* Emit individual register loads.  Even if we are going to emit an
	 ldm, we may need to load individual registers above that too.  */
      if (reg_size > ldm_size)
	{
	  int offset = reg_size - 4;
	  int regno = 31;
	  for ( ; regno > ereg; regno--)
	    if (reg_mask & (1 << regno))
	      {
		rtx src = gen_frame_mem (SImode,
					 plus_constant (Pmode,
							stack_pointer_rtx,
							offset));
		rtx reg = gen_rtx_REG (SImode, regno);
		insn = emit_move_insn (reg, src);
		RTX_FRAME_RELATED_P (insn) = 1;
		add_reg_note (insn, REG_CFA_RESTORE, reg);
		if (offset == ldm_size)
		  break;
		offset -= 4;
	      }
	}

      /* If possible, emit a ldm to do a bulk load of sequential
	 registers from the stack.  */
      if (ldm_p)
	{
	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (ldm_regs));
	  int regno, slot;
	  for (regno = sreg, slot = 0; regno <= ereg; regno++, slot++)
	    {
	      rtx reg = gen_rtx_REG (SImode, regno);
	      rtx addr = plus_constant (Pmode, stack_pointer_rtx, slot * 4);
	      rtx set = gen_rtx_SET (reg, gen_frame_mem (SImode, addr));
	      XVECEXP (par, 0, slot) = set;
	    }
	  insn = emit_insn (par);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  for (regno = sreg; regno <= ereg; regno++)
	    {
	      rtx reg = gen_rtx_REG (SImode, regno);
	      add_reg_note (insn, REG_CFA_RESTORE, reg);
	    }
	}
    }

  /* Emit the final stack pointer adjustment to deallocate the saved
     registers and incoming argument area.  */
  expand_csky_stack_adjust (adjust);

  /* Extra stack adjustment for exception handler return.  */
  if (crtl->calls_eh_return)
    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
			   EH_RETURN_STACKADJ_RTX));

  /* Now we can return.  */
  emit_jump_insn (gen_simple_return ());
}


static void
csky_output_function_prologue (FILE *f)
{
  unsigned long func_type = get_csky_current_func_type ();

  switch ((int) CSKY_FUNCTION_TYPE (func_type))
    {
    default:
    case CSKY_FT_NORMAL:
      break;
    case CSKY_FT_INTERRUPT:
      {
	asm_fprintf (f, "\t# Interrupt Service Routine.\n");
	asm_fprintf (f, "\tnie\n\tipush\n");
	break;
      }
    case CSKY_FT_FIQ:
      asm_fprintf (f, "\t# Fast Interrupt Service Routine.\n");
      break;
    case CSKY_FT_EXCEPTION:
      asm_fprintf (f, "\t# CSKY Exception Handler.\n");
      break;
    case CSKY_FT_NAKED:
      asm_fprintf (f, "\t# Naked Function: prologue and epilogue \
		      provided by programmer.\n");
      return;
    }

  csky_layout_stack_frame ();

  /* Generate .stack_size function-name, size for callgraph;
     the default stack size is 0.  */
  if (TARGET_STACK_SIZE && cfun->machine->frame_size > 0)
    {
      gcc_assert (current_function_decl != NULL);
      const char *func_name =
	  IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl));
      if (func_name[0] == '*')
	asm_fprintf (f, "\t.stack_size %s, %d\n",
		     &func_name[1], cfun->machine->frame_size);
      else
	asm_fprintf (f, "\t.stack_size %s, %d\n",
		     func_name, cfun->machine->frame_size);
    }
}


static void
csky_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
{
}


/* Helper for csky_eh_return splitter: store the call frame exception
   handler address in lr.  */
void
csky_set_eh_return_address (rtx source, rtx scratch)
{
  HOST_WIDE_INT delta = 0;
  rtx basereg, addr;
  unsigned int reg_mask;

  csky_layout_stack_frame ();
  reg_mask = cfun->machine->reg_mask;

  if (reg_mask & (1 << CSKY_LR_REGNUM))
    {
      /* Find LR in the stack frame.  */
      int i = 0;

      if (frame_pointer_needed)
	{
	  basereg = frame_pointer_rtx;
	  delta = 0;
	}
      else
	{
	  basereg = stack_pointer_rtx;
	  delta = cfun->machine->reg_offset;
	}

      /* At this point, (basereg + delta) points at the low end of
	 the reg save area.  Regs are saved sequentially from low
	 to high from this address.  */
      for (i = 0; i < CSKY_LR_REGNUM; i++)
	if (reg_mask & (1 << i))
	  delta += 4;

      if ((CSKY_TARGET_ARCH (CK801) && delta >= CSKY_LD16_MAX_OFFSET (Pmode))
	  || delta >= CSKY_LD32_MAX_OFFSET (Pmode))
	{
	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
	  emit_insn (gen_addsi3 (scratch, scratch, basereg));
	  addr = scratch;
	}
      else
	addr = plus_constant (Pmode, basereg, delta);
      emit_move_insn (gen_frame_mem (Pmode, addr), source);
    }
  else
    emit_move_insn (gen_rtx_REG (Pmode, CSKY_LR_REGNUM), source);
}

/* Return TRUE if X references a SYMBOL_REF.  */

bool
csky_symbol_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == SYMBOL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (csky_symbol_mentioned_p (XVECEXP (x, i, j)))
	      return true;
	}
      else if (fmt[i] == 'e' && csky_symbol_mentioned_p (XEXP (x, i)))
	return true;
    }
  return false;
}


/* Return TRUE if X references a LABEL_REF.  */

bool
csky_label_mentioned_p (rtx x)
{
  const char *fmt;
  int i;

  if (GET_CODE (x) == LABEL_REF)
    return true;

  fmt = GET_RTX_FORMAT (GET_CODE (x));
  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
    {
      if (fmt[i] == 'E')
	{
	  int j;

	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
	    if (csky_label_mentioned_p (XVECEXP (x, i, j)))
	      return true;
	}
      else if (fmt[i] == 'e' && csky_label_mentioned_p (XEXP (x, i)))
	return true;
    }

  return false;
}


static bool
tls_unspec_mentioned_p (rtx x)
{
  switch (GET_CODE (x))
    {
    case CONST:
      return tls_unspec_mentioned_p (XEXP (x, 0));

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_TLS)
	return true;

    /* Fall through.  */
    default:
      return false;
    }
}


/* Implement LEGITIMATE_PIC_OPERAND_P.  */
bool
csky_legitimate_pic_operand_p (rtx x)
{
  if (tls_unspec_mentioned_p (x))
    return true;
  if (csky_symbol_mentioned_p (x) || csky_label_mentioned_p (x))
    return false;
  return true;
}

rtx
csky_legitimize_pic_address (rtx orig, rtx reg, bool gotrel_p)
{
  rtx pic_reg = gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM);
  bool optimize_p = false;

  if (GET_CODE (orig) == SYMBOL_REF || GET_CODE (orig) == LABEL_REF)
    {
      rtx pic_ref, address, rtx_tmp;
      rtx insn;
      rtx pic_reg = gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM);
      int subregs = 0;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	  subregs = 1;
	}

      if (subregs)
	address = gen_reg_rtx (Pmode);
      else
	address = reg;

      if (GET_CODE (orig) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (orig))
	{
	  /* When gotrel_p is true generate sym@GOT, otherwise sym@PLT.  */
	  rtx_tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig),
				    (gotrel_p
				     ? UNSPEC_PIC_SYMBOL_GOT
				     : UNSPEC_PIC_SYMBOL_PLT));
	  optimize_p = gotrel_p;
	  if (flag_pic != 1)
	    {
	      emit_move_insn (address, rtx_tmp);
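	      /* Wrapping the loaded offset in (mult rx 1) presumably
		 keeps the (plus gb (mult rx 1)) address in the form
		 matched by the indexed-load patterns; the original
		 code gives no rationale.  */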
	      rtx_tmp = gen_rtx_MULT (Pmode, address, GEN_INT (1));
	    }
	  pic_ref = gen_const_mem (Pmode,
				   gen_rtx_PLUS (Pmode, pic_reg, rtx_tmp));
	}
      else
	{
	  /* bsr symbol */
	  if (flag_pic == 1 && !gotrel_p)
	    {
	      pic_ref = gen_rtx_UNSPEC (Pmode,
					gen_rtvec (1, orig),
					UNSPEC_PIC_SYMBOL_BSR);
	      return pic_ref;
	    }
	  /* grs rx, symbol */
	  else if (flag_pic == 1 && (GET_CODE (orig) == SYMBOL_REF)
		   && SYMBOL_REF_FUNCTION_P (orig))
	    {
	      pic_ref = gen_rtx_UNSPEC (Pmode,
					gen_rtvec (1, orig),
					UNSPEC_PIC_SYMBOL_GRS);
	      return pic_ref;
	    }
	  /* lrw rx, symbol@GOTOFF; add rx, rx, gb */
	  else
	    {
	      rtx_tmp = gen_rtx_UNSPEC (Pmode,
					gen_rtvec (1, orig),
					UNSPEC_PIC_SYMBOL_GOTOFF);
	      emit_move_insn (address, rtx_tmp);
	      pic_ref = gen_rtx_PLUS (Pmode, address, pic_reg);
	      optimize_p = true;
	    }
	}

      insn = emit_move_insn (reg, pic_ref);
      /* Put a REG_EQUAL note on this insn,
	 so that it can be optimized by the loop optimizer.  */
      if (optimize_p)
	set_unique_reg_note (insn, REG_EQUAL, orig);

      return reg;
    }
  else if (GET_CODE (orig) == CONST)
    {
      rtx base, offset;

      if (GET_CODE (XEXP (orig, 0)) == PLUS
	  && XEXP (XEXP (orig, 0), 1) == pic_reg)
	return orig;

      if (reg == 0)
	{
	  gcc_assert (can_create_pseudo_p ());
	  reg = gen_reg_rtx (Pmode);
	}

      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);

      base = csky_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
					  reg, gotrel_p);
      offset = csky_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
					    base == reg ? 0 : reg, gotrel_p);

      if (GET_CODE (offset) == CONST_INT)
	return plus_constant (Pmode, base, INTVAL (offset));

      return gen_rtx_PLUS (Pmode, base, offset);
    }

  return orig;
}


/* Functions to output assembly code for a function call.  */

char *
csky_output_call (rtx *operands, int index)
{
  static char buffer[20];
  rtx addr = operands[index];

  if (REG_P (addr))
    sprintf (buffer, "jsr\t%%%d", index);
  else if (flag_pic && (GET_CODE (addr) == UNSPEC))
    sprintf (buffer, "bsr\t%%%d", index);
  else
    sprintf (buffer, "jbsr\t%%%d", index);

  return buffer;
}


/* Worker function for TARGET_ASM_TRAMPOLINE_TEMPLATE.
   Output assembler code for a block containing the constant parts
   of a trampoline, leaving space for the variable parts.
   Note that STATIC_CHAIN_REGNUM is t1 (aka r12) on ck801 and
   t1 (r13) otherwise.  */

static void
csky_asm_trampoline_template (FILE *f)
{
  if (CSKY_ISA_FEATURE (2E3))
    {
      fprintf (f, "\tlrw\t%s, [.Lstatic_chain]\n",
	       reg_names[STATIC_CHAIN_REGNUM]);
      fprintf (f, "\tjmpi\t[.Lfunc_address]\n");
      /* 2 32-bit insns = 8 bytes.  */
    }
  else if (CSKY_TARGET_ARCH (CK801))
    {
      /* It's hard to provide general support for trampolines on this
	 core.  We need a register other than the one holding the
	 static chain (r13) to hold the function pointer for the
	 indirect jump to it.  But ck801 has such a limited register set
	 there is no other call-clobbered scratch register available -- in
	 particular, this core does not have r12, which we use for the
	 ck802 case below.  If we use a callee-saved register like r4,
	 saving the old value on the stack screws up the stack frame
	 if there are overflow arguments pushed on the stack
	 by the caller.  In theory we could test for that and handle
	 limited cases with parameters that all fit in r0-r3 with no
	 stack overflow, but punt for now.  */
      sorry ("Nested function trampolines not supported on CK801.");
    }
  else
    {
      fprintf (f, "\tlrw\t%s, [.Lfunc_address]\n",
	       reg_names[CSKY_T1_REGNUM]);
      fprintf (f, "\tlrw\t%s, [.Lstatic_chain]\n",
	       reg_names[STATIC_CHAIN_REGNUM]);
      fprintf (f, "\tjmp\t%s\n",
	       reg_names[CSKY_T1_REGNUM]);
      /* Align the constant pool on a word boundary.  */
      fprintf (f, "\t.align 2\n");
      /* 2 32-bit lrw insns + 16-bit jump + 16-bit pad = 12 bytes.  */
    }

  fprintf (f, ".Lstatic_chain:\n");
  fprintf (f, "\t.long 0\n");
  fprintf (f, ".Lfunc_address:\n");
  fprintf (f, "\t.long 0\n");
  /* 2 words of constant pool = 8 bytes.  */
}

/* Worker function for TARGET_TRAMPOLINE_INIT.  */

static void
csky_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx mem, a_tramp;
  int pool = TRAMPOLINE_SIZE - 8;
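  /* The template ends with two .long slots (8 bytes; see
     csky_asm_trampoline_template) holding the static chain and the
     function address, so the pool starts at TRAMPOLINE_SIZE - 8.  */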

  emit_block_move (m_tramp, assemble_trampoline_template (),
		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);

  mem = adjust_address (m_tramp, SImode, pool);
  emit_move_insn (mem, chain_value);
  mem = adjust_address (m_tramp, SImode, pool + 4);
  emit_move_insn (mem, fnaddr);

  a_tramp = XEXP (m_tramp, 0);
  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
		     LCT_NORMAL, VOIDmode, a_tramp, Pmode,
		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
}


/* Emit a comparison insn for float values.
   Return true if the comparison is inverted.  */

bool
csky_emit_compare_float (enum rtx_code code, rtx op0, rtx op1)
{
  rtx cc_reg = gen_rtx_REG (CCmode, CSKY_CC_REGNUM);
  bool invert;
  machine_mode mode = GET_MODE (op1);

  if (op1 != CONST0_RTX (mode))
    op1 = force_reg (mode, op1);

  invert = false;
  switch (code)
    {
    case EQ:
      code = NE;
      invert = true;
      break;

    case NE:
      break;
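    /* For LE and GT, a zero operand must be forced into a register;
       presumably the FPU's compare-with-zero forms do not cover
       these cases.  */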
    case LE:
      if (op1 == CONST0_RTX (mode))
	op1 = force_reg (mode, op1);
      break;
    case GT:
      if (op1 == CONST0_RTX (mode))
	op1 = force_reg (mode, op1);
      break;
    case GE:
      break;
    case LT:
      if (op1 == CONST0_RTX (mode))
	{
	  code = GE;
	  invert = true;
	}
      break;
    case UNORDERED:
      break;
    case ORDERED:
      code = UNORDERED;
      invert = true;
      break;

    default:
      break;
    }

  emit_insn (gen_rtx_SET (cc_reg, gen_rtx_fmt_ee (code, CCmode, op0, op1)));

  return invert;
}

/* Support for the Q memory constraint.  Returns true if OP is a MEM RTX
   with an address consisting of base + index or base + displacement.  */
bool
csky_valid_fpuv2_mem_operand (rtx op)
{
  struct csky_address addr;

  if (GET_CODE (op) != MEM)
    return false;

  if (!decompose_csky_address (XEXP (op, 0), &addr))
    return false;

  /* Verify the base register.  */
  if (!is_csky_address_register_rtx_p (addr.base, 0))
    return false;

  /* Verify the index operand.  */
  if (addr.index)
    {
      if (!is_csky_address_register_rtx_p (addr.index, 0))
	return false;

      if (addr.scale == 1 || addr.scale == 2 || addr.scale == 4
	  || addr.scale == 8)
	return true;

      return false;
    }
  /* Verify the displacement operand: a multiple of 4 in [0, 1020].  */
  else if (addr.disp)
    {
      rtx disp = addr.disp;

      if (!CONST_INT_P (disp))
	return false;

      if (((unsigned) INTVAL (disp) % 4) == 0
	  && (unsigned) INTVAL (disp) <= (unsigned) 1020)
	return true;

       return false;
    }
  return true;
}


/* Returns the (interrupt) function type of the current
   function, or CSKY_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
csky_isr_value (tree argument)
{
  const isr_attribute_entry *ptr;
  const char *arg;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return CSKY_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return CSKY_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_map; ptr->arg != NULL; ptr++)
    if (strcmp (arg, ptr->arg) == 0)
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return CSKY_FT_UNKNOWN;
}

/* Handle an attribute requiring a FUNCTION_DECL;
   arguments as in struct attribute_spec.handler.  */

static tree
csky_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (OPT_Wattributes, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}

/* Handle an "interrupt" or "isr" attribute;
   arguments as in struct attribute_spec.handler.  */

static tree
csky_handle_isr_attribute (tree *node, tree name, tree args, int flags,
			   bool *no_add_attrs)
{

  if (!TARGET_ISTACK)
    {
      warning (OPT_Wattributes, "%qE attribute ignored without %<-mistack%>",
	       name);
      *no_add_attrs = true;
      return NULL_TREE;
    }

  if (DECL_P (*node))
    {
      if (TREE_CODE (*node) != FUNCTION_DECL)
	{
	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
		   name);
	  *no_add_attrs = true;
	}
    }
  else
    {
      if (TREE_CODE (*node) == FUNCTION_TYPE
	  || TREE_CODE (*node) == METHOD_TYPE)
	{
	  if (csky_isr_value (args) == CSKY_FT_UNKNOWN)
	    {
	      warning (OPT_Wattributes, "%qE attribute ignored", name);
	      *no_add_attrs = true;
	    }
	}
      else if (TREE_CODE (*node) == POINTER_TYPE
	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
	       && csky_isr_value (args) != CSKY_FT_UNKNOWN)
	{
	  *node = build_variant_type_copy (*node);
	  TREE_TYPE (*node) = build_type_attribute_variant (TREE_TYPE (*node),
	    tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
	  *no_add_attrs = true;
	}
      else if (flags & ((int)ATTR_FLAG_DECL_NEXT
			| (int)ATTR_FLAG_FUNCTION_NEXT
			| (int)ATTR_FLAG_ARRAY_NEXT))
	{
	  *no_add_attrs = true;
	  return tree_cons (name, args, NULL_TREE);
	}
      else
	warning (OPT_Wattributes, "%qE attribute ignored", name);
    }
  return NULL_TREE;
}


/* Implement TARGET_REGISTER_MOVE_COST: compute extra cost of moving data
   between one register class and another.  */

int
csky_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
			 reg_class_t from, reg_class_t to)
{
#define GR_REG_CLASS_P(CLASS) \
  ((CLASS) == GENERAL_REGS || (CLASS) == MINI_REGS || (CLASS) == SP_REGS \
   || (CLASS) == LOW_REGS)

#define HILO_REG_CLASS_P(CLASS) \
  ((CLASS) == HI_REGS || (CLASS) == LO_REGS || (CLASS) == HILO_REGS)

#define V_REG_CLASS_P(CLASS) \
  ((CLASS) == V_REGS)

  if (V_REG_CLASS_P (from) && V_REG_CLASS_P (to))
    return 2;

  if ((V_REG_CLASS_P (from) && GR_REG_CLASS_P (to))
      || (GR_REG_CLASS_P (from) && V_REG_CLASS_P (to)))
    return 6;

  if ((HILO_REG_CLASS_P (from) && GR_REG_CLASS_P (to))
      || (GR_REG_CLASS_P (from) && HILO_REG_CLASS_P (to)))
    return 16;

  if (HILO_REG_CLASS_P (from) && HILO_REG_CLASS_P (to))
    return 32;

  if ((HILO_REG_CLASS_P (from) && V_REG_CLASS_P (to))
      || (V_REG_CLASS_P (from) && HILO_REG_CLASS_P (to)))
    return 64;

  return 2;
}


/* Implement TARGET_MEMORY_MOVE_COST: compute the cost of moving data
   between registers and memory.  */

int
csky_memory_move_cost (machine_mode mode, reg_class_t rclass,
		       bool in)
{
  return (4 + memory_move_secondary_cost (mode, rclass, in));
}


/* TARGET_RTX_COSTS helper for ck801/ck802.  */

static bool
ck802_ck801_rtx_costs (rtx x, int code, int outer_code, int *total,
		       bool speed)
{
  machine_mode mode = GET_MODE (x);
  switch (code)
    {
      /* Accessing memory costs quite a lot for the first word.  */
    case MEM:
      *total = COSTS_N_INSNS (1 + CSKY_NUM_REGS (mode));
      return false;
    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = 100;
      return true;

    case ROTATE:
    case ROTATERT:
    case ASHIFT:
    case LSHIFTRT:
    case ASHIFTRT:
      if (speed)
	*total = 2;
      else
	*total = COSTS_N_INSNS (1);
      return false;

    case MINUS:
    case PLUS:
      *total = COSTS_N_INSNS (CSKY_NUM_REGS (mode));
      return false;

    case AND:
      {
	enum rtx_code subcode = GET_CODE (XEXP (x, 1));

	/* If subcode is "not", we'll try to combine it into e.g. "andn"
	   instruction, so give AND itself zero cost. */
	if (subcode == NOT)
	  {
	    *total = 0;
	    return false;
	  }
      }
      /* Fall through.  */
    case XOR:
    case IOR:
      *total = COSTS_N_INSNS (CSKY_NUM_REGS (mode));
      return false;

    case MULT:
      /* FIXME:	 is ixw supported on ck801/ck802?  */
      /* We can use "ix.h/w" insn to replace multiply by 2 or 4.
	 "ix.h/w" is a 32-bit insn, so let its cost be a little less than
	 "mult" insn.  */
      if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	{
	  unsigned HOST_WIDE_INT m
	    = (unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)));
	  if ((m == 2 || m == 4) && outer_code == PLUS)
	    {
	      *total = 2;
	      return true;
	    }
	  else
	    {
	      /* Because mult is relatively slow compared to other
		 operations, we try to use other insns when optimizing
		 for speed.  When optimizing for size, give it a lower
		 cost.  */
	      if (speed)
		{
		  *total = COSTS_N_INSNS (10 * CSKY_NUM_REGS (mode));
		  return true;
		}
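	      /* Size estimate: one insn plus roughly one unit per
		 two bits of the constant multiplier, presumably
		 modeling an early-terminating iterative multiply.  */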
	      int cycle = 0;
	      while (m)
		{
		  m >>= 2;
		  cycle++;
		}
	      *total = COSTS_N_INSNS (1) + cycle;
	      return false;
	    }
	}
      if (!speed)
	*total = COSTS_N_INSNS (1);
      return false;

    case NEG:
      /* Usually, we use subtract from 0 to substitute for neg, and
	 it costs 1 extra insn to move 0 to a register.  */
      *total = COSTS_N_INSNS (2 * CSKY_NUM_REGS (mode));
      return false;

    case NOT:
      *total = COSTS_N_INSNS (CSKY_NUM_REGS (mode));
      return false;

    case COMPARE:
      *total = COSTS_N_INSNS (1);
      return false;

    case SIGN_EXTEND:
    case ZERO_EXTEND:
      *total = COSTS_N_INSNS (CSKY_NUM_REGS (mode));
      return false;

    case SIGN_EXTRACT:
    case ZERO_EXTRACT:
      if (REG_P (XEXP (x, 0))
	  && CONST_INT_P (XEXP (x, 1))
	  && CONST_INT_P (XEXP (x, 2))
	  && INTVAL (XEXP (x, 1)) == 8
	  && INTVAL (XEXP (x, 2)) % 8 == 0)
	{
	  *total = COSTS_N_INSNS (1);
	  return true;
	}
      *total = COSTS_N_INSNS (CSKY_NUM_REGS (mode));
      return false;

    case CONST_INT:
      {
	unsigned HOST_WIDE_INT t = (unsigned HOST_WIDE_INT) (INTVAL (x));

	if (outer_code == COMPARE)
	  {
	    if (t < 0x10000)
	      *total = 0;
	    else
	      *total = COSTS_N_INSNS (2);
	  }
	else if (outer_code == AND || outer_code == IOR || outer_code == XOR)
	  {
	    /* "andi,xori,ori" are 32-bit insns, so let it cost a
	       little more.  */
	    if (t < 0x1000)
	      {
		/* Try replacing "andi" by "sextb/h", so let it cost more.  */
		if (outer_code == AND && (t == 0xff || t == 0xffff))
		  {
		    *total = 8;
		    return true;
		  }
		*total = 2;
	      }
	    else if (t < 0x10000)
	      *total = COSTS_N_INSNS (1);
	    else
	      *total = COSTS_N_INSNS (2);
	  }
	else if (outer_code == PLUS || outer_code == MINUS)
	  {
	    /* "addi/subi rx,ry,imm", if imm<9, it is more often a
	       16-bit insn.  If imm>=9, use "movi" insn; it's probably
	       less than "addi/subi". */
	    if (t < 9)
	      *total = 0;
	    else if (t < 0x1000)
	      *total = 2;
	    else if (t < 0x10000)
	      *total = COSTS_N_INSNS (1);
	    else
	      *total = COSTS_N_INSNS (2);
	  }
	else if (outer_code == ROTATE || outer_code == ROTATERT
		 || outer_code == LSHIFTRT || outer_code == ASHIFTRT
		 || outer_code == ASHIFT)
	  {
	    if (t < 32)
	      *total = 0;
	    else
	      *total = COSTS_N_INSNS (2);
	  }
	else
	  {
	    if (t < 0x10000)
	      {
		if (outer_code == SET && t < 256)
		  *total = 0;
		else
		  *total = COSTS_N_INSNS (1);
	      }
	    else
	      *total = COSTS_N_INSNS (2);
	  }
      }
      return true;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (3);
      return true;
    default:
      return false;
    }
}


/* TARGET_RTX_COSTS helper for ck803.  */

static bool
ck803_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
		 int *total, bool speed ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case SET:
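      /* A load whose address uses an index register costs more than
	 one using a simple base or base+offset address.  */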
      if (MEM_P (XEXP (x, 1)))
	{
	  struct csky_address op1;
	  bool address_valid
	    = decompose_csky_address (XEXP (XEXP (x, 1), 0), &op1);
	  if (op1.index)
	    {
	      *total = COSTS_N_INSNS (3);
	      return true;
	    }
	  else if (address_valid)
	    {
	      *total = COSTS_N_INSNS (1);
	      return true;
	    }
	}
      if (REG_P (XEXP (x, 0)) && GET_CODE (XEXP (x, 1)) == PLUS)
	{
	  rtx sub_exp = XEXP (x, 1);
	  if (REG_P (XEXP (sub_exp, 0)) && REG_P (XEXP (sub_exp, 1)))
	    {
	      *total = COSTS_N_INSNS (1);
	      return true;
	    }
	}
      return false;
    case MULT:
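      /* Multiplication by a positive even constant below 0xffffffff
	 is treated as a single insn.  */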
      if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	{
	  HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
	  if (val % 2 == 0 && val < 0xffffffff && val > 0)
	    {
	      *total = COSTS_N_INSNS (1);
	      return true;
	    }
	}
      return false;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (3);
      return true;
    default:
      return false;
    }
}

/* TARGET_RTX_COSTS helper for ck807 and ck810.  */

static bool
ck807_ck810_rtx_costs (rtx x, int code,
		       int outer_code ATTRIBUTE_UNUSED,
		       int *total, bool speed ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case MULT:
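      /* As on ck803, multiplication by a positive even constant
	 below 0xffffffff counts as a single insn.  */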
      if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
	{
	  HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
	  if (val % 2 == 0 && val < 0xffffffff && val > 0)
	    {
	      *total = COSTS_N_INSNS (1);
	      return true;
	    }
	}
      return false;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (3);
      return true;
    default:
      return false;
    }
}


/* Implement TARGET_RTX_COSTS, to compute a (partial) cost for rtx X.
   Return true if the complete cost has been computed, and false if
   subexpressions should be scanned.  In either case, *TOTAL contains
   the cost result.  */

static bool
csky_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
		int opno ATTRIBUTE_UNUSED, int *total, bool speed)
{
  int code = GET_CODE (x);

  if (CSKY_TARGET_ARCH (CK802) || CSKY_TARGET_ARCH (CK801))
    return ck802_ck801_rtx_costs (x, code, outer_code, total, speed);
  else if (CSKY_TARGET_ARCH (CK803))
    return ck803_rtx_costs (x, code, outer_code, total, speed);
  else if (CSKY_TARGET_ARCH (CK807) || CSKY_TARGET_ARCH (CK810))
    return ck807_ck810_rtx_costs (x, code, outer_code, total, speed);
  else
    gcc_unreachable ();
}

/* Emit assembly code for CASESI.  This is only used on CK801 and CK802
   when optimizing for size, and uses helper functions in libgcc instead
   of doing the control transfer inline.  */

const char *
csky_output_casesi (rtx *operands)
{
  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));

  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);

  switch (GET_MODE (diff_vec))
    {
    case E_QImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned
	      ? "jbsr\t___gnu_csky_case_uqi"
	      : "jbsr\t___gnu_csky_case_sqi");
    case E_HImode:
      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned
	      ? "jbsr\t___gnu_csky_case_uhi"
	      : "jbsr\t___gnu_csky_case_shi");
    case E_SImode:
      return "jbsr\t___gnu_csky_case_si";
    default:
      gcc_unreachable ();
    }
}
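
/* For example, for a QImode table with unsigned offsets this emits

     jbsr	___gnu_csky_case_uqi

   with the dispatch table following the call; the libgcc helper is
   expected to index the table at its return address and adjust that
   address to perform the jump, analogous to the Thumb-1 case helpers
   (the helper's exact mechanics are an assumption here).  */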

/* Implement TARGET_SCHED_ISSUE_RATE.  CK810 is dual-issue; all other
   cores are single-issue.  */
static int
csky_sched_issue_rate (void)
{
  if (CSKY_TARGET_ARCH (CK810))
    return 2;
  else
    return 1;
}


/* Implement TARGET_SCHED_ADJUST_COST.  Correct the value of COST
   based on the relationship between INSN and DEP through the
   dependence DEP_TYPE, and return the new value.  */

static int
csky_sched_adjust_cost (rtx_insn *insn,
			int dep_type,
			rtx_insn *dep,
			int cost,
			unsigned int dw ATTRIBUTE_UNUSED)
{
  if (dep_type == REG_DEP_ANTI || dep_type == REG_DEP_OUTPUT)
    return 0;
  /* The REG_DEP_TRUE situation.  */
  else if (recog_memoized (insn) >= 0 && recog_memoized (dep) >= 0)
    {
      enum attr_type insn_type = get_attr_type (insn);
      if (CSKY_TARGET_ARCH (CK803))
	{
	  /* If the base register of a load or store was written by
	     the previous insn, the access is delayed by one cycle.  */
	  if (insn_type == TYPE_LOAD || insn_type == TYPE_STORE)
	    {
	      rtx pattern = PATTERN (insn);

	      gcc_assert (GET_CODE (pattern) == SET);
	      rtx addr = (insn_type == TYPE_LOAD
			  ? SET_SRC (pattern) : SET_DEST (pattern));

	      enum rtx_code code = GET_CODE (addr);
	      if (code == ZERO_EXTEND || code == SIGN_EXTEND)
		addr = XEXP (addr, 0);
	      gcc_assert (GET_CODE (addr) == MEM);

	      rtx base = XEXP (addr, 0);
	      rtx reg = NULL_RTX;
	      if (REG_P (base))
		reg = base;
	      if (GET_CODE (base) == PLUS
		  && GET_CODE (XEXP (base, 0)) == REG)
		reg = XEXP (base, 0);
	      if ((reg != NULL_RTX) && reg_set_p (reg, PATTERN (dep)))
		return 2;
	    }
	}
      else if (CSKY_TARGET_ARCH (CK802))
	{
	  if ((insn_type == TYPE_CALL_JSR || insn_type == TYPE_BRANCH_JMP)
	      && get_attr_type (dep) != TYPE_LOAD)
	    return 1;

	  if (insn_type == TYPE_LOAD || insn_type == TYPE_STORE)
	    {
	      rtx pattern = PATTERN (insn);

	      gcc_assert (GET_CODE (pattern) == SET);

	      rtx addr = (insn_type == TYPE_LOAD
			  ? SET_SRC (pattern) : SET_DEST (pattern));

	      enum rtx_code code = GET_CODE (addr);
	      if (code == ZERO_EXTEND || code == SIGN_EXTEND)
		addr = XEXP (addr, 0);
	      gcc_assert (GET_CODE (addr) == MEM);

	      rtx base = XEXP (addr, 0);
	      rtx reg = NULL_RTX;
	      if (REG_P (base))
		reg = base;
	      if (GET_CODE (base) == PLUS
		  && GET_CODE (XEXP (base, 0)) == REG)
		reg = XEXP (base, 0);
	      if ((reg != NULL_RTX) && reg_set_p (reg, PATTERN (dep))
		  && get_attr_type (dep) != TYPE_LOAD)
		return 1;

	      if (insn_type == TYPE_STORE
		  && reg_referenced_p (SET_SRC (pattern), PATTERN (dep)))
		return 1;
	    }
	}
    }
  return cost;
}
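
/* For example, on CK803 in a sequence like

     addi  r2, r3, 4
     ld.w  r5, (r2, 0)

   the load's base register r2 is set by the immediately preceding
   insn, so the hook returns a cost of 2 to model the one-cycle
   address-generation stall.  (The assembly here is illustrative.)  */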

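/* Implement TARGET_WARN_FUNC_RETURN.  */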
static bool
csky_warn_func_return (tree decl)
{
  /* Naked functions are implemented entirely in assembly, including the
     return sequence, so suppress warnings about this.  */
  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
}


/* Implement TARGET_RETURN_IN_MEMORY to decide whether TYPE should be
   returned in memory (true) or in a register (false).
   FNTYPE is the type of the function making the call.  */
static bool
csky_return_in_memory (const_tree type,
		       const_tree fntype ATTRIBUTE_UNUSED)
{
  const HOST_WIDE_INT size = int_size_in_bytes (type);
  return (size == -1 || size > 2 * UNITS_PER_WORD);
}
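
/* For example, with 4-byte words an 8-byte aggregate is returned in
   registers, while a 12-byte aggregate or a variable-sized type
   (SIZE == -1) is returned in memory.  */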


/* Implement TARGET_DWARF_REGISTER_SPAN.
   DWARF models VFP registers as 64-bit or 128-bit registers by
   default.  GCC models them as 32-bit registers, so we need to
   describe this to the DWARF generation code.  Other registers can
   use the default.  */
static rtx
csky_dwarf_register_span (rtx rtl)
{
  machine_mode mode;
  unsigned regno;
  rtx parts[16];
  int nregs;
  int i;

  regno = REGNO (rtl);
  if (!CSKY_VREG_P (regno))
    return NULL_RTX;

  mode = GET_MODE (rtl);
  if (GET_MODE_SIZE (mode) < 8)
    return NULL_RTX;

  if (TARGET_SOFT_FPU)
    {
      nregs = GET_MODE_SIZE (mode) / 4;
      for (i = 0; i < nregs; i += 2)
	{
	  if (TARGET_BIG_ENDIAN)
	    {
	      parts[i] = gen_rtx_REG (SImode, regno + i + 1);
	      parts[i + 1] = gen_rtx_REG (SImode, regno + i);
	    }
	  else
	    {
	      parts[i] = gen_rtx_REG (SImode, regno + i);
	      parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
	    }
	}
    }
  else
    {
      /* FIXME: dwarf2 assumes all registers are the same width as
	 the CPU word.  Describe the 64-bit FPU registers as 32-bit
	 pieces here; the unwind processing will be adapted to the
	 C-SKY architecture later.  */
      nregs = GET_MODE_SIZE (mode) / 8;
      for (i = 0; i < nregs; i++)
	parts[i] = gen_rtx_REG (SImode, regno + i);
    }

  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
}
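
/* For example, with TARGET_SOFT_FPU a little-endian DFmode value in
   register N is described as (parallel [(reg:SI N) (reg:SI N+1)]),
   while with a hard FPU each 64-bit register is described by a
   single SImode entry.  */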

/* Implement TARGET_INIT_LIBFUNCS.  */

static void
csky_init_libfuncs (void)
{
  if (TARGET_CSKY_LINUX)
    init_sync_libfuncs (UNITS_PER_WORD);
  if (!TARGET_LIBCCRT)
    return;

  #define CSKY_GCC_SYM(sym) "__csky_ccrt_" # sym
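  /* For example, CSKY_GCC_SYM (divsi3) expands to the string
     "__csky_ccrt_divsi3".  */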

  /* int */

  /* Arithmetic functions */
  set_optab_libfunc (ashl_optab,    DImode, CSKY_GCC_SYM (ashldi3));
  set_optab_libfunc (ashr_optab,    DImode, CSKY_GCC_SYM (ashrdi3));
  set_optab_libfunc (sdiv_optab,    SImode, CSKY_GCC_SYM (divsi3));
  set_optab_libfunc (sdiv_optab,    DImode, CSKY_GCC_SYM (divdi3));
  set_optab_libfunc (lshr_optab,    DImode, CSKY_GCC_SYM (lshrdi3));
  set_optab_libfunc (smod_optab,    SImode, CSKY_GCC_SYM (modsi3));
  set_optab_libfunc (smod_optab,    DImode, CSKY_GCC_SYM (moddi3));
  set_optab_libfunc (smul_optab,    DImode, CSKY_GCC_SYM (muldi3));
  set_optab_libfunc (neg_optab,	    DImode, CSKY_GCC_SYM (negdi2));
  set_optab_libfunc (udiv_optab,    SImode, CSKY_GCC_SYM (udivsi3));
  set_optab_libfunc (udiv_optab,    DImode, CSKY_GCC_SYM (udivdi3));
  set_optab_libfunc (udivmod_optab, DImode, CSKY_GCC_SYM (udivmoddi4));
  set_optab_libfunc (umod_optab,    SImode, CSKY_GCC_SYM (umodsi3));
  set_optab_libfunc (umod_optab,    DImode, CSKY_GCC_SYM (umoddi3));

  /* Comparison functions */
  set_optab_libfunc (cmp_optab,	    DImode, CSKY_GCC_SYM (cmpdi2));
  set_optab_libfunc (ucmp_optab,    DImode, CSKY_GCC_SYM (ucmpdi2));

  /* Trapping arithmetic functions */
  set_optab_libfunc (absv_optab,    SImode, CSKY_GCC_SYM (absvsi2));
  set_optab_libfunc (absv_optab,    DImode, CSKY_GCC_SYM (absvdi2));
  set_optab_libfunc (addv_optab,    SImode, CSKY_GCC_SYM (addvsi3));
  set_optab_libfunc (addv_optab,    DImode, CSKY_GCC_SYM (addvdi3));
  set_optab_libfunc (smulv_optab,   SImode, CSKY_GCC_SYM (mulvsi3));
  set_optab_libfunc (smulv_optab,   DImode, CSKY_GCC_SYM (mulvdi3));
  set_optab_libfunc (negv_optab,    SImode, CSKY_GCC_SYM (negvsi2));
  set_optab_libfunc (negv_optab,    DImode, CSKY_GCC_SYM (negvdi2));
  set_optab_libfunc (subv_optab,    SImode, CSKY_GCC_SYM (subvsi3));
  set_optab_libfunc (subv_optab,    DImode, CSKY_GCC_SYM (subvdi3));

  /* Bit operations */
  set_optab_libfunc (clz_optab,	    SImode, CSKY_GCC_SYM (clzsi2));
  set_optab_libfunc (clz_optab,	    DImode, CSKY_GCC_SYM (clzdi2));
  set_optab_libfunc (ctz_optab,	    SImode, CSKY_GCC_SYM (ctzsi2));
  set_optab_libfunc (ctz_optab,	    DImode, CSKY_GCC_SYM (ctzdi2));
  set_optab_libfunc (ffs_optab,	    DImode, CSKY_GCC_SYM (ffsdi2));
  set_optab_libfunc (parity_optab,  SImode, CSKY_GCC_SYM (paritysi2));
  set_optab_libfunc (parity_optab,  DImode, CSKY_GCC_SYM (paritydi2));
  set_optab_libfunc (popcount_optab,SImode, CSKY_GCC_SYM (popcountsi2));
  set_optab_libfunc (popcount_optab,DImode, CSKY_GCC_SYM (popcountdi2));
  set_optab_libfunc (bswap_optab,   SImode, CSKY_GCC_SYM (bswapsi2));
  set_optab_libfunc (bswap_optab,   DImode, CSKY_GCC_SYM (bswapdi2));

  /* float */

  /* Arithmetic functions */
  set_optab_libfunc (add_optab,	    SFmode, CSKY_GCC_SYM (addsf3));
  set_optab_libfunc (add_optab,	    DFmode, CSKY_GCC_SYM (adddf3));
  set_optab_libfunc (sub_optab,	    SFmode, CSKY_GCC_SYM (subsf3));
  set_optab_libfunc (sub_optab,	    DFmode, CSKY_GCC_SYM (subdf3));
  set_optab_libfunc (smul_optab,    SFmode, CSKY_GCC_SYM (mulsf3));
  set_optab_libfunc (smul_optab,    DFmode, CSKY_GCC_SYM (muldf3));
  set_optab_libfunc (sdiv_optab,    SFmode, CSKY_GCC_SYM (divsf3));
  set_optab_libfunc (sdiv_optab,    DFmode, CSKY_GCC_SYM (divdf3));
  set_optab_libfunc (neg_optab,	    SFmode, CSKY_GCC_SYM (negsf2));
  set_optab_libfunc (neg_optab,	    DFmode, CSKY_GCC_SYM (negdf2));

  /* Conversion functions */
  set_conv_libfunc (sext_optab,	   DFmode, SFmode, CSKY_GCC_SYM (extendsfdf2));
  set_conv_libfunc (trunc_optab,   SFmode, DFmode, CSKY_GCC_SYM (truncdfsf2));
  set_conv_libfunc (sfix_optab,	   SImode, SFmode, CSKY_GCC_SYM (fixsfsi));
  set_conv_libfunc (sfix_optab,	   SImode, DFmode, CSKY_GCC_SYM (fixdfsi));
  set_conv_libfunc (sfix_optab,	   DImode, SFmode, CSKY_GCC_SYM (fixsfdi));
  set_conv_libfunc (sfix_optab,	   DImode, DFmode, CSKY_GCC_SYM (fixdfdi));
  set_conv_libfunc (ufix_optab,	   SImode, SFmode, CSKY_GCC_SYM (fixunssfsi));
  set_conv_libfunc (ufix_optab,	   SImode, DFmode, CSKY_GCC_SYM (fixunsdfsi));
  set_conv_libfunc (ufix_optab,	   DImode, SFmode, CSKY_GCC_SYM (fixunssfdi));
  set_conv_libfunc (ufix_optab,	   DImode, DFmode, CSKY_GCC_SYM (fixunsdfdi));
  set_conv_libfunc (sfloat_optab,  SFmode, SImode, CSKY_GCC_SYM (floatsisf));
  set_conv_libfunc (sfloat_optab,  DFmode, SImode, CSKY_GCC_SYM (floatsidf));
  set_conv_libfunc (sfloat_optab,  SFmode, DImode, CSKY_GCC_SYM (floatdisf));
  set_conv_libfunc (sfloat_optab,  DFmode, DImode, CSKY_GCC_SYM (floatdidf));
  set_conv_libfunc (ufloat_optab,  SFmode, SImode, CSKY_GCC_SYM (floatunsisf));
  set_conv_libfunc (ufloat_optab,  DFmode, SImode, CSKY_GCC_SYM (floatunsidf));
  set_conv_libfunc (ufloat_optab,  SFmode, DImode, CSKY_GCC_SYM (floatundisf));
  set_conv_libfunc (ufloat_optab,  DFmode, DImode, CSKY_GCC_SYM (floatundidf));

  /* Comparison functions */
  set_optab_libfunc (cmp_optab,	   SFmode, CSKY_GCC_SYM (cmpsf2));
  set_optab_libfunc (cmp_optab,	   DFmode, CSKY_GCC_SYM (cmpdf2));
  set_optab_libfunc (unord_optab,  SFmode, CSKY_GCC_SYM (unordsf2));
  set_optab_libfunc (unord_optab,  DFmode, CSKY_GCC_SYM (unorddf2));
  set_optab_libfunc (eq_optab,	   SFmode, CSKY_GCC_SYM (eqsf2));
  set_optab_libfunc (eq_optab,	   DFmode, CSKY_GCC_SYM (eqdf2));
  set_optab_libfunc (ne_optab,	   SFmode, CSKY_GCC_SYM (nesf2));
  set_optab_libfunc (ne_optab,	   DFmode, CSKY_GCC_SYM (nedf2));
  set_optab_libfunc (ge_optab,	   SFmode, CSKY_GCC_SYM (gesf2));
  set_optab_libfunc (ge_optab,	   DFmode, CSKY_GCC_SYM (gedf2));
  set_optab_libfunc (lt_optab,	   SFmode, CSKY_GCC_SYM (ltsf2));
  set_optab_libfunc (lt_optab,	   DFmode, CSKY_GCC_SYM (ltdf2));
  set_optab_libfunc (le_optab,	   SFmode, CSKY_GCC_SYM (lesf2));
  set_optab_libfunc (le_optab,	   DFmode, CSKY_GCC_SYM (ledf2));
  set_optab_libfunc (gt_optab,	   SFmode, CSKY_GCC_SYM (gtsf2));
  set_optab_libfunc (gt_optab,	   DFmode, CSKY_GCC_SYM (gtdf2));
}


/* Implement TARGET_ADDRESS_COST to estimate cost of the memory address X.
   For C-SKY, (register) and (register + offset) have the same cost.
   Other situations cost more.  */

static int
csky_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
		   addr_space_t as ATTRIBUTE_UNUSED,
		   bool speed ATTRIBUTE_UNUSED)
{
  enum rtx_code code = GET_CODE (x);

  if (code == REG)
    return COSTS_N_INSNS (1);
  if (code == PLUS
      && REG_P (XEXP (x, 0))
      && CONST_INT_P (XEXP (x, 1)))
    return COSTS_N_INSNS (1);

  return COSTS_N_INSNS (3);
}
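
/* For example, (reg) and (plus (reg) (const_int 8)) both cost
   COSTS_N_INSNS (1) here, while an indexed address such as
   (plus (reg) (reg)) costs COSTS_N_INSNS (3).  */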


/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  */

static bool
csky_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
{
  *p1 = CSKY_CC_REGNUM;
  *p2 = INVALID_REGNUM;
  return true;
}


struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-csky.h"