diff gcc/config/csky/csky.c @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/gcc/config/csky/csky.c	Thu Oct 25 07:37:49 2018 +0900
@@ -0,0 +1,6795 @@
+/* GCC backend functions for C-SKY targets.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   Contributed by C-SKY Microsystems and Mentor Graphics.
+
+   This file is part of GCC.
+
+   GCC is free software; you can redistribute it and/or modify it
+   under the terms of the GNU General Public License as published
+   by the Free Software Foundation; either version 3, or (at your
+   option) any later version.
+
+   GCC is distributed in the hope that it will be useful, but WITHOUT
+   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
+   License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with GCC; see the file COPYING3.  If not see
+   <http://www.gnu.org/licenses/>.  */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "memmodel.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
+#include "tree.h"
+#include "cfghooks.h"
+#include "df.h"
+#include "tm_p.h"
+#include "stringpool.h"
+#include "attribs.h"
+#include "optabs.h"
+#include "regs.h"
+#include "emit-rtl.h"
+#include "recog.h"
+#include "cgraph.h"
+#include "c-family/c-common.h"
+#include "cpplib.h"
+#include "diagnostic-core.h"
+#include "alias.h"
+#include "fold-const.h"
+#include "stor-layout.h"
+#include "calls.h"
+#include "varasm.h"
+#include "output.h"
+#include "insn-attr.h"
+#include "flags.h"
+#include "reload.h"
+#include "explow.h"
+#include "expr.h"
+#include "cfgrtl.h"
+#include "sched-int.h"
+#include "common/common-target.h"
+#include "langhooks.h"
+#include "intl.h"
+#include "libfuncs.h"
+#include "params.h"
+#include "opts.h"
+#include "dumpfile.h"
+#include "target-globals.h"
+#include "builtins.h"
+#include "tm-constrs.h"
+#include "rtl-iter.h"
+#include "pass_manager.h"
+#include "tree-pass.h"
+#include "context.h"
+
+/* This file should be included last.  */
+#include "target-def.h"
+
+/* Stack and register size macros.  */
+
+#define CSKY_NUM_WORDS(SIZE) \
+  (((SIZE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
+#define CSKY_NUM_REGS(MODE) \
+  CSKY_NUM_WORDS (GET_MODE_SIZE (MODE))
+#define CSKY_STACK_ALIGN(SIZE) \
+  (CSKY_NUM_WORDS (SIZE) * UNITS_PER_WORD)
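+
+/* Worked example (illustrative only, assuming UNITS_PER_WORD == 4):
+   CSKY_NUM_WORDS (5) == 2 (five bytes occupy two words),
+   CSKY_NUM_REGS (DImode) == 2 (an 8-byte value needs two registers),
+   CSKY_STACK_ALIGN (5) == 8 (five bytes round up to 8 on the stack).  */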
+
+/* Offsets and range macros.  */
+
+#define CSKY_LD16_MAX_OFFSET(MODE)		\
+  (31 * GET_MODE_SIZE (MODE))
+#define CSKY_LD32_MAX_OFFSET(MODE) \
+  (4095 * GET_MODE_SIZE (MODE))
+#define CSKY_LD16_OFFSET_MASK(MODE) \
+  (CSKY_LD16_MAX_OFFSET (MODE) + GET_MODE_SIZE (MODE) - 1)
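+
+/* For example, for SImode (4-byte) accesses these work out to
+   (illustrative arithmetic only; the reach is fixed by the insn
+   encodings):
+     CSKY_LD16_MAX_OFFSET (SImode)  == 124	(31 * 4)
+     CSKY_LD32_MAX_OFFSET (SImode)  == 16380	(4095 * 4)
+     CSKY_LD16_OFFSET_MASK (SImode) == 127	(124 + 4 - 1)  */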
+
+#define CSKY_ADDI16_MAX_IMM	      256
+#define CSKY_SUBI16_MAX_IMM	      256
+
+#define CSKY_CONSTPOOL_LABEL_PREFIX   "LCP"
+
+/* Array of the smallest class containing reg number REGNO, indexed by
+   REGNO.  Used by REGNO_REG_CLASS.  */
+enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
+{
+  /* Registers r0-r7.  */
+  MINI_REGS,	 MINI_REGS,	MINI_REGS,     MINI_REGS,
+  MINI_REGS,	 MINI_REGS,	MINI_REGS,     MINI_REGS,
+  /* Registers r8-r15.  */
+  LOW_REGS,	 LOW_REGS,	LOW_REGS,      LOW_REGS,
+  LOW_REGS,	 LOW_REGS,	SP_REGS,       LOW_REGS,
+  /* Registers r16-r31.  */
+  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+  GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
+  /* Reserved.  */
+  RESERVE_REGS,
+  /* CC,HI,LO registers.  */
+  C_REGS,      HI_REGS,	     LO_REGS,
+  /* Reserved.  */
+  RESERVE_REGS, RESERVE_REGS, RESERVE_REGS, RESERVE_REGS,
+  RESERVE_REGS, RESERVE_REGS, RESERVE_REGS, RESERVE_REGS,
+  RESERVE_REGS, RESERVE_REGS, RESERVE_REGS, RESERVE_REGS,
+  RESERVE_REGS, RESERVE_REGS, RESERVE_REGS, RESERVE_REGS,
+  /* Vec registers.  */
+  V_REGS,	V_REGS,	      V_REGS,	    V_REGS,
+  V_REGS,	V_REGS,	      V_REGS,	    V_REGS,
+  V_REGS,	V_REGS,	      V_REGS,	    V_REGS,
+  V_REGS,	V_REGS,	      V_REGS,	    V_REGS,
+  /* Reserved.  */
+  RESERVE_REGS, RESERVE_REGS,
+  /* Register epc.  */
+  OTHER_REGS
+};
+
+/* Array mapping GCC register numbers to debugger register numbers;
+   -1 means there is no corresponding debugger register
+   (INVALID_REGNUM).
+   TODO: document the numbering rule used here.  */
+const int csky_dbx_regno[FIRST_PSEUDO_REGISTER] =
+{
+  0,  1,  2,  3,  4,  5,  6,  7,
+  8,  9,  10, 11, 12, 13, 14, 15,
+  16, 17, 18, 19, 20, 21, 22, 23,
+  24, 25, 26, 27, 28, 29, 30, 31,
+  -1, -1, 36, 37, -1, -1, -1, -1,
+  -1, -1, -1, -1, -1, -1, -1, -1,
+  -1, -1, -1, -1, 56, 57, 58, 59,
+  60, 61, 62, 63, 64, 65, 66, 67,
+  68, 69, 70, 71, -1, -1, 72
+};
+
+/* Table of machine attributes.  */
+static tree csky_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
+static tree csky_handle_isr_attribute (tree *, tree, tree, int, bool *);
+static const struct attribute_spec csky_attribute_table[] =
+{
+  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
+       affects_type_identity, handler, exclude } */
+  { "naked",	 0, 0, true,  false, false, false, csky_handle_fndecl_attribute, NULL },
+  /* Interrupt Service Routines have special prologue and epilogue requirements.  */
+  { "interrupt", 0, 1, false, false, false, false, csky_handle_isr_attribute,	 NULL },
+  { "isr",	 0, 1, false, false, false, false, csky_handle_isr_attribute,	 NULL },
+  { NULL,	 0, 0, false, false, false, false, NULL,			 NULL }
+};
+
+/* A C structure for machine-specific, per-function data.
+   This is added to the cfun structure.  */
+typedef struct GTY(()) machine_function
+{
+  /* Records if LR has to be saved for far jumps.  */
+  int far_jump_used;
+  /* Records the type of the current function.  */
+  unsigned long func_type;
+  /* Record if the function has a variable argument list.  */
+  int uses_anonymous_args;
+
+  /* Stack frame layout information.  If frame_init_p is true,
+     these fields have been initialized and don't need to be
+     recomputed.  */
+  unsigned int reg_mask;	/* non-volatile reg saves */
+  int arg_size;			/* stdarg spills (bytes) */
+  int reg_size;			/* non-volatile reg saves (bytes) */
+  int local_size;		/* locals */
+  int outbound_size;		/* arg overflow on calls out */
+  int frame_size;		/* total static size of stack frame */
+  int local_offset;
+  int reg_offset;
+  int arg_offset;
+  int frame_init_p;
+
+} machine_function;
+
+/* These macros are for the func_type values above.  */
+#define CSKY_FT_TYPE_MASK   ((1 << 3) - 1)
+#define CSKY_FT_UNKNOWN	    0		    /* Type not yet determined.  */
+#define CSKY_FT_NORMAL	    1		    /* Normal function.  */
+#define CSKY_FT_ISR	    4		    /* Interrupt service routine.  */
+#define CSKY_FT_FIQ	    5		    /* Fast interrupt service routine.  */
+#define CSKY_FT_EXCEPTION   6		    /* Exception handler.  */
+#define CSKY_FT_INTERRUPT   (1 << 2)	    /* Bit set in all interrupt types above.  */
+#define CSKY_FT_NAKED	    (1 << 3)	    /* No prologue or epilogue.  */
+#define CSKY_FUNCTION_TYPE(t)	      ((t) & CSKY_FT_TYPE_MASK)
+#define CSKY_FUNCTION_IS_INTERRUPT(t) ((t) & CSKY_FT_INTERRUPT)
+#define CSKY_FUNCTION_IS_NAKED(t)     ((t) & CSKY_FT_NAKED)
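+
+/* Example (illustrative): a naked fast-interrupt handler has
+   func_type == (CSKY_FT_FIQ | CSKY_FT_NAKED); CSKY_FUNCTION_TYPE
+   extracts CSKY_FT_FIQ, and both CSKY_FUNCTION_IS_INTERRUPT and
+   CSKY_FUNCTION_IS_NAKED are nonzero, since CSKY_FT_FIQ (== 5) has
+   the CSKY_FT_INTERRUPT bit (1 << 2) set.  */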
+
+struct csky_processors
+{
+  const char *const name;
+  enum csky_processor_type core;
+  const char *arch;
+  enum csky_base_architecture base_arch;
+  enum csky_isa_feature isa_bits[CSKY_ISA_FEATURE_GET (max)];
+};
+
+static struct csky_processors all_cores[] =
+{
+#undef CSKY_CORE
+#define CSKY_CORE(NAME, CORE, X, ARCH, ISA)  \
+  {NAME, TARGET_CPU_##CORE, #ARCH, CSKY_BASE_ARCH_##ARCH, \
+  {ISA CSKY_ISA_FEATURE_GET (none)}},
+#include "csky_cores.def"
+#undef CSKY_CORE
+  {NULL, TARGET_CPU_csky_none, NULL, CSKY_BASE_ARCH_NONE, \
+  {CSKY_ISA_FEATURE_GET (none)}}
+};
+
+static struct csky_processors all_architectures[] =
+{
+#undef CSKY_ARCH
+#define CSKY_ARCH(NAME, CORE, ARCH, ISA)     \
+  {NAME, TARGET_CPU_##CORE, #ARCH, CSKY_BASE_ARCH_##ARCH,  \
+  {ISA CSKY_ISA_FEATURE_GET (none)}},
+#include "csky_cores.def"
+#undef CSKY_ARCH
+  {NULL, TARGET_CPU_csky_none, NULL, CSKY_BASE_ARCH_NONE, \
+  {CSKY_ISA_FEATURE_GET (none)}}
+};
+
+struct csky_fpu_desc
+{
+  const char *name;
+  enum csky_isa_feature isa_bits[CSKY_ISA_FEATURE_GET (max)];
+};
+
+static const struct csky_fpu_desc all_fpus[] =
+{
+#undef CSKY_FPU
+#define CSKY_FPU(NAME, CNAME, ISA) \
+  {NAME, {ISA CSKY_ISA_FEATURE_GET (none)}},
+#include "csky_cores.def"
+#undef CSKY_FPU
+};
+
+/* Active target architecture.  */
+struct csky_build_target
+{
+  /* Name of the target CPU, if known, or NULL if the target CPU was not
+     specified by the user (in which case it is inferred from -march).  */
+  const char *core_name;
+  /* Name of the target ARCH.  NULL if there is a selected CPU.  */
+  const char *arch_name;
+  /* Preprocessor substring (never NULL).  */
+  const char *arch_pp_name;
+  /* CPU identifier for the core we're compiling for (architecturally).  */
+  enum csky_processor_type arch_core;
+  /* The base architecture value.  */
+  enum csky_base_architecture base_arch;
+  /* Bitmap encapsulating the isa_bits for the target environment.  */
+  sbitmap isa;
+};
+
+struct csky_build_target csky_active_target;
+
+/* The following are used in the .md file as equivalents to bits.  */
+int csky_arch_isa_features[CSKY_ISA_FEATURE_GET (max)] = {0};
+
+/* The highest CSKY architecture version supported by the target.  */
+enum csky_base_architecture csky_base_arch = CSKY_TARGET_ARCH_GET (NONE);
+
+/* Forward definitions of types.  */
+typedef struct minipool_node	Mnode;
+typedef struct minipool_fixup	Mfix;
+
+static GTY(()) int tls_labelno;
+
+
+/* Maximum constant offset that can be added/subtracted from SP in a
+   single instruction.  For ck801, this is for addsp/subsp, otherwise
+   it is the range of addi/subi.  */
+#define CSKY_MAX_SP_ADJUST \
+  (CSKY_TARGET_ARCH (CK801) ? 508 : 4096)
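+
+/* E.g. (illustrative): allocating a 1000-byte frame takes a single
+   subi on most cores but two adjustments on ck801 (508 + 492), since
+   addsp/subsp reach only 508 bytes.  */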
+
+
+/* Implement TARGET_CPU_CPP_BUILTINS.  */
+
+#define builtin_define(MACRO) cpp_define (pfile, MACRO)
+
+void
+csky_cpu_cpp_builtins (cpp_reader *pfile)
+{
+  const char *arch_name = csky_active_target.arch_pp_name;
+  char *pp_name = (char *) alloca (1 + strlen (arch_name) + 4);
+  sprintf (pp_name, "__%s__", arch_name);
+  builtin_define (pp_name);
+
+  builtin_define ("__csky__=2");
+  builtin_define ("__CSKY__=2");
+  builtin_define ("__ckcore__=2");
+  builtin_define ("__CKCORE__=2");
+
+  builtin_define ("__CSKYABIV2__");
+  builtin_define ("__cskyabiv2__");
+  builtin_define ("__CSKYABI__=2");
+  builtin_define ("__cskyabi__=2");
+
+  if (TARGET_BIG_ENDIAN)
+    {
+      builtin_define ("__ckcoreBE__");
+      builtin_define ("__cskyBE__");
+      builtin_define ("__cskybe__");
+      builtin_define ("__CSKYBE__");
+    }
+  else
+    {
+      builtin_define ("__ckcoreLE__");
+      builtin_define ("__cskyLE__");
+      builtin_define ("__cskyle__");
+      builtin_define ("__CSKYLE__");
+    }
+
+  if (TARGET_HARD_FLOAT)
+    {
+      builtin_define ("__csky_hard_float__");
+      builtin_define ("__CSKY_HARD_FLOAT__");
+    }
+  else
+    {
+      builtin_define ("__csky_soft_float__");
+      builtin_define ("__CSKY_SOFT_FLOAT__");
+    }
+
+  if (CSKY_ISA_FEATURE (fpv2_sf))
+    {
+      builtin_define ("__csky_fpuv2__");
+      builtin_define ("__CSKY_FPUV2__");
+    }
+
+  if (TARGET_ELRW)
+    {
+      builtin_define ("__csky_elrw__");
+      builtin_define ("__CSKY_ELRW__");
+    }
+  if (TARGET_ISTACK)
+    {
+      builtin_define ("__csky_istack__");
+      builtin_define ("__CSKY_ISTACK__");
+    }
+  if (TARGET_MP)
+    {
+      builtin_define ("__csky_mp__");
+      builtin_define ("__CSKY_MP__");
+    }
+  if (TARGET_CP)
+    {
+      builtin_define ("__csky_cp__");
+      builtin_define ("__CSKY_CP__");
+    }
+  if (TARGET_CACHE)
+    {
+      builtin_define ("__csky_cache__");
+      builtin_define ("__CSKY_CACHE__");
+    }
+  if (TARGET_SECURITY)
+    {
+      builtin_define ("__csky_security__");
+      builtin_define ("__CSKY_SECURITY__");
+    }
+  if (TARGET_TRUST)
+    {
+      builtin_define ("__csky_trust__");
+      builtin_define ("__CSKY_TRUST__");
+    }
+  if (TARGET_DSP)
+    {
+      builtin_define ("__csky_dsp__");
+      builtin_define ("__CSKY_DSP__");
+    }
+  if (TARGET_EDSP)
+    {
+      builtin_define ("__csky_edsp__");
+      builtin_define ("__CSKY_EDSP__");
+    }
+  if (TARGET_VDSP)
+    {
+      builtin_define ("__csky_vdsp__");
+      builtin_define ("__CSKY_VDSP__");
+    }
+}
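+
+/* Illustrative use from code compiled for this target (not part of the
+   backend itself); such conditionals are what the macros above exist
+   to satisfy:
+
+     #if defined (__CSKY__) && defined (__CSKY_HARD_FLOAT__)
+       // hardware floating point available
+     #elif defined (__CSKY_SOFT_FLOAT__)
+       // soft-float libcalls in use
+     #endif  */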
+
+
+/******************************************************************
+ *			   Storage Layout			  *
+ ******************************************************************/
+
+
+#undef	TARGET_PROMOTE_FUNCTION_MODE
+#define TARGET_PROMOTE_FUNCTION_MODE \
+  default_promote_function_mode_always_promote
+
+#undef TARGET_CONSTANT_ALIGNMENT
+#define TARGET_CONSTANT_ALIGNMENT csky_constant_alignment
+
+
+/******************************************************************
+ *		Stack Layout and Calling Conventions		  *
+ ******************************************************************/
+
+#undef TARGET_CAN_ELIMINATE
+#define TARGET_CAN_ELIMINATE csky_can_eliminate
+
+#undef	TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG csky_function_arg
+
+#undef	TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE csky_function_arg_advance
+
+#undef	TARGET_FUNCTION_VALUE
+#define TARGET_FUNCTION_VALUE csky_function_value
+
+#undef	TARGET_LIBCALL_VALUE
+#define TARGET_LIBCALL_VALUE csky_libcall_value
+
+#undef	TARGET_FUNCTION_VALUE_REGNO_P
+#define TARGET_FUNCTION_VALUE_REGNO_P csky_function_value_regno_p
+
+#undef	TARGET_SPLIT_COMPLEX_ARG
+#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
+
+#undef	TARGET_PROMOTE_PROTOTYPES
+#define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
+
+#undef	TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+
+#undef	TARGET_ARG_PARTIAL_BYTES
+#define TARGET_ARG_PARTIAL_BYTES csky_arg_partial_bytes
+
+#undef	TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE hook_pass_by_reference_must_pass_in_stack
+
+#undef	TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK csky_output_mi_thunk
+
+#undef	TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
+  hook_bool_const_tree_hwi_hwi_const_tree_true
+
+#undef	TARGET_ASM_FUNCTION_PROLOGUE
+#define TARGET_ASM_FUNCTION_PROLOGUE csky_output_function_prologue
+
+#undef	TARGET_ASM_FUNCTION_EPILOGUE
+#define TARGET_ASM_FUNCTION_EPILOGUE csky_output_function_epilogue
+
+#undef	TARGET_WARN_FUNC_RETURN
+#define TARGET_WARN_FUNC_RETURN csky_warn_func_return
+
+#undef	TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY csky_return_in_memory
+
+
+/******************************************************************
+ *		  Implementing the Varargs Macros		  *
+ ******************************************************************/
+
+
+#undef	TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS csky_setup_incoming_varargs
+
+
+/******************************************************************
+ *		 Implicit Calls to Library Routines		  *
+ ******************************************************************/
+
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS csky_init_libfuncs
+
+
+/******************************************************************
+ *    Dividing the Output into Sections (Texts, Data, . . . )	  *
+ ******************************************************************/
+
+
+#undef TARGET_HAVE_TLS
+#define TARGET_HAVE_TLS TARGET_CSKY_LINUX
+
+
+/******************************************************************
+ *	   Defining target-specific uses of __attribute__	  *
+ ******************************************************************/
+
+
+#undef TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE csky_attribute_table
+
+#undef	TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE csky_option_override
+
+
+/* Implement the BRANCH_COST target macro.  */
+
+int
+csky_default_branch_cost (bool speed_p ATTRIBUTE_UNUSED,
+			  bool predictable_p ATTRIBUTE_UNUSED)
+{
+  return csky_branch_cost;
+}
+
+bool
+csky_default_logical_op_non_short_circuit (void)
+{
+  return BRANCH_COST (optimize_function_for_speed_p (cfun), false) >= 2;
+}
+
+/******************************************************************
+ *			   Register Usage			  *
+ ******************************************************************/
+
+#undef TARGET_HARD_REGNO_NREGS
+#define TARGET_HARD_REGNO_NREGS csky_hard_regno_nregs
+
+#undef TARGET_HARD_REGNO_MODE_OK
+#define TARGET_HARD_REGNO_MODE_OK csky_hard_regno_mode_ok
+
+#undef TARGET_MODES_TIEABLE_P
+#define TARGET_MODES_TIEABLE_P csky_modes_tieable_p
+
+#undef TARGET_CAN_CHANGE_MODE_CLASS
+#define TARGET_CAN_CHANGE_MODE_CLASS csky_can_change_mode_class
+
+#undef	TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE csky_conditional_register_usage
+
+#undef TARGET_CLASS_LIKELY_SPILLED_P
+#define TARGET_CLASS_LIKELY_SPILLED_P csky_class_likely_spilled_p
+
+#undef TARGET_PREFERRED_RELOAD_CLASS
+#define TARGET_PREFERRED_RELOAD_CLASS csky_preferred_reload_class
+
+#undef TARGET_CLASS_MAX_NREGS
+#define TARGET_CLASS_MAX_NREGS csky_class_max_nregs
+
+#undef	TARGET_SECONDARY_RELOAD
+#define TARGET_SECONDARY_RELOAD	 csky_secondary_reload
+
+#undef TARGET_SPILL_CLASS
+#define TARGET_SPILL_CLASS csky_spill_class
+
+
+/******************************************************************
+ *			  Addressing Modes			  *
+ ******************************************************************/
+
+
+#undef TARGET_CANNOT_FORCE_CONST_MEM
+#define TARGET_CANNOT_FORCE_CONST_MEM csky_cannot_force_const_mem
+
+#undef TARGET_LEGITIMATE_CONSTANT_P
+#define TARGET_LEGITIMATE_CONSTANT_P csky_legitimate_constant_p
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS csky_legitimize_address
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P csky_legitimate_address_p
+
+
+/******************************************************************
+ *			       Others				  *
+ ******************************************************************/
+
+
+#undef	TARGET_CANNOT_COPY_INSN_P
+#define TARGET_CANNOT_COPY_INSN_P csky_cannot_copy_insn_p
+
+
+/******************************************************************
+ *			Assembler Format			  *
+ ******************************************************************/
+
+
+#undef TARGET_PRINT_OPERAND
+#define TARGET_PRINT_OPERAND csky_print_operand
+
+#undef TARGET_PRINT_OPERAND_ADDRESS
+#define TARGET_PRINT_OPERAND_ADDRESS csky_print_operand_address
+
+#undef	TARGET_ASM_UNALIGNED_HI_OP
+#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
+
+#undef	TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
+
+#undef	TARGET_DWARF_REGISTER_SPAN
+#define TARGET_DWARF_REGISTER_SPAN csky_dwarf_register_span
+
+
+/******************************************************************
+ *		      Miscellaneous Parameters			  *
+ ******************************************************************/
+
+
+#undef	TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG csky_reorg
+
+#undef	TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
+#define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS csky_allocate_stack_slots_for_args
+
+#undef TARGET_HAVE_SPECULATION_SAFE_VALUE
+#define TARGET_HAVE_SPECULATION_SAFE_VALUE speculation_safe_value_not_needed
+
+
+/******************************************************************
+ *		  Trampolines for Nested Functions		  *
+ ******************************************************************/
+
+
+#undef	TARGET_ASM_TRAMPOLINE_TEMPLATE
+#define TARGET_ASM_TRAMPOLINE_TEMPLATE	csky_asm_trampoline_template
+#undef	TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT		csky_trampoline_init
+
+/* The low bit is ignored by jsr and jmp instructions so it is safe to use.  */
+#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
+#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
+
+/******************************************************************
+ *	      Describing Relative Costs of Operations		  *
+ ******************************************************************/
+
+
+#undef	TARGET_REGISTER_MOVE_COST
+#define TARGET_REGISTER_MOVE_COST csky_register_move_cost
+
+#undef	TARGET_MEMORY_MOVE_COST
+#define TARGET_MEMORY_MOVE_COST	  csky_memory_move_cost
+
+#undef	TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS	  csky_rtx_costs
+
+#undef	TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST	  csky_address_cost
+
+
+/******************************************************************
+ *			  Anchor address			  *
+ ******************************************************************/
+
+
+/* FIXME: the maximum offset depends on the mode size; the value below
+   is defined for SImode.  It is unclear how HImode and QImode should
+   be handled, and whether a minimum offset should also be defined.  */
+#undef	TARGET_MAX_ANCHOR_OFFSET
+#define TARGET_MAX_ANCHOR_OFFSET \
+  ((TARGET_MINI_REGISTERS && optimize_size) ? 127 : 4095)
+
+
+/******************************************************************
+ *		       Condition Code Status			  *
+ ******************************************************************/
+
+
+#undef	TARGET_FIXED_CONDITION_CODE_REGS
+#define TARGET_FIXED_CONDITION_CODE_REGS csky_fixed_condition_code_regs
+
+
+/******************************************************************
+ *	     Adjusting the Instruction Scheduler		  *
+ ******************************************************************/
+
+
+#undef	TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE csky_sched_issue_rate
+
+#undef	TARGET_SCHED_ADJUST_COST
+#define	 TARGET_SCHED_ADJUST_COST csky_sched_adjust_cost
+
+
+/* Forward declarations of functions.  */
+static void push_csky_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
+				    machine_mode, rtx);
+static void csky_print_operand (FILE *stream, rtx x, int code);
+
+
+/* Define a table to map ISR attribute arguments onto function type
+   modifiers.  */
+
+typedef struct
+{
+  const char *const arg;
+  const unsigned long return_value;
+} isr_attribute_entry;
+
+static const isr_attribute_entry isr_attribute_map[] =
+{
+  {"irq", CSKY_FT_ISR },
+  {"IRQ", CSKY_FT_ISR },
+  {"fiq", CSKY_FT_FIQ },
+  {"FIQ", CSKY_FT_FIQ },
+  {NULL, CSKY_FT_NORMAL }
+};
+
+
+/* Map the argument of an "isr" or "interrupt" attribute onto one of
+   the CSKY_FT_* function type values; return CSKY_FT_UNKNOWN if the
+   argument is not recognized.  */
+
+static unsigned long
+get_csky_isr_type (tree argument)
+{
+  const isr_attribute_entry *ptr;
+  const char *arg;
+
+  /* If the argument is NULL, default to ISR.  */
+  if (argument == NULL_TREE)
+    return CSKY_FT_ISR;
+
+  if (TREE_VALUE (argument) == NULL_TREE
+     || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
+    return CSKY_FT_UNKNOWN;
+
+  arg = TREE_STRING_POINTER (TREE_VALUE (argument));
+
+  for (ptr = isr_attribute_map; ptr->arg != NULL; ptr++)
+    if (strcmp (arg, ptr->arg) == 0)
+      return ptr->return_value;
+
+  return CSKY_FT_UNKNOWN;
+}
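+
+/* For instance (illustrative, not from this patch), a declaration like
+
+     void __attribute__ ((interrupt ("irq"))) timer_handler (void);
+
+   reaches this function with the string "irq" and yields CSKY_FT_ISR,
+   while an unrecognized string such as "nmi" yields CSKY_FT_UNKNOWN.  */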
+
+/* Classify cfun as a normal function or some sort of interrupt
+   handler, and set the corresponding bits in cfun->machine->func_type.  */
+
+static unsigned long
+get_csky_current_func_type (void)
+{
+  if (CSKY_FUNCTION_TYPE (cfun->machine->func_type) == CSKY_FT_UNKNOWN)
+    {
+      unsigned long type = CSKY_FT_UNKNOWN;
+      tree a;
+      tree attr;
+
+      gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
+
+      attr = DECL_ATTRIBUTES (current_function_decl);
+      a = lookup_attribute ("naked", attr);
+      if (a != NULL_TREE)
+	type |= CSKY_FT_NAKED;
+      a = lookup_attribute ("isr", attr);
+      if (a == NULL_TREE)
+	a = lookup_attribute ("interrupt", attr);
+      if (a == NULL_TREE)
+	type |= CSKY_FT_NORMAL;
+      else
+	type |= get_csky_isr_type (TREE_VALUE (a));
+
+      cfun->machine->func_type = type;
+    }
+
+  return cfun->machine->func_type;
+}
+
+/* These typedefs are located at the start of this file, so that
+   they can be used in the prototypes there.  This comment is to
+   remind readers of that fact so that the following structures
+   can be understood more easily.
+
+     typedef struct minipool_node    Mnode;
+     typedef struct minipool_fixup   Mfix;  */
+
+struct minipool_node
+{
+  /* Doubly linked chain of entries.  */
+  Mnode *next;
+  Mnode *prev;
+  /* The maximum offset into the code that this entry can be placed.  While
+     pushing fixes for forward references, all entries are sorted in order
+     of increasing max_address.  */
+  HOST_WIDE_INT max_address;
+  /* Similarly for an entry inserted for a backwards ref.  */
+  HOST_WIDE_INT min_address;
+  /* The number of fixes referencing this entry.  This can become zero
+     if we "unpush" an entry.  In this case we ignore the entry when we
+     come to emit the code.  */
+  int refcount;
+  /* The offset from the start of the minipool.  */
+  HOST_WIDE_INT offset;
+  /* The constant value stored in the table.  */
+  rtx value;
+  /* The mode of value.  */
+  machine_mode mode;
+  /* The size of the value.  */
+  int fix_size;
+};
+
+struct minipool_fixup
+{
+  Mfix *next;
+  rtx_insn *insn;
+  HOST_WIDE_INT address;
+  rtx *loc;
+  machine_mode mode;
+  int fix_size;
+  rtx value;
+  Mnode *minipool;
+  HOST_WIDE_INT forwards;
+  HOST_WIDE_INT backwards;
+};
+
+static Mnode *minipool_vector_head;
+static Mnode *minipool_vector_tail;
+static rtx  minipool_vector_label;
+static HOST_WIDE_INT constpool_label_no = 0;
+
+/* Obstack for minipool constant handling.  */
+static struct obstack minipool_obstack;
+static char *minipool_startobj;
+/* The linked list of all minipool fixes required for this function.  */
+Mfix *minipool_fix_head;
+Mfix *minipool_fix_tail;
+/* The fix entry for the current minipool, once it has been placed.  */
+Mfix *minipool_barrier;
+
+/* Allow GC scanning of the minipool obstack.  */
+static void
+csky_add_gc_roots (void)
+{
+  gcc_obstack_init (&minipool_obstack);
+  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
+}
+
+/* Implement TARGET_CONSTANT_ALIGNMENT.
+   Make strings word-aligned so strcpy from constants will be faster.  */
+static HOST_WIDE_INT
+csky_constant_alignment (const_tree exp, HOST_WIDE_INT align)
+{
+  if (TREE_CODE (exp) == STRING_CST
+      && !optimize_size
+      && align < BITS_PER_WORD)
+    return BITS_PER_WORD;
+  return align;
+}
+
+/* Record that there is a natural barrier in the insn stream at
+   ADDRESS.  */
+
+static void
+push_csky_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
+{
+  Mfix *fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (*fix));
+
+  fix->insn = insn;
+  fix->address = address;
+
+  fix->next = NULL;
+  if (minipool_fix_head != NULL)
+    minipool_fix_tail->next = fix;
+  else
+    minipool_fix_head = fix;
+
+  minipool_fix_tail = fix;
+}
+
+/* Compute the size of a vector jump table.  */
+
+static HOST_WIDE_INT
+get_csky_jump_table_size (rtx insn)
+{
+  /* ADDR_VECs only take room if read-only data goes into the text
+     section.  */
+  if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
+    {
+      rtx body = PATTERN (insn);
+      int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
+      HOST_WIDE_INT size;
+      HOST_WIDE_INT modesize;
+
+      modesize = GET_MODE_SIZE (GET_MODE (body));
+      size = modesize * XVECLEN (body, elt);
+      switch (modesize)
+	{
+	case 1:
+	  /* Round up the size of a TBB table to a halfword boundary.  */
+	  size = (size + 1) & ~(HOST_WIDE_INT)1;
+	  break;
+	case 2:
+	  /* No padding necessary for TBH.  */
+	  break;
+	case 4:
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+      return size;
+    }
+
+  return 0;
+}
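+
+/* Worked example (illustrative): a five-entry QImode ADDR_DIFF_VEC
+   occupies 5 bytes, rounded up to 6 above so the following insn stays
+   halfword-aligned; a five-entry SImode ADDR_VEC occupies 20 bytes
+   with no padding.  */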
+
+
+/* Scan INSN and note any of its operands that need fixing.
+   If DO_PUSHES is false we do not actually push any of the fixups
+   needed.  The function returns TRUE if any fixups were needed/pushed.  */
+
+static bool
+note_csky_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address,
+			     int do_pushes)
+{
+  bool result = false;
+  int opno;
+
+  extract_constrain_insn (insn);
+
+  if (recog_data.n_alternatives == 0)
+    return false;
+
+  /* Fill in recog_op_alt with information about the constraints of
+     this insn.  */
+  preprocess_constraints (insn);
+
+  const operand_alternative *op_alt = which_op_alt ();
+  for (opno = 0; opno < recog_data.n_operands; opno++)
+    {
+      /* Things we need to fix can only occur in inputs.  */
+      if (recog_data.operand_type[opno] != OP_IN)
+	continue;
+
+      /* If this alternative is a memory reference, then any mention
+	 of constants in this alternative is really to fool reload
+	 into allowing us to accept one there.  We need to fix them up
+	 now so that we output the right code.  */
+      if (op_alt[opno].memory_ok)
+	{
+	  rtx op = recog_data.operand[opno];
+
+	  if (CONSTANT_P (op))
+	    {
+	      if (do_pushes)
+		push_csky_minipool_fix (insn, address,
+					recog_data.operand_loc[opno],
+					recog_data.operand_mode[opno], op);
+	      result = true;
+	    }
+	}
+    }
+
+  return result;
+}
+
+
+/* Add a constant to the minipool for a forward reference.  Returns the
+   node added or NULL if the constant will not fit in this pool.  */
+
+static Mnode *
+add_csky_minipool_forward_ref (Mfix *fix)
+{
+  /* If set, max_mp is the first pool_entry that has a lower
+     constraint than the one we are trying to add.  */
+  Mnode *max_mp = NULL;
+  HOST_WIDE_INT max_address = fix->address + fix->forwards;
+  Mnode *mp;
+
+  /* If the minipool starts before the end of FIX->INSN then this FIX
+     cannot be placed into the current pool.  Furthermore, adding the
+     new constant pool entry may cause the pool to start FIX_SIZE bytes
+     earlier.  */
+  if (minipool_vector_head
+      && (fix->address + get_attr_length (fix->insn)
+	  >= minipool_vector_head->max_address - fix->fix_size))
+    return NULL;
+
+  /* Scan the pool to see if a constant with the same value has
+     already been added.  While we are doing this, also note the
+     location where we must insert the constant if it doesn't already
+     exist.  */
+  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
+    {
+      if (GET_CODE (fix->value) == GET_CODE (mp->value)
+	  && fix->mode == mp->mode
+	  && (GET_CODE (fix->value) != CODE_LABEL
+	      || (CODE_LABEL_NUMBER (fix->value)
+		  == CODE_LABEL_NUMBER (mp->value)))
+	  && rtx_equal_p (fix->value, mp->value))
+	{
+	  /* More than one fix references this entry.  */
+	  mp->refcount++;
+	  return mp;
+	}
+
+      /* Note the insertion point if necessary.  */
+      if (max_mp == NULL && mp->max_address > max_address)
+	max_mp = mp;
+    }
+
+  /* The value is not currently in the minipool, so we need to create
+     a new entry for it.  If MAX_MP is NULL, the entry will be put on
+     the end of the list since the placement is less constrained than
+     any existing entry.  Otherwise, we insert the new fix before
+     MAX_MP and, if necessary, adjust the constraints on the other
+     entries.  */
+  mp = XNEW (Mnode);
+  mp->fix_size = fix->fix_size;
+  mp->mode = fix->mode;
+  mp->value = fix->value;
+  mp->refcount = 1;
+  /* Not yet required for a backwards ref.  */
+  mp->min_address = -65536;
+
+  if (max_mp == NULL)
+    {
+      mp->max_address = max_address;
+      mp->next = NULL;
+      mp->prev = minipool_vector_tail;
+
+      if (mp->prev == NULL)
+	{
+	  minipool_vector_head = mp;
+	  minipool_vector_label
+	    = gen_csky_constpool_label (gen_rtx_CONST_INT (VOIDmode,
+							   constpool_label_no++));
+	}
+      else
+	mp->prev->next = mp;
+
+      minipool_vector_tail = mp;
+    }
+  else
+    {
+      if (max_address > max_mp->max_address - mp->fix_size)
+	mp->max_address = max_mp->max_address - mp->fix_size;
+      else
+	mp->max_address = max_address;
+
+      mp->next = max_mp;
+      mp->prev = max_mp->prev;
+      max_mp->prev = mp;
+      if (mp->prev != NULL)
+	mp->prev->next = mp;
+      else
+	minipool_vector_head = mp;
+    }
+
+  /* Save the new entry.  */
+  max_mp = mp;
+
+  /* Scan over the preceding entries and adjust their addresses as
+     required.  */
+  while (mp->prev != NULL
+	 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
+    {
+      mp->prev->max_address = mp->max_address - mp->prev->fix_size;
+      mp = mp->prev;
+    }
+
+  return max_mp;
+}
+
+
+/* Return the cost of forcibly inserting a barrier after INSN.  */
+
+static int
+get_csky_barrier_cost (rtx_insn *insn)
+{
+  /* Basing the location of the pool on the loop depth is preferable,
+     but at the moment, the basic block information seems to be
+     corrupt by this stage of the compilation.  */
+  int base_cost = 50;
+  rtx next = next_nonnote_insn (insn);
+
+  if (next != NULL && GET_CODE (next) == CODE_LABEL)
+    base_cost -= 20;
+
+  switch (GET_CODE (insn))
+    {
+    case CODE_LABEL:
+      /* It will always be better to place the table before the label,
+	 rather than after it.  */
+      return 50;
+
+    case INSN:
+    case CALL_INSN:
+      return base_cost;
+
+    case JUMP_INSN:
+      return base_cost - 10;
+
+    default:
+      return base_cost + 10;
+    }
+}
+
+
+/* Find the best place in the insn stream in the range
+   (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
+   Create the barrier by inserting a jump and add a new fix entry for
+   it.  */
+static Mfix *
+create_csky_fix_barrier (Mfix *fix, Mfix *fix_next,
+			 HOST_WIDE_INT max_address)
+{
+  rtx_barrier *barrier;
+  rtx_insn *from = (fix ? fix->insn : get_insns ());
+  /* The instruction after which we will insert the jump.  */
+  rtx_insn *selected = NULL;
+  int selected_cost;
+  /* The address at which the jump instruction will be placed.  */
+  HOST_WIDE_INT selected_address = 0;
+  Mfix *new_fix;
+  HOST_WIDE_INT count = (fix ? fix->address : 0);
+  HOST_WIDE_INT max_count = max_address;
+  rtx_code_label *label = gen_label_rtx ();
+
+  selected_cost = get_csky_barrier_cost (from);
+
+  while (from && count < max_count)
+    {
+      int new_cost;
+      rtx_jump_table_data *table;
+
+      /* Count the length of this insn.  */
+      count += get_attr_length (from);
+
+      /* If there is a jump table, add its length.  */
+      if (tablejump_p (from, NULL, &table))
+	{
+	  count += get_csky_jump_table_size (table);
+
+	  /* Jump tables aren't in a basic block, so base the cost on
+	     the dispatch insn.  If we select this location, we will
+	     still put the pool after the table.  */
+	  new_cost = get_csky_barrier_cost (from);
+
+	  if (count < max_count
+	      && (!selected || new_cost <= selected_cost))
+	    {
+	      selected = table;
+	      selected_cost = new_cost;
+	      selected_address = count;
+	    }
+
+	  /* Continue after the dispatch table.  */
+	  from = NEXT_INSN (table);
+	  continue;
+	}
+
+      new_cost = get_csky_barrier_cost (from);
+
+      if (count < max_count
+	  && (!selected || new_cost <= selected_cost))
+	{
+	  selected = from;
+	  selected_cost = new_cost;
+	  selected_address = count;
+	}
+
+      from = NEXT_INSN (from);
+    }
+
+  /* Make sure that we found a place to insert the jump.  */
+  gcc_assert (selected);
+
+  /* Create a new JUMP_INSN that branches around a barrier.  */
+  from = emit_jump_insn_after (gen_jump (label), selected);
+  JUMP_LABEL (from) = label;
+  barrier = emit_barrier_after (from);
+  emit_label_after (label, barrier);
+
+  /* Create a minipool barrier entry for the new barrier.  */
+  new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
+  new_fix->insn = barrier;
+  new_fix->address = selected_address;
+  if (fix)
+    {
+      new_fix->next = fix->next;
+      fix->next = new_fix;
+    }
+  else
+    new_fix->next = fix_next;
+
+  return new_fix;
+}
+
+
+/* Print a symbolic form of the constant X to the dump file F.
+   This is used for dump output for -mconstpool in the target-dependent
+   reorg pass.  */
+
+static void
+print_csky_value (FILE *f, rtx x)
+{
+  switch (GET_CODE (x))
+    {
+    case CONST_INT:
+      fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
+      return;
+
+    case CONST_DOUBLE:
+      fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
+      return;
+
+    case CONST_VECTOR:
+      {
+	int i;
+
+	fprintf (f, "<");
+	for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
+	  {
+	    fprintf (f, HOST_WIDE_INT_PRINT_HEX,
+		     INTVAL (CONST_VECTOR_ELT (x, i)));
+	    if (i < (CONST_VECTOR_NUNITS (x) - 1))
+	      fputc (',', f);
+	  }
+	fprintf (f, ">");
+      }
+      return;
+
+    case CONST_STRING:
+      fprintf (f, "\"%s\"", XSTR (x, 0));
+      return;
+
+    case SYMBOL_REF:
+      fprintf (f, "`%s'", XSTR (x, 0));
+      return;
+
+    case LABEL_REF:
+      fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
+      return;
+
+    case CONST:
+      print_csky_value (f, XEXP (x, 0));
+      return;
+
+    case PLUS:
+      print_csky_value (f, XEXP (x, 0));
+      fprintf (f, "+");
+      print_csky_value (f, XEXP (x, 1));
+      return;
+
+    case PC:
+      fprintf (f, "pc");
+      return;
+
+    default:
+      fprintf (f, "????");
+      return;
+    }
+}
+
+
+/* Record INSN, which will need fixing up to load a value from the
+   minipool.  ADDRESS is the offset of the insn since the start of the
+   function; LOC is a pointer to the part of the insn which requires
+   fixing; VALUE is the constant that must be loaded, which is of type
+   MODE.  */
+
+static void
+push_csky_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
+			machine_mode mode, rtx value)
+{
+  #define CSKY_ELRW16_RANGE  1400
+  #define CSKY_LRW16_RANGE   700
+  #define CSKY_CONSTANT_POOL_RANGE (TARGET_ELRW ? CSKY_ELRW16_RANGE \
+						: CSKY_LRW16_RANGE)
+
+  /* Fixes less than a word need padding out to a word boundary.  */
+  #define CSKY_MINIPOOL_FIX_SIZE(mode) \
+    (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
+
+  Mfix *fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (*fix));
+
+  fix->insn = insn;
+  fix->address = address;
+  fix->loc = loc;
+  fix->mode = mode;
+  fix->fix_size = CSKY_MINIPOOL_FIX_SIZE (mode);
+  fix->value = value;
+  fix->forwards = CSKY_CONSTANT_POOL_RANGE;
+  fix->backwards = 0;
+  fix->minipool = NULL;
+
+  /* If an insn doesn't have a range defined for it, then it isn't
+     expecting to be reworked by this code.  Better to stop now than
+     to generate duff assembly code.  */
+  gcc_assert (fix->forwards || fix->backwards);
+
+  if (dump_file)
+    {
+      fprintf (dump_file,
+	       ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
+	       GET_MODE_NAME (mode),
+	       INSN_UID (insn), (unsigned long) address,
+	       -1 * (long)fix->backwards, (long)fix->forwards);
+      print_csky_value (dump_file, fix->value);
+      fprintf (dump_file, "\n");
+    }
+
+  /* Add it to the chain of fixes.  */
+  fix->next = NULL;
+
+  if (minipool_fix_head != NULL)
+    minipool_fix_tail->next = fix;
+  else
+    minipool_fix_head = fix;
+
+  minipool_fix_tail = fix;
+}
+
+
+/* Fill in the offsets for minipool entries.  */
+
+static void
+assign_csky_minipool_offsets (Mfix *barrier)
+{
+  HOST_WIDE_INT offset = 0;
+  Mnode *mp;
+
+  minipool_barrier = barrier;
+
+  for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
+    {
+      mp->offset = offset;
+
+      if (mp->refcount > 0)
+	offset += mp->fix_size;
+    }
+}
+
+
+/* Output the literal table.  */
+
+static HOST_WIDE_INT
+dump_csky_minipool (rtx_insn *scan)
+{
+  Mnode *mp;
+  Mnode *nmp;
+  HOST_WIDE_INT pool_length = 0;
+
+  if (dump_file)
+    fprintf (dump_file,
+	     ";; Emitting minipool after insn %u; "
+	     "address %ld; align %d (bytes)\n",
+	     INSN_UID (scan), (unsigned long) minipool_barrier->address, 4);
+
+  scan = emit_insn_after (gen_align_4 (), scan);
+  scan = emit_insn_after (minipool_vector_label, scan);
+
+  for (mp = minipool_vector_head; mp != NULL; mp = nmp)
+    {
+      if (mp->refcount > 0)
+	{
+	  if (dump_file)
+	    {
+	      fprintf (dump_file, ";;  Offset %u, min %ld, max %ld ",
+		       (unsigned) mp->offset, (long) mp->min_address,
+		       (long) mp->max_address);
+	      print_csky_value (dump_file, mp->value);
+	      fputc ('\n', dump_file);
+	    }
+
+	  switch (mp->fix_size)
+	    {
+	    case 4:
+	      scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
+	      pool_length += 4;
+	      break;
+	    case 8:
+	      scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
+	      pool_length += 8;
+	      break;
+	    default:
+	      gcc_unreachable ();
+	    }
+	}
+
+      nmp = mp->next;
+      free (mp);
+    }
+
+  minipool_vector_head = minipool_vector_tail = NULL;
+  scan = emit_barrier_after (scan);
+
+  return pool_length;
+}
+
+/* Return true if INSN is a minipool load or instruction that will be
+   converted to one.  It is assumed that INSN has type attribute "load".  */
+
+bool
+csky_minipool_load_p (rtx_insn *insn)
+{
+  rtx op1, addr;
+
+  extract_insn_cached (insn);
+
+  op1 = recog_data.operand[1];
+
+  /* This is a constant that has not yet been turned into
+     a minipool load.  */
+  if (CONSTANT_P (op1))
+    return true;
+
+  /* Constant pool loads are label_refs.  */
+  if (GET_CODE (op1) == ZERO_EXTEND || GET_CODE (op1) == SIGN_EXTEND)
+    op1 = XEXP (op1, 0);
+  if (GET_CODE (op1) != MEM)
+    return false;
+  addr = XEXP (op1, 0);
+  if (GET_CODE (addr) == PLUS && CONST_INT_P (XEXP (addr, 1)))
+    addr = XEXP (addr, 0);
+  return GET_CODE (addr) == LABEL_REF;
+}
+
+
+/* Compute the "length" attribute of a push or pop insn, based on
+   the registers it uses.  */
+
+int
+csky_compute_pushpop_length (rtx *operands)
+{
+  rtx parallel_op = operands[2];
+  /* Initialize to the index of the last element of the PARALLEL.  */
+  unsigned indx = XVECLEN (parallel_op, 0) - 1;
+  unsigned first_indx = 0;
+  unsigned regno = REGNO (operands[1]);
+
+  if (regno > CSKY_LR_REGNUM)
+    return 4;
+
+  /* Check each register in the list.  */
+  for (; indx > first_indx; indx--)
+    {
+      regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
+      /* If a register number higher than 15 is included, a 32-bit insn
+	 is used.  */
+      if (regno > CSKY_LR_REGNUM)
+	return 4;
+    }
+
+  return 2;
+}
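+
+/* For example (illustrative): a pop covering only r4-r11 and r15 (lr)
+   uses the 16-bit encoding, so the length is 2; naming any register
+   above r15, e.g. r16 with -mhigh-registers, forces the 32-bit
+   encoding and a length of 4.  */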
+
+/* Emit constant pools for -mconstpool.  */
+static void
+csky_emit_constant_pools (void)
+{
+    rtx_insn *insn;
+    HOST_WIDE_INT address = 0;
+    Mfix *fix;
+
+    minipool_fix_head = minipool_fix_tail = NULL;
+
+    /* The first insn must always be a note, or the code below won't
+       scan it properly.  */
+    insn = get_insns ();
+    gcc_assert (NOTE_P (insn));
+
+    /* Scan the insns and record the operands that need fixing.  */
+    for (insn = next_nonnote_insn (insn); insn;
+	 insn = next_nonnote_insn (insn))
+      {
+	if (BARRIER_P (insn))
+	  push_csky_minipool_barrier (insn, address);
+	else if (INSN_P (insn))
+	  {
+	    rtx_jump_table_data *table;
+
+	    note_csky_invalid_constants (insn, address, true);
+	    address += get_attr_length (insn);
+
+	    /* If the insn is a vector jump, add the size of the table
+	       and skip the table.  */
+	    if (tablejump_p (insn, NULL, &table))
+	      {
+		address += get_csky_jump_table_size (table);
+		insn = table;
+	      }
+	  }
+      }
+
+    fix = minipool_fix_head;
+
+    /* Now scan the fixups and perform the required changes.  */
+    while (fix)
+      {
+	Mfix *ftmp;
+	Mfix *last_added_fix;
+	Mfix *last_barrier = NULL;
+	Mfix *this_fix;
+	Mnode *mp;
+	bool has_pending_const = false;
+
+	/* Check if there is any pending constant not processed.  */
+	for (mp = minipool_vector_head; mp; mp = mp->next)
+	  if (mp->refcount > 0)
+	    {
+	      has_pending_const = true;
+	      break;
+	    }
+
+	/* If no pending constant, skip over barrier insns.  */
+	if (has_pending_const == false)
+	  {
+	    while (fix && BARRIER_P (fix->insn))
+	      fix = fix->next;
+	    if (fix == NULL)
+	      break;
+	  }
+
+	last_added_fix = NULL;
+
+	for (ftmp = fix; ftmp; ftmp = ftmp->next)
+	  {
+	    if (BARRIER_P (ftmp->insn))
+	      {
+		if (minipool_vector_head
+		    && ftmp->address >= minipool_vector_head->max_address)
+		  break;
+
+		last_barrier = ftmp;
+	      }
+	    else
+	      {
+		ftmp->minipool = add_csky_minipool_forward_ref (ftmp);
+		if (ftmp->minipool == NULL)
+		  break;
+	      }
+	    last_added_fix = ftmp;  /* Keep track of the last fix added.  */
+	  }
+
+	/* If the last added fix is a barrier, dump minipool after it.  */
+	if (last_added_fix && BARRIER_P (last_added_fix->insn))
+	  ftmp = last_barrier;
+	else
+	  {
+	    /* ftmp is the first fix that we can't fit into this pool.
+	       Insert a new barrier in the code somewhere between the previous
+	       fix and this one, and arrange to jump around it.  */
+	    HOST_WIDE_INT max_address;
+
+	    /* The last item on the list of fixes must be a barrier, so
+	       we can never run off the end of the list of fixes without
+	       last_barrier being set.  */
+	    gcc_assert (ftmp);
+
+	    /* Check that there isn't another fix that is in range that
+	       we couldn't fit into this pool because the pool was
+	       already too large: we need to put the pool before such an
+	       instruction.  The pool itself may come just after the
+	       fix because create_csky_fix_barrier also allows space for a
+	       jump instruction.  */
+	    max_address = minipool_vector_head->max_address;
+	    if (ftmp->address < max_address)
+	      max_address = ftmp->address + 1;
+	    last_barrier = create_csky_fix_barrier (last_added_fix, ftmp,
+						    max_address);
+	  }
+
+	assign_csky_minipool_offsets (last_barrier);
+
+	/* Scan over the fixes we have identified for this pool, fixing them
+	   up and adding the constants to the pool itself.  */
+	for (this_fix = fix; this_fix && ftmp != this_fix;
+	     this_fix = this_fix->next)
+	  {
+	    if (GET_CODE (this_fix->insn) != BARRIER)
+	      {
+		rtx addr
+		  = plus_constant (Pmode,
+				   gen_rtx_LABEL_REF (VOIDmode,
+						      minipool_vector_label),
+				   this_fix->minipool->offset);
+		rtx insn_body = PATTERN (this_fix->insn);
+		rtx src = XEXP (insn_body, 1);
+		*this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
+		if (GET_CODE (this_fix->value) == SYMBOL_REF)
+		  emit_insn_after (gen_rtx_UNSPEC_VOLATILE (VOIDmode,
+							    gen_rtvec (1, src),
+							    VUNSPEC_SYMBOL_REF),
+				   this_fix->insn);
+	      }
+	  }
+	dump_csky_minipool (last_barrier->insn);
+	fix = ftmp;
+	if (fix->next == NULL)
+	  break;
+      }
+
+    /* Free the minipool memory.  */
+    obstack_free (&minipool_obstack, minipool_startobj);
+}
+
+
+/* Implement TARGET_MACHINE_DEPENDENT_REORG.  This handles
+   -mconstpool output.  */
+
+static void
+csky_reorg (void)
+{
+  if (TARGET_CONSTANT_POOL)
+    csky_emit_constant_pools ();
+}
+
+
+/* Check to see if the current function contains a branch insn with the
+   far jump attribute set.  Such a function uses the LR register.  */
+
+static bool
+csky_far_jump_used_p (void)
+{
+  rtx_insn *insn;
+  if (cfun->machine->far_jump_used)
+    return true;
+
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    if (GET_CODE (insn) == JUMP_INSN
+	/* Ignore tablejump patterns.  */
+	&& GET_CODE (PATTERN (insn)) != ADDR_VEC
+	&& GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
+	&& get_attr_far_jump (insn) == FAR_JUMP_YES)
+      {
+	cfun->machine->far_jump_used = 1;
+	return true;
+      }
+  return false;
+}
+
+
+/* Return the mask of registers used by the current function.  Set
+   COUNT to the number of registers used.  */
+
+static unsigned int
+get_csky_live_regs (int *count)
+{
+  int reg;
+  unsigned int live_regs_mask = 0;
+
+  *count = 0;
+  for (reg = 0; reg < CSKY_NGPR_REGS; reg++)
+    {
+      bool save = false;
+
+      /* Ignore unsupported registers.  */
+      if (CSKY_TARGET_ARCH (CK801) && reg > 8 && reg < 13)
+	continue;
+      if ((CSKY_TARGET_ARCH (CK801)
+	   || CSKY_TARGET_ARCH (CK802)
+	   || CSKY_TARGET_ARCH (CK803))
+	  && reg > 15)
+	break;
+
+      /* Callee-saved registers that are ever live must be saved.  */
+      if (df_regs_ever_live_p (reg) && !call_really_used_regs[reg])
+	save = true;
+
+      /* Frame pointer marked used.  */
+      else if (frame_pointer_needed && reg == FRAME_POINTER_REGNUM)
+	save = true;
+
+      /* This is required for CK801/802 where FP is a fixed reg, otherwise
+	 we end up with no FP value available to the DWARF-2 unwinder.  */
+      else if (crtl->calls_eh_return && reg == FRAME_POINTER_REGNUM)
+	save = true;
+
+      /* CK801/802 also need special handling for LR because it's clobbered
+	 by far jumps.  */
+      else if ((CSKY_TARGET_ARCH (CK801) || CSKY_TARGET_ARCH (CK802))
+	       && reg == CSKY_LR_REGNUM
+	       && (!crtl->is_leaf || csky_far_jump_used_p ()))
+	save = true;
+
+      /* Register is used for EH data return.  */
+      else if (crtl->calls_eh_return
+	       && reg >= CSKY_FIRST_EH_RETDATA_REGNUM
+	       && reg <= CSKY_LAST_EH_RETDATA_REGNUM)
+	save = true;
+
+      /* We need a temporary reg to hold the offset for adjusting the SP
+	 for a large stack frame.  */
+      if (reg == CSKY_STACKADJUST_REGNUM
+	  && cfun->machine->reg_offset > CSKY_MAX_SP_ADJUST * 2)
+	save = true;
+
+      /* Add reg to the mask.  */
+      if (save)
+	{
+	  (*count)++;
+	  live_regs_mask |= (1 << reg);
+	}
+    }
+  return live_regs_mask;
+}
+
+/* Compute the stack frame layout, storing sizes of the various pieces
+   in cfun->machine.
+
+   Stack frames constructed in the prologue look like:
+			... caller's frame ...
+	incoming SP ->	caller's outbound argument overflow
+			argument spill
+	optional FP ->	register save
+			local variables
+			alloca() space
+	adjusted SP ->	outbound argument overflow
+
+   with SP/FP pointing at the base (low address) of the respective area,
+   and each area aligned to a word boundary.  */
+
+static void
+csky_layout_stack_frame (void)
+{
+  machine_function *infp = cfun->machine;
+  int reg_count;
+
+  if (infp->frame_init_p)
+    return;
+
+  /* Get sizes of local variables & outbound arguments.  */
+  infp->outbound_size = CSKY_STACK_ALIGN (crtl->outgoing_args_size);
+  infp->local_offset = infp->outbound_size;
+  infp->local_size = CSKY_STACK_ALIGN (get_frame_size ());
+  infp->reg_offset = infp->local_offset + infp->local_size;
+
+  /* Now compute size of argument spill + saved regs.  These do not
+     need explicit alignment since they are already word-sized.  */
+  infp->reg_mask = get_csky_live_regs (&reg_count);
+  infp->reg_size = reg_count * UNITS_PER_WORD;
+  infp->arg_offset = infp->reg_offset + infp->reg_size;
+  infp->arg_size = crtl->args.pretend_args_size;
+  infp->frame_size = infp->arg_offset + infp->arg_size;
+  infp->frame_init_p = reload_completed;
+}
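+
+/* Worked example (illustrative, with UNITS_PER_WORD == 4): a function
+   with 6 bytes of outgoing arguments, 10 bytes of locals, two saved
+   registers, and no stdarg spills gets
+     outbound_size == 8,  local_offset == 8,
+     local_size    == 12, reg_offset   == 20,
+     reg_size      == 8,  arg_offset   == 28,
+     arg_size      == 0,  frame_size   == 28.  */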
+
+/* Implement TARGET_CAN_ELIMINATE.  */
+static bool
+csky_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
+{
+  if (to == STACK_POINTER_REGNUM)
+    return !frame_pointer_needed;
+  return true;
+}
+
+/* Worker function for INITIAL_ELIMINATION_OFFSET macro.
+   Define the offset between two registers, one to be eliminated, and
+   the other its replacement, at the start of a routine.  */
+
+HOST_WIDE_INT
+csky_initial_elimination_offset (int from, int to)
+{
+  int offset;
+
+  csky_layout_stack_frame ();
+
+  /* Set OFFSET to the offset to the initial stack pointer.  */
+  switch (from)
+    {
+    case FRAME_POINTER_REGNUM:
+      offset = cfun->machine->reg_offset;
+      break;
+
+    case ARG_POINTER_REGNUM:
+      offset = cfun->machine->arg_offset;
+      break;
+
+    default:
+      gcc_unreachable ();
+    }
+
+  /* If we are asked for the offset to the frame pointer instead,
+     then subtract the difference between the frame pointer and stack
+     pointer.  */
+  if (to == FRAME_POINTER_REGNUM)
+    offset -= cfun->machine->reg_offset;
+  return offset;
+}
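+
+/* Continuing the illustrative frame above: eliminating FP to SP yields
+   reg_offset == 20, ARG to SP yields arg_offset == 28, and ARG to FP
+   yields arg_offset - reg_offset == 8.  */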
+
+
+/* Determine where to put an argument to a function.
+   Value is zero to push the argument on the stack,
+   or a hard register in which to store the argument.
+
+   MODE is the argument's machine mode.
+   TYPE is the data type of the argument (as a tree).
+    This is null for libcalls where that information may
+    not be available.
+   CUM is a variable of type CUMULATIVE_ARGS which gives info about
+    the preceding args and about the function being called.
+   NAMED is nonzero if this argument is a named parameter
+    (otherwise it is an extra parameter matching an ellipsis).  */
+static rtx
+csky_function_arg (cumulative_args_t pcum_v, machine_mode mode,
+		   const_tree type ATTRIBUTE_UNUSED,
+		   bool named ATTRIBUTE_UNUSED)
+{
+  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
+
+  if (*pcum < CSKY_NPARM_REGS)
+    return gen_rtx_REG (mode, CSKY_FIRST_PARM_REGNUM + *pcum);
+
+  return NULL_RTX;
+}
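+
+/* Illustrative example, assuming CSKY_NPARM_REGS is 4 and the first
+   parameter register is a0: for f (int a, int b, long long c, int d),
+   a and b land in a0 and a1, c occupies the a2/a3 pair, and d goes on
+   the stack because no parameter register remains.  */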
+
+
+/* Return the number of registers (words) needed to pass an argument of
+   MODE and TYPE.  */
+
+static int
+csky_num_arg_regs (machine_mode mode, const_tree type)
+{
+  int size;
+
+  if (type && mode == BLKmode)
+    size = int_size_in_bytes (type);
+  else
+    size = GET_MODE_SIZE (mode);
+
+  return CSKY_NUM_WORDS (size);
+}
+
+
+/* Implement TARGET_FUNCTION_ARG_ADVANCE.  */
+
+static void
+csky_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
+			   const_tree type, bool named ATTRIBUTE_UNUSED)
+{
+  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
+  int param_size = csky_num_arg_regs (mode, type);
+
+  if (*pcum + param_size > CSKY_NPARM_REGS)
+    *pcum = CSKY_NPARM_REGS;
+  else
+    *pcum += param_size;
+}
+
+
+/* Implement TARGET_FUNCTION_VALUE.  */
+static rtx
+csky_function_value (const_tree type, const_tree func,
+		     bool outgoing ATTRIBUTE_UNUSED)
+{
+  machine_mode mode;
+  int unsignedp ATTRIBUTE_UNUSED;
+  int size;
+
+  mode = TYPE_MODE (type);
+  size = int_size_in_bytes (type);
+
+  /* Since we promote return types, we must promote the mode here too.  */
+  if (INTEGRAL_TYPE_P (type))
+    {
+      mode = promote_function_mode (type, mode, &unsignedp, func, 1);
+      return gen_rtx_REG (mode, CSKY_FIRST_RET_REGNUM);
+    }
+
+  if (mode == BLKmode && size > UNITS_PER_WORD
+      && size <= UNITS_PER_WORD * 2)
+    {
+      rtx ret_regs[2];
+      ret_regs[0] = gen_rtx_EXPR_LIST (SImode,
+				       gen_rtx_REG (SImode,
+						    CSKY_FIRST_RET_REGNUM),
+				       GEN_INT (0 * UNITS_PER_WORD));
+      ret_regs[1] = gen_rtx_EXPR_LIST (SImode,
+				       gen_rtx_REG (SImode,
+						    CSKY_FIRST_RET_REGNUM + 1),
+				       GEN_INT (1 * UNITS_PER_WORD));
+
+      rtvec vec = gen_rtvec (2, ret_regs[0], ret_regs[1]);
+
+      return gen_rtx_PARALLEL (mode, vec);
+    }
+
+    return gen_rtx_REG (mode, CSKY_FIRST_RET_REGNUM);
+}
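+
+/* E.g. (illustrative): a 6-byte BLKmode struct returned by value comes
+   back in the two-register PARALLEL built above; an int return is
+   promoted and uses the first return register alone.  */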
+
+
+/* Implement TARGET_LIBCALL_VALUE.  */
+static rtx
+csky_libcall_value (machine_mode mode,
+		    const_rtx libcall ATTRIBUTE_UNUSED)
+{
+  return gen_rtx_REG (mode, CSKY_FIRST_RET_REGNUM);
+}
+
+
+/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
+   On C-SKY, only r0 can return results.  */
+
+static bool
+csky_function_value_regno_p (const unsigned int regno)
+{
+  return (regno == CSKY_FIRST_RET_REGNUM);
+}
+
+
+/* Return an RTX indicating where the return address to the
+   calling function can be found.  */
+rtx
+csky_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
+{
+  if (count != 0)
+    return NULL_RTX;
+
+  return get_hard_reg_initial_val (Pmode, CSKY_LR_REGNUM);
+}
+
+
+/* Implement TARGET_ARG_PARTIAL_BYTES.
+   Return the number of bytes at the beginning of an argument
+   that must be put in registers. The value must be zero for arguments
+   that are passed entirely in registers or
+   that are entirely pushed on the stack.  */
+static int
+csky_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
+			tree type, bool named ATTRIBUTE_UNUSED)
+{
+  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
+  int param_size = csky_num_arg_regs (mode, type);
+
+  if (*pcum < CSKY_NPARM_REGS
+      && *pcum + param_size > CSKY_NPARM_REGS)
+    return (CSKY_NPARM_REGS - *pcum) * UNITS_PER_WORD;
+
+  return 0;
+}
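+
+/* Worked example (illustrative, again assuming CSKY_NPARM_REGS is 4):
+   with *pcum == 3, an 8-byte argument gets its first 4 bytes in the
+   last parameter register and the rest on the stack, so this returns
+   4; with *pcum == 4 it returns 0 and the argument is passed entirely
+   on the stack.  */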
+
+
+/* Implement TARGET_SETUP_INCOMING_VARARGS.
+   On C-SKY, the copy from the argument registers to the stack is emitted
+   by the prologue hooks, so here we just have to note how much stack
+   space to save.  */
+
+static void
+csky_setup_incoming_varargs (cumulative_args_t pcum_v,
+			     machine_mode mode,
+			     tree type,
+			     int *pretend_size,
+			     int second_time ATTRIBUTE_UNUSED)
+{
+  CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
+  CUMULATIVE_ARGS local_cum;
+  cumulative_args_t local_cum_v = pack_cumulative_args (&local_cum);
+  int regs_to_push;
+
+  cfun->machine->uses_anonymous_args = 1;
+  local_cum = *pcum;
+  csky_function_arg_advance (local_cum_v, mode, type, true);
+  regs_to_push = CSKY_NPARM_REGS - local_cum;
+  if (regs_to_push)
+    *pretend_size = regs_to_push * UNITS_PER_WORD;
+}
+
+
+/* Implement TARGET_ASM_OUTPUT_MI_THUNK.
+   Output code to add DELTA to the first argument, and then jump
+   to FUNCTION.  Used for C++ multiple inheritance.  */
+
+static void
+csky_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
+		      HOST_WIDE_INT delta,
+		      HOST_WIDE_INT vcall_offset,
+		      tree function)
+{
+  const char *thiz = "a0";
+  const char *reg0 = "t0";
+  const char *reg1 = "t1";
+  int maxoff = 4096;		/* Constant range for addi/subi.  */
+
+  final_start_function (emit_barrier (), file, 1);
+
+  rtx fnaddr = XEXP (DECL_RTL (function), 0);
+
+  if (CSKY_TARGET_ARCH (CK801))
+    {
+      /* CK801 can't use t registers and has only 16-bit addi/subi.  */
+      reg0 = "l0";
+      reg1 = "l1";
+      maxoff = 256;
+      if (vcall_offset > maxoff || vcall_offset < -maxoff)
+	fprintf (file, "\tpush\tl0, l1\n");
+      else if (delta > maxoff || delta < -maxoff)
+	fprintf (file, "\tpush\tl0\n");
+    }
+
+  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
+    thiz = "a1";
+
+  /* Add delta to this_rtx.  */
+  if (delta != 0)
+    {
+      if (delta > maxoff || delta < -maxoff)
+	{
+	  fprintf (file, "\tlrw\t%s, %ld\n", reg0, (long)delta);
+	  fprintf (file, "\taddu\t%s, %s, %s\n", thiz, thiz, reg0);
+	}
+      else
+	fprintf (file, "\t%s\t%s, %s, %ld\n",
+		 (delta > 0 ? "addi" : "subi"), thiz, thiz,
+		 (long)(delta > 0 ? delta : -delta));
+    }
+
+  /* If needed, add *(*this_rtx + vcall_offset) to this_rtx.  */
+  if (vcall_offset != 0)
+    {
+      fprintf (file, "\tld.w\t%s, (%s, 0)\n", reg0, thiz);
+
+      if (vcall_offset > maxoff || vcall_offset < -maxoff)
+	{
+	  fprintf (file, "\tlrw\t%s, %ld\n", reg1, (long)vcall_offset);
+	  fprintf (file, "\taddu\t%s, %s, %s\n", reg0, reg0, reg1);
+	}
+      else
+	fprintf (file, "\t%s\t%s, %s, %ld\n",
+		 (vcall_offset > 0 ? "addi" : "subi"), reg0, reg0,
+		 (long)(vcall_offset > 0 ? vcall_offset : -vcall_offset));
+
+      /* Load the offset and add it to this_rtx.  */
+      fprintf (file, "\tld.w\t%s, (%s, 0)\n", reg0, reg0);
+      fprintf (file, "\taddu\t%s, %s, %s\n", thiz, thiz, reg0);
+    }
+
+  /* We must pop the scratch regs individually instead of using the
+     "pop" insn, which also does a return.  */
+  if (CSKY_TARGET_ARCH (CK801))
+    {
+      if (vcall_offset > maxoff || vcall_offset < -maxoff)
+	{
+	  fprintf (file, "\tld.w\tl0, (sp, 0)\n");
+	  fprintf (file, "\tld.w\tl1, (sp, 4)\n");
+	  fprintf (file, "\taddi\t sp, sp, 8\n");
+	}
+      else if (delta > maxoff || delta < -maxoff)
+	{
+	  fprintf (file, "\tld.w\tl0, (sp, 0)\n");
+	  fprintf (file, "\taddi\tsp, sp, 4\n");
+	}
+    }
+
+  fprintf (file, "\tjbr\t");
+  output_addr_const (file, fnaddr);
+  fprintf (file, "\n");
+
+  final_end_function ();
+}
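+
+/* For illustration only: with a small positive delta (say 4), zero
+   vcall_offset, and a non-aggregate return type, the code above emits
+   a thunk along the lines of
+
+	addi	a0, a0, 4
+	jbr	<function>
+
+   where <function> stands for the target function's assembler name.  */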
+
+
+/* Implement TARGET_CONDITIONAL_REGISTER_USAGE.
+   Conditionally modify five variables fixed_regs, call_used_regs, global_regs,
+   reg_names, and reg_class_contents, to take into account any dependence of
+   these register sets on target flags.
+
+   CK801 has registers r0-r8 and r13-r15.  CK802 and CK803 have registers
+   r0-r15 (the "low" registers).  Other cpus use registers r0-r31 with
+   -mhigh-registers, otherwise also only r0-r15.
+
+   CK801 only has 16-bit instructions, most of which can only reference
+   r0-r7 (the "mini" registers).  So we mark regs outside that range as
+   fixed.  -msmart can be used on other arch variants to force the same
+   behavior because it results in smaller code size.
+
+   TODO: investigate whether it's beneficial to use r8-r13 as a spill
+   class when TARGET_MINI_REGISTERS instead of making them unusable by
+   the register allocator.  */
+
+static void
+csky_conditional_register_usage (void)
+{
+  /* Only use mini registers in smart mode or 801.  */
+  if (TARGET_MINI_REGISTERS)
+    {
+      int i;
+
+      for (i = (CSKY_LAST_MINI_REGNUM + 1); i < 32; i++)
+	{
+	  fixed_regs[i] = 1;
+	  call_used_regs[i] = 1;
+	  call_really_used_regs[i] = 1;
+	}
+    }
+  /* For some targets, the high registers are not supported.
+     CPUs other than ck801/ck802/ck803 use high registers
+     depending on -mhigh-registers option.  */
+  else if (CSKY_TARGET_ARCH (CK802)
+	   || CSKY_TARGET_ARCH (CK803)
+	   || !TARGET_HIGH_REGISTERS)
+    {
+      int i;
+
+      for (i = CSKY_FIRST_HIGH_REGNUM; i <= CSKY_LAST_HIGH_REGNUM; i++)
+	{
+	  fixed_regs[i] = 1;
+	  call_used_regs[i] = 1;
+	  call_really_used_regs[i] = 1;
+	}
+    }
+
+  /* On CK801/CK802 we must mark lr as a fixed register because it is
+     used to implement far jumps.
+     FIXME: perhaps there should be a command-line option controlling
+     use of lr for far jumps on ck802 when !TARGET_MINI_REGS, when
+     you really want lr to be available to the register allocator and
+     you know there are no far jumps in the code.  */
+  if (CSKY_TARGET_ARCH (CK801) || CSKY_TARGET_ARCH (CK802))
+    {
+      fixed_regs[CSKY_LR_REGNUM] = 1;
+      call_used_regs[CSKY_LR_REGNUM] = 1;
+      call_really_used_regs[CSKY_LR_REGNUM] = 0;
+    }
+
+  /* The hi/lo registers are only supported in dsp mode.  */
+  if (!TARGET_DSP)
+    {
+      fixed_regs[CSKY_HI_REGNUM] = 1;
+      call_used_regs[CSKY_HI_REGNUM] = 1;
+      call_really_used_regs[CSKY_HI_REGNUM] = 1;
+
+      fixed_regs[CSKY_LO_REGNUM] = 1;
+      call_used_regs[CSKY_LO_REGNUM] = 1;
+      call_really_used_regs[CSKY_LO_REGNUM] = 1;
+    }
+
+  /* The V_REGS are only supported in hard float mode.  */
+  if (!TARGET_HARD_FLOAT)
+    {
+      int regno;
+
+      for (regno = CSKY_FIRST_VFP_REGNUM;
+	   regno <= CSKY_LAST_VFP_REGNUM; regno++)
+	{
+	  fixed_regs[regno] = 1;
+	  call_used_regs[regno] = 1;
+	  call_really_used_regs[regno] = 1;
+	}
+    }
+
+  /* In pic mode, the gb register is not available for register
+     allocation.  Since gb is not clobbered by function
+     calls, set its call_really_used_regs to 0.  */
+  if (flag_pic)
+    {
+      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
+      call_really_used_regs[PIC_OFFSET_TABLE_REGNUM] = 0;
+    }
+}
+
+/* Implement TARGET_HARD_REGNO_NREGS.  */
+static unsigned int
+csky_hard_regno_nregs (unsigned int regno, machine_mode mode)
+{
+  if (regno >= CSKY_FIRST_VFP_REGNUM && !CSKY_TARGET_ARCH (CK803))
+    return 1;
+  else
+    return CSKY_NUM_REGS (mode);
+}
+
+/* Implement TARGET_HARD_REGNO_MODE_OK.  Return true if REGNO is a
+   valid register for holding a quantity of type MODE.  */
+
+static bool
+csky_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
+{
+  int nregs = CSKY_NUM_REGS (mode);
+
+  /* We can't handle more than doubleword sizes for any register.  */
+  if (nregs > 2)
+    return false;
+
+  /* For general registers, return true if MODE fits in one word.
+     When the size is larger than one word, two successive hard
+     registers are needed to hold the data.  */
+  if (regno < CSKY_NGPR_REGS)
+    {
+      if (nregs < 2)
+	return true;
+      else if (TARGET_MINI_REGISTERS)
+	return (regno < CSKY_LAST_MINI_REGNUM);
+      else if (CSKY_TARGET_ARCH (CK802)
+	       || CSKY_TARGET_ARCH (CK803)
+	       || !TARGET_HIGH_REGISTERS)
+	/* Without high register, r15 cannot hold doubleword data.  */
+	return (regno < (CSKY_SP_REGNUM - 1));
+      else
+	return (regno < (CSKY_SP_REGNUM - 1)
+		|| (regno >= CSKY_LR_REGNUM
+		    && regno < CSKY_LAST_HIGH_UNFIXED_REGNUM));
+    }
+  else if (regno == CSKY_CC_REGNUM)
+    return (mode == CCmode);
+  else if (regno == CSKY_HI_REGNUM || regno == CSKY_LO_REGNUM)
+    {
+      /* Don't allocate the hi/lo registers for float data even in
+	 DSP mode, because reloading data from hi/lo is too costly.  */
+      if (!TARGET_DSP || mode == SFmode || mode == DFmode)
+	return false;
+      else if (nregs == 2)
+	return (regno == CSKY_HI_REGNUM);
+      else
+	return true;
+    }
+  else if (CSKY_VREG_P (regno) && TARGET_HARD_FLOAT)
+    return true;
+
+  return false;
+}
+
+/* Implement TARGET_MODES_TIEABLE_P.  We can't tie DFmode with other modes
+   when V_REGs might be in use because those registers mess with the stored
+   bits.  */
+static bool
+csky_modes_tieable_p (machine_mode mode1, machine_mode mode2)
+{
+  return !(TARGET_HARD_FLOAT
+	   && mode1 != mode2
+	   && (mode1 == DFmode || mode2 == DFmode));
+}
+
+/* Implement TARGET_CAN_CHANGE_MODE_CLASS.
+   V_REG registers can't do subreg as all values are reformatted to
+   internal precision.  */
+static bool
+csky_can_change_mode_class (machine_mode from,
+			    machine_mode to,
+			    reg_class_t rclass)
+{
+  return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
+	  || !reg_classes_intersect_p (V_REGS, rclass));
+}
+
+/* Implement TARGET_CLASS_LIKELY_SPILLED_P.
+   We need to define this for MINI_REGS when we only use r0 - r7.
+   Otherwise we can end up using r0-r4 for function arguments, and don't
+   have enough left over to do doubleword arithmetic.  */
+
+static bool
+csky_class_likely_spilled_p (reg_class_t rclass)
+{
+  if ((TARGET_MINI_REGISTERS && rclass == MINI_REGS)
+      || rclass == C_REGS)
+    return true;
+
+  return false;
+}
+
+
+/* Implement TARGET_PREFERRED_RELOAD_CLASS.
+   Given an rtx X being reloaded into a reg required to be
+   in class CLASS, return the class of reg to actually use.
+   In general this is just CLASS.  */
+
+static reg_class_t
+csky_preferred_reload_class (rtx x, reg_class_t rclass)
+{
+  if (TARGET_HARD_FLOAT
+      && CONST_DOUBLE_P (x)
+      && (GET_MODE (x) == DFmode || GET_MODE (x) == SFmode)
+      && rclass == NO_REGS)
+    return GENERAL_REGS;
+  return rclass;
+}
+
+
+/* Implement TARGET_CLASS_MAX_NREGS.
+   Return the maximum number of consecutive registers of class rclass needed
+   to hold a value of mode mode.
+   On the csky, this is the size of MODE in words,
+   except in the FP regs, where a single reg is always enough.  */
+
+static unsigned char
+csky_class_max_nregs (reg_class_t rclass, machine_mode mode)
+{
+  if (rclass == V_REGS)
+    return 1;
+  else
+    return CSKY_NUM_REGS (mode);
+}
+
+
+/* Implement TARGET_SECONDARY_RELOAD.
+   If copying a register of RCLASS from/to X requires an intermediate
+   register, the hook should return the REGISTER_CLASS required for this
+   intermediate register.
+   If no intermediate register is required, it should return NO_REGS.
+   If more than one intermediate register is required, describe the one
+   that is closest in the copy chain to the reload register.  */
+
+reg_class_t
+csky_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
+		       reg_class_t rclass,
+		       machine_mode mode,
+		       secondary_reload_info *sri ATTRIBUTE_UNUSED)
+{
+  int regno = -1;
+
+  /* Extract the real regno from X.  */
+  if (GET_CODE (x) == SIGN_EXTEND)
+    {
+      int off = 0;
+
+      x = XEXP (x, 0);
+
+      if (reg_renumber)
+	regno = true_regnum (x);
+      else
+	{
+	  while (GET_CODE (x) == SUBREG)
+	    {
+	      off += subreg_regno_offset (REGNO (SUBREG_REG (x)),
+					  GET_MODE (SUBREG_REG (x)),
+					  SUBREG_BYTE (x), GET_MODE (x));
+	      x = SUBREG_REG (x);
+	    }
+
+	  if (GET_CODE (x) == REG)
+	    regno = REGNO (x) + off;
+	}
+    }
+  else if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
+    regno = true_regnum (x);
+
+  /* We always require a general register when copying anything to
+     HI/LO_REGNUM, except when copying an SImode value from HI/LO_REGNUM
+     to a general register, or when copying from register 0.  */
+  if ((rclass == HILO_REGS || rclass == LO_REGS || rclass == HI_REGS)
+      && !CSKY_GENERAL_REGNO_P (regno))
+    return GENERAL_REGS;
+
+  if (rclass == V_REGS && !CSKY_GENERAL_REGNO_P (regno))
+    {
+      /* Reload between vector reg and memory does not need an
+	 intermediate register.  */
+      if (MEM_P (x) && (mode == SFmode || mode == DFmode))
+	return NO_REGS;
+      else
+	return GENERAL_REGS;
+    }
+
+  return NO_REGS;
+}
+
+/* Implement TARGET_SPILL_CLASS.
+   Try spilling to a larger register class before spilling to memory.  */
+
+static reg_class_t
+csky_spill_class (reg_class_t rclass, machine_mode mode ATTRIBUTE_UNUSED)
+{
+  if ((rclass == MINI_REGS && !TARGET_MINI_REGISTERS)
+      || (rclass == LOW_REGS && TARGET_HIGH_REGISTERS))
+    return GENERAL_REGS;
+  return NO_REGS;
+}
+
+/* Convert a static initializer array of feature bits to sbitmap
+   representation.  */
+static void
+csky_initialize_isa (sbitmap isa, const enum csky_isa_feature *isa_bits)
+{
+  bitmap_clear (isa);
+  while (*isa_bits != CSKY_ISA_FEATURE_GET (none))
+    bitmap_set_bit (isa, *(isa_bits++));
+}
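+
+/* A feature-bits array is a list terminated by CSKY_ISA_FEATURE_GET
+   (none); csky_initialize_isa sets one sbitmap bit per listed feature.
+   A hypothetical example (feature names for illustration only):
+
+     static const enum csky_isa_feature demo_isa_bits[] =
+       { CSKY_ISA_FEATURE_GET (E1), CSKY_ISA_FEATURE_GET (E2),
+	 CSKY_ISA_FEATURE_GET (none) };  */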
+
+
+/* Configure a build target TARGET from the user-specified options OPTS and
+   OPTS_SET.  */
+static void
+csky_configure_build_target (struct csky_build_target *target,
+			     struct cl_target_option *opts,
+			     struct gcc_options *opts_set)
+{
+  const struct csky_processors *csky_selected_tune = NULL;
+  struct csky_processors *csky_selected_cpu = NULL;
+  struct csky_processors *csky_selected_arch = NULL;
+  sbitmap all_sbits = sbitmap_alloc (CSKY_ISA_FEATURE_GET (max));
+  bitmap_clear (all_sbits);
+
+  bitmap_clear (target->isa);
+  target->core_name = NULL;
+  target->arch_name = NULL;
+
+  if (opts_set->x_csky_arch_option)
+    csky_selected_arch = &all_architectures[opts->x_csky_arch_option];
+
+  if (opts_set->x_csky_cpu_option)
+    {
+      csky_selected_cpu = &all_cores[opts->x_csky_cpu_option];
+      csky_selected_tune = &all_cores[opts->x_csky_cpu_option];
+    }
+
+  if (csky_selected_cpu)
+    {
+      /* TODO: support combining features from different cpu & arch
+	 settings; this should be based on the arch.  */
+      if (csky_selected_arch
+	  && (csky_selected_cpu->base_arch != csky_selected_arch->base_arch))
+	warning (0, "cpu %s is not based on arch %s, ignoring the arch",
+		 csky_selected_cpu->name, csky_selected_arch->name);
+      if (!csky_selected_arch)
+	csky_selected_arch = &all_architectures[csky_selected_cpu->base_arch];
+      csky_initialize_isa (all_sbits, csky_selected_arch->isa_bits);
+      target->core_name = csky_selected_cpu->name;
+    }
+  else if (csky_selected_arch)
+    {
+      csky_selected_cpu = csky_selected_arch;
+      target->arch_name = csky_selected_arch->name;
+    }
+  else /* If the user did not specify a processor, choose one for them.  */
+    {
+      csky_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
+      csky_selected_arch = &all_architectures[csky_selected_cpu->base_arch];
+      csky_initialize_isa (all_sbits, csky_selected_arch->isa_bits);
+      target->core_name = csky_selected_cpu->name;
+    }
+
+  /* The selected cpu may be an architecture, so lookup tuning by core ID.  */
+  if (!csky_selected_tune)
+    csky_selected_tune = &all_cores[csky_selected_cpu->core];
+  gcc_assert (csky_selected_tune);
+
+  gcc_assert (csky_selected_arch);
+  gcc_assert (csky_selected_cpu);
+  csky_initialize_isa (target->isa, csky_selected_cpu->isa_bits);
+  bitmap_ior (target->isa, target->isa, all_sbits);
+
+  /* Finish initializing the target structure.  */
+  target->arch_pp_name = csky_selected_cpu->arch;
+  target->base_arch = csky_selected_cpu->base_arch;
+  target->arch_core = csky_selected_cpu->core;
+
+  sbitmap_free (all_sbits);
+}
+
+
+/* Implement TARGET_OPTION_OVERRIDE.  */
+
+static void
+csky_option_override (void)
+{
+  csky_active_target.isa = sbitmap_alloc (CSKY_ISA_FEATURE_GET (max));
+
+  /* Create the default target_options structure.  We need this early
+     to configure the overall build target.  */
+  target_option_default_node = target_option_current_node
+			     = build_target_option_node (&global_options);
+
+  csky_configure_build_target (&csky_active_target,
+			      TREE_TARGET_OPTION (target_option_default_node),
+			      &global_options_set);
+
+#ifdef SUBTARGET_OVERRIDE_OPTIONS
+  SUBTARGET_OVERRIDE_OPTIONS;
+#endif
+
+  csky_base_arch = csky_active_target.base_arch;
+
+  if (flag_pic && !(CSKY_TARGET_ARCH (CK810) || CSKY_TARGET_ARCH (CK807)))
+    {
+      flag_pic = 0;
+      warning (0, "%qs is not supported by arch %s",
+	       "-fPIC", csky_active_target.arch_pp_name);
+    }
+
+  /* Check floating-point options for consistency.  */
+  if (TARGET_HARD_FLOAT)
+    {
+      const struct csky_fpu_desc *csky_selected_fpu = NULL;
+
+      if (csky_fpu_index == TARGET_FPU_auto)
+	{
+	  const char *target_fpu_name;
+	  bool ok;
+	  int fpu_index;
+
+#ifdef CSKY_FPUTYPE_DEFAULT
+	  target_fpu_name = CSKY_FPUTYPE_DEFAULT;
+#else
+	  target_fpu_name = "fpv2";
+#endif
+
+	  if (csky_active_target.core_name != NULL
+	      && !strchr (csky_active_target.core_name, 'f'))
+	    target_fpu_name = "auto";
+	  else if (CSKY_TARGET_ARCH (CK803) || !TARGET_DOUBLE_FLOAT)
+	    target_fpu_name = "fpv2_sf";
+	  else if (TARGET_DOUBLE_FLOAT && TARGET_FDIVDU)
+	    target_fpu_name = "fpv2_divd";
+
+	  ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &fpu_index,
+				      CL_TARGET);
+	  gcc_assert (ok);
+	  csky_fpu_index = (enum csky_fpu_type) fpu_index;
+	}
+
+      if (CSKY_TARGET_ARCH (CK801) || CSKY_TARGET_ARCH (CK802))
+	error ("%qs is not supported by arch %s",
+	       "-mhard-float", csky_active_target.arch_pp_name);
+      else if (csky_fpu_index == TARGET_FPU_auto)
+	error ("%<-mhard-float%> is not supported by the selected CPU");
+      else
+	{
+	  csky_selected_fpu = &all_fpus[csky_fpu_index];
+	  sbitmap fpu_bits = sbitmap_alloc (CSKY_ISA_FEATURE_GET (max));
+	  csky_initialize_isa (fpu_bits, csky_selected_fpu->isa_bits);
+
+	  bitmap_ior (csky_active_target.isa, csky_active_target.isa,
+		      fpu_bits);
+
+	  sbitmap_free (fpu_bits);
+	}
+    }
+  else
+    {
+      if (TARGET_DOUBLE_FLOAT > 0)
+	warning (0, "%<-mdouble-float%> ignored without %<-mhard-float%>");
+      TARGET_DOUBLE_FLOAT = 0;
+      if (TARGET_FDIVDU > 0)
+	warning (0, "%<-mfdivdu%> ignored without %<-mhard-float%>");
+      TARGET_FDIVDU = 0;
+    }
+
+  /* Extended LRW instructions are enabled by default on CK801, disabled
+     otherwise.  */
+  if (TARGET_ELRW == -1)
+    TARGET_ELRW = CSKY_TARGET_ARCH (CK801);
+
+  /* DSP is enabled either by the processor feature or -mdsp
+     command-line option.  There is no -mno-dsp option as the assembler
+     doesn't take one.  */
+  if (!TARGET_DSP)
+    TARGET_DSP = CSKY_ISA_FEATURE (dsp);
+
+  /* There's both -mdiv and -mno-div.  Take default from processor if
+     neither is specified explicitly.  */
+  if (TARGET_DIV == -1)
+    TARGET_DIV = CSKY_ISA_FEATURE (div);
+
+  /* TARGET_CONSTANT_POOL is mandatory for CK801 and CK802 and optional
+     for other CPUs.
+     The reason why the compiler has to generate constant pools for CK801/2
+     instead of deferring to the assembler is that these cores don't have a
+     long branch instruction other than jbsr, which clobbers lr.  So for
+     the compiler to correctly save/restore lr it has to know whether there
+     are long branches, which depends on having accurate branch length
+     counts, which in turn depends on having control over where constant
+     pools are placed.  */
+  if ((CSKY_TARGET_ARCH (CK801) || CSKY_TARGET_ARCH (CK802))
+      && !TARGET_CONSTANT_POOL)
+    error ("%qs is not supported by arch %s",
+	   "-mno-constpool", csky_active_target.arch_pp_name);
+  else if (TARGET_CONSTANT_POOL == -1)
+    TARGET_CONSTANT_POOL = (CSKY_TARGET_ARCH (CK801)
+			    || CSKY_TARGET_ARCH (CK802));
+
+  /* TARGET_MINI_REGISTERS is mandatory for CK801, the default for CK802,
+     and optional for other CPUs.  TARGET_HIGH_REGISTERS is incompatible
+     with TARGET_MINI_REGISTERS, is not supported by CK801/802/803,
+     and is the default for other processors.
+     See csky_conditional_register_usage.  */
+  if (TARGET_MINI_REGISTERS > 0 && TARGET_HIGH_REGISTERS > 0)
+    error ("%<-msmart%> is incompatible with %<-mhigh-registers%>");
+  else if (CSKY_TARGET_ARCH (CK801)
+	   || CSKY_TARGET_ARCH (CK802)
+	   || CSKY_TARGET_ARCH (CK803))
+    {
+      if (CSKY_TARGET_ARCH (CK801)
+	  || (CSKY_TARGET_ARCH (CK802) && TARGET_MINI_REGISTERS == -1))
+	TARGET_MINI_REGISTERS = 1;
+      else if (TARGET_MINI_REGISTERS == -1)
+	TARGET_MINI_REGISTERS = 0;
+      if (TARGET_HIGH_REGISTERS > 0)
+	warning (0, "%qs is not supported by arch %s",
+		 "-mhigh-registers", csky_active_target.arch_pp_name);
+      TARGET_HIGH_REGISTERS = 0;
+    }
+  else
+    {
+      if (TARGET_MINI_REGISTERS == -1)
+	TARGET_MINI_REGISTERS = 0;
+      if (TARGET_HIGH_REGISTERS == -1)
+	TARGET_HIGH_REGISTERS = !TARGET_MINI_REGISTERS;
+    }
+
+  /* -mmultiple-stld is the default for everything but CK801, which
+     doesn't support it.  */
+  if (CSKY_TARGET_ARCH (CK801))
+    {
+      if (TARGET_MULTIPLE_STLD > 0)
+	warning (0, "%qs is not supported by arch %s",
+		 "-mmultiple-stld", csky_active_target.arch_pp_name);
+      TARGET_MULTIPLE_STLD = 0;
+    }
+
+  /* Initialize boolean versions of the architectural flags, for use
+     in the .md file.  */
+
+#undef	CSKY_ISA
+#define CSKY_ISA(IDENT, DESC)						  \
+  {									  \
+    csky_arch_isa_features[CSKY_ISA_FEATURE_GET (IDENT)] =		   \
+      bitmap_bit_p (csky_active_target.isa, CSKY_ISA_FEATURE_GET (IDENT)); \
+  }
+#include "csky_isa.def"
+#undef	CSKY_ISA
+
+  /* TODO  */
+
+  /* Resynchronize the saved target options.  */
+  cl_target_option_save (TREE_TARGET_OPTION (target_option_default_node),
+			 &global_options);
+
+#ifdef ENABLE_TPF_DEBUG
+  /* Don't emit DWARF4 unless specifically selected.  The TPF
+     debuggers do not yet support DWARF 4.  */
+  if (!global_options_set.x_dwarf_strict)
+    dwarf_strict = 1;
+  if (!global_options_set.x_dwarf_version)
+    dwarf_version = 3;
+#endif
+
+  /* Don't run the scheduler before reload by default,
+     since it tends to increase register pressure.  */
+  if (!global_options_set.x_flag_schedule_insns)
+    flag_schedule_insns = 0;
+
+  csky_add_gc_roots ();
+}
+
+
+/* Return TRUE if X contains any references to TLS symbols.  */
+
+bool
+csky_tls_referenced_p (rtx x)
+{
+  if (!TARGET_TLS)
+    return false;
+
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, x, ALL)
+    {
+      const_rtx x = *iter;
+      if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
+	return true;
+
+      /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
+	 TLS offsets, not real symbol references.  */
+      if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
+	iter.skip_subrtxes ();
+    }
+  return false;
+}
+
+
+/* Implement TARGET_CANNOT_FORCE_CONST_MEM.
+   Determine if it's legal to put X into the constant pool.  This
+   is not possible for the address of thread-local symbols, which
+   is checked above.  */
+
+static bool
+csky_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED,
+			     rtx x)
+{
+  return csky_tls_referenced_p (x);
+}
+
+
+/* Implement TARGET_LEGITIMATE_CONSTANT_P.  Returns nonzero if the
+   constant value X is a legitimate general operand.
+   It is given that X satisfies CONSTANT_P or is a CONST_DOUBLE.  */
+
+static bool
+csky_legitimate_constant_p (machine_mode mode, rtx x)
+{
+  return (!csky_cannot_force_const_mem (mode, x)
+	  && CONSTANT_P (x));
+}
+
+
+/* Return true if X is valid as a C-SKY addressing register.  */
+
+static bool
+is_csky_address_register_rtx_p (rtx x, int strict_p)
+{
+  int regno;
+
+  if (!x)
+    return false;
+  if (!REG_P (x))
+    return false;
+
+  regno = REGNO (x);
+
+  if (strict_p)
+    return (CSKY_GENERAL_REGNO_P (regno)
+	    || CSKY_GENERAL_REGNO_P (reg_renumber[regno]));
+  else
+    return CSKY_GENERAL_REGNO_P (regno) || regno >= FIRST_PSEUDO_REGISTER;
+}
+
+
+/* Return TRUE if X is a thread-local symbol.  */
+
+static bool
+csky_tls_symbol_p (rtx x)
+{
+  if (!TARGET_TLS)
+    return false;
+
+  if (GET_CODE (x) != SYMBOL_REF)
+    return false;
+
+  return SYMBOL_REF_TLS_MODEL (x) != 0;
+}
+
+
+/* Handle lazy initialization of __tls_get_addr libfunc.  */
+static GTY(()) rtx tls_get_addr_libfunc;
+
+static rtx
+get_tls_get_addr (void)
+{
+  if (!tls_get_addr_libfunc)
+    tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
+  return tls_get_addr_libfunc;
+}
+
+
+/* Emit a call to __tls_get_addr.  */
+
+static rtx_insn *
+csky_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
+{
+  rtx label, labelno, unspec, tmp;
+  rtx_insn *insns;
+
+  start_sequence ();
+
+  labelno = GEN_INT (tls_labelno++);
+  label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_TLS_LABEL);
+  unspec = gen_rtx_UNSPEC (Pmode,
+			   gen_rtvec (3, x, GEN_INT (reloc), label),
+			   UNSPEC_TLS);
+  tmp = gen_reg_rtx (SImode);
+  emit_move_insn (reg, unspec);
+  emit_move_insn (tmp, label);
+  emit_insn (gen_addsi3 (reg, reg, tmp));
+  *valuep = emit_library_call_value (get_tls_get_addr (),
+				     NULL_RTX, LCT_PURE, /* LCT_CONST?	*/
+				     Pmode, reg, Pmode);
+  insns = get_insns ();
+  end_sequence ();
+  return insns;
+}
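+
+/* The sequence constructed above is, schematically:
+
+     reg = unspec [x, reloc, label]	; the TLS relocation
+     tmp = label
+     reg = reg + tmp
+     value = __tls_get_addr (reg)
+
+   The whole block is collected into one insn sequence so that the
+   caller can wrap it with emit_libcall_block and attach an
+   equivalence note.  */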
+
+/* Helper function for csky_legitimize_address, to handle the TLS cases.
+   REG is a scratch register and may be null.  */
+
+rtx
+csky_legitimize_tls_address (rtx x, rtx reg)
+{
+  rtx dest, tp, label, labelno, unspec, ret, eqv, addend, tmp;
+  rtx_insn *insns;
+  unsigned int model = SYMBOL_REF_TLS_MODEL (x);
+
+  if (!reg)
+    reg = gen_reg_rtx (SImode);
+
+  switch (model)
+    {
+    case TLS_MODEL_GLOBAL_DYNAMIC:
+      insns = csky_call_tls_get_addr (x, reg, &ret, TLS_GD32);
+      dest = gen_reg_rtx (Pmode);
+      emit_libcall_block (insns, dest, ret, x);
+      return dest;
+
+    case TLS_MODEL_LOCAL_DYNAMIC:
+      insns = csky_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
+
+      /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
+	 share the LDM result with other LD model accesses.  */
+      eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx), UNSPEC_TLS);
+      dest = gen_reg_rtx (Pmode);
+      emit_libcall_block (insns, dest, ret, eqv);
+
+      /* Load the addend.  */
+      addend = gen_rtx_UNSPEC (Pmode,
+			       gen_rtvec (2, x, GEN_INT (TLS_LDO32)),
+			       UNSPEC_TLS);
+      addend = force_reg (SImode, addend);
+      return gen_rtx_PLUS (Pmode, dest, addend);
+
+    case TLS_MODEL_INITIAL_EXEC:
+      labelno = GEN_INT (tls_labelno++);
+      label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_TLS_LABEL);
+      unspec = gen_rtx_UNSPEC (Pmode,
+			       gen_rtvec (3, x, GEN_INT (TLS_IE32), label),
+			       UNSPEC_TLS);
+      tmp = gen_reg_rtx (SImode);
+      emit_move_insn (reg, unspec);
+      emit_move_insn (tmp, label);
+      emit_insn (gen_addsi3 (reg, reg, tmp));
+      emit_move_insn (reg, gen_const_mem (Pmode, reg));
+      tp = gen_rtx_REG (SImode, CSKY_TLS_REGNUM);
+      return gen_rtx_PLUS (Pmode, tp, reg);
+
+    case TLS_MODEL_LOCAL_EXEC:
+      unspec = gen_rtx_UNSPEC (Pmode,
+			       gen_rtvec (2, x, GEN_INT (TLS_LE32)),
+			       UNSPEC_TLS);
+      emit_move_insn (reg, unspec);
+      tp = gen_rtx_REG (SImode, CSKY_TLS_REGNUM);
+      return gen_rtx_PLUS (Pmode, tp, reg);
+
+    default:
+      abort ();
+    }
+}
+
+
+/* Implement TARGET_LEGITIMIZE_ADDRESS.  */
+
+static rtx
+csky_legitimize_address (rtx x, rtx orig_x ATTRIBUTE_UNUSED,
+			 machine_mode mode)
+{
+  if (csky_tls_symbol_p (x))
+    return csky_legitimize_tls_address (x, NULL_RTX);
+
+  if (GET_CODE (x) == PLUS)
+    {
+      rtx xop0 = XEXP (x, 0);
+      rtx xop1 = XEXP (x, 1);
+
+      if (is_csky_address_register_rtx_p (xop0, 0)
+	  && CONST_INT_P (xop1))
+	{
+	  HOST_WIDE_INT offset = INTVAL (xop1);
+
+	  /* Try to replace a 32-bit ld rx,(ry, offset) with an
+	     addi16 rz, oimm8 plus a 16-bit ld rx,(rz, new_ld_offset)
+	     to avoid emitting a 32-bit ld; note that this addi has a
+	     limited immediate range.  */
+	  if (optimize_size
+	      && offset > CSKY_LD16_MAX_OFFSET (mode)
+	      && offset <= (CSKY_ADDI16_MAX_IMM
+			   + CSKY_LD16_MAX_OFFSET (mode)))
+	    {
+	      HOST_WIDE_INT new_ld_offset
+		= offset & CSKY_LD16_OFFSET_MASK (mode);
+
+	      xop0 = force_operand (plus_constant (Pmode, xop0,
+						   offset - new_ld_offset),
+				    NULL_RTX);
+	      x = plus_constant (Pmode, xop0, new_ld_offset);
+	    }
+	  else if (offset < 0 && offset >= (-CSKY_SUBI16_MAX_IMM))
+	    x = force_operand (x, NULL_RTX);
+	  else if (offset > CSKY_LD16_MAX_OFFSET (mode)
+		   || offset < 0)
+	    {
+	      /* For the remaining cases, force the constant into a
+		 register.  */
+	      xop1 = force_reg (SImode, xop1);
+	      x = gen_rtx_PLUS (SImode, xop0, xop1);
+	    }
+	}
+
+      /* If the index is stored in a register, force the base
+	 into a register as well.  */
+      if (is_csky_address_register_rtx_p (xop1, 0)
+	  && !is_csky_address_register_rtx_p (xop0, 0))
+	{
+	  xop0 = force_operand (xop0, NULL_RTX);
+	  x = gen_rtx_PLUS (SImode, xop0, xop1);
+	}
+    }
+  /* Make sure to take full advantage of the pre-indexed addressing mode
+     with absolute addresses, which often allows the base register to be
+     shared among multiple adjacent memory references and might even
+     allow the minipool to be avoided entirely.  */
+  else if (CONST_INT_P (x) && optimize > 0)
+    {
+      HOST_WIDE_INT mask, base, index;
+      rtx base_reg;
+
+      mask = CSKY_LD16_OFFSET_MASK (mode);
+      base = INTVAL (x) & ~mask;
+      index = INTVAL (x) & mask;
+      base_reg = force_reg (SImode, GEN_INT (base));
+      x = plus_constant (Pmode, base_reg, index);
+    }
+
+  return x;
+}
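+
+/* Worked example of the optimize_size case above, for SImode accesses,
+   where CSKY_LD16_MAX_OFFSET is 31 * 4 == 124 and the offset mask is
+   127: for base ry and offset 200, new_ld_offset = 200 & 127 = 72, so
+   we conceptually emit addi rz, ry, 128 followed by a 16-bit
+   ld rx, (rz, 72) instead of a single 32-bit ld.  */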
+
+
+/* Return nonzero if INDEX is valid for an address index operand.
+   CK801 has only the 16-bit ld;
+   CK802 has both the 16-bit and 32-bit ld;
+   other processors have ld and ldr.  */
+
+static int
+ck801_legitimate_index_p (machine_mode mode, rtx index,
+			  int strict_p ATTRIBUTE_UNUSED)
+{
+  enum rtx_code code = GET_CODE (index);
+
+  /* When the mode size is larger than 4, we may use two ld instructions
+     to load the data, so both the index and (index+1) must be valid.  */
+  if (GET_MODE_SIZE (mode) >= 8)
+    return (code == CONST_INT
+	    && INTVAL (index) < CSKY_LD16_MAX_OFFSET (SImode)
+	    && INTVAL (index) >= 0 && (INTVAL (index) & 3) == 0);
+
+  if (code == CONST_INT && GET_MODE_SIZE (mode) > 0
+      && INTVAL (index) <= CSKY_LD16_MAX_OFFSET (mode)
+      && INTVAL (index) >= 0)
+    return ((INTVAL (index) % GET_MODE_SIZE (mode)) == 0);
+
+  return 0;
+}
+
+
+static int
+ck802_legitimate_index_p (machine_mode mode, rtx index,
+			  int strict_p ATTRIBUTE_UNUSED)
+{
+  enum rtx_code code = GET_CODE (index);
+
+  /* When the mode size is larger than 4, we may use two ld instructions
+     to load the data, so both the index and (index+1) must be valid.  */
+  if (GET_MODE_SIZE (mode) >= 8)
+    return (code == CONST_INT
+	    && INTVAL (index) < CSKY_LD32_MAX_OFFSET (SImode)
+	    && INTVAL (index) >= 0 && (INTVAL (index) & 3) == 0);
+
+  if (code == CONST_INT && GET_MODE_SIZE (mode) > 0
+      && INTVAL (index) <= CSKY_LD32_MAX_OFFSET (mode)
+      && INTVAL (index) >= 0)
+    return ((INTVAL (index) % GET_MODE_SIZE (mode)) == 0);
+
+  return 0;
+}
+
+
+/* The ldr instruction has the form ldr rz, (rx, ry << i), where i can
+   be 0, 1, 2, or 3.  Check that SHIFT is valid: either an ASHIFT count
+   in [0, 3] or a MULT by a power of 2 in {1, 2, 4, 8}.  */
+
+static bool
+is_ldr_shift_p (HOST_WIDE_INT shift, enum rtx_code code)
+{
+  if (code == ASHIFT)
+    return (shift >= 0 && shift <= 3);
+  else if (code == MULT)
+    return (shift == 1
+	    || shift == 2
+	    || shift == 4
+	    || shift == 8);
+  else
+    return false;
+}
+
+
+static int
+ck810_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
+{
+  enum rtx_code code = GET_CODE (index);
+
+  if (TARGET_HARD_FLOAT
+      && (mode == SFmode || mode == DFmode))
+    return (code == CONST_INT && INTVAL (index) < 1024
+	    && INTVAL (index) >= 0
+	    && (INTVAL (index) & 3) == 0);
+
+  if (code == CONST_INT)
+    {
+      /* When the mode size is larger than 4, we may use two ld
+	 instructions to load the data, so both the index and (index+1)
+	 must be valid.  */
+      if (GET_MODE_SIZE (mode) >= 8)
+	return (INTVAL (index) < CSKY_LD32_MAX_OFFSET (SImode)
+		&& INTVAL (index) >= 0 && (INTVAL (index) & 3) == 0);
+
+      if (GET_MODE_SIZE (mode) > 0
+	  && INTVAL (index) <= CSKY_LD32_MAX_OFFSET (mode)
+	  && INTVAL (index) >= 0)
+	return ((INTVAL (index) % GET_MODE_SIZE (mode)) == 0);
+    }
+  /* Specially allow ld.w rx, (gb, sym@got) when compiling with -fpic.  */
+  else if (code == UNSPEC)
+    return (flag_pic == 1
+	    && (XINT (index, 1) == UNSPEC_PIC_SYMBOL_PLT
+		|| XINT (index, 1) == UNSPEC_PIC_SYMBOL_GOT));
+  /* The following index forms are for the ldr instruction; ldr cannot
+     load doubleword data, so the mode size must not be larger
+     than 4.  */
+  else if (GET_MODE_SIZE (mode) <= 4)
+    {
+      if (is_csky_address_register_rtx_p (index, strict_p))
+	return 1;
+      else if (code == MULT || code == ASHIFT)
+	{
+	  rtx xiop0 = XEXP (index, 0);
+	  rtx xiop1 = XEXP (index, 1);
+
+	  /* FIXME can the xiop1 be the reg and xiop0 be the int when mult?  */
+	  return (is_csky_address_register_rtx_p (xiop0, strict_p)
+		  && CONST_INT_P (xiop1)
+		  && is_ldr_shift_p (INTVAL (xiop1), code));
+	}
+    }
+
+  return 0;
+}
+
+
+static int
+csky_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
+{
+  if (CSKY_TARGET_ARCH (CK801))
+    return ck801_legitimate_index_p (mode, index, strict_p);
+  else if (CSKY_TARGET_ARCH (CK802))
+    return ck802_legitimate_index_p (mode, index, strict_p);
+  else
+    return ck810_legitimate_index_p (mode, index, strict_p);
+}
+
+
+/* Implement TARGET_LEGITIMATE_ADDRESS_P.
+   Recognizes RTL expressions that are valid memory addresses for an
+   instruction.  The MODE argument is the machine mode for the MEM
+   expression that wants to use this address.
+
+   It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
+   convert common non-canonical forms to canonical form so that they will
+   be recognized.  */
+
+static bool
+csky_legitimate_address_p (machine_mode mode, rtx addr, bool strict_p)
+{
+  enum rtx_code code = GET_CODE (addr);
+
+  /* Match the RTX form emitted for constant pool references.
+     After reload constants split into minipools will have addresses
+     from a LABEL_REF.  */
+  if (reload_completed
+      && ((code == LABEL_REF)
+	   || (code == CONST
+	       && GET_CODE (XEXP (addr, 0)) == PLUS
+	       && GET_CODE (XEXP (XEXP (addr, 0), 0)) == LABEL_REF
+	       && CONST_INT_P (XEXP (XEXP (addr, 0), 1)))))
+    return 1;
+
+  if (is_csky_address_register_rtx_p (addr, strict_p))
+    return 1;
+  /* A pc-relative load, possibly generated for a constpool reference.  */
+  else if (GET_CODE (addr) == LABEL_REF)
+    return 1;
+
+  if (code == PLUS)
+    {
+      rtx xop0 = XEXP (addr, 0);
+      rtx xop1 = XEXP (addr, 1);
+
+      return ((is_csky_address_register_rtx_p (xop0, strict_p)
+	       && csky_legitimate_index_p (mode, xop1, strict_p))
+	      || (is_csky_address_register_rtx_p (xop1, strict_p)
+		  && csky_legitimate_index_p (mode, xop0, strict_p)));
+    }
+
+  return 0;
+}
+
+
+/* Functions to save and restore machine-specific function data.  */
+
+static struct machine_function *
+csky_init_machine_status (void)
+{
+  struct machine_function *machine;
+
+  machine = ggc_cleared_alloc<machine_function> ();
+
+#if CSKY_FT_UNKNOWN != 0
+  machine->func_type = CSKY_FT_UNKNOWN;
+#endif
+  return machine;
+}
+
+
+/* Implement INIT_EXPANDERS.  */
+
+void
+csky_init_expanders (void)
+{
+  /* Arrange to initialize and mark the machine per-function status.  */
+  init_machine_status = csky_init_machine_status;
+}
+
+
+/* Implement TARGET_CANNOT_COPY_INSN_P.
+   We must not copy any rtx that uses a pc-relative address.  */
+
+static bool
+csky_cannot_copy_insn_p (rtx_insn *insn)
+{
+  subrtx_iterator::array_type array;
+  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
+    {
+      const_rtx x = *iter;
+      if (GET_CODE (x) == UNSPEC
+	  && (XINT (x, 1) == UNSPEC_TLS_LABEL
+	      || XINT (x, 1) == UNSPEC_PIC_SYMBOL_GOTPC_GRS))
+	return true;
+    }
+  return false;
+}
+
+
+/* Extract the parts of an RTL expression that is a valid memory address
+   for an instruction.  Return FALSE if it is an invalid memory address.  */
+
+struct csky_address
+{
+  rtx base, index, symbol, label, disp;
+  HOST_WIDE_INT scale;
+};
+
+static bool
+decompose_csky_address (rtx addr, struct csky_address *out)
+{
+  rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
+  HOST_WIDE_INT scale = 1;
+  rtx scale_rtx = NULL_RTX;
+  int i;
+
+  out->base = out->index = out->symbol = out->label = out->disp = NULL_RTX;
+  out->scale = 0;
+
+  if (REG_P (addr))
+    {
+      out->base = addr;
+      return true;
+    }
+
+  if (GET_CODE (addr) == LABEL_REF)
+    {
+      out->label = addr;
+      return true;
+    }
+
+  if (GET_CODE (addr) == CONST)
+    addr = XEXP (addr, 0);
+
+  if (GET_CODE (addr) == PLUS)
+    {
+      rtx addends[2], op;
+
+      addends[0] = XEXP (addr, 0);
+      addends[1] = XEXP (addr, 1);
+
+      if (GET_CODE (addends[0]) == LABEL_REF && CONST_INT_P (addends[1]))
+	{
+	  out->label = addends[0];
+	  out->disp = addends[1];
+	  return true;
+	}
+
+      if (!REG_P (addends[0]))
+	std::swap (addends[0], addends[1]);
+
+      for (i = 0; i < 2; ++i)
+	{
+	  op = addends[i];
+	  switch (GET_CODE (op))
+	    {
+	    case REG:
+	      if (!base)
+		base = op;
+	      else if (!index)
+		index = op;
+	      else
+		return false;
+	      break;
+	    case CONST_INT:
+	    case UNSPEC:
+	      if (disp)
+		return false;
+	      disp = op;
+	      break;
+	    case MULT:
+	      if (index)
+		return false;
+	      index = XEXP (op, 0);
+	      scale_rtx = XEXP (op, 1);
+	      if (!CONST_INT_P (index) && !CONST_INT_P (scale_rtx))
+		return false;
+	      else if (CONST_INT_P (index))
+		std::swap (index, scale_rtx);
+	      scale = INTVAL (scale_rtx);
+	      break;
+	    case ASHIFT:
+	      if (index)
+		return false;
+	      index = XEXP (op, 0);
+	      scale_rtx = XEXP (op, 1);
+	      if (!CONST_INT_P (scale_rtx))
+		return false;
+	      scale = scale << INTVAL (scale_rtx);
+	      break;
+	    default:
+	      return false;
+	    }
+	}
+    }
+
+  if (!base)
+    return false;
+
+  out->base = base;
+  out->index = index;
+  out->disp = disp;
+  out->scale = scale;
+
+  return true;
+}
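+
+/* For example, the address (plus (reg r2) (mult (reg r3) (const_int 4)))
+   decomposes into base = r2, index = r3, scale = 4, while a bare
+   (reg r2) yields just base = r2.  */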
+
+/* Helper function for the csky_simple_mem_operand predicate.  Returns
+   true if OP is an address of the form reg + displacement.  */
+
+bool
+csky_simple_addr_operand_p (rtx op)
+{
+  struct csky_address addr;
+
+  if (!decompose_csky_address (op, &addr))
+    return false;
+
+  /* FIXME: PIC-related code.  Check whether the symbol address is
+     loaded from the GOT.  */
+  if (addr.disp && GET_CODE (addr.disp) == UNSPEC)
+    return false;
+  if (!addr.index && !addr.symbol)
+    return true;
+  return false;
+}
+
+
+/* Print the UNSPEC operand in X to the STREAM.  */
+
+static void
+csky_output_pic_addr_const (FILE *stream, rtx x, int code)
+{
+
+  if (GET_CODE (x) != UNSPEC)
+    return;
+
+  if (UNSPEC_TLS == XINT (x, 1))
+    {
+      /* FIXME: this case is never reached.  */
+      return;
+    }
+
+  csky_print_operand (stream, XVECEXP (x, 0, 0), code);
+
+  switch (XINT (x, 1))
+    {
+    case UNSPEC_PIC_SYMBOL_GOTOFF:
+      fputs ("@GOTOFF", stream);
+      break;
+    case UNSPEC_PIC_SYMBOL_PLT:
+      fputs ("@PLT", stream);
+      break;
+    case UNSPEC_PIC_SYMBOL_GOT:
+      fputs ("@GOT", stream);
+      break;
+    case UNSPEC_PIC_SYMBOL_GOTPC:
+      fputs ("@GOTPC", stream);
+      break;
+    case UNSPEC_PIC_SYMBOL_BSR:
+      break;
+    default:
+      break;
+    }
+}
+
+
+/* Output the constpool label according to the rtx expression X.  */
+
+static void
+csky_output_constpool_label (FILE *stream, rtx x)
+{
+  char buf[15];
+
+  gcc_assert (GET_CODE (x) == LABEL_REF);
+  x = XEXP (x, 0);
+
+  if (GET_CODE (x) == UNSPEC_VOLATILE && XINT (x, 1) == VUNSPEC_POOL_LABEL)
+    {
+      ASM_GENERATE_INTERNAL_LABEL (buf, CSKY_CONSTPOOL_LABEL_PREFIX,
+				   INTVAL (XVECEXP (x, 0, 0)));
+      assemble_name (stream, buf);
+    }
+}
+
+
+/* Implement TARGET_PRINT_OPERAND_ADDRESS.  */
+
+static void
+csky_print_operand_address (FILE *stream,
+			    machine_mode mode ATTRIBUTE_UNUSED,
+			    rtx x)
+{
+
+  struct csky_address addr;
+
+  decompose_csky_address (x, &addr);
+
+  if (addr.label && addr.disp && GET_CODE (addr.disp) == CONST_INT)
+    {
+      fprintf (stream, "[");
+      csky_output_constpool_label (stream, addr.label);
+      fprintf (stream, "+%d]", (int) INTVAL (addr.disp));
+    }
+  else if (addr.label)
+    {
+      fprintf (stream, "[");
+      csky_output_constpool_label (stream, addr.label);
+      fprintf (stream, "]");
+    }
+  else if (addr.symbol && addr.disp && GET_CODE (addr.disp) == CONST_INT)
+    {
+      fprintf (stream, "[");
+      output_addr_const (stream, addr.symbol);
+      fprintf (stream, "+%d]", (int) INTVAL (addr.disp));
+    }
+  else if (addr.symbol)
+    {
+      fprintf (stream, "[");
+      output_addr_const (stream, addr.symbol);
+      fprintf (stream, "]");
+    }
+  else if (addr.disp && GET_CODE (addr.disp) == CONST_INT)
+    fprintf (stream, "(%s, %d)",
+	     reg_names[REGNO (addr.base)], (int) INTVAL (addr.disp));
+  else if (addr.disp && GET_CODE (addr.disp) == UNSPEC)
+    {
+      if (REGNO (addr.base) != CSKY_GB_REGNUM)
+	fprintf (stream, "(%s, ", reg_names[REGNO (addr.base)]);
+      else
+	fprintf (stream, "[");
+      csky_output_pic_addr_const (stream, addr.disp, 0);
+      fprintf (stream, "%s", (REGNO (addr.base) != CSKY_GB_REGNUM)
+	       ? ")" : "]");
+    }
+  else if (addr.index)
+    fprintf (stream, "(%s, %s << %d)",
+	     reg_names[REGNO (addr.base)], reg_names[REGNO (addr.index)],
+	     exact_log2 ((int) (addr.scale)));
+  else
+    fprintf (stream, "(%s, 0)", reg_names[REGNO (addr.base)]);
+}
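+
+/* Sample outputs from the cases above (register names and label
+   numbers illustrative only):
+     base + const displacement	->  "(r2, 8)"
+     constpool label + disp	->  "[LCP0+4]"
+     base + scaled index	->  "(r2, r3 << 2)"
+     bare base register		->  "(r2, 0)"  */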
+
+
+/* Implement TARGET_PRINT_OPERAND.
+   Print operand X (an rtx) in assembler syntax to file STREAM
+   according to modifier CODE.
+
+   'N'	print the log2(X+1), mainly used for bmaski
+   'P'	print the log2(X)
+   'Q'	print the log2(~X)
+   'O'	print a decimal number
+   'M'	print a decimal number as its negative
+   'R'	print the next register or memory location along, i.e. the lsw in
+   a double word value
+   'H'	print the high 16 bits of a constant.  */
+
+static void
+csky_print_operand (FILE *stream, rtx x, int code)
+{
+  switch (code)
+    {
+    case 'N':
+      if ((INTVAL (x) & 0xffffffff) == 0xffffffff)
+	fprintf (stream, "0");
+      else
+	fprintf (stream, "%d",
+		 (int) exact_log2 ((INTVAL (x) & 0xffffffff) + 1) % 32);
+      break;
+    case 'P':
+      fprintf (stream, "%d",
+	       (int) exact_log2 (INTVAL (x) & 0xffffffff));
+      break;
+    case 'Q':
+      fprintf (stream, "%d",
+	       (int) exact_log2 (~INTVAL (x) & 0xffffffff));
+      break;
+    case 'O':
+      fprintf (stream, "%d", (int) INTVAL (x));
+      break;
+    case 'M':
+      fprintf (stream, "%d", (int) (-INTVAL (x)));
+      break;
+    case 'R':
+      /* Next location along in memory or register.  */
+      switch (GET_CODE (x))
+	{
+	case REG:
+	  fputs (reg_names[REGNO (x) + 1], stream);
+	  break;
+	case MEM:
+	  csky_print_operand_address
+	    (stream, GET_MODE (x), XEXP (adjust_address (x, SImode, 4), 0));
+	  break;
+	default:
+	  gcc_unreachable ();
+	}
+      break;
+    case 'H':
+      fprintf (stream, "%ld", (long)((INTVAL (x) & 0xFFFF0000) >> 16));
+      break;
+    default:
+      switch (GET_CODE (x))
+	{
+	case REG:
+	  fputs (reg_names[REGNO (x)], stream);
+	  break;
+	case MEM:
+	  output_address (GET_MODE (x), XEXP (x, 0));
+	  break;
+	case UNSPEC:
+	  csky_output_pic_addr_const (stream, x, code);
+	  break;
+	default:
+	  output_addr_const (stream, x);
+	  break;
+	}
+      break;
+    }
+}
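+
+/* Examples of the modifiers above: for X == 0x12345678, '%H' prints
+   4660 (0x1234, the high 16 bits); for X == 0xff, '%N' prints 8
+   (log2 (0xff + 1)); for X == 0x100, '%P' prints 8 (log2 (0x100)).  */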
+
+
+
+/* Implement TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS.  */
+
+static bool
+csky_allocate_stack_slots_for_args (void)
+{
+  /* Naked functions should not allocate stack slots for arguments.  */
+  return !CSKY_FUNCTION_IS_NAKED (get_csky_current_func_type ());
+}
+
+
+/* Can we generate a constant with a single instruction, without using
+   lrw?	 */
+
+static int
+const_ok_for_cskyv2 (HOST_WIDE_INT value)
+{
+  /* Try exact power of two. It can be generated by bgeni.  */
+  if (CSKY_CONST_OK_FOR_Ub (value))
+    return 1;
+
+  /* Try exact power of two - 1.  It can be generated by bmaski.  */
+  if (CSKY_CONST_OK_FOR_Uc (value) && value != -1)
+    return 1;
+
+  /* Try if it can be generated by movi.  */
+  if (CSKY_CONST_OK_FOR_I (value))
+    return 1;
+
+  /* The constant can be generated by movih.
+     Notice that movih is a 32-bit instruction.  */
+  if (CSKY_CONST_OK_FOR_MOVIH (value))
+    return 1;
+
+  return 0;
+}
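+
+/* Examples: 0x00010000 is an exact power of two and suits bgeni;
+   0x0000ffff is a power of two minus one and suits bmaski; small
+   values such as 100 suit movi; 0xabcd0000, whose low 16 bits are
+   zero, suits movih.  */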
+
+
+/* Tricks for synthesizing constants from values that can be directly
+   manipulated by machine instructions.  */
+
+enum csky_inline_const_type
+{
+  IC_UNINLINABLE = 0, /* Not inlineable */
+  IC_SINGLE,	      /* Single instruction */
+  IC_APPEND_NOT,      /* Single instruction followed by a not */
+  IC_APPEND_ADDI,     /* Single insn followed by an addi */
+  IC_APPEND_SUBI,     /* Single insn followed by a subi */
+  IC_BGENI_ADDI,      /* Single insn(bgeni) followed by an addi */
+  IC_BGENI_SUBI,      /* Single insn(bgeni) followed by a subi */
+  IC_APPEND_BSETI,    /* Single insn followed by bseti */
+  IC_APPEND_MOVI,     /* Single insn followed by movi */
+  IC_APPEND_BCLRI,    /* Single insn followed by bclri */
+  IC_APPEND_ROTLI,    /* Single insn followed by rotli */
+  IC_APPEND_LSLI,     /* Single insn followed by lsli */
+  IC_APPEND_IXH,      /* Single insn followed by ixh */
+  IC_APPEND_IXW	      /* Single insn followed by ixw */
+};
+
+
+/* Try tricks to load a constant inline and return the trick number if
+   successful, or IC_UNINLINABLE.  */
+
+static enum csky_inline_const_type
+try_csky_constant_tricks (HOST_WIDE_INT value, HOST_WIDE_INT *x,
+			  HOST_WIDE_INT *y)
+{
+  HOST_WIDE_INT i, value_invert;
+  unsigned HOST_WIDE_INT bit, shf, rot, lobits, hibits;
+
+  value &= 0xffffffff;
+  value_invert = ~value & 0xffffffff;
+
+  if (const_ok_for_cskyv2 (value))
+    {
+      *x = value;
+      return IC_SINGLE;
+    }
+
+  /* Since movih is a 32-bit instruction, do not use it here; better
+     code may be generated later.  */
+  if (const_ok_for_cskyv2 (value_invert)
+      && !CSKY_CONST_OK_FOR_MOVIH (value_invert))
+    {
+      *x = value_invert;
+      return IC_APPEND_NOT;
+    }
+
+  /* One immediate-generating instruction, plus one 16-bit subi or addi.  */
+  for (i = 1; i <= 32; i++)
+    {
+      if (const_ok_for_cskyv2 (value - i)
+	  && !CSKY_CONST_OK_FOR_MOVIH (value - i))
+	{
+	  *x = value - i;
+	  *y = i;
+	  return IC_APPEND_ADDI;
+	}
+
+      if (const_ok_for_cskyv2 (value + i)
+	  && !CSKY_CONST_OK_FOR_MOVIH (value + i))
+	{
+	  *x = value + i;
+	  *y = i;
+	  return IC_APPEND_SUBI;
+	}
+    }
+
+  /* Generate bgeni + addi.  */
+  if (CSKY_CONST_OK_FOR_Ub (value & 0xfffff000))
+    {
+      *x = (value & 0xfffff000);
+      *y = (value & 0xfff);
+      return IC_BGENI_ADDI;
+    }
+
+  /* Generate bgeni + subi.  */
+  lobits = value & 0xfff;
+  hibits = (unsigned HOST_WIDE_INT)(value & 0xfffff000) + (1 << 12);
+  if (exact_log2 (hibits) >= 1
+      && exact_log2 (hibits) <= 30
+      && lobits != 0)
+    {
+      *x = hibits;
+      *y = (0x1000 - lobits);
+      return IC_BGENI_SUBI;
+    }
+
+  /* One immediate-generating instruction, plus one bseti or bclri.  */
+  bit = 0x80000000ULL;
+  for (i = 0; i <= 31; i++)
+    {
+      if (const_ok_for_cskyv2 (value & ~bit)
+	  && !CSKY_CONST_OK_FOR_MOVIH (value & ~bit))
+	{
+	  *y = bit;
+	  *x = (value & ~bit);
+	  return IC_APPEND_BSETI;
+	}
+
+      if (const_ok_for_cskyv2 (value | bit)
+	  && !CSKY_CONST_OK_FOR_MOVIH (value | bit))
+	{
+	  *y = ~bit & 0xffffffff;
+	  *x = value | bit;
+	  return IC_APPEND_BCLRI;
+	}
+
+      bit >>= 1;
+    }
+
+  /* One immediate-generating instruction, plus one rotli or lsli.  */
+  shf = value;
+  rot = value;
+  for (i = 1; i < 31; i++)
+    {
+      int c;
+
+      /* Rotate left.  */
+      c = rot << 31;
+      rot >>= 1;
+      rot &= 0x7FFFFFFF;
+      rot |= c;
+
+      if (const_ok_for_cskyv2 (rot) && !CSKY_CONST_OK_FOR_MOVIH (rot))
+	{
+	  *y = i;
+	  *x = rot;
+	  return IC_APPEND_ROTLI;
+	}
+
+      /* Can't use logical shift when low order bit is one.  */
+      if (shf & 1)
+	shf = 0;
+      else
+	shf >>= 1;
+
+      if (shf != 0 && const_ok_for_cskyv2 (shf)
+	  && !CSKY_CONST_OK_FOR_MOVIH (shf))
+	{
+	  *y = i;
+	  *x = shf;
+	  return IC_APPEND_LSLI;
+	}
+    }
+
+  /* One immediate-generating instruction, plus one ixh.  */
+  if (CSKY_ISA_FEATURE (E2)
+      && (value % 3) == 0
+      && const_ok_for_cskyv2 (value / 3)
+      && !CSKY_CONST_OK_FOR_MOVIH (value / 3))
+    {
+      *x = value / 3;
+      return IC_APPEND_IXH;
+    }
+
+  /* One immediate-generating instruction, plus one ixw.  */
+  if (CSKY_ISA_FEATURE (E2)
+      && (value % 5) == 0
+      && const_ok_for_cskyv2 (value / 5)
+      && !CSKY_CONST_OK_FOR_MOVIH (value / 5))
+    {
+      *x = value / 5;
+      return IC_APPEND_IXW;
+    }
+
+  /* Generate movih + bseti.  */
+  if (CSKY_CONST_OK_FOR_Ub (value & 0xffff))
+    {
+      *x = value & 0xffff0000;
+      *y = value & 0xffff;
+      return IC_APPEND_BSETI;
+    }
+
+  /* Generate movih + not.  */
+  if (CSKY_CONST_OK_FOR_MOVIH (value_invert))
+    {
+      *x = value_invert;
+      return IC_APPEND_NOT;
+    }
+
+  /* One movih, and one 16-bit addi or subi.  */
+  for (i = 1; i <= 32; i++)
+    {
+      if (CSKY_CONST_OK_FOR_MOVIH (value - i))
+	{
+	  *x = value - i;
+	  *y = i;
+	  return IC_APPEND_ADDI;
+	}
+
+      if (CSKY_CONST_OK_FOR_MOVIH (value + i))
+	{
+	  *x = value + i;
+	  *y = i;
+	  return IC_APPEND_SUBI;
+	}
+    }
+
+  /* One movih, and one bseti or bclri.  */
+  bit = 0x80000000ULL;
+  for (i = 0; i <= 31; i++)
+    {
+      if (CSKY_CONST_OK_FOR_MOVIH (value & ~bit))
+	{
+	  *y = bit;
+	  *x = value & ~bit;
+	  return IC_APPEND_BSETI;
+	}
+
+      if (CSKY_CONST_OK_FOR_MOVIH (value | bit))
+	{
+	  *y = ~bit & 0xffffffff;
+	  *x = value | bit;
+	  return IC_APPEND_BCLRI;
+	}
+
+       bit >>= 1;
+    }
+
+  /* One movih, and one rotli or lsli.  */
+  shf = value;
+  rot = value;
+  for (i = 1; i < 31; i++)
+    {
+      int c;
+
+      /* Rotate left.  */
+      c = rot << 31;
+      rot >>= 1;
+      rot &= 0x7FFFFFFF;
+      rot |= c;
+
+      if (CSKY_CONST_OK_FOR_MOVIH (rot))
+	{
+	  *y = i;
+	  *x = rot;
+	  return IC_APPEND_ROTLI;
+	}
+
+      /* Can't use logical shift when low order bit is one.  */
+      if (shf & 1)
+	shf = 0;
+      else
+	shf >>= 1;
+
+      if (shf != 0 && CSKY_CONST_OK_FOR_MOVIH (shf))
+	{
+	  *y = i;
+	  *x = shf;
+	  return IC_APPEND_LSLI;
+	}
+    }
+
+  return IC_UNINLINABLE;
+}
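+
+/* Worked example: for value == 0x10003 no single-insn form matches,
+   but value - 4 == 0xffff (2^16 - 1) is loadable and is not a movih
+   immediate, so the addi/subi loop returns IC_APPEND_ADDI with
+   *x == 0xffff and *y == 4; csky_output_inline_const below then emits
+   a movi or bmaski of 0xffff followed by addi rd, rd, 4.  */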
+
+
+/* Actually output a constant using a trick.
+   FIXME: I think this would be better handled by a splitter than at the
+   asm output level.  */
+
+static const char *
+csky_output_inline_const (machine_mode mode, rtx operands[])
+{
+  HOST_WIDE_INT x = 0, y = 0;
+  enum csky_inline_const_type trick_type;
+  rtx out_operands[3];
+  char buf[256];
+  char load_op[128];
+  const char *dst_fmt;
+  HOST_WIDE_INT value = INTVAL (operands[1]);
+  int ivalue = (int) value;
+  unsigned int uvalue = (unsigned int) value;
+
+  trick_type = try_csky_constant_tricks (value, &x, &y);
+  /* lrw's are handled separately: Large inlinable constants never get
+     turned into lrw's.  Our caller uses try_csky_constant_tricks to back
+     off to an lrw rather than calling this routine.  */
+  gcc_assert (trick_type != IC_UNINLINABLE);
+
+  /* Operands: 0 = dst, 1 = load immediate, 2 = adjust immediate.  */
+  out_operands[0] = operands[0];
+  out_operands[1] = GEN_INT (x);
+  if (trick_type != IC_SINGLE && trick_type != IC_APPEND_NOT)
+    out_operands[2] = GEN_INT (y);
+
+  /* Select dst format based on mode.  */
+  if (mode == DImode && TARGET_BIG_ENDIAN)
+    dst_fmt = "%R0";
+  else
+    dst_fmt = "%0";
+
+  /* Try movi16 (range 0..31) or movi32 (range 0..65535).  */
+  if (CSKY_CONST_OK_FOR_I (x))
+    sprintf (load_op, "movi\t%s, %%1", dst_fmt);
+  /* Try exact power of two - 1.  */
+  else if (CSKY_CONST_OK_FOR_Uc (x))
+    sprintf (load_op, "bmaski\t%s, %%N1", dst_fmt);
+  /* Try movih.  */
+  else if (CSKY_CONST_OK_FOR_MOVIH (x))
+    sprintf (load_op, "movih\t%s, %%H1", dst_fmt);
+  else
+    {
+      sprintf (load_op, "BADMOVI-inline_const %s, %%1", dst_fmt);
+      gcc_unreachable ();
+    }
+
+  switch (trick_type)
+    {
+    case IC_SINGLE:
+      strcpy (buf, load_op);
+      break;
+    /* Add instruction 'not'.  */
+    case IC_APPEND_NOT:
+      sprintf (buf, "%s\n\tnot\t%s, %s\t// %d 0x%x", load_op, dst_fmt,
+	       dst_fmt, ivalue, uvalue);
+      break;
+    /* Add instruction 'addi'.  */
+    case IC_APPEND_ADDI:
+      sprintf (buf, "%s\n\taddi\t%s, %s, %%2\t// %d 0x%x", load_op,
+	       dst_fmt, dst_fmt, ivalue, uvalue);
+      break;
+    /* Add instruction 'subi'.  */
+    case IC_APPEND_SUBI:
+      sprintf (buf, "%s\n\tsubi\t%s, %s, %%2\t// %d 0x%x", load_op,
+	       dst_fmt, dst_fmt, ivalue, uvalue);
+      break;
+    /* Add instruction 'addi', the last instruction is bgeni.  */
+    case IC_BGENI_ADDI:
+      sprintf (buf, "%s\n\taddi\t%s, %s, %%2\t// %d 0x%x", load_op,
+	       dst_fmt, dst_fmt, ivalue, uvalue);
+      break;
+    /* Add instruction 'subi', the last instruction is bgeni.  */
+    case IC_BGENI_SUBI:
+      sprintf (buf, "%s\n\tsubi\t%s, %s, %%2\t// %d 0x%x", load_op,
+	       dst_fmt, dst_fmt, ivalue, uvalue);
+      break;
+    /* Add instruction 'bseti'.  */
+    case IC_APPEND_BSETI:
+      sprintf (buf, "%s\n\tbseti\t%s, %s, %%P2\t// %d 0x%x", load_op,
+	       dst_fmt, dst_fmt, ivalue, uvalue);
+      break;
+    /* Add instruction 'movi'.  */
+    case IC_APPEND_MOVI:
+      sprintf (buf, "%s\n\tmovi\t%s, %%2\t// %d 0x%x", load_op, dst_fmt,
+	       ivalue, uvalue);
+      break;
+    /* Add instruction 'bclri'.  */
+    case IC_APPEND_BCLRI:
+      sprintf (buf, "%s\n\tbclri\t%s, %s, %%Q2\t// %d 0x%x", load_op,
+	       dst_fmt, dst_fmt, ivalue, uvalue);
+      break;
+    /* Add instruction 'rotli'.  */
+    case IC_APPEND_ROTLI:
+      sprintf (buf, "%s\n\trotli\t%s, %s, %%2\t// %d 0x%x", load_op,
+	       dst_fmt, dst_fmt, ivalue, uvalue);
+      break;
+    /* Add instruction 'lsli'.  */
+    case IC_APPEND_LSLI:
+      sprintf (buf, "%s\n\tlsli\t%s, %s, %%2\t// %d 0x%x", load_op,
+	       dst_fmt, dst_fmt, ivalue, uvalue);
+      break;
+    /* Add instruction 'ixh'.  */
+    case IC_APPEND_IXH:
+      sprintf (buf, "%s\n\tixh\t%s, %s, %s\t// %d 0x%x", load_op,
+	       dst_fmt, dst_fmt, dst_fmt, ivalue, uvalue);
+      break;
+    /* Add instruction 'ixw'.  */
+    case IC_APPEND_IXW:
+      sprintf (buf, "%s\n\tixw\t%s, %s, %s\t// %d 0x%x", load_op,
+	       dst_fmt, dst_fmt, dst_fmt, ivalue, uvalue);
+      break;
+    default:
+      return "";
+    }
+
+  output_asm_insn (buf, out_operands);
+
+  return "";
+}
+
+/* This is a helper function for the Uo constraint for movsi patterns.  */
+
+bool
+csky_inlinable_constant (HOST_WIDE_INT value)
+{
+  HOST_WIDE_INT x, y;
+  return (!(CSKY_TARGET_ARCH (CK802) || CSKY_TARGET_ARCH (CK801))
+	  && try_csky_constant_tricks (value, &x, &y));
+}
+
+
+/* Return true if the constant VAL can be expressed by an 8-bit constant
+   with a shift value, filling in *BASE and *SHIFT.  */
+
+bool
+csky_shifted_imm8_constant (unsigned HOST_WIDE_INT val,
+			    unsigned int *base, unsigned int *shift)
+{
+  unsigned HOST_WIDE_INT mask = 0xff;
+  int i;
+  val = val & (unsigned HOST_WIDE_INT) 0xffffffffu;
+  if (val == 0)
+    return false;
+
+  for (i = 0; i < 25; i++)
+    if ((val & (mask << i)) == val)
+      {
+	if (base)
+	  *base = (unsigned int) (val >> i);
+	if (shift)
+	  *shift = (unsigned int) i;
+	return true;
+      }
+
+  return false;
+}
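+
+/* For example, VAL == 0x0003fc00 is 0xff << 10, so the loop above
+   succeeds at i == 10 with *base == 0xff and *shift == 10; a value
+   whose set bits span more than eight positions, such as 0x0103fc00,
+   returns false.  */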
+
+
+/* Output a move of a word or less value.  */
+
+const char *
+csky_output_move (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
+		  machine_mode mode ATTRIBUTE_UNUSED)
+{
+  rtx dst = operands[0];
+  rtx src = operands[1];
+  struct csky_address op0, op1;
+
+  if (REG_P (dst))
+    {
+      /* Register-to-register move.  */
+      if (REG_P (src))
+	{
+	  int dstreg = REGNO (dst);
+	  int srcreg = REGNO (src);
+
+	  /* The hi/lo registers exchange places in little-endian mode
+	     so that the word order of a DImode value matches that of
+	     the other general registers.  */
+	  if (TARGET_BIG_ENDIAN)
+	    {
+	      if (dstreg == CSKY_HI_REGNUM)
+		return "mthi\t%1";
+	      else if (dstreg == CSKY_LO_REGNUM)
+		return "mtlo\t%1";
+	      else if (srcreg == CSKY_HI_REGNUM)
+		return "mfhi\t%0";
+	      else if (srcreg == CSKY_LO_REGNUM)
+		return "mflo\t%0";
+	    }
+	  else
+	    {
+	      if (dstreg == CSKY_HI_REGNUM)
+		return "mtlo\t%1";
+	      else if (dstreg == CSKY_LO_REGNUM)
+		return "mthi\t%1";
+	      else if (srcreg == CSKY_HI_REGNUM)
+		return "mflo\t%0";
+	      else if (srcreg == CSKY_LO_REGNUM)
+		return "mfhi\t%0";
+	    }
+
+	  if (CSKY_VREG_P (dstreg) && CSKY_VREG_P (srcreg))
+	    return "fmovs\t%0, %1";
+	  if (CSKY_VREG_P (dstreg))
+	    return "fmtvrl\t%0, %1";
+	  if (CSKY_VREG_P (srcreg))
+	    return "fmfvrl\t%0, %1";
+
+	  if (REGNO (src) == CSKY_CC_REGNUM)
+	    return "mvc\t%0";
+	  else
+	    return "mov\t%0, %1";
+	}
+      /* Memory-to-register move (load).  */
+      else if (GET_CODE (src) == MEM)
+	{
+	  decompose_csky_address (XEXP (src, 0), &op1);
+
+	  if (op1.index)
+	    switch (GET_MODE (src))
+	      {
+	      case E_HImode:
+		return "ldr.h\t%0, %1";
+	      case E_QImode:
+		return "ldr.b\t%0, %1";
+	      case E_SImode:
+	      case E_SFmode:
+		if (CSKY_VREG_P (REGNO (dst)))
+		  return "fldrs\t%0, %1";
+		else
+		  return "ldr.w\t%0, %1";
+	      default:
+		gcc_unreachable ();
+	      }
+	  /* Generate lrw rx, [LABEL].  This happens when the compiler
+	     generates constant pool references and uses lrw to load the
+	     constant from the pool.  */
+	  else if (op1.label)
+	    return "lrw\t%0, %1";
+	  /* Generate lrs.w rx, [symbol@GOT/PLT].  */
+	  else if (flag_pic == 1 && op1.disp && GET_CODE (op1.disp) == UNSPEC)
+	    return "lrs.w\t%0, %1";
+	  else
+	    switch (GET_MODE (src))
+	      {
+	      case E_HImode:
+		return "ld.h\t%0, %1";
+	      case E_QImode:
+		return "ld.b\t%0, %1";
+	      case E_SFmode:
+	      case E_SImode:
+		if (CSKY_VREG_P (REGNO (dst)))
+		  return "flds\t%0, %1";
+		else
+		  return "ld.w\t%0, %1";
+	      default:
+		gcc_unreachable ();
+	      }
+	}
+      /* Constant-to-register move.  */
+      else if (GET_CODE (src) == CONST_INT
+	       || (GET_CODE (src) == CONST_DOUBLE && GET_MODE (src) == SFmode))
+	{
+	  HOST_WIDE_INT x, y;
+	  const REAL_VALUE_TYPE *d;
+	  long l;
+
+	  if (GET_CODE (src) == CONST_DOUBLE && GET_MODE (src) == SFmode)
+	    {
+	      d = CONST_DOUBLE_REAL_VALUE (src);
+	      REAL_VALUE_TO_TARGET_SINGLE (*d, l);
+	      operands[1] = GEN_INT (l);
+	      src = operands[1];
+	    }
+
+	  if (try_csky_constant_tricks (INTVAL (src), &x, &y))
+	    return csky_output_inline_const (SImode, operands);
+	  /* Return '#' to split it.  */
+	  else if (CSKY_CONST_OK_FOR_T (INTVAL (src)))
+	    return "#";
+	  else
+	    return "lrw\t%0, %x1\t";
+	}
+      else if (TARGET_ANCHOR && GET_CODE (src) == SYMBOL_REF)
+	{
+	  if (SYMBOL_REF_FUNCTION_P (src))
+	    return "lrw\t%0, %1@BTEXT";
+	  else
+	    return "lrw\t%0, %1@BDATA";
+	}
+      else if (GET_CODE (src) == UNSPEC
+	       && XINT (src, 1) == UNSPEC_PIC_SYMBOL_GRS)
+	return "grs\t%0, %1";
+      else
+	return "lrw\t%0, %1";
+    }
+  else if (GET_CODE (dst) == MEM)
+    {
+      decompose_csky_address (XEXP (dst, 0), &op0);
+
+      if (op0.index)
+	switch (GET_MODE (src))
+	  {
+	  case E_HImode:
+	    return "str.h\t%1, %0";
+	  case E_QImode:
+	    return "str.b\t%1, %0";
+	  case E_SFmode:
+	  case E_SImode:
+	    if (CSKY_VREG_P (REGNO (src)))
+	      return "fstrs\t%1, %0";
+	    else
+	      return "str.w\t%1, %0";
+	  default:
+	    gcc_unreachable ();
+	  }
+      else
+	switch (GET_MODE (dst))
+	  {
+	  case E_HImode:
+	    return "st.h\t%1, %0";
+	  case E_QImode:
+	    return "st.b\t%1, %0";
+	  case E_SImode:
+	  case E_SFmode:
+	    if (CSKY_VREG_P (REGNO (src)))
+	      return "fsts\t%1, %0";
+	    else
+	      return "st.w\t%1, %0";
+	  default:
+	    gcc_unreachable ();
+	  }
+    }
+
+  gcc_unreachable ();
+}
+
+
+/* Output a move of a word or less value.  Specific for ck801.  */
+
+const char *
+csky_output_ck801_move (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
+			machine_mode mode ATTRIBUTE_UNUSED)
+{
+  rtx dst = operands[0];
+  rtx src = operands[1];
+  struct csky_address op1;
+
+  if (REG_P (dst))
+    {
+      if (REG_P (src))
+	return "mov\t%0, %1";
+      else if (GET_CODE (src) == MEM)
+	{
+	  decompose_csky_address (XEXP (src, 0), &op1);
+
+	  /* Generate lrw rx, [LABEL].  This happens when the compiler
+	     generates constant pool references and uses lrw to load the
+	     constant from the pool.  */
+	  if (op1.label)
+	    return "lrw\t%0, %1";
+	  else
+	    switch (GET_MODE (src))
+	      {
+	      case E_HImode:
+		return "ld.h\t%0, %1";
+	      case E_QImode:
+		return "ld.b\t%0, %1";
+	      case E_SFmode:
+	      case E_SImode:
+		return "ld.w\t%0, %1";
+	      default:
+		gcc_unreachable ();
+	      }
+	}
+      else if (GET_CODE (src) == CONST_INT)
+	{
+	  if (REGNO (dst) > 7)
+	    return "lrw\t%0, %x1\t";
+	  else if (CSKY_CONST_OK_FOR_N (INTVAL (src) + 1))
+	    return "movi\t%0, %1";
+	  /* Return '#' to split it.  */
+	  else if (CSKY_CONST_OK_FOR_T (INTVAL (src)))
+	    return "#";
+	  else if (csky_shifted_imm8_constant (INTVAL (src), NULL, NULL))
+	    return "#";
+	  else
+	    return "lrw\t%0, %x1\t";
+	}
+      else if (GET_CODE (src) == CONST_DOUBLE && GET_MODE (src) == SFmode)
+	{
+	  const REAL_VALUE_TYPE *d;
+	  long l;
+
+	  d = CONST_DOUBLE_REAL_VALUE (src);
+	  REAL_VALUE_TO_TARGET_SINGLE (*d, l);
+	  operands[1] = GEN_INT (l);
+	  src = operands[1];
+
+	  if (CSKY_CONST_OK_FOR_N (INTVAL (src) + 1))
+	    return "movi\t%0, %1";
+	  else
+	    return "lrw\t%0, %x1\t";
+	}
+      else if (TARGET_ANCHOR && GET_CODE (src) == SYMBOL_REF)
+	{
+	  if (SYMBOL_REF_FUNCTION_P (src))
+	    return "lrw\t%0, %1@BTEXT";
+	  else
+	    return "lrw\t%0, %1@BDATA";
+	}
+      else
+	return "lrw\t%0, %1";
+    }
+  else if (GET_CODE (dst) == MEM)
+    switch (GET_MODE (dst))
+      {
+      case E_HImode:
+	return "st.h\t%1, %0";
+      case E_QImode:
+	return "st.b\t%1, %0";
+      case E_SImode:
+      case E_SFmode:
+	return "st.w\t%1, %0";
+      default:
+	gcc_unreachable ();
+      }
+
+  gcc_unreachable ();
+}
+
+
+/* Return a sequence of instructions to perform DI or DF move.
+   Since the CSKY cannot move a DI or DF in one instruction, we have
+   to take care when we see overlapping source and dest registers.  */
+
+const char *
+csky_output_movedouble (rtx operands[],
+			machine_mode mode ATTRIBUTE_UNUSED)
+{
+  rtx dst = operands[0];
+  rtx src = operands[1];
+
+  if (REG_P (dst))
+    {
+      if (REG_P (src))
+	{
+	  int dstreg = REGNO (dst);
+	  int srcreg = REGNO (src);
+
+	  if (CSKY_HILO_REG_P (srcreg))
+	    {
+	      if (TARGET_BIG_ENDIAN)
+		return "mfhi\t%0\n\tmflo\t%R0";
+	      else
+		return "mfhi\t%R0\n\tmflo\t%0";
+	    }
+	  else if (CSKY_HILO_REG_P (dstreg))
+	    {
+	      if (TARGET_BIG_ENDIAN)
+		return "mthi\t%1\n\tmtlo\t%R1";
+	      else
+		return "mthi\t%R1\n\tmtlo\t%1";
+	    }
+	  else if (CSKY_VREG_P (srcreg) && CSKY_VREG_P (dstreg))
+	    return "fmovd\t%0, %1";
+	  else if (CSKY_VREG_P (srcreg))
+	    {
+	      /* Since the vector registers in fpuv2_soft processors
+		 like ck803f are 32 bits wide, just one insn is needed
+		 to complete the move operation.  */
+	      if (TARGET_SOFT_FPU)
+		  return "fmfvrl\t%0, %1";
+	      else if (TARGET_BIG_ENDIAN)
+		return "fmfvrh\t%0, %1\n\tfmfvrl\t%R0, %1";
+	      else
+		return "fmfvrh\t%R0, %1\n\tfmfvrl\t%0, %1";
+	    }
+	  else if (CSKY_VREG_P (dstreg))
+	    {
+	      if (TARGET_SOFT_FPU)
+		return "fmtvrl\t%0, %1";
+	      else if (TARGET_BIG_ENDIAN)
+		return "fmtvrh\t%0, %1\n\tfmtvrl\t%0, %R1";
+	      else
+		return "fmtvrh\t%0, %R1\n\tfmtvrl\t%0, %1";
+	    }
+
+	  /* Ensure the second word of the source is not overwritten
+	     before it is read.  */
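+	  /* E.g., a (r4,r5) -> (r5,r6) move must copy r5 into r6
+	     first.  */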
+	  if (srcreg + 1 == dstreg)
+	    return "mov\t%R0, %R1\n\tmov\t%0, %1";
+	  else
+	    return "mov\t%0, %1\n\tmov\t%R0, %R1";
+	}
+      else if (GET_CODE (src) == MEM)
+	{
+	  rtx memexp = XEXP (src, 0);
+	  int dstreg = REGNO (dst);
+	  int basereg = -1;
+	  struct csky_address op0;
+
+	  decompose_csky_address (XEXP (src, 0), &op0);
+
+	  if (GET_CODE (memexp) == LABEL_REF
+	      || (GET_CODE (memexp) == CONST
+		  && GET_CODE (XEXP (memexp, 0)) == PLUS
+		  && GET_CODE (XEXP (XEXP (memexp, 0), 0)) == LABEL_REF))
+	    return "lrw\t%0, [%1]\n\tlrw\t%R0, [%R1]";
+	  else if (GET_CODE (memexp) == REG)
+	    basereg = REGNO (memexp);
+	  else if (GET_CODE (memexp) == PLUS)
+	    {
+	      if (GET_CODE (XEXP (memexp, 0)) == REG)
+		basereg = REGNO (XEXP (memexp, 0));
+	      else if (GET_CODE (XEXP (memexp, 1)) == REG)
+		basereg = REGNO (XEXP (memexp, 1));
+	      else
+		gcc_unreachable ();
+	    }
+	  else
+	    gcc_unreachable ();
+
+
+	  /* On FPUV2, use a double-word vector load.  */
+	  if (CSKY_VREG_P (dstreg))
+	    {
+	      if (op0.index)
+		return "fldrd\t%0, %1";
+	      else
+		return "fldd\t%0, %1";
+	    }
+	  /* FIXME length attribute is wrong here.  */
+	  if (dstreg == basereg)
+	    /* Just load them in reverse order.  */
+	    return "ld.w\t%R0, %R1\n\tld.w\t%0, %1";
+	  else
+	    return "ld.w\t%0, %1\n\tld.w\t%R0, %R1";
+	}
+      else if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE)
+	{
+	  split_double (src, operands + 2, operands + 3);
+
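+	  /* Materialize each 32-bit half with the cheapest available
+	     insn: movi, bmaski, bgeni, or a constant-pool lrw.  */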
+	  if (CSKY_CONST_OK_FOR_I (INTVAL (operands[2])))
+	    output_asm_insn ("movi\t%0, %2", operands);
+	  else if (CSKY_CONST_OK_FOR_Uc (INTVAL (operands[2])))
+	    output_asm_insn ("bmaski\t%0, %N2", operands);
+	  else if (CSKY_CONST_OK_FOR_Ub (INTVAL (operands[2])))
+	    output_asm_insn ("bgeni\t%0, %P2", operands);
+	  else
+	    output_asm_insn ("lrw\t%0, %2", operands);
+
+	  if (CSKY_CONST_OK_FOR_I (INTVAL (operands[3])))
+	    output_asm_insn ("movi\t%R0, %3", operands);
+	  else if (CSKY_CONST_OK_FOR_Uc (INTVAL (operands[3])))
+	    output_asm_insn ("bmaski\t%R0, %N3", operands);
+	  else if (CSKY_CONST_OK_FOR_Ub (INTVAL (operands[3])))
+	    output_asm_insn ("bgeni\t%R0, %P3", operands);
+	  else
+	    output_asm_insn ("lrw\t%R0, %3", operands);
+
+	  return "";
+	}
+      else
+	gcc_unreachable ();
+    }
+  else if (GET_CODE (dst) == MEM && GET_CODE (src) == REG)
+    {
+      rtx memexp = XEXP (dst, 0);
+      int srcreg = REGNO (src);
+      int basereg = -1;
+      struct csky_address op0;
+
+      decompose_csky_address (XEXP (dst, 0), &op0);
+
+      if (GET_CODE (memexp) == REG)
+	basereg = REGNO (memexp);
+      else if (GET_CODE (memexp) == PLUS)
+	{
+	  if (GET_CODE (XEXP (memexp, 0)) == REG)
+	    basereg = REGNO (XEXP (memexp, 0));
+	  else if (GET_CODE (XEXP (memexp, 1)) == REG)
+	    basereg = REGNO (XEXP (memexp, 1));
+	  else
+	    gcc_unreachable ();
+	}
+      else
+	gcc_unreachable ();
+
+      /* On FPUV2, use a double-word vector store.  */
+      if (CSKY_VREG_P (srcreg))
+	{
+	  if (op0.index)
+	    return "fstrd\t%1, %0";
+	  else
+	    return "fstd\t%1, %0";
+	}
+      /* FIXME length attribute is wrong here.  */
+      if (srcreg == basereg)
+	/* Just store them in reverse order.  */
+	return "st.w\t%R1, %R0\n\tst.w\t%1, %0";
+      else
+	return "st.w\t%1, %0\n\tst.w\t%R1, %R0";
+    }
+  else
+    gcc_unreachable ();
+}
+
+
+const char *
+csky_output_ck801_movedouble (rtx operands[],
+			      machine_mode mode ATTRIBUTE_UNUSED)
+{
+  rtx dst = operands[0];
+  rtx src = operands[1];
+
+  if (REG_P (dst))
+    {
+      if (REG_P (src))
+	{
+	  int dstreg = REGNO (dst);
+	  int srcreg = REGNO (src);
+
+	  /* Ensure the second word of the source is not overwritten
+	     before it is read.  */
+	  if (srcreg + 1 == dstreg)
+	    return "mov\t%R0, %R1\n\tmov\t%0, %1";
+	  else
+	    return "mov\t%0, %1\n\tmov\t%R0, %R1";
+	}
+      else if (GET_CODE (src) == MEM)
+	{
+	  rtx memexp = XEXP (src, 0);
+	  int dstreg = REGNO (dst);
+	  int basereg = -1;
+	  struct csky_address op0;
+
+	  decompose_csky_address (XEXP (src, 0), &op0);
+
+	  if (GET_CODE (memexp) == LABEL_REF
+	      || (GET_CODE (memexp) == CONST
+		  && GET_CODE (XEXP (memexp, 0)) == PLUS
+		  && GET_CODE (XEXP (XEXP (memexp, 0), 0)) == LABEL_REF))
+	    return "lrw\t%0, [%1]\n\tlrw\t%R0, [%R1]";
+	  else if (GET_CODE (memexp) == REG)
+	    basereg = REGNO (memexp);
+	  else if (GET_CODE (memexp) == PLUS)
+	    {
+	      if (GET_CODE (XEXP (memexp, 0)) == REG)
+		basereg = REGNO (XEXP (memexp, 0));
+	      else if (GET_CODE (XEXP (memexp, 1)) == REG)
+		basereg = REGNO (XEXP (memexp, 1));
+	      else
+		gcc_unreachable ();
+	    }
+	  else
+	    gcc_unreachable ();
+
+	  /* FIXME length attribute is wrong here.  */
+	  if (dstreg == basereg)
+	    /* Just load them in reverse order.  */
+	    return "ld.w\t%R0, %R1\n\tld.w\t%0, %1";
+	  else
+	    return "ld.w\t%0, %1\n\tld.w\t%R0, %R1";
+	}
+      else if (GET_CODE (src) == CONST_INT || GET_CODE (src) == CONST_DOUBLE)
+	{
+	  split_double (src, operands + 2, operands + 3);
+
+	  if (REGNO (dst) <= 7
+	      && CSKY_CONST_OK_FOR_N (INTVAL (operands[2]) + 1))
+	    output_asm_insn ("movi\t%0, %2", operands);
+	  else
+	    output_asm_insn ("lrw\t%0, %2", operands);
+
+	  if (REGNO (dst) <= 6
+	      && CSKY_CONST_OK_FOR_N (INTVAL (operands[3]) + 1))
+	    output_asm_insn ("movi\t%R0, %3", operands);
+	  else
+	    output_asm_insn ("lrw\t%R0, %3", operands);
+
+	  return "";
+	}
+      else
+	gcc_unreachable ();
+    }
+  else if (GET_CODE (dst) == MEM && GET_CODE (src) == REG)
+    {
+      rtx memexp = XEXP (dst, 0);
+      int srcreg = REGNO (src);
+      int basereg = -1;
+      struct csky_address op0;
+
+      decompose_csky_address (XEXP (dst, 0), &op0);
+
+      if (GET_CODE (memexp) == REG)
+	basereg = REGNO (memexp);
+      else if (GET_CODE (memexp) == PLUS)
+	{
+	  if (GET_CODE (XEXP (memexp, 0)) == REG)
+	    basereg = REGNO (XEXP (memexp, 0));
+	  else if (GET_CODE (XEXP (memexp, 1)) == REG)
+	    basereg = REGNO (XEXP (memexp, 1));
+	  else
+	    gcc_unreachable ();
+	}
+      else
+	gcc_unreachable ();
+
+      /* FIXME length attribute is wrong here.  */
+      if (srcreg == basereg)
+	/* Just store them in reverse order.  */
+	return "st.w\t%R1, %R0\n\tst.w\t%1, %0";
+      else
+	return "st.w\t%1, %0\n\tst.w\t%R1, %R0";
+    }
+  else
+    gcc_unreachable ();
+}
+
+/* Split operands for an AND expression when OPERANDS[2] is a constant.
+   Note operands[0] is marked earlyclobber in this case and can be
+   overwritten.  Return true if "DONE", false otherwise.  */
+bool
+csky_split_and (rtx *operands)
+{
+  HOST_WIDE_INT mask = INTVAL (operands[2]);
+  rtx not_value = GEN_INT (~mask);
+  int i;
+
+  /* All zeros or all ones can be handled by a move instruction.  */
+  if (mask == 0)
+    {
+      emit_move_insn (operands[0], const0_rtx);
+      return true;
+    }
+  if (mask == -1)
+    {
+      emit_move_insn (operands[0], operands[1]);
+      return true;
+    }
+
+  /* Check for constants that can be handled directly by the 32-bit andi
+     instruction.  */
+  if (CSKY_ISA_FEATURE (E2) && csky_arith_O_operand (operands[2], SImode))
+    return false;
+
+  /* Try to transform to andni instruction.  */
+  if (CSKY_ISA_FEATURE (E2) && csky_arith_O_operand (not_value, SImode))
+    {
+      emit_insn (gen_cskyv2_andnsi3 (operands[0], not_value, operands[1]));
+      return true;
+    }
+
+  /* If there are only one or two 0 bits in the constant, we can
+     replace the operation with bclri instructions on those bits.
+     Note CK801 has only the 16-bit bclri that operates on a single
+     register, so we must count a move if we are post-reload.  */
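+  /* E.g., MASK == 0xfffffffd clears only bit 1 and becomes a single
+     "bclri rx, 1".  */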
+  if (popcount_hwi (~mask & 0xffffffff)
+      <= (reload_completed && !CSKY_ISA_FEATURE (E2) ? 1 : 2))
+    {
+      rtx input = operands[1];
+
+      if (!CSKY_ISA_FEATURE (E2))
+	{
+	  emit_move_insn (operands[0], input);
+	  input = operands[0];
+	}
+
+      for (i = 0; i < 32; i++)
+	if ((mask & (1 << i)) == 0x0)
+	  {
+	    emit_insn (gen_bclri (operands[0], input, GEN_INT (i)));
+	    input = operands[0];
+	  }
+      return true;
+    }
+
+  /* If the constant mask is outside the [0, 4095] range for
+     constraint O, or if constraint O is not allowed (ck801),
+     the constant may still be a contiguous bit range that we can
+     handle with a bit extract (low bits) or shifts (high bits).  */
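+  /* E.g., MASK == 0xffff is handled by a zero-extract on 2E3 or by
+     "lsli rx, 16; lsri rx, 16" otherwise, and MASK == 0xffff0000 by
+     "lsri rx, 16; lsli rx, 16".  */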
+  for (i = (CSKY_ISA_FEATURE (E2) ? 13 : 1); i < 32; i++)
+    {
+      if ((((HOST_WIDE_INT) 1) << i) - 1 == mask)
+	{
+	  if (CSKY_ISA_FEATURE (2E3))
+	    emit_insn (gen_cskyv2_extzv (operands[0], operands[1],
+					 GEN_INT (i), const0_rtx));
+	  else
+	    {
+	      rtx shift = GEN_INT (32 - i);
+	      rtx reg = (reload_completed
+			 ? operands[0] : gen_reg_rtx (SImode));
+
+	      emit_insn (gen_ashlsi3 (reg, operands[1], shift));
+	      emit_insn (gen_lshrsi3 (operands[0], reg, shift));
+	    }
+	  return true;
+	}
+      else if ((((HOST_WIDE_INT) 1) << i) - 1 == ~mask)
+	{
+	  rtx shift = GEN_INT (i);
+	  rtx reg = (reload_completed
+		     ? operands[0] : gen_reg_rtx (SImode));
+
+	  emit_insn (gen_lshrsi3 (reg, operands[1], shift));
+	  emit_insn (gen_ashlsi3 (operands[0], reg, shift));
+	  return true;
+	}
+    }
+
+  /* If the constant is a negative number, it seems better to use
+     andn and copy the NOT_VALUE to a register instead of the
+     original value, since the NOT_VALUE is always smaller and thus
+     more likely to be representable as a small constant.
+     This transformation can only be done before reload because
+     it requires a temporary.  Hopefully register allocation can get
+     rid of the extra move required for CK801.  */
+  if (!reload_completed && INTVAL (operands[2]) < 0)
+    {
+      rtx reg = copy_to_mode_reg (SImode, not_value);
+
+      if (CSKY_ISA_FEATURE (E2))
+	emit_insn (gen_cskyv2_andnsi3 (operands[0], reg, operands[1]));
+      else
+	{
+	  emit_move_insn (operands[0], operands[1]);
+	  emit_insn (gen_ck801_andnsi3 (operands[0], reg, operands[0]));
+	}
+      return true;
+    }
+
+  /* If none of the above approaches apply, move the constant
+     to a register.  We can clobber operands[0] as it is
+     marked earlyclobber in the insn constraints, but then we have to
+     swap operands 1 and 2 to match the constraints on the 2-operand
+     16-bit and instruction.  */
+  if (reload_completed)
+    {
+      emit_move_insn (operands[0], operands[2]);
+      operands[2] = operands[1];
+      operands[1] = operands[0];
+    }
+  else
+    operands[2] = copy_to_mode_reg (SImode, operands[2]);
+  return false;
+}
+
+/* Split operands for an IOR expression when OPERANDS[2] is a constant.
+   Note operands[0] is marked earlyclobber in this case and can be
+   overwritten.  Return true if "DONE", false otherwise.  */
+bool
+csky_split_ior (rtx *operands)
+{
+  HOST_WIDE_INT mask = INTVAL (operands[2]);
+  int i;
+
+  /* All zeros or all ones can be handled by a move instruction.  */
+  if (mask == 0)
+    {
+      emit_move_insn (operands[0], operands[1]);
+      return true;
+    }
+  if (mask == -1)
+    {
+      emit_move_insn (operands[0], gen_int_mode (-1, SImode));
+      return true;
+    }
+
+  /* Check for constants that can be handled directly by the 32-bit ori
+     instruction.  */
+  if (CSKY_ISA_FEATURE (E2) && csky_literal_I_operand (operands[2], SImode))
+    return false;
+
+  /* If there are only one or two 1 bits in the value, we can replace
+     the operation with bseti instructions to set those bits.
+     Note CK801 has only the 16-bit bseti that operates on a single
+     register, so we must count a move if we are post-reload.  */
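+  /* E.g., MASK == 0x5 sets bits 0 and 2 and becomes two "bseti"
+     instructions.  */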
+  if (popcount_hwi (mask & 0xffffffff)
+      <= (reload_completed && !CSKY_ISA_FEATURE (E2) ? 1 : 2))
+    {
+      rtx input = operands[1];
+
+      if (!CSKY_ISA_FEATURE (E2))
+	{
+	  emit_move_insn (operands[0], input);
+	  input = operands[0];
+	}
+
+      for (i = 0; i < 32; i++)
+	if (mask & (1 << i))
+	  {
+	    emit_insn (gen_bseti (operands[0], input, GEN_INT (i)));
+	    input = operands[0];
+	  }
+      return true;
+    }
+
+  /* If none of the above approaches apply, move the constant
+     to a register.  We can clobber operands[0] as it is
+     marked earlyclobber in the insn constraints, but then we have to
+     swap operands 1 and 2 to match the constraints on the 2-operand
+     16-bit ior instruction.  */
+  if (reload_completed)
+    {
+      emit_move_insn (operands[0], operands[2]);
+      operands[2] = operands[1];
+      operands[1] = operands[0];
+    }
+  else
+    operands[2] = copy_to_mode_reg (SImode, operands[2]);
+  return false;
+}
+
+
+/* Split operands for an XOR expression when OPERANDS[2] is a constant.
+   Note operands[0] is marked earlyclobber in this case and can be
+   overwritten.  Return true if "DONE", false otherwise.  */
+bool
+csky_split_xor (rtx *operands)
+{
+  HOST_WIDE_INT mask = INTVAL (operands[2]);
+
+  /* All zeros can be turned into move instruction.  */
+  if (mask == 0)
+    {
+      emit_move_insn (operands[0], operands[1]);
+      return true;
+    }
+
+  /* All ones can be turned into a bitwise not.  */
+  if (mask == -1)
+    {
+      if (CSKY_ISA_FEATURE (E2))
+	emit_insn (gen_cskyv2_one_cmplsi2 (operands[0], operands[1]));
+      else
+	{
+	  emit_move_insn (operands[0], operands[1]);
+	  emit_insn (gen_ck801_one_cmplsi2 (operands[0], operands[0]));
+	}
+      return true;
+    }
+
+  /* Check for constants that can be handled directly by the 32-bit xori
+     instruction.  */
+  if (CSKY_ISA_FEATURE (E2) && csky_arith_O_operand (operands[2], SImode))
+    return false;
+
+  /* If none of the above approaches apply, move the constant
+     to a register.  We can clobber operands[0] as it is
+     marked earlyclobber in the insn constraints, but then we have to
+     swap operands 1 and 2 to match the constraints on the 2-operand
+     16-bit xor instruction.  */
+  if (reload_completed)
+    {
+      emit_move_insn (operands[0], operands[2]);
+      operands[2] = operands[1];
+      operands[1] = operands[0];
+    }
+  else
+    operands[2] = copy_to_mode_reg (SImode, operands[2]);
+  return false;
+}
+
+
+/* Return true if X is an address form involving a symbol or label ref.  */
+bool
+csky_symbolic_address_p (rtx x)
+{
+  switch (GET_CODE (x))
+    {
+    case SYMBOL_REF:
+    case LABEL_REF:
+      return true;
+    case CONST:
+      x = XEXP (x, 0);
+      return ((GET_CODE (XEXP (x, 0)) == SYMBOL_REF
+	       || GET_CODE (XEXP (x, 0)) == LABEL_REF)
+	      && GET_CODE (XEXP (x, 1)) == CONST_INT);
+    default:
+      return false;
+    }
+}
+
+
+/* Emit a comparison instruction.
+   Return true if an inverted comparison is generated.  */
+
+bool
+csky_emit_compare (enum rtx_code code, rtx op0, rtx op1)
+{
+  bool invert;
+  rtx cc_reg = gen_rtx_REG (CCmode, CSKY_CC_REGNUM);
+
+  if (GET_CODE (op1) == CONST_INT)
+    {
+      HOST_WIDE_INT val = INTVAL (op1);
+
+      switch (code)
+	{
+	case GTU:
+	  /* Unsigned (GTU 0) is the same as (NE 0); everything else is
+	     converted below to LEU (reversed cmphs).  */
+	  if (val == 0)
+	    code = NE;
+	  /* Check whether (GTU A imm) can become (GEU A imm + 1),
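+	     e.g. (gtu A 7) -> (geu A 8).  */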
+	  else if (TARGET_MINI_REGISTERS
+		   ? CSKY_CONST_OK_FOR_J (val + 1)
+		   : CSKY_CONST_OK_FOR_Uk (val + 1))
+	    {
+	      op1 = GEN_INT (val + 1);
+	      code = GEU;
+	    }
+	  break;
+	/* Check whether (LE A imm) can become (LT A imm + 1),
+	   or (GT A imm) can become (GE A imm + 1).  */
+	case GT:
+	case LE:
+	  if (TARGET_MINI_REGISTERS
+	      ? CSKY_CONST_OK_FOR_J (val + 1)
+	      : CSKY_CONST_OK_FOR_Uk (val + 1))
+	    {
+	      op1 = GEN_INT (val + 1);
+	      code = code == LE ? LT : GE;
+	    }
+	  break;
+
+	default:
+	  break;
+	}
+    }
+
+  if (CONSTANT_P (op1) && GET_CODE (op1) != CONST_INT)
+    op1 = force_reg (GET_MODE (op1), op1);
+
+  /* cmpnei: 0-31 (K immediate)
+     cmplti: 1-32 (J immediate, 0 using btsti x,31).  */
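+  /* E.g., an EQ test is emitted as cmpne with INVERT returned as
+     true, so the caller must test the opposite condition.  */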
+  invert = false;
+  switch (code)
+    {
+      /* Use inverted condition, cmpne.  */
+      case EQ:
+	code = NE;
+	invert = true;
+      /* Fall through.  */
+      /* Use normal condition, cmpne.  */
+      case NE:
+	if (GET_CODE (op1) == CONST_INT
+	    && (TARGET_MINI_REGISTERS
+		? !csky_literal_K_operand (op1, SImode)
+		: !csky_literal_I_operand (op1, SImode)))
+	  op1 = force_reg (SImode, op1);
+	break;
+
+      /* Use inverted condition, reversed cmplt.  */
+      case LE:
+	code = GT;
+	invert = true;
+      /* Fall through.  */
+      /* Use normal condition, reversed cmplt.  */
+      case GT:
+	if (GET_CODE (op1) == CONST_INT)
+	  op1 = force_reg (SImode, op1);
+	break;
+
+      /* Use inverted condition, cmplt.  */
+      case GE:
+	code = LT;
+	invert = true;
+      /* Fall through.  */
+      /* Use normal condition, cmplt.  */
+      case LT:
+	/* Comparison with 0 is covered by btsti x,31.  */
+	if (GET_CODE (op1) == CONST_INT && INTVAL (op1) != 0
+	    && (TARGET_MINI_REGISTERS
+		? !csky_literal_J_operand (op1, SImode)
+		: !csky_literal_Uk_operand (op1, SImode)))
+	  op1 = force_reg (SImode, op1);
+	break;
+
+      /* Use inverted condition, cmple.  */
+      case GTU:
+	/* The (GTU A 0) case was converted to NE above.  */
+	gcc_assert (GET_CODE (op1) != CONST_INT || INTVAL (op1) != 0);
+	code = LEU;
+	invert = true;
+      /* Fall through.  */
+      /* Use normal condition, reversed cmphs.  */
+      case LEU:
+	if (GET_CODE (op1) == CONST_INT && INTVAL (op1) != 0)
+	  op1 = force_reg (SImode, op1);
+	break;
+
+      /* Use inverted condition, cmphs.  */
+      case LTU:
+	code = GEU;
+	invert = true;
+      /* Fall through.  */
+      /* Use normal condition, cmphs.  */
+      case GEU:
+	if (GET_CODE (op1) == CONST_INT && INTVAL (op1) != 0
+	    && (TARGET_MINI_REGISTERS
+		? !csky_literal_J_operand (op1, SImode)
+		: !csky_literal_Uk_operand (op1, SImode)))
+	  op1 = force_reg (SImode, op1);
+	break;
+
+    default:
+      break;
+    }
+
+  emit_insn (gen_rtx_SET (cc_reg,
+			  gen_rtx_fmt_ee (code, CCmode, op0, op1)));
+  return invert;
+}
+
+/* Return true if push/pop can be used to save/restore all the registers
+   indicated by MASK.  We currently don't attempt to handle situations where
+   some of the registers could be handled by push/pop and others saved and
+   restored individually.  */
+
+static bool
+csky_can_use_pushpop (unsigned int mask)
+{
+  int i;
+  int end_reg;
+
+  if (!TARGET_PUSHPOP)
+    return false;
+
+  if (mask == 0)
+    return false;
+
+  /* Regs 0-3, 12-14, 18-27, 29-31 cannot be in the mask.  */
+  if (mask & 0xeffc700f)
+    return false;
+
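+  /* E.g., a MASK of 0x8070 (r4-r6 plus r15) is accepted below, while
+     0x0050 (r4 and r6 without r5) fails the contiguity check.  */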
+  /* Regs in the range r4-r11 must be contiguous.  */
+  for (end_reg = 0, i = 11; i >= 4; i--)
+    {
+      if (!end_reg && (mask & (1 << i)))
+	end_reg = i;
+      if (end_reg && !(mask & (1 << i)))
+	return false;
+    }
+
+  /* Likewise for regs in the range r16-r17.  */
+  for (end_reg = 0, i = 17; i >= 16; i--)
+    {
+      if (!end_reg && (mask & (1 << i)))
+	end_reg = i;
+      if (end_reg && !(mask & (1 << i)))
+	return false;
+    }
+
+  return true;
+}
+
+
+/* Return true if store/load multiple instructions can be used to
+   save/restore at least some of the registers indicated by MASK.
+   Unlike the push/pop case, this does handle partial ranges.
+   Set *BR and *ER to the beginning and end (respectively) of the
+   register range that can be handled.  */
+
+static bool
+csky_can_use_ldstm (int mask, int *br, int *er)
+{
+  int regno;
+  int begin_reg = 0, end_reg = 0;
+  int count = 0;
+
+  if (!TARGET_MULTIPLE_STLD)
+    return false;
+
+  /* We'll only handle registers in the range 4-11, the contiguous range
+     of caller-saved registers.  Higher-numbered registers are handled
+     individually in addition to this, but we'll give up on doing ldstm
+     entirely if we need to save/restore the low-numbered EH registers.  */
+  if (mask & 0xf)
+    return false;
+
+  for (regno = 4; regno <= 11; regno++)
+    {
+      if (mask & 1 << regno)
+	{
+	  if (!begin_reg)
+	    begin_reg = regno;
+	  end_reg = regno;
+	  count++;
+	}
+      else if (begin_reg)
+	break;
+    }
+
+  if (count >= CSKY_MIN_MULTIPLE_STLD && count <= CSKY_MAX_MULTIPLE_STLD)
+    {
+      if (br)
+	*br = begin_reg;
+      if (er)
+	*er = end_reg;
+      return true;
+    }
+  return false;
+}
+
+
+const char *
+csky_output_return_instruction (void)
+{
+  unsigned long func_type = get_csky_current_func_type ();
+
+  if (CSKY_FUNCTION_IS_NAKED (func_type))
+    return "";
+  if (CSKY_FUNCTION_IS_INTERRUPT (func_type))
+    return "ipop\n\tnir\n";
+  else
+    return "rts\n";
+}
+
+
+/* Adjust the stack pointer by OFFSET bytes.  OFFSET is negative if this
+   is in the prologue, positive if in the epilogue.  This may require
+   multiple instructions and/or use of CSKY_STACKADJUST_REGNUM as
+   a scratch register.  Emit CFA notes as appropriate.  */
+static void
+expand_csky_stack_adjust (int offset)
+{
+  rtx set;
+  rtx_insn *insn;
+  int size = (offset > 0 ? offset : -offset);
+
+  if (offset == 0)
+    return;
+
+  /* If OFFSET is too large for addi/subi, load it into
+     CSKY_STACKADJUST_REGNUM and use a register add/sub instead.
+     This case is not mentioned in the ABI documentation, but it is
+     supported by GDB prologue analysis provided that the instruction(s)
+     to initialize CSKY_STACKADJUST_REGNUM appear directly before
+     the sub.  Depending on the value of OFFSET, this might be a
+     lrw instruction or the "tricks" used by csky_output_inline_const to
+     encode special-case integer constants.  */
+  if (size > CSKY_MAX_SP_ADJUST * 2)
+    {
+      rtx tmp, dwarf;
+
+      /* We should have reserved the scratch register already in
+	 csky_layout_stack_frame.  */
+      gcc_assert (cfun->machine->reg_size != 0
+		  && (cfun->machine->reg_mask
+		      & (1 << CSKY_STACKADJUST_REGNUM)));
+
+      /* Prevent the optimizer from reordering these instructions to
+	 keep GDB happy.  */
+      if (!flag_sched_prolog)
+	emit_insn (gen_blockage ());
+
+      tmp = gen_rtx_REG (SImode, CSKY_STACKADJUST_REGNUM);
+      emit_move_insn (tmp, GEN_INT (size));
+
+      if (offset > 0)
+	set = gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp);
+      else
+	set = gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp);
+      insn = emit_insn (set);
+      RTX_FRAME_RELATED_P (insn) = 1;
+      dwarf = gen_rtx_SET (stack_pointer_rtx,
+			   plus_constant (Pmode, stack_pointer_rtx, offset));
+      add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
+
+      /* Again, block scheduling to keep GDB happy.  */
+      if (!flag_sched_prolog)
+	emit_insn (gen_blockage ());
+    }
+
+  /* Use one or two addi or subi insns to adjust stack.  */
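+  /* (For instance, if CSKY_MAX_SP_ADJUST were 508, a SIZE of 600
+     would be split into adjustments of 508 and 92.)  */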
+  else
+    while (size)
+      {
+	int delta = (size > CSKY_MAX_SP_ADJUST
+		     ? CSKY_MAX_SP_ADJUST : size);
+
+	if (offset > 0)
+	  set = gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+			    GEN_INT (delta));
+	else
+	  set = gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx,
+			    GEN_INT (delta));
+	insn = emit_insn (set);
+	RTX_FRAME_RELATED_P (insn) = 1;
+	size -= delta;
+      }
+}
+
+
+/* Generate and emit an insn that we will recognize as a push_multi.
+   Unfortunately, since this insn does not reflect very well the actual
+   semantics of the operation, we need to annotate the insn for the benefit
+   of DWARF2 frame unwind information.  MASK indicates the registers
+   to be pushed.  */
+
+static rtx
+emit_csky_regs_push (unsigned long mask)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par;
+  rtx dwarf;
+  rtx tmp;
+  int dwarf_par_index;
+
+  for (i = 0; i < CSKY_NGPR_REGS; i++)
+    {
+      if (mask & (1 << i))
+	num_regs++;
+    }
+
+  /* The register range for push is r4-r11, r15-r17, r28.  */
+  gcc_assert (num_regs && num_regs <= 12);
+
+  /* For the body of the insn we are going to generate an UNSPEC in
+     parallel with several USEs.  This allows the insn to be recognized
+     by the push_multi pattern in the csky.md file.
+
+     The body of the insn looks something like this:
+
+       (parallel [
+	   (set (mem:BLK (pre_modify:SI (reg:SI sp)
+					(const_int:SI <num>)))
+		(unspec:BLK [(reg:SI r4)] UNSPEC_PUSHPOP_MULT))
+	   (use (reg:SI XX))
+	   (use (reg:SI YY))
+	   ...
+	])
+
+     For the frame note however, we try to be more explicit and actually
+     show each register being stored into the stack frame, plus a (single)
+     decrement of the stack pointer.  We do it this way in order to be
+     friendly to the stack unwinding code, which only wants to see a single
+     stack decrement per instruction.  The RTL we generate for the note looks
+     something like this:
+
+      (sequence [
+	   (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
+	   (set (mem:SI (reg:SI sp)) (reg:SI r4))
+	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
+	   (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
+	   ...
+	])
+
+     FIXME: In an ideal world the PRE_MODIFY would not exist and
+     instead we'd have a parallel expression detailing all
+     the stores to the various memory addresses so that debug
+     information is more up-to-date. Remember however while writing
+     this to take care of the constraints with the push instruction.
+
+     Note also that this has to be taken care of for the VFP registers.
+
+     For more see PR43399.  */
+
+  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
+  dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
+  dwarf_par_index = 1;
+
+  for (i = 0; i < CSKY_NGPR_REGS; i++)
+    if (mask & (1 << i))
+      {
+	rtx reg = gen_rtx_REG (SImode, i);
+	rtx addr = plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs);
+	tmp = gen_frame_mem (BLKmode,
+			     gen_rtx_PRE_MODIFY (Pmode,
+						 stack_pointer_rtx, addr));
+	XVECEXP (par, 0, 0)
+	  = gen_rtx_SET (tmp,
+			 gen_rtx_UNSPEC (BLKmode,
+					 gen_rtvec (1, reg),
+					 UNSPEC_PUSHPOP_MULT));
+	tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
+			   reg);
+	RTX_FRAME_RELATED_P (tmp) = 1;
+	XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
+
+	break;
+      }
+
+  for (j = 1, i++; j < num_regs; i++)
+    if (mask & (1 << i))
+      {
+	rtx reg = gen_rtx_REG (SImode, i);
+	rtx addr = plus_constant (Pmode, stack_pointer_rtx, 4 * j);
+	tmp = gen_rtx_SET (gen_frame_mem (SImode, addr), reg);
+	RTX_FRAME_RELATED_P (tmp) = 1;
+	XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
+	XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
+	j++;
+      }
+
+  par = emit_insn (par);
+
+  tmp = gen_rtx_SET (stack_pointer_rtx,
+		     plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
+  RTX_FRAME_RELATED_P (tmp) = 1;
+  XVECEXP (dwarf, 0, 0) = tmp;
+
+  add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
+  RTX_FRAME_RELATED_P (par) = 1;
+
+  return par;
+}
+
+
+/* Generate and emit an insn pattern that we will recognize as a pop_multi.
+   SAVED_REGS_MASK shows which registers need to be restored.
+
+   Unfortunately, since this insn does not reflect very well the actual
+   semantics of the operation, we need to annotate the insn for the benefit
+   of DWARF2 frame unwind information.  */
+
+static void
+emit_csky_regs_pop (unsigned long mask)
+{
+  int num_regs = 0;
+  int i, j;
+  rtx par;
+
+  for (i = 0; i < CSKY_NGPR_REGS; i++)
+    if (mask & (1 << i))
+      num_regs++;
+
+  /* The register range for pop is r4-r11, r15-r17, r28.  */
+  gcc_assert (num_regs && num_regs <= 12);
+
+  /* The first element of the parallel is (return); the second is
+       (set (reg:SI <first regno>)
+	    (unspec:SI [(mem)] UNSPEC_PUSHPOP_MULT));
+     the remaining elements are (use (reg:SI <regno>)), so the
+     length is the number of registers to be popped plus one.  */
+  par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
+
+  XVECEXP (par, 0, 0) = ret_rtx;
+
+  for (i = 0; i < CSKY_NGPR_REGS; i++)
+    if (mask & (1 << i))
+      {
+	rtx reg = gen_rtx_REG (SImode, i);
+	rtx addr = plus_constant (Pmode, stack_pointer_rtx, 4 * num_regs);
+	rtx tmp = gen_frame_mem (SImode,
+				 gen_rtx_POST_MODIFY (Pmode,
+						      stack_pointer_rtx, addr));
+	XVECEXP (par, 0, 1)
+	  = gen_rtx_SET (reg,
+			 gen_rtx_UNSPEC (SImode,
+					 gen_rtvec (1, tmp),
+					 UNSPEC_PUSHPOP_MULT));
+	break;
+      }
+
+  for (j = 2, i++; j < (num_regs + 1); i++)
+    if (mask & (1 << i))
+      {
+	rtx reg = gen_rtx_REG (SImode, i);
+	XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
+	j++;
+      }
+
+  emit_jump_insn (par);
+}
+
+
+/* Generate the function prologue.  */
+
+void
+csky_expand_prologue (void)
+{
+  rtx_insn *insn;
+  unsigned long func_type = get_csky_current_func_type ();
+  unsigned int reg_mask;
+  int reg_size;
+
+  if (CSKY_FUNCTION_IS_NAKED (func_type))
+    {
+      if (flag_stack_usage_info)
+	current_function_static_stack_size = 0;
+      return;
+    }
+
+  csky_layout_stack_frame ();
+  reg_mask = cfun->machine->reg_mask;
+  reg_size = cfun->machine->reg_size;
+
+  /* Adjust stack pointer past argument overflow area.  */
+  if (cfun->machine->arg_size != 0)
+    {
+      int offset = cfun->machine->arg_size;
+      expand_csky_stack_adjust (- offset);
+
+      /* If we have a parameter passed partially in regs and partially
+	 in memory, the registers will have been stored to memory already
+	 in function.c.  So we only need to copy varargs from registers
+	 to stack.  */
+      if (cfun->machine->uses_anonymous_args)
+	{
+	  int rn = CSKY_FIRST_PARM_REGNUM + CSKY_NPARM_REGS - 1;
+	  for (offset -= 4; offset >= 0; offset -= 4, rn--)
+	    {
+	      rtx dst = gen_frame_mem (SImode,
+				       plus_constant (Pmode,
+						      stack_pointer_rtx,
+						      offset));
+	      insn = emit_move_insn (dst, gen_rtx_REG (SImode, rn));
+	      RTX_FRAME_RELATED_P (insn) = 1;
+	    }
+	}
+    }
+
+  /* Push caller-saved registers to stack.  */
+  if (csky_can_use_pushpop (reg_mask))
+    emit_csky_regs_push (reg_mask);
+  else if (reg_size)
+    {
+      int sreg = -1, ereg = -1;
+      bool stm_p = csky_can_use_ldstm (reg_mask, &sreg, &ereg);
+      int stm_regs = stm_p ? ereg - sreg + 1 : 0;
+      int stm_size = stm_regs * 4;
+
+      /* First adjust the SP to the low end of the register save area.  */
+      expand_csky_stack_adjust (- reg_size);
+
+      /* Emit individual register saves.  Even if we are going to emit an
+	 stm, we may need to save individual registers above that too.  */
+      if (reg_size > stm_size)
+	{
+	  int offset = reg_size - 4;
+	  int regno = 31;
+	  for ( ; regno > ereg; regno--)
+	    if (reg_mask & (1 << regno))
+	      {
+		rtx dst = gen_rtx_MEM (SImode,
+				       plus_constant (Pmode,
+						      stack_pointer_rtx,
+						      offset));
+		rtx insn = emit_insn (gen_movsi (dst,
+						 gen_rtx_REG (SImode, regno)));
+		RTX_FRAME_RELATED_P (insn) = 1;
+		if (offset == stm_size)
+		  break;
+		offset -= 4;
+	      }
+	}
+
+      /* If possible, emit a stm to do a bulk store of sequential
+	 registers to the stack.  Note that the ABI documentation
+	 erroneously omits stm from its list of valid prologue
+	 instructions.  */
+      if (stm_p)
+	{
+	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (stm_regs));
+	  int regno, slot;
+	  for (regno = sreg, slot = 0; regno <= ereg; regno++, slot++)
+	    {
+	      rtx reg = gen_rtx_REG (SImode, regno);
+	      rtx addr = plus_constant (Pmode, stack_pointer_rtx, slot * 4);
+	      rtx set = gen_rtx_SET (gen_frame_mem (SImode, addr), reg);
+	      RTX_FRAME_RELATED_P (set) = 1;
+	      XVECEXP (par, 0, slot) = set;
+	    }
+	  insn = emit_insn (par);
+	  RTX_FRAME_RELATED_P (insn) = 1;
+	}
+    }
+
+  /* Initialize hard frame pointer, if necessary.  It points at the base
+     of the register save area.  */
+  if (frame_pointer_needed)
+    {
+      insn = emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
+      RTX_FRAME_RELATED_P (insn) = 1;
+    }
+
+  /* Reserve stack space for locals and outgoing args.  */
+  expand_csky_stack_adjust (- cfun->machine->reg_offset);
+
+  /* Put the GOT address in reg_gb for PIC, using R13 as a scratch.
+     See section 4.7.1 in the ABI documentation,
+     "Function Prologue for PIC".  */
+  if (flag_pic && (reg_mask & (1 << PIC_OFFSET_TABLE_REGNUM)))
+    {
+      rtx l1 = gen_label_rtx ();
+      rtx grs_label = gen_rtx_LABEL_REF (SImode, l1);
+      rtx reg_gb = gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM);
+      rtx reg_temp = gen_rtx_REG (SImode, 13);
+
+      rtx tmp0_unspec = gen_rtx_UNSPEC (Pmode,
+					gen_rtvec (1, grs_label),
+					UNSPEC_PIC_SYMBOL_GOTPC_GRS);
+      rtx tmp1_unspec = gen_rtx_UNSPEC (Pmode,
+					gen_rtvec (1, grs_label),
+					UNSPEC_PIC_SYMBOL_GOTPC);
+
+      emit_insn (gen_prologue_get_pc (tmp0_unspec));
+      emit_move_insn (reg_temp, tmp1_unspec);
+      emit_insn (gen_addsi3 (reg_gb, reg_gb, reg_temp));
+    }
+
+  if (flag_stack_usage_info)
+    current_function_static_stack_size = cfun->machine->frame_size;
+
+  if (!flag_sched_prolog)
+    emit_insn (gen_blockage ());
+}
+
+void
+csky_expand_epilogue (void)
+{
+  unsigned long func_type = get_csky_current_func_type ();
+  unsigned int reg_mask;
+  int reg_size;
+  int adjust;
+  rtx_insn *insn;
+
+  if (!flag_sched_prolog)
+    emit_insn (gen_blockage ());
+
+  if (CSKY_FUNCTION_IS_NAKED (func_type))
+    {
+      emit_jump_insn (gen_simple_return ());
+      return;
+    }
+
+  /* Get the frame information.  */
+  csky_layout_stack_frame ();
+  reg_mask = cfun->machine->reg_mask;
+  reg_size = cfun->machine->reg_size;
+  adjust = reg_size + cfun->machine->arg_size;
+
+  /* Restore the SP to the base of the register save area.  */
+  if (frame_pointer_needed)
+    {
+      insn = emit_move_insn (stack_pointer_rtx, frame_pointer_rtx);
+      RTX_FRAME_RELATED_P (insn) = 1;
+    }
+  else
+    expand_csky_stack_adjust (cfun->machine->reg_offset);
+
+  /* Restore the callee-saved registers.  */
+  if (csky_can_use_pushpop (reg_mask)
+      && cfun->machine->arg_size == 0
+      && !CSKY_FUNCTION_IS_INTERRUPT (func_type)
+      && !crtl->calls_eh_return)
+    {
+      /* Pop includes an implicit return, so we are done.  */
+      emit_csky_regs_pop (reg_mask);
+      return;
+    }
+  else if (reg_size)
+    {
+      int sreg = -1, ereg = -1;
+      bool ldm_p = csky_can_use_ldstm (reg_mask, &sreg, &ereg);
+      int ldm_regs = ldm_p ? ereg - sreg + 1 : 0;
+      int ldm_size = ldm_regs * 4;
+
+      /* Emit individual register loads.  Even if we are going to emit an
+	 ldm, we may need to load individual registers above that too.  */
+      if (reg_size > ldm_size)
+	{
+	  int offset = reg_size - 4;
+	  int regno = 31;
+	  for ( ; regno > ereg; regno--)
+	    if (reg_mask & (1 << regno))
+	      {
+		rtx src = gen_frame_mem (SImode,
+					 plus_constant (Pmode,
+							stack_pointer_rtx,
+							offset));
+		rtx reg = gen_rtx_REG (SImode, regno);
+		insn = emit_move_insn (reg, src);
+		RTX_FRAME_RELATED_P (insn) = 1;
+		add_reg_note (insn, REG_CFA_RESTORE, reg);
+		if (offset == ldm_size)
+		  break;
+		offset -= 4;
+	      }
+	}
+
+      /* If possible, emit a ldm to do a bulk load of sequential
+	 registers from the stack.  */
+      if (ldm_p)
+	{
+	  rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (ldm_regs));
+	  int regno, slot;
+	  for (regno = sreg, slot = 0; regno <= ereg; regno++, slot++)
+	    {
+	      rtx reg = gen_rtx_REG (SImode, regno);
+	      rtx addr = plus_constant (Pmode, stack_pointer_rtx, slot * 4);
+	      rtx set = gen_rtx_SET (reg, gen_frame_mem (SImode, addr));
+	      XVECEXP (par, 0, slot) = set;
+	    }
+	  insn = emit_insn (par);
+	  RTX_FRAME_RELATED_P (insn) = 1;
+	  for (regno = sreg; regno <= ereg; regno++)
+	    {
+	      rtx reg = gen_rtx_REG (SImode, regno);
+	      add_reg_note (insn, REG_CFA_RESTORE, reg);
+	    }
+	}
+    }
+
+  /* Emit the final stack pointer adjustment to deallocate the saved
+     registers and incoming argument area.  */
+  expand_csky_stack_adjust (adjust);
+
+  /* Extra stack adjustment for exception handler return.  */
+  if (crtl->calls_eh_return)
+    emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
+			   EH_RETURN_STACKADJ_RTX));
+
+  /* Now we can return.  */
+  emit_jump_insn (gen_simple_return ());
+}
+
+
+static void
+csky_output_function_prologue (FILE *f)
+{
+  unsigned long func_type = get_csky_current_func_type ();
+
+  switch ((int) CSKY_FUNCTION_TYPE (func_type))
+    {
+    default:
+    case CSKY_FT_NORMAL:
+      break;
+    case CSKY_FT_INTERRUPT:
+      {
+	asm_fprintf (f, "\t# Interrupt Service Routine.\n");
+	asm_fprintf (f, "\tnie\n\tipush\n");
+	break;
+      }
+    case CSKY_FT_FIQ:
+      asm_fprintf (f, "\t# Fast Interrupt Service Routine.\n");
+      break;
+    case CSKY_FT_EXCEPTION:
+      asm_fprintf (f, "\t# CSKY Exception Handler.\n");
+      break;
+    case CSKY_FT_NAKED:
+      asm_fprintf (f, "\t# Naked Function: prologue and epilogue "
+		   "provided by programmer.\n");
+      return;
+    }
+
+  csky_layout_stack_frame ();
+
+  /* Generate a ".stack_size function-name, size" directive for
+     callgraph tools; the default stack size is 0.  */
+  if (TARGET_STACK_SIZE && cfun->machine->frame_size > 0)
+    {
+      gcc_assert (current_function_decl != NULL);
+      const char *func_name
+	= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (current_function_decl));
+      if (func_name[0] == '*')
+	asm_fprintf (f, "\t.stack_size %s, %d\n",
+		     &func_name[1], cfun->machine->frame_size);
+      else
+	asm_fprintf (f, "\t.stack_size %s, %d\n",
+		     func_name, cfun->machine->frame_size);
+    }
+}
+
+
+static void
+csky_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED)
+{
+}
+
+
+/* Helper for csky_eh_return splitter: store the call frame exception
+   handler address in lr.  */
+void
+csky_set_eh_return_address (rtx source, rtx scratch)
+{
+  HOST_WIDE_INT delta = 0;
+  rtx basereg, addr;
+  unsigned int reg_mask;
+
+  csky_layout_stack_frame ();
+  reg_mask = cfun->machine->reg_mask;
+
+  if (reg_mask & (1 << CSKY_LR_REGNUM))
+    {
+      /* Find LR in the stack frame.  */
+      int i = 0;
+
+      if (frame_pointer_needed)
+	{
+	  basereg = frame_pointer_rtx;
+	  delta = 0;
+	}
+      else
+	{
+	  basereg = stack_pointer_rtx;
+	  delta = cfun->machine->reg_offset;
+	}
+
+      /* At this point, (basereg + delta) points at the low end of
+	 the reg save area.  Regs are saved sequentially from low
+	 to high from this address.  */
+      for (i = 0; i < CSKY_LR_REGNUM; i++)
+	if (reg_mask & (1 << i))
+	  delta += 4;
+
+      if ((CSKY_TARGET_ARCH (CK801) && delta >= CSKY_LD16_MAX_OFFSET (Pmode))
+	  || delta >= CSKY_LD32_MAX_OFFSET (Pmode))
+	{
+	  emit_insn (gen_movsi (scratch, GEN_INT (delta)));
+	  emit_insn (gen_addsi3 (scratch, scratch, basereg));
+	  addr = scratch;
+	}
+      else
+	addr = plus_constant (Pmode, basereg, delta);
+      emit_move_insn (gen_frame_mem (Pmode, addr), source);
+    }
+  else
+    emit_move_insn (gen_rtx_REG (Pmode, CSKY_LR_REGNUM), source);
+}
+
+/* Return TRUE if X references a SYMBOL_REF.  */
+
+bool
+csky_symbol_mentioned_p (rtx x)
+{
+  const char *fmt;
+  int i;
+
+  if (GET_CODE (x) == SYMBOL_REF)
+    return true;
+
+  fmt = GET_RTX_FORMAT (GET_CODE (x));
+  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+    {
+      if (fmt[i] == 'E')
+	{
+	  int j;
+
+	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+	    if (csky_symbol_mentioned_p (XVECEXP (x, i, j)))
+	      return true;
+	}
+      else if (fmt[i] == 'e' && csky_symbol_mentioned_p (XEXP (x, i)))
+	return true;
+    }
+  return false;
+}
+
+
+/* Return TRUE if X references a LABEL_REF.  */
+
+bool
+csky_label_mentioned_p (rtx x)
+{
+  const char *fmt;
+  int i;
+
+  if (GET_CODE (x) == LABEL_REF)
+    return true;
+
+  fmt = GET_RTX_FORMAT (GET_CODE (x));
+  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
+    {
+      if (fmt[i] == 'E')
+	{
+	  int j;
+
+	  for (j = XVECLEN (x, i) - 1; j >= 0; j--)
+	    if (csky_label_mentioned_p (XVECEXP (x, i, j)))
+	      return true;
+	}
+      else if (fmt[i] == 'e' && csky_label_mentioned_p (XEXP (x, i)))
+	return true;
+    }
+
+  return false;
+}
+
+
+static bool
+tls_unspec_mentioned_p (rtx x)
+{
+  switch (GET_CODE (x))
+    {
+    case CONST:
+      return tls_unspec_mentioned_p (XEXP (x, 0));
+
+    case UNSPEC:
+      if (XINT (x, 1) == UNSPEC_TLS)
+	return true;
+
+    /* Fall through.  */
+    default:
+      return false;
+    }
+}
+
+
+/* Implement LEGITIMATE_PIC_OPERAND_P.  */
+bool
+csky_legitimate_pic_operand_p (rtx x)
+{
+  if (tls_unspec_mentioned_p (x))
+    return true;
+  if (csky_symbol_mentioned_p (x) || csky_label_mentioned_p (x))
+    return false;
+  return true;
+}
+
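+/* Legitimize PIC address ORIG, loading it into REG (or a fresh
+   pseudo if REG is 0).  When GOTREL_P is true, GOT-relative
+   (@GOT/@GOTOFF) references are generated; otherwise @PLT, bsr,
+   or grs forms may be used, as the cases below detail.  */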
+rtx
+csky_legitimize_pic_address (rtx orig, rtx reg, bool gotrel_p)
+{
+  rtx pic_reg = gen_rtx_REG (SImode, PIC_OFFSET_TABLE_REGNUM);
+  bool optimize_p = false;
+
+  if (GET_CODE (orig) == SYMBOL_REF || GET_CODE (orig) == LABEL_REF)
+    {
+      rtx pic_ref, address, rtx_tmp;
+      rtx insn;
+      int subregs = 0;
+
+      if (reg == 0)
+	{
+	  gcc_assert (can_create_pseudo_p ());
+	  reg = gen_reg_rtx (Pmode);
+	  subregs = 1;
+	}
+
+      if (subregs)
+	address = gen_reg_rtx (Pmode);
+      else
+	address = reg;
+
+      if (GET_CODE (orig) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (orig))
+	{
+	  /* If GOTREL_P, generate sym@GOT; otherwise generate sym@PLT.  */
+	  rtx_tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, orig),
+				    (gotrel_p
+				     ? UNSPEC_PIC_SYMBOL_GOT
+				     : UNSPEC_PIC_SYMBOL_PLT));
+	  optimize_p = gotrel_p;
+	  if (flag_pic != 1)
+	    {
+	      emit_move_insn (address, rtx_tmp);
+	      rtx_tmp = gen_rtx_MULT (Pmode, address, GEN_INT (1));
+	    }
+	  pic_ref = gen_const_mem (Pmode,
+				   gen_rtx_PLUS (Pmode, pic_reg, rtx_tmp));
+	}
+      else
+	{
+	  /* bsr symbol */
+	  if (flag_pic == 1 && !gotrel_p)
+	    {
+	      pic_ref = gen_rtx_UNSPEC (Pmode,
+					gen_rtvec (1, orig),
+					UNSPEC_PIC_SYMBOL_BSR);
+	      return pic_ref;
+	    }
+	  /* grs rx, symbol */
+	  else if (flag_pic == 1 && (GET_CODE (orig) == SYMBOL_REF)
+		   && SYMBOL_REF_FUNCTION_P (orig))
+	    {
+	      pic_ref = gen_rtx_UNSPEC (Pmode,
+					gen_rtvec (1, orig),
+					UNSPEC_PIC_SYMBOL_GRS);
+	      return pic_ref;
+	    }
+	  /* lrw rx, symbol@GOTOFF; add rx, rx, gb */
+	  else
+	    {
+	      rtx_tmp = gen_rtx_UNSPEC (Pmode,
+					gen_rtvec (1, orig),
+					UNSPEC_PIC_SYMBOL_GOTOFF);
+	      emit_move_insn (address, rtx_tmp);
+	      pic_ref = gen_rtx_PLUS (Pmode, address, pic_reg);
+	      optimize_p = true;
+	    }
+	}
+
+      insn = emit_move_insn (reg, pic_ref);
+      /* Put a REG_EQUAL note on this insn, so that it can be
+	 optimized by the loop optimizer.  */
+      if (optimize_p)
+	set_unique_reg_note (insn, REG_EQUAL, orig);
+
+      return reg;
+    }
+  else if (GET_CODE (orig) == CONST)
+    {
+      rtx base, offset;
+
+      if (GET_CODE (XEXP (orig, 0)) == PLUS
+	  && XEXP (XEXP (orig, 0), 1) == pic_reg)
+	return orig;
+
+      if (reg == 0)
+	{
+	  gcc_assert (can_create_pseudo_p ());
+	  reg = gen_reg_rtx (Pmode);
+	}
+
+      gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
+
+      base = csky_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
+					  reg, gotrel_p);
+      offset = csky_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
+					    base == reg ? 0 : reg, gotrel_p);
+
+      if (GET_CODE (offset) == CONST_INT)
+	return plus_constant (Pmode, base, INTVAL (offset));
+
+      return gen_rtx_PLUS (Pmode, base, offset);
+    }
+
+  return orig;
+}
+
+
+/* Output assembly code for a function call.  */
+
+char *
+csky_output_call (rtx *operands, int index)
+{
+  static char buffer[20];
+  rtx addr = operands[index];
+
+  if (REG_P (addr))
+    sprintf (buffer, "jsr\t%%%d", index);
+  else if (flag_pic && (GET_CODE (addr) == UNSPEC))
+    sprintf (buffer, "bsr\t%%%d", index);
+  else
+    sprintf (buffer, "jbsr\t%%%d", index);
+
+  return buffer;
+}
+
+
+/* Worker function for TARGET_ASM_TRAMPOLINE_TEMPLATE.
+   Output assembler code for a block containing the constant parts
+   of a trampoline, leaving space for the variable parts.
+   Note that STATIC_CHAIN_REGNUM is t1 (aka r13) on ck801 and
+   t0 (r12) otherwise.  */
+
+static void
+csky_asm_trampoline_template (FILE *f)
+{
+  if (CSKY_ISA_FEATURE (2E3))
+    {
+      fprintf (f, "\tlrw\t%s, [.Lstatic_chain]\n",
+	       reg_names[STATIC_CHAIN_REGNUM]);
+      fprintf (f, "\tjmpi\t[.Lfunc_address]\n");
+      /* 2 32-bit insns = 8 bytes.  */
+    }
+  else if (CSKY_TARGET_ARCH (CK801))
+    {
+      /* It's hard to provide general support for trampolines on this
+	 core.  We need a register other than the one holding the
+	 static chain (r13) to hold the function pointer for the
+	 indirect jump to it.  But ck801 has such a limited register
+	 set that there is no other call-clobbered scratch register
+	 available -- in particular, this core does not have r12,
+	 which we use for the ck802 case below.  If we use a
+	 callee-saved register like r4, saving the old value on the
+	 stack screws up the stack frame
+	 if there are overflow arguments pushed on the stack
+	 by the caller.  In theory we could test for that and handle
+	 limited cases with parameters that all fit in r0-r3 with no
+	 stack overflow, but punt for now.  */
+      sorry ("nested function trampolines not supported on CK801");
+    }
+  else
+    {
+      fprintf (f, "\tlrw\t%s, [.Lfunc_address]\n",
+	       reg_names[CSKY_T1_REGNUM]);
+      fprintf (f, "\tlrw\t%s, [.Lstatic_chain]\n",
+	       reg_names[STATIC_CHAIN_REGNUM]);
+      fprintf (f, "\tjmp\t%s\n",
+	       reg_names[CSKY_T1_REGNUM]);
+      /* Align the constant pool on a word boundary.  */
+      fprintf (f, "\t.align 2\n");
+      /* 2 32-bit lrw insns + 16-bit jump + 16-bit pad = 12 bytes.  */
+    }
+
+  fprintf (f, ".Lstatic_chain:\n");
+  fprintf (f, "\t.long 0\n");
+  fprintf (f, ".Lfunc_address:\n");
+  fprintf (f, "\t.long 0\n");
+  /* 2 words of constant pool = 8 bytes.  */
+}
+
+/* Worker function for TARGET_TRAMPOLINE_INIT.  */
+
+static void
+csky_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
+{
+  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
+  rtx mem, a_tramp;
+  int pool = TRAMPOLINE_SIZE - 8;
+
+  emit_block_move (m_tramp, assemble_trampoline_template (),
+		   GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
+
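+  /* Fill in the two constant-pool slots at the end of the trampoline;
+     these are the .Lstatic_chain and .Lfunc_address words emitted by
+     the template above.  */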
+  mem = adjust_address (m_tramp, SImode, pool);
+  emit_move_insn (mem, chain_value);
+  mem = adjust_address (m_tramp, SImode, pool + 4);
+  emit_move_insn (mem, fnaddr);
+
+  a_tramp = XEXP (m_tramp, 0);
+  emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
+		     LCT_NORMAL, VOIDmode, a_tramp, Pmode,
+		     plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
+}
+
+
+/* Emit a comparison insn for float values.
+   Return true if the comparison is inverted.  */
+
+bool
+csky_emit_compare_float (enum rtx_code code, rtx op0, rtx op1)
+{
+  rtx cc_reg = gen_rtx_REG (CCmode, CSKY_CC_REGNUM);
+  bool invert;
+  machine_mode mode = GET_MODE (op1);
+
+  if (op1 != CONST0_RTX (mode))
+    op1 = force_reg (mode, op1);
+
+  invert = false;
+  switch (code)
+    {
+    case EQ:
+      code = NE;
+      invert = true;
+      break;
+
+    case NE:
+      break;
+    case LE:
+      if (op1 == CONST0_RTX (mode))
+	op1 = force_reg (mode, op1);
+      break;
+    case GT:
+      if (op1 == CONST0_RTX (mode))
+	op1 = force_reg (mode, op1);
+      break;
+    case GE:
+      break;
+    case LT:
+      if (op1 == CONST0_RTX (mode))
+	{
+	  code = GE;
+	  invert = true;
+	}
+      break;
+    case UNORDERED:
+      break;
+    case ORDERED:
+      code = UNORDERED;
+      invert = true;
+      break;
+
+    default:
+      break;
+    }
+
+  emit_insn (gen_rtx_SET (cc_reg, gen_rtx_fmt_ee (code, CCmode, op0, op1)));
+
+  return invert;
+}
+
+/* Support for the Q memory constraint.  Returns true if OP is a MEM RTX
+   with an address consisting of base + index or base + displacement.  */
+bool
+csky_valid_fpuv2_mem_operand (rtx op)
+{
+  struct csky_address addr;
+
+  if (GET_CODE (op) != MEM)
+    return false;
+
+  if (!decompose_csky_address (XEXP (op, 0), &addr))
+    return false;
+
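+  /* E.g., (mem:SF (plus (reg) (const_int 8))) is valid: the base is
+     a register and the displacement is a multiple of 4 within 1020.  */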
+  /* Verify the base register.  */
+  if (!is_csky_address_register_rtx_p (addr.base, 0))
+    return false;
+
+  /* Verify the index operand.  */
+  if (addr.index)
+    {
+      if (!is_csky_address_register_rtx_p (addr.index, 0))
+	return false;
+
+      if (addr.scale == 1 || addr.scale == 2 || addr.scale == 4
+	  || addr.scale == 8)
+	return true;
+
+      return false;
+    }
+  /* Verify disp operand.  */
+  else if (addr.disp)
+    {
+      rtx disp = addr.disp;
+
+      if (!CONST_INT_P (disp))
+	return false;
+
+      if (((unsigned) INTVAL (disp) % 4) == 0
+	  && (unsigned) INTVAL (disp) <= (unsigned) 1020)
+	return true;
+
+      return false;
+    }
+  return true;
+}
+
+
+/* Return the interrupt function type encoded by attribute ARGUMENT,
+   or CSKY_FT_UNKNOWN if the type cannot be determined.  */
+
+static unsigned long
+csky_isr_value (tree argument)
+{
+  const isr_attribute_entry *ptr;
+  const char *arg;
+
+  /* No argument - default to IRQ.  */
+  if (argument == NULL_TREE)
+    return CSKY_FT_ISR;
+
+  /* Get the value of the argument.  */
+  if (TREE_VALUE (argument) == NULL_TREE
+      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
+    return CSKY_FT_UNKNOWN;
+
+  arg = TREE_STRING_POINTER (TREE_VALUE (argument));
+
+  /* Check it against the list of known arguments.  */
+  for (ptr = isr_attribute_map; ptr->arg != NULL; ptr++)
+    if (strcmp (arg, ptr->arg) == 0)
+      return ptr->return_value;
+
+  /* An unrecognized interrupt type.  */
+  return CSKY_FT_UNKNOWN;
+}
+
+/* Handle an attribute requiring a FUNCTION_DECL;
+   arguments as in struct attribute_spec.handler.  */
+
+static tree
+csky_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
+			      int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
+{
+  if (TREE_CODE (*node) != FUNCTION_DECL)
+    {
+      warning (OPT_Wattributes, "%qE attribute only applies to functions",
+	       name);
+      *no_add_attrs = true;
+    }
+
+  return NULL_TREE;
+}
+
+/* Handle an "interrupt" or "isr" attribute;
+   arguments as in struct attribute_spec.handler.  */
+
+static tree
+csky_handle_isr_attribute (tree *node, tree name, tree args, int flags,
+			   bool *no_add_attrs)
+{
+  if (!TARGET_ISTACK)
+    {
+      warning (OPT_Wattributes, "%qE attribute ignored without -mistack",
+	       name);
+      *no_add_attrs = true;
+      return NULL_TREE;
+    }
+
+  if (DECL_P (*node))
+    {
+      if (TREE_CODE (*node) != FUNCTION_DECL)
+	{
+	  warning (OPT_Wattributes, "%qE attribute only applies to functions",
+		   name);
+	  *no_add_attrs = true;
+	}
+    }
+  else
+    {
+      if (TREE_CODE (*node) == FUNCTION_TYPE
+	  || TREE_CODE (*node) == METHOD_TYPE)
+	{
+	  if (csky_isr_value (args) == CSKY_FT_UNKNOWN)
+	    {
+	      warning (OPT_Wattributes, "%qE attribute ignored", name);
+	      *no_add_attrs = true;
+	    }
+	}
+      else if (TREE_CODE (*node) == POINTER_TYPE
+	       && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
+		   || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
+	       && csky_isr_value (args) != CSKY_FT_UNKNOWN)
+	{
+	  *node = build_variant_type_copy (*node);
+	  TREE_TYPE (*node) = build_type_attribute_variant (TREE_TYPE (*node),
+	    tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
+	  *no_add_attrs = true;
+	}
+      else if (flags & ((int) ATTR_FLAG_DECL_NEXT
+			| (int) ATTR_FLAG_FUNCTION_NEXT
+			| (int) ATTR_FLAG_ARRAY_NEXT))
+	{
+	  *no_add_attrs = true;
+	  return tree_cons (name, args, NULL_TREE);
+	}
+      else
+	warning (OPT_Wattributes, "%qE attribute ignored", name);
+    }
+  return NULL_TREE;
+}
+
+
+/* Implement TARGET_REGISTER_MOVE_COST: compute extra cost of moving data
+   between one register class and another.  */
+
+int
+csky_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
+			 reg_class_t from, reg_class_t to)
+{
+#define GR_REG_CLASS_P(CLASS) \
+  ((CLASS) == GENERAL_REGS || (CLASS) == MINI_REGS || (CLASS) == SP_REGS \
+   || (CLASS) == LOW_REGS)
+
+#define HILO_REG_CLASS_P(CLASS) \
+  ((CLASS) == HI_REGS || (CLASS) == LO_REGS || (CLASS) == HILO_REGS)
+
+#define V_REG_CLASS_P(CLASS) \
+  ((CLASS) == V_REGS)
+
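+  /* Moves within the general-purpose or FPU register files are
+     cheap; anything involving the HI/LO accumulator registers is
+     much more expensive.  */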
+  if (V_REG_CLASS_P (from) && V_REG_CLASS_P (to))
+    return 2;
+
+  if ((V_REG_CLASS_P (from) && GR_REG_CLASS_P (to))
+      || (GR_REG_CLASS_P (from) && V_REG_CLASS_P (to)))
+    return 6;
+
+  if ((HILO_REG_CLASS_P (from) && GR_REG_CLASS_P (to))
+      || (GR_REG_CLASS_P (from) && HILO_REG_CLASS_P (to)))
+    return 16;
+
+  if (HILO_REG_CLASS_P (from) && HILO_REG_CLASS_P (to))
+    return 32;
+
+  if ((HILO_REG_CLASS_P (from) && V_REG_CLASS_P (to))
+      || (V_REG_CLASS_P (from) && HILO_REG_CLASS_P (to)))
+    return 64;
+
+  return 2;
+}
+
+
+/* Implement TARGET_MEMORY_MOVE_COST: compute the cost of moving data
+   between registers and memory.  */
+
+int
+csky_memory_move_cost (machine_mode mode, reg_class_t rclass,
+		       bool in)
+{
+  return (4 + memory_move_secondary_cost (mode, rclass, in));
+}
+
+
+/* TARGET_RTX_COSTS helper for ck801/ck802.  */
+
+static bool
+ck802_ck801_rtx_costs (rtx x, int code, int outer_code, int *total,
+		       bool speed)
+{
+  machine_mode mode = GET_MODE (x);
+  switch (code)
+    {
+      /* Accessing memory costs quite a lot for the first word,
+	 plus one insn for each additional word.  */
+    case MEM:
+      *total = COSTS_N_INSNS (1 + CSKY_NUM_REGS (mode));
+      return false;
+    case DIV:
+    case UDIV:
+    case MOD:
+    case UMOD:
+      *total = 100;
+      return true;
+
+    case ROTATE:
+    case ROTATERT:
+    case ASHIFT:
+    case LSHIFTRT:
+    case ASHIFTRT:
+      if (speed)
+	*total = 2;
+      else
+	*total = COSTS_N_INSNS (1);
+      return false;
+
+    case MINUS:
+    case PLUS:
+      *total = COSTS_N_INSNS (CSKY_NUM_REGS (mode));
+      return false;
+
+    case AND:
+      {
+	enum rtx_code subcode = GET_CODE (XEXP (x, 1));
+
+	/* If the second operand is a NOT, we'll try to combine the
+	   whole expression into e.g. an "andn" insn, so give the AND
+	   itself zero cost.  */
+	if (subcode == NOT)
+	  {
+	    *total = 0;
+	    return false;
+	  }
+      }
+      /* Fall through.  */
+    case XOR:
+    case IOR:
+      *total = COSTS_N_INSNS (CSKY_NUM_REGS (mode));
+      return false;
+
+    case MULT:
+      /* FIXME: is ixw supported on ck801/ck802?  */
+      /* A multiply by 2 or 4 can be replaced by an "ix.h/ix.w"
+	 insn.  "ix.h/ix.w" is a 32-bit insn, so make its cost a
+	 little less than that of "mult".  */
+      if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
+	{
+	  unsigned HOST_WIDE_INT m
+	    = (unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)));
+	  if ((m == 2 || m == 4) && outer_code == PLUS)
+	    {
+	      *total = 2;
+	      return true;
+	    }
+	  else
+	    {
+	      /* Because mult is relatively slow, steer toward other
+		 insns when optimizing for speed; when optimizing for
+		 size, give mult a lower cost.  */
+	      if (speed)
+		{
+		  *total = COSTS_N_INSNS (10 * CSKY_NUM_REGS (mode));
+		  return true;
+		}
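+	      /* When optimizing for size, charge one extra cost unit
+		 for every two bits of the multiplier.  */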
+	      int cycle = 0;
+	      while (m)
+		{
+		  m >>= 2;
+		  cycle++;
+		}
+	      *total = COSTS_N_INSNS (1) + cycle;
+	      return false;
+	    }
+	}
+      if (!speed)
+	*total = COSTS_N_INSNS (1);
+      return false;
+
+    case NEG:
+      /* Usually neg is synthesized as a subtract from 0, which
+	 costs 1 extra insn to move 0 into a register.  */
+      *total = COSTS_N_INSNS (2 * CSKY_NUM_REGS (mode));
+      return false;
+
+    case NOT:
+      *total = COSTS_N_INSNS (CSKY_NUM_REGS (mode));
+      return false;
+
+    case COMPARE:
+      *total = COSTS_N_INSNS (1);
+      return false;
+
+    case SIGN_EXTEND:
+    case ZERO_EXTEND:
+      *total = COSTS_N_INSNS (CSKY_NUM_REGS (mode));
+      return false;
+
+    case SIGN_EXTRACT:
+    case ZERO_EXTRACT:
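+      /* Extracting an aligned byte costs a single insn.  */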
+      if (REG_P (XEXP (x, 0))
+	  && CONST_INT_P (XEXP (x, 1))
+	  && CONST_INT_P (XEXP (x, 2))
+	  && INTVAL (XEXP (x, 1)) == 8
+	  && INTVAL (XEXP (x, 2)) % 8 == 0)
+	{
+	  *total = COSTS_N_INSNS (1);
+	  return true;
+	}
+      *total = COSTS_N_INSNS (CSKY_NUM_REGS (mode));
+      return false;
+
+    case CONST_INT:
+      {
+	unsigned HOST_WIDE_INT t = (unsigned HOST_WIDE_INT) (INTVAL (x));
+
+	if (outer_code == COMPARE)
+	  {
+	    if (t < 0x10000)
+	      *total = 0;
+	    else
+	      *total = COSTS_N_INSNS (2);
+	  }
+	else if (outer_code == AND || outer_code == IOR || outer_code == XOR)
+	  {
+	    /* "andi/xori/ori" are 32-bit insns, so make them cost a
+	       little more.  */
+	    if (t < 0x1000)
+	      {
+		/* Try to get "andi" replaced by "sextb/h" by making
+		   it cost more.  */
+		if (outer_code == AND && (t == 0xff || t == 0xffff))
+		  {
+		    *total = 8;
+		    return true;
+		  }
+		*total = 2;
+	      }
+	    else if (t < 0x10000)
+	      *total = COSTS_N_INSNS (1);
+	    else
+	      *total = COSTS_N_INSNS (2);
+	  }
+	else if (outer_code == PLUS || outer_code == MINUS)
+	  {
+	    /* "addi/subi rx,ry,imm" is more often a 16-bit insn when
+	       imm < 9.  For imm >= 9, a "movi" insn is used instead;
+	       it probably costs less than "addi/subi".  */
+	    if (t < 9)
+	      *total = 0;
+	    else if (t < 0x1000)
+	      *total = 2;
+	    else if (t < 0x10000)
+	      *total = COSTS_N_INSNS (1);
+	    else
+	      *total = COSTS_N_INSNS (2);
+	  }
+	else if (outer_code == ROTATE || outer_code == ROTATERT
+		 || outer_code == LSHIFTRT || outer_code == ASHIFTRT
+		 || outer_code == ASHIFT)
+	  {
+	    if (t < 32)
+	      *total = 0;
+	    else
+	      *total = COSTS_N_INSNS (2);
+	  }
+	else
+	  {
+	    if (t < 0x10000)
+	      {
+		if (outer_code == SET && t < 256)
+		  *total = 0;
+		else
+		  *total = COSTS_N_INSNS (1);
+	      }
+	    else
+	      *total = COSTS_N_INSNS (2);
+	  }
+      }
+      return true;
+
+    case CONST:
+    case LABEL_REF:
+    case SYMBOL_REF:
+      *total = COSTS_N_INSNS (3);
+      return true;
+    default:
+      return false;
+    }
+}
+
+
+/* TARGET_RTX_COSTS helper for ck803.  */
+
+static bool
+ck803_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
+		 int *total, bool speed ATTRIBUTE_UNUSED)
+{
+  switch (code)
+    {
+    case SET:
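+      /* A load whose address includes an index register costs more
+	 than one with a plain base or base+displacement address.  */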
+      if (MEM_P (XEXP (x, 1)))
+	{
+	  struct csky_address op1;
+	  bool address_valid
+	    = decompose_csky_address (XEXP (XEXP (x, 1), 0), &op1);
+	  if (op1.index)
+	    {
+	      *total = COSTS_N_INSNS (3);
+	      return true;
+	    }
+	  else if (address_valid)
+	    {
+	      *total = COSTS_N_INSNS (1);
+	      return true;
+	    }
+	}
+      if (REG_P (XEXP (x, 0)) && (GET_CODE (XEXP (x, 1)) == PLUS))
+       {
+	 rtx sub_exp = XEXP (x, 1);
+	 if (REG_P (XEXP (sub_exp, 0)) && REG_P (XEXP (sub_exp, 1)))
+	   {
+	     *total = COSTS_N_INSNS (1);
+	     return true;
+	   }
+       }
+      return false;
+    case MULT:
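+      /* A multiply by a positive even constant gets the cost of a
+	 single insn, presumably because it can be synthesized
+	 cheaply.  */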
+      if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
+	{
+	  HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
+	  if (val % 2 == 0 && val < 0xffffffff && val > 0)
+	    {
+	      *total = COSTS_N_INSNS (1);
+	      return true;
+	    }
+	}
+      return false;
+
+    case CONST:
+    case LABEL_REF:
+    case SYMBOL_REF:
+      *total = COSTS_N_INSNS (3);
+      return true;
+    default:
+      return false;
+    }
+}
+
+/* TARGET_RTX_COSTS helper for ck807 and ck810.  */
+
+static bool
+ck807_ck810_rtx_costs (rtx x, int code,
+		       int outer_code ATTRIBUTE_UNUSED,
+		       int *total, bool speed ATTRIBUTE_UNUSED)
+{
+  switch (code)
+    {
+    case MULT:
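+      /* As for ck803, a multiply by a positive even constant gets
+	 the cost of a single insn.  */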
+      if (REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
+	{
+	  HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
+	  if (val % 2 == 0 && val < 0xffffffff && val > 0)
+	    {
+	      *total = COSTS_N_INSNS (1);
+	      return true;
+	    }
+	}
+      return false;
+
+    case CONST:
+    case LABEL_REF:
+    case SYMBOL_REF:
+      *total = COSTS_N_INSNS (3);
+      return true;
+    default:
+      return false;
+    }
+}
+
+
+/* Implement TARGET_RTX_COSTS, to compute a (partial) cost for rtx X.
+   Return true if the complete cost has been computed, and false if
+   subexpressions should be scanned.  In either case, *TOTAL contains
+   the cost result.  */
+
+static bool
+csky_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
+		int opno ATTRIBUTE_UNUSED, int *total, bool speed)
+{
+  int code = GET_CODE (x);
+
+  if (CSKY_TARGET_ARCH (CK802) || CSKY_TARGET_ARCH (CK801))
+    return ck802_ck801_rtx_costs (x, code, outer_code, total, speed);
+  else if (CSKY_TARGET_ARCH (CK803))
+    return ck803_rtx_costs (x, code, outer_code, total, speed);
+  else if (CSKY_TARGET_ARCH (CK807) || CSKY_TARGET_ARCH (CK810))
+    return ck807_ck810_rtx_costs (x, code, outer_code, total, speed);
+  else
+    gcc_unreachable ();
+}
+
+/* Emit assembly code for CASESI.  This is only used on CK801 and CK802
+   when optimizing for size, and uses helper functions in libgcc instead
+   of doing the control transfer inline.  */
+
+const char *
+csky_output_casesi (rtx *operands)
+{
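+  /* operands[0] is the label of the dispatch table; the
+     ADDR_DIFF_VEC itself is the insn that immediately follows.  */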
+  rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
+
+  gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
+
+  switch (GET_MODE (diff_vec))
+    {
+    case E_QImode:
+      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned
+	      ? "jbsr\t___gnu_csky_case_uqi"
+	      : "jbsr\t___gnu_csky_case_sqi");
+    case E_HImode:
+      return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned
+	      ? "jbsr\t___gnu_csky_case_uhi"
+	      : "jbsr\t___gnu_csky_case_shi");
+    case E_SImode:
+      return "jbsr\t___gnu_csky_case_si";
+    default:
+      gcc_unreachable ();
+    }
+}
+
+/* Implement TARGET_SCHED_ISSUE_RATE.  CK810 can issue two insns per
+   cycle; all other cores are single-issue.  */
+static int
+csky_sched_issue_rate (void)
+{
+  if (CSKY_TARGET_ARCH (CK810))
+    return 2;
+  else
+    return 1;
+}
+
+
+/* Implement the TARGET_SCHED_ADJUST_COST hook.  Adjust COST based on
+   the dependence of type DEP_TYPE between INSN and DEP, and return
+   the new value.  */
+
+static int
+csky_sched_adjust_cost (rtx_insn *insn,
+			int dep_type,
+			rtx_insn *dep,
+			int cost,
+			unsigned int dw ATTRIBUTE_UNUSED)
+{
+  if (dep_type == REG_DEP_ANTI || dep_type == REG_DEP_OUTPUT)
+    return 0;
+  /* The REG_DEP_TRUE situation.  */
+  else if (recog_memoized (insn) >= 0 && recog_memoized (dep) >= 0)
+    {
+      enum attr_type insn_type = get_attr_type (insn);
+      if (CSKY_TARGET_ARCH (CK803))
+	{
+	  /* If the base reg of a load or store is set by the
+	     preceding insn, the access is delayed by 1 cycle.  */
+	  if (insn_type == TYPE_LOAD || insn_type == TYPE_STORE)
+	    {
+	      rtx pattern = PATTERN (insn);
+
+	      gcc_assert (GET_CODE (pattern) == SET);
+	      rtx addr = (insn_type == TYPE_LOAD
+			  ? SET_SRC (pattern) : SET_DEST (pattern));
+
+	      enum rtx_code code = GET_CODE (addr);
+	      if (code == ZERO_EXTEND || code == SIGN_EXTEND)
+		addr = XEXP (addr, 0);
+	      gcc_assert (GET_CODE (addr) == MEM);
+
+	      rtx base = XEXP (addr, 0);
+	      rtx reg = NULL_RTX;
+	      if (REG_P (base))
+		reg = base;
+	      if (GET_CODE (base) == PLUS
+		  && GET_CODE (XEXP (base, 0)) == REG)
+		reg = XEXP (base, 0);
+	      if ((reg != NULL_RTX) && reg_set_p (reg, PATTERN (dep)))
+		return 2;
+	    }
+	}
+      else if (CSKY_TARGET_ARCH (CK802))
+	{
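+	  /* A jmp or jsr appears to need its target register only
+	     1 cycle after the producing insn, unless that insn is
+	     a load.  */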
+	  if ((insn_type == TYPE_CALL_JSR || insn_type == TYPE_BRANCH_JMP)
+	      && get_attr_type (dep) != TYPE_LOAD)
+	    return 1;
+
+	  if (insn_type == TYPE_LOAD || insn_type == TYPE_STORE)
+	    {
+	      rtx pattern = PATTERN (insn);
+
+	      gcc_assert (GET_CODE (pattern) == SET);
+
+	      rtx addr = (insn_type == TYPE_LOAD
+			  ? SET_SRC (pattern) : SET_DEST (pattern));
+
+	      enum rtx_code code = GET_CODE (addr);
+	      if (code == ZERO_EXTEND || code == SIGN_EXTEND)
+		addr = XEXP (addr, 0);
+	      gcc_assert (GET_CODE (addr) == MEM);
+
+	      rtx base = XEXP (addr, 0);
+	      rtx reg = NULL_RTX;
+	      if (REG_P (base))
+		reg = base;
+	      if (GET_CODE (base) == PLUS
+		  && GET_CODE (XEXP (base, 0)) == REG)
+		reg = XEXP (base, 0);
+	      if ((reg != NULL_RTX) && reg_set_p (reg, PATTERN (dep))
+		  && get_attr_type (dep) != TYPE_LOAD)
+		return 1;
+
+	      if (insn_type == TYPE_STORE
+		  && reg_referenced_p (SET_SRC (pattern), PATTERN (dep)))
+		return 1;
+	    }
+	}
+    }
+  return cost;
+}
+
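+/* Implement the TARGET_WARN_FUNC_RETURN hook.  Return true if it is
+   OK to warn about a missing return statement in DECL.  */
+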
+static bool
+csky_warn_func_return (tree decl)
+{
+  /* Naked functions are implemented entirely in assembly, including the
+     return sequence, so suppress warnings about this.  */
+  return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
+}
+
+
+/* Implement TARGET_RETURN_IN_MEMORY to decide whether TYPE should be
+   returned in memory (true) or in a register (false).
+   FNTYPE is the type of the function making the call.  */
+static bool
+csky_return_in_memory (const_tree type,
+		       const_tree fntype ATTRIBUTE_UNUSED)
+{
+  const HOST_WIDE_INT size = int_size_in_bytes (type);
+  return (size == -1 || size > 2 * UNITS_PER_WORD);
+}
+
+
+/* Implement TARGET_DWARF_REGISTER_SPAN.
+   DWARF models VFP registers as 64-bit or 128-bit registers by
+   default.  GCC models them as 32-bit registers, so we need to
+   describe this to the DWARF generation code.  Other registers can
+   use the default.  */
+static rtx
+csky_dwarf_register_span (rtx rtl)
+{
+  machine_mode mode;
+  unsigned regno;
+  rtx parts[16];
+  int nregs;
+  int i;
+
+  regno = REGNO (rtl);
+  if (!CSKY_VREG_P (regno))
+    return NULL_RTX;
+
+  mode = GET_MODE (rtl);
+  if (GET_MODE_SIZE (mode) < 8)
+    return NULL_RTX;
+
+  if (TARGET_SOFT_FPU)
+    {
+      nregs = GET_MODE_SIZE (mode) / 4;
+      for (i = 0; i < nregs; i += 2)
+	if (TARGET_BIG_ENDIAN)
+	  {
+	    parts[i] = gen_rtx_REG (SImode, regno + i + 1);
+	    parts[i + 1] = gen_rtx_REG (SImode, regno + i);
+	  }
+	else
+	  {
+	    parts[i] = gen_rtx_REG (SImode, regno + i);
+	    parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
+	  }
+    }
+  else
+    {
+      /* FIXME: dwarf2 considers all general registers to be the
+	 same width as the CPU word.  Describe the 64-bit FPU
+	 registers as 32 bits here, and modify the unwind processing
+	 to fit the C-SKY architecture later.  */
+      nregs = GET_MODE_SIZE (mode) / 8;
+      for (i = 0; i < nregs; i++)
+	parts[i] = gen_rtx_REG (SImode, regno + i);
+    }
+
+  return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
+}
+
+/* Implement TARGET_INIT_LIBFUNCS.  */
+
+static void
+csky_init_libfuncs (void)
+{
+  if (TARGET_CSKY_LINUX)
+    init_sync_libfuncs (UNITS_PER_WORD);
+  if (!TARGET_LIBCCRT)
+    return;
+
+  #define CSKY_GCC_SYM(sym) "__csky_ccrt_" # sym
+
+  /* int */
+
+  /* Arithmetic functions */
+  set_optab_libfunc (ashl_optab,    DImode, CSKY_GCC_SYM (ashldi3));
+  set_optab_libfunc (ashr_optab,    DImode, CSKY_GCC_SYM (ashrdi3));
+  set_optab_libfunc (sdiv_optab,    SImode, CSKY_GCC_SYM (divsi3));
+  set_optab_libfunc (sdiv_optab,    DImode, CSKY_GCC_SYM (divdi3));
+  set_optab_libfunc (lshr_optab,    DImode, CSKY_GCC_SYM (lshrdi3));
+  set_optab_libfunc (smod_optab,    SImode, CSKY_GCC_SYM (modsi3));
+  set_optab_libfunc (smod_optab,    DImode, CSKY_GCC_SYM (moddi3));
+  set_optab_libfunc (smul_optab,    DImode, CSKY_GCC_SYM (muldi3));
+  set_optab_libfunc (neg_optab,	    DImode, CSKY_GCC_SYM (negdi2));
+  set_optab_libfunc (udiv_optab,    SImode, CSKY_GCC_SYM (udivsi3));
+  set_optab_libfunc (udiv_optab,    DImode, CSKY_GCC_SYM (udivdi3));
+  set_optab_libfunc (udivmod_optab, DImode, CSKY_GCC_SYM (udivmoddi4));
+  set_optab_libfunc (umod_optab,    SImode, CSKY_GCC_SYM (umodsi3));
+  set_optab_libfunc (umod_optab,    DImode, CSKY_GCC_SYM (umoddi3));
+
+  /* Comparison functions */
+  set_optab_libfunc (cmp_optab,	    DImode, CSKY_GCC_SYM (cmpdi2));
+  set_optab_libfunc (ucmp_optab,    DImode, CSKY_GCC_SYM (ucmpdi2));
+
+  /* Trapping arithmetic functions */
+  set_optab_libfunc (absv_optab,    SImode, CSKY_GCC_SYM (absvsi2));
+  set_optab_libfunc (absv_optab,    DImode, CSKY_GCC_SYM (absvdi2));
+  set_optab_libfunc (addv_optab,    SImode, CSKY_GCC_SYM (addvsi3));
+  set_optab_libfunc (addv_optab,    DImode, CSKY_GCC_SYM (addvdi3));
+  set_optab_libfunc (smulv_optab,   SImode, CSKY_GCC_SYM (mulvsi3));
+  set_optab_libfunc (smulv_optab,   DImode, CSKY_GCC_SYM (mulvdi3));
+  set_optab_libfunc (negv_optab,    SImode, CSKY_GCC_SYM (negvsi2));
+  set_optab_libfunc (negv_optab,    DImode, CSKY_GCC_SYM (negvdi2));
+  set_optab_libfunc (subv_optab,    SImode, CSKY_GCC_SYM (subvsi3));
+  set_optab_libfunc (subv_optab,    DImode, CSKY_GCC_SYM (subvdi3));
+
+  /* Bit operations */
+  set_optab_libfunc (clz_optab,	    SImode, CSKY_GCC_SYM (clzsi2));
+  set_optab_libfunc (clz_optab,	    DImode, CSKY_GCC_SYM (clzdi2));
+  set_optab_libfunc (ctz_optab,	    SImode, CSKY_GCC_SYM (ctzsi2));
+  set_optab_libfunc (ctz_optab,	    DImode, CSKY_GCC_SYM (ctzdi2));
+  set_optab_libfunc (ffs_optab,	    DImode, CSKY_GCC_SYM (ffsdi2));
+  set_optab_libfunc (parity_optab,  SImode, CSKY_GCC_SYM (paritysi2));
+  set_optab_libfunc (parity_optab,  DImode, CSKY_GCC_SYM (paritydi2));
+  set_optab_libfunc (popcount_optab,SImode, CSKY_GCC_SYM (popcountsi2));
+  set_optab_libfunc (popcount_optab,DImode, CSKY_GCC_SYM (popcountdi2));
+  set_optab_libfunc (bswap_optab,   SImode, CSKY_GCC_SYM (bswapsi2));
+  set_optab_libfunc (bswap_optab,   DImode, CSKY_GCC_SYM (bswapdi2));
+
+  /* float */
+
+  /* Arithmetic functions */
+  set_optab_libfunc (add_optab,	    SFmode, CSKY_GCC_SYM (addsf3));
+  set_optab_libfunc (add_optab,	    DFmode, CSKY_GCC_SYM (adddf3));
+  set_optab_libfunc (sub_optab,	    SFmode, CSKY_GCC_SYM (subsf3));
+  set_optab_libfunc (sub_optab,	    DFmode, CSKY_GCC_SYM (subdf3));
+  set_optab_libfunc (smul_optab,    SFmode, CSKY_GCC_SYM (mulsf3));
+  set_optab_libfunc (smul_optab,    DFmode, CSKY_GCC_SYM (muldf3));
+  set_optab_libfunc (sdiv_optab,    SFmode, CSKY_GCC_SYM (divsf3));
+  set_optab_libfunc (sdiv_optab,    DFmode, CSKY_GCC_SYM (divdf3));
+  set_optab_libfunc (neg_optab,	    SFmode, CSKY_GCC_SYM (negsf2));
+  set_optab_libfunc (neg_optab,	    DFmode, CSKY_GCC_SYM (negdf2));
+
+  /* Conversion functions */
+  set_conv_libfunc (sext_optab,	   DFmode, SFmode, CSKY_GCC_SYM (extendsfdf2));
+  set_conv_libfunc (trunc_optab,   SFmode, DFmode, CSKY_GCC_SYM (truncdfsf2));
+  set_conv_libfunc (sfix_optab,	   SImode, SFmode, CSKY_GCC_SYM (fixsfsi));
+  set_conv_libfunc (sfix_optab,	   SImode, DFmode, CSKY_GCC_SYM (fixdfsi));
+  set_conv_libfunc (sfix_optab,	   DImode, SFmode, CSKY_GCC_SYM (fixsfdi));
+  set_conv_libfunc (sfix_optab,	   DImode, DFmode, CSKY_GCC_SYM (fixdfdi));
+  set_conv_libfunc (ufix_optab,	   SImode, SFmode, CSKY_GCC_SYM (fixunssfsi));
+  set_conv_libfunc (ufix_optab,	   SImode, DFmode, CSKY_GCC_SYM (fixunsdfsi));
+  set_conv_libfunc (ufix_optab,	   DImode, SFmode, CSKY_GCC_SYM (fixunssfdi));
+  set_conv_libfunc (ufix_optab,	   DImode, DFmode, CSKY_GCC_SYM (fixunsdfdi));
+  set_conv_libfunc (sfloat_optab,  SFmode, SImode, CSKY_GCC_SYM (floatsisf));
+  set_conv_libfunc (sfloat_optab,  DFmode, SImode, CSKY_GCC_SYM (floatsidf));
+  set_conv_libfunc (sfloat_optab,  SFmode, DImode, CSKY_GCC_SYM (floatdisf));
+  set_conv_libfunc (sfloat_optab,  DFmode, DImode, CSKY_GCC_SYM (floatdidf));
+  set_conv_libfunc (ufloat_optab,  SFmode, SImode, CSKY_GCC_SYM (floatunsisf));
+  set_conv_libfunc (ufloat_optab,  DFmode, SImode, CSKY_GCC_SYM (floatunsidf));
+  set_conv_libfunc (ufloat_optab,  SFmode, DImode, CSKY_GCC_SYM (floatundisf));
+  set_conv_libfunc (ufloat_optab,  DFmode, DImode, CSKY_GCC_SYM (floatundidf));
+
+  /* Comparison functions */
+  set_optab_libfunc (cmp_optab,	   SFmode, CSKY_GCC_SYM (cmpsf2));
+  set_optab_libfunc (cmp_optab,	   DFmode, CSKY_GCC_SYM (cmpdf2));
+  set_optab_libfunc (unord_optab,  SFmode, CSKY_GCC_SYM (unordsf2));
+  set_optab_libfunc (unord_optab,  DFmode, CSKY_GCC_SYM (unorddf2));
+  set_optab_libfunc (eq_optab,	   SFmode, CSKY_GCC_SYM (eqsf2));
+  set_optab_libfunc (eq_optab,	   DFmode, CSKY_GCC_SYM (eqdf2));
+  set_optab_libfunc (ne_optab,	   SFmode, CSKY_GCC_SYM (nesf2));
+  set_optab_libfunc (ne_optab,	   DFmode, CSKY_GCC_SYM (nedf2));
+  set_optab_libfunc (ge_optab,	   SFmode, CSKY_GCC_SYM (gesf2));
+  set_optab_libfunc (ge_optab,	   DFmode, CSKY_GCC_SYM (gedf2));
+  set_optab_libfunc (lt_optab,	   SFmode, CSKY_GCC_SYM (ltsf2));
+  set_optab_libfunc (lt_optab,	   DFmode, CSKY_GCC_SYM (ltdf2));
+  set_optab_libfunc (le_optab,	   SFmode, CSKY_GCC_SYM (lesf2));
+  set_optab_libfunc (le_optab,	   DFmode, CSKY_GCC_SYM (ledf2));
+  set_optab_libfunc (gt_optab,	   SFmode, CSKY_GCC_SYM (gtsf2));
+  set_optab_libfunc (gt_optab,	   DFmode, CSKY_GCC_SYM (gtdf2));
+}
+
+
+/* Implement TARGET_ADDRESS_COST to estimate cost of the memory address X.
+   For C-SKY, (register) and (register + offset) have the same cost.
+   Other situations cost more.  */
+
+static int
+csky_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
+		   addr_space_t as ATTRIBUTE_UNUSED,
+		   bool speed ATTRIBUTE_UNUSED)
+{
+  enum rtx_code code = GET_CODE (x);
+
+  if (code == REG)
+    return COSTS_N_INSNS (1);
+  if (code == PLUS
+      && REG_P (XEXP (x, 0))
+      && CONST_INT_P (XEXP (x, 1)))
+    return COSTS_N_INSNS (1);
+
+  return COSTS_N_INSNS (3);
+}
+
+
+/* Implement TARGET_FIXED_CONDITION_CODE_REGS.  */
+
+static bool
+csky_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
+{
+  *p1 = CSKY_CC_REGNUM;
+  *p2 = INVALID_REGNUM;
+  return true;
+}
+
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
+#include "gt-csky.h"