diff gcc/config/spu/spu.c @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents f6334be47118
children 84e7813d76e9
--- a/gcc/config/spu/spu.c	Sun Aug 21 07:07:55 2011 +0900
+++ b/gcc/config/spu/spu.c	Fri Oct 27 22:46:09 2017 +0900
@@ -1,4 +1,4 @@
-/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
+/* Copyright (C) 2006-2017 Free Software Foundation, Inc.
 
    This file is free software; you can redistribute it and/or modify it under
    the terms of the GNU General Public License as published by the Free
@@ -17,42 +17,48 @@
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
-#include "tm.h"
+#include "backend.h"
+#include "target.h"
 #include "rtl.h"
+#include "tree.h"
+#include "gimple.h"
+#include "cfghooks.h"
+#include "cfgloop.h"
+#include "df.h"
+#include "memmodel.h"
+#include "tm_p.h"
+#include "stringpool.h"
+#include "attribs.h"
+#include "expmed.h"
+#include "optabs.h"
 #include "regs.h"
-#include "hard-reg-set.h"
-#include "insn-config.h"
-#include "conditions.h"
+#include "emit-rtl.h"
+#include "recog.h"
+#include "diagnostic-core.h"
 #include "insn-attr.h"
-#include "flags.h"
-#include "recog.h"
-#include "obstack.h"
-#include "tree.h"
+#include "alias.h"
+#include "fold-const.h"
+#include "stor-layout.h"
+#include "calls.h"
+#include "varasm.h"
+#include "explow.h"
 #include "expr.h"
-#include "optabs.h"
-#include "except.h"
-#include "function.h"
 #include "output.h"
-#include "basic-block.h"
-#include "integrate.h"
-#include "diagnostic-core.h"
-#include "ggc.h"
-#include "hashtab.h"
-#include "tm_p.h"
-#include "target.h"
-#include "target-def.h"
+#include "cfgrtl.h"
+#include "cfgbuild.h"
 #include "langhooks.h"
 #include "reload.h"
-#include "cfglayout.h"
 #include "sched-int.h"
 #include "params.h"
-#include "machmode.h"
-#include "gimple.h"
+#include "gimplify.h"
 #include "tm-constrs.h"
 #include "ddg.h"
-#include "sbitmap.h"
-#include "timevar.h"
-#include "df.h"
+#include "dumpfile.h"
+#include "builtins.h"
+#include "rtl-iter.h"
+
+/* This file should be included last.  */
+#include "target-def.h"
 
 /* Builtin types, data and prototypes. */
 
@@ -147,90 +153,11 @@
 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
 
 /*  Prototypes and external defs.  */
-static void spu_option_override (void);
-static void spu_option_init_struct (struct gcc_options *opts);
-static void spu_option_default_params (void);
-static void spu_init_builtins (void);
-static tree spu_builtin_decl (unsigned, bool);
-static bool spu_scalar_mode_supported_p (enum machine_mode mode);
-static bool spu_vector_mode_supported_p (enum machine_mode mode);
-static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
-static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
-						 bool, addr_space_t);
-static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
-static rtx get_pic_reg (void);
-static int need_to_save_reg (int regno, int saving);
-static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
-static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
-static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
-			       rtx scratch);
-static void emit_nop_for_insn (rtx insn);
-static bool insn_clobbers_hbr (rtx insn);
-static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
-				  int distance, sbitmap blocks);
-static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
-	                            enum machine_mode dmode);
-static rtx get_branch_target (rtx branch);
-static void spu_machine_dependent_reorg (void);
-static int spu_sched_issue_rate (void);
-static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
-				     int can_issue_more);
-static int get_pipe (rtx insn);
-static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
-static void spu_sched_init_global (FILE *, int, int);
-static void spu_sched_init (FILE *, int, int);
-static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
-static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
-					 int flags,
-					 bool *no_add_attrs);
-static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
-					 int flags,
-					 bool *no_add_attrs);
+static int get_pipe (rtx_insn *insn);
 static int spu_naked_function_p (tree func);
-static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
-				   const_tree type, bool named);
-static rtx spu_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
-			     const_tree type, bool named);
-static void spu_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
-				      const_tree type, bool named);
-static tree spu_build_builtin_va_list (void);
-static void spu_va_start (tree, rtx);
-static tree spu_gimplify_va_arg_expr (tree valist, tree type,
-				      gimple_seq * pre_p, gimple_seq * post_p);
-static int store_with_one_insn_p (rtx mem);
 static int mem_is_padded_component_ref (rtx x);
-static int reg_aligned_for_addr (rtx x);
-static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
-static void spu_asm_globalize_label (FILE * file, const char *name);
-static bool spu_rtx_costs (rtx x, int code, int outer_code,
-			   int *total, bool speed);
-static bool spu_function_ok_for_sibcall (tree decl, tree exp);
-static void spu_init_libfuncs (void);
-static bool spu_return_in_memory (const_tree type, const_tree fntype);
 static void fix_range (const char *);
-static void spu_encode_section_info (tree, rtx, int);
-static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
-static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
-					      addr_space_t);
-static tree spu_builtin_mul_widen_even (tree);
-static tree spu_builtin_mul_widen_odd (tree);
-static tree spu_builtin_mask_for_load (void);
-static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
-static bool spu_vector_alignment_reachable (const_tree, bool);
-static tree spu_builtin_vec_perm (tree, tree *);
-static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
-static enum machine_mode spu_addr_space_address_mode (addr_space_t);
-static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
-static rtx spu_addr_space_convert (rtx, tree, tree);
-static int spu_sms_res_mii (struct ddg *g);
-static void asm_file_start (void);
-static unsigned int spu_section_type_flags (tree, const char *, int);
-static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
-static void spu_unique_section (tree, int);
 static rtx spu_expand_load (rtx, rtx, rtx, int);
-static void spu_trampoline_init (rtx, tree, rtx);
-static void spu_conditional_register_usage (void);
-static bool spu_ref_may_alias_errno (ao_ref *);
 
 /* Which instruction set architecture to use.  */
 int spu_arch;
@@ -245,10 +172,6 @@
    inserted in pairs, so we round down. */
 int spu_hint_dist = (8*4) - (2*4);
 
-/* Determines whether we run variable tracking in machine dependent
-   reorganization.  */
-static int spu_flag_var_tracking;
-
 enum spu_immediate {
   SPU_NONE,
   SPU_IL,
@@ -276,247 +199,33 @@
 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
 static enum immediate_class classify_immediate (rtx op,
-						enum machine_mode mode);
-
-static enum machine_mode spu_unwind_word_mode (void);
-
-static enum machine_mode
-spu_libgcc_cmp_return_mode (void);
-
-static enum machine_mode
-spu_libgcc_shift_count_mode (void);
+						machine_mode mode);
 
 /* Pointer mode for __ea references.  */
 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
 
 
-/*  Table of machine attributes.  */
-static const struct attribute_spec spu_attribute_table[] =
-{
-  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
-  { "naked",          0, 0, true,  false, false, spu_handle_fndecl_attribute },
-  { "spu_vector",     0, 0, false, true,  false, spu_handle_vector_attribute },
-  { NULL,             0, 0, false, false, false, NULL }
+/* Define the structure for the machine field in struct function.  */
+struct GTY(()) machine_function
+{
+  /* Register to use for PIC accesses.  */
+  rtx pic_reg;
 };
-
-/*  TARGET overrides.  */
-
-#undef TARGET_ADDR_SPACE_POINTER_MODE
-#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
-
-#undef TARGET_ADDR_SPACE_ADDRESS_MODE
-#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
-
-#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
-#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
-  spu_addr_space_legitimate_address_p
-
-#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
-#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
-
-#undef TARGET_ADDR_SPACE_SUBSET_P
-#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
-
-#undef TARGET_ADDR_SPACE_CONVERT
-#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
-
-#undef TARGET_INIT_BUILTINS
-#define TARGET_INIT_BUILTINS spu_init_builtins
-#undef TARGET_BUILTIN_DECL
-#define TARGET_BUILTIN_DECL spu_builtin_decl
-
-#undef TARGET_EXPAND_BUILTIN
-#define TARGET_EXPAND_BUILTIN spu_expand_builtin
-
-#undef TARGET_UNWIND_WORD_MODE
-#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
-
-#undef TARGET_LEGITIMIZE_ADDRESS
-#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
-
-/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
-   and .quad for the debugger.  When it is known that the assembler is fixed,
-   these can be removed.  */
-#undef TARGET_ASM_UNALIGNED_SI_OP
-#define TARGET_ASM_UNALIGNED_SI_OP	"\t.long\t"
-
-#undef TARGET_ASM_ALIGNED_DI_OP
-#define TARGET_ASM_ALIGNED_DI_OP	"\t.quad\t"
-
-/* The .8byte directive doesn't seem to work well for a 32 bit
-   architecture. */
-#undef TARGET_ASM_UNALIGNED_DI_OP
-#define TARGET_ASM_UNALIGNED_DI_OP NULL
-
-#undef TARGET_RTX_COSTS
-#define TARGET_RTX_COSTS spu_rtx_costs
-
-#undef TARGET_ADDRESS_COST
-#define TARGET_ADDRESS_COST hook_int_rtx_bool_0
-
-#undef TARGET_SCHED_ISSUE_RATE
-#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
-
-#undef TARGET_SCHED_INIT_GLOBAL
-#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
-
-#undef TARGET_SCHED_INIT
-#define TARGET_SCHED_INIT spu_sched_init
-
-#undef TARGET_SCHED_VARIABLE_ISSUE
-#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
-
-#undef TARGET_SCHED_REORDER
-#define TARGET_SCHED_REORDER spu_sched_reorder
-
-#undef TARGET_SCHED_REORDER2
-#define TARGET_SCHED_REORDER2 spu_sched_reorder
-
-#undef TARGET_SCHED_ADJUST_COST
-#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
-
-#undef  TARGET_ATTRIBUTE_TABLE
-#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
-
-#undef TARGET_ASM_INTEGER
-#define TARGET_ASM_INTEGER spu_assemble_integer
-
-#undef TARGET_SCALAR_MODE_SUPPORTED_P
-#define TARGET_SCALAR_MODE_SUPPORTED_P	spu_scalar_mode_supported_p
-
-#undef TARGET_VECTOR_MODE_SUPPORTED_P
-#define TARGET_VECTOR_MODE_SUPPORTED_P	spu_vector_mode_supported_p
-
-#undef TARGET_FUNCTION_OK_FOR_SIBCALL
-#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
-
-#undef TARGET_ASM_GLOBALIZE_LABEL
-#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
-
-#undef TARGET_PASS_BY_REFERENCE
-#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
-
-#undef TARGET_FUNCTION_ARG
-#define TARGET_FUNCTION_ARG spu_function_arg
-
-#undef TARGET_FUNCTION_ARG_ADVANCE
-#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
-
-#undef TARGET_MUST_PASS_IN_STACK
-#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
-
-#undef TARGET_BUILD_BUILTIN_VA_LIST
-#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
-
-#undef TARGET_EXPAND_BUILTIN_VA_START
-#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
-
-#undef TARGET_SETUP_INCOMING_VARARGS
-#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
-
-#undef TARGET_MACHINE_DEPENDENT_REORG
-#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
-
-#undef TARGET_GIMPLIFY_VA_ARG_EXPR
-#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
-
-#undef TARGET_DEFAULT_TARGET_FLAGS
-#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
-
-#undef TARGET_INIT_LIBFUNCS
-#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
-
-#undef TARGET_RETURN_IN_MEMORY
-#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
-
-#undef  TARGET_ENCODE_SECTION_INFO
-#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
-
-#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
-#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
-
-#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
-#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
-
-#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
-#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
-
-#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
-#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
-
-#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
-#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
-
-#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
-#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
-
-#undef TARGET_LIBGCC_CMP_RETURN_MODE
-#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
-
-#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
-#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
-
-#undef TARGET_SCHED_SMS_RES_MII
-#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
-
-#undef TARGET_ASM_FILE_START
-#define TARGET_ASM_FILE_START asm_file_start
-
-#undef TARGET_SECTION_TYPE_FLAGS
-#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
-
-#undef TARGET_ASM_SELECT_SECTION
-#define TARGET_ASM_SELECT_SECTION  spu_select_section
-
-#undef TARGET_ASM_UNIQUE_SECTION
-#define TARGET_ASM_UNIQUE_SECTION  spu_unique_section
-
-#undef TARGET_LEGITIMATE_ADDRESS_P
-#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
-
-#undef TARGET_TRAMPOLINE_INIT
-#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
-
-#undef TARGET_OPTION_OVERRIDE
-#define TARGET_OPTION_OVERRIDE spu_option_override
-
-#undef TARGET_OPTION_INIT_STRUCT
-#define TARGET_OPTION_INIT_STRUCT spu_option_init_struct
-
-#undef TARGET_OPTION_DEFAULT_PARAMS
-#define TARGET_OPTION_DEFAULT_PARAMS spu_option_default_params
-
-#undef TARGET_EXCEPT_UNWIND_INFO
-#define TARGET_EXCEPT_UNWIND_INFO  sjlj_except_unwind_info
-
-#undef TARGET_CONDITIONAL_REGISTER_USAGE
-#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
-
-#undef TARGET_REF_MAY_ALIAS_ERRNO
-#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
-
-struct gcc_target targetm = TARGET_INITIALIZER;
-
-static void
-spu_option_init_struct (struct gcc_options *opts)
-{
-  /* With so many registers this is better on by default. */
-  opts->x_flag_rename_registers = 1;
-}
-
-/* Implement TARGET_OPTION_DEFAULT_PARAMS.  */
-static void
-spu_option_default_params (void)
-{
-  /* Override some of the default param values.  With so many registers
-     larger values are better for these params.  */
-  set_default_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 128);
+
+/* How to allocate a 'struct machine_function'.  */
+static struct machine_function *
+spu_init_machine_status (void)
+{
+  return ggc_cleared_alloc<machine_function> ();
 }
 
 /* Implement TARGET_OPTION_OVERRIDE.  */
 static void
 spu_option_override (void)
 {
+  /* Set up function hooks.  */
+  init_machine_status = spu_init_machine_status;
+
   /* Small loops will be unpeeled at -O3.  For SPU it is more important
      to keep code small by default.  */
   if (!flag_unroll_loops && !flag_peel_loops)
@@ -572,6 +281,14 @@
   REAL_MODE_FORMAT (SFmode) = &spu_single_format;
 }
 
+/* Implement TARGET_HARD_REGNO_NREGS.  */
+
+static unsigned int
+spu_hard_regno_nregs (unsigned int, machine_mode mode)
+{
+  return CEIL (GET_MODE_BITSIZE (mode), MAX_FIXED_MODE_SIZE);
+}
+
 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
    struct attribute_spec.handler.  */
 
@@ -579,17 +296,17 @@
    be manipulated in non-trivial ways.  In particular, this means all
    the arithmetic is supported.  */
 static bool
-spu_scalar_mode_supported_p (enum machine_mode mode)
+spu_scalar_mode_supported_p (scalar_mode mode)
 {
   switch (mode)
     {
-    case QImode:
-    case HImode:
-    case SImode:
-    case SFmode:
-    case DImode:
-    case TImode:
-    case DFmode:
+    case E_QImode:
+    case E_HImode:
+    case E_SImode:
+    case E_SFmode:
+    case E_DImode:
+    case E_TImode:
+    case E_DFmode:
       return true;
 
     default:
@@ -601,16 +318,16 @@
    least some operations are supported; need to check optabs or builtins
    for further details.  */
 static bool
-spu_vector_mode_supported_p (enum machine_mode mode)
+spu_vector_mode_supported_p (machine_mode mode)
 {
   switch (mode)
     {
-    case V16QImode:
-    case V8HImode:
-    case V4SImode:
-    case V2DImode:
-    case V4SFmode:
-    case V2DFmode:
+    case E_V16QImode:
+    case E_V8HImode:
+    case E_V4SImode:
+    case E_V2DImode:
+    case E_V4SFmode:
+    case E_V2DFmode:
       return true;
 
     default:
@@ -624,8 +341,8 @@
 int
 valid_subreg (rtx op)
 {
-  enum machine_mode om = GET_MODE (op);
-  enum machine_mode im = GET_MODE (SUBREG_REG (op));
+  machine_mode om = GET_MODE (op);
+  machine_mode im = GET_MODE (SUBREG_REG (op));
   return om != VOIDmode && im != VOIDmode
     && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
 	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
@@ -637,7 +354,7 @@
 static rtx
 adjust_operand (rtx op, HOST_WIDE_INT * start)
 {
-  enum machine_mode mode;
+  machine_mode mode;
   int op_size;
   /* Strip any paradoxical SUBREG.  */
   if (GET_CODE (op) == SUBREG
@@ -659,7 +376,7 @@
       op_size = 32;
     }
   /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
-  mode = mode_for_size (op_size, MODE_INT, 0);
+  mode = int_mode_for_size (op_size, 0).require ();
   if (mode != GET_MODE (op))
     op = gen_rtx_SUBREG (mode, op, 0);
   return op;
@@ -724,7 +441,7 @@
       gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
       s0 = gen_reg_rtx (TImode);
       if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
-	emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
+	emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
       else
 	emit_move_insn (s0, src);
     }
@@ -741,10 +458,7 @@
     emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
 
   if (128 - width)
-    {
-      tree c = build_int_cst (NULL_TREE, 128 - width);
-      s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
-    }
+    s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
 
   emit_move_insn (dst, s0);
 }
@@ -754,8 +468,8 @@
 {
   HOST_WIDE_INT width = INTVAL (ops[1]);
   HOST_WIDE_INT start = INTVAL (ops[2]);
-  HOST_WIDE_INT maskbits;
-  enum machine_mode dst_mode;
+  unsigned HOST_WIDE_INT maskbits;
+  machine_mode dst_mode;
   rtx dst = ops[0], src = ops[3];
   int dst_size;
   rtx mask;
@@ -772,7 +486,7 @@
 
   if (CONSTANT_P (src))
     {
-      enum machine_mode m =
+      machine_mode m =
 	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
       src = force_reg (m, convert_to_mode (m, src, 0));
     }
@@ -790,13 +504,13 @@
     {
       switch (dst_mode)
 	{
-	case SImode:
+	case E_SImode:
 	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
 	  break;
-	case DImode:
+	case E_DImode:
 	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
 	  break;
-	case TImode:
+	case E_TImode:
 	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
 	  break;
 	default:
@@ -809,15 +523,15 @@
   switch (dst_size)
     {
     case 32:
-      maskbits = (-1ll << (32 - width - start));
+      maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
       if (start)
-	maskbits += (1ll << (32 - start));
+	maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
       emit_move_insn (mask, GEN_INT (maskbits));
       break;
     case 64:
-      maskbits = (-1ll << (64 - width - start));
+      maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
       if (start)
-	maskbits += (1ll << (64 - start));
+	maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
       emit_move_insn (mask, GEN_INT (maskbits));
       break;
     case 128:
@@ -861,7 +575,7 @@
 	  rtx mask1 = gen_reg_rtx (TImode);
 	  rtx dst1 = gen_reg_rtx (TImode);
 	  rtx mem1;
-	  addr1 = plus_constant (addr, 16);
+	  addr1 = plus_constant (Pmode, addr, 16);
 	  addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
 	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
 	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
@@ -959,8 +673,8 @@
   int reverse_test = 0;
   rtx compare_result, eq_result;
   rtx comp_rtx, eq_rtx;
-  enum machine_mode comp_mode;
-  enum machine_mode op_mode;
+  machine_mode comp_mode;
+  machine_mode op_mode;
   enum spu_comp_code scode, eq_code;
   enum insn_code ior_code;
   enum rtx_code code = GET_CODE (cmp);
@@ -998,6 +712,27 @@
 	  }
     }
 
+  /* However, if we generate an integer result, performing a reverse test
+     would require an extra negation, so avoid that where possible.  */
+  if (GET_CODE (op1) == CONST_INT && is_set == 1)
+    {
+      HOST_WIDE_INT val = INTVAL (op1) + 1;
+      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
+	switch (code)
+	  {
+	  case LE:
+	    op1 = GEN_INT (val);
+	    code = LT;
+	    break;
+	  case LEU:
+	    op1 = GEN_INT (val);
+	    code = LTU;
+	    break;
+	  default:
+	    break;
+	  }
+    }
+
   comp_mode = SImode;
   op_mode = GET_MODE (op0);
 
@@ -1075,50 +810,50 @@
 
   switch (op_mode)
     {
-    case QImode:
+    case E_QImode:
       index = 0;
       comp_mode = QImode;
       break;
-    case HImode:
+    case E_HImode:
       index = 1;
       comp_mode = HImode;
       break;
-    case SImode:
+    case E_SImode:
       index = 2;
       break;
-    case DImode:
+    case E_DImode:
       index = 3;
       break;
-    case TImode:
+    case E_TImode:
       index = 4;
       break;
-    case SFmode:
+    case E_SFmode:
       index = 5;
       break;
-    case DFmode:
+    case E_DFmode:
       index = 6;
       break;
-    case V16QImode:
+    case E_V16QImode:
       index = 7;
       comp_mode = op_mode;
       break;
-    case V8HImode:
+    case E_V8HImode:
       index = 8;
       comp_mode = op_mode;
       break;
-    case V4SImode:
+    case E_V4SImode:
       index = 9;
       comp_mode = op_mode;
       break;
-    case V4SFmode:
+    case E_V4SFmode:
       index = 10;
       comp_mode = V4SImode;
       break;
-    case V2DFmode:
+    case E_V2DFmode:
       index = 11;
       comp_mode = V2DImode;
       break;
-    case V2DImode:
+    case E_V2DImode:
     default:
       abort ();
     }
@@ -1129,7 +864,8 @@
 
   if (is_set == 0 && op1 == const0_rtx
       && (GET_MODE (op0) == SImode
-	  || GET_MODE (op0) == HImode) && scode == SPU_EQ)
+	  || GET_MODE (op0) == HImode
+	  || GET_MODE (op0) == QImode) && scode == SPU_EQ)
     {
       /* Don't need to set a register with the result when we are 
          comparing against zero and branching. */
@@ -1198,7 +934,7 @@
 	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
 
       loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
-      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+      emit_jump_insn (gen_rtx_SET (pc_rtx,
 				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
 							 loc_ref, pc_rtx)));
     }
@@ -1207,7 +943,7 @@
       rtx target = operands[0];
       int compare_size = GET_MODE_BITSIZE (comp_mode);
       int target_size = GET_MODE_BITSIZE (GET_MODE (target));
-      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
+      machine_mode mode = int_mode_for_size (target_size, 0).require ();
       rtx select_mask;
       rtx op_t = operands[2];
       rtx op_f = operands[3];
@@ -1241,7 +977,7 @@
     {
       rtx target = operands[0];
       if (reverse_test)
-	emit_insn (gen_rtx_SET (VOIDmode, compare_result,
+	emit_insn (gen_rtx_SET (compare_result,
 				gen_rtx_NOT (comp_mode, compare_result)));
       if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
 	emit_insn (gen_extendhisi2 (target, compare_result));
@@ -1257,17 +993,12 @@
 const_double_to_hwint (rtx x)
 {
   HOST_WIDE_INT val;
-  REAL_VALUE_TYPE rv;
   if (GET_MODE (x) == SFmode)
-    {
-      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
-      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
-    }
+    REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
   else if (GET_MODE (x) == DFmode)
     {
       long l[2];
-      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
-      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
+      REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
       val = l[0];
       val = (val << 32) | (l[1] & 0xffffffff);
     }
@@ -1277,7 +1008,7 @@
 }
 
 rtx
-hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
+hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
 {
   long tv[2];
   REAL_VALUE_TYPE rv;
@@ -1291,7 +1022,7 @@
       tv[0] = v >> 32;
     }
   real_from_target (&rv, tv, mode);
-  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
+  return const_double_from_real_value (rv, mode);
 }
 
 void
@@ -1344,7 +1075,7 @@
 void
 print_operand (FILE * file, rtx x, int code)
 {
-  enum machine_mode mode = GET_MODE (x);
+  machine_mode mode = GET_MODE (x);
   HOST_WIDE_INT val;
   unsigned char arr[16];
   int xcode = GET_CODE (x);
@@ -1618,7 +1349,7 @@
 	    /* Used in indirect function calls. */
 	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
 	  else
-	    output_address (XEXP (x, 0));
+	    output_address (GET_MODE (x), XEXP (x, 0));
 	}
       return;
 
@@ -1711,7 +1442,7 @@
       if (xcode == REG)
 	fprintf (file, "%s", reg_names[REGNO (x)]);
       else if (xcode == MEM)
-	output_address (XEXP (x, 0));
+	output_address (GET_MODE (x), XEXP (x, 0));
       else if (xcode == CONST_VECTOR)
 	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
       else
@@ -1735,12 +1466,22 @@
 static rtx
 get_pic_reg (void)
 {
-  rtx pic_reg = pic_offset_table_rtx;
   if (!reload_completed && !reload_in_progress)
     abort ();
-  if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
-    pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
-  return pic_reg;
+
+  /* If we've already made the decision, we need to keep with it.  Once we've
+     decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
+     return true since the register is now live; this should not cause us to
+     "switch back" to using pic_offset_table_rtx.  */
+  if (!cfun->machine->pic_reg)
+    {
+      if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
+	cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
+      else
+	cfun->machine->pic_reg = pic_offset_table_rtx;
+    }
+
+  return cfun->machine->pic_reg;
 }
 
 /* Split constant addresses to handle cases that are too large. 
@@ -1749,7 +1490,7 @@
 int
 spu_split_immediate (rtx * ops)
 {
-  enum machine_mode mode = GET_MODE (ops[0]);
+  machine_mode mode = GET_MODE (ops[0]);
   enum immediate_class c = classify_immediate (ops[1], mode);
 
   switch (c)
@@ -1760,10 +1501,9 @@
 	unsigned char arrlo[16];
 	rtx to, temp, hi, lo;
 	int i;
-	enum machine_mode imode = mode;
 	/* We need to do reals as ints because the constant used in the
 	   IOR might not be a legitimate real constant. */
-	imode = int_mode_for_mode (mode);
+	scalar_int_mode imode = int_mode_for_mode (mode).require ();
 	constant_to_array (mode, ops[1], arrhi);
 	if (imode != mode)
 	  to = simplify_gen_subreg (imode, ops[0], mode, 0);
@@ -1780,8 +1520,7 @@
 	hi = array_to_constant (imode, arrhi);
 	lo = array_to_constant (imode, arrlo);
 	emit_move_insn (temp, hi);
-	emit_insn (gen_rtx_SET
-		   (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
+	emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
 	return 1;
       }
     case IC_FSMBI2:
@@ -1790,10 +1529,9 @@
 	unsigned char arr_andbi[16];
 	rtx to, reg_fsmbi, reg_and;
 	int i;
-	enum machine_mode imode = mode;
 	/* We need to do reals as ints because the constant used in the
 	 * AND might not be a legitimate real constant. */
-	imode = int_mode_for_mode (mode);
+	scalar_int_mode imode = int_mode_for_mode (mode).require ();
 	constant_to_array (mode, ops[1], arr_fsmbi);
 	if (imode != mode)
 	  to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
@@ -1810,8 +1548,7 @@
 	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
 	reg_and = array_to_constant (imode, arr_andbi);
 	emit_move_insn (to, reg_fsmbi);
-	emit_insn (gen_rtx_SET
-		   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
+	emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
 	return 1;
       }
     case IC_POOL:
@@ -1843,7 +1580,6 @@
 	    {
 	      rtx pic_reg = get_pic_reg ();
 	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
-	      crtl->uses_pic_offset_table = 1;
 	    }
 	  return flag_pic || c == IC_IL2s;
 	}
@@ -1869,9 +1605,7 @@
     return 1;
   if (flag_pic
       && regno == PIC_OFFSET_TABLE_REGNUM
-      && (!saving || crtl->uses_pic_offset_table)
-      && (!saving
-	  || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
+      && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
     return 1;
   return 0;
 }
@@ -1890,7 +1624,7 @@
   return reg_save_size;
 }
 
-static rtx
+static rtx_insn *
 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
 {
   rtx reg = gen_rtx_REG (V4SImode, regno);
@@ -1899,7 +1633,7 @@
   return emit_insn (gen_movv4si (mem, reg));
 }
 
-static rtx
+static rtx_insn *
 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
 {
   rtx reg = gen_rtx_REG (V4SImode, regno);
@@ -1909,10 +1643,10 @@
 }
 
 /* This happens after reload, so we need to expand it.  */
-static rtx
+static rtx_insn *
 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
 {
-  rtx insn;
+  rtx_insn *insn;
   if (satisfies_constraint_K (GEN_INT (imm)))
     {
       insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
@@ -1939,7 +1673,7 @@
 	      + get_frame_size ()
 	      + crtl->outgoing_args_size
 	      + crtl->args.pretend_args_size == 0)
-	  && current_function_is_leaf)
+	  && crtl->is_leaf)
 	return 1;
     }
   return 0;
@@ -1983,10 +1717,11 @@
   HOST_WIDE_INT saved_regs_size;
   rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
   rtx scratch_reg_0, scratch_reg_1;
-  rtx insn, real;
-
-  if (flag_pic && optimize == 0)
-    crtl->uses_pic_offset_table = 1;
+  rtx_insn *insn;
+  rtx real;
+
+  if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
+    cfun->machine->pic_reg = pic_offset_table_rtx;
 
   if (spu_naked_function_p (current_function_decl))
     return;
@@ -1999,13 +1734,13 @@
     + crtl->outgoing_args_size
     + crtl->args.pretend_args_size;
 
-  if (!current_function_is_leaf
+  if (!crtl->is_leaf
       || cfun->calls_alloca || total_size > 0)
     total_size += STACK_POINTER_OFFSET;
 
   /* Save this first because code after this might use the link
      register as a scratch register. */
-  if (!current_function_is_leaf)
+  if (!crtl->is_leaf)
     {
       insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
       RTX_FRAME_RELATED_P (insn) = 1;
@@ -2023,16 +1758,16 @@
 	  }
     }
 
-  if (flag_pic && crtl->uses_pic_offset_table)
-    {
-      rtx pic_reg = get_pic_reg ();
+  if (flag_pic && cfun->machine->pic_reg)
+    {
+      rtx pic_reg = cfun->machine->pic_reg;
       insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
       insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
     }
 
   if (total_size > 0)
     {
-      if (flag_stack_check)
+      if (flag_stack_check || flag_stack_clash_protection)
 	{
 	  /* We compare against total_size-1 because
 	     ($sp >= total_size) <=> ($sp > total_size-1) */
@@ -2045,7 +1780,7 @@
 	      size_v4si = scratch_v4si;
 	    }
 	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
-	  emit_insn (gen_vec_extractv4si
+	  emit_insn (gen_vec_extractv4sisi
 		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
 	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
 	}
@@ -2090,7 +1825,7 @@
 	}
     }
 
-  if (flag_stack_usage)
+  if (flag_stack_usage_info)
     current_function_static_stack_size = total_size;
 }
 
@@ -2100,7 +1835,7 @@
   int size = get_frame_size (), offset, regno;
   HOST_WIDE_INT saved_regs_size, total_size;
   rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
-  rtx jump, scratch_reg_0;
+  rtx scratch_reg_0;
 
   if (spu_naked_function_p (current_function_decl))
     return;
@@ -2112,7 +1847,7 @@
     + crtl->outgoing_args_size
     + crtl->args.pretend_args_size;
 
-  if (!current_function_is_leaf
+  if (!crtl->is_leaf
       || cfun->calls_alloca || total_size > 0)
     total_size += STACK_POINTER_OFFSET;
 
@@ -2136,16 +1871,14 @@
 	}
     }
 
-  if (!current_function_is_leaf)
+  if (!crtl->is_leaf)
     frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
 
   if (!sibcall_p)
     {
       emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
-      jump = emit_jump_insn (gen__return ());
-      emit_barrier_after (jump);
-    }
-
+      emit_jump_insn (gen__return ());
+    }
 }
 
 rtx
@@ -2167,7 +1900,7 @@
    If MODE is a vector mode, every element will be VAL.
    For TImode, VAL will be zero extended to 128 bits. */
 rtx
-spu_const (enum machine_mode mode, HOST_WIDE_INT val)
+spu_const (machine_mode mode, HOST_WIDE_INT val)
 {
   rtx inner;
   rtvec v;
@@ -2202,7 +1935,7 @@
 
 /* Create a MODE vector constant from 4 ints. */
 rtx
-spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
+spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
 {
   unsigned char arr[16];
   arr[0] = (a >> 24) & 0xff;
@@ -2229,13 +1962,13 @@
 /* An array of these is used to propagate hints to predecessor blocks. */
 struct spu_bb_info
 {
-  rtx prop_jump; /* propagated from another block */
+  rtx_insn *prop_jump; /* propagated from another block */
   int bb_index;  /* the original block. */
 };
 static struct spu_bb_info *spu_bb_info;
 
 #define STOP_HINT_P(INSN) \
-		(GET_CODE(INSN) == CALL_INSN \
+		(CALL_P(INSN) \
 		 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
 		 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
 
@@ -2251,13 +1984,22 @@
 /* Emit a nop for INSN such that the two will dual issue.  This assumes
    INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
    We check for TImode to handle a MULTI1 insn which has dual issued its
-   first instruction.  get_pipe returns -1 for MULTI0, inline asm, or
-   ADDR_VEC insns. */
+   first instruction.  get_pipe returns -1 for MULTI0 or inline asm.  */
 static void
-emit_nop_for_insn (rtx insn)
+emit_nop_for_insn (rtx_insn *insn)
 {
   int p;
-  rtx new_insn;
+  rtx_insn *new_insn;
+
+  /* We need to handle JUMP_TABLE_DATA separately.  */
+  if (JUMP_TABLE_DATA_P (insn))
+    {
+      new_insn = emit_insn_after (gen_lnop(), insn);
+      recog_memoized (new_insn);
+      INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
+      return;
+    }
+
   p = get_pipe (insn);
   if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
     new_insn = emit_insn_after (gen_lnop (), insn);
@@ -2270,7 +2012,7 @@
   else
     new_insn = emit_insn_after (gen_lnop (), insn);
   recog_memoized (new_insn);
-  INSN_LOCATOR (new_insn) = INSN_LOCATOR (insn);
+  INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
 }
 
 /* Insert nops in basic blocks to meet dual issue alignment
@@ -2279,7 +2021,7 @@
 static void
 pad_bb(void)
 {
-  rtx insn, next_insn, prev_insn, hbr_insn = 0;
+  rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
   int length;
   int addr;
 
@@ -2309,13 +2051,13 @@
 		  prev_insn = emit_insn_before (gen_lnop (), insn);
 		  PUT_MODE (prev_insn, GET_MODE (insn));
 		  PUT_MODE (insn, TImode);
-		  INSN_LOCATOR (prev_insn) = INSN_LOCATOR (insn);
+		  INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
 		  length += 4;
 		}
 	    }
 	  hbr_insn = insn;
 	}
-      if (INSN_CODE (insn) == CODE_FOR_blockage)
+      if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
 	{
 	  if (GET_MODE (insn) == TImode)
 	    PUT_MODE (next_insn, TImode);
@@ -2349,13 +2091,12 @@
 /* Routines for branch hints. */
 
 static void
-spu_emit_branch_hint (rtx before, rtx branch, rtx target,
+spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
 		      int distance, sbitmap blocks)
 {
-  rtx branch_label = 0;
-  rtx hint;
-  rtx insn;
-  rtx table;
+  rtx_insn *hint;
+  rtx_insn *insn;
+  rtx_jump_table_data *table;
 
   if (before == 0 || branch == 0 || target == 0)
     return;
@@ -2369,16 +2110,16 @@
   if (NOTE_INSN_BASIC_BLOCK_P (before))
     before = NEXT_INSN (before);
 
-  branch_label = gen_label_rtx ();
+  rtx_code_label *branch_label = gen_label_rtx ();
   LABEL_NUSES (branch_label)++;
   LABEL_PRESERVE_P (branch_label) = 1;
   insn = emit_label_before (branch_label, branch);
-  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
-  SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
-
-  hint = emit_insn_before (gen_hbr (branch_label, target), before);
+  rtx branch_label_ref = gen_rtx_LABEL_REF (VOIDmode, branch_label);
+  bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
+
+  hint = emit_insn_before (gen_hbr (branch_label_ref, target), before);
   recog_memoized (hint);
-  INSN_LOCATOR (hint) = INSN_LOCATOR (branch);
+  INSN_LOCATION (hint) = INSN_LOCATION (branch);
   HINTED_P (branch) = 1;
 
   if (GET_CODE (target) == LABEL_REF)
@@ -2401,7 +2142,7 @@
          which could make it too far for the branch offset to fit */
       insn = emit_insn_before (gen_blockage (), hint);
       recog_memoized (insn);
-      INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
+      INSN_LOCATION (insn) = INSN_LOCATION (hint);
     }
   else if (distance <= 8 * 4)
     {
@@ -2413,20 +2154,20 @@
 	  insn =
 	    emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
 	  recog_memoized (insn);
-	  INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
+	  INSN_LOCATION (insn) = INSN_LOCATION (hint);
 	}
 
       /* Make sure any nops inserted aren't scheduled before the hint. */
       insn = emit_insn_after (gen_blockage (), hint);
       recog_memoized (insn);
-      INSN_LOCATOR (insn) = INSN_LOCATOR (hint);
+      INSN_LOCATION (insn) = INSN_LOCATION (hint);
 
       /* Make sure any nops inserted aren't scheduled after the call. */
       if (CALL_P (branch) && distance < 8 * 4)
 	{
 	  insn = emit_insn_before (gen_blockage (), branch);
 	  recog_memoized (insn);
-	  INSN_LOCATOR (insn) = INSN_LOCATOR (branch);
+	  INSN_LOCATION (insn) = INSN_LOCATION (branch);
 	}
     }
 }
@@ -2434,9 +2175,9 @@
 /* Returns 0 if we don't want a hint for this branch.  Otherwise return
    the rtx for the branch target. */
 static rtx
-get_branch_target (rtx branch)
-{
-  if (GET_CODE (branch) == JUMP_INSN)
+get_branch_target (rtx_insn *branch)
+{
+  if (JUMP_P (branch))
     {
       rtx set, src;
 
@@ -2444,11 +2185,6 @@
       if (GET_CODE (PATTERN (branch)) == RETURN)
 	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
 
-      /* jump table */
-      if (GET_CODE (PATTERN (branch)) == ADDR_VEC
-	  || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
-	return 0;
-
      /* ASM GOTOs. */
      if (extract_asm_operands (PATTERN (branch)) != NULL)
 	return NULL;
@@ -2466,7 +2202,8 @@
 	    {
 	      /* If the more probable case is not a fall through, then
 	         try a branch hint.  */
-	      HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
+	      int prob = profile_probability::from_reg_br_prob_note
+			    (XINT (note, 0)).to_reg_br_prob_base ();
 	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
 		  && GET_CODE (XEXP (src, 1)) != PC)
 		lab = XEXP (src, 1);
@@ -2485,7 +2222,7 @@
 
       return src;
     }
-  else if (GET_CODE (branch) == CALL_INSN)
+  else if (CALL_P (branch))
     {
       rtx call;
       /* All of our call patterns are in a PARALLEL and the CALL is
@@ -2507,7 +2244,7 @@
    should only be used in a clobber, and this function searches for
    insns which clobber it.  */
 static bool
-insn_clobbers_hbr (rtx insn)
+insn_clobbers_hbr (rtx_insn *insn)
 {
   if (INSN_P (insn)
       && GET_CODE (PATTERN (insn)) == PARALLEL)
@@ -2538,9 +2275,9 @@
    and an hbrp within 16 instructions of FIRST.
  */
 static void
-insert_hbrp_for_ilb_runout (rtx first)
-{
-  rtx insn, before_4 = 0, before_16 = 0;
+insert_hbrp_for_ilb_runout (rtx_insn *first)
+{
+  rtx_insn *insn, *before_4 = 0, *before_16 = 0;
   int addr = 0, length, first_addr = -1;
   int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
   int insert_lnop_after = 0;
@@ -2612,7 +2349,7 @@
 		insn =
 		  emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
 		recog_memoized (insn);
-		INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
+		INSN_LOCATION (insn) = INSN_LOCATION (before_4);
 		INSN_ADDRESSES_NEW (insn,
 				    INSN_ADDRESSES (INSN_UID (before_4)));
 		PUT_MODE (insn, GET_MODE (before_4));
@@ -2621,7 +2358,7 @@
 		  {
 		    insn = emit_insn_before (gen_lnop (), before_4);
 		    recog_memoized (insn);
-		    INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
+		    INSN_LOCATION (insn) = INSN_LOCATION (before_4);
 		    INSN_ADDRESSES_NEW (insn,
 					INSN_ADDRESSES (INSN_UID (before_4)));
 		    PUT_MODE (insn, TImode);
@@ -2633,7 +2370,7 @@
 		insn =
 		  emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
 		recog_memoized (insn);
-		INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
+		INSN_LOCATION (insn) = INSN_LOCATION (before_16);
 		INSN_ADDRESSES_NEW (insn,
 				    INSN_ADDRESSES (INSN_UID (before_16)));
 		PUT_MODE (insn, GET_MODE (before_16));
@@ -2642,7 +2379,7 @@
 		  {
 		    insn = emit_insn_before (gen_lnop (), before_16);
 		    recog_memoized (insn);
-		    INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
+		    INSN_LOCATION (insn) = INSN_LOCATION (before_16);
 		    INSN_ADDRESSES_NEW (insn,
 					INSN_ADDRESSES (INSN_UID
 							(before_16)));
@@ -2659,17 +2396,18 @@
 
 /* The SPU might hang when it executes 48 inline instructions after a
    hinted branch jumps to its hinted target.  The beginning of a
-   function and the return from a call might have been hinted, and must
-   be handled as well.  To prevent a hang we insert 2 hbrps.  The first
-   should be within 6 insns of the branch target.  The second should be
-   within 22 insns of the branch target.  When determining if hbrps are
-   necessary, we look for only 32 inline instructions, because up to to
-   12 nops and 4 hbrps could be inserted.  Similarily, when inserting
-   new hbrps, we insert them within 4 and 16 insns of the target.  */
+   function and the return from a call might have been hinted, and
+   must be handled as well.  To prevent a hang we insert 2 hbrps.  The
+   first should be within 6 insns of the branch target.  The second
+   should be within 22 insns of the branch target.  When determining
+   if hbrps are necessary, we look for only 32 inline instructions,
+   because up to 12 nops and 4 hbrps could be inserted.  Similarly,
+   when inserting new hbrps, we insert them within 4 and 16 insns of
+   the target.  */
 static void
 insert_hbrp (void)
 {
-  rtx insn;
+  rtx_insn *insn;
   if (TARGET_SAFE_HINTS)
     {
       shorten_branches (get_insns ());
@@ -2686,6 +2424,19 @@
 
 static int in_spu_reorg;
 
+static void
+spu_var_tracking (void)
+{
+  if (flag_var_tracking)
+    {
+      df_analyze ();
+      timevar_push (TV_VAR_TRACKING);
+      variable_tracking_main ();
+      timevar_pop (TV_VAR_TRACKING);
+      df_finish_pass (false);
+    }
+}
+
 /* Insert branch hints.  There are no branch optimizations after this
    pass, so it's safe to set our branch hints now. */
 static void
@@ -2693,7 +2444,7 @@
 {
   sbitmap blocks;
   basic_block bb;
-  rtx branch, insn;
+  rtx_insn *branch, *insn;
   rtx branch_target = 0;
   int branch_addr = 0, insn_addr, required_dist = 0;
   int i;
@@ -2703,29 +2454,36 @@
     {
       /* We still do it for unoptimized code because an external
          function might have hinted a call or return. */
+      compute_bb_for_insn ();
       insert_hbrp ();
       pad_bb ();
+      spu_var_tracking ();
+      free_bb_for_insn ();
       return;
     }
 
-  blocks = sbitmap_alloc (last_basic_block);
-  sbitmap_zero (blocks);
+  blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
+  bitmap_clear (blocks);
 
   in_spu_reorg = 1;
   compute_bb_for_insn ();
 
+  /* (Re-)discover loops so that bb->loop_father can be used
+     in the analysis below.  */
+  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
+
   compact_blocks ();
 
   spu_bb_info =
-    (struct spu_bb_info *) xcalloc (n_basic_blocks,
+    (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
 				    sizeof (struct spu_bb_info));
 
   /* We need exact insn addresses and lengths.  */
   shorten_branches (get_insns ());
 
-  for (i = n_basic_blocks - 1; i >= 0; i--)
-    {
-      bb = BASIC_BLOCK (i);
+  for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
+    {
+      bb = BASIC_BLOCK_FOR_FN (cfun, i);
       branch = 0;
       if (spu_bb_info[i].prop_jump)
 	{
@@ -2758,7 +2516,7 @@
 		      || insn_clobbers_hbr (insn)
 		      || branch_addr - insn_addr > 600))
 		{
-		  rtx next = NEXT_INSN (insn);
+		  rtx_insn *next = NEXT_INSN (insn);
 		  int next_addr = INSN_ADDRESSES (INSN_UID (next));
 		  if (insn != BB_END (bb)
 		      && branch_addr - next_addr >= required_dist)
@@ -2797,7 +2555,7 @@
 	  /* If we haven't emitted a hint for this branch yet, it might
 	     be profitable to emit it in one of the predecessor blocks,
 	     especially for loops.  */
-	  rtx bbend;
+	  rtx_insn *bbend;
 	  basic_block prev = 0, prop = 0, prev2 = 0;
 	  int loop_exit = 0, simple_loop = 0;
 	  int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
@@ -2818,23 +2576,22 @@
 	     fallthru block. This catches the cases when it is a simple
 	     loop or when there is an initial branch into the loop. */
 	  if (prev && (loop_exit || simple_loop)
-	      && prev->loop_depth <= bb->loop_depth)
+	      && bb_loop_depth (prev) <= bb_loop_depth (bb))
 	    prop = prev;
 
 	  /* If there is only one adjacent predecessor.  Don't propagate
-	     outside this loop.  This loop_depth test isn't perfect, but
-	     I'm not sure the loop_father member is valid at this point.  */
+	     outside this loop.  */
 	  else if (prev && single_pred_p (bb)
-		   && prev->loop_depth == bb->loop_depth)
+		   && prev->loop_father == bb->loop_father)
 	    prop = prev;
 
 	  /* If this is the JOIN block of a simple IF-THEN then
-	     propogate the hint to the HEADER block. */
+	     propagate the hint to the HEADER block. */
 	  else if (prev && prev2
 		   && EDGE_COUNT (bb->preds) == 2
 		   && EDGE_COUNT (prev->preds) == 1
 		   && EDGE_PRED (prev, 0)->src == prev2
-		   && prev2->loop_depth == bb->loop_depth
+		   && prev2->loop_father == bb->loop_father
 		   && GET_CODE (branch_target) != REG)
 	    prop = prev;
 
@@ -2856,7 +2613,7 @@
 	      if (dump_file)
 		fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
 			 "for %i (loop_exit %i simple_loop %i dist %i)\n",
-			 bb->index, prop->index, bb->loop_depth,
+			 bb->index, prop->index, bb_loop_depth (bb),
 			 INSN_UID (branch), loop_exit, simple_loop,
 			 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
 
@@ -2877,11 +2634,11 @@
     }
   free (spu_bb_info);
 
-  if (!sbitmap_empty_p (blocks))
+  if (!bitmap_empty_p (blocks))
     find_many_sub_basic_blocks (blocks);
 
   /* We have to schedule to make sure alignment is ok. */
-  FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
+  FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
 
   /* The hints need to be scheduled, so call it again. */
   schedule_insns ();
@@ -2899,8 +2656,8 @@
 	   label because GCC expects it at the beginning of the block. */
 	rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
 	rtx label_ref = XVECEXP (unspec, 0, 0);
-	rtx label = XEXP (label_ref, 0);
-	rtx branch;
+	rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
+	rtx_insn *branch;
 	int offset = 0;
 	for (branch = NEXT_INSN (label);
 	     !JUMP_P (branch) && !CALL_P (branch);
@@ -2908,17 +2665,12 @@
 	  if (NONJUMP_INSN_P (branch))
 	    offset += get_attr_length (branch);
 	if (offset > 0)
-	  XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
+	  XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
       }
 
-  if (spu_flag_var_tracking)
-    {
-      df_analyze ();
-      timevar_push (TV_VAR_TRACKING);
-      variable_tracking_main ();
-      timevar_pop (TV_VAR_TRACKING);
-      df_finish_pass (false);
-    }
+  spu_var_tracking ();
+
+  loop_optimizer_finalize ();
 
   free_bb_for_insn ();
 
@@ -2934,7 +2686,7 @@
 }
 
 static int
-uses_ls_unit(rtx insn)
+uses_ls_unit(rtx_insn *insn)
 {
   rtx set = single_set (insn);
   if (set != 0
@@ -2945,7 +2697,7 @@
 }
 
 static int
-get_pipe (rtx insn)
+get_pipe (rtx_insn *insn)
 {
   enum attr_type t;
   /* Handle inline asm */
@@ -3041,7 +2793,8 @@
 
 static int
 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
-			  int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
+			  int verbose ATTRIBUTE_UNUSED,
+			  rtx_insn *insn, int more)
 {
   int len;
   int p;
@@ -3090,7 +2843,7 @@
 	prev_priority = INSN_PRIORITY (insn);
     }
 
-  /* Always try issueing more insns.  spu_sched_reorder will decide 
+  /* Always try issuing more insns.  spu_sched_reorder will decide 
      when the cycle should be advanced. */
   return 1;
 }
@@ -3099,11 +2852,11 @@
    TARGET_SCHED_REORDER2.  */
 static int
 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
-		   rtx *ready, int *nreadyp, int clock)
+		   rtx_insn **ready, int *nreadyp, int clock)
 {
   int i, nready = *nreadyp;
   int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
-  rtx insn;
+  rtx_insn *insn;
 
   clock_var = clock;
 
@@ -3148,6 +2901,7 @@
 	  case TYPE_LOAD:
 	  case TYPE_STORE:
 	    pipe_ls = i;
+	    /* FALLTHRU */
 	  case TYPE_LNOP:
 	  case TYPE_SHUF:
 	  case TYPE_BR:
@@ -3197,7 +2951,7 @@
      used to effect it. */
   if (in_spu_reorg && spu_dual_nops < 10)
     {
-      /* When we are at an even address and we are not issueing nops to
+      /* When we are at an even address and we are not issuing nops to
          improve scheduling then we need to advance the cycle.  */
       if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
 	  && (spu_dual_nops == 0
@@ -3238,7 +2992,8 @@
 
 /* INSN is dependent on DEP_INSN. */
 static int
-spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
+spu_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
+		       int cost, unsigned int)
 {
   rtx set;
 
@@ -3299,19 +3054,19 @@
      scheduler makes every insn in a block anti-dependent on the final
      jump_insn.  We adjust here so higher cost insns will get scheduled
      earlier. */
-  if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
-    return insn_cost (dep_insn) - 3;
+  if (JUMP_P (insn) && dep_type == REG_DEP_ANTI)
+    return insn_sched_cost (dep_insn) - 3;
 
   return cost;
 }
 
 /* Create a CONST_DOUBLE from a string.  */
-struct rtx_def *
-spu_float_const (const char *string, enum machine_mode mode)
+rtx
+spu_float_const (const char *string, machine_mode mode)
 {
   REAL_VALUE_TYPE value;
   value = REAL_VALUE_ATOF (string, mode);
-  return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
+  return const_double_from_real_value (value, mode);
 }
 
 int
@@ -3342,7 +3097,7 @@
 /* Return true when OP can be loaded by one of the il instructions, or
    when flow2 is not completed and OP can be loaded using ilhu and iohl. */
 int
-immediate_load_p (rtx op, enum machine_mode mode)
+immediate_load_p (rtx op, machine_mode mode)
 {
   if (CONSTANT_P (op))
     {
@@ -3405,7 +3160,7 @@
 /* OP is a CONSTANT_P.  Determine what instructions can be used to load
    it into a register.  MODE is only valid when OP is a CONST_INT. */
 static enum immediate_class
-classify_immediate (rtx op, enum machine_mode mode)
+classify_immediate (rtx op, machine_mode mode)
 {
   HOST_WIDE_INT val;
   unsigned char arr[16];
@@ -3421,11 +3176,8 @@
       && mode == V4SImode
       && GET_CODE (op) == CONST_VECTOR
       && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
-      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
-      && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
-      && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
-      && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
-    op = CONST_VECTOR_ELT (op, 0);
+      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
+    op = unwrap_const_vec_duplicate (op);
 
   switch (GET_CODE (op))
     {
@@ -3545,7 +3297,7 @@
 }
 
 int
-logical_immediate_p (rtx op, enum machine_mode mode)
+logical_immediate_p (rtx op, machine_mode mode)
 {
   HOST_WIDE_INT val;
   unsigned char arr[16];
@@ -3577,7 +3329,7 @@
 }
 
 int
-iohl_immediate_p (rtx op, enum machine_mode mode)
+iohl_immediate_p (rtx op, machine_mode mode)
 {
   HOST_WIDE_INT val;
   unsigned char arr[16];
@@ -3608,7 +3360,7 @@
 }
 
 int
-arith_immediate_p (rtx op, enum machine_mode mode,
+arith_immediate_p (rtx op, machine_mode mode,
 		   HOST_WIDE_INT low, HOST_WIDE_INT high)
 {
   HOST_WIDE_INT val;
@@ -3627,11 +3379,8 @@
 
   constant_to_array (mode, op, arr);
 
-  if (VECTOR_MODE_P (mode))
-    mode = GET_MODE_INNER (mode);
-
-  bytes = GET_MODE_SIZE (mode);
-  mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
+  bytes = GET_MODE_UNIT_SIZE (mode);
+  mode = int_mode_for_mode (GET_MODE_INNER (mode)).require ();
 
   /* Check that bytes are repeated. */
   for (i = bytes; i < 16; i += bytes)
@@ -3652,9 +3401,9 @@
    OP is 2^scale, scale >= LOW && scale <= HIGH.  When OP is a vector,
    all entries must be the same. */
 bool
-exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
-{
-  enum machine_mode int_mode;
+exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
+{
+  machine_mode int_mode;
   HOST_WIDE_INT val;
   unsigned char arr[16];
   int bytes, i, j;
@@ -3671,11 +3420,10 @@
 
   constant_to_array (mode, op, arr);
 
-  if (VECTOR_MODE_P (mode))
-    mode = GET_MODE_INNER (mode);
+  mode = GET_MODE_INNER (mode);
 
   bytes = GET_MODE_SIZE (mode);
-  int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
+  int_mode = int_mode_for_mode (mode).require ();
 
   /* Check that bytes are repeated. */
   for (i = bytes; i < 16; i += bytes)
@@ -3702,10 +3450,9 @@
 
 /* Return true if X is a SYMBOL_REF to an __ea qualified variable.  */
 
-static int
-ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
-{
-  rtx x = *px;
+static bool
+ea_symbol_ref_p (const_rtx x)
+{
   tree decl;
 
   if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
@@ -3729,26 +3476,26 @@
    - a 64-bit constant where the high and low bits are identical
      (DImode, DFmode)
    - a 128-bit constant where the four 32-bit words match.  */
-int
-spu_legitimate_constant_p (rtx x)
-{
+bool
+spu_legitimate_constant_p (machine_mode mode, rtx x)
+{
+  subrtx_iterator::array_type array;
   if (GET_CODE (x) == HIGH)
     x = XEXP (x, 0);
 
   /* Reject any __ea qualified reference.  These can't appear in
      instructions but must be forced to the constant pool.  */
-  if (for_each_rtx (&x, ea_symbol_ref, 0))
-    return 0;
+  FOR_EACH_SUBRTX (iter, array, x, ALL)
+    if (ea_symbol_ref_p (*iter))
+      return 0;
 
   /* V4SI with all identical symbols is valid. */
   if (!flag_pic
-      && GET_MODE (x) == V4SImode
+      && mode == V4SImode
       && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
 	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
 	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
-    return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
-	   && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
-	   && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
+    return const_vec_duplicate_p (x);
 
   if (GET_CODE (x) == CONST_VECTOR
       && !const_vector_immediate_p (x))
@@ -3766,7 +3513,7 @@
   16 byte modes because the expand phase will change all smaller MEM
   references to TImode.  */
 static bool
-spu_legitimate_address_p (enum machine_mode mode,
+spu_legitimate_address_p (machine_mode mode,
 			  rtx x, bool reg_ok_strict)
 {
   int aligned = GET_MODE_SIZE (mode) >= 16;
@@ -3784,7 +3531,7 @@
     case CONST:
       /* Keep __ea references until reload so that spu_expand_mov can see them
 	 in MEMs.  */
-      if (ea_symbol_ref (&x, 0))
+      if (ea_symbol_ref_p (x))
 	return !reload_in_progress && !reload_completed;
       return !TARGET_LARGE_MEM;
 
@@ -3793,8 +3540,9 @@
 
     case SUBREG:
       x = XEXP (x, 0);
-      if (REG_P (x))
+      if (!REG_P (x))
 	return 0;
+      /* FALLTHRU */
 
     case REG:
       return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
@@ -3811,8 +3559,14 @@
 	if (GET_CODE (op0) == REG
 	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
 	    && GET_CODE (op1) == CONST_INT
-	    && INTVAL (op1) >= -0x2000
-	    && INTVAL (op1) <= 0x1fff
+	    && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
+		/* If virtual registers are involved, the displacement will
+		   change later on anyway, so checking would be premature.
+		   Reload will make sure the final displacement after
+		   register elimination is OK.  */
+		|| op0 == arg_pointer_rtx
+		|| op0 == frame_pointer_rtx
+		|| op0 == virtual_stack_vars_rtx)
 	    && (!aligned || (INTVAL (op1) & 15) == 0))
 	  return TRUE;
 	if (GET_CODE (op0) == REG
@@ -3831,7 +3585,7 @@
 
 /* Like spu_legitimate_address_p, except with named addresses.  */
 static bool
-spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
+spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
 				     bool reg_ok_strict, addr_space_t as)
 {
   if (as == ADDR_SPACE_EA)
@@ -3845,9 +3599,9 @@
 
 /* When the address is reg + const_int, force the const_int into a
    register.  */
-rtx
+static rtx
 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
-			enum machine_mode mode ATTRIBUTE_UNUSED)
+			machine_mode mode ATTRIBUTE_UNUSED)
 {
   rtx op0, op1;
   /* Make sure both operands are registers.  */
@@ -3876,7 +3630,7 @@
 
 /* Like spu_legitimate_address, except with named address support.  */
 static rtx
-spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
+spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
 				   addr_space_t as)
 {
   if (as != ADDR_SPACE_GENERIC)
@@ -3885,6 +3639,45 @@
   return spu_legitimize_address (x, oldx, mode);
 }
 
+/* Reload reg + const_int for out-of-range displacements.  */
+rtx
+spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
+			       int opnum, int type)
+{
+  bool removed_and = false;
+
+  if (GET_CODE (ad) == AND
+      && CONST_INT_P (XEXP (ad, 1))
+      && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
+    {
+      ad = XEXP (ad, 0);
+      removed_and = true;
+    }
+
+  if (GET_CODE (ad) == PLUS
+      && REG_P (XEXP (ad, 0))
+      && CONST_INT_P (XEXP (ad, 1))
+      && !(INTVAL (XEXP (ad, 1)) >= -0x2000
+	   && INTVAL (XEXP (ad, 1)) <= 0x1fff))
+    {
+      /* Unshare the sum.  */
+      ad = copy_rtx (ad);
+
+      /* Reload the displacement.  */
+      push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
+		   BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
+		   opnum, (enum reload_type) type);
+
+      /* Add back AND for alignment if we stripped it.  */
+      if (removed_and)
+	ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
+
+      return ad;
+    }
+
+  return NULL_RTX;
+}
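The helper above only takes effect if reload is told to call it.  A minimal sketch of the usual wiring through the target header follows; the macro name and shape are an assumption based on the common LEGITIMIZE_RELOAD_ADDRESS pattern, since the companion spu.h change is not part of this hunk.

/* Hypothetical spu.h wiring (sketch, not taken from this patch).  */
#define LEGITIMIZE_RELOAD_ADDRESS(AD, MODE, OPNUM, TYPE, IND_LEVELS, WIN) \
  do {                                                                    \
    rtx new_rtx = spu_legitimize_reload_address ((AD), (MODE),            \
                                                 (OPNUM), (int) (TYPE));  \
    if (new_rtx)                                                          \
      {                                                                   \
        (AD) = new_rtx;   /* use the reloaded reg + reg form */           \
        goto WIN;                                                         \
      }                                                                   \
  } while (0)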
+
 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
    struct attribute_spec.handler.  */
 static tree
@@ -3910,7 +3703,7 @@
 			     int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
 {
   tree type = *node, result = NULL_TREE;
-  enum machine_mode mode;
+  machine_mode mode;
   int unsigned_p;
 
   while (POINTER_TYPE_P (type)
@@ -3923,22 +3716,22 @@
   unsigned_p = TYPE_UNSIGNED (type);
   switch (mode)
     {
-    case DImode:
+    case E_DImode:
       result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
       break;
-    case SImode:
+    case E_SImode:
       result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
       break;
-    case HImode:
+    case E_HImode:
       result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
       break;
-    case QImode:
+    case E_QImode:
       result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
       break;
-    case SFmode:
+    case E_SFmode:
       result = V4SF_type_node;
       break;
-    case DFmode:
+    case E_DFmode:
       result = V2DF_type_node;
       break;
     default:
@@ -3978,7 +3771,7 @@
 {
   int saved_regs_size = spu_saved_regs_size ();
   int sp_offset = 0;
-  if (!current_function_is_leaf || crtl->outgoing_args_size
+  if (!crtl->is_leaf || crtl->outgoing_args_size
       || get_frame_size () || saved_regs_size)
     sp_offset = STACK_POINTER_OFFSET;
   if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
@@ -3997,7 +3790,7 @@
 rtx
 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
 {
-  enum machine_mode mode = TYPE_MODE (type);
+  machine_mode mode = TYPE_MODE (type);
   int byte_size = ((mode == BLKmode)
 		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
 
@@ -4005,7 +3798,7 @@
   if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
       && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
     {
-      enum machine_mode smode;
+      machine_mode smode;
       rtvec v;
       int i;
       int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
@@ -4025,8 +3818,7 @@
 	{
 	  if (byte_size < 4)
 	    byte_size = 4;
-	  smode =
-	    smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
+	  smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
 	  RTVEC_ELT (v, n) =
 	    gen_rtx_EXPR_LIST (VOIDmode,
 			       gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
@@ -4038,10 +3830,11 @@
 }
 
 static rtx
-spu_function_arg (CUMULATIVE_ARGS *cum,
-		  enum machine_mode mode,
+spu_function_arg (cumulative_args_t cum_v,
+		  machine_mode mode,
 		  const_tree type, bool named ATTRIBUTE_UNUSED)
 {
+  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
   int byte_size;
 
   if (*cum >= MAX_REGISTER_ARGS)
@@ -4059,11 +3852,11 @@
   if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
       && byte_size < UNITS_PER_WORD && byte_size > 0)
     {
-      enum machine_mode smode;
+      machine_mode smode;
       rtx gr_reg;
       if (byte_size < 4)
 	byte_size = 4;
-      smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
+      smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
       gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
 				  gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
 				  const0_rtx);
@@ -4074,22 +3867,44 @@
 }
 
 static void
-spu_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
+spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
 			  const_tree type, bool named ATTRIBUTE_UNUSED)
 {
+  CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+
   *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
 	   ? 1
 	   : mode == BLKmode
 	   ? ((int_size_in_bytes (type) + 15) / 16)
 	   : mode == VOIDmode
 	   ? 1
-	   : HARD_REGNO_NREGS (cum, mode));
+	   : spu_hard_regno_nregs (FIRST_ARG_REGNUM, mode));
+}
+
+/* Implement TARGET_FUNCTION_ARG_OFFSET.  The SPU ABI wants 32/64-bit
+   types at offset 0 in the quad-word on the stack.  8/16-bit types
+   should be at offsets 3/2 respectively.  */
+
+static HOST_WIDE_INT
+spu_function_arg_offset (machine_mode mode, const_tree type)
+{
+  if (type && INTEGRAL_TYPE_P (type) && GET_MODE_SIZE (mode) < 4)
+    return 4 - GET_MODE_SIZE (mode);
+  return 0;
+}
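As a quick worked example of the offset rule just above (purely illustrative): a QImode argument has GET_MODE_SIZE 1, so the hook returns 4 - 1 = 3 and the byte lands at offset 3 of its quadword slot; an HImode argument gets 4 - 2 = 2; SImode and wider types fall through to offset 0, which is the 32/64-bit case described in the comment.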
+
+/* Implement TARGET_FUNCTION_ARG_PADDING.  */
+
+static pad_direction
+spu_function_arg_padding (machine_mode, const_tree)
+{
+  return PAD_UPWARD;
 }
 
 /* Variable sized types are passed by reference.  */
 static bool
-spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
-		       enum machine_mode mode ATTRIBUTE_UNUSED,
+spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
+		       machine_mode mode ATTRIBUTE_UNUSED,
 		       const_tree type, bool named ATTRIBUTE_UNUSED)
 {
   return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
@@ -4130,11 +3945,11 @@
 		       FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
 
   DECL_FIELD_CONTEXT (f_args) = record;
-  DECL_ALIGN (f_args) = 128;
+  SET_DECL_ALIGN (f_args, 128);
   DECL_USER_ALIGN (f_args) = 1;
 
   DECL_FIELD_CONTEXT (f_skip) = record;
-  DECL_ALIGN (f_skip) = 128;
+  SET_DECL_ALIGN (f_skip, 128);
   DECL_USER_ALIGN (f_skip) = 1;
 
   TYPE_STUB_DECL (record) = type_decl;
@@ -4186,17 +4001,15 @@
   /* Find the __args area.  */
   t = make_tree (TREE_TYPE (args), nextarg);
   if (crtl->args.pretend_args_size > 0)
-    t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
-		size_int (-STACK_POINTER_OFFSET));
+    t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
   t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
   TREE_SIDE_EFFECTS (t) = 1;
   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
 
   /* Find the __skip area.  */
   t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
-  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
-	      size_int (crtl->args.pretend_args_size
-			 - STACK_POINTER_OFFSET));
+  t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
+				       - STACK_POINTER_OFFSET));
   t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
   TREE_SIDE_EFFECTS (t) = 1;
   expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
@@ -4226,13 +4039,12 @@
   tree f_args, f_skip;
   tree args, skip;
   HOST_WIDE_INT size, rsize;
-  tree paddedsize, addr, tmp;
+  tree addr, tmp;
   bool pass_by_reference_p;
 
   f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
   f_skip = DECL_CHAIN (f_args);
 
-  valist = build_simple_mem_ref (valist);
   args =
     build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
   skip =
@@ -4242,8 +4054,8 @@
 
   /* if an object is dynamically sized, a pointer to it is passed
      instead of the object itself. */
-  pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
-					       false);
+  pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
+					   false);
   if (pass_by_reference_p)
     type = build_pointer_type (type);
   size = int_size_in_bytes (type);
@@ -4251,21 +4063,20 @@
 
   /* build conditional expression to calculate addr. The expression
      will be gimplified later. */
-  paddedsize = size_int (rsize);
-  tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
+  tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
   tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
 		build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
 		build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
 		unshare_expr (skip)));
 
   tmp = build3 (COND_EXPR, ptr_type_node, tmp,
-		build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
-			size_int (32)), unshare_expr (args));
+		fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
+		unshare_expr (args));
 
   gimplify_assign (addr, tmp, pre_p);
 
   /* update VALIST.__args */
-  tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
+  tmp = fold_build_pointer_plus_hwi (addr, rsize);
   gimplify_assign (unshare_expr (args), tmp, pre_p);
 
   addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
@@ -4281,8 +4092,8 @@
    to the first unnamed parameters.  If the first unnamed parameter is
    in the stack then save no registers.  Set pretend_args_size to the
    amount of space needed to save the registers. */
-void
-spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
+static void
+spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
 			    tree type, int *pretend_size, int no_rtl)
 {
   if (!no_rtl)
@@ -4290,17 +4101,17 @@
       rtx tmp;
       int regno;
       int offset;
-      int ncum = *cum;
+      int ncum = *get_cumulative_args (cum);
 
       /* cum currently points to the last named argument, we want to
          start at the next argument. */
-      spu_function_arg_advance (&ncum, mode, type, true);
+      spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
 
       offset = -STACK_POINTER_OFFSET;
       for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
 	{
 	  tmp = gen_frame_mem (V4SImode,
-			       plus_constant (virtual_incoming_args_rtx,
+			       plus_constant (Pmode, virtual_incoming_args_rtx,
 					      offset));
 	  emit_move_insn (tmp,
 			  gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
@@ -4348,7 +4159,7 @@
    which is both 16-byte aligned and padded to a 16-byte boundary.  This
    would make it safe to store with a single instruction. 
    We guarantee the alignment and padding for static objects by aligning
-   all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
+   all of them to 16-bytes. (DATA_ALIGNMENT and TARGET_CONSTANT_ALIGNMENT.)
    FIXME: We currently cannot guarantee this for objects on the stack
    because assign_parm_setup_stack calls assign_stack_local with the
    alignment of the parameter mode and in that case the alignment never
@@ -4356,7 +4167,7 @@
 static int
 store_with_one_insn_p (rtx mem)
 {
-  enum machine_mode mode = GET_MODE (mem);
+  machine_mode mode = GET_MODE (mem);
   rtx addr = XEXP (mem, 0);
   if (mode == BLKmode)
     return 0;
@@ -4367,7 +4178,7 @@
     {
       /* We use the associated declaration to make sure the access is
          referring to the whole object.
-         We check both MEM_EXPR and and SYMBOL_REF_DECL.  I'm not sure
+         We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
          if it is necessary.  Will there be cases where one exists, and
          the other does not?  Will there be cases where both exist, but
          have different types?  */
@@ -4418,14 +4229,14 @@
       if (!cache_fetch_dirty)
 	cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
       emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
-			       2, ea_addr, EAmode, ndirty, SImode);
+			       ea_addr, EAmode, ndirty, SImode);
     }
   else
     {
       if (!cache_fetch)
 	cache_fetch = init_one_libfunc ("__cache_fetch");
       emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
-			       1, ea_addr, EAmode);
+			       ea_addr, EAmode);
     }
 }
 
@@ -4468,7 +4279,8 @@
   rtx tag_eq_pack = gen_reg_rtx (V4SImode);
   rtx tag_eq_pack_si = gen_reg_rtx (SImode);
   rtx eq_index = gen_reg_rtx (SImode);
-  rtx bcomp, hit_label, hit_ref, cont_label, insn;
+  rtx bcomp, hit_label, hit_ref, cont_label;
+  rtx_insn *insn;
 
   if (spu_ea_model != 32)
     {
@@ -4477,7 +4289,7 @@
       tag_equal_hi = gen_reg_rtx (V4SImode);
     }
 
-  emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
+  emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
   emit_move_insn (tag_arr, tag_arr_sym);
   v = 0x0001020300010203LL;
   emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
@@ -4504,14 +4316,16 @@
   emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
   if (spu_ea_model != 32)
     emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
-					       plus_constant (tag_addr, 16)));
+					       plus_constant (Pmode,
+							      tag_addr, 16)));
 
   /* tag = ea_addr & -128  */
   emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
 
   /* Read all four cache data pointers.  */
   emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
-					   plus_constant (tag_addr, 32)));
+					   plus_constant (Pmode,
+							  tag_addr, 32)));
 
   /* Compare tags.  */
   emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
@@ -4544,12 +4358,11 @@
   hit_label = gen_label_rtx ();
   hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
   bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
-  insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
+  insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
 				      gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
 							    hit_ref, pc_rtx)));
   /* Say that this branch is very likely to happen.  */
-  v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
-  add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
+  add_reg_br_prob_note (insn, profile_probability::very_likely ());
 
   ea_load_store (mem, is_store, ea_addr, data_addr);
   cont_label = gen_label_rtx ();
@@ -4628,7 +4441,7 @@
 }
 
 int
-spu_expand_mov (rtx * ops, enum machine_mode mode)
+spu_expand_mov (rtx * ops, machine_mode mode)
 {
   if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
     {
@@ -4642,7 +4455,7 @@
   if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
     {
       rtx from = SUBREG_REG (ops[1]);
-      enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
+      scalar_int_mode imode = int_mode_for_mode (GET_MODE (from)).require ();
 
       gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
 		  && GET_MODE_CLASS (imode) == MODE_INT
@@ -4709,12 +4522,12 @@
 static void
 spu_convert_move (rtx dst, rtx src)
 {
-  enum machine_mode mode = GET_MODE (dst);
-  enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
+  machine_mode mode = GET_MODE (dst);
+  machine_mode int_mode = int_mode_for_mode (mode).require ();
   rtx reg;
   gcc_assert (GET_MODE (src) == TImode);
   reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
-  emit_insn (gen_rtx_SET (VOIDmode, reg,
+  emit_insn (gen_rtx_SET (reg,
 	       gen_rtx_TRUNCATE (int_mode,
 		 gen_rtx_LSHIFTRT (TImode, src,
 		   GEN_INT (int_mode == DImode ? 64 : 96)))));
@@ -4862,7 +4675,7 @@
 
   if (dst1)
     {
-      addr1 = plus_constant (copy_rtx (addr), 16);
+      addr1 = plus_constant (SImode, copy_rtx (addr), 16);
       addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
       emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
     }
@@ -4873,7 +4686,7 @@
 int
 spu_split_load (rtx * ops)
 {
-  enum machine_mode mode = GET_MODE (ops[0]);
+  machine_mode mode = GET_MODE (ops[0]);
   rtx addr, load, rot;
   int rot_amt;
 
@@ -4907,7 +4720,7 @@
 int
 spu_split_store (rtx * ops)
 {
-  enum machine_mode mode = GET_MODE (ops[0]);
+  machine_mode mode = GET_MODE (ops[0]);
   rtx reg;
   rtx addr, p0, p1, p1_lo, smem;
   int aform;
@@ -5164,7 +4977,7 @@
 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
    can be generated using the cbd, chd, cwd or cdd instruction. */
 int
-cpat_const_p (rtx x, enum machine_mode mode)
+cpat_const_p (rtx x, machine_mode mode)
 {
   if (CONSTANT_P (x))
     {
@@ -5210,7 +5023,7 @@
    array.  Use MODE for CONST_INT's.  When the constant's mode is smaller
    than 16 bytes, the value is repeated across the rest of the array. */
 void
-constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
+constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
 {
   HOST_WIDE_INT val;
   int i, j, first;
@@ -5289,9 +5102,9 @@
    smaller than 16 bytes, use the bytes that would represent that value
    in a register, e.g., for QImode return the value of arr[3].  */
 rtx
-array_to_constant (enum machine_mode mode, const unsigned char arr[16])
-{
-  enum machine_mode inner_mode;
+array_to_constant (machine_mode mode, const unsigned char arr[16])
+{
+  machine_mode inner_mode;
   rtvec v;
   int units, size, i, j, k;
   HOST_WIDE_INT val;
@@ -5423,10 +5236,11 @@
 }
 
 static bool
-spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
+spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
+	       int opno ATTRIBUTE_UNUSED, int *total,
 	       bool speed ATTRIBUTE_UNUSED)
 {
-  enum machine_mode mode = GET_MODE (x);
+  int code = GET_CODE (x);
   int cost = COSTS_N_INSNS (2);
 
   /* Folding to a CONST_VECTOR will use extra space but there might
@@ -5435,7 +5249,7 @@
      of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
      because this cost will only be compared against a single insn. 
      if (code == CONST_VECTOR)
-       return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
+       return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
    */
 
   /* Use defaults for float operations.  Not accurate but good enough. */
@@ -5537,7 +5351,7 @@
   return true;
 }
 
-static enum machine_mode
+static scalar_int_mode
 spu_unwind_word_mode (void)
 {
   return SImode;
@@ -5576,11 +5390,11 @@
   emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
   emit_insn (gen_subv4si3 (sp, sp, splatted));
 
-  if (flag_stack_check)
+  if (flag_stack_check || flag_stack_clash_protection)
     {
       rtx avail = gen_reg_rtx(SImode);
       rtx result = gen_reg_rtx(SImode);
-      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
+      emit_insn (gen_vec_extractv4sisi (avail, sp, GEN_INT (1)));
       emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
       emit_insn (gen_spu_heq (result, GEN_INT(0) ));
     }
@@ -5635,6 +5449,7 @@
   set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
   set_optab_libfunc (clz_optab, DImode, "__clzdi2");
   set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
+  set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
   set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
   set_optab_libfunc (parity_optab, DImode, "__paritydi2");
 
@@ -5665,7 +5480,7 @@
 /* Make a subreg, stripping any existing subreg.  We could possibly just
    call simplify_subreg, but in this case we know what we want. */
 rtx
-spu_gen_subreg (enum machine_mode mode, rtx x)
+spu_gen_subreg (machine_mode mode, rtx x)
 {
   if (GET_CODE (x) == SUBREG)
     x = SUBREG_REG (x);
@@ -5836,7 +5651,7 @@
 void
 spu_builtin_splats (rtx ops[])
 {
-  enum machine_mode mode = GET_MODE (ops[0]);
+  machine_mode mode = GET_MODE (ops[0]);
   if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
     {
       unsigned char arr[16];
@@ -5852,24 +5667,24 @@
 	ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
       switch (mode)
 	{
-	case V2DImode:
-	case V2DFmode:
+	case E_V2DImode:
+	case E_V2DFmode:
 	  shuf =
 	    immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
 				TImode);
 	  break;
-	case V4SImode:
-	case V4SFmode:
+	case E_V4SImode:
+	case E_V4SFmode:
 	  shuf =
 	    immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
 				TImode);
 	  break;
-	case V8HImode:
+	case E_V8HImode:
 	  shuf =
 	    immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
 				TImode);
 	  break;
-	case V16QImode:
+	case E_V16QImode:
 	  shuf =
 	    immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
 				TImode);
@@ -5885,7 +5700,7 @@
 void
 spu_builtin_extract (rtx ops[])
 {
-  enum machine_mode mode;
+  machine_mode mode;
   rtx rot, from, tmp;
 
   mode = GET_MODE (ops[1]);
@@ -5894,23 +5709,23 @@
     {
       switch (mode)
 	{
-	case V16QImode:
-	  emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
+	case E_V16QImode:
+	  emit_insn (gen_vec_extractv16qiqi (ops[0], ops[1], ops[2]));
 	  break;
-	case V8HImode:
-	  emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
+	case E_V8HImode:
+	  emit_insn (gen_vec_extractv8hihi (ops[0], ops[1], ops[2]));
 	  break;
-	case V4SFmode:
-	  emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
+	case E_V4SFmode:
+	  emit_insn (gen_vec_extractv4sfsf (ops[0], ops[1], ops[2]));
 	  break;
-	case V4SImode:
-	  emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
+	case E_V4SImode:
+	  emit_insn (gen_vec_extractv4sisi (ops[0], ops[1], ops[2]));
 	  break;
-	case V2DImode:
-	  emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
+	case E_V2DImode:
+	  emit_insn (gen_vec_extractv2didi (ops[0], ops[1], ops[2]));
 	  break;
-	case V2DFmode:
-	  emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
+	case E_V2DFmode:
+	  emit_insn (gen_vec_extractv2dfdf (ops[0], ops[1], ops[2]));
 	  break;
 	default:
 	  abort ();
@@ -5924,19 +5739,19 @@
 
   switch (mode)
     {
-    case V16QImode:
+    case E_V16QImode:
       emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
       break;
-    case V8HImode:
+    case E_V8HImode:
       emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
       emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
       break;
-    case V4SFmode:
-    case V4SImode:
+    case E_V4SFmode:
+    case E_V4SImode:
       emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
       break;
-    case V2DImode:
-    case V2DFmode:
+    case E_V2DImode:
+    case E_V2DFmode:
       emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
       break;
     default:
@@ -5950,8 +5765,8 @@
 void
 spu_builtin_insert (rtx ops[])
 {
-  enum machine_mode mode = GET_MODE (ops[0]);
-  enum machine_mode imode = GET_MODE_INNER (mode);
+  machine_mode mode = GET_MODE (ops[0]);
+  machine_mode imode = GET_MODE_INNER (mode);
   rtx mask = gen_reg_rtx (TImode);
   rtx offset;
 
@@ -5972,7 +5787,7 @@
 void
 spu_builtin_promote (rtx ops[])
 {
-  enum machine_mode mode, imode;
+  machine_mode mode, imode;
   rtx rot, from, offset;
   HOST_WIDE_INT pos;
 
@@ -5996,20 +5811,20 @@
       offset = gen_reg_rtx (SImode);
       switch (mode)
 	{
-	case V16QImode:
+	case E_V16QImode:
 	  emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
 	  break;
-	case V8HImode:
+	case E_V8HImode:
 	  emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
 	  emit_insn (gen_addsi3 (offset, offset, offset));
 	  break;
-	case V4SFmode:
-	case V4SImode:
+	case E_V4SFmode:
+	case E_V4SImode:
 	  emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
 	  emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
 	  break;
-	case V2DImode:
-	case V2DFmode:
+	case E_V2DImode:
+	case E_V2DFmode:
 	  emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
 	  break;
 	default:
@@ -6095,6 +5910,14 @@
   emit_insn (gen_sync ());
 }
 
+static bool
+spu_warn_func_return (tree decl)
+{
+  /* Naked functions are implemented entirely in assembly, including the
+     return sequence, so suppress warnings about this.  */
+  return !spu_naked_function_p (decl);
+}
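For illustration, a user-level sketch of the case this hook is protecting (the function name is hypothetical, and "bi $lr" is assumed to be the usual SPU return idiom):

/* Illustrative only: a naked function supplies its own return sequence,
   so no missing-return warning should be issued for it.  */
int __attribute__ ((naked))
spu_naked_stub (void)
{
  __asm__ volatile ("bi\t$lr");	/* hand-written epilogue and return */
}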
+
 void
 spu_expand_sign_extend (rtx ops[])
 {
@@ -6117,19 +5940,19 @@
 	arr[i] = 0x10;
       switch (GET_MODE (ops[1]))
 	{
-	case HImode:
+	case E_HImode:
 	  sign = gen_reg_rtx (SImode);
 	  emit_insn (gen_extendhisi2 (sign, ops[1]));
 	  arr[last] = 0x03;
 	  arr[last - 1] = 0x02;
 	  break;
-	case SImode:
+	case E_SImode:
 	  sign = gen_reg_rtx (SImode);
 	  emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
 	  for (i = 0; i < 4; i++)
 	    arr[last - i] = 3 - i;
 	  break;
-	case DImode:
+	case E_DImode:
 	  sign = gen_reg_rtx (SImode);
 	  c = gen_reg_rtx (SImode);
 	  emit_insn (gen_spu_convert (c, ops[1]));
@@ -6150,7 +5973,7 @@
 void
 spu_expand_vector_init (rtx target, rtx vals)
 {
-  enum machine_mode mode = GET_MODE (target);
+  machine_mode mode = GET_MODE (target);
   int n_elts = GET_MODE_NUNITS (mode);
   int n_var = 0;
   bool all_same = true;
@@ -6241,8 +6064,8 @@
 
 static int
 get_vec_cmp_insn (enum rtx_code code,
-                  enum machine_mode dest_mode,
-                  enum machine_mode op_mode)
+                  machine_mode dest_mode,
+                  machine_mode op_mode)
 
 {
   switch (code)
@@ -6291,12 +6114,12 @@
 static rtx
 spu_emit_vector_compare (enum rtx_code rcode,
                          rtx op0, rtx op1,
-                         enum machine_mode dmode)
+                         machine_mode dmode)
 {
   int vec_cmp_insn;
   rtx mask;
-  enum machine_mode dest_mode;
-  enum machine_mode op_mode = GET_MODE (op1);
+  machine_mode dest_mode;
+  machine_mode op_mode = GET_MODE (op1);
 
   gcc_assert (GET_MODE (op0) == GET_MODE (op1));
 
@@ -6330,13 +6153,24 @@
           try_again = true;
           break;
         case NE:
+	case UNEQ:
+	case UNLE:
+	case UNLT:
+	case UNGE:
+	case UNGT:
+	case UNORDERED:
           /* Treat A != B as ~(A==B); the unordered comparisons above are
              handled the same way, by emitting the reversed comparison and
              complementing it.  */
           {
+	    enum rtx_code rev_code;
             enum insn_code nor_code;
-            rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
+	    rtx rev_mask;
+
+	    rev_code = reverse_condition_maybe_unordered (rcode);
+            rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
+
             nor_code = optab_handler (one_cmpl_optab, dest_mode);
             gcc_assert (nor_code != CODE_FOR_nothing);
-            emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
+            emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
             if (dmode != dest_mode)
               {
                 rtx temp = gen_reg_rtx (dest_mode);
@@ -6381,6 +6215,48 @@
             return mask;
           }
           break;
+        case LTGT:
+          /* Try LT OR GT */
+          {
+            rtx lt_rtx, gt_rtx;
+            enum insn_code ior_code;
+
+            lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
+            gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
+
+            ior_code = optab_handler (ior_optab, dest_mode);
+            gcc_assert (ior_code != CODE_FOR_nothing);
+            emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
+            if (dmode != dest_mode)
+              {
+                rtx temp = gen_reg_rtx (dest_mode);
+                convert_move (temp, mask, 0);
+                return temp;
+              }
+            return mask;
+          }
+          break;
+        case ORDERED:
+          /* Implement as (A==A) & (B==B) */
+          {
+            rtx a_rtx, b_rtx;
+            enum insn_code and_code;
+
+            a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
+            b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
+
+            and_code = optab_handler (and_optab, dest_mode);
+            gcc_assert (and_code != CODE_FOR_nothing);
+            emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
+            if (dmode != dest_mode)
+              {
+                rtx temp = gen_reg_rtx (dest_mode);
+                convert_move (temp, mask, 0);
+                return temp;
+              }
+            return mask;
+          }
+          break;
         default:
           gcc_unreachable ();
         }
@@ -6419,7 +6295,7 @@
 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
                            rtx cond, rtx cc_op0, rtx cc_op1)
 {   
-  enum machine_mode dest_mode = GET_MODE (dest);
+  machine_mode dest_mode = GET_MODE (dest);
   enum rtx_code rcode = GET_CODE (cond);
   rtx mask;
     
@@ -6432,7 +6308,7 @@
 }
 
 static rtx
-spu_force_reg (enum machine_mode mode, rtx op)
+spu_force_reg (machine_mode mode, rtx op)
 {
   rtx x, r;
   if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
@@ -6542,9 +6418,7 @@
       ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
     }
 
-  /* The insn pattern may have additional operands (SCRATCH).
-     Return the number of actual non-SCRATCH operands.  */
-  gcc_assert (i <= insn_data[icode].n_operands);
+  gcc_assert (i == insn_data[icode].n_generator_args);
   return i;
 }
 
@@ -6555,7 +6429,7 @@
   rtx pat;
   rtx ops[8];
   enum insn_code icode = (enum insn_code) d->icode;
-  enum machine_mode mode, tmode;
+  machine_mode mode, tmode;
   int i, p;
   int n_operands;
   tree return_type;
@@ -6590,7 +6464,7 @@
 
   if (d->fcode == SPU_MASK_FOR_LOAD)
     {
-      enum machine_mode mode = insn_data[icode].operand[1].mode;
+      machine_mode mode = insn_data[icode].operand[1].mode;
       tree arg;
       rtx addr, op, pat;
 
@@ -6602,8 +6476,7 @@
 
       /* negate addr */
       op = gen_reg_rtx (GET_MODE (addr));
-      emit_insn (gen_rtx_SET (VOIDmode, op,
-                 gen_rtx_NEG (GET_MODE (addr), addr)));
+      emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
       op = gen_rtx_MEM (mode, op);
 
       pat = GEN_FCN (icode) (target, op);
@@ -6641,7 +6514,7 @@
 	  else
 	    {
 	      rtx reg = gen_reg_rtx (mode);
-	      enum machine_mode imode = GET_MODE_INNER (mode);
+	      machine_mode imode = GET_MODE_INNER (mode);
 	      if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
 		ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
 	      if (imode != GET_MODE (ops[i]))
@@ -6715,7 +6588,7 @@
 spu_expand_builtin (tree exp,
 		    rtx target,
 		    rtx subtarget ATTRIBUTE_UNUSED,
-		    enum machine_mode mode ATTRIBUTE_UNUSED,
+		    machine_mode mode ATTRIBUTE_UNUSED,
 		    int ignore ATTRIBUTE_UNUSED)
 {
   tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
@@ -6731,40 +6604,6 @@
   abort ();
 }
 
-/* Implement targetm.vectorize.builtin_mul_widen_even.  */
-static tree
-spu_builtin_mul_widen_even (tree type)
-{
-  switch (TYPE_MODE (type))
-    {
-    case V8HImode:
-      if (TYPE_UNSIGNED (type))
-	return spu_builtin_decls[SPU_MULE_0];
-      else
-	return spu_builtin_decls[SPU_MULE_1];
-      break;
-    default:
-      return NULL_TREE;
-    }
-}
-
-/* Implement targetm.vectorize.builtin_mul_widen_odd.  */
-static tree
-spu_builtin_mul_widen_odd (tree type)
-{
-  switch (TYPE_MODE (type))
-    {
-    case V8HImode:
-      if (TYPE_UNSIGNED (type))
-	return spu_builtin_decls[SPU_MULO_1];
-      else
-	return spu_builtin_decls[SPU_MULO_0]; 
-      break;
-    default:
-      return NULL_TREE;
-    }
-}
-
 /* Implement targetm.vectorize.builtin_mask_for_load.  */
 static tree
 spu_builtin_mask_for_load (void)
@@ -6775,9 +6614,11 @@
 /* Implement targetm.vectorize.builtin_vectorization_cost.  */
 static int 
 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
-                                tree vectype ATTRIBUTE_UNUSED,
+                                tree vectype,
                                 int misalign ATTRIBUTE_UNUSED)
 {
+  unsigned elements;
+
   switch (type_of_cost)
     {
       case scalar_stmt:
@@ -6788,6 +6629,7 @@
       case scalar_to_vec:
       case cond_branch_not_taken:
       case vec_perm:
+      case vec_promote_demote:
         return 1;
 
       case scalar_store:
@@ -6798,16 +6640,80 @@
         return 2;
 
       case unaligned_load:
+      case vector_gather_load:
+      case vector_scatter_store:
         return 2;
 
       case cond_branch_taken:
         return 6;
 
+      case vec_construct:
+	elements = TYPE_VECTOR_SUBPARTS (vectype);
+	return elements / 2 + 1;
+
       default:
         gcc_unreachable ();
     }
 }
 
+/* Implement targetm.vectorize.init_cost.  */
+
+static void *
+spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
+{
+  unsigned *cost = XNEWVEC (unsigned, 3);
+  cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
+  return cost;
+}
+
+/* Implement targetm.vectorize.add_stmt_cost.  */
+
+static unsigned
+spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
+		   struct _stmt_vec_info *stmt_info, int misalign,
+		   enum vect_cost_model_location where)
+{
+  unsigned *cost = (unsigned *) data;
+  unsigned retval = 0;
+
+  if (flag_vect_cost_model)
+    {
+      tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
+      int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
+
+      /* Statements in an inner loop relative to the loop being
+	 vectorized are weighted more heavily.  The value here is
+	 arbitrary and could potentially be improved with analysis.  */
+      if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
+	count *= 50;  /* FIXME.  */
+
+      retval = (unsigned) (count * stmt_cost);
+      cost[where] += retval;
+    }
+
+  return retval;
+}
+
+/* Implement targetm.vectorize.finish_cost.  */
+
+static void
+spu_finish_cost (void *data, unsigned *prologue_cost,
+		 unsigned *body_cost, unsigned *epilogue_cost)
+{
+  unsigned *cost = (unsigned *) data;
+  *prologue_cost = cost[vect_prologue];
+  *body_cost     = cost[vect_body];
+  *epilogue_cost = cost[vect_epilogue];
+}
+
+/* Implement targetm.vectorize.destroy_cost_data.  */
+
+static void
+spu_destroy_cost_data (void *data)
+{
+  free (data);
+}
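Taken together, the four hooks above implement the vectorizer's cost-model protocol.  A sketch of the call sequence as the generic vectorizer would drive it (illustrative only; the real caller lives in the tree vectorizer, not in this file):

  void *data = spu_init_cost (NULL);
  unsigned prologue, body, epilogue;
  /* Account for one vector statement in the loop body.  */
  spu_add_stmt_cost (data, 1, vector_stmt, NULL, 0, vect_body);
  spu_finish_cost (data, &prologue, &body, &epilogue);
  spu_destroy_cost_data (data);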
+
 /* Return true iff a data reference of TYPE can reach vector alignment (16)
    after applying N iterations.  This routine does not determine how many
    iterations are required to reach the desired alignment.  */
@@ -6822,51 +6728,8 @@
   return true;
 }
 
-/* Implement targetm.vectorize.builtin_vec_perm.  */
-tree
-spu_builtin_vec_perm (tree type, tree *mask_element_type)
-{
-  *mask_element_type = unsigned_char_type_node;
-
-  switch (TYPE_MODE (type))
-    {
-    case V16QImode:
-      if (TYPE_UNSIGNED (type))
-        return spu_builtin_decls[SPU_SHUFFLE_0];
-      else
-        return spu_builtin_decls[SPU_SHUFFLE_1];
-
-    case V8HImode:
-      if (TYPE_UNSIGNED (type))
-        return spu_builtin_decls[SPU_SHUFFLE_2];
-      else
-        return spu_builtin_decls[SPU_SHUFFLE_3];
-
-    case V4SImode:
-      if (TYPE_UNSIGNED (type))
-        return spu_builtin_decls[SPU_SHUFFLE_4];
-      else
-        return spu_builtin_decls[SPU_SHUFFLE_5];
-
-    case V2DImode:
-      if (TYPE_UNSIGNED (type))
-        return spu_builtin_decls[SPU_SHUFFLE_6];
-      else
-        return spu_builtin_decls[SPU_SHUFFLE_7];
-
-    case V4SFmode:
-      return spu_builtin_decls[SPU_SHUFFLE_8];
-
-    case V2DFmode:
-      return spu_builtin_decls[SPU_SHUFFLE_9];
-
-    default:
-      return NULL_TREE;
-    }
-}
-
 /* Return the appropriate mode for a named address pointer.  */
-static enum machine_mode
+static scalar_int_mode
 spu_addr_space_pointer_mode (addr_space_t addrspace)
 {
   switch (addrspace)
@@ -6881,7 +6744,7 @@
 }
 
 /* Return the appropriate mode for a named address address.  */
-static enum machine_mode
+static scalar_int_mode
 spu_addr_space_address_mode (addr_space_t addrspace)
 {
   switch (addrspace)
@@ -6984,7 +6847,7 @@
 
   for (i = 0; i < g->num_nodes; i++)
     {
-      rtx insn = g->nodes[i].insn;
+      rtx_insn *insn = g->nodes[i].insn;
       int p = get_pipe (insn) + 2;
 
       gcc_assert (p >= 0);
@@ -7027,7 +6890,7 @@
     }
 }
 
-static enum machine_mode
+static scalar_int_mode
 spu_libgcc_cmp_return_mode (void)
 {
 
@@ -7036,7 +6899,7 @@
   return SImode;
 }
 
-static enum machine_mode
+static scalar_int_mode
 spu_libgcc_shift_count_mode (void)
 {
/* For SPU, word mode is TImode, so it is better to use SImode
@@ -7044,27 +6907,6 @@
   return SImode;
 }
 
-/* An early place to adjust some flags after GCC has finished processing
- * them. */
-static void
-asm_file_start (void)
-{
-  /* Variable tracking should be run after all optimizations which
-     change order of insns.  It also needs a valid CFG.  Therefore,
-     *if* we make nontrivial changes in machine-dependent reorg,
-     run variable tracking after those.  However, if we do not run
-     our machine-dependent reorg pass, we must still run the normal
-     variable tracking pass (or else we will ICE in final since
-     debug insns have not been removed).  */
-  if (TARGET_BRANCH_HINTS && optimize)
-    {
-      spu_flag_var_tracking = flag_var_tracking;
-      flag_var_tracking = 0;
-    }
-
-  default_file_start ();
-}
-
 /* Implement targetm.section_type_flags.  */
 static unsigned int
 spu_section_type_flags (tree decl, const char *name, int reloc)
@@ -7116,7 +6958,7 @@
    the result is valid for MODE.  Currently, MODE must be V4SFmode and
    SCALE must be SImode. */
 rtx
-spu_gen_exp2 (enum machine_mode mode, rtx scale)
+spu_gen_exp2 (machine_mode mode, rtx scale)
 {
   gcc_assert (mode == V4SFmode);
   gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
@@ -7191,4 +7033,437 @@
   return default_ref_may_alias_errno (ref);
 }
 
+/* Output thunk to FILE that implements a C++ virtual function call (with
+   multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
+   by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
+   stored at VCALL_OFFSET in the vtable whose address is located at offset 0
+   relative to the resulting this pointer.  */
+
+static void
+spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
+		     HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
+		     tree function)
+{
+  rtx op[8];
+
+  /* Make sure unwind info is emitted for the thunk if needed.  */
+  final_start_function (emit_barrier (), file, 1);
+
+  /* Operand 0 is the target function.  */
+  op[0] = XEXP (DECL_RTL (function), 0);
+
+  /* Operand 1 is the 'this' pointer.  */
+  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
+    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
+  else
+    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
+
+  /* Operands 2/3 are the low/high halfwords of delta.  */
+  op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
+  op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
+
+  /* Operands 4/5 are the low/high halfwords of vcall_offset.  */
+  op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
+  op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
+
+  /* Operands 6/7 are temporary registers.  */
+  op[6] = gen_rtx_REG (Pmode, 79);
+  op[7] = gen_rtx_REG (Pmode, 78);
+
+  /* Add DELTA to this pointer.  */
+  if (delta)
+    {
+      if (delta >= -0x200 && delta < 0x200)
+	output_asm_insn ("ai\t%1,%1,%2", op);
+      else if (delta >= -0x8000 && delta < 0x8000)
+	{
+	  output_asm_insn ("il\t%6,%2", op);
+	  output_asm_insn ("a\t%1,%1,%6", op);
+	}
+      else
+	{
+	  output_asm_insn ("ilhu\t%6,%3", op);
+	  output_asm_insn ("iohl\t%6,%2", op);
+	  output_asm_insn ("a\t%1,%1,%6", op);
+	}
+    }
+
+  /* Perform vcall adjustment.  */
+  if (vcall_offset)
+    {
+      output_asm_insn ("lqd\t%7,0(%1)", op);
+      output_asm_insn ("rotqby\t%7,%7,%1", op);
+
+      if (vcall_offset >= -0x200 && vcall_offset < 0x200)
+	output_asm_insn ("ai\t%7,%7,%4", op);
+      else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
+	{
+	  output_asm_insn ("il\t%6,%4", op);
+	  output_asm_insn ("a\t%7,%7,%6", op);
+	}
+      else
+	{
+	  output_asm_insn ("ilhu\t%6,%5", op);
+	  output_asm_insn ("iohl\t%6,%4", op);
+	  output_asm_insn ("a\t%7,%7,%6", op);
+	}
+
+      output_asm_insn ("lqd\t%6,0(%7)", op);
+      output_asm_insn ("rotqby\t%6,%6,%7", op);
+      output_asm_insn ("a\t%1,%1,%6", op);
+    }
+
+  /* Jump to target.  */
+  output_asm_insn ("br\t%0", op);
+
+  final_end_function ();
+}
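For orientation only (assuming FIRST_ARG_REGNUM is register $3, which is not stated in this hunk): a thunk with delta = 8 and no vcall offset comes out of the templates above as roughly "ai $3,$3,8" followed by "br <target>", while a delta outside the 16-bit range first builds the constant in the scratch register $79 with the ilhu/iohl pair before the add.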
+
+/* Canonicalize a comparison from one we don't have to one we do have.  */
+static void
+spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
+			     bool op0_preserve_value)
+{
+  if (!op0_preserve_value
+      && (*code == LE || *code == LT || *code == LEU || *code == LTU))
+    {
+      rtx tem = *op0;
+      *op0 = *op1;
+      *op1 = tem;
+      *code = (int)swap_condition ((enum rtx_code)*code);
+    }
+}
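A worked example of the swap: with op0_preserve_value false, a request for (lt:SI a b) leaves this hook as (gt:SI b a), since the operands are exchanged and swap_condition maps LT to GT; EQ, NE and the already-available GT/GE/GTU/GEU forms pass through unchanged.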
+
+/* Expand an atomic fetch-and-operate pattern.  CODE is the binary operation
+   to perform.  MEM is the memory on which to operate.  VAL is the second
+   operand of the binary operator.  BEFORE and AFTER are optional locations to
+   return the value of MEM either before or after the operation.  */
+void
+spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
+		      rtx orig_before, rtx orig_after)
+{
+  machine_mode mode = GET_MODE (mem);
+  rtx before = orig_before, after = orig_after;
+
+  if (before == NULL_RTX)
+    before = gen_reg_rtx (mode);
+
+  emit_move_insn (before, mem);
+
+  if (code == MULT)  /* NAND operation */
+    {
+      rtx x = expand_simple_binop (mode, AND, before, val,
+				   NULL_RTX, 1, OPTAB_LIB_WIDEN);
+      after = expand_simple_unop (mode, NOT, x, after, 1);
+    }
+  else
+    {
+      after = expand_simple_binop (mode, code, before, val,
+				   after, 1, OPTAB_LIB_WIDEN);
+    }
+
+  emit_move_insn (mem, after);
+
+  if (orig_after && after != orig_after)
+    emit_move_insn (orig_after, after);
+}
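A hypothetical call site, just to show the calling convention (mem and val stand for rtxes prepared by an expander; they are not defined in this patch):

  /* Atomic fetch-and-add on an SImode location, keeping the old value.  */
  rtx old_val = gen_reg_rtx (SImode);
  spu_expand_atomic_op (PLUS, mem, val, old_val, NULL_RTX);

  /* By the convention noted in the code above, MULT selects the NAND form,
     computing ~(*mem & val).  */
  spu_expand_atomic_op (MULT, mem, val, NULL_RTX, gen_reg_rtx (SImode));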
+
+/* Implement TARGET_MODES_TIEABLE_P.  */
+
+static bool
+spu_modes_tieable_p (machine_mode mode1, machine_mode mode2)
+{
+  return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
+	  && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
+}
+
+/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  GCC assumes that modes are
+   in the lowpart of a register, which is only true for SPU.  */
+
+static bool
+spu_can_change_mode_class (machine_mode from, machine_mode to, reg_class_t)
+{
+  return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
+	  || (GET_MODE_SIZE (from) <= 4 && GET_MODE_SIZE (to) <= 4)
+	  || (GET_MODE_SIZE (from) >= 16 && GET_MODE_SIZE (to) >= 16));
+}
+
+/* Implement TARGET_TRULY_NOOP_TRUNCATION.  */
+
+static bool
+spu_truly_noop_truncation (unsigned int outprec, unsigned int inprec)
+{
+  return inprec <= 32 && outprec <= inprec;
+}
+
+/* Implement TARGET_STATIC_RTX_ALIGNMENT.
+
+   Make all static objects 16-byte aligned.  This allows us to assume
+   they are also padded to 16 bytes, which means we can use a single
+   load or store instruction to access them.  */
+
+static HOST_WIDE_INT
+spu_static_rtx_alignment (machine_mode mode)
+{
+  return MAX (GET_MODE_ALIGNMENT (mode), 128);
+}
+
+/* Implement TARGET_CONSTANT_ALIGNMENT.
+
+   Make all static objects 16-byte aligned.  This allows us to assume
+   they are also padded to 16 bytes, which means we can use a single
+   load or store instruction to access them.  */
+
+static HOST_WIDE_INT
+spu_constant_alignment (const_tree, HOST_WIDE_INT align)
+{
+  return MAX (align, 128);
+}
+
+/*  Table of machine attributes.  */
+static const struct attribute_spec spu_attribute_table[] =
+{
+  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
+       affects_type_identity } */
+  { "naked",          0, 0, true,  false, false, spu_handle_fndecl_attribute,
+    false },
+  { "spu_vector",     0, 0, false, true,  false, spu_handle_vector_attribute,
+    false },
+  { NULL,             0, 0, false, false, false, NULL, false }
+};
+
+/*  TARGET overrides.  */
+
+#undef TARGET_LRA_P
+#define TARGET_LRA_P hook_bool_void_false
+
+#undef TARGET_ADDR_SPACE_POINTER_MODE
+#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
+
+#undef TARGET_ADDR_SPACE_ADDRESS_MODE
+#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
+
+#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
+#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
+  spu_addr_space_legitimate_address_p
+
+#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
+#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
+
+#undef TARGET_ADDR_SPACE_SUBSET_P
+#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
+
+#undef TARGET_ADDR_SPACE_CONVERT
+#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
+
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS spu_init_builtins
+#undef TARGET_BUILTIN_DECL
+#define TARGET_BUILTIN_DECL spu_builtin_decl
+
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN spu_expand_builtin
+
+#undef TARGET_UNWIND_WORD_MODE
+#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
+
+#undef TARGET_LEGITIMIZE_ADDRESS
+#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
+
+/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
+   and .quad for the debugger.  When it is known that the assembler is fixed,
+   these can be removed.  */
+#undef TARGET_ASM_UNALIGNED_SI_OP
+#define TARGET_ASM_UNALIGNED_SI_OP	"\t.long\t"
+
+#undef TARGET_ASM_ALIGNED_DI_OP
+#define TARGET_ASM_ALIGNED_DI_OP	"\t.quad\t"
+
+/* The .8byte directive doesn't seem to work well for a 32 bit
+   architecture. */
+#undef TARGET_ASM_UNALIGNED_DI_OP
+#define TARGET_ASM_UNALIGNED_DI_OP NULL
+
+#undef TARGET_RTX_COSTS
+#define TARGET_RTX_COSTS spu_rtx_costs
+
+#undef TARGET_ADDRESS_COST
+#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
+
+#undef TARGET_SCHED_ISSUE_RATE
+#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
+
+#undef TARGET_SCHED_INIT_GLOBAL
+#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
+
+#undef TARGET_SCHED_INIT
+#define TARGET_SCHED_INIT spu_sched_init
+
+#undef TARGET_SCHED_VARIABLE_ISSUE
+#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
+
+#undef TARGET_SCHED_REORDER
+#define TARGET_SCHED_REORDER spu_sched_reorder
+
+#undef TARGET_SCHED_REORDER2
+#define TARGET_SCHED_REORDER2 spu_sched_reorder
+
+#undef TARGET_SCHED_ADJUST_COST
+#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
+
+#undef  TARGET_ATTRIBUTE_TABLE
+#define TARGET_ATTRIBUTE_TABLE spu_attribute_table
+
+#undef TARGET_ASM_INTEGER
+#define TARGET_ASM_INTEGER spu_assemble_integer
+
+#undef TARGET_SCALAR_MODE_SUPPORTED_P
+#define TARGET_SCALAR_MODE_SUPPORTED_P	spu_scalar_mode_supported_p
+
+#undef TARGET_VECTOR_MODE_SUPPORTED_P
+#define TARGET_VECTOR_MODE_SUPPORTED_P	spu_vector_mode_supported_p
+
+#undef TARGET_FUNCTION_OK_FOR_SIBCALL
+#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
+
+#undef TARGET_ASM_GLOBALIZE_LABEL
+#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
+
+#undef TARGET_PASS_BY_REFERENCE
+#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
+
+#undef TARGET_FUNCTION_ARG
+#define TARGET_FUNCTION_ARG spu_function_arg
+
+#undef TARGET_FUNCTION_ARG_ADVANCE
+#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
+
+#undef TARGET_FUNCTION_ARG_OFFSET
+#define TARGET_FUNCTION_ARG_OFFSET spu_function_arg_offset
+
+#undef TARGET_FUNCTION_ARG_PADDING
+#define TARGET_FUNCTION_ARG_PADDING spu_function_arg_padding
+
+#undef TARGET_MUST_PASS_IN_STACK
+#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
+
+#undef TARGET_BUILD_BUILTIN_VA_LIST
+#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
+
+#undef TARGET_EXPAND_BUILTIN_VA_START
+#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
+
+#undef TARGET_SETUP_INCOMING_VARARGS
+#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
+
+#undef TARGET_MACHINE_DEPENDENT_REORG
+#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
+
+#undef TARGET_GIMPLIFY_VA_ARG_EXPR
+#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
+
+#undef TARGET_INIT_LIBFUNCS
+#define TARGET_INIT_LIBFUNCS spu_init_libfuncs
+
+#undef TARGET_RETURN_IN_MEMORY
+#define TARGET_RETURN_IN_MEMORY spu_return_in_memory
+
+#undef  TARGET_ENCODE_SECTION_INFO
+#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
+
+#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
+#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
+
+#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
+#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
+
+#undef TARGET_VECTORIZE_INIT_COST
+#define TARGET_VECTORIZE_INIT_COST spu_init_cost
+
+#undef TARGET_VECTORIZE_ADD_STMT_COST
+#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
+
+#undef TARGET_VECTORIZE_FINISH_COST
+#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
+
+#undef TARGET_VECTORIZE_DESTROY_COST_DATA
+#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
+
+#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
+#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
+
+#undef TARGET_LIBGCC_CMP_RETURN_MODE
+#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
+
+#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
+#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
+
+#undef TARGET_SCHED_SMS_RES_MII
+#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
+
+#undef TARGET_SECTION_TYPE_FLAGS
+#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
+
+#undef TARGET_ASM_SELECT_SECTION
+#define TARGET_ASM_SELECT_SECTION  spu_select_section
+
+#undef TARGET_ASM_UNIQUE_SECTION
+#define TARGET_ASM_UNIQUE_SECTION  spu_unique_section
+
+#undef TARGET_LEGITIMATE_ADDRESS_P
+#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
+
+#undef TARGET_LEGITIMATE_CONSTANT_P
+#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
+
+#undef TARGET_TRAMPOLINE_INIT
+#define TARGET_TRAMPOLINE_INIT spu_trampoline_init
+
+#undef TARGET_WARN_FUNC_RETURN
+#define TARGET_WARN_FUNC_RETURN spu_warn_func_return
+
+#undef TARGET_OPTION_OVERRIDE
+#define TARGET_OPTION_OVERRIDE spu_option_override
+
+#undef TARGET_CONDITIONAL_REGISTER_USAGE
+#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
+
+#undef TARGET_REF_MAY_ALIAS_ERRNO
+#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
+
+#undef TARGET_ASM_OUTPUT_MI_THUNK
+#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
+#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
+#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
+
+/* Variable tracking should be run after all optimizations which
+   change order of insns.  It also needs a valid CFG.  */
+#undef TARGET_DELAY_VARTRACK
+#define TARGET_DELAY_VARTRACK true
+
+#undef TARGET_CANONICALIZE_COMPARISON
+#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
+
+#undef TARGET_CAN_USE_DOLOOP_P
+#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
+
+#undef TARGET_MODES_TIEABLE_P
+#define TARGET_MODES_TIEABLE_P spu_modes_tieable_p
+
+#undef TARGET_HARD_REGNO_NREGS
+#define TARGET_HARD_REGNO_NREGS spu_hard_regno_nregs
+
+#undef TARGET_CAN_CHANGE_MODE_CLASS
+#define TARGET_CAN_CHANGE_MODE_CLASS spu_can_change_mode_class
+
+#undef TARGET_TRULY_NOOP_TRUNCATION
+#define TARGET_TRULY_NOOP_TRUNCATION spu_truly_noop_truncation
+
+#undef TARGET_STATIC_RTX_ALIGNMENT
+#define TARGET_STATIC_RTX_ALIGNMENT spu_static_rtx_alignment
+#undef TARGET_CONSTANT_ALIGNMENT
+#define TARGET_CONSTANT_ALIGNMENT spu_constant_alignment
+
+struct gcc_target targetm = TARGET_INITIALIZER;
+
 #include "gt-spu.h"