diff gcc/config/s390/s390.c @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
--- a/gcc/config/s390/s390.c	Fri Oct 27 22:46:09 2017 +0900
+++ b/gcc/config/s390/s390.c	Thu Oct 25 07:37:49 2018 +0900
@@ -1,5 +1,5 @@
 /* Subroutines used for code generation on IBM S/390 and zSeries
-   Copyright (C) 1999-2017 Free Software Foundation, Inc.
+   Copyright (C) 1999-2018 Free Software Foundation, Inc.
    Contributed by Hartmut Penner (hpenner@de.ibm.com) and
                   Ulrich Weigand (uweigand@de.ibm.com) and
                   Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
@@ -20,6 +20,8 @@
 along with GCC; see the file COPYING3.  If not see
 <http://www.gnu.org/licenses/>.  */
 
+#define IN_TARGET_CODE 1
+
 #include "config.h"
 #include "system.h"
 #include "coretypes.h"
@@ -324,29 +326,18 @@
   COSTS_N_INSNS (160),   /* DSGR cracked */
 };
 
-static struct
-{
-  /* The preferred name to be used in user visible output.  */
-  const char *const name;
-  /* CPU name as it should be passed to Binutils via .machine  */
-  const char *const binutils_name;
-  const enum processor_type processor;
-  const struct processor_costs *cost;
-}
-const processor_table[] =
-{
-  { "g5",     "g5",     PROCESSOR_9672_G5,     &z900_cost },
-  { "g6",     "g6",     PROCESSOR_9672_G6,     &z900_cost },
-  { "z900",   "z900",   PROCESSOR_2064_Z900,   &z900_cost },
-  { "z990",   "z990",   PROCESSOR_2084_Z990,   &z990_cost },
-  { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
-  { "z9-ec",  "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost },
-  { "z10",    "z10",    PROCESSOR_2097_Z10,    &z10_cost },
-  { "z196",   "z196",   PROCESSOR_2817_Z196,   &z196_cost },
-  { "zEC12",  "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost },
-  { "z13",    "z13",    PROCESSOR_2964_Z13,    &zEC12_cost },
-  { "z14",    "arch12", PROCESSOR_3906_Z14,    &zEC12_cost },
-  { "native", "",       PROCESSOR_NATIVE,      NULL }
+const struct s390_processor processor_table[] =
+{
+  { "z900",   "z900",   PROCESSOR_2064_Z900,   &z900_cost,   5  },
+  { "z990",   "z990",   PROCESSOR_2084_Z990,   &z990_cost,   6  },
+  { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7  },
+  { "z9-ec",  "z9-ec",  PROCESSOR_2094_Z9_EC,  &z9_109_cost, 7  },
+  { "z10",    "z10",    PROCESSOR_2097_Z10,    &z10_cost,    8  },
+  { "z196",   "z196",   PROCESSOR_2817_Z196,   &z196_cost,   9  },
+  { "zEC12",  "zEC12",  PROCESSOR_2827_ZEC12,  &zEC12_cost,  10 },
+  { "z13",    "z13",    PROCESSOR_2964_Z13,    &zEC12_cost,  11 },
+  { "z14",    "arch12", PROCESSOR_3906_Z14,    &zEC12_cost,  12 },
+  { "native", "",       PROCESSOR_NATIVE,      NULL,         0  }
 };
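
The new fifth column is the architecture level number behind the binutils
name (z900 = 5 through z14/"arch12" = 12).  A minimal sketch of the
element type, assuming the struct definition moved to s390.h with this
change; the `arch' field name is an assumption, not shown in this hunk:

    struct s390_processor
    {
      /* The preferred name to be used in user visible output.  */
      const char *const name;
      /* CPU name as it should be passed to Binutils via .machine  */
      const char *const binutils_name;
      const enum processor_type processor;
      const struct processor_costs *cost;
      /* Architecture level of this CPU, e.g. 12 for z14 ("arch12").  */
      int arch;
    };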
 
 extern int reload_completed;
@@ -388,6 +379,11 @@
    base and index are registers of the class ADDR_REGS,
    displacement is an unsigned 12-bit immediate constant.  */
 
+/* The maximum number of insns in backend-generated memset/memcpy/memcmp
+   loops.  This value is used in the unroll adjust hook to detect such
+   loops.  The current maximum is 9, coming from the memcmp loop.  */
+#define BLOCK_MEM_OPS_LOOP_INSNS 9
+
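A hedged sketch of how the unroll adjust hook can consult this constant;
the hook name is s390's existing target hook, but the body here is
illustrative only, not code from this patch:

    /* struct loop comes from cfgloop.h.  */
    static unsigned
    s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
    {
      /* Leave the small memset/memcpy/memcmp loops emitted by the
         backend alone; unrolling would undo their hand tuning.  */
      if (loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS)
        return 1;
      return nunroll;
    }
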
 struct s390_address
 {
   rtx base;
@@ -397,84 +393,6 @@
   bool literal_pool;
 };
 
-/* The following structure is embedded in the machine
-   specific part of struct function.  */
-
-struct GTY (()) s390_frame_layout
-{
-  /* Offset within stack frame.  */
-  HOST_WIDE_INT gprs_offset;
-  HOST_WIDE_INT f0_offset;
-  HOST_WIDE_INT f4_offset;
-  HOST_WIDE_INT f8_offset;
-  HOST_WIDE_INT backchain_offset;
-
-  /* Number of first and last gpr where slots in the register
-     save area are reserved for.  */
-  int first_save_gpr_slot;
-  int last_save_gpr_slot;
-
-  /* Location (FP register number) where GPRs (r0-r15) should
-     be saved to.
-      0 - does not need to be saved at all
-     -1 - stack slot  */
-#define SAVE_SLOT_NONE   0
-#define SAVE_SLOT_STACK -1
-  signed char gpr_save_slots[16];
-
-  /* Number of first and last gpr to be saved, restored.  */
-  int first_save_gpr;
-  int first_restore_gpr;
-  int last_save_gpr;
-  int last_restore_gpr;
-
-  /* Bits standing for floating point registers. Set, if the
-     respective register has to be saved. Starting with reg 16 (f0)
-     at the rightmost bit.
-     Bit 15 14 13 12 11 10  9  8  7  6  5  4  3  2  1  0
-     fpr 15 13 11  9 14 12 10  8  7  5  3  1  6  4  2  0
-     reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16  */
-  unsigned int fpr_bitmap;
-
-  /* Number of floating point registers f8-f15 which must be saved.  */
-  int high_fprs;
-
-  /* Set if return address needs to be saved.
-     This flag is set by s390_return_addr_rtx if it could not use
-     the initial value of r14 and therefore depends on r14 saved
-     to the stack.  */
-  bool save_return_addr_p;
-
-  /* Size of stack frame.  */
-  HOST_WIDE_INT frame_size;
-};
-
-/* Define the structure for the machine field in struct function.  */
-
-struct GTY(()) machine_function
-{
-  struct s390_frame_layout frame_layout;
-
-  /* Literal pool base register.  */
-  rtx base_reg;
-
-  /* True if we may need to perform branch splitting.  */
-  bool split_branches_pending_p;
-
-  bool has_landing_pad_p;
-
-  /* True if the current function may contain a tbegin clobbering
-     FPRs.  */
-  bool tbegin_p;
-
-  /* For -fsplit-stack support: A stack local which holds a pointer to
-     the stack arguments for a function with a variable number of
-     arguments.  This is set at the start of the function and is used
-     to initialize the overflow_arg_area field of the va_list
-     structure.  */
-  rtx split_stack_varargs_pointer;
-};
-
 /* Few accessor macros for struct cfun->machine->s390_frame_layout.  */
 
 #define cfun_frame_layout (cfun->machine->frame_layout)
@@ -502,11 +420,11 @@
 #define CONST_OK_FOR_K(x) \
 	CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
 #define CONST_OK_FOR_Os(x) \
-        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
+	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
 #define CONST_OK_FOR_Op(x) \
-        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
+	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
 #define CONST_OK_FOR_On(x) \
-        CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
+	CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
 
 #define REGNO_PAIR_OK(REGNO, MODE)                               \
   (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
@@ -515,6 +433,33 @@
    bytes on a z10 (or higher) CPU.  */
 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
 
+/* Masks per jump target register indicating which thunks need to be
+   generated.  */
+static GTY(()) int indirect_branch_prez10thunk_mask = 0;
+static GTY(()) int indirect_branch_z10thunk_mask = 0;
+
+#define INDIRECT_BRANCH_NUM_OPTIONS 4
+
+enum s390_indirect_branch_option
+  {
+    s390_opt_indirect_branch_jump = 0,
+    s390_opt_indirect_branch_call,
+    s390_opt_function_return_reg,
+    s390_opt_function_return_mem
+  };
+
+static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 };
+const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \
+  { "LJUMP", "LCALL", "LRETREG", "LRETMEM" };
+const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] =	\
+  { ".s390_indirect_jump", ".s390_indirect_call",
+    ".s390_return_reg", ".s390_return_mem" };
+
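How the two masks above are meant to work, as a hedged sketch (the
output helper named below is hypothetical, not from this patch): one bit
per possible jump/call target register, set whenever a thunk for that
register is required, so that the end-of-file output code emits each
needed thunk exactly once.

    /* Illustrative fragment only.  */
    indirect_branch_z10thunk_mask |= (1 << regno);  /* request a thunk */

    /* ... later, at end-of-file output time ...  */
    for (int regno = 0; regno < 16; regno++)
      if (indirect_branch_z10thunk_mask & (1 << regno))
        s390_output_indirect_thunk (regno);  /* hypothetical helper */
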
+bool
+s390_return_addr_from_memory ()
+{
+  return cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK;
+}
 
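A hedged sketch of what this predicate is for: functions that restore
the return address from a stack slot fall under the function_return_mem
option above, all others under function_return_reg.  The selection code
below is illustrative, not from this patch:

    enum s390_indirect_branch_option opt
      = (s390_return_addr_from_memory ()
	 ? s390_opt_function_return_mem
	 : s390_opt_function_return_reg);
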
 /* Indicate which ABI has been used for passing vector args.
    0 - no vector type arguments have been passed where the ABI is relevant
@@ -974,7 +919,7 @@
 
       /* Record the vector mode used for an element selector.  This assumes:
 	 1. There is no builtin with two different vector modes and an element selector
-         2. The element selector comes after the vector type it is referring to.
+	 2. The element selector comes after the vector type it is referring to.
	 This is currently true for all the builtins, but FIXME: we
	 should check for that explicitly.  */
       if (VECTOR_MODE_P (insn_op->mode))
@@ -1037,7 +982,7 @@
       break;
     case 1:
       if (nonvoid)
-        pat = GEN_FCN (icode) (target, op[0]);
+	pat = GEN_FCN (icode) (target, op[0]);
       else
 	pat = GEN_FCN (icode) (op[0]);
       break;
@@ -1177,11 +1122,85 @@
   return NULL_TREE;
 }
 
+/* Check syntax of function decl attributes having a string type value.  */
+
+static tree
+s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
+			      tree args ATTRIBUTE_UNUSED,
+			      int flags ATTRIBUTE_UNUSED,
+			      bool *no_add_attrs)
+{
+  tree cst;
+
+  if (TREE_CODE (*node) != FUNCTION_DECL)
+    {
+      warning (OPT_Wattributes, "%qE attribute only applies to functions",
+	       name);
+      *no_add_attrs = true;
+    }
+
+  cst = TREE_VALUE (args);
+
+  if (TREE_CODE (cst) != STRING_CST)
+    {
+      warning (OPT_Wattributes,
+	       "%qE attribute requires a string constant argument",
+	       name);
+      *no_add_attrs = true;
+    }
+
+  if (is_attribute_p ("indirect_branch", name)
+      || is_attribute_p ("indirect_branch_call", name)
+      || is_attribute_p ("function_return", name)
+      || is_attribute_p ("function_return_reg", name)
+      || is_attribute_p ("function_return_mem", name))
+    {
+      if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0
+	  && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
+	  && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
+      {
+	warning (OPT_Wattributes,
+		 "argument to %qE attribute is not "
+		 "(keep|thunk|thunk-extern)", name);
+	*no_add_attrs = true;
+      }
+    }
+
+  if (is_attribute_p ("indirect_branch_jump", name)
+      && strcmp (TREE_STRING_POINTER (cst), "keep") != 0
+      && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0
+      && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0
+      && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0)
+    {
+      warning (OPT_Wattributes,
+	       "argument to %qE attribute is not "
+	       "(keep|thunk|thunk-inline|thunk-extern)", name);
+      *no_add_attrs = true;
+    }
+
+  return NULL_TREE;
+}
+
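The spellings checked above map directly onto user source, for example
(user code, not part of this patch):

    void foo (void) __attribute__ ((indirect_branch ("thunk")));
    void bar (void) __attribute__ ((indirect_branch_jump ("thunk-inline")));
    void baz (void) __attribute__ ((function_return ("keep")));

Note that "thunk-inline" is only accepted for indirect_branch_jump,
matching the two separate checks above.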
 static const struct attribute_spec s390_attribute_table[] = {
-  { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
-  { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
+  { "hotpatch", 2, 2, true, false, false, false,
+    s390_handle_hotpatch_attribute, NULL },
+  { "s390_vector_bool", 0, 0, false, true, false, true,
+    s390_handle_vectorbool_attribute, NULL },
+  { "indirect_branch", 1, 1, true, false, false, false,
+    s390_handle_string_attribute, NULL },
+  { "indirect_branch_jump", 1, 1, true, false, false, false,
+    s390_handle_string_attribute, NULL },
+  { "indirect_branch_call", 1, 1, true, false, false, false,
+    s390_handle_string_attribute, NULL },
+  { "function_return", 1, 1, true, false, false, false,
+    s390_handle_string_attribute, NULL },
+  { "function_return_reg", 1, 1, true, false, false, false,
+    s390_handle_string_attribute, NULL },
+  { "function_return_mem", 1, 1, true, false, false, false,
+    s390_handle_string_attribute, NULL },
+
   /* End element.  */
-  { NULL,        0, 0, false, false, false, NULL, false }
+  { NULL,        0, 0, false, false, false, false, NULL, NULL }
 };
 
 /* Return the alignment for LABEL.  We default to the -falign-labels
@@ -1208,7 +1227,7 @@
     return 0;
 
  old:
-  return align_labels_log;
+  return align_labels.levels[0].log;
 }
 
 static GTY(()) rtx got_symbol;
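
Background for this hunk and the align_functions hunk further down: the
global alignment options changed from plain integers (align_labels_log,
align_functions) to structured align_flags values, roughly of this shape
(a sketch; the real definition lives in GCC's shared option code):

    struct align_flags_tuple
    {
      int log;		/* alignment, as a power of two */
      int maxskip;	/* max bytes to skip to reach it */
      int get_value () { return 1 << log; }  /* alignment in bytes */
    };
    struct align_flags { align_flags_tuple levels[2]; };

so the old *_log values are now levels[0].log.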
@@ -1313,7 +1332,7 @@
     case E_CCZmode:
       if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
 	  || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
-        return m2;
+	return m2;
       return VOIDmode;
 
     case E_CCSmode:
@@ -1373,20 +1392,20 @@
     case E_CCVFHmode:
     case E_CCVFHEmode:
       if (req_mode != set_mode)
-        return 0;
+	return 0;
       break;
 
     case E_CCZmode:
       if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
 	  && req_mode != CCSRmode && req_mode != CCURmode
 	  && req_mode != CCZ1mode)
-        return 0;
+	return 0;
       break;
 
     case E_CCAPmode:
     case E_CCANmode:
       if (req_mode != CCAmode)
-        return 0;
+	return 0;
       break;
 
     default:
@@ -1415,12 +1434,12 @@
 
   if (GET_CODE (PATTERN (insn)) == PARALLEL)
       for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
-        {
-          rtx set = XVECEXP (PATTERN (insn), 0, i);
-          if (GET_CODE (set) == SET)
-            if (!s390_match_ccmode_set (set, req_mode))
-              return false;
-        }
+	{
+	  rtx set = XVECEXP (PATTERN (insn), 0, i);
+	  if (GET_CODE (set) == SET)
+	    if (!s390_match_ccmode_set (set, req_mode))
+	      return false;
+	}
 
   return true;
 }
@@ -1460,7 +1479,7 @@
       bit1 = exact_log2 (INTVAL (op2));
       bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
       if (bit0 != -1 && bit1 != -1)
-        return bit0 > bit1 ? CCT1mode : CCT2mode;
+	return bit0 > bit1 ? CCT1mode : CCT2mode;
     }
 
   return VOIDmode;
@@ -1497,7 +1516,7 @@
 	      {
 		/* Relax CCTmode to CCZmode to allow fall-back to AND
 		   if that turns out to be beneficial.  */
-	        return ccmode == CCTmode ? CCZmode : ccmode;
+		return ccmode == CCTmode ? CCZmode : ccmode;
 	      }
 	  }
 
@@ -1524,11 +1543,11 @@
 	    && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
 	  return CCAPmode;
 
- 	/* If constants are involved in an add instruction it is possible to use
- 	   the resulting cc for comparisons with zero. Knowing the sign of the
+	/* If constants are involved in an add instruction it is possible to use
+	   the resulting cc for comparisons with zero. Knowing the sign of the
 	   constant the overflow behavior gets predictable. e.g.:
- 	     int a, b; if ((b = a + c) > 0)
- 	   with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
+	     int a, b; if ((b = a + c) > 0)
+	   with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP  */
 	if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
 	    && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
 		|| (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
@@ -1633,8 +1652,8 @@
 	  && (GET_MODE_SIZE (GET_MODE (inner))
 	      >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
 	  && ((INTVAL (mask)
-               & GET_MODE_MASK (GET_MODE (inner))
-               & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
+	       & GET_MODE_MASK (GET_MODE (inner))
+	       & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
 	      == 0))
 	inner = SUBREG_REG (inner);
 
@@ -1719,8 +1738,8 @@
 	{
 	  /* For CCRAWmode put the required cc mask into the second
 	     operand.  */
-        if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
-            && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
+	if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
+	    && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
 	    *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
 	  *op0 = XVECEXP (*op0, 0, 0);
 	  *code = new_code;
@@ -1737,7 +1756,7 @@
       && *op1 == const0_rtx)
     {
       if ((*code == EQ && GET_CODE (*op0) == NE)
-          || (*code == NE && GET_CODE (*op0) == EQ))
+	  || (*code == NE && GET_CODE (*op0) == EQ))
 	*code = EQ;
       else
 	*code = NE;
@@ -1806,6 +1825,21 @@
   return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
 }
 
+/* If MEM is not a legitimate compare-and-swap memory operand, return a new
+   MEM, whose address is a pseudo containing the original MEM's address.  */
+
+static rtx
+s390_legitimize_cs_operand (rtx mem)
+{
+  rtx tmp;
+
+  if (!contains_symbol_ref_p (mem))
+    return mem;
+  tmp = gen_reg_rtx (Pmode);
+  emit_move_insn (tmp, copy_rtx (XEXP (mem, 0)));
+  return change_address (mem, VOIDmode, tmp);
+}
+
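Illustrative RTL for what the helper changes (an editor's example; the
rationale, that cs/csg accept only base+displacement addresses and not
larl-resolvable symbolic operands, is an inference from the code):

    ;; before
    (mem:SI (symbol_ref:DI ("flag")))

    ;; after: the address is first moved into a fresh pseudo
    (set (reg:DI 60) (symbol_ref:DI ("flag")))
    (mem:SI (reg:DI 60))
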
 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
    matches CMP.
    Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
@@ -1817,6 +1851,7 @@
 {
   rtx cc;
 
+  mem = s390_legitimize_cs_operand (mem);
   cc = gen_rtx_REG (ccmode, CC_REGNUM);
   switch (GET_MODE (mem))
     {
@@ -1880,65 +1915,65 @@
     case E_CCZmode:
     case E_CCZ1mode:
       switch (GET_CODE (code))
-        {
-        case EQ:	return CC0;
+	{
+	case EQ:	return CC0;
 	case NE:	return CC1 | CC2 | CC3;
 	default:	return -1;
-        }
+	}
       break;
 
     case E_CCT1mode:
       switch (GET_CODE (code))
-        {
-        case EQ:	return CC1;
+	{
+	case EQ:	return CC1;
 	case NE:	return CC0 | CC2 | CC3;
 	default:	return -1;
-        }
+	}
       break;
 
     case E_CCT2mode:
       switch (GET_CODE (code))
-        {
-        case EQ:	return CC2;
+	{
+	case EQ:	return CC2;
 	case NE:	return CC0 | CC1 | CC3;
 	default:	return -1;
-        }
+	}
       break;
 
     case E_CCT3mode:
       switch (GET_CODE (code))
-        {
-        case EQ:	return CC3;
+	{
+	case EQ:	return CC3;
 	case NE:	return CC0 | CC1 | CC2;
 	default:	return -1;
-        }
+	}
       break;
 
     case E_CCLmode:
       switch (GET_CODE (code))
-        {
-        case EQ:	return CC0 | CC2;
+	{
+	case EQ:	return CC0 | CC2;
 	case NE:	return CC1 | CC3;
 	default:	return -1;
-        }
+	}
       break;
 
     case E_CCL1mode:
       switch (GET_CODE (code))
-        {
+	{
 	case LTU:	return CC2 | CC3;  /* carry */
 	case GEU:	return CC0 | CC1;  /* no carry */
 	default:	return -1;
-        }
+	}
       break;
 
     case E_CCL2mode:
       switch (GET_CODE (code))
-        {
+	{
 	case GTU:	return CC0 | CC1;  /* borrow */
 	case LEU:	return CC2 | CC3;  /* no borrow */
 	default:	return -1;
-        }
+	}
       break;
 
     case E_CCL3mode:
@@ -1955,96 +1990,96 @@
 
     case E_CCUmode:
       switch (GET_CODE (code))
-        {
-        case EQ:	return CC0;
-        case NE:	return CC1 | CC2 | CC3;
-        case LTU:	return CC1;
-        case GTU:	return CC2;
-        case LEU:	return CC0 | CC1;
-        case GEU:	return CC0 | CC2;
+	{
+	case EQ:	return CC0;
+	case NE:	return CC1 | CC2 | CC3;
+	case LTU:	return CC1;
+	case GTU:	return CC2;
+	case LEU:	return CC0 | CC1;
+	case GEU:	return CC0 | CC2;
 	default:	return -1;
-        }
+	}
       break;
 
     case E_CCURmode:
       switch (GET_CODE (code))
-        {
-        case EQ:	return CC0;
-        case NE:	return CC2 | CC1 | CC3;
-        case LTU:	return CC2;
-        case GTU:	return CC1;
-        case LEU:	return CC0 | CC2;
-        case GEU:	return CC0 | CC1;
+	{
+	case EQ:	return CC0;
+	case NE:	return CC2 | CC1 | CC3;
+	case LTU:	return CC2;
+	case GTU:	return CC1;
+	case LEU:	return CC0 | CC2;
+	case GEU:	return CC0 | CC1;
 	default:	return -1;
-        }
+	}
       break;
 
     case E_CCAPmode:
       switch (GET_CODE (code))
-        {
-        case EQ:	return CC0;
-        case NE:	return CC1 | CC2 | CC3;
-        case LT:	return CC1 | CC3;
-        case GT:	return CC2;
-        case LE:	return CC0 | CC1 | CC3;
-        case GE:	return CC0 | CC2;
+	{
+	case EQ:	return CC0;
+	case NE:	return CC1 | CC2 | CC3;
+	case LT:	return CC1 | CC3;
+	case GT:	return CC2;
+	case LE:	return CC0 | CC1 | CC3;
+	case GE:	return CC0 | CC2;
 	default:	return -1;
-        }
+	}
       break;
 
     case E_CCANmode:
       switch (GET_CODE (code))
-        {
-        case EQ:	return CC0;
-        case NE:	return CC1 | CC2 | CC3;
-        case LT:	return CC1;
-        case GT:	return CC2 | CC3;
-        case LE:	return CC0 | CC1;
-        case GE:	return CC0 | CC2 | CC3;
+	{
+	case EQ:	return CC0;
+	case NE:	return CC1 | CC2 | CC3;
+	case LT:	return CC1;
+	case GT:	return CC2 | CC3;
+	case LE:	return CC0 | CC1;
+	case GE:	return CC0 | CC2 | CC3;
 	default:	return -1;
-        }
+	}
       break;
 
     case E_CCSmode:
       switch (GET_CODE (code))
-        {
-        case EQ:	return CC0;
-        case NE:	return CC1 | CC2 | CC3;
-        case LT:	return CC1;
-        case GT:	return CC2;
-        case LE:	return CC0 | CC1;
-        case GE:	return CC0 | CC2;
+	{
+	case EQ:	return CC0;
+	case NE:	return CC1 | CC2 | CC3;
+	case LT:	return CC1;
+	case GT:	return CC2;
+	case LE:	return CC0 | CC1;
+	case GE:	return CC0 | CC2;
 	case UNORDERED:	return CC3;
 	case ORDERED:	return CC0 | CC1 | CC2;
 	case UNEQ:	return CC0 | CC3;
-        case UNLT:	return CC1 | CC3;
-        case UNGT:	return CC2 | CC3;
-        case UNLE:	return CC0 | CC1 | CC3;
-        case UNGE:	return CC0 | CC2 | CC3;
+	case UNLT:	return CC1 | CC3;
+	case UNGT:	return CC2 | CC3;
+	case UNLE:	return CC0 | CC1 | CC3;
+	case UNGE:	return CC0 | CC2 | CC3;
 	case LTGT:	return CC1 | CC2;
 	default:	return -1;
-        }
+	}
       break;
 
     case E_CCSRmode:
       switch (GET_CODE (code))
-        {
-        case EQ:	return CC0;
-        case NE:	return CC2 | CC1 | CC3;
-        case LT:	return CC2;
-        case GT:	return CC1;
-        case LE:	return CC0 | CC2;
-        case GE:	return CC0 | CC1;
+	{
+	case EQ:	return CC0;
+	case NE:	return CC2 | CC1 | CC3;
+	case LT:	return CC2;
+	case GT:	return CC1;
+	case LE:	return CC0 | CC2;
+	case GE:	return CC0 | CC1;
 	case UNORDERED:	return CC3;
 	case ORDERED:	return CC0 | CC2 | CC1;
 	case UNEQ:	return CC0 | CC3;
-        case UNLT:	return CC2 | CC3;
-        case UNGT:	return CC1 | CC3;
-        case UNLE:	return CC0 | CC2 | CC3;
-        case UNGE:	return CC0 | CC1 | CC3;
+	case UNLT:	return CC2 | CC3;
+	case UNGT:	return CC1 | CC3;
+	case UNLE:	return CC0 | CC2 | CC3;
+	case UNGE:	return CC0 | CC1 | CC3;
 	case LTGT:	return CC2 | CC1;
 	default:	return -1;
-        }
+	}
       break;
 
       /* Vector comparison modes.  */
@@ -2496,7 +2531,7 @@
     {
       rtx subreg = operand_subword (dst, first_subword, 0, mode);
       if (reg_overlap_mentioned_p (subreg, src))
-        return false;
+	return false;
     }
 
   return true;
@@ -2596,7 +2631,7 @@
       else if (REG_P (dst))
 	dst = gen_rtx_SUBREG (wmode, dst, 0);
       else
-        dst = gen_reg_rtx (wmode);
+	dst = gen_reg_rtx (wmode);
 
       if (GET_CODE (src1) == SUBREG
 	  && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
@@ -2718,7 +2753,7 @@
   if (GET_CODE (disp) == CONST
       && GET_CODE (XEXP (disp, 0)) == UNSPEC
       && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
-          || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
+	  || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
     return false;
 
   /* All other symbolic constants are literal pool references,
@@ -2729,6 +2764,58 @@
   return false;
 }
 
+/* Attempt to split `ref', which should be an UNSPEC_LTREF, into
+   (base + `disp').  If successful, also determine the following
+   characteristics of `ref': `is_ptr' - whether it can be an LA
+   argument, `is_base_ptr' - whether the resulting base is a well-known
+   base register (stack/frame pointer, etc.), `is_pool_ptr' - whether it
+   is considered a literal pool pointer for purposes of avoiding two
+   different literal pool pointers per insn during or after reload
+   (`B' constraint).  */
+static bool
+s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr,
+				  bool *is_base_ptr, bool *is_pool_ptr)
+{
+  if (!*ref)
+    return true;
+
+  if (GET_CODE (*ref) == UNSPEC)
+    switch (XINT (*ref, 1))
+      {
+      case UNSPEC_LTREF:
+	if (!*disp)
+	  *disp = gen_rtx_UNSPEC (Pmode,
+				  gen_rtvec (1, XVECEXP (*ref, 0, 0)),
+				  UNSPEC_LTREL_OFFSET);
+	else
+	  return false;
+
+	*ref = XVECEXP (*ref, 0, 1);
+	break;
+
+      default:
+	return false;
+      }
+
+  if (!REG_P (*ref) || GET_MODE (*ref) != Pmode)
+    return false;
+
+  if (REGNO (*ref) == STACK_POINTER_REGNUM
+      || REGNO (*ref) == FRAME_POINTER_REGNUM
+      || ((reload_completed || reload_in_progress)
+	  && frame_pointer_needed
+	  && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM)
+      || REGNO (*ref) == ARG_POINTER_REGNUM
+      || (flag_pic
+	  && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM))
+    *is_ptr = *is_base_ptr = true;
+
+  if ((reload_completed || reload_in_progress)
+      && *ref == cfun->machine->base_reg)
+    *is_ptr = *is_base_ptr = *is_pool_ptr = true;
+
+  return true;
+}
+
 /* Decompose a RTL expression ADDR for a memory address into
    its components, returned in OUT.
 
@@ -2840,96 +2927,14 @@
     }
 
   /* Validate base register.  */
-  if (base)
-    {
-      if (GET_CODE (base) == UNSPEC)
-	switch (XINT (base, 1))
-	  {
-	  case UNSPEC_LTREF:
-	    if (!disp)
-	      disp = gen_rtx_UNSPEC (Pmode,
-				     gen_rtvec (1, XVECEXP (base, 0, 0)),
-				     UNSPEC_LTREL_OFFSET);
-	    else
-	      return false;
-
-	    base = XVECEXP (base, 0, 1);
-	    break;
-
-	  case UNSPEC_LTREL_BASE:
-	    if (XVECLEN (base, 0) == 1)
-	      base = fake_pool_base, literal_pool = true;
-	    else
-	      base = XVECEXP (base, 0, 1);
-	    break;
-
-	  default:
-	    return false;
-	  }
-
-      if (!REG_P (base) || GET_MODE (base) != Pmode)
-	return false;
-
-      if (REGNO (base) == STACK_POINTER_REGNUM
-	  || REGNO (base) == FRAME_POINTER_REGNUM
-	  || ((reload_completed || reload_in_progress)
-	      && frame_pointer_needed
-	      && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
-	  || REGNO (base) == ARG_POINTER_REGNUM
-          || (flag_pic
-              && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
-        pointer = base_ptr = true;
-
-      if ((reload_completed || reload_in_progress)
-	  && base == cfun->machine->base_reg)
-        pointer = base_ptr = literal_pool = true;
-    }
+  if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr,
+					 &literal_pool))
+    return false;
 
   /* Validate index register.  */
-  if (indx)
-    {
-      if (GET_CODE (indx) == UNSPEC)
-	switch (XINT (indx, 1))
-	  {
-	  case UNSPEC_LTREF:
-	    if (!disp)
-	      disp = gen_rtx_UNSPEC (Pmode,
-				     gen_rtvec (1, XVECEXP (indx, 0, 0)),
-				     UNSPEC_LTREL_OFFSET);
-	    else
-	      return false;
-
-	    indx = XVECEXP (indx, 0, 1);
-	    break;
-
-	  case UNSPEC_LTREL_BASE:
-	    if (XVECLEN (indx, 0) == 1)
-	      indx = fake_pool_base, literal_pool = true;
-	    else
-	      indx = XVECEXP (indx, 0, 1);
-	    break;
-
-	  default:
-	    return false;
-	  }
-
-      if (!REG_P (indx) || GET_MODE (indx) != Pmode)
-	return false;
-
-      if (REGNO (indx) == STACK_POINTER_REGNUM
-	  || REGNO (indx) == FRAME_POINTER_REGNUM
-	  || ((reload_completed || reload_in_progress)
-	      && frame_pointer_needed
-	      && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
-	  || REGNO (indx) == ARG_POINTER_REGNUM
-          || (flag_pic
-              && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
-        pointer = indx_ptr = true;
-
-      if ((reload_completed || reload_in_progress)
-	  && indx == cfun->machine->base_reg)
-        pointer = indx_ptr = literal_pool = true;
-    }
+  if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr,
+					 &literal_pool))
+    return false;
 
   /* Prefer to use pointer as base, not index.  */
   if (base && indx && !base_ptr
@@ -2972,14 +2977,14 @@
       pointer = true;
 
       /* In the small-PIC case, the linker converts @GOT
-         and @GOTNTPOFF offsets to possible displacements.  */
+	 and @GOTNTPOFF offsets to possible displacements.  */
       if (GET_CODE (disp) == UNSPEC
-          && (XINT (disp, 1) == UNSPEC_GOT
+	  && (XINT (disp, 1) == UNSPEC_GOT
 	      || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
 	  && flag_pic == 1)
-        {
+	{
 	  ;
-        }
+	}
 
       /* Accept pool label offsets.  */
       else if (GET_CODE (disp) == UNSPEC
@@ -2989,7 +2994,7 @@
       /* Accept literal pool references.  */
       else if (GET_CODE (disp) == UNSPEC
 	       && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
-        {
+	{
 	  /* In case CSE pulled a non literal pool reference out of
 	     the pool we have to reject the address.  This is
 	     especially important when loading the GOT pointer on non
@@ -3009,9 +3014,9 @@
 	      if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
 		return false;
 
-              orig_disp = plus_constant (Pmode, orig_disp, offset);
-	    }
-        }
+	      orig_disp = plus_constant (Pmode, orig_disp, offset);
+	    }
+	}
 
       else
 	return false;
@@ -3105,8 +3110,7 @@
    Valid addresses are single references or a sum of a reference and a
   constant integer.  Return these parts in SYMREF and ADDEND.  You can
   pass NULL in SYMREF and/or ADDEND if you are not interested in these
-   values.  Literal pool references are *not* considered symbol
-   references.  */
+   values.  */
 
 static bool
 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
@@ -3125,10 +3129,10 @@
       addr = XEXP (addr, 0);
     }
 
-  if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
+  if (GET_CODE (addr) == SYMBOL_REF
       || (GET_CODE (addr) == UNSPEC
 	  && (XINT (addr, 1) == UNSPEC_GOTENT
-	      || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
+	      || XINT (addr, 1) == UNSPEC_PLT)))
     {
       if (symref)
 	*symref = addr;
@@ -3148,6 +3152,7 @@
 static int
 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
 {
+  rtx symref;
   struct s390_address addr;
   bool decomposed = false;
 
@@ -3156,7 +3161,10 @@
 
  /* This check makes sure that no symbolic addresses (except literal
     pool references) are accepted by the R or T constraints.  */
-  if (s390_loadrelative_operand_p (op, NULL, NULL))
+  if (s390_loadrelative_operand_p (op, &symref, NULL)
+      && (!lit_pool_ok
+          || !SYMBOL_REF_P (symref)
+          || !CONSTANT_POOL_ADDRESS_P (symref)))
     return 0;
 
   /* Ensure literal pool references are only accepted if LIT_POOL_OK.  */
@@ -3384,7 +3392,7 @@
 
 static int
 s390_register_move_cost (machine_mode mode,
-                         reg_class_t from, reg_class_t to)
+			 reg_class_t from, reg_class_t to)
 {
   /* On s390, copy between fprs and gprs is expensive.  */
 
@@ -3572,8 +3580,7 @@
 		  /* mulsidi case: mr, m */
 		  *total = s390_cost->m;
 		else if (GET_CODE (left) == ZERO_EXTEND
-			 && GET_CODE (right) == ZERO_EXTEND
-			 && TARGET_CPU_ZARCH)
+			 && GET_CODE (right) == ZERO_EXTEND)
 		  /* umulsidi case: ml, mlr */
 		  *total = s390_cost->ml;
 		else
@@ -3618,14 +3625,14 @@
 
     case UDIV:
     case UMOD:
-      if (mode == TImode) 	       /* 128 bit division */
+      if (mode == TImode)	       /* 128 bit division */
 	*total = s390_cost->dlgr;
       else if (mode == DImode)
 	{
 	  rtx right = XEXP (x, 1);
 	  if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
 	    *total = s390_cost->dlr;
-	  else 	                               /* 64 by 64 bit division */
+	  else				       /* 64 by 64 bit division */
 	    *total = s390_cost->dlgr;
 	}
       else if (mode == SImode)         /* 32 bit division */
@@ -3642,7 +3649,7 @@
 	      *total = s390_cost->dsgfr;
 	    else
 	      *total = s390_cost->dr;
-	  else 	                               /* 64 by 64 bit division */
+	  else				       /* 64 by 64 bit division */
 	    *total = s390_cost->dsgr;
 	}
       else if (mode == SImode)         /* 32 bit division */
@@ -3881,7 +3888,7 @@
     return 1;
 
   /* Accept immediate LARL operands.  */
-  if (TARGET_CPU_ZARCH && larl_operand (op, mode))
+  if (larl_operand (op, mode))
     return 1;
 
   /* Thread-local symbols are never legal constants.  This is
@@ -3923,7 +3930,7 @@
 
     case SYMBOL_REF:
       /* 'Naked' TLS symbol references are never OK,
-         non-TLS symbols are OK iff we are non-PIC.  */
+	 non-TLS symbols are OK iff we are non-PIC.  */
       if (tls_symbolic_operand (x))
 	return true;
       else
@@ -3955,8 +3962,6 @@
	/* If the literal pool shares the code section, we put
	   execute template placeholders into the pool as well.  */
 	case UNSPEC_INSN:
-	  return TARGET_CPU_ZARCH;
-
 	default:
 	  return true;
 	}
@@ -4002,8 +4007,7 @@
     return true;
 
   /* Accept larl operands.  */
-  if (TARGET_CPU_ZARCH
-      && larl_operand (op, VOIDmode))
+  if (larl_operand (op, VOIDmode))
     return true;
 
   /* Accept floating-point zero operands that fit into a single GPR.  */
@@ -4102,7 +4106,7 @@
       case CONST:
 	/* Symrefs cannot be pushed into the literal pool with -fPIC
 	   so we *MUST NOT* return NO_REGS for these cases
-	   (s390_cannot_force_const_mem will return true).  
+	   (s390_cannot_force_const_mem will return true).
 
 	   On the other hand we MUST return NO_REGS for symrefs with
 	   invalid addend which might have been pushed to the literal
@@ -4110,8 +4114,7 @@
 	   handled via secondary reload but this does not happen if
 	   they are used as literal pool slot replacement in reload
 	   inheritance (see emit_input_reload_insns).  */
-	if (TARGET_CPU_ZARCH
-	    && GET_CODE (XEXP (op, 0)) == PLUS
+	if (GET_CODE (XEXP (op, 0)) == PLUS
 	    && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
 	    && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
 	  {
@@ -4124,7 +4127,7 @@
       case LABEL_REF:
       case SYMBOL_REF:
 	if (!legitimate_reload_constant_p (op))
-          return NO_REGS;
+	  return NO_REGS;
 	/* fallthrough */
       case PLUS:
 	/* load address will be used.  */
@@ -4165,7 +4168,7 @@
   if (GET_CODE (symref) == SYMBOL_REF)
     {
       /* We have load-relative instructions for 2-byte, 4-byte, and
-         8-byte alignment so allow only these.  */
+	 8-byte alignment so allow only these.  */
       switch (alignment)
 	{
 	case 8:	return !SYMBOL_FLAG_NOTALIGN8_P (symref);
@@ -4325,10 +4328,10 @@
 	  case E_##M##mode:						\
 	    if (TARGET_64BIT)						\
 	      sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 :	\
-                                  CODE_FOR_reload##m##di_tomem_z10;	\
+				  CODE_FOR_reload##m##di_tomem_z10;	\
 	    else							\
-  	      sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 :	\
-                                  CODE_FOR_reload##m##si_tomem_z10;	\
+	      sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 :	\
+				  CODE_FOR_reload##m##si_tomem_z10;	\
 	  break;
 
 	  switch (GET_MODE (x))
@@ -4494,7 +4497,7 @@
       || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
     {
       /* Otherwise, one of the operands cannot be an address register;
-         we reload its value into the scratch register.  */
+	 we reload its value into the scratch register.  */
       if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
 	{
 	  emit_move_insn (scratch, sum1);
@@ -4507,9 +4510,9 @@
 	}
 
       /* According to the way these invalid addresses are generated
-         in reload.c, it should never happen (at least on s390) that
-         *neither* of the PLUS components, after find_replacements
-         was applied, is an address register.  */
+	 in reload.c, it should never happen (at least on s390) that
+	 *neither* of the PLUS components, after find_replacements
+	 was applied, is an address register.  */
       if (sum1 == scratch && sum2 == scratch)
 	{
 	  debug_rtx (src);
@@ -4598,11 +4601,11 @@
   if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
     return false;
 
-  /* Avoid LA instructions with index register on z196; it is
-     preferable to use regular add instructions when possible.
-     Starting with zEC12 the la with index register is "uncracked"
-     again.  */
-  if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
+  /* Avoid LA instructions with index (and base) register on z196 or
+     later; it is preferable to use regular add instructions when
+     possible.  Starting with zEC12 the la with index register is
+     "uncracked" again but still slower than a regular add.  */
+  if (addr.indx && s390_tune >= PROCESSOR_2817_Z196)
     return false;
 
   if (!TARGET_64BIT && !addr.pointer)
@@ -4691,7 +4694,7 @@
        || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
        || (GET_CODE (addr) == UNSPEC &&
 	   (XINT (addr, 1) == UNSPEC_GOTENT
-	    || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
+	    || XINT (addr, 1) == UNSPEC_PLT)))
       && GET_CODE (addend) == CONST_INT)
     {
       /* This can be locally addressed.  */
@@ -4700,8 +4703,7 @@
       rtx const_addr = (GET_CODE (addr) == UNSPEC ?
 			gen_rtx_CONST (Pmode, addr) : addr);
 
-      if (TARGET_CPU_ZARCH
-	  && larl_operand (const_addr, VOIDmode)
+      if (larl_operand (const_addr, VOIDmode)
 	  && INTVAL (addend) < HOST_WIDE_INT_1 << 31
 	  && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
 	{
@@ -4773,7 +4775,7 @@
 	 that case.  So no need to do it here.  */
 
       if (reg == 0)
-        reg = gen_reg_rtx (Pmode);
+	reg = gen_reg_rtx (Pmode);
 
       if (TARGET_Z10)
 	{
@@ -4787,70 +4789,42 @@
 	  new_rtx = reg;
 	}
       else if (flag_pic == 1)
-        {
-          /* Assume GOT offset is a valid displacement operand (< 4k
-             or < 512k with z990).  This is handled the same way in
-             both 31- and 64-bit code (@GOT).
-             lg <target>, sym@GOT(r12)  */
+	{
+	  /* Assume GOT offset is a valid displacement operand (< 4k
+	     or < 512k with z990).  This is handled the same way in
+	     both 31- and 64-bit code (@GOT).
+	     lg <target>, sym@GOT(r12)  */
 
 	  if (reload_in_progress || reload_completed)
 	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
 
-          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
-          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
-          new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
-          new_rtx = gen_const_mem (Pmode, new_rtx);
-          emit_move_insn (reg, new_rtx);
-          new_rtx = reg;
-        }
-      else if (TARGET_CPU_ZARCH)
-        {
-          /* If the GOT offset might be >= 4k, we determine the position
-             of the GOT entry via a PC-relative LARL (@GOTENT).
+	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
+	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+	  new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
+	  new_rtx = gen_const_mem (Pmode, new_rtx);
+	  emit_move_insn (reg, new_rtx);
+	  new_rtx = reg;
+	}
+      else
+	{
+	  /* If the GOT offset might be >= 4k, we determine the position
+	     of the GOT entry via a PC-relative LARL (@GOTENT).
 	     larl temp, sym@GOTENT
-             lg   <target>, 0(temp) */
-
-          rtx temp = reg ? reg : gen_reg_rtx (Pmode);
+	     lg   <target>, 0(temp) */
+
+	  rtx temp = reg ? reg : gen_reg_rtx (Pmode);
 
 	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
 		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
 
-          new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
-          new_rtx = gen_rtx_CONST (Pmode, new_rtx);
+	  new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
+	  new_rtx = gen_rtx_CONST (Pmode, new_rtx);
 	  emit_move_insn (temp, new_rtx);
-
 	  new_rtx = gen_const_mem (Pmode, temp);
-          emit_move_insn (reg, new_rtx);
-
-          new_rtx = reg;
-        }
-      else
-        {
-          /* If the GOT offset might be >= 4k, we have to load it
-             from the literal pool (@GOT).
-
-	     lg temp, lit-litbase(r13)
-             lg <target>, 0(temp)
-	     lit:  .long sym@GOT  */
-
-          rtx temp = reg ? reg : gen_reg_rtx (Pmode);
-
-	  gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
-		      || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
-
-	  if (reload_in_progress || reload_completed)
-	    df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
-
-          addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
-          addr = gen_rtx_CONST (Pmode, addr);
-          addr = force_const_mem (Pmode, addr);
-          emit_move_insn (temp, addr);
-
-          new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
-          new_rtx = gen_const_mem (Pmode, new_rtx);
-          emit_move_insn (reg, new_rtx);
-          new_rtx = reg;
-        }
+	  emit_move_insn (reg, new_rtx);
+
+	  new_rtx = reg;
+	}
     }
   else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
     {
@@ -4879,36 +4853,10 @@
 	  gcc_unreachable ();
 	  break;
 
-	  /* @PLT is OK as is on 64-bit, must be converted to
-	     GOT-relative @PLTOFF on 31-bit.  */
+	  /* For @PLT larl is used.  This is handled like local
+	     symbol refs.  */
 	case UNSPEC_PLT:
-	  if (!TARGET_CPU_ZARCH)
-	    {
-	      rtx temp = reg? reg : gen_reg_rtx (Pmode);
-
-	      if (reload_in_progress || reload_completed)
-		df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
-
-	      addr = XVECEXP (addr, 0, 0);
-	      addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
-				     UNSPEC_PLTOFF);
-	      if (addend != const0_rtx)
-		addr = gen_rtx_PLUS (Pmode, addr, addend);
-	      addr = gen_rtx_CONST (Pmode, addr);
-	      addr = force_const_mem (Pmode, addr);
-	      emit_move_insn (temp, addr);
-
-	      new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
-	      if (reg != 0)
-		{
-		  s390_load_address (reg, new_rtx);
-		  new_rtx = reg;
-		}
-	    }
-	  else
-	    /* On 64 bit larl can be used.  This case is handled like
-	       local symbol refs.  */
-	    gcc_unreachable ();
+	  gcc_unreachable ();
 	  break;
 
 	  /* Everything else cannot happen.  */
@@ -5065,7 +5013,7 @@
 	    temp = gen_reg_rtx (Pmode);
 	    emit_move_insn (temp, new_rtx);
 	  }
-	else if (TARGET_CPU_ZARCH)
+	else
 	  {
 	    /* If the GOT offset might be >= 4k, we determine the position
 	       of the GOT entry via a PC-relative LARL.  */
@@ -5079,44 +5027,6 @@
 	    temp = gen_reg_rtx (Pmode);
 	    emit_move_insn (temp, new_rtx);
 	  }
-	else if (flag_pic)
-	  {
-	    /* If the GOT offset might be >= 4k, we have to load it
-	       from the literal pool.  */
-
-	    if (reload_in_progress || reload_completed)
-	      df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
-
-	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
-	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
-	    new_rtx = force_const_mem (Pmode, new_rtx);
-	    temp = gen_reg_rtx (Pmode);
-	    emit_move_insn (temp, new_rtx);
-
-            new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
-	    new_rtx = gen_const_mem (Pmode, new_rtx);
-
-	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
-	    temp = gen_reg_rtx (Pmode);
-	    emit_insn (gen_rtx_SET (temp, new_rtx));
-	  }
-	else
-	  {
-	    /* In position-dependent code, load the absolute address of
-	       the GOT entry from the literal pool.  */
-
-	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
-	    new_rtx = gen_rtx_CONST (Pmode, new_rtx);
-	    new_rtx = force_const_mem (Pmode, new_rtx);
-	    temp = gen_reg_rtx (Pmode);
-	    emit_move_insn (temp, new_rtx);
-
-	    new_rtx = temp;
-	    new_rtx = gen_const_mem (Pmode, new_rtx);
-	    new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
-	    temp = gen_reg_rtx (Pmode);
-	    emit_insn (gen_rtx_SET (temp, new_rtx));
-	  }
 
 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
 	if (reg != 0)
@@ -5130,7 +5040,7 @@
 	new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
 	new_rtx = gen_rtx_CONST (Pmode, new_rtx);
 	new_rtx = force_const_mem (Pmode, new_rtx);
-        temp = gen_reg_rtx (Pmode);
+	temp = gen_reg_rtx (Pmode);
 	emit_move_insn (temp, new_rtx);
 
 	new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
@@ -5150,7 +5060,6 @@
       switch (XINT (XEXP (addr, 0), 1))
 	{
 	case UNSPEC_INDNTPOFF:
-	  gcc_assert (TARGET_CPU_ZARCH);
 	  new_rtx = addr;
 	  break;
 
@@ -5229,9 +5138,9 @@
   else if (flag_pic)
     {
       if (SYMBOLIC_CONST (x)
-          || (GET_CODE (x) == PLUS
-              && (SYMBOLIC_CONST (XEXP (x, 0))
-                  || SYMBOLIC_CONST (XEXP (x, 1)))))
+	  || (GET_CODE (x) == PLUS
+	      && (SYMBOLIC_CONST (XEXP (x, 0))
+		  || SYMBOLIC_CONST (XEXP (x, 1)))))
 	  x = legitimize_pic_address (x, 0);
 
       if (s390_legitimate_address_p (mode, x, FALSE))
@@ -5388,7 +5297,7 @@
 
       mode = GET_MODE (len);
       if (mode == VOIDmode)
-        mode = Pmode;
+	mode = Pmode;
 
       dst_addr = gen_reg_rtx (Pmode);
       src_addr = gen_reg_rtx (Pmode);
@@ -5407,12 +5316,12 @@
       temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
 			   OPTAB_DIRECT);
       if (temp != count)
-        emit_move_insn (count, temp);
+	emit_move_insn (count, temp);
 
       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
 			   OPTAB_DIRECT);
       if (temp != blocks)
-        emit_move_insn (blocks, temp);
+	emit_move_insn (blocks, temp);
 
       emit_cmp_and_jump_insns (blocks, const0_rtx,
 			       EQ, NULL_RTX, mode, 1, loop_end_label);
@@ -5446,7 +5355,7 @@
       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
 			   OPTAB_DIRECT);
       if (temp != blocks)
-        emit_move_insn (blocks, temp);
+	emit_move_insn (blocks, temp);
 
       emit_cmp_and_jump_insns (blocks, const0_rtx,
 			       EQ, NULL_RTX, mode, 1, loop_end_label);
@@ -5474,12 +5383,15 @@
 
   /* Expand setmem/clrmem for a constant length operand without a
      loop if it will be shorter that way.
-     With a constant length and without pfd argument a
-     clrmem loop is 32 bytes -> 5.3 * xc
-     setmem loop is 36 bytes -> 3.6 * (mvi/stc + mvc) */
+     clrmem loop (with PFD)    is 30 bytes -> 5 * xc
+     clrmem loop (without PFD) is 24 bytes -> 4 * xc
+     setmem loop (with PFD)    is 38 bytes -> ~4 * (mvi/stc + mvc)
+     setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */
   if (GET_CODE (len) == CONST_INT
-      && ((INTVAL (len) <= 256 * 5 && val == const0_rtx)
-	  || INTVAL (len) <= 257 * 3)
+      && ((val == const0_rtx
+	   && (INTVAL (len) <= 256 * 4
+	       || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD (val, len))))
+	  || (val != const0_rtx && INTVAL (len) <= 257 * 4))
       && (!TARGET_MVCLE || INTVAL (len) <= 256))
     {
       HOST_WIDE_INT o, l;
@@ -5593,12 +5505,11 @@
 
       emit_label (loop_start_label);
 
-      if (TARGET_Z10
-	  && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
-	{
-	  /* Issue a write prefetch for the +4 cache line.  */
-	  rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
-						     GEN_INT (1024)),
+      if (TARGET_SETMEM_PFD (val, len))
+	{
+	  /* Issue a write prefetch.  */
+	  rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE);
+	  rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance),
 				       const1_rtx, const0_rtx);
 	  emit_insn (prefetch);
 	  PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
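
The prefetch gating moved into two macros defined in s390.h.  A hedged
guess at their shape, inferred only from the inline condition they
replace (TARGET_Z10 && (len not constant or > 1024)); the real
definitions may also consult VAL:

    /* Sketch only; not the definitions from this patch.  */
    #define TARGET_SETMEM_PREFETCH_DISTANCE 1024
    #define TARGET_SETMEM_PFD(VAL, LEN) \
      (TARGET_Z10 \
       && (!CONST_INT_P (LEN) \
	   || INTVAL (LEN) > TARGET_SETMEM_PREFETCH_DISTANCE))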
@@ -5666,12 +5577,12 @@
   if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
     {
       if (INTVAL (len) > 0)
-        {
-          emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
-          emit_insn (gen_cmpint (target, ccreg));
-        }
-      else
-        emit_move_insn (target, const0_rtx);
+	{
+	  emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
+	  emit_insn (gen_cmpint (target, ccreg));
+	}
+      else
+	emit_move_insn (target, const0_rtx);
     }
   else if (TARGET_MVCLE)
     {
@@ -5688,7 +5599,7 @@
 
       mode = GET_MODE (len);
       if (mode == VOIDmode)
-        mode = Pmode;
+	mode = Pmode;
 
       addr0 = gen_reg_rtx (Pmode);
       addr1 = gen_reg_rtx (Pmode);
@@ -5707,12 +5618,12 @@
       temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
 			   OPTAB_DIRECT);
       if (temp != count)
-        emit_move_insn (count, temp);
+	emit_move_insn (count, temp);
 
       temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
 			   OPTAB_DIRECT);
       if (temp != blocks)
-        emit_move_insn (blocks, temp);
+	emit_move_insn (blocks, temp);
 
       emit_cmp_and_jump_insns (blocks, const0_rtx,
 			       EQ, NULL_RTX, mode, 1, loop_end_label);
@@ -5752,7 +5663,7 @@
       temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
 			   OPTAB_DIRECT);
       if (temp != blocks)
-        emit_move_insn (blocks, temp);
+	emit_move_insn (blocks, temp);
 
       emit_cmp_and_jump_insns (blocks, const0_rtx,
 			       EQ, NULL_RTX, mode, 1, loop_end_label);
@@ -5888,7 +5799,7 @@
 			    highest_index_to_load_reg, str_idx_reg));
 
   add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
-		        profile_probability::very_unlikely ());
+			profile_probability::very_unlikely ());
 
   expand_binop (Pmode, add_optab, str_idx_reg,
 		GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
@@ -6122,7 +6033,7 @@
 
       p = rtvec_alloc (2);
       RTVEC_ELT (p, 0) =
-        gen_rtx_SET (dst, op_res);
+	gen_rtx_SET (dst, op_res);
       RTVEC_ELT (p, 1) =
 	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
       emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
@@ -6189,7 +6100,7 @@
 					      const0_rtx));
       p = rtvec_alloc (2);
       RTVEC_ELT (p, 0) =
-        gen_rtx_SET (dst, op_res);
+	gen_rtx_SET (dst, op_res);
       RTVEC_ELT (p, 1) =
 	gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
       emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
@@ -6719,11 +6630,16 @@
       return;
     }
 
+  /* Use vector replicate instructions.  vlrep/vrepi/vrep  */
   if (all_same)
     {
-      emit_insn (gen_rtx_SET (target,
-			      gen_rtx_VEC_DUPLICATE (mode,
-						     XVECEXP (vals, 0, 0))));
+      rtx elem = XVECEXP (vals, 0, 0);
+
+      /* vec_splats accepts general_operand as source.  */
+      if (!general_operand (elem, GET_MODE (elem)))
+	elem = force_reg (inner_mode, elem);
+
+      emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem)));
       return;
     }
 
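Why the new force_reg may trigger, as an editor's example (the concrete
operand kind is an assumption): an element such as a volatile MEM is not
a general_operand, and the VEC_DUPLICATE patterns only match general
operands, so the element is first loaded into an inner_mode register and
the usual vlrep/vrep patterns then match.
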
@@ -7020,8 +6936,8 @@
 	  emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
 	}
       s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
-      add_reg_br_prob_note (get_last_insn (), 
-		            profile_probability::very_unlikely ());
+      add_reg_br_prob_note (get_last_insn (),
+			    profile_probability::very_unlikely ());
       /* If the jump is not taken, OUTPUT is the expected value.  */
       cmp = output;
       /* Reload newval to a register manually, *after* the compare and jump
@@ -7288,12 +7204,12 @@
 
   /* Extract the symbol ref from:
      (plus:SI (reg:SI 12 %r12)
-              (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
-	                            UNSPEC_GOTOFF/PLTOFF)))
+	      (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
+				    UNSPEC_GOTOFF/PLTOFF)))
      and
      (plus:SI (reg:SI 12 %r12)
-              (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
-                                             UNSPEC_GOTOFF/PLTOFF)
+	      (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
+					     UNSPEC_GOTOFF/PLTOFF)
 				 (const_int 4 [0x4]))))  */
   if (GET_CODE (x) == PLUS
       && REG_P (XEXP (x, 0))
@@ -7338,7 +7254,7 @@
     {
       /* Extract the symbol ref from:
 	 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
-	                               UNSPEC_PLT/GOTENT)))  */
+				       UNSPEC_PLT/GOTENT)))  */
 
       y = XEXP (x, 0);
       if (GET_CODE (y) == UNSPEC
@@ -7517,10 +7433,11 @@
 	 NOPs.  */
       function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
       if (! DECL_USER_ALIGN (decl))
-	function_alignment = MAX (function_alignment,
-				  (unsigned int) align_functions);
+	function_alignment
+	  = MAX (function_alignment,
+		 (unsigned int) align_functions.levels[0].get_value ());
       fputs ("\t# alignment for hotpatch\n", asm_out_file);
-      ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
+      ASM_OUTPUT_ALIGN (asm_out_file, align_functions.levels[0].log);
     }
 
   if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
@@ -7652,7 +7569,7 @@
 
   if (ad.base && ad.indx)
     fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
-                              reg_names[REGNO (ad.base)]);
+			      reg_names[REGNO (ad.base)]);
   else if (ad.base)
     fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
 }
@@ -7686,7 +7603,7 @@
     'o': print integer X as if it's an unsigned 32bit word.
     's': "start" of contiguous bitmask X in either DImode or vector inner mode.
     't': CONST_INT: "start" of contiguous bitmask X in SImode.
-         CONST_VECTOR: Generate a bitmask for vgbm instruction.
+	 CONST_VECTOR: Generate a bitmask for vgbm instruction.
     'x': print integer X as if it's an unsigned halfword.
     'v': print register number as vector register (v1 instead of f1).
 */
@@ -7744,7 +7661,7 @@
 
     case 'O':
       {
-        struct s390_address ad;
+	struct s390_address ad;
 	int ret;
 
 	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
@@ -7757,16 +7674,16 @@
 	    return;
 	  }
 
-        if (ad.disp)
-          output_addr_const (file, ad.disp);
-        else
-          fprintf (file, "0");
+	if (ad.disp)
+	  output_addr_const (file, ad.disp);
+	else
+	  fprintf (file, "0");
       }
       return;
 
     case 'R':
       {
-        struct s390_address ad;
+	struct s390_address ad;
 	int ret;
 
 	ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
@@ -7779,10 +7696,10 @@
 	    return;
 	  }
 
-        if (ad.base)
-          fprintf (file, "%s", reg_names[REGNO (ad.base)]);
-        else
-          fprintf (file, "0");
+	if (ad.base)
+	  fprintf (file, "%s", reg_names[REGNO (ad.base)]);
+	else
+	  fprintf (file, "0");
       }
       return;
 
@@ -7928,13 +7845,13 @@
 
     case CONST_WIDE_INT:
       if (code == 'b')
-        fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
 		 CONST_WIDE_INT_ELT (x, 0) & 0xff);
       else if (code == 'x')
-        fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
 		 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
       else if (code == 'h')
-        fprintf (file, HOST_WIDE_INT_PRINT_DEC,
+	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
 		 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
       else
 	{
@@ -8137,7 +8054,7 @@
 	priority = priority << 1;
 	break;
       default:
-        break;
+	break;
     }
   return priority;
 }
@@ -8157,8 +8074,6 @@
       return 3;
     case PROCESSOR_2097_Z10:
       return 2;
-    case PROCESSOR_9672_G5:
-    case PROCESSOR_9672_G6:
     case PROCESSOR_2064_Z900:
       /* Starting with EC12 we use the sched_reorder hook to take care
 	 of instruction dispatch constraints.  The algorithm only
@@ -8256,119 +8171,20 @@
 	}
     }
 
-  /* Annotate LTREL_BASE as well.  */
-  if (GET_CODE (*x) == UNSPEC
-      && XINT (*x, 1) == UNSPEC_LTREL_BASE)
-    {
-      rtx base = cfun->machine->base_reg;
-      *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
-				  UNSPEC_LTREL_BASE);
-      return;
-    }
-
   fmt = GET_RTX_FORMAT (GET_CODE (*x));
   for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
     {
       if (fmt[i] == 'e')
-        {
-          annotate_constant_pool_refs (&XEXP (*x, i));
-        }
+	{
+	  annotate_constant_pool_refs (&XEXP (*x, i));
+	}
       else if (fmt[i] == 'E')
-        {
-          for (j = 0; j < XVECLEN (*x, i); j++)
-            annotate_constant_pool_refs (&XVECEXP (*x, i, j));
-        }
-    }
-}
-
-/* Split all branches that exceed the maximum distance.
-   Returns true if this created a new literal pool entry.  */
-
-static int
-s390_split_branches (void)
-{
-  rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
-  int new_literal = 0, ret;
-  rtx_insn *insn;
-  rtx pat, target;
-  rtx *label;
-
-  /* We need correct insn addresses.  */
-
-  shorten_branches (get_insns ());
-
-  /* Find all branches that exceed 64KB, and split them.  */
-
-  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
-    {
-      if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
-	continue;
-
-      pat = PATTERN (insn);
-      if (GET_CODE (pat) == PARALLEL)
-	pat = XVECEXP (pat, 0, 0);
-      if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
-	continue;
-
-      if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
-	{
-	  label = &SET_SRC (pat);
-	}
-      else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
-	{
-	  if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
-	    label = &XEXP (SET_SRC (pat), 1);
-          else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
-            label = &XEXP (SET_SRC (pat), 2);
-	  else
-	    continue;
-        }
-      else
-	continue;
-
-      if (get_attr_length (insn) <= 4)
-	continue;
-
-      /* We are going to use the return register as scratch register,
-	 make sure it will be saved/restored by the prologue/epilogue.  */
-      cfun_frame_layout.save_return_addr_p = 1;
-
-      if (!flag_pic)
-	{
-	  new_literal = 1;
-	  rtx mem = force_const_mem (Pmode, *label);
-	  rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
-						 insn);
-	  INSN_ADDRESSES_NEW (set_insn, -1);
-	  annotate_constant_pool_refs (&PATTERN (set_insn));
-
-	  target = temp_reg;
-	}
-      else
-	{
-	  new_literal = 1;
-	  target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
-				   UNSPEC_LTREL_OFFSET);
-	  target = gen_rtx_CONST (Pmode, target);
-	  target = force_const_mem (Pmode, target);
-	  rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
-						 insn);
-	  INSN_ADDRESSES_NEW (set_insn, -1);
-	  annotate_constant_pool_refs (&PATTERN (set_insn));
-
-          target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
-							cfun->machine->base_reg),
-				   UNSPEC_LTREL_BASE);
-	  target = gen_rtx_PLUS (Pmode, temp_reg, target);
-	}
-
-      ret = validate_change (insn, label, target, 0);
-      gcc_assert (ret);
-    }
-
-  return new_literal;
-}
-
+	{
+	  for (j = 0; j < XVECLEN (*x, i); j++)
+	    annotate_constant_pool_refs (&XVECEXP (*x, i, j));
+	}
+    }
+}
 
 /* Find an annotated literal pool symbol referenced in RTX X,
    and store it at REF.  Will abort if X contains references to
@@ -8384,23 +8200,19 @@
   int i, j;
   const char *fmt;
 
-  /* Ignore LTREL_BASE references.  */
-  if (GET_CODE (x) == UNSPEC
-      && XINT (x, 1) == UNSPEC_LTREL_BASE)
-    return;
   /* Likewise POOL_ENTRY insns.  */
   if (GET_CODE (x) == UNSPEC_VOLATILE
       && XINT (x, 1) == UNSPECV_POOL_ENTRY)
     return;
 
   gcc_assert (GET_CODE (x) != SYMBOL_REF
-              || !CONSTANT_POOL_ADDRESS_P (x));
+	      || !CONSTANT_POOL_ADDRESS_P (x));
 
   if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
     {
       rtx sym = XVECEXP (x, 0, 0);
       gcc_assert (GET_CODE (sym) == SYMBOL_REF
-	          && CONSTANT_POOL_ADDRESS_P (sym));
+		  && CONSTANT_POOL_ADDRESS_P (sym));
 
       if (*ref == NULL_RTX)
 	*ref = sym;
@@ -8414,14 +8226,14 @@
   for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
     {
       if (fmt[i] == 'e')
-        {
-          find_constant_pool_ref (XEXP (x, i), ref);
-        }
+	{
+	  find_constant_pool_ref (XEXP (x, i), ref);
+	}
       else if (fmt[i] == 'E')
-        {
-          for (j = 0; j < XVECLEN (x, i); j++)
-            find_constant_pool_ref (XVECEXP (x, i, j), ref);
-        }
+	{
+	  for (j = 0; j < XVECLEN (x, i); j++)
+	    find_constant_pool_ref (XVECEXP (x, i, j), ref);
+	}
     }
 }
 
@@ -8459,83 +8271,16 @@
   for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
     {
       if (fmt[i] == 'e')
-        {
-          replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
-        }
-      else if (fmt[i] == 'E')
-        {
-          for (j = 0; j < XVECLEN (*x, i); j++)
-            replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
-        }
-    }
-}
-
-/* Check whether X contains an UNSPEC_LTREL_BASE.
-   Return its constant pool symbol if found, NULL_RTX otherwise.  */
-
-static rtx
-find_ltrel_base (rtx x)
-{
-  int i, j;
-  const char *fmt;
-
-  if (GET_CODE (x) == UNSPEC
-      && XINT (x, 1) == UNSPEC_LTREL_BASE)
-    return XVECEXP (x, 0, 0);
-
-  fmt = GET_RTX_FORMAT (GET_CODE (x));
-  for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
-    {
-      if (fmt[i] == 'e')
-        {
-          rtx fnd = find_ltrel_base (XEXP (x, i));
-	  if (fnd)
-	    return fnd;
-        }
+	{
+	  replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
+	}
       else if (fmt[i] == 'E')
-        {
-          for (j = 0; j < XVECLEN (x, i); j++)
-	    {
-              rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
-	      if (fnd)
-		return fnd;
-	    }
-        }
-    }
-
-  return NULL_RTX;
-}
-
-/* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base.  */
-
-static void
-replace_ltrel_base (rtx *x)
-{
-  int i, j;
-  const char *fmt;
-
-  if (GET_CODE (*x) == UNSPEC
-      && XINT (*x, 1) == UNSPEC_LTREL_BASE)
-    {
-      *x = XVECEXP (*x, 0, 1);
-      return;
-    }
-
-  fmt = GET_RTX_FORMAT (GET_CODE (*x));
-  for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
-    {
-      if (fmt[i] == 'e')
-        {
-          replace_ltrel_base (&XEXP (*x, i));
-        }
-      else if (fmt[i] == 'E')
-        {
-          for (j = 0; j < XVECLEN (*x, i); j++)
-            replace_ltrel_base (&XVECEXP (*x, i, j));
-        }
-    }
-}
-
+	{
+	  for (j = 0; j < XVECLEN (*x, i); j++)
+	    replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
+	}
+    }
+}
 
 /* We keep a list of constants which we have to add to internal
    constant tables in the middle of large functions.  */
@@ -8729,37 +8474,29 @@
 static rtx
 s390_execute_label (rtx insn)
 {
-  if (NONJUMP_INSN_P (insn)
+  if (INSN_P (insn)
       && GET_CODE (PATTERN (insn)) == PARALLEL
       && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
-      && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
-    return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
+      && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE
+	  || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP))
+    {
+      if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
+	return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
+      else
+	{
+	  gcc_assert (JUMP_P (insn));
+	  /* For jump insns as execute target:
+	     - The parallel has one operand fewer (the modification
+	       register of the execute is always 0).
+	     - The execute target label is wrapped into an
+	       if_then_else in order to hide it from jump analysis.  */
+	  return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0);
+	}
+    }
 
   return NULL_RTX;
 }
 
-/* Add execute target for INSN to the constant pool POOL.  */
-
-static void
-s390_add_execute (struct constant_pool *pool, rtx insn)
-{
-  struct constant *c;
-
-  for (c = pool->execute; c != NULL; c = c->next)
-    if (INSN_UID (insn) == INSN_UID (c->value))
-      break;
-
-  if (c == NULL)
-    {
-      c = (struct constant *) xmalloc (sizeof *c);
-      c->value = insn;
-      c->label = gen_label_rtx ();
-      c->next = pool->execute;
-      pool->execute = c;
-      pool->size += 6;
-    }
-}
-
 /* Find execute target for INSN in the constant pool POOL.
    Return an RTX describing the distance from the start of
    the pool to the location of the execute target.  */
@@ -8825,17 +8562,11 @@
   int i;
 
   /* Switch to rodata section.  */
-  if (TARGET_CPU_ZARCH)
-    {
-      insn = emit_insn_after (gen_pool_section_start (), insn);
-      INSN_ADDRESSES_NEW (insn, -1);
-    }
+  insn = emit_insn_after (gen_pool_section_start (), insn);
+  INSN_ADDRESSES_NEW (insn, -1);
 
   /* Ensure minimum pool alignment.  */
-  if (TARGET_CPU_ZARCH)
-    insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
-  else
-    insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
+  insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
   INSN_ADDRESSES_NEW (insn, -1);
 
   /* Emit pool base label.  */
@@ -8883,11 +8614,8 @@
     }
 
   /* Switch back to previous section.  */
-  if (TARGET_CPU_ZARCH)
-    {
-      insn = emit_insn_after (gen_pool_section_end (), insn);
-      INSN_ADDRESSES_NEW (insn, -1);
-    }
+  insn = emit_insn_after (gen_pool_section_end (), insn);
+  INSN_ADDRESSES_NEW (insn, -1);
 
   insn = emit_barrier_after (insn);
   INSN_ADDRESSES_NEW (insn, -1);
@@ -8951,11 +8679,7 @@
 	  pool->pool_insn = insn;
 	}
 
-      if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
-	{
-	  s390_add_execute (pool, insn);
-	}
-      else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
+      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
 	{
 	  rtx pool_ref = NULL_RTX;
 	  find_constant_pool_ref (PATTERN (insn), &pool_ref);
@@ -9007,6 +8731,8 @@
 s390_mainpool_finish (struct constant_pool *pool)
 {
   rtx base_reg = cfun->machine->base_reg;
+  rtx set;
+  rtx_insn *insn;
 
   /* If the pool is empty, we're done.  */
   if (pool->size == 0)
@@ -9023,95 +8749,39 @@
   /* We need correct insn addresses.  */
   shorten_branches (get_insns ());
 
-  /* On zSeries, we use a LARL to load the pool register.  The pool is
+  /* Use a LARL to load the pool register.  The pool is
      located in the .rodata section, so we emit it after the function.  */
-  if (TARGET_CPU_ZARCH)
-    {
-      rtx set = gen_main_base_64 (base_reg, pool->label);
-      rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
-      INSN_ADDRESSES_NEW (insn, -1);
-      remove_insn (pool->pool_insn);
-
-      insn = get_last_insn ();
-      pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
-      INSN_ADDRESSES_NEW (pool->pool_insn, -1);
-
-      s390_dump_pool (pool, 0);
-    }
-
-  /* On S/390, if the total size of the function's code plus literal pool
-     does not exceed 4096 bytes, we use BASR to set up a function base
-     pointer, and emit the literal pool at the end of the function.  */
-  else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
-	   + pool->size + 8 /* alignment slop */ < 4096)
-    {
-      rtx set = gen_main_base_31_small (base_reg, pool->label);
-      rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
-      INSN_ADDRESSES_NEW (insn, -1);
-      remove_insn (pool->pool_insn);
-
-      insn = emit_label_after (pool->label, insn);
-      INSN_ADDRESSES_NEW (insn, -1);
-
-      /* emit_pool_after will be set by s390_mainpool_start to the
-	 last insn of the section where the literal pool should be
-	 emitted.  */
-      insn = pool->emit_pool_after;
-
-      pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
-      INSN_ADDRESSES_NEW (pool->pool_insn, -1);
-
-      s390_dump_pool (pool, 1);
-    }
-
-  /* Otherwise, we emit an inline literal pool and use BASR to branch
-     over it, setting up the pool register at the same time.  */
-  else
-    {
-      rtx_code_label *pool_end = gen_label_rtx ();
-
-      rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
-      rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
-      JUMP_LABEL (insn) = pool_end;
-      INSN_ADDRESSES_NEW (insn, -1);
-      remove_insn (pool->pool_insn);
-
-      insn = emit_label_after (pool->label, insn);
-      INSN_ADDRESSES_NEW (insn, -1);
-
-      pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
-      INSN_ADDRESSES_NEW (pool->pool_insn, -1);
-
-      insn = emit_label_after (pool_end, pool->pool_insn);
-      INSN_ADDRESSES_NEW (insn, -1);
-
-      s390_dump_pool (pool, 1);
-    }
-
+  set = gen_main_base_64 (base_reg, pool->label);
+  insn = emit_insn_after (set, pool->pool_insn);
+  INSN_ADDRESSES_NEW (insn, -1);
+  remove_insn (pool->pool_insn);
+
+  insn = get_last_insn ();
+  pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
+  INSN_ADDRESSES_NEW (pool->pool_insn, -1);
+
+  s390_dump_pool (pool, 0);
 
   /* Replace all literal pool references.  */
 
   for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
     {
-      if (INSN_P (insn))
-	replace_ltrel_base (&PATTERN (insn));
-
       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
-        {
-          rtx addr, pool_ref = NULL_RTX;
-          find_constant_pool_ref (PATTERN (insn), &pool_ref);
-          if (pool_ref)
-            {
+	{
+	  rtx addr, pool_ref = NULL_RTX;
+	  find_constant_pool_ref (PATTERN (insn), &pool_ref);
+	  if (pool_ref)
+	    {
 	      if (s390_execute_label (insn))
 		addr = s390_find_execute (pool, insn);
 	      else
 		addr = s390_find_constant (pool, get_pool_constant (pool_ref),
 						 get_pool_mode (pool_ref));
 
-              replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
-              INSN_CODE (insn) = -1;
-            }
-        }
+	      replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
+	      INSN_CODE (insn) = -1;
+	    }
+	}
     }
 
 
@@ -9119,18 +8789,6 @@
   s390_free_pool (pool);
 }
 
-/* POOL holds the main literal pool as collected by s390_mainpool_start.
-   We have decided we cannot use this pool, so revert all changes
-   to the current function that were done by s390_mainpool_start.  */
-static void
-s390_mainpool_cancel (struct constant_pool *pool)
-{
-  /* We didn't actually change the instruction stream, so simply
-     free the pool memory.  */
-  s390_free_pool (pool);
-}
-
-
 /* Chunkify the literal pool.  */
 
 #define S390_POOL_CHUNK_MIN	0xc00
@@ -9140,15 +8798,9 @@
 s390_chunkify_start (void)
 {
   struct constant_pool *curr_pool = NULL, *pool_list = NULL;
-  int extra_size = 0;
   bitmap far_labels;
-  rtx pending_ltrel = NULL_RTX;
   rtx_insn *insn;
 
-  rtx (*gen_reload_base) (rtx, rtx) =
-    TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31;
-
-
   /* We need correct insn addresses.  */
 
   shorten_branches (get_insns ());
@@ -9157,28 +8809,7 @@
 
   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
     {
-      bool section_switch_p = false;
-
-      /* Check for pending LTREL_BASE.  */
-      if (INSN_P (insn))
-	{
-	  rtx ltrel_base = find_ltrel_base (PATTERN (insn));
-	  if (ltrel_base)
-	    {
-	      gcc_assert (ltrel_base == pending_ltrel);
-	      pending_ltrel = NULL_RTX;
-	    }
-	}
-
-      if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
-	{
-	  if (!curr_pool)
-	    curr_pool = s390_start_pool (&pool_list, insn);
-
-	  s390_add_execute (curr_pool, insn);
-	  s390_add_pool_insn (curr_pool, insn);
-	}
-      else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
+      if (NONJUMP_INSN_P (insn) || CALL_P (insn))
 	{
 	  rtx pool_ref = NULL_RTX;
 	  find_constant_pool_ref (PATTERN (insn), &pool_ref);
@@ -9192,16 +8823,6 @@
 
 	      s390_add_constant (curr_pool, constant, mode);
 	      s390_add_pool_insn (curr_pool, insn);
-
-	      /* Don't split the pool chunk between a LTREL_OFFSET load
-		 and the corresponding LTREL_BASE.  */
-	      if (GET_CODE (constant) == CONST
-		  && GET_CODE (XEXP (constant, 0)) == UNSPEC
-		  && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
-		{
-		  gcc_assert (!pending_ltrel);
-		  pending_ltrel = pool_ref;
-		}
 	    }
 	}
 
@@ -9209,129 +8830,25 @@
 	{
 	  if (curr_pool)
 	    s390_add_pool_insn (curr_pool, insn);
-	  /* An LTREL_BASE must follow within the same basic block.  */
-	  gcc_assert (!pending_ltrel);
-	}
-
-      if (NOTE_P (insn))
-	switch (NOTE_KIND (insn))
-	  {
-	  case NOTE_INSN_SWITCH_TEXT_SECTIONS:
-	    section_switch_p = true;
-	    break;
-	  case NOTE_INSN_VAR_LOCATION:
-	  case NOTE_INSN_CALL_ARG_LOCATION:
-	    continue;
-	  default:
-	    break;
-	  }
+	}
+
+      if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
+	continue;
 
       if (!curr_pool
 	  || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
-          || INSN_ADDRESSES (INSN_UID (insn)) == -1)
+	  || INSN_ADDRESSES (INSN_UID (insn)) == -1)
 	continue;
 
-      if (TARGET_CPU_ZARCH)
-	{
-	  if (curr_pool->size < S390_POOL_CHUNK_MAX)
-	    continue;
-
-	  s390_end_pool (curr_pool, NULL);
-	  curr_pool = NULL;
-	}
-      else
-	{
-          int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
-			   - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
-			 + extra_size;
-
-	  /* We will later have to insert base register reload insns.
-	     Those will have an effect on code size, which we need to
-	     consider here.  This calculation makes rather pessimistic
-	     worst-case assumptions.  */
-	  if (LABEL_P (insn))
-	    extra_size += 6;
-
-	  if (chunk_size < S390_POOL_CHUNK_MIN
-	      && curr_pool->size < S390_POOL_CHUNK_MIN
-	      && !section_switch_p)
-	    continue;
-
-	  /* Pool chunks can only be inserted after BARRIERs ...  */
-	  if (BARRIER_P (insn))
-	    {
-	      s390_end_pool (curr_pool, insn);
-	      curr_pool = NULL;
-	      extra_size = 0;
-	    }
-
-	  /* ... so if we don't find one in time, create one.  */
-          else if (chunk_size > S390_POOL_CHUNK_MAX
-	           || curr_pool->size > S390_POOL_CHUNK_MAX
-		   || section_switch_p)
-	    {
-	      rtx_insn *label, *jump, *barrier, *next, *prev;
-
-	      if (!section_switch_p)
-		{
-		  /* We can insert the barrier only after a 'real' insn.  */
-		  if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
-		    continue;
-		  if (get_attr_length (insn) == 0)
-		    continue;
-		  /* Don't separate LTREL_BASE from the corresponding
-		     LTREL_OFFSET load.  */
-		  if (pending_ltrel)
-		    continue;
-		  next = insn;
-		  do
-		    {
-		      insn = next;
-		      next = NEXT_INSN (insn);
-		    }
-		  while (next
-			 && NOTE_P (next)
-			 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
-			     || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
-		}
-	      else
-		{
-		  gcc_assert (!pending_ltrel);
-
-		  /* The old pool has to end before the section switch
-		     note in order to make it part of the current
-		     section.  */
-		  insn = PREV_INSN (insn);
-		}
-
-	      label = gen_label_rtx ();
-	      prev = insn;
-	      if (prev && NOTE_P (prev))
-		prev = prev_nonnote_insn (prev);
-	      if (prev)
-		jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
-						    INSN_LOCATION (prev));
-	      else
-		jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
-	      barrier = emit_barrier_after (jump);
-	      insn = emit_label_after (label, barrier);
-	      JUMP_LABEL (jump) = label;
-	      LABEL_NUSES (label) = 1;
-
-	      INSN_ADDRESSES_NEW (jump, -1);
-	      INSN_ADDRESSES_NEW (barrier, -1);
-	      INSN_ADDRESSES_NEW (insn, -1);
-
-	      s390_end_pool (curr_pool, barrier);
-	      curr_pool = NULL;
-	      extra_size = 0;
-	    }
-	}
+      if (curr_pool->size < S390_POOL_CHUNK_MAX)
+	continue;
+
+      s390_end_pool (curr_pool, NULL);
+      curr_pool = NULL;
     }
 
   if (curr_pool)
     s390_end_pool (curr_pool, NULL);
-  gcc_assert (!pending_ltrel);
 
   /* Find all labels that are branched into
      from an insn belonging to a different chunk.  */
@@ -9397,8 +8914,8 @@
 
   for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
     {
-      rtx new_insn = gen_reload_base (cfun->machine->base_reg,
-				      curr_pool->label);
+      rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
+					 curr_pool->label);
       rtx_insn *insn = curr_pool->first_insn;
       INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
     }
@@ -9407,13 +8924,13 @@
 
   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
     if (LABEL_P (insn)
-        && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
+	&& bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
       {
 	struct constant_pool *pool = s390_find_pool (pool_list, insn);
 	if (pool)
 	  {
-	    rtx new_insn = gen_reload_base (cfun->machine->base_reg,
-					    pool->label);
+	    rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg,
+					       pool->label);
 	    INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
 	  }
       }
@@ -9445,19 +8962,16 @@
 
   for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
     {
-      if (INSN_P (insn))
-	replace_ltrel_base (&PATTERN (insn));
-
       curr_pool = s390_find_pool (pool_list, insn);
       if (!curr_pool)
 	continue;
 
       if (NONJUMP_INSN_P (insn) || CALL_P (insn))
-        {
-          rtx addr, pool_ref = NULL_RTX;
-          find_constant_pool_ref (PATTERN (insn), &pool_ref);
-          if (pool_ref)
-            {
+	{
+	  rtx addr, pool_ref = NULL_RTX;
+	  find_constant_pool_ref (PATTERN (insn), &pool_ref);
+	  if (pool_ref)
+	    {
 	      if (s390_execute_label (insn))
 		addr = s390_find_execute (curr_pool, insn);
 	      else
@@ -9465,10 +8979,10 @@
 					   get_pool_constant (pool_ref),
 					   get_pool_mode (pool_ref));
 
-              replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
-              INSN_CODE (insn) = -1;
-            }
-        }
+	      replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
+	      INSN_CODE (insn) = -1;
+	    }
+	}
     }
 
   /* Dump out all literal pools.  */
@@ -9486,66 +9000,6 @@
     }
 }
 
-/* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
-   We have decided we cannot use this list, so revert all changes
-   to the current function that were done by s390_chunkify_start.  */
-
-static void
-s390_chunkify_cancel (struct constant_pool *pool_list)
-{
-  struct constant_pool *curr_pool = NULL;
-  rtx_insn *insn;
-
-  /* Remove all pool placeholder insns.  */
-
-  for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
-    {
-      /* Did we insert an extra barrier?  Remove it.  */
-      rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
-      rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL;
-      rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
-
-      if (jump && JUMP_P (jump)
-	  && barrier && BARRIER_P (barrier)
-	  && label && LABEL_P (label)
-	  && GET_CODE (PATTERN (jump)) == SET
-	  && SET_DEST (PATTERN (jump)) == pc_rtx
-	  && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
-	  && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
-	{
-	  remove_insn (jump);
-	  remove_insn (barrier);
-	  remove_insn (label);
-	}
-
-      remove_insn (curr_pool->pool_insn);
-    }
-
-  /* Remove all base register reload insns.  */
-
-  for (insn = get_insns (); insn; )
-    {
-      rtx_insn *next_insn = NEXT_INSN (insn);
-
-      if (NONJUMP_INSN_P (insn)
-	  && GET_CODE (PATTERN (insn)) == SET
-	  && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
-	  && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
-	remove_insn (insn);
-
-      insn = next_insn;
-    }
-
-  /* Free pool list.  */
-
-  while (pool_list)
-    {
-      struct constant_pool *next = pool_list->next;
-      s390_free_pool (pool_list);
-      pool_list = next;
-    }
-}
-
 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN.  */
 
 void
@@ -9608,16 +9062,7 @@
      value of RETURN_REGNUM is actually saved.  */
 
   if (count == 0)
-    {
-      /* On non-z architectures branch splitting could overwrite r14.  */
-      if (TARGET_CPU_ZARCH)
-	return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
-      else
-	{
-	  cfun_frame_layout.save_return_addr_p = true;
-	  return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
-	}
-    }
+    return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
 
   if (TARGET_PACKED_STACK)
     offset = -2 * UNITS_PER_LONG;
@@ -10036,7 +9481,6 @@
   clobbered_regs[RETURN_REGNUM]
     |= (!crtl->is_leaf
 	|| TARGET_TPF_PROFILING
-	|| cfun->machine->split_branches_pending_p
 	|| cfun_frame_layout.save_return_addr_p
 	|| crtl->calls_eh_return);
 
@@ -10075,7 +9519,6 @@
   int i;
 
   gcc_assert (epilogue_completed);
-  gcc_assert (!cfun->machine->split_branches_pending_p);
 
   s390_regs_ever_clobbered (clobbered_regs);
 
@@ -10243,20 +9686,12 @@
   if (reload_completed)
     return;
 
-  /* On S/390 machines, we may need to perform branch splitting, which
-     will require both base and return address register.  We have no
-     choice but to assume we're going to need them until right at the
-     end of the machine dependent reorg phase.  */
-  if (!TARGET_CPU_ZARCH)
-    cfun->machine->split_branches_pending_p = true;
-
   do
     {
       frame_size = cfun_frame_layout.frame_size;
 
       /* Try to predict whether we'll need the base register.  */
-      base_used = cfun->machine->split_branches_pending_p
-		  || crtl->uses_const_pool
+      base_used = crtl->uses_const_pool
 		  || (!DISP_IN_RANGE (frame_size)
 		      && !CONST_OK_FOR_K (frame_size));
 
@@ -10708,7 +10143,7 @@
 static bool
 s390_can_eliminate (const int from, const int to)
 {
-  /* On zSeries machines, we have not marked the base register as fixed.
+  /* We have not marked the base register as fixed.
      Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
      If a function requires the base register, we say here that this
      elimination cannot be performed.  This will cause reload to free
@@ -10718,13 +10153,8 @@
      to allocate the base register for any other purpose.  */
   if (from == BASE_REGNUM && to == BASE_REGNUM)
     {
-      if (TARGET_CPU_ZARCH)
-	{
-	  s390_init_frame_layout ();
-	  return cfun->machine->base_reg == NULL_RTX;
-	}
-
-      return false;
+      s390_init_frame_layout ();
+      return cfun->machine->base_reg == NULL_RTX;
     }
 
   /* Everything else must point into the stack frame.  */
@@ -10868,9 +10298,9 @@
   if (first == last)
     {
       if (TARGET_64BIT)
-        insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
-      else
-        insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
+	insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
+      else
+	insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
 
       if (!global_not_special_regno_p (first))
 	RTX_FRAME_RELATED_P (insn) = 1;
@@ -10981,9 +10411,9 @@
   if (first == last)
     {
       if (TARGET_64BIT)
-        insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
-      else
-        insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
+	insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
+      else
+	insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
 
       RTX_FRAME_RELATED_P (insn) = 1;
       return insn;
@@ -11011,27 +10441,7 @@
 
   start_sequence ();
 
-  if (TARGET_CPU_ZARCH)
-    {
-      emit_move_insn (got_rtx, s390_got_symbol ());
-    }
-  else
-    {
-      rtx offset;
-
-      offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, s390_got_symbol ()),
-			       UNSPEC_LTREL_OFFSET);
-      offset = gen_rtx_CONST (Pmode, offset);
-      offset = force_const_mem (Pmode, offset);
-
-      emit_move_insn (got_rtx, offset);
-
-      offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
-			       UNSPEC_LTREL_BASE);
-      offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
-
-      emit_move_insn (got_rtx, offset);
-    }
+  emit_move_insn (got_rtx, s390_got_symbol ());
 
   insns = get_insns ();
   end_sequence ();
@@ -11170,6 +10580,12 @@
 
 } // anon namespace
 
+rtl_opt_pass *
+make_pass_s390_early_mach (gcc::context *ctxt)
+{
+  return new pass_s390_early_mach (ctxt);
+}
+
 /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it.
    - push too big immediates to the literal pool and annotate the refs
    - emit frame related notes for stack pointer changes.  */
@@ -11539,7 +10955,7 @@
 					     t, const0_rtx, const0_rtx));
 		}
 	    }
-  	}
+	}
 
       if (s390_warn_framesize > 0
 	  && cfun_frame_layout.frame_size >= s390_warn_framesize)
@@ -11674,10 +11090,9 @@
 void
 s390_emit_epilogue (bool sibcall)
 {
-  rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
+  rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX;
   int area_bottom, area_top, offset = 0;
   int next_offset;
-  rtvec p;
   int i;
 
   if (TARGET_TPF_PROFILING)
@@ -11688,7 +11103,7 @@
 	 algorithms located at the branch target.  */
 
       /* Emit a blockage here so that all code
-         lies between the profiling mechanisms.  */
+	 lies between the profiling mechanisms.  */
       emit_insn (gen_blockage ());
 
       emit_insn (gen_epilogue_tpf ());
@@ -11709,7 +11124,7 @@
       /* Nothing to restore.  */
     }
   else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
-           && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
+	   && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
     {
       /* Area is in range.  */
       offset = cfun_frame_layout.frame_size;
@@ -11784,10 +11199,6 @@
 
     }
 
-  /* Return register.  */
-
-  return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
-
   /* Restore call saved gprs.  */
 
   if (cfun_frame_layout.first_restore_gpr != -1)
@@ -11833,8 +11244,14 @@
 	  && s390_tune <= PROCESSOR_2097_Z10)
 	{
 	  int return_regnum = find_unused_clobbered_reg();
-	  if (!return_regnum)
-	    return_regnum = 4;
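+	  /* Presumably the pre-z10 return thunks need
+	     INDIRECT_BRANCH_THUNK_REGNUM as larl;ex scratch register,
+	     so the return address must not end up in that same
+	     register.  */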
+	  if (!return_regnum
+	      || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION
+		  && !TARGET_CPU_Z10
+		  && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM))
+	    {
+	      gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4);
+	      return_regnum = 4;
+	    }
 	  return_reg = gen_rtx_REG (Pmode, return_regnum);
 
 	  addr = plus_constant (Pmode, frame_pointer,
@@ -11872,14 +11289,17 @@
 
   if (! sibcall)
     {
-
-      /* Return to caller.  */
-
-      p = rtvec_alloc (2);
-
-      RTVEC_ELT (p, 0) = ret_rtx;
-      RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
-      emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
+      if (!return_reg && !s390_can_use_return_insn ())
+	/* We planned to emit (return), but we are not allowed to.  */
+	return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
+
+      if (return_reg)
+	/* Emit (return) and (use).  */
+	emit_jump_insn (gen_return_use (return_reg));
+      else
+	/* The fact that RETURN_REGNUM is used is already reflected by
+	   EPILOGUE_USES.  Emit plain (return).  */
+	emit_jump_insn (gen_return ());
     }
 }
 
@@ -11927,11 +11347,6 @@
   rtx tmp;
 
   gcc_assert (flag_split_stack && reload_completed);
-  if (!TARGET_CPU_ZARCH)
-    {
-      sorry ("CPUs older than z900 are not supported for -fsplit-stack");
-      return;
-    }
 
   r1 = gen_rtx_REG (Pmode, 1);
 
@@ -12015,8 +11430,8 @@
       LABEL_NUSES (call_done)++;
 
       /* Mark the jump as very unlikely to be taken.  */
-      add_reg_br_prob_note (insn, 
-		            profile_probability::very_unlikely ());
+      add_reg_br_prob_note (insn,
+			    profile_probability::very_unlikely ());
 
       if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
 	{
@@ -12284,7 +11699,7 @@
   if (type)
     {
       if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
-        return true;
+	return true;
 
       if (TREE_CODE (type) == COMPLEX_TYPE
 	  || TREE_CODE (type) == VECTOR_TYPE)
@@ -12457,9 +11872,9 @@
 
 static machine_mode
 s390_promote_function_mode (const_tree type, machine_mode mode,
-                            int *punsignedp,
-                            const_tree fntype ATTRIBUTE_UNUSED,
-                            int for_return ATTRIBUTE_UNUSED)
+			    int *punsignedp,
+			    const_tree fntype ATTRIBUTE_UNUSED,
+			    int for_return ATTRIBUTE_UNUSED)
 {
   if (INTEGRAL_MODE_P (mode)
       && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
@@ -12554,12 +11969,12 @@
    On S/390, va_list is an array type equivalent to
 
       typedef struct __va_list_tag
-        {
-            long __gpr;
-            long __fpr;
-            void *__overflow_arg_area;
-            void *__reg_save_area;
-        } va_list[1];
+	{
+	    long __gpr;
+	    long __fpr;
+	    void *__overflow_arg_area;
+	    void *__reg_save_area;
+	} va_list[1];
 
    where __gpr and __fpr hold the number of general purpose
    or floating point arguments used up to now, respectively,
@@ -12663,14 +12078,14 @@
   if (cfun->va_list_fpr_size)
     {
       t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
-	          build_int_cst (NULL_TREE, n_fpr));
+		  build_int_cst (NULL_TREE, n_fpr));
       TREE_SIDE_EFFECTS (t) = 1;
       expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
     }
 
   if (flag_split_stack
      && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
-         == NULL)
+	 == NULL)
      && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
     {
       rtx reg;
@@ -12698,9 +12113,9 @@
       || TARGET_VX_ABI)
     {
       if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
-        t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
-      else
-        t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
+	t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
+      else
+	t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
 
       off = INTVAL (crtl->args.arg_offset_rtx);
       off = off < 0 ? 0 : off;
@@ -12736,7 +12151,7 @@
 
    if (integral value) {
      if (size  <= 4 && args.gpr < 5 ||
-         size  > 4 && args.gpr < 4 )
+	 size  > 4 && args.gpr < 4 )
        ret = args.reg_save_area[args.gpr+8]
      else
        ret = *args.overflow_arg_area++;
@@ -12872,13 +12287,13 @@
     {
       /*
 	if (reg > ((typeof (reg))max_reg))
-          goto lab_false;
-
-        addr = sav + sav_ofs + reg * save_scale;
+	  goto lab_false;
+
+	addr = sav + sav_ofs + reg * save_scale;
 
 	goto lab_over;
 
-        lab_false:
+	lab_false:
       */
 
       lab_false = create_artificial_label (UNKNOWN_LOCATION);
@@ -12952,11 +12367,11 @@
    expanders.
    DEST  - Register location where CC will be stored.
    TDB   - Pointer to a 256 byte area where to store the transaction.
-           diagnostic block. NULL if TDB is not needed.
+	   diagnostic block.  NULL if TDB is not needed.
    RETRY - Retry count value.  If non-NULL a retry loop for CC2
-           is emitted
+	   is emitted.
    CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
-                    of the tbegin instruction pattern.  */
+		    of the tbegin instruction pattern.  */
 
 void
 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
@@ -13090,13 +12505,37 @@
   emit_move_insn (mem, fnaddr);
 }
 
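+/* Emit HW halfwords worth of NOPs on behalf of option USER, using
+   the largest available NOP encodings first: brcl (3 halfwords),
+   bc (2) and bcr (1).  For example, HW == 5 emits one brcl followed
+   by one bc.  */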
+static void
+output_asm_nops (const char *user, int hw)
+{
+  asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw);
+  while (hw > 0)
+    {
+      if (hw >= 3)
+	{
+	  output_asm_insn ("brcl\t0,0", NULL);
+	  hw -= 3;
+	}
+      else if (hw >= 2)
+	{
+	  output_asm_insn ("bc\t0,0", NULL);
+	  hw -= 2;
+	}
+      else
+	{
+	  output_asm_insn ("bcr\t0,0", NULL);
+	  hw -= 1;
+	}
+    }
+}
+
 /* Output assembler code to FILE to increment profiler label # LABELNO
    for profiling a function entry.  */
 
 void
 s390_function_profiler (FILE *file, int labelno)
 {
-  rtx op[7];
+  rtx op[8];
 
   char label[128];
   ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
@@ -13106,62 +12545,72 @@
   op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
   op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
   op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
+  op[7] = GEN_INT (UNITS_PER_LONG);
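+  /* %7: byte offset of the stack slot used below to temporarily
+     save %0 (cf. op[1]); referenced by the .cfi_rel_offset
+     directives.  */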
 
   op[2] = gen_rtx_REG (Pmode, 1);
   op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
   SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
 
-  op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
+  op[4] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount");
   if (flag_pic)
     {
       op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
       op[4] = gen_rtx_CONST (Pmode, op[4]);
     }
 
-  if (TARGET_64BIT)
-    {
-      output_asm_insn ("stg\t%0,%1", op);
-      output_asm_insn ("larl\t%2,%3", op);
-      output_asm_insn ("brasl\t%0,%4", op);
-      output_asm_insn ("lg\t%0,%1", op);
-    }
-  else if (TARGET_CPU_ZARCH)
-    {
-      output_asm_insn ("st\t%0,%1", op);
-      output_asm_insn ("larl\t%2,%3", op);
-      output_asm_insn ("brasl\t%0,%4", op);
-      output_asm_insn ("l\t%0,%1", op);
-    }
-  else if (!flag_pic)
-    {
-      op[6] = gen_label_rtx ();
-
-      output_asm_insn ("st\t%0,%1", op);
-      output_asm_insn ("bras\t%2,%l6", op);
-      output_asm_insn (".long\t%4", op);
-      output_asm_insn (".long\t%3", op);
-      targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
-      output_asm_insn ("l\t%0,0(%2)", op);
-      output_asm_insn ("l\t%2,4(%2)", op);
-      output_asm_insn ("basr\t%0,%0", op);
-      output_asm_insn ("l\t%0,%1", op);
-    }
-  else
-    {
-      op[5] = gen_label_rtx ();
-      op[6] = gen_label_rtx ();
-
-      output_asm_insn ("st\t%0,%1", op);
-      output_asm_insn ("bras\t%2,%l6", op);
-      targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
-      output_asm_insn (".long\t%4-%l5", op);
-      output_asm_insn (".long\t%3-%l5", op);
-      targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
-      output_asm_insn ("lr\t%0,%2", op);
-      output_asm_insn ("a\t%0,0(%2)", op);
-      output_asm_insn ("a\t%2,4(%2)", op);
-      output_asm_insn ("basr\t%0,%0", op);
-      output_asm_insn ("l\t%0,%1", op);
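+  /* With -mrecord-mcount the label "1:" below marks the profiler
+     call site; its address is recorded in the __mcount_loc section
+     emitted at the end of this function (the convention expected by
+     ftrace-style tooling).  */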
+  if (flag_record_mcount)
+    fprintf (file, "1:\n");
+
+  if (flag_fentry)
+    {
+      if (flag_nop_mcount)
+	output_asm_nops ("-mnop-mcount", /* brasl */ 3);
+      else if (cfun->static_chain_decl)
+	warning (OPT_Wcannot_profile, "nested functions cannot be profiled "
+		 "with -mfentry on s390");
+      else
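+	/* Note that %r0 rather than %r14 serves as the link register
+	   here, so the return address need not be saved around the
+	   call; __fentry__ is presumably expected to cope with
+	   that.  */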
+	output_asm_insn ("brasl\t0,%4", op);
+    }
+  else if (TARGET_64BIT)
+    {
+      if (flag_nop_mcount)
+	output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* larl */ 3 +
+			 /* brasl */ 3 + /* lg */ 3);
+      else
+	{
+	  output_asm_insn ("stg\t%0,%1", op);
+	  if (flag_dwarf2_cfi_asm)
+	    output_asm_insn (".cfi_rel_offset\t%0,%7", op);
+	  output_asm_insn ("larl\t%2,%3", op);
+	  output_asm_insn ("brasl\t%0,%4", op);
+	  output_asm_insn ("lg\t%0,%1", op);
+	  if (flag_dwarf2_cfi_asm)
+	    output_asm_insn (".cfi_restore\t%0", op);
+	}
+    }
+  else
+    {
+      if (flag_nop_mcount)
+	output_asm_nops ("-mnop-mcount", /* st */ 2 + /* larl */ 3 +
+			 /* brasl */ 3 + /* l */ 2);
+      else
+	{
+	  output_asm_insn ("st\t%0,%1", op);
+	  if (flag_dwarf2_cfi_asm)
+	    output_asm_insn (".cfi_rel_offset\t%0,%7", op);
+	  output_asm_insn ("larl\t%2,%3", op);
+	  output_asm_insn ("brasl\t%0,%4", op);
+	  output_asm_insn ("l\t%0,%1", op);
+	  if (flag_dwarf2_cfi_asm)
+	    output_asm_insn (".cfi_restore\t%0", op);
+	}
+    }
+
+  if (flag_record_mcount)
+    {
+      fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n");
+      fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long");
+      fprintf (file, "\t.previous\n");
     }
 }
 
@@ -13277,8 +12726,8 @@
 	    output_asm_insn ("lay\t%1,%2(%1)", op);
 	  else if (CONST_OK_FOR_K (delta))
 	    output_asm_insn ("aghi\t%1,%2", op);
- 	  else if (CONST_OK_FOR_Os (delta))
- 	    output_asm_insn ("agfi\t%1,%2", op);
+	  else if (CONST_OK_FOR_Os (delta))
+	    output_asm_insn ("agfi\t%1,%2", op);
 	  else
 	    {
 	      op[6] = gen_label_rtx ();
@@ -13300,12 +12749,12 @@
 	      output_asm_insn ("ag\t%4,0(%1)", op);
 	      output_asm_insn ("ag\t%1,0(%4)", op);
 	    }
- 	  else if (CONST_OK_FOR_Os (vcall_offset))
- 	    {
- 	      output_asm_insn ("lgfi\t%4,%3", op);
- 	      output_asm_insn ("ag\t%4,0(%1)", op);
- 	      output_asm_insn ("ag\t%1,0(%4)", op);
- 	    }
+	  else if (CONST_OK_FOR_Os (vcall_offset))
+	    {
+	      output_asm_insn ("lgfi\t%4,%3", op);
+	      output_asm_insn ("ag\t%4,0(%1)", op);
+	      output_asm_insn ("ag\t%1,0(%4)", op);
+	    }
 	  else
 	    {
 	      op[7] = gen_label_rtx ();
@@ -13343,10 +12792,10 @@
       /* Setup base pointer if required.  */
       if (!vcall_offset
 	  || (!DISP_IN_RANGE (delta)
-              && !CONST_OK_FOR_K (delta)
+	      && !CONST_OK_FOR_K (delta)
 	      && !CONST_OK_FOR_Os (delta))
 	  || (!DISP_IN_RANGE (delta)
-              && !CONST_OK_FOR_K (vcall_offset)
+	      && !CONST_OK_FOR_K (vcall_offset)
 	      && !CONST_OK_FOR_Os (vcall_offset)))
 	{
 	  op[5] = gen_label_rtx ();
@@ -13365,7 +12814,7 @@
 	  else if (CONST_OK_FOR_K (delta))
 	    output_asm_insn ("ahi\t%1,%2", op);
 	  else if (CONST_OK_FOR_Os (delta))
- 	    output_asm_insn ("afi\t%1,%2", op);
+	    output_asm_insn ("afi\t%1,%2", op);
 	  else
 	    {
 	      op[6] = gen_label_rtx ();
@@ -13375,7 +12824,7 @@
 
       /* Perform vcall adjustment.  */
       if (vcall_offset)
-        {
+	{
 	  if (CONST_OK_FOR_J (vcall_offset))
 	    {
 	      output_asm_insn ("l\t%4,0(%1)", op);
@@ -13393,11 +12842,11 @@
 	      output_asm_insn ("a\t%1,0(%4)", op);
 	    }
 	  else if (CONST_OK_FOR_Os (vcall_offset))
- 	    {
- 	      output_asm_insn ("iilf\t%4,%3", op);
- 	      output_asm_insn ("a\t%4,0(%1)", op);
- 	      output_asm_insn ("a\t%1,0(%4)", op);
- 	    }
+	    {
+	      output_asm_insn ("iilf\t%4,%3", op);
+	      output_asm_insn ("a\t%4,0(%1)", op);
+	      output_asm_insn ("a\t%1,0(%4)", op);
+	    }
 	  else
 	    {
 	      op[7] = gen_label_rtx ();
@@ -13471,6 +12920,112 @@
   final_end_function ();
 }
 
+/* Output either an indirect jump or an indirect call
+   (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO,
+   using a branch trampoline that disables branch target prediction.  */
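+
+/* An illustrative sketch (the exact thunk symbol name is an
+   assumption): on z10 or newer an indirect jump via %r1 rewritten
+   here comes out roughly as
+
+	jg	__s390_indirect_jump_r1
+
+   while on older CPUs the jg targets an ex-based thunk that needs
+   INDIRECT_BRANCH_THUNK_REGNUM as an additional scratch register.  */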
+
+void
+s390_indirect_branch_via_thunk (unsigned int regno,
+				unsigned int return_addr_regno,
+				rtx comparison_operator,
+				enum s390_indirect_branch_type type)
+{
+  enum s390_indirect_branch_option option;
+
+  if (type == s390_indirect_branch_type_return)
+    {
+      if (s390_return_addr_from_memory ())
+	option = s390_opt_function_return_mem;
+      else
+	option = s390_opt_function_return_reg;
+    }
+  else if (type == s390_indirect_branch_type_jump)
+    option = s390_opt_indirect_branch_jump;
+  else if (type == s390_indirect_branch_type_call)
+    option = s390_opt_indirect_branch_call;
+  else
+    gcc_unreachable ();
+
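+  /* With -mindirect-branch-table every rewritten branch site gets a
+     label here; presumably the per-option label streams are later
+     collected into tables that locate those sites.  */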
+  if (TARGET_INDIRECT_BRANCH_TABLE)
+    {
+      char label[32];
+
+      ASM_GENERATE_INTERNAL_LABEL (label,
+				   indirect_branch_table_label[option],
+				   indirect_branch_table_label_no[option]++);
+      ASM_OUTPUT_LABEL (asm_out_file, label);
+    }
+
+  if (return_addr_regno != INVALID_REGNUM)
+    {
+      gcc_assert (comparison_operator == NULL_RTX);
+      fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno);
+    }
+  else
+    {
+      fputs (" \tjg", asm_out_file);
+      if (comparison_operator != NULL_RTX)
+	print_operand (asm_out_file, comparison_operator, 'C');
+
+      fputs ("\t", asm_out_file);
+    }
+
+  if (TARGET_CPU_Z10)
+    fprintf (asm_out_file,
+	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n",
+	     regno);
+  else
+    fprintf (asm_out_file,
+	     TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n",
+	     INDIRECT_BRANCH_THUNK_REGNUM, regno);
+
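+  /* Record which thunks are actually used, presumably so that only
+     those get emitted later on.  */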
+  if ((option == s390_opt_indirect_branch_jump
+       && cfun->machine->indirect_branch_jump == indirect_branch_thunk)
+      || (option == s390_opt_indirect_branch_call
+	  && cfun->machine->indirect_branch_call == indirect_branch_thunk)
+      || (option == s390_opt_function_return_reg
+	  && cfun->machine->function_return_reg == indirect_branch_thunk)
+      || (option == s390_opt_function_return_mem
+	  && cfun->machine->function_return_mem == indirect_branch_thunk))
+    {
+      if (TARGET_CPU_Z10)
+	indirect_branch_z10thunk_mask |= (1 << regno);
+      else
+	indirect_branch_prez10thunk_mask |= (1 << regno);
+    }
+}
+
+/* Output an inline thunk for indirect jumps.  EXECUTE_TARGET can
+   either be an address register or a label pointing to the location
+   of the jump instruction.  */
+
+void
+s390_indirect_branch_via_inline_thunk (rtx execute_target)
+{
+  if (TARGET_INDIRECT_BRANCH_TABLE)
+    {
+      char label[32];
+
+      ASM_GENERATE_INTERNAL_LABEL (label,
+				   indirect_branch_table_label[s390_opt_indirect_branch_jump],
+				   indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++);
+      ASM_OUTPUT_LABEL (asm_out_file, label);
+    }
+
+  if (!TARGET_ZARCH)
+    fputs ("\t.machinemode zarch\n", asm_out_file);
+
+  if (REG_P (execute_target))
+    fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target));
+  else
+    output_asm_insn ("\texrl\t%%r0,%0", &execute_target);
+
+  if (!TARGET_ZARCH)
+    fputs ("\t.machinemode esa\n", asm_out_file);
+
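+  /* The ex/exrl above executes the branch instruction out of line,
+     so the self-branch below is never reached architecturally; it
+     presumably just keeps speculative execution from running past
+     the thunk.  */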
+  fputs ("0:\tj\t0b\n", asm_out_file);
+}
+
 static bool
 s390_valid_pointer_mode (scalar_int_mode mode)
 {
@@ -13516,10 +13071,10 @@
 	 named.  This only has an impact on vector argument register
 	 usage none of which is call-saved.  */
       if (pass_by_reference (&cum_v, mode, type, true))
- 	{
- 	  mode = Pmode;
- 	  type = build_pointer_type (type);
- 	}
+	{
+	  mode = Pmode;
+	  type = build_pointer_type (type);
+	}
 
        parm_rtx = s390_function_arg (cum, mode, type, true);
 
@@ -13529,10 +13084,10 @@
 	 continue;
 
        if (REG_P (parm_rtx))
-  	 {
+	 {
 	   for (reg = 0; reg < REG_NREGS (parm_rtx); reg++)
 	     if (!call_used_regs[reg + REGNO (parm_rtx)])
- 	       return true;
+	       return true;
 	 }
 
        if (GET_CODE (parm_rtx) == PARALLEL)
@@ -13572,6 +13127,14 @@
   if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
     return false;
 
+  /* The thunks for indirect branches require r1 if no exrl is
+     available.  r1 might not be available when doing a sibling
+     call.  */
+  if (TARGET_INDIRECT_BRANCH_NOBP_CALL
+      && !TARGET_CPU_Z10
+      && !decl)
+    return false;
+
   /* Register 6 on s390 is available as an argument register but unfortunately
      "caller saved". This makes functions needing this register for arguments
      not suitable for sibcalls.  */
@@ -13596,8 +13159,8 @@
    TLS_CALL the location of the thread-local symbol
    RESULT_REG the register where the result of the call should be stored
    RETADDR_REG the register where the return address should be stored
-               If this parameter is NULL_RTX the call is considered
-               to be a sibling call.  */
+	       If this parameter is NULL_RTX the call is considered
+	       to be a sibling call.  */
 
 rtx_insn *
 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
@@ -13605,17 +13168,21 @@
 {
   bool plt_call = false;
   rtx_insn *insn;
-  rtx call;
-  rtx clobber;
-  rtvec vec;
+  rtx vec[4] = { NULL_RTX };
+  int elts = 0;
+  rtx *call = &vec[0];
+  rtx *clobber_ret_reg = &vec[1];
+  rtx *use = &vec[2];
+  rtx *clobber_thunk_reg = &vec[3];
+  int i;
 
   /* Direct function calls need special treatment.  */
   if (GET_CODE (addr_location) == SYMBOL_REF)
     {
       /* When calling a global routine in PIC mode, we must
-         replace the symbol itself with the PLT stub.  */
+	 replace the symbol itself with the PLT stub.  */
       if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
-        {
+	{
 	  if (TARGET_64BIT || retaddr_reg != NULL_RTX)
 	    {
 	      addr_location = gen_rtx_UNSPEC (Pmode,
@@ -13635,16 +13202,6 @@
 	       optimization is illegal for S/390 so we turn the direct
 	       call into a indirect call again.  */
 	    addr_location = force_reg (Pmode, addr_location);
-        }
-
-      /* Unless we can use the bras(l) insn, force the
-         routine address into a register.  */
-      if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
-        {
-	  if (flag_pic)
-	    addr_location = legitimize_pic_address (addr_location, 0);
-	  else
-	    addr_location = force_reg (Pmode, addr_location);
 	}
     }
 
@@ -13659,26 +13216,58 @@
       addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
     }
 
+  if (TARGET_INDIRECT_BRANCH_NOBP_CALL
+      && GET_CODE (addr_location) != SYMBOL_REF
+      && !plt_call)
+    {
+      /* Indirect branch thunks require the target to be a single GPR.  */
+      addr_location = force_reg (Pmode, addr_location);
+
+      /* Without exrl the indirect branch thunks need an additional
+	 register for the larl;ex sequence.  */
+      if (!TARGET_CPU_Z10)
+	{
+	  *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM);
+	  *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg);
+	}
+    }
+
   addr_location = gen_rtx_MEM (QImode, addr_location);
-  call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
+  *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
 
   if (result_reg != NULL_RTX)
-    call = gen_rtx_SET (result_reg, call);
+    *call = gen_rtx_SET (result_reg, *call);
 
   if (retaddr_reg != NULL_RTX)
     {
-      clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
+      *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
 
       if (tls_call != NULL_RTX)
-	vec = gen_rtvec (3, call, clobber,
-			 gen_rtx_USE (VOIDmode, tls_call));
-      else
-	vec = gen_rtvec (2, call, clobber);
-
-      call = gen_rtx_PARALLEL (VOIDmode, vec);
-    }
-
-  insn = emit_call_insn (call);
+	*use = gen_rtx_USE (VOIDmode, tls_call);
+    }
+
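+  /* Wrap everything into a PARALLEL only if, besides the call
+     itself, at least one clobber/use element is present; a plain
+     call stays unwrapped.  */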
+  for (i = 0; i < 4; i++)
+    if (vec[i] != NULL_RTX)
+      elts++;
+
+  if (elts > 1)
+    {
+      rtvec v;
+      int e = 0;
+
+      v = rtvec_alloc (elts);
+      for (i = 0; i < 4; i++)
+	if (vec[i] != NULL_RTX)
+	  {
+	    RTVEC_ELT (v, e) = vec[i];
+	    e++;
+	  }
+
+      *call = gen_rtx_PARALLEL (VOIDmode, v);
+    }
+
+  insn = emit_call_insn (*call);
 
   /* 31-bit PLT stubs and tls calls use the GOT register implicitly.  */
   if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
@@ -13703,13 +13292,10 @@
       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
     }
-  if (TARGET_CPU_ZARCH)
-    {
-      fixed_regs[BASE_REGNUM] = 0;
-      call_used_regs[BASE_REGNUM] = 0;
-      fixed_regs[RETURN_REGNUM] = 0;
-      call_used_regs[RETURN_REGNUM] = 0;
-    }
+  fixed_regs[BASE_REGNUM] = 0;
+  call_used_regs[BASE_REGNUM] = 0;
+  fixed_regs[RETURN_REGNUM] = 0;
+  call_used_regs[RETURN_REGNUM] = 0;
   if (TARGET_64BIT)
     {
       for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
@@ -13753,7 +13339,7 @@
   emit_move_insn (reg, target);
   emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
   insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
-                                     gen_rtx_REG (Pmode, RETURN_REGNUM));
+				     gen_rtx_REG (Pmode, RETURN_REGNUM));
   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
   use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
 
@@ -13775,14 +13361,10 @@
      can do, so no point in walking the insn list.  */
 
   if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
-      && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
-      && (TARGET_CPU_ZARCH
-          || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
-              && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
+      && cfun_frame_layout.last_save_gpr >= BASE_REGNUM)
     return;
 
   /* Search for prologue/epilogue insns and replace them.  */
-
   for (insn = get_insns (); insn; insn = next_insn)
     {
       int first, last, off;
@@ -14193,9 +13775,9 @@
 	  && s390_non_addr_reg_read_p (*op0, prev_insn))
 	{
 	  if (REGNO (*op1) == 0)
-	    emit_insn_after (gen_nop1 (), insn);
+	    emit_insn_after (gen_nop_lr1 (), insn);
 	  else
-	    emit_insn_after (gen_nop (), insn);
+	    emit_insn_after (gen_nop_lr0 (), insn);
 	  insn_added_p = true;
 	}
       else
@@ -14358,7 +13940,8 @@
 static void
 s390_reorg (void)
 {
-  bool pool_overflow = false;
+  struct constant_pool *pool;
+  rtx_insn *insn;
   int hw_before, hw_after;
 
   if (s390_tune == PROCESSOR_2964_Z13)
@@ -14369,100 +13952,54 @@
   split_all_insns_noflow ();
 
   /* Install the main literal pool and the associated base
-     register load insns.
-
-     In addition, there are two problematic situations we need
-     to correct:
-
-     - the literal pool might be > 4096 bytes in size, so that
-       some of its elements cannot be directly accessed
-
-     - a branch target might be > 64K away from the branch, so that
-       it is not possible to use a PC-relative instruction.
-
-     To fix those, we split the single literal pool into multiple
+     register load insns.  The literal pool might be > 4096 bytes in
+     size, in which case some of its elements cannot be directly
+     accessed.
+
+     To fix this, we split the single literal pool into multiple
      pool chunks, reloading the pool base register at various
      points throughout the function to ensure it always points to
-     the pool chunk the following code expects, and / or replace
-     PC-relative branches by absolute branches.
-
-     However, the two problems are interdependent: splitting the
-     literal pool can move a branch further away from its target,
-     causing the 64K limit to overflow, and on the other hand,
-     replacing a PC-relative branch by an absolute branch means
-     we need to put the branch target address into the literal
-     pool, possibly causing it to overflow.
-
-     So, we loop trying to fix up both problems until we manage
-     to satisfy both conditions at the same time.  Note that the
-     loop is guaranteed to terminate as every pass of the loop
-     strictly decreases the total number of PC-relative branches
-     in the function.  (This is not completely true as there
-     might be branch-over-pool insns introduced by chunkify_start.
-     Those never need to be split however.)  */
-
-  for (;;)
-    {
-      struct constant_pool *pool = NULL;
-
-      /* Collect the literal pool.  */
-      if (!pool_overflow)
-	{
-	  pool = s390_mainpool_start ();
-	  if (!pool)
-	    pool_overflow = true;
-	}
-
-      /* If literal pool overflowed, start to chunkify it.  */
-      if (pool_overflow)
-        pool = s390_chunkify_start ();
-
-      /* Split out-of-range branches.  If this has created new
-	 literal pool entries, cancel current chunk list and
-	 recompute it.  zSeries machines have large branch
-	 instructions, so we never need to split a branch.  */
-      if (!TARGET_CPU_ZARCH && s390_split_branches ())
-        {
-          if (pool_overflow)
-            s390_chunkify_cancel (pool);
-	  else
-            s390_mainpool_cancel (pool);
-
-          continue;
-        }
-
-      /* If we made it up to here, both conditions are satisfied.
-	 Finish up literal pool related changes.  */
-      if (pool_overflow)
-	s390_chunkify_finish (pool);
-      else
-	s390_mainpool_finish (pool);
-
-      /* We're done splitting branches.  */
-      cfun->machine->split_branches_pending_p = false;
-      break;
+     the pool chunk the following code expects.  */
+
+  /* Collect the literal pool.  */
+  pool = s390_mainpool_start ();
+  if (pool)
+    {
+      /* Finish up literal pool related changes.  */
+      s390_mainpool_finish (pool);
+    }
+  else
+    {
+      /* If literal pool overflowed, chunkify it.  */
+      pool = s390_chunkify_start ();
+      s390_chunkify_finish (pool);
     }
 
   /* Generate out-of-pool execute target insns.  */
-  if (TARGET_CPU_ZARCH)
-    {
-      rtx_insn *insn, *target;
+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    {
       rtx label;
-
-      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
-	{
-	  label = s390_execute_label (insn);
-	  if (!label)
-	    continue;
-
-	  gcc_assert (label != const0_rtx);
-
-	  target = emit_label (XEXP (label, 0));
-	  INSN_ADDRESSES_NEW (target, -1);
-
-	  target = emit_insn (s390_execute_target (insn));
-	  INSN_ADDRESSES_NEW (target, -1);
-	}
+      rtx_insn *target;
+
+      label = s390_execute_label (insn);
+      if (!label)
+	continue;
+
+      gcc_assert (label != const0_rtx);
+
+      target = emit_label (XEXP (label, 0));
+      INSN_ADDRESSES_NEW (target, -1);
+
+      if (JUMP_P (insn))
+	{
+	  target = emit_jump_insn (s390_execute_target (insn));
+	  /* This is important in order to keep a table jump
+	     pointing at the jump table label.  Only then is it
+	     recognized as a table jump.  */
+	  JUMP_LABEL (target) = JUMP_LABEL (insn);
+	}
+      else
+	target = emit_insn (s390_execute_target (insn));
+      INSN_ADDRESSES_NEW (target, -1);
     }
 
   /* Try to optimize prologue and epilogue further.  */
@@ -14519,7 +14056,7 @@
       /* Output a series of NOPs before the first active insn.  */
       while (insn && hw_after > 0)
 	{
-	  if (hw_after >= 3 && TARGET_CPU_ZARCH)
+	  if (hw_after >= 3)
 	    {
 	      emit_insn_before (gen_nop_6_byte (), insn);
 	      hw_after -= 3;
@@ -15100,9 +14637,29 @@
   for (i = 0; i < loop->num_nodes; i++)
     FOR_BB_INSNS (bbs[i], insn)
       if (INSN_P (insn) && INSN_CODE (insn) != -1)
-	FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
-	  if (MEM_P (*iter))
-	    mem_count += 1;
+	{
+	  rtx set;
+
+	  /* The runtime of small loops with memory block operations
+	     is determined by the memory operation itself; unrolling
+	     doesn't help here.  Measurements to confirm this were
+	     only done on recent CPU levels, so better not change
+	     anything for older CPUs.  */
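+	  /* The single_set shapes matched below are those used by
+	     the expanded block-memory loops: a BLKmode destination
+	     (memcpy, or memset via a const0 source) and a COMPARE of
+	     two BLKmode operands (memcmp).  */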
+	  if (s390_tune >= PROCESSOR_2964_Z13
+	      && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS
+	      && ((set = single_set (insn)) != NULL_RTX)
+	      && ((GET_MODE (SET_DEST (set)) == BLKmode
+		   && (GET_MODE (SET_SRC (set)) == BLKmode
+		       || SET_SRC (set) == const0_rtx))
+		  || (GET_CODE (SET_SRC (set)) == COMPARE
+		      && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode
+		      && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode)))
+	    return 1;
+
+	  FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
+	    if (MEM_P (*iter))
+	      mem_count += 1;
+	}
   free (bbs);
 
   /* Prevent division by zero, and we do not need to adjust nunroll in this case.  */
@@ -15131,27 +14688,25 @@
 }
 
 static void
-s390_option_override_internal (bool main_args_p,
-			       struct gcc_options *opts,
+s390_default_align (struct gcc_options *opts)
+{
+  /* Set the default function alignment to 16 in order to get rid of
+     some unwanted performance effects.  */
+  if (opts->x_flag_align_functions && !opts->x_str_align_functions
+      && opts->x_s390_tune >= PROCESSOR_2964_Z13)
+    opts->x_str_align_functions = "16";
+}
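
(Intended effect of the default set above, as a usage sketch; the exact
trigger is the pair of conditions tested in s390_default_align:)

    /* gcc -O2 -march=z13 foo.c
         -> functions are laid out as if -falign-functions=16 had
            been passed (alignment is enabled by -O2, no explicit
            string value was given, tune level is z13 or newer).
       gcc -O2 -march=z13 -falign-functions=8 foo.c
         -> the explicit value wins; the hook never overwrites
            opts->x_str_align_functions once it is set.  */
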
+
+static void
+s390_override_options_after_change (void)
+{
+  s390_default_align (&global_options);
+}
+
+static void
+s390_option_override_internal (struct gcc_options *opts,
 			       const struct gcc_options *opts_set)
 {
-  const char *prefix;
-  const char *suffix;
-
-  /* Set up prefix/suffix so the error messages refer to either the command
-     line argument, or the attribute(target).  */
-  if (main_args_p)
-    {
-      prefix = "-m";
-      suffix = "";
-    }
-  else
-    {
-      prefix = "option(\"";
-      suffix = "\")";
-    }
-
-
   /* Architecture mode defaults according to ABI.  */
   if (!(opts_set->x_target_flags & MASK_ZARCH))
     {
@@ -15164,24 +14719,12 @@
   /* Set the march default in case it hasn't been specified on cmdline.  */
   if (!opts_set->x_s390_arch)
     opts->x_s390_arch = PROCESSOR_2064_Z900;
-  else if (opts->x_s390_arch == PROCESSOR_9672_G5
-	   || opts->x_s390_arch == PROCESSOR_9672_G6)
-    warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
-	     "in future releases; use at least %sarch=z900%s",
-	     prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
-	     suffix, prefix, suffix);
 
   opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
 
   /* Determine processor to tune for.  */
   if (!opts_set->x_s390_tune)
     opts->x_s390_tune = opts->x_s390_arch;
-  else if (opts->x_s390_tune == PROCESSOR_9672_G5
-	   || opts->x_s390_tune == PROCESSOR_9672_G6)
-    warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
-	     "in future releases; use at least %stune=z900%s",
-	     prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
-	     suffix, prefix, suffix);
 
   opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
 
@@ -15189,12 +14732,34 @@
   if (opts->x_s390_arch == PROCESSOR_NATIVE
       || opts->x_s390_tune == PROCESSOR_NATIVE)
     gcc_unreachable ();
-  if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
-    error ("z/Architecture mode not supported on %s",
-	   processor_table[(int)opts->x_s390_arch].name);
   if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
     error ("64-bit ABI not supported in ESA/390 mode");
 
+  if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline
+      || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline
+      || opts->x_s390_function_return == indirect_branch_thunk_inline
+      || opts->x_s390_function_return_reg == indirect_branch_thunk_inline
+      || opts->x_s390_function_return_mem == indirect_branch_thunk_inline)
+    error ("thunk-inline is only supported with -mindirect-branch-jump");
+
+  if (opts->x_s390_indirect_branch != indirect_branch_keep)
+    {
+      if (!opts_set->x_s390_indirect_branch_call)
+	opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch;
+
+      if (!opts_set->x_s390_indirect_branch_jump)
+	opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch;
+    }
+
+  if (opts->x_s390_function_return != indirect_branch_keep)
+    {
+      if (!opts_set->x_s390_function_return_reg)
+	opts->x_s390_function_return_reg = opts->x_s390_function_return;
+
+      if (!opts_set->x_s390_function_return_mem)
+	opts->x_s390_function_return_mem = opts->x_s390_function_return;
+    }
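
(The umbrella-option propagation above, spelled out on example command
lines; option spellings follow the error message and the field names:)

    /* -mindirect-branch=thunk
         acts like -mindirect-branch-call=thunk
                   -mindirect-branch-jump=thunk
       -mindirect-branch=thunk -mindirect-branch-jump=keep
         keeps the explicit jump setting; only the unset call variant
         inherits the umbrella value.
       -mfunction-return=thunk
         propagates the same way to -mfunction-return-reg= and
         -mfunction-return-mem=.
       thunk-inline is accepted only for -mindirect-branch-jump,
       per the error check above.  */
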
+
+  /* Enable hardware transactions if available and not explicitly
+     disabled by the user, e.g. with -m31 -march=zEC12 -mzarch.  */
   if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
@@ -15278,6 +14843,17 @@
   else if (opts->x_s390_stack_guard)
     error ("-mstack-guard implies use of -mstack-size");
 
+  /* Our implementation of the stack probe requires the probe interval
+     to be used as displacement in an address operand.  The maximum
+     probe interval currently is 64k, which would exceed the range of
+     a short displacement.  Trim the value down to 4k in that case.
+     This might result in too many probes being generated, but only on
+     the oldest supported machine level, z900 (worked example
+     below).  */
+  if (!DISP_IN_RANGE ((1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL))))
+    set_param_value ("stack-clash-protection-probe-interval", 12,
+		     opts->x_param_values,
+		     opts_set->x_param_values);
+
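
(Worked example for the clamping above; the param stores the log2 of
the probe interval, and the displacement ranges are the usual s390
ones, 0..4095 short and +-512k long:)

    /* On z900, the only level without the long-displacement facility:
         --param stack-clash-protection-probe-interval=16
           -> 1 << 16 == 65536 is no valid short displacement, so the
              param is forced to 12, i.e. a 4k interval whose probe
              offsets 0..4095 all fit.
       On z990 and newer, 65536 fits the long-displacement range and
       the user's value is left alone.  */
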
 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
   if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
     opts->x_target_flags |= MASK_LONG_DOUBLE_128;
@@ -15324,16 +14900,25 @@
 
   /* Use the alternative scheduling-pressure algorithm by default.  */
   maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
-                         opts->x_param_values,
-                         opts_set->x_param_values);
+			 opts->x_param_values,
+			 opts_set->x_param_values);
 
   maybe_set_param_value (PARAM_MIN_VECT_LOOP_BOUND, 2,
 			 opts->x_param_values,
 			 opts_set->x_param_values);
 
+  /* Set the default alignment.  */
+  s390_default_align (opts);
+
   /* Call target specific restore function to do post-init work.  At the moment,
      this just sets opts->x_s390_cost_pointer.  */
   s390_function_specific_restore (opts, NULL);
+
+  /* Check whether -mfentry is supported.  It cannot be used in 31-bit
+     mode, because 31-bit PLT stubs assume that %r12 contains the GOT
+     address, which is not the case when the code runs before the
+     prologue.  */
+  if (opts->x_flag_fentry && !TARGET_64BIT)
+    error ("-mfentry is supported only for 64-bit CPUs");
 }
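
(Usage sketch for the -mfentry check above; the -pg combination is
illustrative, not asserted by this hunk:)

    /* gcc -m64 -mfentry -pg foo.c   -> accepted
       gcc -m31 -mfentry -pg foo.c   -> "-mfentry is supported only
                                         for 64-bit CPUs"
       The -mfentry call is emitted before the prologue, i.e. before
       %r12 can have been loaded with the GOT address that 31-bit PLT
       stubs rely on.  */
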
 
 static void
@@ -15353,16 +14938,11 @@
 	    {
 	      int val1;
 	      int val2;
-	      char s[256];
-	      char *t;
-
-	      strncpy (s, opt->arg, 256);
-	      s[255] = 0;
-	      t = strchr (s, ',');
+	      char *s = strtok (ASTRDUP (opt->arg), ",");
+	      char *t = strtok (NULL, "\0");
+
 	      if (t != NULL)
 		{
-		  *t = 0;
-		  t++;
 		  val1 = integral_argument (s);
 		  val2 = integral_argument (t);
 		}
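
(The rewritten two-value parse above can be exercised standalone.
ASTRDUP is GCC's alloca-backed strdup; plain strdup stands in for it
here, and the sample argument is illustrative:)

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    int
    main (void)
    {
      char *arg = strdup ("1,2");        /* stands in for opt->arg */
      char *s = strtok (arg, ",");       /* text up to the comma   */
      char *t = strtok (NULL, "\0");     /* the rest, or NULL      */

      printf ("val1=%s val2=%s\n", s, t ? t : "(missing)");
      free (arg);
      return 0;
    }
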
@@ -15397,7 +14977,7 @@
   /* Set up function hooks.  */
   init_machine_status = s390_init_machine_status;
 
-  s390_option_override_internal (true, &global_options, &global_options_set);
+  s390_option_override_internal (&global_options, &global_options_set);
 
   /* Save the initial options in case the user does function specific
      options.  */
@@ -15417,26 +14997,11 @@
     {
       /* Don't emit DWARF3/4 unless specifically selected.  The TPF
 	 debuggers do not yet support DWARF 3/4.  */
-      if (!global_options_set.x_dwarf_strict) 
+      if (!global_options_set.x_dwarf_strict)
 	dwarf_strict = 1;
       if (!global_options_set.x_dwarf_version)
 	dwarf_version = 2;
     }
-
-  /* Register a target-specific optimization-and-lowering pass
-     to run immediately before prologue and epilogue generation.
-
-     Registering the pass must be done at start up.  It's
-     convenient to do it here.  */
-  opt_pass *new_pass = new pass_s390_early_mach (g);
-  struct register_pass_info insert_pass_s390_early_mach =
-    {
-      new_pass,			/* pass */
-      "pro_and_epilogue",	/* reference_pass_name */
-      1,			/* ref_pass_instance_number */
-      PASS_POS_INSERT_BEFORE	/* po_op */
-    };
-  register_pass (&insert_pass_s390_early_mach);
 }
 
 #if S390_USE_TARGET_ATTRIBUTE
@@ -15702,7 +15267,7 @@
 	dest[i] |= src[i];
 
       /* Do any overrides, such as arch=xxx, or tune=xxx support.  */
-      s390_option_override_internal (false, opts, &new_opts_set);
+      s390_option_override_internal (opts, &new_opts_set);
       /* Save the current options unless we are validating options for
 	 #pragma.  */
       t = build_target_option_node (opts);
@@ -15812,6 +15377,78 @@
   return ret;
 }
 
+/* Set VAL to the correct enum value according to the indirect-branch
+   or function-return attribute in ATTR.  */
+
+static inline void
+s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val)
+{
+  const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr)));
+  if (strcmp (str, "keep") == 0)
+    *val = indirect_branch_keep;
+  else if (strcmp (str, "thunk") == 0)
+    *val = indirect_branch_thunk;
+  else if (strcmp (str, "thunk-inline") == 0)
+    *val = indirect_branch_thunk_inline;
+  else if (strcmp (str, "thunk-extern") == 0)
+    *val = indirect_branch_thunk_extern;
+}
+
+/* Record in cfun->machine the settings of -mindirect-branch* and
+   -mfunction-return*, taken from either the command line or the
+   function attributes.  */
+
+static void
+s390_indirect_branch_settings (tree fndecl)
+{
+  tree attr;
+
+  if (!fndecl)
+    return;
+
+  /* Initialize with the cmdline options and let the attributes
+     override it.  */
+  cfun->machine->indirect_branch_jump = s390_indirect_branch_jump;
+  cfun->machine->indirect_branch_call = s390_indirect_branch_call;
+
+  cfun->machine->function_return_reg = s390_function_return_reg;
+  cfun->machine->function_return_mem = s390_function_return_mem;
+
+  if ((attr = lookup_attribute ("indirect_branch",
+				DECL_ATTRIBUTES (fndecl))))
+    {
+      s390_indirect_branch_attrvalue (attr,
+				      &cfun->machine->indirect_branch_jump);
+      s390_indirect_branch_attrvalue (attr,
+				      &cfun->machine->indirect_branch_call);
+    }
+
+  if ((attr = lookup_attribute ("indirect_branch_jump",
+				DECL_ATTRIBUTES (fndecl))))
+    s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump);
+
+  if ((attr = lookup_attribute ("indirect_branch_call",
+				DECL_ATTRIBUTES (fndecl))))
+    s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call);
+
+  if ((attr = lookup_attribute ("function_return",
+				DECL_ATTRIBUTES (fndecl))))
+    {
+      s390_indirect_branch_attrvalue (attr,
+				      &cfun->machine->function_return_reg);
+      s390_indirect_branch_attrvalue (attr,
+				      &cfun->machine->function_return_mem);
+    }
+
+  if ((attr = lookup_attribute ("function_return_reg",
+				DECL_ATTRIBUTES (fndecl))))
+    s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg);
+
+  if ((attr = lookup_attribute ("function_return_mem",
+				DECL_ATTRIBUTES (fndecl))))
+    s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem);
+}
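
(The attributes looked up above can be attached per function; a usage
sketch, with the string values being exactly those recognized by
s390_indirect_branch_attrvalue:)

    /* Override the command-line setting for one function only.  */
    void __attribute__ ((indirect_branch ("thunk-extern")))
    dispatch (void (*fp) (void))
    {
      fp ();  /* this indirect call goes through an external thunk */
    }

    /* The narrower spellings mirror the lookups above:
       indirect_branch_jump, indirect_branch_call, function_return,
       function_return_reg and function_return_mem.  */
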
+
 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
    cache.  */
 
@@ -15838,7 +15475,10 @@
      several times in the course of compiling a function, and we don't want to
      slow things down too much or call target_reinit when it isn't safe.  */
   if (fndecl == s390_previous_fndecl)
-    return;
+    {
+      s390_indirect_branch_settings (fndecl);
+      return;
+    }
 
   tree old_tree;
   if (s390_previous_fndecl == NULL_TREE)
@@ -15862,6 +15502,8 @@
   if (old_tree != new_tree)
     s390_activate_target_options (new_tree);
   s390_previous_fndecl = fndecl;
+
+  s390_indirect_branch_settings (fndecl);
 }
 #endif
 
@@ -16160,6 +15802,184 @@
   return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
 }
 
+#ifdef HAVE_GAS_HIDDEN
+# define USE_HIDDEN_LINKONCE 1
+#else
+# define USE_HIDDEN_LINKONCE 0
+#endif
+
+/* Output an indirect branch trampoline for target register REGNO.  */
+
+static void
+s390_output_indirect_thunk_function (unsigned int regno, bool z10_p)
+{
+  tree decl;
+  char thunk_label[32];
+  int i;
+
+  if (z10_p)
+    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno);
+  else
+    sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX,
+	     INDIRECT_BRANCH_THUNK_REGNUM, regno);
+
+  decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
+		     get_identifier (thunk_label),
+		     build_function_type_list (void_type_node, NULL_TREE));
+  DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
+				   NULL_TREE, void_type_node);
+  TREE_PUBLIC (decl) = 1;
+  TREE_STATIC (decl) = 1;
+  DECL_IGNORED_P (decl) = 1;
+
+  if (USE_HIDDEN_LINKONCE)
+    {
+      cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
+
+      targetm.asm_out.unique_section (decl, 0);
+      switch_to_section (get_named_section (decl, NULL, 0));
+
+      targetm.asm_out.globalize_label (asm_out_file, thunk_label);
+      fputs ("\t.hidden\t", asm_out_file);
+      assemble_name (asm_out_file, thunk_label);
+      putc ('\n', asm_out_file);
+      ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl);
+    }
+  else
+    {
+      switch_to_section (text_section);
+      ASM_OUTPUT_LABEL (asm_out_file, thunk_label);
+    }
+
+  DECL_INITIAL (decl) = make_node (BLOCK);
+  current_function_decl = decl;
+  allocate_struct_function (decl, false);
+  init_function_start (decl);
+  cfun->is_thunk = true;
+  first_function_block_is_cold = false;
+  final_start_function (emit_barrier (), asm_out_file, 1);
+
+  /* This makes CFI at least usable for indirect jumps.
+
+     Stopping in the thunk: a backtrace will point to the thunk target
+     as if it had been interrupted by a signal.  For a call this means
+     that the call chain will be: caller->callee->thunk.  */
+  if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm)
+    {
+      fputs ("\t.cfi_signal_frame\n", asm_out_file);
+      fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno);
+      for (i = 0; i < FPR15_REGNUM; i++)
+	fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]);
+    }
+
+  if (z10_p)
+    {
+      /* exrl  0,1f  */
+
+      /* We generate a thunk for z10-compiled code although z10 is
+	 currently not enabled.  Tell the assembler to accept the
+	 instruction anyway.  */
+      if (!TARGET_CPU_Z10)
+	{
+	  fputs ("\t.machine push\n", asm_out_file);
+	  fputs ("\t.machine z10\n", asm_out_file);
+	}
+      /* We use exrl even if -mzarch hasn't been specified on the
+	 command line so we have to tell the assembler to accept
+	 it.  */
+      if (!TARGET_ZARCH)
+	fputs ("\t.machinemode zarch\n", asm_out_file);
+
+      fputs ("\texrl\t0,1f\n", asm_out_file);
+
+      if (!TARGET_ZARCH)
+	fputs ("\t.machinemode esa\n", asm_out_file);
+
+      if (!TARGET_CPU_Z10)
+	fputs ("\t.machine pop\n", asm_out_file);
+    }
+  else
+    {
+      /* larl %r1,1f  */
+      fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n",
+	       INDIRECT_BRANCH_THUNK_REGNUM);
+
+      /* ex 0,0(%r1)  */
+      fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n",
+	       INDIRECT_BRANCH_THUNK_REGNUM);
+    }
+
+  /* 0:    j 0b  */
+  fputs ("0:\tj\t0b\n", asm_out_file);
+
+  /* 1:    br <regno>  */
+  fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno);
+
+  final_end_function ();
+  init_insn_lengths ();
+  free_after_compilation (cfun);
+  set_cfun (NULL);
+  current_function_decl = NULL;
+}
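
(What the fputs/fprintf sequence above emits, reconstructed for
regno == 1; the thunk's own label comes from the name templates and is
elided here:)

    /* z10 variant:

           exrl    0,1f       # execute the br below, out of line
       0:  j       0b         # never falls through; speculation
                              # that reaches here just spins
       1:  br      %r1        # the actual indirect branch

       The pre-z10 variant replaces the exrl with
           larl %r1,1f
           ex   0,0(%r1)
       which is why that form needs INDIRECT_BRANCH_THUNK_REGNUM as a
       scratch register.  Branches executed via ex/exrl bypass the
       indirect-branch prediction these thunks are meant to avoid.  */
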
+
+/* Implement the asm.code_end target hook.  */
+
+static void
+s390_code_end (void)
+{
+  int i;
+
+  for (i = 1; i < 16; i++)
+    {
+      if (indirect_branch_z10thunk_mask & (1 << i))
+	s390_output_indirect_thunk_function (i, true);
+
+      if (indirect_branch_prez10thunk_mask & (1 << i))
+	s390_output_indirect_thunk_function (i, false);
+    }
+
+  if (TARGET_INDIRECT_BRANCH_TABLE)
+    {
+      int o;
+      int i;
+
+      for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++)
+	{
+	  if (indirect_branch_table_label_no[o] == 0)
+	    continue;
+
+	  switch_to_section (get_section (indirect_branch_table_name[o],
+					  0,
+					  NULL_TREE));
+	  for (i = 0; i < indirect_branch_table_label_no[o]; i++)
+	    {
+	      char label_start[32];
+
+	      ASM_GENERATE_INTERNAL_LABEL (label_start,
+					   indirect_branch_table_label[o], i);
+
+	      fputs ("\t.long\t", asm_out_file);
+	      assemble_name_raw (asm_out_file, label_start);
+	      fputs ("-.\n", asm_out_file);
+	    }
+	  switch_to_section (current_function_section ());
+	}
+    }
+}
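
(Sketch of the assembly the table loop above produces for one option
kind; the section and label spellings shown are illustrative, the real
ones come from indirect_branch_table_name/_label:)

    /* .section  .s390_indirect_call
       .long     .LLABEL0-.   # self-relative: site = . + value
       .long     .LLABEL1-.
       Each 32-bit entry records the distance from the entry itself to
       one branch-site label, so a post-processing tool can enumerate
       every thunked branch in the object file.  */
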
+
+/* Implement the TARGET_CASE_VALUES_THRESHOLD target hook.  */
+
+unsigned int
+s390_case_values_threshold (void)
+{
+  /* Disabling branch prediction for indirect jumps makes jump tables
+     much more expensive.  */
+  if (TARGET_INDIRECT_BRANCH_NOBP_JUMP)
+    return 20;
+
+  return default_case_values_threshold ();
+}
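
(Source-level consequence of the raised threshold, as a sketch; the
default threshold is smaller than the 20 returned above:)

    /* With -mindirect-branch-jump=thunk this switch is expanded as a
       compare/branch chain instead of a jump table, since the table
       dispatch would need a thunked, and therefore expensive,
       indirect jump.  */
    int
    classify (int c)
    {
      switch (c)
        {
        case 0:  return 1;
        case 1:  return 2;
        case 2:  return 4;
        case 3:  return 8;
        default: return 0;
        }
    }
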
+
 /* Initialize GCC target structure.  */
 
 #undef  TARGET_ASM_ALIGNED_HI_OP
@@ -16271,6 +16091,9 @@
 #undef TARGET_PASS_BY_REFERENCE
 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
 
+#undef  TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
+#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change
+
 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
 #undef TARGET_FUNCTION_ARG
@@ -16442,6 +16265,12 @@
 #undef TARGET_CONSTANT_ALIGNMENT
 #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment
 
+#undef TARGET_ASM_CODE_END
+#define TARGET_ASM_CODE_END s390_code_end
+
+#undef TARGET_CASE_VALUES_THRESHOLD
+#define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold
+
 struct gcc_target targetm = TARGET_INITIALIZER;
 
 #include "gt-s390.h"