Mercurial > hg > CbC > CbC_gcc
diff gcc/config/s390/s390.c @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
line wrap: on
line diff
--- a/gcc/config/s390/s390.c Fri Oct 27 22:46:09 2017 +0900 +++ b/gcc/config/s390/s390.c Thu Oct 25 07:37:49 2018 +0900 @@ -1,5 +1,5 @@ /* Subroutines used for code generation on IBM S/390 and zSeries - Copyright (C) 1999-2017 Free Software Foundation, Inc. + Copyright (C) 1999-2018 Free Software Foundation, Inc. Contributed by Hartmut Penner (hpenner@de.ibm.com) and Ulrich Weigand (uweigand@de.ibm.com) and Andreas Krebbel (Andreas.Krebbel@de.ibm.com). @@ -20,6 +20,8 @@ along with GCC; see the file COPYING3. If not see <http://www.gnu.org/licenses/>. */ +#define IN_TARGET_CODE 1 + #include "config.h" #include "system.h" #include "coretypes.h" @@ -324,29 +326,18 @@ COSTS_N_INSNS (160), /* DSGR cracked */ }; -static struct -{ - /* The preferred name to be used in user visible output. */ - const char *const name; - /* CPU name as it should be passed to Binutils via .machine */ - const char *const binutils_name; - const enum processor_type processor; - const struct processor_costs *cost; -} -const processor_table[] = -{ - { "g5", "g5", PROCESSOR_9672_G5, &z900_cost }, - { "g6", "g6", PROCESSOR_9672_G6, &z900_cost }, - { "z900", "z900", PROCESSOR_2064_Z900, &z900_cost }, - { "z990", "z990", PROCESSOR_2084_Z990, &z990_cost }, - { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost }, - { "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost }, - { "z10", "z10", PROCESSOR_2097_Z10, &z10_cost }, - { "z196", "z196", PROCESSOR_2817_Z196, &z196_cost }, - { "zEC12", "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost }, - { "z13", "z13", PROCESSOR_2964_Z13, &zEC12_cost }, - { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost }, - { "native", "", PROCESSOR_NATIVE, NULL } +const struct s390_processor processor_table[] = +{ + { "z900", "z900", PROCESSOR_2064_Z900, &z900_cost, 5 }, + { "z990", "z990", PROCESSOR_2084_Z990, &z990_cost, 6 }, + { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost, 7 }, + { "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost, 7 }, + { "z10", "z10", PROCESSOR_2097_Z10, &z10_cost, 8 }, + { "z196", "z196", PROCESSOR_2817_Z196, &z196_cost, 9 }, + { "zEC12", "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost, 10 }, + { "z13", "z13", PROCESSOR_2964_Z13, &zEC12_cost, 11 }, + { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost, 12 }, + { "native", "", PROCESSOR_NATIVE, NULL, 0 } }; extern int reload_completed; @@ -388,6 +379,11 @@ base and index are registers of the class ADDR_REGS, displacement is an unsigned 12-bit immediate constant. */ +/* The max number of insns of backend generated memset/memcpy/memcmp + loops. This value is used in the unroll adjust hook to detect such + loops. Current max is 9 coming from the memcmp loop. */ +#define BLOCK_MEM_OPS_LOOP_INSNS 9 + struct s390_address { rtx base; @@ -397,84 +393,6 @@ bool literal_pool; }; -/* The following structure is embedded in the machine - specific part of struct function. */ - -struct GTY (()) s390_frame_layout -{ - /* Offset within stack frame. */ - HOST_WIDE_INT gprs_offset; - HOST_WIDE_INT f0_offset; - HOST_WIDE_INT f4_offset; - HOST_WIDE_INT f8_offset; - HOST_WIDE_INT backchain_offset; - - /* Number of first and last gpr where slots in the register - save area are reserved for. */ - int first_save_gpr_slot; - int last_save_gpr_slot; - - /* Location (FP register number) where GPRs (r0-r15) should - be saved to. - 0 - does not need to be saved at all - -1 - stack slot */ -#define SAVE_SLOT_NONE 0 -#define SAVE_SLOT_STACK -1 - signed char gpr_save_slots[16]; - - /* Number of first and last gpr to be saved, restored. */ - int first_save_gpr; - int first_restore_gpr; - int last_save_gpr; - int last_restore_gpr; - - /* Bits standing for floating point registers. Set, if the - respective register has to be saved. Starting with reg 16 (f0) - at the rightmost bit. - Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0 - fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0 - reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */ - unsigned int fpr_bitmap; - - /* Number of floating point registers f8-f15 which must be saved. */ - int high_fprs; - - /* Set if return address needs to be saved. - This flag is set by s390_return_addr_rtx if it could not use - the initial value of r14 and therefore depends on r14 saved - to the stack. */ - bool save_return_addr_p; - - /* Size of stack frame. */ - HOST_WIDE_INT frame_size; -}; - -/* Define the structure for the machine field in struct function. */ - -struct GTY(()) machine_function -{ - struct s390_frame_layout frame_layout; - - /* Literal pool base register. */ - rtx base_reg; - - /* True if we may need to perform branch splitting. */ - bool split_branches_pending_p; - - bool has_landing_pad_p; - - /* True if the current function may contain a tbegin clobbering - FPRs. */ - bool tbegin_p; - - /* For -fsplit-stack support: A stack local which holds a pointer to - the stack arguments for a function with a variable number of - arguments. This is set at the start of the function and is used - to initialize the overflow_arg_area field of the va_list - structure. */ - rtx split_stack_varargs_pointer; -}; - /* Few accessor macros for struct cfun->machine->s390_frame_layout. */ #define cfun_frame_layout (cfun->machine->frame_layout) @@ -502,11 +420,11 @@ #define CONST_OK_FOR_K(x) \ CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K") #define CONST_OK_FOR_Os(x) \ - CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os") + CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os") #define CONST_OK_FOR_Op(x) \ - CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op") + CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op") #define CONST_OK_FOR_On(x) \ - CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On") + CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On") #define REGNO_PAIR_OK(REGNO, MODE) \ (s390_hard_regno_nregs ((REGNO), (MODE)) == 1 || !((REGNO) & 1)) @@ -515,6 +433,33 @@ bytes on a z10 (or higher) CPU. */ #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048) +/* Masks per jump target register indicating which thunk need to be + generated. */ +static GTY(()) int indirect_branch_prez10thunk_mask = 0; +static GTY(()) int indirect_branch_z10thunk_mask = 0; + +#define INDIRECT_BRANCH_NUM_OPTIONS 4 + +enum s390_indirect_branch_option + { + s390_opt_indirect_branch_jump = 0, + s390_opt_indirect_branch_call, + s390_opt_function_return_reg, + s390_opt_function_return_mem + }; + +static GTY(()) int indirect_branch_table_label_no[INDIRECT_BRANCH_NUM_OPTIONS] = { 0 }; +const char *indirect_branch_table_label[INDIRECT_BRANCH_NUM_OPTIONS] = \ + { "LJUMP", "LCALL", "LRETREG", "LRETMEM" }; +const char *indirect_branch_table_name[INDIRECT_BRANCH_NUM_OPTIONS] = \ + { ".s390_indirect_jump", ".s390_indirect_call", + ".s390_return_reg", ".s390_return_mem" }; + +bool +s390_return_addr_from_memory () +{ + return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK; +} /* Indicate which ABI has been used for passing vector args. 0 - no vector type arguments have been passed where the ABI is relevant @@ -974,7 +919,7 @@ /* Record the vector mode used for an element selector. This assumes: 1. There is no builtin with two different vector modes and an element selector - 2. The element selector comes after the vector type it is referring to. + 2. The element selector comes after the vector type it is referring to. This currently the true for all the builtins but FIXME we should better check for that. */ if (VECTOR_MODE_P (insn_op->mode)) @@ -1037,7 +982,7 @@ break; case 1: if (nonvoid) - pat = GEN_FCN (icode) (target, op[0]); + pat = GEN_FCN (icode) (target, op[0]); else pat = GEN_FCN (icode) (op[0]); break; @@ -1177,11 +1122,85 @@ return NULL_TREE; } +/* Check syntax of function decl attributes having a string type value. */ + +static tree +s390_handle_string_attribute (tree *node, tree name ATTRIBUTE_UNUSED, + tree args ATTRIBUTE_UNUSED, + int flags ATTRIBUTE_UNUSED, + bool *no_add_attrs) +{ + tree cst; + + if (TREE_CODE (*node) != FUNCTION_DECL) + { + warning (OPT_Wattributes, "%qE attribute only applies to functions", + name); + *no_add_attrs = true; + } + + cst = TREE_VALUE (args); + + if (TREE_CODE (cst) != STRING_CST) + { + warning (OPT_Wattributes, + "%qE attribute requires a string constant argument", + name); + *no_add_attrs = true; + } + + if (is_attribute_p ("indirect_branch", name) + || is_attribute_p ("indirect_branch_call", name) + || is_attribute_p ("function_return", name) + || is_attribute_p ("function_return_reg", name) + || is_attribute_p ("function_return_mem", name)) + { + if (strcmp (TREE_STRING_POINTER (cst), "keep") != 0 + && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0 + && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0) + { + warning (OPT_Wattributes, + "argument to %qE attribute is not " + "(keep|thunk|thunk-extern)", name); + *no_add_attrs = true; + } + } + + if (is_attribute_p ("indirect_branch_jump", name) + && strcmp (TREE_STRING_POINTER (cst), "keep") != 0 + && strcmp (TREE_STRING_POINTER (cst), "thunk") != 0 + && strcmp (TREE_STRING_POINTER (cst), "thunk-inline") != 0 + && strcmp (TREE_STRING_POINTER (cst), "thunk-extern") != 0) + { + warning (OPT_Wattributes, + "argument to %qE attribute is not " + "(keep|thunk|thunk-inline|thunk-extern)", name); + *no_add_attrs = true; + } + + return NULL_TREE; +} + static const struct attribute_spec s390_attribute_table[] = { - { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false }, - { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true }, + { "hotpatch", 2, 2, true, false, false, false, + s390_handle_hotpatch_attribute, NULL }, + { "s390_vector_bool", 0, 0, false, true, false, true, + s390_handle_vectorbool_attribute, NULL }, + { "indirect_branch", 1, 1, true, false, false, false, + s390_handle_string_attribute, NULL }, + { "indirect_branch_jump", 1, 1, true, false, false, false, + s390_handle_string_attribute, NULL }, + { "indirect_branch_call", 1, 1, true, false, false, false, + s390_handle_string_attribute, NULL }, + { "function_return", 1, 1, true, false, false, false, + s390_handle_string_attribute, NULL }, + { "function_return_reg", 1, 1, true, false, false, false, + s390_handle_string_attribute, NULL }, + { "function_return_mem", 1, 1, true, false, false, false, + s390_handle_string_attribute, NULL }, + /* End element. */ - { NULL, 0, 0, false, false, false, NULL, false } + { NULL, 0, 0, false, false, false, false, NULL, NULL } }; /* Return the alignment for LABEL. We default to the -falign-labels @@ -1208,7 +1227,7 @@ return 0; old: - return align_labels_log; + return align_labels.levels[0].log; } static GTY(()) rtx got_symbol; @@ -1313,7 +1332,7 @@ case E_CCZmode: if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode) - return m2; + return m2; return VOIDmode; case E_CCSmode: @@ -1373,20 +1392,20 @@ case E_CCVFHmode: case E_CCVFHEmode: if (req_mode != set_mode) - return 0; + return 0; break; case E_CCZmode: if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode && req_mode != CCSRmode && req_mode != CCURmode && req_mode != CCZ1mode) - return 0; + return 0; break; case E_CCAPmode: case E_CCANmode: if (req_mode != CCAmode) - return 0; + return 0; break; default: @@ -1415,12 +1434,12 @@ if (GET_CODE (PATTERN (insn)) == PARALLEL) for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++) - { - rtx set = XVECEXP (PATTERN (insn), 0, i); - if (GET_CODE (set) == SET) - if (!s390_match_ccmode_set (set, req_mode)) - return false; - } + { + rtx set = XVECEXP (PATTERN (insn), 0, i); + if (GET_CODE (set) == SET) + if (!s390_match_ccmode_set (set, req_mode)) + return false; + } return true; } @@ -1460,7 +1479,7 @@ bit1 = exact_log2 (INTVAL (op2)); bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2)); if (bit0 != -1 && bit1 != -1) - return bit0 > bit1 ? CCT1mode : CCT2mode; + return bit0 > bit1 ? CCT1mode : CCT2mode; } return VOIDmode; @@ -1497,7 +1516,7 @@ { /* Relax CCTmode to CCZmode to allow fall-back to AND if that turns out to be beneficial. */ - return ccmode == CCTmode ? CCZmode : ccmode; + return ccmode == CCTmode ? CCZmode : ccmode; } } @@ -1524,11 +1543,11 @@ && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT) return CCAPmode; - /* If constants are involved in an add instruction it is possible to use - the resulting cc for comparisons with zero. Knowing the sign of the + /* If constants are involved in an add instruction it is possible to use + the resulting cc for comparisons with zero. Knowing the sign of the constant the overflow behavior gets predictable. e.g.: - int a, b; if ((b = a + c) > 0) - with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */ + int a, b; if ((b = a + c) > 0) + with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */ if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))) || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os") @@ -1633,8 +1652,8 @@ && (GET_MODE_SIZE (GET_MODE (inner)) >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner)))) && ((INTVAL (mask) - & GET_MODE_MASK (GET_MODE (inner)) - & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner)))) + & GET_MODE_MASK (GET_MODE (inner)) + & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner)))) == 0)) inner = SUBREG_REG (inner); @@ -1719,8 +1738,8 @@ { /* For CCRAWmode put the required cc mask into the second operand. */ - if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode - && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3) + if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode + && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3) *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1))); *op0 = XVECEXP (*op0, 0, 0); *code = new_code; @@ -1737,7 +1756,7 @@ && *op1 == const0_rtx) { if ((*code == EQ && GET_CODE (*op0) == NE) - || (*code == NE && GET_CODE (*op0) == EQ)) + || (*code == NE && GET_CODE (*op0) == EQ)) *code = EQ; else *code = NE; @@ -1806,6 +1825,21 @@ return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx); } +/* If MEM is not a legitimate compare-and-swap memory operand, return a new + MEM, whose address is a pseudo containing the original MEM's address. */ + +static rtx +s390_legitimize_cs_operand (rtx mem) +{ + rtx tmp; + + if (!contains_symbol_ref_p (mem)) + return mem; + tmp = gen_reg_rtx (Pmode); + emit_move_insn (tmp, copy_rtx (XEXP (mem, 0))); + return change_address (mem, VOIDmode, tmp); +} + /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD matches CMP. Return the correct condition RTL to be placed in the IF_THEN_ELSE of the @@ -1817,6 +1851,7 @@ { rtx cc; + mem = s390_legitimize_cs_operand (mem); cc = gen_rtx_REG (ccmode, CC_REGNUM); switch (GET_MODE (mem)) { @@ -1880,65 +1915,65 @@ case E_CCZmode: case E_CCZ1mode: switch (GET_CODE (code)) - { - case EQ: return CC0; + { + case EQ: return CC0; case NE: return CC1 | CC2 | CC3; default: return -1; - } + } break; case E_CCT1mode: switch (GET_CODE (code)) - { - case EQ: return CC1; + { + case EQ: return CC1; case NE: return CC0 | CC2 | CC3; default: return -1; - } + } break; case E_CCT2mode: switch (GET_CODE (code)) - { - case EQ: return CC2; + { + case EQ: return CC2; case NE: return CC0 | CC1 | CC3; default: return -1; - } + } break; case E_CCT3mode: switch (GET_CODE (code)) - { - case EQ: return CC3; + { + case EQ: return CC3; case NE: return CC0 | CC1 | CC2; default: return -1; - } + } break; case E_CCLmode: switch (GET_CODE (code)) - { - case EQ: return CC0 | CC2; + { + case EQ: return CC0 | CC2; case NE: return CC1 | CC3; default: return -1; - } + } break; case E_CCL1mode: switch (GET_CODE (code)) - { + { case LTU: return CC2 | CC3; /* carry */ case GEU: return CC0 | CC1; /* no carry */ default: return -1; - } + } break; case E_CCL2mode: switch (GET_CODE (code)) - { + { case GTU: return CC0 | CC1; /* borrow */ case LEU: return CC2 | CC3; /* no borrow */ default: return -1; - } + } break; case E_CCL3mode: @@ -1955,96 +1990,96 @@ case E_CCUmode: switch (GET_CODE (code)) - { - case EQ: return CC0; - case NE: return CC1 | CC2 | CC3; - case LTU: return CC1; - case GTU: return CC2; - case LEU: return CC0 | CC1; - case GEU: return CC0 | CC2; + { + case EQ: return CC0; + case NE: return CC1 | CC2 | CC3; + case LTU: return CC1; + case GTU: return CC2; + case LEU: return CC0 | CC1; + case GEU: return CC0 | CC2; default: return -1; - } + } break; case E_CCURmode: switch (GET_CODE (code)) - { - case EQ: return CC0; - case NE: return CC2 | CC1 | CC3; - case LTU: return CC2; - case GTU: return CC1; - case LEU: return CC0 | CC2; - case GEU: return CC0 | CC1; + { + case EQ: return CC0; + case NE: return CC2 | CC1 | CC3; + case LTU: return CC2; + case GTU: return CC1; + case LEU: return CC0 | CC2; + case GEU: return CC0 | CC1; default: return -1; - } + } break; case E_CCAPmode: switch (GET_CODE (code)) - { - case EQ: return CC0; - case NE: return CC1 | CC2 | CC3; - case LT: return CC1 | CC3; - case GT: return CC2; - case LE: return CC0 | CC1 | CC3; - case GE: return CC0 | CC2; + { + case EQ: return CC0; + case NE: return CC1 | CC2 | CC3; + case LT: return CC1 | CC3; + case GT: return CC2; + case LE: return CC0 | CC1 | CC3; + case GE: return CC0 | CC2; default: return -1; - } + } break; case E_CCANmode: switch (GET_CODE (code)) - { - case EQ: return CC0; - case NE: return CC1 | CC2 | CC3; - case LT: return CC1; - case GT: return CC2 | CC3; - case LE: return CC0 | CC1; - case GE: return CC0 | CC2 | CC3; + { + case EQ: return CC0; + case NE: return CC1 | CC2 | CC3; + case LT: return CC1; + case GT: return CC2 | CC3; + case LE: return CC0 | CC1; + case GE: return CC0 | CC2 | CC3; default: return -1; - } + } break; case E_CCSmode: switch (GET_CODE (code)) - { - case EQ: return CC0; - case NE: return CC1 | CC2 | CC3; - case LT: return CC1; - case GT: return CC2; - case LE: return CC0 | CC1; - case GE: return CC0 | CC2; + { + case EQ: return CC0; + case NE: return CC1 | CC2 | CC3; + case LT: return CC1; + case GT: return CC2; + case LE: return CC0 | CC1; + case GE: return CC0 | CC2; case UNORDERED: return CC3; case ORDERED: return CC0 | CC1 | CC2; case UNEQ: return CC0 | CC3; - case UNLT: return CC1 | CC3; - case UNGT: return CC2 | CC3; - case UNLE: return CC0 | CC1 | CC3; - case UNGE: return CC0 | CC2 | CC3; + case UNLT: return CC1 | CC3; + case UNGT: return CC2 | CC3; + case UNLE: return CC0 | CC1 | CC3; + case UNGE: return CC0 | CC2 | CC3; case LTGT: return CC1 | CC2; default: return -1; - } + } break; case E_CCSRmode: switch (GET_CODE (code)) - { - case EQ: return CC0; - case NE: return CC2 | CC1 | CC3; - case LT: return CC2; - case GT: return CC1; - case LE: return CC0 | CC2; - case GE: return CC0 | CC1; + { + case EQ: return CC0; + case NE: return CC2 | CC1 | CC3; + case LT: return CC2; + case GT: return CC1; + case LE: return CC0 | CC2; + case GE: return CC0 | CC1; case UNORDERED: return CC3; case ORDERED: return CC0 | CC2 | CC1; case UNEQ: return CC0 | CC3; - case UNLT: return CC2 | CC3; - case UNGT: return CC1 | CC3; - case UNLE: return CC0 | CC2 | CC3; - case UNGE: return CC0 | CC1 | CC3; + case UNLT: return CC2 | CC3; + case UNGT: return CC1 | CC3; + case UNLE: return CC0 | CC2 | CC3; + case UNGE: return CC0 | CC1 | CC3; case LTGT: return CC2 | CC1; default: return -1; - } + } break; /* Vector comparison modes. */ @@ -2496,7 +2531,7 @@ { rtx subreg = operand_subword (dst, first_subword, 0, mode); if (reg_overlap_mentioned_p (subreg, src)) - return false; + return false; } return true; @@ -2596,7 +2631,7 @@ else if (REG_P (dst)) dst = gen_rtx_SUBREG (wmode, dst, 0); else - dst = gen_reg_rtx (wmode); + dst = gen_reg_rtx (wmode); if (GET_CODE (src1) == SUBREG && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0) @@ -2718,7 +2753,7 @@ if (GET_CODE (disp) == CONST && GET_CODE (XEXP (disp, 0)) == UNSPEC && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT - || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF)) + || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF)) return false; /* All other symbolic constants are literal pool references, @@ -2729,6 +2764,58 @@ return false; } +/* Attempts to split `ref', which should be UNSPEC_LTREF, into (base + `disp'). + If successful, also determines the + following characteristics of `ref': `is_ptr' - whether it can be an + LA argument, `is_base_ptr' - whether the resulting base is a well-known + base register (stack/frame pointer, etc), `is_pool_ptr` - whether it is + considered a literal pool pointer for purposes of avoiding two different + literal pool pointers per insn during or after reload (`B' constraint). */ +static bool +s390_decompose_constant_pool_ref (rtx *ref, rtx *disp, bool *is_ptr, + bool *is_base_ptr, bool *is_pool_ptr) +{ + if (!*ref) + return true; + + if (GET_CODE (*ref) == UNSPEC) + switch (XINT (*ref, 1)) + { + case UNSPEC_LTREF: + if (!*disp) + *disp = gen_rtx_UNSPEC (Pmode, + gen_rtvec (1, XVECEXP (*ref, 0, 0)), + UNSPEC_LTREL_OFFSET); + else + return false; + + *ref = XVECEXP (*ref, 0, 1); + break; + + default: + return false; + } + + if (!REG_P (*ref) || GET_MODE (*ref) != Pmode) + return false; + + if (REGNO (*ref) == STACK_POINTER_REGNUM + || REGNO (*ref) == FRAME_POINTER_REGNUM + || ((reload_completed || reload_in_progress) + && frame_pointer_needed + && REGNO (*ref) == HARD_FRAME_POINTER_REGNUM) + || REGNO (*ref) == ARG_POINTER_REGNUM + || (flag_pic + && REGNO (*ref) == PIC_OFFSET_TABLE_REGNUM)) + *is_ptr = *is_base_ptr = true; + + if ((reload_completed || reload_in_progress) + && *ref == cfun->machine->base_reg) + *is_ptr = *is_base_ptr = *is_pool_ptr = true; + + return true; +} + /* Decompose a RTL expression ADDR for a memory address into its components, returned in OUT. @@ -2840,96 +2927,14 @@ } /* Validate base register. */ - if (base) - { - if (GET_CODE (base) == UNSPEC) - switch (XINT (base, 1)) - { - case UNSPEC_LTREF: - if (!disp) - disp = gen_rtx_UNSPEC (Pmode, - gen_rtvec (1, XVECEXP (base, 0, 0)), - UNSPEC_LTREL_OFFSET); - else - return false; - - base = XVECEXP (base, 0, 1); - break; - - case UNSPEC_LTREL_BASE: - if (XVECLEN (base, 0) == 1) - base = fake_pool_base, literal_pool = true; - else - base = XVECEXP (base, 0, 1); - break; - - default: - return false; - } - - if (!REG_P (base) || GET_MODE (base) != Pmode) - return false; - - if (REGNO (base) == STACK_POINTER_REGNUM - || REGNO (base) == FRAME_POINTER_REGNUM - || ((reload_completed || reload_in_progress) - && frame_pointer_needed - && REGNO (base) == HARD_FRAME_POINTER_REGNUM) - || REGNO (base) == ARG_POINTER_REGNUM - || (flag_pic - && REGNO (base) == PIC_OFFSET_TABLE_REGNUM)) - pointer = base_ptr = true; - - if ((reload_completed || reload_in_progress) - && base == cfun->machine->base_reg) - pointer = base_ptr = literal_pool = true; - } + if (!s390_decompose_constant_pool_ref (&base, &disp, &pointer, &base_ptr, + &literal_pool)) + return false; /* Validate index register. */ - if (indx) - { - if (GET_CODE (indx) == UNSPEC) - switch (XINT (indx, 1)) - { - case UNSPEC_LTREF: - if (!disp) - disp = gen_rtx_UNSPEC (Pmode, - gen_rtvec (1, XVECEXP (indx, 0, 0)), - UNSPEC_LTREL_OFFSET); - else - return false; - - indx = XVECEXP (indx, 0, 1); - break; - - case UNSPEC_LTREL_BASE: - if (XVECLEN (indx, 0) == 1) - indx = fake_pool_base, literal_pool = true; - else - indx = XVECEXP (indx, 0, 1); - break; - - default: - return false; - } - - if (!REG_P (indx) || GET_MODE (indx) != Pmode) - return false; - - if (REGNO (indx) == STACK_POINTER_REGNUM - || REGNO (indx) == FRAME_POINTER_REGNUM - || ((reload_completed || reload_in_progress) - && frame_pointer_needed - && REGNO (indx) == HARD_FRAME_POINTER_REGNUM) - || REGNO (indx) == ARG_POINTER_REGNUM - || (flag_pic - && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM)) - pointer = indx_ptr = true; - - if ((reload_completed || reload_in_progress) - && indx == cfun->machine->base_reg) - pointer = indx_ptr = literal_pool = true; - } + if (!s390_decompose_constant_pool_ref (&indx, &disp, &pointer, &indx_ptr, + &literal_pool)) + return false; /* Prefer to use pointer as base, not index. */ if (base && indx && !base_ptr @@ -2972,14 +2977,14 @@ pointer = true; /* In the small-PIC case, the linker converts @GOT - and @GOTNTPOFF offsets to possible displacements. */ + and @GOTNTPOFF offsets to possible displacements. */ if (GET_CODE (disp) == UNSPEC - && (XINT (disp, 1) == UNSPEC_GOT + && (XINT (disp, 1) == UNSPEC_GOT || XINT (disp, 1) == UNSPEC_GOTNTPOFF) && flag_pic == 1) - { + { ; - } + } /* Accept pool label offsets. */ else if (GET_CODE (disp) == UNSPEC @@ -2989,7 +2994,7 @@ /* Accept literal pool references. */ else if (GET_CODE (disp) == UNSPEC && XINT (disp, 1) == UNSPEC_LTREL_OFFSET) - { + { /* In case CSE pulled a non literal pool reference out of the pool we have to reject the address. This is especially important when loading the GOT pointer on non @@ -3009,9 +3014,9 @@ if (offset >= GET_MODE_SIZE (get_pool_mode (sym))) return false; - orig_disp = plus_constant (Pmode, orig_disp, offset); - } - } + orig_disp = plus_constant (Pmode, orig_disp, offset); + } + } else return false; @@ -3105,8 +3110,7 @@ Valid addresses are single references or a sum of a reference and a constant integer. Return these parts in SYMREF and ADDEND. You can pass NULL in REF and/or ADDEND if you are not interested in these - values. Literal pool references are *not* considered symbol - references. */ + values. */ static bool s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend) @@ -3125,10 +3129,10 @@ addr = XEXP (addr, 0); } - if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr)) + if (GET_CODE (addr) == SYMBOL_REF || (GET_CODE (addr) == UNSPEC && (XINT (addr, 1) == UNSPEC_GOTENT - || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT)))) + || XINT (addr, 1) == UNSPEC_PLT))) { if (symref) *symref = addr; @@ -3148,6 +3152,7 @@ static int s390_check_qrst_address (char c, rtx op, bool lit_pool_ok) { + rtx symref; struct s390_address addr; bool decomposed = false; @@ -3156,7 +3161,10 @@ /* This check makes sure that no symbolic address (except literal pool references) are accepted by the R or T constraints. */ - if (s390_loadrelative_operand_p (op, NULL, NULL)) + if (s390_loadrelative_operand_p (op, &symref, NULL) + && (!lit_pool_ok + || !SYMBOL_REF_P (symref) + || !CONSTANT_POOL_ADDRESS_P (symref))) return 0; /* Ensure literal pool references are only accepted if LIT_POOL_OK. */ @@ -3384,7 +3392,7 @@ static int s390_register_move_cost (machine_mode mode, - reg_class_t from, reg_class_t to) + reg_class_t from, reg_class_t to) { /* On s390, copy between fprs and gprs is expensive. */ @@ -3572,8 +3580,7 @@ /* mulsidi case: mr, m */ *total = s390_cost->m; else if (GET_CODE (left) == ZERO_EXTEND - && GET_CODE (right) == ZERO_EXTEND - && TARGET_CPU_ZARCH) + && GET_CODE (right) == ZERO_EXTEND) /* umulsidi case: ml, mlr */ *total = s390_cost->ml; else @@ -3618,14 +3625,14 @@ case UDIV: case UMOD: - if (mode == TImode) /* 128 bit division */ + if (mode == TImode) /* 128 bit division */ *total = s390_cost->dlgr; else if (mode == DImode) { rtx right = XEXP (x, 1); if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */ *total = s390_cost->dlr; - else /* 64 by 64 bit division */ + else /* 64 by 64 bit division */ *total = s390_cost->dlgr; } else if (mode == SImode) /* 32 bit division */ @@ -3642,7 +3649,7 @@ *total = s390_cost->dsgfr; else *total = s390_cost->dr; - else /* 64 by 64 bit division */ + else /* 64 by 64 bit division */ *total = s390_cost->dsgr; } else if (mode == SImode) /* 32 bit division */ @@ -3881,7 +3888,7 @@ return 1; /* Accept immediate LARL operands. */ - if (TARGET_CPU_ZARCH && larl_operand (op, mode)) + if (larl_operand (op, mode)) return 1; /* Thread-local symbols are never legal constants. This is @@ -3923,7 +3930,7 @@ case SYMBOL_REF: /* 'Naked' TLS symbol references are never OK, - non-TLS symbols are OK iff we are non-PIC. */ + non-TLS symbols are OK iff we are non-PIC. */ if (tls_symbolic_operand (x)) return true; else @@ -3955,8 +3962,6 @@ /* If the literal pool shares the code section, be put execute template placeholders into the pool as well. */ case UNSPEC_INSN: - return TARGET_CPU_ZARCH; - default: return true; } @@ -4002,8 +4007,7 @@ return true; /* Accept larl operands. */ - if (TARGET_CPU_ZARCH - && larl_operand (op, VOIDmode)) + if (larl_operand (op, VOIDmode)) return true; /* Accept floating-point zero operands that fit into a single GPR. */ @@ -4102,7 +4106,7 @@ case CONST: /* Symrefs cannot be pushed into the literal pool with -fPIC so we *MUST NOT* return NO_REGS for these cases - (s390_cannot_force_const_mem will return true). + (s390_cannot_force_const_mem will return true). On the other hand we MUST return NO_REGS for symrefs with invalid addend which might have been pushed to the literal @@ -4110,8 +4114,7 @@ handled via secondary reload but this does not happen if they are used as literal pool slot replacement in reload inheritance (see emit_input_reload_insns). */ - if (TARGET_CPU_ZARCH - && GET_CODE (XEXP (op, 0)) == PLUS + if (GET_CODE (XEXP (op, 0)) == PLUS && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT) { @@ -4124,7 +4127,7 @@ case LABEL_REF: case SYMBOL_REF: if (!legitimate_reload_constant_p (op)) - return NO_REGS; + return NO_REGS; /* fallthrough */ case PLUS: /* load address will be used. */ @@ -4165,7 +4168,7 @@ if (GET_CODE (symref) == SYMBOL_REF) { /* We have load-relative instructions for 2-byte, 4-byte, and - 8-byte alignment so allow only these. */ + 8-byte alignment so allow only these. */ switch (alignment) { case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref); @@ -4325,10 +4328,10 @@ case E_##M##mode: \ if (TARGET_64BIT) \ sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \ - CODE_FOR_reload##m##di_tomem_z10; \ + CODE_FOR_reload##m##di_tomem_z10; \ else \ - sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \ - CODE_FOR_reload##m##si_tomem_z10; \ + sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \ + CODE_FOR_reload##m##si_tomem_z10; \ break; switch (GET_MODE (x)) @@ -4494,7 +4497,7 @@ || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))) { /* Otherwise, one of the operands cannot be an address register; - we reload its value into the scratch register. */ + we reload its value into the scratch register. */ if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15) { emit_move_insn (scratch, sum1); @@ -4507,9 +4510,9 @@ } /* According to the way these invalid addresses are generated - in reload.c, it should never happen (at least on s390) that - *neither* of the PLUS components, after find_replacements - was applied, is an address register. */ + in reload.c, it should never happen (at least on s390) that + *neither* of the PLUS components, after find_replacements + was applied, is an address register. */ if (sum1 == scratch && sum2 == scratch) { debug_rtx (src); @@ -4598,11 +4601,11 @@ if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx))) return false; - /* Avoid LA instructions with index register on z196; it is - preferable to use regular add instructions when possible. - Starting with zEC12 the la with index register is "uncracked" - again. */ - if (addr.indx && s390_tune == PROCESSOR_2817_Z196) + /* Avoid LA instructions with index (and base) register on z196 or + later; it is preferable to use regular add instructions when + possible. Starting with zEC12 the la with index register is + "uncracked" again but still slower than a regular add. */ + if (addr.indx && s390_tune >= PROCESSOR_2817_Z196) return false; if (!TARGET_64BIT && !addr.pointer) @@ -4691,7 +4694,7 @@ || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr)) || (GET_CODE (addr) == UNSPEC && (XINT (addr, 1) == UNSPEC_GOTENT - || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT)))) + || XINT (addr, 1) == UNSPEC_PLT))) && GET_CODE (addend) == CONST_INT) { /* This can be locally addressed. */ @@ -4700,8 +4703,7 @@ rtx const_addr = (GET_CODE (addr) == UNSPEC ? gen_rtx_CONST (Pmode, addr) : addr); - if (TARGET_CPU_ZARCH - && larl_operand (const_addr, VOIDmode) + if (larl_operand (const_addr, VOIDmode) && INTVAL (addend) < HOST_WIDE_INT_1 << 31 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31)) { @@ -4773,7 +4775,7 @@ that case. So no need to do it here. */ if (reg == 0) - reg = gen_reg_rtx (Pmode); + reg = gen_reg_rtx (Pmode); if (TARGET_Z10) { @@ -4787,70 +4789,42 @@ new_rtx = reg; } else if (flag_pic == 1) - { - /* Assume GOT offset is a valid displacement operand (< 4k - or < 512k with z990). This is handled the same way in - both 31- and 64-bit code (@GOT). - lg <target>, sym@GOT(r12) */ + { + /* Assume GOT offset is a valid displacement operand (< 4k + or < 512k with z990). This is handled the same way in + both 31- and 64-bit code (@GOT). + lg <target>, sym@GOT(r12) */ if (reload_in_progress || reload_completed) df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); - new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); - new_rtx = gen_rtx_CONST (Pmode, new_rtx); - new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); - new_rtx = gen_const_mem (Pmode, new_rtx); - emit_move_insn (reg, new_rtx); - new_rtx = reg; - } - else if (TARGET_CPU_ZARCH) - { - /* If the GOT offset might be >= 4k, we determine the position - of the GOT entry via a PC-relative LARL (@GOTENT). + new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); + new_rtx = gen_rtx_CONST (Pmode, new_rtx); + new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); + new_rtx = gen_const_mem (Pmode, new_rtx); + emit_move_insn (reg, new_rtx); + new_rtx = reg; + } + else + { + /* If the GOT offset might be >= 4k, we determine the position + of the GOT entry via a PC-relative LARL (@GOTENT). larl temp, sym@GOTENT - lg <target>, 0(temp) */ - - rtx temp = reg ? reg : gen_reg_rtx (Pmode); + lg <target>, 0(temp) */ + + rtx temp = reg ? reg : gen_reg_rtx (Pmode); gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS); - new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT); - new_rtx = gen_rtx_CONST (Pmode, new_rtx); + new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT); + new_rtx = gen_rtx_CONST (Pmode, new_rtx); emit_move_insn (temp, new_rtx); - new_rtx = gen_const_mem (Pmode, temp); - emit_move_insn (reg, new_rtx); - - new_rtx = reg; - } - else - { - /* If the GOT offset might be >= 4k, we have to load it - from the literal pool (@GOT). - - lg temp, lit-litbase(r13) - lg <target>, 0(temp) - lit: .long sym@GOT */ - - rtx temp = reg ? reg : gen_reg_rtx (Pmode); - - gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER - || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS); - - if (reload_in_progress || reload_completed) - df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); - - addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); - addr = gen_rtx_CONST (Pmode, addr); - addr = force_const_mem (Pmode, addr); - emit_move_insn (temp, addr); - - new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp); - new_rtx = gen_const_mem (Pmode, new_rtx); - emit_move_insn (reg, new_rtx); - new_rtx = reg; - } + emit_move_insn (reg, new_rtx); + + new_rtx = reg; + } } else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT) { @@ -4879,36 +4853,10 @@ gcc_unreachable (); break; - /* @PLT is OK as is on 64-bit, must be converted to - GOT-relative @PLTOFF on 31-bit. */ + /* For @PLT larl is used. This is handled like local + symbol refs. */ case UNSPEC_PLT: - if (!TARGET_CPU_ZARCH) - { - rtx temp = reg? reg : gen_reg_rtx (Pmode); - - if (reload_in_progress || reload_completed) - df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); - - addr = XVECEXP (addr, 0, 0); - addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), - UNSPEC_PLTOFF); - if (addend != const0_rtx) - addr = gen_rtx_PLUS (Pmode, addr, addend); - addr = gen_rtx_CONST (Pmode, addr); - addr = force_const_mem (Pmode, addr); - emit_move_insn (temp, addr); - - new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp); - if (reg != 0) - { - s390_load_address (reg, new_rtx); - new_rtx = reg; - } - } - else - /* On 64 bit larl can be used. This case is handled like - local symbol refs. */ - gcc_unreachable (); + gcc_unreachable (); break; /* Everything else cannot happen. */ @@ -5065,7 +5013,7 @@ temp = gen_reg_rtx (Pmode); emit_move_insn (temp, new_rtx); } - else if (TARGET_CPU_ZARCH) + else { /* If the GOT offset might be >= 4k, we determine the position of the GOT entry via a PC-relative LARL. */ @@ -5079,44 +5027,6 @@ temp = gen_reg_rtx (Pmode); emit_move_insn (temp, new_rtx); } - else if (flag_pic) - { - /* If the GOT offset might be >= 4k, we have to load it - from the literal pool. */ - - if (reload_in_progress || reload_completed) - df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); - - new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF); - new_rtx = gen_rtx_CONST (Pmode, new_rtx); - new_rtx = force_const_mem (Pmode, new_rtx); - temp = gen_reg_rtx (Pmode); - emit_move_insn (temp, new_rtx); - - new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp); - new_rtx = gen_const_mem (Pmode, new_rtx); - - new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD); - temp = gen_reg_rtx (Pmode); - emit_insn (gen_rtx_SET (temp, new_rtx)); - } - else - { - /* In position-dependent code, load the absolute address of - the GOT entry from the literal pool. */ - - new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF); - new_rtx = gen_rtx_CONST (Pmode, new_rtx); - new_rtx = force_const_mem (Pmode, new_rtx); - temp = gen_reg_rtx (Pmode); - emit_move_insn (temp, new_rtx); - - new_rtx = temp; - new_rtx = gen_const_mem (Pmode, new_rtx); - new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD); - temp = gen_reg_rtx (Pmode); - emit_insn (gen_rtx_SET (temp, new_rtx)); - } new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp); if (reg != 0) @@ -5130,7 +5040,7 @@ new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF); new_rtx = gen_rtx_CONST (Pmode, new_rtx); new_rtx = force_const_mem (Pmode, new_rtx); - temp = gen_reg_rtx (Pmode); + temp = gen_reg_rtx (Pmode); emit_move_insn (temp, new_rtx); new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp); @@ -5150,7 +5060,6 @@ switch (XINT (XEXP (addr, 0), 1)) { case UNSPEC_INDNTPOFF: - gcc_assert (TARGET_CPU_ZARCH); new_rtx = addr; break; @@ -5229,9 +5138,9 @@ else if (flag_pic) { if (SYMBOLIC_CONST (x) - || (GET_CODE (x) == PLUS - && (SYMBOLIC_CONST (XEXP (x, 0)) - || SYMBOLIC_CONST (XEXP (x, 1))))) + || (GET_CODE (x) == PLUS + && (SYMBOLIC_CONST (XEXP (x, 0)) + || SYMBOLIC_CONST (XEXP (x, 1))))) x = legitimize_pic_address (x, 0); if (s390_legitimate_address_p (mode, x, FALSE)) @@ -5388,7 +5297,7 @@ mode = GET_MODE (len); if (mode == VOIDmode) - mode = Pmode; + mode = Pmode; dst_addr = gen_reg_rtx (Pmode); src_addr = gen_reg_rtx (Pmode); @@ -5407,12 +5316,12 @@ temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1, OPTAB_DIRECT); if (temp != count) - emit_move_insn (count, temp); + emit_move_insn (count, temp); temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1, OPTAB_DIRECT); if (temp != blocks) - emit_move_insn (blocks, temp); + emit_move_insn (blocks, temp); emit_cmp_and_jump_insns (blocks, const0_rtx, EQ, NULL_RTX, mode, 1, loop_end_label); @@ -5446,7 +5355,7 @@ temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, OPTAB_DIRECT); if (temp != blocks) - emit_move_insn (blocks, temp); + emit_move_insn (blocks, temp); emit_cmp_and_jump_insns (blocks, const0_rtx, EQ, NULL_RTX, mode, 1, loop_end_label); @@ -5474,12 +5383,15 @@ /* Expand setmem/clrmem for a constant length operand without a loop if it will be shorter that way. - With a constant length and without pfd argument a - clrmem loop is 32 bytes -> 5.3 * xc - setmem loop is 36 bytes -> 3.6 * (mvi/stc + mvc) */ + clrmem loop (with PFD) is 30 bytes -> 5 * xc + clrmem loop (without PFD) is 24 bytes -> 4 * xc + setmem loop (with PFD) is 38 bytes -> ~4 * (mvi/stc + mvc) + setmem loop (without PFD) is 32 bytes -> ~4 * (mvi/stc + mvc) */ if (GET_CODE (len) == CONST_INT - && ((INTVAL (len) <= 256 * 5 && val == const0_rtx) - || INTVAL (len) <= 257 * 3) + && ((val == const0_rtx + && (INTVAL (len) <= 256 * 4 + || (INTVAL (len) <= 256 * 5 && TARGET_SETMEM_PFD(val,len)))) + || (val != const0_rtx && INTVAL (len) <= 257 * 4)) && (!TARGET_MVCLE || INTVAL (len) <= 256)) { HOST_WIDE_INT o, l; @@ -5593,12 +5505,11 @@ emit_label (loop_start_label); - if (TARGET_Z10 - && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024)) - { - /* Issue a write prefetch for the +4 cache line. */ - rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, - GEN_INT (1024)), + if (TARGET_SETMEM_PFD (val, len)) + { + /* Issue a write prefetch. */ + rtx distance = GEN_INT (TARGET_SETMEM_PREFETCH_DISTANCE); + rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, distance), const1_rtx, const0_rtx); emit_insn (prefetch); PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true; @@ -5666,12 +5577,12 @@ if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256) { if (INTVAL (len) > 0) - { - emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1))); - emit_insn (gen_cmpint (target, ccreg)); - } - else - emit_move_insn (target, const0_rtx); + { + emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1))); + emit_insn (gen_cmpint (target, ccreg)); + } + else + emit_move_insn (target, const0_rtx); } else if (TARGET_MVCLE) { @@ -5688,7 +5599,7 @@ mode = GET_MODE (len); if (mode == VOIDmode) - mode = Pmode; + mode = Pmode; addr0 = gen_reg_rtx (Pmode); addr1 = gen_reg_rtx (Pmode); @@ -5707,12 +5618,12 @@ temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1, OPTAB_DIRECT); if (temp != count) - emit_move_insn (count, temp); + emit_move_insn (count, temp); temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1, OPTAB_DIRECT); if (temp != blocks) - emit_move_insn (blocks, temp); + emit_move_insn (blocks, temp); emit_cmp_and_jump_insns (blocks, const0_rtx, EQ, NULL_RTX, mode, 1, loop_end_label); @@ -5752,7 +5663,7 @@ temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1, OPTAB_DIRECT); if (temp != blocks) - emit_move_insn (blocks, temp); + emit_move_insn (blocks, temp); emit_cmp_and_jump_insns (blocks, const0_rtx, EQ, NULL_RTX, mode, 1, loop_end_label); @@ -5888,7 +5799,7 @@ highest_index_to_load_reg, str_idx_reg)); add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond), - profile_probability::very_unlikely ()); + profile_probability::very_unlikely ()); expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT); @@ -6122,7 +6033,7 @@ p = rtvec_alloc (2); RTVEC_ELT (p, 0) = - gen_rtx_SET (dst, op_res); + gen_rtx_SET (dst, op_res); RTVEC_ELT (p, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM)); emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); @@ -6189,7 +6100,7 @@ const0_rtx)); p = rtvec_alloc (2); RTVEC_ELT (p, 0) = - gen_rtx_SET (dst, op_res); + gen_rtx_SET (dst, op_res); RTVEC_ELT (p, 1) = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM)); emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); @@ -6719,11 +6630,16 @@ return; } + /* Use vector replicate instructions. vlrep/vrepi/vrep */ if (all_same) { - emit_insn (gen_rtx_SET (target, - gen_rtx_VEC_DUPLICATE (mode, - XVECEXP (vals, 0, 0)))); + rtx elem = XVECEXP (vals, 0, 0); + + /* vec_splats accepts general_operand as source. */ + if (!general_operand (elem, GET_MODE (elem))) + elem = force_reg (inner_mode, elem); + + emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, elem))); return; } @@ -7020,8 +6936,8 @@ emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp))); } s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx)); - add_reg_br_prob_note (get_last_insn (), - profile_probability::very_unlikely ()); + add_reg_br_prob_note (get_last_insn (), + profile_probability::very_unlikely ()); /* If the jump is not taken, OUTPUT is the expected value. */ cmp = output; /* Reload newval to a register manually, *after* the compare and jump @@ -7288,12 +7204,12 @@ /* Extract the symbol ref from: (plus:SI (reg:SI 12 %r12) - (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))] - UNSPEC_GOTOFF/PLTOFF))) + (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))] + UNSPEC_GOTOFF/PLTOFF))) and (plus:SI (reg:SI 12 %r12) - (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))] - UNSPEC_GOTOFF/PLTOFF) + (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))] + UNSPEC_GOTOFF/PLTOFF) (const_int 4 [0x4])))) */ if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0)) @@ -7338,7 +7254,7 @@ { /* Extract the symbol ref from: (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))] - UNSPEC_PLT/GOTENT))) */ + UNSPEC_PLT/GOTENT))) */ y = XEXP (x, 0); if (GET_CODE (y) == UNSPEC @@ -7517,10 +7433,11 @@ NOPs. */ function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT); if (! DECL_USER_ALIGN (decl)) - function_alignment = MAX (function_alignment, - (unsigned int) align_functions); + function_alignment + = MAX (function_alignment, + (unsigned int) align_functions.levels[0].get_value ()); fputs ("\t# alignment for hotpatch\n", asm_out_file); - ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment)); + ASM_OUTPUT_ALIGN (asm_out_file, align_functions.levels[0].log); } if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG) @@ -7652,7 +7569,7 @@ if (ad.base && ad.indx) fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)], - reg_names[REGNO (ad.base)]); + reg_names[REGNO (ad.base)]); else if (ad.base) fprintf (file, "(%s)", reg_names[REGNO (ad.base)]); } @@ -7686,7 +7603,7 @@ 'o': print integer X as if it's an unsigned 32bit word. 's': "start" of contiguous bitmask X in either DImode or vector inner mode. 't': CONST_INT: "start" of contiguous bitmask X in SImode. - CONST_VECTOR: Generate a bitmask for vgbm instruction. + CONST_VECTOR: Generate a bitmask for vgbm instruction. 'x': print integer X as if it's an unsigned halfword. 'v': print register number as vector register (v1 instead of f1). */ @@ -7744,7 +7661,7 @@ case 'O': { - struct s390_address ad; + struct s390_address ad; int ret; ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad); @@ -7757,16 +7674,16 @@ return; } - if (ad.disp) - output_addr_const (file, ad.disp); - else - fprintf (file, "0"); + if (ad.disp) + output_addr_const (file, ad.disp); + else + fprintf (file, "0"); } return; case 'R': { - struct s390_address ad; + struct s390_address ad; int ret; ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad); @@ -7779,10 +7696,10 @@ return; } - if (ad.base) - fprintf (file, "%s", reg_names[REGNO (ad.base)]); - else - fprintf (file, "0"); + if (ad.base) + fprintf (file, "%s", reg_names[REGNO (ad.base)]); + else + fprintf (file, "0"); } return; @@ -7928,13 +7845,13 @@ case CONST_WIDE_INT: if (code == 'b') - fprintf (file, HOST_WIDE_INT_PRINT_DEC, + fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_WIDE_INT_ELT (x, 0) & 0xff); else if (code == 'x') - fprintf (file, HOST_WIDE_INT_PRINT_DEC, + fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_WIDE_INT_ELT (x, 0) & 0xffff); else if (code == 'h') - fprintf (file, HOST_WIDE_INT_PRINT_DEC, + fprintf (file, HOST_WIDE_INT_PRINT_DEC, ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000); else { @@ -8137,7 +8054,7 @@ priority = priority << 1; break; default: - break; + break; } return priority; } @@ -8157,8 +8074,6 @@ return 3; case PROCESSOR_2097_Z10: return 2; - case PROCESSOR_9672_G5: - case PROCESSOR_9672_G6: case PROCESSOR_2064_Z900: /* Starting with EC12 we use the sched_reorder hook to take care of instruction dispatch constraints. The algorithm only @@ -8256,119 +8171,20 @@ } } - /* Annotate LTREL_BASE as well. */ - if (GET_CODE (*x) == UNSPEC - && XINT (*x, 1) == UNSPEC_LTREL_BASE) - { - rtx base = cfun->machine->base_reg; - *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base), - UNSPEC_LTREL_BASE); - return; - } - fmt = GET_RTX_FORMAT (GET_CODE (*x)); for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--) { if (fmt[i] == 'e') - { - annotate_constant_pool_refs (&XEXP (*x, i)); - } + { + annotate_constant_pool_refs (&XEXP (*x, i)); + } else if (fmt[i] == 'E') - { - for (j = 0; j < XVECLEN (*x, i); j++) - annotate_constant_pool_refs (&XVECEXP (*x, i, j)); - } - } -} - -/* Split all branches that exceed the maximum distance. - Returns true if this created a new literal pool entry. */ - -static int -s390_split_branches (void) -{ - rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM); - int new_literal = 0, ret; - rtx_insn *insn; - rtx pat, target; - rtx *label; - - /* We need correct insn addresses. */ - - shorten_branches (get_insns ()); - - /* Find all branches that exceed 64KB, and split them. */ - - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - { - if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL)) - continue; - - pat = PATTERN (insn); - if (GET_CODE (pat) == PARALLEL) - pat = XVECEXP (pat, 0, 0); - if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx) - continue; - - if (GET_CODE (SET_SRC (pat)) == LABEL_REF) - { - label = &SET_SRC (pat); - } - else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE) - { - if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF) - label = &XEXP (SET_SRC (pat), 1); - else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF) - label = &XEXP (SET_SRC (pat), 2); - else - continue; - } - else - continue; - - if (get_attr_length (insn) <= 4) - continue; - - /* We are going to use the return register as scratch register, - make sure it will be saved/restored by the prologue/epilogue. */ - cfun_frame_layout.save_return_addr_p = 1; - - if (!flag_pic) - { - new_literal = 1; - rtx mem = force_const_mem (Pmode, *label); - rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem), - insn); - INSN_ADDRESSES_NEW (set_insn, -1); - annotate_constant_pool_refs (&PATTERN (set_insn)); - - target = temp_reg; - } - else - { - new_literal = 1; - target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label), - UNSPEC_LTREL_OFFSET); - target = gen_rtx_CONST (Pmode, target); - target = force_const_mem (Pmode, target); - rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target), - insn); - INSN_ADDRESSES_NEW (set_insn, -1); - annotate_constant_pool_refs (&PATTERN (set_insn)); - - target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0), - cfun->machine->base_reg), - UNSPEC_LTREL_BASE); - target = gen_rtx_PLUS (Pmode, temp_reg, target); - } - - ret = validate_change (insn, label, target, 0); - gcc_assert (ret); - } - - return new_literal; -} - + { + for (j = 0; j < XVECLEN (*x, i); j++) + annotate_constant_pool_refs (&XVECEXP (*x, i, j)); + } + } +} /* Find an annotated literal pool symbol referenced in RTX X, and store it at REF. Will abort if X contains references to @@ -8384,23 +8200,19 @@ int i, j; const char *fmt; - /* Ignore LTREL_BASE references. */ - if (GET_CODE (x) == UNSPEC - && XINT (x, 1) == UNSPEC_LTREL_BASE) - return; /* Likewise POOL_ENTRY insns. */ if (GET_CODE (x) == UNSPEC_VOLATILE && XINT (x, 1) == UNSPECV_POOL_ENTRY) return; gcc_assert (GET_CODE (x) != SYMBOL_REF - || !CONSTANT_POOL_ADDRESS_P (x)); + || !CONSTANT_POOL_ADDRESS_P (x)); if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF) { rtx sym = XVECEXP (x, 0, 0); gcc_assert (GET_CODE (sym) == SYMBOL_REF - && CONSTANT_POOL_ADDRESS_P (sym)); + && CONSTANT_POOL_ADDRESS_P (sym)); if (*ref == NULL_RTX) *ref = sym; @@ -8414,14 +8226,14 @@ for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) { if (fmt[i] == 'e') - { - find_constant_pool_ref (XEXP (x, i), ref); - } + { + find_constant_pool_ref (XEXP (x, i), ref); + } else if (fmt[i] == 'E') - { - for (j = 0; j < XVECLEN (x, i); j++) - find_constant_pool_ref (XVECEXP (x, i, j), ref); - } + { + for (j = 0; j < XVECLEN (x, i); j++) + find_constant_pool_ref (XVECEXP (x, i, j), ref); + } } } @@ -8459,83 +8271,16 @@ for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--) { if (fmt[i] == 'e') - { - replace_constant_pool_ref (&XEXP (*x, i), ref, offset); - } - else if (fmt[i] == 'E') - { - for (j = 0; j < XVECLEN (*x, i); j++) - replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset); - } - } -} - -/* Check whether X contains an UNSPEC_LTREL_BASE. - Return its constant pool symbol if found, NULL_RTX otherwise. */ - -static rtx -find_ltrel_base (rtx x) -{ - int i, j; - const char *fmt; - - if (GET_CODE (x) == UNSPEC - && XINT (x, 1) == UNSPEC_LTREL_BASE) - return XVECEXP (x, 0, 0); - - fmt = GET_RTX_FORMAT (GET_CODE (x)); - for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) - { - if (fmt[i] == 'e') - { - rtx fnd = find_ltrel_base (XEXP (x, i)); - if (fnd) - return fnd; - } + { + replace_constant_pool_ref (&XEXP (*x, i), ref, offset); + } else if (fmt[i] == 'E') - { - for (j = 0; j < XVECLEN (x, i); j++) - { - rtx fnd = find_ltrel_base (XVECEXP (x, i, j)); - if (fnd) - return fnd; - } - } - } - - return NULL_RTX; -} - -/* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */ - -static void -replace_ltrel_base (rtx *x) -{ - int i, j; - const char *fmt; - - if (GET_CODE (*x) == UNSPEC - && XINT (*x, 1) == UNSPEC_LTREL_BASE) - { - *x = XVECEXP (*x, 0, 1); - return; - } - - fmt = GET_RTX_FORMAT (GET_CODE (*x)); - for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--) - { - if (fmt[i] == 'e') - { - replace_ltrel_base (&XEXP (*x, i)); - } - else if (fmt[i] == 'E') - { - for (j = 0; j < XVECLEN (*x, i); j++) - replace_ltrel_base (&XVECEXP (*x, i, j)); - } - } -} - + { + for (j = 0; j < XVECLEN (*x, i); j++) + replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset); + } + } +} /* We keep a list of constants which we have to add to internal constant tables in the middle of large functions. */ @@ -8729,37 +8474,29 @@ static rtx s390_execute_label (rtx insn) { - if (NONJUMP_INSN_P (insn) + if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC - && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE) - return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2); + && (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE + || XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE_JUMP)) + { + if (XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE) + return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2); + else + { + gcc_assert (JUMP_P (insn)); + /* For jump insns as execute target: + - There is one operand less in the parallel (the + modification register of the execute is always 0). + - The execute target label is wrapped into an + if_then_else in order to hide it from jump analysis. */ + return XEXP (XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 0), 0); + } + } return NULL_RTX; } -/* Add execute target for INSN to the constant pool POOL. */ - -static void -s390_add_execute (struct constant_pool *pool, rtx insn) -{ - struct constant *c; - - for (c = pool->execute; c != NULL; c = c->next) - if (INSN_UID (insn) == INSN_UID (c->value)) - break; - - if (c == NULL) - { - c = (struct constant *) xmalloc (sizeof *c); - c->value = insn; - c->label = gen_label_rtx (); - c->next = pool->execute; - pool->execute = c; - pool->size += 6; - } -} - /* Find execute target for INSN in the constant pool POOL. Return an RTX describing the distance from the start of the pool to the location of the execute target. */ @@ -8825,17 +8562,11 @@ int i; /* Switch to rodata section. */ - if (TARGET_CPU_ZARCH) - { - insn = emit_insn_after (gen_pool_section_start (), insn); - INSN_ADDRESSES_NEW (insn, -1); - } + insn = emit_insn_after (gen_pool_section_start (), insn); + INSN_ADDRESSES_NEW (insn, -1); /* Ensure minimum pool alignment. */ - if (TARGET_CPU_ZARCH) - insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn); - else - insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn); + insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn); INSN_ADDRESSES_NEW (insn, -1); /* Emit pool base label. */ @@ -8883,11 +8614,8 @@ } /* Switch back to previous section. */ - if (TARGET_CPU_ZARCH) - { - insn = emit_insn_after (gen_pool_section_end (), insn); - INSN_ADDRESSES_NEW (insn, -1); - } + insn = emit_insn_after (gen_pool_section_end (), insn); + INSN_ADDRESSES_NEW (insn, -1); insn = emit_barrier_after (insn); INSN_ADDRESSES_NEW (insn, -1); @@ -8951,11 +8679,7 @@ pool->pool_insn = insn; } - if (!TARGET_CPU_ZARCH && s390_execute_label (insn)) - { - s390_add_execute (pool, insn); - } - else if (NONJUMP_INSN_P (insn) || CALL_P (insn)) + if (NONJUMP_INSN_P (insn) || CALL_P (insn)) { rtx pool_ref = NULL_RTX; find_constant_pool_ref (PATTERN (insn), &pool_ref); @@ -9007,6 +8731,8 @@ s390_mainpool_finish (struct constant_pool *pool) { rtx base_reg = cfun->machine->base_reg; + rtx set; + rtx_insn *insn; /* If the pool is empty, we're done. */ if (pool->size == 0) @@ -9023,95 +8749,39 @@ /* We need correct insn addresses. */ shorten_branches (get_insns ()); - /* On zSeries, we use a LARL to load the pool register. The pool is + /* Use a LARL to load the pool register. The pool is located in the .rodata section, so we emit it after the function. */ - if (TARGET_CPU_ZARCH) - { - rtx set = gen_main_base_64 (base_reg, pool->label); - rtx_insn *insn = emit_insn_after (set, pool->pool_insn); - INSN_ADDRESSES_NEW (insn, -1); - remove_insn (pool->pool_insn); - - insn = get_last_insn (); - pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn); - INSN_ADDRESSES_NEW (pool->pool_insn, -1); - - s390_dump_pool (pool, 0); - } - - /* On S/390, if the total size of the function's code plus literal pool - does not exceed 4096 bytes, we use BASR to set up a function base - pointer, and emit the literal pool at the end of the function. */ - else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after)) - + pool->size + 8 /* alignment slop */ < 4096) - { - rtx set = gen_main_base_31_small (base_reg, pool->label); - rtx_insn *insn = emit_insn_after (set, pool->pool_insn); - INSN_ADDRESSES_NEW (insn, -1); - remove_insn (pool->pool_insn); - - insn = emit_label_after (pool->label, insn); - INSN_ADDRESSES_NEW (insn, -1); - - /* emit_pool_after will be set by s390_mainpool_start to the - last insn of the section where the literal pool should be - emitted. */ - insn = pool->emit_pool_after; - - pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn); - INSN_ADDRESSES_NEW (pool->pool_insn, -1); - - s390_dump_pool (pool, 1); - } - - /* Otherwise, we emit an inline literal pool and use BASR to branch - over it, setting up the pool register at the same time. */ - else - { - rtx_code_label *pool_end = gen_label_rtx (); - - rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end); - rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn); - JUMP_LABEL (insn) = pool_end; - INSN_ADDRESSES_NEW (insn, -1); - remove_insn (pool->pool_insn); - - insn = emit_label_after (pool->label, insn); - INSN_ADDRESSES_NEW (insn, -1); - - pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn); - INSN_ADDRESSES_NEW (pool->pool_insn, -1); - - insn = emit_label_after (pool_end, pool->pool_insn); - INSN_ADDRESSES_NEW (insn, -1); - - s390_dump_pool (pool, 1); - } - + set = gen_main_base_64 (base_reg, pool->label); + insn = emit_insn_after (set, pool->pool_insn); + INSN_ADDRESSES_NEW (insn, -1); + remove_insn (pool->pool_insn); + + insn = get_last_insn (); + pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn); + INSN_ADDRESSES_NEW (pool->pool_insn, -1); + + s390_dump_pool (pool, 0); /* Replace all literal pool references. */ for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn)) { - if (INSN_P (insn)) - replace_ltrel_base (&PATTERN (insn)); - if (NONJUMP_INSN_P (insn) || CALL_P (insn)) - { - rtx addr, pool_ref = NULL_RTX; - find_constant_pool_ref (PATTERN (insn), &pool_ref); - if (pool_ref) - { + { + rtx addr, pool_ref = NULL_RTX; + find_constant_pool_ref (PATTERN (insn), &pool_ref); + if (pool_ref) + { if (s390_execute_label (insn)) addr = s390_find_execute (pool, insn); else addr = s390_find_constant (pool, get_pool_constant (pool_ref), get_pool_mode (pool_ref)); - replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr); - INSN_CODE (insn) = -1; - } - } + replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr); + INSN_CODE (insn) = -1; + } + } } @@ -9119,18 +8789,6 @@ s390_free_pool (pool); } -/* POOL holds the main literal pool as collected by s390_mainpool_start. - We have decided we cannot use this pool, so revert all changes - to the current function that were done by s390_mainpool_start. */ -static void -s390_mainpool_cancel (struct constant_pool *pool) -{ - /* We didn't actually change the instruction stream, so simply - free the pool memory. */ - s390_free_pool (pool); -} - - /* Chunkify the literal pool. */ #define S390_POOL_CHUNK_MIN 0xc00 @@ -9140,15 +8798,9 @@ s390_chunkify_start (void) { struct constant_pool *curr_pool = NULL, *pool_list = NULL; - int extra_size = 0; bitmap far_labels; - rtx pending_ltrel = NULL_RTX; rtx_insn *insn; - rtx (*gen_reload_base) (rtx, rtx) = - TARGET_CPU_ZARCH? gen_reload_base_64 : gen_reload_base_31; - - /* We need correct insn addresses. */ shorten_branches (get_insns ()); @@ -9157,28 +8809,7 @@ for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) { - bool section_switch_p = false; - - /* Check for pending LTREL_BASE. */ - if (INSN_P (insn)) - { - rtx ltrel_base = find_ltrel_base (PATTERN (insn)); - if (ltrel_base) - { - gcc_assert (ltrel_base == pending_ltrel); - pending_ltrel = NULL_RTX; - } - } - - if (!TARGET_CPU_ZARCH && s390_execute_label (insn)) - { - if (!curr_pool) - curr_pool = s390_start_pool (&pool_list, insn); - - s390_add_execute (curr_pool, insn); - s390_add_pool_insn (curr_pool, insn); - } - else if (NONJUMP_INSN_P (insn) || CALL_P (insn)) + if (NONJUMP_INSN_P (insn) || CALL_P (insn)) { rtx pool_ref = NULL_RTX; find_constant_pool_ref (PATTERN (insn), &pool_ref); @@ -9192,16 +8823,6 @@ s390_add_constant (curr_pool, constant, mode); s390_add_pool_insn (curr_pool, insn); - - /* Don't split the pool chunk between a LTREL_OFFSET load - and the corresponding LTREL_BASE. */ - if (GET_CODE (constant) == CONST - && GET_CODE (XEXP (constant, 0)) == UNSPEC - && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET) - { - gcc_assert (!pending_ltrel); - pending_ltrel = pool_ref; - } } } @@ -9209,129 +8830,25 @@ { if (curr_pool) s390_add_pool_insn (curr_pool, insn); - /* An LTREL_BASE must follow within the same basic block. */ - gcc_assert (!pending_ltrel); - } - - if (NOTE_P (insn)) - switch (NOTE_KIND (insn)) - { - case NOTE_INSN_SWITCH_TEXT_SECTIONS: - section_switch_p = true; - break; - case NOTE_INSN_VAR_LOCATION: - case NOTE_INSN_CALL_ARG_LOCATION: - continue; - default: - break; - } + } + + if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION) + continue; if (!curr_pool || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn) - || INSN_ADDRESSES (INSN_UID (insn)) == -1) + || INSN_ADDRESSES (INSN_UID (insn)) == -1) continue; - if (TARGET_CPU_ZARCH) - { - if (curr_pool->size < S390_POOL_CHUNK_MAX) - continue; - - s390_end_pool (curr_pool, NULL); - curr_pool = NULL; - } - else - { - int chunk_size = INSN_ADDRESSES (INSN_UID (insn)) - - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn)) - + extra_size; - - /* We will later have to insert base register reload insns. - Those will have an effect on code size, which we need to - consider here. This calculation makes rather pessimistic - worst-case assumptions. */ - if (LABEL_P (insn)) - extra_size += 6; - - if (chunk_size < S390_POOL_CHUNK_MIN - && curr_pool->size < S390_POOL_CHUNK_MIN - && !section_switch_p) - continue; - - /* Pool chunks can only be inserted after BARRIERs ... */ - if (BARRIER_P (insn)) - { - s390_end_pool (curr_pool, insn); - curr_pool = NULL; - extra_size = 0; - } - - /* ... so if we don't find one in time, create one. */ - else if (chunk_size > S390_POOL_CHUNK_MAX - || curr_pool->size > S390_POOL_CHUNK_MAX - || section_switch_p) - { - rtx_insn *label, *jump, *barrier, *next, *prev; - - if (!section_switch_p) - { - /* We can insert the barrier only after a 'real' insn. */ - if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn)) - continue; - if (get_attr_length (insn) == 0) - continue; - /* Don't separate LTREL_BASE from the corresponding - LTREL_OFFSET load. */ - if (pending_ltrel) - continue; - next = insn; - do - { - insn = next; - next = NEXT_INSN (insn); - } - while (next - && NOTE_P (next) - && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION - || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)); - } - else - { - gcc_assert (!pending_ltrel); - - /* The old pool has to end before the section switch - note in order to make it part of the current - section. */ - insn = PREV_INSN (insn); - } - - label = gen_label_rtx (); - prev = insn; - if (prev && NOTE_P (prev)) - prev = prev_nonnote_insn (prev); - if (prev) - jump = emit_jump_insn_after_setloc (gen_jump (label), insn, - INSN_LOCATION (prev)); - else - jump = emit_jump_insn_after_noloc (gen_jump (label), insn); - barrier = emit_barrier_after (jump); - insn = emit_label_after (label, barrier); - JUMP_LABEL (jump) = label; - LABEL_NUSES (label) = 1; - - INSN_ADDRESSES_NEW (jump, -1); - INSN_ADDRESSES_NEW (barrier, -1); - INSN_ADDRESSES_NEW (insn, -1); - - s390_end_pool (curr_pool, barrier); - curr_pool = NULL; - extra_size = 0; - } - } + if (curr_pool->size < S390_POOL_CHUNK_MAX) + continue; + + s390_end_pool (curr_pool, NULL); + curr_pool = NULL; } if (curr_pool) s390_end_pool (curr_pool, NULL); - gcc_assert (!pending_ltrel); /* Find all labels that are branched into from an insn belonging to a different chunk. */ @@ -9397,8 +8914,8 @@ for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next) { - rtx new_insn = gen_reload_base (cfun->machine->base_reg, - curr_pool->label); + rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg, + curr_pool->label); rtx_insn *insn = curr_pool->first_insn; INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1); } @@ -9407,13 +8924,13 @@ for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) if (LABEL_P (insn) - && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn))) + && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn))) { struct constant_pool *pool = s390_find_pool (pool_list, insn); if (pool) { - rtx new_insn = gen_reload_base (cfun->machine->base_reg, - pool->label); + rtx new_insn = gen_reload_base_64 (cfun->machine->base_reg, + pool->label); INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1); } } @@ -9445,19 +8962,16 @@ for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) { - if (INSN_P (insn)) - replace_ltrel_base (&PATTERN (insn)); - curr_pool = s390_find_pool (pool_list, insn); if (!curr_pool) continue; if (NONJUMP_INSN_P (insn) || CALL_P (insn)) - { - rtx addr, pool_ref = NULL_RTX; - find_constant_pool_ref (PATTERN (insn), &pool_ref); - if (pool_ref) - { + { + rtx addr, pool_ref = NULL_RTX; + find_constant_pool_ref (PATTERN (insn), &pool_ref); + if (pool_ref) + { if (s390_execute_label (insn)) addr = s390_find_execute (curr_pool, insn); else @@ -9465,10 +8979,10 @@ get_pool_constant (pool_ref), get_pool_mode (pool_ref)); - replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr); - INSN_CODE (insn) = -1; - } - } + replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr); + INSN_CODE (insn) = -1; + } + } } /* Dump out all literal pools. */ @@ -9486,66 +9000,6 @@ } } -/* POOL_LIST is a chunk list as prepared by s390_chunkify_start. - We have decided we cannot use this list, so revert all changes - to the current function that were done by s390_chunkify_start. */ - -static void -s390_chunkify_cancel (struct constant_pool *pool_list) -{ - struct constant_pool *curr_pool = NULL; - rtx_insn *insn; - - /* Remove all pool placeholder insns. */ - - for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next) - { - /* Did we insert an extra barrier? Remove it. */ - rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn); - rtx_insn *jump = barrier? PREV_INSN (barrier) : NULL; - rtx_insn *label = NEXT_INSN (curr_pool->pool_insn); - - if (jump && JUMP_P (jump) - && barrier && BARRIER_P (barrier) - && label && LABEL_P (label) - && GET_CODE (PATTERN (jump)) == SET - && SET_DEST (PATTERN (jump)) == pc_rtx - && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF - && XEXP (SET_SRC (PATTERN (jump)), 0) == label) - { - remove_insn (jump); - remove_insn (barrier); - remove_insn (label); - } - - remove_insn (curr_pool->pool_insn); - } - - /* Remove all base register reload insns. */ - - for (insn = get_insns (); insn; ) - { - rtx_insn *next_insn = NEXT_INSN (insn); - - if (NONJUMP_INSN_P (insn) - && GET_CODE (PATTERN (insn)) == SET - && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC - && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE) - remove_insn (insn); - - insn = next_insn; - } - - /* Free pool list. */ - - while (pool_list) - { - struct constant_pool *next = pool_list->next; - s390_free_pool (pool_list); - pool_list = next; - } -} - /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */ void @@ -9608,16 +9062,7 @@ value of RETURN_REGNUM is actually saved. */ if (count == 0) - { - /* On non-z architectures branch splitting could overwrite r14. */ - if (TARGET_CPU_ZARCH) - return get_hard_reg_initial_val (Pmode, RETURN_REGNUM); - else - { - cfun_frame_layout.save_return_addr_p = true; - return gen_rtx_MEM (Pmode, return_address_pointer_rtx); - } - } + return get_hard_reg_initial_val (Pmode, RETURN_REGNUM); if (TARGET_PACKED_STACK) offset = -2 * UNITS_PER_LONG; @@ -10036,7 +9481,6 @@ clobbered_regs[RETURN_REGNUM] |= (!crtl->is_leaf || TARGET_TPF_PROFILING - || cfun->machine->split_branches_pending_p || cfun_frame_layout.save_return_addr_p || crtl->calls_eh_return); @@ -10075,7 +9519,6 @@ int i; gcc_assert (epilogue_completed); - gcc_assert (!cfun->machine->split_branches_pending_p); s390_regs_ever_clobbered (clobbered_regs); @@ -10243,20 +9686,12 @@ if (reload_completed) return; - /* On S/390 machines, we may need to perform branch splitting, which - will require both base and return address register. We have no - choice but to assume we're going to need them until right at the - end of the machine dependent reorg phase. */ - if (!TARGET_CPU_ZARCH) - cfun->machine->split_branches_pending_p = true; - do { frame_size = cfun_frame_layout.frame_size; /* Try to predict whether we'll need the base register. */ - base_used = cfun->machine->split_branches_pending_p - || crtl->uses_const_pool + base_used = crtl->uses_const_pool || (!DISP_IN_RANGE (frame_size) && !CONST_OK_FOR_K (frame_size)); @@ -10708,7 +10143,7 @@ static bool s390_can_eliminate (const int from, const int to) { - /* On zSeries machines, we have not marked the base register as fixed. + /* We have not marked the base register as fixed. Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM. If a function requires the base register, we say here that this elimination cannot be performed. This will cause reload to free @@ -10718,13 +10153,8 @@ to allocate the base register for any other purpose. */ if (from == BASE_REGNUM && to == BASE_REGNUM) { - if (TARGET_CPU_ZARCH) - { - s390_init_frame_layout (); - return cfun->machine->base_reg == NULL_RTX; - } - - return false; + s390_init_frame_layout (); + return cfun->machine->base_reg == NULL_RTX; } /* Everything else must point into the stack frame. */ @@ -10868,9 +10298,9 @@ if (first == last) { if (TARGET_64BIT) - insn = gen_movdi (addr, gen_rtx_REG (Pmode, first)); - else - insn = gen_movsi (addr, gen_rtx_REG (Pmode, first)); + insn = gen_movdi (addr, gen_rtx_REG (Pmode, first)); + else + insn = gen_movsi (addr, gen_rtx_REG (Pmode, first)); if (!global_not_special_regno_p (first)) RTX_FRAME_RELATED_P (insn) = 1; @@ -10981,9 +10411,9 @@ if (first == last) { if (TARGET_64BIT) - insn = gen_movdi (gen_rtx_REG (Pmode, first), addr); - else - insn = gen_movsi (gen_rtx_REG (Pmode, first), addr); + insn = gen_movdi (gen_rtx_REG (Pmode, first), addr); + else + insn = gen_movsi (gen_rtx_REG (Pmode, first), addr); RTX_FRAME_RELATED_P (insn) = 1; return insn; @@ -11011,27 +10441,7 @@ start_sequence (); - if (TARGET_CPU_ZARCH) - { - emit_move_insn (got_rtx, s390_got_symbol ()); - } - else - { - rtx offset; - - offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, s390_got_symbol ()), - UNSPEC_LTREL_OFFSET); - offset = gen_rtx_CONST (Pmode, offset); - offset = force_const_mem (Pmode, offset); - - emit_move_insn (got_rtx, offset); - - offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)), - UNSPEC_LTREL_BASE); - offset = gen_rtx_PLUS (Pmode, got_rtx, offset); - - emit_move_insn (got_rtx, offset); - } + emit_move_insn (got_rtx, s390_got_symbol ()); insns = get_insns (); end_sequence (); @@ -11170,6 +10580,12 @@ } // anon namespace +rtl_opt_pass * +make_pass_s390_early_mach (gcc::context *ctxt) +{ + return new pass_s390_early_mach (ctxt); +} + /* Calculate TARGET = REG + OFFSET as s390_emit_prologue would do it. - push too big immediates to the literal pool and annotate the refs - emit frame related notes for stack pointer changes. */ @@ -11539,7 +10955,7 @@ t, const0_rtx, const0_rtx)); } } - } + } if (s390_warn_framesize > 0 && cfun_frame_layout.frame_size >= s390_warn_framesize) @@ -11674,10 +11090,9 @@ void s390_emit_epilogue (bool sibcall) { - rtx frame_pointer, return_reg, cfa_restores = NULL_RTX; + rtx frame_pointer, return_reg = NULL_RTX, cfa_restores = NULL_RTX; int area_bottom, area_top, offset = 0; int next_offset; - rtvec p; int i; if (TARGET_TPF_PROFILING) @@ -11688,7 +11103,7 @@ algorithms located at the branch target. */ /* Emit a blockage here so that all code - lies between the profiling mechanisms. */ + lies between the profiling mechanisms. */ emit_insn (gen_blockage ()); emit_insn (gen_epilogue_tpf ()); @@ -11709,7 +11124,7 @@ /* Nothing to restore. */ } else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom) - && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1)) + && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1)) { /* Area is in range. */ offset = cfun_frame_layout.frame_size; @@ -11784,10 +11199,6 @@ } - /* Return register. */ - - return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM); - /* Restore call saved gprs. */ if (cfun_frame_layout.first_restore_gpr != -1) @@ -11833,8 +11244,14 @@ && s390_tune <= PROCESSOR_2097_Z10) { int return_regnum = find_unused_clobbered_reg(); - if (!return_regnum) - return_regnum = 4; + if (!return_regnum + || (TARGET_INDIRECT_BRANCH_NOBP_RET_OPTION + && !TARGET_CPU_Z10 + && return_regnum == INDIRECT_BRANCH_THUNK_REGNUM)) + { + gcc_assert (INDIRECT_BRANCH_THUNK_REGNUM != 4); + return_regnum = 4; + } return_reg = gen_rtx_REG (Pmode, return_regnum); addr = plus_constant (Pmode, frame_pointer, @@ -11872,14 +11289,17 @@ if (! sibcall) { - - /* Return to caller. */ - - p = rtvec_alloc (2); - - RTVEC_ELT (p, 0) = ret_rtx; - RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg); - emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p)); + if (!return_reg && !s390_can_use_return_insn ()) + /* We planned to emit (return), be we are not allowed to. */ + return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM); + + if (return_reg) + /* Emit (return) and (use). */ + emit_jump_insn (gen_return_use (return_reg)); + else + /* The fact that RETURN_REGNUM is used is already reflected by + EPILOGUE_USES. Emit plain (return). */ + emit_jump_insn (gen_return ()); } } @@ -11927,11 +11347,6 @@ rtx tmp; gcc_assert (flag_split_stack && reload_completed); - if (!TARGET_CPU_ZARCH) - { - sorry ("CPUs older than z900 are not supported for -fsplit-stack"); - return; - } r1 = gen_rtx_REG (Pmode, 1); @@ -12015,8 +11430,8 @@ LABEL_NUSES (call_done)++; /* Mark the jump as very unlikely to be taken. */ - add_reg_br_prob_note (insn, - profile_probability::very_unlikely ()); + add_reg_br_prob_note (insn, + profile_probability::very_unlikely ()); if (cfun->machine->split_stack_varargs_pointer != NULL_RTX) { @@ -12284,7 +11699,7 @@ if (type) { if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0) - return true; + return true; if (TREE_CODE (type) == COMPLEX_TYPE || TREE_CODE (type) == VECTOR_TYPE) @@ -12457,9 +11872,9 @@ static machine_mode s390_promote_function_mode (const_tree type, machine_mode mode, - int *punsignedp, - const_tree fntype ATTRIBUTE_UNUSED, - int for_return ATTRIBUTE_UNUSED) + int *punsignedp, + const_tree fntype ATTRIBUTE_UNUSED, + int for_return ATTRIBUTE_UNUSED) { if (INTEGRAL_MODE_P (mode) && GET_MODE_SIZE (mode) < UNITS_PER_LONG) @@ -12554,12 +11969,12 @@ On S/390, va_list is an array type equivalent to typedef struct __va_list_tag - { - long __gpr; - long __fpr; - void *__overflow_arg_area; - void *__reg_save_area; - } va_list[1]; + { + long __gpr; + long __fpr; + void *__overflow_arg_area; + void *__reg_save_area; + } va_list[1]; where __gpr and __fpr hold the number of general purpose or floating point arguments used up to now, respectively, @@ -12663,14 +12078,14 @@ if (cfun->va_list_fpr_size) { t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr, - build_int_cst (NULL_TREE, n_fpr)); + build_int_cst (NULL_TREE, n_fpr)); TREE_SIDE_EFFECTS (t) = 1; expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); } if (flag_split_stack && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl)) - == NULL) + == NULL) && cfun->machine->split_stack_varargs_pointer == NULL_RTX) { rtx reg; @@ -12698,9 +12113,9 @@ || TARGET_VX_ABI) { if (cfun->machine->split_stack_varargs_pointer == NULL_RTX) - t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx); - else - t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer); + t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx); + else + t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer); off = INTVAL (crtl->args.arg_offset_rtx); off = off < 0 ? 0 : off; @@ -12736,7 +12151,7 @@ if (integral value) { if (size <= 4 && args.gpr < 5 || - size > 4 && args.gpr < 4 ) + size > 4 && args.gpr < 4 ) ret = args.reg_save_area[args.gpr+8] else ret = *args.overflow_arg_area++; @@ -12872,13 +12287,13 @@ { /* if (reg > ((typeof (reg))max_reg)) - goto lab_false; - - addr = sav + sav_ofs + reg * save_scale; + goto lab_false; + + addr = sav + sav_ofs + reg * save_scale; goto lab_over; - lab_false: + lab_false: */ lab_false = create_artificial_label (UNKNOWN_LOCATION); @@ -12952,11 +12367,11 @@ expanders. DEST - Register location where CC will be stored. TDB - Pointer to a 256 byte area where to store the transaction. - diagnostic block. NULL if TDB is not needed. + diagnostic block. NULL if TDB is not needed. RETRY - Retry count value. If non-NULL a retry loop for CC2 - is emitted + is emitted CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part - of the tbegin instruction pattern. */ + of the tbegin instruction pattern. */ void s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p) @@ -13090,13 +12505,37 @@ emit_move_insn (mem, fnaddr); } +static void +output_asm_nops (const char *user, int hw) +{ + asm_fprintf (asm_out_file, "\t# NOPs for %s (%d halfwords)\n", user, hw); + while (hw > 0) + { + if (hw >= 3) + { + output_asm_insn ("brcl\t0,0", NULL); + hw -= 3; + } + else if (hw >= 2) + { + output_asm_insn ("bc\t0,0", NULL); + hw -= 2; + } + else + { + output_asm_insn ("bcr\t0,0", NULL); + hw -= 1; + } + } +} + /* Output assembler code to FILE to increment profiler label # LABELNO for profiling a function entry. */ void s390_function_profiler (FILE *file, int labelno) { - rtx op[7]; + rtx op[8]; char label[128]; ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno); @@ -13106,62 +12545,72 @@ op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM); op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM); op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG)); + op[7] = GEN_INT (UNITS_PER_LONG); op[2] = gen_rtx_REG (Pmode, 1); op[3] = gen_rtx_SYMBOL_REF (Pmode, label); SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL; - op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount"); + op[4] = gen_rtx_SYMBOL_REF (Pmode, flag_fentry ? "__fentry__" : "_mcount"); if (flag_pic) { op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT); op[4] = gen_rtx_CONST (Pmode, op[4]); } - if (TARGET_64BIT) - { - output_asm_insn ("stg\t%0,%1", op); - output_asm_insn ("larl\t%2,%3", op); - output_asm_insn ("brasl\t%0,%4", op); - output_asm_insn ("lg\t%0,%1", op); - } - else if (TARGET_CPU_ZARCH) - { - output_asm_insn ("st\t%0,%1", op); - output_asm_insn ("larl\t%2,%3", op); - output_asm_insn ("brasl\t%0,%4", op); - output_asm_insn ("l\t%0,%1", op); - } - else if (!flag_pic) - { - op[6] = gen_label_rtx (); - - output_asm_insn ("st\t%0,%1", op); - output_asm_insn ("bras\t%2,%l6", op); - output_asm_insn (".long\t%4", op); - output_asm_insn (".long\t%3", op); - targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6])); - output_asm_insn ("l\t%0,0(%2)", op); - output_asm_insn ("l\t%2,4(%2)", op); - output_asm_insn ("basr\t%0,%0", op); - output_asm_insn ("l\t%0,%1", op); - } - else - { - op[5] = gen_label_rtx (); - op[6] = gen_label_rtx (); - - output_asm_insn ("st\t%0,%1", op); - output_asm_insn ("bras\t%2,%l6", op); - targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5])); - output_asm_insn (".long\t%4-%l5", op); - output_asm_insn (".long\t%3-%l5", op); - targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6])); - output_asm_insn ("lr\t%0,%2", op); - output_asm_insn ("a\t%0,0(%2)", op); - output_asm_insn ("a\t%2,4(%2)", op); - output_asm_insn ("basr\t%0,%0", op); - output_asm_insn ("l\t%0,%1", op); + if (flag_record_mcount) + fprintf (file, "1:\n"); + + if (flag_fentry) + { + if (flag_nop_mcount) + output_asm_nops ("-mnop-mcount", /* brasl */ 3); + else if (cfun->static_chain_decl) + warning (OPT_Wcannot_profile, "nested functions cannot be profiled " + "with -mfentry on s390"); + else + output_asm_insn ("brasl\t0,%4", op); + } + else if (TARGET_64BIT) + { + if (flag_nop_mcount) + output_asm_nops ("-mnop-mcount", /* stg */ 3 + /* larl */ 3 + + /* brasl */ 3 + /* lg */ 3); + else + { + output_asm_insn ("stg\t%0,%1", op); + if (flag_dwarf2_cfi_asm) + output_asm_insn (".cfi_rel_offset\t%0,%7", op); + output_asm_insn ("larl\t%2,%3", op); + output_asm_insn ("brasl\t%0,%4", op); + output_asm_insn ("lg\t%0,%1", op); + if (flag_dwarf2_cfi_asm) + output_asm_insn (".cfi_restore\t%0", op); + } + } + else + { + if (flag_nop_mcount) + output_asm_nops ("-mnop-mcount", /* st */ 2 + /* larl */ 3 + + /* brasl */ 3 + /* l */ 2); + else + { + output_asm_insn ("st\t%0,%1", op); + if (flag_dwarf2_cfi_asm) + output_asm_insn (".cfi_rel_offset\t%0,%7", op); + output_asm_insn ("larl\t%2,%3", op); + output_asm_insn ("brasl\t%0,%4", op); + output_asm_insn ("l\t%0,%1", op); + if (flag_dwarf2_cfi_asm) + output_asm_insn (".cfi_restore\t%0", op); + } + } + + if (flag_record_mcount) + { + fprintf (file, "\t.section __mcount_loc, \"a\",@progbits\n"); + fprintf (file, "\t.%s 1b\n", TARGET_64BIT ? "quad" : "long"); + fprintf (file, "\t.previous\n"); } } @@ -13277,8 +12726,8 @@ output_asm_insn ("lay\t%1,%2(%1)", op); else if (CONST_OK_FOR_K (delta)) output_asm_insn ("aghi\t%1,%2", op); - else if (CONST_OK_FOR_Os (delta)) - output_asm_insn ("agfi\t%1,%2", op); + else if (CONST_OK_FOR_Os (delta)) + output_asm_insn ("agfi\t%1,%2", op); else { op[6] = gen_label_rtx (); @@ -13300,12 +12749,12 @@ output_asm_insn ("ag\t%4,0(%1)", op); output_asm_insn ("ag\t%1,0(%4)", op); } - else if (CONST_OK_FOR_Os (vcall_offset)) - { - output_asm_insn ("lgfi\t%4,%3", op); - output_asm_insn ("ag\t%4,0(%1)", op); - output_asm_insn ("ag\t%1,0(%4)", op); - } + else if (CONST_OK_FOR_Os (vcall_offset)) + { + output_asm_insn ("lgfi\t%4,%3", op); + output_asm_insn ("ag\t%4,0(%1)", op); + output_asm_insn ("ag\t%1,0(%4)", op); + } else { op[7] = gen_label_rtx (); @@ -13343,10 +12792,10 @@ /* Setup base pointer if required. */ if (!vcall_offset || (!DISP_IN_RANGE (delta) - && !CONST_OK_FOR_K (delta) + && !CONST_OK_FOR_K (delta) && !CONST_OK_FOR_Os (delta)) || (!DISP_IN_RANGE (delta) - && !CONST_OK_FOR_K (vcall_offset) + && !CONST_OK_FOR_K (vcall_offset) && !CONST_OK_FOR_Os (vcall_offset))) { op[5] = gen_label_rtx (); @@ -13365,7 +12814,7 @@ else if (CONST_OK_FOR_K (delta)) output_asm_insn ("ahi\t%1,%2", op); else if (CONST_OK_FOR_Os (delta)) - output_asm_insn ("afi\t%1,%2", op); + output_asm_insn ("afi\t%1,%2", op); else { op[6] = gen_label_rtx (); @@ -13375,7 +12824,7 @@ /* Perform vcall adjustment. */ if (vcall_offset) - { + { if (CONST_OK_FOR_J (vcall_offset)) { output_asm_insn ("l\t%4,0(%1)", op); @@ -13393,11 +12842,11 @@ output_asm_insn ("a\t%1,0(%4)", op); } else if (CONST_OK_FOR_Os (vcall_offset)) - { - output_asm_insn ("iilf\t%4,%3", op); - output_asm_insn ("a\t%4,0(%1)", op); - output_asm_insn ("a\t%1,0(%4)", op); - } + { + output_asm_insn ("iilf\t%4,%3", op); + output_asm_insn ("a\t%4,0(%1)", op); + output_asm_insn ("a\t%1,0(%4)", op); + } else { op[7] = gen_label_rtx (); @@ -13471,6 +12920,112 @@ final_end_function (); } +/* Output either an indirect jump or a an indirect call + (RETURN_ADDR_REGNO != INVALID_REGNUM) with target register REGNO + using a branch trampoline disabling branch target prediction. */ + +void +s390_indirect_branch_via_thunk (unsigned int regno, + unsigned int return_addr_regno, + rtx comparison_operator, + enum s390_indirect_branch_type type) +{ + enum s390_indirect_branch_option option; + + if (type == s390_indirect_branch_type_return) + { + if (s390_return_addr_from_memory ()) + option = s390_opt_function_return_mem; + else + option = s390_opt_function_return_reg; + } + else if (type == s390_indirect_branch_type_jump) + option = s390_opt_indirect_branch_jump; + else if (type == s390_indirect_branch_type_call) + option = s390_opt_indirect_branch_call; + else + gcc_unreachable (); + + if (TARGET_INDIRECT_BRANCH_TABLE) + { + char label[32]; + + ASM_GENERATE_INTERNAL_LABEL (label, + indirect_branch_table_label[option], + indirect_branch_table_label_no[option]++); + ASM_OUTPUT_LABEL (asm_out_file, label); + } + + if (return_addr_regno != INVALID_REGNUM) + { + gcc_assert (comparison_operator == NULL_RTX); + fprintf (asm_out_file, " \tbrasl\t%%r%d,", return_addr_regno); + } + else + { + fputs (" \tjg", asm_out_file); + if (comparison_operator != NULL_RTX) + print_operand (asm_out_file, comparison_operator, 'C'); + + fputs ("\t", asm_out_file); + } + + if (TARGET_CPU_Z10) + fprintf (asm_out_file, + TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL "\n", + regno); + else + fprintf (asm_out_file, + TARGET_INDIRECT_BRANCH_THUNK_NAME_EX "\n", + INDIRECT_BRANCH_THUNK_REGNUM, regno); + + if ((option == s390_opt_indirect_branch_jump + && cfun->machine->indirect_branch_jump == indirect_branch_thunk) + || (option == s390_opt_indirect_branch_call + && cfun->machine->indirect_branch_call == indirect_branch_thunk) + || (option == s390_opt_function_return_reg + && cfun->machine->function_return_reg == indirect_branch_thunk) + || (option == s390_opt_function_return_mem + && cfun->machine->function_return_mem == indirect_branch_thunk)) + { + if (TARGET_CPU_Z10) + indirect_branch_z10thunk_mask |= (1 << regno); + else + indirect_branch_prez10thunk_mask |= (1 << regno); + } +} + +/* Output an inline thunk for indirect jumps. EXECUTE_TARGET can + either be an address register or a label pointing to the location + of the jump instruction. */ + +void +s390_indirect_branch_via_inline_thunk (rtx execute_target) +{ + if (TARGET_INDIRECT_BRANCH_TABLE) + { + char label[32]; + + ASM_GENERATE_INTERNAL_LABEL (label, + indirect_branch_table_label[s390_opt_indirect_branch_jump], + indirect_branch_table_label_no[s390_opt_indirect_branch_jump]++); + ASM_OUTPUT_LABEL (asm_out_file, label); + } + + if (!TARGET_ZARCH) + fputs ("\t.machinemode zarch\n", asm_out_file); + + if (REG_P (execute_target)) + fprintf (asm_out_file, "\tex\t%%r0,0(%%r%d)\n", REGNO (execute_target)); + else + output_asm_insn ("\texrl\t%%r0,%0", &execute_target); + + if (!TARGET_ZARCH) + fputs ("\t.machinemode esa\n", asm_out_file); + + fputs ("0:\tj\t0b\n", asm_out_file); +} + static bool s390_valid_pointer_mode (scalar_int_mode mode) { @@ -13516,10 +13071,10 @@ named. This only has an impact on vector argument register usage none of which is call-saved. */ if (pass_by_reference (&cum_v, mode, type, true)) - { - mode = Pmode; - type = build_pointer_type (type); - } + { + mode = Pmode; + type = build_pointer_type (type); + } parm_rtx = s390_function_arg (cum, mode, type, true); @@ -13529,10 +13084,10 @@ continue; if (REG_P (parm_rtx)) - { + { for (reg = 0; reg < REG_NREGS (parm_rtx); reg++) if (!call_used_regs[reg + REGNO (parm_rtx)]) - return true; + return true; } if (GET_CODE (parm_rtx) == PARALLEL) @@ -13572,6 +13127,14 @@ if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl)) return false; + /* The thunks for indirect branches require r1 if no exrl is + available. r1 might not be available when doing a sibling + call. */ + if (TARGET_INDIRECT_BRANCH_NOBP_CALL + && !TARGET_CPU_Z10 + && !decl) + return false; + /* Register 6 on s390 is available as an argument register but unfortunately "caller saved". This makes functions needing this register for arguments not suitable for sibcalls. */ @@ -13596,8 +13159,8 @@ TLS_CALL the location of the thread-local symbol RESULT_REG the register where the result of the call should be stored RETADDR_REG the register where the return address should be stored - If this parameter is NULL_RTX the call is considered - to be a sibling call. */ + If this parameter is NULL_RTX the call is considered + to be a sibling call. */ rtx_insn * s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg, @@ -13605,17 +13168,21 @@ { bool plt_call = false; rtx_insn *insn; - rtx call; - rtx clobber; - rtvec vec; + rtx vec[4] = { NULL_RTX }; + int elts = 0; + rtx *call = &vec[0]; + rtx *clobber_ret_reg = &vec[1]; + rtx *use = &vec[2]; + rtx *clobber_thunk_reg = &vec[3]; + int i; /* Direct function calls need special treatment. */ if (GET_CODE (addr_location) == SYMBOL_REF) { /* When calling a global routine in PIC mode, we must - replace the symbol itself with the PLT stub. */ + replace the symbol itself with the PLT stub. */ if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location)) - { + { if (TARGET_64BIT || retaddr_reg != NULL_RTX) { addr_location = gen_rtx_UNSPEC (Pmode, @@ -13635,16 +13202,6 @@ optimization is illegal for S/390 so we turn the direct call into a indirect call again. */ addr_location = force_reg (Pmode, addr_location); - } - - /* Unless we can use the bras(l) insn, force the - routine address into a register. */ - if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH) - { - if (flag_pic) - addr_location = legitimize_pic_address (addr_location, 0); - else - addr_location = force_reg (Pmode, addr_location); } } @@ -13659,26 +13216,58 @@ addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM); } + if (TARGET_INDIRECT_BRANCH_NOBP_CALL + && GET_CODE (addr_location) != SYMBOL_REF + && !plt_call) + { + /* Indirect branch thunks require the target to be a single GPR. */ + addr_location = force_reg (Pmode, addr_location); + + /* Without exrl the indirect branch thunks need an additional + register for larl;ex */ + if (!TARGET_CPU_Z10) + { + *clobber_thunk_reg = gen_rtx_REG (Pmode, INDIRECT_BRANCH_THUNK_REGNUM); + *clobber_thunk_reg = gen_rtx_CLOBBER (VOIDmode, *clobber_thunk_reg); + } + } + addr_location = gen_rtx_MEM (QImode, addr_location); - call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx); + *call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx); if (result_reg != NULL_RTX) - call = gen_rtx_SET (result_reg, call); + *call = gen_rtx_SET (result_reg, *call); if (retaddr_reg != NULL_RTX) { - clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg); + *clobber_ret_reg = gen_rtx_CLOBBER (VOIDmode, retaddr_reg); if (tls_call != NULL_RTX) - vec = gen_rtvec (3, call, clobber, - gen_rtx_USE (VOIDmode, tls_call)); - else - vec = gen_rtvec (2, call, clobber); - - call = gen_rtx_PARALLEL (VOIDmode, vec); - } - - insn = emit_call_insn (call); + *use = gen_rtx_USE (VOIDmode, tls_call); + } + + + for (i = 0; i < 4; i++) + if (vec[i] != NULL_RTX) + elts++; + + if (elts > 1) + { + rtvec v; + int e = 0; + + v = rtvec_alloc (elts); + for (i = 0; i < 4; i++) + if (vec[i] != NULL_RTX) + { + RTVEC_ELT (v, e) = vec[i]; + e++; + } + + *call = gen_rtx_PARALLEL (VOIDmode, v); + } + + insn = emit_call_insn (*call); /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */ if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX) @@ -13703,13 +13292,10 @@ fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; } - if (TARGET_CPU_ZARCH) - { - fixed_regs[BASE_REGNUM] = 0; - call_used_regs[BASE_REGNUM] = 0; - fixed_regs[RETURN_REGNUM] = 0; - call_used_regs[RETURN_REGNUM] = 0; - } + fixed_regs[BASE_REGNUM] = 0; + call_used_regs[BASE_REGNUM] = 0; + fixed_regs[RETURN_REGNUM] = 0; + call_used_regs[RETURN_REGNUM] = 0; if (TARGET_64BIT) { for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++) @@ -13753,7 +13339,7 @@ emit_move_insn (reg, target); emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM)); insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg, - gen_rtx_REG (Pmode, RETURN_REGNUM)); + gen_rtx_REG (Pmode, RETURN_REGNUM)); use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg); use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra); @@ -13775,14 +13361,10 @@ can do, so no point in walking the insn list. */ if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM - && cfun_frame_layout.last_save_gpr >= BASE_REGNUM - && (TARGET_CPU_ZARCH - || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM - && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM))) + && cfun_frame_layout.last_save_gpr >= BASE_REGNUM) return; /* Search for prologue/epilogue insns and replace them. */ - for (insn = get_insns (); insn; insn = next_insn) { int first, last, off; @@ -14193,9 +13775,9 @@ && s390_non_addr_reg_read_p (*op0, prev_insn)) { if (REGNO (*op1) == 0) - emit_insn_after (gen_nop1 (), insn); + emit_insn_after (gen_nop_lr1 (), insn); else - emit_insn_after (gen_nop (), insn); + emit_insn_after (gen_nop_lr0 (), insn); insn_added_p = true; } else @@ -14358,7 +13940,8 @@ static void s390_reorg (void) { - bool pool_overflow = false; + struct constant_pool *pool; + rtx_insn *insn; int hw_before, hw_after; if (s390_tune == PROCESSOR_2964_Z13) @@ -14369,100 +13952,54 @@ split_all_insns_noflow (); /* Install the main literal pool and the associated base - register load insns. - - In addition, there are two problematic situations we need - to correct: - - - the literal pool might be > 4096 bytes in size, so that - some of its elements cannot be directly accessed - - - a branch target might be > 64K away from the branch, so that - it is not possible to use a PC-relative instruction. - - To fix those, we split the single literal pool into multiple + register load insns. The literal pool might be > 4096 bytes in + size, so that some of its elements cannot be directly accessed. + + To fix this, we split the single literal pool into multiple pool chunks, reloading the pool base register at various points throughout the function to ensure it always points to - the pool chunk the following code expects, and / or replace - PC-relative branches by absolute branches. - - However, the two problems are interdependent: splitting the - literal pool can move a branch further away from its target, - causing the 64K limit to overflow, and on the other hand, - replacing a PC-relative branch by an absolute branch means - we need to put the branch target address into the literal - pool, possibly causing it to overflow. - - So, we loop trying to fix up both problems until we manage - to satisfy both conditions at the same time. Note that the - loop is guaranteed to terminate as every pass of the loop - strictly decreases the total number of PC-relative branches - in the function. (This is not completely true as there - might be branch-over-pool insns introduced by chunkify_start. - Those never need to be split however.) */ - - for (;;) - { - struct constant_pool *pool = NULL; - - /* Collect the literal pool. */ - if (!pool_overflow) - { - pool = s390_mainpool_start (); - if (!pool) - pool_overflow = true; - } - - /* If literal pool overflowed, start to chunkify it. */ - if (pool_overflow) - pool = s390_chunkify_start (); - - /* Split out-of-range branches. If this has created new - literal pool entries, cancel current chunk list and - recompute it. zSeries machines have large branch - instructions, so we never need to split a branch. */ - if (!TARGET_CPU_ZARCH && s390_split_branches ()) - { - if (pool_overflow) - s390_chunkify_cancel (pool); - else - s390_mainpool_cancel (pool); - - continue; - } - - /* If we made it up to here, both conditions are satisfied. - Finish up literal pool related changes. */ - if (pool_overflow) - s390_chunkify_finish (pool); - else - s390_mainpool_finish (pool); - - /* We're done splitting branches. */ - cfun->machine->split_branches_pending_p = false; - break; + the pool chunk the following code expects. */ + + /* Collect the literal pool. */ + pool = s390_mainpool_start (); + if (pool) + { + /* Finish up literal pool related changes. */ + s390_mainpool_finish (pool); + } + else + { + /* If literal pool overflowed, chunkify it. */ + pool = s390_chunkify_start (); + s390_chunkify_finish (pool); } /* Generate out-of-pool execute target insns. */ - if (TARGET_CPU_ZARCH) - { - rtx_insn *insn, *target; + for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { rtx label; - - for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) - { - label = s390_execute_label (insn); - if (!label) - continue; - - gcc_assert (label != const0_rtx); - - target = emit_label (XEXP (label, 0)); - INSN_ADDRESSES_NEW (target, -1); - - target = emit_insn (s390_execute_target (insn)); - INSN_ADDRESSES_NEW (target, -1); - } + rtx_insn *target; + + label = s390_execute_label (insn); + if (!label) + continue; + + gcc_assert (label != const0_rtx); + + target = emit_label (XEXP (label, 0)); + INSN_ADDRESSES_NEW (target, -1); + + if (JUMP_P (insn)) + { + target = emit_jump_insn (s390_execute_target (insn)); + /* This is important in order to keep a table jump + pointing at the jump table label. Only this makes it + being recognized as table jump. */ + JUMP_LABEL (target) = JUMP_LABEL (insn); + } + else + target = emit_insn (s390_execute_target (insn)); + INSN_ADDRESSES_NEW (target, -1); } /* Try to optimize prologue and epilogue further. */ @@ -14519,7 +14056,7 @@ /* Output a series of NOPs before the first active insn. */ while (insn && hw_after > 0) { - if (hw_after >= 3 && TARGET_CPU_ZARCH) + if (hw_after >= 3) { emit_insn_before (gen_nop_6_byte (), insn); hw_after -= 3; @@ -15100,9 +14637,29 @@ for (i = 0; i < loop->num_nodes; i++) FOR_BB_INSNS (bbs[i], insn) if (INSN_P (insn) && INSN_CODE (insn) != -1) - FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) - if (MEM_P (*iter)) - mem_count += 1; + { + rtx set; + + /* The runtime of small loops with memory block operations + will be determined by the memory operation. Doing + unrolling doesn't help here. Measurements to confirm + this where only done on recent CPU levels. So better do + not change anything for older CPUs. */ + if (s390_tune >= PROCESSOR_2964_Z13 + && loop->ninsns <= BLOCK_MEM_OPS_LOOP_INSNS + && ((set = single_set (insn)) != NULL_RTX) + && ((GET_MODE (SET_DEST (set)) == BLKmode + && (GET_MODE (SET_SRC (set)) == BLKmode + || SET_SRC (set) == const0_rtx)) + || (GET_CODE (SET_SRC (set)) == COMPARE + && GET_MODE (XEXP (SET_SRC (set), 0)) == BLKmode + && GET_MODE (XEXP (SET_SRC (set), 1)) == BLKmode))) + return 1; + + FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST) + if (MEM_P (*iter)) + mem_count += 1; + } free (bbs); /* Prevent division by zero, and we do not need to adjust nunroll in this case. */ @@ -15131,27 +14688,25 @@ } static void -s390_option_override_internal (bool main_args_p, - struct gcc_options *opts, +s390_default_align (struct gcc_options *opts) +{ + /* Set the default function alignment to 16 in order to get rid of + some unwanted performance effects. */ + if (opts->x_flag_align_functions && !opts->x_str_align_functions + && opts->x_s390_tune >= PROCESSOR_2964_Z13) + opts->x_str_align_functions = "16"; +} + +static void +s390_override_options_after_change (void) +{ + s390_default_align (&global_options); +} + +static void +s390_option_override_internal (struct gcc_options *opts, const struct gcc_options *opts_set) { - const char *prefix; - const char *suffix; - - /* Set up prefix/suffix so the error messages refer to either the command - line argument, or the attribute(target). */ - if (main_args_p) - { - prefix = "-m"; - suffix = ""; - } - else - { - prefix = "option(\""; - suffix = "\")"; - } - - /* Architecture mode defaults according to ABI. */ if (!(opts_set->x_target_flags & MASK_ZARCH)) { @@ -15164,24 +14719,12 @@ /* Set the march default in case it hasn't been specified on cmdline. */ if (!opts_set->x_s390_arch) opts->x_s390_arch = PROCESSOR_2064_Z900; - else if (opts->x_s390_arch == PROCESSOR_9672_G5 - || opts->x_s390_arch == PROCESSOR_9672_G6) - warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed " - "in future releases; use at least %sarch=z900%s", - prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6", - suffix, prefix, suffix); opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch]; /* Determine processor to tune for. */ if (!opts_set->x_s390_tune) opts->x_s390_tune = opts->x_s390_arch; - else if (opts->x_s390_tune == PROCESSOR_9672_G5 - || opts->x_s390_tune == PROCESSOR_9672_G6) - warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed " - "in future releases; use at least %stune=z900%s", - prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6", - suffix, prefix, suffix); opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune]; @@ -15189,12 +14732,34 @@ if (opts->x_s390_arch == PROCESSOR_NATIVE || opts->x_s390_tune == PROCESSOR_NATIVE) gcc_unreachable (); - if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts)) - error ("z/Architecture mode not supported on %s", - processor_table[(int)opts->x_s390_arch].name); if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags)) error ("64-bit ABI not supported in ESA/390 mode"); + if (opts->x_s390_indirect_branch == indirect_branch_thunk_inline + || opts->x_s390_indirect_branch_call == indirect_branch_thunk_inline + || opts->x_s390_function_return == indirect_branch_thunk_inline + || opts->x_s390_function_return_reg == indirect_branch_thunk_inline + || opts->x_s390_function_return_mem == indirect_branch_thunk_inline) + error ("thunk-inline is only supported with -mindirect-branch-jump"); + + if (opts->x_s390_indirect_branch != indirect_branch_keep) + { + if (!opts_set->x_s390_indirect_branch_call) + opts->x_s390_indirect_branch_call = opts->x_s390_indirect_branch; + + if (!opts_set->x_s390_indirect_branch_jump) + opts->x_s390_indirect_branch_jump = opts->x_s390_indirect_branch; + } + + if (opts->x_s390_function_return != indirect_branch_keep) + { + if (!opts_set->x_s390_function_return_reg) + opts->x_s390_function_return_reg = opts->x_s390_function_return; + + if (!opts_set->x_s390_function_return_mem) + opts->x_s390_function_return_mem = opts->x_s390_function_return; + } + /* Enable hardware transactions if available and not explicitly disabled by user. E.g. with -m31 -march=zEC12 -mzarch */ if (!TARGET_OPT_HTM_P (opts_set->x_target_flags)) @@ -15278,6 +14843,17 @@ else if (opts->x_s390_stack_guard) error ("-mstack-guard implies use of -mstack-size"); + /* Our implementation of the stack probe requires the probe interval + to be used as displacement in an address operand. The maximum + probe interval currently is 64k. This would exceed short + displacements. Trim that value down to 4k if that happens. This + might result in too many probes being generated only on the + oldest supported machine level z900. */ + if (!DISP_IN_RANGE ((1 << PARAM_VALUE (PARAM_STACK_CLASH_PROTECTION_PROBE_INTERVAL)))) + set_param_value ("stack-clash-protection-probe-interval", 12, + opts->x_param_values, + opts_set->x_param_values); + #ifdef TARGET_DEFAULT_LONG_DOUBLE_128 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags)) opts->x_target_flags |= MASK_LONG_DOUBLE_128; @@ -15324,16 +14900,25 @@ /* Use the alternative scheduling-pressure algorithm by default. */ maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2, - opts->x_param_values, - opts_set->x_param_values); + opts->x_param_values, + opts_set->x_param_values); maybe_set_param_value (PARAM_MIN_VECT_LOOP_BOUND, 2, opts->x_param_values, opts_set->x_param_values); + /* Set the default alignment. */ + s390_default_align (opts); + /* Call target specific restore function to do post-init work. At the moment, this just sets opts->x_s390_cost_pointer. */ s390_function_specific_restore (opts, NULL); + + /* Check whether -mfentry is supported. It cannot be used in 31-bit mode, + because 31-bit PLT stubs assume that %r12 contains GOT address, which is + not the case when the code runs before the prolog. */ + if (opts->x_flag_fentry && !TARGET_64BIT) + error ("-mfentry is supported only for 64-bit CPUs"); } static void @@ -15353,16 +14938,11 @@ { int val1; int val2; - char s[256]; - char *t; - - strncpy (s, opt->arg, 256); - s[255] = 0; - t = strchr (s, ','); + char *s = strtok (ASTRDUP (opt->arg), ","); + char *t = strtok (NULL, "\0"); + if (t != NULL) { - *t = 0; - t++; val1 = integral_argument (s); val2 = integral_argument (t); } @@ -15397,7 +14977,7 @@ /* Set up function hooks. */ init_machine_status = s390_init_machine_status; - s390_option_override_internal (true, &global_options, &global_options_set); + s390_option_override_internal (&global_options, &global_options_set); /* Save the initial options in case the user does function specific options. */ @@ -15417,26 +14997,11 @@ { /* Don't emit DWARF3/4 unless specifically selected. The TPF debuggers do not yet support DWARF 3/4. */ - if (!global_options_set.x_dwarf_strict) + if (!global_options_set.x_dwarf_strict) dwarf_strict = 1; if (!global_options_set.x_dwarf_version) dwarf_version = 2; } - - /* Register a target-specific optimization-and-lowering pass - to run immediately before prologue and epilogue generation. - - Registering the pass must be done at start up. It's - convenient to do it here. */ - opt_pass *new_pass = new pass_s390_early_mach (g); - struct register_pass_info insert_pass_s390_early_mach = - { - new_pass, /* pass */ - "pro_and_epilogue", /* reference_pass_name */ - 1, /* ref_pass_instance_number */ - PASS_POS_INSERT_BEFORE /* po_op */ - }; - register_pass (&insert_pass_s390_early_mach); } #if S390_USE_TARGET_ATTRIBUTE @@ -15702,7 +15267,7 @@ dest[i] |= src[i]; /* Do any overrides, such as arch=xxx, or tune=xxx support. */ - s390_option_override_internal (false, opts, &new_opts_set); + s390_option_override_internal (opts, &new_opts_set); /* Save the current options unless we are validating options for #pragma. */ t = build_target_option_node (opts); @@ -15812,6 +15377,78 @@ return ret; } +/* Set VAL to correct enum value according to the indirect-branch or + function-return attribute in ATTR. */ + +static inline void +s390_indirect_branch_attrvalue (tree attr, enum indirect_branch *val) +{ + const char *str = TREE_STRING_POINTER (TREE_VALUE (TREE_VALUE (attr))); + if (strcmp (str, "keep") == 0) + *val = indirect_branch_keep; + else if (strcmp (str, "thunk") == 0) + *val = indirect_branch_thunk; + else if (strcmp (str, "thunk-inline") == 0) + *val = indirect_branch_thunk_inline; + else if (strcmp (str, "thunk-extern") == 0) + *val = indirect_branch_thunk_extern; +} + +/* Memorize the setting for -mindirect-branch* and -mfunction-return* + from either the cmdline or the function attributes in + cfun->machine. */ + +static void +s390_indirect_branch_settings (tree fndecl) +{ + tree attr; + + if (!fndecl) + return; + + /* Initialize with the cmdline options and let the attributes + override it. */ + cfun->machine->indirect_branch_jump = s390_indirect_branch_jump; + cfun->machine->indirect_branch_call = s390_indirect_branch_call; + + cfun->machine->function_return_reg = s390_function_return_reg; + cfun->machine->function_return_mem = s390_function_return_mem; + + if ((attr = lookup_attribute ("indirect_branch", + DECL_ATTRIBUTES (fndecl)))) + { + s390_indirect_branch_attrvalue (attr, + &cfun->machine->indirect_branch_jump); + s390_indirect_branch_attrvalue (attr, + &cfun->machine->indirect_branch_call); + } + + if ((attr = lookup_attribute ("indirect_branch_jump", + DECL_ATTRIBUTES (fndecl)))) + s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_jump); + + if ((attr = lookup_attribute ("indirect_branch_call", + DECL_ATTRIBUTES (fndecl)))) + s390_indirect_branch_attrvalue (attr, &cfun->machine->indirect_branch_call); + + if ((attr = lookup_attribute ("function_return", + DECL_ATTRIBUTES (fndecl)))) + { + s390_indirect_branch_attrvalue (attr, + &cfun->machine->function_return_reg); + s390_indirect_branch_attrvalue (attr, + &cfun->machine->function_return_mem); + } + + if ((attr = lookup_attribute ("function_return_reg", + DECL_ATTRIBUTES (fndecl)))) + s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_reg); + + if ((attr = lookup_attribute ("function_return_mem", + DECL_ATTRIBUTES (fndecl)))) + s390_indirect_branch_attrvalue (attr, &cfun->machine->function_return_mem); +} + /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl cache. */ @@ -15838,7 +15475,10 @@ several times in the course of compiling a function, and we don't want to slow things down too much or call target_reinit when it isn't safe. */ if (fndecl == s390_previous_fndecl) - return; + { + s390_indirect_branch_settings (fndecl); + return; + } tree old_tree; if (s390_previous_fndecl == NULL_TREE) @@ -15862,6 +15502,8 @@ if (old_tree != new_tree) s390_activate_target_options (new_tree); s390_previous_fndecl = fndecl; + + s390_indirect_branch_settings (fndecl); } #endif @@ -16160,6 +15802,184 @@ return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000); } +#ifdef HAVE_GAS_HIDDEN +# define USE_HIDDEN_LINKONCE 1 +#else +# define USE_HIDDEN_LINKONCE 0 +#endif + +/* Output an indirect branch trampoline for target register REGNO. */ + +static void +s390_output_indirect_thunk_function (unsigned int regno, bool z10_p) +{ + tree decl; + char thunk_label[32]; + int i; + + if (z10_p) + sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EXRL, regno); + else + sprintf (thunk_label, TARGET_INDIRECT_BRANCH_THUNK_NAME_EX, + INDIRECT_BRANCH_THUNK_REGNUM, regno); + + decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, + get_identifier (thunk_label), + build_function_type_list (void_type_node, NULL_TREE)); + DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, + NULL_TREE, void_type_node); + TREE_PUBLIC (decl) = 1; + TREE_STATIC (decl) = 1; + DECL_IGNORED_P (decl) = 1; + + if (USE_HIDDEN_LINKONCE) + { + cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl)); + + targetm.asm_out.unique_section (decl, 0); + switch_to_section (get_named_section (decl, NULL, 0)); + + targetm.asm_out.globalize_label (asm_out_file, thunk_label); + fputs ("\t.hidden\t", asm_out_file); + assemble_name (asm_out_file, thunk_label); + putc ('\n', asm_out_file); + ASM_DECLARE_FUNCTION_NAME (asm_out_file, thunk_label, decl); + } + else + { + switch_to_section (text_section); + ASM_OUTPUT_LABEL (asm_out_file, thunk_label); + } + + DECL_INITIAL (decl) = make_node (BLOCK); + current_function_decl = decl; + allocate_struct_function (decl, false); + init_function_start (decl); + cfun->is_thunk = true; + first_function_block_is_cold = false; + final_start_function (emit_barrier (), asm_out_file, 1); + + /* This makes CFI at least usable for indirect jumps. + + Stopping in the thunk: backtrace will point to the thunk target + is if it was interrupted by a signal. For a call this means that + the call chain will be: caller->callee->thunk */ + if (flag_asynchronous_unwind_tables && flag_dwarf2_cfi_asm) + { + fputs ("\t.cfi_signal_frame\n", asm_out_file); + fprintf (asm_out_file, "\t.cfi_return_column %d\n", regno); + for (i = 0; i < FPR15_REGNUM; i++) + fprintf (asm_out_file, "\t.cfi_same_value %s\n", reg_names[i]); + } + + if (z10_p) + { + /* exrl 0,1f */ + + /* We generate a thunk for z10 compiled code although z10 is + currently not enabled. Tell the assembler to accept the + instruction. */ + if (!TARGET_CPU_Z10) + { + fputs ("\t.machine push\n", asm_out_file); + fputs ("\t.machine z10\n", asm_out_file); + } + /* We use exrl even if -mzarch hasn't been specified on the + command line so we have to tell the assembler to accept + it. */ + if (!TARGET_ZARCH) + fputs ("\t.machinemode zarch\n", asm_out_file); + + fputs ("\texrl\t0,1f\n", asm_out_file); + + if (!TARGET_ZARCH) + fputs ("\t.machinemode esa\n", asm_out_file); + + if (!TARGET_CPU_Z10) + fputs ("\t.machine pop\n", asm_out_file); + } + else + { + /* larl %r1,1f */ + fprintf (asm_out_file, "\tlarl\t%%r%d,1f\n", + INDIRECT_BRANCH_THUNK_REGNUM); + + /* ex 0,0(%r1) */ + fprintf (asm_out_file, "\tex\t0,0(%%r%d)\n", + INDIRECT_BRANCH_THUNK_REGNUM); + } + + /* 0: j 0b */ + fputs ("0:\tj\t0b\n", asm_out_file); + + /* 1: br <regno> */ + fprintf (asm_out_file, "1:\tbr\t%%r%d\n", regno); + + final_end_function (); + init_insn_lengths (); + free_after_compilation (cfun); + set_cfun (NULL); + current_function_decl = NULL; +} + +/* Implement the asm.code_end target hook. */ + +static void +s390_code_end (void) +{ + int i; + + for (i = 1; i < 16; i++) + { + if (indirect_branch_z10thunk_mask & (1 << i)) + s390_output_indirect_thunk_function (i, true); + + if (indirect_branch_prez10thunk_mask & (1 << i)) + s390_output_indirect_thunk_function (i, false); + } + + if (TARGET_INDIRECT_BRANCH_TABLE) + { + int o; + int i; + + for (o = 0; o < INDIRECT_BRANCH_NUM_OPTIONS; o++) + { + if (indirect_branch_table_label_no[o] == 0) + continue; + + switch_to_section (get_section (indirect_branch_table_name[o], + 0, + NULL_TREE)); + for (i = 0; i < indirect_branch_table_label_no[o]; i++) + { + char label_start[32]; + + ASM_GENERATE_INTERNAL_LABEL (label_start, + indirect_branch_table_label[o], i); + + fputs ("\t.long\t", asm_out_file); + assemble_name_raw (asm_out_file, label_start); + fputs ("-.\n", asm_out_file); + } + switch_to_section (current_function_section ()); + } + } +} + +/* Implement the TARGET_CASE_VALUES_THRESHOLD target hook. */ + +unsigned int +s390_case_values_threshold (void) +{ + /* Disabling branch prediction for indirect jumps makes jump tables + much more expensive. */ + if (TARGET_INDIRECT_BRANCH_NOBP_JUMP) + return 20; + + return default_case_values_threshold (); +} + /* Initialize GCC target structure. */ #undef TARGET_ASM_ALIGNED_HI_OP @@ -16271,6 +16091,9 @@ #undef TARGET_PASS_BY_REFERENCE #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference +#undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE +#define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE s390_override_options_after_change + #undef TARGET_FUNCTION_OK_FOR_SIBCALL #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall #undef TARGET_FUNCTION_ARG @@ -16442,6 +16265,12 @@ #undef TARGET_CONSTANT_ALIGNMENT #define TARGET_CONSTANT_ALIGNMENT s390_constant_alignment +#undef TARGET_ASM_CODE_END +#define TARGET_ASM_CODE_END s390_code_end + +#undef TARGET_CASE_VALUES_THRESHOLD +#define TARGET_CASE_VALUES_THRESHOLD s390_case_values_threshold + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-s390.h"