Mercurial > hg > CbC > CbC_gcc
diff gcc/config/nds32/nds32-md-auxiliary.c @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
line wrap: on
line diff
--- a/gcc/config/nds32/nds32-md-auxiliary.c Fri Oct 27 22:46:09 2017 +0900 +++ b/gcc/config/nds32/nds32-md-auxiliary.c Thu Oct 25 07:37:49 2018 +0900 @@ -1,6 +1,6 @@ /* Auxiliary functions for output asm template or expand rtl pattern of Andes NDS32 cpu for GNU compiler - Copyright (C) 2012-2017 Free Software Foundation, Inc. + Copyright (C) 2012-2018 Free Software Foundation, Inc. Contributed by Andes Technology Corporation. This file is part of GCC. @@ -21,6 +21,8 @@ /* ------------------------------------------------------------------------ */ +#define IN_TARGET_CODE 1 + #include "config.h" #include "system.h" #include "coretypes.h" @@ -34,9 +36,33 @@ #include "recog.h" #include "output.h" #include "tm-constrs.h" +#include "expr.h" +#include "emit-rtl.h" +#include "explow.h" +#include "stringpool.h" +#include "attribs.h" + /* ------------------------------------------------------------------------ */ +static int +nds32_regno_to_enable4 (unsigned regno) +{ + switch (regno) + { + case 28: /* $r28/fp */ + return 0x8; + case 29: /* $r29/gp */ + return 0x4; + case 30: /* $r30/lp */ + return 0x2; + case 31: /* $r31/sp */ + return 0x1; + default: + gcc_unreachable (); + } +} + /* A helper function to return character based on byte size. */ static char nds32_byte_to_size (int byte) @@ -55,7 +81,1396 @@ } } -/* A helper function to return memory format. 
*/ +static int +nds32_inverse_cond_code (int code) +{ + switch (code) + { + case NE: + return EQ; + case EQ: + return NE; + case GT: + return LE; + case LE: + return GT; + case GE: + return LT; + case LT: + return GE; + default: + gcc_unreachable (); + } +} + +static const char * +nds32_cond_code_str (int code) +{ + switch (code) + { + case NE: + return "ne"; + case EQ: + return "eq"; + case GT: + return "gt"; + case LE: + return "le"; + case GE: + return "ge"; + case LT: + return "lt"; + default: + gcc_unreachable (); + } +} + +static void +output_cond_branch (int code, const char *suffix, bool r5_p, + bool long_jump_p, rtx *operands) +{ + char pattern[256]; + const char *cond_code; + bool align_p = NDS32_ALIGN_P (); + const char *align = align_p ? "\t.align\t2\n" : ""; + + if (r5_p && REGNO (operands[2]) == 5 && TARGET_16_BIT) + { + /* This is special case for beqs38 and bnes38, + second operand 2 can't be $r5 and it's almost meanless, + however it may occur after copy propgation. */ + if (code == EQ) + { + /* $r5 == $r5 always taken! */ + if (long_jump_p) + snprintf (pattern, sizeof (pattern), + "j\t%%3"); + else + snprintf (pattern, sizeof (pattern), + "j8\t%%3"); + } + else + /* Don't output anything since $r5 != $r5 never taken! 
*/ + pattern[0] = '\0'; + } + else if (long_jump_p) + { + int inverse_code = nds32_inverse_cond_code (code); + cond_code = nds32_cond_code_str (inverse_code); + + /* b<cond><suffix> $r0, $r1, .L0 + => + b<inverse_cond><suffix> $r0, $r1, .LCB0 + j .L0 + .LCB0: + + or + + b<cond><suffix> $r0, $r1, .L0 + => + b<inverse_cond><suffix> $r0, $r1, .LCB0 + j .L0 + .LCB0: + */ + if (r5_p && TARGET_16_BIT) + { + snprintf (pattern, sizeof (pattern), + "b%ss38\t %%2, .LCB%%=\n\tj\t%%3\n%s.LCB%%=:", + cond_code, align); + } + else + { + snprintf (pattern, sizeof (pattern), + "b%s%s\t%%1, %%2, .LCB%%=\n\tj\t%%3\n%s.LCB%%=:", + cond_code, suffix, align); + } + } + else + { + cond_code = nds32_cond_code_str (code); + if (r5_p && TARGET_16_BIT) + { + /* b<cond>s38 $r1, .L0 */ + snprintf (pattern, sizeof (pattern), + "b%ss38\t %%2, %%3", cond_code); + } + else + { + /* b<cond><suffix> $r0, $r1, .L0 */ + snprintf (pattern, sizeof (pattern), + "b%s%s\t%%1, %%2, %%3", cond_code, suffix); + } + } + + output_asm_insn (pattern, operands); +} + +static void +output_cond_branch_compare_zero (int code, const char *suffix, + bool long_jump_p, rtx *operands, + bool ta_implied_p) +{ + char pattern[256]; + const char *cond_code; + bool align_p = NDS32_ALIGN_P (); + const char *align = align_p ? 
"\t.align\t2\n" : ""; + if (long_jump_p) + { + int inverse_code = nds32_inverse_cond_code (code); + cond_code = nds32_cond_code_str (inverse_code); + + if (ta_implied_p && TARGET_16_BIT) + { + /* b<cond>z<suffix> .L0 + => + b<inverse_cond>z<suffix> .LCB0 + j .L0 + .LCB0: + */ + snprintf (pattern, sizeof (pattern), + "b%sz%s\t.LCB%%=\n\tj\t%%2\n%s.LCB%%=:", + cond_code, suffix, align); + } + else + { + /* b<cond>z<suffix> $r0, .L0 + => + b<inverse_cond>z<suffix> $r0, .LCB0 + j .L0 + .LCB0: + */ + snprintf (pattern, sizeof (pattern), + "b%sz%s\t%%1, .LCB%%=\n\tj\t%%2\n%s.LCB%%=:", + cond_code, suffix, align); + } + } + else + { + cond_code = nds32_cond_code_str (code); + if (ta_implied_p && TARGET_16_BIT) + { + /* b<cond>z<suffix> .L0 */ + snprintf (pattern, sizeof (pattern), + "b%sz%s\t%%2", cond_code, suffix); + } + else + { + /* b<cond>z<suffix> $r0, .L0 */ + snprintf (pattern, sizeof (pattern), + "b%sz%s\t%%1, %%2", cond_code, suffix); + } + } + + output_asm_insn (pattern, operands); +} + +static void +nds32_split_shiftrtdi3 (rtx dst, rtx src, rtx shiftamount, bool logic_shift_p) +{ + rtx src_high_part; + rtx dst_high_part, dst_low_part; + + dst_high_part = nds32_di_high_part_subreg (dst); + src_high_part = nds32_di_high_part_subreg (src); + dst_low_part = nds32_di_low_part_subreg (dst); + + if (CONST_INT_P (shiftamount)) + { + if (INTVAL (shiftamount) < 32) + { + if (logic_shift_p) + { + emit_insn (gen_uwext (dst_low_part, src, + shiftamount)); + emit_insn (gen_lshrsi3 (dst_high_part, src_high_part, + shiftamount)); + } + else + { + emit_insn (gen_wext (dst_low_part, src, + shiftamount)); + emit_insn (gen_ashrsi3 (dst_high_part, src_high_part, + shiftamount)); + } + } + else + { + rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode); + + if (logic_shift_p) + { + emit_insn (gen_lshrsi3 (dst_low_part, src_high_part, + new_shift_amout)); + emit_move_insn (dst_high_part, const0_rtx); + } + else + { + emit_insn (gen_ashrsi3 (dst_low_part, 
src_high_part, + new_shift_amout)); + emit_insn (gen_ashrsi3 (dst_high_part, src_high_part, + GEN_INT (31))); + } + } + } + else + { + rtx dst_low_part_l32, dst_high_part_l32; + rtx dst_low_part_g32, dst_high_part_g32; + rtx new_shift_amout, select_reg; + dst_low_part_l32 = gen_reg_rtx (SImode); + dst_high_part_l32 = gen_reg_rtx (SImode); + dst_low_part_g32 = gen_reg_rtx (SImode); + dst_high_part_g32 = gen_reg_rtx (SImode); + new_shift_amout = gen_reg_rtx (SImode); + select_reg = gen_reg_rtx (SImode); + + emit_insn (gen_andsi3 (shiftamount, shiftamount, GEN_INT (0x3f))); + + if (logic_shift_p) + { + /* + if (shiftamount < 32) + dst_low_part = wext (src, shiftamount) + dst_high_part = src_high_part >> shiftamount + else + dst_low_part = src_high_part >> (shiftamount & 0x1f) + dst_high_part = 0 + */ + emit_insn (gen_uwext (dst_low_part_l32, src, shiftamount)); + emit_insn (gen_lshrsi3 (dst_high_part_l32, src_high_part, + shiftamount)); + + emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); + emit_insn (gen_lshrsi3 (dst_low_part_g32, src_high_part, + new_shift_amout)); + emit_move_insn (dst_high_part_g32, const0_rtx); + } + else + { + /* + if (shiftamount < 32) + dst_low_part = wext (src, shiftamount) + dst_high_part = src_high_part >> shiftamount + else + dst_low_part = src_high_part >> (shiftamount & 0x1f) + # shift 31 for sign extend + dst_high_part = src_high_part >> 31 + */ + emit_insn (gen_wext (dst_low_part_l32, src, shiftamount)); + emit_insn (gen_ashrsi3 (dst_high_part_l32, src_high_part, + shiftamount)); + + emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); + emit_insn (gen_ashrsi3 (dst_low_part_g32, src_high_part, + new_shift_amout)); + emit_insn (gen_ashrsi3 (dst_high_part_g32, src_high_part, + GEN_INT (31))); + } + + emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); + + emit_insn (gen_cmovnsi (dst_low_part, select_reg, + dst_low_part_l32, dst_low_part_g32)); + emit_insn (gen_cmovnsi 
(dst_high_part, select_reg, + dst_high_part_l32, dst_high_part_g32)); + } +} + +/* ------------------------------------------------------------------------ */ + +/* Auxiliary function for expand RTL pattern. */ + +enum nds32_expand_result_type +nds32_expand_cbranch (rtx *operands) +{ + rtx tmp_reg; + enum rtx_code code; + + code = GET_CODE (operands[0]); + + /* If operands[2] is (const_int 0), + we can use beqz,bnez,bgtz,bgez,bltz,or blez instructions. + So we have gcc generate original template rtx. */ + if (GET_CODE (operands[2]) == CONST_INT) + if (INTVAL (operands[2]) == 0) + if ((code != GTU) + && (code != GEU) + && (code != LTU) + && (code != LEU)) + return EXPAND_CREATE_TEMPLATE; + + /* For other comparison, NDS32 ISA only has slt (Set-on-Less-Than) + behavior for the comparison, we might need to generate other + rtx patterns to achieve same semantic. */ + switch (code) + { + case GT: + case GTU: + if (GET_CODE (operands[2]) == CONST_INT) + { + /* GT reg_A, const_int => !(LT reg_A, const_int + 1) */ + if (optimize_size || optimize == 0) + tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); + else + tmp_reg = gen_reg_rtx (SImode); + + /* We want to plus 1 into the integer value + of operands[2] to create 'slt' instruction. + This caculation is performed on the host machine, + which may be 64-bit integer. + So the meaning of caculation result may be + different from the 32-bit nds32 target. + + For example: + 0x7fffffff + 0x1 -> 0x80000000, + this value is POSITIVE on 64-bit machine, + but the expected value on 32-bit nds32 target + should be NEGATIVE value. + + Hence, instead of using GEN_INT(), we use gen_int_mode() to + explicitly create SImode constant rtx. 
*/ + enum rtx_code cmp_code; + + rtx plus1 = gen_int_mode (INTVAL (operands[2]) + 1, SImode); + if (satisfies_constraint_Is15 (plus1)) + { + operands[2] = plus1; + cmp_code = EQ; + if (code == GT) + { + /* GT, use slts instruction */ + emit_insn ( + gen_slts_compare (tmp_reg, operands[1], operands[2])); + } + else + { + /* GTU, use slt instruction */ + emit_insn ( + gen_slt_compare (tmp_reg, operands[1], operands[2])); + } + } + else + { + cmp_code = NE; + if (code == GT) + { + /* GT, use slts instruction */ + emit_insn ( + gen_slts_compare (tmp_reg, operands[2], operands[1])); + } + else + { + /* GTU, use slt instruction */ + emit_insn ( + gen_slt_compare (tmp_reg, operands[2], operands[1])); + } + } + + PUT_CODE (operands[0], cmp_code); + operands[1] = tmp_reg; + operands[2] = const0_rtx; + emit_insn (gen_cbranchsi4 (operands[0], operands[1], + operands[2], operands[3])); + + return EXPAND_DONE; + } + else + { + /* GT reg_A, reg_B => LT reg_B, reg_A */ + if (optimize_size || optimize == 0) + tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); + else + tmp_reg = gen_reg_rtx (SImode); + + if (code == GT) + { + /* GT, use slts instruction */ + emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1])); + } + else + { + /* GTU, use slt instruction */ + emit_insn (gen_slt_compare (tmp_reg, operands[2], operands[1])); + } + + PUT_CODE (operands[0], NE); + operands[1] = tmp_reg; + operands[2] = const0_rtx; + emit_insn (gen_cbranchsi4 (operands[0], operands[1], + operands[2], operands[3])); + + return EXPAND_DONE; + } + + case GE: + case GEU: + /* GE reg_A, reg_B => !(LT reg_A, reg_B) */ + /* GE reg_A, const_int => !(LT reg_A, const_int) */ + if (optimize_size || optimize == 0) + tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); + else + tmp_reg = gen_reg_rtx (SImode); + + if (code == GE) + { + /* GE, use slts instruction */ + emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); + } + else + { + /* GEU, use slt instruction */ + emit_insn (gen_slt_compare (tmp_reg, 
operands[1], operands[2])); + } + + PUT_CODE (operands[0], EQ); + operands[1] = tmp_reg; + operands[2] = const0_rtx; + emit_insn (gen_cbranchsi4 (operands[0], operands[1], + operands[2], operands[3])); + + return EXPAND_DONE; + + case LT: + case LTU: + /* LT reg_A, reg_B => LT reg_A, reg_B */ + /* LT reg_A, const_int => LT reg_A, const_int */ + if (optimize_size || optimize == 0) + tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); + else + tmp_reg = gen_reg_rtx (SImode); + + if (code == LT) + { + /* LT, use slts instruction */ + emit_insn (gen_slts_compare (tmp_reg, operands[1], operands[2])); + } + else + { + /* LTU, use slt instruction */ + emit_insn (gen_slt_compare (tmp_reg, operands[1], operands[2])); + } + + PUT_CODE (operands[0], NE); + operands[1] = tmp_reg; + operands[2] = const0_rtx; + emit_insn (gen_cbranchsi4 (operands[0], operands[1], + operands[2], operands[3])); + + return EXPAND_DONE; + + case LE: + case LEU: + if (GET_CODE (operands[2]) == CONST_INT) + { + /* LE reg_A, const_int => LT reg_A, const_int + 1 */ + if (optimize_size || optimize == 0) + tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); + else + tmp_reg = gen_reg_rtx (SImode); + + enum rtx_code cmp_code; + /* Note that (le:SI X INT_MAX) is not the same as (lt:SI X INT_MIN). + We better have an assert here in case GCC does not properly + optimize it away. The INT_MAX here is 0x7fffffff for target. 
*/ + rtx plus1 = gen_int_mode (INTVAL (operands[2]) + 1, SImode); + if (satisfies_constraint_Is15 (plus1)) + { + operands[2] = plus1; + cmp_code = NE; + if (code == LE) + { + /* LE, use slts instruction */ + emit_insn ( + gen_slts_compare (tmp_reg, operands[1], operands[2])); + } + else + { + /* LEU, use slt instruction */ + emit_insn ( + gen_slt_compare (tmp_reg, operands[1], operands[2])); + } + } + else + { + cmp_code = EQ; + if (code == LE) + { + /* LE, use slts instruction */ + emit_insn ( + gen_slts_compare (tmp_reg, operands[2], operands[1])); + } + else + { + /* LEU, use slt instruction */ + emit_insn ( + gen_slt_compare (tmp_reg, operands[2], operands[1])); + } + } + + PUT_CODE (operands[0], cmp_code); + operands[1] = tmp_reg; + operands[2] = const0_rtx; + emit_insn (gen_cbranchsi4 (operands[0], operands[1], + operands[2], operands[3])); + + return EXPAND_DONE; + } + else + { + /* LE reg_A, reg_B => !(LT reg_B, reg_A) */ + if (optimize_size || optimize == 0) + tmp_reg = gen_rtx_REG (SImode, TA_REGNUM); + else + tmp_reg = gen_reg_rtx (SImode); + + if (code == LE) + { + /* LE, use slts instruction */ + emit_insn (gen_slts_compare (tmp_reg, operands[2], operands[1])); + } + else + { + /* LEU, use slt instruction */ + emit_insn (gen_slt_compare (tmp_reg, operands[2], operands[1])); + } + + PUT_CODE (operands[0], EQ); + operands[1] = tmp_reg; + operands[2] = const0_rtx; + emit_insn (gen_cbranchsi4 (operands[0], operands[1], + operands[2], operands[3])); + + return EXPAND_DONE; + } + + case EQ: + case NE: + /* NDS32 ISA has various form for eq/ne behavior no matter + what kind of the operand is. + So just generate original template rtx. */ + + /* Put operands[2] into register if operands[2] is a large + const_int or ISAv2. 
*/ + if (GET_CODE (operands[2]) == CONST_INT + && (!satisfies_constraint_Is11 (operands[2]) + || TARGET_ISA_V2)) + operands[2] = force_reg (SImode, operands[2]); + + return EXPAND_CREATE_TEMPLATE; + + default: + return EXPAND_FAIL; + } +} + +enum nds32_expand_result_type +nds32_expand_cstore (rtx *operands) +{ + rtx tmp_reg; + enum rtx_code code; + + code = GET_CODE (operands[1]); + + switch (code) + { + case EQ: + case NE: + if (GET_CODE (operands[3]) == CONST_INT) + { + /* reg_R = (reg_A == const_int_B) + --> xori reg_C, reg_A, const_int_B + slti reg_R, reg_C, const_int_1 + reg_R = (reg_A != const_int_B) + --> xori reg_C, reg_A, const_int_B + slti reg_R, const_int0, reg_C */ + tmp_reg = gen_reg_rtx (SImode); + + /* If the integer value is not in the range of imm15s, + we need to force register first because our addsi3 pattern + only accept nds32_rimm15s_operand predicate. */ + rtx new_imm = gen_int_mode (-INTVAL (operands[3]), SImode); + if (satisfies_constraint_Is15 (new_imm)) + emit_insn (gen_addsi3 (tmp_reg, operands[2], new_imm)); + else + { + if (!(satisfies_constraint_Iu15 (operands[3]) + || (TARGET_EXT_PERF + && satisfies_constraint_It15 (operands[3])))) + operands[3] = force_reg (SImode, operands[3]); + emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3])); + } + + if (code == EQ) + emit_insn (gen_slt_eq0 (operands[0], tmp_reg)); + else + emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg)); + + return EXPAND_DONE; + } + else + { + /* reg_R = (reg_A == reg_B) + --> xor reg_C, reg_A, reg_B + slti reg_R, reg_C, const_int_1 + reg_R = (reg_A != reg_B) + --> xor reg_C, reg_A, reg_B + slti reg_R, const_int0, reg_C */ + tmp_reg = gen_reg_rtx (SImode); + emit_insn (gen_xorsi3 (tmp_reg, operands[2], operands[3])); + if (code == EQ) + emit_insn (gen_slt_eq0 (operands[0], tmp_reg)); + else + emit_insn (gen_slt_compare (operands[0], const0_rtx, tmp_reg)); + + return EXPAND_DONE; + } + case GT: + case GTU: + /* reg_R = (reg_A > reg_B) --> slt reg_R, 
reg_B, reg_A */ + /* reg_R = (reg_A > const_int_B) --> slt reg_R, const_int_B, reg_A */ + if (code == GT) + { + /* GT, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], operands[3], operands[2])); + } + else + { + /* GTU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], operands[3], operands[2])); + } + + return EXPAND_DONE; + + case GE: + case GEU: + if (GET_CODE (operands[3]) == CONST_INT) + { + /* reg_R = (reg_A >= const_int_B) + --> movi reg_C, const_int_B - 1 + slt reg_R, reg_C, reg_A */ + tmp_reg = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (tmp_reg, + gen_int_mode (INTVAL (operands[3]) - 1, + SImode))); + if (code == GE) + { + /* GE, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], tmp_reg, operands[2])); + } + else + { + /* GEU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], tmp_reg, operands[2])); + } + + return EXPAND_DONE; + } + else + { + /* reg_R = (reg_A >= reg_B) + --> slt reg_R, reg_A, reg_B + xori reg_R, reg_R, const_int_1 */ + if (code == GE) + { + /* GE, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], + operands[2], operands[3])); + } + else + { + /* GEU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], + operands[2], operands[3])); + } + + /* perform 'not' behavior */ + emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); + + return EXPAND_DONE; + } + + case LT: + case LTU: + /* reg_R = (reg_A < reg_B) --> slt reg_R, reg_A, reg_B */ + /* reg_R = (reg_A < const_int_B) --> slt reg_R, reg_A, const_int_B */ + if (code == LT) + { + /* LT, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], operands[2], operands[3])); + } + else + { + /* LTU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], operands[2], operands[3])); + } + + return EXPAND_DONE; + + case LE: + case LEU: + if (GET_CODE (operands[3]) == CONST_INT) + { + /* reg_R = (reg_A <= const_int_B) + --> movi reg_C, const_int_B + 1 + slt reg_R, 
reg_A, reg_C */ + tmp_reg = gen_reg_rtx (SImode); + + emit_insn (gen_movsi (tmp_reg, + gen_int_mode (INTVAL (operands[3]) + 1, + SImode))); + if (code == LE) + { + /* LE, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], operands[2], tmp_reg)); + } + else + { + /* LEU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], operands[2], tmp_reg)); + } + + return EXPAND_DONE; + } + else + { + /* reg_R = (reg_A <= reg_B) --> slt reg_R, reg_B, reg_A + xori reg_R, reg_R, const_int_1 */ + if (code == LE) + { + /* LE, use slts instruction */ + emit_insn (gen_slts_compare (operands[0], + operands[3], operands[2])); + } + else + { + /* LEU, use slt instruction */ + emit_insn (gen_slt_compare (operands[0], + operands[3], operands[2])); + } + + /* perform 'not' behavior */ + emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); + + return EXPAND_DONE; + } + + + default: + gcc_unreachable (); + } +} + +void +nds32_expand_float_cbranch (rtx *operands) +{ + enum rtx_code code = GET_CODE (operands[0]); + enum rtx_code new_code = code; + rtx cmp_op0 = operands[1]; + rtx cmp_op1 = operands[2]; + rtx tmp_reg; + rtx tmp; + + int reverse = 0; + + /* Main Goal: Use compare instruction + branch instruction. + + For example: + GT, GE: swap condition and swap operands and generate + compare instruction(LT, LE) + branch not equal instruction. + + UNORDERED, LT, LE, EQ: no need to change and generate + compare instruction(UNORDERED, LT, LE, EQ) + branch not equal instruction. + + ORDERED, NE: reverse condition and generate + compare instruction(EQ) + branch equal instruction. 
*/ + + switch (code) + { + case GT: + case GE: + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 = tmp; + new_code = swap_condition (new_code); + break; + case UNORDERED: + case LT: + case LE: + case EQ: + break; + case ORDERED: + case NE: + new_code = reverse_condition (new_code); + reverse = 1; + break; + case UNGT: + case UNGE: + new_code = reverse_condition_maybe_unordered (new_code); + reverse = 1; + break; + case UNLT: + case UNLE: + new_code = reverse_condition_maybe_unordered (new_code); + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 = tmp; + new_code = swap_condition (new_code); + reverse = 1; + break; + default: + return; + } + + tmp_reg = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (tmp_reg, + gen_rtx_fmt_ee (new_code, SImode, + cmp_op0, cmp_op1))); + + PUT_CODE (operands[0], reverse ? EQ : NE); + emit_insn (gen_cbranchsi4 (operands[0], tmp_reg, + const0_rtx, operands[3])); +} + +void +nds32_expand_float_cstore (rtx *operands) +{ + enum rtx_code code = GET_CODE (operands[1]); + enum rtx_code new_code = code; + machine_mode mode = GET_MODE (operands[2]); + + rtx cmp_op0 = operands[2]; + rtx cmp_op1 = operands[3]; + rtx tmp; + + /* Main Goal: Use compare instruction to store value. + + For example: + GT, GE: swap condition and swap operands. + reg_R = (reg_A > reg_B) --> fcmplt reg_R, reg_B, reg_A + reg_R = (reg_A >= reg_B) --> fcmple reg_R, reg_B, reg_A + + LT, LE, EQ: no need to change, it is already LT, LE, EQ. + reg_R = (reg_A < reg_B) --> fcmplt reg_R, reg_A, reg_B + reg_R = (reg_A <= reg_B) --> fcmple reg_R, reg_A, reg_B + reg_R = (reg_A == reg_B) --> fcmpeq reg_R, reg_A, reg_B + + ORDERED: reverse condition and using xor insturction to achieve 'ORDERED'. + reg_R = (reg_A != reg_B) --> fcmpun reg_R, reg_A, reg_B + xor reg_R, reg_R, const1_rtx + + NE: reverse condition and using xor insturction to achieve 'NE'. 
+ reg_R = (reg_A != reg_B) --> fcmpeq reg_R, reg_A, reg_B + xor reg_R, reg_R, const1_rtx */ + switch (code) + { + case GT: + case GE: + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 =tmp; + new_code = swap_condition (new_code); + break; + case UNORDERED: + case LT: + case LE: + case EQ: + break; + case ORDERED: + if (mode == SFmode) + emit_insn (gen_cmpsf_un (operands[0], cmp_op0, cmp_op1)); + else + emit_insn (gen_cmpdf_un (operands[0], cmp_op0, cmp_op1)); + + emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); + return; + case NE: + if (mode == SFmode) + emit_insn (gen_cmpsf_eq (operands[0], cmp_op0, cmp_op1)); + else + emit_insn (gen_cmpdf_eq (operands[0], cmp_op0, cmp_op1)); + + emit_insn (gen_xorsi3 (operands[0], operands[0], const1_rtx)); + return; + default: + return; + } + + emit_insn (gen_rtx_SET (operands[0], + gen_rtx_fmt_ee (new_code, SImode, + cmp_op0, cmp_op1))); +} + +enum nds32_expand_result_type +nds32_expand_movcc (rtx *operands) +{ + enum rtx_code code = GET_CODE (operands[1]); + enum rtx_code new_code = code; + machine_mode cmp0_mode = GET_MODE (XEXP (operands[1], 0)); + rtx cmp_op0 = XEXP (operands[1], 0); + rtx cmp_op1 = XEXP (operands[1], 1); + rtx tmp; + + if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) + && XEXP (operands[1], 1) == const0_rtx) + { + /* If the operands[1] rtx is already (eq X 0) or (ne X 0), + we have gcc generate original template rtx. */ + return EXPAND_CREATE_TEMPLATE; + } + else if ((TARGET_FPU_SINGLE && cmp0_mode == SFmode) + || (TARGET_FPU_DOUBLE && cmp0_mode == DFmode)) + { + nds32_expand_float_movcc (operands); + } + else + { + /* Since there is only 'slt'(Set when Less Than) instruction for + comparison in Andes ISA, the major strategy we use here is to + convert conditional move into 'LT + EQ' or 'LT + NE' rtx combination. + We design constraints properly so that the reload phase will assist + to make one source operand to use same register as result operand. 
+ Then we can use cmovz/cmovn to catch the other source operand + which has different register. */ + int reverse = 0; + + /* Main Goal: Use 'LT + EQ' or 'LT + NE' to target "then" part + Strategy : Reverse condition and swap comparison operands + + For example: + + a <= b ? P : Q (LE or LEU) + --> a > b ? Q : P (reverse condition) + --> b < a ? Q : P (swap comparison operands to achieve 'LT/LTU') + + a >= b ? P : Q (GE or GEU) + --> a < b ? Q : P (reverse condition to achieve 'LT/LTU') + + a < b ? P : Q (LT or LTU) + --> (NO NEED TO CHANGE, it is already 'LT/LTU') + + a > b ? P : Q (GT or GTU) + --> b < a ? P : Q (swap comparison operands to achieve 'LT/LTU') */ + switch (code) + { + case GE: case GEU: case LE: case LEU: + new_code = reverse_condition (code); + reverse = 1; + break; + case EQ: + case NE: + /* no need to reverse condition */ + break; + default: + return EXPAND_FAIL; + } + + /* For '>' comparison operator, we swap operands + so that we can have 'LT/LTU' operator. */ + if (new_code == GT || new_code == GTU) + { + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 = tmp; + + new_code = swap_condition (new_code); + } + + /* Use a temporary register to store slt/slts result. */ + tmp = gen_reg_rtx (SImode); + + if (new_code == EQ || new_code == NE) + { + emit_insn (gen_xorsi3 (tmp, cmp_op0, cmp_op1)); + /* tmp == 0 if cmp_op0 == cmp_op1. */ + operands[1] = gen_rtx_fmt_ee (new_code, VOIDmode, tmp, const0_rtx); + } + else + { + /* This emit_insn will create corresponding 'slt/slts' + insturction. */ + if (new_code == LT) + emit_insn (gen_slts_compare (tmp, cmp_op0, cmp_op1)); + else if (new_code == LTU) + emit_insn (gen_slt_compare (tmp, cmp_op0, cmp_op1)); + else + gcc_unreachable (); + + /* Change comparison semantic into (eq X 0) or (ne X 0) behavior + so that cmovz or cmovn will be matched later. 
+ + For reverse condition cases, we want to create a semantic that: + (eq X 0) --> pick up "else" part + For normal cases, we want to create a semantic that: + (ne X 0) --> pick up "then" part + + Later we will have cmovz/cmovn instruction pattern to + match corresponding behavior and output instruction. */ + operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE, + VOIDmode, tmp, const0_rtx); + } + } + return EXPAND_CREATE_TEMPLATE; +} + +void +nds32_expand_float_movcc (rtx *operands) +{ + if ((GET_CODE (operands[1]) == EQ || GET_CODE (operands[1]) == NE) + && GET_MODE (XEXP (operands[1], 0)) == SImode + && XEXP (operands[1], 1) == const0_rtx) + { + /* If the operands[1] rtx is already (eq X 0) or (ne X 0), + we have gcc generate original template rtx. */ + return; + } + else + { + enum rtx_code code = GET_CODE (operands[1]); + enum rtx_code new_code = code; + machine_mode cmp0_mode = GET_MODE (XEXP (operands[1], 0)); + machine_mode cmp1_mode = GET_MODE (XEXP (operands[1], 1)); + rtx cmp_op0 = XEXP (operands[1], 0); + rtx cmp_op1 = XEXP (operands[1], 1); + rtx tmp; + + /* Compare instruction Operations: (cmp_op0 condition cmp_op1) ? 1 : 0, + when result is 1, and 'reverse' be set 1 for fcmovzs instructuin. */ + int reverse = 0; + + /* Main Goal: Use cmpare instruction + conditional move instruction. + Strategy : swap condition and swap comparison operands. + + For example: + a > b ? P : Q (GT) + --> a < b ? Q : P (swap condition) + --> b < a ? Q : P (swap comparison operands to achieve 'GT') + + a >= b ? P : Q (GE) + --> a <= b ? Q : P (swap condition) + --> b <= a ? Q : P (swap comparison operands to achieve 'GE') + + a < b ? P : Q (LT) + --> (NO NEED TO CHANGE, it is already 'LT') + + a >= b ? P : Q (LE) + --> (NO NEED TO CHANGE, it is already 'LE') + + a == b ? 
P : Q (EQ) + --> (NO NEED TO CHANGE, it is already 'EQ') */ + + switch (code) + { + case GT: + case GE: + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 =tmp; + new_code = swap_condition (new_code); + break; + case UNORDERED: + case LT: + case LE: + case EQ: + break; + case ORDERED: + case NE: + reverse = 1; + new_code = reverse_condition (new_code); + break; + case UNGT: + case UNGE: + new_code = reverse_condition_maybe_unordered (new_code); + reverse = 1; + break; + case UNLT: + case UNLE: + new_code = reverse_condition_maybe_unordered (new_code); + tmp = cmp_op0; + cmp_op0 = cmp_op1; + cmp_op1 = tmp; + new_code = swap_condition (new_code); + reverse = 1; + break; + default: + return; + } + + /* Use a temporary register to store fcmpxxs result. */ + tmp = gen_reg_rtx (SImode); + + /* Create float compare instruction for SFmode and DFmode, + other MODE using cstoresi create compare instruction. */ + if ((cmp0_mode == DFmode || cmp0_mode == SFmode) + && (cmp1_mode == DFmode || cmp1_mode == SFmode)) + { + /* This emit_insn create corresponding float compare instruction */ + emit_insn (gen_rtx_SET (tmp, + gen_rtx_fmt_ee (new_code, SImode, + cmp_op0, cmp_op1))); + } + else + { + /* This emit_insn using cstoresi create corresponding + compare instruction */ + PUT_CODE (operands[1], new_code); + emit_insn (gen_cstoresi4 (tmp, operands[1], + cmp_op0, cmp_op1)); + } + /* operands[1] crete corresponding condition move instruction + for fcmovzs and fcmovns. */ + operands[1] = gen_rtx_fmt_ee (reverse ? EQ : NE, + VOIDmode, tmp, const0_rtx); + } +} + +void +nds32_emit_push_fpr_callee_saved (int base_offset) +{ + rtx fpu_insn; + rtx reg, mem; + unsigned int regno = cfun->machine->callee_saved_first_fpr_regno; + unsigned int last_fpr = cfun->machine->callee_saved_last_fpr_regno; + + while (regno <= last_fpr) + { + /* Handling two registers, using fsdi instruction. 
*/ + reg = gen_rtx_REG (DFmode, regno); + mem = gen_frame_mem (DFmode, plus_constant (Pmode, + stack_pointer_rtx, + base_offset)); + base_offset += 8; + regno += 2; + fpu_insn = emit_move_insn (mem, reg); + RTX_FRAME_RELATED_P (fpu_insn) = 1; + } +} + +void +nds32_emit_pop_fpr_callee_saved (int gpr_padding_size) +{ + rtx fpu_insn; + rtx reg, mem, addr; + rtx dwarf, adjust_sp_rtx; + unsigned int regno = cfun->machine->callee_saved_first_fpr_regno; + unsigned int last_fpr = cfun->machine->callee_saved_last_fpr_regno; + int padding = 0; + + while (regno <= last_fpr) + { + /* Handling two registers, using fldi.bi instruction. */ + if ((regno + 1) >= last_fpr) + padding = gpr_padding_size; + + reg = gen_rtx_REG (DFmode, (regno)); + addr = gen_rtx_POST_MODIFY (Pmode, stack_pointer_rtx, + gen_rtx_PLUS (Pmode, stack_pointer_rtx, + GEN_INT (8 + padding))); + mem = gen_frame_mem (DFmode, addr); + regno += 2; + fpu_insn = emit_move_insn (reg, mem); + + adjust_sp_rtx = + gen_rtx_SET (stack_pointer_rtx, + plus_constant (Pmode, stack_pointer_rtx, + 8 + padding)); + + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, NULL_RTX); + /* Tell gcc we adjust SP in this insn. */ + dwarf = alloc_reg_note (REG_CFA_ADJUST_CFA, copy_rtx (adjust_sp_rtx), + dwarf); + RTX_FRAME_RELATED_P (fpu_insn) = 1; + REG_NOTES (fpu_insn) = dwarf; + } +} + +void +nds32_emit_v3pop_fpr_callee_saved (int base) +{ + int fpu_base_addr = base; + int regno; + rtx fpu_insn; + rtx reg, mem; + rtx dwarf; + + regno = cfun->machine->callee_saved_first_fpr_regno; + while (regno <= cfun->machine->callee_saved_last_fpr_regno) + { + /* Handling two registers, using fldi instruction. 
*/ + reg = gen_rtx_REG (DFmode, regno); + mem = gen_frame_mem (DFmode, plus_constant (Pmode, + stack_pointer_rtx, + fpu_base_addr)); + fpu_base_addr += 8; + regno += 2; + fpu_insn = emit_move_insn (reg, mem); + dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, NULL_RTX); + RTX_FRAME_RELATED_P (fpu_insn) = 1; + REG_NOTES (fpu_insn) = dwarf; + } +} + +enum nds32_expand_result_type +nds32_expand_extv (rtx *operands) +{ + gcc_assert (CONST_INT_P (operands[2]) && CONST_INT_P (operands[3])); + HOST_WIDE_INT width = INTVAL (operands[2]); + HOST_WIDE_INT bitpos = INTVAL (operands[3]); + rtx dst = operands[0]; + rtx src = operands[1]; + + if (MEM_P (src) + && width == 32 + && (bitpos % BITS_PER_UNIT) == 0 + && GET_MODE_BITSIZE (GET_MODE (dst)) == width) + { + rtx newmem = adjust_address (src, GET_MODE (dst), + bitpos / BITS_PER_UNIT); + + rtx base_addr = force_reg (Pmode, XEXP (newmem, 0)); + + emit_insn (gen_unaligned_loadsi (dst, base_addr)); + + return EXPAND_DONE; + } + return EXPAND_FAIL; +} + +enum nds32_expand_result_type +nds32_expand_insv (rtx *operands) +{ + gcc_assert (CONST_INT_P (operands[1]) && CONST_INT_P (operands[2])); + HOST_WIDE_INT width = INTVAL (operands[1]); + HOST_WIDE_INT bitpos = INTVAL (operands[2]); + rtx dst = operands[0]; + rtx src = operands[3]; + + if (MEM_P (dst) + && width == 32 + && (bitpos % BITS_PER_UNIT) == 0 + && GET_MODE_BITSIZE (GET_MODE (src)) == width) + { + rtx newmem = adjust_address (dst, GET_MODE (src), + bitpos / BITS_PER_UNIT); + + rtx base_addr = force_reg (Pmode, XEXP (newmem, 0)); + + emit_insn (gen_unaligned_storesi (base_addr, src)); + + return EXPAND_DONE; + } + return EXPAND_FAIL; +} + +/* ------------------------------------------------------------------------ */ + +/* Function to generate PC relative jump table. + Refer to nds32.md for more details. + + The following is the sample for the case that diff value + can be presented in '.short' size. 
+ + addi $r1, $r1, -(case_lower_bound) + slti $ta, $r1, (case_number) + beqz $ta, .L_skip_label + + la $ta, .L35 ! get jump table address + lh $r1, [$ta + $r1 << 1] ! load symbol diff from jump table entry + addi $ta, $r1, $ta + jr5 $ta + + ! jump table entry + L35: + .short .L25-.L35 + .short .L26-.L35 + .short .L27-.L35 + .short .L28-.L35 + .short .L29-.L35 + .short .L30-.L35 + .short .L31-.L35 + .short .L32-.L35 + .short .L33-.L35 + .short .L34-.L35 */ +const char * +nds32_output_casesi_pc_relative (rtx *operands) +{ + machine_mode mode; + rtx diff_vec; + + diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[1]))); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + /* Step C: "t <-- operands[1]". */ + if (flag_pic) + { + output_asm_insn ("sethi\t$ta, hi20(%l1@GOTOFF)", operands); + output_asm_insn ("ori\t$ta, $ta, lo12(%l1@GOTOFF)", operands); + output_asm_insn ("add\t$ta, $ta, $gp", operands); + } + else + output_asm_insn ("la\t$ta, %l1", operands); + + /* Get the mode of each element in the difference vector. */ + mode = GET_MODE (diff_vec); + + /* Step D: "z <-- (mem (plus (operands[0] << m) t))", + where m is 0, 1, or 2 to load address-diff value from table. */ + switch (mode) + { + case E_QImode: + output_asm_insn ("lb\t%2, [$ta + %0 << 0]", operands); + break; + case E_HImode: + output_asm_insn ("lh\t%2, [$ta + %0 << 1]", operands); + break; + case E_SImode: + output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); + break; + default: + gcc_unreachable (); + } + + /* Step E: "t <-- z + t". + Add table label_ref with address-diff value to + obtain target case address. */ + output_asm_insn ("add\t$ta, %2, $ta", operands); + + /* Step F: jump to target with register t. */ + if (TARGET_16_BIT) + return "jr5\t$ta"; + else + return "jr\t$ta"; +} + +/* Function to generate normal jump table. */ +const char * +nds32_output_casesi (rtx *operands) +{ + /* Step C: "t <-- operands[1]". 
*/ + if (flag_pic) + { + output_asm_insn ("sethi\t$ta, hi20(%l1@GOTOFF)", operands); + output_asm_insn ("ori\t$ta, $ta, lo12(%l1@GOTOFF)", operands); + output_asm_insn ("add\t$ta, $ta, $gp", operands); + } + else + output_asm_insn ("la\t$ta, %l1", operands); + + /* Step D: "z <-- (mem (plus (operands[0] << 2) t))". */ + output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); + + /* No need to perform Step E, which is only used for + pc relative jump table. */ + + /* Step F: jump to target with register z. */ + if (TARGET_16_BIT) + return "jr5\t%2"; + else + return "jr\t%2"; +} + +/* Function to return memory format. */ enum nds32_16bit_address_type nds32_mem_format (rtx op) { @@ -71,7 +1486,8 @@ op = XEXP (op, 0); /* 45 format. */ - if (GET_CODE (op) == REG && (mode_test == SImode)) + if (GET_CODE (op) == REG + && ((mode_test == SImode) || (mode_test == SFmode))) return ADDRESS_REG; /* 333 format for QI/HImode. */ @@ -79,7 +1495,8 @@ return ADDRESS_LO_REG_IMM3U; /* post_inc 333 format. */ - if ((GET_CODE (op) == POST_INC) && (mode_test == SImode)) + if ((GET_CODE (op) == POST_INC) + && ((mode_test == SImode) || (mode_test == SFmode))) { regno = REGNO(XEXP (op, 0)); @@ -89,14 +1506,14 @@ /* post_inc 333 format. */ if ((GET_CODE (op) == POST_MODIFY) - && (mode_test == SImode) + && ((mode_test == SImode) || (mode_test == SFmode)) && (REG_P (XEXP (XEXP (op, 1), 0))) && (CONST_INT_P (XEXP (XEXP (op, 1), 1)))) { regno = REGNO (XEXP (XEXP (op, 1), 0)); val = INTVAL (XEXP (XEXP (op, 1), 1)); - if (regno < 8 && val < 32) - return ADDRESS_POST_INC_LO_REG_IMM3U; + if (regno < 8 && val > 0 && val < 32) + return ADDRESS_POST_MODIFY_LO_REG_IMM3U; } if ((GET_CODE (op) == PLUS) @@ -107,7 +1524,7 @@ regno = REGNO(XEXP (op, 0)); - if (regno > 7 + if (regno > 8 && regno != SP_REGNUM && regno != FP_REGNUM) return ADDRESS_NOT_16BIT_FORMAT; @@ -129,6 +1546,10 @@ case E_SImode: case E_SFmode: case E_DFmode: + /* r8 imply fe format. 
*/ + if ((regno == 8) && + (val >= -128 && val <= -4 && (val % 4 == 0))) + return ADDRESS_R8_IMM7U; /* fp imply 37 format. */ if ((regno == FP_REGNUM) && (val >= 0 && val < 512 && (val % 4 == 0))) @@ -171,7 +1592,11 @@ output_asm_insn (pattern, operands); break; case ADDRESS_POST_INC_LO_REG_IMM3U: - snprintf (pattern, sizeof (pattern), "s%ci333.bi\t%%1, %%0", size); + snprintf (pattern, sizeof (pattern), "swi333.bi\t%%1, %%0, 4"); + output_asm_insn (pattern, operands); + break; + case ADDRESS_POST_MODIFY_LO_REG_IMM3U: + snprintf (pattern, sizeof (pattern), "swi333.bi\t%%1, %%0"); output_asm_insn (pattern, operands); break; case ADDRESS_FP_IMM7U: @@ -210,9 +1635,16 @@ output_asm_insn (pattern, operands); break; case ADDRESS_POST_INC_LO_REG_IMM3U: - snprintf (pattern, sizeof (pattern), "l%ci333.bi\t%%0, %%1", size); + snprintf (pattern, sizeof (pattern), "lwi333.bi\t%%0, %%1, 4"); output_asm_insn (pattern, operands); break; + case ADDRESS_POST_MODIFY_LO_REG_IMM3U: + snprintf (pattern, sizeof (pattern), "lwi333.bi\t%%0, %%1"); + output_asm_insn (pattern, operands); + break; + case ADDRESS_R8_IMM7U: + output_asm_insn ("lwi45.fe\t%0, %e1", operands); + break; case ADDRESS_FP_IMM7U: output_asm_insn ("lwi37\t%0, %1", operands); break; @@ -557,6 +1989,10 @@ int last_argument_regno = NDS32_FIRST_GPR_REGNUM + NDS32_MAX_GPR_REGS_FOR_ARGS - 1; + /* Pick up first and last eh data regno for further use. */ + int rb_eh_data = cfun->machine->eh_return_data_first_regno; + int re_eh_data = cfun->machine->eh_return_data_last_regno; + int first_eh_data_regno = EH_RETURN_DATA_REGNO (0); /* Pick up callee-saved first regno and last regno for further use. */ int rb_callee_saved = cfun->machine->callee_saved_first_gpr_regno; int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno; @@ -576,13 +2012,26 @@ return ""; } + /* If last_argument_regno is not mentioned in par_rtx, we can confirm that + we do not need to push argument registers for variadic function. 
+ But we still need to check if we need to push exception handling + data registers. */ + if (reg_mentioned_p (gen_rtx_REG (SImode, first_eh_data_regno), par_rtx)) + { + /* Set operands[0] and operands[1]. */ + operands[0] = gen_rtx_REG (SImode, rb_eh_data); + operands[1] = gen_rtx_REG (SImode, re_eh_data); + /* Create assembly code pattern: "Rb, Re, { }". */ + snprintf (pattern, sizeof (pattern), "push.s\t%s", "%0, %1, { }"); + /* We use output_asm_insn() to output assembly code by ourself. */ + output_asm_insn (pattern, operands); + return ""; + } + /* If we step here, we are going to do v3push or multiple push operation. */ - /* The v3push/v3pop instruction should only be applied on - none-isr and none-variadic function. */ - if (TARGET_V3PUSH - && !nds32_isr_function_p (current_function_decl) - && (cfun->machine->va_args_size == 0)) + /* Refer to nds32.h, where we comment when push25/pop25 are available. */ + if (NDS32_V3PUSH_AVAILABLE_P) { /* For stack v3push: operands[0]: Re @@ -598,12 +2047,25 @@ otherwise, generate 'push25 Re,0'. */ sp_adjust = cfun->machine->local_size + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_gpr_padding_bytes; + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust)) operands[1] = GEN_INT (sp_adjust); else - operands[1] = GEN_INT (0); + { + /* Allocate callee saved fpr space. */ + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + sp_adjust = cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + operands[1] = GEN_INT (sp_adjust); + } + else + { + operands[1] = GEN_INT (0); + } + } /* Create assembly code pattern. */ snprintf (pattern, sizeof (pattern), "push25\t%%0, %%1"); @@ -665,17 +2127,32 @@ char pattern[100]; /* The operands array which will be used in output_asm_insn(). 
*/ rtx operands[3]; + /* Pick up first and last eh data regno for further use. */ + int rb_eh_data = cfun->machine->eh_return_data_first_regno; + int re_eh_data = cfun->machine->eh_return_data_last_regno; + int first_eh_data_regno = EH_RETURN_DATA_REGNO (0); /* Pick up callee-saved first regno and last regno for further use. */ int rb_callee_saved = cfun->machine->callee_saved_first_gpr_regno; int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno; + /* We need to check if we need to push exception handling + data registers. */ + if (reg_mentioned_p (gen_rtx_REG (SImode, first_eh_data_regno), par_rtx)) + { + /* Set operands[0] and operands[1]. */ + operands[0] = gen_rtx_REG (SImode, rb_eh_data); + operands[1] = gen_rtx_REG (SImode, re_eh_data); + /* Create assembly code pattern: "Rb, Re, { }". */ + snprintf (pattern, sizeof (pattern), "pop.s\t%s", "%0, %1, { }"); + /* We use output_asm_insn() to output assembly code by ourself. */ + output_asm_insn (pattern, operands); + return ""; + } + /* If we step here, we are going to do v3pop or multiple pop operation. */ - /* The v3push/v3pop instruction should only be applied on - none-isr and none-variadic function. */ - if (TARGET_V3PUSH - && !nds32_isr_function_p (current_function_decl) - && (cfun->machine->va_args_size == 0)) + /* Refer to nds32.h, where we comment when push25/pop25 are available. */ + if (NDS32_V3PUSH_AVAILABLE_P) { /* For stack v3pop: operands[0]: Re @@ -696,13 +2173,28 @@ and then use 'pop25 Re,0'. 
*/ sp_adjust = cfun->machine->local_size + cfun->machine->out_args_size - + cfun->machine->callee_saved_area_gpr_padding_bytes; + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) && !cfun->calls_alloca) operands[1] = GEN_INT (sp_adjust); else - operands[1] = GEN_INT (0); + { + if (cfun->machine->callee_saved_first_fpr_regno != SP_REGNUM) + { + /* If has fpr need to restore, the $sp on callee saved fpr + position, so we need to consider gpr pading bytes and + callee saved fpr size. */ + sp_adjust = cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + operands[1] = GEN_INT (sp_adjust); + } + else + { + operands[1] = GEN_INT (0); + } + } /* Create assembly code pattern. */ snprintf (pattern, sizeof (pattern), "pop25\t%%0, %%1"); @@ -755,96 +2247,1656 @@ return ""; } -/* Function to generate PC relative jump table. - Refer to nds32.md for more details. - - The following is the sample for the case that diff value - can be presented in '.short' size. - - addi $r1, $r1, -(case_lower_bound) - slti $ta, $r1, (case_number) - beqz $ta, .L_skip_label - - la $ta, .L35 ! get jump table address - lh $r1, [$ta + $r1 << 1] ! load symbol diff from jump table entry - addi $ta, $r1, $ta - jr5 $ta - - ! jump table entry - L35: - .short .L25-.L35 - .short .L26-.L35 - .short .L27-.L35 - .short .L28-.L35 - .short .L29-.L35 - .short .L30-.L35 - .short .L31-.L35 - .short .L32-.L35 - .short .L33-.L35 - .short .L34-.L35 */ +/* Function to output return operation. */ +const char * +nds32_output_return (void) +{ + /* A string pattern for output_asm_insn(). */ + char pattern[100]; + /* The operands array which will be used in output_asm_insn(). 
*/ + rtx operands[2]; + /* For stack v3pop: + operands[0]: Re + operands[1]: imm8u */ + int re_callee_saved = cfun->machine->callee_saved_last_gpr_regno; + int sp_adjust; + + /* Set operands[0]. */ + operands[0] = gen_rtx_REG (SImode, re_callee_saved); + + /* Check if we can generate 'pop25 Re,imm8u', + otherwise, generate 'pop25 Re,0'. + We have to consider alloca issue as well. + If the function does call alloca(), the stack pointer is not fixed. + In that case, we cannot use 'pop25 Re,imm8u' directly. + We have to calculate stack pointer from frame pointer + and then use 'pop25 Re,0'. */ + sp_adjust = cfun->machine->local_size + + cfun->machine->out_args_size + + cfun->machine->callee_saved_area_gpr_padding_bytes + + cfun->machine->callee_saved_fpr_regs_size; + if (satisfies_constraint_Iu08 (GEN_INT (sp_adjust)) + && NDS32_DOUBLE_WORD_ALIGN_P (sp_adjust) + && !cfun->calls_alloca) + operands[1] = GEN_INT (sp_adjust); + else + operands[1] = GEN_INT (0); + + /* Create assembly code pattern. */ + snprintf (pattern, sizeof (pattern), "pop25\t%%0, %%1"); + /* We use output_asm_insn() to output assembly code by ourself. 
*/ + output_asm_insn (pattern, operands); + return ""; +} + + +/* output a float load instruction */ +const char * +nds32_output_float_load (rtx *operands) +{ + char buff[100]; + const char *pattern; + rtx addr, addr_op0, addr_op1; + int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8; + addr = XEXP (operands[1], 0); + switch (GET_CODE (addr)) + { + case REG: + pattern = "fl%ci\t%%0, %%1"; + break; + + case PLUS: + addr_op0 = XEXP (addr, 0); + addr_op1 = XEXP (addr, 1); + + if (REG_P (addr_op0) && REG_P (addr_op1)) + pattern = "fl%c\t%%0, %%1"; + else if (REG_P (addr_op0) && CONST_INT_P (addr_op1)) + pattern = "fl%ci\t%%0, %%1"; + else if (GET_CODE (addr_op0) == MULT && REG_P (addr_op1) + && REG_P (XEXP (addr_op0, 0)) + && CONST_INT_P (XEXP (addr_op0, 1))) + pattern = "fl%c\t%%0, %%1"; + else + gcc_unreachable (); + break; + + case POST_MODIFY: + addr_op0 = XEXP (addr, 0); + addr_op1 = XEXP (addr, 1); + + if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS + && REG_P (XEXP (addr_op1, 1))) + pattern = "fl%c.bi\t%%0, %%1"; + else if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS + && CONST_INT_P (XEXP (addr_op1, 1))) + pattern = "fl%ci.bi\t%%0, %%1"; + else + gcc_unreachable (); + break; + + case POST_INC: + if (REG_P (XEXP (addr, 0))) + { + if (dp) + pattern = "fl%ci.bi\t%%0, %%1, 8"; + else + pattern = "fl%ci.bi\t%%0, %%1, 4"; + } + else + gcc_unreachable (); + break; + + case POST_DEC: + if (REG_P (XEXP (addr, 0))) + { + if (dp) + pattern = "fl%ci.bi\t%%0, %%1, -8"; + else + pattern = "fl%ci.bi\t%%0, %%1, -4"; + } + else + gcc_unreachable (); + break; + + default: + gcc_unreachable (); + } + + sprintf (buff, pattern, dp ? 
'd' : 's'); + output_asm_insn (buff, operands); + return ""; +} + +/* output a float store instruction */ +const char * +nds32_output_float_store (rtx *operands) +{ + char buff[100]; + const char *pattern; + rtx addr, addr_op0, addr_op1; + int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8; + addr = XEXP (operands[0], 0); + switch (GET_CODE (addr)) + { + case REG: + pattern = "fs%ci\t%%1, %%0"; + break; + + case PLUS: + addr_op0 = XEXP (addr, 0); + addr_op1 = XEXP (addr, 1); + + if (REG_P (addr_op0) && REG_P (addr_op1)) + pattern = "fs%c\t%%1, %%0"; + else if (REG_P (addr_op0) && CONST_INT_P (addr_op1)) + pattern = "fs%ci\t%%1, %%0"; + else if (GET_CODE (addr_op0) == MULT && REG_P (addr_op1) + && REG_P (XEXP (addr_op0, 0)) + && CONST_INT_P (XEXP (addr_op0, 1))) + pattern = "fs%c\t%%1, %%0"; + else + gcc_unreachable (); + break; + + case POST_MODIFY: + addr_op0 = XEXP (addr, 0); + addr_op1 = XEXP (addr, 1); + + if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS + && REG_P (XEXP (addr_op1, 1))) + pattern = "fs%c.bi\t%%1, %%0"; + else if (REG_P (addr_op0) && GET_CODE (addr_op1) == PLUS + && CONST_INT_P (XEXP (addr_op1, 1))) + pattern = "fs%ci.bi\t%%1, %%0"; + else + gcc_unreachable (); + break; + + case POST_INC: + if (REG_P (XEXP (addr, 0))) + { + if (dp) + pattern = "fs%ci.bi\t%%1, %%0, 8"; + else + pattern = "fs%ci.bi\t%%1, %%0, 4"; + } + else + gcc_unreachable (); + break; + + case POST_DEC: + if (REG_P (XEXP (addr, 0))) + { + if (dp) + pattern = "fs%ci.bi\t%%1, %%0, -8"; + else + pattern = "fs%ci.bi\t%%1, %%0, -4"; + } + else + gcc_unreachable (); + break; + + default: + gcc_unreachable (); + } + + sprintf (buff, pattern, dp ? 
'd' : 's'); + output_asm_insn (buff, operands); + return ""; +} + +const char * +nds32_output_smw_single_word (rtx *operands) +{ + char buff[100]; + unsigned regno; + int enable4; + bool update_base_p; + rtx base_addr = operands[0]; + rtx base_reg; + rtx otherops[2]; + + if (REG_P (XEXP (base_addr, 0))) + { + update_base_p = false; + base_reg = XEXP (base_addr, 0); + } + else + { + update_base_p = true; + base_reg = XEXP (XEXP (base_addr, 0), 0); + } + + const char *update_base = update_base_p ? "m" : ""; + + regno = REGNO (operands[1]); + + otherops[0] = base_reg; + otherops[1] = operands[1]; + + if (regno >= 28) + { + enable4 = nds32_regno_to_enable4 (regno); + sprintf (buff, "smw.bi%s\t$sp, [%%0], $sp, %x", update_base, enable4); + } + else + { + sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1", update_base); + } + output_asm_insn (buff, otherops); + return ""; +} + +/* ------------------------------------------------------------------------ */ +const char * +nds32_output_smw_double_word (rtx *operands) +{ + char buff[100]; + unsigned regno; + int enable4; + bool update_base_p; + rtx base_addr = operands[0]; + rtx base_reg; + rtx otherops[3]; + + if (REG_P (XEXP (base_addr, 0))) + { + update_base_p = false; + base_reg = XEXP (base_addr, 0); + } + else + { + update_base_p = true; + base_reg = XEXP (XEXP (base_addr, 0), 0); + } + + const char *update_base = update_base_p ? 
"m" : ""; + + regno = REGNO (operands[1]); + + otherops[0] = base_reg; + otherops[1] = operands[1]; + otherops[2] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);; + + if (regno >= 28) + { + enable4 = nds32_regno_to_enable4 (regno) + | nds32_regno_to_enable4 (regno + 1); + sprintf (buff, "smw.bi%s\t$sp, [%%0], $sp, %x", update_base, enable4); + } + else if (regno == 27) + { + enable4 = nds32_regno_to_enable4 (regno + 1); + sprintf (buff, "smw.bi%s\t%%1, [%%0], %%1, %x", update_base, enable4); + } + else + { + sprintf (buff, "smw.bi%s\t%%1, [%%0], %%2", update_base); + } + output_asm_insn (buff, otherops); + return ""; +} + const char * -nds32_output_casesi_pc_relative (rtx *operands) +nds32_output_lmw_single_word (rtx *operands) +{ + char buff[100]; + unsigned regno; + bool update_base_p; + int enable4; + rtx base_addr = operands[1]; + rtx base_reg; + rtx otherops[2]; + + if (REG_P (XEXP (base_addr, 0))) + { + update_base_p = false; + base_reg = XEXP (base_addr, 0); + } + else + { + update_base_p = true; + base_reg = XEXP (XEXP (base_addr, 0), 0); + } + + const char *update_base = update_base_p ? "m" : ""; + + regno = REGNO (operands[0]); + + otherops[0] = operands[0]; + otherops[1] = base_reg; + + if (regno >= 28) + { + enable4 = nds32_regno_to_enable4 (regno); + sprintf (buff, "lmw.bi%s\t$sp, [%%1], $sp, %x", update_base, enable4); + } + else + { + sprintf (buff, "lmw.bi%s\t%%0, [%%1], %%0", update_base); + } + output_asm_insn (buff, otherops); + return ""; +} + +void +nds32_expand_unaligned_load (rtx *operands, enum machine_mode mode) +{ + /* Initial memory offset. */ + int offset = WORDS_BIG_ENDIAN ? GET_MODE_SIZE (mode) - 1 : 0; + int offset_adj = WORDS_BIG_ENDIAN ? -1 : 1; + /* Initial register shift byte. */ + int shift = 0; + /* The first load byte instruction is not the same. 
*/ + int width = GET_MODE_SIZE (mode) - 1; + rtx mem[2]; + rtx reg[2]; + rtx sub_reg; + rtx temp_reg, temp_sub_reg; + int num_reg; + + /* Generating a series of load byte instructions. + The first load byte instructions and other + load byte instructions are not the same. like: + First: + lbi reg0, [mem] + zeh reg0, reg0 + Second: + lbi temp_reg, [mem + offset] + sll temp_reg, (8 * shift) + ior reg0, temp_reg + + lbi temp_reg, [mem + (offset + 1)] + sll temp_reg, (8 * (shift + 1)) + ior reg0, temp_reg */ + + temp_reg = gen_reg_rtx (SImode); + temp_sub_reg = gen_lowpart (QImode, temp_reg); + + if (mode == DImode) + { + /* Load doubleword, we need two registers to access. */ + reg[0] = nds32_di_low_part_subreg (operands[0]); + reg[1] = nds32_di_high_part_subreg (operands[0]); + /* A register only store 4 byte. */ + width = GET_MODE_SIZE (SImode) - 1; + } + else + { + if (VECTOR_MODE_P (mode)) + reg[0] = gen_reg_rtx (SImode); + else + reg[0] = operands[0]; + } + + for (num_reg = (mode == DImode) ? 2 : 1; num_reg > 0; num_reg--) + { + sub_reg = gen_lowpart (QImode, reg[0]); + mem[0] = gen_rtx_MEM (QImode, plus_constant (Pmode, operands[1], offset)); + + /* Generating the first part instructions. + lbi reg0, [mem] + zeh reg0, reg0 */ + emit_move_insn (sub_reg, mem[0]); + emit_insn (gen_zero_extendqisi2 (reg[0], sub_reg)); + + while (width > 0) + { + offset = offset + offset_adj; + shift++; + width--; + + mem[1] = gen_rtx_MEM (QImode, plus_constant (Pmode, + operands[1], + offset)); + /* Generating the second part instructions. + lbi temp_reg, [mem + offset] + sll temp_reg, (8 * shift) + ior reg0, temp_reg */ + emit_move_insn (temp_sub_reg, mem[1]); + emit_insn (gen_ashlsi3 (temp_reg, temp_reg, + GEN_INT (shift * 8))); + emit_insn (gen_iorsi3 (reg[0], reg[0], temp_reg)); + } + + if (mode == DImode) + { + /* Using the second register to load memory information. 
*/ + reg[0] = reg[1]; + shift = 0; + width = GET_MODE_SIZE (SImode) - 1; + offset = offset + offset_adj; + } + } + if (VECTOR_MODE_P (mode)) + convert_move (operands[0], reg[0], false); +} + +void +nds32_expand_unaligned_store (rtx *operands, enum machine_mode mode) { - machine_mode mode; - rtx diff_vec; - - diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[1]))); - - gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); - - /* Step C: "t <-- operands[1]". */ - output_asm_insn ("la\t$ta, %l1", operands); - - /* Get the mode of each element in the difference vector. */ - mode = GET_MODE (diff_vec); - - /* Step D: "z <-- (mem (plus (operands[0] << m) t))", - where m is 0, 1, or 2 to load address-diff value from table. */ - switch (mode) + /* Initial memory offset. */ + int offset = WORDS_BIG_ENDIAN ? GET_MODE_SIZE (mode) - 1 : 0; + int offset_adj = WORDS_BIG_ENDIAN ? -1 : 1; + /* Initial register shift byte. */ + int shift = 0; + /* The first load byte instruction is not the same. */ + int width = GET_MODE_SIZE (mode) - 1; + rtx mem[2]; + rtx reg[2]; + rtx sub_reg; + rtx temp_reg, temp_sub_reg; + int num_reg; + + /* Generating a series of store byte instructions. + The first store byte instructions and other + load byte instructions are not the same. like: + First: + sbi reg0, [mem + 0] + Second: + srli temp_reg, reg0, (8 * shift) + sbi temp_reg, [mem + offset] */ + + temp_reg = gen_reg_rtx (SImode); + temp_sub_reg = gen_lowpart (QImode, temp_reg); + + if (mode == DImode) + { + /* Load doubleword, we need two registers to access. */ + reg[0] = nds32_di_low_part_subreg (operands[1]); + reg[1] = nds32_di_high_part_subreg (operands[1]); + /* A register only store 4 byte. */ + width = GET_MODE_SIZE (SImode) - 1; + } + else + { + if (VECTOR_MODE_P (mode)) + { + reg[0] = gen_reg_rtx (SImode); + convert_move (reg[0], operands[1], false); + } + else + reg[0] = operands[1]; + } + + for (num_reg = (mode == DImode) ? 
2 : 1; num_reg > 0; num_reg--) + { + sub_reg = gen_lowpart (QImode, reg[0]); + mem[0] = gen_rtx_MEM (QImode, plus_constant (Pmode, operands[0], offset)); + + /* Generating the first part instructions. + sbi reg0, [mem + 0] */ + emit_move_insn (mem[0], sub_reg); + + while (width > 0) + { + offset = offset + offset_adj; + shift++; + width--; + + mem[1] = gen_rtx_MEM (QImode, plus_constant (Pmode, + operands[0], + offset)); + /* Generating the second part instructions. + srli temp_reg, reg0, (8 * shift) + sbi temp_reg, [mem + offset] */ + emit_insn (gen_lshrsi3 (temp_reg, reg[0], + GEN_INT (shift * 8))); + emit_move_insn (mem[1], temp_sub_reg); + } + + if (mode == DImode) + { + /* Using the second register to load memory information. */ + reg[0] = reg[1]; + shift = 0; + width = GET_MODE_SIZE (SImode) - 1; + offset = offset + offset_adj; + } + } +} + +/* Using multiple load/store instruction to output doubleword instruction. */ +const char * +nds32_output_double (rtx *operands, bool load_p) +{ + char pattern[100]; + int reg = load_p ? 0 : 1; + int mem = load_p ? 1 : 0; + rtx otherops[3]; + rtx addr = XEXP (operands[mem], 0); + + otherops[0] = gen_rtx_REG (SImode, REGNO (operands[reg])); + otherops[1] = gen_rtx_REG (SImode, REGNO (operands[reg]) + 1); + + if (GET_CODE (addr) == POST_INC) + { + /* (mem (post_inc (reg))) */ + otherops[2] = XEXP (addr, 0); + snprintf (pattern, sizeof (pattern), + "%cmw.bim\t%%0, [%%2], %%1, 0", load_p ? 'l' : 's'); + } + else + { + /* (mem (reg)) */ + otherops[2] = addr; + snprintf (pattern, sizeof (pattern), + "%cmw.bi\t%%0, [%%2], %%1, 0", load_p ? 
'l' : 's'); + + } + + output_asm_insn (pattern, otherops); + return ""; +} + +const char * +nds32_output_cbranchsi4_equality_zero (rtx_insn *insn, rtx *operands) +{ + enum rtx_code code; + bool long_jump_p = false; + + code = GET_CODE (operands[0]); + + /* This zero-comparison conditional branch has two forms: + 32-bit instruction => beqz/bnez imm16s << 1 + 16-bit instruction => beqzs8/bnezs8/beqz38/bnez38 imm8s << 1 + + For 32-bit case, + we assume it is always reachable. (but check range -65500 ~ 65500) + + For 16-bit case, + it must satisfy { 255 >= (label - pc) >= -256 } condition. + However, since the $pc for nds32 is at the beginning of the instruction, + we should leave some length space for current insn. + So we use range -250 ~ 250. */ + + switch (get_attr_length (insn)) { - case E_QImode: - output_asm_insn ("lb\t%2, [$ta + %0 << 0]", operands); - break; - case E_HImode: - output_asm_insn ("lh\t%2, [$ta + %0 << 1]", operands); - break; - case E_SImode: - output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); + case 8: + long_jump_p = true; + /* fall through */ + case 2: + if (which_alternative == 0) + { + /* constraint: t */ + /* b<cond>zs8 .L0 + or + b<inverse_cond>zs8 .LCB0 + j .L0 + .LCB0: + */ + output_cond_branch_compare_zero (code, "s8", long_jump_p, + operands, true); + return ""; + } + else if (which_alternative == 1) + { + /* constraint: l */ + /* b<cond>z38 $r0, .L0 + or + b<inverse_cond>z38 $r0, .LCB0 + j .L0 + .LCB0: + */ + output_cond_branch_compare_zero (code, "38", long_jump_p, + operands, false); + return ""; + } + else + { + /* constraint: r */ + /* For which_alternative==2, it should not be here. 
*/ + gcc_unreachable (); + } + case 10: + /* including constraints: t, l, and r */ + long_jump_p = true; + /* fall through */ + case 4: + /* including constraints: t, l, and r */ + output_cond_branch_compare_zero (code, "", long_jump_p, operands, false); + return ""; + + default: + gcc_unreachable (); + } +} + +const char * +nds32_output_cbranchsi4_equality_reg (rtx_insn *insn, rtx *operands) +{ + enum rtx_code code; + bool long_jump_p, r5_p; + int insn_length; + + insn_length = get_attr_length (insn); + + long_jump_p = (insn_length == 10 || insn_length == 8) ? true : false; + r5_p = (insn_length == 2 || insn_length == 8) ? true : false; + + code = GET_CODE (operands[0]); + + /* This register-comparison conditional branch has one form: + 32-bit instruction => beq/bne imm14s << 1 + + For 32-bit case, + we assume it is always reachable. (but check range -16350 ~ 16350). */ + + switch (code) + { + case EQ: + case NE: + output_cond_branch (code, "", r5_p, long_jump_p, operands); + return ""; + + default: + gcc_unreachable (); + } +} + +const char * +nds32_output_cbranchsi4_equality_reg_or_const_int (rtx_insn *insn, + rtx *operands) +{ + enum rtx_code code; + bool long_jump_p, r5_p; + int insn_length; + + insn_length = get_attr_length (insn); + + long_jump_p = (insn_length == 10 || insn_length == 8) ? true : false; + r5_p = (insn_length == 2 || insn_length == 8) ? true : false; + + code = GET_CODE (operands[0]); + + /* This register-comparison conditional branch has one form: + 32-bit instruction => beq/bne imm14s << 1 + 32-bit instruction => beqc/bnec imm8s << 1 + + For 32-bit case, we assume it is always reachable. + (but check range -16350 ~ 16350 and -250 ~ 250). 
*/ + + switch (code) + { + case EQ: + case NE: + if (which_alternative == 2) + { + /* r, Is11 */ + /* b<cond>c */ + output_cond_branch (code, "c", r5_p, long_jump_p, operands); + } + else + { + /* r, r */ + /* v, r */ + output_cond_branch (code, "", r5_p, long_jump_p, operands); + } + return ""; + default: + gcc_unreachable (); + } +} + +const char * +nds32_output_cbranchsi4_greater_less_zero (rtx_insn *insn, rtx *operands) +{ + enum rtx_code code; + bool long_jump_p; + int insn_length; + + insn_length = get_attr_length (insn); + + gcc_assert (insn_length == 4 || insn_length == 10); + + long_jump_p = (insn_length == 10) ? true : false; + + code = GET_CODE (operands[0]); + + /* This zero-greater-less-comparison conditional branch has one form: + 32-bit instruction => bgtz/bgez/bltz/blez imm16s << 1 + + For 32-bit case, we assume it is always reachable. + (but check range -65500 ~ 65500). */ + + switch (code) + { + case GT: + case GE: + case LT: + case LE: + output_cond_branch_compare_zero (code, "", long_jump_p, operands, false); break; default: gcc_unreachable (); } - - /* Step E: "t <-- z + t". - Add table label_ref with address-diff value to - obtain target case address. */ - output_asm_insn ("add\t$ta, %2, $ta", operands); - - /* Step F: jump to target with register t. */ - if (TARGET_16_BIT) - return "jr5\t$ta"; + return ""; +} + +const char * +nds32_output_unpkd8 (rtx output, rtx input, + rtx high_idx_rtx, rtx low_idx_rtx, + bool signed_p) +{ + char pattern[100]; + rtx output_operands[2]; + HOST_WIDE_INT high_idx, low_idx; + high_idx = INTVAL (high_idx_rtx); + low_idx = INTVAL (low_idx_rtx); + + gcc_assert (high_idx >= 0 && high_idx <= 3); + gcc_assert (low_idx >= 0 && low_idx <= 3); + + /* We only have 10, 20, 30 and 31. */ + if ((low_idx != 0 || high_idx == 0) && + !(low_idx == 1 && high_idx == 3)) + return "#"; + + char sign_char = signed_p ? 
's' : 'z'; + + sprintf (pattern, + "%cunpkd8" HOST_WIDE_INT_PRINT_DEC HOST_WIDE_INT_PRINT_DEC "\t%%0, %%1", + sign_char, high_idx, low_idx); + output_operands[0] = output; + output_operands[1] = input; + output_asm_insn (pattern, output_operands); + return ""; +} + +/* Return true if SYMBOL_REF X binds locally. */ + +static bool +nds32_symbol_binds_local_p (const_rtx x) +{ + return (SYMBOL_REF_DECL (x) + ? targetm.binds_local_p (SYMBOL_REF_DECL (x)) + : SYMBOL_REF_LOCAL_P (x)); +} + +const char * +nds32_output_call (rtx insn, rtx *operands, rtx symbol, const char *long_call, + const char *call, bool align_p) +{ + char pattern[100]; + bool noreturn_p; + + if (nds32_long_call_p (symbol)) + strcpy (pattern, long_call); else - return "jr\t$ta"; + strcpy (pattern, call); + + if (flag_pic && CONSTANT_P (symbol) + && !nds32_symbol_binds_local_p (symbol)) + strcat (pattern, "@PLT"); + + if (align_p) + strcat (pattern, "\n\t.align 2"); + + noreturn_p = find_reg_note (insn, REG_NORETURN, NULL_RTX) != NULL_RTX; + + if (noreturn_p) + { + if (TARGET_16_BIT) + strcat (pattern, "\n\tnop16"); + else + strcat (pattern, "\n\tnop"); + } + + output_asm_insn (pattern, operands); + return ""; } -/* Function to generate normal jump table. */ +bool +nds32_need_split_sms_p (rtx in0_idx0, rtx in1_idx0, + rtx in0_idx1, rtx in1_idx1) +{ + /* smds or smdrs. */ + if (INTVAL (in0_idx0) == INTVAL (in1_idx0) + && INTVAL (in0_idx1) == INTVAL (in1_idx1) + && INTVAL (in0_idx0) != INTVAL (in0_idx1)) + return false; + + /* smxds. */ + if (INTVAL (in0_idx0) != INTVAL (in0_idx1) + && INTVAL (in1_idx0) != INTVAL (in1_idx1)) + return false; + + return true; +} + const char * -nds32_output_casesi (rtx *operands) +nds32_output_sms (rtx in0_idx0, rtx in1_idx0, + rtx in0_idx1, rtx in1_idx1) +{ + if (nds32_need_split_sms_p (in0_idx0, in1_idx0, + in0_idx1, in1_idx1)) + return "#"; + /* out = in0[in0_idx0] * in1[in1_idx0] - in0[in0_idx1] * in1[in1_idx1] */ + + /* smds or smdrs. 
*/ + if (INTVAL (in0_idx0) == INTVAL (in1_idx0) + && INTVAL (in0_idx1) == INTVAL (in1_idx1) + && INTVAL (in0_idx0) != INTVAL (in0_idx1)) + { + if (INTVAL (in0_idx0) == 0) + { + if (TARGET_BIG_ENDIAN) + return "smds\t%0, %1, %2"; + else + return "smdrs\t%0, %1, %2"; + } + else + { + if (TARGET_BIG_ENDIAN) + return "smdrs\t%0, %1, %2"; + else + return "smds\t%0, %1, %2"; + } + } + + if (INTVAL (in0_idx0) != INTVAL (in0_idx1) + && INTVAL (in1_idx0) != INTVAL (in1_idx1)) + { + if (INTVAL (in0_idx0) == 1) + { + if (TARGET_BIG_ENDIAN) + return "smxds\t%0, %2, %1"; + else + return "smxds\t%0, %1, %2"; + } + else + { + if (TARGET_BIG_ENDIAN) + return "smxds\t%0, %1, %2"; + else + return "smxds\t%0, %2, %1"; + } + } + + gcc_unreachable (); + return ""; +} + +void +nds32_split_sms (rtx out, rtx in0, rtx in1, + rtx in0_idx0, rtx in1_idx0, + rtx in0_idx1, rtx in1_idx1) +{ + rtx result0 = gen_reg_rtx (SImode); + rtx result1 = gen_reg_rtx (SImode); + emit_insn (gen_mulhisi3v (result0, in0, in1, + in0_idx0, in1_idx0)); + emit_insn (gen_mulhisi3v (result1, in0, in1, + in0_idx1, in1_idx1)); + emit_insn (gen_subsi3 (out, result0, result1)); +} + +/* Spilt a doubleword instrucion to two single word instructions. */ +void +nds32_spilt_doubleword (rtx *operands, bool load_p) +{ + int reg = load_p ? 0 : 1; + int mem = load_p ? 1 : 0; + rtx reg_rtx = load_p ? operands[0] : operands[1]; + rtx mem_rtx = load_p ? operands[1] : operands[0]; + rtx low_part[2], high_part[2]; + rtx sub_mem = XEXP (mem_rtx, 0); + + /* Generate low_part and high_part register pattern. + i.e. register pattern like: + (reg:DI) -> (subreg:SI (reg:DI)) + (subreg:SI (reg:DI)) */ + low_part[reg] = simplify_gen_subreg (SImode, reg_rtx, GET_MODE (reg_rtx), 0); + high_part[reg] = simplify_gen_subreg (SImode, reg_rtx, GET_MODE (reg_rtx), 4); + + /* Generate low_part and high_part memory pattern. 
+ Memory format is (post_dec) will generate: + low_part: lwi.bi reg, [mem], 4 + high_part: lwi.bi reg, [mem], -12 */ + if (GET_CODE (sub_mem) == POST_DEC) + { + /* memory format is (post_dec (reg)), + so that extract (reg) from the (post_dec (reg)) pattern. */ + sub_mem = XEXP (sub_mem, 0); + + /* generate low_part and high_part memory format: + low_part: (post_modify ((reg) (plus (reg) (const 4))) + high_part: (post_modify ((reg) (plus (reg) (const -12))) */ + low_part[mem] = gen_frame_mem (SImode, + gen_rtx_POST_MODIFY (Pmode, sub_mem, + gen_rtx_PLUS (Pmode, + sub_mem, + GEN_INT (4)))); + high_part[mem] = gen_frame_mem (SImode, + gen_rtx_POST_MODIFY (Pmode, sub_mem, + gen_rtx_PLUS (Pmode, + sub_mem, + GEN_INT (-12)))); + } + else if (GET_CODE (sub_mem) == POST_MODIFY) + { + /* Memory format is (post_modify (reg) (plus (reg) (const))), + so that extract (reg) from the post_modify pattern. */ + rtx post_mem = XEXP (sub_mem, 0); + + /* Extract (const) from the (post_modify (reg) (plus (reg) (const))) + pattern. */ + + rtx plus_op = XEXP (sub_mem, 1); + rtx post_val = XEXP (plus_op, 1); + + /* Generate low_part and high_part memory format: + low_part: (post_modify ((reg) (plus (reg) (const))) + high_part: ((plus (reg) (const 4))) */ + low_part[mem] = gen_frame_mem (SImode, + gen_rtx_POST_MODIFY (Pmode, post_mem, + gen_rtx_PLUS (Pmode, + post_mem, + post_val))); + high_part[mem] = gen_frame_mem (SImode, plus_constant (Pmode, + post_mem, + 4)); + } + else + { + /* memory format: (symbol_ref), (const), (reg + const_int). */ + low_part[mem] = adjust_address (mem_rtx, SImode, 0); + high_part[mem] = adjust_address (mem_rtx, SImode, 4); + } + + /* After reload completed, we have dependent issue by low part register and + higt part memory. i.e. we cannot split a sequence + like: + load $r0, [%r1] + spilt to + lw $r0, [%r0] + lwi $r1, [%r0 + 4] + swap position + lwi $r1, [%r0 + 4] + lw $r0, [%r0] + For store instruction we don't have a problem. 
+ + When memory format is [post_modify], we need to emit high part instruction, + before low part instruction. + expamle: + load $r0, [%r2], post_val + spilt to + load $r1, [%r2 + 4] + load $r0, [$r2], post_val. */ + if ((load_p && reg_overlap_mentioned_p (low_part[0], high_part[1])) + || GET_CODE (sub_mem) == POST_MODIFY) + { + operands[2] = high_part[0]; + operands[3] = high_part[1]; + operands[4] = low_part[0]; + operands[5] = low_part[1]; + } + else + { + operands[2] = low_part[0]; + operands[3] = low_part[1]; + operands[4] = high_part[0]; + operands[5] = high_part[1]; + } +} + +void +nds32_split_ashiftdi3 (rtx dst, rtx src, rtx shiftamount) +{ + rtx src_high_part, src_low_part; + rtx dst_high_part, dst_low_part; + + dst_high_part = nds32_di_high_part_subreg (dst); + dst_low_part = nds32_di_low_part_subreg (dst); + + src_high_part = nds32_di_high_part_subreg (src); + src_low_part = nds32_di_low_part_subreg (src); + + /* We need to handle shift more than 32 bit!!!! */ + if (CONST_INT_P (shiftamount)) + { + if (INTVAL (shiftamount) < 32) + { + rtx ext_start; + ext_start = gen_int_mode(32 - INTVAL (shiftamount), SImode); + + emit_insn (gen_wext (dst_high_part, src, ext_start)); + emit_insn (gen_ashlsi3 (dst_low_part, src_low_part, shiftamount)); + } + else + { + rtx new_shift_amout = gen_int_mode(INTVAL (shiftamount) - 32, SImode); + + emit_insn (gen_ashlsi3 (dst_high_part, src_low_part, + new_shift_amout)); + + emit_move_insn (dst_low_part, GEN_INT (0)); + } + } + else + { + rtx dst_low_part_l32, dst_high_part_l32; + rtx dst_low_part_g32, dst_high_part_g32; + rtx new_shift_amout, select_reg; + dst_low_part_l32 = gen_reg_rtx (SImode); + dst_high_part_l32 = gen_reg_rtx (SImode); + dst_low_part_g32 = gen_reg_rtx (SImode); + dst_high_part_g32 = gen_reg_rtx (SImode); + new_shift_amout = gen_reg_rtx (SImode); + select_reg = gen_reg_rtx (SImode); + + rtx ext_start; + ext_start = gen_reg_rtx (SImode); + + /* + if (shiftamount < 32) + dst_low_part = src_low_part << 
shiftamout + dst_high_part = wext (src, 32 - shiftamount) + # wext can't handle wext (src, 32) since it's only take rb[0:4] + # for extract. + dst_high_part = shiftamount == 0 ? src_high_part : dst_high_part + else + dst_low_part = 0 + dst_high_part = src_low_part << shiftamount & 0x1f + */ + + emit_insn (gen_subsi3 (ext_start, + gen_int_mode (32, SImode), + shiftamount)); + emit_insn (gen_wext (dst_high_part_l32, src, ext_start)); + + /* Handle for shiftamout == 0. */ + emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount, + src_high_part, dst_high_part_l32)); + + emit_insn (gen_ashlsi3 (dst_low_part_l32, src_low_part, shiftamount)); + + emit_move_insn (dst_low_part_g32, const0_rtx); + emit_insn (gen_andsi3 (new_shift_amout, shiftamount, GEN_INT (0x1f))); + emit_insn (gen_ashlsi3 (dst_high_part_g32, src_low_part, + new_shift_amout)); + + emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); + + emit_insn (gen_cmovnsi (dst_low_part, select_reg, + dst_low_part_l32, dst_low_part_g32)); + emit_insn (gen_cmovnsi (dst_high_part, select_reg, + dst_high_part_l32, dst_high_part_g32)); + } +} + +void +nds32_split_ashiftrtdi3 (rtx dst, rtx src, rtx shiftamount) +{ + nds32_split_shiftrtdi3 (dst, src, shiftamount, false); +} + +void +nds32_split_lshiftrtdi3 (rtx dst, rtx src, rtx shiftamount) +{ + nds32_split_shiftrtdi3 (dst, src, shiftamount, true); +} + +void +nds32_split_rotatertdi3 (rtx dst, rtx src, rtx shiftamount) +{ + rtx dst_low_part_l32, dst_high_part_l32; + rtx dst_low_part_g32, dst_high_part_g32; + rtx select_reg, low5bit, low5bit_inv, minus32sa; + rtx dst_low_part_g32_tmph; + rtx dst_low_part_g32_tmpl; + rtx dst_high_part_l32_tmph; + rtx dst_high_part_l32_tmpl; + + rtx src_low_part, src_high_part; + rtx dst_high_part, dst_low_part; + + shiftamount = force_reg (SImode, shiftamount); + + emit_insn (gen_andsi3 (shiftamount, + shiftamount, + gen_int_mode (0x3f, SImode))); + + dst_high_part = nds32_di_high_part_subreg (dst); + dst_low_part = 
nds32_di_low_part_subreg (dst); + + src_high_part = nds32_di_high_part_subreg (src); + src_low_part = nds32_di_low_part_subreg (src); + + dst_low_part_l32 = gen_reg_rtx (SImode); + dst_high_part_l32 = gen_reg_rtx (SImode); + dst_low_part_g32 = gen_reg_rtx (SImode); + dst_high_part_g32 = gen_reg_rtx (SImode); + low5bit = gen_reg_rtx (SImode); + low5bit_inv = gen_reg_rtx (SImode); + minus32sa = gen_reg_rtx (SImode); + select_reg = gen_reg_rtx (SImode); + + dst_low_part_g32_tmph = gen_reg_rtx (SImode); + dst_low_part_g32_tmpl = gen_reg_rtx (SImode); + + dst_high_part_l32_tmph = gen_reg_rtx (SImode); + dst_high_part_l32_tmpl = gen_reg_rtx (SImode); + + emit_insn (gen_slt_compare (select_reg, shiftamount, GEN_INT (32))); + + /* if shiftamount < 32 + dst_low_part = wext(src, shiftamount) + else + dst_low_part = ((src_high_part >> (shiftamount & 0x1f)) + | (src_low_part << (32 - (shiftamount & 0x1f)))) + */ + emit_insn (gen_andsi3 (low5bit, shiftamount, gen_int_mode (0x1f, SImode))); + emit_insn (gen_subsi3 (low5bit_inv, gen_int_mode (32, SImode), low5bit)); + + emit_insn (gen_wext (dst_low_part_l32, src, shiftamount)); + + emit_insn (gen_lshrsi3 (dst_low_part_g32_tmpl, src_high_part, low5bit)); + emit_insn (gen_ashlsi3 (dst_low_part_g32_tmph, src_low_part, low5bit_inv)); + + emit_insn (gen_iorsi3 (dst_low_part_g32, + dst_low_part_g32_tmpl, + dst_low_part_g32_tmph)); + + emit_insn (gen_cmovnsi (dst_low_part, select_reg, + dst_low_part_l32, dst_low_part_g32)); + + /* if shiftamount < 32 + dst_high_part = ((src_high_part >> shiftamount) + | (src_low_part << (32 - shiftamount))) + dst_high_part = shiftamount == 0 ? 
src_high_part : dst_high_part + else + dst_high_part = wext(src, shiftamount & 0x1f) + */ + + emit_insn (gen_subsi3 (minus32sa, gen_int_mode (32, SImode), shiftamount)); + + emit_insn (gen_lshrsi3 (dst_high_part_l32_tmpl, src_high_part, shiftamount)); + emit_insn (gen_ashlsi3 (dst_high_part_l32_tmph, src_low_part, minus32sa)); + + emit_insn (gen_iorsi3 (dst_high_part_l32, + dst_high_part_l32_tmpl, + dst_high_part_l32_tmph)); + + emit_insn (gen_cmovzsi (dst_high_part_l32, shiftamount, + src_high_part, dst_high_part_l32)); + + emit_insn (gen_wext (dst_high_part_g32, src, low5bit)); + + emit_insn (gen_cmovnsi (dst_high_part, select_reg, + dst_high_part_l32, dst_high_part_g32)); +} + +/* Return true if OP contains a symbol reference. */ +bool +symbolic_reference_mentioned_p (rtx op) { - /* Step C: "t <-- operands[1]". */ - output_asm_insn ("la\t$ta, %l1", operands); - - /* Step D: "z <-- (mem (plus (operands[0] << 2) t))". */ - output_asm_insn ("lw\t%2, [$ta + %0 << 2]", operands); - - /* No need to perform Step E, which is only used for - pc relative jump table. */ - - /* Step F: jump to target with register z. */ - if (TARGET_16_BIT) - return "jr5\t%2"; + const char *fmt; + int i; + + if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) + return true; + + fmt = GET_RTX_FORMAT (GET_CODE (op)); + for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) + { + if (fmt[i] == 'E') + { + int j; + + for (j = XVECLEN (op, i) - 1; j >= 0; j--) + if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) + return true; + } + + else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) + return true; + } + + return false; +} + +/* Expand PIC code for @GOTOFF and @GOT. 
+ + Example for @GOTOFF: + + la $r0, symbol@GOTOFF + -> sethi $ta, hi20(symbol@GOTOFF) + ori $ta, $ta, lo12(symbol@GOTOFF) + add $r0, $ta, $gp + + Example for @GOT: + + la $r0, symbol@GOT + -> sethi $ta, hi20(symbol@GOT) + ori $ta, $ta, lo12(symbol@GOT) + lw $r0, [$ta + $gp] +*/ +rtx +nds32_legitimize_pic_address (rtx x) +{ + rtx addr = x; + rtx reg = gen_reg_rtx (Pmode); + rtx pat; + + if (GET_CODE (x) == LABEL_REF + || (GET_CODE (x) == SYMBOL_REF + && (CONSTANT_POOL_ADDRESS_P (x) + || SYMBOL_REF_LOCAL_P (x)))) + { + addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_GOTOFF); + addr = gen_rtx_CONST (SImode, addr); + emit_insn (gen_sethi (reg, addr)); + emit_insn (gen_lo_sum (reg, reg, addr)); + x = gen_rtx_PLUS (Pmode, reg, pic_offset_table_rtx); + } + else if (GET_CODE (x) == SYMBOL_REF) + { + addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_GOT); + addr = gen_rtx_CONST (SImode, addr); + emit_insn (gen_sethi (reg, addr)); + emit_insn (gen_lo_sum (reg, reg, addr)); + + x = gen_const_mem (SImode, gen_rtx_PLUS (Pmode, pic_offset_table_rtx, + reg)); + } + else if (GET_CODE (x) == CONST) + { + /* We don't split constant in expand_pic_move because GOTOFF can combine + the addend with the symbol. */ + addr = XEXP (x, 0); + gcc_assert (GET_CODE (addr) == PLUS); + + rtx op0 = XEXP (addr, 0); + rtx op1 = XEXP (addr, 1); + + if ((GET_CODE (op0) == LABEL_REF + || (GET_CODE (op0) == SYMBOL_REF + && (CONSTANT_POOL_ADDRESS_P (op0) + || SYMBOL_REF_LOCAL_P (op0)))) + && GET_CODE (op1) == CONST_INT) + { + pat = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), UNSPEC_GOTOFF); + pat = gen_rtx_PLUS (Pmode, pat, op1); + pat = gen_rtx_CONST (Pmode, pat); + emit_insn (gen_sethi (reg, pat)); + emit_insn (gen_lo_sum (reg, reg, pat)); + x = gen_rtx_PLUS (Pmode, reg, pic_offset_table_rtx); + } + else if (GET_CODE (op0) == SYMBOL_REF + && GET_CODE (op1) == CONST_INT) + { + /* This is a constant offset from a @GOT symbol reference. 
*/ + addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, op0), UNSPEC_GOT); + addr = gen_rtx_CONST (SImode, addr); + emit_insn (gen_sethi (reg, addr)); + emit_insn (gen_lo_sum (reg, reg, addr)); + addr = gen_const_mem (SImode, gen_rtx_PLUS (Pmode, + pic_offset_table_rtx, + reg)); + emit_move_insn (reg, addr); + if (satisfies_constraint_Is15 (op1)) + x = gen_rtx_PLUS (Pmode, reg, op1); + else + { + rtx tmp_reg = gen_reg_rtx (SImode); + emit_insn (gen_movsi (tmp_reg, op1)); + x = gen_rtx_PLUS (Pmode, reg, tmp_reg); + } + } + else + { + /* Don't handle this pattern. */ + debug_rtx (x); + gcc_unreachable (); + } + } + return x; +} + +void +nds32_expand_pic_move (rtx *operands) +{ + rtx src; + + src = nds32_legitimize_pic_address (operands[1]); + emit_move_insn (operands[0], src); +} + +/* Expand ICT symbol. + Example for @ICT and ICT model=large: + + la $r0, symbol@ICT + -> sethi $rt, hi20(symbol@ICT) + lwi $r0, [$rt + lo12(symbol@ICT)] + +*/ +rtx +nds32_legitimize_ict_address (rtx x) +{ + rtx symbol = x; + rtx addr = x; + rtx reg = gen_reg_rtx (Pmode); + gcc_assert (GET_CODE (x) == SYMBOL_REF + && nds32_indirect_call_referenced_p (x)); + + addr = gen_rtx_UNSPEC (SImode, gen_rtvec (1, symbol), UNSPEC_ICT); + addr = gen_rtx_CONST (SImode, addr); + emit_insn (gen_sethi (reg, addr)); + + x = gen_const_mem (SImode, gen_rtx_LO_SUM (Pmode, reg, addr)); + + return x; +} + +void +nds32_expand_ict_move (rtx *operands) +{ + rtx src = operands[1]; + + src = nds32_legitimize_ict_address (src); + + emit_move_insn (operands[0], src); +} + +/* Return true X is a indirect call symbol. */ +bool +nds32_indirect_call_referenced_p (rtx x) +{ + if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_ICT) + x = XVECEXP (x, 0, 0); + + if (GET_CODE (x) == SYMBOL_REF) + { + tree decl = SYMBOL_REF_DECL (x); + + return decl + && (lookup_attribute("indirect_call", + DECL_ATTRIBUTES(decl)) + != NULL); + } + + return false; +} + +/* Return true X is need use long call. 
*/ +bool +nds32_long_call_p (rtx symbol) +{ + if (nds32_indirect_call_referenced_p (symbol)) + return TARGET_ICT_MODEL_LARGE; else - return "jr\t%2"; + return TARGET_CMODEL_LARGE; +} + +/* Return true if X contains a thread-local symbol. */ +bool +nds32_tls_referenced_p (rtx x) +{ + if (!targetm.have_tls) + return false; + + if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS) + x = XEXP (XEXP (x, 0), 0); + + if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x)) + return true; + + return false; +} + +/* ADDR contains a thread-local SYMBOL_REF. Generate code to compute + this (thread-local) address. */ +rtx +nds32_legitimize_tls_address (rtx x) +{ + rtx tmp_reg; + rtx tp_reg = gen_rtx_REG (Pmode, TP_REGNUM); + rtx pat, insns, reg0; + + if (GET_CODE (x) == SYMBOL_REF) + switch (SYMBOL_REF_TLS_MODEL (x)) + { + case TLS_MODEL_GLOBAL_DYNAMIC: + case TLS_MODEL_LOCAL_DYNAMIC: + /* Emit UNSPEC_TLS_DESC rather than expand rtl directly because spill + may destroy the define-use chain anylysis to insert relax_hint. */ + if (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC) + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSGD); + else + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSLD); + + pat = gen_rtx_CONST (SImode, pat); + reg0 = gen_rtx_REG (Pmode, 0); + /* If we can confirm all clobber reigsters, it doesn't have to use call + instruction. 
*/ + insns = emit_call_insn (gen_tls_desc (pat, GEN_INT (0))); + use_reg (&CALL_INSN_FUNCTION_USAGE (insns), pic_offset_table_rtx); + RTL_CONST_CALL_P (insns) = 1; + tmp_reg = gen_reg_rtx (SImode); + emit_move_insn (tmp_reg, reg0); + x = tmp_reg; + break; + + case TLS_MODEL_INITIAL_EXEC: + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSIE); + tmp_reg = gen_reg_rtx (SImode); + pat = gen_rtx_CONST (SImode, pat); + emit_insn (gen_tls_ie (tmp_reg, pat, GEN_INT (0))); + if (flag_pic) + emit_use (pic_offset_table_rtx); + x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg); + break; + + case TLS_MODEL_LOCAL_EXEC: + /* Expand symbol_ref@TPOFF': + sethi $ta, hi20(symbol_ref@TPOFF) + ori $ta, $ta, lo12(symbol_ref@TPOFF) + add $r0, $ta, $tp */ + tmp_reg = gen_reg_rtx (SImode); + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, x), UNSPEC_TLSLE); + pat = gen_rtx_CONST (SImode, pat); + emit_insn (gen_sethi (tmp_reg, pat)); + emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat)); + x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg); + break; + + default: + gcc_unreachable (); + } + else if (GET_CODE (x) == CONST) + { + rtx base, addend; + split_const (x, &base, &addend); + + if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC) + { + /* Expand symbol_ref@TPOFF': + sethi $ta, hi20(symbol_ref@TPOFF + addend) + ori $ta, $ta, lo12(symbol_ref@TPOFF + addend) + add $r0, $ta, $tp */ + tmp_reg = gen_reg_rtx (SImode); + pat = gen_rtx_UNSPEC (SImode, gen_rtvec (1, base), UNSPEC_TLSLE); + pat = gen_rtx_PLUS (SImode, pat, addend); + pat = gen_rtx_CONST (SImode, pat); + emit_insn (gen_sethi (tmp_reg, pat)); + emit_insn (gen_lo_sum (tmp_reg, tmp_reg, pat)); + x = gen_rtx_PLUS (Pmode, tmp_reg, tp_reg); + } + } + + return x; +} + +void +nds32_expand_tls_move (rtx *operands) +{ + rtx src = operands[1]; + rtx base, addend; + + if (CONSTANT_P (src)) + split_const (src, &base, &addend); + + if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC) + src = nds32_legitimize_tls_address (src); + else + { + src = 
nds32_legitimize_tls_address (base); + if (addend != const0_rtx) + { + src = gen_rtx_PLUS (SImode, src, addend); + src = force_operand (src, operands[0]); + } + } + + emit_move_insn (operands[0], src); +} + +void +nds32_expand_constant (machine_mode mode, HOST_WIDE_INT val, + rtx target, rtx source) +{ + rtx temp = gen_reg_rtx (mode); + int clear_sign_bit_copies = 0; + int clear_zero_bit_copies = 0; + unsigned HOST_WIDE_INT remainder = val & 0xffffffffUL; + + /* Count number of leading zeros. */ + clear_sign_bit_copies = __builtin_clz (remainder); + /* Count number of trailing zeros. */ + clear_zero_bit_copies = __builtin_ctz (remainder); + + HOST_WIDE_INT sign_shift_mask = ((0xffffffffUL + << (32 - clear_sign_bit_copies)) + & 0xffffffffUL); + HOST_WIDE_INT zero_shift_mask = (1 << clear_zero_bit_copies) - 1; + + if (clear_sign_bit_copies > 0 && clear_sign_bit_copies < 17 + && (remainder | sign_shift_mask) == 0xffffffffUL) + { + /* Transfer AND to two shifts, example: + a = b & 0x7fffffff => (b << 1) >> 1 */ + rtx shift = GEN_INT (clear_sign_bit_copies); + + emit_insn (gen_ashlsi3 (temp, source, shift)); + emit_insn (gen_lshrsi3 (target, temp, shift)); + } + else if (clear_zero_bit_copies > 0 && clear_sign_bit_copies < 17 + && (remainder | zero_shift_mask) == 0xffffffffUL) + { + /* Transfer AND to two shifts, example: + a = b & 0xfff00000 => (b >> 20) << 20 */ + rtx shift = GEN_INT (clear_zero_bit_copies); + + emit_insn (gen_lshrsi3 (temp, source, shift)); + emit_insn (gen_ashlsi3 (target, temp, shift)); + } + else + { + emit_move_insn (temp, GEN_INT (val)); + emit_move_insn (target, gen_rtx_fmt_ee (AND, mode, source, temp)); + } +} + +/* Auxiliary functions for lwm/smw. 
*/ +bool +nds32_valid_smw_lwm_base_p (rtx op) +{ + rtx base_addr; + + if (!MEM_P (op)) + return false; + + base_addr = XEXP (op, 0); + + if (REG_P (base_addr)) + return true; + else + { + if (GET_CODE (base_addr) == POST_INC + && REG_P (XEXP (base_addr, 0))) + return true; + } + + return false; +} + +/* Auxiliary functions for manipulation DI mode. */ +rtx nds32_di_high_part_subreg(rtx reg) +{ + unsigned high_part_offset = subreg_highpart_offset (SImode, DImode); + + return simplify_gen_subreg ( + SImode, reg, + DImode, high_part_offset); +} + +rtx nds32_di_low_part_subreg(rtx reg) +{ + unsigned low_part_offset = subreg_lowpart_offset (SImode, DImode); + + return simplify_gen_subreg ( + SImode, reg, + DImode, low_part_offset); } /* ------------------------------------------------------------------------ */ + +/* Auxiliary function for output TLS patterns. */ + +const char * +nds32_output_tls_desc (rtx *operands) +{ + char pattern[1000]; + + if (TARGET_RELAX_HINT) + snprintf (pattern, sizeof (pattern), + ".relax_hint %%1\n\tsethi $r0, hi20(%%0)\n\t" + ".relax_hint %%1\n\tori $r0, $r0, lo12(%%0)\n\t" + ".relax_hint %%1\n\tlw $r15, [$r0 + $gp]\n\t" + ".relax_hint %%1\n\tadd $r0, $r0, $gp\n\t" + ".relax_hint %%1\n\tjral $r15"); + else + snprintf (pattern, sizeof (pattern), + "sethi $r0, hi20(%%0)\n\t" + "ori $r0, $r0, lo12(%%0)\n\t" + "lw $r15, [$r0 + $gp]\n\t" + "add $r0, $r0, $gp\n\t" + "jral $r15"); + output_asm_insn (pattern, operands); + return ""; +} + +const char * +nds32_output_tls_ie (rtx *operands) +{ + char pattern[1000]; + + if (flag_pic) + { + if (TARGET_RELAX_HINT) + snprintf (pattern, sizeof (pattern), + ".relax_hint %%2\n\tsethi %%0, hi20(%%1)\n\t" + ".relax_hint %%2\n\tori %%0, %%0, lo12(%%1)\n\t" + ".relax_hint %%2\n\tlw %%0, [%%0 + $gp]"); + else + snprintf (pattern, sizeof (pattern), + "sethi %%0, hi20(%%1)\n\t" + "ori %%0, %%0, lo12(%%1)\n\t" + "lw %%0, [%%0 + $gp]"); + } + else + { + if (TARGET_RELAX_HINT) + snprintf (pattern, sizeof (pattern), + 
".relax_hint %%2\n\tsethi %%0, hi20(%%1)\n\t" + ".relax_hint %%2\n\tlwi %%0, [%%0 + lo12(%%1)]"); + else + snprintf (pattern, sizeof (pattern), + "sethi %%0, hi20(%%1)\n\t" + "lwi %%0, [%%0 + lo12(%%1)]"); + } + output_asm_insn (pattern, operands); + return ""; +}