comparison gcc/config/spu/spu.c @ 47:3bfb6c00c1e0
update it from 4.4.2 to 4.4.3.
author | kent <kent@cr.ie.u-ryukyu.ac.jp>
date | Sun, 07 Feb 2010 17:44:34 +0900
parents | 58ad6c70ea60
children | 77e2b8dfacca
46:b85a337e5837 | 47:3bfb6c00c1e0 |
---|---|
187 const_tree type, unsigned char named); | 187 const_tree type, unsigned char named); |
188 static tree spu_build_builtin_va_list (void); | 188 static tree spu_build_builtin_va_list (void); |
189 static void spu_va_start (tree, rtx); | 189 static void spu_va_start (tree, rtx); |
190 static tree spu_gimplify_va_arg_expr (tree valist, tree type, | 190 static tree spu_gimplify_va_arg_expr (tree valist, tree type, |
191 gimple_seq * pre_p, gimple_seq * post_p); | 191 gimple_seq * pre_p, gimple_seq * post_p); |
192 static int regno_aligned_for_load (int regno); | |
193 static int store_with_one_insn_p (rtx mem); | 192 static int store_with_one_insn_p (rtx mem); |
194 static int mem_is_padded_component_ref (rtx x); | 193 static int mem_is_padded_component_ref (rtx x); |
194 static int reg_aligned_for_addr (rtx x); | |
195 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p); | 195 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p); |
196 static void spu_asm_globalize_label (FILE * file, const char *name); | 196 static void spu_asm_globalize_label (FILE * file, const char *name); |
197 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code, | 197 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code, |
198 int *total, bool speed); | 198 int *total, bool speed); |
199 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp); | 199 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp); |
208 static bool spu_vector_alignment_reachable (const_tree, bool); | 208 static bool spu_vector_alignment_reachable (const_tree, bool); |
209 static tree spu_builtin_vec_perm (tree, tree *); | 209 static tree spu_builtin_vec_perm (tree, tree *); |
210 static int spu_sms_res_mii (struct ddg *g); | 210 static int spu_sms_res_mii (struct ddg *g); |
211 static void asm_file_start (void); | 211 static void asm_file_start (void); |
212 static unsigned int spu_section_type_flags (tree, const char *, int); | 212 static unsigned int spu_section_type_flags (tree, const char *, int); |
213 static rtx spu_expand_load (rtx, rtx, rtx, int); | |
213 | 214 |
214 extern const char *reg_names[]; | 215 extern const char *reg_names[]; |
215 rtx spu_compare_op0, spu_compare_op1; | 216 rtx spu_compare_op0, spu_compare_op1; |
216 | 217 |
217 /* Which instruction set architecture to use. */ | 218 /* Which instruction set architecture to use. */ |
574 } | 575 } |
575 | 576 |
576 void | 577 void |
577 spu_expand_extv (rtx ops[], int unsignedp) | 578 spu_expand_extv (rtx ops[], int unsignedp) |
578 { | 579 { |
580 rtx dst = ops[0], src = ops[1]; | |
579 HOST_WIDE_INT width = INTVAL (ops[2]); | 581 HOST_WIDE_INT width = INTVAL (ops[2]); |
580 HOST_WIDE_INT start = INTVAL (ops[3]); | 582 HOST_WIDE_INT start = INTVAL (ops[3]); |
581 HOST_WIDE_INT src_size, dst_size; | 583 HOST_WIDE_INT align_mask; |
582 enum machine_mode src_mode, dst_mode; | 584 rtx s0, s1, mask, r0; |
583 rtx dst = ops[0], src = ops[1]; | 585 |
584 rtx s; | 586 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode); |
585 | 587 |
586 dst = adjust_operand (ops[0], 0); | 588 if (MEM_P (src)) |
587 dst_mode = GET_MODE (dst); | 589 { |
588 dst_size = GET_MODE_BITSIZE (GET_MODE (dst)); | 590 /* First, determine if we need 1 TImode load or 2. We need only 1 |
589 | 591 if the bits being extracted do not cross the alignment boundary |
590 src = adjust_operand (src, &start); | 592 as determined by the MEM and its address. */ |
591 src_mode = GET_MODE (src); | 593 |
592 src_size = GET_MODE_BITSIZE (GET_MODE (src)); | 594 align_mask = -MEM_ALIGN (src); |
593 | 595 if ((start & align_mask) == ((start + width - 1) & align_mask)) |
594 if (start > 0) | 596 { |
595 { | 597 /* Alignment is sufficient for 1 load. */ |
596 s = gen_reg_rtx (src_mode); | 598 s0 = gen_reg_rtx (TImode); |
597 switch (src_mode) | 599 r0 = spu_expand_load (s0, 0, src, start / 8); |
598 { | 600 start &= 7; |
599 case SImode: | 601 if (r0) |
600 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start))); | 602 emit_insn (gen_rotqby_ti (s0, s0, r0)); |
601 break; | 603 } |
602 case DImode: | 604 else |
603 emit_insn (gen_ashldi3 (s, src, GEN_INT (start))); | 605 { |
604 break; | 606 /* Need 2 loads. */ |
605 case TImode: | 607 s0 = gen_reg_rtx (TImode); |
606 emit_insn (gen_ashlti3 (s, src, GEN_INT (start))); | 608 s1 = gen_reg_rtx (TImode); |
607 break; | 609 r0 = spu_expand_load (s0, s1, src, start / 8); |
608 default: | 610 start &= 7; |
609 abort (); | 611 |
610 } | 612 gcc_assert (start + width <= 128); |
611 src = s; | 613 if (r0) |
612 } | 614 { |
613 | 615 rtx r1 = gen_reg_rtx (SImode); |
614 if (width < src_size) | 616 mask = gen_reg_rtx (TImode); |
615 { | 617 emit_move_insn (mask, GEN_INT (-1)); |
616 rtx pat; | 618 emit_insn (gen_rotqby_ti (s0, s0, r0)); |
617 int icode; | 619 emit_insn (gen_rotqby_ti (s1, s1, r0)); |
618 switch (src_mode) | 620 if (GET_CODE (r0) == CONST_INT) |
619 { | 621 r1 = GEN_INT (INTVAL (r0) & 15); |
620 case SImode: | 622 else |
621 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3; | 623 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15))); |
622 break; | 624 emit_insn (gen_shlqby_ti (mask, mask, r1)); |
623 case DImode: | 625 emit_insn (gen_selb (s0, s1, s0, mask)); |
624 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3; | 626 } |
625 break; | 627 } |
626 case TImode: | 628 |
627 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3; | 629 } |
628 break; | 630 else if (GET_CODE (src) == SUBREG) |
629 default: | 631 { |
630 abort (); | 632 rtx r = SUBREG_REG (src); |
631 } | 633 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r))); |
632 s = gen_reg_rtx (src_mode); | 634 s0 = gen_reg_rtx (TImode); |
633 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width)); | 635 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode)) |
634 emit_insn (pat); | 636 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r))); |
635 src = s; | 637 else |
636 } | 638 emit_move_insn (s0, src); |
637 | 639 } |
638 convert_move (dst, src, unsignedp); | 640 else |
641 { | |
642 gcc_assert (REG_P (src) && GET_MODE (src) == TImode); | |
643 s0 = gen_reg_rtx (TImode); | |
644 emit_move_insn (s0, src); | |
645 } | |
646 | |
647 /* Now s0 is TImode and contains the bits to extract at start. */ | |
648 | |
649 if (start) | |
650 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start))); | |
651 | |
652 if (128 - width) | |
653 { | |
654 tree c = build_int_cst (NULL_TREE, 128 - width); | |
655 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp); | |
656 } | |
657 | |
658 emit_move_insn (dst, s0); | |
639 } | 659 } |
640 | 660 |
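For reference, the rewritten spu_expand_extv chooses between one and two TImode loads with a single aligned-block test: a second quadword is needed only when the field's first and last bits fall in different blocks of MEM_ALIGN bits. A minimal standalone sketch of that test (needs_two_loads is an illustrative name, not a GCC function):

    #include <stdio.h>

    static int
    needs_two_loads (long start, long width, long align)
    {
      long align_mask = -align;   /* e.g. -128 clears the low 7 bits */
      return (start & align_mask) != ((start + width - 1) & align_mask);
    }

    int
    main (void)
    {
      printf ("%d\n", needs_two_loads (96, 32, 128));   /* 0: one load */
      printf ("%d\n", needs_two_loads (112, 32, 128));  /* 1: straddles */
      return 0;
    }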
641 void | 661 void |
642 spu_expand_insv (rtx ops[]) | 662 spu_expand_insv (rtx ops[]) |
643 { | 663 { |
726 default: | 746 default: |
727 abort (); | 747 abort (); |
728 } | 748 } |
729 if (GET_CODE (ops[0]) == MEM) | 749 if (GET_CODE (ops[0]) == MEM) |
730 { | 750 { |
731 rtx aligned = gen_reg_rtx (SImode); | |
732 rtx low = gen_reg_rtx (SImode); | 751 rtx low = gen_reg_rtx (SImode); |
733 rtx addr = gen_reg_rtx (SImode); | |
734 rtx rotl = gen_reg_rtx (SImode); | 752 rtx rotl = gen_reg_rtx (SImode); |
735 rtx mask0 = gen_reg_rtx (TImode); | 753 rtx mask0 = gen_reg_rtx (TImode); |
754 rtx addr; | |
755 rtx addr0; | |
756 rtx addr1; | |
736 rtx mem; | 757 rtx mem; |
737 | 758 |
738 emit_move_insn (addr, XEXP (ops[0], 0)); | 759 addr = force_reg (Pmode, XEXP (ops[0], 0)); |
739 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16))); | 760 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16)); |
740 emit_insn (gen_andsi3 (low, addr, GEN_INT (15))); | 761 emit_insn (gen_andsi3 (low, addr, GEN_INT (15))); |
741 emit_insn (gen_negsi2 (rotl, low)); | 762 emit_insn (gen_negsi2 (rotl, low)); |
742 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl)); | 763 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl)); |
743 emit_insn (gen_rotqmby_ti (mask0, mask, rotl)); | 764 emit_insn (gen_rotqmby_ti (mask0, mask, rotl)); |
744 mem = change_address (ops[0], TImode, aligned); | 765 mem = change_address (ops[0], TImode, addr0); |
745 set_mem_alias_set (mem, 0); | 766 set_mem_alias_set (mem, 0); |
746 emit_move_insn (dst, mem); | 767 emit_move_insn (dst, mem); |
747 emit_insn (gen_selb (dst, dst, shift_reg, mask0)); | 768 emit_insn (gen_selb (dst, dst, shift_reg, mask0)); |
748 emit_move_insn (mem, dst); | |
749 if (start + width > MEM_ALIGN (ops[0])) | 769 if (start + width > MEM_ALIGN (ops[0])) |
750 { | 770 { |
751 rtx shl = gen_reg_rtx (SImode); | 771 rtx shl = gen_reg_rtx (SImode); |
752 rtx mask1 = gen_reg_rtx (TImode); | 772 rtx mask1 = gen_reg_rtx (TImode); |
753 rtx dst1 = gen_reg_rtx (TImode); | 773 rtx dst1 = gen_reg_rtx (TImode); |
754 rtx mem1; | 774 rtx mem1; |
775 addr1 = plus_constant (addr, 16); | |
776 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16)); | |
755 emit_insn (gen_subsi3 (shl, GEN_INT (16), low)); | 777 emit_insn (gen_subsi3 (shl, GEN_INT (16), low)); |
756 emit_insn (gen_shlqby_ti (mask1, mask, shl)); | 778 emit_insn (gen_shlqby_ti (mask1, mask, shl)); |
757 mem1 = adjust_address (mem, TImode, 16); | 779 mem1 = change_address (ops[0], TImode, addr1); |
758 set_mem_alias_set (mem1, 0); | 780 set_mem_alias_set (mem1, 0); |
759 emit_move_insn (dst1, mem1); | 781 emit_move_insn (dst1, mem1); |
760 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1)); | 782 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1)); |
761 emit_move_insn (mem1, dst1); | 783 emit_move_insn (mem1, dst1); |
762 } | 784 } |
785 emit_move_insn (mem, dst); | |
763 } | 786 } |
764 else | 787 else |
765 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask)); | 788 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask)); |
766 } | 789 } |
767 | 790 |
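The MEM path of spu_expand_insv is a read-modify-write: load the enclosing quadword, merge the new bits under a mask with selb, and store the quadword back. The 4.4.3 change builds the aligned addresses with gen_rtx_AND and, notably, delays the first store until a straddling second quadword has been merged as well. A byte-level sketch of the selb-style merge (illustrative names; selb selects per bit, which bytewise bitwise ops reproduce):

    #include <stdio.h>
    #include <string.h>

    static void
    selb_merge (unsigned char *dst, const unsigned char *ins,
                const unsigned char *mask, int n)
    {
      for (int i = 0; i < n; i++)
        dst[i] = (dst[i] & ~mask[i]) | (ins[i] & mask[i]);
    }

    int
    main (void)
    {
      unsigned char qw[16], ins[16], mask[16] = { 0 };
      memset (qw, 0xaa, 16);
      memset (ins, 0x55, 16);
      mask[4] = mask[5] = 0xff;          /* insert two bytes at offset 4 */
      selb_merge (qw, ins, mask, 16);
      for (int i = 0; i < 16; i++)
        printf ("%02x", qw[i]);          /* aaaaaaaa5555aaaa... */
      printf ("\n");
      return 0;
    }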
1583 val = -(val & -8ll); | 1606 val = -(val & -8ll); |
1584 val = (val >> 3) & 0x1f; | 1607 val = (val >> 3) & 0x1f; |
1585 output_addr_const (file, GEN_INT (val)); | 1608 output_addr_const (file, GEN_INT (val)); |
1586 return; | 1609 return; |
1587 | 1610 |
1611 case 'v': | |
1612 case 'w': | |
1613 constant_to_array (mode, x, arr); | |
1614 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127; | |
1615 output_addr_const (file, GEN_INT (code == 'w' ? -val : val)); | |
1616 return; | |
1617 | |
1588 case 0: | 1618 case 0: |
1589 if (xcode == REG) | 1619 if (xcode == REG) |
1590 fprintf (file, "%s", reg_names[REGNO (x)]); | 1620 fprintf (file, "%s", reg_names[REGNO (x)]); |
1591 else if (xcode == MEM) | 1621 else if (xcode == MEM) |
1592 output_address (XEXP (x, 0)); | 1622 output_address (XEXP (x, 0)); |
1595 else | 1625 else |
1596 output_addr_const (file, x); | 1626 output_addr_const (file, x); |
1597 return; | 1627 return; |
1598 | 1628 |
1599 /* unused letters | 1629 /* unused letters |
1600 o qr uvw yz | 1630 o qr u yz |
1601 AB OPQR UVWXYZ */ | 1631 AB OPQR UVWXYZ */ |
1602 default: | 1632 default: |
1603 output_operand_lossage ("invalid %%xn code"); | 1633 output_operand_lossage ("invalid %%xn code"); |
1604 } | 1634 } |
1605 gcc_unreachable (); | 1635 gcc_unreachable (); |
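The new 'v' and 'w' codes print the (for 'w', negated) base-2 exponent of an SFmode power-of-two constant: the expression rebuilds the IEEE-754 single-precision exponent field from the first two bytes of the constant's big-endian image and removes the bias of 127. The same computation on the host, assuming a little-endian machine (exp_of is an illustrative name):

    #include <stdio.h>
    #include <string.h>

    static int
    exp_of (float f)
    {
      unsigned char le[4], arr[4];
      memcpy (le, &f, 4);
      for (int i = 0; i < 4; i++)   /* little-endian -> big-endian */
        arr[i] = le[3 - i];
      return (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
    }

    int
    main (void)
    {
      printf ("%d %d %d\n", exp_of (1.0f), exp_of (8.0f), exp_of (0.25f));
      /* prints: 0 3 -2 */
      return 0;
    }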
1616 get_pic_reg (void) | 1646 get_pic_reg (void) |
1617 { | 1647 { |
1618 rtx pic_reg = pic_offset_table_rtx; | 1648 rtx pic_reg = pic_offset_table_rtx; |
1619 if (!reload_completed && !reload_in_progress) | 1649 if (!reload_completed && !reload_in_progress) |
1620 abort (); | 1650 abort (); |
1651 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM)) | |
1652 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM); | |
1621 return pic_reg; | 1653 return pic_reg; |
1622 } | 1654 } |
1623 | 1655 |
1624 /* Split constant addresses to handle cases that are too large. | 1656 /* Split constant addresses to handle cases that are too large. |
1625 Add in the pic register when in PIC mode. | 1657 Add in the pic register when in PIC mode. |
2763 | 2795 |
2764 insert_hbrp (); | 2796 insert_hbrp (); |
2765 | 2797 |
2766 pad_bb (); | 2798 pad_bb (); |
2767 | 2799 |
2800 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) | |
2801 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr) | |
2802 { | |
2803 /* Adjust the LABEL_REF in a hint when we have inserted a nop | |
2804 between its branch label and the branch. We don't move the | |
2805 label because GCC expects it at the beginning of the block. */ | |
2806 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0)); | |
2807 rtx label_ref = XVECEXP (unspec, 0, 0); | |
2808 rtx label = XEXP (label_ref, 0); | |
2809 rtx branch; | |
2810 int offset = 0; | |
2811 for (branch = NEXT_INSN (label); | |
2812 !JUMP_P (branch) && !CALL_P (branch); | |
2813 branch = NEXT_INSN (branch)) | |
2814 if (NONJUMP_INSN_P (branch)) | |
2815 offset += get_attr_length (branch); | |
2816 if (offset > 0) | |
2817 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset); | |
2818 } | |
2768 | 2819 |
2769 if (spu_flag_var_tracking) | 2820 if (spu_flag_var_tracking) |
2770 { | 2821 { |
2771 df_analyze (); | 2822 df_analyze (); |
2772 timevar_push (TV_VAR_TRACKING); | 2823 timevar_push (TV_VAR_TRACKING); |
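The new fixup loop keeps hbr hints accurate after nop padding: instead of moving the label, it walks from the label to the branch, sums the lengths of the ordinary insns in between, and retargets the hint at label + offset. A toy model of the offset computation (the RTL walk also skips notes and labels, which this simplification ignores):

    #include <stdio.h>

    struct insn { int is_branch; int length; };

    static int
    hint_offset (const struct insn *seq)
    {
      int offset = 0;
      for (; !seq->is_branch; seq++)   /* stop at the branch itself */
        offset += seq->length;
      return offset;
    }

    int
    main (void)
    {
      /* label; nop (4 bytes); lnop (4 bytes); the hinted branch */
      struct insn bb[] = { { 0, 4 }, { 0, 4 }, { 1, 4 } };
      printf ("%d\n", hint_offset (bb));   /* 8 */
      return 0;
    }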
2970 for (i = nready - 1; i >= 0; i--) | 3021 for (i = nready - 1; i >= 0; i--) |
2971 { | 3022 { |
2972 insn = ready[i]; | 3023 insn = ready[i]; |
2973 if (INSN_CODE (insn) == -1 | 3024 if (INSN_CODE (insn) == -1 |
2974 || INSN_CODE (insn) == CODE_FOR_blockage | 3025 || INSN_CODE (insn) == CODE_FOR_blockage |
2975 || INSN_CODE (insn) == CODE_FOR__spu_convert) | 3026 || (INSN_P (insn) && get_attr_length (insn) == 0)) |
2976 { | 3027 { |
2977 ready[i] = ready[nready - 1]; | 3028 ready[i] = ready[nready - 1]; |
2978 ready[nready - 1] = insn; | 3029 ready[nready - 1] = insn; |
2979 return 1; | 3030 return 1; |
2980 } | 3031 } |
3101 moved across it and has no cost. */ | 3152 moved across it and has no cost. */ |
3102 if (INSN_CODE (insn) == CODE_FOR_blockage | 3153 if (INSN_CODE (insn) == CODE_FOR_blockage |
3103 || INSN_CODE (dep_insn) == CODE_FOR_blockage) | 3154 || INSN_CODE (dep_insn) == CODE_FOR_blockage) |
3104 return 0; | 3155 return 0; |
3105 | 3156 |
3106 if (INSN_CODE (insn) == CODE_FOR__spu_convert | 3157 if ((INSN_P (insn) && get_attr_length (insn) == 0) |
3107 || INSN_CODE (dep_insn) == CODE_FOR__spu_convert) | 3158 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0)) |
3108 return 0; | 3159 return 0; |
3109 | 3160 |
3110 /* Make sure hbrps are spread out. */ | 3161 /* Make sure hbrps are spread out. */ |
3111 if (INSN_CODE (insn) == CODE_FOR_iprefetch | 3162 if (INSN_CODE (insn) == CODE_FOR_iprefetch |
3112 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch) | 3163 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch) |
3501 val = trunc_int_for_mode (val, mode); | 3552 val = trunc_int_for_mode (val, mode); |
3502 | 3553 |
3503 return val >= low && val <= high; | 3554 return val >= low && val <= high; |
3504 } | 3555 } |
3505 | 3556 |
3557 /* TRUE when op is an immediate and an exact power of 2, and given that | |
3558 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector, | |
3559 all entries must be the same. */ | |
3560 bool | |
3561 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high) | |
3562 { | |
3563 enum machine_mode int_mode; | |
3564 HOST_WIDE_INT val; | |
3565 unsigned char arr[16]; | |
3566 int bytes, i, j; | |
3567 | |
3568 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE | |
3569 || GET_CODE (op) == CONST_VECTOR); | |
3570 | |
3571 if (GET_CODE (op) == CONST_VECTOR | |
3572 && !const_vector_immediate_p (op)) | |
3573 return 0; | |
3574 | |
3575 if (GET_MODE (op) != VOIDmode) | |
3576 mode = GET_MODE (op); | |
3577 | |
3578 constant_to_array (mode, op, arr); | |
3579 | |
3580 if (VECTOR_MODE_P (mode)) | |
3581 mode = GET_MODE_INNER (mode); | |
3582 | |
3583 bytes = GET_MODE_SIZE (mode); | |
3584 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0); | |
3585 | |
3586 /* Check that bytes are repeated. */ | |
3587 for (i = bytes; i < 16; i += bytes) | |
3588 for (j = 0; j < bytes; j++) | |
3589 if (arr[j] != arr[i + j]) | |
3590 return 0; | |
3591 | |
3592 val = arr[0]; | |
3593 for (j = 1; j < bytes; j++) | |
3594 val = (val << 8) | arr[j]; | |
3595 | |
3596 val = trunc_int_for_mode (val, int_mode); | |
3597 | |
3598 /* Currently, we only handle SFmode */ | |
3599 gcc_assert (mode == SFmode); | |
3600 if (mode == SFmode) | |
3601 { | |
3602 int exp = (val >> 23) - 127; | |
3603 return val > 0 && (val & 0x007fffff) == 0 | |
3604 && exp >= low && exp <= high; | |
3605 } | |
3606 return FALSE; | |
3607 } | |
3608 | |
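In the SFmode case exp2_immediate_p reduces to a bit test: an exact power of two has a clear sign bit and a zero 23-bit mantissa, and its unbiased exponent must lie in [low, high]. The same test on a host float (is_exp2_in_range is an illustrative name; the bit image stands in for constant_to_array):

    #include <stdio.h>
    #include <string.h>

    static int
    is_exp2_in_range (float f, int low, int high)
    {
      int val, exp;
      memcpy (&val, &f, 4);
      exp = (val >> 23) - 127;
      return val > 0 && (val & 0x007fffff) == 0 && exp >= low && exp <= high;
    }

    int
    main (void)
    {
      printf ("%d\n", is_exp2_in_range (8.0f, -126, 126));   /* 1 */
      printf ("%d\n", is_exp2_in_range (6.0f, -126, 126));   /* 0 */
      printf ("%d\n", is_exp2_in_range (-4.0f, -126, 126));  /* 0 */
      return 0;
    }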
3506 /* We accept: | 3609 /* We accept: |
3507 - any 32-bit constant (SImode, SFmode) | 3610 - any 32-bit constant (SImode, SFmode) |
3508 - any constant that can be generated with fsmbi (any mode) | 3611 - any constant that can be generated with fsmbi (any mode) |
3509 - a 64-bit constant where the high and low bits are identical | 3612 - a 64-bit constant where the high and low bits are identical |
3510 (DImode, DFmode) | 3613 (DImode, DFmode) |
3531 } | 3634 } |
3532 | 3635 |
3533 /* Valid addresses are: | 3636 /* Valid addresses are: |
3534 - symbol_ref, label_ref, const | 3637 - symbol_ref, label_ref, const |
3535 - reg | 3638 - reg |
3536 - reg + const, where either reg or const is 16 byte aligned | 3639 - reg + const_int, where const_int is 16 byte aligned |
3537 - reg + reg, alignment doesn't matter | 3640 - reg + reg, alignment doesn't matter |
3538 The alignment matters in the reg+const case because lqd and stqd | 3641 The alignment matters in the reg+const case because lqd and stqd |
3539 ignore the 4 least significant bits of the const. (TODO: It might be | 3642 ignore the 4 least significant bits of the const. We only care about |
3540 preferable to allow any alignment and fix it up when splitting.) */ | 3643 16 byte modes because the expand phase will change all smaller MEM |
3644 references to TImode. */ | |
3541 int | 3645 int |
3542 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED, | 3646 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED, |
3543 rtx x, int reg_ok_strict) | 3647 rtx x, int reg_ok_strict) |
3544 { | 3648 { |
3545 if (mode == TImode && GET_CODE (x) == AND | 3649 int aligned = GET_MODE_SIZE (mode) >= 16; |
3650 if (aligned | |
3651 && GET_CODE (x) == AND | |
3546 && GET_CODE (XEXP (x, 1)) == CONST_INT | 3652 && GET_CODE (XEXP (x, 1)) == CONST_INT |
3547 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16) | 3653 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16) |
3548 x = XEXP (x, 0); | 3654 x = XEXP (x, 0); |
3549 switch (GET_CODE (x)) | 3655 switch (GET_CODE (x)) |
3550 { | 3656 { |
3657 case LABEL_REF: | |
3551 case SYMBOL_REF: | 3658 case SYMBOL_REF: |
3552 case LABEL_REF: | 3659 case CONST: |
3553 return !TARGET_LARGE_MEM; | 3660 return !TARGET_LARGE_MEM; |
3554 | |
3555 case CONST: | |
3556 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS) | |
3557 { | |
3558 rtx sym = XEXP (XEXP (x, 0), 0); | |
3559 rtx cst = XEXP (XEXP (x, 0), 1); | |
3560 | |
3561 /* Accept any symbol_ref + constant, assuming it does not | |
3562 wrap around the local store addressability limit. */ | |
3563 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT) | |
3564 return 1; | |
3565 } | |
3566 return 0; | |
3567 | 3661 |
3568 case CONST_INT: | 3662 case CONST_INT: |
3569 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff; | 3663 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff; |
3570 | 3664 |
3571 case SUBREG: | 3665 case SUBREG: |
3572 x = XEXP (x, 0); | 3666 x = XEXP (x, 0); |
3573 gcc_assert (GET_CODE (x) == REG); | 3667 if (REG_P (x)) |
3668 return 0; | |
3574 | 3669 |
3575 case REG: | 3670 case REG: |
3576 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict); | 3671 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict); |
3577 | 3672 |
3578 case PLUS: | 3673 case PLUS: |
3582 rtx op1 = XEXP (x, 1); | 3677 rtx op1 = XEXP (x, 1); |
3583 if (GET_CODE (op0) == SUBREG) | 3678 if (GET_CODE (op0) == SUBREG) |
3584 op0 = XEXP (op0, 0); | 3679 op0 = XEXP (op0, 0); |
3585 if (GET_CODE (op1) == SUBREG) | 3680 if (GET_CODE (op1) == SUBREG) |
3586 op1 = XEXP (op1, 0); | 3681 op1 = XEXP (op1, 0); |
3587 /* We can't just accept any aligned register because CSE can | |
3588 change it to a register that is not marked aligned and then | |
3589 recog will fail. So we only accept frame registers because | |
3590 they will only be changed to other frame registers. */ | |
3591 if (GET_CODE (op0) == REG | 3682 if (GET_CODE (op0) == REG |
3592 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict) | 3683 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict) |
3593 && GET_CODE (op1) == CONST_INT | 3684 && GET_CODE (op1) == CONST_INT |
3594 && INTVAL (op1) >= -0x2000 | 3685 && INTVAL (op1) >= -0x2000 |
3595 && INTVAL (op1) <= 0x1fff | 3686 && INTVAL (op1) <= 0x1fff |
3596 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0)) | 3687 && (!aligned || (INTVAL (op1) & 15) == 0)) |
3597 return 1; | 3688 return TRUE; |
3598 if (GET_CODE (op0) == REG | 3689 if (GET_CODE (op0) == REG |
3599 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict) | 3690 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict) |
3600 && GET_CODE (op1) == REG | 3691 && GET_CODE (op1) == REG |
3601 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict)) | 3692 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict)) |
3602 return 1; | 3693 return TRUE; |
3603 } | 3694 } |
3604 break; | 3695 break; |
3605 | 3696 |
3606 default: | 3697 default: |
3607 break; | 3698 break; |
3608 } | 3699 } |
3609 return 0; | 3700 return FALSE; |
3610 } | 3701 } |
3611 | 3702 |
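Restating the reg + const_int rule above: the displacement must fit the signed 14-bit d-form range, and only 16-byte accesses additionally need a 16-byte-aligned displacement, because lqd and stqd discard its low 4 bits and smaller MEMs are rewritten to TImode at expand time. A standalone check (dform_offset_ok is an illustrative name):

    #include <stdio.h>

    static int
    dform_offset_ok (long offset, int mode_size)
    {
      int aligned = mode_size >= 16;
      return offset >= -0x2000 && offset <= 0x1fff
             && (!aligned || (offset & 15) == 0);
    }

    int
    main (void)
    {
      printf ("%d\n", dform_offset_ok (32, 16));     /* 1 */
      printf ("%d\n", dform_offset_ok (36, 16));     /* 0: low bits lost */
      printf ("%d\n", dform_offset_ok (36, 4));      /* 1 */
      printf ("%d\n", dform_offset_ok (0x4000, 16)); /* 0: out of range */
      return 0;
    }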
3612 /* When the address is reg + const_int, force the const_int into a | 3703 /* When the address is reg + const_int, force the const_int into a |
3613 register. */ | 3704 register. */ |
3614 rtx | 3705 rtx |
4059 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; | 4150 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1; |
4060 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; | 4151 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1; |
4061 } | 4152 } |
4062 } | 4153 } |
4063 | 4154 |
4064 /* This is called to decide when we can simplify a load instruction. We | 4155 /* This is called any time we inspect the alignment of a register for |
4065 must only return true for registers which we know will always be | 4156 addresses. */ |
4066 aligned. Taking into account that CSE might replace this reg with | |
4067 another one that has not been marked aligned. | |
4068 So this is really only true for frame, stack and virtual registers, | |
4069 which we know are always aligned and should not be adversely affected | |
4070 by CSE. */ | |
4071 static int | 4157 static int |
4072 regno_aligned_for_load (int regno) | 4158 reg_aligned_for_addr (rtx x) |
4073 { | 4159 { |
4074 return regno == FRAME_POINTER_REGNUM | 4160 int regno = |
4075 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM) | 4161 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x); |
4076 || regno == ARG_POINTER_REGNUM | 4162 return REGNO_POINTER_ALIGN (regno) >= 128; |
4077 || regno == STACK_POINTER_REGNUM | |
4078 || (regno >= FIRST_VIRTUAL_REGISTER | |
4079 && regno <= LAST_VIRTUAL_REGISTER); | |
4080 } | |
4081 | |
4082 /* Return TRUE when mem is known to be 16-byte aligned. */ | |
4083 int | |
4084 aligned_mem_p (rtx mem) | |
4085 { | |
4086 if (MEM_ALIGN (mem) >= 128) | |
4087 return 1; | |
4088 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16) | |
4089 return 1; | |
4090 if (GET_CODE (XEXP (mem, 0)) == PLUS) | |
4091 { | |
4092 rtx p0 = XEXP (XEXP (mem, 0), 0); | |
4093 rtx p1 = XEXP (XEXP (mem, 0), 1); | |
4094 if (regno_aligned_for_load (REGNO (p0))) | |
4095 { | |
4096 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1))) | |
4097 return 1; | |
4098 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0) | |
4099 return 1; | |
4100 } | |
4101 } | |
4102 else if (GET_CODE (XEXP (mem, 0)) == REG) | |
4103 { | |
4104 if (regno_aligned_for_load (REGNO (XEXP (mem, 0)))) | |
4105 return 1; | |
4106 } | |
4107 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0))) | |
4108 return 1; | |
4109 else if (GET_CODE (XEXP (mem, 0)) == CONST) | |
4110 { | |
4111 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0); | |
4112 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1); | |
4113 if (GET_CODE (p0) == SYMBOL_REF | |
4114 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0) | |
4115 return 1; | |
4116 } | |
4117 return 0; | |
4118 } | 4163 } |
4119 | 4164 |
4120 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF | 4165 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF |
4121 into its SYMBOL_REF_FLAGS. */ | 4166 into its SYMBOL_REF_FLAGS. */ |
4122 static void | 4167 static void |
4141 alignment of the parameter mode and in that case the alignment never | 4186 alignment of the parameter mode and in that case the alignment never |
4142 gets adjusted by LOCAL_ALIGNMENT. */ | 4187 gets adjusted by LOCAL_ALIGNMENT. */ |
4143 static int | 4188 static int |
4144 store_with_one_insn_p (rtx mem) | 4189 store_with_one_insn_p (rtx mem) |
4145 { | 4190 { |
4191 enum machine_mode mode = GET_MODE (mem); | |
4146 rtx addr = XEXP (mem, 0); | 4192 rtx addr = XEXP (mem, 0); |
4147 if (GET_MODE (mem) == BLKmode) | 4193 if (mode == BLKmode) |
4148 return 0; | 4194 return 0; |
4195 if (GET_MODE_SIZE (mode) >= 16) | |
4196 return 1; | |
4149 /* Only static objects. */ | 4197 /* Only static objects. */ |
4150 if (GET_CODE (addr) == SYMBOL_REF) | 4198 if (GET_CODE (addr) == SYMBOL_REF) |
4151 { | 4199 { |
4152 /* We use the associated declaration to make sure the access is | 4200 /* We use the associated declaration to make sure the access is |
4153 referring to the whole object. | 4201 referring to the whole object. |
4167 return 1; | 4215 return 1; |
4168 } | 4216 } |
4169 return 0; | 4217 return 0; |
4170 } | 4218 } |
4171 | 4219 |
4220 /* Return 1 when the address is not valid for a simple load and store as | |
4221 required by the '_mov*' patterns. We could make this less strict | |
4222 for loads, but we prefer mem's to look the same so they are more | |
4223 likely to be merged. */ | |
4224 static int | |
4225 address_needs_split (rtx mem) | |
4226 { | |
4227 if (GET_MODE_SIZE (GET_MODE (mem)) < 16 | |
4228 && (GET_MODE_SIZE (GET_MODE (mem)) < 4 | |
4229 || !(store_with_one_insn_p (mem) | |
4230 || mem_is_padded_component_ref (mem)))) | |
4231 return 1; | |
4232 | |
4233 return 0; | |
4234 } | |
4235 | |
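address_needs_split's decision in isolation: a quadword access always maps onto a single lq/stq, while a narrower access escapes the split sequence only when it is at least word-sized and provably a one-instruction store or a padded component reference. A sketch with the two predicates stubbed out as flags:

    #include <stdio.h>

    static int
    needs_split (int mode_size, int one_insn_store_p, int padded_ref_p)
    {
      return mode_size < 16
             && (mode_size < 4 || !(one_insn_store_p || padded_ref_p));
    }

    int
    main (void)
    {
      printf ("%d\n", needs_split (16, 0, 0));  /* 0: quadword */
      printf ("%d\n", needs_split (4, 1, 0));   /* 0: one-insn store */
      printf ("%d\n", needs_split (2, 1, 0));   /* 1: sub-word always */
      return 0;
    }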
4172 int | 4236 int |
4173 spu_expand_mov (rtx * ops, enum machine_mode mode) | 4237 spu_expand_mov (rtx * ops, enum machine_mode mode) |
4174 { | 4238 { |
4175 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0])) | 4239 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0])) |
4176 abort (); | 4240 abort (); |
4211 { | 4275 { |
4212 if (CONSTANT_P (ops[1])) | 4276 if (CONSTANT_P (ops[1])) |
4213 return spu_split_immediate (ops); | 4277 return spu_split_immediate (ops); |
4214 return 0; | 4278 return 0; |
4215 } | 4279 } |
4216 else | 4280 |
4217 { | 4281 /* Catch the SImode immediates greater than 0x7fffffff, and sign |
4218 if (GET_CODE (ops[0]) == MEM) | 4282 extend them. */ |
4219 { | 4283 if (GET_CODE (ops[1]) == CONST_INT) |
4220 if (!spu_valid_move (ops)) | 4284 { |
4221 { | 4285 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode); |
4222 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode), | 4286 if (val != INTVAL (ops[1])) |
4223 gen_reg_rtx (TImode))); | 4287 { |
4224 return 1; | 4288 emit_move_insn (ops[0], GEN_INT (val)); |
4225 } | 4289 return 1; |
4226 } | 4290 } |
4227 else if (GET_CODE (ops[1]) == MEM) | 4291 } |
4228 { | 4292 if (MEM_P (ops[0])) |
4229 if (!spu_valid_move (ops)) | 4293 return spu_split_store (ops); |
4230 { | 4294 if (MEM_P (ops[1])) |
4231 emit_insn (gen_load | 4295 return spu_split_load (ops); |
4232 (ops[0], ops[1], gen_reg_rtx (TImode), | 4296 |
4233 gen_reg_rtx (SImode))); | |
4234 return 1; | |
4235 } | |
4236 } | |
4237 /* Catch the SImode immediates greater than 0x7fffffff, and sign | |
4238 extend them. */ | |
4239 if (GET_CODE (ops[1]) == CONST_INT) | |
4240 { | |
4241 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode); | |
4242 if (val != INTVAL (ops[1])) | |
4243 { | |
4244 emit_move_insn (ops[0], GEN_INT (val)); | |
4245 return 1; | |
4246 } | |
4247 } | |
4248 } | |
4249 return 0; | 4297 return 0; |
4250 } | 4298 } |
4251 | 4299 |
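The CONST_INT case added to spu_expand_mov relies on trunc_int_for_mode's sign extension: an SImode constant written as 0x80000000 is re-emitted as the canonical negative value. The same effect on the host (trunc_for_si is an illustrative stand-in):

    #include <stdio.h>

    static long long
    trunc_for_si (long long val)
    {
      return (long long) (int) val;   /* keep 32 bits, sign-extend */
    }

    int
    main (void)
    {
      long long v = 0x80000000LL;     /* written as a positive constant */
      long long canon = trunc_for_si (v);
      if (canon != v)
        printf ("re-emit as %lld\n", canon);  /* -2147483648 */
      return 0;
    }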
4252 void | 4300 static void |
4253 spu_split_load (rtx * ops) | 4301 spu_convert_move (rtx dst, rtx src) |
4254 { | 4302 { |
4255 enum machine_mode mode = GET_MODE (ops[0]); | 4303 enum machine_mode mode = GET_MODE (dst); |
4256 rtx addr, load, rot, mem, p0, p1; | 4304 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0); |
4305 rtx reg; | |
4306 gcc_assert (GET_MODE (src) == TImode); | |
4307 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst; | |
4308 emit_insn (gen_rtx_SET (VOIDmode, reg, | |
4309 gen_rtx_TRUNCATE (int_mode, | |
4310 gen_rtx_LSHIFTRT (TImode, src, | |
4311 GEN_INT (int_mode == DImode ? 64 : 96))))); | |
4312 if (int_mode != mode) | |
4313 { | |
4314 reg = simplify_gen_subreg (mode, reg, int_mode, 0); | |
4315 emit_move_insn (dst, reg); | |
4316 } | |
4317 } | |
4318 | |
4319 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using | |
4320 the address from SRC and SRC+16. Return a REG or CONST_INT that | |
4321 specifies how many bytes to rotate the loaded registers, plus any | |
4322 extra from EXTRA_ROTQBY. The address and rotate amounts are | |
4323 normalized to improve merging of loads and rotate computations. */ | |
4324 static rtx | |
4325 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby) | |
4326 { | |
4327 rtx addr = XEXP (src, 0); | |
4328 rtx p0, p1, rot, addr0, addr1; | |
4257 int rot_amt; | 4329 int rot_amt; |
4258 | |
4259 addr = XEXP (ops[1], 0); | |
4260 | 4330 |
4261 rot = 0; | 4331 rot = 0; |
4262 rot_amt = 0; | 4332 rot_amt = 0; |
4263 if (GET_CODE (addr) == PLUS) | 4333 |
4334 if (MEM_ALIGN (src) >= 128) | |
4335 /* Address is already aligned; simply perform a TImode load. */ ; | |
4336 else if (GET_CODE (addr) == PLUS) | |
4264 { | 4337 { |
4265 /* 8 cases: | 4338 /* 8 cases: |
4266 aligned reg + aligned reg => lqx | 4339 aligned reg + aligned reg => lqx |
4267 aligned reg + unaligned reg => lqx, rotqby | 4340 aligned reg + unaligned reg => lqx, rotqby |
4268 aligned reg + aligned const => lqd | 4341 aligned reg + aligned const => lqd |
4272 unaligned reg + aligned const => lqd, rotqby | 4345 unaligned reg + aligned const => lqd, rotqby |
4273 unaligned reg + unaligned const -> not allowed by legitimate address | 4346 unaligned reg + unaligned const -> not allowed by legitimate address |
4274 */ | 4347 */ |
4275 p0 = XEXP (addr, 0); | 4348 p0 = XEXP (addr, 0); |
4276 p1 = XEXP (addr, 1); | 4349 p1 = XEXP (addr, 1); |
4277 if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0))) | 4350 if (!reg_aligned_for_addr (p0)) |
4278 { | 4351 { |
4279 if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1))) | 4352 if (REG_P (p1) && !reg_aligned_for_addr (p1)) |
4280 { | 4353 { |
4281 emit_insn (gen_addsi3 (ops[3], p0, p1)); | 4354 rot = gen_reg_rtx (SImode); |
4282 rot = ops[3]; | 4355 emit_insn (gen_addsi3 (rot, p0, p1)); |
4356 } | |
4357 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15)) | |
4358 { | |
4359 if (INTVAL (p1) > 0 | |
4360 && REG_POINTER (p0) | |
4361 && INTVAL (p1) * BITS_PER_UNIT | |
4362 < REGNO_POINTER_ALIGN (REGNO (p0))) | |
4363 { | |
4364 rot = gen_reg_rtx (SImode); | |
4365 emit_insn (gen_addsi3 (rot, p0, p1)); | |
4366 addr = p0; | |
4367 } | |
4368 else | |
4369 { | |
4370 rtx x = gen_reg_rtx (SImode); | |
4371 emit_move_insn (x, p1); | |
4372 if (!spu_arith_operand (p1, SImode)) | |
4373 p1 = x; | |
4374 rot = gen_reg_rtx (SImode); | |
4375 emit_insn (gen_addsi3 (rot, p0, p1)); | |
4376 addr = gen_rtx_PLUS (Pmode, p0, x); | |
4377 } | |
4283 } | 4378 } |
4284 else | 4379 else |
4285 rot = p0; | 4380 rot = p0; |
4286 } | 4381 } |
4287 else | 4382 else |
4288 { | 4383 { |
4289 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15)) | 4384 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15)) |
4290 { | 4385 { |
4291 rot_amt = INTVAL (p1) & 15; | 4386 rot_amt = INTVAL (p1) & 15; |
4292 p1 = GEN_INT (INTVAL (p1) & -16); | 4387 if (INTVAL (p1) & -16) |
4293 addr = gen_rtx_PLUS (SImode, p0, p1); | 4388 { |
4389 p1 = GEN_INT (INTVAL (p1) & -16); | |
4390 addr = gen_rtx_PLUS (SImode, p0, p1); | |
4391 } | |
4392 else | |
4393 addr = p0; | |
4294 } | 4394 } |
4295 else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1))) | 4395 else if (REG_P (p1) && !reg_aligned_for_addr (p1)) |
4296 rot = p1; | 4396 rot = p1; |
4297 } | 4397 } |
4298 } | 4398 } |
4299 else if (GET_CODE (addr) == REG) | 4399 else if (REG_P (addr)) |
4300 { | 4400 { |
4301 if (!regno_aligned_for_load (REGNO (addr))) | 4401 if (!reg_aligned_for_addr (addr)) |
4302 rot = addr; | 4402 rot = addr; |
4303 } | 4403 } |
4304 else if (GET_CODE (addr) == CONST) | 4404 else if (GET_CODE (addr) == CONST) |
4305 { | 4405 { |
4306 if (GET_CODE (XEXP (addr, 0)) == PLUS | 4406 if (GET_CODE (XEXP (addr, 0)) == PLUS |
4315 GEN_INT (rot_amt & -16))); | 4415 GEN_INT (rot_amt & -16))); |
4316 else | 4416 else |
4317 addr = XEXP (XEXP (addr, 0), 0); | 4417 addr = XEXP (XEXP (addr, 0), 0); |
4318 } | 4418 } |
4319 else | 4419 else |
4320 rot = addr; | 4420 { |
4421 rot = gen_reg_rtx (Pmode); | |
4422 emit_move_insn (rot, addr); | |
4423 } | |
4321 } | 4424 } |
4322 else if (GET_CODE (addr) == CONST_INT) | 4425 else if (GET_CODE (addr) == CONST_INT) |
4323 { | 4426 { |
4324 rot_amt = INTVAL (addr); | 4427 rot_amt = INTVAL (addr); |
4325 addr = GEN_INT (rot_amt & -16); | 4428 addr = GEN_INT (rot_amt & -16); |
4326 } | 4429 } |
4327 else if (!ALIGNED_SYMBOL_REF_P (addr)) | 4430 else if (!ALIGNED_SYMBOL_REF_P (addr)) |
4328 rot = addr; | 4431 { |
4329 | 4432 rot = gen_reg_rtx (Pmode); |
4330 if (GET_MODE_SIZE (mode) < 4) | 4433 emit_move_insn (rot, addr); |
4331 rot_amt += GET_MODE_SIZE (mode) - 4; | 4434 } |
4435 | |
4436 rot_amt += extra_rotby; | |
4332 | 4437 |
4333 rot_amt &= 15; | 4438 rot_amt &= 15; |
4334 | 4439 |
4335 if (rot && rot_amt) | 4440 if (rot && rot_amt) |
4336 { | 4441 { |
4337 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt))); | 4442 rtx x = gen_reg_rtx (SImode); |
4338 rot = ops[3]; | 4443 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt))); |
4444 rot = x; | |
4339 rot_amt = 0; | 4445 rot_amt = 0; |
4340 } | 4446 } |
4341 | 4447 if (!rot && rot_amt) |
4342 load = ops[2]; | 4448 rot = GEN_INT (rot_amt); |
4343 | 4449 |
4344 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16)); | 4450 addr0 = copy_rtx (addr); |
4345 mem = change_address (ops[1], TImode, addr); | 4451 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16)); |
4346 | 4452 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0))); |
4347 emit_insn (gen_movti (load, mem)); | 4453 |
4454 if (dst1) | |
4455 { | |
4456 addr1 = plus_constant (copy_rtx (addr), 16); | |
4457 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16)); | |
4458 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1))); | |
4459 } | |
4460 | |
4461 return rot; | |
4462 } | |
4463 | |
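spu_expand_load is the SPU's recipe for unaligned data: local store is only quadword-addressable, so the enclosing 16-byte block at (address & -16) is loaded and rotated left by (address & 15) bytes to bring the wanted bytes to the front; when the value straddles a quadword boundary, a second load (dst1 above) is merged in with selb. A byte-level model of the single-quadword case (load_unaligned is an illustrative name):

    #include <stdio.h>

    static void
    load_unaligned (const unsigned char *mem, long addr,
                    unsigned char out[16])
    {
      const unsigned char *qw = mem + (addr & -16L);
      long rot = addr & 15;
      for (int i = 0; i < 16; i++)    /* rotqby analogue: bytes wrap */
        out[i] = qw[(i + rot) & 15];
    }

    int
    main (void)
    {
      unsigned char mem[32], out[16];
      for (int i = 0; i < 32; i++)
        mem[i] = i;
      load_unaligned (mem, 5, out);
      printf ("%d %d %d %d\n", out[0], out[1], out[2], out[3]); /* 5 6 7 8 */
      return 0;
    }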
4464 int | |
4465 spu_split_load (rtx * ops) | |
4466 { | |
4467 enum machine_mode mode = GET_MODE (ops[0]); | |
4468 rtx addr, load, rot; | |
4469 int rot_amt; | |
4470 | |
4471 if (GET_MODE_SIZE (mode) >= 16) | |
4472 return 0; | |
4473 | |
4474 addr = XEXP (ops[1], 0); | |
4475 gcc_assert (GET_CODE (addr) != AND); | |
4476 | |
4477 if (!address_needs_split (ops[1])) | |
4478 { | |
4479 ops[1] = change_address (ops[1], TImode, addr); | |
4480 load = gen_reg_rtx (TImode); | |
4481 emit_insn (gen__movti (load, ops[1])); | |
4482 spu_convert_move (ops[0], load); | |
4483 return 1; | |
4484 } | |
4485 | |
4486 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0; | |
4487 | |
4488 load = gen_reg_rtx (TImode); | |
4489 rot = spu_expand_load (load, 0, ops[1], rot_amt); | |
4348 | 4490 |
4349 if (rot) | 4491 if (rot) |
4350 emit_insn (gen_rotqby_ti (load, load, rot)); | 4492 emit_insn (gen_rotqby_ti (load, load, rot)); |
4351 else if (rot_amt) | 4493 |
4352 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8))); | 4494 spu_convert_move (ops[0], load); |
4353 | 4495 return 1; |
4354 if (reload_completed) | 4496 } |
4355 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load))); | 4497 |
4356 else | 4498 int |
4357 emit_insn (gen_spu_convert (ops[0], load)); | |
4358 } | |
4359 | |
4360 void | |
4361 spu_split_store (rtx * ops) | 4499 spu_split_store (rtx * ops) |
4362 { | 4500 { |
4363 enum machine_mode mode = GET_MODE (ops[0]); | 4501 enum machine_mode mode = GET_MODE (ops[0]); |
4364 rtx pat = ops[2]; | 4502 rtx reg; |
4365 rtx reg = ops[3]; | |
4366 rtx addr, p0, p1, p1_lo, smem; | 4503 rtx addr, p0, p1, p1_lo, smem; |
4367 int aform; | 4504 int aform; |
4368 int scalar; | 4505 int scalar; |
4369 | 4506 |
4507 if (GET_MODE_SIZE (mode) >= 16) | |
4508 return 0; | |
4509 | |
4370 addr = XEXP (ops[0], 0); | 4510 addr = XEXP (ops[0], 0); |
4511 gcc_assert (GET_CODE (addr) != AND); | |
4512 | |
4513 if (!address_needs_split (ops[0])) | |
4514 { | |
4515 reg = gen_reg_rtx (TImode); | |
4516 emit_insn (gen_spu_convert (reg, ops[1])); | |
4517 ops[0] = change_address (ops[0], TImode, addr); | |
4518 emit_move_insn (ops[0], reg); | |
4519 return 1; | |
4520 } | |
4371 | 4521 |
4372 if (GET_CODE (addr) == PLUS) | 4522 if (GET_CODE (addr) == PLUS) |
4373 { | 4523 { |
4374 /* 8 cases: | 4524 /* 8 cases: |
4375 aligned reg + aligned reg => lqx, c?x, shuf, stqx | 4525 aligned reg + aligned reg => lqx, c?x, shuf, stqx |
4377 aligned reg + aligned const => lqd, c?d, shuf, stqx | 4527 aligned reg + aligned const => lqd, c?d, shuf, stqx |
4378 aligned reg + unaligned const => lqd, c?d, shuf, stqx | 4528 aligned reg + unaligned const => lqd, c?d, shuf, stqx |
4379 unaligned reg + aligned reg => lqx, c?x, shuf, stqx | 4529 unaligned reg + aligned reg => lqx, c?x, shuf, stqx |
4380 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx | 4530 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx |
4381 unaligned reg + aligned const => lqd, c?d, shuf, stqx | 4531 unaligned reg + aligned const => lqd, c?d, shuf, stqx |
4382 unaligned reg + unaligned const -> not allowed by legitimate address | 4532 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx |
4383 */ | 4533 */ |
4384 aform = 0; | 4534 aform = 0; |
4385 p0 = XEXP (addr, 0); | 4535 p0 = XEXP (addr, 0); |
4386 p1 = p1_lo = XEXP (addr, 1); | 4536 p1 = p1_lo = XEXP (addr, 1); |
4387 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT) | 4537 if (REG_P (p0) && GET_CODE (p1) == CONST_INT) |
4388 { | 4538 { |
4389 p1_lo = GEN_INT (INTVAL (p1) & 15); | 4539 p1_lo = GEN_INT (INTVAL (p1) & 15); |
4390 p1 = GEN_INT (INTVAL (p1) & -16); | 4540 if (reg_aligned_for_addr (p0)) |
4391 addr = gen_rtx_PLUS (SImode, p0, p1); | 4541 { |
4392 } | 4542 p1 = GEN_INT (INTVAL (p1) & -16); |
4393 } | 4543 if (p1 == const0_rtx) |
4394 else if (GET_CODE (addr) == REG) | 4544 addr = p0; |
4545 else | |
4546 addr = gen_rtx_PLUS (SImode, p0, p1); | |
4547 } | |
4548 else | |
4549 { | |
4550 rtx x = gen_reg_rtx (SImode); | |
4551 emit_move_insn (x, p1); | |
4552 addr = gen_rtx_PLUS (SImode, p0, x); | |
4553 } | |
4554 } | |
4555 } | |
4556 else if (REG_P (addr)) | |
4395 { | 4557 { |
4396 aform = 0; | 4558 aform = 0; |
4397 p0 = addr; | 4559 p0 = addr; |
4398 p1 = p1_lo = const0_rtx; | 4560 p1 = p1_lo = const0_rtx; |
4399 } | 4561 } |
4403 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM); | 4565 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM); |
4404 p1 = 0; /* aform doesn't use p1 */ | 4566 p1 = 0; /* aform doesn't use p1 */ |
4405 p1_lo = addr; | 4567 p1_lo = addr; |
4406 if (ALIGNED_SYMBOL_REF_P (addr)) | 4568 if (ALIGNED_SYMBOL_REF_P (addr)) |
4407 p1_lo = const0_rtx; | 4569 p1_lo = const0_rtx; |
4408 else if (GET_CODE (addr) == CONST) | 4570 else if (GET_CODE (addr) == CONST |
4409 { | 4571 && GET_CODE (XEXP (addr, 0)) == PLUS |
4410 if (GET_CODE (XEXP (addr, 0)) == PLUS | 4572 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0)) |
4411 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0)) | 4573 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT) |
4412 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT) | 4574 { |
4413 { | 4575 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1)); |
4414 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1)); | 4576 if ((v & -16) != 0) |
4415 if ((v & -16) != 0) | 4577 addr = gen_rtx_CONST (Pmode, |
4416 addr = gen_rtx_CONST (Pmode, | 4578 gen_rtx_PLUS (Pmode, |
4417 gen_rtx_PLUS (Pmode, | 4579 XEXP (XEXP (addr, 0), 0), |
4418 XEXP (XEXP (addr, 0), 0), | 4580 GEN_INT (v & -16))); |
4419 GEN_INT (v & -16))); | 4581 else |
4420 else | 4582 addr = XEXP (XEXP (addr, 0), 0); |
4421 addr = XEXP (XEXP (addr, 0), 0); | 4583 p1_lo = GEN_INT (v & 15); |
4422 p1_lo = GEN_INT (v & 15); | |
4423 } | |
4424 } | 4584 } |
4425 else if (GET_CODE (addr) == CONST_INT) | 4585 else if (GET_CODE (addr) == CONST_INT) |
4426 { | 4586 { |
4427 p1_lo = GEN_INT (INTVAL (addr) & 15); | 4587 p1_lo = GEN_INT (INTVAL (addr) & 15); |
4428 addr = GEN_INT (INTVAL (addr) & -16); | 4588 addr = GEN_INT (INTVAL (addr) & -16); |
4429 } | 4589 } |
4430 } | 4590 else |
4431 | 4591 { |
4432 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16)); | 4592 p1_lo = gen_reg_rtx (SImode); |
4593 emit_move_insn (p1_lo, addr); | |
4594 } | |
4595 } | |
4596 | |
4597 reg = gen_reg_rtx (TImode); | |
4433 | 4598 |
4434 scalar = store_with_one_insn_p (ops[0]); | 4599 scalar = store_with_one_insn_p (ops[0]); |
4435 if (!scalar) | 4600 if (!scalar) |
4436 { | 4601 { |
4437 /* We could copy the flags from the ops[0] MEM to mem here. | 4602 /* We could copy the flags from the ops[0] MEM to mem here. |
4438 We don't because we want this load to be optimized away if | 4603 We don't because we want this load to be optimized away if |
4439 possible, and copying the flags will prevent that in certain | 4604 possible, and copying the flags will prevent that in certain |
4440 cases, e.g. consider the volatile flag. */ | 4605 cases, e.g. consider the volatile flag. */ |
4441 | 4606 |
4607 rtx pat = gen_reg_rtx (TImode); | |
4442 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr)); | 4608 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr)); |
4443 set_mem_alias_set (lmem, 0); | 4609 set_mem_alias_set (lmem, 0); |
4444 emit_insn (gen_movti (reg, lmem)); | 4610 emit_insn (gen_movti (reg, lmem)); |
4445 | 4611 |
4446 if (!p0 || regno_aligned_for_load (REGNO (p0))) | 4612 if (!p0 || reg_aligned_for_addr (p0)) |
4447 p0 = stack_pointer_rtx; | 4613 p0 = stack_pointer_rtx; |
4448 if (!p1_lo) | 4614 if (!p1_lo) |
4449 p1_lo = const0_rtx; | 4615 p1_lo = const0_rtx; |
4450 | 4616 |
4451 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode)))); | 4617 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode)))); |
4452 emit_insn (gen_shufb (reg, ops[1], reg, pat)); | 4618 emit_insn (gen_shufb (reg, ops[1], reg, pat)); |
4453 } | |
4454 else if (reload_completed) | |
4455 { | |
4456 if (GET_CODE (ops[1]) == REG) | |
4457 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1]))); | |
4458 else if (GET_CODE (ops[1]) == SUBREG) | |
4459 emit_move_insn (reg, | |
4460 gen_rtx_REG (GET_MODE (reg), | |
4461 REGNO (SUBREG_REG (ops[1])))); | |
4462 else | |
4463 abort (); | |
4464 } | 4619 } |
4465 else | 4620 else |
4466 { | 4621 { |
4467 if (GET_CODE (ops[1]) == REG) | 4622 if (GET_CODE (ops[1]) == REG) |
4468 emit_insn (gen_spu_convert (reg, ops[1])); | 4623 emit_insn (gen_spu_convert (reg, ops[1])); |
4471 else | 4626 else |
4472 abort (); | 4627 abort (); |
4473 } | 4628 } |
4474 | 4629 |
4475 if (GET_MODE_SIZE (mode) < 4 && scalar) | 4630 if (GET_MODE_SIZE (mode) < 4 && scalar) |
4476 emit_insn (gen_shlqby_ti | 4631 emit_insn (gen_ashlti3 |
4477 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode)))); | 4632 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode)))); |
4478 | 4633 |
4479 smem = change_address (ops[0], TImode, addr); | 4634 smem = change_address (ops[0], TImode, copy_rtx (addr)); |
4480 /* We can't use the previous alias set because the memory has changed | 4635 /* We can't use the previous alias set because the memory has changed |
4481 size and can potentially overlap objects of other types. */ | 4636 size and can potentially overlap objects of other types. */ |
4482 set_mem_alias_set (smem, 0); | 4637 set_mem_alias_set (smem, 0); |
4483 | 4638 |
4484 emit_insn (gen_movti (smem, reg)); | 4639 emit_insn (gen_movti (smem, reg)); |
4640 return 1; | |
4485 } | 4641 } |
4486 | 4642 |
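The split store is the mirror image: load the enclosing quadword, splice the value's bytes in at (address & 15) (the job cpat and shufb do in the emitted sequence), and write the whole quadword back. The change above also switches sub-word left-justification from a byte rotate (shlqby) to a bit shift (ashlti3). A byte-level model, assuming natural alignment keeps the value inside one quadword:

    #include <stdio.h>
    #include <string.h>

    static void
    store_unaligned (unsigned char *mem, long addr,
                     const unsigned char *val, int n)
    {
      unsigned char *qw = mem + (addr & -16L);
      long lo = addr & 15;             /* assumes lo + n <= 16 */
      unsigned char tmp[16];
      memcpy (tmp, qw, 16);            /* lq: read   */
      memcpy (tmp + lo, val, n);       /* cpat+shufb: modify */
      memcpy (qw, tmp, 16);            /* stq: write */
    }

    int
    main (void)
    {
      unsigned char mem[16] = { 0 };
      unsigned char v[4] = { 1, 2, 3, 4 };
      store_unaligned (mem, 6, v, 4);
      printf ("%d %d %d %d\n", mem[6], mem[7], mem[8], mem[9]); /* 1 2 3 4 */
      return 0;
    }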
4487 /* Return TRUE if X is MEM which is a struct member reference | 4643 /* Return TRUE if X is MEM which is a struct member reference |
4488 and the member can safely be loaded and stored with a single | 4644 and the member can safely be loaded and stored with a single |
4489 instruction because it is padded. */ | 4645 instruction because it is padded. */ |
4576 break; | 4732 break; |
4577 | 4733 |
4578 *comma = ','; | 4734 *comma = ','; |
4579 str = comma + 1; | 4735 str = comma + 1; |
4580 } | 4736 } |
4581 } | |
4582 | |
4583 int | |
4584 spu_valid_move (rtx * ops) | |
4585 { | |
4586 enum machine_mode mode = GET_MODE (ops[0]); | |
4587 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode)) | |
4588 return 0; | |
4589 | |
4590 /* init_expr_once tries to recog against load and store insns to set | |
4591 the direct_load[] and direct_store[] arrays. We always want to | |
4592 consider those loads and stores valid. init_expr_once is called in | |
4593 the context of a dummy function which does not have a decl. */ | |
4594 if (cfun->decl == 0) | |
4595 return 1; | |
4596 | |
4597 /* Don't allow loads/stores which would require more than 1 insn. | |
4598 During and after reload we assume loads and stores only take 1 | |
4599 insn. */ | |
4600 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed) | |
4601 { | |
4602 if (GET_CODE (ops[0]) == MEM | |
4603 && (GET_MODE_SIZE (mode) < 4 | |
4604 || !(store_with_one_insn_p (ops[0]) | |
4605 || mem_is_padded_component_ref (ops[0])))) | |
4606 return 0; | |
4607 if (GET_CODE (ops[1]) == MEM | |
4608 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1]))) | |
4609 return 0; | |
4610 } | |
4611 return 1; | |
4612 } | 4737 } |
4613 | 4738 |
4614 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that | 4739 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that |
4615 can be generated using the fsmbi instruction. */ | 4740 can be generated using the fsmbi instruction. */ |
4616 int | 4741 int |
6322 } | 6447 } |
6323 | 6448 |
6324 | 6449 |
6325 void | 6450 void |
6326 spu_init_expanders (void) | 6451 spu_init_expanders (void) |
6327 { | 6452 { |
6328 /* HARD_FRAME_REGISTER is only 128 bit aligned when | |
6329 * frame_pointer_needed is true. We don't know that until we're | |
6330 * expanding the prologue. */ | |
6331 if (cfun) | 6453 if (cfun) |
6332 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8; | 6454 { |
6455 rtx r0, r1; | |
6456 /* HARD_FRAME_REGISTER is only 128 bit aligned when | |
6457 frame_pointer_needed is true. We don't know that until we're | |
6458 expanding the prologue. */ | |
6459 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8; | |
6460 | |
6461 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and | |
6462 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them | |
6463 to be treated as aligned, so generate them here. */ | |
6464 r0 = gen_reg_rtx (SImode); | |
6465 r1 = gen_reg_rtx (SImode); | |
6466 mark_reg_pointer (r0, 128); | |
6467 mark_reg_pointer (r1, 128); | |
6468 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1 | |
6469 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2); | |
6470 } | |
6333 } | 6471 } |
6334 | 6472 |
6335 static enum machine_mode | 6473 static enum machine_mode |
6336 spu_libgcc_cmp_return_mode (void) | 6474 spu_libgcc_cmp_return_mode (void) |
6337 { | 6475 { |
6370 if (strcmp (name, ".toe") == 0) | 6508 if (strcmp (name, ".toe") == 0) |
6371 return SECTION_BSS; | 6509 return SECTION_BSS; |
6372 return default_section_type_flags (decl, name, reloc); | 6510 return default_section_type_flags (decl, name, reloc); |
6373 } | 6511 } |
6374 | 6512 |
6513 /* Generate a constant or register which contains 2^SCALE. We assume | |
6514 the result is valid for MODE. Currently, MODE must be V4SFmode and | |
6515 SCALE must be SImode. */ | |
6516 rtx | |
6517 spu_gen_exp2 (enum machine_mode mode, rtx scale) | |
6518 { | |
6519 gcc_assert (mode == V4SFmode); | |
6520 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT); | |
6521 if (GET_CODE (scale) != CONST_INT) | |
6522 { | |
6523 /* unsigned int exp = (127 + scale) << 23; | |
6524 __vector float m = (__vector float) spu_splats (exp); */ | |
6525 rtx reg = force_reg (SImode, scale); | |
6526 rtx exp = gen_reg_rtx (SImode); | |
6527 rtx mul = gen_reg_rtx (mode); | |
6528 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127))); | |
6529 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23))); | |
6530 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0))); | |
6531 return mul; | |
6532 } | |
6533 else | |
6534 { | |
6535 HOST_WIDE_INT exp = 127 + INTVAL (scale); | |
6536 unsigned char arr[16]; | |
6537 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1; | |
6538 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7; | |
6539 arr[2] = arr[6] = arr[10] = arr[14] = 0; | |
6540 arr[3] = arr[7] = arr[11] = arr[15] = 0; | |
6541 return array_to_constant (mode, arr); | |
6542 } | |
6543 } | |
6544 | |
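spu_gen_exp2 materializes 2^scale without a table: for SFmode the value is just the biased exponent field, (127 + scale) << 23, with a zero mantissa, splatted across the vector; the constant path stores the same word bytewise (exp >> 1 and exp << 7 split the 8-bit field across the first two big-endian bytes). The scalar computation on the host:

    #include <stdio.h>
    #include <string.h>

    static float
    exp2_float (int scale)
    {
      unsigned int bits = (unsigned int) (127 + scale) << 23;
      float f;
      memcpy (&f, &bits, 4);
      return f;
    }

    int
    main (void)
    {
      printf ("%g %g %g\n", exp2_float (0), exp2_float (5), exp2_float (-3));
      /* prints: 1 32 0.125 */
      return 0;
    }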
6545 /* After reload, just change the convert into a move instruction | |
6546 or a dead instruction. */ | |
6547 void | |
6548 spu_split_convert (rtx ops[]) | |
6549 { | |
6550 if (REGNO (ops[0]) == REGNO (ops[1])) | |
6551 emit_note (NOTE_INSN_DELETED); | |
6552 else | |
6553 { | |
6554 /* Use TImode always as this might help hard reg copyprop. */ | |
6555 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0])); | |
6556 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1])); | |
6557 emit_insn (gen_move_insn (op0, op1)); | |
6558 } | |
6559 } | |
6560 | |
6375 #include "gt-spu.h" | 6561 #include "gt-spu.h" |
6376 | 6562 |