comparison of gcc/config/spu/spu.c @ changeset 47:3bfb6c00c1e0

commit message: update it from 4.4.2 to 4.4.3.
author:         kent <kent@cr.ie.u-ryukyu.ac.jp>
date:           Sun, 07 Feb 2010 17:44:34 +0900
parents:        58ad6c70ea60
children:       77e2b8dfacca
comparing revision 46:b85a337e5837 with revision 47:3bfb6c00c1e0
@@ -187 +187 @@
                                             const_tree type, unsigned char named);
 static tree spu_build_builtin_va_list (void);
 static void spu_va_start (tree, rtx);
 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
                                       gimple_seq * pre_p, gimple_seq * post_p);
-static int regno_aligned_for_load (int regno);
 static int store_with_one_insn_p (rtx mem);
 static int mem_is_padded_component_ref (rtx x);
+static int reg_aligned_for_addr (rtx x);
 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
 static void spu_asm_globalize_label (FILE * file, const char *name);
 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
                                     int *total, bool speed);
 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
@@ -208 +208 @@
 static bool spu_vector_alignment_reachable (const_tree, bool);
 static tree spu_builtin_vec_perm (tree, tree *);
 static int spu_sms_res_mii (struct ddg *g);
 static void asm_file_start (void);
 static unsigned int spu_section_type_flags (tree, const char *, int);
+static rtx spu_expand_load (rtx, rtx, rtx, int);

 extern const char *reg_names[];
 rtx spu_compare_op0, spu_compare_op1;

 /* Which instruction set architecture to use.  */
@@ -574 +575 @@
 }

 void
 spu_expand_extv (rtx ops[], int unsignedp)
 {
+  rtx dst = ops[0], src = ops[1];
   HOST_WIDE_INT width = INTVAL (ops[2]);
   HOST_WIDE_INT start = INTVAL (ops[3]);
-  HOST_WIDE_INT src_size, dst_size;
-  enum machine_mode src_mode, dst_mode;
-  rtx dst = ops[0], src = ops[1];
-  rtx s;
-
-  dst = adjust_operand (ops[0], 0);
-  dst_mode = GET_MODE (dst);
-  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
-
-  src = adjust_operand (src, &start);
-  src_mode = GET_MODE (src);
-  src_size = GET_MODE_BITSIZE (GET_MODE (src));
-
-  if (start > 0)
-    {
-      s = gen_reg_rtx (src_mode);
-      switch (src_mode)
-        {
-        case SImode:
-          emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
-          break;
-        case DImode:
-          emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
-          break;
-        case TImode:
-          emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
-          break;
-        default:
-          abort ();
-        }
-      src = s;
-    }
-
-  if (width < src_size)
-    {
-      rtx pat;
-      int icode;
-      switch (src_mode)
-        {
-        case SImode:
-          icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
-          break;
-        case DImode:
-          icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
-          break;
-        case TImode:
-          icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
-          break;
-        default:
-          abort ();
-        }
-      s = gen_reg_rtx (src_mode);
-      pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
-      emit_insn (pat);
-      src = s;
-    }
-
-  convert_move (dst, src, unsignedp);
+  HOST_WIDE_INT align_mask;
+  rtx s0, s1, mask, r0;
+
+  gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
+
+  if (MEM_P (src))
+    {
+      /* First, determine if we need 1 TImode load or 2.  We need only 1
+         if the bits being extracted do not cross the alignment boundary
+         as determined by the MEM and its address. */
+
+      align_mask = -MEM_ALIGN (src);
+      if ((start & align_mask) == ((start + width - 1) & align_mask))
+        {
+          /* Alignment is sufficient for 1 load. */
+          s0 = gen_reg_rtx (TImode);
+          r0 = spu_expand_load (s0, 0, src, start / 8);
+          start &= 7;
+          if (r0)
+            emit_insn (gen_rotqby_ti (s0, s0, r0));
+        }
+      else
+        {
+          /* Need 2 loads. */
+          s0 = gen_reg_rtx (TImode);
+          s1 = gen_reg_rtx (TImode);
+          r0 = spu_expand_load (s0, s1, src, start / 8);
+          start &= 7;
+
+          gcc_assert (start + width <= 128);
+          if (r0)
+            {
+              rtx r1 = gen_reg_rtx (SImode);
+              mask = gen_reg_rtx (TImode);
+              emit_move_insn (mask, GEN_INT (-1));
+              emit_insn (gen_rotqby_ti (s0, s0, r0));
+              emit_insn (gen_rotqby_ti (s1, s1, r0));
+              if (GET_CODE (r0) == CONST_INT)
+                r1 = GEN_INT (INTVAL (r0) & 15);
+              else
+                emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
+              emit_insn (gen_shlqby_ti (mask, mask, r1));
+              emit_insn (gen_selb (s0, s1, s0, mask));
+            }
+        }
+
+    }
+  else if (GET_CODE (src) == SUBREG)
+    {
+      rtx r = SUBREG_REG (src);
+      gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
+      s0 = gen_reg_rtx (TImode);
+      if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
+        emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
+      else
+        emit_move_insn (s0, src);
+    }
+  else
+    {
+      gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
+      s0 = gen_reg_rtx (TImode);
+      emit_move_insn (s0, src);
+    }
+
+  /* Now s0 is TImode and contains the bits to extract at start. */
+
+  if (start)
+    emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
+
+  if (128 - width)
+    {
+      tree c = build_int_cst (NULL_TREE, 128 - width);
+      s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
+    }
+
+  emit_move_insn (dst, s0);
 }

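Note: the rewritten spu_expand_extv above always operates on a full 128-bit quadword: the source is brought into a TImode register (one or two aligned loads for a MEM), rotated left by the bit offset, and then shifted right by 128 - width, logically or arithmetically depending on unsignedp. A minimal host-side model of that final rotate-and-shift arithmetic, using GCC's unsigned __int128 purely for illustration (the helper name is made up, not from spu.c):

#include <stdint.h>

/* Illustrative only: extract WIDTH bits starting at bit START (counted
   from the most significant end) of a 128-bit value, the way the new
   spu_expand_extv arranges it: rotate left by START, then shift right
   by 128 - WIDTH.  This models the unsigned (zero-extending) case; the
   signed case would use an arithmetic right shift instead.  */
static unsigned __int128
extract_bits_128 (unsigned __int128 v, int start, int width)
{
  if (start)
    v = (v << start) | (v >> (128 - start));  /* rotlti3 */
  if (128 - width)
    v = v >> (128 - width);                   /* lshrti3 / ashrti3 */
  return v;
}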
 void
 spu_expand_insv (rtx ops[])
 {
@@ -726 +746 @@
         default:
           abort ();
         }
       if (GET_CODE (ops[0]) == MEM)
         {
-          rtx aligned = gen_reg_rtx (SImode);
           rtx low = gen_reg_rtx (SImode);
-          rtx addr = gen_reg_rtx (SImode);
           rtx rotl = gen_reg_rtx (SImode);
           rtx mask0 = gen_reg_rtx (TImode);
+          rtx addr;
+          rtx addr0;
+          rtx addr1;
           rtx mem;

-          emit_move_insn (addr, XEXP (ops[0], 0));
-          emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
+          addr = force_reg (Pmode, XEXP (ops[0], 0));
+          addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
           emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
           emit_insn (gen_negsi2 (rotl, low));
           emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
           emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
-          mem = change_address (ops[0], TImode, aligned);
+          mem = change_address (ops[0], TImode, addr0);
           set_mem_alias_set (mem, 0);
           emit_move_insn (dst, mem);
           emit_insn (gen_selb (dst, dst, shift_reg, mask0));
-          emit_move_insn (mem, dst);
           if (start + width > MEM_ALIGN (ops[0]))
             {
               rtx shl = gen_reg_rtx (SImode);
               rtx mask1 = gen_reg_rtx (TImode);
               rtx dst1 = gen_reg_rtx (TImode);
               rtx mem1;
+              addr1 = plus_constant (addr, 16);
+              addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
               emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
               emit_insn (gen_shlqby_ti (mask1, mask, shl));
-              mem1 = adjust_address (mem, TImode, 16);
+              mem1 = change_address (ops[0], TImode, addr1);
               set_mem_alias_set (mem1, 0);
               emit_move_insn (dst1, mem1);
               emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
               emit_move_insn (mem1, dst1);
             }
+          emit_move_insn (mem, dst);
         }
       else
         emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
 }

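Note: the insv path above is a read-modify-write: it loads the aligned quadword(s) covering the destination, merges the shifted source in under a mask with selb, and stores the result back, patching a second quadword the same way when the field crosses a 16-byte boundary. A small model of the bit-select step, assuming the usual SPU selb behaviour of taking bits from the second value where the mask is 1 (the helper name is illustrative, not from spu.c):

/* Sketch of the selb merge used above (assumed semantics: the result
   takes bits from B where MASK is 1 and from A elsewhere).  */
static unsigned __int128
selb_merge (unsigned __int128 a, unsigned __int128 b, unsigned __int128 mask)
{
  return (a & ~mask) | (b & mask);
}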
@@ -1583 +1606 @@
           val = -(val & -8ll);
           val = (val >> 3) & 0x1f;
           output_addr_const (file, GEN_INT (val));
           return;

+        case 'v':
+        case 'w':
+          constant_to_array (mode, x, arr);
+          val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
+          output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
+          return;
+
         case 0:
           if (xcode == REG)
             fprintf (file, "%s", reg_names[REGNO (x)]);
           else if (xcode == MEM)
             output_address (XEXP (x, 0));
@@ -1595 +1625 @@
           else
             output_addr_const (file, x);
           return;

           /* unused letters
-                    o qr  uvw yz
+                    o qr  u   yz
              AB     OPQR  UVWXYZ */
         default:
           output_operand_lossage ("invalid %%xn code");
         }
       gcc_unreachable ();
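Note: the new 'v' and 'w' operand codes print the base-2 exponent of a power-of-two SFmode constant ('w' prints its negation), recovering it from the first two bytes of the constant's target byte image: exponent = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127. A small stand-alone check of that arithmetic (hypothetical test program, not part of spu.c):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int
main (void)
{
  float f = 8.0f;               /* 2^3, bit pattern 0x41000000 */
  uint32_t bits;
  unsigned char arr[4];

  memcpy (&bits, &f, sizeof bits);
  /* Big-endian byte image, as constant_to_array produces for SPU.  */
  arr[0] = bits >> 24;
  arr[1] = bits >> 16;
  arr[2] = bits >> 8;
  arr[3] = bits;

  int val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
  printf ("%d\n", val);         /* prints 3 */
  return 0;
}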
@@ -1616 +1646 @@
 get_pic_reg (void)
 {
   rtx pic_reg = pic_offset_table_rtx;
   if (!reload_completed && !reload_in_progress)
     abort ();
+  if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
+    pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
   return pic_reg;
 }

 /* Split constant addresses to handle cases that are too large.
    Add in the pic register when in PIC mode.
@@ -2763 +2795 @@

   insert_hbrp ();

   pad_bb ();

+  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
+    if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
+      {
+        /* Adjust the LABEL_REF in a hint when we have inserted a nop
+           between its branch label and the branch.  We don't move the
+           label because GCC expects it at the beginning of the block. */
+        rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
+        rtx label_ref = XVECEXP (unspec, 0, 0);
+        rtx label = XEXP (label_ref, 0);
+        rtx branch;
+        int offset = 0;
+        for (branch = NEXT_INSN (label);
+             !JUMP_P (branch) && !CALL_P (branch);
+             branch = NEXT_INSN (branch))
+          if (NONJUMP_INSN_P (branch))
+            offset += get_attr_length (branch);
+        if (offset > 0)
+          XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
+      }

   if (spu_flag_var_tracking)
     {
       df_analyze ();
       timevar_push (TV_VAR_TRACKING);
@@ -2970 +3021 @@
   for (i = nready - 1; i >= 0; i--)
     {
       insn = ready[i];
       if (INSN_CODE (insn) == -1
           || INSN_CODE (insn) == CODE_FOR_blockage
-          || INSN_CODE (insn) == CODE_FOR__spu_convert)
+          || (INSN_P (insn) && get_attr_length (insn) == 0))
         {
           ready[i] = ready[nready - 1];
           ready[nready - 1] = insn;
           return 1;
         }
@@ -3101 +3152 @@
      moved across it and has no cost. */
   if (INSN_CODE (insn) == CODE_FOR_blockage
       || INSN_CODE (dep_insn) == CODE_FOR_blockage)
     return 0;

-  if (INSN_CODE (insn) == CODE_FOR__spu_convert
-      || INSN_CODE (dep_insn) == CODE_FOR__spu_convert)
+  if ((INSN_P (insn) && get_attr_length (insn) == 0)
+      || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
     return 0;

   /* Make sure hbrps are spread out. */
   if (INSN_CODE (insn) == CODE_FOR_iprefetch
       && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
@@ -3501 +3552 @@
   val = trunc_int_for_mode (val, mode);

   return val >= low && val <= high;
 }

+/* TRUE when op is an immediate and an exact power of 2, and given that
+   OP is 2^scale, scale >= LOW && scale <= HIGH.  When OP is a vector,
+   all entries must be the same. */
+bool
+exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
+{
+  enum machine_mode int_mode;
+  HOST_WIDE_INT val;
+  unsigned char arr[16];
+  int bytes, i, j;
+
+  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
+              || GET_CODE (op) == CONST_VECTOR);
+
+  if (GET_CODE (op) == CONST_VECTOR
+      && !const_vector_immediate_p (op))
+    return 0;
+
+  if (GET_MODE (op) != VOIDmode)
+    mode = GET_MODE (op);
+
+  constant_to_array (mode, op, arr);
+
+  if (VECTOR_MODE_P (mode))
+    mode = GET_MODE_INNER (mode);
+
+  bytes = GET_MODE_SIZE (mode);
+  int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
+
+  /* Check that bytes are repeated. */
+  for (i = bytes; i < 16; i += bytes)
+    for (j = 0; j < bytes; j++)
+      if (arr[j] != arr[i + j])
+        return 0;
+
+  val = arr[0];
+  for (j = 1; j < bytes; j++)
+    val = (val << 8) | arr[j];
+
+  val = trunc_int_for_mode (val, int_mode);
+
+  /* Currently, we only handle SFmode */
+  gcc_assert (mode == SFmode);
+  if (mode == SFmode)
+    {
+      int exp = (val >> 23) - 127;
+      return val > 0 && (val & 0x007fffff) == 0
+             && exp >= low && exp <= high;
+    }
+  return FALSE;
+}
+
 /* We accept:
    - any 32-bit constant (SImode, SFmode)
    - any constant that can be generated with fsmbi (any mode)
    - a 64-bit constant where the high and low bits are identical
      (DImode, DFmode)
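Note: for the SFmode case that the new exp2_immediate_p handles, the test reduces to: the 32-bit image must be positive (sign bit clear), the mantissa field must be zero, and the unbiased exponent must fall within [low, high]. A compact restatement of that check (sketch; is_exp2_sf is not a spu.c function):

#include <stdint.h>

/* 1 if VAL is the IEEE-754 single-precision image of +2^scale with
   LOW <= scale <= HIGH, mirroring the SFmode branch above.  */
static int
is_exp2_sf (int32_t val, int low, int high)
{
  int exp = (int) (val >> 23) - 127;
  return val > 0 && (val & 0x007fffff) == 0 && exp >= low && exp <= high;
}

For example, 0x3f000000 (0.5f) gives exp = -1 and passes whenever low <= -1 <= high.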
@@ -3531 +3634 @@
 }

 /* Valid address are:
    - symbol_ref, label_ref, const
    - reg
-   - reg + const, where either reg or const is 16 byte aligned
+   - reg + const_int, where const_int is 16 byte aligned
    - reg + reg, alignment doesn't matter
   The alignment matters in the reg+const case because lqd and stqd
-  ignore the 4 least significant bits of the const.  (TODO: It might be
-  preferable to allow any alignment and fix it up when splitting.) */
+  ignore the 4 least significant bits of the const.  We only care about
+  16 byte modes because the expand phase will change all smaller MEM
+  references to TImode. */
 int
 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
                         rtx x, int reg_ok_strict)
 {
-  if (mode == TImode && GET_CODE (x) == AND
+  int aligned = GET_MODE_SIZE (mode) >= 16;
+  if (aligned
+      && GET_CODE (x) == AND
       && GET_CODE (XEXP (x, 1)) == CONST_INT
-      && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
+      && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
     x = XEXP (x, 0);
   switch (GET_CODE (x))
     {
+    case LABEL_REF:
     case SYMBOL_REF:
-    case LABEL_REF:
+    case CONST:
       return !TARGET_LARGE_MEM;
-
-    case CONST:
-      if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
-        {
-          rtx sym = XEXP (XEXP (x, 0), 0);
-          rtx cst = XEXP (XEXP (x, 0), 1);
-
-          /* Accept any symbol_ref + constant, assuming it does not
-             wrap around the local store addressability limit.  */
-          if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
-            return 1;
-        }
-      return 0;

     case CONST_INT:
       return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;

     case SUBREG:
       x = XEXP (x, 0);
-      gcc_assert (GET_CODE (x) == REG);
+      if (REG_P (x))
+        return 0;

     case REG:
       return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);

     case PLUS:
@@ -3582 +3677 @@
         rtx op1 = XEXP (x, 1);
         if (GET_CODE (op0) == SUBREG)
           op0 = XEXP (op0, 0);
         if (GET_CODE (op1) == SUBREG)
           op1 = XEXP (op1, 0);
-        /* We can't just accept any aligned register because CSE can
-           change it to a register that is not marked aligned and then
-           recog will fail.  So we only accept frame registers because
-           they will only be changed to other frame registers. */
         if (GET_CODE (op0) == REG
             && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
             && GET_CODE (op1) == CONST_INT
             && INTVAL (op1) >= -0x2000
             && INTVAL (op1) <= 0x1fff
-            && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
-          return 1;
+            && (!aligned || (INTVAL (op1) & 15) == 0))
+          return TRUE;
         if (GET_CODE (op0) == REG
             && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
             && GET_CODE (op1) == REG
             && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
-          return 1;
+          return TRUE;
       }
       break;

     default:
       break;
     }
-  return 0;
+  return FALSE;
 }

 /* When the address is reg + const_int, force the const_int into a
    register. */
 rtx
@@ -4059 +4150 @@
       fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
       call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
     }
 }

-/* This is called to decide when we can simplify a load instruction.  We
-   must only return true for registers which we know will always be
-   aligned.  Taking into account that CSE might replace this reg with
-   another one that has not been marked aligned.
-   So this is really only true for frame, stack and virtual registers,
-   which we know are always aligned and should not be adversely effected
-   by CSE. */
+/* This is called any time we inspect the alignment of a register for
+   addresses. */
 static int
-regno_aligned_for_load (int regno)
+reg_aligned_for_addr (rtx x)
 {
-  return regno == FRAME_POINTER_REGNUM
-    || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
-    || regno == ARG_POINTER_REGNUM
-    || regno == STACK_POINTER_REGNUM
-    || (regno >= FIRST_VIRTUAL_REGISTER
-        && regno <= LAST_VIRTUAL_REGISTER);
-}
-
-/* Return TRUE when mem is known to be 16-byte aligned. */
-int
-aligned_mem_p (rtx mem)
-{
-  if (MEM_ALIGN (mem) >= 128)
-    return 1;
-  if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
-    return 1;
-  if (GET_CODE (XEXP (mem, 0)) == PLUS)
-    {
-      rtx p0 = XEXP (XEXP (mem, 0), 0);
-      rtx p1 = XEXP (XEXP (mem, 0), 1);
-      if (regno_aligned_for_load (REGNO (p0)))
-        {
-          if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
-            return 1;
-          if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
-            return 1;
-        }
-    }
-  else if (GET_CODE (XEXP (mem, 0)) == REG)
-    {
-      if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
-        return 1;
-    }
-  else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
-    return 1;
-  else if (GET_CODE (XEXP (mem, 0)) == CONST)
-    {
-      rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
-      rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
-      if (GET_CODE (p0) == SYMBOL_REF
-          && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
-        return 1;
-    }
-  return 0;
+  int regno =
+    REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
+  return REGNO_POINTER_ALIGN (regno) >= 128;
 }

 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
    into its SYMBOL_REF_FLAGS. */
 static void
@@ -4141 +4186 @@
    alignment of the parameter mode and in that case the alignment never
    gets adjusted by LOCAL_ALIGNMENT. */
 static int
 store_with_one_insn_p (rtx mem)
 {
+  enum machine_mode mode = GET_MODE (mem);
   rtx addr = XEXP (mem, 0);
-  if (GET_MODE (mem) == BLKmode)
+  if (mode == BLKmode)
     return 0;
+  if (GET_MODE_SIZE (mode) >= 16)
+    return 1;
   /* Only static objects. */
   if (GET_CODE (addr) == SYMBOL_REF)
     {
       /* We use the associated declaration to make sure the access is
          referring to the whole object.
@@ -4167 +4215 @@
       return 1;
     }
   return 0;
 }

+/* Return 1 when the address is not valid for a simple load and store as
+   required by the '_mov*' patterns.  We could make this less strict
+   for loads, but we prefer mem's to look the same so they are more
+   likely to be merged.  */
+static int
+address_needs_split (rtx mem)
+{
+  if (GET_MODE_SIZE (GET_MODE (mem)) < 16
+      && (GET_MODE_SIZE (GET_MODE (mem)) < 4
+          || !(store_with_one_insn_p (mem)
+               || mem_is_padded_component_ref (mem))))
+    return 1;
+
+  return 0;
+}
+
 int
 spu_expand_mov (rtx * ops, enum machine_mode mode)
 {
   if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
     abort ();
@@ -4211 +4275 @@
     {
       if (CONSTANT_P (ops[1]))
         return spu_split_immediate (ops);
       return 0;
     }
-  else
-    {
-      if (GET_CODE (ops[0]) == MEM)
-        {
-          if (!spu_valid_move (ops))
-            {
-              emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
-                                    gen_reg_rtx (TImode)));
-              return 1;
-            }
-        }
-      else if (GET_CODE (ops[1]) == MEM)
-        {
-          if (!spu_valid_move (ops))
-            {
-              emit_insn (gen_load
-                         (ops[0], ops[1], gen_reg_rtx (TImode),
-                          gen_reg_rtx (SImode)));
-              return 1;
-            }
-        }
-      /* Catch the SImode immediates greater than 0x7fffffff, and sign
-         extend them. */
-      if (GET_CODE (ops[1]) == CONST_INT)
-        {
-          HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
-          if (val != INTVAL (ops[1]))
-            {
-              emit_move_insn (ops[0], GEN_INT (val));
-              return 1;
-            }
-        }
-    }
+
+  /* Catch the SImode immediates greater than 0x7fffffff, and sign
+     extend them. */
+  if (GET_CODE (ops[1]) == CONST_INT)
+    {
+      HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
+      if (val != INTVAL (ops[1]))
+        {
+          emit_move_insn (ops[0], GEN_INT (val));
+          return 1;
+        }
+    }
+  if (MEM_P (ops[0]))
+    return spu_split_store (ops);
+  if (MEM_P (ops[1]))
+    return spu_split_load (ops);
+
   return 0;
 }

-void
-spu_split_load (rtx * ops)
+static void
+spu_convert_move (rtx dst, rtx src)
 {
-  enum machine_mode mode = GET_MODE (ops[0]);
-  rtx addr, load, rot, mem, p0, p1;
+  enum machine_mode mode = GET_MODE (dst);
+  enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
+  rtx reg;
+  gcc_assert (GET_MODE (src) == TImode);
+  reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
+  emit_insn (gen_rtx_SET (VOIDmode, reg,
+              gen_rtx_TRUNCATE (int_mode,
+                gen_rtx_LSHIFTRT (TImode, src,
+                  GEN_INT (int_mode == DImode ? 64 : 96)))));
+  if (int_mode != mode)
+    {
+      reg = simplify_gen_subreg (mode, reg, int_mode, 0);
+      emit_move_insn (dst, reg);
+    }
+}
+
+/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
+   the address from SRC and SRC+16.  Return a REG or CONST_INT that
+   specifies how many bytes to rotate the loaded registers, plus any
+   extra from EXTRA_ROTQBY.  The address and rotate amounts are
+   normalized to improve merging of loads and rotate computations. */
+static rtx
+spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
+{
+  rtx addr = XEXP (src, 0);
+  rtx p0, p1, rot, addr0, addr1;
   int rot_amt;
-
-  addr = XEXP (ops[1], 0);

   rot = 0;
   rot_amt = 0;
-  if (GET_CODE (addr) == PLUS)
+
+  if (MEM_ALIGN (src) >= 128)
+    /* Address is already aligned; simply perform a TImode load. */ ;
+  else if (GET_CODE (addr) == PLUS)
     {
       /* 8 cases:
          aligned reg   + aligned reg     => lqx
          aligned reg   + unaligned reg   => lqx, rotqby
          aligned reg   + aligned const   => lqd
@@ -4272 +4345 @@
          unaligned reg + aligned const   => lqd, rotqby
          unaligned reg + unaligned const -> not allowed by legitimate address
       */
       p0 = XEXP (addr, 0);
       p1 = XEXP (addr, 1);
-      if (REG_P (p0) && !regno_aligned_for_load (REGNO (p0)))
+      if (!reg_aligned_for_addr (p0))
         {
-          if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
+          if (REG_P (p1) && !reg_aligned_for_addr (p1))
             {
-              emit_insn (gen_addsi3 (ops[3], p0, p1));
-              rot = ops[3];
+              rot = gen_reg_rtx (SImode);
+              emit_insn (gen_addsi3 (rot, p0, p1));
+            }
+          else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
+            {
+              if (INTVAL (p1) > 0
+                  && REG_POINTER (p0)
+                  && INTVAL (p1) * BITS_PER_UNIT
+                     < REGNO_POINTER_ALIGN (REGNO (p0)))
+                {
+                  rot = gen_reg_rtx (SImode);
+                  emit_insn (gen_addsi3 (rot, p0, p1));
+                  addr = p0;
+                }
+              else
+                {
+                  rtx x = gen_reg_rtx (SImode);
+                  emit_move_insn (x, p1);
+                  if (!spu_arith_operand (p1, SImode))
+                    p1 = x;
+                  rot = gen_reg_rtx (SImode);
+                  emit_insn (gen_addsi3 (rot, p0, p1));
+                  addr = gen_rtx_PLUS (Pmode, p0, x);
+                }
             }
           else
             rot = p0;
         }
       else
         {
           if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
             {
               rot_amt = INTVAL (p1) & 15;
-              p1 = GEN_INT (INTVAL (p1) & -16);
-              addr = gen_rtx_PLUS (SImode, p0, p1);
+              if (INTVAL (p1) & -16)
+                {
+                  p1 = GEN_INT (INTVAL (p1) & -16);
+                  addr = gen_rtx_PLUS (SImode, p0, p1);
+                }
+              else
+                addr = p0;
             }
-          else if (REG_P (p1) && !regno_aligned_for_load (REGNO (p1)))
+          else if (REG_P (p1) && !reg_aligned_for_addr (p1))
             rot = p1;
         }
     }
-  else if (GET_CODE (addr) == REG)
+  else if (REG_P (addr))
     {
-      if (!regno_aligned_for_load (REGNO (addr)))
+      if (!reg_aligned_for_addr (addr))
         rot = addr;
     }
   else if (GET_CODE (addr) == CONST)
     {
       if (GET_CODE (XEXP (addr, 0)) == PLUS
@@ -4315 +4415 @@
                                   GEN_INT (rot_amt & -16)));
           else
             addr = XEXP (XEXP (addr, 0), 0);
         }
       else
-        rot = addr;
+        {
+          rot = gen_reg_rtx (Pmode);
+          emit_move_insn (rot, addr);
+        }
     }
   else if (GET_CODE (addr) == CONST_INT)
     {
       rot_amt = INTVAL (addr);
       addr = GEN_INT (rot_amt & -16);
     }
   else if (!ALIGNED_SYMBOL_REF_P (addr))
-    rot = addr;
-
-  if (GET_MODE_SIZE (mode) < 4)
-    rot_amt += GET_MODE_SIZE (mode) - 4;
+    {
+      rot = gen_reg_rtx (Pmode);
+      emit_move_insn (rot, addr);
+    }
+
+  rot_amt += extra_rotby;

   rot_amt &= 15;

   if (rot && rot_amt)
     {
-      emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
-      rot = ops[3];
+      rtx x = gen_reg_rtx (SImode);
+      emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
+      rot = x;
       rot_amt = 0;
     }
-
-  load = ops[2];
+  if (!rot && rot_amt)
+    rot = GEN_INT (rot_amt);

-  addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
-  mem = change_address (ops[1], TImode, addr);
-
-  emit_insn (gen_movti (load, mem));
+  addr0 = copy_rtx (addr);
+  addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
+  emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
+
+  if (dst1)
+    {
+      addr1 = plus_constant (copy_rtx (addr), 16);
+      addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
+      emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
+    }
+
+  return rot;
+}
+
+int
+spu_split_load (rtx * ops)
+{
+  enum machine_mode mode = GET_MODE (ops[0]);
+  rtx addr, load, rot;
+  int rot_amt;
+
+  if (GET_MODE_SIZE (mode) >= 16)
+    return 0;
+
+  addr = XEXP (ops[1], 0);
+  gcc_assert (GET_CODE (addr) != AND);
+
+  if (!address_needs_split (ops[1]))
+    {
+      ops[1] = change_address (ops[1], TImode, addr);
+      load = gen_reg_rtx (TImode);
+      emit_insn (gen__movti (load, ops[1]));
+      spu_convert_move (ops[0], load);
+      return 1;
+    }
+
+  rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
+
+  load = gen_reg_rtx (TImode);
+  rot = spu_expand_load (load, 0, ops[1], rot_amt);

   if (rot)
     emit_insn (gen_rotqby_ti (load, load, rot));
-  else if (rot_amt)
-    emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
-
-  if (reload_completed)
-    emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
-  else
-    emit_insn (gen_spu_convert (ops[0], load));
-}
-
-void
+
+  spu_convert_move (ops[0], load);
+  return 1;
+}
+
+int
 spu_split_store (rtx * ops)
 {
   enum machine_mode mode = GET_MODE (ops[0]);
-  rtx pat = ops[2];
-  rtx reg = ops[3];
+  rtx reg;
   rtx addr, p0, p1, p1_lo, smem;
   int aform;
   int scalar;

+  if (GET_MODE_SIZE (mode) >= 16)
+    return 0;
+
   addr = XEXP (ops[0], 0);
+  gcc_assert (GET_CODE (addr) != AND);
+
+  if (!address_needs_split (ops[0]))
+    {
+      reg = gen_reg_rtx (TImode);
+      emit_insn (gen_spu_convert (reg, ops[1]));
+      ops[0] = change_address (ops[0], TImode, addr);
+      emit_move_insn (ops[0], reg);
+      return 1;
+    }

   if (GET_CODE (addr) == PLUS)
     {
       /* 8 cases:
          aligned reg   + aligned reg     => lqx, c?x, shuf, stqx
@@ -4377 +4527 @@
          aligned reg   + aligned const   => lqd, c?d, shuf, stqx
          aligned reg   + unaligned const => lqd, c?d, shuf, stqx
          unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
          unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
          unaligned reg + aligned const   => lqd, c?d, shuf, stqx
-         unaligned reg + unaligned const -> not allowed by legitimate address
+         unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
       */
       aform = 0;
       p0 = XEXP (addr, 0);
       p1 = p1_lo = XEXP (addr, 1);
-      if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
+      if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
         {
           p1_lo = GEN_INT (INTVAL (p1) & 15);
-          p1 = GEN_INT (INTVAL (p1) & -16);
-          addr = gen_rtx_PLUS (SImode, p0, p1);
-        }
-    }
-  else if (GET_CODE (addr) == REG)
+          if (reg_aligned_for_addr (p0))
+            {
+              p1 = GEN_INT (INTVAL (p1) & -16);
+              if (p1 == const0_rtx)
+                addr = p0;
+              else
+                addr = gen_rtx_PLUS (SImode, p0, p1);
+            }
+          else
+            {
+              rtx x = gen_reg_rtx (SImode);
+              emit_move_insn (x, p1);
+              addr = gen_rtx_PLUS (SImode, p0, x);
+            }
+        }
+    }
+  else if (REG_P (addr))
     {
       aform = 0;
       p0 = addr;
       p1 = p1_lo = const0_rtx;
     }
@@ -4403 +4565 @@
       p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
       p1 = 0;                   /* aform doesn't use p1 */
       p1_lo = addr;
       if (ALIGNED_SYMBOL_REF_P (addr))
         p1_lo = const0_rtx;
-      else if (GET_CODE (addr) == CONST)
-        {
-          if (GET_CODE (XEXP (addr, 0)) == PLUS
-              && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
-              && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
-            {
-              HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
-              if ((v & -16) != 0)
-                addr = gen_rtx_CONST (Pmode,
-                                      gen_rtx_PLUS (Pmode,
-                                                    XEXP (XEXP (addr, 0), 0),
-                                                    GEN_INT (v & -16)));
-              else
-                addr = XEXP (XEXP (addr, 0), 0);
-              p1_lo = GEN_INT (v & 15);
-            }
-        }
+      else if (GET_CODE (addr) == CONST
+               && GET_CODE (XEXP (addr, 0)) == PLUS
+               && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
+               && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
+        {
+          HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
+          if ((v & -16) != 0)
+            addr = gen_rtx_CONST (Pmode,
+                                  gen_rtx_PLUS (Pmode,
+                                                XEXP (XEXP (addr, 0), 0),
+                                                GEN_INT (v & -16)));
+          else
+            addr = XEXP (XEXP (addr, 0), 0);
+          p1_lo = GEN_INT (v & 15);
         }
       else if (GET_CODE (addr) == CONST_INT)
         {
           p1_lo = GEN_INT (INTVAL (addr) & 15);
           addr = GEN_INT (INTVAL (addr) & -16);
         }
-    }
-
-  addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
+      else
+        {
+          p1_lo = gen_reg_rtx (SImode);
+          emit_move_insn (p1_lo, addr);
+        }
+    }
+
+  reg = gen_reg_rtx (TImode);

   scalar = store_with_one_insn_p (ops[0]);
   if (!scalar)
     {
       /* We could copy the flags from the ops[0] MEM to mem here,
          We don't because we want this load to be optimized away if
          possible, and copying the flags will prevent that in certain
          cases, e.g. consider the volatile flag. */

+      rtx pat = gen_reg_rtx (TImode);
       rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
       set_mem_alias_set (lmem, 0);
       emit_insn (gen_movti (reg, lmem));

-      if (!p0 || regno_aligned_for_load (REGNO (p0)))
+      if (!p0 || reg_aligned_for_addr (p0))
         p0 = stack_pointer_rtx;
       if (!p1_lo)
         p1_lo = const0_rtx;

       emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
       emit_insn (gen_shufb (reg, ops[1], reg, pat));
-    }
-  else if (reload_completed)
-    {
-      if (GET_CODE (ops[1]) == REG)
-        emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
-      else if (GET_CODE (ops[1]) == SUBREG)
-        emit_move_insn (reg,
-                        gen_rtx_REG (GET_MODE (reg),
-                                     REGNO (SUBREG_REG (ops[1]))));
-      else
-        abort ();
     }
   else
     {
       if (GET_CODE (ops[1]) == REG)
         emit_insn (gen_spu_convert (reg, ops[1]));
@@ -4471 +4626 @@
       else
         abort ();
     }

   if (GET_MODE_SIZE (mode) < 4 && scalar)
-    emit_insn (gen_shlqby_ti
-               (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
+    emit_insn (gen_ashlti3
+               (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));

-  smem = change_address (ops[0], TImode, addr);
+  smem = change_address (ops[0], TImode, copy_rtx (addr));
   /* We can't use the previous alias set because the memory has changed
      size and can potentially overlap objects of other types. */
   set_mem_alias_set (smem, 0);

   emit_insn (gen_movti (smem, reg));
+  return 1;
 }

 /* Return TRUE if X is MEM which is a struct member reference
    and the member can safely be loaded and stored with a single
    instruction because it is padded. */
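Note: the split load and store paths above rely on the same idea: lq/stq can only touch an aligned quadword, so an unaligned scalar access becomes an aligned TImode access plus a byte rotate (rotqby) whose count is derived from the low four bits of the address, with spu_expand_load issuing a second load when the value spills into the next quadword. A host-side sketch of the single-quadword case (illustrative names, not from spu.c):

#include <stdint.h>
#include <stddef.h>

/* Fetch the 16-byte block containing OFFSET within MEM and rotate it
   left by (OFFSET & 15) bytes so the addressed byte lands in byte 0 of
   OUT.  Values that cross into the next quadword need a second load and
   a merge, which spu_expand_load handles with its DST1 operand.  */
static void
load_quad_rotated (const uint8_t *mem, size_t offset, uint8_t out[16])
{
  const uint8_t *q = mem + (offset & ~(size_t) 15);   /* aligned block */
  unsigned rot = offset & 15;                          /* rotqby count */
  for (unsigned i = 0; i < 16; i++)
    out[i] = q[(i + rot) & 15];
}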
@@ -4576 +4732 @@
         break;

       *comma = ',';
       str = comma + 1;
     }
-}
-
-int
-spu_valid_move (rtx * ops)
-{
-  enum machine_mode mode = GET_MODE (ops[0]);
-  if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
-    return 0;
-
-  /* init_expr_once tries to recog against load and store insns to set
-     the direct_load[] and direct_store[] arrays.  We always want to
-     consider those loads and stores valid.  init_expr_once is called in
-     the context of a dummy function which does not have a decl. */
-  if (cfun->decl == 0)
-    return 1;
-
-  /* Don't allows loads/stores which would require more than 1 insn.
-     During and after reload we assume loads and stores only take 1
-     insn. */
-  if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
-    {
-      if (GET_CODE (ops[0]) == MEM
-          && (GET_MODE_SIZE (mode) < 4
-              || !(store_with_one_insn_p (ops[0])
-                   || mem_is_padded_component_ref (ops[0]))))
-        return 0;
-      if (GET_CODE (ops[1]) == MEM
-          && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
-        return 0;
-    }
-  return 1;
 }

 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
    can be generated using the fsmbi instruction. */
 int
@@ -6322 +6447 @@
 }


 void
 spu_init_expanders (void)
 {
-  /* HARD_FRAME_REGISTER is only 128 bit aligned when
-   * frame_pointer_needed is true.  We don't know that until we're
-   * expanding the prologue. */
   if (cfun)
-    REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
+    {
+      rtx r0, r1;
+      /* HARD_FRAME_REGISTER is only 128 bit aligned when
+         frame_pointer_needed is true.  We don't know that until we're
+         expanding the prologue. */
+      REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
+
+      /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
+         LAST_VIRTUAL_REGISTER+2 to test the back-end.  We want them
+         to be treated as aligned, so generate them here. */
+      r0 = gen_reg_rtx (SImode);
+      r1 = gen_reg_rtx (SImode);
+      mark_reg_pointer (r0, 128);
+      mark_reg_pointer (r1, 128);
+      gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
+                  && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
+    }
 }

 static enum machine_mode
 spu_libgcc_cmp_return_mode (void)
 {
@@ -6370 +6508 @@
   if (strcmp (name, ".toe") == 0)
     return SECTION_BSS;
   return default_section_type_flags (decl, name, reloc);
 }

+/* Generate a constant or register which contains 2^SCALE.  We assume
+   the result is valid for MODE.  Currently, MODE must be V4SFmode and
+   SCALE must be SImode. */
+rtx
+spu_gen_exp2 (enum machine_mode mode, rtx scale)
+{
+  gcc_assert (mode == V4SFmode);
+  gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
+  if (GET_CODE (scale) != CONST_INT)
+    {
+      /* unsigned int exp = (127 + scale) << 23;
+         __vector float m = (__vector float) spu_splats (exp); */
+      rtx reg = force_reg (SImode, scale);
+      rtx exp = gen_reg_rtx (SImode);
+      rtx mul = gen_reg_rtx (mode);
+      emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
+      emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
+      emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
+      return mul;
+    }
+  else
+    {
+      HOST_WIDE_INT exp = 127 + INTVAL (scale);
+      unsigned char arr[16];
+      arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
+      arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
+      arr[2] = arr[6] = arr[10] = arr[14] = 0;
+      arr[3] = arr[7] = arr[11] = arr[15] = 0;
+      return array_to_constant (mode, arr);
+    }
+}
+
+/* After reload, just change the convert into a move instruction
+   or a dead instruction. */
+void
+spu_split_convert (rtx ops[])
+{
+  if (REGNO (ops[0]) == REGNO (ops[1]))
+    emit_note (NOTE_INSN_DELETED);
+  else
+    {
+      /* Use TImode always as this might help hard reg copyprop. */
+      rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
+      rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
+      emit_insn (gen_move_insn (op0, op1));
+    }
+}
+
 #include "gt-spu.h"

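Note: spu_gen_exp2 above builds 2^scale directly from the IEEE-754 encoding: the per-lane value is (127 + scale) << 23 with a zero mantissa, and the constant path stores exp >> 1 and the low byte of exp << 7 as the first two bytes of each big-endian 4-byte element. A quick stand-alone check of that byte recipe (hypothetical test program, not part of spu.c):

#include <stdio.h>
#include <string.h>
#include <stdint.h>

int
main (void)
{
  int scale = 3;                        /* expect 2^3 = 8.0f */
  int exp = 127 + scale;
  /* Bytes 0 and 1 of the big-endian element, as in spu_gen_exp2.  */
  uint32_t word = ((uint32_t) (unsigned char) (exp >> 1) << 24)
                  | ((uint32_t) (unsigned char) (exp << 7) << 16);
  float f;
  memcpy (&f, &word, sizeof f);
  printf ("%f\n", f);                   /* prints 8.000000 */
  return 0;
}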