comparison gcc/config/arm/arm.c @ 63:b7f97abdc517 gcc-4.6-20100522

update gcc from gcc-4.5.0 to gcc-4.6
author ryoma <e075725@ie.u-ryukyu.ac.jp>
date Mon, 24 May 2010 12:47:05 +0900
parents 77e2b8dfacca
children f6334be47118
comparison of 56:3c8a44c06a95 with 63:b7f97abdc517
1 /* Output routines for GCC for ARM. 1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc. 4 Free Software Foundation, Inc.
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) 5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
6 and Martin Simmons (@harleqn.co.uk). 6 and Martin Simmons (@harleqn.co.uk).
7 More major hacks by Richard Earnshaw (rearnsha@arm.com). 7 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8 8
29 #include "rtl.h" 29 #include "rtl.h"
30 #include "tree.h" 30 #include "tree.h"
31 #include "obstack.h" 31 #include "obstack.h"
32 #include "regs.h" 32 #include "regs.h"
33 #include "hard-reg-set.h" 33 #include "hard-reg-set.h"
34 #include "real.h"
35 #include "insn-config.h" 34 #include "insn-config.h"
36 #include "conditions.h" 35 #include "conditions.h"
37 #include "output.h" 36 #include "output.h"
38 #include "insn-attr.h" 37 #include "insn-attr.h"
39 #include "flags.h" 38 #include "flags.h"
149 static int arm_address_cost (rtx, bool); 148 static int arm_address_cost (rtx, bool);
150 static bool arm_memory_load_p (rtx); 149 static bool arm_memory_load_p (rtx);
151 static bool arm_cirrus_insn_p (rtx); 150 static bool arm_cirrus_insn_p (rtx);
152 static void cirrus_reorg (rtx); 151 static void cirrus_reorg (rtx);
153 static void arm_init_builtins (void); 152 static void arm_init_builtins (void);
154 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
155 static void arm_init_iwmmxt_builtins (void); 153 static void arm_init_iwmmxt_builtins (void);
156 static rtx safe_vector_operand (rtx, enum machine_mode); 154 static rtx safe_vector_operand (rtx, enum machine_mode);
157 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx); 155 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
158 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int); 156 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
159 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); 157 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
222 static bool arm_frame_pointer_required (void); 220 static bool arm_frame_pointer_required (void);
223 static bool arm_can_eliminate (const int, const int); 221 static bool arm_can_eliminate (const int, const int);
224 static void arm_asm_trampoline_template (FILE *); 222 static void arm_asm_trampoline_template (FILE *);
225 static void arm_trampoline_init (rtx, tree, rtx); 223 static void arm_trampoline_init (rtx, tree, rtx);
226 static rtx arm_trampoline_adjust_address (rtx); 224 static rtx arm_trampoline_adjust_address (rtx);
225 static rtx arm_pic_static_addr (rtx orig, rtx reg);
227 226
228 227
229 /* Table of machine attributes. */ 228 /* Table of machine attributes. */
230 static const struct attribute_spec arm_attribute_table[] = 229 static const struct attribute_spec arm_attribute_table[] =
231 { 230 {
522 /* True if we are currently building a constant table. */ 521 /* True if we are currently building a constant table. */
523 int making_const_table; 522 int making_const_table;
524 523
525 /* The processor for which instructions should be scheduled. */ 524 /* The processor for which instructions should be scheduled. */
526 enum processor_type arm_tune = arm_none; 525 enum processor_type arm_tune = arm_none;
526
527 /* The current tuning set. */
528 const struct tune_params *current_tune;
527 529
528 /* The default processor used if not overridden by commandline. */ 530 /* The default processor used if not overridden by commandline. */
529 static enum processor_type arm_default_cpu = arm_none; 531 static enum processor_type arm_default_cpu = arm_none;
530 532
531 /* Which floating point hardware to schedule for. */ 533 /* Which floating point hardware to schedule for. */
599 #define FL_FOR_ARCH6Z FL_FOR_ARCH6 601 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
600 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K 602 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K
601 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) 603 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2)
602 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) 604 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM)
603 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM) 605 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM)
604 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM) 606 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K)
605 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) 607 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV)
606 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV) 608 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV)
607 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) 609 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM)
608 610
609 /* The bits in this mask specify which 611 /* The bits in this mask specify which
694 unsigned arm_pic_register = INVALID_REGNUM; 696 unsigned arm_pic_register = INVALID_REGNUM;
695 697
696 /* Set to 1 after arm_reorg has started. Reset to start at the start of 698 /* Set to 1 after arm_reorg has started. Reset to start at the start of
697 the next function. */ 699 the next function. */
698 static int after_arm_reorg = 0; 700 static int after_arm_reorg = 0;
699
700 /* The maximum number of insns to be used when loading a constant. */
701 static int arm_constant_limit = 3;
702 701
703 static enum arm_pcs arm_pcs_default; 702 static enum arm_pcs arm_pcs_default;
704 703
705 /* For an explanation of these variables, see final_prescan_insn below. */ 704 /* For an explanation of these variables, see final_prescan_insn below. */
706 int arm_ccfsm_state; 705 int arm_ccfsm_state;
736 { 735 {
737 const char *const name; 736 const char *const name;
738 enum processor_type core; 737 enum processor_type core;
739 const char *arch; 738 const char *arch;
740 const unsigned long flags; 739 const unsigned long flags;
741 bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool); 740 const struct tune_params *const tune;
741 };
742
743 const struct tune_params arm_slowmul_tune =
744 {
745 arm_slowmul_rtx_costs,
746 3
747 };
748
749 const struct tune_params arm_fastmul_tune =
750 {
751 arm_fastmul_rtx_costs,
752 1
753 };
754
755 const struct tune_params arm_xscale_tune =
756 {
757 arm_xscale_rtx_costs,
758 2
759 };
760
761 const struct tune_params arm_9e_tune =
762 {
763 arm_9e_rtx_costs,
764 1
742 }; 765 };
743 766
744 /* Not all of these give usefully different compilation alternatives, 767 /* Not all of these give usefully different compilation alternatives,
745 but there is no simple way of generalizing them. */ 768 but there is no simple way of generalizing them. */
746 static const struct processors all_cores[] = 769 static const struct processors all_cores[] =
747 { 770 {
748 /* ARM Cores */ 771 /* ARM Cores */
749 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \ 772 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
750 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs}, 773 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
751 #include "arm-cores.def" 774 #include "arm-cores.def"
752 #undef ARM_CORE 775 #undef ARM_CORE
753 {NULL, arm_none, NULL, 0, NULL} 776 {NULL, arm_none, NULL, 0, NULL}
754 }; 777 };
755 778
756 static const struct processors all_architectures[] = 779 static const struct processors all_architectures[] =
757 { 780 {
758 /* ARM Architectures */ 781 /* ARM Architectures */
759 /* We don't specify rtx_costs here as it will be figured out 782 /* We don't specify tuning costs here as it will be figured out
760 from the core. */ 783 from the core. */
761 784
762 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL}, 785 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
763 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL}, 786 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
764 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL}, 787 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
903 TLS_LDO32, 926 TLS_LDO32,
904 TLS_IE32, 927 TLS_IE32,
905 TLS_LE32 928 TLS_LE32
906 }; 929 };
907 930
931 /* The maximum number of insns to be used when loading a constant. */
932 inline static int
933 arm_constant_limit (bool size_p)
934 {
935 return size_p ? 1 : current_tune->constant_limit;
936 }
937
908 /* Emit an insn that's a simple single-set. Both the operands must be known 938 /* Emit an insn that's a simple single-set. Both the operands must be known
909 to be valid. */ 939 to be valid. */
910 inline static rtx 940 inline static rtx
911 emit_set_insn (rtx x, rtx y) 941 emit_set_insn (rtx x, rtx y)
912 { 942 {
1443 /* The processor for which we should tune should now have been 1473 /* The processor for which we should tune should now have been
1444 chosen. */ 1474 chosen. */
1445 gcc_assert (arm_tune != arm_none); 1475 gcc_assert (arm_tune != arm_none);
1446 1476
1447 tune_flags = all_cores[(int)arm_tune].flags; 1477 tune_flags = all_cores[(int)arm_tune].flags;
1478 current_tune = all_cores[(int)arm_tune].tune;
1448 1479
1449 if (target_fp16_format_name) 1480 if (target_fp16_format_name)
1450 { 1481 {
1451 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++) 1482 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++)
1452 { 1483 {
1637 { 1668 {
1638 arm_fpu_desc = &all_fpus[i]; 1669 arm_fpu_desc = &all_fpus[i];
1639 break; 1670 break;
1640 } 1671 }
1641 } 1672 }
1673
1642 if (!arm_fpu_desc) 1674 if (!arm_fpu_desc)
1643 error ("invalid floating point option: -mfpu=%s", target_fpu_name); 1675 {
1676 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1677 return;
1678 }
1644 1679
1645 switch (arm_fpu_desc->model) 1680 switch (arm_fpu_desc->model)
1646 { 1681 {
1647 case ARM_FP_MODEL_FPA: 1682 case ARM_FP_MODEL_FPA:
1648 if (arm_fpu_desc->rev == 2) 1683 if (arm_fpu_desc->rev == 2)
1756 } 1791 }
1757 1792
1758 /* Use the cp15 method if it is available. */ 1793 /* Use the cp15 method if it is available. */
1759 if (target_thread_pointer == TP_AUTO) 1794 if (target_thread_pointer == TP_AUTO)
1760 { 1795 {
1761 if (arm_arch6k && !TARGET_THUMB) 1796 if (arm_arch6k && !TARGET_THUMB1)
1762 target_thread_pointer = TP_CP15; 1797 target_thread_pointer = TP_CP15;
1763 else 1798 else
1764 target_thread_pointer = TP_SOFT; 1799 target_thread_pointer = TP_SOFT;
1765 } 1800 }
1766 1801
1835 flag_schedule_insns = 0; 1870 flag_schedule_insns = 0;
1836 } 1871 }
1837 1872
1838 if (optimize_size) 1873 if (optimize_size)
1839 { 1874 {
1840 arm_constant_limit = 1;
1841
1842 /* If optimizing for size, bump the number of instructions that we 1875 /* If optimizing for size, bump the number of instructions that we
1843 are prepared to conditionally execute (even on a StrongARM). */ 1876 are prepared to conditionally execute (even on a StrongARM). */
1844 max_insns_skipped = 6; 1877 max_insns_skipped = 6;
1845 } 1878 }
1846 else 1879 else
1847 { 1880 {
1848 /* For processors with load scheduling, it never costs more than
1849 2 cycles to load a constant, and the load scheduler may well
1850 reduce that to 1. */
1851 if (arm_ld_sched)
1852 arm_constant_limit = 1;
1853
1854 /* On XScale the longer latency of a load makes it more difficult
1855 to achieve a good schedule, so it's faster to synthesize
1856 constants that can be done in two insns. */
1857 if (arm_tune_xscale)
1858 arm_constant_limit = 2;
1859
1860 /* StrongARM has early execution of branches, so a sequence 1881 /* StrongARM has early execution of branches, so a sequence
1861 that is worth skipping is shorter. */ 1882 that is worth skipping is shorter. */
1862 if (arm_tune_strongarm) 1883 if (arm_tune_strongarm)
1863 max_insns_skipped = 3; 1884 max_insns_skipped = 3;
1864 } 1885 }
1870 inform (input_location, 1891 inform (input_location,
1871 "-freorder-blocks-and-partition not supported on this architecture"); 1892 "-freorder-blocks-and-partition not supported on this architecture");
1872 flag_reorder_blocks_and_partition = 0; 1893 flag_reorder_blocks_and_partition = 0;
1873 flag_reorder_blocks = 1; 1894 flag_reorder_blocks = 1;
1874 } 1895 }
1875
1876 /* Ideally we would want to use CFI directives to generate
1877 debug info. However this also creates the .eh_frame
1878 section, so disable them until GAS can handle
1879 this properly. See PR40521. */
1880 if (TARGET_AAPCS_BASED)
1881 flag_dwarf2_cfi_asm = 0;
1882 1896
1883 /* Register global variables with the garbage collector. */ 1897 /* Register global variables with the garbage collector. */
1884 arm_add_gc_roots (); 1898 arm_add_gc_roots ();
1885 } 1899 }
1886 1900
2362 */ 2376 */
2363 if (!after_arm_reorg 2377 if (!after_arm_reorg
2364 && !cond 2378 && !cond
2365 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source, 2379 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
2366 1, 0) 2380 1, 0)
2367 > arm_constant_limit + (code != SET))) 2381 > (arm_constant_limit (optimize_function_for_size_p (cfun))
2382 + (code != SET))))
2368 { 2383 {
2369 if (code == SET) 2384 if (code == SET)
2370 { 2385 {
2371 /* Currently SET is the only monadic value for CODE, all 2386 /* Currently SET is the only monadic value for CODE, all
2372 the rest are diadic. */ 2387 the rest are diadic. */
2522 { 2537 {
2523 int can_invert = 0; 2538 int can_invert = 0;
2524 int can_negate = 0; 2539 int can_negate = 0;
2525 int final_invert = 0; 2540 int final_invert = 0;
2526 int can_negate_initial = 0; 2541 int can_negate_initial = 0;
2527 int can_shift = 0;
2528 int i; 2542 int i;
2529 int num_bits_set = 0; 2543 int num_bits_set = 0;
2530 int set_sign_bit_copies = 0; 2544 int set_sign_bit_copies = 0;
2531 int clear_sign_bit_copies = 0; 2545 int clear_sign_bit_copies = 0;
2532 int clear_zero_bit_copies = 0; 2546 int clear_zero_bit_copies = 0;
2541 are split. */ 2555 are split. */
2542 switch (code) 2556 switch (code)
2543 { 2557 {
2544 case SET: 2558 case SET:
2545 can_invert = 1; 2559 can_invert = 1;
2546 can_shift = 1;
2547 can_negate = 1; 2560 can_negate = 1;
2548 break; 2561 break;
2549 2562
2550 case PLUS: 2563 case PLUS:
2551 can_negate = 1; 2564 can_negate = 1;
4779 4792
4780 if (cfun->machine->sibcall_blocked) 4793 if (cfun->machine->sibcall_blocked)
4781 return false; 4794 return false;
4782 4795
4783 /* Never tailcall something for which we have no decl, or if we 4796 /* Never tailcall something for which we have no decl, or if we
4784 are in Thumb mode. */ 4797 are generating code for Thumb-1. */
4785 if (decl == NULL || TARGET_THUMB) 4798 if (decl == NULL || TARGET_THUMB1)
4786 return false; 4799 return false;
4787 4800
4788 /* The PIC register is live on entry to VxWorks PLT entries, so we 4801 /* The PIC register is live on entry to VxWorks PLT entries, so we
4789 must make the call before restoring the PIC register. */ 4802 must make the call before restoring the PIC register. */
4790 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl)) 4803 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
4906 if (GET_CODE (orig) == SYMBOL_REF 4919 if (GET_CODE (orig) == SYMBOL_REF
4907 || GET_CODE (orig) == LABEL_REF) 4920 || GET_CODE (orig) == LABEL_REF)
4908 { 4921 {
4909 rtx pic_ref, address; 4922 rtx pic_ref, address;
4910 rtx insn; 4923 rtx insn;
4911 int subregs = 0;
4912
4913 /* If this function doesn't have a pic register, create one now. */
4914 require_pic_register ();
4915 4924
4916 if (reg == 0) 4925 if (reg == 0)
4917 { 4926 {
4918 gcc_assert (can_create_pseudo_p ()); 4927 gcc_assert (can_create_pseudo_p ());
4919 reg = gen_reg_rtx (Pmode); 4928 reg = gen_reg_rtx (Pmode);
4920 4929 address = gen_reg_rtx (Pmode);
4921 subregs = 1; 4930 }
4922 }
4923
4924 if (subregs)
4925 address = gen_reg_rtx (Pmode);
4926 else 4931 else
4927 address = reg; 4932 address = reg;
4928
4929 if (TARGET_ARM)
4930 emit_insn (gen_pic_load_addr_arm (address, orig));
4931 else if (TARGET_THUMB2)
4932 emit_insn (gen_pic_load_addr_thumb2 (address, orig));
4933 else /* TARGET_THUMB1 */
4934 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
4935 4933
4936 /* VxWorks does not impose a fixed gap between segments; the run-time 4934 /* VxWorks does not impose a fixed gap between segments; the run-time
4937 gap can be different from the object-file gap. We therefore can't 4935 gap can be different from the object-file gap. We therefore can't
4938 use GOTOFF unless we are absolutely sure that the symbol is in the 4936 use GOTOFF unless we are absolutely sure that the symbol is in the
4939 same segment as the GOT. Unfortunately, the flexibility of linker 4937 same segment as the GOT. Unfortunately, the flexibility of linker
4942 if ((GET_CODE (orig) == LABEL_REF 4940 if ((GET_CODE (orig) == LABEL_REF
4943 || (GET_CODE (orig) == SYMBOL_REF && 4941 || (GET_CODE (orig) == SYMBOL_REF &&
4944 SYMBOL_REF_LOCAL_P (orig))) 4942 SYMBOL_REF_LOCAL_P (orig)))
4945 && NEED_GOT_RELOC 4943 && NEED_GOT_RELOC
4946 && !TARGET_VXWORKS_RTP) 4944 && !TARGET_VXWORKS_RTP)
4947 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address); 4945 insn = arm_pic_static_addr (orig, reg);
4948 else 4946 else
4949 { 4947 {
4948 /* If this function doesn't have a pic register, create one now. */
4949 require_pic_register ();
4950
4951 if (TARGET_32BIT)
4952 emit_insn (gen_pic_load_addr_32bit (address, orig));
4953 else /* TARGET_THUMB1 */
4954 emit_insn (gen_pic_load_addr_thumb1 (address, orig));
4955
4950 pic_ref = gen_const_mem (Pmode, 4956 pic_ref = gen_const_mem (Pmode,
4951 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, 4957 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg,
4952 address)); 4958 address));
4953 } 4959 insn = emit_move_insn (reg, pic_ref);
4954 4960 }
4955 insn = emit_move_insn (reg, pic_ref);
4956 4961
4957 /* Put a REG_EQUAL note on this insn, so that it can be optimized 4962 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4958 by loop. */ 4963 by loop. */
4959 set_unique_reg_note (insn, REG_EQUAL, orig); 4964 set_unique_reg_note (insn, REG_EQUAL, orig);
4960 4965
5104 pic_reg = cfun->machine->pic_reg; 5109 pic_reg = cfun->machine->pic_reg;
5105 if (TARGET_VXWORKS_RTP) 5110 if (TARGET_VXWORKS_RTP)
5106 { 5111 {
5107 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE); 5112 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
5108 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); 5113 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5109 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx)); 5114 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5110 5115
5111 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg))); 5116 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
5112 5117
5113 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); 5118 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5114 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp)); 5119 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
5127 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4); 5132 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5128 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx), 5133 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
5129 UNSPEC_GOTSYM_OFF); 5134 UNSPEC_GOTSYM_OFF);
5130 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); 5135 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
5131 5136
5132 if (TARGET_ARM) 5137 if (TARGET_32BIT)
5133 { 5138 {
5134 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx)); 5139 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
5135 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno)); 5140 if (TARGET_ARM)
5136 } 5141 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno));
5137 else if (TARGET_THUMB2)
5138 {
5139 /* Thumb-2 only allows very limited access to the PC. Calculate the
5140 address in a temporary register. */
5141 if (arm_pic_register != INVALID_REGNUM)
5142 {
5143 pic_tmp = gen_rtx_REG (SImode,
5144 thumb_find_work_register (saved_regs));
5145 }
5146 else 5142 else
5147 { 5143 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
5148 gcc_assert (can_create_pseudo_p ());
5149 pic_tmp = gen_reg_rtx (Pmode);
5150 }
5151
5152 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx));
5153 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno));
5154 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp));
5155 } 5144 }
5156 else /* TARGET_THUMB1 */ 5145 else /* TARGET_THUMB1 */
5157 { 5146 {
5158 if (arm_pic_register != INVALID_REGNUM 5147 if (arm_pic_register != INVALID_REGNUM
5159 && REGNO (pic_reg) > LAST_LO_REGNUM) 5148 && REGNO (pic_reg) > LAST_LO_REGNUM)
5174 /* Need to emit this whether or not we obey regdecls, 5163 /* Need to emit this whether or not we obey regdecls,
5175 since setjmp/longjmp can cause life info to screw up. */ 5164 since setjmp/longjmp can cause life info to screw up. */
5176 emit_use (pic_reg); 5165 emit_use (pic_reg);
5177 } 5166 }
5178 5167
5168 /* Generate code to load the address of a static var when flag_pic is set. */
5169 static rtx
5170 arm_pic_static_addr (rtx orig, rtx reg)
5171 {
5172 rtx l1, labelno, offset_rtx, insn;
5173
5174 gcc_assert (flag_pic);
5175
5176 /* We use an UNSPEC rather than a LABEL_REF because this label
5177 never appears in the code stream. */
5178 labelno = GEN_INT (pic_labelno++);
5179 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
5180 l1 = gen_rtx_CONST (VOIDmode, l1);
5181
5182 /* On the ARM the PC register contains 'dot + 8' at the time of the
5183 addition, on the Thumb it is 'dot + 4'. */
5184 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4);
5185 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
5186 UNSPEC_SYMBOL_OFFSET);
5187 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
5188
5189 if (TARGET_32BIT)
5190 {
5191 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx));
5192 if (TARGET_ARM)
5193 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5194 else
5195 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5196 }
5197 else /* TARGET_THUMB1 */
5198 {
5199 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx));
5200 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5201 }
5202
5203 return insn;
5204 }
5179 5205
5180 /* Return nonzero if X is valid as an ARM state addressing register. */ 5206 /* Return nonzero if X is valid as an ARM state addressing register. */
5181 static int 5207 static int
5182 arm_address_register_rtx_p (rtx x, int strict_p) 5208 arm_address_register_rtx_p (rtx x, int strict_p)
5183 { 5209 {
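For orientation (not part of the patch): arm_pic_static_addr, added in the hunk above, addresses a local static symbol PC-relatively instead of going through the GOT. The literal pool holds symbol minus (label + 8), and the pic_add_dot_plus_eight emitted at the label adds the PC, which on ARM reads as dot + 8 (dot + 4 on Thumb), recovering the symbol address. A toy model of that arithmetic, with invented addresses:

/* Toy model of the "dot plus eight" arithmetic behind arm_pic_static_addr:
   the pool holds SYMBOL - (LABEL + 8); at LABEL the ARM PC reads as
   LABEL + 8, so the add recovers SYMBOL.  Addresses are made up purely
   for illustration.  */
#include <stdio.h>

int
main (void)
{
  unsigned long symbol = 0x20010;   /* address of the static variable */
  unsigned long label = 0x10000;    /* address of the add instruction */

  unsigned long pool_constant = symbol - (label + 8);
  unsigned long pc_at_label = label + 8;   /* ARM reads PC as dot + 8 */

  printf ("recovered address: %#lx\n", pool_constant + pc_at_label);
  return 0;
}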
5806 reg = load_tls_operand (sum, reg); 5832 reg = load_tls_operand (sum, reg);
5807 5833
5808 if (TARGET_ARM) 5834 if (TARGET_ARM)
5809 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno)); 5835 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
5810 else if (TARGET_THUMB2) 5836 else if (TARGET_THUMB2)
5811 { 5837 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5812 rtx tmp;
5813 /* Thumb-2 only allows very limited access to the PC. Calculate
5814 the address in a temporary register. */
5815 tmp = gen_reg_rtx (SImode);
5816 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
5817 emit_insn (gen_addsi3(reg, reg, tmp));
5818 }
5819 else /* TARGET_THUMB1 */ 5838 else /* TARGET_THUMB1 */
5820 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); 5839 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5821 5840
5822 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */ 5841 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */
5823 Pmode, 1, reg, Pmode); 5842 Pmode, 1, reg, Pmode);
5869 reg = load_tls_operand (sum, reg); 5888 reg = load_tls_operand (sum, reg);
5870 5889
5871 if (TARGET_ARM) 5890 if (TARGET_ARM)
5872 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno)); 5891 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
5873 else if (TARGET_THUMB2) 5892 else if (TARGET_THUMB2)
5874 { 5893 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
5875 rtx tmp;
5876 /* Thumb-2 only allows very limited access to the PC. Calculate
5877 the address in a temporary register. */
5878 tmp = gen_reg_rtx (SImode);
5879 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno));
5880 emit_insn (gen_addsi3(reg, reg, tmp));
5881 emit_move_insn (reg, gen_const_mem (SImode, reg));
5882 }
5883 else 5894 else
5884 { 5895 {
5885 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); 5896 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
5886 emit_move_insn (reg, gen_const_mem (SImode, reg)); 5897 emit_move_insn (reg, gen_const_mem (SImode, reg));
5887 } 5898 }
6262 && INTVAL (x) < 256 && INTVAL (x) > -256) 6273 && INTVAL (x) < 256 && INTVAL (x) > -256)
6263 return 0; 6274 return 0;
6264 else if ((outer == IOR || outer == XOR || outer == AND) 6275 else if ((outer == IOR || outer == XOR || outer == AND)
6265 && INTVAL (x) < 256 && INTVAL (x) >= -256) 6276 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6266 return COSTS_N_INSNS (1); 6277 return COSTS_N_INSNS (1);
6278 else if (outer == AND)
6279 {
6280 int i;
6281 /* This duplicates the tests in the andsi3 expander. */
6282 for (i = 9; i <= 31; i++)
6283 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6284 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6285 return COSTS_N_INSNS (2);
6286 }
6267 else if (outer == ASHIFT || outer == ASHIFTRT 6287 else if (outer == ASHIFT || outer == ASHIFTRT
6268 || outer == LSHIFTRT) 6288 || outer == LSHIFTRT)
6269 return 0; 6289 return 0;
6270 return COSTS_N_INSNS (2); 6290 return COSTS_N_INSNS (2);
6271 6291
6333 { 6353 {
6334 enum machine_mode mode = GET_MODE (x); 6354 enum machine_mode mode = GET_MODE (x);
6335 enum rtx_code subcode; 6355 enum rtx_code subcode;
6336 rtx operand; 6356 rtx operand;
6337 enum rtx_code code = GET_CODE (x); 6357 enum rtx_code code = GET_CODE (x);
6338 int extra_cost;
6339 *total = 0; 6358 *total = 0;
6340 6359
6341 switch (code) 6360 switch (code)
6342 { 6361 {
6343 case MEM: 6362 case MEM:
6557 } 6576 }
6558 6577
6559 /* Fall through */ 6578 /* Fall through */
6560 6579
6561 case AND: case XOR: case IOR: 6580 case AND: case XOR: case IOR:
6562 extra_cost = 0;
6563 6581
6564 /* Normally the frame registers will be spilt into reg+const during 6582 /* Normally the frame registers will be spilt into reg+const during
6565 reload, so it is a bad idea to combine them with other instructions, 6583 reload, so it is a bad idea to combine them with other instructions,
6566 since then they might not be moved outside of loops. As a compromise 6584 since then they might not be moved outside of loops. As a compromise
6567 we allow integration with ops that have a constant as their second 6585 we allow integration with ops that have a constant as their second
6909 *total = COSTS_N_INSNS (4); 6927 *total = COSTS_N_INSNS (4);
6910 return false; 6928 return false;
6911 } 6929 }
6912 } 6930 }
6913 6931
6932 /* Estimates the size cost of thumb1 instructions.
6933 For now most of the code is copied from thumb1_rtx_costs. We need more
6934 fine grain tuning when we have more related test cases. */
6935 static inline int
6936 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
6937 {
6938 enum machine_mode mode = GET_MODE (x);
6939
6940 switch (code)
6941 {
6942 case ASHIFT:
6943 case ASHIFTRT:
6944 case LSHIFTRT:
6945 case ROTATERT:
6946 case PLUS:
6947 case MINUS:
6948 case COMPARE:
6949 case NEG:
6950 case NOT:
6951 return COSTS_N_INSNS (1);
6952
6953 case MULT:
6954 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
6955 {
6956 /* Thumb1 mul instruction can't operate on const. We must Load it
6957 into a register first. */
6958 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
6959 return COSTS_N_INSNS (1) + const_size;
6960 }
6961 return COSTS_N_INSNS (1);
6962
6963 case SET:
6964 return (COSTS_N_INSNS (1)
6965 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
6966 + GET_CODE (SET_DEST (x)) == MEM));
6967
6968 case CONST_INT:
6969 if (outer == SET)
6970 {
6971 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
6972 return 0;
6973 if (thumb_shiftable_const (INTVAL (x)))
6974 return COSTS_N_INSNS (2);
6975 return COSTS_N_INSNS (3);
6976 }
6977 else if ((outer == PLUS || outer == COMPARE)
6978 && INTVAL (x) < 256 && INTVAL (x) > -256)
6979 return 0;
6980 else if ((outer == IOR || outer == XOR || outer == AND)
6981 && INTVAL (x) < 256 && INTVAL (x) >= -256)
6982 return COSTS_N_INSNS (1);
6983 else if (outer == AND)
6984 {
6985 int i;
6986 /* This duplicates the tests in the andsi3 expander. */
6987 for (i = 9; i <= 31; i++)
6988 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
6989 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
6990 return COSTS_N_INSNS (2);
6991 }
6992 else if (outer == ASHIFT || outer == ASHIFTRT
6993 || outer == LSHIFTRT)
6994 return 0;
6995 return COSTS_N_INSNS (2);
6996
6997 case CONST:
6998 case CONST_DOUBLE:
6999 case LABEL_REF:
7000 case SYMBOL_REF:
7001 return COSTS_N_INSNS (3);
7002
7003 case UDIV:
7004 case UMOD:
7005 case DIV:
7006 case MOD:
7007 return 100;
7008
7009 case TRUNCATE:
7010 return 99;
7011
7012 case AND:
7013 case XOR:
7014 case IOR:
7015 /* XXX guess. */
7016 return 8;
7017
7018 case MEM:
7019 /* XXX another guess. */
7020 /* Memory costs quite a lot for the first word, but subsequent words
7021 load at the equivalent of a single insn each. */
7022 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
7023 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
7024 ? 4 : 0));
7025
7026 case IF_THEN_ELSE:
7027 /* XXX a guess. */
7028 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
7029 return 14;
7030 return 2;
7031
7032 case ZERO_EXTEND:
7033 /* XXX still guessing. */
7034 switch (GET_MODE (XEXP (x, 0)))
7035 {
7036 case QImode:
7037 return (1 + (mode == DImode ? 4 : 0)
7038 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7039
7040 case HImode:
7041 return (4 + (mode == DImode ? 4 : 0)
7042 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7043
7044 case SImode:
7045 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
7046
7047 default:
7048 return 99;
7049 }
7050
7051 default:
7052 return 99;
7053 }
7054 }
7055
6914 /* RTX costs when optimizing for size. */ 7056 /* RTX costs when optimizing for size. */
6915 static bool 7057 static bool
6916 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, 7058 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
6917 int *total) 7059 int *total)
6918 { 7060 {
6919 enum machine_mode mode = GET_MODE (x); 7061 enum machine_mode mode = GET_MODE (x);
6920 if (TARGET_THUMB1) 7062 if (TARGET_THUMB1)
6921 { 7063 {
6922 /* XXX TBD. For now, use the standard costs. */ 7064 *total = thumb1_size_rtx_costs (x, code, outer_code);
6923 *total = thumb1_rtx_costs (x, code, outer_code);
6924 return true; 7065 return true;
6925 } 7066 }
6926 7067
6927 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */ 7068 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
6928 switch (code) 7069 switch (code)
7168 { 7309 {
7169 if (!speed) 7310 if (!speed)
7170 return arm_size_rtx_costs (x, (enum rtx_code) code, 7311 return arm_size_rtx_costs (x, (enum rtx_code) code,
7171 (enum rtx_code) outer_code, total); 7312 (enum rtx_code) outer_code, total);
7172 else 7313 else
7173 return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code, 7314 return current_tune->rtx_costs (x, (enum rtx_code) code,
7174 (enum rtx_code) outer_code, 7315 (enum rtx_code) outer_code,
7175 total, speed); 7316 total, speed);
7176 } 7317 }
7177 7318
7178 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not 7319 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
7179 supported on any "slowmul" cores, so it can be ignored. */ 7320 supported on any "slowmul" cores, so it can be ignored. */
7180 7321
7315 7456
7316 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores, 7457 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
7317 so it can be ignored. */ 7458 so it can be ignored. */
7318 7459
7319 static bool 7460 static bool
7320 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed) 7461 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
7462 int *total, bool speed)
7321 { 7463 {
7322 enum machine_mode mode = GET_MODE (x); 7464 enum machine_mode mode = GET_MODE (x);
7323 7465
7324 if (TARGET_THUMB) 7466 if (TARGET_THUMB)
7325 { 7467 {
7655 7797
7656 if (!fp_consts_inited) 7798 if (!fp_consts_inited)
7657 init_fp_table (); 7799 init_fp_table ();
7658 7800
7659 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 7801 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7660 r = REAL_VALUE_NEGATE (r); 7802 r = real_value_negate (&r);
7661 if (REAL_VALUE_MINUS_ZERO (r)) 7803 if (REAL_VALUE_MINUS_ZERO (r))
7662 return 0; 7804 return 0;
7663 7805
7664 for (i = 0; i < 8; i++) 7806 for (i = 0; i < 8; i++)
7665 if (REAL_VALUES_EQUAL (r, values_fp[i])) 7807 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7706 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r)) 7848 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
7707 return -1; 7849 return -1;
7708 7850
7709 /* Extract sign, exponent and mantissa. */ 7851 /* Extract sign, exponent and mantissa. */
7710 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0; 7852 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
7711 r = REAL_VALUE_ABS (r); 7853 r = real_value_abs (&r);
7712 exponent = REAL_EXP (&r); 7854 exponent = REAL_EXP (&r);
7713 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the 7855 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
7714 highest (sign) bit, with a fixed binary point at bit point_pos. 7856 highest (sign) bit, with a fixed binary point at bit point_pos.
7715 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1 7857 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
7716 bits for the mantissa, this may fail (low bits would be lost). */ 7858 bits for the mantissa, this may fail (low bits would be lost). */
8826 default: 8968 default:
8827 return 0; 8969 return 0;
8828 } 8970 }
8829 } 8971 }
8830 8972
8831 /* Must not copy a SET whose source operand is PC-relative. */ 8973 /* Must not copy any rtx that uses a pc-relative address. */
8974
8975 static int
8976 arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED)
8977 {
8978 if (GET_CODE (*x) == UNSPEC
8979 && XINT (*x, 1) == UNSPEC_PIC_BASE)
8980 return 1;
8981 return 0;
8982 }
8832 8983
8833 static bool 8984 static bool
8834 arm_cannot_copy_insn_p (rtx insn) 8985 arm_cannot_copy_insn_p (rtx insn)
8835 { 8986 {
8836 rtx pat = PATTERN (insn); 8987 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL);
8837
8838 if (GET_CODE (pat) == SET)
8839 {
8840 rtx rhs = SET_SRC (pat);
8841
8842 if (GET_CODE (rhs) == UNSPEC
8843 && XINT (rhs, 1) == UNSPEC_PIC_BASE)
8844 return TRUE;
8845
8846 if (GET_CODE (rhs) == MEM
8847 && GET_CODE (XEXP (rhs, 0)) == UNSPEC
8848 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE)
8849 return TRUE;
8850 }
8851
8852 return FALSE;
8853 } 8988 }
8854 8989
8855 enum rtx_code 8990 enum rtx_code
8856 minmax_code (rtx x) 8991 minmax_code (rtx x)
8857 { 8992 {
8935 } 9070 }
8936 9071
8937 return 0; 9072 return 0;
8938 } 9073 }
8939 9074
9075 /* Return true iff it would be profitable to turn a sequence of NOPS loads
9076 or stores (depending on IS_STORE) into a load-multiple or store-multiple
9077 instruction. ADD_OFFSET is nonzero if the base address register needs
9078 to be modified with an add instruction before we can use it. */
9079
9080 static bool
9081 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
9082 int nops, HOST_WIDE_INT add_offset)
9083 {
9084 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9085 if the offset isn't small enough. The reason 2 ldrs are faster
9086 is because these ARMs are able to do more than one cache access
9087 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9088 whilst the ARM8 has a double bandwidth cache. This means that
9089 these cores can do both an instruction fetch and a data fetch in
9090 a single cycle, so the trick of calculating the address into a
9091 scratch register (one of the result regs) and then doing a load
9092 multiple actually becomes slower (and no smaller in code size).
9093 That is the transformation
9094
9095 ldr rd1, [rbase + offset]
9096 ldr rd2, [rbase + offset + 4]
9097
9098 to
9099
9100 add rd1, rbase, offset
9101 ldmia rd1, {rd1, rd2}
9102
9103 produces worse code -- '3 cycles + any stalls on rd2' instead of
9104 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9105 access per cycle, the first sequence could never complete in less
9106 than 6 cycles, whereas the ldm sequence would only take 5 and
9107 would make better use of sequential accesses if not hitting the
9108 cache.
9109
9110 We cheat here and test 'arm_ld_sched' which we currently know to
9111 only be true for the ARM8, ARM9 and StrongARM. If this ever
9112 changes, then the test below needs to be reworked. */
9113 if (nops == 2 && arm_ld_sched && add_offset != 0)
9114 return false;
9115
9116 return true;
9117 }
9118
9119 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
9120 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
9121 an array ORDER which describes the sequence to use when accessing the
9122 offsets that produces an ascending order. In this sequence, each
9123 offset must be larger by exactly 4 than the previous one. ORDER[0]
9124 must have been filled in with the lowest offset by the caller.
9125 If UNSORTED_REGS is nonnull, it is an array of register numbers that
9126 we use to verify that ORDER produces an ascending order of registers.
9127 Return true if it was possible to construct such an order, false if
9128 not. */
9129
9130 static bool
9131 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
9132 int *unsorted_regs)
9133 {
9134 int i;
9135 for (i = 1; i < nops; i++)
9136 {
9137 int j;
9138
9139 order[i] = order[i - 1];
9140 for (j = 0; j < nops; j++)
9141 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
9142 {
9143 /* We must find exactly one offset that is higher than the
9144 previous one by 4. */
9145 if (order[i] != order[i - 1])
9146 return false;
9147 order[i] = j;
9148 }
9149 if (order[i] == order[i - 1])
9150 return false;
9151 /* The register numbers must be ascending. */
9152 if (unsorted_regs != NULL
9153 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
9154 return false;
9155 }
9156 return true;
9157 }
9158
8940 int 9159 int
8941 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base, 9160 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
8942 HOST_WIDE_INT *load_offset) 9161 HOST_WIDE_INT *load_offset)
8943 { 9162 {
8944 int unsorted_regs[4]; 9163 int unsorted_regs[MAX_LDM_STM_OPS];
8945 HOST_WIDE_INT unsorted_offsets[4]; 9164 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
8946 int order[4]; 9165 int order[MAX_LDM_STM_OPS];
8947 int base_reg = -1; 9166 int base_reg = -1;
8948 int i; 9167 int i, ldm_case;
8949 9168
8950 /* Can only handle 2, 3, or 4 insns at present, 9169 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
8951 though could be easily extended if required. */ 9170 easily extended if required. */
8952 gcc_assert (nops >= 2 && nops <= 4); 9171 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
8953 9172
8954 memset (order, 0, 4 * sizeof (int)); 9173 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
8955 9174
8956 /* Loop over the operands and check that the memory references are 9175 /* Loop over the operands and check that the memory references are
8957 suitable (i.e. immediate offsets from the same base register). At 9176 suitable (i.e. immediate offsets from the same base register). At
8958 the same time, extract the target register, and the memory 9177 the same time, extract the target register, and the memory
8959 offsets. */ 9178 offsets. */
8985 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) 9204 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
8986 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) 9205 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
8987 == CONST_INT))) 9206 == CONST_INT)))
8988 { 9207 {
8989 if (i == 0) 9208 if (i == 0)
8990 { 9209 base_reg = REGNO (reg);
8991 base_reg = REGNO (reg);
8992 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
8993 ? REGNO (operands[i])
8994 : REGNO (SUBREG_REG (operands[i])));
8995 order[0] = 0;
8996 }
8997 else 9210 else
8998 { 9211 {
8999 if (base_reg != (int) REGNO (reg)) 9212 if (base_reg != (int) REGNO (reg))
9000 /* Not addressed from the same base register. */ 9213 /* Not addressed from the same base register. */
9001 return 0; 9214 return 0;
9002
9003 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9004 ? REGNO (operands[i])
9005 : REGNO (SUBREG_REG (operands[i])));
9006 if (unsorted_regs[i] < unsorted_regs[order[0]])
9007 order[0] = i;
9008 } 9215 }
9216 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9217 ? REGNO (operands[i])
9218 : REGNO (SUBREG_REG (operands[i])));
9009 9219
9010 /* If it isn't an integer register, or if it overwrites the 9220 /* If it isn't an integer register, or if it overwrites the
9011 base register but isn't the last insn in the list, then 9221 base register but isn't the last insn in the list, then
9012 we can't do this. */ 9222 we can't do this. */
9013 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14 9223 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
9014 || (i != nops - 1 && unsorted_regs[i] == base_reg)) 9224 || (i != nops - 1 && unsorted_regs[i] == base_reg))
9015 return 0; 9225 return 0;
9016 9226
9017 unsorted_offsets[i] = INTVAL (offset); 9227 unsorted_offsets[i] = INTVAL (offset);
9228 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9229 order[0] = i;
9018 } 9230 }
9019 else 9231 else
9020 /* Not a suitable memory address. */ 9232 /* Not a suitable memory address. */
9021 return 0; 9233 return 0;
9022 } 9234 }
9023 9235
9024 /* All the useful information has now been extracted from the 9236 /* All the useful information has now been extracted from the
9025 operands into unsorted_regs and unsorted_offsets; additionally, 9237 operands into unsorted_regs and unsorted_offsets; additionally,
9026 order[0] has been set to the lowest numbered register in the 9238 order[0] has been set to the lowest offset in the list. Sort
9027 list. Sort the registers into order, and check that the memory 9239 the offsets into order, verifying that they are adjacent, and
9028 offsets are ascending and adjacent. */ 9240 check that the register numbers are ascending. */
9029 9241 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
9030 for (i = 1; i < nops; i++) 9242 return 0;
9031 {
9032 int j;
9033
9034 order[i] = order[i - 1];
9035 for (j = 0; j < nops; j++)
9036 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9037 && (order[i] == order[i - 1]
9038 || unsorted_regs[j] < unsorted_regs[order[i]]))
9039 order[i] = j;
9040
9041 /* Have we found a suitable register? if not, one must be used more
9042 than once. */
9043 if (order[i] == order[i - 1])
9044 return 0;
9045
9046 /* Is the memory address adjacent and ascending? */
9047 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
9048 return 0;
9049 }
9050 9243
9051 if (base) 9244 if (base)
9052 { 9245 {
9053 *base = base_reg; 9246 *base = base_reg;
9054 9247
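For orientation (not part of the patch): the new compute_offset_order helper above checks that, starting from the lowest offset, each subsequent access is exactly 4 bytes higher and that the register numbers ascend with the offsets. A standalone sketch of the same check on sample data, with simplified types and names:

/* Sketch of the check done by the new compute_offset_order helper:
   starting from the lowest offset (order[0]), every step must find
   exactly one offset that is larger by 4, and the register numbers
   selected along the way must be strictly ascending.  */
#include <stdbool.h>
#include <stdio.h>

static bool
offsets_form_ascending_run (int nops, const long *offsets, int *order,
                            const int *regs)
{
  int i, j;

  for (i = 1; i < nops; i++)
    {
      order[i] = order[i - 1];
      for (j = 0; j < nops; j++)
        if (offsets[j] == offsets[order[i - 1]] + 4)
          {
            /* We must find exactly one offset higher by 4.  */
            if (order[i] != order[i - 1])
              return false;
            order[i] = j;
          }
      if (order[i] == order[i - 1])
        return false;
      /* The register numbers must be ascending.  */
      if (regs != NULL && regs[order[i]] <= regs[order[i - 1]])
        return false;
    }
  return true;
}

int
main (void)
{
  long offsets[4] = { 8, 0, 4, 12 };   /* accesses at base+8, +0, +4, +12 */
  int regs[4] = { 3, 1, 2, 4 };        /* r3, r1, r2, r4 */
  int order[4] = { 1, 0, 0, 0 };       /* order[0] = index of lowest offset */

  printf ("mergeable into a multiple op: %s\n",
          offsets_form_ascending_run (4, offsets, order, regs) ? "yes" : "no");
  return 0;
}

The companion multiple_operation_profitable_p then vetoes the two-access case on arm_ld_sched cores when an extra add would be needed, matching the cache-behaviour comment in the hunk above.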
9057 9250
9058 *load_offset = unsorted_offsets[order[0]]; 9251 *load_offset = unsorted_offsets[order[0]];
9059 } 9252 }
9060 9253
9061 if (unsorted_offsets[order[0]] == 0) 9254 if (unsorted_offsets[order[0]] == 0)
9062 return 1; /* ldmia */ 9255 ldm_case = 1; /* ldmia */
9063 9256 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9064 if (TARGET_ARM && unsorted_offsets[order[0]] == 4) 9257 ldm_case = 2; /* ldmib */
9065 return 2; /* ldmib */ 9258 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9066 9259 ldm_case = 3; /* ldmda */
9067 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) 9260 else if (unsorted_offsets[order[nops - 1]] == -4)
9068 return 3; /* ldmda */ 9261 ldm_case = 4; /* ldmdb */
9069 9262 else if (const_ok_for_arm (unsorted_offsets[order[0]])
9070 if (unsorted_offsets[order[nops - 1]] == -4) 9263 || const_ok_for_arm (-unsorted_offsets[order[0]]))
9071 return 4; /* ldmdb */ 9264 ldm_case = 5;
9072 9265 else
9073 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
9074 if the offset isn't small enough. The reason 2 ldrs are faster
9075 is because these ARMs are able to do more than one cache access
9076 in a single cycle. The ARM9 and StrongARM have Harvard caches,
9077 whilst the ARM8 has a double bandwidth cache. This means that
9078 these cores can do both an instruction fetch and a data fetch in
9079 a single cycle, so the trick of calculating the address into a
9080 scratch register (one of the result regs) and then doing a load
9081 multiple actually becomes slower (and no smaller in code size).
9082 That is the transformation
9083
9084 ldr rd1, [rbase + offset]
9085 ldr rd2, [rbase + offset + 4]
9086
9087 to
9088
9089 add rd1, rbase, offset
9090 ldmia rd1, {rd1, rd2}
9091
9092 produces worse code -- '3 cycles + any stalls on rd2' instead of
9093 '2 cycles + any stalls on rd2'. On ARMs with only one cache
9094 access per cycle, the first sequence could never complete in less
9095 than 6 cycles, whereas the ldm sequence would only take 5 and
9096 would make better use of sequential accesses if not hitting the
9097 cache.
9098
9099 We cheat here and test 'arm_ld_sched' which we currently know to
9100 only be true for the ARM8, ARM9 and StrongARM. If this ever
9101 changes, then the test below needs to be reworked. */
9102 if (nops == 2 && arm_ld_sched)
9103 return 0; 9266 return 0;
9104 9267
9105 /* Can't do it without setting up the offset, only do this if it takes 9268 if (!multiple_operation_profitable_p (false, nops,
9106 no more than one insn. */ 9269 ldm_case == 5
9107 return (const_ok_for_arm (unsorted_offsets[order[0]]) 9270 ? unsorted_offsets[order[0]] : 0))
9108 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0; 9271 return 0;
9272
9273 return ldm_case;
9109 } 9274 }
9110 9275
9111 const char * 9276 const char *
9112 emit_ldm_seq (rtx *operands, int nops) 9277 emit_ldm_seq (rtx *operands, int nops)
9113 { 9278 {
9114 int regs[4]; 9279 int regs[MAX_LDM_STM_OPS];
9115 int base_reg; 9280 int base_reg;
9116 HOST_WIDE_INT offset; 9281 HOST_WIDE_INT offset;
9117 char buf[100]; 9282 char buf[100];
9118 int i; 9283 int i;
9119 9284
9168 9333
9169 int 9334 int
9170 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base, 9335 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
9171 HOST_WIDE_INT * load_offset) 9336 HOST_WIDE_INT * load_offset)
9172 { 9337 {
9173 int unsorted_regs[4]; 9338 int unsorted_regs[MAX_LDM_STM_OPS];
9174 HOST_WIDE_INT unsorted_offsets[4]; 9339 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
9175 int order[4]; 9340 int order[MAX_LDM_STM_OPS];
9176 int base_reg = -1; 9341 int base_reg = -1;
9177 int i; 9342 int i, stm_case;
9178 9343
9179 /* Can only handle 2, 3, or 4 insns at present, though could be easily 9344 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
9180 extended if required. */ 9345 easily extended if required. */
9181 gcc_assert (nops >= 2 && nops <= 4); 9346 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
9182 9347
9183 memset (order, 0, 4 * sizeof (int)); 9348 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
9184 9349
9185 /* Loop over the operands and check that the memory references are 9350 /* Loop over the operands and check that the memory references are
9186 suitable (i.e. immediate offsets from the same base register). At 9351 suitable (i.e. immediate offsets from the same base register). At
9187 the same time, extract the target register, and the memory 9352 the same time, extract the target register, and the memory
9188 offsets. */ 9353 offsets. */
9213 || (GET_CODE (reg) == SUBREG 9378 || (GET_CODE (reg) == SUBREG
9214 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) 9379 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
9215 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) 9380 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
9216 == CONST_INT))) 9381 == CONST_INT)))
9217 { 9382 {
9383 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9384 ? REGNO (operands[i])
9385 : REGNO (SUBREG_REG (operands[i])));
9218 if (i == 0) 9386 if (i == 0)
9219 { 9387 base_reg = REGNO (reg);
9220 base_reg = REGNO (reg); 9388 else if (base_reg != (int) REGNO (reg))
9221 unsorted_regs[0] = (GET_CODE (operands[i]) == REG 9389 /* Not addressed from the same base register. */
9222 ? REGNO (operands[i]) 9390 return 0;
9223 : REGNO (SUBREG_REG (operands[i])));
9224 order[0] = 0;
9225 }
9226 else
9227 {
9228 if (base_reg != (int) REGNO (reg))
9229 /* Not addressed from the same base register. */
9230 return 0;
9231
9232 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
9233 ? REGNO (operands[i])
9234 : REGNO (SUBREG_REG (operands[i])));
9235 if (unsorted_regs[i] < unsorted_regs[order[0]])
9236 order[0] = i;
9237 }
9238 9391
9239 /* If it isn't an integer register, then we can't do this. */ 9392 /* If it isn't an integer register, then we can't do this. */
9240 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14) 9393 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
9241 return 0; 9394 return 0;
9242 9395
9243 unsorted_offsets[i] = INTVAL (offset); 9396 unsorted_offsets[i] = INTVAL (offset);
9397 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
9398 order[0] = i;
9244 } 9399 }
9245 else 9400 else
9246 /* Not a suitable memory address. */ 9401 /* Not a suitable memory address. */
9247 return 0; 9402 return 0;
9248 } 9403 }
9249 9404
9250 /* All the useful information has now been extracted from the 9405 /* All the useful information has now been extracted from the
9251 operands into unsorted_regs and unsorted_offsets; additionally, 9406 operands into unsorted_regs and unsorted_offsets; additionally,
9252 order[0] has been set to the lowest numbered register in the 9407 order[0] has been set to the lowest offset in the list. Sort
9253 list. Sort the registers into order, and check that the memory 9408 the offsets into order, verifying that they are adjacent, and
9254 offsets are ascending and adjacent. */ 9409 check that the register numbers are ascending. */
9255 9410 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs))
9256 for (i = 1; i < nops; i++) 9411 return 0;
9257 {
9258 int j;
9259
9260 order[i] = order[i - 1];
9261 for (j = 0; j < nops; j++)
9262 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
9263 && (order[i] == order[i - 1]
9264 || unsorted_regs[j] < unsorted_regs[order[i]]))
9265 order[i] = j;
9266
9267 /* Have we found a suitable register? if not, one must be used more
9268 than once. */
9269 if (order[i] == order[i - 1])
9270 return 0;
9271
9272 /* Is the memory address adjacent and ascending? */
9273 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
9274 return 0;
9275 }
9276 9412
9277 if (base) 9413 if (base)
9278 { 9414 {
9279 *base = base_reg; 9415 *base = base_reg;
9280 9416
9283 9419
9284 *load_offset = unsorted_offsets[order[0]]; 9420 *load_offset = unsorted_offsets[order[0]];
9285 } 9421 }
9286 9422
9287 if (unsorted_offsets[order[0]] == 0) 9423 if (unsorted_offsets[order[0]] == 0)
9288 return 1; /* stmia */ 9424 stm_case = 1; /* stmia */
9289 9425 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
9290 if (unsorted_offsets[order[0]] == 4) 9426 stm_case = 2; /* stmib */
9291 return 2; /* stmib */ 9427 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
9292 9428 stm_case = 3; /* stmda */
9293 if (unsorted_offsets[order[nops - 1]] == 0) 9429 else if (unsorted_offsets[order[nops - 1]] == -4)
9294 return 3; /* stmda */ 9430 stm_case = 4; /* stmdb */
9295 9431 else
9296 if (unsorted_offsets[order[nops - 1]] == -4) 9432 return 0;
9297 return 4; /* stmdb */ 9433
9298 9434 if (!multiple_operation_profitable_p (false, nops, 0))
9299 return 0; 9435 return 0;
9436
9437 return stm_case;
9300 } 9438 }
9301 9439
9302 const char * 9440 const char *
9303 emit_stm_seq (rtx *operands, int nops) 9441 emit_stm_seq (rtx *operands, int nops)
9304 { 9442 {
9305 int regs[4]; 9443 int regs[MAX_LDM_STM_OPS];
9306 int base_reg; 9444 int base_reg;
9307 HOST_WIDE_INT offset; 9445 HOST_WIDE_INT offset;
9308 char buf[100]; 9446 char buf[100];
9309 int i; 9447 int i;
9310 9448
11682 reg = gen_rtx_REG (DFmode, base_reg); 11820 reg = gen_rtx_REG (DFmode, base_reg);
11683 base_reg += 2; 11821 base_reg += 2;
11684 11822
11685 XVECEXP (par, 0, 0) 11823 XVECEXP (par, 0, 0)
11686 = gen_rtx_SET (VOIDmode, 11824 = gen_rtx_SET (VOIDmode,
11687 gen_frame_mem (BLKmode, 11825 gen_frame_mem
11688 gen_rtx_PRE_DEC (BLKmode, 11826 (BLKmode,
11689 stack_pointer_rtx)), 11827 gen_rtx_PRE_MODIFY (Pmode,
11828 stack_pointer_rtx,
11829 plus_constant
11830 (stack_pointer_rtx,
11831 - (count * 8)))
11832 ),
11690 gen_rtx_UNSPEC (BLKmode, 11833 gen_rtx_UNSPEC (BLKmode,
11691 gen_rtvec (1, reg), 11834 gen_rtvec (1, reg),
11692 UNSPEC_PUSH_MULT)); 11835 UNSPEC_PUSH_MULT));
11693 11836
11694 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, 11837 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
13751 start_reg, reg - start_reg, SP_REGNUM); 13894 start_reg, reg - start_reg, SP_REGNUM);
13752 } 13895 }
13753 13896
13754 if (TARGET_HARD_FLOAT && TARGET_VFP) 13897 if (TARGET_HARD_FLOAT && TARGET_VFP)
13755 { 13898 {
13756 start_reg = FIRST_VFP_REGNUM; 13899 int end_reg = LAST_VFP_REGNUM + 1;
13757 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2) 13900
13901 /* Scan the registers in reverse order. We need to match
13902 any groupings made in the prologue and generate matching
13903 pop operations. */
13904 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2)
13758 { 13905 {
13759 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg]) 13906 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
13760 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1])) 13907 && (!df_regs_ever_live_p (reg + 1)
13908 || call_used_regs[reg + 1]))
13761 { 13909 {
13762 if (start_reg != reg) 13910 if (end_reg > reg + 2)
13763 vfp_output_fldmd (f, SP_REGNUM, 13911 vfp_output_fldmd (f, SP_REGNUM,
13764 (start_reg - FIRST_VFP_REGNUM) / 2, 13912 (reg + 2 - FIRST_VFP_REGNUM) / 2,
13765 (reg - start_reg) / 2); 13913 (end_reg - (reg + 2)) / 2);
13766 start_reg = reg + 2; 13914 end_reg = reg;
13767 } 13915 }
13768 } 13916 }
13769 if (start_reg != reg) 13917 if (end_reg > reg + 2)
13770 vfp_output_fldmd (f, SP_REGNUM, 13918 vfp_output_fldmd (f, SP_REGNUM, 0,
13771 (start_reg - FIRST_VFP_REGNUM) / 2, 13919 (end_reg - (reg + 2)) / 2);
13772 (reg - start_reg) / 2); 13920 }
13773 } 13921
13774 if (TARGET_IWMMXT) 13922 if (TARGET_IWMMXT)
13775 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++) 13923 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
13776 if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) 13924 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
13777 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM); 13925 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
13778 13926
13937 if (mask & (1 << PC_REGNUM)) 14085 if (mask & (1 << PC_REGNUM))
13938 num_dwarf_regs--; 14086 num_dwarf_regs--;
13939 14087
13940 /* For the body of the insn we are going to generate an UNSPEC in 14088 /* For the body of the insn we are going to generate an UNSPEC in
13941 parallel with several USEs. This allows the insn to be recognized 14089 parallel with several USEs. This allows the insn to be recognized
13942 by the push_multi pattern in the arm.md file. The insn looks 14090 by the push_multi pattern in the arm.md file.
13943 something like this: 14091
14092 The body of the insn looks something like this:
13944 14093
13945 (parallel [ 14094 (parallel [
13946 (set (mem:BLK (pre_dec:BLK (reg:SI sp))) 14095 (set (mem:BLK (pre_modify:SI (reg:SI sp)
14096 (const_int:SI <num>)))
13947 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT)) 14097 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
13948 (use (reg:SI 11 fp)) 14098 (use (reg:SI XX))
13949 (use (reg:SI 12 ip)) 14099 (use (reg:SI YY))
13950 (use (reg:SI 14 lr)) 14100 ...
13951 (use (reg:SI 15 pc))
13952 ]) 14101 ])
13953 14102
13954 For the frame note however, we try to be more explicit and actually 14103 For the frame note however, we try to be more explicit and actually
13955 show each register being stored into the stack frame, plus a (single) 14104 show each register being stored into the stack frame, plus a (single)
13956 decrement of the stack pointer. We do it this way in order to be 14105 decrement of the stack pointer. We do it this way in order to be
13959 something like this: 14108 something like this:
13960 14109
13961 (sequence [ 14110 (sequence [
13962 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20))) 14111 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
13963 (set (mem:SI (reg:SI sp)) (reg:SI r4)) 14112 (set (mem:SI (reg:SI sp)) (reg:SI r4))
13964 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp)) 14113 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
13965 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip)) 14114 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
13966 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr)) 14115 ...
13967 ]) 14116 ])
13968 14117
13969 This sequence is used both by the code to support stack unwinding for 14118 FIXME: In an ideal world the PRE_MODIFY would not exist and
13970 exception handlers and the code to generate dwarf2 frame debugging. */ 14119 instead we'd have a parallel expression detailing all
14120 the stores to the various memory addresses so that debug
14121 information is more up-to-date. Remember however while writing
14122 this to take care of the constraints with the push instruction.
14123
14124 Note also that this has to be taken care of for the VFP registers.
14125
14126 For more see PR43399. */
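The comment above distinguishes the compact insn body (one PRE_MODIFY plus an UNSPEC) from the RTX_FRAME_RELATED note, which spells out a single SP decrement of 4 * num_regs followed by one store per register at sp, sp+4, sp+8, and so on. A small sketch of that bookkeeping with plain integers (the regnos array and printf are purely illustrative):

  #include <stdio.h>

  /* Sketch only: the layout the frame note describes.  One SP
     adjustment for the whole group, then each saved register gets a
     slot 4 bytes above the previous one, lowest register at the new SP.  */
  static void
  print_frame_note (const int *regnos, int num_regs)
  {
    int j;

    printf ("sp := sp + %d\n", -4 * num_regs);            /* one decrement */
    for (j = 0; j < num_regs; j++)
      printf ("mem[sp + %d] := r%d\n", 4 * j, regnos[j]); /* one store each */
  }

With five registers this reproduces the (const_int -20) sequence shown in the comment.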
13971 14127
13972 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs)); 14128 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
13973 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1)); 14129 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
13974 dwarf_par_index = 1; 14130 dwarf_par_index = 1;
13975 14131
13979 { 14135 {
13980 reg = gen_rtx_REG (SImode, i); 14136 reg = gen_rtx_REG (SImode, i);
13981 14137
13982 XVECEXP (par, 0, 0) 14138 XVECEXP (par, 0, 0)
13983 = gen_rtx_SET (VOIDmode, 14139 = gen_rtx_SET (VOIDmode,
13984 gen_frame_mem (BLKmode, 14140 gen_frame_mem
13985 gen_rtx_PRE_DEC (BLKmode, 14141 (BLKmode,
13986 stack_pointer_rtx)), 14142 gen_rtx_PRE_MODIFY (Pmode,
14143 stack_pointer_rtx,
14144 plus_constant
14145 (stack_pointer_rtx,
14146 -4 * num_regs))
14147 ),
13987 gen_rtx_UNSPEC (BLKmode, 14148 gen_rtx_UNSPEC (BLKmode,
13988 gen_rtvec (1, reg), 14149 gen_rtvec (1, reg),
13989 UNSPEC_PUSH_MULT)); 14150 UNSPEC_PUSH_MULT));
13990 14151
13991 if (i != PC_REGNUM) 14152 if (i != PC_REGNUM)
14012 14173
14013 if (i != PC_REGNUM) 14174 if (i != PC_REGNUM)
14014 { 14175 {
14015 tmp 14176 tmp
14016 = gen_rtx_SET (VOIDmode, 14177 = gen_rtx_SET (VOIDmode,
14017 gen_frame_mem (SImode, 14178 gen_frame_mem
14018 plus_constant (stack_pointer_rtx, 14179 (SImode,
14019 4 * j)), 14180 plus_constant (stack_pointer_rtx,
14181 4 * j)),
14020 reg); 14182 reg);
14021 RTX_FRAME_RELATED_P (tmp) = 1; 14183 RTX_FRAME_RELATED_P (tmp) = 1;
14022 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp; 14184 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
14023 } 14185 }
14024 14186
14066 14228
14067 reg = gen_rtx_REG (XFmode, base_reg++); 14229 reg = gen_rtx_REG (XFmode, base_reg++);
14068 14230
14069 XVECEXP (par, 0, 0) 14231 XVECEXP (par, 0, 0)
14070 = gen_rtx_SET (VOIDmode, 14232 = gen_rtx_SET (VOIDmode,
14071 gen_frame_mem (BLKmode, 14233 gen_frame_mem
14072 gen_rtx_PRE_DEC (BLKmode, 14234 (BLKmode,
14073 stack_pointer_rtx)), 14235 gen_rtx_PRE_MODIFY (Pmode,
14236 stack_pointer_rtx,
14237 plus_constant
14238 (stack_pointer_rtx,
14239 -12 * count))
14240 ),
14074 gen_rtx_UNSPEC (BLKmode, 14241 gen_rtx_UNSPEC (BLKmode,
14075 gen_rtvec (1, reg), 14242 gen_rtvec (1, reg),
14076 UNSPEC_PUSH_MULT)); 14243 UNSPEC_PUSH_MULT));
14077 tmp = gen_rtx_SET (VOIDmode, 14244 tmp = gen_rtx_SET (VOIDmode,
14078 gen_frame_mem (XFmode, stack_pointer_rtx), reg); 14245 gen_frame_mem (XFmode, stack_pointer_rtx), reg);
14427 rtx insn; 14594 rtx insn;
14428 14595
14429 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--) 14596 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
14430 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg]) 14597 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
14431 { 14598 {
14432 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx); 14599 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14433 insn = gen_rtx_MEM (V2SImode, insn); 14600 insn = gen_rtx_MEM (V2SImode, insn);
14434 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg)); 14601 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
14435 RTX_FRAME_RELATED_P (insn) = 1; 14602 RTX_FRAME_RELATED_P (insn) = 1;
14436 saved_size += 8; 14603 saved_size += 8;
14437 } 14604 }
14441 if (TARGET_FPA_EMU2) 14608 if (TARGET_FPA_EMU2)
14442 { 14609 {
14443 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) 14610 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
14444 if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) 14611 if (df_regs_ever_live_p (reg) && !call_used_regs[reg])
14445 { 14612 {
14446 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx); 14613 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
14447 insn = gen_rtx_MEM (XFmode, insn); 14614 insn = gen_rtx_MEM (XFmode, insn);
14448 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg)); 14615 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg));
14449 RTX_FRAME_RELATED_P (insn) = 1; 14616 RTX_FRAME_RELATED_P (insn) = 1;
14450 saved_size += 12; 14617 saved_size += 12;
14451 } 14618 }
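Both prologue loops in this hunk now build the auto-decrement address in Pmode rather than in the mode of the value being stored; the data mode belongs only on the MEM that wraps the address. A fragment restating the corrected idiom (not standalone, since it reuses the rtl builders from the loops above; variable names are illustrative):

  /* Sketch of the corrected idiom: the address expression is in Pmode
     (the pointer mode); only the enclosing MEM carries the mode of the
     datum being pushed.  */
  rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);   /* address     */
  rtx mem  = gen_rtx_MEM (V2SImode, addr);                 /* 8-byte datum */
  emit_set_insn (mem, gen_rtx_REG (V2SImode, reg));        /* the store   */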
14964 15131
14965 case 'N': 15132 case 'N':
14966 { 15133 {
14967 REAL_VALUE_TYPE r; 15134 REAL_VALUE_TYPE r;
14968 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 15135 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
14969 r = REAL_VALUE_NEGATE (r); 15136 r = real_value_negate (&r);
14970 fprintf (stream, "%s", fp_const_from_val (&r)); 15137 fprintf (stream, "%s", fp_const_from_val (&r));
14971 } 15138 }
14972 return; 15139 return;
14973 15140
14974 /* An integer or symbol address without a preceding # sign. */ 15141 /* An integer or symbol address without a preceding # sign. */
19234 default: 19401 default:
19235 gcc_unreachable (); 19402 gcc_unreachable ();
19236 } 19403 }
19237 } 19404 }
19238 19405
19406 /* Given the stack offsets and register mask in OFFSETS, decide
19407 how many additional registers to push instead of subtracting
19408 a constant from SP. */
19409 static int
19410 thumb1_extra_regs_pushed (arm_stack_offsets *offsets)
19411 {
19412 HOST_WIDE_INT amount = offsets->outgoing_args - offsets->saved_regs;
19413 unsigned long live_regs_mask = offsets->saved_regs_mask;
19414 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
19415 unsigned long l_mask = live_regs_mask & 0x40ff;
19416 /* Then count how many other high registers will need to be pushed. */
19417 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
19418 int n_free;
19419
19420 /* If the stack frame size is 512 exactly, we can save one load
19421 instruction, which should make this a win even when optimizing
19422 for speed. */
19423 if (!optimize_size && amount != 512)
19424 return 0;
19425
19426 /* Can't do this if there are high registers to push, or if we
19427 are not going to do a push at all. */
19428 if (high_regs_pushed != 0 || l_mask == 0)
19429 return 0;
19430
19431 /* Don't do this if thumb1_expand_prologue wants to emit instructions
19432 between the push and the stack frame allocation. */
19433 if ((flag_pic && arm_pic_register != INVALID_REGNUM)
19434 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0))
19435 return 0;
19436
19437 for (n_free = 0; n_free < 8 && !(live_regs_mask & 1); live_regs_mask >>= 1)
19438 n_free++;
19439
19440 if (n_free == 0)
19441 return 0;
19442 gcc_assert (amount / 4 * 4 == amount);
19443
19444 if (amount >= 512 && (amount - n_free * 4) < 512)
19445 return (amount - 508) / 4;
19446 if (amount <= n_free * 4)
19447 return amount / 4;
19448 return 0;
19449 }
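The new helper trades part of the SP subtraction for extra (dead) low registers in the push the prologue is already emitting: it only fires when optimizing for size or when the frame is exactly 512 bytes, and then either shaves the frame below the 508-byte limit of a single Thumb-1 SP subtract or absorbs the whole frame into the push. A standalone recomputation of the two non-zero return paths with concrete numbers (plain ints, helper names made up for illustration):

  #include <assert.h>

  /* Sketch only: AMOUNT is the frame size in bytes, N_FREE the number
     of dead low registers that could be added to the push (4 bytes each).  */
  static int
  extra_regs_pushed (int amount, int n_free)
  {
    if (amount >= 512 && amount - n_free * 4 < 512)
      return (amount - 508) / 4;   /* push just enough to get under 512   */
    if (amount <= n_free * 4)
      return amount / 4;           /* the push can absorb the whole frame */
    return 0;
  }

  static void
  check (void)
  {
    assert (extra_regs_pushed (512, 1) == 1);  /* 512-byte frame, 1 free reg */
    assert (extra_regs_pushed (16, 4) == 4);   /* tiny frame fully absorbed  */
    assert (extra_regs_pushed (600, 2) == 0);  /* too big to help            */
  }

thumb1_expand_prologue then shrinks the remaining adjustment accordingly, as the added "amount -= 4 * thumb1_extra_regs_pushed (offsets);" line shows.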
19450
19239 /* Generate the rest of a function's prologue. */ 19451 /* Generate the rest of a function's prologue. */
19240 void 19452 void
19241 thumb1_expand_prologue (void) 19453 thumb1_expand_prologue (void)
19242 { 19454 {
19243 rtx insn, dwarf; 19455 rtx insn, dwarf;
19270 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0) 19482 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19271 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM), 19483 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
19272 stack_pointer_rtx); 19484 stack_pointer_rtx);
19273 19485
19274 amount = offsets->outgoing_args - offsets->saved_regs; 19486 amount = offsets->outgoing_args - offsets->saved_regs;
19487 amount -= 4 * thumb1_extra_regs_pushed (offsets);
19275 if (amount) 19488 if (amount)
19276 { 19489 {
19277 if (amount < 512) 19490 if (amount < 512)
19278 { 19491 {
19279 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, 19492 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
19574 to push some high registers then delay our first push. This will just 19787 to push some high registers then delay our first push. This will just
19575 be a push of LR and we can combine it with the push of the first high 19788 be a push of LR and we can combine it with the push of the first high
19576 register. */ 19789 register. */
19577 else if ((l_mask & 0xff) != 0 19790 else if ((l_mask & 0xff) != 0
19578 || (high_regs_pushed == 0 && l_mask)) 19791 || (high_regs_pushed == 0 && l_mask))
19579 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask); 19792 {
19793 unsigned long mask = l_mask;
19794 mask |= (1 << thumb1_extra_regs_pushed (offsets)) - 1;
19795 thumb_pushpop (f, mask, 1, &cfa_offset, mask);
19796 }
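In the Thumb-1 prologue writer the same count is folded into the existing push by setting its low bits: (1 << n) - 1 is a mask of registers r0..r(n-1), exactly the free low registers the helper counted upwards from bit 0. A tiny sketch with illustrative values:

  /* Sketch only: folding n extra registers into an existing push mask.
     With n == 2 the push gains r0 and r1 (mask bits 0x3).  */
  unsigned long mask = 0x4010;       /* say, r4 and lr already pushed  */
  int n = 2;                         /* result of the helper above     */
  mask |= (1UL << n) - 1;            /* now also r0 and r1: 0x4013     */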
19580 19797
19581 if (high_regs_pushed) 19798 if (high_regs_pushed)
19582 { 19799 {
19583 unsigned pushable_regs; 19800 unsigned pushable_regs;
19584 unsigned next_hi_reg; 19801 unsigned next_hi_reg;
20727 return true; 20944 return true;
20728 20945
20729 return false; 20946 return false;
20730 } 20947 }
20731 20948
20949 /* Implements target hook small_register_classes_for_mode_p. */
20950 bool
20951 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED)
20952 {
20953 return TARGET_THUMB1;
20954 }
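The new arm_small_register_classes_for_mode_p body simply reports true for Thumb-1, where only the low registers are generally usable. Being a target hook, the function has to be installed in the target vector somewhere outside this hunk; the usual registration pattern would look like the following (placement assumed, not shown in this diff):

  /* Assumed registration, following the usual target-vector pattern;
     the actual #define sits elsewhere in arm.c and is not part of
     this hunk.  */
  #undef  TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P
  #define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
    arm_small_register_classes_for_mode_p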
20955
20732 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal 20956 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
20733 ARM insns and therefore guarantee that the shift count is modulo 256. 20957 ARM insns and therefore guarantee that the shift count is modulo 256.
20734 DImode shifts (those implemented by lib1funcs.asm or by optabs.c) 20958 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
20735 guarantee no particular behavior for out-of-range counts. */ 20959 guarantee no particular behavior for out-of-range counts. */
20736 20960
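The comment states the contract: the SImode shift sequences GCC emits on ARM take the count modulo 256, while DImode shifts promise nothing for out-of-range counts, so only SImode can report a truncation mask. The hook body itself falls outside this excerpt; an implementation consistent with the comment would be roughly:

  /* Sketch consistent with the comment above (the real definition is
     elsewhere in arm.c): report the modulo-256 guarantee for SImode
     shifts only, and no guarantee (mask 0) for anything else.  */
  static unsigned HOST_WIDE_INT
  arm_shift_truncation_mask (enum machine_mode mode)
  {
    return mode == SImode ? 255 : 0;
  }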
20969 abort (); 21193 abort ();
20970 reg = REGNO (XEXP (e1, 0)); 21194 reg = REGNO (XEXP (e1, 0));
20971 offset = INTVAL (XEXP (e1, 1)); 21195 offset = INTVAL (XEXP (e1, 1));
20972 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n", 21196 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
20973 HARD_FRAME_POINTER_REGNUM, reg, 21197 HARD_FRAME_POINTER_REGNUM, reg,
20974 INTVAL (XEXP (e1, 1))); 21198 offset);
20975 } 21199 }
20976 else if (GET_CODE (e1) == REG) 21200 else if (GET_CODE (e1) == REG)
20977 { 21201 {
20978 reg = REGNO (e1); 21202 reg = REGNO (e1);
20979 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n", 21203 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
21205 fputs ("-(", fp); 21429 fputs ("-(", fp);
21206 output_addr_const (fp, XVECEXP (x, 0, 0)); 21430 output_addr_const (fp, XVECEXP (x, 0, 0));
21207 fputc (')', fp); 21431 fputc (')', fp);
21208 return TRUE; 21432 return TRUE;
21209 } 21433 }
21434 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
21435 {
21436 output_addr_const (fp, XVECEXP (x, 0, 0));
21437 if (GOT_PCREL)
21438 fputs ("+.", fp);
21439 fputs ("-(", fp);
21440 output_addr_const (fp, XVECEXP (x, 0, 1));
21441 fputc (')', fp);
21442 return TRUE;
21443 }
21210 else if (GET_CODE (x) == CONST_VECTOR) 21444 else if (GET_CODE (x) == CONST_VECTOR)
21211 return arm_emit_vector_const (fp, x); 21445 return arm_emit_vector_const (fp, x);
21212 21446
21213 return FALSE; 21447 return FALSE;
21214 } 21448 }
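The new UNSPEC_SYMBOL_OFFSET branch above prints the first UNSPEC operand, a "+." PC bias when GOT_PCREL is set, and then subtracts the second operand in parentheses, giving an expression of the shape sym+.-(anchor). The same output logic pulled into a self-contained sketch, with plain strings standing in for the rtx operands (names are illustrative):

  #include <stdio.h>

  /* Sketch only: the textual shape produced by the UNSPEC_SYMBOL_OFFSET
     branch, with hypothetical symbol strings instead of rtxes.  */
  static void
  print_symbol_offset (FILE *fp, const char *sym, const char *anchor, int pcrel)
  {
    fputs (sym, fp);          /* XVECEXP (x, 0, 0)               */
    if (pcrel)
      fputs ("+.", fp);       /* PC-relative bias when GOT_PCREL */
    fputs ("-(", fp);
    fputs (anchor, fp);       /* XVECEXP (x, 0, 1)               */
    fputc (')', fp);          /* e.g. "sym+.-(anchor)"           */
  }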
21249 /* Output a Thumb-1 casesi dispatch sequence. */ 21483 /* Output a Thumb-1 casesi dispatch sequence. */
21250 const char * 21484 const char *
21251 thumb1_output_casesi (rtx *operands) 21485 thumb1_output_casesi (rtx *operands)
21252 { 21486 {
21253 rtx diff_vec = PATTERN (next_real_insn (operands[0])); 21487 rtx diff_vec = PATTERN (next_real_insn (operands[0]));
21254 addr_diff_vec_flags flags;
21255 21488
21256 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); 21489 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
21257
21258 flags = ADDR_DIFF_VEC_FLAGS (diff_vec);
21259 21490
21260 switch (GET_MODE(diff_vec)) 21491 switch (GET_MODE(diff_vec))
21261 { 21492 {
21262 case QImode: 21493 case QImode:
21263 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ? 21494 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
21370 has to be mangled as if it is in the "std" namespace. */ 21601 has to be mangled as if it is in the "std" namespace. */
21371 if (TARGET_AAPCS_BASED 21602 if (TARGET_AAPCS_BASED
21372 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) 21603 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
21373 { 21604 {
21374 static bool warned; 21605 static bool warned;
21375 if (!warned && warn_psabi) 21606 if (!warned && warn_psabi && !in_system_header)
21376 { 21607 {
21377 warned = true; 21608 warned = true;
21378 inform (input_location, 21609 inform (input_location,
21379 "the mangling of %<va_list%> has changed in GCC 4.4"); 21610 "the mangling of %<va_list%> has changed in GCC 4.4");
21380 } 21611 }