Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/arm/arm.c @ 63:b7f97abdc517 gcc-4.6-20100522
update gcc from gcc-4.5.0 to gcc-4.6
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 24 May 2010 12:47:05 +0900 |
parents | 77e2b8dfacca |
children | f6334be47118 |
comparison
equal
deleted
inserted
replaced
56:3c8a44c06a95 | 63:b7f97abdc517 |
---|---|
1 /* Output routines for GCC for ARM. | 1 /* Output routines for GCC for ARM. |
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, | 2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, |
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 | 3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 |
4 Free Software Foundation, Inc. | 4 Free Software Foundation, Inc. |
5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) | 5 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl) |
6 and Martin Simmons (@harleqn.co.uk). | 6 and Martin Simmons (@harleqn.co.uk). |
7 More major hacks by Richard Earnshaw (rearnsha@arm.com). | 7 More major hacks by Richard Earnshaw (rearnsha@arm.com). |
8 | 8 |
29 #include "rtl.h" | 29 #include "rtl.h" |
30 #include "tree.h" | 30 #include "tree.h" |
31 #include "obstack.h" | 31 #include "obstack.h" |
32 #include "regs.h" | 32 #include "regs.h" |
33 #include "hard-reg-set.h" | 33 #include "hard-reg-set.h" |
34 #include "real.h" | |
35 #include "insn-config.h" | 34 #include "insn-config.h" |
36 #include "conditions.h" | 35 #include "conditions.h" |
37 #include "output.h" | 36 #include "output.h" |
38 #include "insn-attr.h" | 37 #include "insn-attr.h" |
39 #include "flags.h" | 38 #include "flags.h" |
149 static int arm_address_cost (rtx, bool); | 148 static int arm_address_cost (rtx, bool); |
150 static bool arm_memory_load_p (rtx); | 149 static bool arm_memory_load_p (rtx); |
151 static bool arm_cirrus_insn_p (rtx); | 150 static bool arm_cirrus_insn_p (rtx); |
152 static void cirrus_reorg (rtx); | 151 static void cirrus_reorg (rtx); |
153 static void arm_init_builtins (void); | 152 static void arm_init_builtins (void); |
154 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); | |
155 static void arm_init_iwmmxt_builtins (void); | 153 static void arm_init_iwmmxt_builtins (void); |
156 static rtx safe_vector_operand (rtx, enum machine_mode); | 154 static rtx safe_vector_operand (rtx, enum machine_mode); |
157 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx); | 155 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx); |
158 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int); | 156 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int); |
159 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); | 157 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int); |
222 static bool arm_frame_pointer_required (void); | 220 static bool arm_frame_pointer_required (void); |
223 static bool arm_can_eliminate (const int, const int); | 221 static bool arm_can_eliminate (const int, const int); |
224 static void arm_asm_trampoline_template (FILE *); | 222 static void arm_asm_trampoline_template (FILE *); |
225 static void arm_trampoline_init (rtx, tree, rtx); | 223 static void arm_trampoline_init (rtx, tree, rtx); |
226 static rtx arm_trampoline_adjust_address (rtx); | 224 static rtx arm_trampoline_adjust_address (rtx); |
225 static rtx arm_pic_static_addr (rtx orig, rtx reg); | |
227 | 226 |
228 | 227 |
229 /* Table of machine attributes. */ | 228 /* Table of machine attributes. */ |
230 static const struct attribute_spec arm_attribute_table[] = | 229 static const struct attribute_spec arm_attribute_table[] = |
231 { | 230 { |
522 /* True if we are currently building a constant table. */ | 521 /* True if we are currently building a constant table. */ |
523 int making_const_table; | 522 int making_const_table; |
524 | 523 |
525 /* The processor for which instructions should be scheduled. */ | 524 /* The processor for which instructions should be scheduled. */ |
526 enum processor_type arm_tune = arm_none; | 525 enum processor_type arm_tune = arm_none; |
526 | |
527 /* The current tuning set. */ | |
528 const struct tune_params *current_tune; | |
527 | 529 |
528 /* The default processor used if not overridden by commandline. */ | 530 /* The default processor used if not overridden by commandline. */ |
529 static enum processor_type arm_default_cpu = arm_none; | 531 static enum processor_type arm_default_cpu = arm_none; |
530 | 532 |
531 /* Which floating point hardware to schedule for. */ | 533 /* Which floating point hardware to schedule for. */ |
599 #define FL_FOR_ARCH6Z FL_FOR_ARCH6 | 601 #define FL_FOR_ARCH6Z FL_FOR_ARCH6 |
600 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K | 602 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6K |
601 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) | 603 #define FL_FOR_ARCH6T2 (FL_FOR_ARCH6 | FL_THUMB2) |
602 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) | 604 #define FL_FOR_ARCH6M (FL_FOR_ARCH6 & ~FL_NOTM) |
603 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM) | 605 #define FL_FOR_ARCH7 (FL_FOR_ARCH6T2 &~ FL_NOTM) |
604 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM) | 606 #define FL_FOR_ARCH7A (FL_FOR_ARCH7 | FL_NOTM | FL_ARCH6K) |
605 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) | 607 #define FL_FOR_ARCH7R (FL_FOR_ARCH7A | FL_DIV) |
606 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV) | 608 #define FL_FOR_ARCH7M (FL_FOR_ARCH7 | FL_DIV) |
607 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) | 609 #define FL_FOR_ARCH7EM (FL_FOR_ARCH7M | FL_ARCH7EM) |
608 | 610 |
609 /* The bits in this mask specify which | 611 /* The bits in this mask specify which |
694 unsigned arm_pic_register = INVALID_REGNUM; | 696 unsigned arm_pic_register = INVALID_REGNUM; |
695 | 697 |
696 /* Set to 1 after arm_reorg has started. Reset to start at the start of | 698 /* Set to 1 after arm_reorg has started. Reset to start at the start of |
697 the next function. */ | 699 the next function. */ |
698 static int after_arm_reorg = 0; | 700 static int after_arm_reorg = 0; |
699 | |
700 /* The maximum number of insns to be used when loading a constant. */ | |
701 static int arm_constant_limit = 3; | |
702 | 701 |
703 static enum arm_pcs arm_pcs_default; | 702 static enum arm_pcs arm_pcs_default; |
704 | 703 |
705 /* For an explanation of these variables, see final_prescan_insn below. */ | 704 /* For an explanation of these variables, see final_prescan_insn below. */ |
706 int arm_ccfsm_state; | 705 int arm_ccfsm_state; |
736 { | 735 { |
737 const char *const name; | 736 const char *const name; |
738 enum processor_type core; | 737 enum processor_type core; |
739 const char *arch; | 738 const char *arch; |
740 const unsigned long flags; | 739 const unsigned long flags; |
741 bool (* rtx_costs) (rtx, enum rtx_code, enum rtx_code, int *, bool); | 740 const struct tune_params *const tune; |
741 }; | |
742 /* Tuning parameter sets; all_cores entries point at these through the ARM_CORE macro (&arm_##COSTS##_tune). Each initializer supplies a *_rtx_costs routine followed by an integer, presumably the constant_limit field read by arm_constant_limit -- confirm against the struct tune_params declaration, which is not shown here. */ | |
743 const struct tune_params arm_slowmul_tune = | |
744 { | |
745 arm_slowmul_rtx_costs, | |
746 3 | |
747 }; | |
748 | |
749 const struct tune_params arm_fastmul_tune = | |
750 { | |
751 arm_fastmul_rtx_costs, | |
752 1 | |
753 }; | |
754 | |
755 const struct tune_params arm_xscale_tune = | |
756 { | |
757 arm_xscale_rtx_costs, | |
758 2 | |
759 }; | |
760 | |
761 const struct tune_params arm_9e_tune = | |
762 { | |
763 arm_9e_rtx_costs, | |
764 1 | |
742 }; | 765 }; |
743 | 766 |
744 /* Not all of these give usefully different compilation alternatives, | 767 /* Not all of these give usefully different compilation alternatives, |
745 but there is no simple way of generalizing them. */ | 768 but there is no simple way of generalizing them. */ |
746 static const struct processors all_cores[] = | 769 static const struct processors all_cores[] = |
747 { | 770 { |
748 /* ARM Cores */ | 771 /* ARM Cores */ |
749 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \ | 772 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \ |
750 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs}, | 773 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune}, |
751 #include "arm-cores.def" | 774 #include "arm-cores.def" |
752 #undef ARM_CORE | 775 #undef ARM_CORE |
753 {NULL, arm_none, NULL, 0, NULL} | 776 {NULL, arm_none, NULL, 0, NULL} |
754 }; | 777 }; |
755 | 778 |
756 static const struct processors all_architectures[] = | 779 static const struct processors all_architectures[] = |
757 { | 780 { |
758 /* ARM Architectures */ | 781 /* ARM Architectures */ |
759 /* We don't specify rtx_costs here as it will be figured out | 782 /* We don't specify tuning costs here as it will be figured out |
760 from the core. */ | 783 from the core. */ |
761 | 784 |
762 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL}, | 785 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL}, |
763 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL}, | 786 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL}, |
764 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL}, | 787 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL}, |
903 TLS_LDO32, | 926 TLS_LDO32, |
904 TLS_IE32, | 927 TLS_IE32, |
905 TLS_LE32 | 928 TLS_LE32 |
906 }; | 929 }; |
907 | 930 |
931 /* The maximum number of insns to be used when loading a constant: 1 when SIZE_P is true (the caller is optimizing for size), otherwise the constant_limit of the current tuning set (current_tune). */ | |
932 inline static int | |
933 arm_constant_limit (bool size_p) | |
934 { | |
935 return size_p ? 1 : current_tune->constant_limit; | |
936 } | |
937 | |
908 /* Emit an insn that's a simple single-set. Both the operands must be known | 938 /* Emit an insn that's a simple single-set. Both the operands must be known |
909 to be valid. */ | 939 to be valid. */ |
910 inline static rtx | 940 inline static rtx |
911 emit_set_insn (rtx x, rtx y) | 941 emit_set_insn (rtx x, rtx y) |
912 { | 942 { |
1443 /* The processor for which we should tune should now have been | 1473 /* The processor for which we should tune should now have been |
1444 chosen. */ | 1474 chosen. */ |
1445 gcc_assert (arm_tune != arm_none); | 1475 gcc_assert (arm_tune != arm_none); |
1446 | 1476 |
1447 tune_flags = all_cores[(int)arm_tune].flags; | 1477 tune_flags = all_cores[(int)arm_tune].flags; |
1478 current_tune = all_cores[(int)arm_tune].tune; | |
1448 | 1479 |
1449 if (target_fp16_format_name) | 1480 if (target_fp16_format_name) |
1450 { | 1481 { |
1451 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++) | 1482 for (i = 0; i < ARRAY_SIZE (all_fp16_formats); i++) |
1452 { | 1483 { |
1637 { | 1668 { |
1638 arm_fpu_desc = &all_fpus[i]; | 1669 arm_fpu_desc = &all_fpus[i]; |
1639 break; | 1670 break; |
1640 } | 1671 } |
1641 } | 1672 } |
1673 | |
1642 if (!arm_fpu_desc) | 1674 if (!arm_fpu_desc) |
1643 error ("invalid floating point option: -mfpu=%s", target_fpu_name); | 1675 { |
1676 error ("invalid floating point option: -mfpu=%s", target_fpu_name); | |
1677 return; | |
1678 } | |
1644 | 1679 |
1645 switch (arm_fpu_desc->model) | 1680 switch (arm_fpu_desc->model) |
1646 { | 1681 { |
1647 case ARM_FP_MODEL_FPA: | 1682 case ARM_FP_MODEL_FPA: |
1648 if (arm_fpu_desc->rev == 2) | 1683 if (arm_fpu_desc->rev == 2) |
1756 } | 1791 } |
1757 | 1792 |
1758 /* Use the cp15 method if it is available. */ | 1793 /* Use the cp15 method if it is available. */ |
1759 if (target_thread_pointer == TP_AUTO) | 1794 if (target_thread_pointer == TP_AUTO) |
1760 { | 1795 { |
1761 if (arm_arch6k && !TARGET_THUMB) | 1796 if (arm_arch6k && !TARGET_THUMB1) |
1762 target_thread_pointer = TP_CP15; | 1797 target_thread_pointer = TP_CP15; |
1763 else | 1798 else |
1764 target_thread_pointer = TP_SOFT; | 1799 target_thread_pointer = TP_SOFT; |
1765 } | 1800 } |
1766 | 1801 |
1835 flag_schedule_insns = 0; | 1870 flag_schedule_insns = 0; |
1836 } | 1871 } |
1837 | 1872 |
1838 if (optimize_size) | 1873 if (optimize_size) |
1839 { | 1874 { |
1840 arm_constant_limit = 1; | |
1841 | |
1842 /* If optimizing for size, bump the number of instructions that we | 1875 /* If optimizing for size, bump the number of instructions that we |
1843 are prepared to conditionally execute (even on a StrongARM). */ | 1876 are prepared to conditionally execute (even on a StrongARM). */ |
1844 max_insns_skipped = 6; | 1877 max_insns_skipped = 6; |
1845 } | 1878 } |
1846 else | 1879 else |
1847 { | 1880 { |
1848 /* For processors with load scheduling, it never costs more than | |
1849 2 cycles to load a constant, and the load scheduler may well | |
1850 reduce that to 1. */ | |
1851 if (arm_ld_sched) | |
1852 arm_constant_limit = 1; | |
1853 | |
1854 /* On XScale the longer latency of a load makes it more difficult | |
1855 to achieve a good schedule, so it's faster to synthesize | |
1856 constants that can be done in two insns. */ | |
1857 if (arm_tune_xscale) | |
1858 arm_constant_limit = 2; | |
1859 | |
1860 /* StrongARM has early execution of branches, so a sequence | 1881 /* StrongARM has early execution of branches, so a sequence |
1861 that is worth skipping is shorter. */ | 1882 that is worth skipping is shorter. */ |
1862 if (arm_tune_strongarm) | 1883 if (arm_tune_strongarm) |
1863 max_insns_skipped = 3; | 1884 max_insns_skipped = 3; |
1864 } | 1885 } |
1870 inform (input_location, | 1891 inform (input_location, |
1871 "-freorder-blocks-and-partition not supported on this architecture"); | 1892 "-freorder-blocks-and-partition not supported on this architecture"); |
1872 flag_reorder_blocks_and_partition = 0; | 1893 flag_reorder_blocks_and_partition = 0; |
1873 flag_reorder_blocks = 1; | 1894 flag_reorder_blocks = 1; |
1874 } | 1895 } |
1875 | |
1876 /* Ideally we would want to use CFI directives to generate | |
1877 debug info. However this also creates the .eh_frame | |
1878 section, so disable them until GAS can handle | |
1879 this properly. See PR40521. */ | |
1880 if (TARGET_AAPCS_BASED) | |
1881 flag_dwarf2_cfi_asm = 0; | |
1882 | 1896 |
1883 /* Register global variables with the garbage collector. */ | 1897 /* Register global variables with the garbage collector. */ |
1884 arm_add_gc_roots (); | 1898 arm_add_gc_roots (); |
1885 } | 1899 } |
1886 | 1900 |
2362 */ | 2376 */ |
2363 if (!after_arm_reorg | 2377 if (!after_arm_reorg |
2364 && !cond | 2378 && !cond |
2365 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source, | 2379 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source, |
2366 1, 0) | 2380 1, 0) |
2367 > arm_constant_limit + (code != SET))) | 2381 > (arm_constant_limit (optimize_function_for_size_p (cfun)) |
2382 + (code != SET)))) | |
2368 { | 2383 { |
2369 if (code == SET) | 2384 if (code == SET) |
2370 { | 2385 { |
2371 /* Currently SET is the only monadic value for CODE, all | 2386 /* Currently SET is the only monadic value for CODE, all |
2372 the rest are diadic. */ | 2387 the rest are diadic. */ |
2522 { | 2537 { |
2523 int can_invert = 0; | 2538 int can_invert = 0; |
2524 int can_negate = 0; | 2539 int can_negate = 0; |
2525 int final_invert = 0; | 2540 int final_invert = 0; |
2526 int can_negate_initial = 0; | 2541 int can_negate_initial = 0; |
2527 int can_shift = 0; | |
2528 int i; | 2542 int i; |
2529 int num_bits_set = 0; | 2543 int num_bits_set = 0; |
2530 int set_sign_bit_copies = 0; | 2544 int set_sign_bit_copies = 0; |
2531 int clear_sign_bit_copies = 0; | 2545 int clear_sign_bit_copies = 0; |
2532 int clear_zero_bit_copies = 0; | 2546 int clear_zero_bit_copies = 0; |
2541 are split. */ | 2555 are split. */ |
2542 switch (code) | 2556 switch (code) |
2543 { | 2557 { |
2544 case SET: | 2558 case SET: |
2545 can_invert = 1; | 2559 can_invert = 1; |
2546 can_shift = 1; | |
2547 can_negate = 1; | 2560 can_negate = 1; |
2548 break; | 2561 break; |
2549 | 2562 |
2550 case PLUS: | 2563 case PLUS: |
2551 can_negate = 1; | 2564 can_negate = 1; |
4779 | 4792 |
4780 if (cfun->machine->sibcall_blocked) | 4793 if (cfun->machine->sibcall_blocked) |
4781 return false; | 4794 return false; |
4782 | 4795 |
4783 /* Never tailcall something for which we have no decl, or if we | 4796 /* Never tailcall something for which we have no decl, or if we |
4784 are in Thumb mode. */ | 4797 are generating code for Thumb-1. */ |
4785 if (decl == NULL || TARGET_THUMB) | 4798 if (decl == NULL || TARGET_THUMB1) |
4786 return false; | 4799 return false; |
4787 | 4800 |
4788 /* The PIC register is live on entry to VxWorks PLT entries, so we | 4801 /* The PIC register is live on entry to VxWorks PLT entries, so we |
4789 must make the call before restoring the PIC register. */ | 4802 must make the call before restoring the PIC register. */ |
4790 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl)) | 4803 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl)) |
4906 if (GET_CODE (orig) == SYMBOL_REF | 4919 if (GET_CODE (orig) == SYMBOL_REF |
4907 || GET_CODE (orig) == LABEL_REF) | 4920 || GET_CODE (orig) == LABEL_REF) |
4908 { | 4921 { |
4909 rtx pic_ref, address; | 4922 rtx pic_ref, address; |
4910 rtx insn; | 4923 rtx insn; |
4911 int subregs = 0; | |
4912 | |
4913 /* If this function doesn't have a pic register, create one now. */ | |
4914 require_pic_register (); | |
4915 | 4924 |
4916 if (reg == 0) | 4925 if (reg == 0) |
4917 { | 4926 { |
4918 gcc_assert (can_create_pseudo_p ()); | 4927 gcc_assert (can_create_pseudo_p ()); |
4919 reg = gen_reg_rtx (Pmode); | 4928 reg = gen_reg_rtx (Pmode); |
4920 | 4929 address = gen_reg_rtx (Pmode); |
4921 subregs = 1; | 4930 } |
4922 } | |
4923 | |
4924 if (subregs) | |
4925 address = gen_reg_rtx (Pmode); | |
4926 else | 4931 else |
4927 address = reg; | 4932 address = reg; |
4928 | |
4929 if (TARGET_ARM) | |
4930 emit_insn (gen_pic_load_addr_arm (address, orig)); | |
4931 else if (TARGET_THUMB2) | |
4932 emit_insn (gen_pic_load_addr_thumb2 (address, orig)); | |
4933 else /* TARGET_THUMB1 */ | |
4934 emit_insn (gen_pic_load_addr_thumb1 (address, orig)); | |
4935 | 4933 |
4936 /* VxWorks does not impose a fixed gap between segments; the run-time | 4934 /* VxWorks does not impose a fixed gap between segments; the run-time |
4937 gap can be different from the object-file gap. We therefore can't | 4935 gap can be different from the object-file gap. We therefore can't |
4938 use GOTOFF unless we are absolutely sure that the symbol is in the | 4936 use GOTOFF unless we are absolutely sure that the symbol is in the |
4939 same segment as the GOT. Unfortunately, the flexibility of linker | 4937 same segment as the GOT. Unfortunately, the flexibility of linker |
4942 if ((GET_CODE (orig) == LABEL_REF | 4940 if ((GET_CODE (orig) == LABEL_REF |
4943 || (GET_CODE (orig) == SYMBOL_REF && | 4941 || (GET_CODE (orig) == SYMBOL_REF && |
4944 SYMBOL_REF_LOCAL_P (orig))) | 4942 SYMBOL_REF_LOCAL_P (orig))) |
4945 && NEED_GOT_RELOC | 4943 && NEED_GOT_RELOC |
4946 && !TARGET_VXWORKS_RTP) | 4944 && !TARGET_VXWORKS_RTP) |
4947 pic_ref = gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, address); | 4945 insn = arm_pic_static_addr (orig, reg); |
4948 else | 4946 else |
4949 { | 4947 { |
4948 /* If this function doesn't have a pic register, create one now. */ | |
4949 require_pic_register (); | |
4950 | |
4951 if (TARGET_32BIT) | |
4952 emit_insn (gen_pic_load_addr_32bit (address, orig)); | |
4953 else /* TARGET_THUMB1 */ | |
4954 emit_insn (gen_pic_load_addr_thumb1 (address, orig)); | |
4955 | |
4950 pic_ref = gen_const_mem (Pmode, | 4956 pic_ref = gen_const_mem (Pmode, |
4951 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, | 4957 gen_rtx_PLUS (Pmode, cfun->machine->pic_reg, |
4952 address)); | 4958 address)); |
4953 } | 4959 insn = emit_move_insn (reg, pic_ref); |
4954 | 4960 } |
4955 insn = emit_move_insn (reg, pic_ref); | |
4956 | 4961 |
4957 /* Put a REG_EQUAL note on this insn, so that it can be optimized | 4962 /* Put a REG_EQUAL note on this insn, so that it can be optimized |
4958 by loop. */ | 4963 by loop. */ |
4959 set_unique_reg_note (insn, REG_EQUAL, orig); | 4964 set_unique_reg_note (insn, REG_EQUAL, orig); |
4960 | 4965 |
5104 pic_reg = cfun->machine->pic_reg; | 5109 pic_reg = cfun->machine->pic_reg; |
5105 if (TARGET_VXWORKS_RTP) | 5110 if (TARGET_VXWORKS_RTP) |
5106 { | 5111 { |
5107 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE); | 5112 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE); |
5108 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); | 5113 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); |
5109 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx)); | 5114 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx)); |
5110 | 5115 |
5111 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg))); | 5116 emit_insn (gen_rtx_SET (Pmode, pic_reg, gen_rtx_MEM (Pmode, pic_reg))); |
5112 | 5117 |
5113 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); | 5118 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); |
5114 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp)); | 5119 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp)); |
5127 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4); | 5132 pic_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4); |
5128 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx), | 5133 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx), |
5129 UNSPEC_GOTSYM_OFF); | 5134 UNSPEC_GOTSYM_OFF); |
5130 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); | 5135 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx); |
5131 | 5136 |
5132 if (TARGET_ARM) | 5137 if (TARGET_32BIT) |
5133 { | 5138 { |
5134 emit_insn (gen_pic_load_addr_arm (pic_reg, pic_rtx)); | 5139 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx)); |
5135 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno)); | 5140 if (TARGET_ARM) |
5136 } | 5141 emit_insn (gen_pic_add_dot_plus_eight (pic_reg, pic_reg, labelno)); |
5137 else if (TARGET_THUMB2) | |
5138 { | |
5139 /* Thumb-2 only allows very limited access to the PC. Calculate the | |
5140 address in a temporary register. */ | |
5141 if (arm_pic_register != INVALID_REGNUM) | |
5142 { | |
5143 pic_tmp = gen_rtx_REG (SImode, | |
5144 thumb_find_work_register (saved_regs)); | |
5145 } | |
5146 else | 5142 else |
5147 { | 5143 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno)); |
5148 gcc_assert (can_create_pseudo_p ()); | |
5149 pic_tmp = gen_reg_rtx (Pmode); | |
5150 } | |
5151 | |
5152 emit_insn (gen_pic_load_addr_thumb2 (pic_reg, pic_rtx)); | |
5153 emit_insn (gen_pic_load_dot_plus_four (pic_tmp, labelno)); | |
5154 emit_insn (gen_addsi3 (pic_reg, pic_reg, pic_tmp)); | |
5155 } | 5144 } |
5156 else /* TARGET_THUMB1 */ | 5145 else /* TARGET_THUMB1 */ |
5157 { | 5146 { |
5158 if (arm_pic_register != INVALID_REGNUM | 5147 if (arm_pic_register != INVALID_REGNUM |
5159 && REGNO (pic_reg) > LAST_LO_REGNUM) | 5148 && REGNO (pic_reg) > LAST_LO_REGNUM) |
5174 /* Need to emit this whether or not we obey regdecls, | 5163 /* Need to emit this whether or not we obey regdecls, |
5175 since setjmp/longjmp can cause life info to screw up. */ | 5164 since setjmp/longjmp can cause life info to screw up. */ |
5176 emit_use (pic_reg); | 5165 emit_use (pic_reg); |
5177 } | 5166 } |
5178 | 5167 |
5168 /* Generate code to load the address of a static var when flag_pic is set. ORIG is the symbol or label whose address is wanted and REG is the register that receives it. Two insns are emitted: a load of the pc-relative offset of ORIG into REG, then an add of the pc value, keyed by a fresh label number taken from pic_labelno. Returns the second (add) insn. */ | |
5169 static rtx | |
5170 arm_pic_static_addr (rtx orig, rtx reg) | |
5171 { | |
5172 rtx l1, labelno, offset_rtx, insn; | |
5173 | |
5174 gcc_assert (flag_pic); | |
5175 | |
5176 /* We use an UNSPEC rather than a LABEL_REF because this label | |
5177 never appears in the code stream. */ | |
5178 labelno = GEN_INT (pic_labelno++); | |
5179 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL); | |
5180 l1 = gen_rtx_CONST (VOIDmode, l1); | |
5181 | |
5182 /* On the ARM the PC register contains 'dot + 8' at the time of the | |
5183 addition, on the Thumb it is 'dot + 4'. */ | |
5184 offset_rtx = plus_constant (l1, TARGET_ARM ? 8 : 4); | |
5185 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx), | |
5186 UNSPEC_SYMBOL_OFFSET); | |
5187 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx); | |
5188 | |
5189 if (TARGET_32BIT) | |
5190 { | |
5191 emit_insn (gen_pic_load_addr_32bit (reg, offset_rtx)); | |
5192 if (TARGET_ARM) | |
5193 insn = emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno)); | |
5194 else | |
5195 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); | |
5196 } | |
5197 else /* TARGET_THUMB1 */ | |
5198 { | |
5199 emit_insn (gen_pic_load_addr_thumb1 (reg, offset_rtx)); | |
5200 insn = emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); | |
5201 } | |
5202 | |
5203 return insn; | |
5204 } | |
5179 | 5205 |
5180 /* Return nonzero if X is valid as an ARM state addressing register. */ | 5206 /* Return nonzero if X is valid as an ARM state addressing register. */ |
5181 static int | 5207 static int |
5182 arm_address_register_rtx_p (rtx x, int strict_p) | 5208 arm_address_register_rtx_p (rtx x, int strict_p) |
5183 { | 5209 { |
5806 reg = load_tls_operand (sum, reg); | 5832 reg = load_tls_operand (sum, reg); |
5807 | 5833 |
5808 if (TARGET_ARM) | 5834 if (TARGET_ARM) |
5809 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno)); | 5835 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno)); |
5810 else if (TARGET_THUMB2) | 5836 else if (TARGET_THUMB2) |
5811 { | 5837 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); |
5812 rtx tmp; | |
5813 /* Thumb-2 only allows very limited access to the PC. Calculate | |
5814 the address in a temporary register. */ | |
5815 tmp = gen_reg_rtx (SImode); | |
5816 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno)); | |
5817 emit_insn (gen_addsi3(reg, reg, tmp)); | |
5818 } | |
5819 else /* TARGET_THUMB1 */ | 5838 else /* TARGET_THUMB1 */ |
5820 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); | 5839 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); |
5821 | 5840 |
5822 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */ | 5841 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX, LCT_PURE, /* LCT_CONST? */ |
5823 Pmode, 1, reg, Pmode); | 5842 Pmode, 1, reg, Pmode); |
5869 reg = load_tls_operand (sum, reg); | 5888 reg = load_tls_operand (sum, reg); |
5870 | 5889 |
5871 if (TARGET_ARM) | 5890 if (TARGET_ARM) |
5872 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno)); | 5891 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno)); |
5873 else if (TARGET_THUMB2) | 5892 else if (TARGET_THUMB2) |
5874 { | 5893 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno)); |
5875 rtx tmp; | |
5876 /* Thumb-2 only allows very limited access to the PC. Calculate | |
5877 the address in a temporary register. */ | |
5878 tmp = gen_reg_rtx (SImode); | |
5879 emit_insn (gen_pic_load_dot_plus_four (tmp, labelno)); | |
5880 emit_insn (gen_addsi3(reg, reg, tmp)); | |
5881 emit_move_insn (reg, gen_const_mem (SImode, reg)); | |
5882 } | |
5883 else | 5894 else |
5884 { | 5895 { |
5885 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); | 5896 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno)); |
5886 emit_move_insn (reg, gen_const_mem (SImode, reg)); | 5897 emit_move_insn (reg, gen_const_mem (SImode, reg)); |
5887 } | 5898 } |
6262 && INTVAL (x) < 256 && INTVAL (x) > -256) | 6273 && INTVAL (x) < 256 && INTVAL (x) > -256) |
6263 return 0; | 6274 return 0; |
6264 else if ((outer == IOR || outer == XOR || outer == AND) | 6275 else if ((outer == IOR || outer == XOR || outer == AND) |
6265 && INTVAL (x) < 256 && INTVAL (x) >= -256) | 6276 && INTVAL (x) < 256 && INTVAL (x) >= -256) |
6266 return COSTS_N_INSNS (1); | 6277 return COSTS_N_INSNS (1); |
6278 else if (outer == AND) | |
6279 { | |
6280 int i; | |
6281 /* This duplicates the tests in the andsi3 expander. */ | |
6282 for (i = 9; i <= 31; i++) | |
6283 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x) | |
6284 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x)) | |
6285 return COSTS_N_INSNS (2); | |
6286 } | |
6267 else if (outer == ASHIFT || outer == ASHIFTRT | 6287 else if (outer == ASHIFT || outer == ASHIFTRT |
6268 || outer == LSHIFTRT) | 6288 || outer == LSHIFTRT) |
6269 return 0; | 6289 return 0; |
6270 return COSTS_N_INSNS (2); | 6290 return COSTS_N_INSNS (2); |
6271 | 6291 |
6333 { | 6353 { |
6334 enum machine_mode mode = GET_MODE (x); | 6354 enum machine_mode mode = GET_MODE (x); |
6335 enum rtx_code subcode; | 6355 enum rtx_code subcode; |
6336 rtx operand; | 6356 rtx operand; |
6337 enum rtx_code code = GET_CODE (x); | 6357 enum rtx_code code = GET_CODE (x); |
6338 int extra_cost; | |
6339 *total = 0; | 6358 *total = 0; |
6340 | 6359 |
6341 switch (code) | 6360 switch (code) |
6342 { | 6361 { |
6343 case MEM: | 6362 case MEM: |
6557 } | 6576 } |
6558 | 6577 |
6559 /* Fall through */ | 6578 /* Fall through */ |
6560 | 6579 |
6561 case AND: case XOR: case IOR: | 6580 case AND: case XOR: case IOR: |
6562 extra_cost = 0; | |
6563 | 6581 |
6564 /* Normally the frame registers will be spilt into reg+const during | 6582 /* Normally the frame registers will be spilt into reg+const during |
6565 reload, so it is a bad idea to combine them with other instructions, | 6583 reload, so it is a bad idea to combine them with other instructions, |
6566 since then they might not be moved outside of loops. As a compromise | 6584 since then they might not be moved outside of loops. As a compromise |
6567 we allow integration with ops that have a constant as their second | 6585 we allow integration with ops that have a constant as their second |
6909 *total = COSTS_N_INSNS (4); | 6927 *total = COSTS_N_INSNS (4); |
6910 return false; | 6928 return false; |
6911 } | 6929 } |
6912 } | 6930 } |
6913 | 6931 |
6932 /* Estimate the size cost of the Thumb-1 rtx X, whose code is CODE and which appears inside an expression of code OUTER (OUTER is only consulted for CONST_INT). Codes with no case of their own cost 99. | |
6933 For now most of the code is copied from thumb1_rtx_costs. We need more | |
6934 fine-grained tuning when we have more related test cases. */ | |
6935 static inline int | |
6936 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer) | |
6937 { | |
6938 enum machine_mode mode = GET_MODE (x); | |
6939 | |
6940 switch (code) | |
6941 { | |
6942 case ASHIFT: | |
6943 case ASHIFTRT: | |
6944 case LSHIFTRT: | |
6945 case ROTATERT: | |
6946 case PLUS: | |
6947 case MINUS: | |
6948 case COMPARE: | |
6949 case NEG: | |
6950 case NOT: | |
6951 return COSTS_N_INSNS (1); | |
6952 | |
6953 case MULT: | |
6954 if (GET_CODE (XEXP (x, 1)) == CONST_INT) | |
6955 { | |
6956 /* Thumb1 mul instruction can't operate on const. We must load it | |
6957 into a register first. */ | |
6958 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET); | |
6959 return COSTS_N_INSNS (1) + const_size; | |
6960 } | |
6961 return COSTS_N_INSNS (1); | |
6962 | |
6963 case SET: /* One insn plus 4 for each of source and destination that is a MEM; each comparison needs its own parentheses because + binds tighter than ==. */ | |
6964 return (COSTS_N_INSNS (1) | |
6965 + 4 * ((GET_CODE (SET_SRC (x)) == MEM) | |
6966 + (GET_CODE (SET_DEST (x)) == MEM))); | |
6967 | |
6968 case CONST_INT: | |
6969 if (outer == SET) | |
6970 { | |
6971 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256) | |
6972 return 0; | |
6973 if (thumb_shiftable_const (INTVAL (x))) | |
6974 return COSTS_N_INSNS (2); | |
6975 return COSTS_N_INSNS (3); | |
6976 } | |
6977 else if ((outer == PLUS || outer == COMPARE) | |
6978 && INTVAL (x) < 256 && INTVAL (x) > -256) | |
6979 return 0; | |
6980 else if ((outer == IOR || outer == XOR || outer == AND) | |
6981 && INTVAL (x) < 256 && INTVAL (x) >= -256) | |
6982 return COSTS_N_INSNS (1); | |
6983 else if (outer == AND) | |
6984 { | |
6985 int i; | |
6986 /* This duplicates the tests in the andsi3 expander. */ | |
6987 for (i = 9; i <= 31; i++) | |
6988 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x) | |
6989 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x)) | |
6990 return COSTS_N_INSNS (2); | |
6991 } | |
6992 else if (outer == ASHIFT || outer == ASHIFTRT | |
6993 || outer == LSHIFTRT) | |
6994 return 0; | |
6995 return COSTS_N_INSNS (2); | |
6996 | |
6997 case CONST: | |
6998 case CONST_DOUBLE: | |
6999 case LABEL_REF: | |
7000 case SYMBOL_REF: | |
7001 return COSTS_N_INSNS (3); | |
7002 | |
7003 case UDIV: | |
7004 case UMOD: | |
7005 case DIV: | |
7006 case MOD: | |
7007 return 100; | |
7008 | |
7009 case TRUNCATE: | |
7010 return 99; | |
7011 | |
7012 case AND: | |
7013 case XOR: | |
7014 case IOR: | |
7015 /* XXX guess. */ | |
7016 return 8; | |
7017 | |
7018 case MEM: | |
7019 /* XXX another guess. NOTE(review): assuming CODE is GET_CODE (X), X is the MEM here, so the GET_CODE (x) == SYMBOL_REF test below looks like it can never hold; the address XEXP (x, 0) may have been intended -- confirm before changing. */ | |
7020 /* Memory costs quite a lot for the first word, but subsequent words | |
7021 load at the equivalent of a single insn each. */ | |
7022 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD) | |
7023 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) | |
7024 ? 4 : 0)); | |
7025 | |
7026 case IF_THEN_ELSE: | |
7027 /* XXX a guess. */ | |
7028 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC) | |
7029 return 14; | |
7030 return 2; | |
7031 | |
7032 case ZERO_EXTEND: | |
7033 /* XXX still guessing. */ | |
7034 switch (GET_MODE (XEXP (x, 0))) | |
7035 { | |
7036 case QImode: | |
7037 return (1 + (mode == DImode ? 4 : 0) | |
7038 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); | |
7039 | |
7040 case HImode: | |
7041 return (4 + (mode == DImode ? 4 : 0) | |
7042 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); | |
7043 | |
7044 case SImode: | |
7045 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0)); | |
7046 | |
7047 default: | |
7048 return 99; | |
7049 } | |
7050 | |
7051 default: | |
7052 return 99; | |
7053 } | |
7054 } | |
7055 | |
6914 /* RTX costs when optimizing for size. */ | 7056 /* RTX costs when optimizing for size. */ |
6915 static bool | 7057 static bool |
6916 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, | 7058 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, |
6917 int *total) | 7059 int *total) |
6918 { | 7060 { |
6919 enum machine_mode mode = GET_MODE (x); | 7061 enum machine_mode mode = GET_MODE (x); |
6920 if (TARGET_THUMB1) | 7062 if (TARGET_THUMB1) |
6921 { | 7063 { |
6922 /* XXX TBD. For now, use the standard costs. */ | 7064 *total = thumb1_size_rtx_costs (x, code, outer_code); |
6923 *total = thumb1_rtx_costs (x, code, outer_code); | |
6924 return true; | 7065 return true; |
6925 } | 7066 } |
6926 | 7067 |
6927 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */ | 7068 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */ |
6928 switch (code) | 7069 switch (code) |
7168 { | 7309 { |
7169 if (!speed) | 7310 if (!speed) |
7170 return arm_size_rtx_costs (x, (enum rtx_code) code, | 7311 return arm_size_rtx_costs (x, (enum rtx_code) code, |
7171 (enum rtx_code) outer_code, total); | 7312 (enum rtx_code) outer_code, total); |
7172 else | 7313 else |
7173 return all_cores[(int)arm_tune].rtx_costs (x, (enum rtx_code) code, | 7314 return current_tune->rtx_costs (x, (enum rtx_code) code, |
7174 (enum rtx_code) outer_code, | 7315 (enum rtx_code) outer_code, |
7175 total, speed); | 7316 total, speed); |
7176 } | 7317 } |
7177 | 7318 |
7178 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not | 7319 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not |
7179 supported on any "slowmul" cores, so it can be ignored. */ | 7320 supported on any "slowmul" cores, so it can be ignored. */ |
7180 | 7321 |
7315 | 7456 |
7316 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores, | 7457 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores, |
7317 so it can be ignored. */ | 7458 so it can be ignored. */ |
7318 | 7459 |
7319 static bool | 7460 static bool |
7320 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, int *total, bool speed) | 7461 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code, |
7462 int *total, bool speed) | |
7321 { | 7463 { |
7322 enum machine_mode mode = GET_MODE (x); | 7464 enum machine_mode mode = GET_MODE (x); |
7323 | 7465 |
7324 if (TARGET_THUMB) | 7466 if (TARGET_THUMB) |
7325 { | 7467 { |
7655 | 7797 |
7656 if (!fp_consts_inited) | 7798 if (!fp_consts_inited) |
7657 init_fp_table (); | 7799 init_fp_table (); |
7658 | 7800 |
7659 REAL_VALUE_FROM_CONST_DOUBLE (r, x); | 7801 REAL_VALUE_FROM_CONST_DOUBLE (r, x); |
7660 r = REAL_VALUE_NEGATE (r); | 7802 r = real_value_negate (&r); |
7661 if (REAL_VALUE_MINUS_ZERO (r)) | 7803 if (REAL_VALUE_MINUS_ZERO (r)) |
7662 return 0; | 7804 return 0; |
7663 | 7805 |
7664 for (i = 0; i < 8; i++) | 7806 for (i = 0; i < 8; i++) |
7665 if (REAL_VALUES_EQUAL (r, values_fp[i])) | 7807 if (REAL_VALUES_EQUAL (r, values_fp[i])) |
7706 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r)) | 7848 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r)) |
7707 return -1; | 7849 return -1; |
7708 | 7850 |
7709 /* Extract sign, exponent and mantissa. */ | 7851 /* Extract sign, exponent and mantissa. */ |
7710 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0; | 7852 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0; |
7711 r = REAL_VALUE_ABS (r); | 7853 r = real_value_abs (&r); |
7712 exponent = REAL_EXP (&r); | 7854 exponent = REAL_EXP (&r); |
7713 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the | 7855 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the |
7714 highest (sign) bit, with a fixed binary point at bit point_pos. | 7856 highest (sign) bit, with a fixed binary point at bit point_pos. |
7715 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1 | 7857 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1 |
7716 bits for the mantissa, this may fail (low bits would be lost). */ | 7858 bits for the mantissa, this may fail (low bits would be lost). */ |
8826 default: | 8968 default: |
8827 return 0; | 8969 return 0; |
8828 } | 8970 } |
8829 } | 8971 } |
8830 | 8972 |
8831 /* Must not copy a SET whose source operand is PC-relative. */ | 8973 /* Must not copy any rtx that uses a pc-relative address. */ |
8974 | |
8975 static int | |
8976 arm_note_pic_base (rtx *x, void *date ATTRIBUTE_UNUSED) | |
8977 { | |
8978 if (GET_CODE (*x) == UNSPEC | |
8979 && XINT (*x, 1) == UNSPEC_PIC_BASE) | |
8980 return 1; | |
8981 return 0; | |
8982 } | |
8832 | 8983 |
8833 static bool | 8984 static bool |
8834 arm_cannot_copy_insn_p (rtx insn) | 8985 arm_cannot_copy_insn_p (rtx insn) |
8835 { | 8986 { |
8836 rtx pat = PATTERN (insn); | 8987 return for_each_rtx (&PATTERN (insn), arm_note_pic_base, NULL); |
8837 | |
8838 if (GET_CODE (pat) == SET) | |
8839 { | |
8840 rtx rhs = SET_SRC (pat); | |
8841 | |
8842 if (GET_CODE (rhs) == UNSPEC | |
8843 && XINT (rhs, 1) == UNSPEC_PIC_BASE) | |
8844 return TRUE; | |
8845 | |
8846 if (GET_CODE (rhs) == MEM | |
8847 && GET_CODE (XEXP (rhs, 0)) == UNSPEC | |
8848 && XINT (XEXP (rhs, 0), 1) == UNSPEC_PIC_BASE) | |
8849 return TRUE; | |
8850 } | |
8851 | |
8852 return FALSE; | |
8853 } | 8988 } |
8854 | 8989 |
8855 enum rtx_code | 8990 enum rtx_code |
8856 minmax_code (rtx x) | 8991 minmax_code (rtx x) |
8857 { | 8992 { |
8935 } | 9070 } |
8936 | 9071 |
8937 return 0; | 9072 return 0; |
8938 } | 9073 } |
8939 | 9074 |
9075 /* Return true iff it would be profitable to turn a sequence of NOPS loads | |
9076 or stores (depending on IS_STORE) into a load-multiple or store-multiple | |
9077 instruction. ADD_OFFSET is nonzero if the base address register needs | |
9078 to be modified with an add instruction before we can use it. */ | |
9079 | |
9080 static bool | |
9081 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED, | |
9082 int nops, HOST_WIDE_INT add_offset) | |
9083 { | |
9084 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm | |
9085 if the offset isn't small enough. The reason 2 ldrs are faster | |
9086 is because these ARMs are able to do more than one cache access | |
9087 in a single cycle. The ARM9 and StrongARM have Harvard caches, | |
9088 whilst the ARM8 has a double bandwidth cache. This means that | |
9089 these cores can do both an instruction fetch and a data fetch in | |
9090 a single cycle, so the trick of calculating the address into a | |
9091 scratch register (one of the result regs) and then doing a load | |
9092 multiple actually becomes slower (and no smaller in code size). | |
9093 That is the transformation | |
9094 | |
9095 ldr rd1, [rbase + offset] | |
9096 ldr rd2, [rbase + offset + 4] | |
9097 | |
9098 to | |
9099 | |
9100 add rd1, rbase, offset | |
9101 ldmia rd1, {rd1, rd2} | |
9102 | |
9103 produces worse code -- '3 cycles + any stalls on rd2' instead of | |
9104 '2 cycles + any stalls on rd2'. On ARMs with only one cache | |
9105 access per cycle, the first sequence could never complete in less | |
9106 than 6 cycles, whereas the ldm sequence would only take 5 and | |
9107 would make better use of sequential accesses if not hitting the | |
9108 cache. | |
9109 | |
9110 We cheat here and test 'arm_ld_sched' which we currently know to | |
9111 only be true for the ARM8, ARM9 and StrongARM. If this ever | |
9112 changes, then the test below needs to be reworked. */ | |
9113 if (nops == 2 && arm_ld_sched && add_offset != 0) | |
9114 return false; | |
9115 | |
9116 return true; | |
9117 } | |
9118 | |
9119 /* Subroutine of load_multiple_sequence and store_multiple_sequence. | |
9120 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute | |
9121 an array ORDER which describes the sequence to use when accessing the | |
9122 offsets that produces an ascending order. In this sequence, each | |
9123 offset must be larger by exactly 4 than the previous one. ORDER[0] | |
9124 must have been filled in with the lowest offset by the caller. | |
9125 If UNSORTED_REGS is nonnull, it is an array of register numbers that | |
9126 we use to verify that ORDER produces an ascending order of registers. | |
9127 Return true if it was possible to construct such an order, false if | |
9128 not. */ | |
9129 | |
9130 static bool | |
9131 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order, | |
9132 int *unsorted_regs) | |
9133 { | |
9134 int i; | |
9135 for (i = 1; i < nops; i++) | |
9136 { | |
9137 int j; | |
9138 | |
9139 order[i] = order[i - 1]; | |
9140 for (j = 0; j < nops; j++) | |
9141 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4) | |
9142 { | |
9143 /* We must find exactly one offset that is higher than the | |
9144 previous one by 4. */ | |
9145 if (order[i] != order[i - 1]) | |
9146 return false; | |
9147 order[i] = j; | |
9148 } | |
9149 if (order[i] == order[i - 1]) | |
9150 return false; | |
9151 /* The register numbers must be ascending. */ | |
9152 if (unsorted_regs != NULL | |
9153 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]]) | |
9154 return false; | |
9155 } | |
9156 return true; | |
9157 } | |
9158 | |
8940 int | 9159 int |
8941 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base, | 9160 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base, |
8942 HOST_WIDE_INT *load_offset) | 9161 HOST_WIDE_INT *load_offset) |
8943 { | 9162 { |
8944 int unsorted_regs[4]; | 9163 int unsorted_regs[MAX_LDM_STM_OPS]; |
8945 HOST_WIDE_INT unsorted_offsets[4]; | 9164 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; |
8946 int order[4]; | 9165 int order[MAX_LDM_STM_OPS]; |
8947 int base_reg = -1; | 9166 int base_reg = -1; |
8948 int i; | 9167 int i, ldm_case; |
8949 | 9168 |
8950 /* Can only handle 2, 3, or 4 insns at present, | 9169 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be |
8951 though could be easily extended if required. */ | 9170 easily extended if required. */ |
8952 gcc_assert (nops >= 2 && nops <= 4); | 9171 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS); |
8953 | 9172 |
8954 memset (order, 0, 4 * sizeof (int)); | 9173 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int)); |
8955 | 9174 |
8956 /* Loop over the operands and check that the memory references are | 9175 /* Loop over the operands and check that the memory references are |
8957 suitable (i.e. immediate offsets from the same base register). At | 9176 suitable (i.e. immediate offsets from the same base register). At |
8958 the same time, extract the target register, and the memory | 9177 the same time, extract the target register, and the memory |
8959 offsets. */ | 9178 offsets. */ |
8985 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) | 9204 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) |
8986 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) | 9205 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) |
8987 == CONST_INT))) | 9206 == CONST_INT))) |
8988 { | 9207 { |
8989 if (i == 0) | 9208 if (i == 0) |
8990 { | 9209 base_reg = REGNO (reg); |
8991 base_reg = REGNO (reg); | |
8992 unsorted_regs[0] = (GET_CODE (operands[i]) == REG | |
8993 ? REGNO (operands[i]) | |
8994 : REGNO (SUBREG_REG (operands[i]))); | |
8995 order[0] = 0; | |
8996 } | |
8997 else | 9210 else |
8998 { | 9211 { |
8999 if (base_reg != (int) REGNO (reg)) | 9212 if (base_reg != (int) REGNO (reg)) |
9000 /* Not addressed from the same base register. */ | 9213 /* Not addressed from the same base register. */ |
9001 return 0; | 9214 return 0; |
9002 | |
9003 unsorted_regs[i] = (GET_CODE (operands[i]) == REG | |
9004 ? REGNO (operands[i]) | |
9005 : REGNO (SUBREG_REG (operands[i]))); | |
9006 if (unsorted_regs[i] < unsorted_regs[order[0]]) | |
9007 order[0] = i; | |
9008 } | 9215 } |
9216 unsorted_regs[i] = (GET_CODE (operands[i]) == REG | |
9217 ? REGNO (operands[i]) | |
9218 : REGNO (SUBREG_REG (operands[i]))); | |
9009 | 9219 |
9010 /* If it isn't an integer register, or if it overwrites the | 9220 /* If it isn't an integer register, or if it overwrites the |
9011 base register but isn't the last insn in the list, then | 9221 base register but isn't the last insn in the list, then |
9012 we can't do this. */ | 9222 we can't do this. */ |
9013 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14 | 9223 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14 |
9014 || (i != nops - 1 && unsorted_regs[i] == base_reg)) | 9224 || (i != nops - 1 && unsorted_regs[i] == base_reg)) |
9015 return 0; | 9225 return 0; |
9016 | 9226 |
9017 unsorted_offsets[i] = INTVAL (offset); | 9227 unsorted_offsets[i] = INTVAL (offset); |
9228 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) | |
9229 order[0] = i; | |
9018 } | 9230 } |
9019 else | 9231 else |
9020 /* Not a suitable memory address. */ | 9232 /* Not a suitable memory address. */ |
9021 return 0; | 9233 return 0; |
9022 } | 9234 } |
9023 | 9235 |
9024 /* All the useful information has now been extracted from the | 9236 /* All the useful information has now been extracted from the |
9025 operands into unsorted_regs and unsorted_offsets; additionally, | 9237 operands into unsorted_regs and unsorted_offsets; additionally, |
9026 order[0] has been set to the lowest numbered register in the | 9238 order[0] has been set to the lowest offset in the list. Sort |
9027 list. Sort the registers into order, and check that the memory | 9239 the offsets into order, verifying that they are adjacent, and |
9028 offsets are ascending and adjacent. */ | 9240 check that the register numbers are ascending. */ |
9029 | 9241 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs)) |
9030 for (i = 1; i < nops; i++) | 9242 return 0; |
9031 { | |
9032 int j; | |
9033 | |
9034 order[i] = order[i - 1]; | |
9035 for (j = 0; j < nops; j++) | |
9036 if (unsorted_regs[j] > unsorted_regs[order[i - 1]] | |
9037 && (order[i] == order[i - 1] | |
9038 || unsorted_regs[j] < unsorted_regs[order[i]])) | |
9039 order[i] = j; | |
9040 | |
9041 /* Have we found a suitable register? if not, one must be used more | |
9042 than once. */ | |
9043 if (order[i] == order[i - 1]) | |
9044 return 0; | |
9045 | |
9046 /* Is the memory address adjacent and ascending? */ | |
9047 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4) | |
9048 return 0; | |
9049 } | |
9050 | 9243 |
9051 if (base) | 9244 if (base) |
9052 { | 9245 { |
9053 *base = base_reg; | 9246 *base = base_reg; |
9054 | 9247 |
9057 | 9250 |
9058 *load_offset = unsorted_offsets[order[0]]; | 9251 *load_offset = unsorted_offsets[order[0]]; |
9059 } | 9252 } |
9060 | 9253 |
9061 if (unsorted_offsets[order[0]] == 0) | 9254 if (unsorted_offsets[order[0]] == 0) |
9062 return 1; /* ldmia */ | 9255 ldm_case = 1; /* ldmia */ |
9063 | 9256 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) |
9064 if (TARGET_ARM && unsorted_offsets[order[0]] == 4) | 9257 ldm_case = 2; /* ldmib */ |
9065 return 2; /* ldmib */ | 9258 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) |
9066 | 9259 ldm_case = 3; /* ldmda */ |
9067 if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) | 9260 else if (unsorted_offsets[order[nops - 1]] == -4) |
9068 return 3; /* ldmda */ | 9261 ldm_case = 4; /* ldmdb */ |
9069 | 9262 else if (const_ok_for_arm (unsorted_offsets[order[0]]) |
9070 if (unsorted_offsets[order[nops - 1]] == -4) | 9263 || const_ok_for_arm (-unsorted_offsets[order[0]])) |
9071 return 4; /* ldmdb */ | 9264 ldm_case = 5; |
9072 | 9265 else |
9073 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm | |
9074 if the offset isn't small enough. The reason 2 ldrs are faster | |
9075 is because these ARMs are able to do more than one cache access | |
9076 in a single cycle. The ARM9 and StrongARM have Harvard caches, | |
9077 whilst the ARM8 has a double bandwidth cache. This means that | |
9078 these cores can do both an instruction fetch and a data fetch in | |
9079 a single cycle, so the trick of calculating the address into a | |
9080 scratch register (one of the result regs) and then doing a load | |
9081 multiple actually becomes slower (and no smaller in code size). | |
9082 That is the transformation | |
9083 | |
9084 ldr rd1, [rbase + offset] | |
9085 ldr rd2, [rbase + offset + 4] | |
9086 | |
9087 to | |
9088 | |
9089 add rd1, rbase, offset | |
9090 ldmia rd1, {rd1, rd2} | |
9091 | |
9092 produces worse code -- '3 cycles + any stalls on rd2' instead of | |
9093 '2 cycles + any stalls on rd2'. On ARMs with only one cache | |
9094 access per cycle, the first sequence could never complete in less | |
9095 than 6 cycles, whereas the ldm sequence would only take 5 and | |
9096 would make better use of sequential accesses if not hitting the | |
9097 cache. | |
9098 | |
9099 We cheat here and test 'arm_ld_sched' which we currently know to | |
9100 only be true for the ARM8, ARM9 and StrongARM. If this ever | |
9101 changes, then the test below needs to be reworked. */ | |
9102 if (nops == 2 && arm_ld_sched) | |
9103 return 0; | 9266 return 0; |
9104 | 9267 |
9105 /* Can't do it without setting up the offset, only do this if it takes | 9268 if (!multiple_operation_profitable_p (false, nops, |
9106 no more than one insn. */ | 9269 ldm_case == 5 |
9107 return (const_ok_for_arm (unsorted_offsets[order[0]]) | 9270 ? unsorted_offsets[order[0]] : 0)) |
9108 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0; | 9271 return 0; |
9272 | |
9273 return ldm_case; | |
9109 } | 9274 } |
9110 | 9275 |
9111 const char * | 9276 const char * |
9112 emit_ldm_seq (rtx *operands, int nops) | 9277 emit_ldm_seq (rtx *operands, int nops) |
9113 { | 9278 { |
9114 int regs[4]; | 9279 int regs[MAX_LDM_STM_OPS]; |
9115 int base_reg; | 9280 int base_reg; |
9116 HOST_WIDE_INT offset; | 9281 HOST_WIDE_INT offset; |
9117 char buf[100]; | 9282 char buf[100]; |
9118 int i; | 9283 int i; |
9119 | 9284 |
9168 | 9333 |
9169 int | 9334 int |
9170 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base, | 9335 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base, |
9171 HOST_WIDE_INT * load_offset) | 9336 HOST_WIDE_INT * load_offset) |
9172 { | 9337 { |
9173 int unsorted_regs[4]; | 9338 int unsorted_regs[MAX_LDM_STM_OPS]; |
9174 HOST_WIDE_INT unsorted_offsets[4]; | 9339 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS]; |
9175 int order[4]; | 9340 int order[MAX_LDM_STM_OPS]; |
9176 int base_reg = -1; | 9341 int base_reg = -1; |
9177 int i; | 9342 int i, stm_case; |
9178 | 9343 |
9179 /* Can only handle 2, 3, or 4 insns at present, though could be easily | 9344 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be |
9180 extended if required. */ | 9345 easily extended if required. */ |
9181 gcc_assert (nops >= 2 && nops <= 4); | 9346 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS); |
9182 | 9347 |
9183 memset (order, 0, 4 * sizeof (int)); | 9348 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int)); |
9184 | 9349 |
9185 /* Loop over the operands and check that the memory references are | 9350 /* Loop over the operands and check that the memory references are |
9186 suitable (i.e. immediate offsets from the same base register). At | 9351 suitable (i.e. immediate offsets from the same base register). At |
9187 the same time, extract the target register, and the memory | 9352 the same time, extract the target register, and the memory |
9188 offsets. */ | 9353 offsets. */ |
9213 || (GET_CODE (reg) == SUBREG | 9378 || (GET_CODE (reg) == SUBREG |
9214 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) | 9379 && GET_CODE (reg = SUBREG_REG (reg)) == REG)) |
9215 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) | 9380 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1)) |
9216 == CONST_INT))) | 9381 == CONST_INT))) |
9217 { | 9382 { |
9383 unsorted_regs[i] = (GET_CODE (operands[i]) == REG | |
9384 ? REGNO (operands[i]) | |
9385 : REGNO (SUBREG_REG (operands[i]))); | |
9218 if (i == 0) | 9386 if (i == 0) |
9219 { | 9387 base_reg = REGNO (reg); |
9220 base_reg = REGNO (reg); | 9388 else if (base_reg != (int) REGNO (reg)) |
9221 unsorted_regs[0] = (GET_CODE (operands[i]) == REG | 9389 /* Not addressed from the same base register. */ |
9222 ? REGNO (operands[i]) | 9390 return 0; |
9223 : REGNO (SUBREG_REG (operands[i]))); | |
9224 order[0] = 0; | |
9225 } | |
9226 else | |
9227 { | |
9228 if (base_reg != (int) REGNO (reg)) | |
9229 /* Not addressed from the same base register. */ | |
9230 return 0; | |
9231 | |
9232 unsorted_regs[i] = (GET_CODE (operands[i]) == REG | |
9233 ? REGNO (operands[i]) | |
9234 : REGNO (SUBREG_REG (operands[i]))); | |
9235 if (unsorted_regs[i] < unsorted_regs[order[0]]) | |
9236 order[0] = i; | |
9237 } | |
9238 | 9391 |
9239 /* If it isn't an integer register, then we can't do this. */ | 9392 /* If it isn't an integer register, then we can't do this. */ |
9240 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14) | 9393 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14) |
9241 return 0; | 9394 return 0; |
9242 | 9395 |
9243 unsorted_offsets[i] = INTVAL (offset); | 9396 unsorted_offsets[i] = INTVAL (offset); |
9397 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]]) | |
9398 order[0] = i; | |
9244 } | 9399 } |
9245 else | 9400 else |
9246 /* Not a suitable memory address. */ | 9401 /* Not a suitable memory address. */ |
9247 return 0; | 9402 return 0; |
9248 } | 9403 } |
9249 | 9404 |
9250 /* All the useful information has now been extracted from the | 9405 /* All the useful information has now been extracted from the |
9251 operands into unsorted_regs and unsorted_offsets; additionally, | 9406 operands into unsorted_regs and unsorted_offsets; additionally, |
9252 order[0] has been set to the lowest numbered register in the | 9407 order[0] has been set to the lowest offset in the list. Sort |
9253 list. Sort the registers into order, and check that the memory | 9408 the offsets into order, verifying that they are adjacent, and |
9254 offsets are ascending and adjacent. */ | 9409 check that the register numbers are ascending. */ |
9255 | 9410 if (!compute_offset_order (nops, unsorted_offsets, order, unsorted_regs)) |
9256 for (i = 1; i < nops; i++) | 9411 return 0; |
9257 { | |
9258 int j; | |
9259 | |
9260 order[i] = order[i - 1]; | |
9261 for (j = 0; j < nops; j++) | |
9262 if (unsorted_regs[j] > unsorted_regs[order[i - 1]] | |
9263 && (order[i] == order[i - 1] | |
9264 || unsorted_regs[j] < unsorted_regs[order[i]])) | |
9265 order[i] = j; | |
9266 | |
9267 /* Have we found a suitable register? if not, one must be used more | |
9268 than once. */ | |
9269 if (order[i] == order[i - 1]) | |
9270 return 0; | |
9271 | |
9272 /* Is the memory address adjacent and ascending? */ | |
9273 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4) | |
9274 return 0; | |
9275 } | |
9276 | 9412 |
9277 if (base) | 9413 if (base) |
9278 { | 9414 { |
9279 *base = base_reg; | 9415 *base = base_reg; |
9280 | 9416 |
9283 | 9419 |
9284 *load_offset = unsorted_offsets[order[0]]; | 9420 *load_offset = unsorted_offsets[order[0]]; |
9285 } | 9421 } |
9286 | 9422 |
9287 if (unsorted_offsets[order[0]] == 0) | 9423 if (unsorted_offsets[order[0]] == 0) |
9288 return 1; /* stmia */ | 9424 stm_case = 1; /* stmia */ |
9289 | 9425 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4) |
9290 if (unsorted_offsets[order[0]] == 4) | 9426 stm_case = 2; /* stmib */ |
9291 return 2; /* stmib */ | 9427 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0) |
9292 | 9428 stm_case = 3; /* stmda */ |
9293 if (unsorted_offsets[order[nops - 1]] == 0) | 9429 else if (unsorted_offsets[order[nops - 1]] == -4) |
9294 return 3; /* stmda */ | 9430 stm_case = 4; /* stmdb */ |
9295 | 9431 else |
9296 if (unsorted_offsets[order[nops - 1]] == -4) | 9432 return 0; |
9297 return 4; /* stmdb */ | 9433 |
9298 | 9434 if (!multiple_operation_profitable_p (false, nops, 0)) |
9299 return 0; | 9435 return 0; |
9436 | |
9437 return stm_case; | |
9300 } | 9438 } |
9301 | 9439 |
9302 const char * | 9440 const char * |
9303 emit_stm_seq (rtx *operands, int nops) | 9441 emit_stm_seq (rtx *operands, int nops) |
9304 { | 9442 { |
9305 int regs[4]; | 9443 int regs[MAX_LDM_STM_OPS]; |
9306 int base_reg; | 9444 int base_reg; |
9307 HOST_WIDE_INT offset; | 9445 HOST_WIDE_INT offset; |
9308 char buf[100]; | 9446 char buf[100]; |
9309 int i; | 9447 int i; |
9310 | 9448 |
11682 reg = gen_rtx_REG (DFmode, base_reg); | 11820 reg = gen_rtx_REG (DFmode, base_reg); |
11683 base_reg += 2; | 11821 base_reg += 2; |
11684 | 11822 |
11685 XVECEXP (par, 0, 0) | 11823 XVECEXP (par, 0, 0) |
11686 = gen_rtx_SET (VOIDmode, | 11824 = gen_rtx_SET (VOIDmode, |
11687 gen_frame_mem (BLKmode, | 11825 gen_frame_mem |
11688 gen_rtx_PRE_DEC (BLKmode, | 11826 (BLKmode, |
11689 stack_pointer_rtx)), | 11827 gen_rtx_PRE_MODIFY (Pmode, |
11828 stack_pointer_rtx, | |
11829 plus_constant | |
11830 (stack_pointer_rtx, | |
11831 - (count * 8))) | |
11832 ), | |
11690 gen_rtx_UNSPEC (BLKmode, | 11833 gen_rtx_UNSPEC (BLKmode, |
11691 gen_rtvec (1, reg), | 11834 gen_rtvec (1, reg), |
11692 UNSPEC_PUSH_MULT)); | 11835 UNSPEC_PUSH_MULT)); |
11693 | 11836 |
11694 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, | 11837 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx, |
13751 start_reg, reg - start_reg, SP_REGNUM); | 13894 start_reg, reg - start_reg, SP_REGNUM); |
13752 } | 13895 } |
13753 | 13896 |
13754 if (TARGET_HARD_FLOAT && TARGET_VFP) | 13897 if (TARGET_HARD_FLOAT && TARGET_VFP) |
13755 { | 13898 { |
13756 start_reg = FIRST_VFP_REGNUM; | 13899 int end_reg = LAST_VFP_REGNUM + 1; |
13757 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2) | 13900 |
13901 /* Scan the registers in reverse order. We need to match | |
13902 any groupings made in the prologue and generate matching | |
13903 pop operations. */ | |
13904 for (reg = LAST_VFP_REGNUM - 1; reg >= FIRST_VFP_REGNUM; reg -= 2) | |
13758 { | 13905 { |
13759 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg]) | 13906 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg]) |
13760 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1])) | 13907 && (!df_regs_ever_live_p (reg + 1) |
13908 || call_used_regs[reg + 1])) | |
13761 { | 13909 { |
13762 if (start_reg != reg) | 13910 if (end_reg > reg + 2) |
13763 vfp_output_fldmd (f, SP_REGNUM, | 13911 vfp_output_fldmd (f, SP_REGNUM, |
13764 (start_reg - FIRST_VFP_REGNUM) / 2, | 13912 (reg + 2 - FIRST_VFP_REGNUM) / 2, |
13765 (reg - start_reg) / 2); | 13913 (end_reg - (reg + 2)) / 2); |
13766 start_reg = reg + 2; | 13914 end_reg = reg; |
13767 } | 13915 } |
13768 } | 13916 } |
13769 if (start_reg != reg) | 13917 if (end_reg > reg + 2) |
13770 vfp_output_fldmd (f, SP_REGNUM, | 13918 vfp_output_fldmd (f, SP_REGNUM, 0, |
13771 (start_reg - FIRST_VFP_REGNUM) / 2, | 13919 (end_reg - (reg + 2)) / 2); |
13772 (reg - start_reg) / 2); | 13920 } |
13773 } | 13921 |
13774 if (TARGET_IWMMXT) | 13922 if (TARGET_IWMMXT) |
13775 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++) | 13923 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++) |
13776 if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) | 13924 if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) |
13777 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM); | 13925 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM); |
13778 | 13926 |
13937 if (mask & (1 << PC_REGNUM)) | 14085 if (mask & (1 << PC_REGNUM)) |
13938 num_dwarf_regs--; | 14086 num_dwarf_regs--; |
13939 | 14087 |
13940 /* For the body of the insn we are going to generate an UNSPEC in | 14088 /* For the body of the insn we are going to generate an UNSPEC in |
13941 parallel with several USEs. This allows the insn to be recognized | 14089 parallel with several USEs. This allows the insn to be recognized |
13942 by the push_multi pattern in the arm.md file. The insn looks | 14090 by the push_multi pattern in the arm.md file. |
13943 something like this: | 14091 |
14092 The body of the insn looks something like this: | |
13944 | 14093 |
13945 (parallel [ | 14094 (parallel [ |
13946 (set (mem:BLK (pre_dec:BLK (reg:SI sp))) | 14095 (set (mem:BLK (pre_modify:SI (reg:SI sp) |
14096 (const_int:SI <num>))) | |
13947 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT)) | 14097 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT)) |
13948 (use (reg:SI 11 fp)) | 14098 (use (reg:SI XX)) |
13949 (use (reg:SI 12 ip)) | 14099 (use (reg:SI YY)) |
13950 (use (reg:SI 14 lr)) | 14100 ... |
13951 (use (reg:SI 15 pc)) | |
13952 ]) | 14101 ]) |
13953 | 14102 |
13954 For the frame note however, we try to be more explicit and actually | 14103 For the frame note however, we try to be more explicit and actually |
13955 show each register being stored into the stack frame, plus a (single) | 14104 show each register being stored into the stack frame, plus a (single) |
13956 decrement of the stack pointer. We do it this way in order to be | 14105 decrement of the stack pointer. We do it this way in order to be |
13959 something like this: | 14108 something like this: |
13960 | 14109 |
13961 (sequence [ | 14110 (sequence [ |
13962 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20))) | 14111 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20))) |
13963 (set (mem:SI (reg:SI sp)) (reg:SI r4)) | 14112 (set (mem:SI (reg:SI sp)) (reg:SI r4)) |
13964 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp)) | 14113 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX)) |
13965 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip)) | 14114 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY)) |
13966 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr)) | 14115 ... |
13967 ]) | 14116 ]) |
13968 | 14117 |
13969 This sequence is used both by the code to support stack unwinding for | 14118 FIXME:: In an ideal world the PRE_MODIFY would not exist and |
13970 exceptions handlers and the code to generate dwarf2 frame debugging. */ | 14119 instead we'd have a parallel expression detailing all |
14120 the stores to the various memory addresses so that debug | |
14121 information is more up-to-date. Remember however while writing | |
14122 this to take care of the constraints with the push instruction. | |
14123 | |
14124 Note also that this has to be taken care of for the VFP registers. | |
14125 | |
14126 For more see PR43399. */ | |
13971 | 14127 |
13972 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs)); | 14128 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs)); |
13973 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1)); | 14129 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1)); |
13974 dwarf_par_index = 1; | 14130 dwarf_par_index = 1; |
13975 | 14131 |
13979 { | 14135 { |
13980 reg = gen_rtx_REG (SImode, i); | 14136 reg = gen_rtx_REG (SImode, i); |
13981 | 14137 |
13982 XVECEXP (par, 0, 0) | 14138 XVECEXP (par, 0, 0) |
13983 = gen_rtx_SET (VOIDmode, | 14139 = gen_rtx_SET (VOIDmode, |
13984 gen_frame_mem (BLKmode, | 14140 gen_frame_mem |
13985 gen_rtx_PRE_DEC (BLKmode, | 14141 (BLKmode, |
13986 stack_pointer_rtx)), | 14142 gen_rtx_PRE_MODIFY (Pmode, |
14143 stack_pointer_rtx, | |
14144 plus_constant | |
14145 (stack_pointer_rtx, | |
14146 -4 * num_regs)) | |
14147 ), | |
13987 gen_rtx_UNSPEC (BLKmode, | 14148 gen_rtx_UNSPEC (BLKmode, |
13988 gen_rtvec (1, reg), | 14149 gen_rtvec (1, reg), |
13989 UNSPEC_PUSH_MULT)); | 14150 UNSPEC_PUSH_MULT)); |
13990 | 14151 |
13991 if (i != PC_REGNUM) | 14152 if (i != PC_REGNUM) |
14012 | 14173 |
14013 if (i != PC_REGNUM) | 14174 if (i != PC_REGNUM) |
14014 { | 14175 { |
14015 tmp | 14176 tmp |
14016 = gen_rtx_SET (VOIDmode, | 14177 = gen_rtx_SET (VOIDmode, |
14017 gen_frame_mem (SImode, | 14178 gen_frame_mem |
14018 plus_constant (stack_pointer_rtx, | 14179 (SImode, |
14019 4 * j)), | 14180 plus_constant (stack_pointer_rtx, |
14181 4 * j)), | |
14020 reg); | 14182 reg); |
14021 RTX_FRAME_RELATED_P (tmp) = 1; | 14183 RTX_FRAME_RELATED_P (tmp) = 1; |
14022 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp; | 14184 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp; |
14023 } | 14185 } |
14024 | 14186 |
14066 | 14228 |
14067 reg = gen_rtx_REG (XFmode, base_reg++); | 14229 reg = gen_rtx_REG (XFmode, base_reg++); |
14068 | 14230 |
14069 XVECEXP (par, 0, 0) | 14231 XVECEXP (par, 0, 0) |
14070 = gen_rtx_SET (VOIDmode, | 14232 = gen_rtx_SET (VOIDmode, |
14071 gen_frame_mem (BLKmode, | 14233 gen_frame_mem |
14072 gen_rtx_PRE_DEC (BLKmode, | 14234 (BLKmode, |
14073 stack_pointer_rtx)), | 14235 gen_rtx_PRE_MODIFY (Pmode, |
14236 stack_pointer_rtx, | |
14237 plus_constant | |
14238 (stack_pointer_rtx, | |
14239 -12 * count)) | |
14240 ), | |
14074 gen_rtx_UNSPEC (BLKmode, | 14241 gen_rtx_UNSPEC (BLKmode, |
14075 gen_rtvec (1, reg), | 14242 gen_rtvec (1, reg), |
14076 UNSPEC_PUSH_MULT)); | 14243 UNSPEC_PUSH_MULT)); |
14077 tmp = gen_rtx_SET (VOIDmode, | 14244 tmp = gen_rtx_SET (VOIDmode, |
14078 gen_frame_mem (XFmode, stack_pointer_rtx), reg); | 14245 gen_frame_mem (XFmode, stack_pointer_rtx), reg); |
14427 rtx insn; | 14594 rtx insn; |
14428 | 14595 |
14429 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--) | 14596 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--) |
14430 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg]) | 14597 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg]) |
14431 { | 14598 { |
14432 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx); | 14599 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx); |
14433 insn = gen_rtx_MEM (V2SImode, insn); | 14600 insn = gen_rtx_MEM (V2SImode, insn); |
14434 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg)); | 14601 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg)); |
14435 RTX_FRAME_RELATED_P (insn) = 1; | 14602 RTX_FRAME_RELATED_P (insn) = 1; |
14436 saved_size += 8; | 14603 saved_size += 8; |
14437 } | 14604 } |
14441 if (TARGET_FPA_EMU2) | 14608 if (TARGET_FPA_EMU2) |
14442 { | 14609 { |
14443 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) | 14610 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--) |
14444 if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) | 14611 if (df_regs_ever_live_p (reg) && !call_used_regs[reg]) |
14445 { | 14612 { |
14446 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx); | 14613 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx); |
14447 insn = gen_rtx_MEM (XFmode, insn); | 14614 insn = gen_rtx_MEM (XFmode, insn); |
14448 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg)); | 14615 insn = emit_set_insn (insn, gen_rtx_REG (XFmode, reg)); |
14449 RTX_FRAME_RELATED_P (insn) = 1; | 14616 RTX_FRAME_RELATED_P (insn) = 1; |
14450 saved_size += 12; | 14617 saved_size += 12; |
14451 } | 14618 } |
14964 | 15131 |
14965 case 'N': | 15132 case 'N': |
14966 { | 15133 { |
14967 REAL_VALUE_TYPE r; | 15134 REAL_VALUE_TYPE r; |
14968 REAL_VALUE_FROM_CONST_DOUBLE (r, x); | 15135 REAL_VALUE_FROM_CONST_DOUBLE (r, x); |
14969 r = REAL_VALUE_NEGATE (r); | 15136 r = real_value_negate (&r); |
14970 fprintf (stream, "%s", fp_const_from_val (&r)); | 15137 fprintf (stream, "%s", fp_const_from_val (&r)); |
14971 } | 15138 } |
14972 return; | 15139 return; |
14973 | 15140 |
14974 /* An integer or symbol address without a preceding # sign. */ | 15141 /* An integer or symbol address without a preceding # sign. */ |
19234 default: | 19401 default: |
19235 gcc_unreachable (); | 19402 gcc_unreachable (); |
19236 } | 19403 } |
19237 } | 19404 } |
19238 | 19405 |
19406 /* Given the stack offsets and register mask in OFFSETS, decide | |
19407 how many additional registers to push instead of subtracting | |
19408 a constant from SP. Returns that register count, or 0 when no extra registers should be pushed. */ | |
19409 static int | |
19410 thumb1_extra_regs_pushed (arm_stack_offsets *offsets) | |
19411 { | |
19412 HOST_WIDE_INT amount = offsets->outgoing_args - offsets->saved_regs; /* Byte count that would otherwise be subtracted from SP. */ | |
19413 unsigned long live_regs_mask = offsets->saved_regs_mask; | |
19414 /* Extract a mask of the ones we can give to the Thumb's push instruction. */ | |
19415 unsigned long l_mask = live_regs_mask & 0x40ff; /* Bits 0-7 plus bit 14 (register 14 is lr). */ | |
19416 /* Then count how many other high registers will need to be pushed. */ | |
19417 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00); /* Bits 8-11 only. */ | |
19418 int n_free; | |
19419 | |
19420 /* If the stack frame size is 512 exactly, we can save one load | |
19421 instruction, which should make this a win even when optimizing | |
19422 for speed. */ | |
19423 if (!optimize_size && amount != 512) | |
19424 return 0; | |
19425 | |
19426 /* Can't do this if there are high registers to push, or if we | |
19427 are not going to do a push at all. */ | |
19428 if (high_regs_pushed != 0 || l_mask == 0) | |
19429 return 0; | |
19430 | |
19431 /* Don't do this if thumb1_expand_prologue wants to emit instructions | |
19432 between the push and the stack frame allocation. */ | |
19433 if ((flag_pic && arm_pic_register != INVALID_REGNUM) | |
19434 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)) | |
19435 return 0; | |
19436 | |
19437 for (n_free = 0; n_free < 8 && !(live_regs_mask & 1); live_regs_mask >>= 1) | |
19438 n_free++; /* Counts consecutive clear mask bits from bit 0 upward, at most 8; only the local copy of the mask is shifted. */ | |
19439 | |
19440 if (n_free == 0) | |
19441 return 0; | |
19442 gcc_assert (amount / 4 * 4 == amount); /* AMOUNT must be a multiple of 4. */ | |
19443 | |
19444 if (amount >= 512 && (amount - n_free * 4) < 512) | |
19445 return (amount - 508) / 4; /* Leaves exactly 508 bytes for the explicit SP adjustment. */ | |
19446 if (amount <= n_free * 4) | |
19447 return amount / 4; /* The extra pushes cover the whole amount. */ | |
19448 return 0; | |
19449 } | |
19450 | |
19239 /* Generate the rest of a function's prologue. */ | 19451 /* Generate the rest of a function's prologue. */ |
19240 void | 19452 void |
19241 thumb1_expand_prologue (void) | 19453 thumb1_expand_prologue (void) |
19242 { | 19454 { |
19243 rtx insn, dwarf; | 19455 rtx insn, dwarf; |
19270 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0) | 19482 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0) |
19271 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM), | 19483 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM), |
19272 stack_pointer_rtx); | 19484 stack_pointer_rtx); |
19273 | 19485 |
19274 amount = offsets->outgoing_args - offsets->saved_regs; | 19486 amount = offsets->outgoing_args - offsets->saved_regs; |
19487 amount -= 4 * thumb1_extra_regs_pushed (offsets); | |
19275 if (amount) | 19488 if (amount) |
19276 { | 19489 { |
19277 if (amount < 512) | 19490 if (amount < 512) |
19278 { | 19491 { |
19279 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, | 19492 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, |
19574 to push some high registers then delay our first push. This will just | 19787 to push some high registers then delay our first push. This will just |
19575 be a push of LR and we can combine it with the push of the first high | 19788 be a push of LR and we can combine it with the push of the first high |
19576 register. */ | 19789 register. */ |
19577 else if ((l_mask & 0xff) != 0 | 19790 else if ((l_mask & 0xff) != 0 |
19578 || (high_regs_pushed == 0 && l_mask)) | 19791 || (high_regs_pushed == 0 && l_mask)) |
19579 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask); | 19792 { |
19793 unsigned long mask = l_mask; | |
19794 mask |= (1 << thumb1_extra_regs_pushed (offsets)) - 1; | |
19795 thumb_pushpop (f, mask, 1, &cfa_offset, mask); | |
19796 } | |
19580 | 19797 |
19581 if (high_regs_pushed) | 19798 if (high_regs_pushed) |
19582 { | 19799 { |
19583 unsigned pushable_regs; | 19800 unsigned pushable_regs; |
19584 unsigned next_hi_reg; | 19801 unsigned next_hi_reg; |
20727 return true; | 20944 return true; |
20728 | 20945 |
20729 return false; | 20946 return false; |
20730 } | 20947 } |
20731 | 20948 |
20949 /* Implements target hook small_register_classes_for_mode_p. The result is TARGET_THUMB1 for every MODE; the argument is ignored. */ | |
20950 bool | |
20951 arm_small_register_classes_for_mode_p (enum machine_mode mode ATTRIBUTE_UNUSED) | |
20952 { | |
20953 return TARGET_THUMB1; | |
20954 } | |
20955 | |
20732 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal | 20956 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal |
20733 ARM insns and therefore guarantee that the shift count is modulo 256. | 20957 ARM insns and therefore guarantee that the shift count is modulo 256. |
20734 DImode shifts (those implemented by lib1funcs.asm or by optabs.c) | 20958 DImode shifts (those implemented by lib1funcs.asm or by optabs.c) |
20735 guarantee no particular behavior for out-of-range counts. */ | 20959 guarantee no particular behavior for out-of-range counts. */ |
20736 | 20960 |
20969 abort (); | 21193 abort (); |
20970 reg = REGNO (XEXP (e1, 0)); | 21194 reg = REGNO (XEXP (e1, 0)); |
20971 offset = INTVAL (XEXP (e1, 1)); | 21195 offset = INTVAL (XEXP (e1, 1)); |
20972 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n", | 21196 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n", |
20973 HARD_FRAME_POINTER_REGNUM, reg, | 21197 HARD_FRAME_POINTER_REGNUM, reg, |
20974 INTVAL (XEXP (e1, 1))); | 21198 offset); |
20975 } | 21199 } |
20976 else if (GET_CODE (e1) == REG) | 21200 else if (GET_CODE (e1) == REG) |
20977 { | 21201 { |
20978 reg = REGNO (e1); | 21202 reg = REGNO (e1); |
20979 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n", | 21203 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n", |
21205 fputs ("-(", fp); | 21429 fputs ("-(", fp); |
21206 output_addr_const (fp, XVECEXP (x, 0, 0)); | 21430 output_addr_const (fp, XVECEXP (x, 0, 0)); |
21207 fputc (')', fp); | 21431 fputc (')', fp); |
21208 return TRUE; | 21432 return TRUE; |
21209 } | 21433 } |
21434 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET) | |
21435 { | |
21436 output_addr_const (fp, XVECEXP (x, 0, 0)); | |
21437 if (GOT_PCREL) | |
21438 fputs ("+.", fp); | |
21439 fputs ("-(", fp); | |
21440 output_addr_const (fp, XVECEXP (x, 0, 1)); | |
21441 fputc (')', fp); | |
21442 return TRUE; | |
21443 } | |
21210 else if (GET_CODE (x) == CONST_VECTOR) | 21444 else if (GET_CODE (x) == CONST_VECTOR) |
21211 return arm_emit_vector_const (fp, x); | 21445 return arm_emit_vector_const (fp, x); |
21212 | 21446 |
21213 return FALSE; | 21447 return FALSE; |
21214 } | 21448 } |
21249 /* Output a Thumb-1 casesi dispatch sequence. */ | 21483 /* Output a Thumb-1 casesi dispatch sequence. */ |
21250 const char * | 21484 const char * |
21251 thumb1_output_casesi (rtx *operands) | 21485 thumb1_output_casesi (rtx *operands) |
21252 { | 21486 { |
21253 rtx diff_vec = PATTERN (next_real_insn (operands[0])); | 21487 rtx diff_vec = PATTERN (next_real_insn (operands[0])); |
21254 addr_diff_vec_flags flags; | |
21255 | 21488 |
21256 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); | 21489 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); |
21257 | |
21258 flags = ADDR_DIFF_VEC_FLAGS (diff_vec); | |
21259 | 21490 |
21260 switch (GET_MODE(diff_vec)) | 21491 switch (GET_MODE(diff_vec)) |
21261 { | 21492 { |
21262 case QImode: | 21493 case QImode: |
21263 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ? | 21494 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ? |
21370 has to be mangled as if it is in the "std" namespace. */ | 21601 has to be mangled as if it is in the "std" namespace. */ |
21371 if (TARGET_AAPCS_BASED | 21602 if (TARGET_AAPCS_BASED |
21372 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) | 21603 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type)) |
21373 { | 21604 { |
21374 static bool warned; | 21605 static bool warned; |
21375 if (!warned && warn_psabi) | 21606 if (!warned && warn_psabi && !in_system_header) |
21376 { | 21607 { |
21377 warned = true; | 21608 warned = true; |
21378 inform (input_location, | 21609 inform (input_location, |
21379 "the mangling of %<va_list%> has changed in GCC 4.4"); | 21610 "the mangling of %<va_list%> has changed in GCC 4.4"); |
21380 } | 21611 } |