Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/i386/i386.c @ 63:b7f97abdc517 gcc-4.6-20100522
update gcc from gcc-4.5.0 to gcc-4.6
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 24 May 2010 12:47:05 +0900 |
parents | 77e2b8dfacca |
children | f6334be47118 |
comparison
equal
deleted
inserted
replaced
56:3c8a44c06a95 | 63:b7f97abdc517 |
---|---|
1 /* Subroutines used for code generation on IA-32. | 1 /* Subroutines used for code generation on IA-32. |
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, | 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, |
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009 | 3 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 |
4 Free Software Foundation, Inc. | 4 Free Software Foundation, Inc. |
5 | 5 |
6 This file is part of GCC. | 6 This file is part of GCC. |
7 | 7 |
8 GCC is free software; you can redistribute it and/or modify | 8 GCC is free software; you can redistribute it and/or modify |
26 #include "rtl.h" | 26 #include "rtl.h" |
27 #include "tree.h" | 27 #include "tree.h" |
28 #include "tm_p.h" | 28 #include "tm_p.h" |
29 #include "regs.h" | 29 #include "regs.h" |
30 #include "hard-reg-set.h" | 30 #include "hard-reg-set.h" |
31 #include "real.h" | |
32 #include "insn-config.h" | 31 #include "insn-config.h" |
33 #include "conditions.h" | 32 #include "conditions.h" |
34 #include "output.h" | 33 #include "output.h" |
35 #include "insn-codes.h" | 34 #include "insn-codes.h" |
36 #include "insn-attr.h" | 35 #include "insn-attr.h" |
51 #include "dwarf2.h" | 50 #include "dwarf2.h" |
52 #include "df.h" | 51 #include "df.h" |
53 #include "tm-constrs.h" | 52 #include "tm-constrs.h" |
54 #include "params.h" | 53 #include "params.h" |
55 #include "cselib.h" | 54 #include "cselib.h" |
55 #include "debug.h" | |
56 #include "dwarf2out.h" | |
56 | 57 |
57 static rtx legitimize_dllimport_symbol (rtx, bool); | 58 static rtx legitimize_dllimport_symbol (rtx, bool); |
58 | 59 |
59 #ifndef CHECK_STACK_LIMIT | 60 #ifndef CHECK_STACK_LIMIT |
60 #define CHECK_STACK_LIMIT (-1) | 61 #define CHECK_STACK_LIMIT (-1) |
815 2, /* vec_store_cost. */ | 816 2, /* vec_store_cost. */ |
816 2, /* cond_taken_branch_cost. */ | 817 2, /* cond_taken_branch_cost. */ |
817 1, /* cond_not_taken_branch_cost. */ | 818 1, /* cond_not_taken_branch_cost. */ |
818 }; | 819 }; |
819 | 820 |
821 struct processor_costs bdver1_cost = { | |
822 COSTS_N_INSNS (1), /* cost of an add instruction */ | |
823 COSTS_N_INSNS (2), /* cost of a lea instruction */ | |
824 COSTS_N_INSNS (1), /* variable shift costs */ | |
825 COSTS_N_INSNS (1), /* constant shift costs */ | |
826 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */ | |
827 COSTS_N_INSNS (4), /* HI */ | |
828 COSTS_N_INSNS (3), /* SI */ | |
829 COSTS_N_INSNS (4), /* DI */ | |
830 COSTS_N_INSNS (5)}, /* other */ | |
831 0, /* cost of multiply per each bit set */ | |
832 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */ | |
833 COSTS_N_INSNS (35), /* HI */ | |
834 COSTS_N_INSNS (51), /* SI */ | |
835 COSTS_N_INSNS (83), /* DI */ | |
836 COSTS_N_INSNS (83)}, /* other */ | |
837 COSTS_N_INSNS (1), /* cost of movsx */ | |
838 COSTS_N_INSNS (1), /* cost of movzx */ | |
839 8, /* "large" insn */ | |
840 9, /* MOVE_RATIO */ | |
841 4, /* cost for loading QImode using movzbl */ | |
842 {3, 4, 3}, /* cost of loading integer registers | |
843 in QImode, HImode and SImode. | |
844 Relative to reg-reg move (2). */ | |
845 {3, 4, 3}, /* cost of storing integer registers */ | |
846 4, /* cost of reg,reg fld/fst */ | |
847 {4, 4, 12}, /* cost of loading fp registers | |
848 in SFmode, DFmode and XFmode */ | |
849 {6, 6, 8}, /* cost of storing fp registers | |
850 in SFmode, DFmode and XFmode */ | |
851 2, /* cost of moving MMX register */ | |
852 {3, 3}, /* cost of loading MMX registers | |
853 in SImode and DImode */ | |
854 {4, 4}, /* cost of storing MMX registers | |
855 in SImode and DImode */ | |
856 2, /* cost of moving SSE register */ | |
857 {4, 4, 3}, /* cost of loading SSE registers | |
858 in SImode, DImode and TImode */ | |
859 {4, 4, 5}, /* cost of storing SSE registers | |
860 in SImode, DImode and TImode */ | |
861 3, /* MMX or SSE register to integer */ | |
862 /* On K8 | |
863 MOVD reg64, xmmreg Double FSTORE 4 | |
864 MOVD reg32, xmmreg Double FSTORE 4 | |
865 On AMDFAM10 | |
866 MOVD reg64, xmmreg Double FADD 3 | |
867 1/1 1/1 | |
868 MOVD reg32, xmmreg Double FADD 3 | |
869 1/1 1/1 */ | |
870 64, /* size of l1 cache. */ | |
871 1024, /* size of l2 cache. */ | |
872 64, /* size of prefetch block */ | |
873 /* New AMD processors never drop prefetches; if they cannot be performed | |
874 immediately, they are queued. We set number of simultaneous prefetches | |
875 to a large constant to reflect this (it is probably not a good idea to | |
876 leave the number of prefetches unlimited, as their execution also takes some | |
877 time). */ | |
878 100, /* number of parallel prefetches */ | |
879 2, /* Branch cost */ | |
880 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */ | |
881 COSTS_N_INSNS (4), /* cost of FMUL instruction. */ | |
882 COSTS_N_INSNS (19), /* cost of FDIV instruction. */ | |
883 COSTS_N_INSNS (2), /* cost of FABS instruction. */ | |
884 COSTS_N_INSNS (2), /* cost of FCHS instruction. */ | |
885 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */ | |
886 | |
887 /* BDVER1 has an optimized REP instruction for medium-sized blocks, but for | |
888 very small blocks it is better to use a loop. For large blocks, a libcall can | |
889 do non-temporal accesses and beat inline code considerably. */ | |
890 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}}, | |
891 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}, | |
892 {{libcall, {{8, loop}, {24, unrolled_loop}, | |
893 {2048, rep_prefix_4_byte}, {-1, libcall}}}, | |
894 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}, | |
895 4, /* scalar_stmt_cost. */ | |
896 2, /* scalar load_cost. */ | |
897 2, /* scalar_store_cost. */ | |
898 6, /* vec_stmt_cost. */ | |
899 0, /* vec_to_scalar_cost. */ | |
900 2, /* scalar_to_vec_cost. */ | |
901 2, /* vec_align_load_cost. */ | |
902 2, /* vec_unalign_load_cost. */ | |
903 2, /* vec_store_cost. */ | |
904 2, /* cond_taken_branch_cost. */ | |
905 1, /* cond_not_taken_branch_cost. */ | |
906 }; | |
907 | |
820 static const | 908 static const |
821 struct processor_costs pentium4_cost = { | 909 struct processor_costs pentium4_cost = { |
822 COSTS_N_INSNS (1), /* cost of an add instruction */ | 910 COSTS_N_INSNS (1), /* cost of an add instruction */ |
823 COSTS_N_INSNS (3), /* cost of a lea instruction */ | 911 COSTS_N_INSNS (3), /* cost of a lea instruction */ |
824 COSTS_N_INSNS (4), /* variable shift costs */ | 912 COSTS_N_INSNS (4), /* variable shift costs */ |
1272 #define m_K6_GEODE (m_K6 | m_GEODE) | 1360 #define m_K6_GEODE (m_K6 | m_GEODE) |
1273 #define m_K8 (1<<PROCESSOR_K8) | 1361 #define m_K8 (1<<PROCESSOR_K8) |
1274 #define m_ATHLON (1<<PROCESSOR_ATHLON) | 1362 #define m_ATHLON (1<<PROCESSOR_ATHLON) |
1275 #define m_ATHLON_K8 (m_K8 | m_ATHLON) | 1363 #define m_ATHLON_K8 (m_K8 | m_ATHLON) |
1276 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10) | 1364 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10) |
1277 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10) | 1365 #define m_BDVER1 (1<<PROCESSOR_BDVER1) |
1366 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10 | m_BDVER1) | |
1278 | 1367 |
1279 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32) | 1368 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32) |
1280 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64) | 1369 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64) |
1281 | 1370 |
1282 /* Generic instruction choice should be common subset of supported CPUs | 1371 /* Generic instruction choice should be common subset of supported CPUs |
1317 | 1406 |
1318 /* X86_TUNE_DOUBLE_WITH_ADD */ | 1407 /* X86_TUNE_DOUBLE_WITH_ADD */ |
1319 ~m_386, | 1408 ~m_386, |
1320 | 1409 |
1321 /* X86_TUNE_USE_SAHF */ | 1410 /* X86_TUNE_USE_SAHF */ |
1322 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4 | 1411 m_ATOM | m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER1 | m_PENT4 |
1323 | m_NOCONA | m_CORE2 | m_GENERIC, | 1412 | m_NOCONA | m_CORE2 | m_GENERIC, |
1324 | 1413 |
1325 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid | 1414 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid |
1326 partial dependencies. */ | 1415 partial dependencies. */ |
1327 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA | 1416 m_AMD_MULTIPLE | m_ATOM | m_PPRO | m_PENT4 | m_NOCONA |
1421 results in one extra microop on 64bit SSE units. Experimental results | 1510 results in one extra microop on 64bit SSE units. Experimental results |
1422 show that disabling this option on P4 brings over 20% SPECfp regression, | 1511 show that disabling this option on P4 brings over 20% SPECfp regression, |
1423 while enabling it on K8 brings roughly 2.4% regression that can be partly | 1512 while enabling it on K8 brings roughly 2.4% regression that can be partly |
1424 masked by careful scheduling of moves. */ | 1513 masked by careful scheduling of moves. */ |
1425 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | 1514 m_ATOM | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC |
1426 | m_AMDFAM10, | 1515 | m_AMDFAM10 | m_BDVER1, |
1427 | 1516 |
1428 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */ | 1517 /* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */ |
1429 m_AMDFAM10, | 1518 m_AMDFAM10 | m_BDVER1, |
1519 | |
1520 /* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */ | |
1521 m_BDVER1, | |
1522 | |
1523 /* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */ | |
1524 m_BDVER1, | |
1430 | 1525 |
1431 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies | 1526 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies |
1432 are resolved on SSE register parts instead of whole registers, so we may | 1527 are resolved on SSE register parts instead of whole registers, so we may |
1433 maintain just lower part of scalar values in proper format leaving the | 1528 maintain just lower part of scalar values in proper format leaving the |
1434 upper part undefined. */ | 1529 upper part undefined. */ |
1454 | 1549 |
1455 /* X86_TUNE_USE_FFREEP */ | 1550 /* X86_TUNE_USE_FFREEP */ |
1456 m_AMD_MULTIPLE, | 1551 m_AMD_MULTIPLE, |
1457 | 1552 |
1458 /* X86_TUNE_INTER_UNIT_MOVES */ | 1553 /* X86_TUNE_INTER_UNIT_MOVES */ |
1459 ~(m_AMD_MULTIPLE | m_ATOM | m_GENERIC), | 1554 ~(m_AMD_MULTIPLE | m_GENERIC), |
1460 | 1555 |
1461 /* X86_TUNE_INTER_UNIT_CONVERSIONS */ | 1556 /* X86_TUNE_INTER_UNIT_CONVERSIONS */ |
1462 ~(m_AMDFAM10), | 1557 ~(m_AMDFAM10 | m_BDVER1), |
1463 | 1558 |
1464 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more | 1559 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more |
1465 than 4 branch instructions in the 16 byte window. */ | 1560 than 4 branch instructions in the 16 byte window. */ |
1466 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | 1561 m_ATOM | m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 |
1467 | m_GENERIC, | 1562 | m_GENERIC, |
1493 and SImode multiply, but 386 and 486 do HImode multiply faster. */ | 1588 and SImode multiply, but 386 and 486 do HImode multiply faster. */ |
1494 ~(m_386 | m_486), | 1589 ~(m_386 | m_486), |
1495 | 1590 |
1496 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is | 1591 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is |
1497 vector path on AMD machines. */ | 1592 vector path on AMD machines. */ |
1498 m_K8 | m_GENERIC64 | m_AMDFAM10, | 1593 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1, |
1499 | 1594 |
1500 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD | 1595 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD |
1501 machines. */ | 1596 machines. */ |
1502 m_K8 | m_GENERIC64 | m_AMDFAM10, | 1597 m_K8 | m_GENERIC64 | m_AMDFAM10 | m_BDVER1, |
1503 | 1598 |
1504 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR | 1599 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR |
1505 than a MOV. */ | 1600 than a MOV. */ |
1506 m_PENT, | 1601 m_PENT, |
1507 | 1602 |
1523 m_AMDFAM10, | 1618 m_AMDFAM10, |
1524 | 1619 |
1525 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction | 1620 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction |
1526 with a subsequent conditional jump instruction into a single | 1621 with a subsequent conditional jump instruction into a single |
1527 compare-and-branch uop. */ | 1622 compare-and-branch uop. */ |
1528 m_CORE2, | 1623 m_CORE2 | m_BDVER1, |
1529 | 1624 |
1530 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag | 1625 /* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag |
1531 will impact LEA instruction selection. */ | 1626 will impact LEA instruction selection. */ |
1532 m_ATOM, | 1627 m_ATOM, |
1533 }; | 1628 }; |
1876 static bool ext_80387_constants_init = 0; | 1971 static bool ext_80387_constants_init = 0; |
1877 | 1972 |
1878 | 1973 |
1879 static struct machine_function * ix86_init_machine_status (void); | 1974 static struct machine_function * ix86_init_machine_status (void); |
1880 static rtx ix86_function_value (const_tree, const_tree, bool); | 1975 static rtx ix86_function_value (const_tree, const_tree, bool); |
1976 static bool ix86_function_value_regno_p (const unsigned int); | |
1881 static rtx ix86_static_chain (const_tree, bool); | 1977 static rtx ix86_static_chain (const_tree, bool); |
1882 static int ix86_function_regparm (const_tree, const_tree); | 1978 static int ix86_function_regparm (const_tree, const_tree); |
1883 static void ix86_compute_frame_layout (struct ix86_frame *); | 1979 static void ix86_compute_frame_layout (struct ix86_frame *); |
1884 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode, | 1980 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode, |
1885 rtx, rtx, int); | 1981 rtx, rtx, int); |
1908 static unsigned int ix86_minimum_incoming_stack_boundary (bool); | 2004 static unsigned int ix86_minimum_incoming_stack_boundary (bool); |
1909 | 2005 |
1910 static enum calling_abi ix86_function_abi (const_tree); | 2006 static enum calling_abi ix86_function_abi (const_tree); |
1911 | 2007 |
1912 | 2008 |
2009 #ifndef SUBTARGET32_DEFAULT_CPU | |
2010 #define SUBTARGET32_DEFAULT_CPU "i386" | |
2011 #endif | |
2012 | |
1913 /* The svr4 ABI for the i386 says that records and unions are returned | 2013 /* The svr4 ABI for the i386 says that records and unions are returned |
1914 in memory. */ | 2014 in memory. */ |
1915 #ifndef DEFAULT_PCC_STRUCT_RETURN | 2015 #ifndef DEFAULT_PCC_STRUCT_RETURN |
1916 #define DEFAULT_PCC_STRUCT_RETURN 1 | 2016 #define DEFAULT_PCC_STRUCT_RETURN 1 |
1917 #endif | 2017 #endif |
2058 {&nocona_cost, 0, 0, 0, 0, 0}, | 2158 {&nocona_cost, 0, 0, 0, 0, 0}, |
2059 {&core2_cost, 16, 10, 16, 10, 16}, | 2159 {&core2_cost, 16, 10, 16, 10, 16}, |
2060 {&generic32_cost, 16, 7, 16, 7, 16}, | 2160 {&generic32_cost, 16, 7, 16, 7, 16}, |
2061 {&generic64_cost, 16, 10, 16, 10, 16}, | 2161 {&generic64_cost, 16, 10, 16, 10, 16}, |
2062 {&amdfam10_cost, 32, 24, 32, 7, 32}, | 2162 {&amdfam10_cost, 32, 24, 32, 7, 32}, |
2163 {&bdver1_cost, 32, 24, 32, 7, 32}, | |
2063 {&atom_cost, 16, 7, 16, 7, 16} | 2164 {&atom_cost, 16, 7, 16, 7, 16} |
2064 }; | 2165 }; |
2065 | 2166 |
2066 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = | 2167 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = |
2067 { | 2168 { |
2084 "k6-2", | 2185 "k6-2", |
2085 "k6-3", | 2186 "k6-3", |
2086 "athlon", | 2187 "athlon", |
2087 "athlon-4", | 2188 "athlon-4", |
2088 "k8", | 2189 "k8", |
2089 "amdfam10" | 2190 "amdfam10", |
2191 "bdver1" | |
2090 }; | 2192 }; |
2091 | 2193 |
2092 /* Implement TARGET_HANDLE_OPTION. */ | 2194 /* Implement TARGET_HANDLE_OPTION. */ |
2093 | 2195 |
2094 static bool | 2196 static bool |
2398 default: | 2500 default: |
2399 return true; | 2501 return true; |
2400 } | 2502 } |
2401 } | 2503 } |
2402 | 2504 |
2403 /* Return a string the documents the current -m options. The caller is | 2505 /* Return a string that documents the current -m options. The caller is |
2404 responsible for freeing the string. */ | 2506 responsible for freeing the string. */ |
2405 | 2507 |
2406 static char * | 2508 static char * |
2407 ix86_target_string (int isa, int flags, const char *arch, const char *tune, | 2509 ix86_target_string (int isa, int flags, const char *arch, const char *tune, |
2408 const char *fpmath, bool add_nl_p) | 2510 const char *fpmath, bool add_nl_p) |
2417 preceding options while match those first. */ | 2519 preceding options while match those first. */ |
2418 static struct ix86_target_opts isa_opts[] = | 2520 static struct ix86_target_opts isa_opts[] = |
2419 { | 2521 { |
2420 { "-m64", OPTION_MASK_ISA_64BIT }, | 2522 { "-m64", OPTION_MASK_ISA_64BIT }, |
2421 { "-mfma4", OPTION_MASK_ISA_FMA4 }, | 2523 { "-mfma4", OPTION_MASK_ISA_FMA4 }, |
2524 { "-mfma", OPTION_MASK_ISA_FMA }, | |
2422 { "-mxop", OPTION_MASK_ISA_XOP }, | 2525 { "-mxop", OPTION_MASK_ISA_XOP }, |
2423 { "-mlwp", OPTION_MASK_ISA_LWP }, | 2526 { "-mlwp", OPTION_MASK_ISA_LWP }, |
2424 { "-msse4a", OPTION_MASK_ISA_SSE4A }, | 2527 { "-msse4a", OPTION_MASK_ISA_SSE4A }, |
2425 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, | 2528 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, |
2426 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, | 2529 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, |
2503 } | 2606 } |
2504 | 2607 |
2505 if (isa && add_nl_p) | 2608 if (isa && add_nl_p) |
2506 { | 2609 { |
2507 opts[num++][0] = isa_other; | 2610 opts[num++][0] = isa_other; |
2508 sprintf (isa_other, "(other isa: 0x%x)", isa); | 2611 sprintf (isa_other, "(other isa: %#x)", isa); |
2509 } | 2612 } |
2510 | 2613 |
2511 /* Add flag options. */ | 2614 /* Add flag options. */ |
2512 for (i = 0; i < ARRAY_SIZE (flag_opts); i++) | 2615 for (i = 0; i < ARRAY_SIZE (flag_opts); i++) |
2513 { | 2616 { |
2519 } | 2622 } |
2520 | 2623 |
2521 if (flags && add_nl_p) | 2624 if (flags && add_nl_p) |
2522 { | 2625 { |
2523 opts[num++][0] = target_other; | 2626 opts[num++][0] = target_other; |
2524 sprintf (target_other, "(other flags: 0x%x)", isa); | 2627 sprintf (target_other, "(other flags: %#x)", flags); |
2525 } | 2628 } |
2526 | 2629 |
2527 /* Add -fpmath= option. */ | 2630 /* Add -fpmath= option. */ |
2528 if (fpmath) | 2631 if (fpmath) |
2529 { | 2632 { |
2619 void | 2722 void |
2620 override_options (bool main_args_p) | 2723 override_options (bool main_args_p) |
2621 { | 2724 { |
2622 int i; | 2725 int i; |
2623 unsigned int ix86_arch_mask, ix86_tune_mask; | 2726 unsigned int ix86_arch_mask, ix86_tune_mask; |
2727 const bool ix86_tune_specified = (ix86_tune_string != NULL); | |
2624 const char *prefix; | 2728 const char *prefix; |
2625 const char *suffix; | 2729 const char *suffix; |
2626 const char *sw; | 2730 const char *sw; |
2627 | 2731 |
2628 /* Comes from final.c -- no real reason to change it. */ | 2732 /* Comes from final.c -- no real reason to change it. */ |
2740 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | 2844 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
2741 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM}, | 2845 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM}, |
2742 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10, | 2846 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10, |
2743 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | 2847 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE |
2744 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM}, | 2848 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM}, |
2849 {"bdver1", PROCESSOR_BDVER1, CPU_BDVER1, | |
2850 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE | |
2851 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | |
2852 | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | |
2853 | PTA_PCLMUL | PTA_AVX | PTA_FMA4 | PTA_XOP | PTA_LWP}, | |
2745 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO, | 2854 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO, |
2746 0 /* flags are only used for -march switch. */ }, | 2855 0 /* flags are only used for -march switch. */ }, |
2747 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64, | 2856 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64, |
2748 PTA_64BIT /* flags are only used for -march switch. */ }, | 2857 PTA_64BIT /* flags are only used for -march switch. */ }, |
2749 }; | 2858 }; |
2819 else if (!main_args_p | 2928 else if (!main_args_p |
2820 && (!strcmp (ix86_tune_string, "generic32") | 2929 && (!strcmp (ix86_tune_string, "generic32") |
2821 || !strcmp (ix86_tune_string, "generic64"))) | 2930 || !strcmp (ix86_tune_string, "generic64"))) |
2822 ; | 2931 ; |
2823 else if (!strncmp (ix86_tune_string, "generic", 7)) | 2932 else if (!strncmp (ix86_tune_string, "generic", 7)) |
2824 error ("bad value (%s) for %stune=%s %s", | 2933 error ("bad value (%s) for %stune=%s %s", |
2825 ix86_tune_string, prefix, suffix, sw); | 2934 ix86_tune_string, prefix, suffix, sw); |
2935 else if (!strcmp (ix86_tune_string, "x86-64")) | |
2936 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use " | |
2937 "%stune=k8%s or %stune=generic%s instead as appropriate.", | |
2938 prefix, suffix, prefix, suffix, prefix, suffix); | |
2826 } | 2939 } |
2827 else | 2940 else |
2828 { | 2941 { |
2829 if (ix86_arch_string) | 2942 if (ix86_arch_string) |
2830 ix86_tune_string = ix86_arch_string; | 2943 ix86_tune_string = ix86_arch_string; |
2844 ix86_tune_string = "generic64"; | 2957 ix86_tune_string = "generic64"; |
2845 else | 2958 else |
2846 ix86_tune_string = "generic32"; | 2959 ix86_tune_string = "generic32"; |
2847 } | 2960 } |
2848 } | 2961 } |
2962 | |
2849 if (ix86_stringop_string) | 2963 if (ix86_stringop_string) |
2850 { | 2964 { |
2851 if (!strcmp (ix86_stringop_string, "rep_byte")) | 2965 if (!strcmp (ix86_stringop_string, "rep_byte")) |
2852 stringop_alg = rep_prefix_1_byte; | 2966 stringop_alg = rep_prefix_1_byte; |
2853 else if (!strcmp (ix86_stringop_string, "libcall")) | 2967 else if (!strcmp (ix86_stringop_string, "libcall")) |
2866 stringop_alg = unrolled_loop; | 2980 stringop_alg = unrolled_loop; |
2867 else | 2981 else |
2868 error ("bad value (%s) for %sstringop-strategy=%s %s", | 2982 error ("bad value (%s) for %sstringop-strategy=%s %s", |
2869 ix86_stringop_string, prefix, suffix, sw); | 2983 ix86_stringop_string, prefix, suffix, sw); |
2870 } | 2984 } |
2871 if (!strcmp (ix86_tune_string, "x86-64")) | |
2872 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use " | |
2873 "%stune=k8%s or %stune=generic%s instead as appropriate.", | |
2874 prefix, suffix, prefix, suffix, prefix, suffix); | |
2875 | 2985 |
2876 if (!ix86_arch_string) | 2986 if (!ix86_arch_string) |
2877 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386"; | 2987 ix86_arch_string = TARGET_64BIT ? "x86-64" : SUBTARGET32_DEFAULT_CPU; |
2878 else | 2988 else |
2879 ix86_arch_specified = 1; | 2989 ix86_arch_specified = 1; |
2880 | |
2881 if (!strcmp (ix86_arch_string, "generic")) | |
2882 error ("generic CPU can be used only for %stune=%s %s", | |
2883 prefix, suffix, sw); | |
2884 if (!strncmp (ix86_arch_string, "generic", 7)) | |
2885 error ("bad value (%s) for %sarch=%s %s", | |
2886 ix86_arch_string, prefix, suffix, sw); | |
2887 | 2990 |
2888 /* Validate -mabi= value. */ | 2991 /* Validate -mabi= value. */ |
2889 if (ix86_abi_string) | 2992 if (ix86_abi_string) |
2890 { | 2993 { |
2891 if (strcmp (ix86_abi_string, "sysv") == 0) | 2994 if (strcmp (ix86_abi_string, "sysv") == 0) |
3030 x86_prefetch_sse = true; | 3133 x86_prefetch_sse = true; |
3031 | 3134 |
3032 break; | 3135 break; |
3033 } | 3136 } |
3034 | 3137 |
3035 if (i == pta_size) | 3138 if (!strcmp (ix86_arch_string, "generic")) |
3139 error ("generic CPU can be used only for %stune=%s %s", | |
3140 prefix, suffix, sw); | |
3141 else if (!strncmp (ix86_arch_string, "generic", 7) || i == pta_size) | |
3036 error ("bad value (%s) for %sarch=%s %s", | 3142 error ("bad value (%s) for %sarch=%s %s", |
3037 ix86_arch_string, prefix, suffix, sw); | 3143 ix86_arch_string, prefix, suffix, sw); |
3038 | 3144 |
3039 ix86_arch_mask = 1u << ix86_arch; | 3145 ix86_arch_mask = 1u << ix86_arch; |
3040 for (i = 0; i < X86_ARCH_LAST; ++i) | 3146 for (i = 0; i < X86_ARCH_LAST; ++i) |
3069 if (TARGET_CMOVE | 3175 if (TARGET_CMOVE |
3070 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))) | 3176 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))) |
3071 x86_prefetch_sse = true; | 3177 x86_prefetch_sse = true; |
3072 break; | 3178 break; |
3073 } | 3179 } |
3074 if (i == pta_size) | 3180 |
3181 if (ix86_tune_specified && i == pta_size) | |
3075 error ("bad value (%s) for %stune=%s %s", | 3182 error ("bad value (%s) for %stune=%s %s", |
3076 ix86_tune_string, prefix, suffix, sw); | 3183 ix86_tune_string, prefix, suffix, sw); |
3077 | 3184 |
3078 ix86_tune_mask = 1u << ix86_tune; | 3185 ix86_tune_mask = 1u << ix86_tune; |
3079 for (i = 0; i < X86_TUNE_LAST; ++i) | 3186 for (i = 0; i < X86_TUNE_LAST; ++i) |
3189 { | 3296 { |
3190 if (strcmp (ix86_tls_dialect_string, "gnu") == 0) | 3297 if (strcmp (ix86_tls_dialect_string, "gnu") == 0) |
3191 ix86_tls_dialect = TLS_DIALECT_GNU; | 3298 ix86_tls_dialect = TLS_DIALECT_GNU; |
3192 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0) | 3299 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0) |
3193 ix86_tls_dialect = TLS_DIALECT_GNU2; | 3300 ix86_tls_dialect = TLS_DIALECT_GNU2; |
3194 else if (strcmp (ix86_tls_dialect_string, "sun") == 0) | |
3195 ix86_tls_dialect = TLS_DIALECT_SUN; | |
3196 else | 3301 else |
3197 error ("bad value (%s) for %stls-dialect=%s %s", | 3302 error ("bad value (%s) for %stls-dialect=%s %s", |
3198 ix86_tls_dialect_string, prefix, suffix, sw); | 3303 ix86_tls_dialect_string, prefix, suffix, sw); |
3199 } | 3304 } |
3200 | 3305 |
4282 #ifdef INSN_SCHEDULING | 4387 #ifdef INSN_SCHEDULING |
4283 if (level > 1) | 4388 if (level > 1) |
4284 flag_schedule_insns = 0; | 4389 flag_schedule_insns = 0; |
4285 #endif | 4390 #endif |
4286 | 4391 |
4392 /* For -O2 and beyond, turn on -fzee for x86_64 target. */ | |
4393 if (level > 1 && TARGET_64BIT) | |
4394 flag_zee = 1; | |
4395 | |
4287 if (TARGET_MACHO) | 4396 if (TARGET_MACHO) |
4288 /* The Darwin libraries never set errno, so we might as well | 4397 /* The Darwin libraries never set errno, so we might as well |
4289 avoid calling them when that's the only reason we would. */ | 4398 avoid calling them when that's the only reason we would. */ |
4290 flag_errno_math = 0; | 4399 flag_errno_math = 0; |
4291 | 4400 |
4389 | 4498 |
4390 /* Otherwise okay. That also includes certain types of indirect calls. */ | 4499 /* Otherwise okay. That also includes certain types of indirect calls. */ |
4391 return true; | 4500 return true; |
4392 } | 4501 } |
4393 | 4502 |
4394 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm" | 4503 /* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall", |
4395 calling convention attributes; | 4504 and "sseregparm" calling convention attributes; |
4396 arguments as in struct attribute_spec.handler. */ | 4505 arguments as in struct attribute_spec.handler. */ |
4397 | 4506 |
4398 static tree | 4507 static tree |
4399 ix86_handle_cconv_attribute (tree *node, tree name, | 4508 ix86_handle_cconv_attribute (tree *node, tree name, |
4400 tree args, | 4509 tree args, |
4420 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) | 4529 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) |
4421 { | 4530 { |
4422 error ("fastcall and regparm attributes are not compatible"); | 4531 error ("fastcall and regparm attributes are not compatible"); |
4423 } | 4532 } |
4424 | 4533 |
4534 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) | |
4535 { | |
4536 error ("regparam and thiscall attributes are not compatible"); | |
4537 } | |
4538 | |
4425 cst = TREE_VALUE (args); | 4539 cst = TREE_VALUE (args); |
4426 if (TREE_CODE (cst) != INTEGER_CST) | 4540 if (TREE_CODE (cst) != INTEGER_CST) |
4427 { | 4541 { |
4428 warning (OPT_Wattributes, | 4542 warning (OPT_Wattributes, |
4429 "%qE attribute requires an integer constant argument", | 4543 "%qE attribute requires an integer constant argument", |
4441 } | 4555 } |
4442 | 4556 |
4443 if (TARGET_64BIT) | 4557 if (TARGET_64BIT) |
4444 { | 4558 { |
4445 /* Do not warn when emulating the MS ABI. */ | 4559 /* Do not warn when emulating the MS ABI. */ |
4446 if (TREE_CODE (*node) != FUNCTION_TYPE | 4560 if ((TREE_CODE (*node) != FUNCTION_TYPE |
4561 && TREE_CODE (*node) != METHOD_TYPE) | |
4447 || ix86_function_type_abi (*node) != MS_ABI) | 4562 || ix86_function_type_abi (*node) != MS_ABI) |
4448 warning (OPT_Wattributes, "%qE attribute ignored", | 4563 warning (OPT_Wattributes, "%qE attribute ignored", |
4449 name); | 4564 name); |
4450 *no_add_attrs = true; | 4565 *no_add_attrs = true; |
4451 return NULL_TREE; | 4566 return NULL_TREE; |
4464 } | 4579 } |
4465 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node))) | 4580 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node))) |
4466 { | 4581 { |
4467 error ("fastcall and regparm attributes are not compatible"); | 4582 error ("fastcall and regparm attributes are not compatible"); |
4468 } | 4583 } |
4584 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) | |
4585 { | |
4586 error ("fastcall and thiscall attributes are not compatible"); | |
4587 } | |
4469 } | 4588 } |
4470 | 4589 |
4471 /* Can combine stdcall with fastcall (redundant), regparm and | 4590 /* Can combine stdcall with fastcall (redundant), regparm and |
4472 sseregparm. */ | 4591 sseregparm. */ |
4473 else if (is_attribute_p ("stdcall", name)) | 4592 else if (is_attribute_p ("stdcall", name)) |
4478 } | 4597 } |
4479 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) | 4598 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) |
4480 { | 4599 { |
4481 error ("stdcall and fastcall attributes are not compatible"); | 4600 error ("stdcall and fastcall attributes are not compatible"); |
4482 } | 4601 } |
4602 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) | |
4603 { | |
4604 error ("stdcall and thiscall attributes are not compatible"); | |
4605 } | |
4483 } | 4606 } |
4484 | 4607 |
4485 /* Can combine cdecl with regparm and sseregparm. */ | 4608 /* Can combine cdecl with regparm and sseregparm. */ |
4486 else if (is_attribute_p ("cdecl", name)) | 4609 else if (is_attribute_p ("cdecl", name)) |
4487 { | 4610 { |
4490 error ("stdcall and cdecl attributes are not compatible"); | 4613 error ("stdcall and cdecl attributes are not compatible"); |
4491 } | 4614 } |
4492 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) | 4615 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) |
4493 { | 4616 { |
4494 error ("fastcall and cdecl attributes are not compatible"); | 4617 error ("fastcall and cdecl attributes are not compatible"); |
4618 } | |
4619 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (*node))) | |
4620 { | |
4621 error ("cdecl and thiscall attributes are not compatible"); | |
4622 } | |
4623 } | |
4624 else if (is_attribute_p ("thiscall", name)) | |
4625 { | |
4626 if (TREE_CODE (*node) != METHOD_TYPE && pedantic) | |
4627 warning (OPT_Wattributes, "%qE attribute is used for none class-method", | |
4628 name); | |
4629 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) | |
4630 { | |
4631 error ("stdcall and thiscall attributes are not compatible"); | |
4632 } | |
4633 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) | |
4634 { | |
4635 error ("fastcall and thiscall attributes are not compatible"); | |
4636 } | |
4637 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) | |
4638 { | |
4639 error ("cdecl and thiscall attributes are not compatible"); | |
4495 } | 4640 } |
4496 } | 4641 } |
4497 | 4642 |
4498 /* Can combine sseregparm with all attributes. */ | 4643 /* Can combine sseregparm with all attributes. */ |
4499 | 4644 |
4524 /* Check for mismatched sseregparm types. */ | 4669 /* Check for mismatched sseregparm types. */ |
4525 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1)) | 4670 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1)) |
4526 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2))) | 4671 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2))) |
4527 return 0; | 4672 return 0; |
4528 | 4673 |
4674 /* Check for mismatched thiscall types. */ | |
4675 if (!lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type1)) | |
4676 != !lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type2))) | |
4677 return 0; | |
4678 | |
4529 /* Check for mismatched return types (cdecl vs stdcall). */ | 4679 /* Check for mismatched return types (cdecl vs stdcall). */ |
4530 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1)) | 4680 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1)) |
4531 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2))) | 4681 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2))) |
4532 return 0; | 4682 return 0; |
4533 | 4683 |
4556 return regparm; | 4706 return regparm; |
4557 } | 4707 } |
4558 | 4708 |
4559 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) | 4709 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) |
4560 return 2; | 4710 return 2; |
4711 | |
4712 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type))) | |
4713 return 1; | |
4561 | 4714 |
4562 /* Use register calling convention for local functions when possible. */ | 4715 /* Use register calling convention for local functions when possible. */ |
4563 if (decl | 4716 if (decl |
4564 && TREE_CODE (decl) == FUNCTION_DECL | 4717 && TREE_CODE (decl) == FUNCTION_DECL |
4565 && optimize | 4718 && optimize |
4694 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) | 4847 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) |
4695 { | 4848 { |
4696 /* Stdcall and fastcall functions will pop the stack if not | 4849 /* Stdcall and fastcall functions will pop the stack if not |
4697 variable args. */ | 4850 variable args. */ |
4698 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)) | 4851 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)) |
4699 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))) | 4852 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)) |
4853 || lookup_attribute ("thiscall", TYPE_ATTRIBUTES (funtype))) | |
4700 rtd = 1; | 4854 rtd = 1; |
4701 | 4855 |
4702 if (rtd && ! stdarg_p (funtype)) | 4856 if (rtd && ! stdarg_p (funtype)) |
4703 return size; | 4857 return size; |
4704 } | 4858 } |
4957 | 5111 |
4958 /* Use ecx and edx registers if function has fastcall attribute, | 5112 /* Use ecx and edx registers if function has fastcall attribute, |
4959 else look for regparm information. */ | 5113 else look for regparm information. */ |
4960 if (fntype) | 5114 if (fntype) |
4961 { | 5115 { |
4962 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype))) | 5116 if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype))) |
5117 { | |
5118 cum->nregs = 1; | |
5119 cum->fastcall = 1; /* Same first register as in fastcall. */ | |
5120 } | |
5121 else if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype))) | |
4963 { | 5122 { |
4964 cum->nregs = 2; | 5123 cum->nregs = 2; |
4965 cum->fastcall = 1; | 5124 cum->fastcall = 1; |
4966 } | 5125 } |
4967 else | 5126 else |
6275 return align; | 6434 return align; |
6276 } | 6435 } |
6277 | 6436 |
6278 /* Return true if N is a possible register number of function value. */ | 6437 /* Return true if N is a possible register number of function value. */ |
6279 | 6438 |
6280 bool | 6439 static bool |
6281 ix86_function_value_regno_p (int regno) | 6440 ix86_function_value_regno_p (const unsigned int regno) |
6282 { | 6441 { |
6283 switch (regno) | 6442 switch (regno) |
6284 { | 6443 { |
6285 case 0: | 6444 case 0: |
6286 return true; | 6445 return true; |
6734 static void | 6893 static void |
6735 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) | 6894 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) |
6736 { | 6895 { |
6737 rtx save_area, mem; | 6896 rtx save_area, mem; |
6738 rtx label; | 6897 rtx label; |
6739 rtx label_ref; | |
6740 rtx tmp_reg; | 6898 rtx tmp_reg; |
6741 rtx nsse_reg; | 6899 rtx nsse_reg; |
6742 alias_set_type set; | 6900 alias_set_type set; |
6743 int i; | 6901 int i; |
6744 int regparm = ix86_regparm; | 6902 int regparm = ix86_regparm; |
6785 of SSE parameter registers used to call this function. We use | 6943 of SSE parameter registers used to call this function. We use |
6786 sse_prologue_save insn template that produces computed jump across | 6944 sse_prologue_save insn template that produces computed jump across |
6787 SSE saves. We need some preparation work to get this working. */ | 6945 SSE saves. We need some preparation work to get this working. */ |
6788 | 6946 |
6789 label = gen_label_rtx (); | 6947 label = gen_label_rtx (); |
6790 label_ref = gen_rtx_LABEL_REF (Pmode, label); | 6948 |
6791 | |
6792 /* Compute address to jump to : | |
6793 label - eax*4 + nnamed_sse_arguments*4 Or | |
6794 label - eax*5 + nnamed_sse_arguments*5 for AVX. */ | |
6795 tmp_reg = gen_reg_rtx (Pmode); | |
6796 nsse_reg = gen_reg_rtx (Pmode); | 6949 nsse_reg = gen_reg_rtx (Pmode); |
6797 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG))); | 6950 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG))); |
6798 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, | |
6799 gen_rtx_MULT (Pmode, nsse_reg, | |
6800 GEN_INT (4)))); | |
6801 | |
6802 /* vmovaps is one byte longer than movaps. */ | |
6803 if (TARGET_AVX) | |
6804 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, | |
6805 gen_rtx_PLUS (Pmode, tmp_reg, | |
6806 nsse_reg))); | |
6807 | |
6808 if (cum->sse_regno) | |
6809 emit_move_insn | |
6810 (nsse_reg, | |
6811 gen_rtx_CONST (DImode, | |
6812 gen_rtx_PLUS (DImode, | |
6813 label_ref, | |
6814 GEN_INT (cum->sse_regno | |
6815 * (TARGET_AVX ? 5 : 4))))); | |
6816 else | |
6817 emit_move_insn (nsse_reg, label_ref); | |
6818 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg)); | |
6819 | 6951 |
6820 /* Compute address of memory block we save into. We always use pointer | 6952 /* Compute address of memory block we save into. We always use pointer |
6821 pointing 127 bytes after first byte to store - this is needed to keep | 6953 pointing 127 bytes after first byte to store - this is needed to keep |
6822 instruction size limited by 4 bytes (5 bytes for AVX) with one | 6954 instruction size limited by 4 bytes (5 bytes for AVX) with one |
6823 byte displacement. */ | 6955 byte displacement. */ |
6826 plus_constant (save_area, | 6958 plus_constant (save_area, |
6827 ix86_varargs_gpr_size + 127))); | 6959 ix86_varargs_gpr_size + 127))); |
6828 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127)); | 6960 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127)); |
6829 MEM_NOTRAP_P (mem) = 1; | 6961 MEM_NOTRAP_P (mem) = 1; |
6830 set_mem_alias_set (mem, set); | 6962 set_mem_alias_set (mem, set); |
6831 set_mem_align (mem, BITS_PER_WORD); | 6963 set_mem_align (mem, 64); |
6832 | 6964 |
6833 /* And finally do the dirty job! */ | 6965 /* And finally do the dirty job! */ |
6834 emit_insn (gen_sse_prologue_save (mem, nsse_reg, | 6966 emit_insn (gen_sse_prologue_save (mem, nsse_reg, |
6835 GEN_INT (cum->sse_regno), label)); | 6967 GEN_INT (cum->sse_regno), label, |
6968 gen_reg_rtx (Pmode))); | |
6836 } | 6969 } |
6837 } | 6970 } |
6838 | 6971 |
6839 static void | 6972 static void |
6840 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) | 6973 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) |
6991 tree addr, t2; | 7124 tree addr, t2; |
6992 rtx container; | 7125 rtx container; |
6993 int indirect_p = 0; | 7126 int indirect_p = 0; |
6994 tree ptrtype; | 7127 tree ptrtype; |
6995 enum machine_mode nat_mode; | 7128 enum machine_mode nat_mode; |
6996 int arg_boundary; | 7129 unsigned int arg_boundary; |
6997 | 7130 |
6998 /* Only 64bit target needs something special. */ | 7131 /* Only 64bit target needs something special. */ |
6999 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist))) | 7132 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist))) |
7000 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); | 7133 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); |
7001 | 7134 |
7223 size_int (align - 1)); | 7356 size_int (align - 1)); |
7224 t = fold_convert (sizetype, t); | 7357 t = fold_convert (sizetype, t); |
7225 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, | 7358 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, |
7226 size_int (-align)); | 7359 size_int (-align)); |
7227 t = fold_convert (TREE_TYPE (ovf), t); | 7360 t = fold_convert (TREE_TYPE (ovf), t); |
7361 if (crtl->stack_alignment_needed < arg_boundary) | |
7362 crtl->stack_alignment_needed = arg_boundary; | |
7228 } | 7363 } |
7229 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); | 7364 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); |
7230 gimplify_assign (addr, t, pre_p); | 7365 gimplify_assign (addr, t, pre_p); |
7231 | 7366 |
7232 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t, | 7367 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t, |
7432 switch (get_attr_mode (insn)) | 7567 switch (get_attr_mode (insn)) |
7433 { | 7568 { |
7434 case MODE_V4SF: | 7569 case MODE_V4SF: |
7435 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0"; | 7570 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0"; |
7436 case MODE_V2DF: | 7571 case MODE_V2DF: |
7437 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0"; | 7572 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) |
7573 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0"; | |
7574 else | |
7575 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0"; | |
7438 case MODE_TI: | 7576 case MODE_TI: |
7439 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0"; | 7577 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) |
7578 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0"; | |
7579 else | |
7580 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0"; | |
7440 case MODE_V8SF: | 7581 case MODE_V8SF: |
7441 return "vxorps\t%x0, %x0, %x0"; | 7582 return "vxorps\t%x0, %x0, %x0"; |
7442 case MODE_V4DF: | 7583 case MODE_V4DF: |
7443 return "vxorpd\t%x0, %x0, %x0"; | 7584 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) |
7585 return "vxorps\t%x0, %x0, %x0"; | |
7586 else | |
7587 return "vxorpd\t%x0, %x0, %x0"; | |
7444 case MODE_OI: | 7588 case MODE_OI: |
7445 return "vpxor\t%x0, %x0, %x0"; | 7589 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) |
7590 return "vxorps\t%x0, %x0, %x0"; | |
7591 else | |
7592 return "vpxor\t%x0, %x0, %x0"; | |
7446 default: | 7593 default: |
7447 break; | 7594 break; |
7448 } | 7595 } |
7449 case 2: | 7596 case 2: |
7450 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0"; | 7597 return TARGET_AVX ? "vpcmpeqd\t%0, %0, %0" : "pcmpeqd\t%0, %0"; |
7574 | 7721 |
7575 | 7722 |
7576 /* This function generates code for -fpic that loads %ebx with | 7723 /* This function generates code for -fpic that loads %ebx with |
7577 the return address of the caller and then returns. */ | 7724 the return address of the caller and then returns. */ |
7578 | 7725 |
7579 void | 7726 static void |
7580 ix86_file_end (void) | 7727 ix86_code_end (void) |
7581 { | 7728 { |
7582 rtx xops[2]; | 7729 rtx xops[2]; |
7583 int regno; | 7730 int regno; |
7584 | 7731 |
7585 for (regno = 0; regno < 8; ++regno) | 7732 for (regno = 0; regno < 8; ++regno) |
7586 { | 7733 { |
7587 char name[32]; | 7734 char name[32]; |
7735 tree decl; | |
7588 | 7736 |
7589 if (! ((pic_labels_used >> regno) & 1)) | 7737 if (! ((pic_labels_used >> regno) & 1)) |
7590 continue; | 7738 continue; |
7591 | 7739 |
7592 get_pc_thunk_name (name, regno); | 7740 get_pc_thunk_name (name, regno); |
7741 | |
7742 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, | |
7743 get_identifier (name), | |
7744 build_function_type (void_type_node, void_list_node)); | |
7745 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL, | |
7746 NULL_TREE, void_type_node); | |
7747 TREE_PUBLIC (decl) = 1; | |
7748 TREE_STATIC (decl) = 1; | |
7593 | 7749 |
7594 #if TARGET_MACHO | 7750 #if TARGET_MACHO |
7595 if (TARGET_MACHO) | 7751 if (TARGET_MACHO) |
7596 { | 7752 { |
7597 switch_to_section (darwin_sections[text_coal_section]); | 7753 switch_to_section (darwin_sections[text_coal_section]); |
7599 assemble_name (asm_out_file, name); | 7755 assemble_name (asm_out_file, name); |
7600 fputs ("\n\t.private_extern\t", asm_out_file); | 7756 fputs ("\n\t.private_extern\t", asm_out_file); |
7601 assemble_name (asm_out_file, name); | 7757 assemble_name (asm_out_file, name); |
7602 fputs ("\n", asm_out_file); | 7758 fputs ("\n", asm_out_file); |
7603 ASM_OUTPUT_LABEL (asm_out_file, name); | 7759 ASM_OUTPUT_LABEL (asm_out_file, name); |
7760 DECL_WEAK (decl) = 1; | |
7604 } | 7761 } |
7605 else | 7762 else |
7606 #endif | 7763 #endif |
7607 if (USE_HIDDEN_LINKONCE) | 7764 if (USE_HIDDEN_LINKONCE) |
7608 { | 7765 { |
7609 tree decl; | |
7610 | |
7611 decl = build_decl (BUILTINS_LOCATION, | |
7612 FUNCTION_DECL, get_identifier (name), | |
7613 error_mark_node); | |
7614 TREE_PUBLIC (decl) = 1; | |
7615 TREE_STATIC (decl) = 1; | |
7616 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl); | 7766 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl); |
7617 | 7767 |
7618 (*targetm.asm_out.unique_section) (decl, 0); | 7768 (*targetm.asm_out.unique_section) (decl, 0); |
7619 switch_to_section (get_named_section (decl, NULL, 0)); | 7769 switch_to_section (get_named_section (decl, NULL, 0)); |
7620 | 7770 |
7628 { | 7778 { |
7629 switch_to_section (text_section); | 7779 switch_to_section (text_section); |
7630 ASM_OUTPUT_LABEL (asm_out_file, name); | 7780 ASM_OUTPUT_LABEL (asm_out_file, name); |
7631 } | 7781 } |
7632 | 7782 |
7783 DECL_INITIAL (decl) = make_node (BLOCK); | |
7784 current_function_decl = decl; | |
7785 init_function_start (decl); | |
7786 first_function_block_is_cold = false; | |
7787 /* Make sure unwind info is emitted for the thunk if needed. */ | |
7788 final_start_function (emit_barrier (), asm_out_file, 1); | |
7789 | |
7633 xops[0] = gen_rtx_REG (Pmode, regno); | 7790 xops[0] = gen_rtx_REG (Pmode, regno); |
7634 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); | 7791 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); |
7635 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); | 7792 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); |
7636 output_asm_insn ("ret", xops); | 7793 output_asm_insn ("ret", xops); |
7637 } | 7794 final_end_function (); |
7638 | 7795 init_insn_lengths (); |
7639 if (NEED_INDICATE_EXEC_STACK) | 7796 free_after_compilation (cfun); |
7640 file_end_indicate_exec_stack (); | 7797 set_cfun (NULL); |
7798 current_function_decl = NULL; | |
7799 } | |
7641 } | 7800 } |
7642 | 7801 |
7643 /* Emit code for the SET_GOT patterns. */ | 7802 /* Emit code for the SET_GOT patterns. */ |
7644 | 7803 |
7645 const char * | 7804 const char * |
7672 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); | 7831 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); |
7673 | 7832 |
7674 if (!flag_pic) | 7833 if (!flag_pic) |
7675 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); | 7834 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); |
7676 else | 7835 else |
7677 output_asm_insn ("call\t%a2", xops); | 7836 { |
7837 output_asm_insn ("call\t%a2", xops); | |
7838 #ifdef DWARF2_UNWIND_INFO | |
7839 /* The call to next label acts as a push. */ | |
7840 if (dwarf2out_do_frame ()) | |
7841 { | |
7842 rtx insn; | |
7843 start_sequence (); | |
7844 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, | |
7845 gen_rtx_PLUS (Pmode, | |
7846 stack_pointer_rtx, | |
7847 GEN_INT (-4)))); | |
7848 RTX_FRAME_RELATED_P (insn) = 1; | |
7849 dwarf2out_frame_debug (insn, true); | |
7850 end_sequence (); | |
7851 } | |
7852 #endif | |
7853 } | |
7678 | 7854 |
7679 #if TARGET_MACHO | 7855 #if TARGET_MACHO |
7680 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This | 7856 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This |
7681 is what will be referenced by the Mach-O PIC subsystem. */ | 7857 is what will be referenced by the Mach-O PIC subsystem. */ |
7682 if (!label) | 7858 if (!label) |
7685 | 7861 |
7686 (*targetm.asm_out.internal_label) (asm_out_file, "L", | 7862 (*targetm.asm_out.internal_label) (asm_out_file, "L", |
7687 CODE_LABEL_NUMBER (XEXP (xops[2], 0))); | 7863 CODE_LABEL_NUMBER (XEXP (xops[2], 0))); |
7688 | 7864 |
7689 if (flag_pic) | 7865 if (flag_pic) |
7690 output_asm_insn ("pop%z0\t%0", xops); | 7866 { |
7867 output_asm_insn ("pop%z0\t%0", xops); | |
7868 #ifdef DWARF2_UNWIND_INFO | |
7869 /* The pop is a pop and clobbers dest, but doesn't restore it | |
7870 for unwind info purposes. */ | |
7871 if (dwarf2out_do_frame ()) | |
7872 { | |
7873 rtx insn; | |
7874 start_sequence (); | |
7875 insn = emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx)); | |
7876 dwarf2out_frame_debug (insn, true); | |
7877 insn = emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, | |
7878 gen_rtx_PLUS (Pmode, | |
7879 stack_pointer_rtx, | |
7880 GEN_INT (4)))); | |
7881 RTX_FRAME_RELATED_P (insn) = 1; | |
7882 dwarf2out_frame_debug (insn, true); | |
7883 end_sequence (); | |
7884 } | |
7885 #endif | |
7886 } | |
7691 } | 7887 } |
7692 else | 7888 else |
7693 { | 7889 { |
7694 char name[32]; | 7890 char name[32]; |
7695 get_pc_thunk_name (name, REGNO (dest)); | 7891 get_pc_thunk_name (name, REGNO (dest)); |
7696 pic_labels_used |= 1 << REGNO (dest); | 7892 pic_labels_used |= 1 << REGNO (dest); |
7697 | 7893 |
7894 #ifdef DWARF2_UNWIND_INFO | |
7895 /* Ensure all queued register saves are flushed before the | |
7896 call. */ | |
7897 if (dwarf2out_do_frame ()) | |
7898 { | |
7899 rtx insn; | |
7900 start_sequence (); | |
7901 insn = emit_barrier (); | |
7902 end_sequence (); | |
7903 dwarf2out_frame_debug (insn, false); | |
7904 } | |
7905 #endif | |
7698 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); | 7906 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); |
7699 xops[2] = gen_rtx_MEM (QImode, xops[2]); | 7907 xops[2] = gen_rtx_MEM (QImode, xops[2]); |
7700 output_asm_insn ("call\t%X2", xops); | 7908 output_asm_insn ("call\t%X2", xops); |
7701 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This | 7909 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This |
7702 is what will be referenced by the Mach-O PIC subsystem. */ | 7910 is what will be referenced by the Mach-O PIC subsystem. */ |
7917 expect the decision to change within single iteration. */ | 8125 expect the decision to change within single iteration. */ |
7918 if (!optimize_function_for_size_p (cfun) | 8126 if (!optimize_function_for_size_p (cfun) |
7919 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs) | 8127 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs) |
7920 { | 8128 { |
7921 int count = frame->nregs; | 8129 int count = frame->nregs; |
8130 struct cgraph_node *node = cgraph_node (current_function_decl); | |
7922 | 8131 |
7923 cfun->machine->use_fast_prologue_epilogue_nregs = count; | 8132 cfun->machine->use_fast_prologue_epilogue_nregs = count; |
7924 /* The fast prologue uses move instead of push to save registers. This | 8133 /* The fast prologue uses move instead of push to save registers. This |
7925 is significantly longer, but also executes faster as modern hardware | 8134 is significantly longer, but also executes faster as modern hardware |
7926 can execute the moves in parallel, but can't do that for push/pop. | 8135 can execute the moves in parallel, but can't do that for push/pop. |
7931 feedback only). Weight the size of function by number of registers | 8140 feedback only). Weight the size of function by number of registers |
7932 to save as it is cheap to use one or two push instructions but very | 8141 to save as it is cheap to use one or two push instructions but very |
7933 slow to use many of them. */ | 8142 slow to use many of them. */ |
7934 if (count) | 8143 if (count) |
7935 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; | 8144 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; |
7936 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL | 8145 if (node->frequency < NODE_FREQUENCY_NORMAL |
7937 || (flag_branch_probabilities | 8146 || (flag_branch_probabilities |
7938 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT)) | 8147 && node->frequency < NODE_FREQUENCY_HOT)) |
7939 cfun->machine->use_fast_prologue_epilogue = false; | 8148 cfun->machine->use_fast_prologue_epilogue = false; |
7940 else | 8149 else |
7941 cfun->machine->use_fast_prologue_epilogue | 8150 cfun->machine->use_fast_prologue_epilogue |
7942 = !expensive_function_p (count); | 8151 = !expensive_function_p (count); |
7943 } | 8152 } |
8235 | 8444 |
8236 /* Reuse static chain register if it isn't used for parameter | 8445 /* Reuse static chain register if it isn't used for parameter |
8237 passing. */ | 8446 passing. */ |
8238 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2 | 8447 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2 |
8239 && !lookup_attribute ("fastcall", | 8448 && !lookup_attribute ("fastcall", |
8449 TYPE_ATTRIBUTES (TREE_TYPE (decl))) | |
8450 && !lookup_attribute ("thiscall", | |
8240 TYPE_ATTRIBUTES (TREE_TYPE (decl)))) | 8451 TYPE_ATTRIBUTES (TREE_TYPE (decl)))) |
8241 return CX_REG; | 8452 return CX_REG; |
8242 else | 8453 else |
8243 return DI_REG; | 8454 return DI_REG; |
8244 } | 8455 } |
8329 drap_vreg = copy_to_reg (arg_ptr); | 8540 drap_vreg = copy_to_reg (arg_ptr); |
8330 seq = get_insns (); | 8541 seq = get_insns (); |
8331 end_sequence (); | 8542 end_sequence (); |
8332 | 8543 |
8333 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); | 8544 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); |
8334 RTX_FRAME_RELATED_P (insn) = 1; | 8545 if (!optimize) |
8546 { | |
8547 add_reg_note (insn, REG_CFA_SET_VDRAP, drap_vreg); | |
8548 RTX_FRAME_RELATED_P (insn) = 1; | |
8549 } | |
8335 return drap_vreg; | 8550 return drap_vreg; |
8336 } | 8551 } |
8337 else | 8552 else |
8338 return NULL; | 8553 return NULL; |
8339 } | 8554 } |
8557 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, | 8772 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, |
8558 GEN_INT (-allocate), -1, | 8773 GEN_INT (-allocate), -1, |
8559 ix86_cfa_state->reg == stack_pointer_rtx); | 8774 ix86_cfa_state->reg == stack_pointer_rtx); |
8560 else | 8775 else |
8561 { | 8776 { |
8562 /* Only valid for Win32. */ | |
8563 rtx eax = gen_rtx_REG (Pmode, AX_REG); | 8777 rtx eax = gen_rtx_REG (Pmode, AX_REG); |
8564 bool eax_live; | 8778 bool eax_live; |
8565 rtx t; | 8779 rtx t; |
8566 | |
8567 gcc_assert (!TARGET_64BIT || cfun->machine->call_abi == MS_ABI); | |
8568 | 8780 |
8569 if (cfun->machine->call_abi == MS_ABI) | 8781 if (cfun->machine->call_abi == MS_ABI) |
8570 eax_live = false; | 8782 eax_live = false; |
8571 else | 8783 else |
8572 eax_live = ix86_eax_live_at_start_p (); | 8784 eax_live = ix86_eax_live_at_start_p (); |
9261 { | 9473 { |
9262 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; | 9474 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; |
9263 rtx base_reg, index_reg; | 9475 rtx base_reg, index_reg; |
9264 HOST_WIDE_INT scale = 1; | 9476 HOST_WIDE_INT scale = 1; |
9265 rtx scale_rtx = NULL_RTX; | 9477 rtx scale_rtx = NULL_RTX; |
9478 rtx tmp; | |
9266 int retval = 1; | 9479 int retval = 1; |
9267 enum ix86_address_seg seg = SEG_DEFAULT; | 9480 enum ix86_address_seg seg = SEG_DEFAULT; |
9268 | 9481 |
9269 if (REG_P (addr) || GET_CODE (addr) == SUBREG) | 9482 if (REG_P (addr) || GET_CODE (addr) == SUBREG) |
9270 base = addr; | 9483 base = addr; |
9296 return 0; | 9509 return 0; |
9297 index = XEXP (op, 0); | 9510 index = XEXP (op, 0); |
9298 scale_rtx = XEXP (op, 1); | 9511 scale_rtx = XEXP (op, 1); |
9299 break; | 9512 break; |
9300 | 9513 |
9514 case ASHIFT: | |
9515 if (index) | |
9516 return 0; | |
9517 index = XEXP (op, 0); | |
9518 tmp = XEXP (op, 1); | |
9519 if (!CONST_INT_P (tmp)) | |
9520 return 0; | |
9521 scale = INTVAL (tmp); | |
9522 if ((unsigned HOST_WIDE_INT) scale > 3) | |
9523 return 0; | |
9524 scale = 1 << scale; | |
9525 break; | |
9526 | |
9301 case UNSPEC: | 9527 case UNSPEC: |
9302 if (XINT (op, 1) == UNSPEC_TP | 9528 if (XINT (op, 1) == UNSPEC_TP |
9303 && TARGET_TLS_DIRECT_SEG_REFS | 9529 && TARGET_TLS_DIRECT_SEG_REFS |
9304 && seg == SEG_DEFAULT) | 9530 && seg == SEG_DEFAULT) |
9305 seg = TARGET_64BIT ? SEG_FS : SEG_GS; | 9531 seg = TARGET_64BIT ? SEG_FS : SEG_GS; |
9336 index = XEXP (addr, 0); /* index*scale */ | 9562 index = XEXP (addr, 0); /* index*scale */ |
9337 scale_rtx = XEXP (addr, 1); | 9563 scale_rtx = XEXP (addr, 1); |
9338 } | 9564 } |
9339 else if (GET_CODE (addr) == ASHIFT) | 9565 else if (GET_CODE (addr) == ASHIFT) |
9340 { | 9566 { |
9341 rtx tmp; | |
9342 | |
9343 /* We're called for lea too, which implements ashift on occasion. */ | 9567 /* We're called for lea too, which implements ashift on occasion. */ |
9344 index = XEXP (addr, 0); | 9568 index = XEXP (addr, 0); |
9345 tmp = XEXP (addr, 1); | 9569 tmp = XEXP (addr, 1); |
9346 if (!CONST_INT_P (tmp)) | 9570 if (!CONST_INT_P (tmp)) |
9347 return 0; | 9571 return 0; |
10792 fputs (ASSEMBLER_DIALECT == ASM_ATT ? | 11016 fputs (ASSEMBLER_DIALECT == ASM_ATT ? |
10793 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file); | 11017 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file); |
10794 break; | 11018 break; |
10795 case UNSPEC_GOTTPOFF: | 11019 case UNSPEC_GOTTPOFF: |
10796 /* FIXME: This might be @TPOFF in Sun ld too. */ | 11020 /* FIXME: This might be @TPOFF in Sun ld too. */ |
10797 fputs ("@GOTTPOFF", file); | 11021 fputs ("@gottpoff", file); |
10798 break; | 11022 break; |
10799 case UNSPEC_TPOFF: | 11023 case UNSPEC_TPOFF: |
10800 fputs ("@TPOFF", file); | 11024 fputs ("@tpoff", file); |
10801 break; | 11025 break; |
10802 case UNSPEC_NTPOFF: | 11026 case UNSPEC_NTPOFF: |
10803 if (TARGET_64BIT) | 11027 if (TARGET_64BIT) |
10804 fputs ("@TPOFF", file); | 11028 fputs ("@tpoff", file); |
10805 else | 11029 else |
10806 fputs ("@NTPOFF", file); | 11030 fputs ("@ntpoff", file); |
10807 break; | 11031 break; |
10808 case UNSPEC_DTPOFF: | 11032 case UNSPEC_DTPOFF: |
10809 fputs ("@DTPOFF", file); | 11033 fputs ("@dtpoff", file); |
10810 break; | 11034 break; |
10811 case UNSPEC_GOTNTPOFF: | 11035 case UNSPEC_GOTNTPOFF: |
10812 if (TARGET_64BIT) | 11036 if (TARGET_64BIT) |
10813 fputs (ASSEMBLER_DIALECT == ASM_ATT ? | 11037 fputs (ASSEMBLER_DIALECT == ASM_ATT ? |
10814 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file); | 11038 "@gottpoff(%rip)": "@gottpoff[rip]", file); |
10815 else | 11039 else |
10816 fputs ("@GOTNTPOFF", file); | 11040 fputs ("@gotntpoff", file); |
10817 break; | 11041 break; |
10818 case UNSPEC_INDNTPOFF: | 11042 case UNSPEC_INDNTPOFF: |
10819 fputs ("@INDNTPOFF", file); | 11043 fputs ("@indntpoff", file); |
10820 break; | 11044 break; |
10821 #if TARGET_MACHO | 11045 #if TARGET_MACHO |
10822 case UNSPEC_MACHOPIC_OFFSET: | 11046 case UNSPEC_MACHOPIC_OFFSET: |
10823 putc ('-', file); | 11047 putc ('-', file); |
10824 machopic_output_function_base_name (file); | 11048 machopic_output_function_base_name (file); |
10841 static void ATTRIBUTE_UNUSED | 11065 static void ATTRIBUTE_UNUSED |
10842 i386_output_dwarf_dtprel (FILE *file, int size, rtx x) | 11066 i386_output_dwarf_dtprel (FILE *file, int size, rtx x) |
10843 { | 11067 { |
10844 fputs (ASM_LONG, file); | 11068 fputs (ASM_LONG, file); |
10845 output_addr_const (file, x); | 11069 output_addr_const (file, x); |
10846 fputs ("@DTPOFF", file); | 11070 fputs ("@dtpoff", file); |
10847 switch (size) | 11071 switch (size) |
10848 { | 11072 { |
10849 case 4: | 11073 case 4: |
10850 break; | 11074 break; |
10851 case 8: | 11075 case 8: |
10882 | 11106 |
10883 static rtx | 11107 static rtx |
10884 ix86_delegitimize_address (rtx x) | 11108 ix86_delegitimize_address (rtx x) |
10885 { | 11109 { |
10886 rtx orig_x = delegitimize_mem_from_attrs (x); | 11110 rtx orig_x = delegitimize_mem_from_attrs (x); |
11111 /* addend is NULL or some rtx if x is something+GOTOFF where | |
11112 something doesn't include the PIC register. */ | |
11113 rtx addend = NULL_RTX; | |
10887 /* reg_addend is NULL or a multiple of some register. */ | 11114 /* reg_addend is NULL or a multiple of some register. */ |
10888 rtx reg_addend = NULL_RTX; | 11115 rtx reg_addend = NULL_RTX; |
10889 /* const_addend is NULL or a const_int. */ | 11116 /* const_addend is NULL or a const_int. */ |
10890 rtx const_addend = NULL_RTX; | 11117 rtx const_addend = NULL_RTX; |
10891 /* This is the result, or NULL. */ | 11118 /* This is the result, or NULL. */ |
10901 if (GET_CODE (x) != CONST | 11128 if (GET_CODE (x) != CONST |
10902 || GET_CODE (XEXP (x, 0)) != UNSPEC | 11129 || GET_CODE (XEXP (x, 0)) != UNSPEC |
10903 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL | 11130 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL |
10904 || !MEM_P (orig_x)) | 11131 || !MEM_P (orig_x)) |
10905 return orig_x; | 11132 return orig_x; |
10906 return XVECEXP (XEXP (x, 0), 0, 0); | 11133 x = XVECEXP (XEXP (x, 0), 0, 0); |
11134 if (GET_MODE (orig_x) != Pmode) | |
11135 return simplify_gen_subreg (GET_MODE (orig_x), x, Pmode, 0); | |
11136 return x; | |
10907 } | 11137 } |
10908 | 11138 |
10909 if (GET_CODE (x) != PLUS | 11139 if (GET_CODE (x) != PLUS |
10910 || GET_CODE (XEXP (x, 1)) != CONST) | 11140 || GET_CODE (XEXP (x, 1)) != CONST) |
10911 return orig_x; | 11141 return orig_x; |
10920 if (ix86_pic_register_p (XEXP (reg_addend, 0))) | 11150 if (ix86_pic_register_p (XEXP (reg_addend, 0))) |
10921 reg_addend = XEXP (reg_addend, 1); | 11151 reg_addend = XEXP (reg_addend, 1); |
10922 else if (ix86_pic_register_p (XEXP (reg_addend, 1))) | 11152 else if (ix86_pic_register_p (XEXP (reg_addend, 1))) |
10923 reg_addend = XEXP (reg_addend, 0); | 11153 reg_addend = XEXP (reg_addend, 0); |
10924 else | 11154 else |
10925 return orig_x; | 11155 { |
10926 if (!REG_P (reg_addend) | 11156 reg_addend = NULL_RTX; |
10927 && GET_CODE (reg_addend) != MULT | 11157 addend = XEXP (x, 0); |
10928 && GET_CODE (reg_addend) != ASHIFT) | 11158 } |
10929 return orig_x; | |
10930 } | 11159 } |
10931 else | 11160 else |
10932 return orig_x; | 11161 addend = XEXP (x, 0); |
10933 | 11162 |
10934 x = XEXP (XEXP (x, 1), 0); | 11163 x = XEXP (XEXP (x, 1), 0); |
10935 if (GET_CODE (x) == PLUS | 11164 if (GET_CODE (x) == PLUS |
10936 && CONST_INT_P (XEXP (x, 1))) | 11165 && CONST_INT_P (XEXP (x, 1))) |
10937 { | 11166 { |
10938 const_addend = XEXP (x, 1); | 11167 const_addend = XEXP (x, 1); |
10939 x = XEXP (x, 0); | 11168 x = XEXP (x, 0); |
10940 } | 11169 } |
10941 | 11170 |
10942 if (GET_CODE (x) == UNSPEC | 11171 if (GET_CODE (x) == UNSPEC |
10943 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x)) | 11172 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x) && !addend) |
10944 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)))) | 11173 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)))) |
10945 result = XVECEXP (x, 0, 0); | 11174 result = XVECEXP (x, 0, 0); |
10946 | 11175 |
10947 if (TARGET_MACHO && darwin_local_data_pic (x) | 11176 if (TARGET_MACHO && darwin_local_data_pic (x) |
10948 && !MEM_P (orig_x)) | 11177 && !MEM_P (orig_x)) |
10953 | 11182 |
10954 if (const_addend) | 11183 if (const_addend) |
10955 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend)); | 11184 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend)); |
10956 if (reg_addend) | 11185 if (reg_addend) |
10957 result = gen_rtx_PLUS (Pmode, reg_addend, result); | 11186 result = gen_rtx_PLUS (Pmode, reg_addend, result); |
11187 if (addend) | |
11188 { | |
11189 /* If the rest of original X doesn't involve the PIC register, add | |
11190 addend and subtract pic_offset_table_rtx. This can happen e.g. | |
11191 for code like: | |
11192 leal (%ebx, %ecx, 4), %ecx | |
11193 ... | |
11194 movl foo@GOTOFF(%ecx), %edx | |
11195 in which case we return (%ecx - %ebx) + foo. */ | |
11196 if (pic_offset_table_rtx) | |
11197 result = gen_rtx_PLUS (Pmode, gen_rtx_MINUS (Pmode, copy_rtx (addend), | |
11198 pic_offset_table_rtx), | |
11199 result); | |
11200 else | |
11201 return orig_x; | |
11202 } | |
11203 if (GET_MODE (orig_x) != Pmode && MEM_P (orig_x)) | |
11204 return simplify_gen_subreg (GET_MODE (orig_x), result, Pmode, 0); | |
10958 return result; | 11205 return result; |
10959 } | 11206 } |
10960 | 11207 |
10961 /* If X is a machine specific address (i.e. a symbol or label being | 11208 /* If X is a machine specific address (i.e. a symbol or label being |
10962 referenced as a displacement from the GOT implemented using an | 11209 referenced as a displacement from the GOT implemented using an |
11293 | 11540 |
11294 if (cfun->machine->some_ld_name) | 11541 if (cfun->machine->some_ld_name) |
11295 return cfun->machine->some_ld_name; | 11542 return cfun->machine->some_ld_name; |
11296 | 11543 |
11297 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) | 11544 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) |
11298 if (INSN_P (insn) | 11545 if (NONDEBUG_INSN_P (insn) |
11299 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) | 11546 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) |
11300 return cfun->machine->some_ld_name; | 11547 return cfun->machine->some_ld_name; |
11301 | 11548 |
11302 return NULL; | 11549 return NULL; |
11303 } | 11550 } |
11304 | 11551 |
11305 /* Meaning of CODE: | 11552 /* Meaning of CODE: |
11306 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. | 11553 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. |
11307 C -- print opcode suffix for set/cmov insn. | 11554 C -- print opcode suffix for set/cmov insn. |
11308 c -- like C, but print reversed condition | 11555 c -- like C, but print reversed condition |
11309 E,e -- likewise, but for compare-and-branch fused insn. | |
11310 F,f -- likewise, but for floating-point. | 11556 F,f -- likewise, but for floating-point. |
11311 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", | 11557 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", |
11312 otherwise nothing | 11558 otherwise nothing |
11313 R -- print the prefix for register names. | 11559 R -- print the prefix for register names. |
11314 z -- print the opcode suffix for the size of the current operand. | 11560 z -- print the opcode suffix for the size of the current operand. |
11709 putc ('.', file); | 11955 putc ('.', file); |
11710 #endif | 11956 #endif |
11711 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); | 11957 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); |
11712 return; | 11958 return; |
11713 | 11959 |
11714 case 'E': | |
11715 put_condition_code (GET_CODE (x), CCmode, 0, 0, file); | |
11716 return; | |
11717 | |
11718 case 'e': | |
11719 put_condition_code (GET_CODE (x), CCmode, 1, 0, file); | |
11720 return; | |
11721 | |
11722 case 'H': | 11960 case 'H': |
11723 /* It doesn't actually matter what mode we use here, as we're | 11961 /* It doesn't actually matter what mode we use here, as we're |
11724 only going to use this for printing. */ | 11962 only going to use this for printing. */ |
11725 x = adjust_address_nv (x, DImode, 8); | 11963 x = adjust_address_nv (x, DImode, 8); |
11726 break; | 11964 break; |
11815 return; | 12053 return; |
11816 } | 12054 } |
11817 return; | 12055 return; |
11818 | 12056 |
11819 case ';': | 12057 case ';': |
11820 #if TARGET_MACHO | 12058 #if TARGET_MACHO || !HAVE_AS_IX86_REP_LOCK_PREFIX |
11821 fputs (" ; ", file); | 12059 fputs (";", file); |
11822 #else | |
11823 putc (' ', file); | |
11824 #endif | 12060 #endif |
11825 return; | 12061 return; |
11826 | 12062 |
11827 default: | 12063 default: |
11828 output_operand_lossage ("invalid operand code '%c'", code); | 12064 output_operand_lossage ("invalid operand code '%c'", code); |
12100 switch (XINT (x, 1)) | 12336 switch (XINT (x, 1)) |
12101 { | 12337 { |
12102 case UNSPEC_GOTTPOFF: | 12338 case UNSPEC_GOTTPOFF: |
12103 output_addr_const (file, op); | 12339 output_addr_const (file, op); |
12104 /* FIXME: This might be @TPOFF in Sun ld. */ | 12340 /* FIXME: This might be @TPOFF in Sun ld. */ |
12105 fputs ("@GOTTPOFF", file); | 12341 fputs ("@gottpoff", file); |
12106 break; | 12342 break; |
12107 case UNSPEC_TPOFF: | 12343 case UNSPEC_TPOFF: |
12108 output_addr_const (file, op); | 12344 output_addr_const (file, op); |
12109 fputs ("@TPOFF", file); | 12345 fputs ("@tpoff", file); |
12110 break; | 12346 break; |
12111 case UNSPEC_NTPOFF: | 12347 case UNSPEC_NTPOFF: |
12112 output_addr_const (file, op); | 12348 output_addr_const (file, op); |
12113 if (TARGET_64BIT) | 12349 if (TARGET_64BIT) |
12114 fputs ("@TPOFF", file); | 12350 fputs ("@tpoff", file); |
12115 else | 12351 else |
12116 fputs ("@NTPOFF", file); | 12352 fputs ("@ntpoff", file); |
12117 break; | 12353 break; |
12118 case UNSPEC_DTPOFF: | 12354 case UNSPEC_DTPOFF: |
12119 output_addr_const (file, op); | 12355 output_addr_const (file, op); |
12120 fputs ("@DTPOFF", file); | 12356 fputs ("@dtpoff", file); |
12121 break; | 12357 break; |
12122 case UNSPEC_GOTNTPOFF: | 12358 case UNSPEC_GOTNTPOFF: |
12123 output_addr_const (file, op); | 12359 output_addr_const (file, op); |
12124 if (TARGET_64BIT) | 12360 if (TARGET_64BIT) |
12125 fputs (ASSEMBLER_DIALECT == ASM_ATT ? | 12361 fputs (ASSEMBLER_DIALECT == ASM_ATT ? |
12126 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file); | 12362 "@gottpoff(%rip)" : "@gottpoff[rip]", file); |
12127 else | 12363 else |
12128 fputs ("@GOTNTPOFF", file); | 12364 fputs ("@gotntpoff", file); |
12129 break; | 12365 break; |
12130 case UNSPEC_INDNTPOFF: | 12366 case UNSPEC_INDNTPOFF: |
12131 output_addr_const (file, op); | 12367 output_addr_const (file, op); |
12132 fputs ("@INDNTPOFF", file); | 12368 fputs ("@indntpoff", file); |
12133 break; | 12369 break; |
12134 #if TARGET_MACHO | 12370 #if TARGET_MACHO |
12135 case UNSPEC_MACHOPIC_OFFSET: | 12371 case UNSPEC_MACHOPIC_OFFSET: |
12136 output_addr_const (file, op); | 12372 output_addr_const (file, op); |
12137 putc ('-', file); | 12373 putc ('-', file); |
13105 case MODE_VECTOR_INT: | 13341 case MODE_VECTOR_INT: |
13106 case MODE_INT: | 13342 case MODE_INT: |
13107 switch (GET_MODE_SIZE (mode)) | 13343 switch (GET_MODE_SIZE (mode)) |
13108 { | 13344 { |
13109 case 16: | 13345 case 16: |
13346 /* If we're optimizing for size, movups is the smallest. */ | |
13347 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) | |
13348 { | |
13349 op0 = gen_lowpart (V4SFmode, op0); | |
13350 op1 = gen_lowpart (V4SFmode, op1); | |
13351 emit_insn (gen_avx_movups (op0, op1)); | |
13352 return; | |
13353 } | |
13110 op0 = gen_lowpart (V16QImode, op0); | 13354 op0 = gen_lowpart (V16QImode, op0); |
13111 op1 = gen_lowpart (V16QImode, op1); | 13355 op1 = gen_lowpart (V16QImode, op1); |
13112 emit_insn (gen_avx_movdqu (op0, op1)); | 13356 emit_insn (gen_avx_movdqu (op0, op1)); |
13113 break; | 13357 break; |
13114 case 32: | 13358 case 32: |
13131 break; | 13375 break; |
13132 case V8SFmode: | 13376 case V8SFmode: |
13133 emit_insn (gen_avx_movups256 (op0, op1)); | 13377 emit_insn (gen_avx_movups256 (op0, op1)); |
13134 break; | 13378 break; |
13135 case V2DFmode: | 13379 case V2DFmode: |
13380 if (TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) | |
13381 { | |
13382 op0 = gen_lowpart (V4SFmode, op0); | |
13383 op1 = gen_lowpart (V4SFmode, op1); | |
13384 emit_insn (gen_avx_movups (op0, op1)); | |
13385 return; | |
13386 } | |
13136 emit_insn (gen_avx_movupd (op0, op1)); | 13387 emit_insn (gen_avx_movupd (op0, op1)); |
13137 break; | 13388 break; |
13138 case V4DFmode: | 13389 case V4DFmode: |
13139 emit_insn (gen_avx_movupd256 (op0, op1)); | 13390 emit_insn (gen_avx_movupd256 (op0, op1)); |
13140 break; | 13391 break; |
13151 } | 13402 } |
13152 | 13403 |
13153 if (MEM_P (op1)) | 13404 if (MEM_P (op1)) |
13154 { | 13405 { |
13155 /* If we're optimizing for size, movups is the smallest. */ | 13406 /* If we're optimizing for size, movups is the smallest. */ |
13156 if (optimize_insn_for_size_p ()) | 13407 if (optimize_insn_for_size_p () |
13408 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) | |
13157 { | 13409 { |
13158 op0 = gen_lowpart (V4SFmode, op0); | 13410 op0 = gen_lowpart (V4SFmode, op0); |
13159 op1 = gen_lowpart (V4SFmode, op1); | 13411 op1 = gen_lowpart (V4SFmode, op1); |
13160 emit_insn (gen_sse_movups (op0, op1)); | 13412 emit_insn (gen_sse_movups (op0, op1)); |
13161 return; | 13413 return; |
13174 | 13426 |
13175 if (TARGET_SSE2 && mode == V2DFmode) | 13427 if (TARGET_SSE2 && mode == V2DFmode) |
13176 { | 13428 { |
13177 rtx zero; | 13429 rtx zero; |
13178 | 13430 |
13179 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL) | 13431 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL) |
13180 { | 13432 { |
13181 op0 = gen_lowpart (V2DFmode, op0); | 13433 op0 = gen_lowpart (V2DFmode, op0); |
13182 op1 = gen_lowpart (V2DFmode, op1); | 13434 op1 = gen_lowpart (V2DFmode, op1); |
13183 emit_insn (gen_sse2_movupd (op0, op1)); | 13435 emit_insn (gen_sse2_movupd (op0, op1)); |
13184 return; | 13436 return; |
13185 } | 13437 } |
13186 | 13438 |
13187 /* When SSE registers are split into halves, we can avoid | 13439 /* When SSE registers are split into halves, we can avoid |
13188 writing to the top half twice. */ | 13440 writing to the top half twice. */ |
13189 if (TARGET_SSE_SPLIT_REGS) | 13441 if (TARGET_SSE_SPLIT_REGS) |
13190 { | 13442 { |
13209 m = adjust_address (op1, DFmode, 8); | 13461 m = adjust_address (op1, DFmode, 8); |
13210 emit_insn (gen_sse2_loadhpd (op0, op0, m)); | 13462 emit_insn (gen_sse2_loadhpd (op0, op0, m)); |
13211 } | 13463 } |
13212 else | 13464 else |
13213 { | 13465 { |
13214 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL) | 13466 if (TARGET_SSE_UNALIGNED_LOAD_OPTIMAL) |
13215 { | 13467 { |
13216 op0 = gen_lowpart (V4SFmode, op0); | 13468 op0 = gen_lowpart (V4SFmode, op0); |
13217 op1 = gen_lowpart (V4SFmode, op1); | 13469 op1 = gen_lowpart (V4SFmode, op1); |
13218 emit_insn (gen_sse_movups (op0, op1)); | 13470 emit_insn (gen_sse_movups (op0, op1)); |
13219 return; | 13471 return; |
13220 } | 13472 } |
13221 | 13473 |
13222 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) | 13474 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) |
13223 emit_move_insn (op0, CONST0_RTX (mode)); | 13475 emit_move_insn (op0, CONST0_RTX (mode)); |
13224 else | 13476 else |
13233 } | 13485 } |
13234 } | 13486 } |
13235 else if (MEM_P (op0)) | 13487 else if (MEM_P (op0)) |
13236 { | 13488 { |
13237 /* If we're optimizing for size, movups is the smallest. */ | 13489 /* If we're optimizing for size, movups is the smallest. */ |
13238 if (optimize_insn_for_size_p ()) | 13490 if (optimize_insn_for_size_p () |
13491 || TARGET_SSE_PACKED_SINGLE_INSN_OPTIMAL) | |
13239 { | 13492 { |
13240 op0 = gen_lowpart (V4SFmode, op0); | 13493 op0 = gen_lowpart (V4SFmode, op0); |
13241 op1 = gen_lowpart (V4SFmode, op1); | 13494 op1 = gen_lowpart (V4SFmode, op1); |
13242 emit_insn (gen_sse_movups (op0, op1)); | 13495 emit_insn (gen_sse_movups (op0, op1)); |
13243 return; | 13496 return; |
13254 return; | 13507 return; |
13255 } | 13508 } |
13256 | 13509 |
13257 if (TARGET_SSE2 && mode == V2DFmode) | 13510 if (TARGET_SSE2 && mode == V2DFmode) |
13258 { | 13511 { |
13259 m = adjust_address (op0, DFmode, 0); | 13512 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL) |
13260 emit_insn (gen_sse2_storelpd (m, op1)); | 13513 { |
13261 m = adjust_address (op0, DFmode, 8); | 13514 op0 = gen_lowpart (V2DFmode, op0); |
13262 emit_insn (gen_sse2_storehpd (m, op1)); | 13515 op1 = gen_lowpart (V2DFmode, op1); |
13516 emit_insn (gen_sse2_movupd (op0, op1)); | |
13517 } | |
13518 else | |
13519 { | |
13520 m = adjust_address (op0, DFmode, 0); | |
13521 emit_insn (gen_sse2_storelpd (m, op1)); | |
13522 m = adjust_address (op0, DFmode, 8); | |
13523 emit_insn (gen_sse2_storehpd (m, op1)); | |
13524 } | |
13263 } | 13525 } |
13264 else | 13526 else |
13265 { | 13527 { |
13266 if (mode != V4SFmode) | 13528 if (mode != V4SFmode) |
13267 op1 = gen_lowpart (V4SFmode, op1); | 13529 op1 = gen_lowpart (V4SFmode, op1); |
13268 m = adjust_address (op0, V2SFmode, 0); | 13530 |
13269 emit_insn (gen_sse_storelps (m, op1)); | 13531 if (TARGET_SSE_UNALIGNED_STORE_OPTIMAL) |
13270 m = adjust_address (op0, V2SFmode, 8); | 13532 { |
13271 emit_insn (gen_sse_storehps (m, op1)); | 13533 op0 = gen_lowpart (V4SFmode, op0); |
13534 emit_insn (gen_sse_movups (op0, op1)); | |
13535 } | |
13536 else | |
13537 { | |
13538 m = adjust_address (op0, V2SFmode, 0); | |
13539 emit_insn (gen_sse_storelps (m, op1)); | |
13540 m = adjust_address (op0, V2SFmode, 8); | |
13541 emit_insn (gen_sse_storehps (m, op1)); | |
13542 } | |
13272 } | 13543 } |
13273 } | 13544 } |
13274 else | 13545 else |
13275 gcc_unreachable (); | 13546 gcc_unreachable (); |
13276 } | 13547 } |
13384 src1 = force_reg (mode, src1); | 13655 src1 = force_reg (mode, src1); |
13385 | 13656 |
13386 /* Source 1 cannot be a non-matching memory. */ | 13657 /* Source 1 cannot be a non-matching memory. */ |
13387 if (MEM_P (src1) && !rtx_equal_p (dst, src1)) | 13658 if (MEM_P (src1) && !rtx_equal_p (dst, src1)) |
13388 src1 = force_reg (mode, src1); | 13659 src1 = force_reg (mode, src1); |
13389 | |
13390 /* In order for the multiply-add patterns to get matched, we need | |
13391 to aid combine by forcing all operands into registers to start. */ | |
13392 if (optimize && TARGET_FMA4) | |
13393 { | |
13394 if (MEM_P (src2)) | |
13395 src2 = force_reg (GET_MODE (src2), src2); | |
13396 else if (MEM_P (src1)) | |
13397 src1 = force_reg (GET_MODE (src1), src1); | |
13398 } | |
13399 | 13660 |
13400 operands[1] = src1; | 13661 operands[1] = src1; |
13401 operands[2] = src2; | 13662 operands[2] = src2; |
13402 return dst; | 13663 return dst; |
13403 } | 13664 } |
13558 if (insn != BB_HEAD (bb)) | 13819 if (insn != BB_HEAD (bb)) |
13559 { | 13820 { |
13560 rtx prev = PREV_INSN (insn); | 13821 rtx prev = PREV_INSN (insn); |
13561 while (prev && distance < LEA_SEARCH_THRESHOLD) | 13822 while (prev && distance < LEA_SEARCH_THRESHOLD) |
13562 { | 13823 { |
13563 if (INSN_P (prev)) | 13824 if (NONDEBUG_INSN_P (prev)) |
13564 { | 13825 { |
13565 distance++; | 13826 distance++; |
13566 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++) | 13827 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++) |
13567 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF | 13828 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF |
13568 && !DF_REF_IS_ARTIFICIAL (*def_rec) | 13829 && !DF_REF_IS_ARTIFICIAL (*def_rec) |
13598 rtx prev = BB_END (bb); | 13859 rtx prev = BB_END (bb); |
13599 while (prev | 13860 while (prev |
13600 && prev != insn | 13861 && prev != insn |
13601 && distance < LEA_SEARCH_THRESHOLD) | 13862 && distance < LEA_SEARCH_THRESHOLD) |
13602 { | 13863 { |
13603 if (INSN_P (prev)) | 13864 if (NONDEBUG_INSN_P (prev)) |
13604 { | 13865 { |
13605 distance++; | 13866 distance++; |
13606 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++) | 13867 for (def_rec = DF_INSN_DEFS (prev); *def_rec; def_rec++) |
13607 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF | 13868 if (DF_REF_TYPE (*def_rec) == DF_REF_REG_DEF |
13608 && !DF_REF_IS_ARTIFICIAL (*def_rec) | 13869 && !DF_REF_IS_ARTIFICIAL (*def_rec) |
13644 if (insn != BB_END (bb)) | 13905 if (insn != BB_END (bb)) |
13645 { | 13906 { |
13646 rtx next = NEXT_INSN (insn); | 13907 rtx next = NEXT_INSN (insn); |
13647 while (next && distance < LEA_SEARCH_THRESHOLD) | 13908 while (next && distance < LEA_SEARCH_THRESHOLD) |
13648 { | 13909 { |
13649 if (INSN_P (next)) | 13910 if (NONDEBUG_INSN_P (next)) |
13650 { | 13911 { |
13651 distance++; | 13912 distance++; |
13652 | 13913 |
13653 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++) | 13914 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++) |
13654 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD | 13915 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD |
13693 rtx next = BB_HEAD (bb); | 13954 rtx next = BB_HEAD (bb); |
13694 while (next | 13955 while (next |
13695 && next != insn | 13956 && next != insn |
13696 && distance < LEA_SEARCH_THRESHOLD) | 13957 && distance < LEA_SEARCH_THRESHOLD) |
13697 { | 13958 { |
13698 if (INSN_P (next)) | 13959 if (NONDEBUG_INSN_P (next)) |
13699 { | 13960 { |
13700 distance++; | 13961 distance++; |
13701 | 13962 |
13702 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++) | 13963 for (use_rec = DF_INSN_USES (next); *use_rec; use_rec++) |
13703 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD | 13964 if ((DF_REF_TYPE (*use_rec) == DF_REF_REG_MEM_LOAD |
15389 ix86_expand_int_movcc (rtx operands[]) | 15650 ix86_expand_int_movcc (rtx operands[]) |
15390 { | 15651 { |
15391 enum rtx_code code = GET_CODE (operands[1]), compare_code; | 15652 enum rtx_code code = GET_CODE (operands[1]), compare_code; |
15392 rtx compare_seq, compare_op; | 15653 rtx compare_seq, compare_op; |
15393 enum machine_mode mode = GET_MODE (operands[0]); | 15654 enum machine_mode mode = GET_MODE (operands[0]); |
15394 bool sign_bit_compare_p = false;; | 15655 bool sign_bit_compare_p = false; |
15395 | 15656 |
15396 start_sequence (); | 15657 start_sequence (); |
15397 ix86_compare_op0 = XEXP (operands[1], 0); | 15658 ix86_compare_op0 = XEXP (operands[1], 0); |
15398 ix86_compare_op1 = XEXP (operands[1], 1); | 15659 ix86_compare_op1 = XEXP (operands[1], 1); |
15399 compare_op = ix86_expand_compare (code); | 15660 compare_op = ix86_expand_compare (code); |
15430 rtx tmp = out; | 15691 rtx tmp = out; |
15431 | 15692 |
15432 if (!sign_bit_compare_p) | 15693 if (!sign_bit_compare_p) |
15433 { | 15694 { |
15434 rtx flags; | 15695 rtx flags; |
15435 rtx (*insn)(rtx, rtx, rtx); | |
15436 bool fpcmp = false; | 15696 bool fpcmp = false; |
15437 | 15697 |
15438 compare_code = GET_CODE (compare_op); | 15698 compare_code = GET_CODE (compare_op); |
15439 | 15699 |
15440 flags = XEXP (compare_op, 0); | 15700 flags = XEXP (compare_op, 0); |
15471 if (reg_overlap_mentioned_p (out, ix86_compare_op0) | 15731 if (reg_overlap_mentioned_p (out, ix86_compare_op0) |
15472 || reg_overlap_mentioned_p (out, ix86_compare_op1)) | 15732 || reg_overlap_mentioned_p (out, ix86_compare_op1)) |
15473 tmp = gen_reg_rtx (mode); | 15733 tmp = gen_reg_rtx (mode); |
15474 | 15734 |
15475 if (mode == DImode) | 15735 if (mode == DImode) |
15476 insn = gen_x86_movdicc_0_m1; | 15736 emit_insn (gen_x86_movdicc_0_m1 (tmp, flags, compare_op)); |
15477 else | 15737 else |
15478 insn = gen_x86_movsicc_0_m1; | 15738 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), |
15479 | 15739 flags, compare_op)); |
15480 emit_insn (insn (tmp, flags, compare_op)); | |
15481 } | 15740 } |
15482 else | 15741 else |
15483 { | 15742 { |
15484 if (code == GT || code == GE) | 15743 if (code == GT || code == GE) |
15485 code = reverse_condition (code); | 15744 code = reverse_condition (code); |
16254 { | 16513 { |
16255 case V4SImode: | 16514 case V4SImode: |
16256 case V2DImode: | 16515 case V2DImode: |
16257 { | 16516 { |
16258 rtx t1, t2, mask; | 16517 rtx t1, t2, mask; |
16259 | 16518 rtx (*gen_sub3) (rtx, rtx, rtx); |
16260 /* Perform a parallel modulo subtraction. */ | 16519 |
16261 t1 = gen_reg_rtx (mode); | 16520 /* Subtract (-(INT MAX) - 1) from both operands to make |
16262 emit_insn ((mode == V4SImode | 16521 them signed. */ |
16263 ? gen_subv4si3 | |
16264 : gen_subv2di3) (t1, cop0, cop1)); | |
16265 | |
16266 /* Extract the original sign bit of op0. */ | |
16267 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode), | 16522 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode), |
16268 true, false); | 16523 true, false); |
16524 gen_sub3 = (mode == V4SImode | |
16525 ? gen_subv4si3 : gen_subv2di3); | |
16526 t1 = gen_reg_rtx (mode); | |
16527 emit_insn (gen_sub3 (t1, cop0, mask)); | |
16528 | |
16269 t2 = gen_reg_rtx (mode); | 16529 t2 = gen_reg_rtx (mode); |
16270 emit_insn ((mode == V4SImode | 16530 emit_insn (gen_sub3 (t2, cop1, mask)); |
16271 ? gen_andv4si3 | 16531 |
16272 : gen_andv2di3) (t2, cop0, mask)); | 16532 cop0 = t1; |
16273 | 16533 cop1 = t2; |
16274 /* XOR it back into the result of the subtraction. | |
16275 This results in the sign bit set iff we saw | |
16276 unsigned underflow. */ | |
16277 x = gen_reg_rtx (mode); | |
16278 emit_insn ((mode == V4SImode | |
16279 ? gen_xorv4si3 | |
16280 : gen_xorv2di3) (x, t1, t2)); | |
16281 | |
16282 code = GT; | 16534 code = GT; |
16283 } | 16535 } |
16284 break; | 16536 break; |
16285 | 16537 |
16286 case V16QImode: | 16538 case V16QImode: |
16288 /* Perform a parallel unsigned saturating subtraction. */ | 16540 /* Perform a parallel unsigned saturating subtraction. */ |
16289 x = gen_reg_rtx (mode); | 16541 x = gen_reg_rtx (mode); |
16290 emit_insn (gen_rtx_SET (VOIDmode, x, | 16542 emit_insn (gen_rtx_SET (VOIDmode, x, |
16291 gen_rtx_US_MINUS (mode, cop0, cop1))); | 16543 gen_rtx_US_MINUS (mode, cop0, cop1))); |
16292 | 16544 |
16545 cop0 = x; | |
16546 cop1 = CONST0_RTX (mode); | |
16293 code = EQ; | 16547 code = EQ; |
16294 negate = !negate; | 16548 negate = !negate; |
16295 break; | 16549 break; |
16296 | 16550 |
16297 default: | 16551 default: |
16298 gcc_unreachable (); | 16552 gcc_unreachable (); |
16299 } | 16553 } |
16300 | |
16301 cop0 = x; | |
16302 cop1 = CONST0_RTX (mode); | |
16303 } | 16554 } |
16304 } | 16555 } |
16305 | 16556 |
16306 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1, | 16557 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1, |
16307 operands[1+negate], operands[2-negate]); | 16558 operands[1+negate], operands[2-negate]); |
17021 emit_insn ((mode == DImode | 17272 emit_insn ((mode == DImode |
17022 ? gen_x86_shld | 17273 ? gen_x86_shld |
17023 : gen_x86_64_shld) (high[0], low[0], operands[2])); | 17274 : gen_x86_64_shld) (high[0], low[0], operands[2])); |
17024 } | 17275 } |
17025 | 17276 |
17026 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2])); | 17277 emit_insn ((mode == DImode |
17278 ? gen_ashlsi3 | |
17279 : gen_ashldi3) (low[0], low[0], operands[2])); | |
17027 | 17280 |
17028 if (TARGET_CMOVE && scratch) | 17281 if (TARGET_CMOVE && scratch) |
17029 { | 17282 { |
17030 ix86_expand_clear (scratch); | 17283 ix86_expand_clear (scratch); |
17031 emit_insn ((mode == DImode | 17284 emit_insn ((mode == DImode |
17032 ? gen_x86_shift_adj_1 | 17285 ? gen_x86_shiftsi_adj_1 |
17033 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2], | 17286 : gen_x86_shiftdi_adj_1) (high[0], low[0], operands[2], |
17034 scratch)); | 17287 scratch)); |
17035 } | 17288 } |
17036 else | 17289 else |
17037 emit_insn ((mode == DImode | 17290 emit_insn ((mode == DImode |
17038 ? gen_x86_shift_adj_2 | 17291 ? gen_x86_shiftsi_adj_2 |
17039 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2])); | 17292 : gen_x86_shiftdi_adj_2) (high[0], low[0], operands[2])); |
17040 } | 17293 } |
17041 | 17294 |
17042 void | 17295 void |
17043 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode) | 17296 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode) |
17044 { | 17297 { |
17107 emit_insn ((mode == DImode | 17360 emit_insn ((mode == DImode |
17108 ? gen_ashrsi3 | 17361 ? gen_ashrsi3 |
17109 : gen_ashrdi3) (scratch, scratch, | 17362 : gen_ashrdi3) (scratch, scratch, |
17110 GEN_INT (single_width - 1))); | 17363 GEN_INT (single_width - 1))); |
17111 emit_insn ((mode == DImode | 17364 emit_insn ((mode == DImode |
17112 ? gen_x86_shift_adj_1 | 17365 ? gen_x86_shiftsi_adj_1 |
17113 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2], | 17366 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2], |
17114 scratch)); | 17367 scratch)); |
17115 } | 17368 } |
17116 else | 17369 else |
17117 emit_insn ((mode == DImode | 17370 emit_insn ((mode == DImode |
17118 ? gen_x86_shift_adj_3 | 17371 ? gen_x86_shiftsi_adj_3 |
17119 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2])); | 17372 : gen_x86_shiftdi_adj_3) (low[0], high[0], operands[2])); |
17120 } | 17373 } |
17121 } | 17374 } |
17122 | 17375 |
17123 void | 17376 void |
17124 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode) | 17377 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode) |
17172 /* Heh. By reversing the arguments, we can reuse this pattern. */ | 17425 /* Heh. By reversing the arguments, we can reuse this pattern. */ |
17173 if (TARGET_CMOVE && scratch) | 17426 if (TARGET_CMOVE && scratch) |
17174 { | 17427 { |
17175 ix86_expand_clear (scratch); | 17428 ix86_expand_clear (scratch); |
17176 emit_insn ((mode == DImode | 17429 emit_insn ((mode == DImode |
17177 ? gen_x86_shift_adj_1 | 17430 ? gen_x86_shiftsi_adj_1 |
17178 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2], | 17431 : gen_x86_shiftdi_adj_1) (low[0], high[0], operands[2], |
17179 scratch)); | 17432 scratch)); |
17180 } | 17433 } |
17181 else | 17434 else |
17182 emit_insn ((mode == DImode | 17435 emit_insn ((mode == DImode |
17183 ? gen_x86_shift_adj_2 | 17436 ? gen_x86_shiftsi_adj_2 |
17184 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2])); | 17437 : gen_x86_shiftdi_adj_2) (low[0], high[0], operands[2])); |
17185 } | 17438 } |
17186 } | 17439 } |
17187 | 17440 |
17188 /* Predict just emitted jump instruction to be taken with probability PROB. */ | 17441 /* Predict just emitted jump instruction to be taken with probability PROB. */ |
17189 static void | 17442 static void |
19604 case PROCESSOR_K8: | 19857 case PROCESSOR_K8: |
19605 case PROCESSOR_AMDFAM10: | 19858 case PROCESSOR_AMDFAM10: |
19606 case PROCESSOR_NOCONA: | 19859 case PROCESSOR_NOCONA: |
19607 case PROCESSOR_GENERIC32: | 19860 case PROCESSOR_GENERIC32: |
19608 case PROCESSOR_GENERIC64: | 19861 case PROCESSOR_GENERIC64: |
19862 case PROCESSOR_BDVER1: | |
19609 return 3; | 19863 return 3; |
19610 | 19864 |
19611 case PROCESSOR_CORE2: | 19865 case PROCESSOR_CORE2: |
19612 return 4; | 19866 return 4; |
19613 | 19867 |
19793 break; | 20047 break; |
19794 | 20048 |
19795 case PROCESSOR_ATHLON: | 20049 case PROCESSOR_ATHLON: |
19796 case PROCESSOR_K8: | 20050 case PROCESSOR_K8: |
19797 case PROCESSOR_AMDFAM10: | 20051 case PROCESSOR_AMDFAM10: |
20052 case PROCESSOR_BDVER1: | |
19798 case PROCESSOR_ATOM: | 20053 case PROCESSOR_ATOM: |
19799 case PROCESSOR_GENERIC32: | 20054 case PROCESSOR_GENERIC32: |
19800 case PROCESSOR_GENERIC64: | 20055 case PROCESSOR_GENERIC64: |
19801 memory = get_attr_memory (insn); | 20056 memory = get_attr_memory (insn); |
19802 | 20057 |
19988 align = GET_MODE_ALIGNMENT (DFmode); | 20243 align = GET_MODE_ALIGNMENT (DFmode); |
19989 return align; | 20244 return align; |
19990 } | 20245 } |
19991 | 20246 |
19992 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned | 20247 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned |
19993 to 16byte boundary. */ | 20248 to 16byte boundary. Exact wording is: |
19994 if (TARGET_64BIT) | 20249 |
20250 An array uses the same alignment as its elements, except that a local or | |
20251 global array variable of length at least 16 bytes or | |
20252 a C99 variable-length array variable always has alignment of at least 16 bytes. | |
20253 | |
20254 This was added to allow use of aligned SSE instructions at arrays. This | |
20255 rule is meant for static storage (where compiler can not do the analysis | |
20256 by itself). We follow it for automatic variables only when convenient. | |
20257 We fully control everything in the function compiled and functions from | |
20258 other unit can not rely on the alignment. | |
20259 | |
20260 Exclude va_list type. It is the common case of local array where | |
20261 we can not benefit from the alignment. */ | |
20262 if (TARGET_64BIT && optimize_function_for_speed_p (cfun) | |
20263 && TARGET_SSE) | |
19995 { | 20264 { |
19996 if (AGGREGATE_TYPE_P (type) | 20265 if (AGGREGATE_TYPE_P (type) |
20266 && (TYPE_MAIN_VARIANT (type) | |
20267 != TYPE_MAIN_VARIANT (va_list_type_node)) | |
19997 && TYPE_SIZE (type) | 20268 && TYPE_SIZE (type) |
19998 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST | 20269 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST |
19999 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16 | 20270 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16 |
20000 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) | 20271 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) |
20001 return 128; | 20272 return 128; |
20096 | 20367 |
20097 fntype = TREE_TYPE (fndecl); | 20368 fntype = TREE_TYPE (fndecl); |
20098 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype))) | 20369 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype))) |
20099 { | 20370 { |
20100 /* Fastcall functions use ecx/edx for arguments, which leaves | 20371 /* Fastcall functions use ecx/edx for arguments, which leaves |
20372 us with EAX for the static chain. */ | |
20373 regno = AX_REG; | |
20374 } | |
20375 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (fntype))) | |
20376 { | |
20377 /* Thiscall functions use ecx for arguments, which leaves | |
20101 us with EAX for the static chain. */ | 20378 us with EAX for the static chain. */ |
20102 regno = AX_REG; | 20379 regno = AX_REG; |
20103 } | 20380 } |
20104 else if (ix86_function_regparm (fntype, fndecl) == 3) | 20381 else if (ix86_function_regparm (fntype, fndecl) == 3) |
20105 { | 20382 { |
20976 IX86_BUILTIN_VPERMILVARPS256, | 21253 IX86_BUILTIN_VPERMILVARPS256, |
20977 IX86_BUILTIN_VPERMILPD, | 21254 IX86_BUILTIN_VPERMILPD, |
20978 IX86_BUILTIN_VPERMILPS, | 21255 IX86_BUILTIN_VPERMILPS, |
20979 IX86_BUILTIN_VPERMILPD256, | 21256 IX86_BUILTIN_VPERMILPD256, |
20980 IX86_BUILTIN_VPERMILPS256, | 21257 IX86_BUILTIN_VPERMILPS256, |
21258 IX86_BUILTIN_VPERMIL2PD, | |
21259 IX86_BUILTIN_VPERMIL2PS, | |
21260 IX86_BUILTIN_VPERMIL2PD256, | |
21261 IX86_BUILTIN_VPERMIL2PS256, | |
20981 IX86_BUILTIN_VPERM2F128PD256, | 21262 IX86_BUILTIN_VPERM2F128PD256, |
20982 IX86_BUILTIN_VPERM2F128PS256, | 21263 IX86_BUILTIN_VPERM2F128PS256, |
20983 IX86_BUILTIN_VPERM2F128SI256, | 21264 IX86_BUILTIN_VPERM2F128SI256, |
20984 IX86_BUILTIN_VBROADCASTSS, | 21265 IX86_BUILTIN_VBROADCASTSS, |
20985 IX86_BUILTIN_VBROADCASTSD256, | 21266 IX86_BUILTIN_VBROADCASTSD256, |
22165 | 22446 |
22166 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 }, | 22447 { OPTION_MASK_ISA_ABM, CODE_FOR_clzhi2_abm, "__builtin_clzs", IX86_BUILTIN_CLZS, UNKNOWN, (int) UINT16_FTYPE_UINT16 }, |
22167 }; | 22448 }; |
22168 | 22449 |
22169 /* FMA4 and XOP. */ | 22450 /* FMA4 and XOP. */ |
22451 #define MULTI_ARG_4_DF2_DI_I V2DF_FTYPE_V2DF_V2DF_V2DI_INT | |
22452 #define MULTI_ARG_4_DF2_DI_I1 V4DF_FTYPE_V4DF_V4DF_V4DI_INT | |
22453 #define MULTI_ARG_4_SF2_SI_I V4SF_FTYPE_V4SF_V4SF_V4SI_INT | |
22454 #define MULTI_ARG_4_SF2_SI_I1 V8SF_FTYPE_V8SF_V8SF_V8SI_INT | |
22170 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF | 22455 #define MULTI_ARG_3_SF V4SF_FTYPE_V4SF_V4SF_V4SF |
22171 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF | 22456 #define MULTI_ARG_3_DF V2DF_FTYPE_V2DF_V2DF_V2DF |
22172 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF | 22457 #define MULTI_ARG_3_SF2 V8SF_FTYPE_V8SF_V8SF_V8SF |
22173 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF | 22458 #define MULTI_ARG_3_DF2 V4DF_FTYPE_V4DF_V4DF_V4DF |
22174 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI | 22459 #define MULTI_ARG_3_DI V2DI_FTYPE_V2DI_V2DI_V2DI |
22407 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, | 22692 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv16qi3, "__builtin_ia32_vpcomtrueub", IX86_BUILTIN_VPCOMTRUEUB, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, |
22408 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, | 22693 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv8hi3, "__builtin_ia32_vpcomtrueuw", IX86_BUILTIN_VPCOMTRUEUW, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, |
22409 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, | 22694 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv4si3, "__builtin_ia32_vpcomtrueud", IX86_BUILTIN_VPCOMTRUEUD, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, |
22410 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, | 22695 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_pcom_tfv2di3, "__builtin_ia32_vpcomtrueuq", IX86_BUILTIN_VPCOMTRUEUQ, (enum rtx_code) PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, |
22411 | 22696 |
22697 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v2df3, "__builtin_ia32_vpermil2pd", IX86_BUILTIN_VPERMIL2PD, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I }, | |
22698 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4sf3, "__builtin_ia32_vpermil2ps", IX86_BUILTIN_VPERMIL2PS, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I }, | |
22699 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v4df3, "__builtin_ia32_vpermil2pd256", IX86_BUILTIN_VPERMIL2PD256, UNKNOWN, (int)MULTI_ARG_4_DF2_DI_I1 }, | |
22700 { OPTION_MASK_ISA_XOP, CODE_FOR_xop_vpermil2v8sf3, "__builtin_ia32_vpermil2ps256", IX86_BUILTIN_VPERMIL2PS256, UNKNOWN, (int)MULTI_ARG_4_SF2_SI_I1 }, | |
22701 | |
22412 }; | 22702 }; |
22413 | 22703 |
22414 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not | 22704 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not |
22415 in the current target ISA to allow the user to compile particular modules | 22705 in the current target ISA to allow the user to compile particular modules |
22416 with different target specific options that differ from the command line | 22706 with different target specific options that differ from the command line |
22787 | 23077 |
22788 enum machine_mode tmode = insn_data[icode].operand[0].mode; | 23078 enum machine_mode tmode = insn_data[icode].operand[0].mode; |
22789 | 23079 |
22790 switch (m_type) | 23080 switch (m_type) |
22791 { | 23081 { |
23082 case MULTI_ARG_4_DF2_DI_I: | |
23083 case MULTI_ARG_4_DF2_DI_I1: | |
23084 case MULTI_ARG_4_SF2_SI_I: | |
23085 case MULTI_ARG_4_SF2_SI_I1: | |
23086 nargs = 4; | |
23087 last_arg_constant = true; | |
23088 break; | |
23089 | |
22792 case MULTI_ARG_3_SF: | 23090 case MULTI_ARG_3_SF: |
22793 case MULTI_ARG_3_DF: | 23091 case MULTI_ARG_3_DF: |
22794 case MULTI_ARG_3_SF2: | 23092 case MULTI_ARG_3_SF2: |
22795 case MULTI_ARG_3_DF2: | 23093 case MULTI_ARG_3_DF2: |
22796 case MULTI_ARG_3_DI: | 23094 case MULTI_ARG_3_DI: |
22928 } | 23226 } |
22929 break; | 23227 break; |
22930 | 23228 |
22931 case 3: | 23229 case 3: |
22932 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); | 23230 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); |
23231 break; | |
23232 | |
23233 case 4: | |
23234 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op, args[3].op); | |
22933 break; | 23235 break; |
22934 | 23236 |
22935 default: | 23237 default: |
22936 gcc_unreachable (); | 23238 gcc_unreachable (); |
22937 } | 23239 } |
23548 break; | 23850 break; |
23549 case V2DI_FTYPE_V2DI_UINT_UINT: | 23851 case V2DI_FTYPE_V2DI_UINT_UINT: |
23550 nargs = 3; | 23852 nargs = 3; |
23551 nargs_constant = 2; | 23853 nargs_constant = 2; |
23552 break; | 23854 break; |
23855 case V2DF_FTYPE_V2DF_V2DF_V2DI_INT: | |
23856 case V4DF_FTYPE_V4DF_V4DF_V4DI_INT: | |
23857 case V4SF_FTYPE_V4SF_V4SF_V4SI_INT: | |
23858 case V8SF_FTYPE_V8SF_V8SF_V8SI_INT: | |
23859 nargs = 4; | |
23860 nargs_constant = 1; | |
23861 break; | |
23553 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: | 23862 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: |
23554 nargs = 4; | 23863 nargs = 4; |
23555 nargs_constant = 2; | 23864 nargs_constant = 2; |
23556 break; | 23865 break; |
23557 default: | 23866 default: |
23617 error ("the last argument must be a 4-bit immediate"); | 23926 error ("the last argument must be a 4-bit immediate"); |
23618 return const0_rtx; | 23927 return const0_rtx; |
23619 | 23928 |
23620 case CODE_FOR_sse4_1_blendpd: | 23929 case CODE_FOR_sse4_1_blendpd: |
23621 case CODE_FOR_avx_vpermilv2df: | 23930 case CODE_FOR_avx_vpermilv2df: |
23931 case CODE_FOR_xop_vpermil2v2df3: | |
23932 case CODE_FOR_xop_vpermil2v4sf3: | |
23933 case CODE_FOR_xop_vpermil2v4df3: | |
23934 case CODE_FOR_xop_vpermil2v8sf3: | |
23622 error ("the last argument must be a 2-bit immediate"); | 23935 error ("the last argument must be a 2-bit immediate"); |
23623 return const0_rtx; | 23936 return const0_rtx; |
23624 | 23937 |
23625 case CODE_FOR_avx_vextractf128v4df: | 23938 case CODE_FOR_avx_vextractf128v4df: |
23626 case CODE_FOR_avx_vextractf128v8sf: | 23939 case CODE_FOR_avx_vextractf128v8sf: |
24273 /* Returns a function decl for a vectorized version of the builtin function | 24586 /* Returns a function decl for a vectorized version of the builtin function |
24274 with builtin function code FN and the result vector type TYPE, or NULL_TREE | 24587 with builtin function code FN and the result vector type TYPE, or NULL_TREE |
24275 if it is not available. */ | 24588 if it is not available. */ |
24276 | 24589 |
24277 static tree | 24590 static tree |
24278 ix86_builtin_vectorized_function (unsigned int fn, tree type_out, | 24591 ix86_builtin_vectorized_function (tree fndecl, tree type_out, |
24279 tree type_in) | 24592 tree type_in) |
24280 { | 24593 { |
24281 enum machine_mode in_mode, out_mode; | 24594 enum machine_mode in_mode, out_mode; |
24282 int in_n, out_n; | 24595 int in_n, out_n; |
24596 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl); | |
24283 | 24597 |
24284 if (TREE_CODE (type_out) != VECTOR_TYPE | 24598 if (TREE_CODE (type_out) != VECTOR_TYPE |
24285 || TREE_CODE (type_in) != VECTOR_TYPE) | 24599 || TREE_CODE (type_in) != VECTOR_TYPE |
24600 || DECL_BUILT_IN_CLASS (fndecl) != BUILT_IN_NORMAL) | |
24286 return NULL_TREE; | 24601 return NULL_TREE; |
24287 | 24602 |
24288 out_mode = TYPE_MODE (TREE_TYPE (type_out)); | 24603 out_mode = TYPE_MODE (TREE_TYPE (type_out)); |
24289 out_n = TYPE_VECTOR_SUBPARTS (type_out); | 24604 out_n = TYPE_VECTOR_SUBPARTS (type_out); |
24290 in_mode = TYPE_MODE (TREE_TYPE (type_in)); | 24605 in_mode = TYPE_MODE (TREE_TYPE (type_in)); |
24538 return new_fndecl; | 24853 return new_fndecl; |
24539 } | 24854 } |
24540 | 24855 |
24541 | 24856 |
24542 /* Returns a decl of a function that implements conversion of an integer vector | 24857 /* Returns a decl of a function that implements conversion of an integer vector |
24543 into a floating-point vector, or vice-versa. TYPE is the type of the integer | 24858 into a floating-point vector, or vice-versa. DEST_TYPE and SRC_TYPE |
24544 side of the conversion. | 24859 are the types involved when converting according to CODE. |
24545 Return NULL_TREE if it is not available. */ | 24860 Return NULL_TREE if it is not available. */ |
24546 | 24861 |
24547 static tree | 24862 static tree |
24548 ix86_vectorize_builtin_conversion (unsigned int code, tree type) | 24863 ix86_vectorize_builtin_conversion (unsigned int code, |
24549 { | 24864 tree dest_type, tree src_type) |
24550 if (! (TARGET_SSE2 && TREE_CODE (type) == VECTOR_TYPE)) | 24865 { |
24866 if (! TARGET_SSE2) | |
24551 return NULL_TREE; | 24867 return NULL_TREE; |
24552 | 24868 |
24553 switch (code) | 24869 switch (code) |
24554 { | 24870 { |
24555 case FLOAT_EXPR: | 24871 case FLOAT_EXPR: |
24556 switch (TYPE_MODE (type)) | 24872 switch (TYPE_MODE (src_type)) |
24557 { | 24873 { |
24558 case V4SImode: | 24874 case V4SImode: |
24559 return TYPE_UNSIGNED (type) | 24875 switch (TYPE_MODE (dest_type)) |
24560 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS] | 24876 { |
24561 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]; | 24877 case V4SFmode: |
24878 return (TYPE_UNSIGNED (src_type) | |
24879 ? ix86_builtins[IX86_BUILTIN_CVTUDQ2PS] | |
24880 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]); | |
24881 case V4DFmode: | |
24882 return (TYPE_UNSIGNED (src_type) | |
24883 ? NULL_TREE | |
24884 : ix86_builtins[IX86_BUILTIN_CVTDQ2PD256]); | |
24885 default: | |
24886 return NULL_TREE; | |
24887 } | |
24888 break; | |
24889 case V8SImode: | |
24890 switch (TYPE_MODE (dest_type)) | |
24891 { | |
24892 case V8SFmode: | |
24893 return (TYPE_UNSIGNED (src_type) | |
24894 ? NULL_TREE | |
24895 : ix86_builtins[IX86_BUILTIN_CVTDQ2PS]); | |
24896 default: | |
24897 return NULL_TREE; | |
24898 } | |
24899 break; | |
24562 default: | 24900 default: |
24563 return NULL_TREE; | 24901 return NULL_TREE; |
24564 } | 24902 } |
24565 | 24903 |
24566 case FIX_TRUNC_EXPR: | 24904 case FIX_TRUNC_EXPR: |
24567 switch (TYPE_MODE (type)) | 24905 switch (TYPE_MODE (dest_type)) |
24568 { | 24906 { |
24569 case V4SImode: | 24907 case V4SImode: |
24570 return TYPE_UNSIGNED (type) | 24908 switch (TYPE_MODE (src_type)) |
24571 ? NULL_TREE | 24909 { |
24572 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]; | 24910 case V4SFmode: |
24911 return (TYPE_UNSIGNED (dest_type) | |
24912 ? NULL_TREE | |
24913 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]); | |
24914 case V4DFmode: | |
24915 return (TYPE_UNSIGNED (dest_type) | |
24916 ? NULL_TREE | |
24917 : ix86_builtins[IX86_BUILTIN_CVTTPD2DQ256]); | |
24918 default: | |
24919 return NULL_TREE; | |
24920 } | |
24921 break; | |
24922 | |
24923 case V8SImode: | |
24924 switch (TYPE_MODE (src_type)) | |
24925 { | |
24926 case V8SFmode: | |
24927 return (TYPE_UNSIGNED (dest_type) | |
24928 ? NULL_TREE | |
24929 : ix86_builtins[IX86_BUILTIN_CVTTPS2DQ256]); | |
24930 default: | |
24931 return NULL_TREE; | |
24932 } | |
24933 break; | |
24934 | |
24573 default: | 24935 default: |
24574 return NULL_TREE; | 24936 return NULL_TREE; |
24575 } | 24937 } |
24938 | |
24576 default: | 24939 default: |
24577 return NULL_TREE; | 24940 return NULL_TREE; |
24578 | 24941 } |
24579 } | 24942 |
24943 return NULL_TREE; | |
24580 } | 24944 } |
24581 | 24945 |
24582 /* Returns a code for a target-specific builtin that implements | 24946 /* Returns a code for a target-specific builtin that implements |
24583 reciprocal of the function, or NULL_TREE if not available. */ | 24947 reciprocal of the function, or NULL_TREE if not available. */ |
24584 | 24948 |
24638 unsigned HOST_WIDE_INT ei; | 25002 unsigned HOST_WIDE_INT ei; |
24639 | 25003 |
24640 if (!CONST_INT_P (er)) | 25004 if (!CONST_INT_P (er)) |
24641 return 0; | 25005 return 0; |
24642 ei = INTVAL (er); | 25006 ei = INTVAL (er); |
24643 if (ei >= 2 * nelt) | 25007 if (ei >= nelt) |
24644 return 0; | 25008 return 0; |
24645 ipar[i] = ei; | 25009 ipar[i] = ei; |
24646 } | 25010 } |
24647 | 25011 |
24648 switch (mode) | 25012 switch (mode) |
25833 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); | 26197 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); |
25834 fprintf (file, "%s:\n", lazy_ptr_name); | 26198 fprintf (file, "%s:\n", lazy_ptr_name); |
25835 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); | 26199 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); |
25836 fprintf (file, ASM_LONG "%s\n", binder_name); | 26200 fprintf (file, ASM_LONG "%s\n", binder_name); |
25837 } | 26201 } |
25838 | |
25839 void | |
25840 darwin_x86_file_end (void) | |
25841 { | |
25842 darwin_file_end (); | |
25843 ix86_file_end (); | |
25844 } | |
25845 #endif /* TARGET_MACHO */ | 26202 #endif /* TARGET_MACHO */ |
25846 | 26203 |
25847 /* Order the registers for register allocator. */ | 26204 /* Order the registers for register allocator. */ |
25848 | 26205 |
25849 void | 26206 void |
26035 { | 26392 { |
26036 int regno; | 26393 int regno; |
26037 | 26394 |
26038 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) | 26395 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) |
26039 regno = aggr ? DX_REG : CX_REG; | 26396 regno = aggr ? DX_REG : CX_REG; |
26397 else if (lookup_attribute ("thiscall", TYPE_ATTRIBUTES (type))) | |
26398 { | |
26399 regno = CX_REG; | |
26400 if (aggr) | |
26401 return gen_rtx_MEM (SImode, | |
26402 plus_constant (stack_pointer_rtx, 4)); | |
26403 } | |
26040 else | 26404 else |
26041 { | 26405 { |
26042 regno = AX_REG; | 26406 regno = AX_REG; |
26043 if (aggr) | 26407 if (aggr) |
26044 { | 26408 { |
26086 the target function. DELTA is an immediate constant offset to be | 26450 the target function. DELTA is an immediate constant offset to be |
26087 added to THIS. If VCALL_OFFSET is nonzero, the word at | 26451 added to THIS. If VCALL_OFFSET is nonzero, the word at |
26088 *(*this + vcall_offset) should be added to THIS. */ | 26452 *(*this + vcall_offset) should be added to THIS. */ |
26089 | 26453 |
26090 static void | 26454 static void |
26091 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, | 26455 x86_output_mi_thunk (FILE *file, |
26092 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, | 26456 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, |
26093 HOST_WIDE_INT vcall_offset, tree function) | 26457 HOST_WIDE_INT vcall_offset, tree function) |
26094 { | 26458 { |
26095 rtx xops[3]; | 26459 rtx xops[3]; |
26096 rtx this_param = x86_this_parameter (function); | 26460 rtx this_param = x86_this_parameter (function); |
26097 rtx this_reg, tmp; | 26461 rtx this_reg, tmp; |
26462 | |
26463 /* Make sure unwind info is emitted for the thunk if needed. */ | |
26464 final_start_function (emit_barrier (), file, 1); | |
26098 | 26465 |
26099 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well | 26466 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well |
26100 pull it in now and let DELTA benefit. */ | 26467 pull it in now and let DELTA benefit. */ |
26101 if (REG_P (this_param)) | 26468 if (REG_P (this_param)) |
26102 this_reg = this_param; | 26469 this_reg = this_param; |
26111 this_reg = NULL_RTX; | 26478 this_reg = NULL_RTX; |
26112 | 26479 |
26113 /* Adjust the this parameter by a fixed constant. */ | 26480 /* Adjust the this parameter by a fixed constant. */ |
26114 if (delta) | 26481 if (delta) |
26115 { | 26482 { |
26116 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'. | 26483 xops[0] = GEN_INT (delta); |
26117 Exceptions: -128 encodes smaller than 128, so swap sign and op. */ | |
26118 bool sub = delta < 0 || delta == 128; | |
26119 xops[0] = GEN_INT (sub ? -delta : delta); | |
26120 xops[1] = this_reg ? this_reg : this_param; | 26484 xops[1] = this_reg ? this_reg : this_param; |
26121 if (TARGET_64BIT) | 26485 if (TARGET_64BIT) |
26122 { | 26486 { |
26123 if (!x86_64_general_operand (xops[0], DImode)) | 26487 if (!x86_64_general_operand (xops[0], DImode)) |
26124 { | 26488 { |
26126 xops[1] = tmp; | 26490 xops[1] = tmp; |
26127 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops); | 26491 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops); |
26128 xops[0] = tmp; | 26492 xops[0] = tmp; |
26129 xops[1] = this_param; | 26493 xops[1] = this_param; |
26130 } | 26494 } |
26131 if (sub) | 26495 if (x86_maybe_negate_const_int (&xops[0], DImode)) |
26132 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops); | 26496 output_asm_insn ("sub{q}\t{%0, %1|%1, %0}", xops); |
26133 else | 26497 else |
26134 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); | 26498 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); |
26135 } | 26499 } |
26136 else if (sub) | 26500 else if (x86_maybe_negate_const_int (&xops[0], SImode)) |
26137 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops); | 26501 output_asm_insn ("sub{l}\t{%0, %1|%1, %0}", xops); |
26138 else | 26502 else |
26139 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); | 26503 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); |
26140 } | 26504 } |
26141 | 26505 |
26146 tmp = gen_rtx_REG (DImode, R10_REG); | 26510 tmp = gen_rtx_REG (DImode, R10_REG); |
26147 else | 26511 else |
26148 { | 26512 { |
26149 int tmp_regno = CX_REG; | 26513 int tmp_regno = CX_REG; |
26150 if (lookup_attribute ("fastcall", | 26514 if (lookup_attribute ("fastcall", |
26151 TYPE_ATTRIBUTES (TREE_TYPE (function)))) | 26515 TYPE_ATTRIBUTES (TREE_TYPE (function))) |
26516 || lookup_attribute ("thiscall", | |
26517 TYPE_ATTRIBUTES (TREE_TYPE (function)))) | |
26152 tmp_regno = AX_REG; | 26518 tmp_regno = AX_REG; |
26153 tmp = gen_rtx_REG (SImode, tmp_regno); | 26519 tmp = gen_rtx_REG (SImode, tmp_regno); |
26154 } | 26520 } |
26155 | 26521 |
26156 xops[0] = gen_rtx_MEM (Pmode, this_reg); | 26522 xops[0] = gen_rtx_MEM (Pmode, this_reg); |
26222 xops[1] = tmp; | 26588 xops[1] = tmp; |
26223 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops); | 26589 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops); |
26224 output_asm_insn ("jmp\t{*}%1", xops); | 26590 output_asm_insn ("jmp\t{*}%1", xops); |
26225 } | 26591 } |
26226 } | 26592 } |
26593 final_end_function (); | |
26227 } | 26594 } |
26228 | 26595 |
26229 static void | 26596 static void |
26230 x86_file_start (void) | 26597 x86_file_start (void) |
26231 { | 26598 { |
26263 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) | 26630 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) |
26264 { | 26631 { |
26265 if (TARGET_64BIT) | 26632 if (TARGET_64BIT) |
26266 { | 26633 { |
26267 #ifndef NO_PROFILE_COUNTERS | 26634 #ifndef NO_PROFILE_COUNTERS |
26268 fprintf (file, "\tleaq\t" LPREFIX "P%d@(%%rip),%%r11\n", labelno); | 26635 fprintf (file, "\tleaq\t" LPREFIX "P%d(%%rip),%%r11\n", labelno); |
26269 #endif | 26636 #endif |
26270 | 26637 |
26271 if (DEFAULT_ABI == SYSV_ABI && flag_pic) | 26638 if (DEFAULT_ABI == SYSV_ABI && flag_pic) |
26272 fputs ("\tcall\t*" MCOUNT_NAME "@GOTPCREL(%rip)\n", file); | 26639 fputs ("\tcall\t*" MCOUNT_NAME "@GOTPCREL(%rip)\n", file); |
26273 else | 26640 else |
26495 && ((JUMP_P (prev) && any_condjump_p (prev)) | 26862 && ((JUMP_P (prev) && any_condjump_p (prev)) |
26496 || CALL_P (prev))) | 26863 || CALL_P (prev))) |
26497 replace = true; | 26864 replace = true; |
26498 /* Empty functions get branch mispredict even when the jump destination | 26865 /* Empty functions get branch mispredict even when the jump destination |
26499 is not visible to us. */ | 26866 is not visible to us. */ |
26500 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) | 26867 if (!prev && !optimize_function_for_size_p (cfun)) |
26501 replace = true; | 26868 replace = true; |
26502 } | 26869 } |
26503 if (replace) | 26870 if (replace) |
26504 { | 26871 { |
26505 emit_jump_insn_before (gen_return_internal_long (), ret); | 26872 emit_jump_insn_before (gen_return_internal_long (), ret); |
26555 bool | 26922 bool |
26556 x86_extended_reg_mentioned_p (rtx insn) | 26923 x86_extended_reg_mentioned_p (rtx insn) |
26557 { | 26924 { |
26558 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn, | 26925 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn, |
26559 extended_reg_mentioned_1, NULL); | 26926 extended_reg_mentioned_1, NULL); |
26927 } | |
26928 | |
26929 /* If profitable, negate (without causing overflow) integer constant | |
26930 of mode MODE at location LOC. Return true in this case. */ | |
26931 bool | |
26932 x86_maybe_negate_const_int (rtx *loc, enum machine_mode mode) | |
26933 { | |
26934 HOST_WIDE_INT val; | |
26935 | |
26936 if (!CONST_INT_P (*loc)) | |
26937 return false; | |
26938 | |
26939 switch (mode) | |
26940 { | |
26941 case DImode: | |
26942 /* DImode x86_64 constants must fit in 32 bits. */ | |
26943 gcc_assert (x86_64_immediate_operand (*loc, mode)); | |
26944 | |
26945 mode = SImode; | |
26946 break; | |
26947 | |
26948 case SImode: | |
26949 case HImode: | |
26950 case QImode: | |
26951 break; | |
26952 | |
26953 default: | |
26954 gcc_unreachable (); | |
26955 } | |
26956 | |
26957 /* Avoid overflows. */ | |
26958 if (mode_signbit_p (mode, *loc)) | |
26959 return false; | |
26960 | |
26961 val = INTVAL (*loc); | |
26962 | |
26963 /* Make things pretty and `subl $4,%eax' rather than `addl $-4,%eax'. | |
26964 Exceptions: -128 encodes smaller than 128, so swap sign and op. */ | |
26965 if ((val < 0 && val != -128) | |
26966 || val == 128) | |
26967 { | |
26968 *loc = GEN_INT (-val); | |
26969 return true; | |
26970 } | |
26971 | |
26972 return false; | |
26560 } | 26973 } |
26561 | 26974 |
26562 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code | 26975 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code |
26563 optabs would emit if we didn't have TFmode patterns. */ | 26976 optabs would emit if we didn't have TFmode patterns. */ |
26564 | 26977 |
26660 /* First attempt to recognize VAL as-is. */ | 27073 /* First attempt to recognize VAL as-is. */ |
26661 dup = gen_rtx_VEC_DUPLICATE (mode, val); | 27074 dup = gen_rtx_VEC_DUPLICATE (mode, val); |
26662 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup)); | 27075 insn = emit_insn (gen_rtx_SET (VOIDmode, target, dup)); |
26663 if (recog_memoized (insn) < 0) | 27076 if (recog_memoized (insn) < 0) |
26664 { | 27077 { |
27078 rtx seq; | |
26665 /* If that fails, force VAL into a register. */ | 27079 /* If that fails, force VAL into a register. */ |
27080 | |
27081 start_sequence (); | |
26666 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val); | 27082 XEXP (dup, 0) = force_reg (GET_MODE_INNER (mode), val); |
27083 seq = get_insns (); | |
27084 end_sequence (); | |
27085 if (seq) | |
27086 emit_insn_before (seq, insn); | |
27087 | |
26667 ok = recog_memoized (insn) >= 0; | 27088 ok = recog_memoized (insn) >= 0; |
26668 gcc_assert (ok); | 27089 gcc_assert (ok); |
26669 } | 27090 } |
26670 } | 27091 } |
26671 return true; | 27092 return true; |
28831 if they are not variable. */ | 29252 if they are not variable. */ |
28832 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, | 29253 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
28833 /* Fastcall attribute says callee is responsible for popping arguments | 29254 /* Fastcall attribute says callee is responsible for popping arguments |
28834 if they are not variable. */ | 29255 if they are not variable. */ |
28835 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, | 29256 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
29257 /* Thiscall attribute says callee is responsible for popping arguments | |
29258 if they are not variable. */ | |
29259 { "thiscall", 0, 0, false, true, true, ix86_handle_cconv_attribute }, | |
28836 /* Cdecl attribute says the callee is a normal C declaration */ | 29260 /* Cdecl attribute says the callee is a normal C declaration */ |
28837 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute }, | 29261 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute }, |
28838 /* Regparm attribute specifies how many integer arguments are to be | 29262 /* Regparm attribute specifies how many integer arguments are to be |
28839 passed in registers. */ | 29263 passed in registers. */ |
28840 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute }, | 29264 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute }, |
28892 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type) | 29316 ix86_vectorize_builtin_vec_perm (tree vec_type, tree *mask_type) |
28893 { | 29317 { |
28894 tree itype = TREE_TYPE (vec_type); | 29318 tree itype = TREE_TYPE (vec_type); |
28895 bool u = TYPE_UNSIGNED (itype); | 29319 bool u = TYPE_UNSIGNED (itype); |
28896 enum machine_mode vmode = TYPE_MODE (vec_type); | 29320 enum machine_mode vmode = TYPE_MODE (vec_type); |
28897 enum ix86_builtins fcode; | 29321 enum ix86_builtins fcode = fcode; /* Silence bogus warning. */ |
28898 bool ok = TARGET_SSE2; | 29322 bool ok = TARGET_SSE2; |
28899 | 29323 |
28900 switch (vmode) | 29324 switch (vmode) |
28901 { | 29325 { |
28902 case V4DFmode: | 29326 case V4DFmode: |
29101 mask |= (d->perm[i * 2] >= 16) << i; | 29525 mask |= (d->perm[i * 2] >= 16) << i; |
29102 | 29526 |
29103 do_subreg: | 29527 do_subreg: |
29104 vmode = V8HImode; | 29528 vmode = V8HImode; |
29105 target = gen_lowpart (vmode, target); | 29529 target = gen_lowpart (vmode, target); |
29106 op0 = gen_lowpart (vmode, target); | 29530 op0 = gen_lowpart (vmode, op0); |
29107 op1 = gen_lowpart (vmode, target); | 29531 op1 = gen_lowpart (vmode, op1); |
29108 break; | 29532 break; |
29109 | 29533 |
29110 default: | 29534 default: |
29111 gcc_unreachable (); | 29535 gcc_unreachable (); |
29112 } | 29536 } |
29113 | 29537 |
29114 /* This matches five different patterns with the different modes. */ | 29538 /* This matches five different patterns with the different modes. */ |
29115 x = gen_rtx_VEC_MERGE (vmode, op0, op1, GEN_INT (mask)); | 29539 x = gen_rtx_VEC_MERGE (vmode, op1, op0, GEN_INT (mask)); |
29116 x = gen_rtx_SET (VOIDmode, target, x); | 29540 x = gen_rtx_SET (VOIDmode, target, x); |
29117 emit_insn (x); | 29541 emit_insn (x); |
29118 | 29542 |
29119 return true; | 29543 return true; |
29120 } | 29544 } |
29222 /* Check plain VEC_SELECT first, because AVX has instructions that could | 29646 /* Check plain VEC_SELECT first, because AVX has instructions that could |
29223 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory | 29647 match both SEL and SEL+CONCAT, but the plain SEL will allow a memory |
29224 input where SEL+CONCAT may not. */ | 29648 input where SEL+CONCAT may not. */ |
29225 if (d->op0 == d->op1) | 29649 if (d->op0 == d->op1) |
29226 { | 29650 { |
29227 if (expand_vselect (d->target, d->op0, d->perm, nelt)) | 29651 int mask = nelt - 1; |
29652 | |
29653 for (i = 0; i < nelt; i++) | |
29654 perm2[i] = d->perm[i] & mask; | |
29655 | |
29656 if (expand_vselect (d->target, d->op0, perm2, nelt)) | |
29228 return true; | 29657 return true; |
29229 | 29658 |
29230 /* There are plenty of patterns in sse.md that are written for | 29659 /* There are plenty of patterns in sse.md that are written for |
29231 SEL+CONCAT and are not replicated for a single op. Perhaps | 29660 SEL+CONCAT and are not replicated for a single op. Perhaps |
29232 that should be changed, to avoid the nastiness here. */ | 29661 that should be changed, to avoid the nastiness here. */ |
29233 | 29662 |
29234 /* Recognize interleave style patterns, which means incrementing | 29663 /* Recognize interleave style patterns, which means incrementing |
29235 every other permutation operand. */ | 29664 every other permutation operand. */ |
29236 for (i = 0; i < nelt; i += 2) | 29665 for (i = 0; i < nelt; i += 2) |
29237 { | 29666 { |
29238 perm2[i] = d->perm[i]; | 29667 perm2[i] = d->perm[i] & mask; |
29239 perm2[i+1] = d->perm[i+1] + nelt; | 29668 perm2[i + 1] = (d->perm[i + 1] & mask) + nelt; |
29240 } | 29669 } |
29241 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt)) | 29670 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt)) |
29242 return true; | 29671 return true; |
29243 | 29672 |
29244 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */ | 29673 /* Recognize shufps, which means adding {0, 0, nelt, nelt}. */ |
29245 if (nelt >= 4) | 29674 if (nelt >= 4) |
29246 { | 29675 { |
29247 memcpy (perm2, d->perm, nelt); | 29676 for (i = 0; i < nelt; i += 4) |
29248 for (i = 2; i < nelt; i += 4) | |
29249 { | 29677 { |
29250 perm2[i+0] += nelt; | 29678 perm2[i + 0] = d->perm[i + 0] & mask; |
29251 perm2[i+1] += nelt; | 29679 perm2[i + 1] = d->perm[i + 1] & mask; |
29680 perm2[i + 2] = (d->perm[i + 2] & mask) + nelt; | |
29681 perm2[i + 3] = (d->perm[i + 3] & mask) + nelt; | |
29252 } | 29682 } |
29253 | 29683 |
29254 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt)) | 29684 if (expand_vselect_vconcat (d->target, d->op0, d->op0, perm2, nelt)) |
29255 return true; | 29685 return true; |
29256 } | 29686 } |
30381 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail | 30811 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail |
30382 | 30812 |
30383 #undef TARGET_FUNCTION_VALUE | 30813 #undef TARGET_FUNCTION_VALUE |
30384 #define TARGET_FUNCTION_VALUE ix86_function_value | 30814 #define TARGET_FUNCTION_VALUE ix86_function_value |
30385 | 30815 |
30816 #undef TARGET_FUNCTION_VALUE_REGNO_P | |
30817 #define TARGET_FUNCTION_VALUE_REGNO_P ix86_function_value_regno_p | |
30818 | |
30386 #undef TARGET_SECONDARY_RELOAD | 30819 #undef TARGET_SECONDARY_RELOAD |
30387 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload | 30820 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload |
30388 | 30821 |
30389 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST | 30822 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST |
30390 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ | 30823 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \ |
30427 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required | 30860 #define TARGET_FRAME_POINTER_REQUIRED ix86_frame_pointer_required |
30428 | 30861 |
30429 #undef TARGET_CAN_ELIMINATE | 30862 #undef TARGET_CAN_ELIMINATE |
30430 #define TARGET_CAN_ELIMINATE ix86_can_eliminate | 30863 #define TARGET_CAN_ELIMINATE ix86_can_eliminate |
30431 | 30864 |
30865 #undef TARGET_ASM_CODE_END | |
30866 #define TARGET_ASM_CODE_END ix86_code_end | |
30867 | |
30432 struct gcc_target targetm = TARGET_INITIALIZER; | 30868 struct gcc_target targetm = TARGET_INITIALIZER; |
30433 | 30869 |
30434 #include "gt-i386.h" | 30870 #include "gt-i386.h" |