Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/i386/sse.md @ 55:77e2b8dfacca gcc-4.4.5
update it from 4.4.3 to 4.5.0
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 12 Feb 2010 23:39:51 +0900 |
parents | 3bfb6c00c1e0 |
children | b7f97abdc517 |
comparison
equal
deleted
inserted
replaced
52:c156f1bd5cd9 | 55:77e2b8dfacca |
---|---|
17 ;; You should have received a copy of the GNU General Public License | 17 ;; You should have received a copy of the GNU General Public License |
18 ;; along with GCC; see the file COPYING3. If not see | 18 ;; along with GCC; see the file COPYING3. If not see |
19 ;; <http://www.gnu.org/licenses/>. | 19 ;; <http://www.gnu.org/licenses/>. |
20 | 20 |
21 | 21 |
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets | 22 ;; 16 byte integral modes handled by SSE |
23 ;; special-cased for TARGET_64BIT. | |
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI]) | 23 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI]) |
25 | 24 |
26 ;; All 16-byte vector modes handled by SSE | 25 ;; All 16-byte vector modes handled by SSE |
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF]) | 26 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF]) |
27 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF]) | |
28 | 28 |
29 ;; 32 byte integral vector modes handled by AVX | 29 ;; 32 byte integral vector modes handled by AVX |
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI]) | 30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI]) |
31 | 31 |
32 ;; All 32-byte vector modes handled by AVX | 32 ;; All 32-byte vector modes handled by AVX |
37 | 37 |
38 ;; All DI vector modes handled by AVX | 38 ;; All DI vector modes handled by AVX |
39 (define_mode_iterator AVXMODEDI [V4DI V2DI]) | 39 (define_mode_iterator AVXMODEDI [V4DI V2DI]) |
40 | 40 |
41 ;; All vector modes handled by AVX | 41 ;; All vector modes handled by AVX |
42 (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF]) | 42 (define_mode_iterator AVXMODE |
43 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF]) | |
44 (define_mode_iterator AVXMODE16 | |
45 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF]) | |
43 | 46 |
44 ;; Mix-n-match | 47 ;; Mix-n-match |
45 (define_mode_iterator SSEMODE12 [V16QI V8HI]) | 48 (define_mode_iterator SSEMODE12 [V16QI V8HI]) |
46 (define_mode_iterator SSEMODE24 [V8HI V4SI]) | 49 (define_mode_iterator SSEMODE24 [V8HI V4SI]) |
47 (define_mode_iterator SSEMODE14 [V16QI V4SI]) | 50 (define_mode_iterator SSEMODE14 [V16QI V4SI]) |
48 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI]) | 51 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI]) |
49 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI]) | 52 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI]) |
50 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI]) | 53 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI]) |
51 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF]) | 54 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF]) |
55 (define_mode_iterator FMA4MODEF4 [V8SF V4DF]) | |
52 (define_mode_iterator SSEMODEF2P [V4SF V2DF]) | 56 (define_mode_iterator SSEMODEF2P [V4SF V2DF]) |
53 | 57 |
54 (define_mode_iterator AVX256MODEF2P [V8SF V4DF]) | 58 (define_mode_iterator AVX256MODEF2P [V8SF V4DF]) |
55 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF]) | 59 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF]) |
60 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF]) | |
56 (define_mode_iterator AVX256MODE4P [V4DI V4DF]) | 61 (define_mode_iterator AVX256MODE4P [V4DI V4DF]) |
57 (define_mode_iterator AVX256MODE8P [V8SI V8SF]) | 62 (define_mode_iterator AVX256MODE8P [V8SI V8SF]) |
58 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF]) | 63 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF]) |
59 (define_mode_iterator AVXMODEF4P [V4SF V4DF]) | 64 (define_mode_iterator AVXMODEF4P [V4SF V4DF]) |
65 (define_mode_iterator AVXMODEFDP [V2DF V4DF]) | |
66 (define_mode_iterator AVXMODEFSP [V4SF V8SF]) | |
60 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF]) | 67 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF]) |
61 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI]) | 68 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI]) |
62 | 69 |
63 ;; Int-float size matches | 70 ;; Int-float size matches |
64 (define_mode_iterator SSEMODE4S [V4SF V4SI]) | 71 (define_mode_iterator SSEMODE4S [V4SF V4SI]) |
65 (define_mode_iterator SSEMODE2D [V2DF V2DI]) | 72 (define_mode_iterator SSEMODE2D [V2DF V2DI]) |
66 | 73 |
67 ;; Modes handled by integer vcond pattern | 74 ;; Modes handled by integer vcond pattern |
68 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI | 75 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI |
69 (V2DI "TARGET_SSE4_2 || TARGET_SSE5")]) | 76 (V2DI "TARGET_SSE4_2")]) |
77 | |
78 ;; Modes handled by vec_extract_even/odd pattern. | |
79 (define_mode_iterator SSEMODE_EO | |
80 [(V4SF "TARGET_SSE") | |
81 (V2DF "TARGET_SSE2") | |
82 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2") | |
83 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2") | |
84 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")]) | |
70 | 85 |
71 ;; Mapping from float mode to required SSE level | 86 ;; Mapping from float mode to required SSE level |
72 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")]) | 87 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")]) |
73 | 88 |
74 ;; Mapping from integer vector mode to mnemonic suffix | 89 ;; Mapping from integer vector mode to mnemonic suffix |
75 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")]) | 90 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")]) |
76 | 91 |
77 ;; Mapping of the sse5 suffix | 92 ;; Mapping of the fma4 suffix |
93 (define_mode_attr fma4modesuffixf4 [(V8SF "ps") (V4DF "pd")]) | |
94 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd") | |
95 (V4SF "ss") (V2DF "sd")]) | |
96 | |
97 ;; Mapping of the avx suffix | |
78 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd") | 98 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd") |
79 (V4SF "ps") (V2DF "pd")]) | 99 (V4SF "ps") (V2DF "pd")]) |
80 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd") | 100 |
81 (V4SF "ss") (V2DF "sd")]) | |
82 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")]) | 101 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")]) |
83 | 102 |
84 ;; Mapping of the max integer size for sse5 rotate immediate constraint | 103 (define_mode_attr ssescalarmodesuffix2s [(V4SF "ss") (V4SI "d")]) |
104 | |
105 ;; Mapping of the max integer size for xop rotate immediate constraint | |
85 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")]) | 106 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")]) |
86 | 107 |
87 ;; Mapping of vector modes back to the scalar modes | 108 ;; Mapping of vector modes back to the scalar modes |
88 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF") | 109 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF") |
89 (V16QI "QI") (V8HI "HI") | 110 (V16QI "QI") (V8HI "HI") |
90 (V4SI "SI") (V2DI "DI")]) | 111 (V4SI "SI") (V2DI "DI")]) |
91 | 112 |
92 ;; Mapping of vector modes to a vector mode of double size | 113 ;; Mapping of vector modes to a vector mode of double size |
93 (define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI") | 114 (define_mode_attr ssedoublesizemode |
94 (V4SF "V8SF") (V4SI "V8SI")]) | 115 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI") |
116 (V8HI "V16HI") (V16QI "V32QI") | |
117 (V4DF "V8DF") (V8SF "V16SF") | |
118 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")]) | |
95 | 119 |
96 ;; Number of scalar elements in each vector type | 120 ;; Number of scalar elements in each vector type |
97 (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2") | 121 (define_mode_attr ssescalarnum |
98 (V16QI "16") (V8HI "8") | 122 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2") |
99 (V4SI "4") (V2DI "2")]) | 123 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")]) |
100 | 124 |
101 ;; Mapping for AVX | 125 ;; Mapping for AVX |
102 (define_mode_attr avxvecmode | 126 (define_mode_attr avxvecmode |
103 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF") | 127 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI") |
104 (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") | 128 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF") |
105 (V8SF "V8SF") (V4DF "V4DF")]) | 129 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")]) |
106 (define_mode_attr avxvecpsmode | 130 (define_mode_attr avxvecpsmode |
107 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF") | 131 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF") |
108 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")]) | 132 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")]) |
109 (define_mode_attr avxhalfvecmode | 133 (define_mode_attr avxhalfvecmode |
110 [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") | 134 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI") |
111 (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")]) | 135 (V8SF "V4SF") (V4DF "V2DF") |
136 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")]) | |
112 (define_mode_attr avxscalarmode | 137 (define_mode_attr avxscalarmode |
113 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF") | 138 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF") |
114 (V8SF "SF") (V4DF "DF")]) | 139 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")]) |
115 (define_mode_attr avxcvtvecmode | 140 (define_mode_attr avxcvtvecmode |
116 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")]) | 141 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")]) |
117 (define_mode_attr avxpermvecmode | 142 (define_mode_attr avxpermvecmode |
118 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")]) | 143 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")]) |
119 (define_mode_attr avxmodesuffixf2c | 144 (define_mode_attr avxmodesuffixf2c |
120 [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")]) | 145 [(V4SF "s") (V2DF "d") (V8SI "s") (V8SF "s") (V4DI "d") (V4DF "d")]) |
121 (define_mode_attr avxmodesuffixp | 146 (define_mode_attr avxmodesuffixp |
122 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si") | 147 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si") |
123 (V4DF "pd")]) | 148 (V4DF "pd")]) |
124 (define_mode_attr avxmodesuffixs | |
125 [(V16QI "b") (V8HI "w") (V4SI "d")]) | |
126 (define_mode_attr avxmodesuffix | 149 (define_mode_attr avxmodesuffix |
127 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "") | 150 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "") |
128 (V8SI "256") (V8SF "256") (V4DF "256")]) | 151 (V8SI "256") (V8SF "256") (V4DF "256")]) |
129 | 152 |
130 ;; Mapping of immediate bits for blend instructions | 153 ;; Mapping of immediate bits for blend instructions |
131 (define_mode_attr blendbits | 154 (define_mode_attr blendbits |
132 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")]) | 155 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")]) |
133 | |
134 ;; Mapping of immediate bits for vpermil instructions | |
135 (define_mode_attr vpermilbits | |
136 [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")]) | |
137 | 156 |
138 ;; Mapping of immediate bits for pinsr instructions | 157 ;; Mapping of immediate bits for pinsr instructions |
139 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")]) | 158 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")]) |
140 | 159 |
141 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics. | 160 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics. |
154 ix86_expand_vector_move (<MODE>mode, operands); | 173 ix86_expand_vector_move (<MODE>mode, operands); |
155 DONE; | 174 DONE; |
156 }) | 175 }) |
157 | 176 |
158 (define_insn "*avx_mov<mode>_internal" | 177 (define_insn "*avx_mov<mode>_internal" |
159 [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m") | 178 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m") |
160 (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] | 179 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] |
161 "TARGET_AVX | 180 "TARGET_AVX |
162 && (register_operand (operands[0], <MODE>mode) | 181 && (register_operand (operands[0], <MODE>mode) |
163 || register_operand (operands[1], <MODE>mode))" | 182 || register_operand (operands[1], <MODE>mode))" |
164 { | 183 { |
165 switch (which_alternative) | 184 switch (which_alternative) |
189 | 208 |
190 ;; All of these patterns are enabled for SSE1 as well as SSE2. | 209 ;; All of these patterns are enabled for SSE1 as well as SSE2. |
191 ;; This is essential for maintaining stable calling conventions. | 210 ;; This is essential for maintaining stable calling conventions. |
192 | 211 |
193 (define_expand "mov<mode>" | 212 (define_expand "mov<mode>" |
194 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "") | 213 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "") |
195 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))] | 214 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))] |
196 "TARGET_SSE" | 215 "TARGET_SSE" |
197 { | 216 { |
198 ix86_expand_vector_move (<MODE>mode, operands); | 217 ix86_expand_vector_move (<MODE>mode, operands); |
199 DONE; | 218 DONE; |
200 }) | 219 }) |
201 | 220 |
202 (define_insn "*mov<mode>_internal" | 221 (define_insn "*mov<mode>_internal" |
203 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m") | 222 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m") |
204 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] | 223 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] |
205 "TARGET_SSE | 224 "TARGET_SSE |
206 && (register_operand (operands[0], <MODE>mode) | 225 && (register_operand (operands[0], <MODE>mode) |
207 || register_operand (operands[1], <MODE>mode))" | 226 || register_operand (operands[1], <MODE>mode))" |
208 { | 227 { |
209 switch (which_alternative) | 228 switch (which_alternative) |
265 Assemble the 64-bit DImode value in an xmm register. */ | 284 Assemble the 64-bit DImode value in an xmm register. */ |
266 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode), | 285 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode), |
267 gen_rtx_SUBREG (SImode, operands[1], 0))); | 286 gen_rtx_SUBREG (SImode, operands[1], 0))); |
268 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), | 287 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), |
269 gen_rtx_SUBREG (SImode, operands[1], 4))); | 288 gen_rtx_SUBREG (SImode, operands[1], 4))); |
270 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2])); | 289 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0], |
290 operands[2])); | |
271 } | 291 } |
272 else if (memory_operand (operands[1], DImode)) | 292 else if (memory_operand (operands[1], DImode)) |
273 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx)); | 293 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), |
294 operands[1], const0_rtx)); | |
274 else | 295 else |
275 gcc_unreachable (); | 296 gcc_unreachable (); |
276 }) | 297 }) |
277 | 298 |
278 (define_split | 299 (define_split |
279 [(set (match_operand:V4SF 0 "register_operand" "") | 300 [(set (match_operand:V4SF 0 "register_operand" "") |
280 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))] | 301 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))] |
306 ix86_expand_push (<MODE>mode, operands[0]); | 327 ix86_expand_push (<MODE>mode, operands[0]); |
307 DONE; | 328 DONE; |
308 }) | 329 }) |
309 | 330 |
310 (define_expand "push<mode>1" | 331 (define_expand "push<mode>1" |
311 [(match_operand:SSEMODE 0 "register_operand" "")] | 332 [(match_operand:SSEMODE16 0 "register_operand" "")] |
312 "TARGET_SSE" | 333 "TARGET_SSE" |
313 { | 334 { |
314 ix86_expand_push (<MODE>mode, operands[0]); | 335 ix86_expand_push (<MODE>mode, operands[0]); |
315 DONE; | 336 DONE; |
316 }) | 337 }) |
323 ix86_expand_vector_move_misalign (<MODE>mode, operands); | 344 ix86_expand_vector_move_misalign (<MODE>mode, operands); |
324 DONE; | 345 DONE; |
325 }) | 346 }) |
326 | 347 |
327 (define_expand "movmisalign<mode>" | 348 (define_expand "movmisalign<mode>" |
328 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "") | 349 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "") |
329 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))] | 350 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))] |
330 "TARGET_SSE" | 351 "TARGET_SSE" |
331 { | 352 { |
332 ix86_expand_vector_move_misalign (<MODE>mode, operands); | 353 ix86_expand_vector_move_misalign (<MODE>mode, operands); |
333 DONE; | 354 DONE; |
334 }) | 355 }) |
340 UNSPEC_MOVU))] | 361 UNSPEC_MOVU))] |
341 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) | 362 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) |
342 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | 363 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" |
343 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}" | 364 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}" |
344 [(set_attr "type" "ssemov") | 365 [(set_attr "type" "ssemov") |
366 (set_attr "movu" "1") | |
345 (set_attr "prefix" "vex") | 367 (set_attr "prefix" "vex") |
346 (set_attr "mode" "<MODE>")]) | 368 (set_attr "mode" "<MODE>")]) |
347 | 369 |
348 (define_insn "sse2_movq128" | 370 (define_insn "sse2_movq128" |
349 [(set (match_operand:V2DI 0 "register_operand" "=x") | 371 [(set (match_operand:V2DI 0 "register_operand" "=x") |
365 UNSPEC_MOVU))] | 387 UNSPEC_MOVU))] |
366 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) | 388 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) |
367 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | 389 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" |
368 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}" | 390 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}" |
369 [(set_attr "type" "ssemov") | 391 [(set_attr "type" "ssemov") |
392 (set_attr "movu" "1") | |
370 (set_attr "mode" "<MODE>")]) | 393 (set_attr "mode" "<MODE>")]) |
371 | 394 |
372 (define_insn "avx_movdqu<avxmodesuffix>" | 395 (define_insn "avx_movdqu<avxmodesuffix>" |
373 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m") | 396 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m") |
374 (unspec:AVXMODEQI | 397 (unspec:AVXMODEQI |
375 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")] | 398 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")] |
376 UNSPEC_MOVU))] | 399 UNSPEC_MOVU))] |
377 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | 400 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" |
378 "vmovdqu\t{%1, %0|%0, %1}" | 401 "vmovdqu\t{%1, %0|%0, %1}" |
379 [(set_attr "type" "ssemov") | 402 [(set_attr "type" "ssemov") |
403 (set_attr "movu" "1") | |
380 (set_attr "prefix" "vex") | 404 (set_attr "prefix" "vex") |
381 (set_attr "mode" "<avxvecmode>")]) | 405 (set_attr "mode" "<avxvecmode>")]) |
382 | 406 |
383 (define_insn "sse2_movdqu" | 407 (define_insn "sse2_movdqu" |
384 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") | 408 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") |
385 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] | 409 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] |
386 UNSPEC_MOVU))] | 410 UNSPEC_MOVU))] |
387 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | 411 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" |
388 "movdqu\t{%1, %0|%0, %1}" | 412 "movdqu\t{%1, %0|%0, %1}" |
389 [(set_attr "type" "ssemov") | 413 [(set_attr "type" "ssemov") |
414 (set_attr "movu" "1") | |
390 (set_attr "prefix_data16" "1") | 415 (set_attr "prefix_data16" "1") |
391 (set_attr "mode" "TI")]) | 416 (set_attr "mode" "TI")]) |
392 | 417 |
393 (define_insn "avx_movnt<mode>" | 418 (define_insn "avx_movnt<mode>" |
394 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m") | 419 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m") |
426 [(set (match_operand:V2DI 0 "memory_operand" "=m") | 451 [(set (match_operand:V2DI 0 "memory_operand" "=m") |
427 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")] | 452 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")] |
428 UNSPEC_MOVNT))] | 453 UNSPEC_MOVNT))] |
429 "TARGET_SSE2" | 454 "TARGET_SSE2" |
430 "movntdq\t{%1, %0|%0, %1}" | 455 "movntdq\t{%1, %0|%0, %1}" |
431 [(set_attr "type" "ssecvt") | 456 [(set_attr "type" "ssemov") |
432 (set_attr "prefix_data16" "1") | 457 (set_attr "prefix_data16" "1") |
433 (set_attr "mode" "TI")]) | 458 (set_attr "mode" "TI")]) |
434 | 459 |
435 (define_insn "sse2_movntsi" | 460 (define_insn "sse2_movntsi" |
436 [(set (match_operand:SI 0 "memory_operand" "=m") | 461 [(set (match_operand:SI 0 "memory_operand" "=m") |
437 (unspec:SI [(match_operand:SI 1 "register_operand" "r")] | 462 (unspec:SI [(match_operand:SI 1 "register_operand" "r")] |
438 UNSPEC_MOVNT))] | 463 UNSPEC_MOVNT))] |
439 "TARGET_SSE2" | 464 "TARGET_SSE2" |
440 "movnti\t{%1, %0|%0, %1}" | 465 "movnti\t{%1, %0|%0, %1}" |
441 [(set_attr "type" "ssecvt") | 466 [(set_attr "type" "ssemov") |
467 (set_attr "prefix_data16" "0") | |
442 (set_attr "mode" "V2DF")]) | 468 (set_attr "mode" "V2DF")]) |
443 | 469 |
444 (define_insn "avx_lddqu<avxmodesuffix>" | 470 (define_insn "avx_lddqu<avxmodesuffix>" |
445 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x") | 471 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x") |
446 (unspec:AVXMODEQI | 472 (unspec:AVXMODEQI |
447 [(match_operand:AVXMODEQI 1 "memory_operand" "m")] | 473 [(match_operand:AVXMODEQI 1 "memory_operand" "m")] |
448 UNSPEC_LDDQU))] | 474 UNSPEC_LDDQU))] |
449 "TARGET_AVX" | 475 "TARGET_AVX" |
450 "vlddqu\t{%1, %0|%0, %1}" | 476 "vlddqu\t{%1, %0|%0, %1}" |
451 [(set_attr "type" "ssecvt") | 477 [(set_attr "type" "ssecvt") |
478 (set_attr "movu" "1") | |
452 (set_attr "prefix" "vex") | 479 (set_attr "prefix" "vex") |
453 (set_attr "mode" "<avxvecmode>")]) | 480 (set_attr "mode" "<avxvecmode>")]) |
454 | 481 |
455 (define_insn "sse3_lddqu" | 482 (define_insn "sse3_lddqu" |
456 [(set (match_operand:V16QI 0 "register_operand" "=x") | 483 [(set (match_operand:V16QI 0 "register_operand" "=x") |
457 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")] | 484 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")] |
458 UNSPEC_LDDQU))] | 485 UNSPEC_LDDQU))] |
459 "TARGET_SSE3" | 486 "TARGET_SSE3" |
460 "lddqu\t{%1, %0|%0, %1}" | 487 "lddqu\t{%1, %0|%0, %1}" |
461 [(set_attr "type" "ssecvt") | 488 [(set_attr "type" "ssemov") |
489 (set_attr "movu" "1") | |
490 (set_attr "prefix_data16" "0") | |
462 (set_attr "prefix_rep" "1") | 491 (set_attr "prefix_rep" "1") |
463 (set_attr "mode" "TI")]) | 492 (set_attr "mode" "TI")]) |
464 | 493 |
465 ; Expand patterns for non-temporal stores. At the moment, only those | 494 ; Expand patterns for non-temporal stores. At the moment, only those |
466 ; that directly map to insns are defined; it would be possible to | 495 ; that directly map to insns are defined; it would be possible to |
647 (match_operand:V8SF 2 "nonimmediate_operand" "")))] | 676 (match_operand:V8SF 2 "nonimmediate_operand" "")))] |
648 "TARGET_AVX" | 677 "TARGET_AVX" |
649 { | 678 { |
650 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands); | 679 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands); |
651 | 680 |
652 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size | 681 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () |
653 && flag_finite_math_only && !flag_trapping_math | 682 && flag_finite_math_only && !flag_trapping_math |
654 && flag_unsafe_math_optimizations) | 683 && flag_unsafe_math_optimizations) |
655 { | 684 { |
656 ix86_emit_swdivsf (operands[0], operands[1], | 685 ix86_emit_swdivsf (operands[0], operands[1], |
657 operands[2], V8SFmode); | 686 operands[2], V8SFmode); |
763 (unspec:V4SF | 792 (unspec:V4SF |
764 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] | 793 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] |
765 "TARGET_SSE" | 794 "TARGET_SSE" |
766 "%vrcpps\t{%1, %0|%0, %1}" | 795 "%vrcpps\t{%1, %0|%0, %1}" |
767 [(set_attr "type" "sse") | 796 [(set_attr "type" "sse") |
797 (set_attr "atom_sse_attr" "rcp") | |
768 (set_attr "prefix" "maybe_vex") | 798 (set_attr "prefix" "maybe_vex") |
769 (set_attr "mode" "V4SF")]) | 799 (set_attr "mode" "V4SF")]) |
770 | 800 |
771 (define_insn "*avx_vmrcpv4sf2" | 801 (define_insn "*avx_vmrcpv4sf2" |
772 [(set (match_operand:V4SF 0 "register_operand" "=x") | 802 [(set (match_operand:V4SF 0 "register_operand" "=x") |
789 (match_operand:V4SF 2 "register_operand" "0") | 819 (match_operand:V4SF 2 "register_operand" "0") |
790 (const_int 1)))] | 820 (const_int 1)))] |
791 "TARGET_SSE" | 821 "TARGET_SSE" |
792 "rcpss\t{%1, %0|%0, %1}" | 822 "rcpss\t{%1, %0|%0, %1}" |
793 [(set_attr "type" "sse") | 823 [(set_attr "type" "sse") |
824 (set_attr "atom_sse_attr" "rcp") | |
794 (set_attr "mode" "SF")]) | 825 (set_attr "mode" "SF")]) |
795 | 826 |
796 (define_expand "sqrtv8sf2" | 827 (define_expand "sqrtv8sf2" |
797 [(set (match_operand:V8SF 0 "register_operand" "") | 828 [(set (match_operand:V8SF 0 "register_operand" "") |
798 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))] | 829 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))] |
799 "TARGET_AVX" | 830 "TARGET_AVX" |
800 { | 831 { |
801 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size | 832 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () |
802 && flag_finite_math_only && !flag_trapping_math | 833 && flag_finite_math_only && !flag_trapping_math |
803 && flag_unsafe_math_optimizations) | 834 && flag_unsafe_math_optimizations) |
804 { | 835 { |
805 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0); | 836 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0); |
806 DONE; | 837 DONE; |
834 [(set (match_operand:V4SF 0 "register_operand" "=x") | 865 [(set (match_operand:V4SF 0 "register_operand" "=x") |
835 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] | 866 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] |
836 "TARGET_SSE" | 867 "TARGET_SSE" |
837 "%vsqrtps\t{%1, %0|%0, %1}" | 868 "%vsqrtps\t{%1, %0|%0, %1}" |
838 [(set_attr "type" "sse") | 869 [(set_attr "type" "sse") |
870 (set_attr "atom_sse_attr" "sqrt") | |
839 (set_attr "prefix" "maybe_vex") | 871 (set_attr "prefix" "maybe_vex") |
840 (set_attr "mode" "V4SF")]) | 872 (set_attr "mode" "V4SF")]) |
841 | 873 |
842 (define_insn "sqrtv4df2" | 874 (define_insn "sqrtv4df2" |
843 [(set (match_operand:V4DF 0 "register_operand" "=x") | 875 [(set (match_operand:V4DF 0 "register_operand" "=x") |
878 (match_operand:SSEMODEF2P 2 "register_operand" "0") | 910 (match_operand:SSEMODEF2P 2 "register_operand" "0") |
879 (const_int 1)))] | 911 (const_int 1)))] |
880 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" | 912 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" |
881 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}" | 913 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}" |
882 [(set_attr "type" "sse") | 914 [(set_attr "type" "sse") |
915 (set_attr "atom_sse_attr" "sqrt") | |
883 (set_attr "mode" "<ssescalarmode>")]) | 916 (set_attr "mode" "<ssescalarmode>")]) |
884 | 917 |
885 (define_expand "rsqrtv8sf2" | 918 (define_expand "rsqrtv8sf2" |
886 [(set (match_operand:V8SF 0 "register_operand" "") | 919 [(set (match_operand:V8SF 0 "register_operand" "") |
887 (unspec:V8SF | 920 (unspec:V8SF |
1041 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) | 1074 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) |
1042 (match_dup 1) | 1075 (match_dup 1) |
1043 (const_int 1)))] | 1076 (const_int 1)))] |
1044 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" | 1077 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" |
1045 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}" | 1078 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}" |
1046 [(set_attr "type" "sse") | 1079 [(set_attr "type" "sseadd") |
1047 (set_attr "mode" "<ssescalarmode>")]) | 1080 (set_attr "mode" "<ssescalarmode>")]) |
1048 | 1081 |
1049 ;; These versions of the min/max patterns implement exactly the operations | 1082 ;; These versions of the min/max patterns implement exactly the operations |
1050 ;; min = (op1 < op2 ? op1 : op2) | 1083 ;; min = (op1 < op2 ? op1 : op2) |
1051 ;; max = (!(op1 < op2) ? op1 : op2) | 1084 ;; max = (!(op1 < op2) ? op1 : op2) |
1177 (minus:V2DF (match_dup 1) (match_dup 2)) | 1210 (minus:V2DF (match_dup 1) (match_dup 2)) |
1178 (const_int 2)))] | 1211 (const_int 2)))] |
1179 "TARGET_SSE3" | 1212 "TARGET_SSE3" |
1180 "addsubpd\t{%2, %0|%0, %2}" | 1213 "addsubpd\t{%2, %0|%0, %2}" |
1181 [(set_attr "type" "sseadd") | 1214 [(set_attr "type" "sseadd") |
1215 (set_attr "atom_unit" "complex") | |
1182 (set_attr "mode" "V2DF")]) | 1216 (set_attr "mode" "V2DF")]) |
1183 | 1217 |
1184 (define_insn "avx_h<plusminus_insn>v4df3" | 1218 (define_insn "avx_h<plusminus_insn>v4df3" |
1185 [(set (match_operand:V4DF 0 "register_operand" "=x") | 1219 [(set (match_operand:V4DF 0 "register_operand" "=x") |
1186 (vec_concat:V4DF | 1220 (vec_concat:V4DF |
1300 (vec_select:SF (match_dup 2) (parallel [(const_int 2)])) | 1334 (vec_select:SF (match_dup 2) (parallel [(const_int 2)])) |
1301 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))] | 1335 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))] |
1302 "TARGET_SSE3" | 1336 "TARGET_SSE3" |
1303 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}" | 1337 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}" |
1304 [(set_attr "type" "sseadd") | 1338 [(set_attr "type" "sseadd") |
1339 (set_attr "atom_unit" "complex") | |
1305 (set_attr "prefix_rep" "1") | 1340 (set_attr "prefix_rep" "1") |
1306 (set_attr "mode" "V4SF")]) | 1341 (set_attr "mode" "V4SF")]) |
1307 | 1342 |
1308 (define_insn "*avx_h<plusminus_insn>v2df3" | 1343 (define_insn "*avx_h<plusminus_insn>v2df3" |
1309 [(set (match_operand:V2DF 0 "register_operand" "=x") | 1344 [(set (match_operand:V2DF 0 "register_operand" "=x") |
1399 (match_operand:SI 3 "const_0_to_31_operand" "n")] | 1434 (match_operand:SI 3 "const_0_to_31_operand" "n")] |
1400 UNSPEC_PCMP))] | 1435 UNSPEC_PCMP))] |
1401 "TARGET_AVX" | 1436 "TARGET_AVX" |
1402 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 1437 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
1403 [(set_attr "type" "ssecmp") | 1438 [(set_attr "type" "ssecmp") |
1439 (set_attr "length_immediate" "1") | |
1404 (set_attr "prefix" "vex") | 1440 (set_attr "prefix" "vex") |
1405 (set_attr "mode" "<MODE>")]) | 1441 (set_attr "mode" "<MODE>")]) |
1406 | 1442 |
1407 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3" | 1443 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3" |
1408 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") | 1444 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") |
1415 (match_dup 1) | 1451 (match_dup 1) |
1416 (const_int 1)))] | 1452 (const_int 1)))] |
1417 "TARGET_AVX" | 1453 "TARGET_AVX" |
1418 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 1454 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
1419 [(set_attr "type" "ssecmp") | 1455 [(set_attr "type" "ssecmp") |
1456 (set_attr "length_immediate" "1") | |
1420 (set_attr "prefix" "vex") | 1457 (set_attr "prefix" "vex") |
1421 (set_attr "mode" "<ssescalarmode>")]) | 1458 (set_attr "mode" "<ssescalarmode>")]) |
1422 | 1459 |
1423 ;; We don't promote 128bit vector compare intrinsics. But vectorizer | 1460 ;; We don't promote 128bit vector compare intrinsics. But vectorizer |
1424 ;; may generate 256bit vector compare instructions. | 1461 ;; may generate 256bit vector compare instructions. |
1429 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))] | 1466 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))] |
1430 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)" | 1467 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)" |
1431 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" | 1468 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" |
1432 [(set_attr "type" "ssecmp") | 1469 [(set_attr "type" "ssecmp") |
1433 (set_attr "prefix" "vex") | 1470 (set_attr "prefix" "vex") |
1471 (set_attr "length_immediate" "1") | |
1434 (set_attr "mode" "<avxvecmode>")]) | 1472 (set_attr "mode" "<avxvecmode>")]) |
1435 | 1473 |
;; <sse>_maskcmp<mode>3: compare insn whose template is cmp%D3<ssemodesuffixf4>.
;; Operand 3 is the sse_comparison_operator applied to operand 1 (tied to the
;; destination through constraint 0) and operand 2 (register or memory, xm).
;; Column differences: the trailing !TARGET_SSE5 term of the left condition
;; becomes a leading !TARGET_XOP term on the right, and the right column adds
;; a length_immediate attribute of 1.
1436 (define_insn "<sse>_maskcmp<mode>3" | 1474 (define_insn "<sse>_maskcmp<mode>3" |
1437 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x") | 1475 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x") |
1438 (match_operator:SSEMODEF4 3 "sse_comparison_operator" | 1476 (match_operator:SSEMODEF4 3 "sse_comparison_operator" |
1439 [(match_operand:SSEMODEF4 1 "register_operand" "0") | 1477 [(match_operand:SSEMODEF4 1 "register_operand" "0") |
1440 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))] | 1478 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))] |
1441 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode)) | 1479 "!TARGET_XOP |
1442 && !TARGET_SSE5" | 1480 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))" |
1443 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}" | 1481 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}" |
1444 [(set_attr "type" "ssecmp") | 1482 [(set_attr "type" "ssecmp") |
1483 (set_attr "length_immediate" "1") | |
1445 (set_attr "mode" "<MODE>")]) | 1484 (set_attr "mode" "<MODE>")]) |
1446 | 1485 |
;; <sse>_vmmaskcmp<mode>3: vec_merge variant of the mask compare.  The
;; comparison of operands 1 and 2 is merged with operand 1 itself (match_dup 1)
;; under the selector (const_int 1); the template is cmp%D3s<ssemodesuffixf2c>
;; and the mode attribute is <ssescalarmode>.
;; Column differences: the left condition also requires !TARGET_SSE5, the right
;; drops that term (no !TARGET_XOP replacement is visible here) and adds a
;; length_immediate attribute of 1.
1447 (define_insn "<sse>_vmmaskcmp<mode>3" | 1486 (define_insn "<sse>_vmmaskcmp<mode>3" |
1448 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") | 1487 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") |
1449 (vec_merge:SSEMODEF2P | 1488 (vec_merge:SSEMODEF2P |
1450 (match_operator:SSEMODEF2P 3 "sse_comparison_operator" | 1489 (match_operator:SSEMODEF2P 3 "sse_comparison_operator" |
1451 [(match_operand:SSEMODEF2P 1 "register_operand" "0") | 1490 [(match_operand:SSEMODEF2P 1 "register_operand" "0") |
1452 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]) | 1491 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]) |
1453 (match_dup 1) | 1492 (match_dup 1) |
1454 (const_int 1)))] | 1493 (const_int 1)))] |
1455 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5" | 1494 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" |
1456 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}" | 1495 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}" |
1457 [(set_attr "type" "ssecmp") | 1496 [(set_attr "type" "ssecmp") |
1497 (set_attr "length_immediate" "1") | |
1458 (set_attr "mode" "<ssescalarmode>")]) | 1498 (set_attr "mode" "<ssescalarmode>")]) |
1459 | 1499 |
1460 (define_insn "<sse>_comi" | 1500 (define_insn "<sse>_comi" |
1461 [(set (reg:CCFP FLAGS_REG) | 1501 [(set (reg:CCFP FLAGS_REG) |
1462 (compare:CCFP | 1502 (compare:CCFP |
1468 (parallel [(const_int 0)]))))] | 1508 (parallel [(const_int 0)]))))] |
1469 "SSE_FLOAT_MODE_P (<MODE>mode)" | 1509 "SSE_FLOAT_MODE_P (<MODE>mode)" |
1470 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}" | 1510 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}" |
1471 [(set_attr "type" "ssecomi") | 1511 [(set_attr "type" "ssecomi") |
1472 (set_attr "prefix" "maybe_vex") | 1512 (set_attr "prefix" "maybe_vex") |
1513 (set_attr "prefix_rep" "0") | |
1514 (set (attr "prefix_data16") | |
1515 (if_then_else (eq_attr "mode" "DF") | |
1516 (const_string "1") | |
1517 (const_string "0"))) | |
1473 (set_attr "mode" "<MODE>")]) | 1518 (set_attr "mode" "<MODE>")]) |
1474 | 1519 |
1475 (define_insn "<sse>_ucomi" | 1520 (define_insn "<sse>_ucomi" |
1476 [(set (reg:CCFPU FLAGS_REG) | 1521 [(set (reg:CCFPU FLAGS_REG) |
1477 (compare:CCFPU | 1522 (compare:CCFPU |
1483 (parallel [(const_int 0)]))))] | 1528 (parallel [(const_int 0)]))))] |
1484 "SSE_FLOAT_MODE_P (<MODE>mode)" | 1529 "SSE_FLOAT_MODE_P (<MODE>mode)" |
1485 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}" | 1530 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}" |
1486 [(set_attr "type" "ssecomi") | 1531 [(set_attr "type" "ssecomi") |
1487 (set_attr "prefix" "maybe_vex") | 1532 (set_attr "prefix" "maybe_vex") |
1533 (set_attr "prefix_rep" "0") | |
1534 (set (attr "prefix_data16") | |
1535 (if_then_else (eq_attr "mode" "DF") | |
1536 (const_string "1") | |
1537 (const_string "0"))) | |
1488 (set_attr "mode" "<MODE>")]) | 1538 (set_attr "mode" "<MODE>")]) |
1489 | 1539 |
1490 (define_expand "vcond<mode>" | 1540 (define_expand "vcond<mode>" |
1491 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") | 1541 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") |
1492 (if_then_else:SSEMODEF2P | 1542 (if_then_else:SSEMODEF2P |
1531 [(set_attr "type" "sselog") | 1581 [(set_attr "type" "sselog") |
1532 (set_attr "mode" "<MODE>")]) | 1582 (set_attr "mode" "<MODE>")]) |
1533 | 1583 |
;; Expander named <code><mode>3 for the AVX256MODEF2P modes, instantiated once
;; per rtx code of the code iterator.  Both inputs are nonimmediate operands and
;; the preparation statement hands them to ix86_fixup_binary_operands_no_copy.
;; Enabled when AVX256_VEC_FLOAT_MODE_P holds for the mode.
;; Column difference: the code iterator is spelled plogic on the left and
;; any_logic on the right; every other row is equal.
1534 (define_expand "<code><mode>3" | 1584 (define_expand "<code><mode>3" |
1535 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "") | 1585 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "") |
1536 (plogic:AVX256MODEF2P | 1586 (any_logic:AVX256MODEF2P |
1537 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "") | 1587 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "") |
1538 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))] | 1588 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))] |
1539 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)" | 1589 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)" |
1540 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") | 1590 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") |
1541 | 1591 |
;; *avx_<code><mode>3 for the AVXMODEF2P modes: three-operand, vex-prefixed
;; logic insn.  Operand 1 must be a register (constraint %x), operand 2 may be a
;; register or memory (xm).  The insn is only valid when AVX_VEC_FLOAT_MODE_P
;; holds and ix86_binary_operator_ok accepts the operands.
;; Column differences: plogic becomes any_logic and the template attribute
;; <plogicprefix> becomes <logicprefix>.
1542 (define_insn "*avx_<code><mode>3" | 1592 (define_insn "*avx_<code><mode>3" |
1543 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") | 1593 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") |
1544 (plogic:AVXMODEF2P | 1594 (any_logic:AVXMODEF2P |
1545 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x") | 1595 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x") |
1546 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))] | 1596 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))] |
1547 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) | 1597 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) |
1548 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | 1598 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" |
1549 "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" | 1599 "v<logicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" |
1550 [(set_attr "type" "sselog") | 1600 [(set_attr "type" "sselog") |
1551 (set_attr "prefix" "vex") | 1601 (set_attr "prefix" "vex") |
1552 (set_attr "mode" "<avxvecmode>")]) | 1602 (set_attr "mode" "<avxvecmode>")]) |
1553 | 1603 |
;; Expander named <code><mode>3 for the SSEMODEF2P modes; same shape as the
;; AVX256MODEF2P expander but guarded by SSE_VEC_FLOAT_MODE_P.  The preparation
;; statement passes the operands to ix86_fixup_binary_operands_no_copy.
;; Column difference: code iterator plogic (left) versus any_logic (right).
1554 (define_expand "<code><mode>3" | 1604 (define_expand "<code><mode>3" |
1555 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") | 1605 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") |
1556 (plogic:SSEMODEF2P | 1606 (any_logic:SSEMODEF2P |
1557 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "") | 1607 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "") |
1558 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))] | 1608 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))] |
1559 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" | 1609 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" |
1560 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") | 1610 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") |
1561 | 1611 |
;; *<code><mode>3 for the SSEMODEF2P modes: two-operand logic insn.  Operand 1
;; is tied to the destination (constraint %0) and operand 2 may be a register
;; or memory (xm), so the template prints only operands 2 and 0.
;; Valid when SSE_VEC_FLOAT_MODE_P holds and ix86_binary_operator_ok accepts
;; the operands.
;; Column differences: plogic becomes any_logic and <plogicprefix> becomes
;; <logicprefix>.
1562 (define_insn "*<code><mode>3" | 1612 (define_insn "*<code><mode>3" |
1563 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") | 1613 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") |
1564 (plogic:SSEMODEF2P | 1614 (any_logic:SSEMODEF2P |
1565 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0") | 1615 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0") |
1566 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] | 1616 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] |
1567 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) | 1617 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) |
1568 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | 1618 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" |
1569 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}" | 1619 "<logicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}" |
1570 [(set_attr "type" "sselog") | 1620 [(set_attr "type" "sselog") |
1571 (set_attr "mode" "<MODE>")]) | 1621 (set_attr "mode" "<MODE>")]) |
1622 | |
;; copysign<mode>3: present only in the right column (inserted rows).
;; Expands into three sets over the SSEMODEF2P modes:
;;   operand 4 = (not operand 3) and operand 1
;;   operand 5 = operand 3 and operand 2
;;   operand 0 = operand 4 ior operand 5
;; The preparation code builds operand 3 with ix86_build_signbit_mask for the
;; scalar element mode and allocates fresh pseudo registers for operands 4
;; and 5 with gen_reg_rtx.
;; NOTE(review): operand 3 is presumably a per-element sign-bit mask, judging
;; by the helper name only -- confirm against ix86_build_signbit_mask.
1623 (define_expand "copysign<mode>3" | |
1624 [(set (match_dup 4) | |
1625 (and:SSEMODEF2P | |
1626 (not:SSEMODEF2P (match_dup 3)) | |
1627 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" ""))) | |
1628 (set (match_dup 5) | |
1629 (and:SSEMODEF2P (match_dup 3) | |
1630 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" ""))) | |
1631 (set (match_operand:SSEMODEF2P 0 "register_operand" "") | |
1632 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))] | |
1633 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" | |
1634 { | |
1635 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0); | |
1636 | |
1637 operands[4] = gen_reg_rtx (<MODE>mode); | |
1638 operands[5] = gen_reg_rtx (<MODE>mode); | |
1639 }) | |
1572 | 1640 |
1573 ;; Also define scalar versions. These are used for abs, neg, and | 1641 ;; Also define scalar versions. These are used for abs, neg, and |
1574 ;; conditional move. Using subregs into vector modes causes register | 1642 ;; conditional move. Using subregs into vector modes causes register |
1575 ;; allocation lossage. These patterns do not allow memory operands | 1643 ;; allocation lossage. These patterns do not allow memory operands |
1576 ;; because the native instructions read the full 128-bits. | 1644 ;; because the native instructions read the full 128-bits. |
1598 [(set_attr "type" "sselog") | 1666 [(set_attr "type" "sselog") |
1599 (set_attr "mode" "<ssevecmode>")]) | 1667 (set_attr "mode" "<ssevecmode>")]) |
1600 | 1668 |
;; *avx_<code><mode>3 for the scalar MODEF modes: three-operand, vex-prefixed
;; logic insn.  All three operands are register_operand with constraint x, so
;; no memory operand is accepted.  Guarded by AVX_FLOAT_MODE_P; the mode
;; attribute is <ssevecmode>.
;; Column differences: plogic becomes any_logic and <plogicprefix> becomes
;; <logicprefix>.
1601 (define_insn "*avx_<code><mode>3" | 1669 (define_insn "*avx_<code><mode>3" |
1602 [(set (match_operand:MODEF 0 "register_operand" "=x") | 1670 [(set (match_operand:MODEF 0 "register_operand" "=x") |
1603 (plogic:MODEF | 1671 (any_logic:MODEF |
1604 (match_operand:MODEF 1 "register_operand" "x") | 1672 (match_operand:MODEF 1 "register_operand" "x") |
1605 (match_operand:MODEF 2 "register_operand" "x")))] | 1673 (match_operand:MODEF 2 "register_operand" "x")))] |
1606 "AVX_FLOAT_MODE_P (<MODE>mode)" | 1674 "AVX_FLOAT_MODE_P (<MODE>mode)" |
1607 "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}" | 1675 "v<logicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}" |
1608 [(set_attr "type" "sselog") | 1676 [(set_attr "type" "sselog") |
1609 (set_attr "prefix" "vex") | 1677 (set_attr "prefix" "vex") |
1610 (set_attr "mode" "<ssevecmode>")]) | 1678 (set_attr "mode" "<ssevecmode>")]) |
1611 | 1679 |
;; *<code><mode>3 for the scalar MODEF modes: two-operand logic insn.
;; Operand 1 is tied to the destination (constraint 0) and operand 2 must be a
;; register (x); no memory operand is accepted.  Guarded by SSE_FLOAT_MODE_P;
;; the mode attribute is <ssevecmode>.
;; Column differences: plogic becomes any_logic and <plogicprefix> becomes
;; <logicprefix>.
1612 (define_insn "*<code><mode>3" | 1680 (define_insn "*<code><mode>3" |
1613 [(set (match_operand:MODEF 0 "register_operand" "=x") | 1681 [(set (match_operand:MODEF 0 "register_operand" "=x") |
1614 (plogic:MODEF | 1682 (any_logic:MODEF |
1615 (match_operand:MODEF 1 "register_operand" "0") | 1683 (match_operand:MODEF 1 "register_operand" "0") |
1616 (match_operand:MODEF 2 "register_operand" "x")))] | 1684 (match_operand:MODEF 2 "register_operand" "x")))] |
1617 "SSE_FLOAT_MODE_P (<MODE>mode)" | 1685 "SSE_FLOAT_MODE_P (<MODE>mode)" |
1618 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}" | 1686 "<logicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}" |
1619 [(set_attr "type" "sselog") | 1687 [(set_attr "type" "sselog") |
1620 (set_attr "mode" "<ssevecmode>")]) | 1688 (set_attr "mode" "<ssevecmode>")]) |
1621 | 1689 |
1622 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 1690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
1623 ;; | 1691 ;; |
1624 ;; SSE5 floating point multiply/accumulate instructions This includes the | 1692 ;; FMA4 floating point multiply/accumulate instructions. This |
1625 ;; scalar version of the instructions as well as the vector | 1693 ;; includes the scalar version of the instructions as well as the |
1694 ;; vector. | |
1626 ;; | 1695 ;; |
1627 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 1696 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
1628 | 1697 |
1629 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow | 1698 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow |
1630 ;; combine to generate a multiply/add with two memory references. We then | 1699 ;; combine to generate a multiply/add with two memory references. We then |
1635 ;; add insns, and it can't generate a new pseudo. I.e.: | 1704 ;; add insns, and it can't generate a new pseudo. I.e.: |
1636 ;; (set (reg1) (mem (addr1))) | 1705 ;; (set (reg1) (mem (addr1))) |
1637 ;; (set (reg2) (mult (reg1) (mem (addr2)))) | 1706 ;; (set (reg2) (mult (reg1) (mem (addr2)))) |
1638 ;; (set (reg3) (plus (reg2) (mem (addr3)))) | 1707 ;; (set (reg3) (plus (reg2) (mem (addr3)))) |
1639 | 1708 |
;; Right column: fma4_fmadd<mode>4256, new in this revision.  It matches
;; (operand 1 * operand 2) + operand 3 over the FMA4MODEF4 modes and prints
;; vfmadd<fma4modesuffixf4> with four operands.  The two constraint
;; alternatives (x, x, xm) and (x, m, x) allow a memory operand in position 3
;; or position 2 respectively, never both.
;; Left column: the first two rows are the head of the old sse5_fmadd<mode>4
;; insn; its remaining rows are aligned further down against
;; fma4_fmadd<mode>4.
1640 (define_insn "sse5_fmadd<mode>4" | 1709 (define_insn "fma4_fmadd<mode>4256" |
1641 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x") | 1710 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") |
1711 (plus:FMA4MODEF4 | |
1712 (mult:FMA4MODEF4 | |
1713 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x") | |
1714 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) | |
1715 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))] | |
1716 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
1717 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1718 [(set_attr "type" "ssemuladd") | |
1719 (set_attr "mode" "<MODE>")]) | |
1720 | |
;; fma4_fmsub<mode>4256 (right column only): matches
;; (operand 1 * operand 2) - operand 3 over the FMA4MODEF4 modes and prints
;; vfmsub<fma4modesuffixf4>.  Same two constraint alternatives as the fmadd
;; form: memory is allowed in operand 3 or in operand 2, not both.
1721 ;; Floating multiply and subtract. | |
1722 (define_insn "fma4_fmsub<mode>4256" | |
1723 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") | |
1724 (minus:FMA4MODEF4 | |
1725 (mult:FMA4MODEF4 | |
1726 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x") | |
1727 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) | |
1728 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))] | |
1729 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
1730 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1731 [(set_attr "type" "ssemuladd") | |
1732 (set_attr "mode" "<MODE>")]) | |
1733 | |
;; fma4_fnmadd<mode>4256 (right column only): matches
;; operand 3 - (operand 1 * operand 2) over the FMA4MODEF4 modes and prints
;; vfnmadd<fma4modesuffixf4>.  Operand 3 appears first in the RTL but keeps
;; its number, so the template operand order is unchanged.
1734 ;; Floating point negative multiply and add. | |
1735 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b). | |
1736 (define_insn "fma4_fnmadd<mode>4256" | |
1737 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") | |
1738 (minus:FMA4MODEF4 | |
1739 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x") | |
1740 (mult:FMA4MODEF4 | |
1741 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x") | |
1742 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))] | |
1743 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
1744 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1745 [(set_attr "type" "ssemuladd") | |
1746 (set_attr "mode" "<MODE>")]) | |
1747 | |
;; fma4_fnmsub<mode>4256 (right column only): matches
;; ((neg operand 1) * operand 2) - operand 3 over the FMA4MODEF4 modes and
;; prints vfnmsub<fma4modesuffixf4>.
;; NOTE(review): operand 1 carries the commutative marker % although it sits
;; under a neg while operand 2 does not -- confirm that swapping is intended.
1748 ;; Floating point negative multiply and subtract. | |
1749 (define_insn "fma4_fnmsub<mode>4256" | |
1750 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") | |
1751 (minus:FMA4MODEF4 | |
1752 (mult:FMA4MODEF4 | |
1753 (neg:FMA4MODEF4 | |
1754 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")) | |
1755 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) | |
1756 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))] | |
1757 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
1758 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1759 [(set_attr "type" "ssemuladd") | |
1760 (set_attr "mode" "<MODE>")]) | |
1761 | |
;; Right column: fma4_fmadd<mode>4 over the SSEMODEF4 modes, matching
;; (operand 1 * operand 2) + operand 3 and printing vfmadd<ssemodesuffixf4>.
;; Left column: the body of the old sse5_fmadd<mode>4 (its first two rows are
;; aligned earlier, against fma4_fmadd<mode>4256).
;; Column differences: four constraint alternatives with destination ties (0)
;; shrink to two alternatives without ties; the condition changes from
;; TARGET_SSE5 plus an ix86_sse5_valid_op_p check to TARGET_FMA4; the mnemonic
;; gains a v prefix.
1762 (define_insn "fma4_fmadd<mode>4" | |
1763 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") | |
1642 (plus:SSEMODEF4 | 1764 (plus:SSEMODEF4 |
1643 (mult:SSEMODEF4 | 1765 (mult:SSEMODEF4 |
1644 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm") | 1766 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x") |
1645 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x")) | 1767 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m")) |
1646 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))] | 1768 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))] |
1647 "TARGET_SSE5 && TARGET_FUSED_MADD | 1769 "TARGET_FMA4 && TARGET_FUSED_MADD" |
1648 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)" | 1770 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
1649 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1650 [(set_attr "type" "ssemuladd") | 1771 [(set_attr "type" "ssemuladd") |
1651 (set_attr "mode" "<MODE>")]) | 1772 (set_attr "mode" "<MODE>")]) |
1652 | 1773 |
;; Left column only: this define_split has no counterpart in the right
;; revision.  It applied to the fmadd RTL shape when the operands failed
;; ix86_sse5_valid_op_p with a limit of 1 but passed it with a limit of 2, and
;; the destination was not mentioned in any input.  The split body calls
;; ix86_expand_sse5_multiple_memory and then emits gen_sse5_fmadd<mode>4.
1653 ;; Split fmadd with two memory operands into a load and the fmadd. | |
1654 (define_split | |
1655 [(set (match_operand:SSEMODEF4 0 "register_operand" "") | |
1656 (plus:SSEMODEF4 | |
1657 (mult:SSEMODEF4 | |
1658 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "") | |
1659 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "")) | |
1660 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))] | |
1661 "TARGET_SSE5 | |
1662 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true) | |
1663 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true) | |
1664 && !reg_mentioned_p (operands[0], operands[1]) | |
1665 && !reg_mentioned_p (operands[0], operands[2]) | |
1666 && !reg_mentioned_p (operands[0], operands[3])" | |
1667 [(const_int 0)] | |
1668 { | |
1669 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode); | |
1670 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1], | |
1671 operands[2], operands[3])); | |
1672 DONE; | |
1673 }) | |
1674 | |
;; sse5_vmfmadd<mode>4 (left) renamed to fma4_vmfmadd<mode>4 (right): the
;; vec_merge form of (operand 1 * operand 2) + operand 3, selector (const_int 1).
;; Column differences: operand 1 constraint 0,0 becomes %x,x; operand 2
;; constraint x,xm becomes x,m; the merge source (match_dup 1) becomes
;; (match_dup 0); the condition drops the ix86_sse5_valid_op_p check and tests
;; TARGET_FMA4 instead of TARGET_SSE5; the mnemonic gains a v prefix.
;; NOTE(review): the use-operand1 comment below still matches the left column
;; only; the right column merges with operand 0.
1675 ;; For the scalar operations, use operand1 for the upper words that aren't | 1774 ;; For the scalar operations, use operand1 for the upper words that aren't |
1676 ;; modified, so restrict the forms that are generated. | 1775 ;; modified, so restrict the forms that are generated. |
1677 ;; Scalar version of fmadd | 1776 ;; Scalar version of fmadd. |
1678 (define_insn "sse5_vmfmadd<mode>4" | 1777 (define_insn "fma4_vmfmadd<mode>4" |
1679 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") | 1778 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") |
1680 (vec_merge:SSEMODEF2P | 1779 (vec_merge:SSEMODEF2P |
1681 (plus:SSEMODEF2P | 1780 (plus:SSEMODEF2P |
1682 (mult:SSEMODEF2P | 1781 (mult:SSEMODEF2P |
1683 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") | 1782 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") |
1684 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) | 1783 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) |
1685 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) | 1784 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) |
1686 (match_dup 1) | 1785 (match_dup 0) |
1687 (const_int 1)))] | 1786 (const_int 1)))] |
1688 "TARGET_SSE5 && TARGET_FUSED_MADD | 1787 "TARGET_FMA4 && TARGET_FUSED_MADD" |
1689 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" | 1788 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
1690 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1691 [(set_attr "type" "ssemuladd") | 1789 [(set_attr "type" "ssemuladd") |
1692 (set_attr "mode" "<MODE>")]) | 1790 (set_attr "mode" "<MODE>")]) |
1693 | 1791 |
;; sse5_fmsub<mode>4 (left) renamed to fma4_fmsub<mode>4 (right): matches
;; (operand 1 * operand 2) - operand 3 over the SSEMODEF4 modes.
;; Column differences: four constraint alternatives with destination ties (0)
;; shrink to two alternatives without ties, each allowing memory in at most
;; one operand; the condition drops the ix86_sse5_valid_op_p check and tests
;; TARGET_FMA4 instead of TARGET_SSE5; the mnemonic gains a v prefix.
;; NOTE(review): the two-memory-operands comment below is kept on the right
;; although neither right-hand alternative accepts two memory operands.
1694 ;; Floating multiply and subtract | 1792 ;; Floating multiply and subtract. |
1695 ;; Allow two memory operands the same as fmadd | 1793 ;; Allow two memory operands the same as fmadd. |
1696 (define_insn "sse5_fmsub<mode>4" | 1794 (define_insn "fma4_fmsub<mode>4" |
1697 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x") | 1795 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") |
1698 (minus:SSEMODEF4 | 1796 (minus:SSEMODEF4 |
1699 (mult:SSEMODEF4 | 1797 (mult:SSEMODEF4 |
1700 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm") | 1798 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x") |
1701 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x")) | 1799 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m")) |
1702 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))] | 1800 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))] |
1703 "TARGET_SSE5 && TARGET_FUSED_MADD | 1801 "TARGET_FMA4 && TARGET_FUSED_MADD" |
1704 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)" | 1802 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
1705 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1706 [(set_attr "type" "ssemuladd") | 1803 [(set_attr "type" "ssemuladd") |
1707 (set_attr "mode" "<MODE>")]) | 1804 (set_attr "mode" "<MODE>")]) |
1708 | 1805 |
;; Left column only: this define_split has no counterpart in the right
;; revision.  It applied to the fmsub RTL shape under the same conditions as
;; the fmadd split (fails ix86_sse5_valid_op_p with limit 1, passes with
;; limit 2, destination not mentioned in any input).  The split body calls
;; ix86_expand_sse5_multiple_memory and then emits gen_sse5_fmsub<mode>4.
1709 ;; Split fmsub with two memory operands into a load and the fmsub. | |
1710 (define_split | |
1711 [(set (match_operand:SSEMODEF4 0 "register_operand" "") | |
1712 (minus:SSEMODEF4 | |
1713 (mult:SSEMODEF4 | |
1714 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "") | |
1715 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "")) | |
1716 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))] | |
1717 "TARGET_SSE5 | |
1718 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true) | |
1719 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true) | |
1720 && !reg_mentioned_p (operands[0], operands[1]) | |
1721 && !reg_mentioned_p (operands[0], operands[2]) | |
1722 && !reg_mentioned_p (operands[0], operands[3])" | |
1723 [(const_int 0)] | |
1724 { | |
1725 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode); | |
1726 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1], | |
1727 operands[2], operands[3])); | |
1728 DONE; | |
1729 }) | |
1730 | |
;; sse5_vmfmsub<mode>4 (left) renamed to fma4_vmfmsub<mode>4 (right): the
;; vec_merge form of (operand 1 * operand 2) - operand 3, selector (const_int 1).
;; Column differences: operand 1 constraint 0,0 becomes %x,x; operand 2
;; constraint x,xm becomes x,m; the merge source (match_dup 1) becomes
;; (match_dup 0); the condition drops the ix86_sse5_valid_op_p check and tests
;; TARGET_FMA4 instead of TARGET_SSE5; the mnemonic gains a v prefix.
;; NOTE(review): the use-operand1 comment below still matches the left column
;; only; the right column merges with operand 0.
1731 ;; For the scalar operations, use operand1 for the upper words that aren't | 1806 ;; For the scalar operations, use operand1 for the upper words that aren't |
1732 ;; modified, so restrict the forms that are generated. | 1807 ;; modified, so restrict the forms that are generated. |
1733 ;; Scalar version of fmsub | 1808 ;; Scalar version of fmsub. |
1734 (define_insn "sse5_vmfmsub<mode>4" | 1809 (define_insn "fma4_vmfmsub<mode>4" |
1735 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") | 1810 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") |
1736 (vec_merge:SSEMODEF2P | 1811 (vec_merge:SSEMODEF2P |
1737 (minus:SSEMODEF2P | 1812 (minus:SSEMODEF2P |
1738 (mult:SSEMODEF2P | 1813 (mult:SSEMODEF2P |
1739 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") | 1814 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") |
1740 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) | 1815 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) |
1741 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) | 1816 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) |
1742 (match_dup 1) | 1817 (match_dup 0) |
1743 (const_int 1)))] | 1818 (const_int 1)))] |
1744 "TARGET_SSE5 && TARGET_FUSED_MADD | 1819 "TARGET_FMA4 && TARGET_FUSED_MADD" |
1745 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" | 1820 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
1746 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1747 [(set_attr "type" "ssemuladd") | 1821 [(set_attr "type" "ssemuladd") |
1748 (set_attr "mode" "<MODE>")]) | 1822 (set_attr "mode" "<MODE>")]) |
1749 | 1823 |
;; sse5_fnmadd<mode>4 (left) renamed to fma4_fnmadd<mode>4 (right): matches
;; operand 3 - (operand 1 * operand 2) over the SSEMODEF4 modes.
;; The rows are not aligned one-to-one here: the left column has two extra
;; comment rows (out-of-order operands, two memory operands) that the right
;; revision drops, so the right define_insn header sits beside those comments.
;; Column differences: four constraint alternatives with destination ties (0)
;; shrink to two without ties; the condition drops the ix86_sse5_valid_op_p
;; check and tests TARGET_FMA4; the mnemonic gains a v prefix.
1750 ;; Floating point negative multiply and add | 1824 ;; Floating point negative multiply and add. |
1751 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b) | 1825 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b). |
1752 ;; Note operands are out of order to simplify call to ix86_sse5_valid_p | 1826 (define_insn "fma4_fnmadd<mode>4" |
1753 ;; Allow two memory operands to help in optimizing. | 1827 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") |
1754 (define_insn "sse5_fnmadd<mode>4" | |
1755 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x") | |
1756 (minus:SSEMODEF4 | 1828 (minus:SSEMODEF4 |
1757 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0") | 1829 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x") |
1758 (mult:SSEMODEF4 | 1830 (mult:SSEMODEF4 |
1759 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm") | 1831 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x") |
1760 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))] | 1832 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))] |
1761 "TARGET_SSE5 && TARGET_FUSED_MADD | 1833 "TARGET_FMA4 && TARGET_FUSED_MADD" |
1762 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)" | 1834 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
1763 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1764 [(set_attr "type" "ssemuladd") | 1835 [(set_attr "type" "ssemuladd") |
1765 (set_attr "mode" "<MODE>")]) | 1836 (set_attr "mode" "<MODE>")]) |
1766 | 1837 |
;; Left column only: this define_split has no counterpart in the right
;; revision.  It applied to the fnmadd RTL shape, operand 3 minus the product
;; of operands 1 and 2, under the same conditions as the fmadd split.  The
;; split body calls ix86_expand_sse5_multiple_memory and then emits
;; gen_sse5_fnmadd<mode>4.
1767 ;; Split fnmadd with two memory operands into a load and the fnmadd. | |
1768 (define_split | |
1769 [(set (match_operand:SSEMODEF4 0 "register_operand" "") | |
1770 (minus:SSEMODEF4 | |
1771 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "") | |
1772 (mult:SSEMODEF4 | |
1773 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "") | |
1774 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))] | |
1775 "TARGET_SSE5 | |
1776 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true) | |
1777 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true) | |
1778 && !reg_mentioned_p (operands[0], operands[1]) | |
1779 && !reg_mentioned_p (operands[0], operands[2]) | |
1780 && !reg_mentioned_p (operands[0], operands[3])" | |
1781 [(const_int 0)] | |
1782 { | |
1783 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode); | |
1784 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1], | |
1785 operands[2], operands[3])); | |
1786 DONE; | |
1787 }) | |
1788 | |
;; sse5_vmfnmadd<mode>4 (left) renamed to fma4_vmfnmadd<mode>4 (right): the
;; vec_merge form of operand 3 - (operand 1 * operand 2), selector (const_int 1).
;; Column differences: operand 1 constraint 0,0 becomes %x,x; operand 2
;; constraint x,xm becomes x,m; the merge source (match_dup 1) becomes
;; (match_dup 0); the condition drops the ix86_sse5_valid_op_p check and tests
;; TARGET_FMA4 instead of TARGET_SSE5; the mnemonic gains a v prefix.
;; NOTE(review): the use-operand1 comment below still matches the left column
;; only; the right column merges with operand 0.
1789 ;; For the scalar operations, use operand1 for the upper words that aren't | 1838 ;; For the scalar operations, use operand1 for the upper words that aren't |
1790 ;; modified, so restrict the forms that are generated. | 1839 ;; modified, so restrict the forms that are generated. |
1791 ;; Scalar version of fnmadd | 1840 ;; Scalar version of fnmadd. |
1792 (define_insn "sse5_vmfnmadd<mode>4" | 1841 (define_insn "fma4_vmfnmadd<mode>4" |
1793 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") | 1842 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") |
1794 (vec_merge:SSEMODEF2P | 1843 (vec_merge:SSEMODEF2P |
1795 (minus:SSEMODEF2P | 1844 (minus:SSEMODEF2P |
1796 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") | 1845 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") |
1797 (mult:SSEMODEF2P | 1846 (mult:SSEMODEF2P |
1798 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") | 1847 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") |
1799 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))) | 1848 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))) |
1800 (match_dup 1) | 1849 (match_dup 0) |
1801 (const_int 1)))] | 1850 (const_int 1)))] |
1802 "TARGET_SSE5 && TARGET_FUSED_MADD | 1851 "TARGET_FMA4 && TARGET_FUSED_MADD" |
1803 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" | 1852 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
1804 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1805 [(set_attr "type" "ssemuladd") | 1853 [(set_attr "type" "ssemuladd") |
1806 (set_attr "mode" "<MODE>")]) | 1854 (set_attr "mode" "<MODE>")]) |
1807 | 1855 |
;; sse5_fnmsub<mode>4 (left) renamed to fma4_fnmsub<mode>4 (right): matches
;; ((neg operand 1) * operand 2) - operand 3 over the SSEMODEF4 modes.
;; The rows are shifted by one: the left column has an extra comment row
;; about two memory operands, so the right define_insn header sits beside it.
;; Column differences: operand 1 constraint 0,0 becomes %x,x; operand 2
;; constraint x,xm becomes x,m; the condition drops the ix86_sse5_valid_op_p
;; check and tests TARGET_FMA4; the mnemonic gains a v prefix.
1808 ;; Floating point negative multiply and subtract | 1856 ;; Floating point negative multiply and subtract. |
1809 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c | 1857 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c. |
1810 ;; Allow 2 memory operands to help with optimization | 1858 (define_insn "fma4_fnmsub<mode>4" |
1811 (define_insn "sse5_fnmsub<mode>4" | |
1812 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") | 1859 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") |
1813 (minus:SSEMODEF4 | 1860 (minus:SSEMODEF4 |
1814 (mult:SSEMODEF4 | 1861 (mult:SSEMODEF4 |
1815 (neg:SSEMODEF4 | 1862 (neg:SSEMODEF4 |
1816 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0")) | 1863 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")) |
1817 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm")) | 1864 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m")) |
1818 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))] | 1865 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))] |
1819 "TARGET_SSE5 && TARGET_FUSED_MADD | 1866 "TARGET_FMA4 && TARGET_FUSED_MADD" |
1820 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)" | 1867 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
1821 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1822 [(set_attr "type" "ssemuladd") | 1868 [(set_attr "type" "ssemuladd") |
1823 (set_attr "mode" "<MODE>")]) | 1869 (set_attr "mode" "<MODE>")]) |
1824 | 1870 |
;; Left column only: this define_split has no counterpart in the right
;; revision.  It applied to the fnmsub RTL shape, the product of negated
;; operand 1 and operand 2 minus operand 3.  Unlike the other three splits it
;; passes false as the last argument of both ix86_sse5_valid_op_p calls.  The
;; split body calls ix86_expand_sse5_multiple_memory and then emits
;; gen_sse5_fnmsub<mode>4.
1825 ;; Split fnmsub with two memory operands into a load and the fmsub. | |
1826 (define_split | |
1827 [(set (match_operand:SSEMODEF4 0 "register_operand" "") | |
1828 (minus:SSEMODEF4 | |
1829 (mult:SSEMODEF4 | |
1830 (neg:SSEMODEF4 | |
1831 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")) | |
1832 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "")) | |
1833 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))] | |
1834 "TARGET_SSE5 | |
1835 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false) | |
1836 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false) | |
1837 && !reg_mentioned_p (operands[0], operands[1]) | |
1838 && !reg_mentioned_p (operands[0], operands[2]) | |
1839 && !reg_mentioned_p (operands[0], operands[3])" | |
1840 [(const_int 0)] | |
1841 { | |
1842 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode); | |
1843 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1], | |
1844 operands[2], operands[3])); | |
1845 DONE; | |
1846 }) | |
1847 | |
;; sse5_vmfnmsub<mode>4 (left) renamed to fma4_vmfnmsub<mode>4 (right): the
;; vec_merge form of ((neg operand 1) * operand 2) - operand 3, selector
;; (const_int 1).
;; Column differences: operand 1 constraint 0,0 becomes %x,x; operand 2
;; constraint x,xm becomes x,m; the merge source (match_dup 1) becomes
;; (match_dup 0); the condition drops the ix86_sse5_valid_op_p check and tests
;; TARGET_FMA4 instead of TARGET_SSE5; the mnemonic gains a v prefix.
;; NOTE(review): the use-operand1 comment below still matches the left column
;; only; the right column merges with operand 0.
1848 ;; For the scalar operations, use operand1 for the upper words that aren't | 1871 ;; For the scalar operations, use operand1 for the upper words that aren't |
1849 ;; modified, so restrict the forms that are generated. | 1872 ;; modified, so restrict the forms that are generated. |
1850 ;; Scalar version of fnmsub | 1873 ;; Scalar version of fnmsub. |
1851 (define_insn "sse5_vmfnmsub<mode>4" | 1874 (define_insn "fma4_vmfnmsub<mode>4" |
1852 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") | 1875 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") |
1853 (vec_merge:SSEMODEF2P | 1876 (vec_merge:SSEMODEF2P |
1854 (minus:SSEMODEF2P | 1877 (minus:SSEMODEF2P |
1855 (mult:SSEMODEF2P | 1878 (mult:SSEMODEF2P |
1856 (neg:SSEMODEF2P | 1879 (neg:SSEMODEF2P |
1857 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")) | 1880 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")) |
1858 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) | 1881 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) |
1859 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) | 1882 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) |
1860 (match_dup 1) | 1883 (match_dup 0) |
1861 (const_int 1)))] | 1884 (const_int 1)))] |
1862 "TARGET_SSE5 && TARGET_FUSED_MADD | 1885 "TARGET_FMA4 && TARGET_FUSED_MADD" |
1863 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)" | 1886 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
1864 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1865 [(set_attr "type" "ssemuladd") | 1887 [(set_attr "type" "ssemuladd") |
1866 (set_attr "mode" "<MODE>")]) | 1888 (set_attr "mode" "<MODE>")]) |
1867 | 1889 |
1868 ;; The same instructions using an UNSPEC to allow the intrinsic to be used | 1890 (define_insn "fma4i_fmadd<mode>4256" |
1869 ;; even if the user used -mno-fused-madd | 1891 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") |
1870 ;; Parallel instructions. During instruction generation, just default | 1892 (unspec:FMA4MODEF4 |
1871 ;; to registers, and let combine later build the appropriate instruction. | 1893 [(plus:FMA4MODEF4 |
1872 (define_expand "sse5i_fmadd<mode>4" | 1894 (mult:FMA4MODEF4 |
1873 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") | 1895 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x") |
1896 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) | |
1897 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))] | |
1898 UNSPEC_FMA4_INTRINSIC))] | |
1899 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
1900 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1901 [(set_attr "type" "ssemuladd") | |
1902 (set_attr "mode" "<MODE>")]) | |
1903 | |
1904 (define_insn "fma4i_fmsub<mode>4256" | |
1905 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") | |
1906 (unspec:FMA4MODEF4 | |
1907 [(minus:FMA4MODEF4 | |
1908 (mult:FMA4MODEF4 | |
1909 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x") | |
1910 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) | |
1911 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))] | |
1912 UNSPEC_FMA4_INTRINSIC))] | |
1913 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
1914 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1915 [(set_attr "type" "ssemuladd") | |
1916 (set_attr "mode" "<MODE>")]) | |
1917 | |
1918 (define_insn "fma4i_fnmadd<mode>4256" | |
1919 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") | |
1920 (unspec:FMA4MODEF4 | |
1921 [(minus:FMA4MODEF4 | |
1922 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x") | |
1923 (mult:FMA4MODEF4 | |
1924 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x") | |
1925 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))] | |
1926 UNSPEC_FMA4_INTRINSIC))] | |
1927 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
1928 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1929 [(set_attr "type" "ssemuladd") | |
1930 (set_attr "mode" "<MODE>")]) | |
1931 | |
1932 (define_insn "fma4i_fnmsub<mode>4256" | |
1933 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x") | |
1934 (unspec:FMA4MODEF4 | |
1935 [(minus:FMA4MODEF4 | |
1936 (mult:FMA4MODEF4 | |
1937 (neg:FMA4MODEF4 | |
1938 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")) | |
1939 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")) | |
1940 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))] | |
1941 UNSPEC_FMA4_INTRINSIC))] | |
1942 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
1943 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1944 [(set_attr "type" "ssemuladd") | |
1945 (set_attr "mode" "<MODE>")]) | |
1946 | |
1947 (define_insn "fma4i_fmadd<mode>4" | |
1948 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") | |
1874 (unspec:SSEMODEF2P | 1949 (unspec:SSEMODEF2P |
1875 [(plus:SSEMODEF2P | 1950 [(plus:SSEMODEF2P |
1876 (mult:SSEMODEF2P | 1951 (mult:SSEMODEF2P |
1877 (match_operand:SSEMODEF2P 1 "register_operand" "") | 1952 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") |
1878 (match_operand:SSEMODEF2P 2 "register_operand" "")) | 1953 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) |
1879 (match_operand:SSEMODEF2P 3 "register_operand" ""))] | 1954 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))] |
1880 UNSPEC_SSE5_INTRINSIC))] | 1955 UNSPEC_FMA4_INTRINSIC))] |
1881 "TARGET_SSE5" | 1956 "TARGET_FMA4 && TARGET_FUSED_MADD" |
1882 { | 1957 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
1883 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ | |
1884 if (TARGET_FUSED_MADD) | |
1885 { | |
1886 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1], | |
1887 operands[2], operands[3])); | |
1888 DONE; | |
1889 } | |
1890 }) | |
1891 | |
1892 (define_insn "*sse5i_fmadd<mode>4" | |
1893 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") | |
1894 (unspec:SSEMODEF2P | |
1895 [(plus:SSEMODEF2P | |
1896 (mult:SSEMODEF2P | |
1897 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm") | |
1898 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")) | |
1899 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))] | |
1900 UNSPEC_SSE5_INTRINSIC))] | |
1901 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" | |
1902 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1903 [(set_attr "type" "ssemuladd") | 1958 [(set_attr "type" "ssemuladd") |
1904 (set_attr "mode" "<MODE>")]) | 1959 (set_attr "mode" "<MODE>")]) |
1905 | 1960 |
1906 (define_expand "sse5i_fmsub<mode>4" | 1961 (define_insn "fma4i_fmsub<mode>4" |
1907 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") | 1962 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") |
1908 (unspec:SSEMODEF2P | 1963 (unspec:SSEMODEF2P |
1909 [(minus:SSEMODEF2P | 1964 [(minus:SSEMODEF2P |
1910 (mult:SSEMODEF2P | 1965 (mult:SSEMODEF2P |
1911 (match_operand:SSEMODEF2P 1 "register_operand" "") | 1966 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") |
1912 (match_operand:SSEMODEF2P 2 "register_operand" "")) | 1967 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) |
1913 (match_operand:SSEMODEF2P 3 "register_operand" ""))] | 1968 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))] |
1914 UNSPEC_SSE5_INTRINSIC))] | 1969 UNSPEC_FMA4_INTRINSIC))] |
1915 "TARGET_SSE5" | 1970 "TARGET_FMA4 && TARGET_FUSED_MADD" |
1916 { | 1971 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
1917 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ | 1972 [(set_attr "type" "ssemuladd") |
1918 if (TARGET_FUSED_MADD) | 1973 (set_attr "mode" "<MODE>")]) |
1919 { | 1974 |
1920 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1], | 1975 (define_insn "fma4i_fnmadd<mode>4" |
1921 operands[2], operands[3])); | 1976 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") |
1922 DONE; | |
1923 } | |
1924 }) | |
1925 | |
1926 (define_insn "*sse5i_fmsub<mode>4" | |
1927 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") | |
1928 (unspec:SSEMODEF2P | 1977 (unspec:SSEMODEF2P |
1929 [(minus:SSEMODEF2P | 1978 [(minus:SSEMODEF2P |
1979 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") | |
1930 (mult:SSEMODEF2P | 1980 (mult:SSEMODEF2P |
1931 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm") | 1981 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") |
1932 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")) | 1982 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))] |
1933 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))] | 1983 UNSPEC_FMA4_INTRINSIC))] |
1934 UNSPEC_SSE5_INTRINSIC))] | 1984 "TARGET_FMA4 && TARGET_FUSED_MADD" |
1935 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" | 1985 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
1936 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1937 [(set_attr "type" "ssemuladd") | 1986 [(set_attr "type" "ssemuladd") |
1938 (set_attr "mode" "<MODE>")]) | 1987 (set_attr "mode" "<MODE>")]) |
1939 | 1988 |
1940 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b) | 1989 (define_insn "fma4i_fnmsub<mode>4" |
1941 ;; Note operands are out of order to simplify call to ix86_sse5_valid_p | 1990 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") |
1942 (define_expand "sse5i_fnmadd<mode>4" | |
1943 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") | |
1944 (unspec:SSEMODEF2P | |
1945 [(minus:SSEMODEF2P | |
1946 (match_operand:SSEMODEF2P 3 "register_operand" "") | |
1947 (mult:SSEMODEF2P | |
1948 (match_operand:SSEMODEF2P 1 "register_operand" "") | |
1949 (match_operand:SSEMODEF2P 2 "register_operand" "")))] | |
1950 UNSPEC_SSE5_INTRINSIC))] | |
1951 "TARGET_SSE5" | |
1952 { | |
1953 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ | |
1954 if (TARGET_FUSED_MADD) | |
1955 { | |
1956 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1], | |
1957 operands[2], operands[3])); | |
1958 DONE; | |
1959 } | |
1960 }) | |
1961 | |
1962 (define_insn "*sse5i_fnmadd<mode>4" | |
1963 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") | |
1964 (unspec:SSEMODEF2P | |
1965 [(minus:SSEMODEF2P | |
1966 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0") | |
1967 (mult:SSEMODEF2P | |
1968 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm") | |
1969 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))] | |
1970 UNSPEC_SSE5_INTRINSIC))] | |
1971 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" | |
1972 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
1973 [(set_attr "type" "ssemuladd") | |
1974 (set_attr "mode" "<MODE>")]) | |
1975 | |
1976 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c | |
1977 (define_expand "sse5i_fnmsub<mode>4" | |
1978 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") | |
1979 (unspec:SSEMODEF2P | 1991 (unspec:SSEMODEF2P |
1980 [(minus:SSEMODEF2P | 1992 [(minus:SSEMODEF2P |
1981 (mult:SSEMODEF2P | 1993 (mult:SSEMODEF2P |
1982 (neg:SSEMODEF2P | 1994 (neg:SSEMODEF2P |
1983 (match_operand:SSEMODEF2P 1 "register_operand" "")) | 1995 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")) |
1984 (match_operand:SSEMODEF2P 2 "register_operand" "")) | 1996 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) |
1985 (match_operand:SSEMODEF2P 3 "register_operand" ""))] | 1997 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))] |
1986 UNSPEC_SSE5_INTRINSIC))] | 1998 UNSPEC_FMA4_INTRINSIC))] |
1987 "TARGET_SSE5" | 1999 "TARGET_FMA4 && TARGET_FUSED_MADD" |
1988 { | 2000 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
1989 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ | |
1990 if (TARGET_FUSED_MADD) | |
1991 { | |
1992 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1], | |
1993 operands[2], operands[3])); | |
1994 DONE; | |
1995 } | |
1996 }) | |
1997 | |
1998 (define_insn "*sse5i_fnmsub<mode>4" | |
1999 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") | |
2000 (unspec:SSEMODEF2P | |
2001 [(minus:SSEMODEF2P | |
2002 (mult:SSEMODEF2P | |
2003 (neg:SSEMODEF2P | |
2004 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")) | |
2005 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")) | |
2006 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))] | |
2007 UNSPEC_SSE5_INTRINSIC))] | |
2008 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" | |
2009 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2010 [(set_attr "type" "ssemuladd") | 2001 [(set_attr "type" "ssemuladd") |
2011 (set_attr "mode" "<MODE>")]) | 2002 (set_attr "mode" "<MODE>")]) |
2012 | 2003 |
2013 ;; Scalar instructions | |
2014 (define_expand "sse5i_vmfmadd<mode>4" | |
2015 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") | |
2016 (unspec:SSEMODEF2P | |
2017 [(vec_merge:SSEMODEF2P | |
2018 (plus:SSEMODEF2P | |
2019 (mult:SSEMODEF2P | |
2020 (match_operand:SSEMODEF2P 1 "register_operand" "") | |
2021 (match_operand:SSEMODEF2P 2 "register_operand" "")) | |
2022 (match_operand:SSEMODEF2P 3 "register_operand" "")) | |
2023 (match_dup 1) | |
2024 (const_int 0))] | |
2025 UNSPEC_SSE5_INTRINSIC))] | |
2026 "TARGET_SSE5" | |
2027 { | |
2028 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ | |
2029 if (TARGET_FUSED_MADD) | |
2030 { | |
2031 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1], | |
2032 operands[2], operands[3])); | |
2033 DONE; | |
2034 } | |
2035 }) | |
2036 | |
2037 ;; For the scalar operations, use operand1 for the upper words that aren't | 2004 ;; For the scalar operations, use operand1 for the upper words that aren't |
2038 ;; modified, so restrict the forms that are accepted. | 2005 ;; modified, so restrict the forms that are accepted. |
2039 (define_insn "*sse5i_vmfmadd<mode>4" | 2006 (define_insn "fma4i_vmfmadd<mode>4" |
2040 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") | 2007 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") |
2041 (unspec:SSEMODEF2P | 2008 (unspec:SSEMODEF2P |
2042 [(vec_merge:SSEMODEF2P | 2009 [(vec_merge:SSEMODEF2P |
2043 (plus:SSEMODEF2P | 2010 (plus:SSEMODEF2P |
2044 (mult:SSEMODEF2P | 2011 (mult:SSEMODEF2P |
2045 (match_operand:SSEMODEF2P 1 "register_operand" "0,0") | 2012 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") |
2046 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) | 2013 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) |
2047 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) | 2014 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) |
2048 (match_dup 0) | 2015 (match_dup 0) |
2049 (const_int 0))] | 2016 (const_int 1))] |
2050 UNSPEC_SSE5_INTRINSIC))] | 2017 UNSPEC_FMA4_INTRINSIC))] |
2051 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" | 2018 "TARGET_FMA4 && TARGET_FUSED_MADD" |
2052 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 2019 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
2053 [(set_attr "type" "ssemuladd") | 2020 [(set_attr "type" "ssemuladd") |
2054 (set_attr "mode" "<ssescalarmode>")]) | 2021 (set_attr "mode" "<ssescalarmode>")]) |
2055 | 2022 |
2056 (define_expand "sse5i_vmfmsub<mode>4" | 2023 (define_insn "fma4i_vmfmsub<mode>4" |
2057 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") | |
2058 (unspec:SSEMODEF2P | |
2059 [(vec_merge:SSEMODEF2P | |
2060 (minus:SSEMODEF2P | |
2061 (mult:SSEMODEF2P | |
2062 (match_operand:SSEMODEF2P 1 "register_operand" "") | |
2063 (match_operand:SSEMODEF2P 2 "register_operand" "")) | |
2064 (match_operand:SSEMODEF2P 3 "register_operand" "")) | |
2065 (match_dup 0) | |
2066 (const_int 1))] | |
2067 UNSPEC_SSE5_INTRINSIC))] | |
2068 "TARGET_SSE5" | |
2069 { | |
2070 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ | |
2071 if (TARGET_FUSED_MADD) | |
2072 { | |
2073 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1], | |
2074 operands[2], operands[3])); | |
2075 DONE; | |
2076 } | |
2077 }) | |
2078 | |
2079 (define_insn "*sse5i_vmfmsub<mode>4" | |
2080 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") | 2024 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") |
2081 (unspec:SSEMODEF2P | 2025 (unspec:SSEMODEF2P |
2082 [(vec_merge:SSEMODEF2P | 2026 [(vec_merge:SSEMODEF2P |
2083 (minus:SSEMODEF2P | 2027 (minus:SSEMODEF2P |
2084 (mult:SSEMODEF2P | 2028 (mult:SSEMODEF2P |
2085 (match_operand:SSEMODEF2P 1 "register_operand" "0,0") | 2029 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") |
2086 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) | 2030 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) |
2087 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) | 2031 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) |
2088 (match_dup 1) | 2032 (match_dup 0) |
2089 (const_int 1))] | 2033 (const_int 1))] |
2090 UNSPEC_SSE5_INTRINSIC))] | 2034 UNSPEC_FMA4_INTRINSIC))] |
2091 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" | 2035 "TARGET_FMA4 && TARGET_FUSED_MADD" |
2092 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 2036 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
2093 [(set_attr "type" "ssemuladd") | 2037 [(set_attr "type" "ssemuladd") |
2094 (set_attr "mode" "<ssescalarmode>")]) | 2038 (set_attr "mode" "<ssescalarmode>")]) |
2095 | 2039 |
2096 ;; Note operands are out of order to simplify call to ix86_sse5_valid_p | 2040 (define_insn "fma4i_vmfnmadd<mode>4" |
2097 (define_expand "sse5i_vmfnmadd<mode>4" | |
2098 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") | |
2099 (unspec:SSEMODEF2P | |
2100 [(vec_merge:SSEMODEF2P | |
2101 (minus:SSEMODEF2P | |
2102 (match_operand:SSEMODEF2P 3 "register_operand" "") | |
2103 (mult:SSEMODEF2P | |
2104 (match_operand:SSEMODEF2P 1 "register_operand" "") | |
2105 (match_operand:SSEMODEF2P 2 "register_operand" ""))) | |
2106 (match_dup 1) | |
2107 (const_int 1))] | |
2108 UNSPEC_SSE5_INTRINSIC))] | |
2109 "TARGET_SSE5" | |
2110 { | |
2111 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ | |
2112 if (TARGET_FUSED_MADD) | |
2113 { | |
2114 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1], | |
2115 operands[2], operands[3])); | |
2116 DONE; | |
2117 } | |
2118 }) | |
2119 | |
2120 (define_insn "*sse5i_vmfnmadd<mode>4" | |
2121 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") | 2041 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") |
2122 (unspec:SSEMODEF2P | 2042 (unspec:SSEMODEF2P |
2123 [(vec_merge:SSEMODEF2P | 2043 [(vec_merge:SSEMODEF2P |
2124 (minus:SSEMODEF2P | 2044 (minus:SSEMODEF2P |
2125 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") | 2045 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") |
2126 (mult:SSEMODEF2P | 2046 (mult:SSEMODEF2P |
2127 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0") | 2047 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x") |
2128 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))) | 2048 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))) |
2129 (match_dup 1) | 2049 (match_dup 0) |
2130 (const_int 1))] | 2050 (const_int 1))] |
2131 UNSPEC_SSE5_INTRINSIC))] | 2051 UNSPEC_FMA4_INTRINSIC))] |
2132 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" | 2052 "TARGET_FMA4 && TARGET_FUSED_MADD" |
2133 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 2053 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
2134 [(set_attr "type" "ssemuladd") | 2054 [(set_attr "type" "ssemuladd") |
2135 (set_attr "mode" "<ssescalarmode>")]) | 2055 (set_attr "mode" "<ssescalarmode>")]) |
2136 | 2056 |
2137 (define_expand "sse5i_vmfnmsub<mode>4" | 2057 (define_insn "fma4i_vmfnmsub<mode>4" |
2138 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") | |
2139 (unspec:SSEMODEF2P | |
2140 [(vec_merge:SSEMODEF2P | |
2141 (minus:SSEMODEF2P | |
2142 (mult:SSEMODEF2P | |
2143 (neg:SSEMODEF2P | |
2144 (match_operand:SSEMODEF2P 1 "register_operand" "")) | |
2145 (match_operand:SSEMODEF2P 2 "register_operand" "")) | |
2146 (match_operand:SSEMODEF2P 3 "register_operand" "")) | |
2147 (match_dup 1) | |
2148 (const_int 1))] | |
2149 UNSPEC_SSE5_INTRINSIC))] | |
2150 "TARGET_SSE5" | |
2151 { | |
2152 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ | |
2153 if (TARGET_FUSED_MADD) | |
2154 { | |
2155 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1], | |
2156 operands[2], operands[3])); | |
2157 DONE; | |
2158 } | |
2159 }) | |
2160 | |
2161 (define_insn "*sse5i_vmfnmsub<mode>4" | |
2162 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") | 2058 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") |
2163 (unspec:SSEMODEF2P | 2059 (unspec:SSEMODEF2P |
2164 [(vec_merge:SSEMODEF2P | 2060 [(vec_merge:SSEMODEF2P |
2165 (minus:SSEMODEF2P | 2061 (minus:SSEMODEF2P |
2166 (mult:SSEMODEF2P | 2062 (mult:SSEMODEF2P |
2167 (neg:SSEMODEF2P | 2063 (neg:SSEMODEF2P |
2168 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")) | 2064 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")) |
2169 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) | 2065 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")) |
2170 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) | 2066 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) |
2171 (match_dup 1) | 2067 (match_dup 0) |
2172 (const_int 1))] | 2068 (const_int 1))] |
2173 UNSPEC_SSE5_INTRINSIC))] | 2069 UNSPEC_FMA4_INTRINSIC))] |
2174 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" | 2070 "TARGET_FMA4 && TARGET_FUSED_MADD" |
2175 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 2071 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
2176 [(set_attr "type" "ssemuladd") | 2072 [(set_attr "type" "ssemuladd") |
2177 (set_attr "mode" "<ssescalarmode>")]) | 2073 (set_attr "mode" "<ssescalarmode>")]) |
2074 | |
2075 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
2076 ;; | |
2077 ;; FMA4 Parallel floating point multiply addsub and subadd operations. | |
2078 ;; | |
2079 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
2080 | |
2081 (define_insn "fma4_fmaddsubv8sf4" | |
2082 [(set (match_operand:V8SF 0 "register_operand" "=x,x") | |
2083 (vec_merge:V8SF | |
2084 (plus:V8SF | |
2085 (mult:V8SF | |
2086 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x") | |
2087 (match_operand:V8SF 2 "nonimmediate_operand" "x,m")) | |
2088 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x")) | |
2089 (minus:V8SF | |
2090 (mult:V8SF | |
2091 (match_dup 1) | |
2092 (match_dup 2)) | |
2093 (match_dup 3)) | |
2094 (const_int 170)))] | |
2095 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2096 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2097 [(set_attr "type" "ssemuladd") | |
2098 (set_attr "mode" "V8SF")]) | |
2099 | |
2100 (define_insn "fma4_fmaddsubv4df4" | |
2101 [(set (match_operand:V4DF 0 "register_operand" "=x,x") | |
2102 (vec_merge:V4DF | |
2103 (plus:V4DF | |
2104 (mult:V4DF | |
2105 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x") | |
2106 (match_operand:V4DF 2 "nonimmediate_operand" "x,m")) | |
2107 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x")) | |
2108 (minus:V4DF | |
2109 (mult:V4DF | |
2110 (match_dup 1) | |
2111 (match_dup 2)) | |
2112 (match_dup 3)) | |
2113 (const_int 10)))] | |
2114 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2115 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2116 [(set_attr "type" "ssemuladd") | |
2117 (set_attr "mode" "V4DF")]) | |
2118 | |
2119 (define_insn "fma4_fmaddsubv4sf4" | |
2120 [(set (match_operand:V4SF 0 "register_operand" "=x,x") | |
2121 (vec_merge:V4SF | |
2122 (plus:V4SF | |
2123 (mult:V4SF | |
2124 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x") | |
2125 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")) | |
2126 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x")) | |
2127 (minus:V4SF | |
2128 (mult:V4SF | |
2129 (match_dup 1) | |
2130 (match_dup 2)) | |
2131 (match_dup 3)) | |
2132 (const_int 10)))] | |
2133 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2134 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2135 [(set_attr "type" "ssemuladd") | |
2136 (set_attr "mode" "V4SF")]) | |
2137 | |
2138 (define_insn "fma4_fmaddsubv2df4" | |
2139 [(set (match_operand:V2DF 0 "register_operand" "=x,x") | |
2140 (vec_merge:V2DF | |
2141 (plus:V2DF | |
2142 (mult:V2DF | |
2143 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x") | |
2144 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")) | |
2145 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x")) | |
2146 (minus:V2DF | |
2147 (mult:V2DF | |
2148 (match_dup 1) | |
2149 (match_dup 2)) | |
2150 (match_dup 3)) | |
2151 (const_int 2)))] | |
2152 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2153 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2154 [(set_attr "type" "ssemuladd") | |
2155 (set_attr "mode" "V2DF")]) | |
2156 | |
2157 (define_insn "fma4_fmsubaddv8sf4" | |
2158 [(set (match_operand:V8SF 0 "register_operand" "=x,x") | |
2159 (vec_merge:V8SF | |
2160 (plus:V8SF | |
2161 (mult:V8SF | |
2162 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x") | |
2163 (match_operand:V8SF 2 "nonimmediate_operand" "x,m")) | |
2164 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x")) | |
2165 (minus:V8SF | |
2166 (mult:V8SF | |
2167 (match_dup 1) | |
2168 (match_dup 2)) | |
2169 (match_dup 3)) | |
2170 (const_int 85)))] | |
2171 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2172 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2173 [(set_attr "type" "ssemuladd") | |
2174 (set_attr "mode" "V8SF")]) | |
2175 | |
2176 (define_insn "fma4_fmsubaddv4df4" | |
2177 [(set (match_operand:V4DF 0 "register_operand" "=x,x") | |
2178 (vec_merge:V4DF | |
2179 (plus:V4DF | |
2180 (mult:V4DF | |
2181 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x") | |
2182 (match_operand:V4DF 2 "nonimmediate_operand" "x,m")) | |
2183 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x")) | |
2184 (minus:V4DF | |
2185 (mult:V4DF | |
2186 (match_dup 1) | |
2187 (match_dup 2)) | |
2188 (match_dup 3)) | |
2189 (const_int 5)))] | |
2190 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2191 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2192 [(set_attr "type" "ssemuladd") | |
2193 (set_attr "mode" "V4DF")]) | |
2194 | |
2195 (define_insn "fma4_fmsubaddv4sf4" | |
2196 [(set (match_operand:V4SF 0 "register_operand" "=x,x") | |
2197 (vec_merge:V4SF | |
2198 (plus:V4SF | |
2199 (mult:V4SF | |
2200 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x") | |
2201 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")) | |
2202 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x")) | |
2203 (minus:V4SF | |
2204 (mult:V4SF | |
2205 (match_dup 1) | |
2206 (match_dup 2)) | |
2207 (match_dup 3)) | |
2208 (const_int 5)))] | |
2209 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2210 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2211 [(set_attr "type" "ssemuladd") | |
2212 (set_attr "mode" "V4SF")]) | |
2213 | |
2214 (define_insn "fma4_fmsubaddv2df4" | |
2215 [(set (match_operand:V2DF 0 "register_operand" "=x,x") | |
2216 (vec_merge:V2DF | |
2217 (plus:V2DF | |
2218 (mult:V2DF | |
2219 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x") | |
2220 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")) | |
2221 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x")) | |
2222 (minus:V2DF | |
2223 (mult:V2DF | |
2224 (match_dup 1) | |
2225 (match_dup 2)) | |
2226 (match_dup 3)) | |
2227 (const_int 1)))] | |
2228 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2229 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2230 [(set_attr "type" "ssemuladd") | |
2231 (set_attr "mode" "V2DF")]) | |
2232 | |
2233 (define_insn "fma4i_fmaddsubv8sf4" | |
2234 [(set (match_operand:V8SF 0 "register_operand" "=x,x") | |
2235 (unspec:V8SF | |
2236 [(vec_merge:V8SF | |
2237 (plus:V8SF | |
2238 (mult:V8SF | |
2239 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x") | |
2240 (match_operand:V8SF 2 "nonimmediate_operand" "x,m")) | |
2241 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x")) | |
2242 (minus:V8SF | |
2243 (mult:V8SF | |
2244 (match_dup 1) | |
2245 (match_dup 2)) | |
2246 (match_dup 3)) | |
2247 (const_int 170))] | |
2248 UNSPEC_FMA4_INTRINSIC))] | |
2249 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2250 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2251 [(set_attr "type" "ssemuladd") | |
2252 (set_attr "mode" "V8SF")]) | |
2253 | |
2254 (define_insn "fma4i_fmaddsubv4df4" | |
2255 [(set (match_operand:V4DF 0 "register_operand" "=x,x") | |
2256 (unspec:V4DF | |
2257 [(vec_merge:V4DF | |
2258 (plus:V4DF | |
2259 (mult:V4DF | |
2260 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x") | |
2261 (match_operand:V4DF 2 "nonimmediate_operand" "x,m")) | |
2262 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x")) | |
2263 (minus:V4DF | |
2264 (mult:V4DF | |
2265 (match_dup 1) | |
2266 (match_dup 2)) | |
2267 (match_dup 3)) | |
2268 (const_int 10))] | |
2269 UNSPEC_FMA4_INTRINSIC))] | |
2270 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2271 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2272 [(set_attr "type" "ssemuladd") | |
2273 (set_attr "mode" "V4DF")]) | |
2274 | |
2275 (define_insn "fma4i_fmaddsubv4sf4" | |
2276 [(set (match_operand:V4SF 0 "register_operand" "=x,x") | |
2277 (unspec:V4SF | |
2278 [(vec_merge:V4SF | |
2279 (plus:V4SF | |
2280 (mult:V4SF | |
2281 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x") | |
2282 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")) | |
2283 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x")) | |
2284 (minus:V4SF | |
2285 (mult:V4SF | |
2286 (match_dup 1) | |
2287 (match_dup 2)) | |
2288 (match_dup 3)) | |
2289 (const_int 10))] | |
2290 UNSPEC_FMA4_INTRINSIC))] | |
2291 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2292 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2293 [(set_attr "type" "ssemuladd") | |
2294 (set_attr "mode" "V4SF")]) | |
2295 | |
2296 (define_insn "fma4i_fmaddsubv2df4" | |
2297 [(set (match_operand:V2DF 0 "register_operand" "=x,x") | |
2298 (unspec:V2DF | |
2299 [(vec_merge:V2DF | |
2300 (plus:V2DF | |
2301 (mult:V2DF | |
2302 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x") | |
2303 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")) | |
2304 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x")) | |
2305 (minus:V2DF | |
2306 (mult:V2DF | |
2307 (match_dup 1) | |
2308 (match_dup 2)) | |
2309 (match_dup 3)) | |
2310 (const_int 2))] | |
2311 UNSPEC_FMA4_INTRINSIC))] | |
2312 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2313 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2314 [(set_attr "type" "ssemuladd") | |
2315 (set_attr "mode" "V2DF")]) | |
2316 | |
2317 (define_insn "fma4i_fmsubaddv8sf4" | |
2318 [(set (match_operand:V8SF 0 "register_operand" "=x,x") | |
2319 (unspec:V8SF | |
2320 [(vec_merge:V8SF | |
2321 (plus:V8SF | |
2322 (mult:V8SF | |
2323 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x") | |
2324 (match_operand:V8SF 2 "nonimmediate_operand" "x,m")) | |
2325 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x")) | |
2326 (minus:V8SF | |
2327 (mult:V8SF | |
2328 (match_dup 1) | |
2329 (match_dup 2)) | |
2330 (match_dup 3)) | |
2331 (const_int 85))] | |
2332 UNSPEC_FMA4_INTRINSIC))] | |
2333 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2334 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2335 [(set_attr "type" "ssemuladd") | |
2336 (set_attr "mode" "V8SF")]) | |
2337 | |
2338 (define_insn "fma4i_fmsubaddv4df4" | |
2339 [(set (match_operand:V4DF 0 "register_operand" "=x,x") | |
2340 (unspec:V4DF | |
2341 [(vec_merge:V4DF | |
2342 (plus:V4DF | |
2343 (mult:V4DF | |
2344 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x") | |
2345 (match_operand:V4DF 2 "nonimmediate_operand" "x,m")) | |
2346 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x")) | |
2347 (minus:V4DF | |
2348 (mult:V4DF | |
2349 (match_dup 1) | |
2350 (match_dup 2)) | |
2351 (match_dup 3)) | |
2352 (const_int 5))] | |
2353 UNSPEC_FMA4_INTRINSIC))] | |
2354 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2355 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2356 [(set_attr "type" "ssemuladd") | |
2357 (set_attr "mode" "V4DF")]) | |
2358 | |
2359 (define_insn "fma4i_fmsubaddv4sf4" | |
2360 [(set (match_operand:V4SF 0 "register_operand" "=x,x") | |
2361 (unspec:V4SF | |
2362 [(vec_merge:V4SF | |
2363 (plus:V4SF | |
2364 (mult:V4SF | |
2365 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x") | |
2366 (match_operand:V4SF 2 "nonimmediate_operand" "x,m")) | |
2367 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x")) | |
2368 (minus:V4SF | |
2369 (mult:V4SF | |
2370 (match_dup 1) | |
2371 (match_dup 2)) | |
2372 (match_dup 3)) | |
2373 (const_int 5))] | |
2374 UNSPEC_FMA4_INTRINSIC))] | |
2375 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2376 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2377 [(set_attr "type" "ssemuladd") | |
2378 (set_attr "mode" "V4SF")]) | |
2379 | |
;; FMA4 intrinsic pattern for V2DF.  The source is a vec_merge, under
;; mask 1 (binary 01), of (op1 * op2 + op3) and (op1 * op2 - op3),
;; wrapped in UNSPEC_FMA4_INTRINSIC.  Operands 1 and 2 are marked
;; commutative ("%"); the two alternatives allow either operand 2 or
;; operand 3 to be memory.  Emits vfmsubaddpd and is enabled only when
;; both TARGET_FMA4 and TARGET_FUSED_MADD hold.
2380 (define_insn "fma4i_fmsubaddv2df4" | |
2381 [(set (match_operand:V2DF 0 "register_operand" "=x,x") | |
2382 (unspec:V2DF | |
2383 [(vec_merge:V2DF | |
2384 (plus:V2DF | |
2385 (mult:V2DF | |
2386 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x") | |
2387 (match_operand:V2DF 2 "nonimmediate_operand" "x,m")) | |
2388 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x")) | |
2389 (minus:V2DF | |
2390 (mult:V2DF | |
2391 (match_dup 1) | |
2392 (match_dup 2)) | |
2393 (match_dup 3)) | |
2394 (const_int 1))] | |
2395 UNSPEC_FMA4_INTRINSIC))] | |
2396 "TARGET_FMA4 && TARGET_FUSED_MADD" | |
2397 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
2398 [(set_attr "type" "ssemuladd") | |
2399 (set_attr "mode" "V2DF")]) | |
2178 | 2400 |
2179 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 2401 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2180 ;; | 2402 ;; |
2181 ;; Parallel single-precision floating point conversion operations | 2403 ;; Parallel single-precision floating point conversion operations |
2182 ;; | 2404 ;; |
2213 (parallel [(const_int 0) (const_int 1)])))] | 2435 (parallel [(const_int 0) (const_int 1)])))] |
2214 "TARGET_SSE" | 2436 "TARGET_SSE" |
2215 "cvttps2pi\t{%1, %0|%0, %1}" | 2437 "cvttps2pi\t{%1, %0|%0, %1}" |
2216 [(set_attr "type" "ssecvt") | 2438 [(set_attr "type" "ssecvt") |
2217 (set_attr "unit" "mmx") | 2439 (set_attr "unit" "mmx") |
2440 (set_attr "prefix_rep" "0") | |
2218 (set_attr "mode" "SF")]) | 2441 (set_attr "mode" "SF")]) |
2219 | 2442 |
2220 (define_insn "*avx_cvtsi2ss" | 2443 (define_insn "*avx_cvtsi2ss" |
2221 [(set (match_operand:V4SF 0 "register_operand" "=x") | 2444 [(set (match_operand:V4SF 0 "register_operand" "=x") |
2222 (vec_merge:V4SF | 2445 (vec_merge:V4SF |
2252 (match_operand:V4SF 1 "register_operand" "x") | 2475 (match_operand:V4SF 1 "register_operand" "x") |
2253 (const_int 1)))] | 2476 (const_int 1)))] |
2254 "TARGET_AVX && TARGET_64BIT" | 2477 "TARGET_AVX && TARGET_64BIT" |
2255 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}" | 2478 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}" |
2256 [(set_attr "type" "sseicvt") | 2479 [(set_attr "type" "sseicvt") |
2480 (set_attr "length_vex" "4") | |
2257 (set_attr "prefix" "vex") | 2481 (set_attr "prefix" "vex") |
2258 (set_attr "mode" "SF")]) | 2482 (set_attr "mode" "SF")]) |
2259 | 2483 |
2260 (define_insn "sse_cvtsi2ssq" | 2484 (define_insn "sse_cvtsi2ssq" |
2261 [(set (match_operand:V4SF 0 "register_operand" "=x,x") | 2485 [(set (match_operand:V4SF 0 "register_operand" "=x,x") |
2265 (match_operand:V4SF 1 "register_operand" "0,0") | 2489 (match_operand:V4SF 1 "register_operand" "0,0") |
2266 (const_int 1)))] | 2490 (const_int 1)))] |
2267 "TARGET_SSE && TARGET_64BIT" | 2491 "TARGET_SSE && TARGET_64BIT" |
2268 "cvtsi2ssq\t{%2, %0|%0, %2}" | 2492 "cvtsi2ssq\t{%2, %0|%0, %2}" |
2269 [(set_attr "type" "sseicvt") | 2493 [(set_attr "type" "sseicvt") |
2494 (set_attr "prefix_rex" "1") | |
2270 (set_attr "athlon_decode" "vector,double") | 2495 (set_attr "athlon_decode" "vector,double") |
2271 (set_attr "amdfam10_decode" "vector,double") | 2496 (set_attr "amdfam10_decode" "vector,double") |
2272 (set_attr "mode" "SF")]) | 2497 (set_attr "mode" "SF")]) |
2273 | 2498 |
2274 (define_insn "sse_cvtss2si" | 2499 (define_insn "sse_cvtss2si" |
2373 "TARGET_SSE2" | 2598 "TARGET_SSE2" |
2374 "cvtdq2ps\t{%1, %0|%0, %1}" | 2599 "cvtdq2ps\t{%1, %0|%0, %1}" |
2375 [(set_attr "type" "ssecvt") | 2600 [(set_attr "type" "ssecvt") |
2376 (set_attr "mode" "V4SF")]) | 2601 (set_attr "mode" "V4SF")]) |
2377 | 2602 |
;; Expander producing a V4SF result (operand 0) from a V4SI input
;; (operand 1) in four steps:
;;   op5 = float (op1)
;;   op6 = op5 < op3        (op3 is the V4SF zero vector)
;;   op7 = op6 & op4        (op4 is a V4SF constant built from 2**32)
;;   op0 = op5 + op7
;; NOTE(review): presumably this corrects the signed conversion for
;; inputs whose top bit is set, giving an unsigned conversion -- confirm.
2603 (define_expand "sse2_cvtudq2ps" | |
2604 [(set (match_dup 5) | |
2605 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" ""))) | |
2606 (set (match_dup 6) | |
2607 (lt:V4SF (match_dup 5) (match_dup 3))) | |
2608 (set (match_dup 7) | |
2609 (and:V4SF (match_dup 6) (match_dup 4))) | |
2610 (set (match_operand:V4SF 0 "register_operand" "") | |
2611 (plus:V4SF (match_dup 5) (match_dup 7)))] | |
2612 "TARGET_SSE2" | |
2613 { | |
2614 REAL_VALUE_TYPE TWO32r; | |
2615 rtx x; | |
2616 int i; | |
2617 | |
;; TWO32r = 1.0 * 2**32, turned into an SFmode CONST_DOUBLE.
2618 real_ldexp (&TWO32r, &dconst1, 32); | |
2619 x = const_double_from_real_value (TWO32r, SFmode); | |
2620 | |
;; Operands 3 and 4 are the zero vector and the 2**32 vector, both
;; forced into registers.
2621 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode)); | |
2622 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x)); | |
2623 | |
;; Operands 5..7 are fresh V4SF pseudos for the intermediate results.
2624 for (i = 5; i < 8; i++) | |
2625 operands[i] = gen_reg_rtx (V4SFmode); | |
2626 }) | |
2627 | |
2378 (define_insn "avx_cvtps2dq<avxmodesuffix>" | 2628 (define_insn "avx_cvtps2dq<avxmodesuffix>" |
2379 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x") | 2629 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x") |
2380 (unspec:AVXMODEDCVTPS2DQ | 2630 (unspec:AVXMODEDCVTPS2DQ |
2381 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")] | 2631 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")] |
2382 UNSPEC_FIX_NOTRUNC))] | 2632 UNSPEC_FIX_NOTRUNC))] |
2411 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] | 2661 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] |
2412 "TARGET_SSE2" | 2662 "TARGET_SSE2" |
2413 "cvttps2dq\t{%1, %0|%0, %1}" | 2663 "cvttps2dq\t{%1, %0|%0, %1}" |
2414 [(set_attr "type" "ssecvt") | 2664 [(set_attr "type" "ssecvt") |
2415 (set_attr "prefix_rep" "1") | 2665 (set_attr "prefix_rep" "1") |
2666 (set_attr "prefix_data16" "0") | |
2416 (set_attr "mode" "TI")]) | 2667 (set_attr "mode" "TI")]) |
2417 | 2668 |
2418 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 2669 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2419 ;; | 2670 ;; |
2420 ;; Parallel double-precision floating point conversion operations | 2671 ;; Parallel double-precision floating point conversion operations |
2426 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))] | 2677 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))] |
2427 "TARGET_SSE2" | 2678 "TARGET_SSE2" |
2428 "cvtpi2pd\t{%1, %0|%0, %1}" | 2679 "cvtpi2pd\t{%1, %0|%0, %1}" |
2429 [(set_attr "type" "ssecvt") | 2680 [(set_attr "type" "ssecvt") |
2430 (set_attr "unit" "mmx,*") | 2681 (set_attr "unit" "mmx,*") |
2682 (set_attr "prefix_data16" "1,*") | |
2431 (set_attr "mode" "V2DF")]) | 2683 (set_attr "mode" "V2DF")]) |
2432 | 2684 |
2433 (define_insn "sse2_cvtpd2pi" | 2685 (define_insn "sse2_cvtpd2pi" |
2434 [(set (match_operand:V2SI 0 "register_operand" "=y") | 2686 [(set (match_operand:V2SI 0 "register_operand" "=y") |
2435 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] | 2687 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] |
2486 (match_operand:V2DF 1 "register_operand" "x") | 2738 (match_operand:V2DF 1 "register_operand" "x") |
2487 (const_int 1)))] | 2739 (const_int 1)))] |
2488 "TARGET_AVX && TARGET_64BIT" | 2740 "TARGET_AVX && TARGET_64BIT" |
2489 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}" | 2741 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}" |
2490 [(set_attr "type" "sseicvt") | 2742 [(set_attr "type" "sseicvt") |
2743 (set_attr "length_vex" "4") | |
2491 (set_attr "prefix" "vex") | 2744 (set_attr "prefix" "vex") |
2492 (set_attr "mode" "DF")]) | 2745 (set_attr "mode" "DF")]) |
2493 | 2746 |
2494 (define_insn "sse2_cvtsi2sdq" | 2747 (define_insn "sse2_cvtsi2sdq" |
2495 [(set (match_operand:V2DF 0 "register_operand" "=x,x") | 2748 [(set (match_operand:V2DF 0 "register_operand" "=x,x") |
2499 (match_operand:V2DF 1 "register_operand" "0,0") | 2752 (match_operand:V2DF 1 "register_operand" "0,0") |
2500 (const_int 1)))] | 2753 (const_int 1)))] |
2501 "TARGET_SSE2 && TARGET_64BIT" | 2754 "TARGET_SSE2 && TARGET_64BIT" |
2502 "cvtsi2sdq\t{%2, %0|%0, %2}" | 2755 "cvtsi2sdq\t{%2, %0|%0, %2}" |
2503 [(set_attr "type" "sseicvt") | 2756 [(set_attr "type" "sseicvt") |
2757 (set_attr "prefix_rex" "1") | |
2504 (set_attr "mode" "DF") | 2758 (set_attr "mode" "DF") |
2505 (set_attr "athlon_decode" "double,direct") | 2759 (set_attr "athlon_decode" "double,direct") |
2506 (set_attr "amdfam10_decode" "vector,double")]) | 2760 (set_attr "amdfam10_decode" "vector,double")]) |
2507 | 2761 |
2508 (define_insn "sse2_cvtsd2si" | 2762 (define_insn "sse2_cvtsd2si" |
2640 "TARGET_SSE2" | 2894 "TARGET_SSE2" |
2641 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" | 2895 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" |
2642 : \"cvtpd2dq\t{%1, %0|%0, %1}\";" | 2896 : \"cvtpd2dq\t{%1, %0|%0, %1}\";" |
2643 [(set_attr "type" "ssecvt") | 2897 [(set_attr "type" "ssecvt") |
2644 (set_attr "prefix_rep" "1") | 2898 (set_attr "prefix_rep" "1") |
2899 (set_attr "prefix_data16" "0") | |
2645 (set_attr "prefix" "maybe_vex") | 2900 (set_attr "prefix" "maybe_vex") |
2646 (set_attr "mode" "TI") | 2901 (set_attr "mode" "TI") |
2647 (set_attr "amdfam10_decode" "double")]) | 2902 (set_attr "amdfam10_decode" "double")]) |
2648 | 2903 |
2649 (define_insn "avx_cvttpd2dq256" | 2904 (define_insn "avx_cvttpd2dq256" |
2670 (match_operand:V2SI 2 "const0_operand" "")))] | 2925 (match_operand:V2SI 2 "const0_operand" "")))] |
2671 "TARGET_SSE2" | 2926 "TARGET_SSE2" |
2672 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" | 2927 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" |
2673 : \"cvttpd2dq\t{%1, %0|%0, %1}\";" | 2928 : \"cvttpd2dq\t{%1, %0|%0, %1}\";" |
2674 [(set_attr "type" "ssecvt") | 2929 [(set_attr "type" "ssecvt") |
2675 (set_attr "prefix_rep" "1") | |
2676 (set_attr "prefix" "maybe_vex") | 2930 (set_attr "prefix" "maybe_vex") |
2677 (set_attr "mode" "TI") | 2931 (set_attr "mode" "TI") |
2678 (set_attr "amdfam10_decode" "double")]) | 2932 (set_attr "amdfam10_decode" "double")]) |
2679 | 2933 |
2680 (define_insn "*avx_cvtsd2ss" | 2934 (define_insn "*avx_cvtsd2ss" |
2789 "TARGET_SSE2" | 3043 "TARGET_SSE2" |
2790 "%vcvtps2pd\t{%1, %0|%0, %1}" | 3044 "%vcvtps2pd\t{%1, %0|%0, %1}" |
2791 [(set_attr "type" "ssecvt") | 3045 [(set_attr "type" "ssecvt") |
2792 (set_attr "prefix" "maybe_vex") | 3046 (set_attr "prefix" "maybe_vex") |
2793 (set_attr "mode" "V2DF") | 3047 (set_attr "mode" "V2DF") |
3048 (set_attr "prefix_data16" "0") | |
2794 (set_attr "amdfam10_decode" "direct")]) | 3049 (set_attr "amdfam10_decode" "direct")]) |
2795 | 3050 |
2796 (define_expand "vec_unpacks_hi_v4sf" | 3051 (define_expand "vec_unpacks_hi_v4sf" |
2797 [(set (match_dup 2) | 3052 [(set (match_dup 2) |
2798 (vec_select:V4SF | 3053 (vec_select:V4SF |
2881 (float:V2DF | 3136 (float:V2DF |
2882 (vec_select:V2SI | 3137 (vec_select:V2SI |
2883 (match_dup 2) | 3138 (match_dup 2) |
2884 (parallel [(const_int 0) (const_int 1)]))))] | 3139 (parallel [(const_int 0) (const_int 1)]))))] |
2885 "TARGET_SSE2" | 3140 "TARGET_SSE2" |
2886 { | 3141 "operands[2] = gen_reg_rtx (V4SImode);") |
2887 operands[2] = gen_reg_rtx (V4SImode); | |
2888 }) | |
2889 | 3142 |
;; Expander that converts elements 0 and 1 of the V4SI operand 1 to a
;; V2DF result with float.  It has no preparation statements and is
;; available under TARGET_SSE2.  Unchanged between the two revisions.
2890 (define_expand "vec_unpacks_float_lo_v4si" | 3143 (define_expand "vec_unpacks_float_lo_v4si" |
2891 [(set (match_operand:V2DF 0 "register_operand" "") | 3144 [(set (match_operand:V2DF 0 "register_operand" "") |
2892 (float:V2DF | 3145 (float:V2DF |
2893 (vec_select:V2SI | 3146 (vec_select:V2SI |
2894 (match_operand:V4SI 1 "nonimmediate_operand" "") | 3147 (match_operand:V4SI 1 "nonimmediate_operand" "") |
2895 (parallel [(const_int 0) (const_int 1)]))))] | 3148 (parallel [(const_int 0) (const_int 1)]))))] |
2896 "TARGET_SSE2") | 3149 "TARGET_SSE2") |
2897 | 3150 |
;; Expander producing a V2DF result (operand 0) from the high half of
;; the V4SI operand 1:
;;   op5 = elements {2, 3, 2, 3} of op1           (V4SI)
;;   op6 = float of elements {0, 1} of op5        (V2DF)
;;   op7 = op6 < op3     (op3 is the V2DF zero vector)
;;   op8 = op7 & op4     (op4 is a V2DF constant built from 2**32)
;;   op0 = op6 + op8
;; NOTE(review): presumably the compare/and/add sequence adjusts the
;; signed conversion to give unsigned semantics -- confirm.
3151 (define_expand "vec_unpacku_float_hi_v4si" | |
3152 [(set (match_dup 5) | |
3153 (vec_select:V4SI | |
3154 (match_operand:V4SI 1 "nonimmediate_operand" "") | |
3155 (parallel [(const_int 2) | |
3156 (const_int 3) | |
3157 (const_int 2) | |
3158 (const_int 3)]))) | |
3159 (set (match_dup 6) | |
3160 (float:V2DF | |
3161 (vec_select:V2SI | |
3162 (match_dup 5) | |
3163 (parallel [(const_int 0) (const_int 1)])))) | |
3164 (set (match_dup 7) | |
3165 (lt:V2DF (match_dup 6) (match_dup 3))) | |
3166 (set (match_dup 8) | |
3167 (and:V2DF (match_dup 7) (match_dup 4))) | |
3168 (set (match_operand:V2DF 0 "register_operand" "") | |
3169 (plus:V2DF (match_dup 6) (match_dup 8)))] | |
3170 "TARGET_SSE2" | |
3171 { | |
3172 REAL_VALUE_TYPE TWO32r; | |
3173 rtx x; | |
3174 int i; | |
3175 | |
;; TWO32r = 1.0 * 2**32, turned into a DFmode CONST_DOUBLE.
3176 real_ldexp (&TWO32r, &dconst1, 32); | |
3177 x = const_double_from_real_value (TWO32r, DFmode); | |
3178 | |
;; Operands 3 and 4 are the zero vector and the 2**32 vector, both
;; forced into registers.
3179 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode)); | |
3180 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x)); | |
3181 | |
;; Operand 5 is the V4SI shuffle temporary; operands 6..8 are fresh
;; V2DF pseudos for the intermediate results.
3182 operands[5] = gen_reg_rtx (V4SImode); | |
3183 | |
3184 for (i = 6; i < 9; i++) | |
3185 operands[i] = gen_reg_rtx (V2DFmode); | |
3186 }) | |
3187 | |
;; Expander producing a V2DF result (operand 0) from the low half of
;; the V4SI operand 1:
;;   op5 = float of elements {0, 1} of op1        (V2DF)
;;   op6 = op5 < op3     (op3 is the V2DF zero vector)
;;   op7 = op6 & op4     (op4 is a V2DF constant built from 2**32)
;;   op0 = op5 + op7
;; NOTE(review): presumably the compare/and/add sequence adjusts the
;; signed conversion to give unsigned semantics -- confirm.
3188 (define_expand "vec_unpacku_float_lo_v4si" | |
3189 [(set (match_dup 5) | |
3190 (float:V2DF | |
3191 (vec_select:V2SI | |
3192 (match_operand:V4SI 1 "nonimmediate_operand" "") | |
3193 (parallel [(const_int 0) (const_int 1)])))) | |
3194 (set (match_dup 6) | |
3195 (lt:V2DF (match_dup 5) (match_dup 3))) | |
3196 (set (match_dup 7) | |
3197 (and:V2DF (match_dup 6) (match_dup 4))) | |
3198 (set (match_operand:V2DF 0 "register_operand" "") | |
3199 (plus:V2DF (match_dup 5) (match_dup 7)))] | |
3200 "TARGET_SSE2" | |
3201 { | |
3202 REAL_VALUE_TYPE TWO32r; | |
3203 rtx x; | |
3204 int i; | |
3205 | |
;; TWO32r = 1.0 * 2**32, turned into a DFmode CONST_DOUBLE.
3206 real_ldexp (&TWO32r, &dconst1, 32); | |
3207 x = const_double_from_real_value (TWO32r, DFmode); | |
3208 | |
;; Operands 3 and 4 are the zero vector and the 2**32 vector, both
;; forced into registers.
3209 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode)); | |
3210 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x)); | |
3211 | |
;; Operands 5..7 are fresh V2DF pseudos for the intermediate results.
3212 for (i = 5; i < 8; i++) | |
3213 operands[i] = gen_reg_rtx (V2DFmode); | |
3214 }) | |
3215 | |
2898 (define_expand "vec_pack_trunc_v2df" | 3216 (define_expand "vec_pack_trunc_v2df" |
2899 [(match_operand:V4SF 0 "register_operand" "") | 3217 [(match_operand:V4SF 0 "register_operand" "") |
2900 (match_operand:V2DF 1 "nonimmediate_operand" "") | 3218 (match_operand:V2DF 1 "nonimmediate_operand" "") |
2901 (match_operand:V2DF 2 "nonimmediate_operand" "")] | 3219 (match_operand:V2DF 2 "nonimmediate_operand" "")] |
2902 "TARGET_SSE2" | 3220 "TARGET_SSE2" |
2923 r1 = gen_reg_rtx (V4SImode); | 3241 r1 = gen_reg_rtx (V4SImode); |
2924 r2 = gen_reg_rtx (V4SImode); | 3242 r2 = gen_reg_rtx (V4SImode); |
2925 | 3243 |
2926 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1])); | 3244 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1])); |
2927 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2])); | 3245 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2])); |
2928 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]), | 3246 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), |
2929 gen_lowpart (V2DImode, r1), | 3247 gen_lowpart (V2DImode, r1), |
2930 gen_lowpart (V2DImode, r2))); | 3248 gen_lowpart (V2DImode, r2))); |
2931 DONE; | 3249 DONE; |
2932 }) | 3250 }) |
2933 | 3251 |
2934 (define_expand "vec_pack_sfix_v2df" | 3252 (define_expand "vec_pack_sfix_v2df" |
2935 [(match_operand:V4SI 0 "register_operand" "") | 3253 [(match_operand:V4SI 0 "register_operand" "") |
2942 r1 = gen_reg_rtx (V4SImode); | 3260 r1 = gen_reg_rtx (V4SImode); |
2943 r2 = gen_reg_rtx (V4SImode); | 3261 r2 = gen_reg_rtx (V4SImode); |
2944 | 3262 |
2945 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1])); | 3263 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1])); |
2946 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2])); | 3264 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2])); |
2947 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]), | 3265 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]), |
2948 gen_lowpart (V2DImode, r1), | 3266 gen_lowpart (V2DImode, r1), |
2949 gen_lowpart (V2DImode, r2))); | 3267 gen_lowpart (V2DImode, r2))); |
2950 DONE; | 3268 DONE; |
2951 }) | 3269 }) |
2952 | 3270 |
2953 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 3271 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
2954 ;; | 3272 ;; |
3054 movhps\t{%2, %0|%0, %2} | 3372 movhps\t{%2, %0|%0, %2} |
3055 movlps\t{%2, %H0|%H0, %2}" | 3373 movlps\t{%2, %H0|%H0, %2}" |
3056 [(set_attr "type" "ssemov") | 3374 [(set_attr "type" "ssemov") |
3057 (set_attr "mode" "V4SF,V2SF,V2SF")]) | 3375 (set_attr "mode" "V4SF,V2SF,V2SF")]) |
3058 | 3376 |
3377 ;; Recall that the 256-bit unpck insns only shuffle within their lanes. | |
3059 (define_insn "avx_unpckhps256" | 3378 (define_insn "avx_unpckhps256" |
3060 [(set (match_operand:V8SF 0 "register_operand" "=x") | 3379 [(set (match_operand:V8SF 0 "register_operand" "=x") |
3061 (vec_select:V8SF | 3380 (vec_select:V8SF |
3062 (vec_concat:V16SF | 3381 (vec_concat:V16SF |
3063 (match_operand:V8SF 1 "register_operand" "x") | 3382 (match_operand:V8SF 1 "register_operand" "x") |
3070 "vunpckhps\t{%2, %1, %0|%0, %1, %2}" | 3389 "vunpckhps\t{%2, %1, %0|%0, %1, %2}" |
3071 [(set_attr "type" "sselog") | 3390 [(set_attr "type" "sselog") |
3072 (set_attr "prefix" "vex") | 3391 (set_attr "prefix" "vex") |
3073 (set_attr "mode" "V8SF")]) | 3392 (set_attr "mode" "V8SF")]) |
3074 | 3393 |
3075 (define_insn "*avx_unpckhps" | 3394 (define_insn "*avx_interleave_highv4sf" |
3076 [(set (match_operand:V4SF 0 "register_operand" "=x") | 3395 [(set (match_operand:V4SF 0 "register_operand" "=x") |
3077 (vec_select:V4SF | 3396 (vec_select:V4SF |
3078 (vec_concat:V8SF | 3397 (vec_concat:V8SF |
3079 (match_operand:V4SF 1 "register_operand" "x") | 3398 (match_operand:V4SF 1 "register_operand" "x") |
3080 (match_operand:V4SF 2 "nonimmediate_operand" "xm")) | 3399 (match_operand:V4SF 2 "nonimmediate_operand" "xm")) |
3084 "vunpckhps\t{%2, %1, %0|%0, %1, %2}" | 3403 "vunpckhps\t{%2, %1, %0|%0, %1, %2}" |
3085 [(set_attr "type" "sselog") | 3404 [(set_attr "type" "sselog") |
3086 (set_attr "prefix" "vex") | 3405 (set_attr "prefix" "vex") |
3087 (set_attr "mode" "V4SF")]) | 3406 (set_attr "mode" "V4SF")]) |
3088 | 3407 |
3089 (define_insn "sse_unpckhps" | 3408 (define_insn "vec_interleave_highv4sf" |
3090 [(set (match_operand:V4SF 0 "register_operand" "=x") | 3409 [(set (match_operand:V4SF 0 "register_operand" "=x") |
3091 (vec_select:V4SF | 3410 (vec_select:V4SF |
3092 (vec_concat:V8SF | 3411 (vec_concat:V8SF |
3093 (match_operand:V4SF 1 "register_operand" "0") | 3412 (match_operand:V4SF 1 "register_operand" "0") |
3094 (match_operand:V4SF 2 "nonimmediate_operand" "xm")) | 3413 (match_operand:V4SF 2 "nonimmediate_operand" "xm")) |
3097 "TARGET_SSE" | 3416 "TARGET_SSE" |
3098 "unpckhps\t{%2, %0|%0, %2}" | 3417 "unpckhps\t{%2, %0|%0, %2}" |
3099 [(set_attr "type" "sselog") | 3418 [(set_attr "type" "sselog") |
3100 (set_attr "mode" "V4SF")]) | 3419 (set_attr "mode" "V4SF")]) |
3101 | 3420 |
3421 ;; Recall that the 256-bit unpck insns only shuffle within their lanes. | |
3102 (define_insn "avx_unpcklps256" | 3422 (define_insn "avx_unpcklps256" |
3103 [(set (match_operand:V8SF 0 "register_operand" "=x") | 3423 [(set (match_operand:V8SF 0 "register_operand" "=x") |
3104 (vec_select:V8SF | 3424 (vec_select:V8SF |
3105 (vec_concat:V16SF | 3425 (vec_concat:V16SF |
3106 (match_operand:V8SF 1 "register_operand" "x") | 3426 (match_operand:V8SF 1 "register_operand" "x") |
3113 "vunpcklps\t{%2, %1, %0|%0, %1, %2}" | 3433 "vunpcklps\t{%2, %1, %0|%0, %1, %2}" |
3114 [(set_attr "type" "sselog") | 3434 [(set_attr "type" "sselog") |
3115 (set_attr "prefix" "vex") | 3435 (set_attr "prefix" "vex") |
3116 (set_attr "mode" "V8SF")]) | 3436 (set_attr "mode" "V8SF")]) |
3117 | 3437 |
3118 (define_insn "*avx_unpcklps" | 3438 (define_insn "*avx_interleave_lowv4sf" |
3119 [(set (match_operand:V4SF 0 "register_operand" "=x") | 3439 [(set (match_operand:V4SF 0 "register_operand" "=x") |
3120 (vec_select:V4SF | 3440 (vec_select:V4SF |
3121 (vec_concat:V8SF | 3441 (vec_concat:V8SF |
3122 (match_operand:V4SF 1 "register_operand" "x") | 3442 (match_operand:V4SF 1 "register_operand" "x") |
3123 (match_operand:V4SF 2 "nonimmediate_operand" "xm")) | 3443 (match_operand:V4SF 2 "nonimmediate_operand" "xm")) |
3127 "vunpcklps\t{%2, %1, %0|%0, %1, %2}" | 3447 "vunpcklps\t{%2, %1, %0|%0, %1, %2}" |
3128 [(set_attr "type" "sselog") | 3448 [(set_attr "type" "sselog") |
3129 (set_attr "prefix" "vex") | 3449 (set_attr "prefix" "vex") |
3130 (set_attr "mode" "V4SF")]) | 3450 (set_attr "mode" "V4SF")]) |
3131 | 3451 |
3132 (define_insn "sse_unpcklps" | 3452 (define_insn "vec_interleave_lowv4sf" |
3133 [(set (match_operand:V4SF 0 "register_operand" "=x") | 3453 [(set (match_operand:V4SF 0 "register_operand" "=x") |
3134 (vec_select:V4SF | 3454 (vec_select:V4SF |
3135 (vec_concat:V8SF | 3455 (vec_concat:V8SF |
3136 (match_operand:V4SF 1 "register_operand" "0") | 3456 (match_operand:V4SF 1 "register_operand" "0") |
3137 (match_operand:V4SF 2 "nonimmediate_operand" "xm")) | 3457 (match_operand:V4SF 2 "nonimmediate_operand" "xm")) |
3259 operands[3] = GEN_INT (mask); | 3579 operands[3] = GEN_INT (mask); |
3260 | 3580 |
3261 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | 3581 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
3262 } | 3582 } |
3263 [(set_attr "type" "sselog") | 3583 [(set_attr "type" "sselog") |
3584 (set_attr "length_immediate" "1") | |
3264 (set_attr "prefix" "vex") | 3585 (set_attr "prefix" "vex") |
3265 (set_attr "mode" "V8SF")]) | 3586 (set_attr "mode" "V8SF")]) |
3266 | 3587 |
3267 (define_expand "sse_shufps" | 3588 (define_expand "sse_shufps" |
3268 [(match_operand:V4SF 0 "register_operand" "") | 3589 [(match_operand:V4SF 0 "register_operand" "") |
3300 operands[3] = GEN_INT (mask); | 3621 operands[3] = GEN_INT (mask); |
3301 | 3622 |
3302 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | 3623 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
3303 } | 3624 } |
3304 [(set_attr "type" "sselog") | 3625 [(set_attr "type" "sselog") |
3626 (set_attr "length_immediate" "1") | |
3305 (set_attr "prefix" "vex") | 3627 (set_attr "prefix" "vex") |
3306 (set_attr "mode" "V4SF")]) | 3628 (set_attr "mode" "V4SF")]) |
3307 | 3629 |
3308 (define_insn "sse_shufps_<mode>" | 3630 (define_insn "sse_shufps_<mode>" |
3309 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x") | 3631 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x") |
3325 operands[3] = GEN_INT (mask); | 3647 operands[3] = GEN_INT (mask); |
3326 | 3648 |
3327 return "shufps\t{%3, %2, %0|%0, %2, %3}"; | 3649 return "shufps\t{%3, %2, %0|%0, %2, %3}"; |
3328 } | 3650 } |
3329 [(set_attr "type" "sselog") | 3651 [(set_attr "type" "sselog") |
3652 (set_attr "length_immediate" "1") | |
3330 (set_attr "mode" "V4SF")]) | 3653 (set_attr "mode" "V4SF")]) |
3331 | 3654 |
3332 (define_insn "sse_storehps" | 3655 (define_insn "sse_storehps" |
3333 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") | 3656 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") |
3334 (vec_select:V2SF | 3657 (vec_select:V2SF |
3432 "@ | 3755 "@ |
3433 shufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4} | 3756 shufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4} |
3434 vmovlps\t{%2, %1, %0|%0, %1, %2} | 3757 vmovlps\t{%2, %1, %0|%0, %1, %2} |
3435 vmovlps\t{%2, %0|%0, %2}" | 3758 vmovlps\t{%2, %0|%0, %2}" |
3436 [(set_attr "type" "sselog,ssemov,ssemov") | 3759 [(set_attr "type" "sselog,ssemov,ssemov") |
3760 (set_attr "length_immediate" "1,*,*") | |
3437 (set_attr "prefix" "vex") | 3761 (set_attr "prefix" "vex") |
3438 (set_attr "mode" "V4SF,V2SF,V2SF")]) | 3762 (set_attr "mode" "V4SF,V2SF,V2SF")]) |
3439 | 3763 |
3440 (define_insn "sse_loadlps" | 3764 (define_insn "sse_loadlps" |
3441 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") | 3765 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") |
3448 "@ | 3772 "@ |
3449 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4} | 3773 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4} |
3450 movlps\t{%2, %0|%0, %2} | 3774 movlps\t{%2, %0|%0, %2} |
3451 movlps\t{%2, %0|%0, %2}" | 3775 movlps\t{%2, %0|%0, %2}" |
3452 [(set_attr "type" "sselog,ssemov,ssemov") | 3776 [(set_attr "type" "sselog,ssemov,ssemov") |
3777 (set_attr "length_immediate" "1,*,*") | |
3453 (set_attr "mode" "V4SF,V2SF,V2SF")]) | 3778 (set_attr "mode" "V4SF,V2SF,V2SF")]) |
3454 | 3779 |
3455 (define_insn "*avx_movss" | 3780 (define_insn "*avx_movss" |
3456 [(set (match_operand:V4SF 0 "register_operand" "=x") | 3781 [(set (match_operand:V4SF 0 "register_operand" "=x") |
3457 (vec_merge:V4SF | 3782 (vec_merge:V4SF |
3473 "TARGET_SSE" | 3798 "TARGET_SSE" |
3474 "movss\t{%2, %0|%0, %2}" | 3799 "movss\t{%2, %0|%0, %2}" |
3475 [(set_attr "type" "ssemov") | 3800 [(set_attr "type" "ssemov") |
3476 (set_attr "mode" "SF")]) | 3801 (set_attr "mode" "SF")]) |
3477 | 3802 |
;; Expander for duplicating an SF scalar (operand 1) into every element
;; of a V4SF register (operand 0).  The AVX insn that follows accepts a
;; memory source, while the plain SSE insn only accepts a register, so
;; without AVX the scalar is forced into a register first.
3803 (define_expand "vec_dupv4sf" | |
3804 [(set (match_operand:V4SF 0 "register_operand" "") | |
3805 (vec_duplicate:V4SF | |
3806 (match_operand:SF 1 "nonimmediate_operand" "")))] | |
3807 "TARGET_SSE" | |
3808 { | |
3809 if (!TARGET_AVX) | |
;; Operand 1 is SFmode, so the register must be requested in SFmode;
;; asking for V4SFmode would mismatch the operand's mode when it is a
;; memory reference.
3810 operands[1] = force_reg (SFmode, operands[1]); | |
3811 }) | |
3812 | |
;; AVX insn for V4SF vec_duplicate of an SF operand.  In the old
;; revision (left column) operand 1 must be a register and the only
;; output is vshufps $0.  In the new revision (right column) operand 1
;; may also be memory: alternative 0 (register) uses vshufps $0 and
;; alternative 1 (memory) uses vbroadcastss, with per-alternative
;; type, length_immediate and prefix_extra attributes.
3478 (define_insn "*vec_dupv4sf_avx" | 3813 (define_insn "*vec_dupv4sf_avx" |
3479 [(set (match_operand:V4SF 0 "register_operand" "=x") | 3814 [(set (match_operand:V4SF 0 "register_operand" "=x,x") |
3480 (vec_duplicate:V4SF | 3815 (vec_duplicate:V4SF |
3481 (match_operand:SF 1 "register_operand" "x")))] | 3816 (match_operand:SF 1 "nonimmediate_operand" "x,m")))] |
3482 "TARGET_AVX" | 3817 "TARGET_AVX" |
3483 "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}" | 3818 "@ |
3484 [(set_attr "type" "sselog1") | 3819 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0} |
3820 vbroadcastss\t{%1, %0|%0, %1}" | |
3821 [(set_attr "type" "sselog1,ssemov") | |
3822 (set_attr "length_immediate" "1,0") | |
3823 (set_attr "prefix_extra" "0,1") | |
3485 (set_attr "prefix" "vex") | 3824 (set_attr "prefix" "vex") |
3486 (set_attr "mode" "V4SF")]) | 3825 (set_attr "mode" "V4SF")]) |
3487 | 3826 |
;; SSE insn for V4SF vec_duplicate of an SF register.  Operand 1 is
;; tied to operand 0 (constraint "0") and the output is shufps $0 of
;; operand 0 with itself.  The new revision (right column) only adds
;; the length_immediate attribute.
3488 (define_insn "*vec_dupv4sf" | 3827 (define_insn "*vec_dupv4sf" |
3489 [(set (match_operand:V4SF 0 "register_operand" "=x") | 3828 [(set (match_operand:V4SF 0 "register_operand" "=x") |
3490 (vec_duplicate:V4SF | 3829 (vec_duplicate:V4SF |
3491 (match_operand:SF 1 "register_operand" "0")))] | 3830 (match_operand:SF 1 "register_operand" "0")))] |
3492 "TARGET_SSE" | 3831 "TARGET_SSE" |
3493 "shufps\t{$0, %0, %0|%0, %0, 0}" | 3832 "shufps\t{$0, %0, %0|%0, %0, 0}" |
3494 [(set_attr "type" "sselog1") | 3833 [(set_attr "type" "sselog1") |
3834 (set_attr "length_immediate" "1") | |
3495 (set_attr "mode" "V4SF")]) | 3835 (set_attr "mode" "V4SF")]) |
3496 | 3836 |
3497 (define_insn "*vec_concatv2sf_avx" | 3837 (define_insn "*vec_concatv2sf_avx" |
3498 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y") | 3838 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y") |
3499 (vec_concat:V2SF | 3839 (vec_concat:V2SF |
3505 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10} | 3845 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10} |
3506 vmovss\t{%1, %0|%0, %1} | 3846 vmovss\t{%1, %0|%0, %1} |
3507 punpckldq\t{%2, %0|%0, %2} | 3847 punpckldq\t{%2, %0|%0, %2} |
3508 movd\t{%1, %0|%0, %1}" | 3848 movd\t{%1, %0|%0, %1}" |
3509 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") | 3849 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") |
3850 (set_attr "length_immediate" "*,1,*,*,*") | |
3851 (set_attr "prefix_extra" "*,1,*,*,*") | |
3510 (set (attr "prefix") | 3852 (set (attr "prefix") |
3511 (if_then_else (eq_attr "alternative" "3,4") | 3853 (if_then_else (eq_attr "alternative" "3,4") |
3512 (const_string "orig") | 3854 (const_string "orig") |
3513 (const_string "vex"))) | 3855 (const_string "vex"))) |
3514 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")]) | 3856 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")]) |
3526 insertps\t{$0x10, %2, %0|%0, %2, 0x10} | 3868 insertps\t{$0x10, %2, %0|%0, %2, 0x10} |
3527 movss\t{%1, %0|%0, %1} | 3869 movss\t{%1, %0|%0, %1} |
3528 punpckldq\t{%2, %0|%0, %2} | 3870 punpckldq\t{%2, %0|%0, %2} |
3529 movd\t{%1, %0|%0, %1}" | 3871 movd\t{%1, %0|%0, %1}" |
3530 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") | 3872 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") |
3873 (set_attr "prefix_data16" "*,1,*,*,*") | |
3531 (set_attr "prefix_extra" "*,1,*,*,*") | 3874 (set_attr "prefix_extra" "*,1,*,*,*") |
3875 (set_attr "length_immediate" "*,1,*,*,*") | |
3532 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")]) | 3876 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")]) |
3533 | 3877 |
3534 ;; ??? In theory we can match memory for the MMX alternative, but allowing | 3878 ;; ??? In theory we can match memory for the MMX alternative, but allowing |
3535 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE | 3879 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE |
3536 ;; alternatives pretty much forces the MMX alternative to be chosen. | 3880 ;; alternatives pretty much forces the MMX alternative to be chosen. |
3580 { | 3924 { |
3581 ix86_expand_vector_init (false, operands[0], operands[1]); | 3925 ix86_expand_vector_init (false, operands[0], operands[1]); |
3582 DONE; | 3926 DONE; |
3583 }) | 3927 }) |
3584 | 3928 |
;; AVX insn that merges a scalar (operand 2) into element 0 of a vector
;; (vec_merge mask 1) whose remaining elements come from operand 1.
;; The old revision (left column) is V4SF-only with four alternatives.
;; The new revision (right column) is iterated over SSEMODE4S and has
;; six alternatives: vinsertps, vmov<ssescalarmodesuffix2s> and vmovd
;; with operand 1 constrained to "C", vmovss and vpinsrd with operand 1
;; in a register, and a memory-destination alternative whose output
;; template is "#".
3585 (define_insn "*vec_setv4sf_0_avx" | 3929 (define_insn "*vec_set<mode>_0_avx" |
3586 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m") | 3930 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m") |
3587 (vec_merge:V4SF | 3931 (vec_merge:SSEMODE4S |
3588 (vec_duplicate:V4SF | 3932 (vec_duplicate:SSEMODE4S |
3589 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF")) | 3933 (match_operand:<ssescalarmode> 2 |
3590 (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0") | 3934 "general_operand" " x,m,*r,x,*rm,x*rfF")) |
3935 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0") | |
3591 (const_int 1)))] | 3936 (const_int 1)))] |
3592 "TARGET_AVX" | 3937 "TARGET_AVX" |
3593 "@ | 3938 "@ |
3939 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe} | |
3940 vmov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2} | |
3941 vmovd\t{%2, %0|%0, %2} | |
3594 vmovss\t{%2, %1, %0|%0, %1, %2} | 3942 vmovss\t{%2, %1, %0|%0, %1, %2} |
3595 vmovss\t{%2, %0|%0, %2} | 3943 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0} |
3596 vmovd\t{%2, %0|%0, %2} | 3944 #" |
3945 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*") | |
3946 (set_attr "prefix_extra" "*,*,*,*,1,*") | |
3947 (set_attr "length_immediate" "*,*,*,*,1,*") | |
3948 (set_attr "prefix" "vex") | |
3949 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")]) | |
3950 | |
;; SSE4.1 insn, new in this revision, that merges a scalar (operand 2)
;; into element 0 of an SSEMODE4S vector (vec_merge mask 1).  It has
;; six alternatives: insertps, mov<ssescalarmodesuffix2s> and movd with
;; operand 1 constrained to "C", movss and pinsrd with operand 1 tied
;; to operand 0, and a memory-destination alternative whose output
;; template is "#".
3951 (define_insn "*vec_set<mode>_0_sse4_1" | |
3952 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m") | |
3953 (vec_merge:SSEMODE4S | |
3954 (vec_duplicate:SSEMODE4S | |
3955 (match_operand:<ssescalarmode> 2 | |
3956 "general_operand" " x,m,*r,x,*rm,*rfF")) | |
3957 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0") | |
3958 (const_int 1)))] | |
3959 "TARGET_SSE4_1" | |
3960 "@ | |
3961 insertps\t{$0xe, %2, %0|%0, %2, 0xe} | |
3962 mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2} | |
3963 movd\t{%2, %0|%0, %2} | |
3964 movss\t{%2, %0|%0, %2} | |
3965 pinsrd\t{$0, %2, %0|%0, %2, 0} | |
3966 #" | |
3967 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*") | |
3968 (set_attr "prefix_extra" "*,*,*,*,1,*") | |
3969 (set_attr "length_immediate" "*,*,*,*,1,*") | |
3970 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")]) | |
3971 | |
;; SSE2 insn, new in this revision, that merges a scalar (operand 2)
;; into element 0 of an SSEMODE4S vector (vec_merge mask 1).  It has
;; four alternatives: mov<ssescalarmodesuffix2s> from memory and movd
;; from a general register with operand 1 constrained to "C", movss
;; with operand 1 tied to operand 0, and a memory-destination
;; alternative whose output template is "#".  The last three rows below
;; also carry, in the left column, the tail of the old
;; *vec_setv4sf_0_avx definition.
3972 (define_insn "*vec_set<mode>_0_sse2" | |
3973 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m") | |
3974 (vec_merge:SSEMODE4S | |
3975 (vec_duplicate:SSEMODE4S | |
3976 (match_operand:<ssescalarmode> 2 | |
3977 "general_operand" " m,*r,x,x*rfF")) | |
3978 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0") | |
3979 (const_int 1)))] | |
3980 "TARGET_SSE2" | |
3981 "@ | |
3982 mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2} | |
3983 movd\t{%2, %0|%0, %2} | |
3984 movss\t{%2, %0|%0, %2} | |
3597 #" | 3985 #" |
3598 [(set_attr "type" "ssemov") | 3986 [(set_attr "type" "ssemov") |
3599 (set_attr "prefix" "vex") | 3987 (set_attr "mode" "<ssescalarmode>,SI,SF,*")]) |
3600 (set_attr "mode" "SF")]) | 3988 |
;; Baseline SSE insn that merges a scalar (operand 2) into element 0 of
;; a vector (vec_merge mask 1).  The old revision (left column) is
;; vec_setv4sf_0, V4SF-only, with four alternatives including a movd
;; alternative using the "Y2" constraint.  The new revision (right
;; column) is iterated over SSEMODE4S, drops the movd alternative, and
;; keeps three: movss from memory with operand 1 constrained to "C",
;; movss from a register with operand 1 tied to operand 0, and a
;; memory-destination alternative whose output template is "#".
3601 | 3989 (define_insn "vec_set<mode>_0" |
3602 (define_insn "vec_setv4sf_0" | 3990 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m") |
3603 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m") | 3991 (vec_merge:SSEMODE4S |
3604 (vec_merge:V4SF | 3992 (vec_duplicate:SSEMODE4S |
3605 (vec_duplicate:V4SF | 3993 (match_operand:<ssescalarmode> 2 |
3606 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF")) | 3994 "general_operand" " m,x,x*rfF")) |
3607 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0") | 3995 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0") |
3608 (const_int 1)))] | 3996 (const_int 1)))] |
3609 "TARGET_SSE" | 3997 "TARGET_SSE" |
3610 "@ | 3998 "@ |
3611 movss\t{%2, %0|%0, %2} | 3999 movss\t{%2, %0|%0, %2} |
3612 movss\t{%2, %0|%0, %2} | 4000 movss\t{%2, %0|%0, %2} |
3613 movd\t{%2, %0|%0, %2} | |
3614 #" | 4001 #" |
3615 [(set_attr "type" "ssemov") | 4002 [(set_attr "type" "ssemov") |
3616 (set_attr "mode" "SF")]) | 4003 (set_attr "mode" "SF")]) |
3617 | 4004 |
3618 ;; A subset is vec_setv4sf. | 4005 ;; A subset is vec_setv4sf. |
3627 { | 4014 { |
3628 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4); | 4015 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4); |
3629 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | 4016 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
3630 } | 4017 } |
3631 [(set_attr "type" "sselog") | 4018 [(set_attr "type" "sselog") |
4019 (set_attr "prefix_extra" "1") | |
4020 (set_attr "length_immediate" "1") | |
3632 (set_attr "prefix" "vex") | 4021 (set_attr "prefix" "vex") |
3633 (set_attr "mode" "V4SF")]) | 4022 (set_attr "mode" "V4SF")]) |
3634 | 4023 |
3635 (define_insn "*vec_setv4sf_sse4_1" | 4024 (define_insn "*vec_setv4sf_sse4_1" |
3636 [(set (match_operand:V4SF 0 "register_operand" "=x") | 4025 [(set (match_operand:V4SF 0 "register_operand" "=x") |
3643 { | 4032 { |
3644 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4); | 4033 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4); |
3645 return "insertps\t{%3, %2, %0|%0, %2, %3}"; | 4034 return "insertps\t{%3, %2, %0|%0, %2, %3}"; |
3646 } | 4035 } |
3647 [(set_attr "type" "sselog") | 4036 [(set_attr "type" "sselog") |
3648 (set_attr "prefix_extra" "1") | 4037 (set_attr "prefix_data16" "1") |
4038 (set_attr "prefix_extra" "1") | |
4039 (set_attr "length_immediate" "1") | |
3649 (set_attr "mode" "V4SF")]) | 4040 (set_attr "mode" "V4SF")]) |
3650 | 4041 |
3651 (define_insn "*avx_insertps" | 4042 (define_insn "*avx_insertps" |
3652 [(set (match_operand:V4SF 0 "register_operand" "=x") | 4043 [(set (match_operand:V4SF 0 "register_operand" "=x") |
3653 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm") | 4044 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm") |
3656 UNSPEC_INSERTPS))] | 4047 UNSPEC_INSERTPS))] |
3657 "TARGET_AVX" | 4048 "TARGET_AVX" |
3658 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | 4049 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
3659 [(set_attr "type" "sselog") | 4050 [(set_attr "type" "sselog") |
3660 (set_attr "prefix" "vex") | 4051 (set_attr "prefix" "vex") |
4052 (set_attr "prefix_extra" "1") | |
4053 (set_attr "length_immediate" "1") | |
3661 (set_attr "mode" "V4SF")]) | 4054 (set_attr "mode" "V4SF")]) |
3662 | 4055 |
3663 (define_insn "sse4_1_insertps" | 4056 (define_insn "sse4_1_insertps" |
3664 [(set (match_operand:V4SF 0 "register_operand" "=x") | 4057 [(set (match_operand:V4SF 0 "register_operand" "=x") |
3665 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x") | 4058 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x") |
3667 (match_operand:SI 3 "const_0_to_255_operand" "n")] | 4060 (match_operand:SI 3 "const_0_to_255_operand" "n")] |
3668 UNSPEC_INSERTPS))] | 4061 UNSPEC_INSERTPS))] |
3669 "TARGET_SSE4_1" | 4062 "TARGET_SSE4_1" |
3670 "insertps\t{%3, %2, %0|%0, %2, %3}"; | 4063 "insertps\t{%3, %2, %0|%0, %2, %3}"; |
3671 [(set_attr "type" "sselog") | 4064 [(set_attr "type" "sselog") |
3672 (set_attr "prefix_extra" "1") | 4065 (set_attr "prefix_data16" "1") |
4066 (set_attr "prefix_extra" "1") | |
4067 (set_attr "length_immediate" "1") | |
3673 (set_attr "mode" "V4SF")]) | 4068 (set_attr "mode" "V4SF")]) |
3674 | 4069 |
3675 (define_split | 4070 (define_split |
3676 [(set (match_operand:V4SF 0 "memory_operand" "") | 4071 [(set (match_operand:V4SF 0 "memory_operand" "") |
3677 (vec_merge:V4SF | 4072 (vec_merge:V4SF |
3742 (match_operand:AVX256MODE4P 1 "register_operand" "x,x") | 4137 (match_operand:AVX256MODE4P 1 "register_operand" "x,x") |
3743 (parallel [(const_int 0) (const_int 1)])))] | 4138 (parallel [(const_int 0) (const_int 1)])))] |
3744 "TARGET_AVX" | 4139 "TARGET_AVX" |
3745 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}" | 4140 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}" |
3746 [(set_attr "type" "sselog") | 4141 [(set_attr "type" "sselog") |
4142 (set_attr "prefix_extra" "1") | |
4143 (set_attr "length_immediate" "1") | |
3747 (set_attr "memory" "none,store") | 4144 (set_attr "memory" "none,store") |
3748 (set_attr "prefix" "vex") | 4145 (set_attr "prefix" "vex") |
3749 (set_attr "mode" "V8SF")]) | 4146 (set_attr "mode" "V8SF")]) |
3750 | 4147 |
3751 (define_insn "vec_extract_hi_<mode>" | 4148 (define_insn "vec_extract_hi_<mode>" |
3754 (match_operand:AVX256MODE4P 1 "register_operand" "x,x") | 4151 (match_operand:AVX256MODE4P 1 "register_operand" "x,x") |
3755 (parallel [(const_int 2) (const_int 3)])))] | 4152 (parallel [(const_int 2) (const_int 3)])))] |
3756 "TARGET_AVX" | 4153 "TARGET_AVX" |
3757 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" | 4154 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" |
3758 [(set_attr "type" "sselog") | 4155 [(set_attr "type" "sselog") |
4156 (set_attr "prefix_extra" "1") | |
4157 (set_attr "length_immediate" "1") | |
3759 (set_attr "memory" "none,store") | 4158 (set_attr "memory" "none,store") |
3760 (set_attr "prefix" "vex") | 4159 (set_attr "prefix" "vex") |
3761 (set_attr "mode" "V8SF")]) | 4160 (set_attr "mode" "V8SF")]) |
3762 | 4161 |
;; Low-half extract: the selector below picks elements 0-3, so the | |
;; vextractf128 immediate must be 0.  Immediate 1 names the upper | |
;; 128 bits and is what the matching vec_extract_hi_<mode> emits. | |
3763 (define_insn "vec_extract_lo_<mode>" | 4162 (define_insn "vec_extract_lo_<mode>" |
3767 (parallel [(const_int 0) (const_int 1) | 4166 (parallel [(const_int 0) (const_int 1) |
3768 (const_int 2) (const_int 3)])))] | 4167 (const_int 2) (const_int 3)])))] |
3769 "TARGET_AVX" | 4168 "TARGET_AVX" |
3770 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" | 4169 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}" |
3771 [(set_attr "type" "sselog") | 4170 [(set_attr "type" "sselog") |
4171 (set_attr "prefix_extra" "1") | |
4172 (set_attr "length_immediate" "1") | |
3772 (set_attr "memory" "none,store") | 4173 (set_attr "memory" "none,store") |
3773 (set_attr "prefix" "vex") | 4174 (set_attr "prefix" "vex") |
3774 (set_attr "mode" "V8SF")]) | 4175 (set_attr "mode" "V8SF")]) |
3775 | 4176 |
3776 (define_insn "vec_extract_hi_<mode>" | 4177 (define_insn "vec_extract_hi_<mode>" |
3780 (parallel [(const_int 4) (const_int 5) | 4181 (parallel [(const_int 4) (const_int 5) |
3781 (const_int 6) (const_int 7)])))] | 4182 (const_int 6) (const_int 7)])))] |
3782 "TARGET_AVX" | 4183 "TARGET_AVX" |
3783 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" | 4184 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" |
3784 [(set_attr "type" "sselog") | 4185 [(set_attr "type" "sselog") |
4186 (set_attr "prefix_extra" "1") | |
4187 (set_attr "length_immediate" "1") | |
3785 (set_attr "memory" "none,store") | 4188 (set_attr "memory" "none,store") |
3786 (set_attr "prefix" "vex") | 4189 (set_attr "prefix" "vex") |
3787 (set_attr "mode" "V8SF")]) | 4190 (set_attr "mode" "V8SF")]) |
3788 | 4191 |
;; Low-half extract of a V16HI value: the selector ends at element 7, | |
;; so the vextractf128 immediate must be 0.  Immediate 1 names the | |
;; upper 128 bits and is what vec_extract_hi_v16hi emits. | |
3789 (define_insn "vec_extract_lo_v16hi" | 4192 (define_insn "vec_extract_lo_v16hi" |
3795 (const_int 4) (const_int 5) | 4198 (const_int 4) (const_int 5) |
3796 (const_int 6) (const_int 7)])))] | 4199 (const_int 6) (const_int 7)])))] |
3797 "TARGET_AVX" | 4200 "TARGET_AVX" |
3798 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" | 4201 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}" |
3799 [(set_attr "type" "sselog") | 4202 [(set_attr "type" "sselog") |
4203 (set_attr "prefix_extra" "1") | |
4204 (set_attr "length_immediate" "1") | |
3800 (set_attr "memory" "none,store") | 4205 (set_attr "memory" "none,store") |
3801 (set_attr "prefix" "vex") | 4206 (set_attr "prefix" "vex") |
3802 (set_attr "mode" "V8SF")]) | 4207 (set_attr "mode" "V8SF")]) |
3803 | 4208 |
3804 (define_insn "vec_extract_hi_v16hi" | 4209 (define_insn "vec_extract_hi_v16hi" |
3810 (const_int 12) (const_int 13) | 4215 (const_int 12) (const_int 13) |
3811 (const_int 14) (const_int 15)])))] | 4216 (const_int 14) (const_int 15)])))] |
3812 "TARGET_AVX" | 4217 "TARGET_AVX" |
3813 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" | 4218 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" |
3814 [(set_attr "type" "sselog") | 4219 [(set_attr "type" "sselog") |
4220 (set_attr "prefix_extra" "1") | |
4221 (set_attr "length_immediate" "1") | |
3815 (set_attr "memory" "none,store") | 4222 (set_attr "memory" "none,store") |
3816 (set_attr "prefix" "vex") | 4223 (set_attr "prefix" "vex") |
3817 (set_attr "mode" "V8SF")]) | 4224 (set_attr "mode" "V8SF")]) |
3818 | 4225 |
;; Low-half extract of a V32QI value: the selector ends at element 15, | |
;; so the vextractf128 immediate must be 0.  Immediate 1 names the | |
;; upper 128 bits and is what vec_extract_hi_v32qi emits. | |
3819 (define_insn "vec_extract_lo_v32qi" | 4226 (define_insn "vec_extract_lo_v32qi" |
3829 (const_int 12) (const_int 13) | 4236 (const_int 12) (const_int 13) |
3830 (const_int 14) (const_int 15)])))] | 4237 (const_int 14) (const_int 15)])))] |
3831 "TARGET_AVX" | 4238 "TARGET_AVX" |
3832 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" | 4239 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}" |
3833 [(set_attr "type" "sselog") | 4240 [(set_attr "type" "sselog") |
4241 (set_attr "prefix_extra" "1") | |
4242 (set_attr "length_immediate" "1") | |
3834 (set_attr "memory" "none,store") | 4243 (set_attr "memory" "none,store") |
3835 (set_attr "prefix" "vex") | 4244 (set_attr "prefix" "vex") |
3836 (set_attr "mode" "V8SF")]) | 4245 (set_attr "mode" "V8SF")]) |
3837 | 4246 |
3838 (define_insn "vec_extract_hi_v32qi" | 4247 (define_insn "vec_extract_hi_v32qi" |
3848 (const_int 28) (const_int 29) | 4257 (const_int 28) (const_int 29) |
3849 (const_int 30) (const_int 31)])))] | 4258 (const_int 30) (const_int 31)])))] |
3850 "TARGET_AVX" | 4259 "TARGET_AVX" |
3851 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" | 4260 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" |
3852 [(set_attr "type" "sselog") | 4261 [(set_attr "type" "sselog") |
4262 (set_attr "prefix_extra" "1") | |
4263 (set_attr "length_immediate" "1") | |
3853 (set_attr "memory" "none,store") | 4264 (set_attr "memory" "none,store") |
3854 (set_attr "prefix" "vex") | 4265 (set_attr "prefix" "vex") |
3855 (set_attr "mode" "V8SF")]) | 4266 (set_attr "mode" "V8SF")]) |
3856 | 4267 |
3857 (define_insn "*sse4_1_extractps" | 4268 (define_insn "*sse4_1_extractps" |
3860 (match_operand:V4SF 1 "register_operand" "x") | 4271 (match_operand:V4SF 1 "register_operand" "x") |
3861 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))] | 4272 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))] |
3862 "TARGET_SSE4_1" | 4273 "TARGET_SSE4_1" |
3863 "%vextractps\t{%2, %1, %0|%0, %1, %2}" | 4274 "%vextractps\t{%2, %1, %0|%0, %1, %2}" |
3864 [(set_attr "type" "sselog") | 4275 [(set_attr "type" "sselog") |
3865 (set_attr "prefix_extra" "1") | 4276 (set_attr "prefix_data16" "1") |
4277 (set_attr "prefix_extra" "1") | |
4278 (set_attr "length_immediate" "1") | |
3866 (set_attr "prefix" "maybe_vex") | 4279 (set_attr "prefix" "maybe_vex") |
3867 (set_attr "mode" "V4SF")]) | 4280 (set_attr "mode" "V4SF")]) |
3868 | 4281 |
3869 (define_insn_and_split "*vec_extract_v4sf_mem" | 4282 (define_insn_and_split "*vec_extract_v4sf_mem" |
3870 [(set (match_operand:SF 0 "register_operand" "=x*rf") | 4283 [(set (match_operand:SF 0 "register_operand" "=x*rf") |
3897 ;; | 4310 ;; |
3898 ;; Parallel double-precision floating point element swizzling | 4311 ;; Parallel double-precision floating point element swizzling |
3899 ;; | 4312 ;; |
3900 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 4313 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
3901 | 4314 |
4315 ;; Recall that the 256-bit unpck insns only shuffle within their lanes. | |
3902 (define_insn "avx_unpckhpd256" | 4316 (define_insn "avx_unpckhpd256" |
3903 [(set (match_operand:V4DF 0 "register_operand" "=x") | 4317 [(set (match_operand:V4DF 0 "register_operand" "=x") |
3904 (vec_select:V4DF | 4318 (vec_select:V4DF |
3905 (vec_concat:V8DF | 4319 (vec_concat:V8DF |
3906 (match_operand:V4DF 1 "register_operand" "x") | 4320 (match_operand:V4DF 1 "register_operand" "x") |
3911 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}" | 4325 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}" |
3912 [(set_attr "type" "sselog") | 4326 [(set_attr "type" "sselog") |
3913 (set_attr "prefix" "vex") | 4327 (set_attr "prefix" "vex") |
3914 (set_attr "mode" "V4DF")]) | 4328 (set_attr "mode" "V4DF")]) |
3915 | 4329 |
3916 (define_expand "sse2_unpckhpd_exp" | 4330 (define_expand "vec_interleave_highv2df" |
3917 [(set (match_operand:V2DF 0 "nonimmediate_operand" "") | 4331 [(set (match_operand:V2DF 0 "register_operand" "") |
3918 (vec_select:V2DF | 4332 (vec_select:V2DF |
3919 (vec_concat:V4DF | 4333 (vec_concat:V4DF |
3920 (match_operand:V2DF 1 "nonimmediate_operand" "") | 4334 (match_operand:V2DF 1 "nonimmediate_operand" "") |
3921 (match_operand:V2DF 2 "nonimmediate_operand" "")) | 4335 (match_operand:V2DF 2 "nonimmediate_operand" "")) |
3922 (parallel [(const_int 1) | 4336 (parallel [(const_int 1) |
3923 (const_int 3)])))] | 4337 (const_int 3)])))] |
3924 "TARGET_SSE2" | 4338 "TARGET_SSE2" |
3925 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);") | 4339 { |
3926 | 4340 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1)) |
3927 (define_insn "*avx_unpckhpd" | 4341 operands[2] = force_reg (V2DFmode, operands[2]); |
3928 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") | 4342 }) |
4343 | |
4344 (define_insn "*avx_interleave_highv2df" | |
4345 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m") | |
3929 (vec_select:V2DF | 4346 (vec_select:V2DF |
3930 (vec_concat:V4DF | 4347 (vec_concat:V4DF |
3931 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x") | 4348 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x") |
3932 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0")) | 4349 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0")) |
3933 (parallel [(const_int 1) | 4350 (parallel [(const_int 1) |
3934 (const_int 3)])))] | 4351 (const_int 3)])))] |
3935 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))" | 4352 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)" |
3936 "@ | 4353 "@ |
3937 vunpckhpd\t{%2, %1, %0|%0, %1, %2} | 4354 vunpckhpd\t{%2, %1, %0|%0, %1, %2} |
4355 vmovddup\t{%H1, %0|%0, %H1} | |
3938 vmovlpd\t{%H1, %2, %0|%0, %2, %H1} | 4356 vmovlpd\t{%H1, %2, %0|%0, %2, %H1} |
3939 vmovhpd\t{%1, %0|%0, %1}" | 4357 vmovhpd\t{%1, %0|%0, %1}" |
3940 [(set_attr "type" "sselog,ssemov,ssemov") | 4358 [(set_attr "type" "sselog,sselog,ssemov,ssemov") |
3941 (set_attr "prefix" "vex") | 4359 (set_attr "prefix" "vex") |
3942 (set_attr "mode" "V2DF,V1DF,V1DF")]) | 4360 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")]) |
3943 | 4361 |
3944 (define_insn "sse2_unpckhpd" | 4362 (define_insn "*sse3_interleave_highv2df" |
4363 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m") | |
4364 (vec_select:V2DF | |
4365 (vec_concat:V4DF | |
4366 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x") | |
4367 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0")) | |
4368 (parallel [(const_int 1) | |
4369 (const_int 3)])))] | |
4370 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)" | |
4371 "@ | |
4372 unpckhpd\t{%2, %0|%0, %2} | |
4373 movddup\t{%H1, %0|%0, %H1} | |
4374 movlpd\t{%H1, %0|%0, %H1} | |
4375 movhpd\t{%1, %0|%0, %1}" | |
4376 [(set_attr "type" "sselog,sselog,ssemov,ssemov") | |
4377 (set_attr "prefix_data16" "*,*,1,1") | |
4378 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")]) | |
4379 | |
4380 (define_insn "*sse2_interleave_highv2df" | |
3945 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") | 4381 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") |
3946 (vec_select:V2DF | 4382 (vec_select:V2DF |
3947 (vec_concat:V4DF | 4383 (vec_concat:V4DF |
3948 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x") | 4384 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x") |
3949 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0")) | 4385 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0")) |
3950 (parallel [(const_int 1) | 4386 (parallel [(const_int 1) |
3951 (const_int 3)])))] | 4387 (const_int 3)])))] |
3952 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" | 4388 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)" |
3953 "@ | 4389 "@ |
3954 unpckhpd\t{%2, %0|%0, %2} | 4390 unpckhpd\t{%2, %0|%0, %2} |
3955 movlpd\t{%H1, %0|%0, %H1} | 4391 movlpd\t{%H1, %0|%0, %H1} |
3956 movhpd\t{%1, %0|%0, %1}" | 4392 movhpd\t{%1, %0|%0, %1}" |
3957 [(set_attr "type" "sselog,ssemov,ssemov") | 4393 [(set_attr "type" "sselog,ssemov,ssemov") |
4394 (set_attr "prefix_data16" "*,1,1") | |
3958 (set_attr "mode" "V2DF,V1DF,V1DF")]) | 4395 (set_attr "mode" "V2DF,V1DF,V1DF")]) |
3959 | 4396 |
3960 (define_insn "avx_movddup256" | 4397 ;; Recall that the 256-bit unpck insns only shuffle within their lanes. |
3961 [(set (match_operand:V4DF 0 "register_operand" "=x") | 4398 (define_expand "avx_movddup256" |
4399 [(set (match_operand:V4DF 0 "register_operand" "") | |
3962 (vec_select:V4DF | 4400 (vec_select:V4DF |
3963 (vec_concat:V8DF | 4401 (vec_concat:V8DF |
3964 (match_operand:V4DF 1 "nonimmediate_operand" "xm") | 4402 (match_operand:V4DF 1 "nonimmediate_operand" "") |
3965 (match_dup 1)) | 4403 (match_dup 1)) |
3966 (parallel [(const_int 0) (const_int 2) | 4404 (parallel [(const_int 0) (const_int 4) |
3967 (const_int 4) (const_int 6)])))] | 4405 (const_int 2) (const_int 6)])))] |
3968 "TARGET_AVX" | 4406 "TARGET_AVX" |
3969 "vmovddup\t{%1, %0|%0, %1}" | 4407 "") |
3970 [(set_attr "type" "sselog1") | 4408 |
4409 (define_expand "avx_unpcklpd256" | |
4410 [(set (match_operand:V4DF 0 "register_operand" "") | |
4411 (vec_select:V4DF | |
4412 (vec_concat:V8DF | |
4413 (match_operand:V4DF 1 "register_operand" "") | |
4414 (match_operand:V4DF 2 "nonimmediate_operand" "")) | |
4415 (parallel [(const_int 0) (const_int 4) | |
4416 (const_int 2) (const_int 6)])))] | |
4417 "TARGET_AVX" | |
4418 "") | |
4419 | |
4420 (define_insn "*avx_unpcklpd256" | |
4421 [(set (match_operand:V4DF 0 "register_operand" "=x,x") | |
4422 (vec_select:V4DF | |
4423 (vec_concat:V8DF | |
4424 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x") | |
4425 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm")) | |
4426 (parallel [(const_int 0) (const_int 4) | |
4427 (const_int 2) (const_int 6)])))] | |
4428 "TARGET_AVX | |
4429 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))" | |
4430 "@ | |
4431 vmovddup\t{%1, %0|%0, %1} | |
4432 vunpcklpd\t{%2, %1, %0|%0, %1, %2}" | |
4433 [(set_attr "type" "sselog") | |
3971 (set_attr "prefix" "vex") | 4434 (set_attr "prefix" "vex") |
3972 (set_attr "mode" "V4DF")]) | 4435 (set_attr "mode" "V4DF")]) |
3973 | 4436 |
3974 (define_insn "*avx_movddup" | 4437 (define_expand "vec_interleave_lowv2df" |
3975 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o") | 4438 [(set (match_operand:V2DF 0 "register_operand" "") |
3976 (vec_select:V2DF | 4439 (vec_select:V2DF |
3977 (vec_concat:V4DF | 4440 (vec_concat:V4DF |
3978 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x") | 4441 (match_operand:V2DF 1 "nonimmediate_operand" "") |
3979 (match_dup 1)) | 4442 (match_operand:V2DF 2 "nonimmediate_operand" "")) |
3980 (parallel [(const_int 0) | 4443 (parallel [(const_int 0) |
3981 (const_int 2)])))] | 4444 (const_int 2)])))] |
3982 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | 4445 "TARGET_SSE2" |
3983 "@ | 4446 { |
3984 vmovddup\t{%1, %0|%0, %1} | 4447 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0)) |
3985 #" | 4448 operands[1] = force_reg (V2DFmode, operands[1]); |
3986 [(set_attr "type" "sselog1,ssemov") | 4449 }) |
3987 (set_attr "prefix" "vex") | 4450 |
3988 (set_attr "mode" "V2DF")]) | 4451 (define_insn "*avx_interleave_lowv2df" |
3989 | 4452 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o") |
3990 (define_insn "*sse3_movddup" | |
3991 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o") | |
3992 (vec_select:V2DF | 4453 (vec_select:V2DF |
3993 (vec_concat:V4DF | 4454 (vec_concat:V4DF |
3994 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x") | 4455 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0") |
3995 (match_dup 1)) | 4456 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x")) |
3996 (parallel [(const_int 0) | 4457 (parallel [(const_int 0) |
3997 (const_int 2)])))] | 4458 (const_int 2)])))] |
3998 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" | 4459 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)" |
3999 "@ | 4460 "@ |
4461 vunpcklpd\t{%2, %1, %0|%0, %1, %2} | |
4462 vmovddup\t{%1, %0|%0, %1} | |
4463 vmovhpd\t{%2, %1, %0|%0, %1, %2} | |
4464 vmovlpd\t{%2, %H0|%H0, %2}" | |
4465 [(set_attr "type" "sselog,sselog,ssemov,ssemov") | |
4466 (set_attr "prefix" "vex") | |
4467 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")]) | |
4468 | |
4469 (define_insn "*sse3_interleave_lowv2df" | |
4470 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o") | |
4471 (vec_select:V2DF | |
4472 (vec_concat:V4DF | |
4473 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0") | |
4474 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x")) | |
4475 (parallel [(const_int 0) | |
4476 (const_int 2)])))] | |
4477 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)" | |
4478 "@ | |
4479 unpcklpd\t{%2, %0|%0, %2} | |
4000 movddup\t{%1, %0|%0, %1} | 4480 movddup\t{%1, %0|%0, %1} |
4001 #" | 4481 movhpd\t{%2, %0|%0, %2} |
4002 [(set_attr "type" "sselog1,ssemov") | 4482 movlpd\t{%2, %H0|%H0, %2}" |
4003 (set_attr "mode" "V2DF")]) | 4483 [(set_attr "type" "sselog,sselog,ssemov,ssemov") |
4484 (set_attr "prefix_data16" "*,*,1,1") | |
4485 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")]) | |
4486 | |
4487 (define_insn "*sse2_interleave_lowv2df" | |
4488 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o") | |
4489 (vec_select:V2DF | |
4490 (vec_concat:V4DF | |
4491 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0") | |
4492 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x")) | |
4493 (parallel [(const_int 0) | |
4494 (const_int 2)])))] | |
4495 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)" | |
4496 "@ | |
4497 unpcklpd\t{%2, %0|%0, %2} | |
4498 movhpd\t{%2, %0|%0, %2} | |
4499 movlpd\t{%2, %H0|%H0, %2}" | |
4500 [(set_attr "type" "sselog,ssemov,ssemov") | |
4501 (set_attr "prefix_data16" "*,1,1") | |
4502 (set_attr "mode" "V2DF,V1DF,V1DF")]) | |
4004 | 4503 |
4005 (define_split | 4504 (define_split |
4006 [(set (match_operand:V2DF 0 "memory_operand" "") | 4505 [(set (match_operand:V2DF 0 "memory_operand" "") |
4007 (vec_select:V2DF | 4506 (vec_select:V2DF |
4008 (vec_concat:V4DF | 4507 (vec_concat:V4DF |
4017 emit_move_insn (adjust_address (operands[0], DFmode, 0), low); | 4516 emit_move_insn (adjust_address (operands[0], DFmode, 0), low); |
4018 emit_move_insn (adjust_address (operands[0], DFmode, 8), low); | 4517 emit_move_insn (adjust_address (operands[0], DFmode, 8), low); |
4019 DONE; | 4518 DONE; |
4020 }) | 4519 }) |
4021 | 4520 |
4022 (define_insn "avx_unpcklpd256" | 4521 (define_split |
4023 [(set (match_operand:V4DF 0 "register_operand" "=x") | 4522 [(set (match_operand:V2DF 0 "register_operand" "") |
4024 (vec_select:V4DF | |
4025 (vec_concat:V8DF | |
4026 (match_operand:V4DF 1 "register_operand" "x") | |
4027 (match_operand:V4DF 2 "nonimmediate_operand" "xm")) | |
4028 (parallel [(const_int 0) (const_int 4) | |
4029 (const_int 2) (const_int 6)])))] | |
4030 "TARGET_AVX" | |
4031 "vunpcklpd\t{%2, %1, %0|%0, %1, %2}" | |
4032 [(set_attr "type" "sselog") | |
4033 (set_attr "prefix" "vex") | |
4034 (set_attr "mode" "V4DF")]) | |
4035 | |
4036 (define_expand "sse2_unpcklpd_exp" | |
4037 [(set (match_operand:V2DF 0 "nonimmediate_operand" "") | |
4038 (vec_select:V2DF | 4523 (vec_select:V2DF |
4039 (vec_concat:V4DF | 4524 (vec_concat:V4DF |
4040 (match_operand:V2DF 1 "nonimmediate_operand" "") | 4525 (match_operand:V2DF 1 "memory_operand" "") |
4041 (match_operand:V2DF 2 "nonimmediate_operand" "")) | 4526 (match_dup 1)) |
4042 (parallel [(const_int 0) | 4527 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "") |
4043 (const_int 2)])))] | 4528 (match_operand:SI 3 "const_int_operand" "")])))] |
4044 "TARGET_SSE2" | 4529 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])" |
4045 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);") | 4530 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))] |
4046 | 4531 { |
4047 (define_insn "*avx_unpcklpd" | 4532 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8); |
4048 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o") | 4533 }) |
4049 (vec_select:V2DF | |
4050 (vec_concat:V4DF | |
4051 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0") | |
4052 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x")) | |
4053 (parallel [(const_int 0) | |
4054 (const_int 2)])))] | |
4055 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))" | |
4056 "@ | |
4057 vunpcklpd\t{%2, %1, %0|%0, %1, %2} | |
4058 vmovhpd\t{%2, %1, %0|%0, %1, %2} | |
4059 vmovlpd\t{%2, %H0|%H0, %2}" | |
4060 [(set_attr "type" "sselog,ssemov,ssemov") | |
4061 (set_attr "prefix" "vex") | |
4062 (set_attr "mode" "V2DF,V1DF,V1DF")]) | |
4063 | |
4064 (define_insn "sse2_unpcklpd" | |
4065 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o") | |
4066 (vec_select:V2DF | |
4067 (vec_concat:V4DF | |
4068 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0") | |
4069 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x")) | |
4070 (parallel [(const_int 0) | |
4071 (const_int 2)])))] | |
4072 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" | |
4073 "@ | |
4074 unpcklpd\t{%2, %0|%0, %2} | |
4075 movhpd\t{%2, %0|%0, %2} | |
4076 movlpd\t{%2, %H0|%H0, %2}" | |
4077 [(set_attr "type" "sselog,ssemov,ssemov") | |
4078 (set_attr "mode" "V2DF,V1DF,V1DF")]) | |
4079 | 4534 |
4080 (define_expand "avx_shufpd256" | 4535 (define_expand "avx_shufpd256" |
4081 [(match_operand:V4DF 0 "register_operand" "") | 4536 [(match_operand:V4DF 0 "register_operand" "") |
4082 (match_operand:V4DF 1 "register_operand" "") | 4537 (match_operand:V4DF 1 "register_operand" "") |
4083 (match_operand:V4DF 2 "nonimmediate_operand" "") | 4538 (match_operand:V4DF 2 "nonimmediate_operand" "") |
4113 operands[3] = GEN_INT (mask); | 4568 operands[3] = GEN_INT (mask); |
4114 | 4569 |
4115 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | 4570 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
4116 } | 4571 } |
4117 [(set_attr "type" "sselog") | 4572 [(set_attr "type" "sselog") |
4573 (set_attr "length_immediate" "1") | |
4118 (set_attr "prefix" "vex") | 4574 (set_attr "prefix" "vex") |
4119 (set_attr "mode" "V4DF")]) | 4575 (set_attr "mode" "V4DF")]) |
4120 | 4576 |
4121 (define_expand "sse2_shufpd" | 4577 (define_expand "sse2_shufpd" |
4122 [(match_operand:V2DF 0 "register_operand" "") | 4578 [(match_operand:V2DF 0 "register_operand" "") |
4131 GEN_INT (mask & 2 ? 3 : 2))); | 4587 GEN_INT (mask & 2 ? 3 : 2))); |
4132 DONE; | 4588 DONE; |
4133 }) | 4589 }) |
4134 | 4590 |
4135 (define_expand "vec_extract_even<mode>" | 4591 (define_expand "vec_extract_even<mode>" |
4136 [(set (match_operand:SSEMODE4S 0 "register_operand" "") | 4592 [(match_operand:SSEMODE_EO 0 "register_operand" "") |
4137 (vec_select:SSEMODE4S | 4593 (match_operand:SSEMODE_EO 1 "register_operand" "") |
4138 (vec_concat:<ssedoublesizemode> | 4594 (match_operand:SSEMODE_EO 2 "register_operand" "")] |
4139 (match_operand:SSEMODE4S 1 "register_operand" "") | 4595 "" |
4140 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "")) | 4596 { |
4141 (parallel [(const_int 0) | 4597 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0); |
4142 (const_int 2) | 4598 DONE; |
4143 (const_int 4) | 4599 }) |
4144 (const_int 6)])))] | |
4145 "TARGET_SSE") | |
4146 | 4600 |
4147 (define_expand "vec_extract_odd<mode>" | 4601 (define_expand "vec_extract_odd<mode>" |
4148 [(set (match_operand:SSEMODE4S 0 "register_operand" "") | 4602 [(match_operand:SSEMODE_EO 0 "register_operand" "") |
4149 (vec_select:SSEMODE4S | 4603 (match_operand:SSEMODE_EO 1 "register_operand" "") |
4150 (vec_concat:<ssedoublesizemode> | 4604 (match_operand:SSEMODE_EO 2 "register_operand" "")] |
4151 (match_operand:SSEMODE4S 1 "register_operand" "") | 4605 "" |
4152 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "")) | 4606 { |
4153 (parallel [(const_int 1) | 4607 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1); |
4154 (const_int 3) | 4608 DONE; |
4155 (const_int 5) | 4609 }) |
4156 (const_int 7)])))] | |
4157 "TARGET_SSE") | |
4158 | |
4159 (define_expand "vec_extract_even<mode>" | |
4160 [(set (match_operand:SSEMODE2D 0 "register_operand" "") | |
4161 (vec_select:SSEMODE2D | |
4162 (vec_concat:<ssedoublesizemode> | |
4163 (match_operand:SSEMODE2D 1 "register_operand" "") | |
4164 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "")) | |
4165 (parallel [(const_int 0) | |
4166 (const_int 2)])))] | |
4167 "TARGET_SSE2") | |
4168 | |
4169 (define_expand "vec_extract_odd<mode>" | |
4170 [(set (match_operand:SSEMODE2D 0 "register_operand" "") | |
4171 (vec_select:SSEMODE2D | |
4172 (vec_concat:<ssedoublesizemode> | |
4173 (match_operand:SSEMODE2D 1 "register_operand" "") | |
4174 (match_operand:SSEMODE2D 2 "nonimmediate_operand" "")) | |
4175 (parallel [(const_int 1) | |
4176 (const_int 3)])))] | |
4177 "TARGET_SSE2") | |
4178 | 4610 |
4179 ;; punpcklqdq and punpckhqdq are shorter than shufpd. | 4611 ;; punpcklqdq and punpckhqdq are shorter than shufpd. |
4180 (define_insn "*avx_punpckhqdq" | 4612 (define_insn "*avx_interleave_highv2di" |
4181 [(set (match_operand:V2DI 0 "register_operand" "=x") | 4613 [(set (match_operand:V2DI 0 "register_operand" "=x") |
4182 (vec_select:V2DI | 4614 (vec_select:V2DI |
4183 (vec_concat:V4DI | 4615 (vec_concat:V4DI |
4184 (match_operand:V2DI 1 "register_operand" "x") | 4616 (match_operand:V2DI 1 "register_operand" "x") |
4185 (match_operand:V2DI 2 "nonimmediate_operand" "xm")) | 4617 (match_operand:V2DI 2 "nonimmediate_operand" "xm")) |
4189 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}" | 4621 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}" |
4190 [(set_attr "type" "sselog") | 4622 [(set_attr "type" "sselog") |
4191 (set_attr "prefix" "vex") | 4623 (set_attr "prefix" "vex") |
4192 (set_attr "mode" "TI")]) | 4624 (set_attr "mode" "TI")]) |
4193 | 4625 |
4194 (define_insn "sse2_punpckhqdq" | 4626 (define_insn "vec_interleave_highv2di" |
4195 [(set (match_operand:V2DI 0 "register_operand" "=x") | 4627 [(set (match_operand:V2DI 0 "register_operand" "=x") |
4196 (vec_select:V2DI | 4628 (vec_select:V2DI |
4197 (vec_concat:V4DI | 4629 (vec_concat:V4DI |
4198 (match_operand:V2DI 1 "register_operand" "0") | 4630 (match_operand:V2DI 1 "register_operand" "0") |
4199 (match_operand:V2DI 2 "nonimmediate_operand" "xm")) | 4631 (match_operand:V2DI 2 "nonimmediate_operand" "xm")) |
4203 "punpckhqdq\t{%2, %0|%0, %2}" | 4635 "punpckhqdq\t{%2, %0|%0, %2}" |
4204 [(set_attr "type" "sselog") | 4636 [(set_attr "type" "sselog") |
4205 (set_attr "prefix_data16" "1") | 4637 (set_attr "prefix_data16" "1") |
4206 (set_attr "mode" "TI")]) | 4638 (set_attr "mode" "TI")]) |
4207 | 4639 |
4208 (define_insn "*avx_punpcklqdq" | 4640 (define_insn "*avx_interleave_lowv2di" |
4209 [(set (match_operand:V2DI 0 "register_operand" "=x") | 4641 [(set (match_operand:V2DI 0 "register_operand" "=x") |
4210 (vec_select:V2DI | 4642 (vec_select:V2DI |
4211 (vec_concat:V4DI | 4643 (vec_concat:V4DI |
4212 (match_operand:V2DI 1 "register_operand" "x") | 4644 (match_operand:V2DI 1 "register_operand" "x") |
4213 (match_operand:V2DI 2 "nonimmediate_operand" "xm")) | 4645 (match_operand:V2DI 2 "nonimmediate_operand" "xm")) |
4217 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}" | 4649 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}" |
4218 [(set_attr "type" "sselog") | 4650 [(set_attr "type" "sselog") |
4219 (set_attr "prefix" "vex") | 4651 (set_attr "prefix" "vex") |
4220 (set_attr "mode" "TI")]) | 4652 (set_attr "mode" "TI")]) |
4221 | 4653 |
4222 (define_insn "sse2_punpcklqdq" | 4654 (define_insn "vec_interleave_lowv2di" |
4223 [(set (match_operand:V2DI 0 "register_operand" "=x") | 4655 [(set (match_operand:V2DI 0 "register_operand" "=x") |
4224 (vec_select:V2DI | 4656 (vec_select:V2DI |
4225 (vec_concat:V4DI | 4657 (vec_concat:V4DI |
4226 (match_operand:V2DI 1 "register_operand" "0") | 4658 (match_operand:V2DI 1 "register_operand" "0") |
4227 (match_operand:V2DI 2 "nonimmediate_operand" "xm")) | 4659 (match_operand:V2DI 2 "nonimmediate_operand" "xm")) |
4249 operands[3] = GEN_INT (mask); | 4681 operands[3] = GEN_INT (mask); |
4250 | 4682 |
4251 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | 4683 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
4252 } | 4684 } |
4253 [(set_attr "type" "sselog") | 4685 [(set_attr "type" "sselog") |
4686 (set_attr "length_immediate" "1") | |
4254 (set_attr "prefix" "vex") | 4687 (set_attr "prefix" "vex") |
4255 (set_attr "mode" "V2DF")]) | 4688 (set_attr "mode" "V2DF")]) |
4256 | 4689 |
4257 (define_insn "sse2_shufpd_<mode>" | 4690 (define_insn "sse2_shufpd_<mode>" |
4258 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x") | 4691 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x") |
4270 operands[3] = GEN_INT (mask); | 4703 operands[3] = GEN_INT (mask); |
4271 | 4704 |
4272 return "shufpd\t{%3, %2, %0|%0, %2, %3}"; | 4705 return "shufpd\t{%3, %2, %0|%0, %2, %3}"; |
4273 } | 4706 } |
4274 [(set_attr "type" "sselog") | 4707 [(set_attr "type" "sselog") |
4708 (set_attr "length_immediate" "1") | |
4275 (set_attr "mode" "V2DF")]) | 4709 (set_attr "mode" "V2DF")]) |
4276 | 4710 |
4277 ;; Avoid combining registers from different units in a single alternative, | 4711 ;; Avoid combining registers from different units in a single alternative, |
4278 ;; see comment above inline_secondary_memory_needed function in i386.c | 4712 ;; see comment above inline_secondary_memory_needed function in i386.c |
4279 (define_insn "*avx_storehpd" | 4713 (define_insn "*avx_storehpd" |
4303 unpckhpd\t%0, %0 | 4737 unpckhpd\t%0, %0 |
4304 # | 4738 # |
4305 # | 4739 # |
4306 #" | 4740 #" |
4307 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov") | 4741 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov") |
4742 (set_attr "prefix_data16" "1,*,*,*,*") | |
4308 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")]) | 4743 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")]) |
4309 | 4744 |
4310 (define_split | 4745 (define_split |
4311 [(set (match_operand:DF 0 "register_operand" "") | 4746 [(set (match_operand:DF 0 "register_operand" "") |
4312 (vec_select:DF | 4747 (vec_select:DF |
4331 # | 4766 # |
4332 # | 4767 # |
4333 # | 4768 # |
4334 #" | 4769 #" |
4335 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov") | 4770 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov") |
4771 (set_attr "prefix_data16" "1,*,*,*,*") | |
4336 (set_attr "prefix" "maybe_vex") | 4772 (set_attr "prefix" "maybe_vex") |
4337 (set_attr "mode" "V1DF,DF,DF,DF,DF")]) | 4773 (set_attr "mode" "V1DF,DF,DF,DF,DF")]) |
4338 | 4774 |
4339 (define_split | 4775 (define_split |
4340 [(set (match_operand:DF 0 "register_operand" "") | 4776 [(set (match_operand:DF 0 "register_operand" "") |
4397 shufpd\t{$1, %1, %0|%0, %1, 1} | 4833 shufpd\t{$1, %1, %0|%0, %1, 1} |
4398 # | 4834 # |
4399 # | 4835 # |
4400 #" | 4836 #" |
4401 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov") | 4837 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov") |
4838 (set_attr "prefix_data16" "1,*,*,*,*,*") | |
4839 (set_attr "length_immediate" "*,*,1,*,*,*") | |
4402 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")]) | 4840 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")]) |
4403 | 4841 |
4404 (define_split | 4842 (define_split |
4405 [(set (match_operand:V2DF 0 "memory_operand" "") | 4843 [(set (match_operand:V2DF 0 "memory_operand" "") |
4406 (vec_concat:V2DF | 4844 (vec_concat:V2DF |
4460 movhpd\t{%H1, %0|%0, %H1} | 4898 movhpd\t{%H1, %0|%0, %H1} |
4461 # | 4899 # |
4462 # | 4900 # |
4463 #" | 4901 #" |
4464 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov") | 4902 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov") |
4903 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*") | |
4904 (set_attr "length_immediate" "*,*,*,1,*,*,*,*") | |
4465 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")]) | 4905 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")]) |
4466 | 4906 |
4467 (define_split | 4907 (define_split |
4468 [(set (match_operand:V2DF 0 "memory_operand" "") | 4908 [(set (match_operand:V2DF 0 "memory_operand" "") |
4469 (vec_concat:V2DF | 4909 (vec_concat:V2DF |
4535 movlpd\t{%2, %0|%0, %2} | 4975 movlpd\t{%2, %0|%0, %2} |
4536 shufpd\t{$2, %2, %0|%0, %2, 2} | 4976 shufpd\t{$2, %2, %0|%0, %2, 2} |
4537 movhps\t{%H1, %0|%0, %H1} | 4977 movhps\t{%H1, %0|%0, %H1} |
4538 movhps\t{%1, %H0|%H0, %1}" | 4978 movhps\t{%1, %H0|%H0, %1}" |
4539 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov") | 4979 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov") |
4980 (set_attr "prefix_data16" "*,1,1,*,*,*") | |
4981 (set_attr "length_immediate" "*,*,*,1,*,*") | |
4540 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")]) | 4982 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")]) |
4541 | 4983 |
4542 (define_insn "*vec_dupv2df_sse3" | 4984 (define_insn "*vec_dupv2df_sse3" |
4543 [(set (match_operand:V2DF 0 "register_operand" "=x") | 4985 [(set (match_operand:V2DF 0 "register_operand" "=x") |
4544 (vec_duplicate:V2DF | 4986 (vec_duplicate:V2DF |
4594 movhpd\t{%2, %0|%0, %2} | 5036 movhpd\t{%2, %0|%0, %2} |
4595 movsd\t{%1, %0|%0, %1} | 5037 movsd\t{%1, %0|%0, %1} |
4596 movlhps\t{%2, %0|%0, %2} | 5038 movlhps\t{%2, %0|%0, %2} |
4597 movhps\t{%2, %0|%0, %2}" | 5039 movhps\t{%2, %0|%0, %2}" |
4598 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov") | 5040 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov") |
5041 (set_attr "prefix_data16" "*,1,*,*,*") | |
4599 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")]) | 5042 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")]) |
4600 | 5043 |
4601 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 5044 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
4602 ;; | 5045 ;; |
4603 ;; Parallel integral arithmetic | 5046 ;; Parallel integral arithmetic |
4675 (define_insn_and_split "mulv16qi3" | 5118 (define_insn_and_split "mulv16qi3" |
4676 [(set (match_operand:V16QI 0 "register_operand" "") | 5119 [(set (match_operand:V16QI 0 "register_operand" "") |
4677 (mult:V16QI (match_operand:V16QI 1 "register_operand" "") | 5120 (mult:V16QI (match_operand:V16QI 1 "register_operand" "") |
4678 (match_operand:V16QI 2 "register_operand" "")))] | 5121 (match_operand:V16QI 2 "register_operand" "")))] |
4679 "TARGET_SSE2 | 5122 "TARGET_SSE2 |
4680 && !(reload_completed || reload_in_progress)" | 5123 && can_create_pseudo_p ()" |
4681 "#" | 5124 "#" |
4682 "&& 1" | 5125 "&& 1" |
4683 [(const_int 0)] | 5126 [(const_int 0)] |
4684 { | 5127 { |
4685 rtx t[12], op0, op[3]; | 5128 rtx t[6]; |
4686 int i; | 5129 int i; |
4687 | 5130 |
4688 if (TARGET_SSE5) | 5131 for (i = 0; i < 6; ++i) |
4689 { | |
4690 /* On SSE5, we can take advantage of the pperm instruction to pack and | |
4691 unpack the bytes. Unpack data such that we've got a source byte in | |
4692 each low byte of each word. We don't care what goes into the high | |
4693 byte, so put 0 there. */ | |
4694 for (i = 0; i < 6; ++i) | |
4695 t[i] = gen_reg_rtx (V8HImode); | |
4696 | |
4697 for (i = 0; i < 2; i++) | |
4698 { | |
4699 op[0] = t[i]; | |
4700 op[1] = operands[i+1]; | |
4701 ix86_expand_sse5_unpack (op, true, true); /* high bytes */ | |
4702 | |
4703 op[0] = t[i+2]; | |
4704 ix86_expand_sse5_unpack (op, true, false); /* low bytes */ | |
4705 } | |
4706 | |
4707 /* Multiply words. */ | |
4708 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */ | |
4709 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */ | |
4710 | |
4711 /* Pack the low byte of each word back into a single xmm */ | |
4712 op[0] = operands[0]; | |
4713 op[1] = t[5]; | |
4714 op[2] = t[4]; | |
4715 ix86_expand_sse5_pack (op); | |
4716 DONE; | |
4717 } | |
4718 | |
4719 for (i = 0; i < 12; ++i) | |
4720 t[i] = gen_reg_rtx (V16QImode); | 5132 t[i] = gen_reg_rtx (V16QImode); |
4721 | 5133 |
4722 /* Unpack data such that we've got a source byte in each low byte of | 5134 /* Unpack data such that we've got a source byte in each low byte of |
4723 each word. We don't care what goes into the high byte of each word. | 5135 each word. We don't care what goes into the high byte of each word. |
4724 Rather than trying to get zero in there, most convenient is to let | 5136 Rather than trying to get zero in there, most convenient is to let |
4725 it be a copy of the low byte. */ | 5137 it be a copy of the low byte. */ |
4726 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1])); | 5138 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1])); |
4727 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2])); | 5139 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2])); |
4728 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1])); | 5140 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1])); |
4729 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2])); | 5141 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2])); |
4730 | 5142 |
4731 /* Multiply words. The end-of-line annotations here give a picture of what | 5143 /* Multiply words. The end-of-line annotations here give a picture of what |
4732 the output of that instruction looks like. Dot means don't care; the | 5144 the output of that instruction looks like. Dot means don't care; the |
4733 letters are the bytes of the result with A being the most significant. */ | 5145 letters are the bytes of the result with A being the most significant. */ |
4734 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */ | 5146 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */ |
4736 gen_lowpart (V8HImode, t[1]))); | 5148 gen_lowpart (V8HImode, t[1]))); |
4737 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */ | 5149 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */ |
4738 gen_lowpart (V8HImode, t[2]), | 5150 gen_lowpart (V8HImode, t[2]), |
4739 gen_lowpart (V8HImode, t[3]))); | 5151 gen_lowpart (V8HImode, t[3]))); |
4740 | 5152 |
4741 /* Extract the relevant bytes and merge them back together. */ | 5153 /* Extract the even bytes and merge them back together. */ |
4742 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */ | 5154 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0); |
4743 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */ | |
4744 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */ | |
4745 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */ | |
4746 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */ | |
4747 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */ | |
4748 | |
4749 op0 = operands[0]; | |
4750 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */ | |
4751 DONE; | 5155 DONE; |
4752 }) | 5156 }) |
4753 | 5157 |
4754 (define_expand "mulv8hi3" | 5158 (define_expand "mulv8hi3" |
4755 [(set (match_operand:V8HI 0 "register_operand" "") | 5159 [(set (match_operand:V8HI 0 "register_operand" "") |
4942 (match_operand:V4SI 2 "nonimmediate_operand" "xm") | 5346 (match_operand:V4SI 2 "nonimmediate_operand" "xm") |
4943 (parallel [(const_int 0) (const_int 2)])))))] | 5347 (parallel [(const_int 0) (const_int 2)])))))] |
4944 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)" | 5348 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)" |
4945 "vpmuldq\t{%2, %1, %0|%0, %1, %2}" | 5349 "vpmuldq\t{%2, %1, %0|%0, %1, %2}" |
4946 [(set_attr "type" "sseimul") | 5350 [(set_attr "type" "sseimul") |
5351 (set_attr "prefix_extra" "1") | |
4947 (set_attr "prefix" "vex") | 5352 (set_attr "prefix" "vex") |
4948 (set_attr "mode" "TI")]) | 5353 (set_attr "mode" "TI")]) |
4949 | 5354 |
4950 (define_insn "*sse4_1_mulv2siv2di3" | 5355 (define_insn "*sse4_1_mulv2siv2di3" |
4951 [(set (match_operand:V2DI 0 "register_operand" "=x") | 5356 [(set (match_operand:V2DI 0 "register_operand" "=x") |
5067 (const_int 5) | 5472 (const_int 5) |
5068 (const_int 7)]))))))] | 5473 (const_int 7)]))))))] |
5069 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" | 5474 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" |
5070 "pmaddwd\t{%2, %0|%0, %2}" | 5475 "pmaddwd\t{%2, %0|%0, %2}" |
5071 [(set_attr "type" "sseiadd") | 5476 [(set_attr "type" "sseiadd") |
5477 (set_attr "atom_unit" "simul") | |
5072 (set_attr "prefix_data16" "1") | 5478 (set_attr "prefix_data16" "1") |
5073 (set_attr "mode" "TI")]) | 5479 (set_attr "mode" "TI")]) |
5074 | 5480 |
5075 (define_expand "mulv4si3" | 5481 (define_expand "mulv4si3" |
5076 [(set (match_operand:V4SI 0 "register_operand" "") | 5482 [(set (match_operand:V4SI 0 "register_operand" "") |
5077 (mult:V4SI (match_operand:V4SI 1 "register_operand" "") | 5483 (mult:V4SI (match_operand:V4SI 1 "register_operand" "") |
5078 (match_operand:V4SI 2 "register_operand" "")))] | 5484 (match_operand:V4SI 2 "register_operand" "")))] |
5079 "TARGET_SSE2" | 5485 "TARGET_SSE2" |
5080 { | 5486 { |
5081 if (TARGET_SSE4_1 || TARGET_SSE5) | 5487 if (TARGET_SSE4_1 || TARGET_AVX) |
5082 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands); | 5488 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands); |
5083 }) | 5489 }) |
5084 | 5490 |
5085 (define_insn "*avx_mulv4si3" | 5491 (define_insn "*avx_mulv4si3" |
5086 [(set (match_operand:V4SI 0 "register_operand" "=x") | 5492 [(set (match_operand:V4SI 0 "register_operand" "=x") |
5087 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") | 5493 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") |
5088 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] | 5494 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] |
5089 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)" | 5495 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)" |
5090 "vpmulld\t{%2, %1, %0|%0, %1, %2}" | 5496 "vpmulld\t{%2, %1, %0|%0, %1, %2}" |
5091 [(set_attr "type" "sseimul") | 5497 [(set_attr "type" "sseimul") |
5498 (set_attr "prefix_extra" "1") | |
5092 (set_attr "prefix" "vex") | 5499 (set_attr "prefix" "vex") |
5093 (set_attr "mode" "TI")]) | 5500 (set_attr "mode" "TI")]) |
5094 | 5501 |
5095 (define_insn "*sse4_1_mulv4si3" | 5502 (define_insn "*sse4_1_mulv4si3" |
5096 [(set (match_operand:V4SI 0 "register_operand" "=x") | 5503 [(set (match_operand:V4SI 0 "register_operand" "=x") |
5100 "pmulld\t{%2, %0|%0, %2}" | 5507 "pmulld\t{%2, %0|%0, %2}" |
5101 [(set_attr "type" "sseimul") | 5508 [(set_attr "type" "sseimul") |
5102 (set_attr "prefix_extra" "1") | 5509 (set_attr "prefix_extra" "1") |
5103 (set_attr "mode" "TI")]) | 5510 (set_attr "mode" "TI")]) |
5104 | 5511 |
5105 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a | |
5106 ;; multiply/add. In general, we expect the define_split to occur before | |
5107 ;; register allocation, so we have to handle the corner case where the target | |
5108 ;; is the same as one of the inputs. | |
5109 (define_insn_and_split "*sse5_mulv4si3" | |
5110 [(set (match_operand:V4SI 0 "register_operand" "=&x") | |
5111 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x") | |
5112 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] | |
5113 "TARGET_SSE5" | |
5114 "#" | |
5115 "&& (reload_completed | |
5116 || (!reg_mentioned_p (operands[0], operands[1]) | |
5117 && !reg_mentioned_p (operands[0], operands[2])))" | |
5118 [(set (match_dup 0) | |
5119 (match_dup 3)) | |
5120 (set (match_dup 0) | |
5121 (plus:V4SI (mult:V4SI (match_dup 1) | |
5122 (match_dup 2)) | |
5123 (match_dup 0)))] | |
5124 { | |
5125 operands[3] = CONST0_RTX (V4SImode); | |
5126 } | |
5127 [(set_attr "type" "ssemuladd") | |
5128 (set_attr "mode" "TI")]) | |
5129 | |
5130 (define_insn_and_split "*sse2_mulv4si3" | 5512 (define_insn_and_split "*sse2_mulv4si3" |
5131 [(set (match_operand:V4SI 0 "register_operand" "") | 5513 [(set (match_operand:V4SI 0 "register_operand" "") |
5132 (mult:V4SI (match_operand:V4SI 1 "register_operand" "") | 5514 (mult:V4SI (match_operand:V4SI 1 "register_operand" "") |
5133 (match_operand:V4SI 2 "register_operand" "")))] | 5515 (match_operand:V4SI 2 "register_operand" "")))] |
5134 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5 | 5516 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX |
5135 && !(reload_completed || reload_in_progress)" | 5517 && can_create_pseudo_p ()" |
5136 "#" | 5518 "#" |
5137 "&& 1" | 5519 "&& 1" |
5138 [(const_int 0)] | 5520 [(const_int 0)] |
5139 { | 5521 { |
5140 rtx t1, t2, t3, t4, t5, t6, thirtytwo; | 5522 rtx t1, t2, t3, t4, t5, t6, thirtytwo; |
5156 op1, op2)); | 5538 op1, op2)); |
5157 | 5539 |
5158 /* Shift both input vectors down one element, so that elements 3 | 5540 /* Shift both input vectors down one element, so that elements 3 |
5159 and 1 are now in the slots for elements 2 and 0. For K8, at | 5541 and 1 are now in the slots for elements 2 and 0. For K8, at |
5160 least, this is faster than using a shuffle. */ | 5542 least, this is faster than using a shuffle. */ |
5161 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2), | 5543 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2), |
5162 gen_lowpart (TImode, op1), | 5544 gen_lowpart (V1TImode, op1), |
5163 thirtytwo)); | 5545 thirtytwo)); |
5164 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3), | 5546 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3), |
5165 gen_lowpart (TImode, op2), | 5547 gen_lowpart (V1TImode, op2), |
5166 thirtytwo)); | 5548 thirtytwo)); |
5167 /* Multiply elements 3 and 1. */ | 5549 /* Multiply elements 3 and 1. */ |
5168 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), | 5550 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), |
5169 t2, t3)); | 5551 t2, t3)); |
5170 | 5552 |
5171 /* Move the results in element 2 down to element 1; we don't care | 5553 /* Move the results in element 2 down to element 1; we don't care |
5174 const0_rtx, const0_rtx)); | 5556 const0_rtx, const0_rtx)); |
5175 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx, | 5557 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx, |
5176 const0_rtx, const0_rtx)); | 5558 const0_rtx, const0_rtx)); |
5177 | 5559 |
5178 /* Merge the parts back together. */ | 5560 /* Merge the parts back together. */ |
5179 emit_insn (gen_sse2_punpckldq (op0, t5, t6)); | 5561 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6)); |
5180 DONE; | 5562 DONE; |
5181 }) | 5563 }) |
5182 | 5564 |
5183 (define_insn_and_split "mulv2di3" | 5565 (define_insn_and_split "mulv2di3" |
5184 [(set (match_operand:V2DI 0 "register_operand" "") | 5566 [(set (match_operand:V2DI 0 "register_operand" "") |
5185 (mult:V2DI (match_operand:V2DI 1 "register_operand" "") | 5567 (mult:V2DI (match_operand:V2DI 1 "register_operand" "") |
5186 (match_operand:V2DI 2 "register_operand" "")))] | 5568 (match_operand:V2DI 2 "register_operand" "")))] |
5187 "TARGET_SSE2 | 5569 "TARGET_SSE2 |
5188 && !(reload_completed || reload_in_progress)" | 5570 && can_create_pseudo_p ()" |
5189 "#" | 5571 "#" |
5190 "&& 1" | 5572 "&& 1" |
5191 [(const_int 0)] | 5573 [(const_int 0)] |
5192 { | 5574 { |
5193 rtx t1, t2, t3, t4, t5, t6, thirtytwo; | 5575 rtx t1, t2, t3, t4, t5, t6, thirtytwo; |
5194 rtx op0, op1, op2; | 5576 rtx op0, op1, op2; |
5195 | 5577 |
5196 if (TARGET_SSE5) | 5578 op0 = operands[0]; |
5579 op1 = operands[1]; | |
5580 op2 = operands[2]; | |
5581 | |
5582 if (TARGET_XOP) | |
5197 { | 5583 { |
5198 /* op1: A,B,C,D, op2: E,F,G,H */ | 5584 /* op1: A,B,C,D, op2: E,F,G,H */ |
5199 op0 = operands[0]; | 5585 op1 = gen_lowpart (V4SImode, op1); |
5200 op1 = gen_lowpart (V4SImode, operands[1]); | 5586 op2 = gen_lowpart (V4SImode, op2); |
5201 op2 = gen_lowpart (V4SImode, operands[2]); | 5587 |
5202 t1 = gen_reg_rtx (V4SImode); | 5588 t1 = gen_reg_rtx (V4SImode); |
5203 t2 = gen_reg_rtx (V4SImode); | 5589 t2 = gen_reg_rtx (V4SImode); |
5204 t3 = gen_reg_rtx (V4SImode); | 5590 t3 = gen_reg_rtx (V2DImode); |
5205 t4 = gen_reg_rtx (V2DImode); | 5591 t4 = gen_reg_rtx (V2DImode); |
5206 t5 = gen_reg_rtx (V2DImode); | |
5207 | 5592 |
5208 /* t1: B,A,D,C */ | 5593 /* t1: B,A,D,C */ |
5209 emit_insn (gen_sse2_pshufd_1 (t1, op1, | 5594 emit_insn (gen_sse2_pshufd_1 (t1, op1, |
5210 GEN_INT (1), | 5595 GEN_INT (1), |
5211 GEN_INT (0), | 5596 GEN_INT (0), |
5212 GEN_INT (3), | 5597 GEN_INT (3), |
5213 GEN_INT (2))); | 5598 GEN_INT (2))); |
5214 | 5599 |
5215 /* t2: 0 */ | 5600 /* t2: (B*E),(A*F),(D*G),(C*H) */ |
5216 emit_move_insn (t2, CONST0_RTX (V4SImode)); | 5601 emit_insn (gen_mulv4si3 (t2, t1, op2)); |
5217 | |
5218 /* t3: (B*E),(A*F),(D*G),(C*H) */ | |
5219 emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2)); | |
5220 | 5602 |
5221 /* t4: (B*E)+(A*F), (D*G)+(C*H) */ | 5603 /* t3: (B*E)+(A*F), (D*G)+(C*H) */ |
5222 emit_insn (gen_sse5_phadddq (t4, t3)); | 5604 emit_insn (gen_xop_phadddq (t3, t2)); |
5223 | 5605 |
5224 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ | 5606 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ |
5225 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32))); | 5607 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32))); |
5226 | 5608 |
5227 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */ | 5609 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */ |
5228 emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5)); | 5610 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4)); |
5229 DONE; | |
5230 } | 5611 } |
5231 | 5612 else |
5232 op0 = operands[0]; | 5613 { |
5233 op1 = operands[1]; | 5614 t1 = gen_reg_rtx (V2DImode); |
5234 op2 = operands[2]; | 5615 t2 = gen_reg_rtx (V2DImode); |
5235 t1 = gen_reg_rtx (V2DImode); | 5616 t3 = gen_reg_rtx (V2DImode); |
5236 t2 = gen_reg_rtx (V2DImode); | 5617 t4 = gen_reg_rtx (V2DImode); |
5237 t3 = gen_reg_rtx (V2DImode); | 5618 t5 = gen_reg_rtx (V2DImode); |
5238 t4 = gen_reg_rtx (V2DImode); | 5619 t6 = gen_reg_rtx (V2DImode); |
5239 t5 = gen_reg_rtx (V2DImode); | 5620 thirtytwo = GEN_INT (32); |
5240 t6 = gen_reg_rtx (V2DImode); | 5621 |
5241 thirtytwo = GEN_INT (32); | 5622 /* Multiply low parts. */ |
5242 | 5623 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1), |
5243 /* Multiply low parts. */ | 5624 gen_lowpart (V4SImode, op2))); |
5244 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1), | 5625 |
5245 gen_lowpart (V4SImode, op2))); | 5626 /* Shift input vectors right 32 bits so we can multiply high parts. */ |
5246 | 5627 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo)); |
5247 /* Shift input vectors right 32 bits so we can multiply high parts. */ | 5628 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo)); |
5248 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo)); | 5629 |
5249 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo)); | 5630 /* Multiply high parts by low parts. */ |
5250 | 5631 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1), |
5251 /* Multiply high parts by low parts. */ | 5632 gen_lowpart (V4SImode, t3))); |
5252 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1), | 5633 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2), |
5253 gen_lowpart (V4SImode, t3))); | 5634 gen_lowpart (V4SImode, t2))); |
5254 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2), | 5635 |
5255 gen_lowpart (V4SImode, t2))); | 5636 /* Shift them back. */ |
5256 | 5637 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo)); |
5257 /* Shift them back. */ | 5638 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo)); |
5258 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo)); | 5639 |
5259 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo)); | 5640 /* Add the three parts together. */ |
5260 | 5641 emit_insn (gen_addv2di3 (t6, t1, t4)); |
5261 /* Add the three parts together. */ | 5642 emit_insn (gen_addv2di3 (op0, t6, t5)); |
5262 emit_insn (gen_addv2di3 (t6, t1, t4)); | 5643 } |
5263 emit_insn (gen_addv2di3 (op0, t6, t5)); | |
5264 DONE; | 5644 DONE; |
5265 }) | 5645 }) |
5266 | 5646 |
5267 (define_expand "vec_widen_smult_hi_v8hi" | 5647 (define_expand "vec_widen_smult_hi_v8hi" |
5268 [(match_operand:V4SI 0 "register_operand" "") | 5648 [(match_operand:V4SI 0 "register_operand" "") |
5346 | 5726 |
5347 (define_expand "vec_widen_smult_hi_v4si" | 5727 (define_expand "vec_widen_smult_hi_v4si" |
5348 [(match_operand:V2DI 0 "register_operand" "") | 5728 [(match_operand:V2DI 0 "register_operand" "") |
5349 (match_operand:V4SI 1 "register_operand" "") | 5729 (match_operand:V4SI 1 "register_operand" "") |
5350 (match_operand:V4SI 2 "register_operand" "")] | 5730 (match_operand:V4SI 2 "register_operand" "")] |
5351 "TARGET_SSE5" | 5731 "TARGET_XOP" |
5352 { | 5732 { |
5353 rtx t1, t2; | 5733 rtx t1, t2; |
5354 | 5734 |
5355 t1 = gen_reg_rtx (V4SImode); | 5735 t1 = gen_reg_rtx (V4SImode); |
5356 t2 = gen_reg_rtx (V4SImode); | 5736 t2 = gen_reg_rtx (V4SImode); |
5363 emit_insn (gen_sse2_pshufd_1 (t2, operands[2], | 5743 emit_insn (gen_sse2_pshufd_1 (t2, operands[2], |
5364 GEN_INT (0), | 5744 GEN_INT (0), |
5365 GEN_INT (2), | 5745 GEN_INT (2), |
5366 GEN_INT (1), | 5746 GEN_INT (1), |
5367 GEN_INT (3))); | 5747 GEN_INT (3))); |
5368 emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2)); | 5748 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2)); |
5369 DONE; | 5749 DONE; |
5370 }) | 5750 }) |
5371 | 5751 |
5372 (define_expand "vec_widen_smult_lo_v4si" | 5752 (define_expand "vec_widen_smult_lo_v4si" |
5373 [(match_operand:V2DI 0 "register_operand" "") | 5753 [(match_operand:V2DI 0 "register_operand" "") |
5374 (match_operand:V4SI 1 "register_operand" "") | 5754 (match_operand:V4SI 1 "register_operand" "") |
5375 (match_operand:V4SI 2 "register_operand" "")] | 5755 (match_operand:V4SI 2 "register_operand" "")] |
5376 "TARGET_SSE5" | 5756 "TARGET_XOP" |
5377 { | 5757 { |
5378 rtx t1, t2; | 5758 rtx t1, t2; |
5379 | 5759 |
5380 t1 = gen_reg_rtx (V4SImode); | 5760 t1 = gen_reg_rtx (V4SImode); |
5381 t2 = gen_reg_rtx (V4SImode); | 5761 t2 = gen_reg_rtx (V4SImode); |
5388 emit_insn (gen_sse2_pshufd_1 (t2, operands[2], | 5768 emit_insn (gen_sse2_pshufd_1 (t2, operands[2], |
5389 GEN_INT (0), | 5769 GEN_INT (0), |
5390 GEN_INT (2), | 5770 GEN_INT (2), |
5391 GEN_INT (1), | 5771 GEN_INT (1), |
5392 GEN_INT (3))); | 5772 GEN_INT (3))); |
5393 emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2)); | 5773 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2)); |
5394 DONE; | |
5395 DONE; | 5774 DONE; |
5396 }) | 5775 }) |
5397 | 5776 |
5398 (define_expand "vec_widen_umult_hi_v4si" | 5777 (define_expand "vec_widen_umult_hi_v4si" |
5399 [(match_operand:V2DI 0 "register_operand" "") | 5778 [(match_operand:V2DI 0 "register_operand" "") |
5459 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2])); | 5838 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2])); |
5460 emit_insn (gen_addv2di3 (t1, t1, operands[3])); | 5839 emit_insn (gen_addv2di3 (t1, t1, operands[3])); |
5461 | 5840 |
5462 t2 = gen_reg_rtx (V4SImode); | 5841 t2 = gen_reg_rtx (V4SImode); |
5463 t3 = gen_reg_rtx (V4SImode); | 5842 t3 = gen_reg_rtx (V4SImode); |
5464 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2), | 5843 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2), |
5465 gen_lowpart (TImode, operands[1]), | 5844 gen_lowpart (V1TImode, operands[1]), |
5466 GEN_INT (32))); | 5845 GEN_INT (32))); |
5467 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3), | 5846 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3), |
5468 gen_lowpart (TImode, operands[2]), | 5847 gen_lowpart (V1TImode, operands[2]), |
5469 GEN_INT (32))); | 5848 GEN_INT (32))); |
5470 | 5849 |
5471 t4 = gen_reg_rtx (V2DImode); | 5850 t4 = gen_reg_rtx (V2DImode); |
5472 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3)); | 5851 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3)); |
5473 | 5852 |
5474 emit_insn (gen_addv2di3 (operands[0], t1, t4)); | 5853 emit_insn (gen_addv2di3 (operands[0], t1, t4)); |
5482 (match_operand:SI 2 "nonmemory_operand" "xN")))] | 5861 (match_operand:SI 2 "nonmemory_operand" "xN")))] |
5483 "TARGET_AVX" | 5862 "TARGET_AVX" |
5484 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" | 5863 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" |
5485 [(set_attr "type" "sseishft") | 5864 [(set_attr "type" "sseishft") |
5486 (set_attr "prefix" "vex") | 5865 (set_attr "prefix" "vex") |
5866 (set (attr "length_immediate") | |
5867 (if_then_else (match_operand 2 "const_int_operand" "") | |
5868 (const_string "1") | |
5869 (const_string "0"))) | |
5487 (set_attr "mode" "TI")]) | 5870 (set_attr "mode" "TI")]) |
5488 | 5871 |
5489 (define_insn "ashr<mode>3" | 5872 (define_insn "ashr<mode>3" |
5490 [(set (match_operand:SSEMODE24 0 "register_operand" "=x") | 5873 [(set (match_operand:SSEMODE24 0 "register_operand" "=x") |
5491 (ashiftrt:SSEMODE24 | 5874 (ashiftrt:SSEMODE24 |
5493 (match_operand:SI 2 "nonmemory_operand" "xN")))] | 5876 (match_operand:SI 2 "nonmemory_operand" "xN")))] |
5494 "TARGET_SSE2" | 5877 "TARGET_SSE2" |
5495 "psra<ssevecsize>\t{%2, %0|%0, %2}" | 5878 "psra<ssevecsize>\t{%2, %0|%0, %2}" |
5496 [(set_attr "type" "sseishft") | 5879 [(set_attr "type" "sseishft") |
5497 (set_attr "prefix_data16" "1") | 5880 (set_attr "prefix_data16" "1") |
5881 (set (attr "length_immediate") | |
5882 (if_then_else (match_operand 2 "const_int_operand" "") | |
5883 (const_string "1") | |
5884 (const_string "0"))) | |
5885 (set_attr "mode" "TI")]) | |
5886 | |
5887 (define_insn "*avx_lshrv1ti3" | |
5888 [(set (match_operand:V1TI 0 "register_operand" "=x") | |
5889 (lshiftrt:V1TI | |
5890 (match_operand:V1TI 1 "register_operand" "x") | |
5891 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] | |
5892 "TARGET_AVX" | |
5893 { | |
5894 operands[2] = GEN_INT (INTVAL (operands[2]) / 8); | |
5895 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}"; | |
5896 } | |
5897 [(set_attr "type" "sseishft") | |
5898 (set_attr "prefix" "vex") | |
5899 (set_attr "length_immediate" "1") | |
5498 (set_attr "mode" "TI")]) | 5900 (set_attr "mode" "TI")]) |
5499 | 5901 |
5500 (define_insn "*avx_lshr<mode>3" | 5902 (define_insn "*avx_lshr<mode>3" |
5501 [(set (match_operand:SSEMODE248 0 "register_operand" "=x") | 5903 [(set (match_operand:SSEMODE248 0 "register_operand" "=x") |
5502 (lshiftrt:SSEMODE248 | 5904 (lshiftrt:SSEMODE248 |
5504 (match_operand:SI 2 "nonmemory_operand" "xN")))] | 5906 (match_operand:SI 2 "nonmemory_operand" "xN")))] |
5505 "TARGET_AVX" | 5907 "TARGET_AVX" |
5506 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" | 5908 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" |
5507 [(set_attr "type" "sseishft") | 5909 [(set_attr "type" "sseishft") |
5508 (set_attr "prefix" "vex") | 5910 (set_attr "prefix" "vex") |
5911 (set (attr "length_immediate") | |
5912 (if_then_else (match_operand 2 "const_int_operand" "") | |
5913 (const_string "1") | |
5914 (const_string "0"))) | |
5915 (set_attr "mode" "TI")]) | |
5916 | |
5917 (define_insn "sse2_lshrv1ti3" | |
5918 [(set (match_operand:V1TI 0 "register_operand" "=x") | |
5919 (lshiftrt:V1TI | |
5920 (match_operand:V1TI 1 "register_operand" "0") | |
5921 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] | |
5922 "TARGET_SSE2" | |
5923 { | |
5924 operands[2] = GEN_INT (INTVAL (operands[2]) / 8); | |
5925 return "psrldq\t{%2, %0|%0, %2}"; | |
5926 } | |
5927 [(set_attr "type" "sseishft") | |
5928 (set_attr "prefix_data16" "1") | |
5929 (set_attr "length_immediate" "1") | |
5509 (set_attr "mode" "TI")]) | 5930 (set_attr "mode" "TI")]) |
5510 | 5931 |
5511 (define_insn "lshr<mode>3" | 5932 (define_insn "lshr<mode>3" |
5512 [(set (match_operand:SSEMODE248 0 "register_operand" "=x") | 5933 [(set (match_operand:SSEMODE248 0 "register_operand" "=x") |
5513 (lshiftrt:SSEMODE248 | 5934 (lshiftrt:SSEMODE248 |
5515 (match_operand:SI 2 "nonmemory_operand" "xN")))] | 5936 (match_operand:SI 2 "nonmemory_operand" "xN")))] |
5516 "TARGET_SSE2" | 5937 "TARGET_SSE2" |
5517 "psrl<ssevecsize>\t{%2, %0|%0, %2}" | 5938 "psrl<ssevecsize>\t{%2, %0|%0, %2}" |
5518 [(set_attr "type" "sseishft") | 5939 [(set_attr "type" "sseishft") |
5519 (set_attr "prefix_data16" "1") | 5940 (set_attr "prefix_data16" "1") |
5941 (set (attr "length_immediate") | |
5942 (if_then_else (match_operand 2 "const_int_operand" "") | |
5943 (const_string "1") | |
5944 (const_string "0"))) | |
5945 (set_attr "mode" "TI")]) | |
5946 | |
5947 (define_insn "*avx_ashlv1ti3" | |
5948 [(set (match_operand:V1TI 0 "register_operand" "=x") | |
5949 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x") | |
5950 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] | |
5951 "TARGET_AVX" | |
5952 { | |
5953 operands[2] = GEN_INT (INTVAL (operands[2]) / 8); | |
5954 return "vpslldq\t{%2, %1, %0|%0, %1, %2}"; | |
5955 } | |
5956 [(set_attr "type" "sseishft") | |
5957 (set_attr "prefix" "vex") | |
5958 (set_attr "length_immediate" "1") | |
5520 (set_attr "mode" "TI")]) | 5959 (set_attr "mode" "TI")]) |
5521 | 5960 |
5522 (define_insn "*avx_ashl<mode>3" | 5961 (define_insn "*avx_ashl<mode>3" |
5523 [(set (match_operand:SSEMODE248 0 "register_operand" "=x") | 5962 [(set (match_operand:SSEMODE248 0 "register_operand" "=x") |
5524 (ashift:SSEMODE248 | 5963 (ashift:SSEMODE248 |
5526 (match_operand:SI 2 "nonmemory_operand" "xN")))] | 5965 (match_operand:SI 2 "nonmemory_operand" "xN")))] |
5527 "TARGET_AVX" | 5966 "TARGET_AVX" |
5528 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" | 5967 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" |
5529 [(set_attr "type" "sseishft") | 5968 [(set_attr "type" "sseishft") |
5530 (set_attr "prefix" "vex") | 5969 (set_attr "prefix" "vex") |
5970 (set (attr "length_immediate") | |
5971 (if_then_else (match_operand 2 "const_int_operand" "") | |
5972 (const_string "1") | |
5973 (const_string "0"))) | |
5974 (set_attr "mode" "TI")]) | |
5975 | |
5976 (define_insn "sse2_ashlv1ti3" | |
5977 [(set (match_operand:V1TI 0 "register_operand" "=x") | |
5978 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0") | |
5979 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))] | |
5980 "TARGET_SSE2" | |
5981 { | |
5982 operands[2] = GEN_INT (INTVAL (operands[2]) / 8); | |
5983 return "pslldq\t{%2, %0|%0, %2}"; | |
5984 } | |
5985 [(set_attr "type" "sseishft") | |
5986 (set_attr "prefix_data16" "1") | |
5987 (set_attr "length_immediate" "1") | |
5531 (set_attr "mode" "TI")]) | 5988 (set_attr "mode" "TI")]) |
5532 | 5989 |
5533 (define_insn "ashl<mode>3" | 5990 (define_insn "ashl<mode>3" |
5534 [(set (match_operand:SSEMODE248 0 "register_operand" "=x") | 5991 [(set (match_operand:SSEMODE248 0 "register_operand" "=x") |
5535 (ashift:SSEMODE248 | 5992 (ashift:SSEMODE248 |
5537 (match_operand:SI 2 "nonmemory_operand" "xN")))] | 5994 (match_operand:SI 2 "nonmemory_operand" "xN")))] |
5538 "TARGET_SSE2" | 5995 "TARGET_SSE2" |
5539 "psll<ssevecsize>\t{%2, %0|%0, %2}" | 5996 "psll<ssevecsize>\t{%2, %0|%0, %2}" |
5540 [(set_attr "type" "sseishft") | 5997 [(set_attr "type" "sseishft") |
5541 (set_attr "prefix_data16" "1") | 5998 (set_attr "prefix_data16" "1") |
5999 (set (attr "length_immediate") | |
6000 (if_then_else (match_operand 2 "const_int_operand" "") | |
6001 (const_string "1") | |
6002 (const_string "0"))) | |
5542 (set_attr "mode" "TI")]) | 6003 (set_attr "mode" "TI")]) |
5543 | 6004 |
5544 (define_expand "vec_shl_<mode>" | 6005 (define_expand "vec_shl_<mode>" |
5545 [(set (match_operand:SSEMODEI 0 "register_operand" "") | 6006 [(set (match_operand:SSEMODEI 0 "register_operand" "") |
5546 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "") | 6007 (ashift:V1TI |
5547 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))] | 6008 (match_operand:SSEMODEI 1 "register_operand" "") |
5548 "TARGET_SSE2" | 6009 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))] |
5549 { | 6010 "TARGET_SSE2" |
5550 operands[0] = gen_lowpart (TImode, operands[0]); | 6011 { |
5551 operands[1] = gen_lowpart (TImode, operands[1]); | 6012 operands[0] = gen_lowpart (V1TImode, operands[0]); |
6013 operands[1] = gen_lowpart (V1TImode, operands[1]); | |
5552 }) | 6014 }) |
5553 | 6015 |
5554 (define_expand "vec_shr_<mode>" | 6016 (define_expand "vec_shr_<mode>" |
5555 [(set (match_operand:SSEMODEI 0 "register_operand" "") | 6017 [(set (match_operand:SSEMODEI 0 "register_operand" "") |
5556 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "") | 6018 (lshiftrt:V1TI |
5557 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))] | 6019 (match_operand:SSEMODEI 1 "register_operand" "") |
5558 "TARGET_SSE2" | 6020 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))] |
5559 { | 6021 "TARGET_SSE2" |
5560 operands[0] = gen_lowpart (TImode, operands[0]); | 6022 { |
5561 operands[1] = gen_lowpart (TImode, operands[1]); | 6023 operands[0] = gen_lowpart (V1TImode, operands[0]); |
6024 operands[1] = gen_lowpart (V1TImode, operands[1]); | |
5562 }) | 6025 }) |
5563 | 6026 |
5564 (define_insn "*avx_<code><mode>3" | 6027 (define_insn "*avx_<code><mode>3" |
5565 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") | 6028 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") |
5566 (maxmin:SSEMODE124 | 6029 (maxmin:SSEMODE124 |
5567 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x") | 6030 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x") |
5568 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] | 6031 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] |
5569 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | 6032 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" |
5570 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}" | 6033 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}" |
5571 [(set_attr "type" "sseiadd") | 6034 [(set_attr "type" "sseiadd") |
6035 (set (attr "prefix_extra") | |
6036 (if_then_else | |
6037 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)") | |
6038 (const_int 0)) | |
6039 (const_string "1") | |
6040 (const_string "0"))) | |
5572 (set_attr "prefix" "vex") | 6041 (set_attr "prefix" "vex") |
5573 (set_attr "mode" "TI")]) | 6042 (set_attr "mode" "TI")]) |
5574 | 6043 |
5575 (define_expand "<code>v16qi3" | 6044 (define_expand "<code>v16qi3" |
5576 [(set (match_operand:V16QI 0 "register_operand" "") | 6045 [(set (match_operand:V16QI 0 "register_operand" "") |
5760 (define_expand "sse2_eq<mode>3" | 6229 (define_expand "sse2_eq<mode>3" |
5761 [(set (match_operand:SSEMODE124 0 "register_operand" "") | 6230 [(set (match_operand:SSEMODE124 0 "register_operand" "") |
5762 (eq:SSEMODE124 | 6231 (eq:SSEMODE124 |
5763 (match_operand:SSEMODE124 1 "nonimmediate_operand" "") | 6232 (match_operand:SSEMODE124 1 "nonimmediate_operand" "") |
5764 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))] | 6233 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))] |
5765 "TARGET_SSE2 && !TARGET_SSE5" | 6234 "TARGET_SSE2 && !TARGET_XOP " |
5766 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") | 6235 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") |
5767 | 6236 |
5768 (define_insn "*avx_eq<mode>3" | 6237 (define_insn "*avx_eq<mode>3" |
5769 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") | 6238 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") |
5770 (eq:SSEMODE1248 | 6239 (eq:SSEMODE1248 |
5771 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x") | 6240 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x") |
5772 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))] | 6241 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))] |
5773 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" | 6242 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" |
5774 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" | 6243 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" |
5775 [(set_attr "type" "ssecmp") | 6244 [(set_attr "type" "ssecmp") |
6245 (set (attr "prefix_extra") | |
6246 (if_then_else (match_operand:V2DI 0 "" "") | |
6247 (const_string "1") | |
6248 (const_string "*"))) | |
5776 (set_attr "prefix" "vex") | 6249 (set_attr "prefix" "vex") |
5777 (set_attr "mode" "TI")]) | 6250 (set_attr "mode" "TI")]) |
5778 | 6251 |
5779 (define_insn "*sse2_eq<mode>3" | 6252 (define_insn "*sse2_eq<mode>3" |
5780 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") | 6253 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") |
5781 (eq:SSEMODE124 | 6254 (eq:SSEMODE124 |
5782 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0") | 6255 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0") |
5783 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] | 6256 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] |
5784 "TARGET_SSE2 && !TARGET_SSE5 | 6257 "TARGET_SSE2 && !TARGET_XOP |
5785 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" | 6258 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" |
5786 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}" | 6259 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}" |
5787 [(set_attr "type" "ssecmp") | 6260 [(set_attr "type" "ssecmp") |
5788 (set_attr "prefix_data16" "1") | 6261 (set_attr "prefix_data16" "1") |
5789 (set_attr "mode" "TI")]) | 6262 (set_attr "mode" "TI")]) |
5813 (match_operand:SSEMODE1248 1 "register_operand" "x") | 6286 (match_operand:SSEMODE1248 1 "register_operand" "x") |
5814 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))] | 6287 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))] |
5815 "TARGET_AVX" | 6288 "TARGET_AVX" |
5816 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" | 6289 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" |
5817 [(set_attr "type" "ssecmp") | 6290 [(set_attr "type" "ssecmp") |
6291 (set (attr "prefix_extra") | |
6292 (if_then_else (match_operand:V2DI 0 "" "") | |
6293 (const_string "1") | |
6294 (const_string "*"))) | |
5818 (set_attr "prefix" "vex") | 6295 (set_attr "prefix" "vex") |
5819 (set_attr "mode" "TI")]) | 6296 (set_attr "mode" "TI")]) |
5820 | 6297 |
5821 (define_insn "sse2_gt<mode>3" | 6298 (define_insn "sse2_gt<mode>3" |
5822 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") | 6299 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") |
5823 (gt:SSEMODE124 | 6300 (gt:SSEMODE124 |
5824 (match_operand:SSEMODE124 1 "register_operand" "0") | 6301 (match_operand:SSEMODE124 1 "register_operand" "0") |
5825 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] | 6302 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] |
5826 "TARGET_SSE2 && !TARGET_SSE5" | 6303 "TARGET_SSE2 && !TARGET_XOP" |
5827 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}" | 6304 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}" |
5828 [(set_attr "type" "ssecmp") | 6305 [(set_attr "type" "ssecmp") |
5829 (set_attr "prefix_data16" "1") | 6306 (set_attr "prefix_data16" "1") |
5830 (set_attr "mode" "TI")]) | 6307 (set_attr "mode" "TI")]) |
5831 | 6308 |
5835 (match_operand:V2DI 1 "register_operand" "0") | 6312 (match_operand:V2DI 1 "register_operand" "0") |
5836 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] | 6313 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] |
5837 "TARGET_SSE4_2" | 6314 "TARGET_SSE4_2" |
5838 "pcmpgtq\t{%2, %0|%0, %2}" | 6315 "pcmpgtq\t{%2, %0|%0, %2}" |
5839 [(set_attr "type" "ssecmp") | 6316 [(set_attr "type" "ssecmp") |
6317 (set_attr "prefix_extra" "1") | |
5840 (set_attr "mode" "TI")]) | 6318 (set_attr "mode" "TI")]) |
5841 | 6319 |
5842 (define_expand "vcond<mode>" | 6320 (define_expand "vcond<mode>" |
5843 [(set (match_operand:SSEMODE124C8 0 "register_operand" "") | 6321 [(set (match_operand:SSEMODE124C8 0 "register_operand" "") |
5844 (if_then_else:SSEMODE124C8 | 6322 (if_then_else:SSEMODE124C8 |
5944 (set_attr "prefix_data16" "1") | 6422 (set_attr "prefix_data16" "1") |
5945 (set_attr "mode" "TI")]) | 6423 (set_attr "mode" "TI")]) |
5946 | 6424 |
5947 (define_expand "<code><mode>3" | 6425 (define_expand "<code><mode>3" |
5948 [(set (match_operand:SSEMODEI 0 "register_operand" "") | 6426 [(set (match_operand:SSEMODEI 0 "register_operand" "") |
5949 (plogic:SSEMODEI | 6427 (any_logic:SSEMODEI |
5950 (match_operand:SSEMODEI 1 "nonimmediate_operand" "") | 6428 (match_operand:SSEMODEI 1 "nonimmediate_operand" "") |
5951 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] | 6429 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] |
5952 "TARGET_SSE" | 6430 "TARGET_SSE" |
5953 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") | 6431 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") |
5954 | 6432 |
5955 (define_insn "*avx_<code><mode>3" | 6433 (define_insn "*avx_<code><mode>3" |
5956 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x") | 6434 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x") |
5957 (plogic:AVX256MODEI | 6435 (any_logic:AVX256MODEI |
5958 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x") | 6436 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x") |
5959 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))] | 6437 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))] |
5960 "TARGET_AVX | 6438 "TARGET_AVX |
5961 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | 6439 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" |
5962 "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}" | 6440 "v<logicprefix>ps\t{%2, %1, %0|%0, %1, %2}" |
5963 [(set_attr "type" "sselog") | 6441 [(set_attr "type" "sselog") |
5964 (set_attr "prefix" "vex") | 6442 (set_attr "prefix" "vex") |
5965 (set_attr "mode" "<avxvecpsmode>")]) | 6443 (set_attr "mode" "<avxvecpsmode>")]) |
5966 | 6444 |
5967 (define_insn "*sse_<code><mode>3" | 6445 (define_insn "*sse_<code><mode>3" |
5968 [(set (match_operand:SSEMODEI 0 "register_operand" "=x") | 6446 [(set (match_operand:SSEMODEI 0 "register_operand" "=x") |
5969 (plogic:SSEMODEI | 6447 (any_logic:SSEMODEI |
5970 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") | 6448 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") |
5971 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] | 6449 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] |
5972 "(TARGET_SSE && !TARGET_SSE2) | 6450 "(TARGET_SSE && !TARGET_SSE2) |
5973 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | 6451 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" |
5974 "<plogicprefix>ps\t{%2, %0|%0, %2}" | 6452 "<logicprefix>ps\t{%2, %0|%0, %2}" |
5975 [(set_attr "type" "sselog") | 6453 [(set_attr "type" "sselog") |
5976 (set_attr "mode" "V4SF")]) | 6454 (set_attr "mode" "V4SF")]) |
5977 | 6455 |
5978 (define_insn "*avx_<code><mode>3" | 6456 (define_insn "*avx_<code><mode>3" |
5979 [(set (match_operand:SSEMODEI 0 "register_operand" "=x") | 6457 [(set (match_operand:SSEMODEI 0 "register_operand" "=x") |
5980 (plogic:SSEMODEI | 6458 (any_logic:SSEMODEI |
5981 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x") | 6459 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x") |
5982 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] | 6460 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] |
5983 "TARGET_AVX | 6461 "TARGET_AVX |
5984 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | 6462 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" |
5985 "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}" | 6463 "vp<logicprefix>\t{%2, %1, %0|%0, %1, %2}" |
5986 [(set_attr "type" "sselog") | 6464 [(set_attr "type" "sselog") |
5987 (set_attr "prefix" "vex") | 6465 (set_attr "prefix" "vex") |
5988 (set_attr "mode" "TI")]) | 6466 (set_attr "mode" "TI")]) |
5989 | 6467 |
5990 (define_insn "*sse2_<code><mode>3" | 6468 (define_insn "*sse2_<code><mode>3" |
5991 [(set (match_operand:SSEMODEI 0 "register_operand" "=x") | 6469 [(set (match_operand:SSEMODEI 0 "register_operand" "=x") |
5992 (plogic:SSEMODEI | 6470 (any_logic:SSEMODEI |
5993 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") | 6471 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") |
5994 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] | 6472 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] |
5995 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" | 6473 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" |
5996 "p<plogicprefix>\t{%2, %0|%0, %2}" | 6474 "p<logicprefix>\t{%2, %0|%0, %2}" |
5997 [(set_attr "type" "sselog") | 6475 [(set_attr "type" "sselog") |
5998 (set_attr "prefix_data16" "1") | 6476 (set_attr "prefix_data16" "1") |
5999 (set_attr "mode" "TI")]) | 6477 (set_attr "mode" "TI")]) |
6000 | 6478 |
6001 (define_expand "<code>tf3" | 6479 (define_expand "<code>tf3" |
6002 [(set (match_operand:TF 0 "register_operand" "") | 6480 [(set (match_operand:TF 0 "register_operand" "") |
6003 (plogic:TF | 6481 (any_logic:TF |
6004 (match_operand:TF 1 "nonimmediate_operand" "") | 6482 (match_operand:TF 1 "nonimmediate_operand" "") |
6005 (match_operand:TF 2 "nonimmediate_operand" "")))] | 6483 (match_operand:TF 2 "nonimmediate_operand" "")))] |
6006 "TARGET_SSE2" | 6484 "TARGET_SSE2" |
6007 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);") | 6485 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);") |
6008 | 6486 |
6009 (define_insn "*<code>tf3" | 6487 (define_insn "*<code>tf3" |
6010 [(set (match_operand:TF 0 "register_operand" "=x") | 6488 [(set (match_operand:TF 0 "register_operand" "=x") |
6011 (plogic:TF | 6489 (any_logic:TF |
6012 (match_operand:TF 1 "nonimmediate_operand" "%0") | 6490 (match_operand:TF 1 "nonimmediate_operand" "%0") |
6013 (match_operand:TF 2 "nonimmediate_operand" "xm")))] | 6491 (match_operand:TF 2 "nonimmediate_operand" "xm")))] |
6014 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)" | 6492 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)" |
6015 "p<plogicprefix>\t{%2, %0|%0, %2}" | 6493 "p<logicprefix>\t{%2, %0|%0, %2}" |
6016 [(set_attr "type" "sselog") | 6494 [(set_attr "type" "sselog") |
6017 (set_attr "prefix_data16" "1") | 6495 (set_attr "prefix_data16" "1") |
6018 (set_attr "mode" "TI")]) | 6496 (set_attr "mode" "TI")]) |
6019 | 6497 |
6020 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 6498 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
6021 ;; | 6499 ;; |
6022 ;; Parallel integral element swizzling | 6500 ;; Parallel integral element swizzling |
6023 ;; | 6501 ;; |
6024 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 6502 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
6025 | 6503 |
6026 ;; Reduce: | |
6027 ;; op1 = abcdefghijklmnop | |
6028 ;; op2 = qrstuvwxyz012345 | |
6029 ;; h1 = aqbrcsdteufvgwhx | |
6030 ;; l1 = iyjzk0l1m2n3o4p5 | |
6031 ;; h2 = aiqybjrzcks0dlt1 | |
6032 ;; l2 = emu2fnv3gow4hpx5 | |
6033 ;; h3 = aeimquy2bfjnrvz3 | |
6034 ;; l3 = cgkosw04dhlptx15 | |
6035 ;; result = bdfhjlnprtvxz135 | |
6036 (define_expand "vec_pack_trunc_v8hi" | 6504 (define_expand "vec_pack_trunc_v8hi" |
6037 [(match_operand:V16QI 0 "register_operand" "") | 6505 [(match_operand:V16QI 0 "register_operand" "") |
6038 (match_operand:V8HI 1 "register_operand" "") | 6506 (match_operand:V8HI 1 "register_operand" "") |
6039 (match_operand:V8HI 2 "register_operand" "")] | 6507 (match_operand:V8HI 2 "register_operand" "")] |
6040 "TARGET_SSE2" | 6508 "TARGET_SSE2" |
6041 { | 6509 { |
6042 rtx op1, op2, h1, l1, h2, l2, h3, l3; | 6510 rtx op1 = gen_lowpart (V16QImode, operands[1]); |
6043 | 6511 rtx op2 = gen_lowpart (V16QImode, operands[2]); |
6044 if (TARGET_SSE5) | 6512 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); |
6045 { | |
6046 ix86_expand_sse5_pack (operands); | |
6047 DONE; | |
6048 } | |
6049 | |
6050 op1 = gen_lowpart (V16QImode, operands[1]); | |
6051 op2 = gen_lowpart (V16QImode, operands[2]); | |
6052 h1 = gen_reg_rtx (V16QImode); | |
6053 l1 = gen_reg_rtx (V16QImode); | |
6054 h2 = gen_reg_rtx (V16QImode); | |
6055 l2 = gen_reg_rtx (V16QImode); | |
6056 h3 = gen_reg_rtx (V16QImode); | |
6057 l3 = gen_reg_rtx (V16QImode); | |
6058 | |
6059 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2)); | |
6060 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2)); | |
6061 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1)); | |
6062 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1)); | |
6063 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2)); | |
6064 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2)); | |
6065 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3)); | |
6066 DONE; | 6513 DONE; |
6067 }) | 6514 }) |
6068 | 6515 |
6069 ;; Reduce: | |
6070 ;; op1 = abcdefgh | |
6071 ;; op2 = ijklmnop | |
6072 ;; h1 = aibjckdl | |
6073 ;; l1 = emfngohp | |
6074 ;; h2 = aeimbfjn | |
6075 ;; l2 = cgkodhlp | |
6076 ;; result = bdfhjlnp | |
6077 (define_expand "vec_pack_trunc_v4si" | 6516 (define_expand "vec_pack_trunc_v4si" |
6078 [(match_operand:V8HI 0 "register_operand" "") | 6517 [(match_operand:V8HI 0 "register_operand" "") |
6079 (match_operand:V4SI 1 "register_operand" "") | 6518 (match_operand:V4SI 1 "register_operand" "") |
6080 (match_operand:V4SI 2 "register_operand" "")] | 6519 (match_operand:V4SI 2 "register_operand" "")] |
6081 "TARGET_SSE2" | 6520 "TARGET_SSE2" |
6082 { | 6521 { |
6083 rtx op1, op2, h1, l1, h2, l2; | 6522 rtx op1 = gen_lowpart (V8HImode, operands[1]); |
6084 | 6523 rtx op2 = gen_lowpart (V8HImode, operands[2]); |
6085 if (TARGET_SSE5) | 6524 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); |
6086 { | |
6087 ix86_expand_sse5_pack (operands); | |
6088 DONE; | |
6089 } | |
6090 | |
6091 op1 = gen_lowpart (V8HImode, operands[1]); | |
6092 op2 = gen_lowpart (V8HImode, operands[2]); | |
6093 h1 = gen_reg_rtx (V8HImode); | |
6094 l1 = gen_reg_rtx (V8HImode); | |
6095 h2 = gen_reg_rtx (V8HImode); | |
6096 l2 = gen_reg_rtx (V8HImode); | |
6097 | |
6098 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2)); | |
6099 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2)); | |
6100 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1)); | |
6101 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1)); | |
6102 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2)); | |
6103 DONE; | 6525 DONE; |
6104 }) | 6526 }) |
6105 | 6527 |
6106 ;; Reduce: | |
6107 ;; op1 = abcd | |
6108 ;; op2 = efgh | |
6109 ;; h1 = aebf | |
6110 ;; l1 = cgdh | |
6111 ;; result = bdfh | |
6112 (define_expand "vec_pack_trunc_v2di" | 6528 (define_expand "vec_pack_trunc_v2di" |
6113 [(match_operand:V4SI 0 "register_operand" "") | 6529 [(match_operand:V4SI 0 "register_operand" "") |
6114 (match_operand:V2DI 1 "register_operand" "") | 6530 (match_operand:V2DI 1 "register_operand" "") |
6115 (match_operand:V2DI 2 "register_operand" "")] | 6531 (match_operand:V2DI 2 "register_operand" "")] |
6116 "TARGET_SSE2" | 6532 "TARGET_SSE2" |
6117 { | 6533 { |
6118 rtx op1, op2, h1, l1; | 6534 rtx op1 = gen_lowpart (V4SImode, operands[1]); |
6119 | 6535 rtx op2 = gen_lowpart (V4SImode, operands[2]); |
6120 if (TARGET_SSE5) | 6536 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0); |
6121 { | |
6122 ix86_expand_sse5_pack (operands); | |
6123 DONE; | |
6124 } | |
6125 | |
6126 op1 = gen_lowpart (V4SImode, operands[1]); | |
6127 op2 = gen_lowpart (V4SImode, operands[2]); | |
6128 h1 = gen_reg_rtx (V4SImode); | |
6129 l1 = gen_reg_rtx (V4SImode); | |
6130 | |
6131 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2)); | |
6132 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2)); | |
6133 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1)); | |
6134 DONE; | 6537 DONE; |
6135 }) | 6538 }) |
6136 | |
6137 (define_expand "vec_interleave_highv16qi" | |
6138 [(set (match_operand:V16QI 0 "register_operand" "") | |
6139 (vec_select:V16QI | |
6140 (vec_concat:V32QI | |
6141 (match_operand:V16QI 1 "register_operand" "") | |
6142 (match_operand:V16QI 2 "nonimmediate_operand" "")) | |
6143 (parallel [(const_int 8) (const_int 24) | |
6144 (const_int 9) (const_int 25) | |
6145 (const_int 10) (const_int 26) | |
6146 (const_int 11) (const_int 27) | |
6147 (const_int 12) (const_int 28) | |
6148 (const_int 13) (const_int 29) | |
6149 (const_int 14) (const_int 30) | |
6150 (const_int 15) (const_int 31)])))] | |
6151 "TARGET_SSE2" | |
6152 { | |
6153 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2])); | |
6154 DONE; | |
6155 }) | |
6156 | |
6157 (define_expand "vec_interleave_lowv16qi" | |
6158 [(set (match_operand:V16QI 0 "register_operand" "") | |
6159 (vec_select:V16QI | |
6160 (vec_concat:V32QI | |
6161 (match_operand:V16QI 1 "register_operand" "") | |
6162 (match_operand:V16QI 2 "nonimmediate_operand" "")) | |
6163 (parallel [(const_int 0) (const_int 16) | |
6164 (const_int 1) (const_int 17) | |
6165 (const_int 2) (const_int 18) | |
6166 (const_int 3) (const_int 19) | |
6167 (const_int 4) (const_int 20) | |
6168 (const_int 5) (const_int 21) | |
6169 (const_int 6) (const_int 22) | |
6170 (const_int 7) (const_int 23)])))] | |
6171 "TARGET_SSE2" | |
6172 { | |
6173 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2])); | |
6174 DONE; | |
6175 }) | |
6176 | |
6177 (define_expand "vec_interleave_highv8hi" | |
6178 [(set (match_operand:V8HI 0 "register_operand" "=") | |
6179 (vec_select:V8HI | |
6180 (vec_concat:V16HI | |
6181 (match_operand:V8HI 1 "register_operand" "") | |
6182 (match_operand:V8HI 2 "nonimmediate_operand" "")) | |
6183 (parallel [(const_int 4) (const_int 12) | |
6184 (const_int 5) (const_int 13) | |
6185 (const_int 6) (const_int 14) | |
6186 (const_int 7) (const_int 15)])))] | |
6187 "TARGET_SSE2" | |
6188 { | |
6189 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2])); | |
6190 DONE; | |
6191 }) | |
6192 | |
6193 (define_expand "vec_interleave_lowv8hi" | |
6194 [(set (match_operand:V8HI 0 "register_operand" "") | |
6195 (vec_select:V8HI | |
6196 (vec_concat:V16HI | |
6197 (match_operand:V8HI 1 "register_operand" "") | |
6198 (match_operand:V8HI 2 "nonimmediate_operand" "")) | |
6199 (parallel [(const_int 0) (const_int 8) | |
6200 (const_int 1) (const_int 9) | |
6201 (const_int 2) (const_int 10) | |
6202 (const_int 3) (const_int 11)])))] | |
6203 "TARGET_SSE2" | |
6204 { | |
6205 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2])); | |
6206 DONE; | |
6207 }) | |
6208 | |
6209 (define_expand "vec_interleave_highv4si" | |
6210 [(set (match_operand:V4SI 0 "register_operand" "") | |
6211 (vec_select:V4SI | |
6212 (vec_concat:V8SI | |
6213 (match_operand:V4SI 1 "register_operand" "") | |
6214 (match_operand:V4SI 2 "nonimmediate_operand" "")) | |
6215 (parallel [(const_int 2) (const_int 6) | |
6216 (const_int 3) (const_int 7)])))] | |
6217 "TARGET_SSE2" | |
6218 { | |
6219 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2])); | |
6220 DONE; | |
6221 }) | |
6222 | |
6223 (define_expand "vec_interleave_lowv4si" | |
6224 [(set (match_operand:V4SI 0 "register_operand" "") | |
6225 (vec_select:V4SI | |
6226 (vec_concat:V8SI | |
6227 (match_operand:V4SI 1 "register_operand" "") | |
6228 (match_operand:V4SI 2 "nonimmediate_operand" "")) | |
6229 (parallel [(const_int 0) (const_int 4) | |
6230 (const_int 1) (const_int 5)])))] | |
6231 "TARGET_SSE2" | |
6232 { | |
6233 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2])); | |
6234 DONE; | |
6235 }) | |
6236 | |
6237 (define_expand "vec_interleave_highv2di" | |
6238 [(set (match_operand:V2DI 0 "register_operand" "") | |
6239 (vec_select:V2DI | |
6240 (vec_concat:V4DI | |
6241 (match_operand:V2DI 1 "register_operand" "") | |
6242 (match_operand:V2DI 2 "nonimmediate_operand" "")) | |
6243 (parallel [(const_int 1) | |
6244 (const_int 3)])))] | |
6245 "TARGET_SSE2" | |
6246 { | |
6247 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2])); | |
6248 DONE; | |
6249 }) | |
6250 | |
6251 (define_expand "vec_interleave_lowv2di" | |
6252 [(set (match_operand:V2DI 0 "register_operand" "") | |
6253 (vec_select:V2DI | |
6254 (vec_concat:V4DI | |
6255 (match_operand:V2DI 1 "register_operand" "") | |
6256 (match_operand:V2DI 2 "nonimmediate_operand" "")) | |
6257 (parallel [(const_int 0) | |
6258 (const_int 2)])))] | |
6259 "TARGET_SSE2" | |
6260 { | |
6261 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2])); | |
6262 DONE; | |
6263 }) | |
6264 | |
6265 (define_expand "vec_interleave_highv4sf" | |
6266 [(set (match_operand:V4SF 0 "register_operand" "") | |
6267 (vec_select:V4SF | |
6268 (vec_concat:V8SF | |
6269 (match_operand:V4SF 1 "register_operand" "") | |
6270 (match_operand:V4SF 2 "nonimmediate_operand" "")) | |
6271 (parallel [(const_int 2) (const_int 6) | |
6272 (const_int 3) (const_int 7)])))] | |
6273 "TARGET_SSE") | |
6274 | |
6275 (define_expand "vec_interleave_lowv4sf" | |
6276 [(set (match_operand:V4SF 0 "register_operand" "") | |
6277 (vec_select:V4SF | |
6278 (vec_concat:V8SF | |
6279 (match_operand:V4SF 1 "register_operand" "") | |
6280 (match_operand:V4SF 2 "nonimmediate_operand" "")) | |
6281 (parallel [(const_int 0) (const_int 4) | |
6282 (const_int 1) (const_int 5)])))] | |
6283 "TARGET_SSE") | |
6284 | |
6285 (define_expand "vec_interleave_highv2df" | |
6286 [(set (match_operand:V2DF 0 "register_operand" "") | |
6287 (vec_select:V2DF | |
6288 (vec_concat:V4DF | |
6289 (match_operand:V2DF 1 "register_operand" "") | |
6290 (match_operand:V2DF 2 "nonimmediate_operand" "")) | |
6291 (parallel [(const_int 1) | |
6292 (const_int 3)])))] | |
6293 "TARGET_SSE2") | |
6294 | |
6295 (define_expand "vec_interleave_lowv2df" | |
6296 [(set (match_operand:V2DF 0 "register_operand" "") | |
6297 (vec_select:V2DF | |
6298 (vec_concat:V4DF | |
6299 (match_operand:V2DF 1 "register_operand" "") | |
6300 (match_operand:V2DF 2 "nonimmediate_operand" "")) | |
6301 (parallel [(const_int 0) | |
6302 (const_int 2)])))] | |
6303 "TARGET_SSE2") | |
6304 | 6539 |
6305 (define_insn "*avx_packsswb" | 6540 (define_insn "*avx_packsswb" |
6306 [(set (match_operand:V16QI 0 "register_operand" "=x") | 6541 [(set (match_operand:V16QI 0 "register_operand" "=x") |
6307 (vec_concat:V16QI | 6542 (vec_concat:V16QI |
6308 (ss_truncate:V8QI | 6543 (ss_truncate:V8QI |
6378 "packuswb\t{%2, %0|%0, %2}" | 6613 "packuswb\t{%2, %0|%0, %2}" |
6379 [(set_attr "type" "sselog") | 6614 [(set_attr "type" "sselog") |
6380 (set_attr "prefix_data16" "1") | 6615 (set_attr "prefix_data16" "1") |
6381 (set_attr "mode" "TI")]) | 6616 (set_attr "mode" "TI")]) |
6382 | 6617 |
6383 (define_insn "*avx_punpckhbw" | 6618 (define_insn "*avx_interleave_highv16qi" |
6384 [(set (match_operand:V16QI 0 "register_operand" "=x") | 6619 [(set (match_operand:V16QI 0 "register_operand" "=x") |
6385 (vec_select:V16QI | 6620 (vec_select:V16QI |
6386 (vec_concat:V32QI | 6621 (vec_concat:V32QI |
6387 (match_operand:V16QI 1 "register_operand" "x") | 6622 (match_operand:V16QI 1 "register_operand" "x") |
6388 (match_operand:V16QI 2 "nonimmediate_operand" "xm")) | 6623 (match_operand:V16QI 2 "nonimmediate_operand" "xm")) |
6398 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}" | 6633 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}" |
6399 [(set_attr "type" "sselog") | 6634 [(set_attr "type" "sselog") |
6400 (set_attr "prefix" "vex") | 6635 (set_attr "prefix" "vex") |
6401 (set_attr "mode" "TI")]) | 6636 (set_attr "mode" "TI")]) |
6402 | 6637 |
6403 (define_insn "sse2_punpckhbw" | 6638 (define_insn "vec_interleave_highv16qi" |
6404 [(set (match_operand:V16QI 0 "register_operand" "=x") | 6639 [(set (match_operand:V16QI 0 "register_operand" "=x") |
6405 (vec_select:V16QI | 6640 (vec_select:V16QI |
6406 (vec_concat:V32QI | 6641 (vec_concat:V32QI |
6407 (match_operand:V16QI 1 "register_operand" "0") | 6642 (match_operand:V16QI 1 "register_operand" "0") |
6408 (match_operand:V16QI 2 "nonimmediate_operand" "xm")) | 6643 (match_operand:V16QI 2 "nonimmediate_operand" "xm")) |
6418 "punpckhbw\t{%2, %0|%0, %2}" | 6653 "punpckhbw\t{%2, %0|%0, %2}" |
6419 [(set_attr "type" "sselog") | 6654 [(set_attr "type" "sselog") |
6420 (set_attr "prefix_data16" "1") | 6655 (set_attr "prefix_data16" "1") |
6421 (set_attr "mode" "TI")]) | 6656 (set_attr "mode" "TI")]) |
6422 | 6657 |
6423 (define_insn "*avx_punpcklbw" | 6658 (define_insn "*avx_interleave_lowv16qi" |
6424 [(set (match_operand:V16QI 0 "register_operand" "=x") | 6659 [(set (match_operand:V16QI 0 "register_operand" "=x") |
6425 (vec_select:V16QI | 6660 (vec_select:V16QI |
6426 (vec_concat:V32QI | 6661 (vec_concat:V32QI |
6427 (match_operand:V16QI 1 "register_operand" "x") | 6662 (match_operand:V16QI 1 "register_operand" "x") |
6428 (match_operand:V16QI 2 "nonimmediate_operand" "xm")) | 6663 (match_operand:V16QI 2 "nonimmediate_operand" "xm")) |
6438 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}" | 6673 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}" |
6439 [(set_attr "type" "sselog") | 6674 [(set_attr "type" "sselog") |
6440 (set_attr "prefix" "vex") | 6675 (set_attr "prefix" "vex") |
6441 (set_attr "mode" "TI")]) | 6676 (set_attr "mode" "TI")]) |
6442 | 6677 |
6443 (define_insn "sse2_punpcklbw" | 6678 (define_insn "vec_interleave_lowv16qi" |
6444 [(set (match_operand:V16QI 0 "register_operand" "=x") | 6679 [(set (match_operand:V16QI 0 "register_operand" "=x") |
6445 (vec_select:V16QI | 6680 (vec_select:V16QI |
6446 (vec_concat:V32QI | 6681 (vec_concat:V32QI |
6447 (match_operand:V16QI 1 "register_operand" "0") | 6682 (match_operand:V16QI 1 "register_operand" "0") |
6448 (match_operand:V16QI 2 "nonimmediate_operand" "xm")) | 6683 (match_operand:V16QI 2 "nonimmediate_operand" "xm")) |
6458 "punpcklbw\t{%2, %0|%0, %2}" | 6693 "punpcklbw\t{%2, %0|%0, %2}" |
6459 [(set_attr "type" "sselog") | 6694 [(set_attr "type" "sselog") |
6460 (set_attr "prefix_data16" "1") | 6695 (set_attr "prefix_data16" "1") |
6461 (set_attr "mode" "TI")]) | 6696 (set_attr "mode" "TI")]) |
6462 | 6697 |
6463 (define_insn "*avx_punpckhwd" | 6698 (define_insn "*avx_interleave_highv8hi" |
6464 [(set (match_operand:V8HI 0 "register_operand" "=x") | 6699 [(set (match_operand:V8HI 0 "register_operand" "=x") |
6465 (vec_select:V8HI | 6700 (vec_select:V8HI |
6466 (vec_concat:V16HI | 6701 (vec_concat:V16HI |
6467 (match_operand:V8HI 1 "register_operand" "x") | 6702 (match_operand:V8HI 1 "register_operand" "x") |
6468 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) | 6703 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) |
6474 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}" | 6709 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}" |
6475 [(set_attr "type" "sselog") | 6710 [(set_attr "type" "sselog") |
6476 (set_attr "prefix" "vex") | 6711 (set_attr "prefix" "vex") |
6477 (set_attr "mode" "TI")]) | 6712 (set_attr "mode" "TI")]) |
6478 | 6713 |
6479 (define_insn "sse2_punpckhwd" | 6714 (define_insn "vec_interleave_highv8hi" |
6480 [(set (match_operand:V8HI 0 "register_operand" "=x") | 6715 [(set (match_operand:V8HI 0 "register_operand" "=x") |
6481 (vec_select:V8HI | 6716 (vec_select:V8HI |
6482 (vec_concat:V16HI | 6717 (vec_concat:V16HI |
6483 (match_operand:V8HI 1 "register_operand" "0") | 6718 (match_operand:V8HI 1 "register_operand" "0") |
6484 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) | 6719 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) |
6490 "punpckhwd\t{%2, %0|%0, %2}" | 6725 "punpckhwd\t{%2, %0|%0, %2}" |
6491 [(set_attr "type" "sselog") | 6726 [(set_attr "type" "sselog") |
6492 (set_attr "prefix_data16" "1") | 6727 (set_attr "prefix_data16" "1") |
6493 (set_attr "mode" "TI")]) | 6728 (set_attr "mode" "TI")]) |
6494 | 6729 |
6495 (define_insn "*avx_punpcklwd" | 6730 (define_insn "*avx_interleave_lowv8hi" |
6496 [(set (match_operand:V8HI 0 "register_operand" "=x") | 6731 [(set (match_operand:V8HI 0 "register_operand" "=x") |
6497 (vec_select:V8HI | 6732 (vec_select:V8HI |
6498 (vec_concat:V16HI | 6733 (vec_concat:V16HI |
6499 (match_operand:V8HI 1 "register_operand" "x") | 6734 (match_operand:V8HI 1 "register_operand" "x") |
6500 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) | 6735 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) |
6506 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}" | 6741 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}" |
6507 [(set_attr "type" "sselog") | 6742 [(set_attr "type" "sselog") |
6508 (set_attr "prefix" "vex") | 6743 (set_attr "prefix" "vex") |
6509 (set_attr "mode" "TI")]) | 6744 (set_attr "mode" "TI")]) |
6510 | 6745 |
6511 (define_insn "sse2_punpcklwd" | 6746 (define_insn "vec_interleave_lowv8hi" |
6512 [(set (match_operand:V8HI 0 "register_operand" "=x") | 6747 [(set (match_operand:V8HI 0 "register_operand" "=x") |
6513 (vec_select:V8HI | 6748 (vec_select:V8HI |
6514 (vec_concat:V16HI | 6749 (vec_concat:V16HI |
6515 (match_operand:V8HI 1 "register_operand" "0") | 6750 (match_operand:V8HI 1 "register_operand" "0") |
6516 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) | 6751 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) |
6522 "punpcklwd\t{%2, %0|%0, %2}" | 6757 "punpcklwd\t{%2, %0|%0, %2}" |
6523 [(set_attr "type" "sselog") | 6758 [(set_attr "type" "sselog") |
6524 (set_attr "prefix_data16" "1") | 6759 (set_attr "prefix_data16" "1") |
6525 (set_attr "mode" "TI")]) | 6760 (set_attr "mode" "TI")]) |
6526 | 6761 |
6527 (define_insn "*avx_punpckhdq" | 6762 (define_insn "*avx_interleave_highv4si" |
6528 [(set (match_operand:V4SI 0 "register_operand" "=x") | 6763 [(set (match_operand:V4SI 0 "register_operand" "=x") |
6529 (vec_select:V4SI | 6764 (vec_select:V4SI |
6530 (vec_concat:V8SI | 6765 (vec_concat:V8SI |
6531 (match_operand:V4SI 1 "register_operand" "x") | 6766 (match_operand:V4SI 1 "register_operand" "x") |
6532 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) | 6767 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) |
6536 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}" | 6771 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}" |
6537 [(set_attr "type" "sselog") | 6772 [(set_attr "type" "sselog") |
6538 (set_attr "prefix" "vex") | 6773 (set_attr "prefix" "vex") |
6539 (set_attr "mode" "TI")]) | 6774 (set_attr "mode" "TI")]) |
6540 | 6775 |
6541 (define_insn "sse2_punpckhdq" | 6776 (define_insn "vec_interleave_highv4si" |
6542 [(set (match_operand:V4SI 0 "register_operand" "=x") | 6777 [(set (match_operand:V4SI 0 "register_operand" "=x") |
6543 (vec_select:V4SI | 6778 (vec_select:V4SI |
6544 (vec_concat:V8SI | 6779 (vec_concat:V8SI |
6545 (match_operand:V4SI 1 "register_operand" "0") | 6780 (match_operand:V4SI 1 "register_operand" "0") |
6546 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) | 6781 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) |
6550 "punpckhdq\t{%2, %0|%0, %2}" | 6785 "punpckhdq\t{%2, %0|%0, %2}" |
6551 [(set_attr "type" "sselog") | 6786 [(set_attr "type" "sselog") |
6552 (set_attr "prefix_data16" "1") | 6787 (set_attr "prefix_data16" "1") |
6553 (set_attr "mode" "TI")]) | 6788 (set_attr "mode" "TI")]) |
6554 | 6789 |
6555 (define_insn "*avx_punpckldq" | 6790 (define_insn "*avx_interleave_lowv4si" |
6556 [(set (match_operand:V4SI 0 "register_operand" "=x") | 6791 [(set (match_operand:V4SI 0 "register_operand" "=x") |
6557 (vec_select:V4SI | 6792 (vec_select:V4SI |
6558 (vec_concat:V8SI | 6793 (vec_concat:V8SI |
6559 (match_operand:V4SI 1 "register_operand" "x") | 6794 (match_operand:V4SI 1 "register_operand" "x") |
6560 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) | 6795 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) |
6564 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}" | 6799 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}" |
6565 [(set_attr "type" "sselog") | 6800 [(set_attr "type" "sselog") |
6566 (set_attr "prefix" "vex") | 6801 (set_attr "prefix" "vex") |
6567 (set_attr "mode" "TI")]) | 6802 (set_attr "mode" "TI")]) |
6568 | 6803 |
6569 (define_insn "sse2_punpckldq" | 6804 (define_insn "vec_interleave_lowv4si" |
6570 [(set (match_operand:V4SI 0 "register_operand" "=x") | 6805 [(set (match_operand:V4SI 0 "register_operand" "=x") |
6571 (vec_select:V4SI | 6806 (vec_select:V4SI |
6572 (vec_concat:V8SI | 6807 (vec_concat:V8SI |
6573 (match_operand:V4SI 1 "register_operand" "0") | 6808 (match_operand:V4SI 1 "register_operand" "0") |
6574 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) | 6809 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) |
6578 "punpckldq\t{%2, %0|%0, %2}" | 6813 "punpckldq\t{%2, %0|%0, %2}" |
6579 [(set_attr "type" "sselog") | 6814 [(set_attr "type" "sselog") |
6580 (set_attr "prefix_data16" "1") | 6815 (set_attr "prefix_data16" "1") |
6581 (set_attr "mode" "TI")]) | 6816 (set_attr "mode" "TI")]) |
6582 | 6817 |
6583 (define_insn "*avx_pinsr<avxmodesuffixs>" | 6818 (define_insn "*avx_pinsr<ssevecsize>" |
6584 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") | 6819 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") |
6585 (vec_merge:SSEMODE124 | 6820 (vec_merge:SSEMODE124 |
6586 (vec_duplicate:SSEMODE124 | 6821 (vec_duplicate:SSEMODE124 |
6587 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm")) | 6822 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm")) |
6588 (match_operand:SSEMODE124 1 "register_operand" "x") | 6823 (match_operand:SSEMODE124 1 "register_operand" "x") |
6589 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))] | 6824 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))] |
6590 "TARGET_AVX" | 6825 "TARGET_AVX" |
6591 { | 6826 { |
6592 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); | 6827 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); |
6593 if (MEM_P (operands[2])) | 6828 if (MEM_P (operands[2])) |
6594 return "vpinsr<avxmodesuffixs>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | 6829 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
6595 else | 6830 else |
6596 return "vpinsr<avxmodesuffixs>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; | 6831 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; |
6597 } | 6832 } |
6598 [(set_attr "type" "sselog") | 6833 [(set_attr "type" "sselog") |
6834 (set (attr "prefix_extra") | |
6835 (if_then_else (match_operand:V8HI 0 "register_operand" "") | |
6836 (const_string "0") | |
6837 (const_string "1"))) | |
6838 (set_attr "length_immediate" "1") | |
6599 (set_attr "prefix" "vex") | 6839 (set_attr "prefix" "vex") |
6600 (set_attr "mode" "TI")]) | 6840 (set_attr "mode" "TI")]) |
6601 | 6841 |
6602 (define_insn "*sse4_1_pinsrb" | 6842 (define_insn "*sse4_1_pinsrb" |
6603 [(set (match_operand:V16QI 0 "register_operand" "=x") | 6843 [(set (match_operand:V16QI 0 "register_operand" "=x") |
6614 else | 6854 else |
6615 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}"; | 6855 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}"; |
6616 } | 6856 } |
6617 [(set_attr "type" "sselog") | 6857 [(set_attr "type" "sselog") |
6618 (set_attr "prefix_extra" "1") | 6858 (set_attr "prefix_extra" "1") |
6859 (set_attr "length_immediate" "1") | |
6619 (set_attr "mode" "TI")]) | 6860 (set_attr "mode" "TI")]) |
6620 | 6861 |
6621 (define_insn "*sse2_pinsrw" | 6862 (define_insn "*sse2_pinsrw" |
6622 [(set (match_operand:V8HI 0 "register_operand" "=x") | 6863 [(set (match_operand:V8HI 0 "register_operand" "=x") |
6623 (vec_merge:V8HI | 6864 (vec_merge:V8HI |
6633 else | 6874 else |
6634 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; | 6875 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; |
6635 } | 6876 } |
6636 [(set_attr "type" "sselog") | 6877 [(set_attr "type" "sselog") |
6637 (set_attr "prefix_data16" "1") | 6878 (set_attr "prefix_data16" "1") |
6879 (set_attr "length_immediate" "1") | |
6638 (set_attr "mode" "TI")]) | 6880 (set_attr "mode" "TI")]) |
6639 | 6881 |
6640 ;; It must come before sse2_loadld since it is preferred. | 6882 ;; It must come before sse2_loadld since it is preferred. |
6641 (define_insn "*sse4_1_pinsrd" | 6883 (define_insn "*sse4_1_pinsrd" |
6642 [(set (match_operand:V4SI 0 "register_operand" "=x") | 6884 [(set (match_operand:V4SI 0 "register_operand" "=x") |
6650 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); | 6892 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); |
6651 return "pinsrd\t{%3, %2, %0|%0, %2, %3}"; | 6893 return "pinsrd\t{%3, %2, %0|%0, %2, %3}"; |
6652 } | 6894 } |
6653 [(set_attr "type" "sselog") | 6895 [(set_attr "type" "sselog") |
6654 (set_attr "prefix_extra" "1") | 6896 (set_attr "prefix_extra" "1") |
6897 (set_attr "length_immediate" "1") | |
6655 (set_attr "mode" "TI")]) | 6898 (set_attr "mode" "TI")]) |
6656 | 6899 |
6657 (define_insn "*avx_pinsrq" | 6900 (define_insn "*avx_pinsrq" |
6658 [(set (match_operand:V2DI 0 "register_operand" "=x") | 6901 [(set (match_operand:V2DI 0 "register_operand" "=x") |
6659 (vec_merge:V2DI | 6902 (vec_merge:V2DI |
6665 { | 6908 { |
6666 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); | 6909 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); |
6667 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | 6910 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
6668 } | 6911 } |
6669 [(set_attr "type" "sselog") | 6912 [(set_attr "type" "sselog") |
6913 (set_attr "prefix_extra" "1") | |
6914 (set_attr "length_immediate" "1") | |
6670 (set_attr "prefix" "vex") | 6915 (set_attr "prefix" "vex") |
6671 (set_attr "mode" "TI")]) | 6916 (set_attr "mode" "TI")]) |
6672 | 6917 |
6673 (define_insn "*sse4_1_pinsrq" | 6918 (define_insn "*sse4_1_pinsrq" |
6674 [(set (match_operand:V2DI 0 "register_operand" "=x") | 6919 [(set (match_operand:V2DI 0 "register_operand" "=x") |
6681 { | 6926 { |
6682 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); | 6927 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); |
6683 return "pinsrq\t{%3, %2, %0|%0, %2, %3}"; | 6928 return "pinsrq\t{%3, %2, %0|%0, %2, %3}"; |
6684 } | 6929 } |
6685 [(set_attr "type" "sselog") | 6930 [(set_attr "type" "sselog") |
6686 (set_attr "prefix_extra" "1") | 6931 (set_attr "prefix_rex" "1") |
6932 (set_attr "prefix_extra" "1") | |
6933 (set_attr "length_immediate" "1") | |
6687 (set_attr "mode" "TI")]) | 6934 (set_attr "mode" "TI")]) |
6688 | 6935 |
6689 (define_insn "*sse4_1_pextrb" | 6936 (define_insn "*sse4_1_pextrb" |
6690 [(set (match_operand:SI 0 "register_operand" "=r") | 6937 [(set (match_operand:SI 0 "register_operand" "=r") |
6691 (zero_extend:SI | 6938 (zero_extend:SI |
6694 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))] | 6941 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))] |
6695 "TARGET_SSE4_1" | 6942 "TARGET_SSE4_1" |
6696 "%vpextrb\t{%2, %1, %0|%0, %1, %2}" | 6943 "%vpextrb\t{%2, %1, %0|%0, %1, %2}" |
6697 [(set_attr "type" "sselog") | 6944 [(set_attr "type" "sselog") |
6698 (set_attr "prefix_extra" "1") | 6945 (set_attr "prefix_extra" "1") |
6946 (set_attr "length_immediate" "1") | |
6699 (set_attr "prefix" "maybe_vex") | 6947 (set_attr "prefix" "maybe_vex") |
6700 (set_attr "mode" "TI")]) | 6948 (set_attr "mode" "TI")]) |
6701 | 6949 |
6702 (define_insn "*sse4_1_pextrb_memory" | 6950 (define_insn "*sse4_1_pextrb_memory" |
6703 [(set (match_operand:QI 0 "memory_operand" "=m") | 6951 [(set (match_operand:QI 0 "memory_operand" "=m") |
6706 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))] | 6954 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))] |
6707 "TARGET_SSE4_1" | 6955 "TARGET_SSE4_1" |
6708 "%vpextrb\t{%2, %1, %0|%0, %1, %2}" | 6956 "%vpextrb\t{%2, %1, %0|%0, %1, %2}" |
6709 [(set_attr "type" "sselog") | 6957 [(set_attr "type" "sselog") |
6710 (set_attr "prefix_extra" "1") | 6958 (set_attr "prefix_extra" "1") |
6959 (set_attr "length_immediate" "1") | |
6711 (set_attr "prefix" "maybe_vex") | 6960 (set_attr "prefix" "maybe_vex") |
6712 (set_attr "mode" "TI")]) | 6961 (set_attr "mode" "TI")]) |
6713 | 6962 |
6714 (define_insn "*sse2_pextrw" | 6963 (define_insn "*sse2_pextrw" |
6715 [(set (match_operand:SI 0 "register_operand" "=r") | 6964 [(set (match_operand:SI 0 "register_operand" "=r") |
6719 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))] | 6968 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))] |
6720 "TARGET_SSE2" | 6969 "TARGET_SSE2" |
6721 "%vpextrw\t{%2, %1, %0|%0, %1, %2}" | 6970 "%vpextrw\t{%2, %1, %0|%0, %1, %2}" |
6722 [(set_attr "type" "sselog") | 6971 [(set_attr "type" "sselog") |
6723 (set_attr "prefix_data16" "1") | 6972 (set_attr "prefix_data16" "1") |
6973 (set_attr "length_immediate" "1") | |
6724 (set_attr "prefix" "maybe_vex") | 6974 (set_attr "prefix" "maybe_vex") |
6725 (set_attr "mode" "TI")]) | 6975 (set_attr "mode" "TI")]) |
6726 | 6976 |
6727 (define_insn "*sse4_1_pextrw_memory" | 6977 (define_insn "*sse4_1_pextrw_memory" |
6728 [(set (match_operand:HI 0 "memory_operand" "=m") | 6978 [(set (match_operand:HI 0 "memory_operand" "=m") |
6731 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))] | 6981 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))] |
6732 "TARGET_SSE4_1" | 6982 "TARGET_SSE4_1" |
6733 "%vpextrw\t{%2, %1, %0|%0, %1, %2}" | 6983 "%vpextrw\t{%2, %1, %0|%0, %1, %2}" |
6734 [(set_attr "type" "sselog") | 6984 [(set_attr "type" "sselog") |
6735 (set_attr "prefix_extra" "1") | 6985 (set_attr "prefix_extra" "1") |
6986 (set_attr "length_immediate" "1") | |
6736 (set_attr "prefix" "maybe_vex") | 6987 (set_attr "prefix" "maybe_vex") |
6737 (set_attr "mode" "TI")]) | 6988 (set_attr "mode" "TI")]) |
6738 | 6989 |
6739 (define_insn "*sse4_1_pextrd" | 6990 (define_insn "*sse4_1_pextrd" |
6740 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") | 6991 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") |
6743 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))] | 6994 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))] |
6744 "TARGET_SSE4_1" | 6995 "TARGET_SSE4_1" |
6745 "%vpextrd\t{%2, %1, %0|%0, %1, %2}" | 6996 "%vpextrd\t{%2, %1, %0|%0, %1, %2}" |
6746 [(set_attr "type" "sselog") | 6997 [(set_attr "type" "sselog") |
6747 (set_attr "prefix_extra" "1") | 6998 (set_attr "prefix_extra" "1") |
6999 (set_attr "length_immediate" "1") | |
6748 (set_attr "prefix" "maybe_vex") | 7000 (set_attr "prefix" "maybe_vex") |
6749 (set_attr "mode" "TI")]) | 7001 (set_attr "mode" "TI")]) |
6750 | 7002 |
6751 ;; It must come before *vec_extractv2di_1_sse since it is preferred. | 7003 ;; It must come before *vec_extractv2di_1_sse since it is preferred. |
6752 (define_insn "*sse4_1_pextrq" | 7004 (define_insn "*sse4_1_pextrq" |
6755 (match_operand:V2DI 1 "register_operand" "x") | 7007 (match_operand:V2DI 1 "register_operand" "x") |
6756 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))] | 7008 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))] |
6757 "TARGET_SSE4_1 && TARGET_64BIT" | 7009 "TARGET_SSE4_1 && TARGET_64BIT" |
6758 "%vpextrq\t{%2, %1, %0|%0, %1, %2}" | 7010 "%vpextrq\t{%2, %1, %0|%0, %1, %2}" |
6759 [(set_attr "type" "sselog") | 7011 [(set_attr "type" "sselog") |
6760 (set_attr "prefix_extra" "1") | 7012 (set_attr "prefix_rex" "1") |
7013 (set_attr "prefix_extra" "1") | |
7014 (set_attr "length_immediate" "1") | |
6761 (set_attr "prefix" "maybe_vex") | 7015 (set_attr "prefix" "maybe_vex") |
6762 (set_attr "mode" "TI")]) | 7016 (set_attr "mode" "TI")]) |
6763 | 7017 |
6764 (define_expand "sse2_pshufd" | 7018 (define_expand "sse2_pshufd" |
6765 [(match_operand:V4SI 0 "register_operand" "") | 7019 [(match_operand:V4SI 0 "register_operand" "") |
6795 | 7049 |
6796 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}"; | 7050 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}"; |
6797 } | 7051 } |
6798 [(set_attr "type" "sselog1") | 7052 [(set_attr "type" "sselog1") |
6799 (set_attr "prefix_data16" "1") | 7053 (set_attr "prefix_data16" "1") |
6800 (set_attr "prefix" "vex") | 7054 (set_attr "prefix" "maybe_vex") |
7055 (set_attr "length_immediate" "1") | |
6801 (set_attr "mode" "TI")]) | 7056 (set_attr "mode" "TI")]) |
6802 | 7057 |
6803 (define_expand "sse2_pshuflw" | 7058 (define_expand "sse2_pshuflw" |
6804 [(match_operand:V8HI 0 "register_operand" "") | 7059 [(match_operand:V8HI 0 "register_operand" "") |
6805 (match_operand:V8HI 1 "nonimmediate_operand" "") | 7060 (match_operand:V8HI 1 "nonimmediate_operand" "") |
6837 operands[2] = GEN_INT (mask); | 7092 operands[2] = GEN_INT (mask); |
6838 | 7093 |
6839 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}"; | 7094 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}"; |
6840 } | 7095 } |
6841 [(set_attr "type" "sselog") | 7096 [(set_attr "type" "sselog") |
7097 (set_attr "prefix_data16" "0") | |
6842 (set_attr "prefix_rep" "1") | 7098 (set_attr "prefix_rep" "1") |
6843 (set_attr "prefix" "maybe_vex") | 7099 (set_attr "prefix" "maybe_vex") |
7100 (set_attr "length_immediate" "1") | |
6844 (set_attr "mode" "TI")]) | 7101 (set_attr "mode" "TI")]) |
6845 | 7102 |
6846 (define_expand "sse2_pshufhw" | 7103 (define_expand "sse2_pshufhw" |
6847 [(match_operand:V8HI 0 "register_operand" "") | 7104 [(match_operand:V8HI 0 "register_operand" "") |
6848 (match_operand:V8HI 1 "nonimmediate_operand" "") | 7105 (match_operand:V8HI 1 "nonimmediate_operand" "") |
6881 | 7138 |
6882 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}"; | 7139 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}"; |
6883 } | 7140 } |
6884 [(set_attr "type" "sselog") | 7141 [(set_attr "type" "sselog") |
6885 (set_attr "prefix_rep" "1") | 7142 (set_attr "prefix_rep" "1") |
7143 (set_attr "prefix_data16" "0") | |
6886 (set_attr "prefix" "maybe_vex") | 7144 (set_attr "prefix" "maybe_vex") |
7145 (set_attr "length_immediate" "1") | |
6887 (set_attr "mode" "TI")]) | 7146 (set_attr "mode" "TI")]) |
6888 | 7147 |
6889 (define_expand "sse2_loadd" | 7148 (define_expand "sse2_loadd" |
6890 [(set (match_operand:V4SI 0 "register_operand" "") | 7149 [(set (match_operand:V4SI 0 "register_operand" "") |
6891 (vec_merge:V4SI | 7150 (vec_merge:V4SI |
7017 vmovhps\t{%1, %0|%0, %1} | 7276 vmovhps\t{%1, %0|%0, %1} |
7018 vpsrldq\t{$8, %1, %0|%0, %1, 8} | 7277 vpsrldq\t{$8, %1, %0|%0, %1, 8} |
7019 vmovq\t{%H1, %0|%0, %H1} | 7278 vmovq\t{%H1, %0|%0, %H1} |
7020 vmov{q}\t{%H1, %0|%0, %H1}" | 7279 vmov{q}\t{%H1, %0|%0, %H1}" |
7021 [(set_attr "type" "ssemov,sseishft,ssemov,imov") | 7280 [(set_attr "type" "ssemov,sseishft,ssemov,imov") |
7281 (set_attr "length_immediate" "*,1,*,*") | |
7022 (set_attr "memory" "*,none,*,*") | 7282 (set_attr "memory" "*,none,*,*") |
7023 (set_attr "prefix" "vex") | 7283 (set_attr "prefix" "vex") |
7024 (set_attr "mode" "V2SF,TI,TI,DI")]) | 7284 (set_attr "mode" "V2SF,TI,TI,DI")]) |
7025 | 7285 |
7026 (define_insn "*vec_extractv2di_1_rex64" | 7286 (define_insn "*vec_extractv2di_1_rex64" |
7033 movhps\t{%1, %0|%0, %1} | 7293 movhps\t{%1, %0|%0, %1} |
7034 psrldq\t{$8, %0|%0, 8} | 7294 psrldq\t{$8, %0|%0, 8} |
7035 movq\t{%H1, %0|%0, %H1} | 7295 movq\t{%H1, %0|%0, %H1} |
7036 mov{q}\t{%H1, %0|%0, %H1}" | 7296 mov{q}\t{%H1, %0|%0, %H1}" |
7037 [(set_attr "type" "ssemov,sseishft,ssemov,imov") | 7297 [(set_attr "type" "ssemov,sseishft,ssemov,imov") |
7298 (set_attr "length_immediate" "*,1,*,*") | |
7299 (set_attr "atom_unit" "*,sishuf,*,*") | |
7038 (set_attr "memory" "*,none,*,*") | 7300 (set_attr "memory" "*,none,*,*") |
7039 (set_attr "mode" "V2SF,TI,TI,DI")]) | 7301 (set_attr "mode" "V2SF,TI,TI,DI")]) |
7040 | 7302 |
7041 (define_insn "*vec_extractv2di_1_avx" | 7303 (define_insn "*vec_extractv2di_1_avx" |
7042 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x") | 7304 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x") |
7049 "@ | 7311 "@ |
7050 vmovhps\t{%1, %0|%0, %1} | 7312 vmovhps\t{%1, %0|%0, %1} |
7051 vpsrldq\t{$8, %1, %0|%0, %1, 8} | 7313 vpsrldq\t{$8, %1, %0|%0, %1, 8} |
7052 vmovq\t{%H1, %0|%0, %H1}" | 7314 vmovq\t{%H1, %0|%0, %H1}" |
7053 [(set_attr "type" "ssemov,sseishft,ssemov") | 7315 [(set_attr "type" "ssemov,sseishft,ssemov") |
7316 (set_attr "length_immediate" "*,1,*") | |
7054 (set_attr "memory" "*,none,*") | 7317 (set_attr "memory" "*,none,*") |
7055 (set_attr "prefix" "vex") | 7318 (set_attr "prefix" "vex") |
7056 (set_attr "mode" "V2SF,TI,TI")]) | 7319 (set_attr "mode" "V2SF,TI,TI")]) |
7057 | 7320 |
7058 (define_insn "*vec_extractv2di_1_sse2" | 7321 (define_insn "*vec_extractv2di_1_sse2" |
7065 "@ | 7328 "@ |
7066 movhps\t{%1, %0|%0, %1} | 7329 movhps\t{%1, %0|%0, %1} |
7067 psrldq\t{$8, %0|%0, 8} | 7330 psrldq\t{$8, %0|%0, 8} |
7068 movq\t{%H1, %0|%0, %H1}" | 7331 movq\t{%H1, %0|%0, %H1}" |
7069 [(set_attr "type" "ssemov,sseishft,ssemov") | 7332 [(set_attr "type" "ssemov,sseishft,ssemov") |
7333 (set_attr "length_immediate" "*,1,*") | |
7334 (set_attr "atom_unit" "*,sishuf,*") | |
7070 (set_attr "memory" "*,none,*") | 7335 (set_attr "memory" "*,none,*") |
7071 (set_attr "mode" "V2SF,TI,TI")]) | 7336 (set_attr "mode" "V2SF,TI,TI")]) |
7072 | 7337 |
7073 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva | 7338 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva |
7074 (define_insn "*vec_extractv2di_1_sse" | 7339 (define_insn "*vec_extractv2di_1_sse" |
7083 movhlps\t{%1, %0|%0, %1} | 7348 movhlps\t{%1, %0|%0, %1} |
7084 movlps\t{%H1, %0|%0, %H1}" | 7349 movlps\t{%H1, %0|%0, %H1}" |
7085 [(set_attr "type" "ssemov") | 7350 [(set_attr "type" "ssemov") |
7086 (set_attr "mode" "V2SF,V4SF,V2SF")]) | 7351 (set_attr "mode" "V2SF,V4SF,V2SF")]) |
7087 | 7352 |
7353 (define_insn "*vec_dupv4si_avx" | |
7354 [(set (match_operand:V4SI 0 "register_operand" "=x,x") | |
7355 (vec_duplicate:V4SI | |
7356 (match_operand:SI 1 "register_operand" "x,m")))] | |
7357 "TARGET_AVX" | |
7358 "@ | |
7359 vpshufd\t{$0, %1, %0|%0, %1, 0} | |
7360 vbroadcastss\t{%1, %0|%0, %1}" | |
7361 [(set_attr "type" "sselog1,ssemov") | |
7362 (set_attr "length_immediate" "1,0") | |
7363 (set_attr "prefix_extra" "0,1") | |
7364 (set_attr "prefix" "vex") | |
7365 (set_attr "mode" "TI,V4SF")]) | |
7366 | |
7088 (define_insn "*vec_dupv4si" | 7367 (define_insn "*vec_dupv4si" |
7089 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x") | 7368 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x") |
7090 (vec_duplicate:V4SI | 7369 (vec_duplicate:V4SI |
7091 (match_operand:SI 1 "register_operand" " Y2,0")))] | 7370 (match_operand:SI 1 "register_operand" " Y2,0")))] |
7092 "TARGET_SSE" | 7371 "TARGET_SSE" |
7093 "@ | 7372 "@ |
7094 %vpshufd\t{$0, %1, %0|%0, %1, 0} | 7373 %vpshufd\t{$0, %1, %0|%0, %1, 0} |
7095 shufps\t{$0, %0, %0|%0, %0, 0}" | 7374 shufps\t{$0, %0, %0|%0, %0, 0}" |
7096 [(set_attr "type" "sselog1") | 7375 [(set_attr "type" "sselog1") |
7097 (set_attr "prefix" "maybe_vex,orig") | 7376 (set_attr "length_immediate" "1") |
7098 (set_attr "mode" "TI,V4SF")]) | 7377 (set_attr "mode" "TI,V4SF")]) |
7099 | 7378 |
7100 (define_insn "*vec_dupv2di_avx" | 7379 (define_insn "*vec_dupv2di_avx" |
7101 [(set (match_operand:V2DI 0 "register_operand" "=x") | 7380 [(set (match_operand:V2DI 0 "register_operand" "=x,x") |
7102 (vec_duplicate:V2DI | 7381 (vec_duplicate:V2DI |
7103 (match_operand:DI 1 "register_operand" "x")))] | 7382 (match_operand:DI 1 "nonimmediate_operand" " x,m")))] |
7104 "TARGET_AVX" | 7383 "TARGET_AVX" |
7105 "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}" | 7384 "@ |
7385 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1} | |
7386 vmovddup\t{%1, %0|%0, %1}" | |
7106 [(set_attr "type" "sselog1") | 7387 [(set_attr "type" "sselog1") |
7107 (set_attr "prefix" "vex") | 7388 (set_attr "prefix" "vex") |
7108 (set_attr "mode" "TI")]) | 7389 (set_attr "mode" "TI,DF")]) |
7390 | |
7391 (define_insn "*vec_dupv2di_sse3" | |
7392 [(set (match_operand:V2DI 0 "register_operand" "=x,x") | |
7393 (vec_duplicate:V2DI | |
7394 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))] | |
7395 "TARGET_SSE3" | |
7396 "@ | |
7397 punpcklqdq\t%0, %0 | |
7398 movddup\t{%1, %0|%0, %1}" | |
7399 [(set_attr "type" "sselog1") | |
7400 (set_attr "mode" "TI,DF")]) | |
7109 | 7401 |
7110 (define_insn "*vec_dupv2di" | 7402 (define_insn "*vec_dupv2di" |
7111 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x") | 7403 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x") |
7112 (vec_duplicate:V2DI | 7404 (vec_duplicate:V2DI |
7113 (match_operand:DI 1 "register_operand" " 0 ,0")))] | 7405 (match_operand:DI 1 "register_operand" " 0 ,0")))] |
7129 vpunpckldq\t{%2, %1, %0|%0, %1, %2} | 7421 vpunpckldq\t{%2, %1, %0|%0, %1, %2} |
7130 vmovd\t{%1, %0|%0, %1} | 7422 vmovd\t{%1, %0|%0, %1} |
7131 punpckldq\t{%2, %0|%0, %2} | 7423 punpckldq\t{%2, %0|%0, %2} |
7132 movd\t{%1, %0|%0, %1}" | 7424 movd\t{%1, %0|%0, %1}" |
7133 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") | 7425 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") |
7426 (set_attr "prefix_extra" "1,*,*,*,*") | |
7427 (set_attr "length_immediate" "1,*,*,*,*") | |
7134 (set (attr "prefix") | 7428 (set (attr "prefix") |
7135 (if_then_else (eq_attr "alternative" "3,4") | 7429 (if_then_else (eq_attr "alternative" "3,4") |
7136 (const_string "orig") | 7430 (const_string "orig") |
7137 (const_string "vex"))) | 7431 (const_string "vex"))) |
7138 (set_attr "mode" "TI,TI,TI,DI,DI")]) | 7432 (set_attr "mode" "TI,TI,TI,DI,DI")]) |
7149 movd\t{%1, %0|%0, %1} | 7443 movd\t{%1, %0|%0, %1} |
7150 punpckldq\t{%2, %0|%0, %2} | 7444 punpckldq\t{%2, %0|%0, %2} |
7151 movd\t{%1, %0|%0, %1}" | 7445 movd\t{%1, %0|%0, %1}" |
7152 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") | 7446 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") |
7153 (set_attr "prefix_extra" "1,*,*,*,*") | 7447 (set_attr "prefix_extra" "1,*,*,*,*") |
7448 (set_attr "length_immediate" "1,*,*,*,*") | |
7154 (set_attr "mode" "TI,TI,TI,DI,DI")]) | 7449 (set_attr "mode" "TI,TI,TI,DI,DI")]) |
7155 | 7450 |
7156 ;; ??? In theory we can match memory for the MMX alternative, but allowing | 7451 ;; ??? In theory we can match memory for the MMX alternative, but allowing |
7157 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE | 7452 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE |
7158 ;; alternatives pretty much forces the MMX alternative to be chosen. | 7453 ;; alternatives pretty much forces the MMX alternative to be chosen. |
7255 vmovq\t{%1, %0|%0, %1} | 7550 vmovq\t{%1, %0|%0, %1} |
7256 movq2dq\t{%1, %0|%0, %1} | 7551 movq2dq\t{%1, %0|%0, %1} |
7257 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} | 7552 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} |
7258 vmovhps\t{%2, %1, %0|%0, %1, %2}" | 7553 vmovhps\t{%2, %1, %0|%0, %1, %2}" |
7259 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov") | 7554 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov") |
7555 (set_attr "prefix_extra" "1,*,*,*,*,*") | |
7556 (set_attr "length_immediate" "1,*,*,*,*,*") | |
7260 (set (attr "prefix") | 7557 (set (attr "prefix") |
7261 (if_then_else (eq_attr "alternative" "3") | 7558 (if_then_else (eq_attr "alternative" "3") |
7262 (const_string "orig") | 7559 (const_string "orig") |
7263 (const_string "vex"))) | 7560 (const_string "vex"))) |
7264 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")]) | 7561 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")]) |
7276 movq2dq\t{%1, %0|%0, %1} | 7573 movq2dq\t{%1, %0|%0, %1} |
7277 punpcklqdq\t{%2, %0|%0, %2} | 7574 punpcklqdq\t{%2, %0|%0, %2} |
7278 movlhps\t{%2, %0|%0, %2} | 7575 movlhps\t{%2, %0|%0, %2} |
7279 movhps\t{%2, %0|%0, %2}" | 7576 movhps\t{%2, %0|%0, %2}" |
7280 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov") | 7577 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov") |
7578 (set_attr "prefix_rex" "1,*,1,*,*,*,*") | |
7281 (set_attr "prefix_extra" "1,*,*,*,*,*,*") | 7579 (set_attr "prefix_extra" "1,*,*,*,*,*,*") |
7580 (set_attr "length_immediate" "1,*,*,*,*,*,*") | |
7282 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")]) | 7581 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")]) |
7283 | 7582 |
7284 (define_insn "*vec_concatv2di_rex64_sse" | 7583 (define_insn "*vec_concatv2di_rex64_sse" |
7285 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x") | 7584 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x") |
7286 (vec_concat:V2DI | 7585 (vec_concat:V2DI |
7293 movq2dq\t{%1, %0|%0, %1} | 7592 movq2dq\t{%1, %0|%0, %1} |
7294 punpcklqdq\t{%2, %0|%0, %2} | 7593 punpcklqdq\t{%2, %0|%0, %2} |
7295 movlhps\t{%2, %0|%0, %2} | 7594 movlhps\t{%2, %0|%0, %2} |
7296 movhps\t{%2, %0|%0, %2}" | 7595 movhps\t{%2, %0|%0, %2}" |
7297 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov") | 7596 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov") |
7597 (set_attr "prefix_rex" "*,1,*,*,*,*") | |
7298 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")]) | 7598 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")]) |
7299 | 7599 |
7300 (define_expand "vec_unpacku_hi_v16qi" | 7600 (define_expand "vec_unpacku_hi_v16qi" |
7301 [(match_operand:V8HI 0 "register_operand" "") | 7601 [(match_operand:V8HI 0 "register_operand" "") |
7302 (match_operand:V16QI 1 "register_operand" "")] | 7602 (match_operand:V16QI 1 "register_operand" "")] |
7303 "TARGET_SSE2" | 7603 "TARGET_SSE2" |
7304 { | 7604 { |
7305 if (TARGET_SSE4_1) | 7605 if (TARGET_SSE4_1) |
7306 ix86_expand_sse4_unpack (operands, true, true); | 7606 ix86_expand_sse4_unpack (operands, true, true); |
7307 else if (TARGET_SSE5) | |
7308 ix86_expand_sse5_unpack (operands, true, true); | |
7309 else | 7607 else |
7310 ix86_expand_sse_unpack (operands, true, true); | 7608 ix86_expand_sse_unpack (operands, true, true); |
7311 DONE; | 7609 DONE; |
7312 }) | 7610 }) |
7313 | 7611 |
7316 (match_operand:V16QI 1 "register_operand" "")] | 7614 (match_operand:V16QI 1 "register_operand" "")] |
7317 "TARGET_SSE2" | 7615 "TARGET_SSE2" |
7318 { | 7616 { |
7319 if (TARGET_SSE4_1) | 7617 if (TARGET_SSE4_1) |
7320 ix86_expand_sse4_unpack (operands, false, true); | 7618 ix86_expand_sse4_unpack (operands, false, true); |
7321 else if (TARGET_SSE5) | |
7322 ix86_expand_sse5_unpack (operands, false, true); | |
7323 else | 7619 else |
7324 ix86_expand_sse_unpack (operands, false, true); | 7620 ix86_expand_sse_unpack (operands, false, true); |
7325 DONE; | 7621 DONE; |
7326 }) | 7622 }) |
7327 | 7623 |
7330 (match_operand:V16QI 1 "register_operand" "")] | 7626 (match_operand:V16QI 1 "register_operand" "")] |
7331 "TARGET_SSE2" | 7627 "TARGET_SSE2" |
7332 { | 7628 { |
7333 if (TARGET_SSE4_1) | 7629 if (TARGET_SSE4_1) |
7334 ix86_expand_sse4_unpack (operands, true, false); | 7630 ix86_expand_sse4_unpack (operands, true, false); |
7335 else if (TARGET_SSE5) | |
7336 ix86_expand_sse5_unpack (operands, true, false); | |
7337 else | 7631 else |
7338 ix86_expand_sse_unpack (operands, true, false); | 7632 ix86_expand_sse_unpack (operands, true, false); |
7339 DONE; | 7633 DONE; |
7340 }) | 7634 }) |
7341 | 7635 |
7344 (match_operand:V16QI 1 "register_operand" "")] | 7638 (match_operand:V16QI 1 "register_operand" "")] |
7345 "TARGET_SSE2" | 7639 "TARGET_SSE2" |
7346 { | 7640 { |
7347 if (TARGET_SSE4_1) | 7641 if (TARGET_SSE4_1) |
7348 ix86_expand_sse4_unpack (operands, false, false); | 7642 ix86_expand_sse4_unpack (operands, false, false); |
7349 else if (TARGET_SSE5) | |
7350 ix86_expand_sse5_unpack (operands, false, false); | |
7351 else | 7643 else |
7352 ix86_expand_sse_unpack (operands, false, false); | 7644 ix86_expand_sse_unpack (operands, false, false); |
7353 DONE; | 7645 DONE; |
7354 }) | 7646 }) |
7355 | 7647 |
7358 (match_operand:V8HI 1 "register_operand" "")] | 7650 (match_operand:V8HI 1 "register_operand" "")] |
7359 "TARGET_SSE2" | 7651 "TARGET_SSE2" |
7360 { | 7652 { |
7361 if (TARGET_SSE4_1) | 7653 if (TARGET_SSE4_1) |
7362 ix86_expand_sse4_unpack (operands, true, true); | 7654 ix86_expand_sse4_unpack (operands, true, true); |
7363 else if (TARGET_SSE5) | |
7364 ix86_expand_sse5_unpack (operands, true, true); | |
7365 else | 7655 else |
7366 ix86_expand_sse_unpack (operands, true, true); | 7656 ix86_expand_sse_unpack (operands, true, true); |
7367 DONE; | 7657 DONE; |
7368 }) | 7658 }) |
7369 | 7659 |
7372 (match_operand:V8HI 1 "register_operand" "")] | 7662 (match_operand:V8HI 1 "register_operand" "")] |
7373 "TARGET_SSE2" | 7663 "TARGET_SSE2" |
7374 { | 7664 { |
7375 if (TARGET_SSE4_1) | 7665 if (TARGET_SSE4_1) |
7376 ix86_expand_sse4_unpack (operands, false, true); | 7666 ix86_expand_sse4_unpack (operands, false, true); |
7377 else if (TARGET_SSE5) | |
7378 ix86_expand_sse5_unpack (operands, false, true); | |
7379 else | 7667 else |
7380 ix86_expand_sse_unpack (operands, false, true); | 7668 ix86_expand_sse_unpack (operands, false, true); |
7381 DONE; | 7669 DONE; |
7382 }) | 7670 }) |
7383 | 7671 |
7386 (match_operand:V8HI 1 "register_operand" "")] | 7674 (match_operand:V8HI 1 "register_operand" "")] |
7387 "TARGET_SSE2" | 7675 "TARGET_SSE2" |
7388 { | 7676 { |
7389 if (TARGET_SSE4_1) | 7677 if (TARGET_SSE4_1) |
7390 ix86_expand_sse4_unpack (operands, true, false); | 7678 ix86_expand_sse4_unpack (operands, true, false); |
7391 else if (TARGET_SSE5) | |
7392 ix86_expand_sse5_unpack (operands, true, false); | |
7393 else | 7679 else |
7394 ix86_expand_sse_unpack (operands, true, false); | 7680 ix86_expand_sse_unpack (operands, true, false); |
7395 DONE; | 7681 DONE; |
7396 }) | 7682 }) |
7397 | 7683 |
7400 (match_operand:V8HI 1 "register_operand" "")] | 7686 (match_operand:V8HI 1 "register_operand" "")] |
7401 "TARGET_SSE2" | 7687 "TARGET_SSE2" |
7402 { | 7688 { |
7403 if (TARGET_SSE4_1) | 7689 if (TARGET_SSE4_1) |
7404 ix86_expand_sse4_unpack (operands, false, false); | 7690 ix86_expand_sse4_unpack (operands, false, false); |
7405 else if (TARGET_SSE5) | |
7406 ix86_expand_sse5_unpack (operands, false, false); | |
7407 else | 7691 else |
7408 ix86_expand_sse_unpack (operands, false, false); | 7692 ix86_expand_sse_unpack (operands, false, false); |
7409 DONE; | 7693 DONE; |
7410 }) | 7694 }) |
7411 | 7695 |
7414 (match_operand:V4SI 1 "register_operand" "")] | 7698 (match_operand:V4SI 1 "register_operand" "")] |
7415 "TARGET_SSE2" | 7699 "TARGET_SSE2" |
7416 { | 7700 { |
7417 if (TARGET_SSE4_1) | 7701 if (TARGET_SSE4_1) |
7418 ix86_expand_sse4_unpack (operands, true, true); | 7702 ix86_expand_sse4_unpack (operands, true, true); |
7419 else if (TARGET_SSE5) | |
7420 ix86_expand_sse5_unpack (operands, true, true); | |
7421 else | 7703 else |
7422 ix86_expand_sse_unpack (operands, true, true); | 7704 ix86_expand_sse_unpack (operands, true, true); |
7423 DONE; | 7705 DONE; |
7424 }) | 7706 }) |
7425 | 7707 |
7428 (match_operand:V4SI 1 "register_operand" "")] | 7710 (match_operand:V4SI 1 "register_operand" "")] |
7429 "TARGET_SSE2" | 7711 "TARGET_SSE2" |
7430 { | 7712 { |
7431 if (TARGET_SSE4_1) | 7713 if (TARGET_SSE4_1) |
7432 ix86_expand_sse4_unpack (operands, false, true); | 7714 ix86_expand_sse4_unpack (operands, false, true); |
7433 else if (TARGET_SSE5) | |
7434 ix86_expand_sse5_unpack (operands, false, true); | |
7435 else | 7715 else |
7436 ix86_expand_sse_unpack (operands, false, true); | 7716 ix86_expand_sse_unpack (operands, false, true); |
7437 DONE; | 7717 DONE; |
7438 }) | 7718 }) |
7439 | 7719 |
7442 (match_operand:V4SI 1 "register_operand" "")] | 7722 (match_operand:V4SI 1 "register_operand" "")] |
7443 "TARGET_SSE2" | 7723 "TARGET_SSE2" |
7444 { | 7724 { |
7445 if (TARGET_SSE4_1) | 7725 if (TARGET_SSE4_1) |
7446 ix86_expand_sse4_unpack (operands, true, false); | 7726 ix86_expand_sse4_unpack (operands, true, false); |
7447 else if (TARGET_SSE5) | |
7448 ix86_expand_sse5_unpack (operands, true, false); | |
7449 else | 7727 else |
7450 ix86_expand_sse_unpack (operands, true, false); | 7728 ix86_expand_sse_unpack (operands, true, false); |
7451 DONE; | 7729 DONE; |
7452 }) | 7730 }) |
7453 | 7731 |
7456 (match_operand:V4SI 1 "register_operand" "")] | 7734 (match_operand:V4SI 1 "register_operand" "")] |
7457 "TARGET_SSE2" | 7735 "TARGET_SSE2" |
7458 { | 7736 { |
7459 if (TARGET_SSE4_1) | 7737 if (TARGET_SSE4_1) |
7460 ix86_expand_sse4_unpack (operands, false, false); | 7738 ix86_expand_sse4_unpack (operands, false, false); |
7461 else if (TARGET_SSE5) | |
7462 ix86_expand_sse5_unpack (operands, false, false); | |
7463 else | 7739 else |
7464 ix86_expand_sse_unpack (operands, false, false); | 7740 ix86_expand_sse_unpack (operands, false, false); |
7465 DONE; | 7741 DONE; |
7466 }) | 7742 }) |
7467 | 7743 |
7622 (match_operand:V16QI 2 "nonimmediate_operand" "xm")] | 7898 (match_operand:V16QI 2 "nonimmediate_operand" "xm")] |
7623 UNSPEC_PSADBW))] | 7899 UNSPEC_PSADBW))] |
7624 "TARGET_SSE2" | 7900 "TARGET_SSE2" |
7625 "psadbw\t{%2, %0|%0, %2}" | 7901 "psadbw\t{%2, %0|%0, %2}" |
7626 [(set_attr "type" "sseiadd") | 7902 [(set_attr "type" "sseiadd") |
7903 (set_attr "atom_unit" "simul") | |
7627 (set_attr "prefix_data16" "1") | 7904 (set_attr "prefix_data16" "1") |
7628 (set_attr "mode" "TI")]) | 7905 (set_attr "mode" "TI")]) |
7629 | 7906 |
7630 (define_insn "avx_movmskp<avxmodesuffixf2c>256" | 7907 (define_insn "avx_movmskp<avxmodesuffixf2c>256" |
7631 [(set (match_operand:SI 0 "register_operand" "=r") | 7908 [(set (match_operand:SI 0 "register_operand" "=r") |
7643 (unspec:SI | 7920 (unspec:SI |
7644 [(match_operand:SSEMODEF2P 1 "register_operand" "x")] | 7921 [(match_operand:SSEMODEF2P 1 "register_operand" "x")] |
7645 UNSPEC_MOVMSK))] | 7922 UNSPEC_MOVMSK))] |
7646 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" | 7923 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" |
7647 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}" | 7924 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}" |
7648 [(set_attr "type" "ssecvt") | 7925 [(set_attr "type" "ssemov") |
7649 (set_attr "prefix" "maybe_vex") | 7926 (set_attr "prefix" "maybe_vex") |
7650 (set_attr "mode" "<MODE>")]) | 7927 (set_attr "mode" "<MODE>")]) |
7651 | 7928 |
7652 (define_insn "sse2_pmovmskb" | 7929 (define_insn "sse2_pmovmskb" |
7653 [(set (match_operand:SI 0 "register_operand" "=r") | 7930 [(set (match_operand:SI 0 "register_operand" "=r") |
7654 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")] | 7931 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")] |
7655 UNSPEC_MOVMSK))] | 7932 UNSPEC_MOVMSK))] |
7656 "TARGET_SSE2" | 7933 "TARGET_SSE2" |
7657 "%vpmovmskb\t{%1, %0|%0, %1}" | 7934 "%vpmovmskb\t{%1, %0|%0, %1}" |
7658 [(set_attr "type" "ssecvt") | 7935 [(set_attr "type" "ssemov") |
7659 (set_attr "prefix_data16" "1") | 7936 (set_attr "prefix_data16" "1") |
7660 (set_attr "prefix" "maybe_vex") | 7937 (set_attr "prefix" "maybe_vex") |
7661 (set_attr "mode" "SI")]) | 7938 (set_attr "mode" "SI")]) |
7662 | 7939 |
7663 (define_expand "sse2_maskmovdqu" | 7940 (define_expand "sse2_maskmovdqu" |
7676 (mem:V16QI (match_dup 0))] | 7953 (mem:V16QI (match_dup 0))] |
7677 UNSPEC_MASKMOV))] | 7954 UNSPEC_MASKMOV))] |
7678 "TARGET_SSE2 && !TARGET_64BIT" | 7955 "TARGET_SSE2 && !TARGET_64BIT" |
7679 ;; @@@ check ordering of operands in intel/nonintel syntax | 7956 ;; @@@ check ordering of operands in intel/nonintel syntax |
7680 "%vmaskmovdqu\t{%2, %1|%1, %2}" | 7957 "%vmaskmovdqu\t{%2, %1|%1, %2}" |
7681 [(set_attr "type" "ssecvt") | 7958 [(set_attr "type" "ssemov") |
7682 (set_attr "prefix_data16" "1") | 7959 (set_attr "prefix_data16" "1") |
7960 ;; The implicit %rdi operand confuses default length_vex computation. | |
7961 (set_attr "length_vex" "3") | |
7683 (set_attr "prefix" "maybe_vex") | 7962 (set_attr "prefix" "maybe_vex") |
7684 (set_attr "mode" "TI")]) | 7963 (set_attr "mode" "TI")]) |
7685 | 7964 |
7686 (define_insn "*sse2_maskmovdqu_rex64" | 7965 (define_insn "*sse2_maskmovdqu_rex64" |
7687 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D")) | 7966 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D")) |
7690 (mem:V16QI (match_dup 0))] | 7969 (mem:V16QI (match_dup 0))] |
7691 UNSPEC_MASKMOV))] | 7970 UNSPEC_MASKMOV))] |
7692 "TARGET_SSE2 && TARGET_64BIT" | 7971 "TARGET_SSE2 && TARGET_64BIT" |
7693 ;; @@@ check ordering of operands in intel/nonintel syntax | 7972 ;; @@@ check ordering of operands in intel/nonintel syntax |
7694 "%vmaskmovdqu\t{%2, %1|%1, %2}" | 7973 "%vmaskmovdqu\t{%2, %1|%1, %2}" |
7695 [(set_attr "type" "ssecvt") | 7974 [(set_attr "type" "ssemov") |
7696 (set_attr "prefix_data16" "1") | 7975 (set_attr "prefix_data16" "1") |
7976 ;; The implicit %rdi operand confuses default length_vex computation. | |
7977 (set (attr "length_vex") | |
7978 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1"))) | |
7697 (set_attr "prefix" "maybe_vex") | 7979 (set_attr "prefix" "maybe_vex") |
7698 (set_attr "mode" "TI")]) | 7980 (set_attr "mode" "TI")]) |
7699 | 7981 |
7700 (define_insn "sse_ldmxcsr" | 7982 (define_insn "sse_ldmxcsr" |
7701 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] | 7983 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] |
7702 UNSPECV_LDMXCSR)] | 7984 UNSPECV_LDMXCSR)] |
7703 "TARGET_SSE" | 7985 "TARGET_SSE" |
7704 "%vldmxcsr\t%0" | 7986 "%vldmxcsr\t%0" |
7705 [(set_attr "type" "sse") | 7987 [(set_attr "type" "sse") |
7988 (set_attr "atom_sse_attr" "mxcsr") | |
7706 (set_attr "prefix" "maybe_vex") | 7989 (set_attr "prefix" "maybe_vex") |
7707 (set_attr "memory" "load")]) | 7990 (set_attr "memory" "load")]) |
7708 | 7991 |
7709 (define_insn "sse_stmxcsr" | 7992 (define_insn "sse_stmxcsr" |
7710 [(set (match_operand:SI 0 "memory_operand" "=m") | 7993 [(set (match_operand:SI 0 "memory_operand" "=m") |
7711 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] | 7994 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] |
7712 "TARGET_SSE" | 7995 "TARGET_SSE" |
7713 "%vstmxcsr\t%0" | 7996 "%vstmxcsr\t%0" |
7714 [(set_attr "type" "sse") | 7997 [(set_attr "type" "sse") |
7998 (set_attr "atom_sse_attr" "mxcsr") | |
7715 (set_attr "prefix" "maybe_vex") | 7999 (set_attr "prefix" "maybe_vex") |
7716 (set_attr "memory" "store")]) | 8000 (set_attr "memory" "store")]) |
7717 | 8001 |
7718 (define_expand "sse_sfence" | 8002 (define_expand "sse_sfence" |
7719 [(set (match_dup 0) | 8003 [(set (match_dup 0) |
7728 [(set (match_operand:BLK 0 "" "") | 8012 [(set (match_operand:BLK 0 "" "") |
7729 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] | 8013 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] |
7730 "TARGET_SSE || TARGET_3DNOW_A" | 8014 "TARGET_SSE || TARGET_3DNOW_A" |
7731 "sfence" | 8015 "sfence" |
7732 [(set_attr "type" "sse") | 8016 [(set_attr "type" "sse") |
8017 (set_attr "length_address" "0") | |
8018 (set_attr "atom_sse_attr" "fence") | |
7733 (set_attr "memory" "unknown")]) | 8019 (set_attr "memory" "unknown")]) |
7734 | 8020 |
7735 (define_insn "sse2_clflush" | 8021 (define_insn "sse2_clflush" |
7736 [(unspec_volatile [(match_operand 0 "address_operand" "p")] | 8022 [(unspec_volatile [(match_operand 0 "address_operand" "p")] |
7737 UNSPECV_CLFLUSH)] | 8023 UNSPECV_CLFLUSH)] |
7738 "TARGET_SSE2" | 8024 "TARGET_SSE2" |
7739 "clflush\t%a0" | 8025 "clflush\t%a0" |
7740 [(set_attr "type" "sse") | 8026 [(set_attr "type" "sse") |
8027 (set_attr "atom_sse_attr" "fence") | |
7741 (set_attr "memory" "unknown")]) | 8028 (set_attr "memory" "unknown")]) |
7742 | 8029 |
7743 (define_expand "sse2_mfence" | 8030 (define_expand "sse2_mfence" |
7744 [(set (match_dup 0) | 8031 [(set (match_dup 0) |
7745 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] | 8032 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] |
7753 [(set (match_operand:BLK 0 "" "") | 8040 [(set (match_operand:BLK 0 "" "") |
7754 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] | 8041 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] |
7755 "TARGET_64BIT || TARGET_SSE2" | 8042 "TARGET_64BIT || TARGET_SSE2" |
7756 "mfence" | 8043 "mfence" |
7757 [(set_attr "type" "sse") | 8044 [(set_attr "type" "sse") |
8045 (set_attr "length_address" "0") | |
8046 (set_attr "atom_sse_attr" "fence") | |
7758 (set_attr "memory" "unknown")]) | 8047 (set_attr "memory" "unknown")]) |
7759 | 8048 |
7760 (define_expand "sse2_lfence" | 8049 (define_expand "sse2_lfence" |
7761 [(set (match_dup 0) | 8050 [(set (match_dup 0) |
7762 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] | 8051 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] |
7770 [(set (match_operand:BLK 0 "" "") | 8059 [(set (match_operand:BLK 0 "" "") |
7771 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] | 8060 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] |
7772 "TARGET_SSE2" | 8061 "TARGET_SSE2" |
7773 "lfence" | 8062 "lfence" |
7774 [(set_attr "type" "sse") | 8063 [(set_attr "type" "sse") |
8064 (set_attr "length_address" "0") | |
8065 (set_attr "atom_sse_attr" "lfence") | |
7775 (set_attr "memory" "unknown")]) | 8066 (set_attr "memory" "unknown")]) |
7776 | 8067 |
7777 (define_insn "sse3_mwait" | 8068 (define_insn "sse3_mwait" |
7778 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") | 8069 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") |
7779 (match_operand:SI 1 "register_operand" "c")] | 8070 (match_operand:SI 1 "register_operand" "c")] |
7850 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) | 8141 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) |
7851 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] | 8142 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] |
7852 "TARGET_AVX" | 8143 "TARGET_AVX" |
7853 "vphaddw\t{%2, %1, %0|%0, %1, %2}" | 8144 "vphaddw\t{%2, %1, %0|%0, %1, %2}" |
7854 [(set_attr "type" "sseiadd") | 8145 [(set_attr "type" "sseiadd") |
8146 (set_attr "prefix_extra" "1") | |
7855 (set_attr "prefix" "vex") | 8147 (set_attr "prefix" "vex") |
7856 (set_attr "mode" "TI")]) | 8148 (set_attr "mode" "TI")]) |
7857 | 8149 |
7858 (define_insn "ssse3_phaddwv8hi3" | 8150 (define_insn "ssse3_phaddwv8hi3" |
7859 [(set (match_operand:V8HI 0 "register_operand" "=x") | 8151 [(set (match_operand:V8HI 0 "register_operand" "=x") |
7893 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) | 8185 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) |
7894 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] | 8186 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] |
7895 "TARGET_SSSE3" | 8187 "TARGET_SSSE3" |
7896 "phaddw\t{%2, %0|%0, %2}" | 8188 "phaddw\t{%2, %0|%0, %2}" |
7897 [(set_attr "type" "sseiadd") | 8189 [(set_attr "type" "sseiadd") |
8190 (set_attr "atom_unit" "complex") | |
7898 (set_attr "prefix_data16" "1") | 8191 (set_attr "prefix_data16" "1") |
7899 (set_attr "prefix_extra" "1") | 8192 (set_attr "prefix_extra" "1") |
7900 (set_attr "mode" "TI")]) | 8193 (set_attr "mode" "TI")]) |
7901 | 8194 |
7902 (define_insn "ssse3_phaddwv4hi3" | 8195 (define_insn "ssse3_phaddwv4hi3" |
7921 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) | 8214 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) |
7922 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] | 8215 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] |
7923 "TARGET_SSSE3" | 8216 "TARGET_SSSE3" |
7924 "phaddw\t{%2, %0|%0, %2}" | 8217 "phaddw\t{%2, %0|%0, %2}" |
7925 [(set_attr "type" "sseiadd") | 8218 [(set_attr "type" "sseiadd") |
7926 (set_attr "prefix_extra" "1") | 8219 (set_attr "atom_unit" "complex") |
8220 (set_attr "prefix_extra" "1") | |
8221 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) | |
7927 (set_attr "mode" "DI")]) | 8222 (set_attr "mode" "DI")]) |
7928 | 8223 |
7929 (define_insn "*avx_phadddv4si3" | 8224 (define_insn "*avx_phadddv4si3" |
7930 [(set (match_operand:V4SI 0 "register_operand" "=x") | 8225 [(set (match_operand:V4SI 0 "register_operand" "=x") |
7931 (vec_concat:V4SI | 8226 (vec_concat:V4SI |
7948 (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) | 8243 (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) |
7949 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] | 8244 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] |
7950 "TARGET_AVX" | 8245 "TARGET_AVX" |
7951 "vphaddd\t{%2, %1, %0|%0, %1, %2}" | 8246 "vphaddd\t{%2, %1, %0|%0, %1, %2}" |
7952 [(set_attr "type" "sseiadd") | 8247 [(set_attr "type" "sseiadd") |
8248 (set_attr "prefix_extra" "1") | |
7953 (set_attr "prefix" "vex") | 8249 (set_attr "prefix" "vex") |
7954 (set_attr "mode" "TI")]) | 8250 (set_attr "mode" "TI")]) |
7955 | 8251 |
7956 (define_insn "ssse3_phadddv4si3" | 8252 (define_insn "ssse3_phadddv4si3" |
7957 [(set (match_operand:V4SI 0 "register_operand" "=x") | 8253 [(set (match_operand:V4SI 0 "register_operand" "=x") |
7975 (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) | 8271 (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) |
7976 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] | 8272 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] |
7977 "TARGET_SSSE3" | 8273 "TARGET_SSSE3" |
7978 "phaddd\t{%2, %0|%0, %2}" | 8274 "phaddd\t{%2, %0|%0, %2}" |
7979 [(set_attr "type" "sseiadd") | 8275 [(set_attr "type" "sseiadd") |
8276 (set_attr "atom_unit" "complex") | |
7980 (set_attr "prefix_data16" "1") | 8277 (set_attr "prefix_data16" "1") |
7981 (set_attr "prefix_extra" "1") | 8278 (set_attr "prefix_extra" "1") |
7982 (set_attr "mode" "TI")]) | 8279 (set_attr "mode" "TI")]) |
7983 | 8280 |
7984 (define_insn "ssse3_phadddv2si3" | 8281 (define_insn "ssse3_phadddv2si3" |
7995 (parallel [(const_int 0)])) | 8292 (parallel [(const_int 0)])) |
7996 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] | 8293 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] |
7997 "TARGET_SSSE3" | 8294 "TARGET_SSSE3" |
7998 "phaddd\t{%2, %0|%0, %2}" | 8295 "phaddd\t{%2, %0|%0, %2}" |
7999 [(set_attr "type" "sseiadd") | 8296 [(set_attr "type" "sseiadd") |
8000 (set_attr "prefix_extra" "1") | 8297 (set_attr "atom_unit" "complex") |
8298 (set_attr "prefix_extra" "1") | |
8299 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) | |
8001 (set_attr "mode" "DI")]) | 8300 (set_attr "mode" "DI")]) |
8002 | 8301 |
8003 (define_insn "*avx_phaddswv8hi3" | 8302 (define_insn "*avx_phaddswv8hi3" |
8004 [(set (match_operand:V8HI 0 "register_operand" "=x") | 8303 [(set (match_operand:V8HI 0 "register_operand" "=x") |
8005 (vec_concat:V8HI | 8304 (vec_concat:V8HI |
8038 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) | 8337 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) |
8039 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] | 8338 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] |
8040 "TARGET_AVX" | 8339 "TARGET_AVX" |
8041 "vphaddsw\t{%2, %1, %0|%0, %1, %2}" | 8340 "vphaddsw\t{%2, %1, %0|%0, %1, %2}" |
8042 [(set_attr "type" "sseiadd") | 8341 [(set_attr "type" "sseiadd") |
8342 (set_attr "prefix_extra" "1") | |
8043 (set_attr "prefix" "vex") | 8343 (set_attr "prefix" "vex") |
8044 (set_attr "mode" "TI")]) | 8344 (set_attr "mode" "TI")]) |
8045 | 8345 |
8046 (define_insn "ssse3_phaddswv8hi3" | 8346 (define_insn "ssse3_phaddswv8hi3" |
8047 [(set (match_operand:V8HI 0 "register_operand" "=x") | 8347 [(set (match_operand:V8HI 0 "register_operand" "=x") |
8081 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) | 8381 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) |
8082 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] | 8382 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] |
8083 "TARGET_SSSE3" | 8383 "TARGET_SSSE3" |
8084 "phaddsw\t{%2, %0|%0, %2}" | 8384 "phaddsw\t{%2, %0|%0, %2}" |
8085 [(set_attr "type" "sseiadd") | 8385 [(set_attr "type" "sseiadd") |
8386 (set_attr "atom_unit" "complex") | |
8086 (set_attr "prefix_data16" "1") | 8387 (set_attr "prefix_data16" "1") |
8087 (set_attr "prefix_extra" "1") | 8388 (set_attr "prefix_extra" "1") |
8088 (set_attr "mode" "TI")]) | 8389 (set_attr "mode" "TI")]) |
8089 | 8390 |
8090 (define_insn "ssse3_phaddswv4hi3" | 8391 (define_insn "ssse3_phaddswv4hi3" |
8109 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) | 8410 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) |
8110 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] | 8411 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] |
8111 "TARGET_SSSE3" | 8412 "TARGET_SSSE3" |
8112 "phaddsw\t{%2, %0|%0, %2}" | 8413 "phaddsw\t{%2, %0|%0, %2}" |
8113 [(set_attr "type" "sseiadd") | 8414 [(set_attr "type" "sseiadd") |
8114 (set_attr "prefix_extra" "1") | 8415 (set_attr "atom_unit" "complex") |
8416 (set_attr "prefix_extra" "1") | |
8417 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) | |
8115 (set_attr "mode" "DI")]) | 8418 (set_attr "mode" "DI")]) |
8116 | 8419 |
8117 (define_insn "*avx_phsubwv8hi3" | 8420 (define_insn "*avx_phsubwv8hi3" |
8118 [(set (match_operand:V8HI 0 "register_operand" "=x") | 8421 [(set (match_operand:V8HI 0 "register_operand" "=x") |
8119 (vec_concat:V8HI | 8422 (vec_concat:V8HI |
8152 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) | 8455 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) |
8153 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] | 8456 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] |
8154 "TARGET_AVX" | 8457 "TARGET_AVX" |
8155 "vphsubw\t{%2, %1, %0|%0, %1, %2}" | 8458 "vphsubw\t{%2, %1, %0|%0, %1, %2}" |
8156 [(set_attr "type" "sseiadd") | 8459 [(set_attr "type" "sseiadd") |
8460 (set_attr "prefix_extra" "1") | |
8157 (set_attr "prefix" "vex") | 8461 (set_attr "prefix" "vex") |
8158 (set_attr "mode" "TI")]) | 8462 (set_attr "mode" "TI")]) |
8159 | 8463 |
8160 (define_insn "ssse3_phsubwv8hi3" | 8464 (define_insn "ssse3_phsubwv8hi3" |
8161 [(set (match_operand:V8HI 0 "register_operand" "=x") | 8465 [(set (match_operand:V8HI 0 "register_operand" "=x") |
8195 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) | 8499 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) |
8196 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] | 8500 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] |
8197 "TARGET_SSSE3" | 8501 "TARGET_SSSE3" |
8198 "phsubw\t{%2, %0|%0, %2}" | 8502 "phsubw\t{%2, %0|%0, %2}" |
8199 [(set_attr "type" "sseiadd") | 8503 [(set_attr "type" "sseiadd") |
8504 (set_attr "atom_unit" "complex") | |
8200 (set_attr "prefix_data16" "1") | 8505 (set_attr "prefix_data16" "1") |
8201 (set_attr "prefix_extra" "1") | 8506 (set_attr "prefix_extra" "1") |
8202 (set_attr "mode" "TI")]) | 8507 (set_attr "mode" "TI")]) |
8203 | 8508 |
8204 (define_insn "ssse3_phsubwv4hi3" | 8509 (define_insn "ssse3_phsubwv4hi3" |
8223 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) | 8528 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) |
8224 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] | 8529 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] |
8225 "TARGET_SSSE3" | 8530 "TARGET_SSSE3" |
8226 "phsubw\t{%2, %0|%0, %2}" | 8531 "phsubw\t{%2, %0|%0, %2}" |
8227 [(set_attr "type" "sseiadd") | 8532 [(set_attr "type" "sseiadd") |
8228 (set_attr "prefix_extra" "1") | 8533 (set_attr "atom_unit" "complex") |
8534 (set_attr "prefix_extra" "1") | |
8535 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) | |
8229 (set_attr "mode" "DI")]) | 8536 (set_attr "mode" "DI")]) |
8230 | 8537 |
8231 (define_insn "*avx_phsubdv4si3" | 8538 (define_insn "*avx_phsubdv4si3" |
8232 [(set (match_operand:V4SI 0 "register_operand" "=x") | 8539 [(set (match_operand:V4SI 0 "register_operand" "=x") |
8233 (vec_concat:V4SI | 8540 (vec_concat:V4SI |
8250 (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) | 8557 (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) |
8251 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] | 8558 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] |
8252 "TARGET_AVX" | 8559 "TARGET_AVX" |
8253 "vphsubd\t{%2, %1, %0|%0, %1, %2}" | 8560 "vphsubd\t{%2, %1, %0|%0, %1, %2}" |
8254 [(set_attr "type" "sseiadd") | 8561 [(set_attr "type" "sseiadd") |
8562 (set_attr "prefix_extra" "1") | |
8255 (set_attr "prefix" "vex") | 8563 (set_attr "prefix" "vex") |
8256 (set_attr "mode" "TI")]) | 8564 (set_attr "mode" "TI")]) |
8257 | 8565 |
8258 (define_insn "ssse3_phsubdv4si3" | 8566 (define_insn "ssse3_phsubdv4si3" |
8259 [(set (match_operand:V4SI 0 "register_operand" "=x") | 8567 [(set (match_operand:V4SI 0 "register_operand" "=x") |
8277 (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) | 8585 (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) |
8278 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] | 8586 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] |
8279 "TARGET_SSSE3" | 8587 "TARGET_SSSE3" |
8280 "phsubd\t{%2, %0|%0, %2}" | 8588 "phsubd\t{%2, %0|%0, %2}" |
8281 [(set_attr "type" "sseiadd") | 8589 [(set_attr "type" "sseiadd") |
8590 (set_attr "atom_unit" "complex") | |
8282 (set_attr "prefix_data16" "1") | 8591 (set_attr "prefix_data16" "1") |
8283 (set_attr "prefix_extra" "1") | 8592 (set_attr "prefix_extra" "1") |
8284 (set_attr "mode" "TI")]) | 8593 (set_attr "mode" "TI")]) |
8285 | 8594 |
8286 (define_insn "ssse3_phsubdv2si3" | 8595 (define_insn "ssse3_phsubdv2si3" |
8297 (parallel [(const_int 0)])) | 8606 (parallel [(const_int 0)])) |
8298 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] | 8607 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] |
8299 "TARGET_SSSE3" | 8608 "TARGET_SSSE3" |
8300 "phsubd\t{%2, %0|%0, %2}" | 8609 "phsubd\t{%2, %0|%0, %2}" |
8301 [(set_attr "type" "sseiadd") | 8610 [(set_attr "type" "sseiadd") |
8302 (set_attr "prefix_extra" "1") | 8611 (set_attr "atom_unit" "complex") |
8612 (set_attr "prefix_extra" "1") | |
8613 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) | |
8303 (set_attr "mode" "DI")]) | 8614 (set_attr "mode" "DI")]) |
8304 | 8615 |
8305 (define_insn "*avx_phsubswv8hi3" | 8616 (define_insn "*avx_phsubswv8hi3" |
8306 [(set (match_operand:V8HI 0 "register_operand" "=x") | 8617 [(set (match_operand:V8HI 0 "register_operand" "=x") |
8307 (vec_concat:V8HI | 8618 (vec_concat:V8HI |
8340 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) | 8651 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) |
8341 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] | 8652 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] |
8342 "TARGET_AVX" | 8653 "TARGET_AVX" |
8343 "vphsubsw\t{%2, %1, %0|%0, %1, %2}" | 8654 "vphsubsw\t{%2, %1, %0|%0, %1, %2}" |
8344 [(set_attr "type" "sseiadd") | 8655 [(set_attr "type" "sseiadd") |
8656 (set_attr "prefix_extra" "1") | |
8345 (set_attr "prefix" "vex") | 8657 (set_attr "prefix" "vex") |
8346 (set_attr "mode" "TI")]) | 8658 (set_attr "mode" "TI")]) |
8347 | 8659 |
8348 (define_insn "ssse3_phsubswv8hi3" | 8660 (define_insn "ssse3_phsubswv8hi3" |
8349 [(set (match_operand:V8HI 0 "register_operand" "=x") | 8661 [(set (match_operand:V8HI 0 "register_operand" "=x") |
8383 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) | 8695 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) |
8384 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] | 8696 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] |
8385 "TARGET_SSSE3" | 8697 "TARGET_SSSE3" |
8386 "phsubsw\t{%2, %0|%0, %2}" | 8698 "phsubsw\t{%2, %0|%0, %2}" |
8387 [(set_attr "type" "sseiadd") | 8699 [(set_attr "type" "sseiadd") |
8700 (set_attr "atom_unit" "complex") | |
8388 (set_attr "prefix_data16" "1") | 8701 (set_attr "prefix_data16" "1") |
8389 (set_attr "prefix_extra" "1") | 8702 (set_attr "prefix_extra" "1") |
8390 (set_attr "mode" "TI")]) | 8703 (set_attr "mode" "TI")]) |
8391 | 8704 |
8392 (define_insn "ssse3_phsubswv4hi3" | 8705 (define_insn "ssse3_phsubswv4hi3" |
8411 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) | 8724 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) |
8412 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] | 8725 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] |
8413 "TARGET_SSSE3" | 8726 "TARGET_SSSE3" |
8414 "phsubsw\t{%2, %0|%0, %2}" | 8727 "phsubsw\t{%2, %0|%0, %2}" |
8415 [(set_attr "type" "sseiadd") | 8728 [(set_attr "type" "sseiadd") |
8416 (set_attr "prefix_extra" "1") | 8729 (set_attr "atom_unit" "complex") |
8730 (set_attr "prefix_extra" "1") | |
8731 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) | |
8417 (set_attr "mode" "DI")]) | 8732 (set_attr "mode" "DI")]) |
8418 | 8733 |
8419 (define_insn "*avx_pmaddubsw128" | 8734 (define_insn "*avx_pmaddubsw128" |
8420 [(set (match_operand:V8HI 0 "register_operand" "=x") | 8735 [(set (match_operand:V8HI 0 "register_operand" "=x") |
8421 (ss_plus:V8HI | 8736 (ss_plus:V8HI |
8464 (const_int 13) | 8779 (const_int 13) |
8465 (const_int 15)]))))))] | 8780 (const_int 15)]))))))] |
8466 "TARGET_AVX" | 8781 "TARGET_AVX" |
8467 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" | 8782 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" |
8468 [(set_attr "type" "sseiadd") | 8783 [(set_attr "type" "sseiadd") |
8784 (set_attr "prefix_extra" "1") | |
8469 (set_attr "prefix" "vex") | 8785 (set_attr "prefix" "vex") |
8470 (set_attr "mode" "TI")]) | 8786 (set_attr "mode" "TI")]) |
8471 | 8787 |
8472 (define_insn "ssse3_pmaddubsw128" | 8788 (define_insn "ssse3_pmaddubsw128" |
8473 [(set (match_operand:V8HI 0 "register_operand" "=x") | 8789 [(set (match_operand:V8HI 0 "register_operand" "=x") |
8517 (const_int 13) | 8833 (const_int 13) |
8518 (const_int 15)]))))))] | 8834 (const_int 15)]))))))] |
8519 "TARGET_SSSE3" | 8835 "TARGET_SSSE3" |
8520 "pmaddubsw\t{%2, %0|%0, %2}" | 8836 "pmaddubsw\t{%2, %0|%0, %2}" |
8521 [(set_attr "type" "sseiadd") | 8837 [(set_attr "type" "sseiadd") |
8838 (set_attr "atom_unit" "simul") | |
8522 (set_attr "prefix_data16" "1") | 8839 (set_attr "prefix_data16" "1") |
8523 (set_attr "prefix_extra" "1") | 8840 (set_attr "prefix_extra" "1") |
8524 (set_attr "mode" "TI")]) | 8841 (set_attr "mode" "TI")]) |
8525 | 8842 |
8526 (define_insn "ssse3_pmaddubsw" | 8843 (define_insn "ssse3_pmaddubsw" |
8555 (const_int 5) | 8872 (const_int 5) |
8556 (const_int 7)]))))))] | 8873 (const_int 7)]))))))] |
8557 "TARGET_SSSE3" | 8874 "TARGET_SSSE3" |
8558 "pmaddubsw\t{%2, %0|%0, %2}" | 8875 "pmaddubsw\t{%2, %0|%0, %2}" |
8559 [(set_attr "type" "sseiadd") | 8876 [(set_attr "type" "sseiadd") |
8560 (set_attr "prefix_extra" "1") | 8877 (set_attr "atom_unit" "simul") |
8878 (set_attr "prefix_extra" "1") | |
8879 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) | |
8561 (set_attr "mode" "DI")]) | 8880 (set_attr "mode" "DI")]) |
8562 | 8881 |
8563 (define_expand "ssse3_pmulhrswv8hi3" | 8882 (define_expand "ssse3_pmulhrswv8hi3" |
8564 [(set (match_operand:V8HI 0 "register_operand" "") | 8883 [(set (match_operand:V8HI 0 "register_operand" "") |
8565 (truncate:V8HI | 8884 (truncate:V8HI |
8598 (const_int 1) (const_int 1)])) | 8917 (const_int 1) (const_int 1)])) |
8599 (const_int 1))))] | 8918 (const_int 1))))] |
8600 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)" | 8919 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)" |
8601 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" | 8920 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" |
8602 [(set_attr "type" "sseimul") | 8921 [(set_attr "type" "sseimul") |
8922 (set_attr "prefix_extra" "1") | |
8603 (set_attr "prefix" "vex") | 8923 (set_attr "prefix" "vex") |
8604 (set_attr "mode" "TI")]) | 8924 (set_attr "mode" "TI")]) |
8605 | 8925 |
8606 (define_insn "*ssse3_pmulhrswv8hi3" | 8926 (define_insn "*ssse3_pmulhrswv8hi3" |
8607 [(set (match_operand:V8HI 0 "register_operand" "=x") | 8927 [(set (match_operand:V8HI 0 "register_operand" "=x") |
8662 (const_int 1))))] | 8982 (const_int 1))))] |
8663 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)" | 8983 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)" |
8664 "pmulhrsw\t{%2, %0|%0, %2}" | 8984 "pmulhrsw\t{%2, %0|%0, %2}" |
8665 [(set_attr "type" "sseimul") | 8985 [(set_attr "type" "sseimul") |
8666 (set_attr "prefix_extra" "1") | 8986 (set_attr "prefix_extra" "1") |
8987 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) | |
8667 (set_attr "mode" "DI")]) | 8988 (set_attr "mode" "DI")]) |
8668 | 8989 |
8669 (define_insn "*avx_pshufbv16qi3" | 8990 (define_insn "*avx_pshufbv16qi3" |
8670 [(set (match_operand:V16QI 0 "register_operand" "=x") | 8991 [(set (match_operand:V16QI 0 "register_operand" "=x") |
8671 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") | 8992 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") |
8672 (match_operand:V16QI 2 "nonimmediate_operand" "xm")] | 8993 (match_operand:V16QI 2 "nonimmediate_operand" "xm")] |
8673 UNSPEC_PSHUFB))] | 8994 UNSPEC_PSHUFB))] |
8674 "TARGET_AVX" | 8995 "TARGET_AVX" |
8675 "vpshufb\t{%2, %1, %0|%0, %1, %2}"; | 8996 "vpshufb\t{%2, %1, %0|%0, %1, %2}"; |
8676 [(set_attr "type" "sselog1") | 8997 [(set_attr "type" "sselog1") |
8998 (set_attr "prefix_extra" "1") | |
8677 (set_attr "prefix" "vex") | 8999 (set_attr "prefix" "vex") |
8678 (set_attr "mode" "TI")]) | 9000 (set_attr "mode" "TI")]) |
8679 | 9001 |
8680 (define_insn "ssse3_pshufbv16qi3" | 9002 (define_insn "ssse3_pshufbv16qi3" |
8681 [(set (match_operand:V16QI 0 "register_operand" "=x") | 9003 [(set (match_operand:V16QI 0 "register_operand" "=x") |
8696 UNSPEC_PSHUFB))] | 9018 UNSPEC_PSHUFB))] |
8697 "TARGET_SSSE3" | 9019 "TARGET_SSSE3" |
8698 "pshufb\t{%2, %0|%0, %2}"; | 9020 "pshufb\t{%2, %0|%0, %2}"; |
8699 [(set_attr "type" "sselog1") | 9021 [(set_attr "type" "sselog1") |
8700 (set_attr "prefix_extra" "1") | 9022 (set_attr "prefix_extra" "1") |
9023 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) | |
8701 (set_attr "mode" "DI")]) | 9024 (set_attr "mode" "DI")]) |
8702 | 9025 |
8703 (define_insn "*avx_psign<mode>3" | 9026 (define_insn "*avx_psign<mode>3" |
8704 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") | 9027 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") |
8705 (unspec:SSEMODE124 | 9028 (unspec:SSEMODE124 |
8707 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")] | 9030 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")] |
8708 UNSPEC_PSIGN))] | 9031 UNSPEC_PSIGN))] |
8709 "TARGET_AVX" | 9032 "TARGET_AVX" |
8710 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"; | 9033 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"; |
8711 [(set_attr "type" "sselog1") | 9034 [(set_attr "type" "sselog1") |
9035 (set_attr "prefix_extra" "1") | |
8712 (set_attr "prefix" "vex") | 9036 (set_attr "prefix" "vex") |
8713 (set_attr "mode" "TI")]) | 9037 (set_attr "mode" "TI")]) |
8714 | 9038 |
8715 (define_insn "ssse3_psign<mode>3" | 9039 (define_insn "ssse3_psign<mode>3" |
8716 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") | 9040 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") |
8733 UNSPEC_PSIGN))] | 9057 UNSPEC_PSIGN))] |
8734 "TARGET_SSSE3" | 9058 "TARGET_SSSE3" |
8735 "psign<mmxvecsize>\t{%2, %0|%0, %2}"; | 9059 "psign<mmxvecsize>\t{%2, %0|%0, %2}"; |
8736 [(set_attr "type" "sselog1") | 9060 [(set_attr "type" "sselog1") |
8737 (set_attr "prefix_extra" "1") | 9061 (set_attr "prefix_extra" "1") |
9062 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) | |
8738 (set_attr "mode" "DI")]) | 9063 (set_attr "mode" "DI")]) |
8739 | 9064 |
8740 (define_insn "*avx_palignrti" | 9065 (define_insn "*avx_palignrti" |
8741 [(set (match_operand:TI 0 "register_operand" "=x") | 9066 [(set (match_operand:TI 0 "register_operand" "=x") |
8742 (unspec:TI [(match_operand:TI 1 "register_operand" "x") | 9067 (unspec:TI [(match_operand:TI 1 "register_operand" "x") |
8747 { | 9072 { |
8748 operands[3] = GEN_INT (INTVAL (operands[3]) / 8); | 9073 operands[3] = GEN_INT (INTVAL (operands[3]) / 8); |
8749 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"; | 9074 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
8750 } | 9075 } |
8751 [(set_attr "type" "sseishft") | 9076 [(set_attr "type" "sseishft") |
9077 (set_attr "prefix_extra" "1") | |
9078 (set_attr "length_immediate" "1") | |
8752 (set_attr "prefix" "vex") | 9079 (set_attr "prefix" "vex") |
8753 (set_attr "mode" "TI")]) | 9080 (set_attr "mode" "TI")]) |
8754 | 9081 |
8755 (define_insn "ssse3_palignrti" | 9082 (define_insn "ssse3_palignrti" |
8756 [(set (match_operand:TI 0 "register_operand" "=x") | 9083 [(set (match_operand:TI 0 "register_operand" "=x") |
8762 { | 9089 { |
8763 operands[3] = GEN_INT (INTVAL (operands[3]) / 8); | 9090 operands[3] = GEN_INT (INTVAL (operands[3]) / 8); |
8764 return "palignr\t{%3, %2, %0|%0, %2, %3}"; | 9091 return "palignr\t{%3, %2, %0|%0, %2, %3}"; |
8765 } | 9092 } |
8766 [(set_attr "type" "sseishft") | 9093 [(set_attr "type" "sseishft") |
9094 (set_attr "atom_unit" "sishuf") | |
8767 (set_attr "prefix_data16" "1") | 9095 (set_attr "prefix_data16" "1") |
8768 (set_attr "prefix_extra" "1") | 9096 (set_attr "prefix_extra" "1") |
9097 (set_attr "length_immediate" "1") | |
8769 (set_attr "mode" "TI")]) | 9098 (set_attr "mode" "TI")]) |
8770 | 9099 |
8771 (define_insn "ssse3_palignrdi" | 9100 (define_insn "ssse3_palignrdi" |
8772 [(set (match_operand:DI 0 "register_operand" "=y") | 9101 [(set (match_operand:DI 0 "register_operand" "=y") |
8773 (unspec:DI [(match_operand:DI 1 "register_operand" "0") | 9102 (unspec:DI [(match_operand:DI 1 "register_operand" "0") |
8778 { | 9107 { |
8779 operands[3] = GEN_INT (INTVAL (operands[3]) / 8); | 9108 operands[3] = GEN_INT (INTVAL (operands[3]) / 8); |
8780 return "palignr\t{%3, %2, %0|%0, %2, %3}"; | 9109 return "palignr\t{%3, %2, %0|%0, %2, %3}"; |
8781 } | 9110 } |
8782 [(set_attr "type" "sseishft") | 9111 [(set_attr "type" "sseishft") |
8783 (set_attr "prefix_extra" "1") | 9112 (set_attr "atom_unit" "sishuf") |
9113 (set_attr "prefix_extra" "1") | |
9114 (set_attr "length_immediate" "1") | |
9115 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) | |
8784 (set_attr "mode" "DI")]) | 9116 (set_attr "mode" "DI")]) |
8785 | 9117 |
8786 (define_insn "abs<mode>2" | 9118 (define_insn "abs<mode>2" |
8787 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") | 9119 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") |
8788 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))] | 9120 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))] |
8798 [(set (match_operand:MMXMODEI 0 "register_operand" "=y") | 9130 [(set (match_operand:MMXMODEI 0 "register_operand" "=y") |
8799 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))] | 9131 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))] |
8800 "TARGET_SSSE3" | 9132 "TARGET_SSSE3" |
8801 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"; | 9133 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"; |
8802 [(set_attr "type" "sselog1") | 9134 [(set_attr "type" "sselog1") |
8803 (set_attr "prefix_extra" "1") | 9135 (set_attr "prefix_rep" "0") |
9136 (set_attr "prefix_extra" "1") | |
9137 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)")) | |
8804 (set_attr "mode" "DI")]) | 9138 (set_attr "mode" "DI")]) |
8805 | 9139 |
8806 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 9140 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
8807 ;; | 9141 ;; |
8808 ;; AMD SSE4A instructions | 9142 ;; AMD SSE4A instructions |
8839 UNSPEC_EXTRQI))] | 9173 UNSPEC_EXTRQI))] |
8840 "TARGET_SSE4A" | 9174 "TARGET_SSE4A" |
8841 "extrq\t{%3, %2, %0|%0, %2, %3}" | 9175 "extrq\t{%3, %2, %0|%0, %2, %3}" |
8842 [(set_attr "type" "sse") | 9176 [(set_attr "type" "sse") |
8843 (set_attr "prefix_data16" "1") | 9177 (set_attr "prefix_data16" "1") |
9178 (set_attr "length_immediate" "2") | |
8844 (set_attr "mode" "TI")]) | 9179 (set_attr "mode" "TI")]) |
8845 | 9180 |
8846 (define_insn "sse4a_extrq" | 9181 (define_insn "sse4a_extrq" |
8847 [(set (match_operand:V2DI 0 "register_operand" "=x") | 9182 [(set (match_operand:V2DI 0 "register_operand" "=x") |
8848 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") | 9183 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") |
8862 (match_operand 4 "const_int_operand" "")] | 9197 (match_operand 4 "const_int_operand" "")] |
8863 UNSPEC_INSERTQI))] | 9198 UNSPEC_INSERTQI))] |
8864 "TARGET_SSE4A" | 9199 "TARGET_SSE4A" |
8865 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}" | 9200 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}" |
8866 [(set_attr "type" "sseins") | 9201 [(set_attr "type" "sseins") |
9202 (set_attr "prefix_data16" "0") | |
8867 (set_attr "prefix_rep" "1") | 9203 (set_attr "prefix_rep" "1") |
9204 (set_attr "length_immediate" "2") | |
8868 (set_attr "mode" "TI")]) | 9205 (set_attr "mode" "TI")]) |
8869 | 9206 |
8870 (define_insn "sse4a_insertq" | 9207 (define_insn "sse4a_insertq" |
8871 [(set (match_operand:V2DI 0 "register_operand" "=x") | 9208 [(set (match_operand:V2DI 0 "register_operand" "=x") |
8872 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") | 9209 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") |
8873 (match_operand:V2DI 2 "register_operand" "x")] | 9210 (match_operand:V2DI 2 "register_operand" "x")] |
8874 UNSPEC_INSERTQ))] | 9211 UNSPEC_INSERTQ))] |
8875 "TARGET_SSE4A" | 9212 "TARGET_SSE4A" |
8876 "insertq\t{%2, %0|%0, %2}" | 9213 "insertq\t{%2, %0|%0, %2}" |
8877 [(set_attr "type" "sseins") | 9214 [(set_attr "type" "sseins") |
9215 (set_attr "prefix_data16" "0") | |
8878 (set_attr "prefix_rep" "1") | 9216 (set_attr "prefix_rep" "1") |
8879 (set_attr "mode" "TI")]) | 9217 (set_attr "mode" "TI")]) |
8880 | 9218 |
8881 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 9219 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
8882 ;; | 9220 ;; |
8891 (match_operand:AVXMODEF2P 1 "register_operand" "x") | 9229 (match_operand:AVXMODEF2P 1 "register_operand" "x") |
8892 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))] | 9230 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))] |
8893 "TARGET_AVX" | 9231 "TARGET_AVX" |
8894 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 9232 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
8895 [(set_attr "type" "ssemov") | 9233 [(set_attr "type" "ssemov") |
9234 (set_attr "prefix_extra" "1") | |
9235 (set_attr "length_immediate" "1") | |
8896 (set_attr "prefix" "vex") | 9236 (set_attr "prefix" "vex") |
8897 (set_attr "mode" "<avxvecmode>")]) | 9237 (set_attr "mode" "<avxvecmode>")]) |
8898 | 9238 |
8899 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>" | 9239 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>" |
8900 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") | 9240 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") |
8904 (match_operand:AVXMODEF2P 3 "register_operand" "x")] | 9244 (match_operand:AVXMODEF2P 3 "register_operand" "x")] |
8905 UNSPEC_BLENDV))] | 9245 UNSPEC_BLENDV))] |
8906 "TARGET_AVX" | 9246 "TARGET_AVX" |
8907 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 9247 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
8908 [(set_attr "type" "ssemov") | 9248 [(set_attr "type" "ssemov") |
9249 (set_attr "prefix_extra" "1") | |
9250 (set_attr "length_immediate" "1") | |
8909 (set_attr "prefix" "vex") | 9251 (set_attr "prefix" "vex") |
8910 (set_attr "mode" "<avxvecmode>")]) | 9252 (set_attr "mode" "<avxvecmode>")]) |
8911 | 9253 |
8912 (define_insn "sse4_1_blendp<ssemodesuffixf2c>" | 9254 (define_insn "sse4_1_blendp<ssemodesuffixf2c>" |
8913 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") | 9255 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") |
8916 (match_operand:SSEMODEF2P 1 "register_operand" "0") | 9258 (match_operand:SSEMODEF2P 1 "register_operand" "0") |
8917 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))] | 9259 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))] |
8918 "TARGET_SSE4_1" | 9260 "TARGET_SSE4_1" |
8919 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" | 9261 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" |
8920 [(set_attr "type" "ssemov") | 9262 [(set_attr "type" "ssemov") |
8921 (set_attr "prefix_extra" "1") | 9263 (set_attr "prefix_data16" "1") |
9264 (set_attr "prefix_extra" "1") | |
9265 (set_attr "length_immediate" "1") | |
8922 (set_attr "mode" "<MODE>")]) | 9266 (set_attr "mode" "<MODE>")]) |
8923 | 9267 |
8924 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>" | 9268 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>" |
8925 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x") | 9269 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x") |
8926 (unspec:SSEMODEF2P | 9270 (unspec:SSEMODEF2P |
8929 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")] | 9273 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")] |
8930 UNSPEC_BLENDV))] | 9274 UNSPEC_BLENDV))] |
8931 "TARGET_SSE4_1" | 9275 "TARGET_SSE4_1" |
8932 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" | 9276 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" |
8933 [(set_attr "type" "ssemov") | 9277 [(set_attr "type" "ssemov") |
9278 (set_attr "prefix_data16" "1") | |
8934 (set_attr "prefix_extra" "1") | 9279 (set_attr "prefix_extra" "1") |
8935 (set_attr "mode" "<MODE>")]) | 9280 (set_attr "mode" "<MODE>")]) |
8936 | 9281 |
8937 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>" | 9282 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>" |
8938 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") | 9283 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") |
8943 UNSPEC_DP))] | 9288 UNSPEC_DP))] |
8944 "TARGET_AVX" | 9289 "TARGET_AVX" |
8945 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 9290 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
8946 [(set_attr "type" "ssemul") | 9291 [(set_attr "type" "ssemul") |
8947 (set_attr "prefix" "vex") | 9292 (set_attr "prefix" "vex") |
9293 (set_attr "prefix_extra" "1") | |
9294 (set_attr "length_immediate" "1") | |
8948 (set_attr "mode" "<avxvecmode>")]) | 9295 (set_attr "mode" "<avxvecmode>")]) |
8949 | 9296 |
8950 (define_insn "sse4_1_dpp<ssemodesuffixf2c>" | 9297 (define_insn "sse4_1_dpp<ssemodesuffixf2c>" |
8951 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") | 9298 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") |
8952 (unspec:SSEMODEF2P | 9299 (unspec:SSEMODEF2P |
8955 (match_operand:SI 3 "const_0_to_255_operand" "n")] | 9302 (match_operand:SI 3 "const_0_to_255_operand" "n")] |
8956 UNSPEC_DP))] | 9303 UNSPEC_DP))] |
8957 "TARGET_SSE4_1" | 9304 "TARGET_SSE4_1" |
8958 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" | 9305 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" |
8959 [(set_attr "type" "ssemul") | 9306 [(set_attr "type" "ssemul") |
8960 (set_attr "prefix_extra" "1") | 9307 (set_attr "prefix_data16" "1") |
9308 (set_attr "prefix_extra" "1") | |
9309 (set_attr "length_immediate" "1") | |
8961 (set_attr "mode" "<MODE>")]) | 9310 (set_attr "mode" "<MODE>")]) |
8962 | 9311 |
8963 (define_insn "sse4_1_movntdqa" | 9312 (define_insn "sse4_1_movntdqa" |
8964 [(set (match_operand:V2DI 0 "register_operand" "=x") | 9313 [(set (match_operand:V2DI 0 "register_operand" "=x") |
8965 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")] | 9314 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")] |
8966 UNSPEC_MOVNTDQA))] | 9315 UNSPEC_MOVNTDQA))] |
8967 "TARGET_SSE4_1" | 9316 "TARGET_SSE4_1" |
8968 "%vmovntdqa\t{%1, %0|%0, %1}" | 9317 "%vmovntdqa\t{%1, %0|%0, %1}" |
8969 [(set_attr "type" "ssecvt") | 9318 [(set_attr "type" "ssemov") |
8970 (set_attr "prefix_extra" "1") | 9319 (set_attr "prefix_extra" "1") |
8971 (set_attr "prefix" "maybe_vex") | 9320 (set_attr "prefix" "maybe_vex") |
8972 (set_attr "mode" "TI")]) | 9321 (set_attr "mode" "TI")]) |
8973 | 9322 |
8974 (define_insn "*avx_mpsadbw" | 9323 (define_insn "*avx_mpsadbw" |
8979 UNSPEC_MPSADBW))] | 9328 UNSPEC_MPSADBW))] |
8980 "TARGET_AVX" | 9329 "TARGET_AVX" |
8981 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 9330 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
8982 [(set_attr "type" "sselog1") | 9331 [(set_attr "type" "sselog1") |
8983 (set_attr "prefix" "vex") | 9332 (set_attr "prefix" "vex") |
9333 (set_attr "prefix_extra" "1") | |
9334 (set_attr "length_immediate" "1") | |
8984 (set_attr "mode" "TI")]) | 9335 (set_attr "mode" "TI")]) |
8985 | 9336 |
8986 (define_insn "sse4_1_mpsadbw" | 9337 (define_insn "sse4_1_mpsadbw" |
8987 [(set (match_operand:V16QI 0 "register_operand" "=x") | 9338 [(set (match_operand:V16QI 0 "register_operand" "=x") |
8988 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") | 9339 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") |
8991 UNSPEC_MPSADBW))] | 9342 UNSPEC_MPSADBW))] |
8992 "TARGET_SSE4_1" | 9343 "TARGET_SSE4_1" |
8993 "mpsadbw\t{%3, %2, %0|%0, %2, %3}" | 9344 "mpsadbw\t{%3, %2, %0|%0, %2, %3}" |
8994 [(set_attr "type" "sselog1") | 9345 [(set_attr "type" "sselog1") |
8995 (set_attr "prefix_extra" "1") | 9346 (set_attr "prefix_extra" "1") |
9347 (set_attr "length_immediate" "1") | |
8996 (set_attr "mode" "TI")]) | 9348 (set_attr "mode" "TI")]) |
8997 | 9349 |
8998 (define_insn "*avx_packusdw" | 9350 (define_insn "*avx_packusdw" |
8999 [(set (match_operand:V8HI 0 "register_operand" "=x") | 9351 [(set (match_operand:V8HI 0 "register_operand" "=x") |
9000 (vec_concat:V8HI | 9352 (vec_concat:V8HI |
9003 (us_truncate:V4HI | 9355 (us_truncate:V4HI |
9004 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))] | 9356 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))] |
9005 "TARGET_AVX" | 9357 "TARGET_AVX" |
9006 "vpackusdw\t{%2, %1, %0|%0, %1, %2}" | 9358 "vpackusdw\t{%2, %1, %0|%0, %1, %2}" |
9007 [(set_attr "type" "sselog") | 9359 [(set_attr "type" "sselog") |
9360 (set_attr "prefix_extra" "1") | |
9008 (set_attr "prefix" "vex") | 9361 (set_attr "prefix" "vex") |
9009 (set_attr "mode" "TI")]) | 9362 (set_attr "mode" "TI")]) |
9010 | 9363 |
9011 (define_insn "sse4_1_packusdw" | 9364 (define_insn "sse4_1_packusdw" |
9012 [(set (match_operand:V8HI 0 "register_operand" "=x") | 9365 [(set (match_operand:V8HI 0 "register_operand" "=x") |
9028 (match_operand:V16QI 3 "register_operand" "x")] | 9381 (match_operand:V16QI 3 "register_operand" "x")] |
9029 UNSPEC_BLENDV))] | 9382 UNSPEC_BLENDV))] |
9030 "TARGET_AVX" | 9383 "TARGET_AVX" |
9031 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 9384 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
9032 [(set_attr "type" "ssemov") | 9385 [(set_attr "type" "ssemov") |
9386 (set_attr "prefix_extra" "1") | |
9387 (set_attr "length_immediate" "1") | |
9033 (set_attr "prefix" "vex") | 9388 (set_attr "prefix" "vex") |
9034 (set_attr "mode" "TI")]) | 9389 (set_attr "mode" "TI")]) |
9035 | 9390 |
9036 (define_insn "sse4_1_pblendvb" | 9391 (define_insn "sse4_1_pblendvb" |
9037 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x") | 9392 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x") |
9053 (match_operand:SI 3 "const_0_to_255_operand" "n")))] | 9408 (match_operand:SI 3 "const_0_to_255_operand" "n")))] |
9054 "TARGET_AVX" | 9409 "TARGET_AVX" |
9055 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 9410 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
9056 [(set_attr "type" "ssemov") | 9411 [(set_attr "type" "ssemov") |
9057 (set_attr "prefix" "vex") | 9412 (set_attr "prefix" "vex") |
9413 (set_attr "prefix_extra" "1") | |
9414 (set_attr "length_immediate" "1") | |
9058 (set_attr "mode" "TI")]) | 9415 (set_attr "mode" "TI")]) |
9059 | 9416 |
9060 (define_insn "sse4_1_pblendw" | 9417 (define_insn "sse4_1_pblendw" |
9061 [(set (match_operand:V8HI 0 "register_operand" "=x") | 9418 [(set (match_operand:V8HI 0 "register_operand" "=x") |
9062 (vec_merge:V8HI | 9419 (vec_merge:V8HI |
9065 (match_operand:SI 3 "const_0_to_255_operand" "n")))] | 9422 (match_operand:SI 3 "const_0_to_255_operand" "n")))] |
9066 "TARGET_SSE4_1" | 9423 "TARGET_SSE4_1" |
9067 "pblendw\t{%3, %2, %0|%0, %2, %3}" | 9424 "pblendw\t{%3, %2, %0|%0, %2, %3}" |
9068 [(set_attr "type" "ssemov") | 9425 [(set_attr "type" "ssemov") |
9069 (set_attr "prefix_extra" "1") | 9426 (set_attr "prefix_extra" "1") |
9427 (set_attr "length_immediate" "1") | |
9070 (set_attr "mode" "TI")]) | 9428 (set_attr "mode" "TI")]) |
9071 | 9429 |
9072 (define_insn "sse4_1_phminposuw" | 9430 (define_insn "sse4_1_phminposuw" |
9073 [(set (match_operand:V8HI 0 "register_operand" "=x") | 9431 [(set (match_operand:V8HI 0 "register_operand" "=x") |
9074 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")] | 9432 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")] |
9476 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")] | 9834 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")] |
9477 UNSPEC_VTESTP))] | 9835 UNSPEC_VTESTP))] |
9478 "TARGET_AVX" | 9836 "TARGET_AVX" |
9479 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}" | 9837 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}" |
9480 [(set_attr "type" "ssecomi") | 9838 [(set_attr "type" "ssecomi") |
9839 (set_attr "prefix_extra" "1") | |
9481 (set_attr "prefix" "vex") | 9840 (set_attr "prefix" "vex") |
9482 (set_attr "mode" "<MODE>")]) | 9841 (set_attr "mode" "<MODE>")]) |
9483 | 9842 |
9484 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG. | 9843 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG. |
9485 ;; But it is not a really compare instruction. | 9844 ;; But it is not a really compare instruction. |
9489 (match_operand:V4DI 1 "nonimmediate_operand" "xm")] | 9848 (match_operand:V4DI 1 "nonimmediate_operand" "xm")] |
9490 UNSPEC_PTEST))] | 9849 UNSPEC_PTEST))] |
9491 "TARGET_AVX" | 9850 "TARGET_AVX" |
9492 "vptest\t{%1, %0|%0, %1}" | 9851 "vptest\t{%1, %0|%0, %1}" |
9493 [(set_attr "type" "ssecomi") | 9852 [(set_attr "type" "ssecomi") |
9853 (set_attr "prefix_extra" "1") | |
9494 (set_attr "prefix" "vex") | 9854 (set_attr "prefix" "vex") |
9495 (set_attr "mode" "OI")]) | 9855 (set_attr "mode" "OI")]) |
9496 | 9856 |
9497 (define_insn "sse4_1_ptest" | 9857 (define_insn "sse4_1_ptest" |
9498 [(set (reg:CC FLAGS_REG) | 9858 [(set (reg:CC FLAGS_REG) |
9513 (match_operand:SI 2 "const_0_to_15_operand" "n")] | 9873 (match_operand:SI 2 "const_0_to_15_operand" "n")] |
9514 UNSPEC_ROUND))] | 9874 UNSPEC_ROUND))] |
9515 "TARGET_AVX" | 9875 "TARGET_AVX" |
9516 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" | 9876 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" |
9517 [(set_attr "type" "ssecvt") | 9877 [(set_attr "type" "ssecvt") |
9878 (set_attr "prefix_extra" "1") | |
9879 (set_attr "length_immediate" "1") | |
9518 (set_attr "prefix" "vex") | 9880 (set_attr "prefix" "vex") |
9519 (set_attr "mode" "<MODE>")]) | 9881 (set_attr "mode" "<MODE>")]) |
9520 | 9882 |
9521 (define_insn "sse4_1_roundp<ssemodesuffixf2c>" | 9883 (define_insn "sse4_1_roundp<ssemodesuffixf2c>" |
9522 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") | 9884 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") |
9525 (match_operand:SI 2 "const_0_to_15_operand" "n")] | 9887 (match_operand:SI 2 "const_0_to_15_operand" "n")] |
9526 UNSPEC_ROUND))] | 9888 UNSPEC_ROUND))] |
9527 "TARGET_ROUND" | 9889 "TARGET_ROUND" |
9528 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" | 9890 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" |
9529 [(set_attr "type" "ssecvt") | 9891 [(set_attr "type" "ssecvt") |
9530 (set_attr "prefix_extra" "1") | 9892 (set_attr "prefix_data16" "1") |
9893 (set_attr "prefix_extra" "1") | |
9894 (set_attr "length_immediate" "1") | |
9531 (set_attr "prefix" "maybe_vex") | 9895 (set_attr "prefix" "maybe_vex") |
9532 (set_attr "mode" "<MODE>")]) | 9896 (set_attr "mode" "<MODE>")]) |
9533 | 9897 |
9534 (define_insn "*avx_rounds<ssemodesuffixf2c>" | 9898 (define_insn "*avx_rounds<ssemodesuffixf2c>" |
9535 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") | 9899 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") |
9541 (match_operand:SSEMODEF2P 1 "register_operand" "x") | 9905 (match_operand:SSEMODEF2P 1 "register_operand" "x") |
9542 (const_int 1)))] | 9906 (const_int 1)))] |
9543 "TARGET_AVX" | 9907 "TARGET_AVX" |
9544 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 9908 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
9545 [(set_attr "type" "ssecvt") | 9909 [(set_attr "type" "ssecvt") |
9910 (set_attr "prefix_extra" "1") | |
9911 (set_attr "length_immediate" "1") | |
9546 (set_attr "prefix" "vex") | 9912 (set_attr "prefix" "vex") |
9547 (set_attr "mode" "<MODE>")]) | 9913 (set_attr "mode" "<MODE>")]) |
9548 | 9914 |
9549 (define_insn "sse4_1_rounds<ssemodesuffixf2c>" | 9915 (define_insn "sse4_1_rounds<ssemodesuffixf2c>" |
9550 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") | 9916 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") |
9556 (match_operand:SSEMODEF2P 1 "register_operand" "0") | 9922 (match_operand:SSEMODEF2P 1 "register_operand" "0") |
9557 (const_int 1)))] | 9923 (const_int 1)))] |
9558 "TARGET_ROUND" | 9924 "TARGET_ROUND" |
9559 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" | 9925 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" |
9560 [(set_attr "type" "ssecvt") | 9926 [(set_attr "type" "ssecvt") |
9561 (set_attr "prefix_extra" "1") | 9927 (set_attr "prefix_data16" "1") |
9928 (set_attr "prefix_extra" "1") | |
9929 (set_attr "length_immediate" "1") | |
9562 (set_attr "mode" "<MODE>")]) | 9930 (set_attr "mode" "<MODE>")]) |
9563 | 9931 |
9564 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 9932 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
9565 ;; | 9933 ;; |
9566 ;; Intel SSE4.2 string/text processing instructions | 9934 ;; Intel SSE4.2 string/text processing instructions |
9591 (match_dup 4) | 9959 (match_dup 4) |
9592 (match_dup 5) | 9960 (match_dup 5) |
9593 (match_dup 6)] | 9961 (match_dup 6)] |
9594 UNSPEC_PCMPESTR))] | 9962 UNSPEC_PCMPESTR))] |
9595 "TARGET_SSE4_2 | 9963 "TARGET_SSE4_2 |
9596 && !(reload_completed || reload_in_progress)" | 9964 && can_create_pseudo_p ()" |
9597 "#" | 9965 "#" |
9598 "&& 1" | 9966 "&& 1" |
9599 [(const_int 0)] | 9967 [(const_int 0)] |
9600 { | 9968 { |
9601 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); | 9969 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); |
9618 DONE; | 9986 DONE; |
9619 } | 9987 } |
9620 [(set_attr "type" "sselog") | 9988 [(set_attr "type" "sselog") |
9621 (set_attr "prefix_data16" "1") | 9989 (set_attr "prefix_data16" "1") |
9622 (set_attr "prefix_extra" "1") | 9990 (set_attr "prefix_extra" "1") |
9991 (set_attr "length_immediate" "1") | |
9623 (set_attr "memory" "none,load") | 9992 (set_attr "memory" "none,load") |
9624 (set_attr "mode" "TI")]) | 9993 (set_attr "mode" "TI")]) |
9625 | 9994 |
9626 (define_insn "sse4_2_pcmpestri" | 9995 (define_insn "sse4_2_pcmpestri" |
9627 [(set (match_operand:SI 0 "register_operand" "=c,c") | 9996 [(set (match_operand:SI 0 "register_operand" "=c,c") |
9644 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}" | 10013 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}" |
9645 [(set_attr "type" "sselog") | 10014 [(set_attr "type" "sselog") |
9646 (set_attr "prefix_data16" "1") | 10015 (set_attr "prefix_data16" "1") |
9647 (set_attr "prefix_extra" "1") | 10016 (set_attr "prefix_extra" "1") |
9648 (set_attr "prefix" "maybe_vex") | 10017 (set_attr "prefix" "maybe_vex") |
10018 (set_attr "length_immediate" "1") | |
9649 (set_attr "memory" "none,load") | 10019 (set_attr "memory" "none,load") |
9650 (set_attr "mode" "TI")]) | 10020 (set_attr "mode" "TI")]) |
9651 | 10021 |
9652 (define_insn "sse4_2_pcmpestrm" | 10022 (define_insn "sse4_2_pcmpestrm" |
9653 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz") | 10023 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz") |
9669 "TARGET_SSE4_2" | 10039 "TARGET_SSE4_2" |
9670 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}" | 10040 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}" |
9671 [(set_attr "type" "sselog") | 10041 [(set_attr "type" "sselog") |
9672 (set_attr "prefix_data16" "1") | 10042 (set_attr "prefix_data16" "1") |
9673 (set_attr "prefix_extra" "1") | 10043 (set_attr "prefix_extra" "1") |
10044 (set_attr "length_immediate" "1") | |
9674 (set_attr "prefix" "maybe_vex") | 10045 (set_attr "prefix" "maybe_vex") |
9675 (set_attr "memory" "none,load") | 10046 (set_attr "memory" "none,load") |
9676 (set_attr "mode" "TI")]) | 10047 (set_attr "mode" "TI")]) |
9677 | 10048 |
9678 (define_insn "sse4_2_pcmpestr_cconly" | 10049 (define_insn "sse4_2_pcmpestr_cconly" |
9693 %vpcmpestri\t{%6, %4, %2|%2, %4, %6} | 10064 %vpcmpestri\t{%6, %4, %2|%2, %4, %6} |
9694 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}" | 10065 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}" |
9695 [(set_attr "type" "sselog") | 10066 [(set_attr "type" "sselog") |
9696 (set_attr "prefix_data16" "1") | 10067 (set_attr "prefix_data16" "1") |
9697 (set_attr "prefix_extra" "1") | 10068 (set_attr "prefix_extra" "1") |
10069 (set_attr "length_immediate" "1") | |
9698 (set_attr "memory" "none,load,none,load") | 10070 (set_attr "memory" "none,load,none,load") |
9699 (set_attr "prefix" "maybe_vex") | 10071 (set_attr "prefix" "maybe_vex") |
9700 (set_attr "mode" "TI")]) | 10072 (set_attr "mode" "TI")]) |
9701 | 10073 |
9702 (define_insn_and_split "sse4_2_pcmpistr" | 10074 (define_insn_and_split "sse4_2_pcmpistr" |
9717 [(match_dup 2) | 10089 [(match_dup 2) |
9718 (match_dup 3) | 10090 (match_dup 3) |
9719 (match_dup 4)] | 10091 (match_dup 4)] |
9720 UNSPEC_PCMPISTR))] | 10092 UNSPEC_PCMPISTR))] |
9721 "TARGET_SSE4_2 | 10093 "TARGET_SSE4_2 |
9722 && !(reload_completed || reload_in_progress)" | 10094 && can_create_pseudo_p ()" |
9723 "#" | 10095 "#" |
9724 "&& 1" | 10096 "&& 1" |
9725 [(const_int 0)] | 10097 [(const_int 0)] |
9726 { | 10098 { |
9727 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); | 10099 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); |
9741 DONE; | 10113 DONE; |
9742 } | 10114 } |
9743 [(set_attr "type" "sselog") | 10115 [(set_attr "type" "sselog") |
9744 (set_attr "prefix_data16" "1") | 10116 (set_attr "prefix_data16" "1") |
9745 (set_attr "prefix_extra" "1") | 10117 (set_attr "prefix_extra" "1") |
10118 (set_attr "length_immediate" "1") | |
9746 (set_attr "memory" "none,load") | 10119 (set_attr "memory" "none,load") |
9747 (set_attr "mode" "TI")]) | 10120 (set_attr "mode" "TI")]) |
9748 | 10121 |
9749 (define_insn "sse4_2_pcmpistri" | 10122 (define_insn "sse4_2_pcmpistri" |
9750 [(set (match_operand:SI 0 "register_operand" "=c,c") | 10123 [(set (match_operand:SI 0 "register_operand" "=c,c") |
9762 "TARGET_SSE4_2" | 10135 "TARGET_SSE4_2" |
9763 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}" | 10136 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}" |
9764 [(set_attr "type" "sselog") | 10137 [(set_attr "type" "sselog") |
9765 (set_attr "prefix_data16" "1") | 10138 (set_attr "prefix_data16" "1") |
9766 (set_attr "prefix_extra" "1") | 10139 (set_attr "prefix_extra" "1") |
10140 (set_attr "length_immediate" "1") | |
9767 (set_attr "prefix" "maybe_vex") | 10141 (set_attr "prefix" "maybe_vex") |
9768 (set_attr "memory" "none,load") | 10142 (set_attr "memory" "none,load") |
9769 (set_attr "mode" "TI")]) | 10143 (set_attr "mode" "TI")]) |
9770 | 10144 |
9771 (define_insn "sse4_2_pcmpistrm" | 10145 (define_insn "sse4_2_pcmpistrm" |
9784 "TARGET_SSE4_2" | 10158 "TARGET_SSE4_2" |
9785 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}" | 10159 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}" |
9786 [(set_attr "type" "sselog") | 10160 [(set_attr "type" "sselog") |
9787 (set_attr "prefix_data16" "1") | 10161 (set_attr "prefix_data16" "1") |
9788 (set_attr "prefix_extra" "1") | 10162 (set_attr "prefix_extra" "1") |
10163 (set_attr "length_immediate" "1") | |
9789 (set_attr "prefix" "maybe_vex") | 10164 (set_attr "prefix" "maybe_vex") |
9790 (set_attr "memory" "none,load") | 10165 (set_attr "memory" "none,load") |
9791 (set_attr "mode" "TI")]) | 10166 (set_attr "mode" "TI")]) |
9792 | 10167 |
9793 (define_insn "sse4_2_pcmpistr_cconly" | 10168 (define_insn "sse4_2_pcmpistr_cconly" |
9806 %vpcmpistri\t{%4, %3, %2|%2, %3, %4} | 10181 %vpcmpistri\t{%4, %3, %2|%2, %3, %4} |
9807 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}" | 10182 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}" |
9808 [(set_attr "type" "sselog") | 10183 [(set_attr "type" "sselog") |
9809 (set_attr "prefix_data16" "1") | 10184 (set_attr "prefix_data16" "1") |
9810 (set_attr "prefix_extra" "1") | 10185 (set_attr "prefix_extra" "1") |
10186 (set_attr "length_immediate" "1") | |
9811 (set_attr "memory" "none,load,none,load") | 10187 (set_attr "memory" "none,load,none,load") |
9812 (set_attr "prefix" "maybe_vex") | 10188 (set_attr "prefix" "maybe_vex") |
9813 (set_attr "mode" "TI")]) | 10189 (set_attr "mode" "TI")]) |
9814 | 10190 |
9815 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 10191 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
9816 ;; | 10192 ;; |
9817 ;; SSE5 instructions | 10193 ;; XOP instructions |
9818 ;; | 10194 ;; |
9819 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | 10195 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; |
9820 | 10196 |
9821 ;; SSE5 parallel integer multiply/add instructions. | 10197 ;; XOP parallel integer multiply/add instructions. |
9822 ;; Note the instruction does not allow the value being added to be a memory | 10198 ;; Note the XOP multiply/add instructions |
9823 ;; operation. However by pretending via the nonimmediate_operand predicate | 10199 ;; a[i] = b[i] * c[i] + d[i]; |
9824 ;; that it does and splitting it later allows the following to be recognized: | 10200 ;; do not allow the value being added to be a memory operation. |
9825 ;; a[i] = b[i] * c[i] + d[i]; | 10201 (define_insn "xop_pmacsww" |
9826 (define_insn "sse5_pmacsww" | 10202 [(set (match_operand:V8HI 0 "register_operand" "=x") |
9827 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") | |
9828 (plus:V8HI | 10203 (plus:V8HI |
9829 (mult:V8HI | 10204 (mult:V8HI |
9830 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm") | 10205 (match_operand:V8HI 1 "nonimmediate_operand" "%x") |
9831 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x")) | 10206 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) |
9832 (match_operand:V8HI 3 "register_operand" "0,0,0")))] | 10207 (match_operand:V8HI 3 "nonimmediate_operand" "x")))] |
9833 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)" | 10208 "TARGET_XOP" |
9834 "@ | 10209 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
9835 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
9836 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
9837 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}" | |
9838 [(set_attr "type" "ssemuladd") | 10210 [(set_attr "type" "ssemuladd") |
9839 (set_attr "mode" "TI")]) | 10211 (set_attr "mode" "TI")]) |
9840 | 10212 |
9841 ;; Split pmacsww with two memory operands into a load and the pmacsww. | 10213 (define_insn "xop_pmacssww" |
9842 (define_split | 10214 [(set (match_operand:V8HI 0 "register_operand" "=x") |
9843 [(set (match_operand:V8HI 0 "register_operand" "") | |
9844 (plus:V8HI | |
9845 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "") | |
9846 (match_operand:V8HI 2 "nonimmediate_operand" "")) | |
9847 (match_operand:V8HI 3 "nonimmediate_operand" "")))] | |
9848 "TARGET_SSE5 | |
9849 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true) | |
9850 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true) | |
9851 && !reg_mentioned_p (operands[0], operands[1]) | |
9852 && !reg_mentioned_p (operands[0], operands[2]) | |
9853 && !reg_mentioned_p (operands[0], operands[3])" | |
9854 [(const_int 0)] | |
9855 { | |
9856 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode); | |
9857 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2], | |
9858 operands[3])); | |
9859 DONE; | |
9860 }) | |
9861 | |
9862 (define_insn "sse5_pmacssww" | |
9863 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x") | |
9864 (ss_plus:V8HI | 10215 (ss_plus:V8HI |
9865 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") | 10216 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x") |
9866 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")) | 10217 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) |
9867 (match_operand:V8HI 3 "register_operand" "0,0,0")))] | 10218 (match_operand:V8HI 3 "nonimmediate_operand" "x")))] |
9868 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" | 10219 "TARGET_XOP" |
9869 "@ | 10220 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
9870 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
9871 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
9872 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}" | |
9873 [(set_attr "type" "ssemuladd") | 10221 [(set_attr "type" "ssemuladd") |
9874 (set_attr "mode" "TI")]) | 10222 (set_attr "mode" "TI")]) |
9875 | 10223 |
9876 ;; Note the instruction does not allow the value being added to be a memory | 10224 (define_insn "xop_pmacsdd" |
9877 ;; operation. However by pretending via the nonimmediate_operand predicate | 10225 [(set (match_operand:V4SI 0 "register_operand" "=x") |
9878 ;; that it does and splitting it later allows the following to be recognized: | |
9879 ;; a[i] = b[i] * c[i] + d[i]; | |
9880 (define_insn "sse5_pmacsdd" | |
9881 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") | |
9882 (plus:V4SI | 10226 (plus:V4SI |
9883 (mult:V4SI | 10227 (mult:V4SI |
9884 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") | 10228 (match_operand:V4SI 1 "nonimmediate_operand" "%x") |
9885 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) | 10229 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) |
9886 (match_operand:V4SI 3 "register_operand" "0,0,0")))] | 10230 (match_operand:V4SI 3 "nonimmediate_operand" "x")))] |
9887 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)" | 10231 "TARGET_XOP" |
9888 "@ | 10232 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
9889 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
9890 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
9891 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" | |
9892 [(set_attr "type" "ssemuladd") | 10233 [(set_attr "type" "ssemuladd") |
9893 (set_attr "mode" "TI")]) | 10234 (set_attr "mode" "TI")]) |
9894 | 10235 |
9895 ;; Split pmacsdd with two memory operands into a load and the pmacsdd. | 10236 (define_insn "xop_pmacssdd" |
9896 (define_split | 10237 [(set (match_operand:V4SI 0 "register_operand" "=x") |
9897 [(set (match_operand:V4SI 0 "register_operand" "") | |
9898 (plus:V4SI | |
9899 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "") | |
9900 (match_operand:V4SI 2 "nonimmediate_operand" "")) | |
9901 (match_operand:V4SI 3 "nonimmediate_operand" "")))] | |
9902 "TARGET_SSE5 | |
9903 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true) | |
9904 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true) | |
9905 && !reg_mentioned_p (operands[0], operands[1]) | |
9906 && !reg_mentioned_p (operands[0], operands[2]) | |
9907 && !reg_mentioned_p (operands[0], operands[3])" | |
9908 [(const_int 0)] | |
9909 { | |
9910 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode); | |
9911 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2], | |
9912 operands[3])); | |
9913 DONE; | |
9914 }) | |
9915 | |
9916 (define_insn "sse5_pmacssdd" | |
9917 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") | |
9918 (ss_plus:V4SI | 10238 (ss_plus:V4SI |
9919 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") | 10239 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") |
9920 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) | 10240 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) |
9921 (match_operand:V4SI 3 "register_operand" "0,0,0")))] | 10241 (match_operand:V4SI 3 "nonimmediate_operand" "x")))] |
9922 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" | 10242 "TARGET_XOP" |
9923 "@ | 10243 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
9924 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
9925 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
9926 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}" | |
9927 [(set_attr "type" "ssemuladd") | 10244 [(set_attr "type" "ssemuladd") |
9928 (set_attr "mode" "TI")]) | 10245 (set_attr "mode" "TI")]) |
9929 | 10246 |
9930 (define_insn "sse5_pmacssdql" | 10247 (define_insn "xop_pmacssdql" |
9931 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") | 10248 [(set (match_operand:V2DI 0 "register_operand" "=x") |
9932 (ss_plus:V2DI | 10249 (ss_plus:V2DI |
9933 (mult:V2DI | 10250 (mult:V2DI |
9934 (sign_extend:V2DI | 10251 (sign_extend:V2DI |
9935 (vec_select:V2SI | 10252 (vec_select:V2SI |
9936 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") | 10253 (match_operand:V4SI 1 "nonimmediate_operand" "%x") |
9937 (parallel [(const_int 1) | 10254 (parallel [(const_int 1) |
9938 (const_int 3)]))) | 10255 (const_int 3)]))) |
9939 (vec_select:V2SI | 10256 (vec_select:V2SI |
9940 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") | 10257 (match_operand:V4SI 2 "nonimmediate_operand" "xm") |
9941 (parallel [(const_int 1) | 10258 (parallel [(const_int 1) |
9942 (const_int 3)]))) | 10259 (const_int 3)]))) |
9943 (match_operand:V2DI 3 "register_operand" "0,0,0")))] | 10260 (match_operand:V2DI 3 "nonimmediate_operand" "x")))] |
9944 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" | 10261 "TARGET_XOP" |
9945 "@ | 10262 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
9946 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
9947 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
9948 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" | |
9949 [(set_attr "type" "ssemuladd") | 10263 [(set_attr "type" "ssemuladd") |
9950 (set_attr "mode" "TI")]) | 10264 (set_attr "mode" "TI")]) |
9951 | 10265 |
9952 (define_insn "sse5_pmacssdqh" | 10266 (define_insn "xop_pmacssdqh" |
9953 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") | 10267 [(set (match_operand:V2DI 0 "register_operand" "=x") |
9954 (ss_plus:V2DI | 10268 (ss_plus:V2DI |
9955 (mult:V2DI | 10269 (mult:V2DI |
9956 (sign_extend:V2DI | 10270 (sign_extend:V2DI |
9957 (vec_select:V2SI | 10271 (vec_select:V2SI |
9958 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") | 10272 (match_operand:V4SI 1 "nonimmediate_operand" "%x") |
9959 (parallel [(const_int 0) | 10273 (parallel [(const_int 0) |
9960 (const_int 2)]))) | 10274 (const_int 2)]))) |
9961 (sign_extend:V2DI | 10275 (sign_extend:V2DI |
9962 (vec_select:V2SI | 10276 (vec_select:V2SI |
9963 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") | 10277 (match_operand:V4SI 2 "nonimmediate_operand" "xm") |
9964 (parallel [(const_int 0) | 10278 (parallel [(const_int 0) |
9965 (const_int 2)])))) | 10279 (const_int 2)])))) |
9966 (match_operand:V2DI 3 "register_operand" "0,0,0")))] | 10280 (match_operand:V2DI 3 "nonimmediate_operand" "x")))] |
9967 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" | 10281 "TARGET_XOP" |
9968 "@ | 10282 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
9969 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
9970 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
9971 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" | |
9972 [(set_attr "type" "ssemuladd") | 10283 [(set_attr "type" "ssemuladd") |
9973 (set_attr "mode" "TI")]) | 10284 (set_attr "mode" "TI")]) |
9974 | 10285 |
9975 (define_insn "sse5_pmacsdql" | 10286 (define_insn "xop_pmacsdql" |
9976 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") | 10287 [(set (match_operand:V2DI 0 "register_operand" "=x") |
9977 (plus:V2DI | 10288 (plus:V2DI |
9978 (mult:V2DI | 10289 (mult:V2DI |
9979 (sign_extend:V2DI | 10290 (sign_extend:V2DI |
9980 (vec_select:V2SI | 10291 (vec_select:V2SI |
9981 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") | 10292 (match_operand:V4SI 1 "nonimmediate_operand" "%x") |
9982 (parallel [(const_int 1) | 10293 (parallel [(const_int 1) |
9983 (const_int 3)]))) | 10294 (const_int 3)]))) |
9984 (sign_extend:V2DI | 10295 (sign_extend:V2DI |
9985 (vec_select:V2SI | 10296 (vec_select:V2SI |
9986 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") | 10297 (match_operand:V4SI 2 "nonimmediate_operand" "xm") |
9987 (parallel [(const_int 1) | 10298 (parallel [(const_int 1) |
9988 (const_int 3)])))) | 10299 (const_int 3)])))) |
9989 (match_operand:V2DI 3 "register_operand" "0,0,0")))] | 10300 (match_operand:V2DI 3 "nonimmediate_operand" "x")))] |
9990 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" | 10301 "TARGET_XOP" |
9991 "@ | 10302 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
9992 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
9993 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
9994 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}" | |
9995 [(set_attr "type" "ssemuladd") | 10303 [(set_attr "type" "ssemuladd") |
9996 (set_attr "mode" "TI")]) | 10304 (set_attr "mode" "TI")]) |
9997 | 10305 |
9998 (define_insn_and_split "*sse5_pmacsdql_mem" | 10306 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so |
9999 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x") | |
10000 (plus:V2DI | |
10001 (mult:V2DI | |
10002 (sign_extend:V2DI | |
10003 (vec_select:V2SI | |
10004 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") | |
10005 (parallel [(const_int 1) | |
10006 (const_int 3)]))) | |
10007 (sign_extend:V2DI | |
10008 (vec_select:V2SI | |
10009 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") | |
10010 (parallel [(const_int 1) | |
10011 (const_int 3)])))) | |
10012 (match_operand:V2DI 3 "memory_operand" "m,m,m")))] | |
10013 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)" | |
10014 "#" | |
10015 "&& (reload_completed | |
10016 || (!reg_mentioned_p (operands[0], operands[1]) | |
10017 && !reg_mentioned_p (operands[0], operands[2])))" | |
10018 [(set (match_dup 0) | |
10019 (match_dup 3)) | |
10020 (set (match_dup 0) | |
10021 (plus:V2DI | |
10022 (mult:V2DI | |
10023 (sign_extend:V2DI | |
10024 (vec_select:V2SI | |
10025 (match_dup 1) | |
10026 (parallel [(const_int 1) | |
10027 (const_int 3)]))) | |
10028 (sign_extend:V2DI | |
10029 (vec_select:V2SI | |
10030 (match_dup 2) | |
10031 (parallel [(const_int 1) | |
10032 (const_int 3)])))) | |
10033 (match_dup 0)))]) | |
10034 | |
10035 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so | |
10036 ;; fake it with a multiply/add. In general, we expect the define_split to | 10307 ;; fake it with a multiply/add. In general, we expect the define_split to |
10037 ;; occur before register allocation, so we have to handle the corner case where | 10308 ;; occur before register allocation, so we have to handle the corner case where |
10038 ;; the target is the same as operands 1/2 | 10309 ;; the target is the same as operands 1/2 |
10039 (define_insn_and_split "sse5_mulv2div2di3_low" | 10310 (define_insn_and_split "xop_mulv2div2di3_low" |
10040 [(set (match_operand:V2DI 0 "register_operand" "=&x") | 10311 [(set (match_operand:V2DI 0 "register_operand" "=&x") |
10041 (mult:V2DI | 10312 (mult:V2DI |
10042 (sign_extend:V2DI | 10313 (sign_extend:V2DI |
10043 (vec_select:V2SI | 10314 (vec_select:V2SI |
10044 (match_operand:V4SI 1 "nonimmediate_operand" "%x") | 10315 (match_operand:V4SI 1 "register_operand" "%x") |
10045 (parallel [(const_int 1) | 10316 (parallel [(const_int 1) |
10046 (const_int 3)]))) | 10317 (const_int 3)]))) |
10047 (sign_extend:V2DI | 10318 (sign_extend:V2DI |
10048 (vec_select:V2SI | 10319 (vec_select:V2SI |
10049 (match_operand:V4SI 2 "nonimmediate_operand" "xm") | 10320 (match_operand:V4SI 2 "nonimmediate_operand" "xm") |
10050 (parallel [(const_int 1) | 10321 (parallel [(const_int 1) |
10051 (const_int 3)])))))] | 10322 (const_int 3)])))))] |
10052 "TARGET_SSE5" | 10323 "TARGET_XOP" |
10053 "#" | 10324 "#" |
10054 "&& (reload_completed | 10325 "&& reload_completed" |
10055 || (!reg_mentioned_p (operands[0], operands[1]) | |
10056 && !reg_mentioned_p (operands[0], operands[2])))" | |
10057 [(set (match_dup 0) | 10326 [(set (match_dup 0) |
10058 (match_dup 3)) | 10327 (match_dup 3)) |
10059 (set (match_dup 0) | 10328 (set (match_dup 0) |
10060 (plus:V2DI | 10329 (plus:V2DI |
10061 (mult:V2DI | 10330 (mult:V2DI |
10074 operands[3] = CONST0_RTX (V2DImode); | 10343 operands[3] = CONST0_RTX (V2DImode); |
10075 } | 10344 } |
10076 [(set_attr "type" "ssemuladd") | 10345 [(set_attr "type" "ssemuladd") |
10077 (set_attr "mode" "TI")]) | 10346 (set_attr "mode" "TI")]) |
10078 | 10347 |
10079 (define_insn "sse5_pmacsdqh" | 10348 (define_insn "xop_pmacsdqh" |
10080 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") | 10349 [(set (match_operand:V2DI 0 "register_operand" "=x") |
10081 (plus:V2DI | 10350 (plus:V2DI |
10082 (mult:V2DI | 10351 (mult:V2DI |
10083 (sign_extend:V2DI | 10352 (sign_extend:V2DI |
10084 (vec_select:V2SI | 10353 (vec_select:V2SI |
10085 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") | 10354 (match_operand:V4SI 1 "nonimmediate_operand" "%x") |
10086 (parallel [(const_int 0) | 10355 (parallel [(const_int 0) |
10087 (const_int 2)]))) | 10356 (const_int 2)]))) |
10088 (sign_extend:V2DI | 10357 (sign_extend:V2DI |
10089 (vec_select:V2SI | 10358 (vec_select:V2SI |
10090 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") | 10359 (match_operand:V4SI 2 "nonimmediate_operand" "xm") |
10091 (parallel [(const_int 0) | 10360 (parallel [(const_int 0) |
10092 (const_int 2)])))) | 10361 (const_int 2)])))) |
10093 (match_operand:V2DI 3 "register_operand" "0,0,0")))] | 10362 (match_operand:V2DI 3 "nonimmediate_operand" "x")))] |
10094 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" | 10363 "TARGET_XOP" |
10095 "@ | 10364 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
10096 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
10097 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
10098 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}" | |
10099 [(set_attr "type" "ssemuladd") | 10365 [(set_attr "type" "ssemuladd") |
10100 (set_attr "mode" "TI")]) | 10366 (set_attr "mode" "TI")]) |
10101 | 10367 |
10102 (define_insn_and_split "*sse5_pmacsdqh_mem" | 10368 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so |
10103 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x") | |
10104 (plus:V2DI | |
10105 (mult:V2DI | |
10106 (sign_extend:V2DI | |
10107 (vec_select:V2SI | |
10108 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") | |
10109 (parallel [(const_int 0) | |
10110 (const_int 2)]))) | |
10111 (sign_extend:V2DI | |
10112 (vec_select:V2SI | |
10113 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") | |
10114 (parallel [(const_int 0) | |
10115 (const_int 2)])))) | |
10116 (match_operand:V2DI 3 "memory_operand" "m,m,m")))] | |
10117 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)" | |
10118 "#" | |
10119 "&& (reload_completed | |
10120 || (!reg_mentioned_p (operands[0], operands[1]) | |
10121 && !reg_mentioned_p (operands[0], operands[2])))" | |
10122 [(set (match_dup 0) | |
10123 (match_dup 3)) | |
10124 (set (match_dup 0) | |
10125 (plus:V2DI | |
10126 (mult:V2DI | |
10127 (sign_extend:V2DI | |
10128 (vec_select:V2SI | |
10129 (match_dup 1) | |
10130 (parallel [(const_int 0) | |
10131 (const_int 2)]))) | |
10132 (sign_extend:V2DI | |
10133 (vec_select:V2SI | |
10134 (match_dup 2) | |
10135 (parallel [(const_int 0) | |
10136 (const_int 2)])))) | |
10137 (match_dup 0)))]) | |
10138 | |
10139 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so | |
10140 ;; fake it with a multiply/add. In general, we expect the define_split to | 10369 ;; fake it with a multiply/add. In general, we expect the define_split to |
10141 ;; occur before register allocation, so we have to handle the corner case where | 10370 ;; occur before register allocation, so we have to handle the corner case where |
10142 ;; the target is the same as either operands[1] or operands[2] | 10371 ;; the target is the same as either operands[1] or operands[2] |
10143 (define_insn_and_split "sse5_mulv2div2di3_high" | 10372 (define_insn_and_split "xop_mulv2div2di3_high" |
10144 [(set (match_operand:V2DI 0 "register_operand" "=&x") | 10373 [(set (match_operand:V2DI 0 "register_operand" "=&x") |
10145 (mult:V2DI | 10374 (mult:V2DI |
10146 (sign_extend:V2DI | 10375 (sign_extend:V2DI |
10147 (vec_select:V2SI | 10376 (vec_select:V2SI |
10148 (match_operand:V4SI 1 "nonimmediate_operand" "%x") | 10377 (match_operand:V4SI 1 "register_operand" "%x") |
10149 (parallel [(const_int 0) | 10378 (parallel [(const_int 0) |
10150 (const_int 2)]))) | 10379 (const_int 2)]))) |
10151 (sign_extend:V2DI | 10380 (sign_extend:V2DI |
10152 (vec_select:V2SI | 10381 (vec_select:V2SI |
10153 (match_operand:V4SI 2 "nonimmediate_operand" "xm") | 10382 (match_operand:V4SI 2 "nonimmediate_operand" "xm") |
10154 (parallel [(const_int 0) | 10383 (parallel [(const_int 0) |
10155 (const_int 2)])))))] | 10384 (const_int 2)])))))] |
10156 "TARGET_SSE5" | 10385 "TARGET_XOP" |
10157 "#" | 10386 "#" |
10158 "&& (reload_completed | 10387 "&& reload_completed" |
10159 || (!reg_mentioned_p (operands[0], operands[1]) | |
10160 && !reg_mentioned_p (operands[0], operands[2])))" | |
10161 [(set (match_dup 0) | 10388 [(set (match_dup 0) |
10162 (match_dup 3)) | 10389 (match_dup 3)) |
10163 (set (match_dup 0) | 10390 (set (match_dup 0) |
10164 (plus:V2DI | 10391 (plus:V2DI |
10165 (mult:V2DI | 10392 (mult:V2DI |
10178 operands[3] = CONST0_RTX (V2DImode); | 10405 operands[3] = CONST0_RTX (V2DImode); |
10179 } | 10406 } |
10180 [(set_attr "type" "ssemuladd") | 10407 [(set_attr "type" "ssemuladd") |
10181 (set_attr "mode" "TI")]) | 10408 (set_attr "mode" "TI")]) |
10182 | 10409 |
10183 ;; SSE5 parallel integer multiply/add instructions for the intrinsics | 10410 ;; XOP parallel integer multiply/add instructions for the intrinsics |
10184 (define_insn "sse5_pmacsswd" | 10411 (define_insn "xop_pmacsswd" |
10185 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") | 10412 [(set (match_operand:V4SI 0 "register_operand" "=x") |
10186 (ss_plus:V4SI | 10413 (ss_plus:V4SI |
10187 (mult:V4SI | 10414 (mult:V4SI |
10188 (sign_extend:V4SI | 10415 (sign_extend:V4SI |
10189 (vec_select:V4HI | 10416 (vec_select:V4HI |
10190 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") | 10417 (match_operand:V8HI 1 "nonimmediate_operand" "%x") |
10191 (parallel [(const_int 1) | 10418 (parallel [(const_int 1) |
10192 (const_int 3) | 10419 (const_int 3) |
10193 (const_int 5) | 10420 (const_int 5) |
10194 (const_int 7)]))) | 10421 (const_int 7)]))) |
10195 (sign_extend:V4SI | 10422 (sign_extend:V4SI |
10196 (vec_select:V4HI | 10423 (vec_select:V4HI |
10197 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") | 10424 (match_operand:V8HI 2 "nonimmediate_operand" "xm") |
10198 (parallel [(const_int 1) | 10425 (parallel [(const_int 1) |
10199 (const_int 3) | 10426 (const_int 3) |
10200 (const_int 5) | 10427 (const_int 5) |
10201 (const_int 7)])))) | 10428 (const_int 7)])))) |
10202 (match_operand:V4SI 3 "register_operand" "0,0,0")))] | 10429 (match_operand:V4SI 3 "nonimmediate_operand" "x")))] |
10203 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" | 10430 "TARGET_XOP" |
10204 "@ | 10431 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
10205 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
10206 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
10207 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" | |
10208 [(set_attr "type" "ssemuladd") | 10432 [(set_attr "type" "ssemuladd") |
10209 (set_attr "mode" "TI")]) | 10433 (set_attr "mode" "TI")]) |
10210 | 10434 |
10211 (define_insn "sse5_pmacswd" | 10435 (define_insn "xop_pmacswd" |
10212 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") | 10436 [(set (match_operand:V4SI 0 "register_operand" "=x") |
10213 (plus:V4SI | 10437 (plus:V4SI |
10214 (mult:V4SI | 10438 (mult:V4SI |
10215 (sign_extend:V4SI | 10439 (sign_extend:V4SI |
10216 (vec_select:V4HI | 10440 (vec_select:V4HI |
10217 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") | 10441 (match_operand:V8HI 1 "nonimmediate_operand" "%x") |
10218 (parallel [(const_int 1) | 10442 (parallel [(const_int 1) |
10219 (const_int 3) | 10443 (const_int 3) |
10220 (const_int 5) | 10444 (const_int 5) |
10221 (const_int 7)]))) | 10445 (const_int 7)]))) |
10222 (sign_extend:V4SI | 10446 (sign_extend:V4SI |
10223 (vec_select:V4HI | 10447 (vec_select:V4HI |
10224 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") | 10448 (match_operand:V8HI 2 "nonimmediate_operand" "xm") |
10225 (parallel [(const_int 1) | 10449 (parallel [(const_int 1) |
10226 (const_int 3) | 10450 (const_int 3) |
10227 (const_int 5) | 10451 (const_int 5) |
10228 (const_int 7)])))) | 10452 (const_int 7)])))) |
10229 (match_operand:V4SI 3 "register_operand" "0,0,0")))] | 10453 (match_operand:V4SI 3 "nonimmediate_operand" "x")))] |
10230 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" | 10454 "TARGET_XOP" |
10231 "@ | 10455 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
10232 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
10233 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
10234 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" | |
10235 [(set_attr "type" "ssemuladd") | 10456 [(set_attr "type" "ssemuladd") |
10236 (set_attr "mode" "TI")]) | 10457 (set_attr "mode" "TI")]) |
10237 | 10458 |
10238 (define_insn "sse5_pmadcsswd" | 10459 (define_insn "xop_pmadcsswd" |
10239 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") | 10460 [(set (match_operand:V4SI 0 "register_operand" "=x") |
10240 (ss_plus:V4SI | 10461 (ss_plus:V4SI |
10241 (plus:V4SI | 10462 (plus:V4SI |
10242 (mult:V4SI | 10463 (mult:V4SI |
10243 (sign_extend:V4SI | 10464 (sign_extend:V4SI |
10244 (vec_select:V4HI | 10465 (vec_select:V4HI |
10245 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") | 10466 (match_operand:V8HI 1 "nonimmediate_operand" "%x") |
10246 (parallel [(const_int 0) | 10467 (parallel [(const_int 0) |
10247 (const_int 2) | 10468 (const_int 2) |
10248 (const_int 4) | 10469 (const_int 4) |
10249 (const_int 6)]))) | 10470 (const_int 6)]))) |
10250 (sign_extend:V4SI | 10471 (sign_extend:V4SI |
10251 (vec_select:V4HI | 10472 (vec_select:V4HI |
10252 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") | 10473 (match_operand:V8HI 2 "nonimmediate_operand" "xm") |
10253 (parallel [(const_int 0) | 10474 (parallel [(const_int 0) |
10254 (const_int 2) | 10475 (const_int 2) |
10255 (const_int 4) | 10476 (const_int 4) |
10256 (const_int 6)])))) | 10477 (const_int 6)])))) |
10257 (mult:V4SI | 10478 (mult:V4SI |
10267 (match_dup 2) | 10488 (match_dup 2) |
10268 (parallel [(const_int 1) | 10489 (parallel [(const_int 1) |
10269 (const_int 3) | 10490 (const_int 3) |
10270 (const_int 5) | 10491 (const_int 5) |
10271 (const_int 7)]))))) | 10492 (const_int 7)]))))) |
10272 (match_operand:V4SI 3 "register_operand" "0,0,0")))] | 10493 (match_operand:V4SI 3 "nonimmediate_operand" "x")))] |
10273 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" | 10494 "TARGET_XOP" |
10274 "@ | 10495 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
10275 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
10276 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
10277 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" | |
10278 [(set_attr "type" "ssemuladd") | 10496 [(set_attr "type" "ssemuladd") |
10279 (set_attr "mode" "TI")]) | 10497 (set_attr "mode" "TI")]) |
10280 | 10498 |
10281 (define_insn "sse5_pmadcswd" | 10499 (define_insn "xop_pmadcswd" |
10282 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") | 10500 [(set (match_operand:V4SI 0 "register_operand" "=x") |
10283 (plus:V4SI | 10501 (plus:V4SI |
10284 (plus:V4SI | 10502 (plus:V4SI |
10285 (mult:V4SI | 10503 (mult:V4SI |
10286 (sign_extend:V4SI | 10504 (sign_extend:V4SI |
10287 (vec_select:V4HI | 10505 (vec_select:V4HI |
10288 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") | 10506 (match_operand:V8HI 1 "nonimmediate_operand" "%x") |
10289 (parallel [(const_int 0) | 10507 (parallel [(const_int 0) |
10290 (const_int 2) | 10508 (const_int 2) |
10291 (const_int 4) | 10509 (const_int 4) |
10292 (const_int 6)]))) | 10510 (const_int 6)]))) |
10293 (sign_extend:V4SI | 10511 (sign_extend:V4SI |
10294 (vec_select:V4HI | 10512 (vec_select:V4HI |
10295 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") | 10513 (match_operand:V8HI 2 "nonimmediate_operand" "xm") |
10296 (parallel [(const_int 0) | 10514 (parallel [(const_int 0) |
10297 (const_int 2) | 10515 (const_int 2) |
10298 (const_int 4) | 10516 (const_int 4) |
10299 (const_int 6)])))) | 10517 (const_int 6)])))) |
10300 (mult:V4SI | 10518 (mult:V4SI |
10310 (match_dup 2) | 10528 (match_dup 2) |
10311 (parallel [(const_int 1) | 10529 (parallel [(const_int 1) |
10312 (const_int 3) | 10530 (const_int 3) |
10313 (const_int 5) | 10531 (const_int 5) |
10314 (const_int 7)]))))) | 10532 (const_int 7)]))))) |
10315 (match_operand:V4SI 3 "register_operand" "0,0,0")))] | 10533 (match_operand:V4SI 3 "nonimmediate_operand" "x")))] |
10316 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" | 10534 "TARGET_XOP" |
10317 "@ | 10535 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
10318 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
10319 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
10320 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}" | |
10321 [(set_attr "type" "ssemuladd") | 10536 [(set_attr "type" "ssemuladd") |
10322 (set_attr "mode" "TI")]) | 10537 (set_attr "mode" "TI")]) |
10323 | 10538 |
10324 ;; SSE5 parallel XMM conditional moves | 10539 ;; XOP parallel XMM conditional moves |
10325 (define_insn "sse5_pcmov_<mode>" | 10540 (define_insn "xop_pcmov_<mode>" |
10326 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x") | 10541 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x") |
10327 (if_then_else:SSEMODE | 10542 (if_then_else:SSEMODE |
10328 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x") | 10543 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m") |
10329 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0") | 10544 (match_operand:SSEMODE 1 "vector_move_operand" "x,x") |
10330 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))] | 10545 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))] |
10331 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" | 10546 "TARGET_XOP" |
10332 "@ | 10547 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
10333 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
10334 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
10335 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3} | |
10336 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
10337 [(set_attr "type" "sse4arg")]) | 10548 [(set_attr "type" "sse4arg")]) |
10338 | 10549 |
10339 ;; SSE5 horizontal add/subtract instructions | 10550 (define_insn "xop_pcmov_<mode>256" |
10340 (define_insn "sse5_phaddbw" | 10551 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x") |
10552 (if_then_else:AVX256MODE | |
10553 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m") | |
10554 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x") | |
10555 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))] | |
10556 "TARGET_XOP" | |
10557 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
10558 [(set_attr "type" "sse4arg")]) | |
10559 | |
10560 ;; XOP horizontal add/subtract instructions | |
10561 (define_insn "xop_phaddbw" | |
10341 [(set (match_operand:V8HI 0 "register_operand" "=x") | 10562 [(set (match_operand:V8HI 0 "register_operand" "=x") |
10342 (plus:V8HI | 10563 (plus:V8HI |
10343 (sign_extend:V8HI | 10564 (sign_extend:V8HI |
10344 (vec_select:V8QI | 10565 (vec_select:V8QI |
10345 (match_operand:V16QI 1 "nonimmediate_operand" "xm") | 10566 (match_operand:V16QI 1 "nonimmediate_operand" "xm") |
10360 (const_int 7) | 10581 (const_int 7) |
10361 (const_int 9) | 10582 (const_int 9) |
10362 (const_int 11) | 10583 (const_int 11) |
10363 (const_int 13) | 10584 (const_int 13) |
10364 (const_int 15)])))))] | 10585 (const_int 15)])))))] |
10365 "TARGET_SSE5" | 10586 "TARGET_XOP" |
10366 "phaddbw\t{%1, %0|%0, %1}" | 10587 "vphaddbw\t{%1, %0|%0, %1}" |
10367 [(set_attr "type" "sseiadd1")]) | 10588 [(set_attr "type" "sseiadd1")]) |
10368 | 10589 |
10369 (define_insn "sse5_phaddbd" | 10590 (define_insn "xop_phaddbd" |
10370 [(set (match_operand:V4SI 0 "register_operand" "=x") | 10591 [(set (match_operand:V4SI 0 "register_operand" "=x") |
10371 (plus:V4SI | 10592 (plus:V4SI |
10372 (plus:V4SI | 10593 (plus:V4SI |
10373 (sign_extend:V4SI | 10594 (sign_extend:V4SI |
10374 (vec_select:V4QI | 10595 (vec_select:V4QI |
10397 (match_dup 1) | 10618 (match_dup 1) |
10398 (parallel [(const_int 3) | 10619 (parallel [(const_int 3) |
10399 (const_int 7) | 10620 (const_int 7) |
10400 (const_int 11) | 10621 (const_int 11) |
10401 (const_int 15)]))))))] | 10622 (const_int 15)]))))))] |
10402 "TARGET_SSE5" | 10623 "TARGET_XOP" |
10403 "phaddbd\t{%1, %0|%0, %1}" | 10624 "vphaddbd\t{%1, %0|%0, %1}" |
10404 [(set_attr "type" "sseiadd1")]) | 10625 [(set_attr "type" "sseiadd1")]) |
10405 | 10626 |
10406 (define_insn "sse5_phaddbq" | 10627 (define_insn "xop_phaddbq" |
10407 [(set (match_operand:V2DI 0 "register_operand" "=x") | 10628 [(set (match_operand:V2DI 0 "register_operand" "=x") |
10408 (plus:V2DI | 10629 (plus:V2DI |
10409 (plus:V2DI | 10630 (plus:V2DI |
10410 (plus:V2DI | 10631 (plus:V2DI |
10411 (sign_extend:V2DI | 10632 (sign_extend:V2DI |
10450 (sign_extend:V2DI | 10671 (sign_extend:V2DI |
10451 (vec_select:V2QI | 10672 (vec_select:V2QI |
10452 (match_dup 1) | 10673 (match_dup 1) |
10453 (parallel [(const_int 11) | 10674 (parallel [(const_int 11) |
10454 (const_int 15)])))))))] | 10675 (const_int 15)])))))))] |
10455 "TARGET_SSE5" | 10676 "TARGET_XOP" |
10456 "phaddbq\t{%1, %0|%0, %1}" | 10677 "vphaddbq\t{%1, %0|%0, %1}" |
10457 [(set_attr "type" "sseiadd1")]) | 10678 [(set_attr "type" "sseiadd1")]) |
10458 | 10679 |
10459 (define_insn "sse5_phaddwd" | 10680 (define_insn "xop_phaddwd" |
10460 [(set (match_operand:V4SI 0 "register_operand" "=x") | 10681 [(set (match_operand:V4SI 0 "register_operand" "=x") |
10461 (plus:V4SI | 10682 (plus:V4SI |
10462 (sign_extend:V4SI | 10683 (sign_extend:V4SI |
10463 (vec_select:V4HI | 10684 (vec_select:V4HI |
10464 (match_operand:V8HI 1 "nonimmediate_operand" "xm") | 10685 (match_operand:V8HI 1 "nonimmediate_operand" "xm") |
10471 (match_dup 1) | 10692 (match_dup 1) |
10472 (parallel [(const_int 1) | 10693 (parallel [(const_int 1) |
10473 (const_int 3) | 10694 (const_int 3) |
10474 (const_int 5) | 10695 (const_int 5) |
10475 (const_int 7)])))))] | 10696 (const_int 7)])))))] |
10476 "TARGET_SSE5" | 10697 "TARGET_XOP" |
10477 "phaddwd\t{%1, %0|%0, %1}" | 10698 "vphaddwd\t{%1, %0|%0, %1}" |
10478 [(set_attr "type" "sseiadd1")]) | 10699 [(set_attr "type" "sseiadd1")]) |
10479 | 10700 |
10480 (define_insn "sse5_phaddwq" | 10701 (define_insn "xop_phaddwq" |
10481 [(set (match_operand:V2DI 0 "register_operand" "=x") | 10702 [(set (match_operand:V2DI 0 "register_operand" "=x") |
10482 (plus:V2DI | 10703 (plus:V2DI |
10483 (plus:V2DI | 10704 (plus:V2DI |
10484 (sign_extend:V2DI | 10705 (sign_extend:V2DI |
10485 (vec_select:V2HI | 10706 (vec_select:V2HI |
10500 (sign_extend:V2DI | 10721 (sign_extend:V2DI |
10501 (vec_select:V2HI | 10722 (vec_select:V2HI |
10502 (match_dup 1) | 10723 (match_dup 1) |
10503 (parallel [(const_int 3) | 10724 (parallel [(const_int 3) |
10504 (const_int 7)]))))))] | 10725 (const_int 7)]))))))] |
10505 "TARGET_SSE5" | 10726 "TARGET_XOP" |
10506 "phaddwq\t{%1, %0|%0, %1}" | 10727 "vphaddwq\t{%1, %0|%0, %1}" |
10507 [(set_attr "type" "sseiadd1")]) | 10728 [(set_attr "type" "sseiadd1")]) |
10508 | 10729 |
10509 (define_insn "sse5_phadddq" | 10730 (define_insn "xop_phadddq" |
10510 [(set (match_operand:V2DI 0 "register_operand" "=x") | 10731 [(set (match_operand:V2DI 0 "register_operand" "=x") |
10511 (plus:V2DI | 10732 (plus:V2DI |
10512 (sign_extend:V2DI | 10733 (sign_extend:V2DI |
10513 (vec_select:V2SI | 10734 (vec_select:V2SI |
10514 (match_operand:V4SI 1 "nonimmediate_operand" "xm") | 10735 (match_operand:V4SI 1 "nonimmediate_operand" "xm") |
10517 (sign_extend:V2DI | 10738 (sign_extend:V2DI |
10518 (vec_select:V2SI | 10739 (vec_select:V2SI |
10519 (match_dup 1) | 10740 (match_dup 1) |
10520 (parallel [(const_int 1) | 10741 (parallel [(const_int 1) |
10521 (const_int 3)])))))] | 10742 (const_int 3)])))))] |
10522 "TARGET_SSE5" | 10743 "TARGET_XOP" |
10523 "phadddq\t{%1, %0|%0, %1}" | 10744 "vphadddq\t{%1, %0|%0, %1}" |
10524 [(set_attr "type" "sseiadd1")]) | 10745 [(set_attr "type" "sseiadd1")]) |
10525 | 10746 |
10526 (define_insn "sse5_phaddubw" | 10747 (define_insn "xop_phaddubw" |
10527 [(set (match_operand:V8HI 0 "register_operand" "=x") | 10748 [(set (match_operand:V8HI 0 "register_operand" "=x") |
10528 (plus:V8HI | 10749 (plus:V8HI |
10529 (zero_extend:V8HI | 10750 (zero_extend:V8HI |
10530 (vec_select:V8QI | 10751 (vec_select:V8QI |
10531 (match_operand:V16QI 1 "nonimmediate_operand" "xm") | 10752 (match_operand:V16QI 1 "nonimmediate_operand" "xm") |
10546 (const_int 7) | 10767 (const_int 7) |
10547 (const_int 9) | 10768 (const_int 9) |
10548 (const_int 11) | 10769 (const_int 11) |
10549 (const_int 13) | 10770 (const_int 13) |
10550 (const_int 15)])))))] | 10771 (const_int 15)])))))] |
10551 "TARGET_SSE5" | 10772 "TARGET_XOP" |
10552 "phaddubw\t{%1, %0|%0, %1}" | 10773 "vphaddubw\t{%1, %0|%0, %1}" |
10553 [(set_attr "type" "sseiadd1")]) | 10774 [(set_attr "type" "sseiadd1")]) |
10554 | 10775 |
10555 (define_insn "sse5_phaddubd" | 10776 (define_insn "xop_phaddubd" |
10556 [(set (match_operand:V4SI 0 "register_operand" "=x") | 10777 [(set (match_operand:V4SI 0 "register_operand" "=x") |
10557 (plus:V4SI | 10778 (plus:V4SI |
10558 (plus:V4SI | 10779 (plus:V4SI |
10559 (zero_extend:V4SI | 10780 (zero_extend:V4SI |
10560 (vec_select:V4QI | 10781 (vec_select:V4QI |
10583 (match_dup 1) | 10804 (match_dup 1) |
10584 (parallel [(const_int 3) | 10805 (parallel [(const_int 3) |
10585 (const_int 7) | 10806 (const_int 7) |
10586 (const_int 11) | 10807 (const_int 11) |
10587 (const_int 15)]))))))] | 10808 (const_int 15)]))))))] |
10588 "TARGET_SSE5" | 10809 "TARGET_XOP" |
10589 "phaddubd\t{%1, %0|%0, %1}" | 10810 "vphaddubd\t{%1, %0|%0, %1}" |
10590 [(set_attr "type" "sseiadd1")]) | 10811 [(set_attr "type" "sseiadd1")]) |
10591 | 10812 |
10592 (define_insn "sse5_phaddubq" | 10813 (define_insn "xop_phaddubq" |
10593 [(set (match_operand:V2DI 0 "register_operand" "=x") | 10814 [(set (match_operand:V2DI 0 "register_operand" "=x") |
10594 (plus:V2DI | 10815 (plus:V2DI |
10595 (plus:V2DI | 10816 (plus:V2DI |
10596 (plus:V2DI | 10817 (plus:V2DI |
10597 (zero_extend:V2DI | 10818 (zero_extend:V2DI |
10636 (zero_extend:V2DI | 10857 (zero_extend:V2DI |
10637 (vec_select:V2QI | 10858 (vec_select:V2QI |
10638 (match_dup 1) | 10859 (match_dup 1) |
10639 (parallel [(const_int 11) | 10860 (parallel [(const_int 11) |
10640 (const_int 15)])))))))] | 10861 (const_int 15)])))))))] |
10641 "TARGET_SSE5" | 10862 "TARGET_XOP" |
10642 "phaddubq\t{%1, %0|%0, %1}" | 10863 "vphaddubq\t{%1, %0|%0, %1}" |
10643 [(set_attr "type" "sseiadd1")]) | 10864 [(set_attr "type" "sseiadd1")]) |
10644 | 10865 |
10645 (define_insn "sse5_phadduwd" | 10866 (define_insn "xop_phadduwd" |
10646 [(set (match_operand:V4SI 0 "register_operand" "=x") | 10867 [(set (match_operand:V4SI 0 "register_operand" "=x") |
10647 (plus:V4SI | 10868 (plus:V4SI |
10648 (zero_extend:V4SI | 10869 (zero_extend:V4SI |
10649 (vec_select:V4HI | 10870 (vec_select:V4HI |
10650 (match_operand:V8HI 1 "nonimmediate_operand" "xm") | 10871 (match_operand:V8HI 1 "nonimmediate_operand" "xm") |
10657 (match_dup 1) | 10878 (match_dup 1) |
10658 (parallel [(const_int 1) | 10879 (parallel [(const_int 1) |
10659 (const_int 3) | 10880 (const_int 3) |
10660 (const_int 5) | 10881 (const_int 5) |
10661 (const_int 7)])))))] | 10882 (const_int 7)])))))] |
10662 "TARGET_SSE5" | 10883 "TARGET_XOP" |
10663 "phadduwd\t{%1, %0|%0, %1}" | 10884 "vphadduwd\t{%1, %0|%0, %1}" |
10664 [(set_attr "type" "sseiadd1")]) | 10885 [(set_attr "type" "sseiadd1")]) |
10665 | 10886 |
10666 (define_insn "sse5_phadduwq" | 10887 (define_insn "xop_phadduwq" |
10667 [(set (match_operand:V2DI 0 "register_operand" "=x") | 10888 [(set (match_operand:V2DI 0 "register_operand" "=x") |
10668 (plus:V2DI | 10889 (plus:V2DI |
10669 (plus:V2DI | 10890 (plus:V2DI |
10670 (zero_extend:V2DI | 10891 (zero_extend:V2DI |
10671 (vec_select:V2HI | 10892 (vec_select:V2HI |
10686 (zero_extend:V2DI | 10907 (zero_extend:V2DI |
10687 (vec_select:V2HI | 10908 (vec_select:V2HI |
10688 (match_dup 1) | 10909 (match_dup 1) |
10689 (parallel [(const_int 3) | 10910 (parallel [(const_int 3) |
10690 (const_int 7)]))))))] | 10911 (const_int 7)]))))))] |
10691 "TARGET_SSE5" | 10912 "TARGET_XOP" |
10692 "phadduwq\t{%1, %0|%0, %1}" | 10913 "vphadduwq\t{%1, %0|%0, %1}" |
10693 [(set_attr "type" "sseiadd1")]) | 10914 [(set_attr "type" "sseiadd1")]) |
10694 | 10915 |
10695 (define_insn "sse5_phaddudq" | 10916 (define_insn "xop_phaddudq" |
10696 [(set (match_operand:V2DI 0 "register_operand" "=x") | 10917 [(set (match_operand:V2DI 0 "register_operand" "=x") |
10697 (plus:V2DI | 10918 (plus:V2DI |
10698 (zero_extend:V2DI | 10919 (zero_extend:V2DI |
10699 (vec_select:V2SI | 10920 (vec_select:V2SI |
10700 (match_operand:V4SI 1 "nonimmediate_operand" "xm") | 10921 (match_operand:V4SI 1 "nonimmediate_operand" "xm") |
10703 (zero_extend:V2DI | 10924 (zero_extend:V2DI |
10704 (vec_select:V2SI | 10925 (vec_select:V2SI |
10705 (match_dup 1) | 10926 (match_dup 1) |
10706 (parallel [(const_int 1) | 10927 (parallel [(const_int 1) |
10707 (const_int 3)])))))] | 10928 (const_int 3)])))))] |
10708 "TARGET_SSE5" | 10929 "TARGET_XOP" |
10709 "phaddudq\t{%1, %0|%0, %1}" | 10930 "vphaddudq\t{%1, %0|%0, %1}" |
10710 [(set_attr "type" "sseiadd1")]) | 10931 [(set_attr "type" "sseiadd1")]) |
10711 | 10932 |
10712 (define_insn "sse5_phsubbw" | 10933 (define_insn "xop_phsubbw" |
10713 [(set (match_operand:V8HI 0 "register_operand" "=x") | 10934 [(set (match_operand:V8HI 0 "register_operand" "=x") |
10714 (minus:V8HI | 10935 (minus:V8HI |
10715 (sign_extend:V8HI | 10936 (sign_extend:V8HI |
10716 (vec_select:V8QI | 10937 (vec_select:V8QI |
10717 (match_operand:V16QI 1 "nonimmediate_operand" "xm") | 10938 (match_operand:V16QI 1 "nonimmediate_operand" "xm") |
10732 (const_int 7) | 10953 (const_int 7) |
10733 (const_int 9) | 10954 (const_int 9) |
10734 (const_int 11) | 10955 (const_int 11) |
10735 (const_int 13) | 10956 (const_int 13) |
10736 (const_int 15)])))))] | 10957 (const_int 15)])))))] |
10737 "TARGET_SSE5" | 10958 "TARGET_XOP" |
10738 "phsubbw\t{%1, %0|%0, %1}" | 10959 "vphsubbw\t{%1, %0|%0, %1}" |
10739 [(set_attr "type" "sseiadd1")]) | 10960 [(set_attr "type" "sseiadd1")]) |
10740 | 10961 |
10741 (define_insn "sse5_phsubwd" | 10962 (define_insn "xop_phsubwd" |
10742 [(set (match_operand:V4SI 0 "register_operand" "=x") | 10963 [(set (match_operand:V4SI 0 "register_operand" "=x") |
10743 (minus:V4SI | 10964 (minus:V4SI |
10744 (sign_extend:V4SI | 10965 (sign_extend:V4SI |
10745 (vec_select:V4HI | 10966 (vec_select:V4HI |
10746 (match_operand:V8HI 1 "nonimmediate_operand" "xm") | 10967 (match_operand:V8HI 1 "nonimmediate_operand" "xm") |
10753 (match_dup 1) | 10974 (match_dup 1) |
10754 (parallel [(const_int 1) | 10975 (parallel [(const_int 1) |
10755 (const_int 3) | 10976 (const_int 3) |
10756 (const_int 5) | 10977 (const_int 5) |
10757 (const_int 7)])))))] | 10978 (const_int 7)])))))] |
10758 "TARGET_SSE5" | 10979 "TARGET_XOP" |
10759 "phsubwd\t{%1, %0|%0, %1}" | 10980 "vphsubwd\t{%1, %0|%0, %1}" |
10760 [(set_attr "type" "sseiadd1")]) | 10981 [(set_attr "type" "sseiadd1")]) |
10761 | 10982 |
10762 (define_insn "sse5_phsubdq" | 10983 (define_insn "xop_phsubdq" |
10763 [(set (match_operand:V2DI 0 "register_operand" "=x") | 10984 [(set (match_operand:V2DI 0 "register_operand" "=x") |
10764 (minus:V2DI | 10985 (minus:V2DI |
10765 (sign_extend:V2DI | 10986 (sign_extend:V2DI |
10766 (vec_select:V2SI | 10987 (vec_select:V2SI |
10767 (match_operand:V4SI 1 "nonimmediate_operand" "xm") | 10988 (match_operand:V4SI 1 "nonimmediate_operand" "xm") |
10770 (sign_extend:V2DI | 10991 (sign_extend:V2DI |
10771 (vec_select:V2SI | 10992 (vec_select:V2SI |
10772 (match_dup 1) | 10993 (match_dup 1) |
10773 (parallel [(const_int 1) | 10994 (parallel [(const_int 1) |
10774 (const_int 3)])))))] | 10995 (const_int 3)])))))] |
10775 "TARGET_SSE5" | 10996 "TARGET_XOP" |
10776 "phsubdq\t{%1, %0|%0, %1}" | 10997 "vphsubdq\t{%1, %0|%0, %1}" |
10777 [(set_attr "type" "sseiadd1")]) | 10998 [(set_attr "type" "sseiadd1")]) |
10778 | 10999 |
10779 ;; SSE5 permute instructions | 11000 ;; XOP permute instructions |
10780 (define_insn "sse5_pperm" | 11001 (define_insn "xop_pperm" |
10781 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x") | 11002 [(set (match_operand:V16QI 0 "register_operand" "=x,x") |
10782 (unspec:V16QI | 11003 (unspec:V16QI |
10783 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm") | 11004 [(match_operand:V16QI 1 "register_operand" "x,x") |
10784 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x") | 11005 (match_operand:V16QI 2 "nonimmediate_operand" "x,m") |
10785 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")] | 11006 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")] |
10786 UNSPEC_SSE5_PERMUTE))] | 11007 UNSPEC_XOP_PERMUTE))] |
10787 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" | 11008 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" |
10788 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 11009 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
10789 [(set_attr "type" "sse4arg") | 11010 [(set_attr "type" "sse4arg") |
10790 (set_attr "mode" "TI")]) | 11011 (set_attr "mode" "TI")]) |
10791 | 11012 |
10792 ;; The following are for the various unpack insns which doesn't need the first | 11013 ;; XOP pack instructions that combine two vectors into a smaller vector |
10793 ;; source operand, so we can just use the output operand for the first operand. | 11014 (define_insn "xop_pperm_pack_v2di_v4si" |
10794 ;; This allows either of the other two operands to be a memory operand. We | |
10795 ;; can't just use the first operand as an argument to the normal pperm because | |
10796 ;; then an output only argument, suddenly becomes an input operand. | |
10797 (define_insn "sse5_pperm_zero_v16qi_v8hi" | |
10798 [(set (match_operand:V8HI 0 "register_operand" "=x,x") | |
10799 (zero_extend:V8HI | |
10800 (vec_select:V8QI | |
10801 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x") | |
10802 (match_operand 2 "" "")))) ;; parallel with const_int's | |
10803 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] | |
10804 "TARGET_SSE5 | |
10805 && (register_operand (operands[1], V16QImode) | |
10806 || register_operand (operands[2], V16QImode))" | |
10807 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" | |
10808 [(set_attr "type" "sseadd") | |
10809 (set_attr "mode" "TI")]) | |
10810 | |
10811 (define_insn "sse5_pperm_sign_v16qi_v8hi" | |
10812 [(set (match_operand:V8HI 0 "register_operand" "=x,x") | |
10813 (sign_extend:V8HI | |
10814 (vec_select:V8QI | |
10815 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x") | |
10816 (match_operand 2 "" "")))) ;; parallel with const_int's | |
10817 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] | |
10818 "TARGET_SSE5 | |
10819 && (register_operand (operands[1], V16QImode) | |
10820 || register_operand (operands[2], V16QImode))" | |
10821 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" | |
10822 [(set_attr "type" "sseadd") | |
10823 (set_attr "mode" "TI")]) | |
10824 | |
10825 (define_insn "sse5_pperm_zero_v8hi_v4si" | |
10826 [(set (match_operand:V4SI 0 "register_operand" "=x,x") | 11015 [(set (match_operand:V4SI 0 "register_operand" "=x,x") |
10827 (zero_extend:V4SI | |
10828 (vec_select:V4HI | |
10829 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x") | |
10830 (match_operand 2 "" "")))) ;; parallel with const_int's | |
10831 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] | |
10832 "TARGET_SSE5 | |
10833 && (register_operand (operands[1], V8HImode) | |
10834 || register_operand (operands[2], V16QImode))" | |
10835 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" | |
10836 [(set_attr "type" "sseadd") | |
10837 (set_attr "mode" "TI")]) | |
10838 | |
10839 (define_insn "sse5_pperm_sign_v8hi_v4si" | |
10840 [(set (match_operand:V4SI 0 "register_operand" "=x,x") | |
10841 (sign_extend:V4SI | |
10842 (vec_select:V4HI | |
10843 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x") | |
10844 (match_operand 2 "" "")))) ;; parallel with const_int's | |
10845 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] | |
10846 "TARGET_SSE5 | |
10847 && (register_operand (operands[1], V8HImode) | |
10848 || register_operand (operands[2], V16QImode))" | |
10849 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" | |
10850 [(set_attr "type" "sseadd") | |
10851 (set_attr "mode" "TI")]) | |
10852 | |
10853 (define_insn "sse5_pperm_zero_v4si_v2di" | |
10854 [(set (match_operand:V2DI 0 "register_operand" "=x,x") | |
10855 (zero_extend:V2DI | |
10856 (vec_select:V2SI | |
10857 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x") | |
10858 (match_operand 2 "" "")))) ;; parallel with const_int's | |
10859 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] | |
10860 "TARGET_SSE5 | |
10861 && (register_operand (operands[1], V4SImode) | |
10862 || register_operand (operands[2], V16QImode))" | |
10863 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" | |
10864 [(set_attr "type" "sseadd") | |
10865 (set_attr "mode" "TI")]) | |
10866 | |
10867 (define_insn "sse5_pperm_sign_v4si_v2di" | |
10868 [(set (match_operand:V2DI 0 "register_operand" "=x,x") | |
10869 (sign_extend:V2DI | |
10870 (vec_select:V2SI | |
10871 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x") | |
10872 (match_operand 2 "" "")))) ;; parallel with const_int's | |
10873 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))] | |
10874 "TARGET_SSE5 | |
10875 && (register_operand (operands[1], V4SImode) | |
10876 || register_operand (operands[2], V16QImode))" | |
10877 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}" | |
10878 [(set_attr "type" "sseadd") | |
10879 (set_attr "mode" "TI")]) | |
10880 | |
10881 ;; SSE5 pack instructions that combine two vectors into a smaller vector | |
10882 (define_insn "sse5_pperm_pack_v2di_v4si" | |
10883 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x") | |
10884 (vec_concat:V4SI | 11016 (vec_concat:V4SI |
10885 (truncate:V2SI | 11017 (truncate:V2SI |
10886 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm")) | 11018 (match_operand:V2DI 1 "register_operand" "x,x")) |
10887 (truncate:V2SI | 11019 (truncate:V2SI |
10888 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x")))) | 11020 (match_operand:V2DI 2 "nonimmediate_operand" "x,m")))) |
10889 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))] | 11021 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] |
10890 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" | 11022 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" |
10891 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 11023 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
10892 [(set_attr "type" "sse4arg") | 11024 [(set_attr "type" "sse4arg") |
10893 (set_attr "mode" "TI")]) | 11025 (set_attr "mode" "TI")]) |
10894 | 11026 |
10895 (define_insn "sse5_pperm_pack_v4si_v8hi" | 11027 (define_insn "xop_pperm_pack_v4si_v8hi" |
10896 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x") | 11028 [(set (match_operand:V8HI 0 "register_operand" "=x,x") |
10897 (vec_concat:V8HI | 11029 (vec_concat:V8HI |
10898 (truncate:V4HI | 11030 (truncate:V4HI |
10899 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm")) | 11031 (match_operand:V4SI 1 "register_operand" "x,x")) |
10900 (truncate:V4HI | 11032 (truncate:V4HI |
10901 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x")))) | 11033 (match_operand:V4SI 2 "nonimmediate_operand" "x,m")))) |
10902 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))] | 11034 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] |
10903 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" | 11035 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" |
10904 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 11036 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
10905 [(set_attr "type" "sse4arg") | 11037 [(set_attr "type" "sse4arg") |
10906 (set_attr "mode" "TI")]) | 11038 (set_attr "mode" "TI")]) |
10907 | 11039 |
10908 (define_insn "sse5_pperm_pack_v8hi_v16qi" | 11040 (define_insn "xop_pperm_pack_v8hi_v16qi" |
10909 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x") | 11041 [(set (match_operand:V16QI 0 "register_operand" "=x,x") |
10910 (vec_concat:V16QI | 11042 (vec_concat:V16QI |
10911 (truncate:V8QI | 11043 (truncate:V8QI |
10912 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm")) | 11044 (match_operand:V8HI 1 "register_operand" "x,x")) |
10913 (truncate:V8QI | 11045 (truncate:V8QI |
10914 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x")))) | 11046 (match_operand:V8HI 2 "nonimmediate_operand" "x,m")))) |
10915 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))] | 11047 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))] |
10916 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" | 11048 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))" |
10917 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 11049 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
10918 [(set_attr "type" "sse4arg") | 11050 [(set_attr "type" "sse4arg") |
10919 (set_attr "mode" "TI")]) | 11051 (set_attr "mode" "TI")]) |
10920 | 11052 |
10921 ;; Floating point permutation (permps, permpd) | 11053 ;; XOP packed rotate instructions |
10922 (define_insn "sse5_perm<mode>" | |
10923 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x") | |
10924 (unspec:SSEMODEF2P | |
10925 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm") | |
10926 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x") | |
10927 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")] | |
10928 UNSPEC_SSE5_PERMUTE))] | |
10929 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" | |
10930 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}" | |
10931 [(set_attr "type" "sse4arg") | |
10932 (set_attr "mode" "<MODE>")]) | |
10933 | |
10934 ;; SSE5 packed rotate instructions | |
10935 (define_expand "rotl<mode>3" | 11054 (define_expand "rotl<mode>3" |
10936 [(set (match_operand:SSEMODE1248 0 "register_operand" "") | 11055 [(set (match_operand:SSEMODE1248 0 "register_operand" "") |
10937 (rotate:SSEMODE1248 | 11056 (rotate:SSEMODE1248 |
10938 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") | 11057 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") |
10939 (match_operand:SI 2 "general_operand")))] | 11058 (match_operand:SI 2 "general_operand")))] |
10940 "TARGET_SSE5" | 11059 "TARGET_XOP" |
10941 { | 11060 { |
10942 /* If we were given a scalar, convert it to parallel */ | 11061 /* If we were given a scalar, convert it to parallel */ |
10943 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) | 11062 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) |
10944 { | 11063 { |
10945 rtvec vs = rtvec_alloc (<ssescalarnum>); | 11064 rtvec vs = rtvec_alloc (<ssescalarnum>); |
10956 | 11075 |
10957 for (i = 0; i < <ssescalarnum>; i++) | 11076 for (i = 0; i < <ssescalarnum>; i++) |
10958 RTVEC_ELT (vs, i) = op2; | 11077 RTVEC_ELT (vs, i) = op2; |
10959 | 11078 |
10960 emit_insn (gen_vec_init<mode> (reg, par)); | 11079 emit_insn (gen_vec_init<mode> (reg, par)); |
10961 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg)); | 11080 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); |
10962 DONE; | 11081 DONE; |
10963 } | 11082 } |
10964 }) | 11083 }) |
10965 | 11084 |
10966 (define_expand "rotr<mode>3" | 11085 (define_expand "rotr<mode>3" |
10967 [(set (match_operand:SSEMODE1248 0 "register_operand" "") | 11086 [(set (match_operand:SSEMODE1248 0 "register_operand" "") |
10968 (rotatert:SSEMODE1248 | 11087 (rotatert:SSEMODE1248 |
10969 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") | 11088 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") |
10970 (match_operand:SI 2 "general_operand")))] | 11089 (match_operand:SI 2 "general_operand")))] |
10971 "TARGET_SSE5" | 11090 "TARGET_XOP" |
10972 { | 11091 { |
10973 /* If we were given a scalar, convert it to parallel */ | 11092 /* If we were given a scalar, convert it to parallel */ |
10974 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) | 11093 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) |
10975 { | 11094 { |
10976 rtvec vs = rtvec_alloc (<ssescalarnum>); | 11095 rtvec vs = rtvec_alloc (<ssescalarnum>); |
10989 for (i = 0; i < <ssescalarnum>; i++) | 11108 for (i = 0; i < <ssescalarnum>; i++) |
10990 RTVEC_ELT (vs, i) = op2; | 11109 RTVEC_ELT (vs, i) = op2; |
10991 | 11110 |
10992 emit_insn (gen_vec_init<mode> (reg, par)); | 11111 emit_insn (gen_vec_init<mode> (reg, par)); |
10993 emit_insn (gen_neg<mode>2 (neg, reg)); | 11112 emit_insn (gen_neg<mode>2 (neg, reg)); |
10994 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg)); | 11113 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg)); |
10995 DONE; | 11114 DONE; |
10996 } | 11115 } |
10997 }) | 11116 }) |
10998 | 11117 |
10999 (define_insn "sse5_rotl<mode>3" | 11118 (define_insn "xop_rotl<mode>3" |
11000 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") | 11119 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") |
11001 (rotate:SSEMODE1248 | 11120 (rotate:SSEMODE1248 |
11002 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") | 11121 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") |
11003 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] | 11122 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] |
11004 "TARGET_SSE5" | 11123 "TARGET_XOP" |
11005 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" | 11124 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" |
11006 [(set_attr "type" "sseishft") | 11125 [(set_attr "type" "sseishft") |
11007 (set_attr "mode" "TI")]) | 11126 (set_attr "length_immediate" "1") |
11008 | 11127 (set_attr "mode" "TI")]) |
11009 (define_insn "sse5_rotr<mode>3" | 11128 |
11129 (define_insn "xop_rotr<mode>3" | |
11010 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") | 11130 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") |
11011 (rotatert:SSEMODE1248 | 11131 (rotatert:SSEMODE1248 |
11012 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") | 11132 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") |
11013 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] | 11133 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] |
11014 "TARGET_SSE5" | 11134 "TARGET_XOP" |
11015 { | 11135 { |
11016 operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2])); | 11136 operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2])); |
11017 return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\"; | 11137 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\"; |
11018 } | 11138 } |
11019 [(set_attr "type" "sseishft") | 11139 [(set_attr "type" "sseishft") |
11140 (set_attr "length_immediate" "1") | |
11020 (set_attr "mode" "TI")]) | 11141 (set_attr "mode" "TI")]) |
11021 | 11142 |
11022 (define_expand "vrotr<mode>3" | 11143 (define_expand "vrotr<mode>3" |
11023 [(match_operand:SSEMODE1248 0 "register_operand" "") | 11144 [(match_operand:SSEMODE1248 0 "register_operand" "") |
11024 (match_operand:SSEMODE1248 1 "register_operand" "") | 11145 (match_operand:SSEMODE1248 1 "register_operand" "") |
11025 (match_operand:SSEMODE1248 2 "register_operand" "")] | 11146 (match_operand:SSEMODE1248 2 "register_operand" "")] |
11026 "TARGET_SSE5" | 11147 "TARGET_XOP" |
11027 { | 11148 { |
11028 rtx reg = gen_reg_rtx (<MODE>mode); | 11149 rtx reg = gen_reg_rtx (<MODE>mode); |
11029 emit_insn (gen_neg<mode>2 (reg, operands[2])); | 11150 emit_insn (gen_neg<mode>2 (reg, operands[2])); |
11030 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg)); | 11151 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg)); |
11031 DONE; | 11152 DONE; |
11032 }) | 11153 }) |
11033 | 11154 |
11034 (define_expand "vrotl<mode>3" | 11155 (define_expand "vrotl<mode>3" |
11035 [(match_operand:SSEMODE1248 0 "register_operand" "") | 11156 [(match_operand:SSEMODE1248 0 "register_operand" "") |
11036 (match_operand:SSEMODE1248 1 "register_operand" "") | 11157 (match_operand:SSEMODE1248 1 "register_operand" "") |
11037 (match_operand:SSEMODE1248 2 "register_operand" "")] | 11158 (match_operand:SSEMODE1248 2 "register_operand" "")] |
11038 "TARGET_SSE5" | 11159 "TARGET_XOP" |
11039 { | 11160 { |
11040 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2])); | 11161 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2])); |
11041 DONE; | 11162 DONE; |
11042 }) | 11163 }) |
11043 | 11164 |
11044 (define_insn "sse5_vrotl<mode>3" | 11165 (define_insn "xop_vrotl<mode>3" |
11045 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") | 11166 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") |
11046 (if_then_else:SSEMODE1248 | 11167 (if_then_else:SSEMODE1248 |
11047 (ge:SSEMODE1248 | 11168 (ge:SSEMODE1248 |
11048 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") | 11169 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m") |
11049 (const_int 0)) | 11170 (const_int 0)) |
11050 (rotate:SSEMODE1248 | 11171 (rotate:SSEMODE1248 |
11051 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") | 11172 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x") |
11052 (match_dup 2)) | 11173 (match_dup 2)) |
11053 (rotatert:SSEMODE1248 | 11174 (rotatert:SSEMODE1248 |
11054 (match_dup 1) | 11175 (match_dup 1) |
11055 (neg:SSEMODE1248 (match_dup 2)))))] | 11176 (neg:SSEMODE1248 (match_dup 2)))))] |
11056 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)" | 11177 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" |
11057 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" | 11178 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" |
11058 [(set_attr "type" "sseishft") | 11179 [(set_attr "type" "sseishft") |
11059 (set_attr "mode" "TI")]) | 11180 (set_attr "prefix_data16" "0") |
11060 | 11181 (set_attr "prefix_extra" "2") |
11061 ;; SSE5 packed shift instructions. | 11182 (set_attr "mode" "TI")]) |
11183 | |
11184 ;; XOP packed shift instructions. | |
11062 ;; FIXME: add V2DI back in | 11185 ;; FIXME: add V2DI back in |
11063 (define_expand "vlshr<mode>3" | 11186 (define_expand "vlshr<mode>3" |
11064 [(match_operand:SSEMODE124 0 "register_operand" "") | 11187 [(match_operand:SSEMODE124 0 "register_operand" "") |
11065 (match_operand:SSEMODE124 1 "register_operand" "") | 11188 (match_operand:SSEMODE124 1 "register_operand" "") |
11066 (match_operand:SSEMODE124 2 "register_operand" "")] | 11189 (match_operand:SSEMODE124 2 "register_operand" "")] |
11067 "TARGET_SSE5" | 11190 "TARGET_XOP" |
11068 { | 11191 { |
11069 rtx neg = gen_reg_rtx (<MODE>mode); | 11192 rtx neg = gen_reg_rtx (<MODE>mode); |
11070 emit_insn (gen_neg<mode>2 (neg, operands[2])); | 11193 emit_insn (gen_neg<mode>2 (neg, operands[2])); |
11071 emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg)); | 11194 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg)); |
11072 DONE; | 11195 DONE; |
11073 }) | 11196 }) |
11074 | 11197 |
11075 (define_expand "vashr<mode>3" | 11198 (define_expand "vashr<mode>3" |
11076 [(match_operand:SSEMODE124 0 "register_operand" "") | 11199 [(match_operand:SSEMODE124 0 "register_operand" "") |
11077 (match_operand:SSEMODE124 1 "register_operand" "") | 11200 (match_operand:SSEMODE124 1 "register_operand" "") |
11078 (match_operand:SSEMODE124 2 "register_operand" "")] | 11201 (match_operand:SSEMODE124 2 "register_operand" "")] |
11079 "TARGET_SSE5" | 11202 "TARGET_XOP" |
11080 { | 11203 { |
11081 rtx neg = gen_reg_rtx (<MODE>mode); | 11204 rtx neg = gen_reg_rtx (<MODE>mode); |
11082 emit_insn (gen_neg<mode>2 (neg, operands[2])); | 11205 emit_insn (gen_neg<mode>2 (neg, operands[2])); |
11083 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg)); | 11206 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg)); |
11084 DONE; | 11207 DONE; |
11085 }) | 11208 }) |
11086 | 11209 |
11087 (define_expand "vashl<mode>3" | 11210 (define_expand "vashl<mode>3" |
11088 [(match_operand:SSEMODE124 0 "register_operand" "") | 11211 [(match_operand:SSEMODE124 0 "register_operand" "") |
11089 (match_operand:SSEMODE124 1 "register_operand" "") | 11212 (match_operand:SSEMODE124 1 "register_operand" "") |
11090 (match_operand:SSEMODE124 2 "register_operand" "")] | 11213 (match_operand:SSEMODE124 2 "register_operand" "")] |
11091 "TARGET_SSE5" | 11214 "TARGET_XOP" |
11092 { | 11215 { |
11093 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2])); | 11216 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2])); |
11094 DONE; | 11217 DONE; |
11095 }) | 11218 }) |
11096 | 11219 |
11097 (define_insn "sse5_ashl<mode>3" | 11220 (define_insn "xop_ashl<mode>3" |
11098 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") | 11221 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") |
11099 (if_then_else:SSEMODE1248 | 11222 (if_then_else:SSEMODE1248 |
11100 (ge:SSEMODE1248 | 11223 (ge:SSEMODE1248 |
11101 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") | 11224 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m") |
11102 (const_int 0)) | 11225 (const_int 0)) |
11103 (ashift:SSEMODE1248 | 11226 (ashift:SSEMODE1248 |
11104 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") | 11227 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x") |
11105 (match_dup 2)) | 11228 (match_dup 2)) |
11106 (ashiftrt:SSEMODE1248 | 11229 (ashiftrt:SSEMODE1248 |
11107 (match_dup 1) | 11230 (match_dup 1) |
11108 (neg:SSEMODE1248 (match_dup 2)))))] | 11231 (neg:SSEMODE1248 (match_dup 2)))))] |
11109 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)" | 11232 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" |
11110 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" | 11233 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" |
11111 [(set_attr "type" "sseishft") | 11234 [(set_attr "type" "sseishft") |
11112 (set_attr "mode" "TI")]) | 11235 (set_attr "prefix_data16" "0") |
11113 | 11236 (set_attr "prefix_extra" "2") |
11114 (define_insn "sse5_lshl<mode>3" | 11237 (set_attr "mode" "TI")]) |
11238 | |
11239 (define_insn "xop_lshl<mode>3" | |
11115 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") | 11240 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") |
11116 (if_then_else:SSEMODE1248 | 11241 (if_then_else:SSEMODE1248 |
11117 (ge:SSEMODE1248 | 11242 (ge:SSEMODE1248 |
11118 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") | 11243 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m") |
11119 (const_int 0)) | 11244 (const_int 0)) |
11120 (ashift:SSEMODE1248 | 11245 (ashift:SSEMODE1248 |
11121 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") | 11246 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x") |
11122 (match_dup 2)) | 11247 (match_dup 2)) |
11123 (lshiftrt:SSEMODE1248 | 11248 (lshiftrt:SSEMODE1248 |
11124 (match_dup 1) | 11249 (match_dup 1) |
11125 (neg:SSEMODE1248 (match_dup 2)))))] | 11250 (neg:SSEMODE1248 (match_dup 2)))))] |
11126 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)" | 11251 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))" |
11127 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" | 11252 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" |
11128 [(set_attr "type" "sseishft") | 11253 [(set_attr "type" "sseishft") |
11129 (set_attr "mode" "TI")]) | 11254 (set_attr "prefix_data16" "0") |
11130 | 11255 (set_attr "prefix_extra" "2") |
11131 ;; SSE2 doesn't have some shift variants, so define versions for SSE5 | 11256 (set_attr "mode" "TI")]) |
11257 | |
11258 ;; SSE2 doesn't have some shift variants, so define versions for XOP | |
11132 (define_expand "ashlv16qi3" | 11259 (define_expand "ashlv16qi3" |
11133 [(match_operand:V16QI 0 "register_operand" "") | 11260 [(match_operand:V16QI 0 "register_operand" "") |
11134 (match_operand:V16QI 1 "register_operand" "") | 11261 (match_operand:V16QI 1 "register_operand" "") |
11135 (match_operand:SI 2 "nonmemory_operand" "")] | 11262 (match_operand:SI 2 "nonmemory_operand" "")] |
11136 "TARGET_SSE5" | 11263 "TARGET_XOP" |
11137 { | 11264 { |
11138 rtvec vs = rtvec_alloc (16); | 11265 rtvec vs = rtvec_alloc (16); |
11139 rtx par = gen_rtx_PARALLEL (V16QImode, vs); | 11266 rtx par = gen_rtx_PARALLEL (V16QImode, vs); |
11140 rtx reg = gen_reg_rtx (V16QImode); | 11267 rtx reg = gen_reg_rtx (V16QImode); |
11141 int i; | 11268 int i; |
11142 for (i = 0; i < 16; i++) | 11269 for (i = 0; i < 16; i++) |
11143 RTVEC_ELT (vs, i) = operands[2]; | 11270 RTVEC_ELT (vs, i) = operands[2]; |
11144 | 11271 |
11145 emit_insn (gen_vec_initv16qi (reg, par)); | 11272 emit_insn (gen_vec_initv16qi (reg, par)); |
11146 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg)); | 11273 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg)); |
11147 DONE; | 11274 DONE; |
11148 }) | 11275 }) |
11149 | 11276 |
11150 (define_expand "lshlv16qi3" | 11277 (define_expand "lshlv16qi3" |
11151 [(match_operand:V16QI 0 "register_operand" "") | 11278 [(match_operand:V16QI 0 "register_operand" "") |
11152 (match_operand:V16QI 1 "register_operand" "") | 11279 (match_operand:V16QI 1 "register_operand" "") |
11153 (match_operand:SI 2 "nonmemory_operand" "")] | 11280 (match_operand:SI 2 "nonmemory_operand" "")] |
11154 "TARGET_SSE5" | 11281 "TARGET_XOP" |
11155 { | 11282 { |
11156 rtvec vs = rtvec_alloc (16); | 11283 rtvec vs = rtvec_alloc (16); |
11157 rtx par = gen_rtx_PARALLEL (V16QImode, vs); | 11284 rtx par = gen_rtx_PARALLEL (V16QImode, vs); |
11158 rtx reg = gen_reg_rtx (V16QImode); | 11285 rtx reg = gen_reg_rtx (V16QImode); |
11159 int i; | 11286 int i; |
11160 for (i = 0; i < 16; i++) | 11287 for (i = 0; i < 16; i++) |
11161 RTVEC_ELT (vs, i) = operands[2]; | 11288 RTVEC_ELT (vs, i) = operands[2]; |
11162 | 11289 |
11163 emit_insn (gen_vec_initv16qi (reg, par)); | 11290 emit_insn (gen_vec_initv16qi (reg, par)); |
11164 emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg)); | 11291 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg)); |
11165 DONE; | 11292 DONE; |
11166 }) | 11293 }) |
11167 | 11294 |
11168 (define_expand "ashrv16qi3" | 11295 (define_expand "ashrv16qi3" |
11169 [(match_operand:V16QI 0 "register_operand" "") | 11296 [(match_operand:V16QI 0 "register_operand" "") |
11170 (match_operand:V16QI 1 "register_operand" "") | 11297 (match_operand:V16QI 1 "register_operand" "") |
11171 (match_operand:SI 2 "nonmemory_operand" "")] | 11298 (match_operand:SI 2 "nonmemory_operand" "")] |
11172 "TARGET_SSE5" | 11299 "TARGET_XOP" |
11173 { | 11300 { |
11174 rtvec vs = rtvec_alloc (16); | 11301 rtvec vs = rtvec_alloc (16); |
11175 rtx par = gen_rtx_PARALLEL (V16QImode, vs); | 11302 rtx par = gen_rtx_PARALLEL (V16QImode, vs); |
11176 rtx reg = gen_reg_rtx (V16QImode); | 11303 rtx reg = gen_reg_rtx (V16QImode); |
11177 int i; | 11304 int i; |
11178 rtx ele = ((GET_CODE (operands[2]) == CONST_INT) | 11305 rtx ele = ((CONST_INT_P (operands[2])) |
11179 ? GEN_INT (- INTVAL (operands[2])) | 11306 ? GEN_INT (- INTVAL (operands[2])) |
11180 : operands[2]); | 11307 : operands[2]); |
11181 | 11308 |
11182 for (i = 0; i < 16; i++) | 11309 for (i = 0; i < 16; i++) |
11183 RTVEC_ELT (vs, i) = ele; | 11310 RTVEC_ELT (vs, i) = ele; |
11184 | 11311 |
11185 emit_insn (gen_vec_initv16qi (reg, par)); | 11312 emit_insn (gen_vec_initv16qi (reg, par)); |
11186 | 11313 |
11187 if (GET_CODE (operands[2]) != CONST_INT) | 11314 if (!CONST_INT_P (operands[2])) |
11188 { | 11315 { |
11189 rtx neg = gen_reg_rtx (V16QImode); | 11316 rtx neg = gen_reg_rtx (V16QImode); |
11190 emit_insn (gen_negv16qi2 (neg, reg)); | 11317 emit_insn (gen_negv16qi2 (neg, reg)); |
11191 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg)); | 11318 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg)); |
11192 } | 11319 } |
11193 else | 11320 else |
11194 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg)); | 11321 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg)); |
11195 | 11322 |
11196 DONE; | 11323 DONE; |
11197 }) | 11324 }) |
11198 | 11325 |
11199 (define_expand "ashrv2di3" | 11326 (define_expand "ashrv2di3" |
11200 [(match_operand:V2DI 0 "register_operand" "") | 11327 [(match_operand:V2DI 0 "register_operand" "") |
11201 (match_operand:V2DI 1 "register_operand" "") | 11328 (match_operand:V2DI 1 "register_operand" "") |
11202 (match_operand:DI 2 "nonmemory_operand" "")] | 11329 (match_operand:DI 2 "nonmemory_operand" "")] |
11203 "TARGET_SSE5" | 11330 "TARGET_XOP" |
11204 { | 11331 { |
11205 rtvec vs = rtvec_alloc (2); | 11332 rtvec vs = rtvec_alloc (2); |
11206 rtx par = gen_rtx_PARALLEL (V2DImode, vs); | 11333 rtx par = gen_rtx_PARALLEL (V2DImode, vs); |
11207 rtx reg = gen_reg_rtx (V2DImode); | 11334 rtx reg = gen_reg_rtx (V2DImode); |
11208 rtx ele; | 11335 rtx ele; |
11209 | 11336 |
11210 if (GET_CODE (operands[2]) == CONST_INT) | 11337 if (CONST_INT_P (operands[2])) |
11211 ele = GEN_INT (- INTVAL (operands[2])); | 11338 ele = GEN_INT (- INTVAL (operands[2])); |
11212 else if (GET_MODE (operands[2]) != DImode) | 11339 else if (GET_MODE (operands[2]) != DImode) |
11213 { | 11340 { |
11214 rtx move = gen_reg_rtx (DImode); | 11341 rtx move = gen_reg_rtx (DImode); |
11215 ele = gen_reg_rtx (DImode); | 11342 ele = gen_reg_rtx (DImode); |
11223 } | 11350 } |
11224 | 11351 |
11225 RTVEC_ELT (vs, 0) = ele; | 11352 RTVEC_ELT (vs, 0) = ele; |
11226 RTVEC_ELT (vs, 1) = ele; | 11353 RTVEC_ELT (vs, 1) = ele; |
11227 emit_insn (gen_vec_initv2di (reg, par)); | 11354 emit_insn (gen_vec_initv2di (reg, par)); |
11228 emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg)); | 11355 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg)); |
11229 DONE; | 11356 DONE; |
11230 }) | 11357 }) |
11231 | 11358 |
11232 ;; SSE5 FRCZ support | 11359 ;; XOP FRCZ support |
11233 ;; parallel insns | 11360 ;; parallel insns |
11234 (define_insn "sse5_frcz<mode>2" | 11361 (define_insn "xop_frcz<mode>2" |
11235 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") | 11362 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") |
11236 (unspec:SSEMODEF2P | 11363 (unspec:SSEMODEF2P |
11237 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")] | 11364 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")] |
11238 UNSPEC_FRCZ))] | 11365 UNSPEC_FRCZ))] |
11239 "TARGET_SSE5" | 11366 "TARGET_XOP" |
11240 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}" | 11367 "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}" |
11241 [(set_attr "type" "ssecvt1") | 11368 [(set_attr "type" "ssecvt1") |
11242 (set_attr "prefix_extra" "1") | |
11243 (set_attr "mode" "<MODE>")]) | 11369 (set_attr "mode" "<MODE>")]) |
11244 | 11370 |
11245 ;; scalar insns | 11371 ;; scalar insns |
11246 (define_insn "sse5_vmfrcz<mode>2" | 11372 (define_insn "xop_vmfrcz<mode>2" |
11247 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") | 11373 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") |
11248 (vec_merge:SSEMODEF2P | 11374 (vec_merge:SSEMODEF2P |
11249 (unspec:SSEMODEF2P | 11375 (unspec:SSEMODEF2P |
11250 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")] | 11376 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")] |
11251 UNSPEC_FRCZ) | 11377 UNSPEC_FRCZ) |
11252 (match_operand:SSEMODEF2P 1 "register_operand" "0") | 11378 (match_operand:SSEMODEF2P 1 "register_operand" "0") |
11253 (const_int 1)))] | 11379 (const_int 1)))] |
11254 "TARGET_SSE5" | 11380 "TARGET_XOP" |
11255 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}" | 11381 "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}" |
11256 [(set_attr "type" "ssecvt1") | 11382 [(set_attr "type" "ssecvt1") |
11257 (set_attr "prefix_extra" "1") | |
11258 (set_attr "mode" "<MODE>")]) | 11383 (set_attr "mode" "<MODE>")]) |
11259 | 11384 |
11260 (define_insn "sse5_cvtph2ps" | 11385 (define_insn "xop_frcz<mode>2256" |
11261 [(set (match_operand:V4SF 0 "register_operand" "=x") | 11386 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x") |
11262 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")] | 11387 (unspec:FMA4MODEF4 |
11263 UNSPEC_CVTPH2PS))] | 11388 [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")] |
11264 "TARGET_SSE5" | 11389 UNSPEC_FRCZ))] |
11265 "cvtph2ps\t{%1, %0|%0, %1}" | 11390 "TARGET_XOP" |
11266 [(set_attr "type" "ssecvt") | 11391 "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}" |
11267 (set_attr "mode" "V4SF")]) | 11392 [(set_attr "type" "ssecvt1") |
11268 | |
11269 (define_insn "sse5_cvtps2ph" | |
11270 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm") | |
11271 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")] | |
11272 UNSPEC_CVTPS2PH))] | |
11273 "TARGET_SSE5" | |
11274 "cvtps2ph\t{%1, %0|%0, %1}" | |
11275 [(set_attr "type" "ssecvt") | |
11276 (set_attr "mode" "V4SF")]) | |
11277 | |
11278 ;; Scalar versions of the com instructions that use vector types that are | |
11279 ;; called from the intrinsics.  Unlike the other s{s,d} instructions, the | |
11280 ;; com instructions fill in 0's in the upper bits instead of leaving them | |
11281 ;; unmodified, so we use const_vector of 0 instead of match_dup. | |
11282 (define_expand "sse5_vmmaskcmp<mode>3" | |
11283 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") | |
11284 (vec_merge:SSEMODEF2P | |
11285 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator" | |
11286 [(match_operand:SSEMODEF2P 2 "register_operand" "") | |
11287 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")]) | |
11288 (match_dup 4) | |
11289 (const_int 1)))] | |
11290 "TARGET_SSE5" | |
11291 { | |
11292 operands[4] = CONST0_RTX (<MODE>mode); | |
11293 }) | |
11294 | |
11295 (define_insn "*sse5_vmmaskcmp<mode>3" | |
11296 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") | |
11297 (vec_merge:SSEMODEF2P | |
11298 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator" | |
11299 [(match_operand:SSEMODEF2P 2 "register_operand" "x") | |
11300 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]) | |
11301 (match_operand:SSEMODEF2P 4 "") | |
11302 (const_int 1)))] | |
11303 "TARGET_SSE5" | |
11304 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}" | |
11305 [(set_attr "type" "sse4arg") | |
11306 (set_attr "mode" "<ssescalarmode>")]) | |
11307 | |
11308 ;; We don't have a comparison operator that always returns true/false, so | |
11309 ;; handle comfalse and comtrue specially. | |
11310 (define_insn "sse5_com_tf<mode>3" | |
11311 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") | |
11312 (unspec:SSEMODEF2P | |
11313 [(match_operand:SSEMODEF2P 1 "register_operand" "x") | |
11314 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm") | |
11315 (match_operand:SI 3 "const_int_operand" "n")] | |
11316 UNSPEC_SSE5_TRUEFALSE))] | |
11317 "TARGET_SSE5" | |
11318 { | |
11319 const char *ret = NULL; | |
11320 | |
11321 switch (INTVAL (operands[3])) | |
11322 { | |
11323 case COM_FALSE_S: | |
11324 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\"; | |
11325 break; | |
11326 | |
11327 case COM_FALSE_P: | |
11328 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\"; | |
11329 break; | |
11330 | |
11331 case COM_TRUE_S: | |
11332 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\"; | |
11333 break; | |
11334 | |
11335 case COM_TRUE_P: | |
11336 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\"; | |
11337 break; | |
11338 | |
11339 default: | |
11340 gcc_unreachable (); | |
11341 } | |
11342 | |
11343 return ret; | |
11344 } | |
11345 [(set_attr "type" "ssecmp") | |
11346 (set_attr "mode" "<MODE>")]) | 11393 (set_attr "mode" "<MODE>")]) |
11347 | 11394 |
11348 (define_insn "sse5_maskcmp<mode>3" | 11395 (define_insn "xop_maskcmp<mode>3" |
11349 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") | |
11350 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator" | |
11351 [(match_operand:SSEMODEF2P 2 "register_operand" "x") | |
11352 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))] | |
11353 "TARGET_SSE5" | |
11354 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}" | |
11355 [(set_attr "type" "ssecmp") | |
11356 (set_attr "mode" "<MODE>")]) | |
11357 | |
11358 (define_insn "sse5_maskcmp<mode>3" | |
11359 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") | 11396 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") |
11360 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator" | 11397 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator" |
11361 [(match_operand:SSEMODE1248 2 "register_operand" "x") | 11398 [(match_operand:SSEMODE1248 2 "register_operand" "x") |
11362 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] | 11399 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] |
11363 "TARGET_SSE5" | 11400 "TARGET_XOP" |
11364 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" | 11401 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" |
11365 [(set_attr "type" "sse4arg") | 11402 [(set_attr "type" "sse4arg") |
11366 (set_attr "mode" "TI")]) | 11403 (set_attr "prefix_data16" "0") |
11367 | 11404 (set_attr "prefix_rep" "0") |
11368 (define_insn "sse5_maskcmp_uns<mode>3" | 11405 (set_attr "prefix_extra" "2") |
11406 (set_attr "length_immediate" "1") | |
11407 (set_attr "mode" "TI")]) | |
11408 | |
11409 (define_insn "xop_maskcmp_uns<mode>3" | |
11369 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") | 11410 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") |
11370 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" | 11411 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" |
11371 [(match_operand:SSEMODE1248 2 "register_operand" "x") | 11412 [(match_operand:SSEMODE1248 2 "register_operand" "x") |
11372 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] | 11413 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] |
11373 "TARGET_SSE5" | 11414 "TARGET_XOP" |
11374 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" | 11415 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" |
11375 [(set_attr "type" "ssecmp") | 11416 [(set_attr "type" "ssecmp") |
11417 (set_attr "prefix_data16" "0") | |
11418 (set_attr "prefix_rep" "0") | |
11419 (set_attr "prefix_extra" "2") | |
11420 (set_attr "length_immediate" "1") | |
11376 (set_attr "mode" "TI")]) | 11421 (set_attr "mode" "TI")]) |
11377 | 11422 |
11378 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ* | 11423 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ* |
11379 ;; and pcomneu* not to be converted to the signed ones in case somebody needs | 11424 ;; and pcomneu* not to be converted to the signed ones in case somebody needs |
11380 ;; the exact instruction generated for the intrinsic. | 11425 ;; the exact instruction generated for the intrinsic. |
11381 (define_insn "sse5_maskcmp_uns2<mode>3" | 11426 (define_insn "xop_maskcmp_uns2<mode>3" |
11382 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") | 11427 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") |
11383 (unspec:SSEMODE1248 | 11428 (unspec:SSEMODE1248 |
11384 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" | 11429 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" |
11385 [(match_operand:SSEMODE1248 2 "register_operand" "x") | 11430 [(match_operand:SSEMODE1248 2 "register_operand" "x") |
11386 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])] | 11431 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])] |
11387 UNSPEC_SSE5_UNSIGNED_CMP))] | 11432 UNSPEC_XOP_UNSIGNED_CMP))] |
11388 "TARGET_SSE5" | 11433 "TARGET_XOP" |
11389 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" | 11434 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" |
11390 [(set_attr "type" "ssecmp") | 11435 [(set_attr "type" "ssecmp") |
11436 (set_attr "prefix_data16" "0") | |
11437 (set_attr "prefix_extra" "2") | |
11438 (set_attr "length_immediate" "1") | |
11391 (set_attr "mode" "TI")]) | 11439 (set_attr "mode" "TI")]) |
11392 | 11440 |
11393 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are | 11441 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are |
11394 ;; being added here to be complete. | 11442 ;; being added here to be complete. |
11395 (define_insn "sse5_pcom_tf<mode>3" | 11443 (define_insn "xop_pcom_tf<mode>3" |
11396 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") | 11444 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") |
11397 (unspec:SSEMODE1248 | 11445 (unspec:SSEMODE1248 |
11398 [(match_operand:SSEMODE1248 1 "register_operand" "x") | 11446 [(match_operand:SSEMODE1248 1 "register_operand" "x") |
11399 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm") | 11447 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm") |
11400 (match_operand:SI 3 "const_int_operand" "n")] | 11448 (match_operand:SI 3 "const_int_operand" "n")] |
11401 UNSPEC_SSE5_TRUEFALSE))] | 11449 UNSPEC_XOP_TRUEFALSE))] |
11402 "TARGET_SSE5" | 11450 "TARGET_XOP" |
11403 { | 11451 { |
11404 return ((INTVAL (operands[3]) != 0) | 11452 return ((INTVAL (operands[3]) != 0) |
11405 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" | 11453 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" |
11406 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"); | 11454 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"); |
11407 } | 11455 } |
11408 [(set_attr "type" "ssecmp") | 11456 [(set_attr "type" "ssecmp") |
11409 (set_attr "mode" "TI")]) | 11457 (set_attr "prefix_data16" "0") |
11410 | 11458 (set_attr "prefix_extra" "2") |
11459 (set_attr "length_immediate" "1") | |
11460 (set_attr "mode" "TI")]) | |
11461 | |
11462 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; | |
11411 (define_insn "*avx_aesenc" | 11463 (define_insn "*avx_aesenc" |
11412 [(set (match_operand:V2DI 0 "register_operand" "=x") | 11464 [(set (match_operand:V2DI 0 "register_operand" "=x") |
11413 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x") | 11465 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x") |
11414 (match_operand:V2DI 2 "nonimmediate_operand" "xm")] | 11466 (match_operand:V2DI 2 "nonimmediate_operand" "xm")] |
11415 UNSPEC_AESENC))] | 11467 UNSPEC_AESENC))] |
11416 "TARGET_AES && TARGET_AVX" | 11468 "TARGET_AES && TARGET_AVX" |
11417 "vaesenc\t{%2, %1, %0|%0, %1, %2}" | 11469 "vaesenc\t{%2, %1, %0|%0, %1, %2}" |
11418 [(set_attr "type" "sselog1") | 11470 [(set_attr "type" "sselog1") |
11471 (set_attr "prefix_extra" "1") | |
11419 (set_attr "prefix" "vex") | 11472 (set_attr "prefix" "vex") |
11420 (set_attr "mode" "TI")]) | 11473 (set_attr "mode" "TI")]) |
11421 | 11474 |
11422 (define_insn "aesenc" | 11475 (define_insn "aesenc" |
11423 [(set (match_operand:V2DI 0 "register_operand" "=x") | 11476 [(set (match_operand:V2DI 0 "register_operand" "=x") |
11436 (match_operand:V2DI 2 "nonimmediate_operand" "xm")] | 11489 (match_operand:V2DI 2 "nonimmediate_operand" "xm")] |
11437 UNSPEC_AESENCLAST))] | 11490 UNSPEC_AESENCLAST))] |
11438 "TARGET_AES && TARGET_AVX" | 11491 "TARGET_AES && TARGET_AVX" |
11439 "vaesenclast\t{%2, %1, %0|%0, %1, %2}" | 11492 "vaesenclast\t{%2, %1, %0|%0, %1, %2}" |
11440 [(set_attr "type" "sselog1") | 11493 [(set_attr "type" "sselog1") |
11494 (set_attr "prefix_extra" "1") | |
11441 (set_attr "prefix" "vex") | 11495 (set_attr "prefix" "vex") |
11442 (set_attr "mode" "TI")]) | 11496 (set_attr "mode" "TI")]) |
11443 | 11497 |
11444 (define_insn "aesenclast" | 11498 (define_insn "aesenclast" |
11445 [(set (match_operand:V2DI 0 "register_operand" "=x") | 11499 [(set (match_operand:V2DI 0 "register_operand" "=x") |
11458 (match_operand:V2DI 2 "nonimmediate_operand" "xm")] | 11512 (match_operand:V2DI 2 "nonimmediate_operand" "xm")] |
11459 UNSPEC_AESDEC))] | 11513 UNSPEC_AESDEC))] |
11460 "TARGET_AES && TARGET_AVX" | 11514 "TARGET_AES && TARGET_AVX" |
11461 "vaesdec\t{%2, %1, %0|%0, %1, %2}" | 11515 "vaesdec\t{%2, %1, %0|%0, %1, %2}" |
11462 [(set_attr "type" "sselog1") | 11516 [(set_attr "type" "sselog1") |
11517 (set_attr "prefix_extra" "1") | |
11463 (set_attr "prefix" "vex") | 11518 (set_attr "prefix" "vex") |
11464 (set_attr "mode" "TI")]) | 11519 (set_attr "mode" "TI")]) |
11465 | 11520 |
11466 (define_insn "aesdec" | 11521 (define_insn "aesdec" |
11467 [(set (match_operand:V2DI 0 "register_operand" "=x") | 11522 [(set (match_operand:V2DI 0 "register_operand" "=x") |
11480 (match_operand:V2DI 2 "nonimmediate_operand" "xm")] | 11535 (match_operand:V2DI 2 "nonimmediate_operand" "xm")] |
11481 UNSPEC_AESDECLAST))] | 11536 UNSPEC_AESDECLAST))] |
11482 "TARGET_AES && TARGET_AVX" | 11537 "TARGET_AES && TARGET_AVX" |
11483 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}" | 11538 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}" |
11484 [(set_attr "type" "sselog1") | 11539 [(set_attr "type" "sselog1") |
11540 (set_attr "prefix_extra" "1") | |
11485 (set_attr "prefix" "vex") | 11541 (set_attr "prefix" "vex") |
11486 (set_attr "mode" "TI")]) | 11542 (set_attr "mode" "TI")]) |
11487 | 11543 |
11488 (define_insn "aesdeclast" | 11544 (define_insn "aesdeclast" |
11489 [(set (match_operand:V2DI 0 "register_operand" "=x") | 11545 [(set (match_operand:V2DI 0 "register_operand" "=x") |
11514 UNSPEC_AESKEYGENASSIST))] | 11570 UNSPEC_AESKEYGENASSIST))] |
11515 "TARGET_AES" | 11571 "TARGET_AES" |
11516 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}" | 11572 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}" |
11517 [(set_attr "type" "sselog1") | 11573 [(set_attr "type" "sselog1") |
11518 (set_attr "prefix_extra" "1") | 11574 (set_attr "prefix_extra" "1") |
11575 (set_attr "length_immediate" "1") | |
11519 (set_attr "prefix" "maybe_vex") | 11576 (set_attr "prefix" "maybe_vex") |
11520 (set_attr "mode" "TI")]) | 11577 (set_attr "mode" "TI")]) |
11521 | 11578 |
11522 (define_insn "*vpclmulqdq" | 11579 (define_insn "*vpclmulqdq" |
11523 [(set (match_operand:V2DI 0 "register_operand" "=x") | 11580 [(set (match_operand:V2DI 0 "register_operand" "=x") |
11526 (match_operand:SI 3 "const_0_to_255_operand" "n")] | 11583 (match_operand:SI 3 "const_0_to_255_operand" "n")] |
11527 UNSPEC_PCLMUL))] | 11584 UNSPEC_PCLMUL))] |
11528 "TARGET_PCLMUL && TARGET_AVX" | 11585 "TARGET_PCLMUL && TARGET_AVX" |
11529 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 11586 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
11530 [(set_attr "type" "sselog1") | 11587 [(set_attr "type" "sselog1") |
11588 (set_attr "prefix_extra" "1") | |
11589 (set_attr "length_immediate" "1") | |
11531 (set_attr "prefix" "vex") | 11590 (set_attr "prefix" "vex") |
11532 (set_attr "mode" "TI")]) | 11591 (set_attr "mode" "TI")]) |
11533 | 11592 |
11534 (define_insn "pclmulqdq" | 11593 (define_insn "pclmulqdq" |
11535 [(set (match_operand:V2DI 0 "register_operand" "=x") | 11594 [(set (match_operand:V2DI 0 "register_operand" "=x") |
11539 UNSPEC_PCLMUL))] | 11598 UNSPEC_PCLMUL))] |
11540 "TARGET_PCLMUL" | 11599 "TARGET_PCLMUL" |
11541 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}" | 11600 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}" |
11542 [(set_attr "type" "sselog1") | 11601 [(set_attr "type" "sselog1") |
11543 (set_attr "prefix_extra" "1") | 11602 (set_attr "prefix_extra" "1") |
11603 (set_attr "length_immediate" "1") | |
11544 (set_attr "mode" "TI")]) | 11604 (set_attr "mode" "TI")]) |
11545 | 11605 |
11546 (define_expand "avx_vzeroall" | 11606 (define_expand "avx_vzeroall" |
11547 [(match_par_dup 0 [(const_int 0)])] | 11607 [(match_par_dup 0 [(const_int 0)])] |
11548 "TARGET_AVX" | 11608 "TARGET_AVX" |
11563 CONST0_RTX (V8SImode)); | 11623 CONST0_RTX (V8SImode)); |
11564 }) | 11624 }) |
11565 | 11625 |
11566 (define_insn "*avx_vzeroall" | 11626 (define_insn "*avx_vzeroall" |
11567 [(match_parallel 0 "vzeroall_operation" | 11627 [(match_parallel 0 "vzeroall_operation" |
11568 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL) | 11628 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])] |
11569 (set (match_operand 1 "register_operand" "=x") | |
11570 (match_operand 2 "const0_operand" "X"))])] | |
11571 "TARGET_AVX" | 11629 "TARGET_AVX" |
11572 "vzeroall" | 11630 "vzeroall" |
11573 [(set_attr "type" "sse") | 11631 [(set_attr "type" "sse") |
11632 (set_attr "modrm" "0") | |
11574 (set_attr "memory" "none") | 11633 (set_attr "memory" "none") |
11575 (set_attr "prefix" "vex") | 11634 (set_attr "prefix" "vex") |
11576 (set_attr "mode" "OI")]) | 11635 (set_attr "mode" "OI")]) |
11577 | 11636 |
11578 ;; vzeroupper clobbers the upper 128bits of AVX registers. | 11637 ;; vzeroupper clobbers the upper 128bits of AVX registers. |
11579 (define_insn "avx_vzeroupper" | 11638 (define_expand "avx_vzeroupper" |
11580 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER) | 11639 [(match_par_dup 0 [(const_int 0)])] |
11581 (clobber (reg:V8SI XMM0_REG)) | 11640 "TARGET_AVX" |
11582 (clobber (reg:V8SI XMM1_REG)) | 11641 { |
11583 (clobber (reg:V8SI XMM2_REG)) | 11642 int nregs = TARGET_64BIT ? 16 : 8; |
11584 (clobber (reg:V8SI XMM3_REG)) | 11643 int regno; |
11585 (clobber (reg:V8SI XMM4_REG)) | 11644 |
11586 (clobber (reg:V8SI XMM5_REG)) | 11645 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1)); |
11587 (clobber (reg:V8SI XMM6_REG)) | 11646 |
11588 (clobber (reg:V8SI XMM7_REG))] | 11647 XVECEXP (operands[0], 0, 0) |
11589 "TARGET_AVX && !TARGET_64BIT" | 11648 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx), |
11649 UNSPECV_VZEROUPPER); | |
11650 | |
11651 for (regno = 0; regno < nregs; regno++) | |
11652 XVECEXP (operands[0], 0, regno + 1) | |
11653 = gen_rtx_CLOBBER (VOIDmode, | |
11654 gen_rtx_REG (V8SImode, SSE_REGNO (regno))); | |
11655 }) | |
11656 | |
11657 (define_insn "*avx_vzeroupper" | |
11658 [(match_parallel 0 "vzeroupper_operation" | |
11659 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])] | |
11660 "TARGET_AVX" | |
11590 "vzeroupper" | 11661 "vzeroupper" |
11591 [(set_attr "type" "sse") | 11662 [(set_attr "type" "sse") |
11663 (set_attr "modrm" "0") | |
11592 (set_attr "memory" "none") | 11664 (set_attr "memory" "none") |
11593 (set_attr "prefix" "vex") | 11665 (set_attr "prefix" "vex") |
11594 (set_attr "mode" "OI")]) | 11666 (set_attr "mode" "OI")]) |
11595 | 11667 |
11596 (define_insn "avx_vzeroupper_rex64" | 11668 (define_insn_and_split "vec_dup<mode>" |
11597 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER) | 11669 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x") |
11598 (clobber (reg:V8SI XMM0_REG)) | 11670 (vec_duplicate:AVX256MODE24P |
11599 (clobber (reg:V8SI XMM1_REG)) | 11671 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))] |
11600 (clobber (reg:V8SI XMM2_REG)) | 11672 "TARGET_AVX" |
11601 (clobber (reg:V8SI XMM3_REG)) | 11673 "@ |
11602 (clobber (reg:V8SI XMM4_REG)) | 11674 vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1} |
11603 (clobber (reg:V8SI XMM5_REG)) | 11675 #" |
11604 (clobber (reg:V8SI XMM6_REG)) | 11676 "&& reload_completed && REG_P (operands[1])" |
11605 (clobber (reg:V8SI XMM7_REG)) | 11677 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1))) |
11606 (clobber (reg:V8SI XMM8_REG)) | 11678 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))] |
11607 (clobber (reg:V8SI XMM9_REG)) | 11679 { |
11608 (clobber (reg:V8SI XMM10_REG)) | 11680 operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0])); |
11609 (clobber (reg:V8SI XMM11_REG)) | 11681 } |
11610 (clobber (reg:V8SI XMM12_REG)) | 11682 [(set_attr "type" "ssemov") |
11611 (clobber (reg:V8SI XMM13_REG)) | 11683 (set_attr "prefix_extra" "1") |
11612 (clobber (reg:V8SI XMM14_REG)) | 11684 (set_attr "prefix" "vex") |
11613 (clobber (reg:V8SI XMM15_REG))] | 11685 (set_attr "mode" "V8SF")]) |
11614 "TARGET_AVX && TARGET_64BIT" | 11686 |
11615 "vzeroupper" | 11687 (define_insn "avx_vbroadcastf128_<mode>" |
11616 [(set_attr "type" "sse") | 11688 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x") |
11617 (set_attr "memory" "none") | 11689 (vec_concat:AVX256MODE |
11618 (set_attr "prefix" "vex") | 11690 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x") |
11619 (set_attr "mode" "OI")]) | 11691 (match_dup 1)))] |
11620 | 11692 "TARGET_AVX" |
11621 (define_insn "avx_vpermil<mode>" | 11693 "@ |
11694 vbroadcastf128\t{%1, %0|%0, %1} | |
11695 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1} | |
11696 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}" | |
11697 [(set_attr "type" "ssemov,sselog1,sselog1") | |
11698 (set_attr "prefix_extra" "1") | |
11699 (set_attr "length_immediate" "0,1,1") | |
11700 (set_attr "prefix" "vex") | |
11701 (set_attr "mode" "V4SF,V8SF,V8SF")]) | |
11702 | |
11703 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm. | |
11704 ;; If it so happens that the input is in memory, use vbroadcast. | |
11705 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128). | |
11706 (define_insn "*avx_vperm_broadcast_v4sf" | |
11707 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x") | |
11708 (vec_select:V4SF | |
11709 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x") | |
11710 (match_parallel 2 "avx_vbroadcast_operand" | |
11711 [(match_operand 3 "const_int_operand" "C,n,n")])))] | |
11712 "TARGET_AVX" | |
11713 { | |
/* Operand 3 is the first index of the selection parallel, i.e. the element being broadcast. */ | |
11714 int elt = INTVAL (operands[3]); | |
11715 switch (which_alternative) | |
11716 { | |
/* Alternatives 0 and 1 (memory source): re-address operand 1 as the single SFmode element at byte offset elt * 4 and broadcast it. */ | |
11717 case 0: | |
11718 case 1: | |
11719 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4); | |
11720 return "vbroadcastss\t{%1, %0|%0, %1}"; | |
/* Alternative 2 (register source): multiplying by 0x55 copies the 2-bit index into all four 2-bit fields of the vpermilps immediate. */ | |
11721 case 2: | |
11722 operands[2] = GEN_INT (elt * 0x55); | |
11723 return "vpermilps\t{%2, %1, %0|%0, %1, %2}"; | |
11724 default: | |
11725 gcc_unreachable (); | |
11726 } | |
11727 } | |
11728 [(set_attr "type" "ssemov,ssemov,sselog1") | |
11729 (set_attr "prefix_extra" "1") | |
11730 (set_attr "length_immediate" "0,0,1") | |
11731 (set_attr "prefix" "vex") | |
11732 (set_attr "mode" "SF,SF,V4SF")]) | |
11733 | |
;; 256-bit broadcast expressed as a vec_select.  Always output as "#" and | |
;; split after reload: a register source becomes a vpermil followed by a | |
;; vperm2f128, while a memory source is narrowed to the selected scalar | |
;; element and rewritten as the vec_duplicate in the split template. | |
11734 (define_insn_and_split "*avx_vperm_broadcast_<mode>" | |
11735 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x") | |
11736 (vec_select:AVX256MODEF2P | |
11737 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x") | |
11738 (match_parallel 2 "avx_vbroadcast_operand" | |
11739 [(match_operand 3 "const_int_operand" "C,n,n")])))] | |
11740 "TARGET_AVX" | |
11741 "#" | |
11742 "&& reload_completed" | |
11743 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))] | |
11744 { | |
11745 rtx op0 = operands[0], op1 = operands[1]; | |
/* Operand 3 is the first index of the selection parallel, i.e. the element being broadcast. */ | |
11746 int elt = INTVAL (operands[3]); | |
11747 | |
11748 if (REG_P (op1)) | |
11749 { | |
11750 int mask; | |
11751 | |
11752 /* Shuffle element we care about into all elements of the 128-bit lane. | |
11753 The other lane gets shuffled too, but we don't care. */ | |
11754 if (<MODE>mode == V4DFmode) | |
11755 mask = (elt & 1 ? 15 : 0); | |
11756 else | |
11757 mask = (elt & 3) * 0x55; | |
11758 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask))); | |
11759 | |
11760 /* Shuffle the lane we care about into both lanes of the dest. */ | |
11761 mask = (elt / (<ssescalarnum> / 2)) * 0x11; | |
11762 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask))); | |
11763 DONE; | |
11764 } | |
11765 | |
/* Not a register: re-address operand 1 as the one selected scalar element so the split template's vec_duplicate applies to it. */ | |
11766 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode, | |
11767 elt * GET_MODE_SIZE (<avxscalarmode>mode)); | |
11768 }) | |
11769 | |
;; Expander for the immediate form over AVXMODEFDP modes: rewrites the | |
;; integer selector in operand 2 into an explicit PARALLEL of element | |
;; indices for the vec_select. | |
11770 (define_expand "avx_vpermil<mode>" | |
11771 [(set (match_operand:AVXMODEFDP 0 "register_operand" "") | |
11772 (vec_select:AVXMODEFDP | |
11773 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "") | |
11774 (match_operand:SI 2 "const_0_to_255_operand" "")))] | |
11775 "TARGET_AVX" | |
11776 { | |
11777 int mask = INTVAL (operands[2]); | |
11778 rtx perm[<ssescalarnum>]; | |
11779 | |
/* One selector bit per result element: bits 0 and 1 give indices 0 or 1. */ | |
11780 perm[0] = GEN_INT (mask & 1); | |
11781 perm[1] = GEN_INT ((mask >> 1) & 1); | |
11782 if (<MODE>mode == V4DFmode) | |
11783 { | |
/* Bits 2 and 3 are offset by 2, so elements 2 and 3 select index 2 or 3. */ | |
11784 perm[2] = GEN_INT (((mask >> 2) & 1) + 2); | |
11785 perm[3] = GEN_INT (((mask >> 3) & 1) + 2); | |
11786 } | |
11787 | |
/* Replace the immediate with the index vector. */ | |
11788 operands[2] | |
11789 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm)); | |
11790 }) | |
11791 | |
;; Expander for the immediate form over AVXMODEFSP modes: rewrites the | |
;; integer selector in operand 2 into an explicit PARALLEL of element | |
;; indices for the vec_select. | |
11792 (define_expand "avx_vpermil<mode>" | |
11793 [(set (match_operand:AVXMODEFSP 0 "register_operand" "") | |
11794 (vec_select:AVXMODEFSP | |
11795 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "") | |
11796 (match_operand:SI 2 "const_0_to_255_operand" "")))] | |
11797 "TARGET_AVX" | |
11798 { | |
11799 int mask = INTVAL (operands[2]); | |
11800 rtx perm[<ssescalarnum>]; | |
11801 | |
/* Each 2-bit field of the selector gives the source index (0..3) for one of the first four result elements. */ | |
11802 perm[0] = GEN_INT (mask & 3); | |
11803 perm[1] = GEN_INT ((mask >> 2) & 3); | |
11804 perm[2] = GEN_INT ((mask >> 4) & 3); | |
11805 perm[3] = GEN_INT ((mask >> 6) & 3); | |
11806 if (<MODE>mode == V8SFmode) | |
11807 { | |
/* Elements 4..7 reuse the same four fields, offset by 4. */ | |
11808 perm[4] = GEN_INT ((mask & 3) + 4); | |
11809 perm[5] = GEN_INT (((mask >> 2) & 3) + 4); | |
11810 perm[6] = GEN_INT (((mask >> 4) & 3) + 4); | |
11811 perm[7] = GEN_INT (((mask >> 6) & 3) + 4); | |
11812 } | |
11813 | |
/* Replace the immediate with the index vector. */ | |
11814 operands[2] | |
11815 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm)); | |
11816 }) | |
11817 | |
11818 (define_insn "*avx_vpermilp<mode>" | |
11622 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") | 11819 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") |
11623 (unspec:AVXMODEF2P | 11820 (vec_select:AVXMODEF2P |
11624 [(match_operand:AVXMODEF2P 1 "register_operand" "xm") | 11821 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm") |
11625 (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")] | 11822 (match_parallel 2 "avx_vpermilp_<mode>_operand" |
11626 UNSPEC_VPERMIL))] | 11823 [(match_operand 3 "const_int_operand" "")])))] |
11627 "TARGET_AVX" | 11824 "TARGET_AVX" |
11628 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" | 11825 { |
11826 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1; | |
11827 operands[2] = GEN_INT (mask); | |
11828 return "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"; | |
11829 } | |
11629 [(set_attr "type" "sselog") | 11830 [(set_attr "type" "sselog") |
11831 (set_attr "prefix_extra" "1") | |
11832 (set_attr "length_immediate" "1") | |
11630 (set_attr "prefix" "vex") | 11833 (set_attr "prefix" "vex") |
11631 (set_attr "mode" "<MODE>")]) | 11834 (set_attr "mode" "<MODE>")]) |
11632 | 11835 |
11633 (define_insn "avx_vpermilvar<mode>3" | 11836 (define_insn "avx_vpermilvar<mode>3" |
11634 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") | 11837 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") |
11637 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")] | 11840 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")] |
11638 UNSPEC_VPERMIL))] | 11841 UNSPEC_VPERMIL))] |
11639 "TARGET_AVX" | 11842 "TARGET_AVX" |
11640 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" | 11843 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" |
11641 [(set_attr "type" "sselog") | 11844 [(set_attr "type" "sselog") |
11845 (set_attr "prefix_extra" "1") | |
11642 (set_attr "prefix" "vex") | 11846 (set_attr "prefix" "vex") |
11643 (set_attr "mode" "<MODE>")]) | 11847 (set_attr "mode" "<MODE>")]) |
11644 | 11848 |
;; Expander for vperm2f128.  Operands 1 and 2 are the two vector sources | |
;; and operand 3 is the 8-bit selector.  When neither bit of 0x88 is set | |
;; in the selector, the operation is emitted as an explicit vec_select | |
;; over the vec_concat of both sources; otherwise expansion falls through | |
;; to the unspec form in the template below. | |
11645 (define_insn "avx_vperm2f128<mode>3" | 11849 (define_expand "avx_vperm2f128<mode>3" |
11850 [(set (match_operand:AVX256MODE2P 0 "register_operand" "") | |
11851 (unspec:AVX256MODE2P | |
11852 [(match_operand:AVX256MODE2P 1 "register_operand" "") | |
11853 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "") | |
11854 (match_operand:SI 3 "const_0_to_255_operand" "")] | |
11855 UNSPEC_VPERMIL2F128))] | |
11856 "TARGET_AVX" | |
11857 { | |
/* The selector is operand 3.  Operand 2 is the second vector source (a register or memory), so INTVAL must not be applied to it. */ | |
11858 int mask = INTVAL (operands[3]); | |
11859 if ((mask & 0x88) == 0) | |
11860 { | |
11861 rtx perm[<ssescalarnum>], t1, t2; | |
11862 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2; | |
11863 | |
/* Low half of the result: selector bits 1:0 pick one of the four half-width lanes of the concatenated sources. */ | |
11864 base = (mask & 3) * nelt2; | |
11865 for (i = 0; i < nelt2; ++i) | |
11866 perm[i] = GEN_INT (base + i); | |
11867 | |
/* High half of the result: selector bits 5:4 pick the lane. */ | |
11868 base = ((mask >> 4) & 3) * nelt2; | |
11869 for (i = 0; i < nelt2; ++i) | |
11870 perm[i + nelt2] = GEN_INT (base + i); | |
11871 | |
/* Emit (set op0 (vec_select (vec_concat op1 op2) (parallel [...]))) and stop. */ | |
11872 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode, | |
11873 operands[1], operands[2]); | |
11874 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm)); | |
11875 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1); | |
11876 t2 = gen_rtx_SET (VOIDmode, operands[0], t2); | |
11877 emit_insn (t2); | |
11878 DONE; | |
11879 } | |
11880 }) | |
11881 | |
11882 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which | |
11883 ;; means that in order to represent this properly in rtl we'd have to | |
11884 ;; nest *another* vec_concat with a zero operand and do the select from | |
11885 ;; a 4x wide vector. That doesn't seem very nice. | |
11886 (define_insn "*avx_vperm2f128<mode>_full" | |
11646 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x") | 11887 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x") |
11647 (unspec:AVX256MODE2P | 11888 (unspec:AVX256MODE2P |
11648 [(match_operand:AVX256MODE2P 1 "register_operand" "x") | 11889 [(match_operand:AVX256MODE2P 1 "register_operand" "x") |
11649 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm") | 11890 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm") |
11650 (match_operand:SI 3 "const_0_to_255_operand" "n")] | 11891 (match_operand:SI 3 "const_0_to_255_operand" "n")] |
11651 UNSPEC_VPERMIL2F128))] | 11892 UNSPEC_VPERMIL2F128))] |
11652 "TARGET_AVX" | 11893 "TARGET_AVX" |
11653 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}" | 11894 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}" |
11654 [(set_attr "type" "sselog") | 11895 [(set_attr "type" "sselog") |
11896 (set_attr "prefix_extra" "1") | |
11897 (set_attr "length_immediate" "1") | |
11655 (set_attr "prefix" "vex") | 11898 (set_attr "prefix" "vex") |
11656 (set_attr "mode" "V8SF")]) | 11899 (set_attr "mode" "V8SF")]) |
11657 | 11900 |
11658 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>" | 11901 (define_insn "*avx_vperm2f128<mode>_nozero" |
11659 [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x") | 11902 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x") |
11660 (vec_concat:AVXMODEF4P | 11903 (vec_select:AVX256MODE2P |
11661 (vec_concat:<avxhalfvecmode> | 11904 (vec_concat:<ssedoublesizemode> |
11662 (match_operand:<avxscalarmode> 1 "memory_operand" "m") | 11905 (match_operand:AVX256MODE2P 1 "register_operand" "x") |
11663 (match_dup 1)) | 11906 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")) |
11664 (vec_concat:<avxhalfvecmode> | 11907 (match_parallel 3 "avx_vperm2f128_<mode>_operand" |
11665 (match_dup 1) | 11908 [(match_operand 4 "const_int_operand" "")])))] |
11666 (match_dup 1))))] | 11909 "TARGET_AVX" |
11667 "TARGET_AVX" | 11910 { |
11668 "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}" | 11911 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1; |
11669 [(set_attr "type" "ssemov") | 11912 operands[3] = GEN_INT (mask); |
11670 (set_attr "prefix" "vex") | 11913 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"; |
11671 (set_attr "mode" "<avxscalarmode>")]) | 11914 } |
11672 | 11915 [(set_attr "type" "sselog") |
11673 (define_insn "avx_vbroadcastss256" | 11916 (set_attr "prefix_extra" "1") |
11674 [(set (match_operand:V8SF 0 "register_operand" "=x") | 11917 (set_attr "length_immediate" "1") |
11675 (vec_concat:V8SF | 11918 (set_attr "prefix" "vex") |
11676 (vec_concat:V4SF | 11919 (set_attr "mode" "V8SF")]) |
11677 (vec_concat:V2SF | |
11678 (match_operand:SF 1 "memory_operand" "m") | |
11679 (match_dup 1)) | |
11680 (vec_concat:V2SF | |
11681 (match_dup 1) | |
11682 (match_dup 1))) | |
11683 (vec_concat:V4SF | |
11684 (vec_concat:V2SF | |
11685 (match_dup 1) | |
11686 (match_dup 1)) | |
11687 (vec_concat:V2SF | |
11688 (match_dup 1) | |
11689 (match_dup 1)))))] | |
11690 "TARGET_AVX" | |
11691 "vbroadcastss\t{%1, %0|%0, %1}" | |
11692 [(set_attr "type" "ssemov") | |
11693 (set_attr "prefix" "vex") | |
11694 (set_attr "mode" "SF")]) | |
11695 | |
11696 (define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256" | |
11697 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x") | |
11698 (vec_concat:AVX256MODEF2P | |
11699 (match_operand:<avxhalfvecmode> 1 "memory_operand" "m") | |
11700 (match_dup 1)))] | |
11701 "TARGET_AVX" | |
11702 "vbroadcastf128\t{%1, %0|%0, %1}" | |
11703 [(set_attr "type" "ssemov") | |
11704 (set_attr "prefix" "vex") | |
11705 (set_attr "mode" "V4SF")]) | |
11706 | 11920 |
11707 (define_expand "avx_vinsertf128<mode>" | 11921 (define_expand "avx_vinsertf128<mode>" |
11708 [(match_operand:AVX256MODE 0 "register_operand" "") | 11922 [(match_operand:AVX256MODE 0 "register_operand" "") |
11709 (match_operand:AVX256MODE 1 "register_operand" "") | 11923 (match_operand:AVX256MODE 1 "register_operand" "") |
11710 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "") | 11924 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "") |
11735 (match_operand:AVX256MODE4P 1 "register_operand" "x") | 11949 (match_operand:AVX256MODE4P 1 "register_operand" "x") |
11736 (parallel [(const_int 2) (const_int 3)]))))] | 11950 (parallel [(const_int 2) (const_int 3)]))))] |
11737 "TARGET_AVX" | 11951 "TARGET_AVX" |
11738 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" | 11952 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" |
11739 [(set_attr "type" "sselog") | 11953 [(set_attr "type" "sselog") |
11954 (set_attr "prefix_extra" "1") | |
11955 (set_attr "length_immediate" "1") | |
11740 (set_attr "prefix" "vex") | 11956 (set_attr "prefix" "vex") |
11741 (set_attr "mode" "V8SF")]) | 11957 (set_attr "mode" "V8SF")]) |
11742 | 11958 |
11743 (define_insn "vec_set_hi_<mode>" | 11959 (define_insn "vec_set_hi_<mode>" |
11744 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x") | 11960 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x") |
11748 (parallel [(const_int 0) (const_int 1)])) | 11964 (parallel [(const_int 0) (const_int 1)])) |
11749 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))] | 11965 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))] |
11750 "TARGET_AVX" | 11966 "TARGET_AVX" |
11751 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" | 11967 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" |
11752 [(set_attr "type" "sselog") | 11968 [(set_attr "type" "sselog") |
11969 (set_attr "prefix_extra" "1") | |
11970 (set_attr "length_immediate" "1") | |
11753 (set_attr "prefix" "vex") | 11971 (set_attr "prefix" "vex") |
11754 (set_attr "mode" "V8SF")]) | 11972 (set_attr "mode" "V8SF")]) |
11755 | 11973 |
11756 (define_insn "vec_set_lo_<mode>" | 11974 (define_insn "vec_set_lo_<mode>" |
11757 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x") | 11975 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x") |
11762 (parallel [(const_int 4) (const_int 5) | 11980 (parallel [(const_int 4) (const_int 5) |
11763 (const_int 6) (const_int 7)]))))] | 11981 (const_int 6) (const_int 7)]))))] |
11764 "TARGET_AVX" | 11982 "TARGET_AVX" |
11765 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" | 11983 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" |
11766 [(set_attr "type" "sselog") | 11984 [(set_attr "type" "sselog") |
11985 (set_attr "prefix_extra" "1") | |
11986 (set_attr "length_immediate" "1") | |
11767 (set_attr "prefix" "vex") | 11987 (set_attr "prefix" "vex") |
11768 (set_attr "mode" "V8SF")]) | 11988 (set_attr "mode" "V8SF")]) |
11769 | 11989 |
11770 (define_insn "vec_set_hi_<mode>" | 11990 (define_insn "vec_set_hi_<mode>" |
11771 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x") | 11991 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x") |
11776 (const_int 2) (const_int 3)])) | 11996 (const_int 2) (const_int 3)])) |
11777 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))] | 11997 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))] |
11778 "TARGET_AVX" | 11998 "TARGET_AVX" |
11779 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" | 11999 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" |
11780 [(set_attr "type" "sselog") | 12000 [(set_attr "type" "sselog") |
12001 (set_attr "prefix_extra" "1") | |
12002 (set_attr "length_immediate" "1") | |
11781 (set_attr "prefix" "vex") | 12003 (set_attr "prefix" "vex") |
11782 (set_attr "mode" "V8SF")]) | 12004 (set_attr "mode" "V8SF")]) |
11783 | 12005 |
11784 (define_insn "vec_set_lo_v16hi" | 12006 (define_insn "vec_set_lo_v16hi" |
11785 [(set (match_operand:V16HI 0 "register_operand" "=x") | 12007 [(set (match_operand:V16HI 0 "register_operand" "=x") |
11792 (const_int 12) (const_int 13) | 12014 (const_int 12) (const_int 13) |
11793 (const_int 14) (const_int 15)]))))] | 12015 (const_int 14) (const_int 15)]))))] |
11794 "TARGET_AVX" | 12016 "TARGET_AVX" |
11795 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" | 12017 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" |
11796 [(set_attr "type" "sselog") | 12018 [(set_attr "type" "sselog") |
12019 (set_attr "prefix_extra" "1") | |
12020 (set_attr "length_immediate" "1") | |
11797 (set_attr "prefix" "vex") | 12021 (set_attr "prefix" "vex") |
11798 (set_attr "mode" "V8SF")]) | 12022 (set_attr "mode" "V8SF")]) |
11799 | 12023 |
11800 (define_insn "vec_set_hi_v16hi" | 12024 (define_insn "vec_set_hi_v16hi" |
11801 [(set (match_operand:V16HI 0 "register_operand" "=x") | 12025 [(set (match_operand:V16HI 0 "register_operand" "=x") |
11808 (const_int 6) (const_int 7)])) | 12032 (const_int 6) (const_int 7)])) |
11809 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] | 12033 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] |
11810 "TARGET_AVX" | 12034 "TARGET_AVX" |
11811 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" | 12035 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" |
11812 [(set_attr "type" "sselog") | 12036 [(set_attr "type" "sselog") |
12037 (set_attr "prefix_extra" "1") | |
12038 (set_attr "length_immediate" "1") | |
11813 (set_attr "prefix" "vex") | 12039 (set_attr "prefix" "vex") |
11814 (set_attr "mode" "V8SF")]) | 12040 (set_attr "mode" "V8SF")]) |
11815 | 12041 |
11816 (define_insn "vec_set_lo_v32qi" | 12042 (define_insn "vec_set_lo_v32qi" |
11817 [(set (match_operand:V32QI 0 "register_operand" "=x") | 12043 [(set (match_operand:V32QI 0 "register_operand" "=x") |
11828 (const_int 28) (const_int 29) | 12054 (const_int 28) (const_int 29) |
11829 (const_int 30) (const_int 31)]))))] | 12055 (const_int 30) (const_int 31)]))))] |
11830 "TARGET_AVX" | 12056 "TARGET_AVX" |
11831 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" | 12057 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" |
11832 [(set_attr "type" "sselog") | 12058 [(set_attr "type" "sselog") |
12059 (set_attr "prefix_extra" "1") | |
12060 (set_attr "length_immediate" "1") | |
11833 (set_attr "prefix" "vex") | 12061 (set_attr "prefix" "vex") |
11834 (set_attr "mode" "V8SF")]) | 12062 (set_attr "mode" "V8SF")]) |
11835 | 12063 |
11836 (define_insn "vec_set_hi_v32qi" | 12064 (define_insn "vec_set_hi_v32qi" |
11837 [(set (match_operand:V32QI 0 "register_operand" "=x") | 12065 [(set (match_operand:V32QI 0 "register_operand" "=x") |
11848 (const_int 14) (const_int 15)])) | 12076 (const_int 14) (const_int 15)])) |
11849 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] | 12077 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] |
11850 "TARGET_AVX" | 12078 "TARGET_AVX" |
11851 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" | 12079 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" |
11852 [(set_attr "type" "sselog") | 12080 [(set_attr "type" "sselog") |
12081 (set_attr "prefix_extra" "1") | |
12082 (set_attr "length_immediate" "1") | |
11853 (set_attr "prefix" "vex") | 12083 (set_attr "prefix" "vex") |
11854 (set_attr "mode" "V8SF")]) | 12084 (set_attr "mode" "V8SF")]) |
11855 | 12085 |
11856 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>" | 12086 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>" |
11857 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") | 12087 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") |
11861 (match_dup 0)] | 12091 (match_dup 0)] |
11862 UNSPEC_MASKLOAD))] | 12092 UNSPEC_MASKLOAD))] |
11863 "TARGET_AVX" | 12093 "TARGET_AVX" |
11864 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}" | 12094 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}" |
11865 [(set_attr "type" "sselog1") | 12095 [(set_attr "type" "sselog1") |
12096 (set_attr "prefix_extra" "1") | |
11866 (set_attr "prefix" "vex") | 12097 (set_attr "prefix" "vex") |
11867 (set_attr "mode" "<MODE>")]) | 12098 (set_attr "mode" "<MODE>")]) |
11868 | 12099 |
11869 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>" | 12100 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>" |
11870 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m") | 12101 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m") |
11874 (match_dup 0)] | 12105 (match_dup 0)] |
11875 UNSPEC_MASKSTORE))] | 12106 UNSPEC_MASKSTORE))] |
11876 "TARGET_AVX" | 12107 "TARGET_AVX" |
11877 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" | 12108 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" |
11878 [(set_attr "type" "sselog1") | 12109 [(set_attr "type" "sselog1") |
12110 (set_attr "prefix_extra" "1") | |
11879 (set_attr "prefix" "vex") | 12111 (set_attr "prefix" "vex") |
11880 (set_attr "mode" "<MODE>")]) | 12112 (set_attr "mode" "<MODE>")]) |
11881 | 12113 |
11882 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>" | 12114 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>" |
11883 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x") | 12115 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x") |
11984 default: | 12216 default: |
11985 gcc_unreachable (); | 12217 gcc_unreachable (); |
11986 } | 12218 } |
11987 } | 12219 } |
11988 [(set_attr "type" "sselog,ssemov") | 12220 [(set_attr "type" "sselog,ssemov") |
12221 (set_attr "prefix_extra" "1,*") | |
12222 (set_attr "length_immediate" "1,*") | |
11989 (set_attr "prefix" "vex") | 12223 (set_attr "prefix" "vex") |
11990 (set_attr "mode" "<avxvecmode>")]) | 12224 (set_attr "mode" "<avxvecmode>")]) |