comparison gcc/config/i386/sse.md @ 55:77e2b8dfacca gcc-4.4.5

update it from 4.4.3 to 4.5.0
author ryoma <e075725@ie.u-ryukyu.ac.jp>
date Fri, 12 Feb 2010 23:39:51 +0900
parents 3bfb6c00c1e0
children b7f97abdc517
comparison
equal deleted inserted replaced
52:c156f1bd5cd9 55:77e2b8dfacca
17 ;; You should have received a copy of the GNU General Public License 17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see 18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>. 19 ;; <http://www.gnu.org/licenses/>.
20 20
21 21
22 ;; 16 byte integral modes handled by SSE, minus TImode, which gets 22 ;; 16 byte integral modes handled by SSE
23 ;; special-cased for TARGET_64BIT.
24 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI]) 23 (define_mode_iterator SSEMODEI [V16QI V8HI V4SI V2DI])
25 24
26 ;; All 16-byte vector modes handled by SSE 25 ;; All 16-byte vector modes handled by SSE
27 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF]) 26 (define_mode_iterator SSEMODE [V16QI V8HI V4SI V2DI V4SF V2DF])
27 (define_mode_iterator SSEMODE16 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF])
28 28
29 ;; 32 byte integral vector modes handled by AVX 29 ;; 32 byte integral vector modes handled by AVX
30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI]) 30 (define_mode_iterator AVX256MODEI [V32QI V16HI V8SI V4DI])
31 31
32 ;; All 32-byte vector modes handled by AVX 32 ;; All 32-byte vector modes handled by AVX
37 37
38 ;; All DI vector modes handled by AVX 38 ;; All DI vector modes handled by AVX
39 (define_mode_iterator AVXMODEDI [V4DI V2DI]) 39 (define_mode_iterator AVXMODEDI [V4DI V2DI])
40 40
41 ;; All vector modes handled by AVX 41 ;; All vector modes handled by AVX
42 (define_mode_iterator AVXMODE [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF]) 42 (define_mode_iterator AVXMODE
43 [V16QI V8HI V4SI V2DI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
44 (define_mode_iterator AVXMODE16
45 [V16QI V8HI V4SI V2DI V1TI V4SF V2DF V32QI V16HI V8SI V4DI V8SF V4DF])
43 46
44 ;; Mix-n-match 47 ;; Mix-n-match
45 (define_mode_iterator SSEMODE12 [V16QI V8HI]) 48 (define_mode_iterator SSEMODE12 [V16QI V8HI])
46 (define_mode_iterator SSEMODE24 [V8HI V4SI]) 49 (define_mode_iterator SSEMODE24 [V8HI V4SI])
47 (define_mode_iterator SSEMODE14 [V16QI V4SI]) 50 (define_mode_iterator SSEMODE14 [V16QI V4SI])
48 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI]) 51 (define_mode_iterator SSEMODE124 [V16QI V8HI V4SI])
49 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI]) 52 (define_mode_iterator SSEMODE248 [V8HI V4SI V2DI])
50 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI]) 53 (define_mode_iterator SSEMODE1248 [V16QI V8HI V4SI V2DI])
51 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF]) 54 (define_mode_iterator SSEMODEF4 [SF DF V4SF V2DF])
55 (define_mode_iterator FMA4MODEF4 [V8SF V4DF])
52 (define_mode_iterator SSEMODEF2P [V4SF V2DF]) 56 (define_mode_iterator SSEMODEF2P [V4SF V2DF])
53 57
54 (define_mode_iterator AVX256MODEF2P [V8SF V4DF]) 58 (define_mode_iterator AVX256MODEF2P [V8SF V4DF])
55 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF]) 59 (define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
60 (define_mode_iterator AVX256MODE24P [V8SI V8SF V4DI V4DF])
56 (define_mode_iterator AVX256MODE4P [V4DI V4DF]) 61 (define_mode_iterator AVX256MODE4P [V4DI V4DF])
57 (define_mode_iterator AVX256MODE8P [V8SI V8SF]) 62 (define_mode_iterator AVX256MODE8P [V8SI V8SF])
58 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF]) 63 (define_mode_iterator AVXMODEF2P [V4SF V2DF V8SF V4DF])
59 (define_mode_iterator AVXMODEF4P [V4SF V4DF]) 64 (define_mode_iterator AVXMODEF4P [V4SF V4DF])
65 (define_mode_iterator AVXMODEFDP [V2DF V4DF])
66 (define_mode_iterator AVXMODEFSP [V4SF V8SF])
60 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF]) 67 (define_mode_iterator AVXMODEDCVTDQ2PS [V4SF V8SF])
61 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI]) 68 (define_mode_iterator AVXMODEDCVTPS2DQ [V4SI V8SI])
62 69
63 ;; Int-float size matches 70 ;; Int-float size matches
64 (define_mode_iterator SSEMODE4S [V4SF V4SI]) 71 (define_mode_iterator SSEMODE4S [V4SF V4SI])
65 (define_mode_iterator SSEMODE2D [V2DF V2DI]) 72 (define_mode_iterator SSEMODE2D [V2DF V2DI])
66 73
67 ;; Modes handled by integer vcond pattern 74 ;; Modes handled by integer vcond pattern
68 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI 75 (define_mode_iterator SSEMODE124C8 [V16QI V8HI V4SI
69 (V2DI "TARGET_SSE4_2 || TARGET_SSE5")]) 76 (V2DI "TARGET_SSE4_2")])
77
78 ;; Modes handled by vec_extract_even/odd pattern.
79 (define_mode_iterator SSEMODE_EO
80 [(V4SF "TARGET_SSE")
81 (V2DF "TARGET_SSE2")
82 (V2DI "TARGET_SSE2") (V4SI "TARGET_SSE2")
83 (V8HI "TARGET_SSE2") (V16QI "TARGET_SSE2")
84 (V4DF "TARGET_AVX") (V8SF "TARGET_AVX")])
70 85
71 ;; Mapping from float mode to required SSE level 86 ;; Mapping from float mode to required SSE level
72 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")]) 87 (define_mode_attr sse [(SF "sse") (DF "sse2") (V4SF "sse") (V2DF "sse2")])
73 88
74 ;; Mapping from integer vector mode to mnemonic suffix 89 ;; Mapping from integer vector mode to mnemonic suffix
75 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")]) 90 (define_mode_attr ssevecsize [(V16QI "b") (V8HI "w") (V4SI "d") (V2DI "q")])
76 91
77 ;; Mapping of the sse5 suffix 92 ;; Mapping of the fma4 suffix
93 (define_mode_attr fma4modesuffixf4 [(V8SF "ps") (V4DF "pd")])
94 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd")
95 (V4SF "ss") (V2DF "sd")])
96
97 ;; Mapping of the avx suffix
78 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd") 98 (define_mode_attr ssemodesuffixf4 [(SF "ss") (DF "sd")
79 (V4SF "ps") (V2DF "pd")]) 99 (V4SF "ps") (V2DF "pd")])
80 (define_mode_attr ssemodesuffixf2s [(SF "ss") (DF "sd") 100
81 (V4SF "ss") (V2DF "sd")])
82 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")]) 101 (define_mode_attr ssemodesuffixf2c [(V4SF "s") (V2DF "d")])
83 102
84 ;; Mapping of the max integer size for sse5 rotate immediate constraint 103 (define_mode_attr ssescalarmodesuffix2s [(V4SF "ss") (V4SI "d")])
104
105 ;; Mapping of the max integer size for xop rotate immediate constraint
85 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")]) 106 (define_mode_attr sserotatemax [(V16QI "7") (V8HI "15") (V4SI "31") (V2DI "63")])
86 107
87 ;; Mapping of vector modes back to the scalar modes 108 ;; Mapping of vector modes back to the scalar modes
88 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF") 109 (define_mode_attr ssescalarmode [(V4SF "SF") (V2DF "DF")
89 (V16QI "QI") (V8HI "HI") 110 (V16QI "QI") (V8HI "HI")
90 (V4SI "SI") (V2DI "DI")]) 111 (V4SI "SI") (V2DI "DI")])
91 112
92 ;; Mapping of vector modes to a vector mode of double size 113 ;; Mapping of vector modes to a vector mode of double size
93 (define_mode_attr ssedoublesizemode [(V2DF "V4DF") (V2DI "V4DI") 114 (define_mode_attr ssedoublesizemode
94 (V4SF "V8SF") (V4SI "V8SI")]) 115 [(V2DF "V4DF") (V2DI "V4DI") (V4SF "V8SF") (V4SI "V8SI")
116 (V8HI "V16HI") (V16QI "V32QI")
117 (V4DF "V8DF") (V8SF "V16SF")
118 (V4DI "V8DI") (V8SI "V16SI") (V16HI "V32HI") (V32QI "V64QI")])
95 119
96 ;; Number of scalar elements in each vector type 120 ;; Number of scalar elements in each vector type
97 (define_mode_attr ssescalarnum [(V4SF "4") (V2DF "2") 121 (define_mode_attr ssescalarnum
98 (V16QI "16") (V8HI "8") 122 [(V4SF "4") (V2DF "2") (V16QI "16") (V8HI "8") (V4SI "4") (V2DI "2")
99 (V4SI "4") (V2DI "2")]) 123 (V8SF "8") (V4DF "4") (V32QI "32") (V16HI "16") (V8SI "8") (V4DI "4")])
100 124
101 ;; Mapping for AVX 125 ;; Mapping for AVX
102 (define_mode_attr avxvecmode 126 (define_mode_attr avxvecmode
103 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V4SF "V4SF") 127 [(V16QI "TI") (V8HI "TI") (V4SI "TI") (V2DI "TI") (V1TI "TI")
104 (V2DF "V2DF") (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI") 128 (V4SF "V4SF") (V8SF "V8SF") (V2DF "V2DF") (V4DF "V4DF")
105 (V8SF "V8SF") (V4DF "V4DF")]) 129 (V32QI "OI") (V16HI "OI") (V8SI "OI") (V4DI "OI")])
106 (define_mode_attr avxvecpsmode 130 (define_mode_attr avxvecpsmode
107 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF") 131 [(V16QI "V4SF") (V8HI "V4SF") (V4SI "V4SF") (V2DI "V4SF")
108 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")]) 132 (V32QI "V8SF") (V16HI "V8SF") (V8SI "V8SF") (V4DI "V8SF")])
109 (define_mode_attr avxhalfvecmode 133 (define_mode_attr avxhalfvecmode
110 [(V4SF "V2SF") (V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") 134 [(V32QI "V16QI") (V16HI "V8HI") (V8SI "V4SI") (V4DI "V2DI")
111 (V4DI "V2DI") (V8SF "V4SF") (V4DF "V2DF")]) 135 (V8SF "V4SF") (V4DF "V2DF")
136 (V16QI "V8QI") (V8HI "V4HI") (V4SI "V2SI") (V4SF "V2SF")])
112 (define_mode_attr avxscalarmode 137 (define_mode_attr avxscalarmode
113 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V4SF "SF") (V2DF "DF") 138 [(V16QI "QI") (V8HI "HI") (V4SI "SI") (V2DI "DI") (V4SF "SF") (V2DF "DF")
114 (V8SF "SF") (V4DF "DF")]) 139 (V32QI "QI") (V16HI "HI") (V8SI "SI") (V4DI "DI") (V8SF "SF") (V4DF "DF")])
115 (define_mode_attr avxcvtvecmode 140 (define_mode_attr avxcvtvecmode
116 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")]) 141 [(V4SF "V4SI") (V8SF "V8SI") (V4SI "V4SF") (V8SI "V8SF")])
117 (define_mode_attr avxpermvecmode 142 (define_mode_attr avxpermvecmode
118 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")]) 143 [(V2DF "V2DI") (V4SF "V4SI") (V4DF "V4DI") (V8SF "V8SI")])
119 (define_mode_attr avxmodesuffixf2c 144 (define_mode_attr avxmodesuffixf2c
120 [(V4SF "s") (V2DF "d") (V8SF "s") (V4DF "d")]) 145 [(V4SF "s") (V2DF "d") (V8SI "s") (V8SF "s") (V4DI "d") (V4DF "d")])
121 (define_mode_attr avxmodesuffixp 146 (define_mode_attr avxmodesuffixp
122 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si") 147 [(V2DF "pd") (V4SI "si") (V4SF "ps") (V8SF "ps") (V8SI "si")
123 (V4DF "pd")]) 148 (V4DF "pd")])
124 (define_mode_attr avxmodesuffixs
125 [(V16QI "b") (V8HI "w") (V4SI "d")])
126 (define_mode_attr avxmodesuffix 149 (define_mode_attr avxmodesuffix
127 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "") 150 [(V16QI "") (V32QI "256") (V4SI "") (V4SF "") (V2DF "")
128 (V8SI "256") (V8SF "256") (V4DF "256")]) 151 (V8SI "256") (V8SF "256") (V4DF "256")])
129 152
130 ;; Mapping of immediate bits for blend instructions 153 ;; Mapping of immediate bits for blend instructions
131 (define_mode_attr blendbits 154 (define_mode_attr blendbits
132 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")]) 155 [(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
133
134 ;; Mapping of immediate bits for vpermil instructions
135 (define_mode_attr vpermilbits
136 [(V8SF "255") (V4SF "255") (V4DF "15") (V2DF "3")])
137 156
138 ;; Mapping of immediate bits for pinsr instructions 157 ;; Mapping of immediate bits for pinsr instructions
139 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")]) 158 (define_mode_attr pinsrbits [(V16QI "32768") (V8HI "128") (V4SI "8")])
140 159
141 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics. 160 ;; Patterns whose name begins with "sse{,2,3}_" are invoked by intrinsics.
154 ix86_expand_vector_move (<MODE>mode, operands); 173 ix86_expand_vector_move (<MODE>mode, operands);
155 DONE; 174 DONE;
156 }) 175 })
157 176
158 (define_insn "*avx_mov<mode>_internal" 177 (define_insn "*avx_mov<mode>_internal"
159 [(set (match_operand:AVXMODE 0 "nonimmediate_operand" "=x,x ,m") 178 [(set (match_operand:AVXMODE16 0 "nonimmediate_operand" "=x,x ,m")
160 (match_operand:AVXMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] 179 (match_operand:AVXMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
161 "TARGET_AVX 180 "TARGET_AVX
162 && (register_operand (operands[0], <MODE>mode) 181 && (register_operand (operands[0], <MODE>mode)
163 || register_operand (operands[1], <MODE>mode))" 182 || register_operand (operands[1], <MODE>mode))"
164 { 183 {
165 switch (which_alternative) 184 switch (which_alternative)
189 208
190 ;; All of these patterns are enabled for SSE1 as well as SSE2. 209 ;; All of these patterns are enabled for SSE1 as well as SSE2.
191 ;; This is essential for maintaining stable calling conventions. 210 ;; This is essential for maintaining stable calling conventions.
192 211
193 (define_expand "mov<mode>" 212 (define_expand "mov<mode>"
194 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "") 213 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
195 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))] 214 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
196 "TARGET_SSE" 215 "TARGET_SSE"
197 { 216 {
198 ix86_expand_vector_move (<MODE>mode, operands); 217 ix86_expand_vector_move (<MODE>mode, operands);
199 DONE; 218 DONE;
200 }) 219 })
201 220
202 (define_insn "*mov<mode>_internal" 221 (define_insn "*mov<mode>_internal"
203 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "=x,x ,m") 222 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "=x,x ,m")
204 (match_operand:SSEMODE 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))] 223 (match_operand:SSEMODE16 1 "nonimmediate_or_sse_const_operand" "C ,xm,x"))]
205 "TARGET_SSE 224 "TARGET_SSE
206 && (register_operand (operands[0], <MODE>mode) 225 && (register_operand (operands[0], <MODE>mode)
207 || register_operand (operands[1], <MODE>mode))" 226 || register_operand (operands[1], <MODE>mode))"
208 { 227 {
209 switch (which_alternative) 228 switch (which_alternative)
265 Assemble the 64-bit DImode value in an xmm register. */ 284 Assemble the 64-bit DImode value in an xmm register. */
266 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode), 285 emit_insn (gen_sse2_loadld (operands[0], CONST0_RTX (V4SImode),
267 gen_rtx_SUBREG (SImode, operands[1], 0))); 286 gen_rtx_SUBREG (SImode, operands[1], 0)));
268 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode), 287 emit_insn (gen_sse2_loadld (operands[2], CONST0_RTX (V4SImode),
269 gen_rtx_SUBREG (SImode, operands[1], 4))); 288 gen_rtx_SUBREG (SImode, operands[1], 4)));
270 emit_insn (gen_sse2_punpckldq (operands[0], operands[0], operands[2])); 289 emit_insn (gen_vec_interleave_lowv4si (operands[0], operands[0],
290 operands[2]));
271 } 291 }
272 else if (memory_operand (operands[1], DImode)) 292 else if (memory_operand (operands[1], DImode))
273 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]), operands[1], const0_rtx)); 293 emit_insn (gen_vec_concatv2di (gen_lowpart (V2DImode, operands[0]),
294 operands[1], const0_rtx));
274 else 295 else
275 gcc_unreachable (); 296 gcc_unreachable ();
276 }) 297 })
277 298
278 (define_split 299 (define_split
279 [(set (match_operand:V4SF 0 "register_operand" "") 300 [(set (match_operand:V4SF 0 "register_operand" "")
280 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))] 301 (match_operand:V4SF 1 "zero_extended_scalar_load_operand" ""))]
306 ix86_expand_push (<MODE>mode, operands[0]); 327 ix86_expand_push (<MODE>mode, operands[0]);
307 DONE; 328 DONE;
308 }) 329 })
309 330
310 (define_expand "push<mode>1" 331 (define_expand "push<mode>1"
311 [(match_operand:SSEMODE 0 "register_operand" "")] 332 [(match_operand:SSEMODE16 0 "register_operand" "")]
312 "TARGET_SSE" 333 "TARGET_SSE"
313 { 334 {
314 ix86_expand_push (<MODE>mode, operands[0]); 335 ix86_expand_push (<MODE>mode, operands[0]);
315 DONE; 336 DONE;
316 }) 337 })
323 ix86_expand_vector_move_misalign (<MODE>mode, operands); 344 ix86_expand_vector_move_misalign (<MODE>mode, operands);
324 DONE; 345 DONE;
325 }) 346 })
326 347
327 (define_expand "movmisalign<mode>" 348 (define_expand "movmisalign<mode>"
328 [(set (match_operand:SSEMODE 0 "nonimmediate_operand" "") 349 [(set (match_operand:SSEMODE16 0 "nonimmediate_operand" "")
329 (match_operand:SSEMODE 1 "nonimmediate_operand" ""))] 350 (match_operand:SSEMODE16 1 "nonimmediate_operand" ""))]
330 "TARGET_SSE" 351 "TARGET_SSE"
331 { 352 {
332 ix86_expand_vector_move_misalign (<MODE>mode, operands); 353 ix86_expand_vector_move_misalign (<MODE>mode, operands);
333 DONE; 354 DONE;
334 }) 355 })
340 UNSPEC_MOVU))] 361 UNSPEC_MOVU))]
341 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) 362 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
342 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 363 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
343 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}" 364 "vmovup<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
344 [(set_attr "type" "ssemov") 365 [(set_attr "type" "ssemov")
366 (set_attr "movu" "1")
345 (set_attr "prefix" "vex") 367 (set_attr "prefix" "vex")
346 (set_attr "mode" "<MODE>")]) 368 (set_attr "mode" "<MODE>")])
347 369
348 (define_insn "sse2_movq128" 370 (define_insn "sse2_movq128"
349 [(set (match_operand:V2DI 0 "register_operand" "=x") 371 [(set (match_operand:V2DI 0 "register_operand" "=x")
365 UNSPEC_MOVU))] 387 UNSPEC_MOVU))]
366 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) 388 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
367 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 389 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
368 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}" 390 "movup<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
369 [(set_attr "type" "ssemov") 391 [(set_attr "type" "ssemov")
392 (set_attr "movu" "1")
370 (set_attr "mode" "<MODE>")]) 393 (set_attr "mode" "<MODE>")])
371 394
372 (define_insn "avx_movdqu<avxmodesuffix>" 395 (define_insn "avx_movdqu<avxmodesuffix>"
373 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m") 396 [(set (match_operand:AVXMODEQI 0 "nonimmediate_operand" "=x,m")
374 (unspec:AVXMODEQI 397 (unspec:AVXMODEQI
375 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")] 398 [(match_operand:AVXMODEQI 1 "nonimmediate_operand" "xm,x")]
376 UNSPEC_MOVU))] 399 UNSPEC_MOVU))]
377 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 400 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
378 "vmovdqu\t{%1, %0|%0, %1}" 401 "vmovdqu\t{%1, %0|%0, %1}"
379 [(set_attr "type" "ssemov") 402 [(set_attr "type" "ssemov")
403 (set_attr "movu" "1")
380 (set_attr "prefix" "vex") 404 (set_attr "prefix" "vex")
381 (set_attr "mode" "<avxvecmode>")]) 405 (set_attr "mode" "<avxvecmode>")])
382 406
383 (define_insn "sse2_movdqu" 407 (define_insn "sse2_movdqu"
384 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m") 408 [(set (match_operand:V16QI 0 "nonimmediate_operand" "=x,m")
385 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")] 409 (unspec:V16QI [(match_operand:V16QI 1 "nonimmediate_operand" "xm,x")]
386 UNSPEC_MOVU))] 410 UNSPEC_MOVU))]
387 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 411 "TARGET_SSE2 && !(MEM_P (operands[0]) && MEM_P (operands[1]))"
388 "movdqu\t{%1, %0|%0, %1}" 412 "movdqu\t{%1, %0|%0, %1}"
389 [(set_attr "type" "ssemov") 413 [(set_attr "type" "ssemov")
414 (set_attr "movu" "1")
390 (set_attr "prefix_data16" "1") 415 (set_attr "prefix_data16" "1")
391 (set_attr "mode" "TI")]) 416 (set_attr "mode" "TI")])
392 417
393 (define_insn "avx_movnt<mode>" 418 (define_insn "avx_movnt<mode>"
394 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m") 419 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
426 [(set (match_operand:V2DI 0 "memory_operand" "=m") 451 [(set (match_operand:V2DI 0 "memory_operand" "=m")
427 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")] 452 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")]
428 UNSPEC_MOVNT))] 453 UNSPEC_MOVNT))]
429 "TARGET_SSE2" 454 "TARGET_SSE2"
430 "movntdq\t{%1, %0|%0, %1}" 455 "movntdq\t{%1, %0|%0, %1}"
431 [(set_attr "type" "ssecvt") 456 [(set_attr "type" "ssemov")
432 (set_attr "prefix_data16" "1") 457 (set_attr "prefix_data16" "1")
433 (set_attr "mode" "TI")]) 458 (set_attr "mode" "TI")])
434 459
435 (define_insn "sse2_movntsi" 460 (define_insn "sse2_movntsi"
436 [(set (match_operand:SI 0 "memory_operand" "=m") 461 [(set (match_operand:SI 0 "memory_operand" "=m")
437 (unspec:SI [(match_operand:SI 1 "register_operand" "r")] 462 (unspec:SI [(match_operand:SI 1 "register_operand" "r")]
438 UNSPEC_MOVNT))] 463 UNSPEC_MOVNT))]
439 "TARGET_SSE2" 464 "TARGET_SSE2"
440 "movnti\t{%1, %0|%0, %1}" 465 "movnti\t{%1, %0|%0, %1}"
441 [(set_attr "type" "ssecvt") 466 [(set_attr "type" "ssemov")
467 (set_attr "prefix_data16" "0")
442 (set_attr "mode" "V2DF")]) 468 (set_attr "mode" "V2DF")])
443 469
444 (define_insn "avx_lddqu<avxmodesuffix>" 470 (define_insn "avx_lddqu<avxmodesuffix>"
445 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x") 471 [(set (match_operand:AVXMODEQI 0 "register_operand" "=x")
446 (unspec:AVXMODEQI 472 (unspec:AVXMODEQI
447 [(match_operand:AVXMODEQI 1 "memory_operand" "m")] 473 [(match_operand:AVXMODEQI 1 "memory_operand" "m")]
448 UNSPEC_LDDQU))] 474 UNSPEC_LDDQU))]
449 "TARGET_AVX" 475 "TARGET_AVX"
450 "vlddqu\t{%1, %0|%0, %1}" 476 "vlddqu\t{%1, %0|%0, %1}"
451 [(set_attr "type" "ssecvt") 477 [(set_attr "type" "ssecvt")
478 (set_attr "movu" "1")
452 (set_attr "prefix" "vex") 479 (set_attr "prefix" "vex")
453 (set_attr "mode" "<avxvecmode>")]) 480 (set_attr "mode" "<avxvecmode>")])
454 481
455 (define_insn "sse3_lddqu" 482 (define_insn "sse3_lddqu"
456 [(set (match_operand:V16QI 0 "register_operand" "=x") 483 [(set (match_operand:V16QI 0 "register_operand" "=x")
457 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")] 484 (unspec:V16QI [(match_operand:V16QI 1 "memory_operand" "m")]
458 UNSPEC_LDDQU))] 485 UNSPEC_LDDQU))]
459 "TARGET_SSE3" 486 "TARGET_SSE3"
460 "lddqu\t{%1, %0|%0, %1}" 487 "lddqu\t{%1, %0|%0, %1}"
461 [(set_attr "type" "ssecvt") 488 [(set_attr "type" "ssemov")
489 (set_attr "movu" "1")
490 (set_attr "prefix_data16" "0")
462 (set_attr "prefix_rep" "1") 491 (set_attr "prefix_rep" "1")
463 (set_attr "mode" "TI")]) 492 (set_attr "mode" "TI")])
464 493
465 ; Expand patterns for non-temporal stores. At the moment, only those 494 ; Expand patterns for non-temporal stores. At the moment, only those
466 ; that directly map to insns are defined; it would be possible to 495 ; that directly map to insns are defined; it would be possible to
647 (match_operand:V8SF 2 "nonimmediate_operand" "")))] 676 (match_operand:V8SF 2 "nonimmediate_operand" "")))]
648 "TARGET_AVX" 677 "TARGET_AVX"
649 { 678 {
650 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands); 679 ix86_fixup_binary_operands_no_copy (DIV, V8SFmode, operands);
651 680
652 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size 681 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
653 && flag_finite_math_only && !flag_trapping_math 682 && flag_finite_math_only && !flag_trapping_math
654 && flag_unsafe_math_optimizations) 683 && flag_unsafe_math_optimizations)
655 { 684 {
656 ix86_emit_swdivsf (operands[0], operands[1], 685 ix86_emit_swdivsf (operands[0], operands[1],
657 operands[2], V8SFmode); 686 operands[2], V8SFmode);
763 (unspec:V4SF 792 (unspec:V4SF
764 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))] 793 [(match_operand:V4SF 1 "nonimmediate_operand" "xm")] UNSPEC_RCP))]
765 "TARGET_SSE" 794 "TARGET_SSE"
766 "%vrcpps\t{%1, %0|%0, %1}" 795 "%vrcpps\t{%1, %0|%0, %1}"
767 [(set_attr "type" "sse") 796 [(set_attr "type" "sse")
797 (set_attr "atom_sse_attr" "rcp")
768 (set_attr "prefix" "maybe_vex") 798 (set_attr "prefix" "maybe_vex")
769 (set_attr "mode" "V4SF")]) 799 (set_attr "mode" "V4SF")])
770 800
771 (define_insn "*avx_vmrcpv4sf2" 801 (define_insn "*avx_vmrcpv4sf2"
772 [(set (match_operand:V4SF 0 "register_operand" "=x") 802 [(set (match_operand:V4SF 0 "register_operand" "=x")
789 (match_operand:V4SF 2 "register_operand" "0") 819 (match_operand:V4SF 2 "register_operand" "0")
790 (const_int 1)))] 820 (const_int 1)))]
791 "TARGET_SSE" 821 "TARGET_SSE"
792 "rcpss\t{%1, %0|%0, %1}" 822 "rcpss\t{%1, %0|%0, %1}"
793 [(set_attr "type" "sse") 823 [(set_attr "type" "sse")
824 (set_attr "atom_sse_attr" "rcp")
794 (set_attr "mode" "SF")]) 825 (set_attr "mode" "SF")])
795 826
796 (define_expand "sqrtv8sf2" 827 (define_expand "sqrtv8sf2"
797 [(set (match_operand:V8SF 0 "register_operand" "") 828 [(set (match_operand:V8SF 0 "register_operand" "")
798 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))] 829 (sqrt:V8SF (match_operand:V8SF 1 "nonimmediate_operand" "")))]
799 "TARGET_AVX" 830 "TARGET_AVX"
800 { 831 {
801 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size 832 if (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
802 && flag_finite_math_only && !flag_trapping_math 833 && flag_finite_math_only && !flag_trapping_math
803 && flag_unsafe_math_optimizations) 834 && flag_unsafe_math_optimizations)
804 { 835 {
805 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0); 836 ix86_emit_swsqrtsf (operands[0], operands[1], V8SFmode, 0);
806 DONE; 837 DONE;
834 [(set (match_operand:V4SF 0 "register_operand" "=x") 865 [(set (match_operand:V4SF 0 "register_operand" "=x")
835 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] 866 (sqrt:V4SF (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
836 "TARGET_SSE" 867 "TARGET_SSE"
837 "%vsqrtps\t{%1, %0|%0, %1}" 868 "%vsqrtps\t{%1, %0|%0, %1}"
838 [(set_attr "type" "sse") 869 [(set_attr "type" "sse")
870 (set_attr "atom_sse_attr" "sqrt")
839 (set_attr "prefix" "maybe_vex") 871 (set_attr "prefix" "maybe_vex")
840 (set_attr "mode" "V4SF")]) 872 (set_attr "mode" "V4SF")])
841 873
842 (define_insn "sqrtv4df2" 874 (define_insn "sqrtv4df2"
843 [(set (match_operand:V4DF 0 "register_operand" "=x") 875 [(set (match_operand:V4DF 0 "register_operand" "=x")
878 (match_operand:SSEMODEF2P 2 "register_operand" "0") 910 (match_operand:SSEMODEF2P 2 "register_operand" "0")
879 (const_int 1)))] 911 (const_int 1)))]
880 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" 912 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
881 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}" 913 "sqrts<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
882 [(set_attr "type" "sse") 914 [(set_attr "type" "sse")
915 (set_attr "atom_sse_attr" "sqrt")
883 (set_attr "mode" "<ssescalarmode>")]) 916 (set_attr "mode" "<ssescalarmode>")])
884 917
885 (define_expand "rsqrtv8sf2" 918 (define_expand "rsqrtv8sf2"
886 [(set (match_operand:V8SF 0 "register_operand" "") 919 [(set (match_operand:V8SF 0 "register_operand" "")
887 (unspec:V8SF 920 (unspec:V8SF
1041 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")) 1074 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm"))
1042 (match_dup 1) 1075 (match_dup 1)
1043 (const_int 1)))] 1076 (const_int 1)))]
1044 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" 1077 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1045 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}" 1078 "<maxminfprefix>s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1046 [(set_attr "type" "sse") 1079 [(set_attr "type" "sseadd")
1047 (set_attr "mode" "<ssescalarmode>")]) 1080 (set_attr "mode" "<ssescalarmode>")])
1048 1081
1049 ;; These versions of the min/max patterns implement exactly the operations 1082 ;; These versions of the min/max patterns implement exactly the operations
1050 ;; min = (op1 < op2 ? op1 : op2) 1083 ;; min = (op1 < op2 ? op1 : op2)
1051 ;; max = (!(op1 < op2) ? op1 : op2) 1084 ;; max = (!(op1 < op2) ? op1 : op2)
1177 (minus:V2DF (match_dup 1) (match_dup 2)) 1210 (minus:V2DF (match_dup 1) (match_dup 2))
1178 (const_int 2)))] 1211 (const_int 2)))]
1179 "TARGET_SSE3" 1212 "TARGET_SSE3"
1180 "addsubpd\t{%2, %0|%0, %2}" 1213 "addsubpd\t{%2, %0|%0, %2}"
1181 [(set_attr "type" "sseadd") 1214 [(set_attr "type" "sseadd")
1215 (set_attr "atom_unit" "complex")
1182 (set_attr "mode" "V2DF")]) 1216 (set_attr "mode" "V2DF")])
1183 1217
1184 (define_insn "avx_h<plusminus_insn>v4df3" 1218 (define_insn "avx_h<plusminus_insn>v4df3"
1185 [(set (match_operand:V4DF 0 "register_operand" "=x") 1219 [(set (match_operand:V4DF 0 "register_operand" "=x")
1186 (vec_concat:V4DF 1220 (vec_concat:V4DF
1300 (vec_select:SF (match_dup 2) (parallel [(const_int 2)])) 1334 (vec_select:SF (match_dup 2) (parallel [(const_int 2)]))
1301 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))] 1335 (vec_select:SF (match_dup 2) (parallel [(const_int 3)]))))))]
1302 "TARGET_SSE3" 1336 "TARGET_SSE3"
1303 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}" 1337 "h<plusminus_mnemonic>ps\t{%2, %0|%0, %2}"
1304 [(set_attr "type" "sseadd") 1338 [(set_attr "type" "sseadd")
1339 (set_attr "atom_unit" "complex")
1305 (set_attr "prefix_rep" "1") 1340 (set_attr "prefix_rep" "1")
1306 (set_attr "mode" "V4SF")]) 1341 (set_attr "mode" "V4SF")])
1307 1342
1308 (define_insn "*avx_h<plusminus_insn>v2df3" 1343 (define_insn "*avx_h<plusminus_insn>v2df3"
1309 [(set (match_operand:V2DF 0 "register_operand" "=x") 1344 [(set (match_operand:V2DF 0 "register_operand" "=x")
1399 (match_operand:SI 3 "const_0_to_31_operand" "n")] 1434 (match_operand:SI 3 "const_0_to_31_operand" "n")]
1400 UNSPEC_PCMP))] 1435 UNSPEC_PCMP))]
1401 "TARGET_AVX" 1436 "TARGET_AVX"
1402 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 1437 "vcmpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1403 [(set_attr "type" "ssecmp") 1438 [(set_attr "type" "ssecmp")
1439 (set_attr "length_immediate" "1")
1404 (set_attr "prefix" "vex") 1440 (set_attr "prefix" "vex")
1405 (set_attr "mode" "<MODE>")]) 1441 (set_attr "mode" "<MODE>")])
1406 1442
1407 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3" 1443 (define_insn "avx_cmps<ssemodesuffixf2c><mode>3"
1408 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") 1444 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1415 (match_dup 1) 1451 (match_dup 1)
1416 (const_int 1)))] 1452 (const_int 1)))]
1417 "TARGET_AVX" 1453 "TARGET_AVX"
1418 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 1454 "vcmps<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1419 [(set_attr "type" "ssecmp") 1455 [(set_attr "type" "ssecmp")
1456 (set_attr "length_immediate" "1")
1420 (set_attr "prefix" "vex") 1457 (set_attr "prefix" "vex")
1421 (set_attr "mode" "<ssescalarmode>")]) 1458 (set_attr "mode" "<ssescalarmode>")])
1422 1459
1423 ;; We don't promote 128bit vector compare intrinsics. But vectorizer 1460 ;; We don't promote 128bit vector compare intrinsics. But vectorizer
1424 ;; may generate 256bit vector compare instructions. 1461 ;; may generate 256bit vector compare instructions.
1429 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))] 1466 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")]))]
1430 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)" 1467 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)"
1431 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" 1468 "vcmp%D3p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1432 [(set_attr "type" "ssecmp") 1469 [(set_attr "type" "ssecmp")
1433 (set_attr "prefix" "vex") 1470 (set_attr "prefix" "vex")
1471 (set_attr "length_immediate" "1")
1434 (set_attr "mode" "<avxvecmode>")]) 1472 (set_attr "mode" "<avxvecmode>")])
1435 1473
1436 (define_insn "<sse>_maskcmp<mode>3" 1474 (define_insn "<sse>_maskcmp<mode>3"
1437 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x") 1475 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x")
1438 (match_operator:SSEMODEF4 3 "sse_comparison_operator" 1476 (match_operator:SSEMODEF4 3 "sse_comparison_operator"
1439 [(match_operand:SSEMODEF4 1 "register_operand" "0") 1477 [(match_operand:SSEMODEF4 1 "register_operand" "0")
1440 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))] 1478 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "xm")]))]
1441 "(SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode)) 1479 "!TARGET_XOP
1442 && !TARGET_SSE5" 1480 && (SSE_FLOAT_MODE_P (<MODE>mode) || SSE_VEC_FLOAT_MODE_P (<MODE>mode))"
1443 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}" 1481 "cmp%D3<ssemodesuffixf4>\t{%2, %0|%0, %2}"
1444 [(set_attr "type" "ssecmp") 1482 [(set_attr "type" "ssecmp")
1483 (set_attr "length_immediate" "1")
1445 (set_attr "mode" "<MODE>")]) 1484 (set_attr "mode" "<MODE>")])
1446 1485
;; Scalar ("vm") mask-compare insn over SSEMODEF2P: a vec_merge with
;; (const_int 1) of comparison operator 3 on operands 1 and 2 and
;; (match_dup 1) (operand 1 is tied to the destination by the "0"
;; constraint).
;; Changeset record: the right-hand column drops "&& !TARGET_SSE5" from the
;; condition and adds a length_immediate attribute of 1.
1447 (define_insn "<sse>_vmmaskcmp<mode>3" 1486 (define_insn "<sse>_vmmaskcmp<mode>3"
1448 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") 1487 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1449 (vec_merge:SSEMODEF2P 1488 (vec_merge:SSEMODEF2P
1450 (match_operator:SSEMODEF2P 3 "sse_comparison_operator" 1489 (match_operator:SSEMODEF2P 3 "sse_comparison_operator"
1451 [(match_operand:SSEMODEF2P 1 "register_operand" "0") 1490 [(match_operand:SSEMODEF2P 1 "register_operand" "0")
1452 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]) 1491 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")])
1453 (match_dup 1) 1492 (match_dup 1)
1454 (const_int 1)))] 1493 (const_int 1)))]
1455 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) && !TARGET_SSE5" 1494 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1456 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}" 1495 "cmp%D3s<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1457 [(set_attr "type" "ssecmp") 1496 [(set_attr "type" "ssecmp")
1497 (set_attr "length_immediate" "1")
1458 (set_attr "mode" "<ssescalarmode>")]) 1498 (set_attr "mode" "<ssescalarmode>")])
1459 1499
1460 (define_insn "<sse>_comi" 1500 (define_insn "<sse>_comi"
1461 [(set (reg:CCFP FLAGS_REG) 1501 [(set (reg:CCFP FLAGS_REG)
1462 (compare:CCFP 1502 (compare:CCFP
1468 (parallel [(const_int 0)]))))] 1508 (parallel [(const_int 0)]))))]
1469 "SSE_FLOAT_MODE_P (<MODE>mode)" 1509 "SSE_FLOAT_MODE_P (<MODE>mode)"
1470 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}" 1510 "%vcomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1471 [(set_attr "type" "ssecomi") 1511 [(set_attr "type" "ssecomi")
1472 (set_attr "prefix" "maybe_vex") 1512 (set_attr "prefix" "maybe_vex")
1513 (set_attr "prefix_rep" "0")
1514 (set (attr "prefix_data16")
1515 (if_then_else (eq_attr "mode" "DF")
1516 (const_string "1")
1517 (const_string "0")))
1473 (set_attr "mode" "<MODE>")]) 1518 (set_attr "mode" "<MODE>")])
1474 1519
1475 (define_insn "<sse>_ucomi" 1520 (define_insn "<sse>_ucomi"
1476 [(set (reg:CCFPU FLAGS_REG) 1521 [(set (reg:CCFPU FLAGS_REG)
1477 (compare:CCFPU 1522 (compare:CCFPU
1483 (parallel [(const_int 0)]))))] 1528 (parallel [(const_int 0)]))))]
1484 "SSE_FLOAT_MODE_P (<MODE>mode)" 1529 "SSE_FLOAT_MODE_P (<MODE>mode)"
1485 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}" 1530 "%vucomis<ssemodefsuffix>\t{%1, %0|%0, %1}"
1486 [(set_attr "type" "ssecomi") 1531 [(set_attr "type" "ssecomi")
1487 (set_attr "prefix" "maybe_vex") 1532 (set_attr "prefix" "maybe_vex")
1533 (set_attr "prefix_rep" "0")
1534 (set (attr "prefix_data16")
1535 (if_then_else (eq_attr "mode" "DF")
1536 (const_string "1")
1537 (const_string "0")))
1488 (set_attr "mode" "<MODE>")]) 1538 (set_attr "mode" "<MODE>")])
1489 1539
1490 (define_expand "vcond<mode>" 1540 (define_expand "vcond<mode>"
1491 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") 1541 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1492 (if_then_else:SSEMODEF2P 1542 (if_then_else:SSEMODEF2P
1531 [(set_attr "type" "sselog") 1581 [(set_attr "type" "sselog")
1532 (set_attr "mode" "<MODE>")]) 1582 (set_attr "mode" "<MODE>")])
1533 1583
;; Expander for the logical operations on AVX256MODEF2P modes, guarded by
;; AVX256_VEC_FLOAT_MODE_P.  The preparation statement passes the operands
;; to ix86_fixup_binary_operands_no_copy (defined outside this file).
;; Changeset record: the code iterator is renamed from plogic (left column)
;; to any_logic (right column); nothing else differs.
1534 (define_expand "<code><mode>3" 1584 (define_expand "<code><mode>3"
1535 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "") 1585 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "")
1536 (plogic:AVX256MODEF2P 1586 (any_logic:AVX256MODEF2P
1537 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "") 1587 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "")
1538 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))] 1588 (match_operand:AVX256MODEF2P 2 "nonimmediate_operand" "")))]
1539 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)" 1589 "AVX256_VEC_FLOAT_MODE_P (<MODE>mode)"
1540 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") 1590 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1541 1591
;; AVX insn for the logical operations on AVXMODEF2P modes.  Three-operand
;; "v"-prefixed template; operand 1 carries the "%" (commutative) marker and
;; operand 2 may be memory.  Requires ix86_binary_operator_ok.
;; Changeset record: plogic -> any_logic and <plogicprefix> -> <logicprefix>
;; in the right-hand column.
1542 (define_insn "*avx_<code><mode>3" 1592 (define_insn "*avx_<code><mode>3"
1543 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") 1593 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
1544 (plogic:AVXMODEF2P 1594 (any_logic:AVXMODEF2P
1545 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x") 1595 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "%x")
1546 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))] 1596 (match_operand:AVXMODEF2P 2 "nonimmediate_operand" "xm")))]
1547 "AVX_VEC_FLOAT_MODE_P (<MODE>mode) 1597 "AVX_VEC_FLOAT_MODE_P (<MODE>mode)
1548 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 1598 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1549 "v<plogicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" 1599 "v<logicprefix>p<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
1550 [(set_attr "type" "sselog") 1600 [(set_attr "type" "sselog")
1551 (set_attr "prefix" "vex") 1601 (set_attr "prefix" "vex")
1552 (set_attr "mode" "<avxvecmode>")]) 1602 (set_attr "mode" "<avxvecmode>")])
1553 1603
;; Expander for the logical operations on SSEMODEF2P modes, guarded by
;; SSE_VEC_FLOAT_MODE_P.  The preparation statement passes the operands to
;; ix86_fixup_binary_operands_no_copy (defined outside this file).
;; Changeset record: the code iterator is renamed from plogic to any_logic.
1554 (define_expand "<code><mode>3" 1604 (define_expand "<code><mode>3"
1555 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") 1605 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1556 (plogic:SSEMODEF2P 1606 (any_logic:SSEMODEF2P
1557 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "") 1607 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")
1558 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))] 1608 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))]
1559 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" 1609 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1560 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") 1610 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
1561 1611
;; Two-operand SSE insn for the logical operations on SSEMODEF2P modes:
;; operand 1 is tied to the destination ("%0") and operand 2 may be memory.
;; Requires ix86_binary_operator_ok.
;; Changeset record: plogic -> any_logic and <plogicprefix> -> <logicprefix>
;; in the right-hand column.
1562 (define_insn "*<code><mode>3" 1612 (define_insn "*<code><mode>3"
1563 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") 1613 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
1564 (plogic:SSEMODEF2P 1614 (any_logic:SSEMODEF2P
1565 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0") 1615 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0")
1566 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))] 1616 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")))]
1567 "SSE_VEC_FLOAT_MODE_P (<MODE>mode) 1617 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)
1568 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 1618 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
1569 "<plogicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}" 1619 "<logicprefix>p<ssemodesuffixf2c>\t{%2, %0|%0, %2}"
1570 [(set_attr "type" "sselog") 1620 [(set_attr "type" "sselog")
1571 (set_attr "mode" "<MODE>")]) 1621 (set_attr "mode" "<MODE>")])
1622
;; Vector copysign expander, present only in the right-hand (new) column.
;; It emits three sets:
;;   operand 4 = (not operand 3) AND operand 1
;;   operand 5 = operand 3 AND operand 2
;;   operand 0 = operand 4 IOR operand 5
;; Operand 3 comes from ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
;; operands 4 and 5 are fresh pseudos from gen_reg_rtx.
;; NOTE(review): presumably operand 3 is a vector with only the sign bits
;; set, judging by the helper's name; confirm against its definition.
1623 (define_expand "copysign<mode>3"
1624 [(set (match_dup 4)
1625 (and:SSEMODEF2P
1626 (not:SSEMODEF2P (match_dup 3))
1627 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "")))
1628 (set (match_dup 5)
1629 (and:SSEMODEF2P (match_dup 3)
1630 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "")))
1631 (set (match_operand:SSEMODEF2P 0 "register_operand" "")
1632 (ior:SSEMODEF2P (match_dup 4) (match_dup 5)))]
1633 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
1634 {
1635 operands[3] = ix86_build_signbit_mask (<ssescalarmode>mode, 1, 0);
1636
1637 operands[4] = gen_reg_rtx (<MODE>mode);
1638 operands[5] = gen_reg_rtx (<MODE>mode);
1639 })
1572 1640
1573 ;; Also define scalar versions. These are used for abs, neg, and 1641 ;; Also define scalar versions. These are used for abs, neg, and
1574 ;; conditional move. Using subregs into vector modes causes register 1642 ;; conditional move. Using subregs into vector modes causes register
1575 ;; allocation lossage. These patterns do not allow memory operands 1643 ;; allocation lossage. These patterns do not allow memory operands
1576 ;; because the native instructions read the full 128-bits. 1644 ;; because the native instructions read the full 128-bits.
1598 [(set_attr "type" "sselog") 1666 [(set_attr "type" "sselog")
1599 (set_attr "mode" "<ssevecmode>")]) 1667 (set_attr "mode" "<ssevecmode>")])
1600 1668
;; AVX form of the scalar (MODEF) logical operations; all three operands
;; must be registers.  The "mode" attribute is <ssevecmode>, not the scalar
;; mode.
;; Changeset record: plogic -> any_logic and <plogicprefix> -> <logicprefix>
;; in the right-hand column.
1601 (define_insn "*avx_<code><mode>3" 1669 (define_insn "*avx_<code><mode>3"
1602 [(set (match_operand:MODEF 0 "register_operand" "=x") 1670 [(set (match_operand:MODEF 0 "register_operand" "=x")
1603 (plogic:MODEF 1671 (any_logic:MODEF
1604 (match_operand:MODEF 1 "register_operand" "x") 1672 (match_operand:MODEF 1 "register_operand" "x")
1605 (match_operand:MODEF 2 "register_operand" "x")))] 1673 (match_operand:MODEF 2 "register_operand" "x")))]
1606 "AVX_FLOAT_MODE_P (<MODE>mode)" 1674 "AVX_FLOAT_MODE_P (<MODE>mode)"
1607 "v<plogicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}" 1675 "v<logicprefix>p<ssemodefsuffix>\t{%2, %1, %0|%0, %1, %2}"
1608 [(set_attr "type" "sselog") 1676 [(set_attr "type" "sselog")
1609 (set_attr "prefix" "vex") 1677 (set_attr "prefix" "vex")
1610 (set_attr "mode" "<ssevecmode>")]) 1678 (set_attr "mode" "<ssevecmode>")])
1611 1679
;; Two-operand SSE form of the scalar (MODEF) logical operations: operand 1
;; is tied to the destination ("0") and operand 2 must be a register.
;; Changeset record: plogic -> any_logic and <plogicprefix> -> <logicprefix>
;; in the right-hand column.
1612 (define_insn "*<code><mode>3" 1680 (define_insn "*<code><mode>3"
1613 [(set (match_operand:MODEF 0 "register_operand" "=x") 1681 [(set (match_operand:MODEF 0 "register_operand" "=x")
1614 (plogic:MODEF 1682 (any_logic:MODEF
1615 (match_operand:MODEF 1 "register_operand" "0") 1683 (match_operand:MODEF 1 "register_operand" "0")
1616 (match_operand:MODEF 2 "register_operand" "x")))] 1684 (match_operand:MODEF 2 "register_operand" "x")))]
1617 "SSE_FLOAT_MODE_P (<MODE>mode)" 1685 "SSE_FLOAT_MODE_P (<MODE>mode)"
1618 "<plogicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}" 1686 "<logicprefix>p<ssemodefsuffix>\t{%2, %0|%0, %2}"
1619 [(set_attr "type" "sselog") 1687 [(set_attr "type" "sselog")
1620 (set_attr "mode" "<ssevecmode>")]) 1688 (set_attr "mode" "<ssevecmode>")])
1621 1689
1622 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1690 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1623 ;; 1691 ;;
1624 ;; SSE5 floating point multiply/accumulate instructions This includes the 1692 ;; FMA4 floating point multiply/accumulate instructions. This
1625 ;; scalar version of the instructions as well as the vector 1693 ;; includes the scalar version of the instructions as well as the
1694 ;; vector.
1626 ;; 1695 ;;
1627 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 1696 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
1628 1697
1629 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow 1698 ;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
1630 ;; combine to generate a multiply/add with two memory references. We then 1699 ;; combine to generate a multiply/add with two memory references. We then
1635 ;; add insns, and it can't generate a new pseudo. I.e.: 1704 ;; add insns, and it can't generate a new pseudo. I.e.:
1636 ;; (set (reg1) (mem (addr1))) 1705 ;; (set (reg1) (mem (addr1)))
1637 ;; (set (reg2) (mult (reg1) (mem (addr2)))) 1706 ;; (set (reg2) (mult (reg1) (mem (addr2))))
1638 ;; (set (reg3) (plus (reg2) (mem (addr3)))) 1707 ;; (set (reg3) (plus (reg2) (mem (addr3))))
1639 1708
;; Multiply-add over the FMA4MODEF4 iterator:
;; operand 0 = operand 1 * operand 2 + operand 3.  Two alternatives; memory
;; is allowed in operand 3 (first) or operand 2 (second), never both.
;; Changeset record: the first two lines also carry, in the left-hand
;; column, the header of the removed four-alternative "sse5_fmadd<mode>4";
;; the remaining lines exist only in the right-hand column.
1640 (define_insn "sse5_fmadd<mode>4" 1709 (define_insn "fma4_fmadd<mode>4256"
1641 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x") 1710 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1711 (plus:FMA4MODEF4
1712 (mult:FMA4MODEF4
1713 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1714 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1715 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1716 "TARGET_FMA4 && TARGET_FUSED_MADD"
1717 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1718 [(set_attr "type" "ssemuladd")
1719 (set_attr "mode" "<MODE>")])
1720
;; New in the right-hand column.  Over the FMA4MODEF4 iterator:
;; operand 0 = operand 1 * operand 2 - operand 3.  Memory is allowed in
;; operand 3 (first alternative) or operand 2 (second), never both.
1721 ;; Floating multiply and subtract.
1722 (define_insn "fma4_fmsub<mode>4256"
1723 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1724 (minus:FMA4MODEF4
1725 (mult:FMA4MODEF4
1726 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1727 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1728 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1729 "TARGET_FMA4 && TARGET_FUSED_MADD"
1730 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1731 [(set_attr "type" "ssemuladd")
1732 (set_attr "mode" "<MODE>")])
1733
;; New in the right-hand column.  Over the FMA4MODEF4 iterator:
;; operand 0 = operand 3 - operand 1 * operand 2.  Operand 3 appears first
;; in the RTL but is still printed last in the AT&T-order-reversed template.
1734 ;; Floating point negative multiply and add.
1735 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
1736 (define_insn "fma4_fnmadd<mode>4256"
1737 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1738 (minus:FMA4MODEF4
1739 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1740 (mult:FMA4MODEF4
1741 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1742 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))))]
1743 "TARGET_FMA4 && TARGET_FUSED_MADD"
1744 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1745 [(set_attr "type" "ssemuladd")
1746 (set_attr "mode" "<MODE>")])
1747
;; New in the right-hand column.  Over the FMA4MODEF4 iterator:
;; operand 0 = (-operand 1) * operand 2 - operand 3; the negation wraps
;; operand 1 only.
1748 ;; Floating point negative multiply and subtract.
1749 (define_insn "fma4_fnmsub<mode>4256"
1750 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1751 (minus:FMA4MODEF4
1752 (mult:FMA4MODEF4
1753 (neg:FMA4MODEF4
1754 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1755 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1756 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")))]
1757 "TARGET_FMA4 && TARGET_FUSED_MADD"
1758 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1759 [(set_attr "type" "ssemuladd")
1760 (set_attr "mode" "<MODE>")])
1761
;; Multiply-add over SSEMODEF4: operand 0 = operand 1 * operand 2 + operand 3.
;; Changeset record (left = removed sse5_fmadd body, right = fma4_fmadd):
;; the four alternatives that tied the destination to operand 1 or 3 ("0")
;; shrink to two untied alternatives, the ix86_sse5_valid_op_p check is
;; dropped from the condition, TARGET_SSE5 becomes TARGET_FMA4, and the
;; mnemonic gains a "v" prefix.
1762 (define_insn "fma4_fmadd<mode>4"
1763 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1642 (plus:SSEMODEF4 1764 (plus:SSEMODEF4
1643 (mult:SSEMODEF4 1765 (mult:SSEMODEF4
1644 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm") 1766 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1645 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x")) 1767 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1646 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))] 1768 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1647 "TARGET_SSE5 && TARGET_FUSED_MADD 1769 "TARGET_FMA4 && TARGET_FUSED_MADD"
1648 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)" 1770 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1649 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1650 [(set_attr "type" "ssemuladd") 1773 [(set_attr "type" "ssemuladd")
1651 (set_attr "mode" "<MODE>")]) 1772 (set_attr "mode" "<MODE>")])
1652 1773
;; Changeset record: every line below exists only in the left-hand (old)
;; column, i.e. this splitter is removed by the update.  It matched the
;; multiply-add RTL when the operands passed ix86_sse5_valid_op_p with
;; argument 2 but not with argument 1 and the destination was not mentioned
;; in any source, then called ix86_expand_sse5_multiple_memory and re-emitted
;; the insn through gen_sse5_fmadd<mode>4.
1653 ;; Split fmadd with two memory operands into a load and the fmadd.
1654 (define_split
1655 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1656 (plus:SSEMODEF4
1657 (mult:SSEMODEF4
1658 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1659 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1660 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1661 "TARGET_SSE5
1662 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1663 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1664 && !reg_mentioned_p (operands[0], operands[1])
1665 && !reg_mentioned_p (operands[0], operands[2])
1666 && !reg_mentioned_p (operands[0], operands[3])"
1667 [(const_int 0)]
1668 {
1669 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1670 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1671 operands[2], operands[3]));
1672 DONE;
1673 })
1674
;; Scalar multiply-add over SSEMODEF2P: a vec_merge, with (const_int 1), of
;; operand 1 * operand 2 + operand 3 and a match_dup.
;; Changeset record: renamed sse5_vmfmadd -> fma4_vmfmadd; operand 1 is no
;; longer tied to the destination; the match_dup changes from operand 1 to
;; operand 0; the ix86_sse5_valid_op_p check is dropped; the mnemonic gains a
;; "v" prefix.
;; NOTE(review): the comment below still says "operand1" supplies the
;; unmodified upper words, but the right-hand column uses (match_dup 0);
;; confirm which is intended.
1675 ;; For the scalar operations, use operand1 for the upper words that aren't 1774 ;; For the scalar operations, use operand1 for the upper words that aren't
1676 ;; modified, so restrict the forms that are generated. 1775 ;; modified, so restrict the forms that are generated.
1677 ;; Scalar version of fmadd 1776 ;; Scalar version of fmadd.
1678 (define_insn "sse5_vmfmadd<mode>4" 1777 (define_insn "fma4_vmfmadd<mode>4"
1679 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") 1778 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1680 (vec_merge:SSEMODEF2P 1779 (vec_merge:SSEMODEF2P
1681 (plus:SSEMODEF2P 1780 (plus:SSEMODEF2P
1682 (mult:SSEMODEF2P 1781 (mult:SSEMODEF2P
1683 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") 1782 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1684 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) 1783 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1685 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) 1784 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1686 (match_dup 1) 1785 (match_dup 0)
1687 (const_int 1)))] 1786 (const_int 1)))]
1688 "TARGET_SSE5 && TARGET_FUSED_MADD 1787 "TARGET_FMA4 && TARGET_FUSED_MADD"
1689 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" 1788 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1690 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1691 [(set_attr "type" "ssemuladd") 1789 [(set_attr "type" "ssemuladd")
1692 (set_attr "mode" "<MODE>")]) 1790 (set_attr "mode" "<MODE>")])
1693 1791
;; Multiply-subtract over SSEMODEF4:
;; operand 0 = operand 1 * operand 2 - operand 3.
;; Changeset record: renamed sse5_fmsub -> fma4_fmsub; four destination-tied
;; alternatives become two untied ones; the ix86_sse5_valid_op_p check is
;; dropped; TARGET_SSE5 becomes TARGET_FMA4; the mnemonic gains a "v" prefix.
1694 ;; Floating multiply and subtract 1792 ;; Floating multiply and subtract.
1695 ;; Allow two memory operands the same as fmadd 1793 ;; Allow two memory operands the same as fmadd.
1696 (define_insn "sse5_fmsub<mode>4" 1794 (define_insn "fma4_fmsub<mode>4"
1697 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x") 1795 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1698 (minus:SSEMODEF4 1796 (minus:SSEMODEF4
1699 (mult:SSEMODEF4 1797 (mult:SSEMODEF4
1700 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm") 1798 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1701 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x")) 1799 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1702 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0")))] 1800 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1703 "TARGET_SSE5 && TARGET_FUSED_MADD 1801 "TARGET_FMA4 && TARGET_FUSED_MADD"
1704 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)" 1802 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1705 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1706 [(set_attr "type" "ssemuladd") 1803 [(set_attr "type" "ssemuladd")
1707 (set_attr "mode" "<MODE>")]) 1804 (set_attr "mode" "<MODE>")])
1708 1805
;; Changeset record: every line below exists only in the left-hand (old)
;; column, i.e. this splitter is removed by the update.  It matched the
;; multiply-subtract RTL under the same operand-validity and
;; reg_mentioned_p conditions as its fmadd sibling, then called
;; ix86_expand_sse5_multiple_memory and re-emitted the insn through
;; gen_sse5_fmsub<mode>4.
1709 ;; Split fmsub with two memory operands into a load and the fmsub.
1710 (define_split
1711 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1712 (minus:SSEMODEF4
1713 (mult:SSEMODEF4
1714 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1715 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1716 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1717 "TARGET_SSE5
1718 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1719 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1720 && !reg_mentioned_p (operands[0], operands[1])
1721 && !reg_mentioned_p (operands[0], operands[2])
1722 && !reg_mentioned_p (operands[0], operands[3])"
1723 [(const_int 0)]
1724 {
1725 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1726 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1],
1727 operands[2], operands[3]));
1728 DONE;
1729 })
1730
;; Scalar multiply-subtract over SSEMODEF2P: a vec_merge, with (const_int 1),
;; of operand 1 * operand 2 - operand 3 and a match_dup.
;; Changeset record: renamed sse5_vmfmsub -> fma4_vmfmsub; operand 1 is no
;; longer tied to the destination; the match_dup changes from operand 1 to
;; operand 0; the ix86_sse5_valid_op_p check is dropped; the mnemonic gains a
;; "v" prefix.
;; NOTE(review): the comment below still says "operand1" supplies the
;; unmodified upper words, but the right-hand column uses (match_dup 0);
;; confirm which is intended.
1731 ;; For the scalar operations, use operand1 for the upper words that aren't 1806 ;; For the scalar operations, use operand1 for the upper words that aren't
1732 ;; modified, so restrict the forms that are generated. 1807 ;; modified, so restrict the forms that are generated.
1733 ;; Scalar version of fmsub 1808 ;; Scalar version of fmsub.
1734 (define_insn "sse5_vmfmsub<mode>4" 1809 (define_insn "fma4_vmfmsub<mode>4"
1735 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") 1810 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1736 (vec_merge:SSEMODEF2P 1811 (vec_merge:SSEMODEF2P
1737 (minus:SSEMODEF2P 1812 (minus:SSEMODEF2P
1738 (mult:SSEMODEF2P 1813 (mult:SSEMODEF2P
1739 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") 1814 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1740 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) 1815 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1741 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) 1816 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1742 (match_dup 1) 1817 (match_dup 0)
1743 (const_int 1)))] 1818 (const_int 1)))]
1744 "TARGET_SSE5 && TARGET_FUSED_MADD 1819 "TARGET_FMA4 && TARGET_FUSED_MADD"
1745 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" 1820 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1746 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1747 [(set_attr "type" "ssemuladd") 1821 [(set_attr "type" "ssemuladd")
1748 (set_attr "mode" "<MODE>")]) 1822 (set_attr "mode" "<MODE>")])
1749 1823
;; Negative multiply-add over SSEMODEF4:
;; operand 0 = operand 3 - operand 1 * operand 2.
;; Changeset record: renamed sse5_fnmadd -> fma4_fnmadd; the two old comment
;; lines about operand order and two memory operands are removed; four
;; destination-tied alternatives become two untied ones; the
;; ix86_sse5_valid_op_p check is dropped; the mnemonic gains a "v" prefix.
1750 ;; Floating point negative multiply and add 1824 ;; Floating point negative multiply and add.
1751 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b) 1825 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b).
1752 ;; Note operands are out of order to simplify call to ix86_sse5_valid_p 1826 (define_insn "fma4_fnmadd<mode>4"
1753 ;; Allow two memory operands to help in optimizing. 1827 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1754 (define_insn "sse5_fnmadd<mode>4"
1755 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x,x,x")
1756 (minus:SSEMODEF4 1828 (minus:SSEMODEF4
1757 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x,0,0") 1829 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")
1758 (mult:SSEMODEF4 1830 (mult:SSEMODEF4
1759 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%0,0,x,xm") 1831 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x")
1760 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm,xm,x"))))] 1832 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))))]
1761 "TARGET_SSE5 && TARGET_FUSED_MADD 1833 "TARGET_FMA4 && TARGET_FUSED_MADD"
1762 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)" 1834 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1763 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1764 [(set_attr "type" "ssemuladd") 1835 [(set_attr "type" "ssemuladd")
1765 (set_attr "mode" "<MODE>")]) 1836 (set_attr "mode" "<MODE>")])
1766 1837
;; Changeset record: every line below exists only in the left-hand (old)
;; column, i.e. this splitter is removed by the update.  It matched
;; operand 3 - operand 1 * operand 2 under the same operand-validity and
;; reg_mentioned_p conditions as its siblings, then called
;; ix86_expand_sse5_multiple_memory and re-emitted the insn through
;; gen_sse5_fnmadd<mode>4.
1767 ;; Split fnmadd with two memory operands into a load and the fnmadd.
1768 (define_split
1769 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1770 (minus:SSEMODEF4
1771 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")
1772 (mult:SSEMODEF4
1773 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "")
1774 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))))]
1775 "TARGET_SSE5
1776 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)
1777 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, true)
1778 && !reg_mentioned_p (operands[0], operands[1])
1779 && !reg_mentioned_p (operands[0], operands[2])
1780 && !reg_mentioned_p (operands[0], operands[3])"
1781 [(const_int 0)]
1782 {
1783 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1784 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1785 operands[2], operands[3]));
1786 DONE;
1787 })
1788
;; Scalar negative multiply-add over SSEMODEF2P: a vec_merge, with
;; (const_int 1), of operand 3 - operand 1 * operand 2 and a match_dup.
;; Changeset record: renamed sse5_vmfnmadd -> fma4_vmfnmadd; operand 1 is no
;; longer tied to the destination; the match_dup changes from operand 1 to
;; operand 0; the ix86_sse5_valid_op_p check is dropped; the mnemonic gains a
;; "v" prefix.
;; NOTE(review): the comment below still says "operand1" supplies the
;; unmodified upper words, but the right-hand column uses (match_dup 0);
;; confirm which is intended.
1789 ;; For the scalar operations, use operand1 for the upper words that aren't 1838 ;; For the scalar operations, use operand1 for the upper words that aren't
1790 ;; modified, so restrict the forms that are generated. 1839 ;; modified, so restrict the forms that are generated.
1791 ;; Scalar version of fnmadd 1840 ;; Scalar version of fnmadd.
1792 (define_insn "sse5_vmfnmadd<mode>4" 1841 (define_insn "fma4_vmfnmadd<mode>4"
1793 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") 1842 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1794 (vec_merge:SSEMODEF2P 1843 (vec_merge:SSEMODEF2P
1795 (minus:SSEMODEF2P 1844 (minus:SSEMODEF2P
1796 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") 1845 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1797 (mult:SSEMODEF2P 1846 (mult:SSEMODEF2P
1798 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0") 1847 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1799 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))) 1848 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
1800 (match_dup 1) 1849 (match_dup 0)
1801 (const_int 1)))] 1850 (const_int 1)))]
1802 "TARGET_SSE5 && TARGET_FUSED_MADD 1851 "TARGET_FMA4 && TARGET_FUSED_MADD"
1803 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" 1852 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1804 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1805 [(set_attr "type" "ssemuladd") 1853 [(set_attr "type" "ssemuladd")
1806 (set_attr "mode" "<MODE>")]) 1854 (set_attr "mode" "<MODE>")])
1807 1855
;; Negative multiply-subtract over SSEMODEF4:
;; operand 0 = (-operand 1) * operand 2 - operand 3.
;; Changeset record: renamed sse5_fnmsub -> fma4_fnmsub; operand 1 is no
;; longer tied to the destination ("0,0" -> "%x,x"); operand 2's second
;; alternative narrows from "xm" to "m"; the ix86_sse5_valid_op_p check is
;; dropped; the mnemonic gains a "v" prefix.
1808 ;; Floating point negative multiply and subtract 1856 ;; Floating point negative multiply and subtract.
1809 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c 1857 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c.
1810 ;; Allow 2 memory operands to help with optimization 1858 (define_insn "fma4_fnmsub<mode>4"
1811 (define_insn "sse5_fnmsub<mode>4"
1812 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x") 1859 [(set (match_operand:SSEMODEF4 0 "register_operand" "=x,x")
1813 (minus:SSEMODEF4 1860 (minus:SSEMODEF4
1814 (mult:SSEMODEF4 1861 (mult:SSEMODEF4
1815 (neg:SSEMODEF4 1862 (neg:SSEMODEF4
1816 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "0,0")) 1863 (match_operand:SSEMODEF4 1 "nonimmediate_operand" "%x,x"))
1817 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,xm")) 1864 (match_operand:SSEMODEF4 2 "nonimmediate_operand" "x,m"))
1818 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))] 1865 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "xm,x")))]
1819 "TARGET_SSE5 && TARGET_FUSED_MADD 1866 "TARGET_FMA4 && TARGET_FUSED_MADD"
1820 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)" 1867 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1821 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1822 [(set_attr "type" "ssemuladd") 1868 [(set_attr "type" "ssemuladd")
1823 (set_attr "mode" "<MODE>")]) 1869 (set_attr "mode" "<MODE>")])
1824 1870
;; Changeset record: every line below exists only in the left-hand (old)
;; column, i.e. this splitter is removed by the update.  It matched
;; (-operand 1) * operand 2 - operand 3; unlike its siblings it passes
;; "false" as the last ix86_sse5_valid_op_p argument.  It then called
;; ix86_expand_sse5_multiple_memory and re-emitted the insn through
;; gen_sse5_fnmsub<mode>4.
1825 ;; Split fnmsub with two memory operands into a load and the fmsub.
1826 (define_split
1827 [(set (match_operand:SSEMODEF4 0 "register_operand" "")
1828 (minus:SSEMODEF4
1829 (mult:SSEMODEF4
1830 (neg:SSEMODEF4
1831 (match_operand:SSEMODEF4 1 "nonimmediate_operand" ""))
1832 (match_operand:SSEMODEF4 2 "nonimmediate_operand" ""))
1833 (match_operand:SSEMODEF4 3 "nonimmediate_operand" "")))]
1834 "TARGET_SSE5
1835 && !ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)
1836 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)
1837 && !reg_mentioned_p (operands[0], operands[1])
1838 && !reg_mentioned_p (operands[0], operands[2])
1839 && !reg_mentioned_p (operands[0], operands[3])"
1840 [(const_int 0)]
1841 {
1842 ix86_expand_sse5_multiple_memory (operands, 4, <MODE>mode);
1843 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1844 operands[2], operands[3]));
1845 DONE;
1846 })
1847
;; Scalar negative multiply-subtract over SSEMODEF2P: a vec_merge, with
;; (const_int 1), of (-operand 1) * operand 2 - operand 3 and a match_dup.
;; Changeset record: renamed sse5_vmfnmsub -> fma4_vmfnmsub; operand 1 is no
;; longer tied to the destination; the match_dup changes from operand 1 to
;; operand 0; the ix86_sse5_valid_op_p check is dropped; the mnemonic gains a
;; "v" prefix.
;; NOTE(review): the comment below still says "operand1" supplies the
;; unmodified upper words, but the right-hand column uses (match_dup 0);
;; confirm which is intended.
1848 ;; For the scalar operations, use operand1 for the upper words that aren't 1871 ;; For the scalar operations, use operand1 for the upper words that aren't
1849 ;; modified, so restrict the forms that are generated. 1872 ;; modified, so restrict the forms that are generated.
1850 ;; Scalar version of fnmsub 1873 ;; Scalar version of fnmsub.
1851 (define_insn "sse5_vmfnmsub<mode>4" 1874 (define_insn "fma4_vmfnmsub<mode>4"
1852 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") 1875 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1853 (vec_merge:SSEMODEF2P 1876 (vec_merge:SSEMODEF2P
1854 (minus:SSEMODEF2P 1877 (minus:SSEMODEF2P
1855 (mult:SSEMODEF2P 1878 (mult:SSEMODEF2P
1856 (neg:SSEMODEF2P 1879 (neg:SSEMODEF2P
1857 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0")) 1880 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1858 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) 1881 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1859 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) 1882 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
1860 (match_dup 1) 1883 (match_dup 0)
1861 (const_int 1)))] 1884 (const_int 1)))]
1862 "TARGET_SSE5 && TARGET_FUSED_MADD 1885 "TARGET_FMA4 && TARGET_FUSED_MADD"
1863 && ix86_sse5_valid_op_p (operands, insn, 4, true, 2, false)" 1886 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1864 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1865 [(set_attr "type" "ssemuladd") 1887 [(set_attr "type" "ssemuladd")
1866 (set_attr "mode" "<MODE>")]) 1888 (set_attr "mode" "<MODE>")])
1867 1889
;; Intrinsic multiply-add over FMA4MODEF4: the plus/mult RTL is wrapped in
;; an UNSPEC_FMA4_INTRINSIC unspec, and the output template is the same
;; vfmadd form used by the non-unspec pattern.
;; Changeset record: the first six lines also carry, in the left-hand
;; column, the removed comment and the header of the old
;; "sse5i_fmadd<mode>4" expander.
;; NOTE(review): the removed comment says the UNSPEC forms exist so the
;; intrinsic works under -mno-fused-madd, yet this insn's condition still
;; requires TARGET_FUSED_MADD; confirm that is intended.
1868 ;; The same instructions using an UNSPEC to allow the intrinsic to be used 1890 (define_insn "fma4i_fmadd<mode>4256"
1869 ;; even if the user used -mno-fused-madd 1891 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1870 ;; Parallel instructions. During instruction generation, just default 1892 (unspec:FMA4MODEF4
1871 ;; to registers, and let combine later build the appropriate instruction. 1893 [(plus:FMA4MODEF4
1872 (define_expand "sse5i_fmadd<mode>4" 1894 (mult:FMA4MODEF4
1873 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") 1895 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1896 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1897 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1898 UNSPEC_FMA4_INTRINSIC))]
1899 "TARGET_FMA4 && TARGET_FUSED_MADD"
1900 "vfmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1901 [(set_attr "type" "ssemuladd")
1902 (set_attr "mode" "<MODE>")])
1903
;; New in the right-hand column.  Intrinsic multiply-subtract over
;; FMA4MODEF4: operand 1 * operand 2 - operand 3, wrapped in an
;; UNSPEC_FMA4_INTRINSIC unspec.
;; NOTE(review): the condition requires TARGET_FUSED_MADD, the same as the
;; non-unspec fma4_fmsub<mode>4256 pattern; confirm the intrinsic form is
;; not meant to be available without it.
1904 (define_insn "fma4i_fmsub<mode>4256"
1905 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1906 (unspec:FMA4MODEF4
1907 [(minus:FMA4MODEF4
1908 (mult:FMA4MODEF4
1909 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1910 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1911 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1912 UNSPEC_FMA4_INTRINSIC))]
1913 "TARGET_FMA4 && TARGET_FUSED_MADD"
1914 "vfmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1915 [(set_attr "type" "ssemuladd")
1916 (set_attr "mode" "<MODE>")])
1917
;; New in the right-hand column.  Intrinsic negative multiply-add over
;; FMA4MODEF4: operand 3 - operand 1 * operand 2, wrapped in an
;; UNSPEC_FMA4_INTRINSIC unspec.
1918 (define_insn "fma4i_fnmadd<mode>4256"
1919 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1920 (unspec:FMA4MODEF4
1921 [(minus:FMA4MODEF4
1922 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x")
1923 (mult:FMA4MODEF4
1924 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x")
1925 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m")))]
1926 UNSPEC_FMA4_INTRINSIC))]
1927 "TARGET_FMA4 && TARGET_FUSED_MADD"
1928 "vfnmadd<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1929 [(set_attr "type" "ssemuladd")
1930 (set_attr "mode" "<MODE>")])
1931
;; New in the right-hand column.  Intrinsic negative multiply-subtract over
;; FMA4MODEF4: (-operand 1) * operand 2 - operand 3, wrapped in an
;; UNSPEC_FMA4_INTRINSIC unspec.
1932 (define_insn "fma4i_fnmsub<mode>4256"
1933 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x,x")
1934 (unspec:FMA4MODEF4
1935 [(minus:FMA4MODEF4
1936 (mult:FMA4MODEF4
1937 (neg:FMA4MODEF4
1938 (match_operand:FMA4MODEF4 1 "nonimmediate_operand" "%x,x"))
1939 (match_operand:FMA4MODEF4 2 "nonimmediate_operand" "x,m"))
1940 (match_operand:FMA4MODEF4 3 "nonimmediate_operand" "xm,x"))]
1941 UNSPEC_FMA4_INTRINSIC))]
1942 "TARGET_FMA4 && TARGET_FUSED_MADD"
1943 "vfnmsub<fma4modesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1944 [(set_attr "type" "ssemuladd")
1945 (set_attr "mode" "<MODE>")])
1946
;; Intrinsic multiply-add over SSEMODEF2P, wrapped in an unspec.
;; Changeset record: the right-hand column is the single new insn
;; "fma4i_fmadd<mode>4".  The left-hand column holds the body of the removed
;; "sse5i_fmadd<mode>4" expander (register-only operands, condition
;; "TARGET_SSE5", falling back to gen_sse5_fmadd<mode>4 when
;; TARGET_FUSED_MADD is set) followed by the removed "*sse5i_fmadd<mode>4"
;; insn.  UNSPEC_SSE5_INTRINSIC becomes UNSPEC_FMA4_INTRINSIC.
;; NOTE(review): the old expander was enabled by TARGET_SSE5 alone, whereas
;; the new insn also requires TARGET_FUSED_MADD; confirm the intrinsic is
;; meant to be unavailable without it.
1947 (define_insn "fma4i_fmadd<mode>4"
1948 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1874 (unspec:SSEMODEF2P 1949 (unspec:SSEMODEF2P
1875 [(plus:SSEMODEF2P 1950 [(plus:SSEMODEF2P
1876 (mult:SSEMODEF2P 1951 (mult:SSEMODEF2P
1877 (match_operand:SSEMODEF2P 1 "register_operand" "") 1952 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1878 (match_operand:SSEMODEF2P 2 "register_operand" "")) 1953 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1879 (match_operand:SSEMODEF2P 3 "register_operand" ""))] 1954 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1880 UNSPEC_SSE5_INTRINSIC))] 1955 UNSPEC_FMA4_INTRINSIC))]
1881 "TARGET_SSE5" 1956 "TARGET_FMA4 && TARGET_FUSED_MADD"
1882 { 1957 "vfmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1883 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1884 if (TARGET_FUSED_MADD)
1885 {
1886 emit_insn (gen_sse5_fmadd<mode>4 (operands[0], operands[1],
1887 operands[2], operands[3]));
1888 DONE;
1889 }
1890 })
1891
1892 (define_insn "*sse5i_fmadd<mode>4"
1893 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1894 (unspec:SSEMODEF2P
1895 [(plus:SSEMODEF2P
1896 (mult:SSEMODEF2P
1897 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1898 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
1899 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
1900 UNSPEC_SSE5_INTRINSIC))]
1901 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1902 "fmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1903 [(set_attr "type" "ssemuladd") 1958 [(set_attr "type" "ssemuladd")
1904 (set_attr "mode" "<MODE>")]) 1959 (set_attr "mode" "<MODE>")])
1905 1960
;; Comparison rows covering two inserted FMA4 intrinsic insns.
;; Inserted (right-hand) text:
;;   fma4i_fmsub<mode>4  - operand 0 = (operand 1 * operand 2) - operand 3
;;   fma4i_fnmadd<mode>4 - operand 0 = operand 3 - (operand 1 * operand 2)
;; Both are wrapped in UNSPEC_FMA4_INTRINSIC and enabled by
;; TARGET_FMA4 && TARGET_FUSED_MADD.
;; Deleted (left-hand) text: the SSE5 expander sse5i_fmsub<mode>4 and the
;; *sse5i_fmsub<mode>4 insn.
1906 (define_expand "sse5i_fmsub<mode>4" 1961 (define_insn "fma4i_fmsub<mode>4"
1907 [(set (match_operand:SSEMODEF2P 0 "register_operand" "") 1962 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1908 (unspec:SSEMODEF2P 1963 (unspec:SSEMODEF2P
1909 [(minus:SSEMODEF2P 1964 [(minus:SSEMODEF2P
1910 (mult:SSEMODEF2P 1965 (mult:SSEMODEF2P
1911 (match_operand:SSEMODEF2P 1 "register_operand" "") 1966 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1912 (match_operand:SSEMODEF2P 2 "register_operand" "")) 1967 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1913 (match_operand:SSEMODEF2P 3 "register_operand" ""))] 1968 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1914 UNSPEC_SSE5_INTRINSIC))] 1969 UNSPEC_FMA4_INTRINSIC))]
1915 "TARGET_SSE5" 1970 "TARGET_FMA4 && TARGET_FUSED_MADD"
1916 { 1971 "vfmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1917 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */ 1972 [(set_attr "type" "ssemuladd")
1918 if (TARGET_FUSED_MADD) 1973 (set_attr "mode" "<MODE>")])
1919 { 1974
1920 emit_insn (gen_sse5_fmsub<mode>4 (operands[0], operands[1], 1975 (define_insn "fma4i_fnmadd<mode>4"
1921 operands[2], operands[3])); 1976 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1922 DONE;
1923 }
1924 })
1925
1926 (define_insn "*sse5i_fmsub<mode>4"
1927 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1928 (unspec:SSEMODEF2P 1977 (unspec:SSEMODEF2P
1929 [(minus:SSEMODEF2P 1978 [(minus:SSEMODEF2P
1979 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
1930 (mult:SSEMODEF2P 1980 (mult:SSEMODEF2P
1931 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm") 1981 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
1932 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")) 1982 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))]
1933 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))] 1983 UNSPEC_FMA4_INTRINSIC))]
1934 UNSPEC_SSE5_INTRINSIC))] 1984 "TARGET_FMA4 && TARGET_FUSED_MADD"
1935 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" 1985 "vfnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1936 "fmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1937 [(set_attr "type" "ssemuladd") 1986 [(set_attr "type" "ssemuladd")
1938 (set_attr "mode" "<MODE>")]) 1987 (set_attr "mode" "<MODE>")])
1939 1988
;; Comparison rows for the packed negated multiply-subtract intrinsic.
;; Inserted (right-hand) text: insn fma4i_fnmsub<mode>4, which computes
;; operand 0 = ((-operand 1) * operand 2) - operand 3 inside
;; UNSPEC_FMA4_INTRINSIC and is enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; Deleted (left-hand) text: the SSE5 expanders sse5i_fnmadd<mode>4 and
;; sse5i_fnmsub<mode>4 together with their *sse5i_... insns.
1940 ;; Rewrite (- (a * b) + c) into the canonical form: c - (a * b) 1989 (define_insn "fma4i_fnmsub<mode>4"
1941 ;; Note operands are out of order to simplify call to ix86_sse5_valid_p 1990 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
1942 (define_expand "sse5i_fnmadd<mode>4"
1943 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1944 (unspec:SSEMODEF2P
1945 [(minus:SSEMODEF2P
1946 (match_operand:SSEMODEF2P 3 "register_operand" "")
1947 (mult:SSEMODEF2P
1948 (match_operand:SSEMODEF2P 1 "register_operand" "")
1949 (match_operand:SSEMODEF2P 2 "register_operand" "")))]
1950 UNSPEC_SSE5_INTRINSIC))]
1951 "TARGET_SSE5"
1952 {
1953 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1954 if (TARGET_FUSED_MADD)
1955 {
1956 emit_insn (gen_sse5_fnmadd<mode>4 (operands[0], operands[1],
1957 operands[2], operands[3]));
1958 DONE;
1959 }
1960 })
1961
1962 (define_insn "*sse5i_fnmadd<mode>4"
1963 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
1964 (unspec:SSEMODEF2P
1965 [(minus:SSEMODEF2P
1966 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0")
1967 (mult:SSEMODEF2P
1968 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0,x,xm")
1969 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")))]
1970 UNSPEC_SSE5_INTRINSIC))]
1971 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)"
1972 "fnmadd<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1973 [(set_attr "type" "ssemuladd")
1974 (set_attr "mode" "<MODE>")])
1975
1976 ;; Rewrite (- (a * b) - c) into the canonical form: ((-a) * b) - c
1977 (define_expand "sse5i_fnmsub<mode>4"
1978 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
1979 (unspec:SSEMODEF2P 1991 (unspec:SSEMODEF2P
1980 [(minus:SSEMODEF2P 1992 [(minus:SSEMODEF2P
1981 (mult:SSEMODEF2P 1993 (mult:SSEMODEF2P
1982 (neg:SSEMODEF2P 1994 (neg:SSEMODEF2P
1983 (match_operand:SSEMODEF2P 1 "register_operand" "")) 1995 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
1984 (match_operand:SSEMODEF2P 2 "register_operand" "")) 1996 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
1985 (match_operand:SSEMODEF2P 3 "register_operand" ""))] 1997 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))]
1986 UNSPEC_SSE5_INTRINSIC))] 1998 UNSPEC_FMA4_INTRINSIC))]
1987 "TARGET_SSE5" 1999 "TARGET_FMA4 && TARGET_FUSED_MADD"
1988 { 2000 "vfnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
1989 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
1990 if (TARGET_FUSED_MADD)
1991 {
1992 emit_insn (gen_sse5_fnmsub<mode>4 (operands[0], operands[1],
1993 operands[2], operands[3]));
1994 DONE;
1995 }
1996 })
1997
1998 (define_insn "*sse5i_fnmsub<mode>4"
1999 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
2000 (unspec:SSEMODEF2P
2001 [(minus:SSEMODEF2P
2002 (mult:SSEMODEF2P
2003 (neg:SSEMODEF2P
2004 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm"))
2005 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x"))
2006 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x,0,0"))]
2007 UNSPEC_SSE5_INTRINSIC))]
2008 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
2009 "fnmsub<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2010 [(set_attr "type" "ssemuladd") 2001 [(set_attr "type" "ssemuladd")
2011 (set_attr "mode" "<MODE>")]) 2002 (set_attr "mode" "<MODE>")])
2012 2003
;; Comparison rows for the scalar ("vm") fused multiply-add intrinsic.
;; Inserted (right-hand) text: insn fma4i_vmfmadd<mode>4.  Its RTL is a
;; vec_merge with mask (const_int 1): the element selected by mask bit 0 is
;; (operand 1 * operand 2) + operand 3, and the other elements come from
;; (match_dup 0).  It is enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; Deleted (left-hand) text: the SSE5 expander sse5i_vmfmadd<mode>4 and the
;; *sse5i_vmfmadd<mode>4 insn, whose merge mask was (const_int 0).
2013 ;; Scalar instructions
2014 (define_expand "sse5i_vmfmadd<mode>4"
2015 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2016 (unspec:SSEMODEF2P
2017 [(vec_merge:SSEMODEF2P
2018 (plus:SSEMODEF2P
2019 (mult:SSEMODEF2P
2020 (match_operand:SSEMODEF2P 1 "register_operand" "")
2021 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2022 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2023 (match_dup 1)
2024 (const_int 0))]
2025 UNSPEC_SSE5_INTRINSIC))]
2026 "TARGET_SSE5"
2027 {
2028 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2029 if (TARGET_FUSED_MADD)
2030 {
2031 emit_insn (gen_sse5_vmfmadd<mode>4 (operands[0], operands[1],
2032 operands[2], operands[3]));
2033 DONE;
2034 }
2035 })
2036
2037 ;; For the scalar operations, use operand1 for the upper words that aren't 2004 ;; For the scalar operations, use operand1 for the upper words that aren't
2038 ;; modified, so restrict the forms that are accepted. 2005 ;; modified, so restrict the forms that are accepted.
2039 (define_insn "*sse5i_vmfmadd<mode>4" 2006 (define_insn "fma4i_vmfmadd<mode>4"
2040 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") 2007 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2041 (unspec:SSEMODEF2P 2008 (unspec:SSEMODEF2P
2042 [(vec_merge:SSEMODEF2P 2009 [(vec_merge:SSEMODEF2P
2043 (plus:SSEMODEF2P 2010 (plus:SSEMODEF2P
2044 (mult:SSEMODEF2P 2011 (mult:SSEMODEF2P
2045 (match_operand:SSEMODEF2P 1 "register_operand" "0,0") 2012 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2046 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) 2013 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2047 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) 2014 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2048 (match_dup 0) 2015 (match_dup 0)
2049 (const_int 0))] 2016 (const_int 1))]
2050 UNSPEC_SSE5_INTRINSIC))] 2017 UNSPEC_FMA4_INTRINSIC))]
2051 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" 2018 "TARGET_FMA4 && TARGET_FUSED_MADD"
2052 "fmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 2019 "vfmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2053 [(set_attr "type" "ssemuladd") 2020 [(set_attr "type" "ssemuladd")
2054 (set_attr "mode" "<ssescalarmode>")]) 2021 (set_attr "mode" "<ssescalarmode>")])
2055 2022
;; Comparison rows for the scalar ("vm") fused multiply-subtract intrinsic.
;; Inserted (right-hand) text: insn fma4i_vmfmsub<mode>4.  Its RTL is a
;; vec_merge with mask (const_int 1): the element selected by mask bit 0 is
;; (operand 1 * operand 2) - operand 3, and the other elements come from
;; (match_dup 0).  It is enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; Deleted (left-hand) text: the SSE5 expander sse5i_vmfmsub<mode>4 and the
;; *sse5i_vmfmsub<mode>4 insn, which merged with (match_dup 1) in the insn.
2056 (define_expand "sse5i_vmfmsub<mode>4" 2023 (define_insn "fma4i_vmfmsub<mode>4"
2057 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2058 (unspec:SSEMODEF2P
2059 [(vec_merge:SSEMODEF2P
2060 (minus:SSEMODEF2P
2061 (mult:SSEMODEF2P
2062 (match_operand:SSEMODEF2P 1 "register_operand" "")
2063 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2064 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2065 (match_dup 0)
2066 (const_int 1))]
2067 UNSPEC_SSE5_INTRINSIC))]
2068 "TARGET_SSE5"
2069 {
2070 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2071 if (TARGET_FUSED_MADD)
2072 {
2073 emit_insn (gen_sse5_vmfmsub<mode>4 (operands[0], operands[1],
2074 operands[2], operands[3]));
2075 DONE;
2076 }
2077 })
2078
2079 (define_insn "*sse5i_vmfmsub<mode>4"
2080 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") 2024 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2081 (unspec:SSEMODEF2P 2025 (unspec:SSEMODEF2P
2082 [(vec_merge:SSEMODEF2P 2026 [(vec_merge:SSEMODEF2P
2083 (minus:SSEMODEF2P 2027 (minus:SSEMODEF2P
2084 (mult:SSEMODEF2P 2028 (mult:SSEMODEF2P
2085 (match_operand:SSEMODEF2P 1 "register_operand" "0,0") 2029 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2086 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) 2030 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2087 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) 2031 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2088 (match_dup 1) 2032 (match_dup 0)
2089 (const_int 1))] 2033 (const_int 1))]
2090 UNSPEC_SSE5_INTRINSIC))] 2034 UNSPEC_FMA4_INTRINSIC))]
2091 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" 2035 "TARGET_FMA4 && TARGET_FUSED_MADD"
2092 "fmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 2036 "vfmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2093 [(set_attr "type" "ssemuladd") 2037 [(set_attr "type" "ssemuladd")
2094 (set_attr "mode" "<ssescalarmode>")]) 2038 (set_attr "mode" "<ssescalarmode>")])
2095 2039
;; Comparison rows for the scalar ("vm") negated multiply-add intrinsic.
;; Inserted (right-hand) text: insn fma4i_vmfnmadd<mode>4.  Its RTL is a
;; vec_merge with mask (const_int 1): the element selected by mask bit 0 is
;; operand 3 - (operand 1 * operand 2), and the other elements come from
;; (match_dup 0).  It is enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; Deleted (left-hand) text: the SSE5 expander sse5i_vmfnmadd<mode>4 and the
;; *sse5i_vmfnmadd<mode>4 insn, which merged with (match_dup 1).
2096 ;; Note operands are out of order to simplify call to ix86_sse5_valid_p 2040 (define_insn "fma4i_vmfnmadd<mode>4"
2097 (define_expand "sse5i_vmfnmadd<mode>4"
2098 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2099 (unspec:SSEMODEF2P
2100 [(vec_merge:SSEMODEF2P
2101 (minus:SSEMODEF2P
2102 (match_operand:SSEMODEF2P 3 "register_operand" "")
2103 (mult:SSEMODEF2P
2104 (match_operand:SSEMODEF2P 1 "register_operand" "")
2105 (match_operand:SSEMODEF2P 2 "register_operand" "")))
2106 (match_dup 1)
2107 (const_int 1))]
2108 UNSPEC_SSE5_INTRINSIC))]
2109 "TARGET_SSE5"
2110 {
2111 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2112 if (TARGET_FUSED_MADD)
2113 {
2114 emit_insn (gen_sse5_vmfnmadd<mode>4 (operands[0], operands[1],
2115 operands[2], operands[3]));
2116 DONE;
2117 }
2118 })
2119
2120 (define_insn "*sse5i_vmfnmadd<mode>4"
2121 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") 2041 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2122 (unspec:SSEMODEF2P 2042 (unspec:SSEMODEF2P
2123 [(vec_merge:SSEMODEF2P 2043 [(vec_merge:SSEMODEF2P
2124 (minus:SSEMODEF2P 2044 (minus:SSEMODEF2P
2125 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x") 2045 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")
2126 (mult:SSEMODEF2P 2046 (mult:SSEMODEF2P
2127 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%0,0") 2047 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x")
2128 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm"))) 2048 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m")))
2129 (match_dup 1) 2049 (match_dup 0)
2130 (const_int 1))] 2050 (const_int 1))]
2131 UNSPEC_SSE5_INTRINSIC))] 2051 UNSPEC_FMA4_INTRINSIC))]
2132 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, true)" 2052 "TARGET_FMA4 && TARGET_FUSED_MADD"
2133 "fnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 2053 "vfnmadd<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2134 [(set_attr "type" "ssemuladd") 2054 [(set_attr "type" "ssemuladd")
2135 (set_attr "mode" "<ssescalarmode>")]) 2055 (set_attr "mode" "<ssescalarmode>")])
2136 2056
;; Comparison rows for the scalar ("vm") negated multiply-subtract intrinsic.
;; Inserted (right-hand) text: insn fma4i_vmfnmsub<mode>4.  Its RTL is a
;; vec_merge with mask (const_int 1): the element selected by mask bit 0 is
;; ((-operand 1) * operand 2) - operand 3, and the other elements come from
;; (match_dup 0).  It is enabled by TARGET_FMA4 && TARGET_FUSED_MADD.
;; Deleted (left-hand) text: the SSE5 expander sse5i_vmfnmsub<mode>4 and the
;; *sse5i_vmfnmsub<mode>4 insn, which merged with (match_dup 1).
2137 (define_expand "sse5i_vmfnmsub<mode>4" 2057 (define_insn "fma4i_vmfnmsub<mode>4"
2138 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
2139 (unspec:SSEMODEF2P
2140 [(vec_merge:SSEMODEF2P
2141 (minus:SSEMODEF2P
2142 (mult:SSEMODEF2P
2143 (neg:SSEMODEF2P
2144 (match_operand:SSEMODEF2P 1 "register_operand" ""))
2145 (match_operand:SSEMODEF2P 2 "register_operand" ""))
2146 (match_operand:SSEMODEF2P 3 "register_operand" ""))
2147 (match_dup 1)
2148 (const_int 1))]
2149 UNSPEC_SSE5_INTRINSIC))]
2150 "TARGET_SSE5"
2151 {
2152 /* If we have -mfused-madd, emit the normal insn rather than the UNSPEC */
2153 if (TARGET_FUSED_MADD)
2154 {
2155 emit_insn (gen_sse5_vmfnmsub<mode>4 (operands[0], operands[1],
2156 operands[2], operands[3]));
2157 DONE;
2158 }
2159 })
2160
2161 (define_insn "*sse5i_vmfnmsub<mode>4"
2162 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x") 2058 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x")
2163 (unspec:SSEMODEF2P 2059 (unspec:SSEMODEF2P
2164 [(vec_merge:SSEMODEF2P 2060 [(vec_merge:SSEMODEF2P
2165 (minus:SSEMODEF2P 2061 (minus:SSEMODEF2P
2166 (mult:SSEMODEF2P 2062 (mult:SSEMODEF2P
2167 (neg:SSEMODEF2P 2063 (neg:SSEMODEF2P
2168 (match_operand:SSEMODEF2P 1 "register_operand" "0,0")) 2064 (match_operand:SSEMODEF2P 1 "nonimmediate_operand" "%x,x"))
2169 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm")) 2065 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,m"))
2170 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x")) 2066 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm,x"))
2171 (match_dup 1) 2067 (match_dup 0)
2172 (const_int 1))] 2068 (const_int 1))]
2173 UNSPEC_SSE5_INTRINSIC))] 2069 UNSPEC_FMA4_INTRINSIC))]
2174 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" 2070 "TARGET_FMA4 && TARGET_FUSED_MADD"
2175 "fnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 2071 "vfnmsub<ssemodesuffixf2s>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2176 [(set_attr "type" "ssemuladd") 2072 [(set_attr "type" "ssemuladd")
2177 (set_attr "mode" "<ssescalarmode>")]) 2073 (set_attr "mode" "<ssescalarmode>")])
2074
2075 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2076 ;;
2077 ;; FMA4 Parallel floating point multiply addsub and subadd operations.
2078 ;;
2079 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2080
;; V8SF fused multiply with alternating add/subtract.  The vec_merge picks,
;; per element, between (op1 * op2) + op3 (first arm, taken where the mask bit
;; is set) and (op1 * op2) - op3 (second arm).  Mask 170 = 0b10101010, so the
;; odd-numbered elements are added and the even-numbered ones subtracted.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2081 (define_insn "fma4_fmaddsubv8sf4"
2082 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2083 (vec_merge:V8SF
2084 (plus:V8SF
2085 (mult:V8SF
2086 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2087 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2088 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2089 (minus:V8SF
2090 (mult:V8SF
2091 (match_dup 1)
2092 (match_dup 2))
2093 (match_dup 3))
2094 (const_int 170)))]
2095 "TARGET_FMA4 && TARGET_FUSED_MADD"
2096 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2097 [(set_attr "type" "ssemuladd")
2098 (set_attr "mode" "V8SF")])
2099
;; V4DF fused multiply with alternating add/subtract.  The vec_merge picks,
;; per element, between (op1 * op2) + op3 (first arm, taken where the mask bit
;; is set) and (op1 * op2) - op3 (second arm).  Mask 10 = 0b1010, so elements
;; 1 and 3 are added and elements 0 and 2 subtracted.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2100 (define_insn "fma4_fmaddsubv4df4"
2101 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2102 (vec_merge:V4DF
2103 (plus:V4DF
2104 (mult:V4DF
2105 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2106 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2107 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2108 (minus:V4DF
2109 (mult:V4DF
2110 (match_dup 1)
2111 (match_dup 2))
2112 (match_dup 3))
2113 (const_int 10)))]
2114 "TARGET_FMA4 && TARGET_FUSED_MADD"
2115 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2116 [(set_attr "type" "ssemuladd")
2117 (set_attr "mode" "V4DF")])
2118
;; V4SF fused multiply with alternating add/subtract.  The vec_merge picks,
;; per element, between (op1 * op2) + op3 (first arm, taken where the mask bit
;; is set) and (op1 * op2) - op3 (second arm).  Mask 10 = 0b1010, so elements
;; 1 and 3 are added and elements 0 and 2 subtracted.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2119 (define_insn "fma4_fmaddsubv4sf4"
2120 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2121 (vec_merge:V4SF
2122 (plus:V4SF
2123 (mult:V4SF
2124 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2125 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2126 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2127 (minus:V4SF
2128 (mult:V4SF
2129 (match_dup 1)
2130 (match_dup 2))
2131 (match_dup 3))
2132 (const_int 10)))]
2133 "TARGET_FMA4 && TARGET_FUSED_MADD"
2134 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2135 [(set_attr "type" "ssemuladd")
2136 (set_attr "mode" "V4SF")])
2137
;; V2DF fused multiply with alternating add/subtract.  The vec_merge picks,
;; per element, between (op1 * op2) + op3 (first arm, taken where the mask bit
;; is set) and (op1 * op2) - op3 (second arm).  Mask 2 = 0b10, so element 1 is
;; added and element 0 subtracted.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2138 (define_insn "fma4_fmaddsubv2df4"
2139 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2140 (vec_merge:V2DF
2141 (plus:V2DF
2142 (mult:V2DF
2143 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2144 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2145 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2146 (minus:V2DF
2147 (mult:V2DF
2148 (match_dup 1)
2149 (match_dup 2))
2150 (match_dup 3))
2151 (const_int 2)))]
2152 "TARGET_FMA4 && TARGET_FUSED_MADD"
2153 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2154 [(set_attr "type" "ssemuladd")
2155 (set_attr "mode" "V2DF")])
2156
;; V8SF fused multiply with alternating subtract/add.  The vec_merge picks,
;; per element, between (op1 * op2) + op3 (first arm, taken where the mask bit
;; is set) and (op1 * op2) - op3 (second arm).  Mask 85 = 0b01010101, so the
;; even-numbered elements are added and the odd-numbered ones subtracted.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2157 (define_insn "fma4_fmsubaddv8sf4"
2158 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2159 (vec_merge:V8SF
2160 (plus:V8SF
2161 (mult:V8SF
2162 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2163 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2164 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2165 (minus:V8SF
2166 (mult:V8SF
2167 (match_dup 1)
2168 (match_dup 2))
2169 (match_dup 3))
2170 (const_int 85)))]
2171 "TARGET_FMA4 && TARGET_FUSED_MADD"
2172 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2173 [(set_attr "type" "ssemuladd")
2174 (set_attr "mode" "V8SF")])
2175
;; V4DF fused multiply with alternating subtract/add.  The vec_merge picks,
;; per element, between (op1 * op2) + op3 (first arm, taken where the mask bit
;; is set) and (op1 * op2) - op3 (second arm).  Mask 5 = 0b0101, so elements
;; 0 and 2 are added and elements 1 and 3 subtracted.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2176 (define_insn "fma4_fmsubaddv4df4"
2177 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2178 (vec_merge:V4DF
2179 (plus:V4DF
2180 (mult:V4DF
2181 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2182 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2183 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2184 (minus:V4DF
2185 (mult:V4DF
2186 (match_dup 1)
2187 (match_dup 2))
2188 (match_dup 3))
2189 (const_int 5)))]
2190 "TARGET_FMA4 && TARGET_FUSED_MADD"
2191 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2192 [(set_attr "type" "ssemuladd")
2193 (set_attr "mode" "V4DF")])
2194
;; V4SF fused multiply with alternating subtract/add.  The vec_merge picks,
;; per element, between (op1 * op2) + op3 (first arm, taken where the mask bit
;; is set) and (op1 * op2) - op3 (second arm).  Mask 5 = 0b0101, so elements
;; 0 and 2 are added and elements 1 and 3 subtracted.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2195 (define_insn "fma4_fmsubaddv4sf4"
2196 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2197 (vec_merge:V4SF
2198 (plus:V4SF
2199 (mult:V4SF
2200 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2201 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2202 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2203 (minus:V4SF
2204 (mult:V4SF
2205 (match_dup 1)
2206 (match_dup 2))
2207 (match_dup 3))
2208 (const_int 5)))]
2209 "TARGET_FMA4 && TARGET_FUSED_MADD"
2210 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2211 [(set_attr "type" "ssemuladd")
2212 (set_attr "mode" "V4SF")])
2213
;; V2DF fused multiply with alternating subtract/add.  The vec_merge picks,
;; per element, between (op1 * op2) + op3 (first arm, taken where the mask bit
;; is set) and (op1 * op2) - op3 (second arm).  Mask 1 = 0b01, so element 0 is
;; added and element 1 subtracted.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2214 (define_insn "fma4_fmsubaddv2df4"
2215 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2216 (vec_merge:V2DF
2217 (plus:V2DF
2218 (mult:V2DF
2219 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2220 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2221 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2222 (minus:V2DF
2223 (mult:V2DF
2224 (match_dup 1)
2225 (match_dup 2))
2226 (match_dup 3))
2227 (const_int 1)))]
2228 "TARGET_FMA4 && TARGET_FUSED_MADD"
2229 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2230 [(set_attr "type" "ssemuladd")
2231 (set_attr "mode" "V2DF")])
2232
;; Intrinsic (UNSPEC_FMA4_INTRINSIC-wrapped) V8SF multiply with alternating
;; add/subtract.  Inside the unspec, the vec_merge takes (op1 * op2) + op3
;; where the mask bit is set and (op1 * op2) - op3 elsewhere; mask 170 =
;; 0b10101010 selects the odd-numbered elements for the add.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2233 (define_insn "fma4i_fmaddsubv8sf4"
2234 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2235 (unspec:V8SF
2236 [(vec_merge:V8SF
2237 (plus:V8SF
2238 (mult:V8SF
2239 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2240 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2241 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2242 (minus:V8SF
2243 (mult:V8SF
2244 (match_dup 1)
2245 (match_dup 2))
2246 (match_dup 3))
2247 (const_int 170))]
2248 UNSPEC_FMA4_INTRINSIC))]
2249 "TARGET_FMA4 && TARGET_FUSED_MADD"
2250 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2251 [(set_attr "type" "ssemuladd")
2252 (set_attr "mode" "V8SF")])
2253
;; Intrinsic (UNSPEC_FMA4_INTRINSIC-wrapped) V4DF multiply with alternating
;; add/subtract.  Inside the unspec, the vec_merge takes (op1 * op2) + op3
;; where the mask bit is set and (op1 * op2) - op3 elsewhere; mask 10 =
;; 0b1010 selects elements 1 and 3 for the add.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2254 (define_insn "fma4i_fmaddsubv4df4"
2255 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2256 (unspec:V4DF
2257 [(vec_merge:V4DF
2258 (plus:V4DF
2259 (mult:V4DF
2260 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2261 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2262 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2263 (minus:V4DF
2264 (mult:V4DF
2265 (match_dup 1)
2266 (match_dup 2))
2267 (match_dup 3))
2268 (const_int 10))]
2269 UNSPEC_FMA4_INTRINSIC))]
2270 "TARGET_FMA4 && TARGET_FUSED_MADD"
2271 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2272 [(set_attr "type" "ssemuladd")
2273 (set_attr "mode" "V4DF")])
2274
;; Intrinsic (UNSPEC_FMA4_INTRINSIC-wrapped) V4SF multiply with alternating
;; add/subtract.  Inside the unspec, the vec_merge takes (op1 * op2) + op3
;; where the mask bit is set and (op1 * op2) - op3 elsewhere; mask 10 =
;; 0b1010 selects elements 1 and 3 for the add.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2275 (define_insn "fma4i_fmaddsubv4sf4"
2276 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2277 (unspec:V4SF
2278 [(vec_merge:V4SF
2279 (plus:V4SF
2280 (mult:V4SF
2281 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2282 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2283 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2284 (minus:V4SF
2285 (mult:V4SF
2286 (match_dup 1)
2287 (match_dup 2))
2288 (match_dup 3))
2289 (const_int 10))]
2290 UNSPEC_FMA4_INTRINSIC))]
2291 "TARGET_FMA4 && TARGET_FUSED_MADD"
2292 "vfmaddsubps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2293 [(set_attr "type" "ssemuladd")
2294 (set_attr "mode" "V4SF")])
2295
;; Intrinsic (UNSPEC_FMA4_INTRINSIC-wrapped) V2DF multiply with alternating
;; add/subtract.  Inside the unspec, the vec_merge takes (op1 * op2) + op3
;; where the mask bit is set and (op1 * op2) - op3 elsewhere; mask 2 = 0b10
;; selects element 1 for the add.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2296 (define_insn "fma4i_fmaddsubv2df4"
2297 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2298 (unspec:V2DF
2299 [(vec_merge:V2DF
2300 (plus:V2DF
2301 (mult:V2DF
2302 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2303 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2304 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2305 (minus:V2DF
2306 (mult:V2DF
2307 (match_dup 1)
2308 (match_dup 2))
2309 (match_dup 3))
2310 (const_int 2))]
2311 UNSPEC_FMA4_INTRINSIC))]
2312 "TARGET_FMA4 && TARGET_FUSED_MADD"
2313 "vfmaddsubpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2314 [(set_attr "type" "ssemuladd")
2315 (set_attr "mode" "V2DF")])
2316
;; Intrinsic (UNSPEC_FMA4_INTRINSIC-wrapped) V8SF multiply with alternating
;; subtract/add.  Inside the unspec, the vec_merge takes (op1 * op2) + op3
;; where the mask bit is set and (op1 * op2) - op3 elsewhere; mask 85 =
;; 0b01010101 selects the even-numbered elements for the add.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2317 (define_insn "fma4i_fmsubaddv8sf4"
2318 [(set (match_operand:V8SF 0 "register_operand" "=x,x")
2319 (unspec:V8SF
2320 [(vec_merge:V8SF
2321 (plus:V8SF
2322 (mult:V8SF
2323 (match_operand:V8SF 1 "nonimmediate_operand" "%x,x")
2324 (match_operand:V8SF 2 "nonimmediate_operand" "x,m"))
2325 (match_operand:V8SF 3 "nonimmediate_operand" "xm,x"))
2326 (minus:V8SF
2327 (mult:V8SF
2328 (match_dup 1)
2329 (match_dup 2))
2330 (match_dup 3))
2331 (const_int 85))]
2332 UNSPEC_FMA4_INTRINSIC))]
2333 "TARGET_FMA4 && TARGET_FUSED_MADD"
2334 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2335 [(set_attr "type" "ssemuladd")
2336 (set_attr "mode" "V8SF")])
2337
;; Intrinsic (UNSPEC_FMA4_INTRINSIC-wrapped) V4DF multiply with alternating
;; subtract/add.  Inside the unspec, the vec_merge takes (op1 * op2) + op3
;; where the mask bit is set and (op1 * op2) - op3 elsewhere; mask 5 =
;; 0b0101 selects elements 0 and 2 for the add.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2338 (define_insn "fma4i_fmsubaddv4df4"
2339 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
2340 (unspec:V4DF
2341 [(vec_merge:V4DF
2342 (plus:V4DF
2343 (mult:V4DF
2344 (match_operand:V4DF 1 "nonimmediate_operand" "%x,x")
2345 (match_operand:V4DF 2 "nonimmediate_operand" "x,m"))
2346 (match_operand:V4DF 3 "nonimmediate_operand" "xm,x"))
2347 (minus:V4DF
2348 (mult:V4DF
2349 (match_dup 1)
2350 (match_dup 2))
2351 (match_dup 3))
2352 (const_int 5))]
2353 UNSPEC_FMA4_INTRINSIC))]
2354 "TARGET_FMA4 && TARGET_FUSED_MADD"
2355 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2356 [(set_attr "type" "ssemuladd")
2357 (set_attr "mode" "V4DF")])
2358
;; Intrinsic (UNSPEC_FMA4_INTRINSIC-wrapped) V4SF multiply with alternating
;; subtract/add.  Inside the unspec, the vec_merge takes (op1 * op2) + op3
;; where the mask bit is set and (op1 * op2) - op3 elsewhere; mask 5 =
;; 0b0101 selects elements 0 and 2 for the add.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2359 (define_insn "fma4i_fmsubaddv4sf4"
2360 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2361 (unspec:V4SF
2362 [(vec_merge:V4SF
2363 (plus:V4SF
2364 (mult:V4SF
2365 (match_operand:V4SF 1 "nonimmediate_operand" "%x,x")
2366 (match_operand:V4SF 2 "nonimmediate_operand" "x,m"))
2367 (match_operand:V4SF 3 "nonimmediate_operand" "xm,x"))
2368 (minus:V4SF
2369 (mult:V4SF
2370 (match_dup 1)
2371 (match_dup 2))
2372 (match_dup 3))
2373 (const_int 5))]
2374 UNSPEC_FMA4_INTRINSIC))]
2375 "TARGET_FMA4 && TARGET_FUSED_MADD"
2376 "vfmsubaddps\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2377 [(set_attr "type" "ssemuladd")
2378 (set_attr "mode" "V4SF")])
2379
;; Intrinsic (UNSPEC_FMA4_INTRINSIC-wrapped) V2DF multiply with alternating
;; subtract/add.  Inside the unspec, the vec_merge takes (op1 * op2) + op3
;; where the mask bit is set and (op1 * op2) - op3 elsewhere; mask 1 = 0b01
;; selects element 0 for the add.
;; Alternative 0 allows operand 3 in memory; alternative 1 allows operand 2.
2380 (define_insn "fma4i_fmsubaddv2df4"
2381 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2382 (unspec:V2DF
2383 [(vec_merge:V2DF
2384 (plus:V2DF
2385 (mult:V2DF
2386 (match_operand:V2DF 1 "nonimmediate_operand" "%x,x")
2387 (match_operand:V2DF 2 "nonimmediate_operand" "x,m"))
2388 (match_operand:V2DF 3 "nonimmediate_operand" "xm,x"))
2389 (minus:V2DF
2390 (mult:V2DF
2391 (match_dup 1)
2392 (match_dup 2))
2393 (match_dup 3))
2394 (const_int 1))]
2395 UNSPEC_FMA4_INTRINSIC))]
2396 "TARGET_FMA4 && TARGET_FUSED_MADD"
2397 "vfmsubaddpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
2398 [(set_attr "type" "ssemuladd")
2399 (set_attr "mode" "V2DF")])
2178 2400
2179 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2401 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2180 ;; 2402 ;;
2181 ;; Parallel single-precision floating point conversion operations 2403 ;; Parallel single-precision floating point conversion operations
2182 ;; 2404 ;;
2213 (parallel [(const_int 0) (const_int 1)])))] 2435 (parallel [(const_int 0) (const_int 1)])))]
2214 "TARGET_SSE" 2436 "TARGET_SSE"
2215 "cvttps2pi\t{%1, %0|%0, %1}" 2437 "cvttps2pi\t{%1, %0|%0, %1}"
2216 [(set_attr "type" "ssecvt") 2438 [(set_attr "type" "ssecvt")
2217 (set_attr "unit" "mmx") 2439 (set_attr "unit" "mmx")
2440 (set_attr "prefix_rep" "0")
2218 (set_attr "mode" "SF")]) 2441 (set_attr "mode" "SF")])
2219 2442
2220 (define_insn "*avx_cvtsi2ss" 2443 (define_insn "*avx_cvtsi2ss"
2221 [(set (match_operand:V4SF 0 "register_operand" "=x") 2444 [(set (match_operand:V4SF 0 "register_operand" "=x")
2222 (vec_merge:V4SF 2445 (vec_merge:V4SF
2252 (match_operand:V4SF 1 "register_operand" "x") 2475 (match_operand:V4SF 1 "register_operand" "x")
2253 (const_int 1)))] 2476 (const_int 1)))]
2254 "TARGET_AVX && TARGET_64BIT" 2477 "TARGET_AVX && TARGET_64BIT"
2255 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}" 2478 "vcvtsi2ssq\t{%2, %1, %0|%0, %1, %2}"
2256 [(set_attr "type" "sseicvt") 2479 [(set_attr "type" "sseicvt")
2480 (set_attr "length_vex" "4")
2257 (set_attr "prefix" "vex") 2481 (set_attr "prefix" "vex")
2258 (set_attr "mode" "SF")]) 2482 (set_attr "mode" "SF")])
2259 2483
2260 (define_insn "sse_cvtsi2ssq" 2484 (define_insn "sse_cvtsi2ssq"
2261 [(set (match_operand:V4SF 0 "register_operand" "=x,x") 2485 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
2265 (match_operand:V4SF 1 "register_operand" "0,0") 2489 (match_operand:V4SF 1 "register_operand" "0,0")
2266 (const_int 1)))] 2490 (const_int 1)))]
2267 "TARGET_SSE && TARGET_64BIT" 2491 "TARGET_SSE && TARGET_64BIT"
2268 "cvtsi2ssq\t{%2, %0|%0, %2}" 2492 "cvtsi2ssq\t{%2, %0|%0, %2}"
2269 [(set_attr "type" "sseicvt") 2493 [(set_attr "type" "sseicvt")
2494 (set_attr "prefix_rex" "1")
2270 (set_attr "athlon_decode" "vector,double") 2495 (set_attr "athlon_decode" "vector,double")
2271 (set_attr "amdfam10_decode" "vector,double") 2496 (set_attr "amdfam10_decode" "vector,double")
2272 (set_attr "mode" "SF")]) 2497 (set_attr "mode" "SF")])
2273 2498
2274 (define_insn "sse_cvtss2si" 2499 (define_insn "sse_cvtss2si"
2373 "TARGET_SSE2" 2598 "TARGET_SSE2"
2374 "cvtdq2ps\t{%1, %0|%0, %1}" 2599 "cvtdq2ps\t{%1, %0|%0, %1}"
2375 [(set_attr "type" "ssecvt") 2600 [(set_attr "type" "ssecvt")
2376 (set_attr "mode" "V4SF")]) 2601 (set_attr "mode" "V4SF")])
2377 2602
;; Expander that builds a V4SI -> V4SF conversion (the "u" in the name
;; suggests unsigned input) out of four SSE2 steps:
;;   operand 5 = (float:V4SF operand 1)       signed conversion
;;   operand 6 = operand 5 < operand 3        compare against zero
;;   operand 7 = operand 6 AND operand 4      keep 2**32 where compare holds
;;   operand 0 = operand 5 + operand 7
;; The preparation code sets operand 3 to a register holding CONST0_RTX and
;; operand 4 to a register holding the constant 1.0 * 2**32 (real_ldexp with
;; exponent 32), then allocates fresh V4SF pseudos for operands 5 through 7.
;; NOTE(review): this relies on the lt result being an all-ones/all-zeros
;; lane mask, and on ix86_build_const_vector's second argument requesting the
;; value in every element -- confirm against those definitions.
2603 (define_expand "sse2_cvtudq2ps"
2604 [(set (match_dup 5)
2605 (float:V4SF (match_operand:V4SI 1 "nonimmediate_operand" "")))
2606 (set (match_dup 6)
2607 (lt:V4SF (match_dup 5) (match_dup 3)))
2608 (set (match_dup 7)
2609 (and:V4SF (match_dup 6) (match_dup 4)))
2610 (set (match_operand:V4SF 0 "register_operand" "")
2611 (plus:V4SF (match_dup 5) (match_dup 7)))]
2612 "TARGET_SSE2"
2613 {
2614 REAL_VALUE_TYPE TWO32r;
2615 rtx x;
2616 int i;
2617
2618 real_ldexp (&TWO32r, &dconst1, 32);
2619 x = const_double_from_real_value (TWO32r, SFmode);
2620
2621 operands[3] = force_reg (V4SFmode, CONST0_RTX (V4SFmode));
2622 operands[4] = force_reg (V4SFmode, ix86_build_const_vector (SFmode, 1, x));
2623
2624 for (i = 5; i < 8; i++)
2625 operands[i] = gen_reg_rtx (V4SFmode);
2626 })
2627
2378 (define_insn "avx_cvtps2dq<avxmodesuffix>" 2628 (define_insn "avx_cvtps2dq<avxmodesuffix>"
2379 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x") 2629 [(set (match_operand:AVXMODEDCVTPS2DQ 0 "register_operand" "=x")
2380 (unspec:AVXMODEDCVTPS2DQ 2630 (unspec:AVXMODEDCVTPS2DQ
2381 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")] 2631 [(match_operand:<avxcvtvecmode> 1 "nonimmediate_operand" "xm")]
2382 UNSPEC_FIX_NOTRUNC))] 2632 UNSPEC_FIX_NOTRUNC))]
2411 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))] 2661 (fix:V4SI (match_operand:V4SF 1 "nonimmediate_operand" "xm")))]
2412 "TARGET_SSE2" 2662 "TARGET_SSE2"
2413 "cvttps2dq\t{%1, %0|%0, %1}" 2663 "cvttps2dq\t{%1, %0|%0, %1}"
2414 [(set_attr "type" "ssecvt") 2664 [(set_attr "type" "ssecvt")
2415 (set_attr "prefix_rep" "1") 2665 (set_attr "prefix_rep" "1")
2666 (set_attr "prefix_data16" "0")
2416 (set_attr "mode" "TI")]) 2667 (set_attr "mode" "TI")])
2417 2668
2418 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 2669 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2419 ;; 2670 ;;
2420 ;; Parallel double-precision floating point conversion operations 2671 ;; Parallel double-precision floating point conversion operations
2426 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))] 2677 (float:V2DF (match_operand:V2SI 1 "nonimmediate_operand" "y,m")))]
2427 "TARGET_SSE2" 2678 "TARGET_SSE2"
2428 "cvtpi2pd\t{%1, %0|%0, %1}" 2679 "cvtpi2pd\t{%1, %0|%0, %1}"
2429 [(set_attr "type" "ssecvt") 2680 [(set_attr "type" "ssecvt")
2430 (set_attr "unit" "mmx,*") 2681 (set_attr "unit" "mmx,*")
2682 (set_attr "prefix_data16" "1,*")
2431 (set_attr "mode" "V2DF")]) 2683 (set_attr "mode" "V2DF")])
2432 2684
2433 (define_insn "sse2_cvtpd2pi" 2685 (define_insn "sse2_cvtpd2pi"
2434 [(set (match_operand:V2SI 0 "register_operand" "=y") 2686 [(set (match_operand:V2SI 0 "register_operand" "=y")
2435 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")] 2687 (unspec:V2SI [(match_operand:V2DF 1 "nonimmediate_operand" "xm")]
2486 (match_operand:V2DF 1 "register_operand" "x") 2738 (match_operand:V2DF 1 "register_operand" "x")
2487 (const_int 1)))] 2739 (const_int 1)))]
2488 "TARGET_AVX && TARGET_64BIT" 2740 "TARGET_AVX && TARGET_64BIT"
2489 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}" 2741 "vcvtsi2sdq\t{%2, %1, %0|%0, %1, %2}"
2490 [(set_attr "type" "sseicvt") 2742 [(set_attr "type" "sseicvt")
2743 (set_attr "length_vex" "4")
2491 (set_attr "prefix" "vex") 2744 (set_attr "prefix" "vex")
2492 (set_attr "mode" "DF")]) 2745 (set_attr "mode" "DF")])
2493 2746
2494 (define_insn "sse2_cvtsi2sdq" 2747 (define_insn "sse2_cvtsi2sdq"
2495 [(set (match_operand:V2DF 0 "register_operand" "=x,x") 2748 [(set (match_operand:V2DF 0 "register_operand" "=x,x")
2499 (match_operand:V2DF 1 "register_operand" "0,0") 2752 (match_operand:V2DF 1 "register_operand" "0,0")
2500 (const_int 1)))] 2753 (const_int 1)))]
2501 "TARGET_SSE2 && TARGET_64BIT" 2754 "TARGET_SSE2 && TARGET_64BIT"
2502 "cvtsi2sdq\t{%2, %0|%0, %2}" 2755 "cvtsi2sdq\t{%2, %0|%0, %2}"
2503 [(set_attr "type" "sseicvt") 2756 [(set_attr "type" "sseicvt")
2757 (set_attr "prefix_rex" "1")
2504 (set_attr "mode" "DF") 2758 (set_attr "mode" "DF")
2505 (set_attr "athlon_decode" "double,direct") 2759 (set_attr "athlon_decode" "double,direct")
2506 (set_attr "amdfam10_decode" "vector,double")]) 2760 (set_attr "amdfam10_decode" "vector,double")])
2507 2761
2508 (define_insn "sse2_cvtsd2si" 2762 (define_insn "sse2_cvtsd2si"
2640 "TARGET_SSE2" 2894 "TARGET_SSE2"
2641 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\" 2895 "* return TARGET_AVX ? \"vcvtpd2dq{x}\t{%1, %0|%0, %1}\"
2642 : \"cvtpd2dq\t{%1, %0|%0, %1}\";" 2896 : \"cvtpd2dq\t{%1, %0|%0, %1}\";"
2643 [(set_attr "type" "ssecvt") 2897 [(set_attr "type" "ssecvt")
2644 (set_attr "prefix_rep" "1") 2898 (set_attr "prefix_rep" "1")
2899 (set_attr "prefix_data16" "0")
2645 (set_attr "prefix" "maybe_vex") 2900 (set_attr "prefix" "maybe_vex")
2646 (set_attr "mode" "TI") 2901 (set_attr "mode" "TI")
2647 (set_attr "amdfam10_decode" "double")]) 2902 (set_attr "amdfam10_decode" "double")])
2648 2903
2649 (define_insn "avx_cvttpd2dq256" 2904 (define_insn "avx_cvttpd2dq256"
2670 (match_operand:V2SI 2 "const0_operand" "")))] 2925 (match_operand:V2SI 2 "const0_operand" "")))]
2671 "TARGET_SSE2" 2926 "TARGET_SSE2"
2672 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\" 2927 "* return TARGET_AVX ? \"vcvttpd2dq{x}\t{%1, %0|%0, %1}\"
2673 : \"cvttpd2dq\t{%1, %0|%0, %1}\";" 2928 : \"cvttpd2dq\t{%1, %0|%0, %1}\";"
2674 [(set_attr "type" "ssecvt") 2929 [(set_attr "type" "ssecvt")
2675 (set_attr "prefix_rep" "1")
2676 (set_attr "prefix" "maybe_vex") 2930 (set_attr "prefix" "maybe_vex")
2677 (set_attr "mode" "TI") 2931 (set_attr "mode" "TI")
2678 (set_attr "amdfam10_decode" "double")]) 2932 (set_attr "amdfam10_decode" "double")])
2679 2933
2680 (define_insn "*avx_cvtsd2ss" 2934 (define_insn "*avx_cvtsd2ss"
2789 "TARGET_SSE2" 3043 "TARGET_SSE2"
2790 "%vcvtps2pd\t{%1, %0|%0, %1}" 3044 "%vcvtps2pd\t{%1, %0|%0, %1}"
2791 [(set_attr "type" "ssecvt") 3045 [(set_attr "type" "ssecvt")
2792 (set_attr "prefix" "maybe_vex") 3046 (set_attr "prefix" "maybe_vex")
2793 (set_attr "mode" "V2DF") 3047 (set_attr "mode" "V2DF")
3048 (set_attr "prefix_data16" "0")
2794 (set_attr "amdfam10_decode" "direct")]) 3049 (set_attr "amdfam10_decode" "direct")])
2795 3050
2796 (define_expand "vec_unpacks_hi_v4sf" 3051 (define_expand "vec_unpacks_hi_v4sf"
2797 [(set (match_dup 2) 3052 [(set (match_dup 2)
2798 (vec_select:V4SF 3053 (vec_select:V4SF
2881 (float:V2DF 3136 (float:V2DF
2882 (vec_select:V2SI 3137 (vec_select:V2SI
2883 (match_dup 2) 3138 (match_dup 2)
2884 (parallel [(const_int 0) (const_int 1)]))))] 3139 (parallel [(const_int 0) (const_int 1)]))))]
2885 "TARGET_SSE2" 3140 "TARGET_SSE2"
2886 { 3141 "operands[2] = gen_reg_rtx (V4SImode);")
2887 operands[2] = gen_reg_rtx (V4SImode);
2888 })
2889 3142
2890 (define_expand "vec_unpacks_float_lo_v4si" 3143 (define_expand "vec_unpacks_float_lo_v4si"
2891 [(set (match_operand:V2DF 0 "register_operand" "") 3144 [(set (match_operand:V2DF 0 "register_operand" "")
2892 (float:V2DF 3145 (float:V2DF
2893 (vec_select:V2SI 3146 (vec_select:V2SI
2894 (match_operand:V4SI 1 "nonimmediate_operand" "") 3147 (match_operand:V4SI 1 "nonimmediate_operand" "")
2895 (parallel [(const_int 0) (const_int 1)]))))] 3148 (parallel [(const_int 0) (const_int 1)]))))]
2896 "TARGET_SSE2") 3149 "TARGET_SSE2")
2897 3150
3151 (define_expand "vec_unpacku_float_hi_v4si"	; V2DF from elements 2 and 3 of a V4SI; presumably the unsigned int->double unpack (name suggests so)
3152 [(set (match_dup 5)	; step 1: V4SI temporary = source elements {2, 3, 2, 3}
3153 (vec_select:V4SI
3154 (match_operand:V4SI 1 "nonimmediate_operand" "")
3155 (parallel [(const_int 2)
3156 (const_int 3)
3157 (const_int 2)
3158 (const_int 3)])))
3159 (set (match_dup 6)	; step 2: `float' (signed) conversion of temporary elements 0 and 1
3160 (float:V2DF
3161 (vec_select:V2SI
3162 (match_dup 5)
3163 (parallel [(const_int 0) (const_int 1)]))))
3164 (set (match_dup 7)	; step 3: compare the converted value against operand 3 (zero vector)
3165 (lt:V2DF (match_dup 6) (match_dup 3)))
3166 (set (match_dup 8)	; step 4: AND the comparison result with operand 4 (the 2**32 constant)
3167 (and:V2DF (match_dup 7) (match_dup 4)))
3168 (set (match_operand:V2DF 0 "register_operand" "")	; step 5: result = converted value + step-4 correction
3169 (plus:V2DF (match_dup 6) (match_dup 8)))]
3170 "TARGET_SSE2"
3171 {
3172 REAL_VALUE_TYPE TWO32r;
3173 rtx x;
3174 int i;
3175
3176 real_ldexp (&TWO32r, &dconst1, 32);	/* TWO32r = dconst1 scaled by 2**32 */
3177 x = const_double_from_real_value (TWO32r, DFmode);
3178
3179 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));	/* zero vector for the `lt' compare */
3180 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));	/* NOTE(review): presumably x in every element; the `1' flag is defined by ix86_build_const_vector, not visible here */
3181
3182 operands[5] = gen_reg_rtx (V4SImode);	/* temporary for the shuffled source */
3183
3184 for (i = 6; i < 9; i++)	/* operands 6..8: V2DF temporaries used by steps 2-4 */
3185 operands[i] = gen_reg_rtx (V2DFmode);
3186 })
3187
3188 (define_expand "vec_unpacku_float_lo_v4si"	; V2DF from elements 0 and 1 of a V4SI; presumably the unsigned int->double unpack (name suggests so)
3189 [(set (match_dup 5)	; step 1: `float' (signed) conversion of source elements 0 and 1
3190 (float:V2DF
3191 (vec_select:V2SI
3192 (match_operand:V4SI 1 "nonimmediate_operand" "")
3193 (parallel [(const_int 0) (const_int 1)]))))
3194 (set (match_dup 6)	; step 2: compare the converted value against operand 3 (zero vector)
3195 (lt:V2DF (match_dup 5) (match_dup 3)))
3196 (set (match_dup 7)	; step 3: AND the comparison result with operand 4 (the 2**32 constant)
3197 (and:V2DF (match_dup 6) (match_dup 4)))
3198 (set (match_operand:V2DF 0 "register_operand" "")	; step 4: result = converted value + step-3 correction
3199 (plus:V2DF (match_dup 5) (match_dup 7)))]
3200 "TARGET_SSE2"
3201 {
3202 REAL_VALUE_TYPE TWO32r;
3203 rtx x;
3204 int i;
3205
3206 real_ldexp (&TWO32r, &dconst1, 32);	/* TWO32r = dconst1 scaled by 2**32 */
3207 x = const_double_from_real_value (TWO32r, DFmode);
3208
3209 operands[3] = force_reg (V2DFmode, CONST0_RTX (V2DFmode));	/* zero vector for the `lt' compare */
3210 operands[4] = force_reg (V2DFmode, ix86_build_const_vector (DFmode, 1, x));	/* NOTE(review): presumably x in every element; the `1' flag is defined by ix86_build_const_vector, not visible here */
3211
3212 for (i = 5; i < 8; i++)	/* operands 5..7: V2DF temporaries used by steps 1-3 */
3213 operands[i] = gen_reg_rtx (V2DFmode);
3214 })
3215
2898 (define_expand "vec_pack_trunc_v2df" 3216 (define_expand "vec_pack_trunc_v2df"
2899 [(match_operand:V4SF 0 "register_operand" "") 3217 [(match_operand:V4SF 0 "register_operand" "")
2900 (match_operand:V2DF 1 "nonimmediate_operand" "") 3218 (match_operand:V2DF 1 "nonimmediate_operand" "")
2901 (match_operand:V2DF 2 "nonimmediate_operand" "")] 3219 (match_operand:V2DF 2 "nonimmediate_operand" "")]
2902 "TARGET_SSE2" 3220 "TARGET_SSE2"
2923 r1 = gen_reg_rtx (V4SImode); 3241 r1 = gen_reg_rtx (V4SImode);
2924 r2 = gen_reg_rtx (V4SImode); 3242 r2 = gen_reg_rtx (V4SImode);
2925 3243
2926 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1])); 3244 emit_insn (gen_sse2_cvttpd2dq (r1, operands[1]));
2927 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2])); 3245 emit_insn (gen_sse2_cvttpd2dq (r2, operands[2]));
2928 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]), 3246 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2929 gen_lowpart (V2DImode, r1), 3247 gen_lowpart (V2DImode, r1),
2930 gen_lowpart (V2DImode, r2))); 3248 gen_lowpart (V2DImode, r2)));
2931 DONE; 3249 DONE;
2932 }) 3250 })
2933 3251
2934 (define_expand "vec_pack_sfix_v2df" 3252 (define_expand "vec_pack_sfix_v2df"
2935 [(match_operand:V4SI 0 "register_operand" "") 3253 [(match_operand:V4SI 0 "register_operand" "")
2942 r1 = gen_reg_rtx (V4SImode); 3260 r1 = gen_reg_rtx (V4SImode);
2943 r2 = gen_reg_rtx (V4SImode); 3261 r2 = gen_reg_rtx (V4SImode);
2944 3262
2945 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1])); 3263 emit_insn (gen_sse2_cvtpd2dq (r1, operands[1]));
2946 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2])); 3264 emit_insn (gen_sse2_cvtpd2dq (r2, operands[2]));
2947 emit_insn (gen_sse2_punpcklqdq (gen_lowpart (V2DImode, operands[0]), 3265 emit_insn (gen_vec_interleave_lowv2di (gen_lowpart (V2DImode, operands[0]),
2948 gen_lowpart (V2DImode, r1), 3266 gen_lowpart (V2DImode, r1),
2949 gen_lowpart (V2DImode, r2))); 3267 gen_lowpart (V2DImode, r2)));
2950 DONE; 3268 DONE;
2951 }) 3269 })
2952 3270
2953 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 3271 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2954 ;; 3272 ;;
3054 movhps\t{%2, %0|%0, %2} 3372 movhps\t{%2, %0|%0, %2}
3055 movlps\t{%2, %H0|%H0, %2}" 3373 movlps\t{%2, %H0|%H0, %2}"
3056 [(set_attr "type" "ssemov") 3374 [(set_attr "type" "ssemov")
3057 (set_attr "mode" "V4SF,V2SF,V2SF")]) 3375 (set_attr "mode" "V4SF,V2SF,V2SF")])
3058 3376
3377 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
3059 (define_insn "avx_unpckhps256" 3378 (define_insn "avx_unpckhps256"
3060 [(set (match_operand:V8SF 0 "register_operand" "=x") 3379 [(set (match_operand:V8SF 0 "register_operand" "=x")
3061 (vec_select:V8SF 3380 (vec_select:V8SF
3062 (vec_concat:V16SF 3381 (vec_concat:V16SF
3063 (match_operand:V8SF 1 "register_operand" "x") 3382 (match_operand:V8SF 1 "register_operand" "x")
3070 "vunpckhps\t{%2, %1, %0|%0, %1, %2}" 3389 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3071 [(set_attr "type" "sselog") 3390 [(set_attr "type" "sselog")
3072 (set_attr "prefix" "vex") 3391 (set_attr "prefix" "vex")
3073 (set_attr "mode" "V8SF")]) 3392 (set_attr "mode" "V8SF")])
3074 3393
3075 (define_insn "*avx_unpckhps" 3394 (define_insn "*avx_interleave_highv4sf"
3076 [(set (match_operand:V4SF 0 "register_operand" "=x") 3395 [(set (match_operand:V4SF 0 "register_operand" "=x")
3077 (vec_select:V4SF 3396 (vec_select:V4SF
3078 (vec_concat:V8SF 3397 (vec_concat:V8SF
3079 (match_operand:V4SF 1 "register_operand" "x") 3398 (match_operand:V4SF 1 "register_operand" "x")
3080 (match_operand:V4SF 2 "nonimmediate_operand" "xm")) 3399 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3084 "vunpckhps\t{%2, %1, %0|%0, %1, %2}" 3403 "vunpckhps\t{%2, %1, %0|%0, %1, %2}"
3085 [(set_attr "type" "sselog") 3404 [(set_attr "type" "sselog")
3086 (set_attr "prefix" "vex") 3405 (set_attr "prefix" "vex")
3087 (set_attr "mode" "V4SF")]) 3406 (set_attr "mode" "V4SF")])
3088 3407
3089 (define_insn "sse_unpckhps" 3408 (define_insn "vec_interleave_highv4sf"
3090 [(set (match_operand:V4SF 0 "register_operand" "=x") 3409 [(set (match_operand:V4SF 0 "register_operand" "=x")
3091 (vec_select:V4SF 3410 (vec_select:V4SF
3092 (vec_concat:V8SF 3411 (vec_concat:V8SF
3093 (match_operand:V4SF 1 "register_operand" "0") 3412 (match_operand:V4SF 1 "register_operand" "0")
3094 (match_operand:V4SF 2 "nonimmediate_operand" "xm")) 3413 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3097 "TARGET_SSE" 3416 "TARGET_SSE"
3098 "unpckhps\t{%2, %0|%0, %2}" 3417 "unpckhps\t{%2, %0|%0, %2}"
3099 [(set_attr "type" "sselog") 3418 [(set_attr "type" "sselog")
3100 (set_attr "mode" "V4SF")]) 3419 (set_attr "mode" "V4SF")])
3101 3420
3421 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
3102 (define_insn "avx_unpcklps256" 3422 (define_insn "avx_unpcklps256"
3103 [(set (match_operand:V8SF 0 "register_operand" "=x") 3423 [(set (match_operand:V8SF 0 "register_operand" "=x")
3104 (vec_select:V8SF 3424 (vec_select:V8SF
3105 (vec_concat:V16SF 3425 (vec_concat:V16SF
3106 (match_operand:V8SF 1 "register_operand" "x") 3426 (match_operand:V8SF 1 "register_operand" "x")
3113 "vunpcklps\t{%2, %1, %0|%0, %1, %2}" 3433 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3114 [(set_attr "type" "sselog") 3434 [(set_attr "type" "sselog")
3115 (set_attr "prefix" "vex") 3435 (set_attr "prefix" "vex")
3116 (set_attr "mode" "V8SF")]) 3436 (set_attr "mode" "V8SF")])
3117 3437
3118 (define_insn "*avx_unpcklps" 3438 (define_insn "*avx_interleave_lowv4sf"
3119 [(set (match_operand:V4SF 0 "register_operand" "=x") 3439 [(set (match_operand:V4SF 0 "register_operand" "=x")
3120 (vec_select:V4SF 3440 (vec_select:V4SF
3121 (vec_concat:V8SF 3441 (vec_concat:V8SF
3122 (match_operand:V4SF 1 "register_operand" "x") 3442 (match_operand:V4SF 1 "register_operand" "x")
3123 (match_operand:V4SF 2 "nonimmediate_operand" "xm")) 3443 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3127 "vunpcklps\t{%2, %1, %0|%0, %1, %2}" 3447 "vunpcklps\t{%2, %1, %0|%0, %1, %2}"
3128 [(set_attr "type" "sselog") 3448 [(set_attr "type" "sselog")
3129 (set_attr "prefix" "vex") 3449 (set_attr "prefix" "vex")
3130 (set_attr "mode" "V4SF")]) 3450 (set_attr "mode" "V4SF")])
3131 3451
3132 (define_insn "sse_unpcklps" 3452 (define_insn "vec_interleave_lowv4sf"
3133 [(set (match_operand:V4SF 0 "register_operand" "=x") 3453 [(set (match_operand:V4SF 0 "register_operand" "=x")
3134 (vec_select:V4SF 3454 (vec_select:V4SF
3135 (vec_concat:V8SF 3455 (vec_concat:V8SF
3136 (match_operand:V4SF 1 "register_operand" "0") 3456 (match_operand:V4SF 1 "register_operand" "0")
3137 (match_operand:V4SF 2 "nonimmediate_operand" "xm")) 3457 (match_operand:V4SF 2 "nonimmediate_operand" "xm"))
3259 operands[3] = GEN_INT (mask); 3579 operands[3] = GEN_INT (mask);
3260 3580
3261 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 3581 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3262 } 3582 }
3263 [(set_attr "type" "sselog") 3583 [(set_attr "type" "sselog")
3584 (set_attr "length_immediate" "1")
3264 (set_attr "prefix" "vex") 3585 (set_attr "prefix" "vex")
3265 (set_attr "mode" "V8SF")]) 3586 (set_attr "mode" "V8SF")])
3266 3587
3267 (define_expand "sse_shufps" 3588 (define_expand "sse_shufps"
3268 [(match_operand:V4SF 0 "register_operand" "") 3589 [(match_operand:V4SF 0 "register_operand" "")
3300 operands[3] = GEN_INT (mask); 3621 operands[3] = GEN_INT (mask);
3301 3622
3302 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 3623 return "vshufps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3303 } 3624 }
3304 [(set_attr "type" "sselog") 3625 [(set_attr "type" "sselog")
3626 (set_attr "length_immediate" "1")
3305 (set_attr "prefix" "vex") 3627 (set_attr "prefix" "vex")
3306 (set_attr "mode" "V4SF")]) 3628 (set_attr "mode" "V4SF")])
3307 3629
3308 (define_insn "sse_shufps_<mode>" 3630 (define_insn "sse_shufps_<mode>"
3309 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x") 3631 [(set (match_operand:SSEMODE4S 0 "register_operand" "=x")
3325 operands[3] = GEN_INT (mask); 3647 operands[3] = GEN_INT (mask);
3326 3648
3327 return "shufps\t{%3, %2, %0|%0, %2, %3}"; 3649 return "shufps\t{%3, %2, %0|%0, %2, %3}";
3328 } 3650 }
3329 [(set_attr "type" "sselog") 3651 [(set_attr "type" "sselog")
3652 (set_attr "length_immediate" "1")
3330 (set_attr "mode" "V4SF")]) 3653 (set_attr "mode" "V4SF")])
3331 3654
3332 (define_insn "sse_storehps" 3655 (define_insn "sse_storehps"
3333 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x") 3656 [(set (match_operand:V2SF 0 "nonimmediate_operand" "=m,x,x")
3334 (vec_select:V2SF 3657 (vec_select:V2SF
3432 "@ 3755 "@
3433 shufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4} 3756 shufps\t{$0xe4, %1, %2, %0|%0, %2, %1, 0xe4}
3434 vmovlps\t{%2, %1, %0|%0, %1, %2} 3757 vmovlps\t{%2, %1, %0|%0, %1, %2}
3435 vmovlps\t{%2, %0|%0, %2}" 3758 vmovlps\t{%2, %0|%0, %2}"
3436 [(set_attr "type" "sselog,ssemov,ssemov") 3759 [(set_attr "type" "sselog,ssemov,ssemov")
3760 (set_attr "length_immediate" "1,*,*")
3437 (set_attr "prefix" "vex") 3761 (set_attr "prefix" "vex")
3438 (set_attr "mode" "V4SF,V2SF,V2SF")]) 3762 (set_attr "mode" "V4SF,V2SF,V2SF")])
3439 3763
3440 (define_insn "sse_loadlps" 3764 (define_insn "sse_loadlps"
3441 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m") 3765 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,m")
3448 "@ 3772 "@
3449 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4} 3773 shufps\t{$0xe4, %1, %0|%0, %1, 0xe4}
3450 movlps\t{%2, %0|%0, %2} 3774 movlps\t{%2, %0|%0, %2}
3451 movlps\t{%2, %0|%0, %2}" 3775 movlps\t{%2, %0|%0, %2}"
3452 [(set_attr "type" "sselog,ssemov,ssemov") 3776 [(set_attr "type" "sselog,ssemov,ssemov")
3777 (set_attr "length_immediate" "1,*,*")
3453 (set_attr "mode" "V4SF,V2SF,V2SF")]) 3778 (set_attr "mode" "V4SF,V2SF,V2SF")])
3454 3779
3455 (define_insn "*avx_movss" 3780 (define_insn "*avx_movss"
3456 [(set (match_operand:V4SF 0 "register_operand" "=x") 3781 [(set (match_operand:V4SF 0 "register_operand" "=x")
3457 (vec_merge:V4SF 3782 (vec_merge:V4SF
3473 "TARGET_SSE" 3798 "TARGET_SSE"
3474 "movss\t{%2, %0|%0, %2}" 3799 "movss\t{%2, %0|%0, %2}"
3475 [(set_attr "type" "ssemov") 3800 [(set_attr "type" "ssemov")
3476 (set_attr "mode" "SF")]) 3801 (set_attr "mode" "SF")])
3477 3802
3803 (define_expand "vec_dupv4sf"	; duplicate one SF scalar (operand 1) into a V4SF register (operand 0)
3804 [(set (match_operand:V4SF 0 "register_operand" "")
3805 (vec_duplicate:V4SF
3806 (match_operand:SF 1 "nonimmediate_operand" "")))]
3807 "TARGET_SSE"
3808 {
3809 if (!TARGET_AVX)	/* only *vec_dupv4sf_avx has a memory alternative; *vec_dupv4sf needs a register */
3810 operands[1] = force_reg (SFmode, operands[1]);	/* operand 1 is SFmode, so the temporary must be SFmode too (was V4SFmode) */
3811 })
3812
3478 (define_insn "*vec_dupv4sf_avx" 3813 (define_insn "*vec_dupv4sf_avx"
3479 [(set (match_operand:V4SF 0 "register_operand" "=x") 3814 [(set (match_operand:V4SF 0 "register_operand" "=x,x")
3480 (vec_duplicate:V4SF 3815 (vec_duplicate:V4SF
3481 (match_operand:SF 1 "register_operand" "x")))] 3816 (match_operand:SF 1 "nonimmediate_operand" "x,m")))]
3482 "TARGET_AVX" 3817 "TARGET_AVX"
3483 "vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}" 3818 "@
3484 [(set_attr "type" "sselog1") 3819 vshufps\t{$0, %1, %1, %0|%0, %1, %1, 0}
3820 vbroadcastss\t{%1, %0|%0, %1}"
3821 [(set_attr "type" "sselog1,ssemov")
3822 (set_attr "length_immediate" "1,0")
3823 (set_attr "prefix_extra" "0,1")
3485 (set_attr "prefix" "vex") 3824 (set_attr "prefix" "vex")
3486 (set_attr "mode" "V4SF")]) 3825 (set_attr "mode" "V4SF")])
3487 3826
3488 (define_insn "*vec_dupv4sf" 3827 (define_insn "*vec_dupv4sf"
3489 [(set (match_operand:V4SF 0 "register_operand" "=x") 3828 [(set (match_operand:V4SF 0 "register_operand" "=x")
3490 (vec_duplicate:V4SF 3829 (vec_duplicate:V4SF
3491 (match_operand:SF 1 "register_operand" "0")))] 3830 (match_operand:SF 1 "register_operand" "0")))]
3492 "TARGET_SSE" 3831 "TARGET_SSE"
3493 "shufps\t{$0, %0, %0|%0, %0, 0}" 3832 "shufps\t{$0, %0, %0|%0, %0, 0}"
3494 [(set_attr "type" "sselog1") 3833 [(set_attr "type" "sselog1")
3834 (set_attr "length_immediate" "1")
3495 (set_attr "mode" "V4SF")]) 3835 (set_attr "mode" "V4SF")])
3496 3836
3497 (define_insn "*vec_concatv2sf_avx" 3837 (define_insn "*vec_concatv2sf_avx"
3498 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y") 3838 [(set (match_operand:V2SF 0 "register_operand" "=x,x,x,*y ,*y")
3499 (vec_concat:V2SF 3839 (vec_concat:V2SF
3505 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10} 3845 vinsertps\t{$0x10, %2, %1, %0|%0, %1, %2, 0x10}
3506 vmovss\t{%1, %0|%0, %1} 3846 vmovss\t{%1, %0|%0, %1}
3507 punpckldq\t{%2, %0|%0, %2} 3847 punpckldq\t{%2, %0|%0, %2}
3508 movd\t{%1, %0|%0, %1}" 3848 movd\t{%1, %0|%0, %1}"
3509 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") 3849 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3850 (set_attr "length_immediate" "*,1,*,*,*")
3851 (set_attr "prefix_extra" "*,1,*,*,*")
3510 (set (attr "prefix") 3852 (set (attr "prefix")
3511 (if_then_else (eq_attr "alternative" "3,4") 3853 (if_then_else (eq_attr "alternative" "3,4")
3512 (const_string "orig") 3854 (const_string "orig")
3513 (const_string "vex"))) 3855 (const_string "vex")))
3514 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")]) 3856 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3526 insertps\t{$0x10, %2, %0|%0, %2, 0x10} 3868 insertps\t{$0x10, %2, %0|%0, %2, 0x10}
3527 movss\t{%1, %0|%0, %1} 3869 movss\t{%1, %0|%0, %1}
3528 punpckldq\t{%2, %0|%0, %2} 3870 punpckldq\t{%2, %0|%0, %2}
3529 movd\t{%1, %0|%0, %1}" 3871 movd\t{%1, %0|%0, %1}"
3530 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") 3872 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
3873 (set_attr "prefix_data16" "*,1,*,*,*")
3531 (set_attr "prefix_extra" "*,1,*,*,*") 3874 (set_attr "prefix_extra" "*,1,*,*,*")
3875 (set_attr "length_immediate" "*,1,*,*,*")
3532 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")]) 3876 (set_attr "mode" "V4SF,V4SF,SF,DI,DI")])
3533 3877
3534 ;; ??? In theory we can match memory for the MMX alternative, but allowing 3878 ;; ??? In theory we can match memory for the MMX alternative, but allowing
3535 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE 3879 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
3536 ;; alternatives pretty much forces the MMX alternative to be chosen. 3880 ;; alternatives pretty much forces the MMX alternative to be chosen.
3580 { 3924 {
3581 ix86_expand_vector_init (false, operands[0], operands[1]); 3925 ix86_expand_vector_init (false, operands[0], operands[1]);
3582 DONE; 3926 DONE;
3583 }) 3927 })
3584 3928
3585 (define_insn "*vec_setv4sf_0_avx" 3929 (define_insn "*vec_set<mode>_0_avx"
3586 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,x,m") 3930 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")
3587 (vec_merge:V4SF 3931 (vec_merge:SSEMODE4S
3588 (vec_duplicate:V4SF 3932 (vec_duplicate:SSEMODE4S
3589 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF")) 3933 (match_operand:<ssescalarmode> 2
3590 (match_operand:V4SF 1 "vector_move_operand" " x,C,C ,0") 3934 "general_operand" " x,m,*r,x,*rm,x*rfF"))
3935 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,x, x,0")
3591 (const_int 1)))] 3936 (const_int 1)))]
3592 "TARGET_AVX" 3937 "TARGET_AVX"
3593 "@ 3938 "@
3939 vinsertps\t{$0xe, %2, %2, %0|%0, %2, %2, 0xe}
3940 vmov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3941 vmovd\t{%2, %0|%0, %2}
3594 vmovss\t{%2, %1, %0|%0, %1, %2} 3942 vmovss\t{%2, %1, %0|%0, %1, %2}
3595 vmovss\t{%2, %0|%0, %2} 3943 vpinsrd\t{$0, %2, %1, %0|%0, %1, %2, 0}
3596 vmovd\t{%2, %0|%0, %2} 3944 #"
3945 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")
3946 (set_attr "prefix_extra" "*,*,*,*,1,*")
3947 (set_attr "length_immediate" "*,*,*,*,1,*")
3948 (set_attr "prefix" "vex")
3949 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
3950
3951 (define_insn "*vec_set<mode>_0_sse4_1"	; SSE4.1: merge a scalar (operand 2) into element 0 of vector operand 1
3952 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x, x,x, x,m")	; six alternatives; the last one has a memory destination
3953 (vec_merge:SSEMODE4S
3954 (vec_duplicate:SSEMODE4S
3955 (match_operand:<ssescalarmode> 2
3956 "general_operand" " x,m,*r,x,*rm,*rfF"))	; scalar source, one constraint per alternative
3957 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,C, C,0, 0,0")	; `C' in alternatives 0-2, tied to operand 0 in alternatives 3-5
3958 (const_int 1)))]	; merge mask 1: only element 0 comes from the duplicated scalar
3959 "TARGET_SSE4_1"
3960 "@
3961 insertps\t{$0xe, %2, %0|%0, %2, 0xe}
3962 mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3963 movd\t{%2, %0|%0, %2}
3964 movss\t{%2, %0|%0, %2}
3965 pinsrd\t{$0, %2, %0|%0, %2, 0}
3966 #"
3967 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,*")	; attribute columns follow the template order above
3968 (set_attr "prefix_extra" "*,*,*,*,1,*")	; set explicitly only for the pinsrd alternative
3969 (set_attr "length_immediate" "*,*,*,*,1,*")	; set explicitly only for the pinsrd alternative; NOTE(review): insertps (alternative 0) also has an immediate but is left at the default - confirm
3970 (set_attr "mode" "SF,<ssescalarmode>,SI,SF,TI,*")])
3971
3972 (define_insn "*vec_set<mode>_0_sse2"
3973 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x, x,x,m")
3974 (vec_merge:SSEMODE4S
3975 (vec_duplicate:SSEMODE4S
3976 (match_operand:<ssescalarmode> 2
3977 "general_operand" " m,*r,x,x*rfF"))
3978 (match_operand:SSEMODE4S 1 "vector_move_operand" " C, C,0,0")
3979 (const_int 1)))]
3980 "TARGET_SSE2"
3981 "@
3982 mov<ssescalarmodesuffix2s>\t{%2, %0|%0, %2}
3983 movd\t{%2, %0|%0, %2}
3984 movss\t{%2, %0|%0, %2}
3597 #" 3985 #"
3598 [(set_attr "type" "ssemov") 3986 [(set_attr "type" "ssemov")
3599 (set_attr "prefix" "vex") 3987 (set_attr "mode" "<ssescalarmode>,SI,SF,*")])
3600 (set_attr "mode" "SF")]) 3988
3601 3989 (define_insn "vec_set<mode>_0"
3602 (define_insn "vec_setv4sf_0" 3990 [(set (match_operand:SSEMODE4S 0 "nonimmediate_operand" "=x,x,m")
3603 [(set (match_operand:V4SF 0 "nonimmediate_operand" "=x,x,Y2,m") 3991 (vec_merge:SSEMODE4S
3604 (vec_merge:V4SF 3992 (vec_duplicate:SSEMODE4S
3605 (vec_duplicate:V4SF 3993 (match_operand:<ssescalarmode> 2
3606 (match_operand:SF 2 "general_operand" " x,m,*r,x*rfF")) 3994 "general_operand" " m,x,x*rfF"))
3607 (match_operand:V4SF 1 "vector_move_operand" " 0,C,C ,0") 3995 (match_operand:SSEMODE4S 1 "vector_move_operand" " C,0,0")
3608 (const_int 1)))] 3996 (const_int 1)))]
3609 "TARGET_SSE" 3997 "TARGET_SSE"
3610 "@ 3998 "@
3611 movss\t{%2, %0|%0, %2} 3999 movss\t{%2, %0|%0, %2}
3612 movss\t{%2, %0|%0, %2} 4000 movss\t{%2, %0|%0, %2}
3613 movd\t{%2, %0|%0, %2}
3614 #" 4001 #"
3615 [(set_attr "type" "ssemov") 4002 [(set_attr "type" "ssemov")
3616 (set_attr "mode" "SF")]) 4003 (set_attr "mode" "SF")])
3617 4004
3618 ;; A subset is vec_setv4sf. 4005 ;; A subset is vec_setv4sf.
3627 { 4014 {
3628 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4); 4015 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3629 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 4016 return "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3630 } 4017 }
3631 [(set_attr "type" "sselog") 4018 [(set_attr "type" "sselog")
4019 (set_attr "prefix_extra" "1")
4020 (set_attr "length_immediate" "1")
3632 (set_attr "prefix" "vex") 4021 (set_attr "prefix" "vex")
3633 (set_attr "mode" "V4SF")]) 4022 (set_attr "mode" "V4SF")])
3634 4023
3635 (define_insn "*vec_setv4sf_sse4_1" 4024 (define_insn "*vec_setv4sf_sse4_1"
3636 [(set (match_operand:V4SF 0 "register_operand" "=x") 4025 [(set (match_operand:V4SF 0 "register_operand" "=x")
3643 { 4032 {
3644 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4); 4033 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])) << 4);
3645 return "insertps\t{%3, %2, %0|%0, %2, %3}"; 4034 return "insertps\t{%3, %2, %0|%0, %2, %3}";
3646 } 4035 }
3647 [(set_attr "type" "sselog") 4036 [(set_attr "type" "sselog")
3648 (set_attr "prefix_extra" "1") 4037 (set_attr "prefix_data16" "1")
4038 (set_attr "prefix_extra" "1")
4039 (set_attr "length_immediate" "1")
3649 (set_attr "mode" "V4SF")]) 4040 (set_attr "mode" "V4SF")])
3650 4041
3651 (define_insn "*avx_insertps" 4042 (define_insn "*avx_insertps"
3652 [(set (match_operand:V4SF 0 "register_operand" "=x") 4043 [(set (match_operand:V4SF 0 "register_operand" "=x")
3653 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm") 4044 (unspec:V4SF [(match_operand:V4SF 2 "nonimmediate_operand" "xm")
3656 UNSPEC_INSERTPS))] 4047 UNSPEC_INSERTPS))]
3657 "TARGET_AVX" 4048 "TARGET_AVX"
3658 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 4049 "vinsertps\t{%3, %2, %1, %0|%0, %1, %2, %3}";
3659 [(set_attr "type" "sselog") 4050 [(set_attr "type" "sselog")
3660 (set_attr "prefix" "vex") 4051 (set_attr "prefix" "vex")
4052 (set_attr "prefix_extra" "1")
4053 (set_attr "length_immediate" "1")
3661 (set_attr "mode" "V4SF")]) 4054 (set_attr "mode" "V4SF")])
3662 4055
3663 (define_insn "sse4_1_insertps" 4056 (define_insn "sse4_1_insertps"
3664 [(set (match_operand:V4SF 0 "register_operand" "=x") 4057 [(set (match_operand:V4SF 0 "register_operand" "=x")
3665 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x") 4058 (unspec:V4SF [(match_operand:V4SF 2 "register_operand" "x")
3667 (match_operand:SI 3 "const_0_to_255_operand" "n")] 4060 (match_operand:SI 3 "const_0_to_255_operand" "n")]
3668 UNSPEC_INSERTPS))] 4061 UNSPEC_INSERTPS))]
3669 "TARGET_SSE4_1" 4062 "TARGET_SSE4_1"
3670 "insertps\t{%3, %2, %0|%0, %2, %3}"; 4063 "insertps\t{%3, %2, %0|%0, %2, %3}";
3671 [(set_attr "type" "sselog") 4064 [(set_attr "type" "sselog")
3672 (set_attr "prefix_extra" "1") 4065 (set_attr "prefix_data16" "1")
4066 (set_attr "prefix_extra" "1")
4067 (set_attr "length_immediate" "1")
3673 (set_attr "mode" "V4SF")]) 4068 (set_attr "mode" "V4SF")])
3674 4069
3675 (define_split 4070 (define_split
3676 [(set (match_operand:V4SF 0 "memory_operand" "") 4071 [(set (match_operand:V4SF 0 "memory_operand" "")
3677 (vec_merge:V4SF 4072 (vec_merge:V4SF
3742 (match_operand:AVX256MODE4P 1 "register_operand" "x,x") 4137 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3743 (parallel [(const_int 0) (const_int 1)])))] 4138 (parallel [(const_int 0) (const_int 1)])))]
3744 "TARGET_AVX" 4139 "TARGET_AVX"
3745 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}" 4140 "vextractf128\t{$0x0, %1, %0|%0, %1, 0x0}"
3746 [(set_attr "type" "sselog") 4141 [(set_attr "type" "sselog")
4142 (set_attr "prefix_extra" "1")
4143 (set_attr "length_immediate" "1")
3747 (set_attr "memory" "none,store") 4144 (set_attr "memory" "none,store")
3748 (set_attr "prefix" "vex") 4145 (set_attr "prefix" "vex")
3749 (set_attr "mode" "V8SF")]) 4146 (set_attr "mode" "V8SF")])
3750 4147
3751 (define_insn "vec_extract_hi_<mode>" 4148 (define_insn "vec_extract_hi_<mode>"
3754 (match_operand:AVX256MODE4P 1 "register_operand" "x,x") 4151 (match_operand:AVX256MODE4P 1 "register_operand" "x,x")
3755 (parallel [(const_int 2) (const_int 3)])))] 4152 (parallel [(const_int 2) (const_int 3)])))]
3756 "TARGET_AVX" 4153 "TARGET_AVX"
3757 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" 4154 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3758 [(set_attr "type" "sselog") 4155 [(set_attr "type" "sselog")
4156 (set_attr "prefix_extra" "1")
4157 (set_attr "length_immediate" "1")
3759 (set_attr "memory" "none,store") 4158 (set_attr "memory" "none,store")
3760 (set_attr "prefix" "vex") 4159 (set_attr "prefix" "vex")
3761 (set_attr "mode" "V8SF")]) 4160 (set_attr "mode" "V8SF")])
3762 4161
3763 (define_insn "vec_extract_lo_<mode>" 4162 (define_insn "vec_extract_lo_<mode>"
3767 (parallel [(const_int 0) (const_int 1) 4166 (parallel [(const_int 0) (const_int 1)
3768 (const_int 2) (const_int 3)])))] 4167 (const_int 2) (const_int 3)])))]
3769 "TARGET_AVX" 4168 "TARGET_AVX"
3770 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" 4169 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3771 [(set_attr "type" "sselog") 4170 [(set_attr "type" "sselog")
4171 (set_attr "prefix_extra" "1")
4172 (set_attr "length_immediate" "1")
3772 (set_attr "memory" "none,store") 4173 (set_attr "memory" "none,store")
3773 (set_attr "prefix" "vex") 4174 (set_attr "prefix" "vex")
3774 (set_attr "mode" "V8SF")]) 4175 (set_attr "mode" "V8SF")])
3775 4176
3776 (define_insn "vec_extract_hi_<mode>" 4177 (define_insn "vec_extract_hi_<mode>"
3780 (parallel [(const_int 4) (const_int 5) 4181 (parallel [(const_int 4) (const_int 5)
3781 (const_int 6) (const_int 7)])))] 4182 (const_int 6) (const_int 7)])))]
3782 "TARGET_AVX" 4183 "TARGET_AVX"
3783 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" 4184 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3784 [(set_attr "type" "sselog") 4185 [(set_attr "type" "sselog")
4186 (set_attr "prefix_extra" "1")
4187 (set_attr "length_immediate" "1")
3785 (set_attr "memory" "none,store") 4188 (set_attr "memory" "none,store")
3786 (set_attr "prefix" "vex") 4189 (set_attr "prefix" "vex")
3787 (set_attr "mode" "V8SF")]) 4190 (set_attr "mode" "V8SF")])
3788 4191
3789 (define_insn "vec_extract_lo_v16hi" 4192 (define_insn "vec_extract_lo_v16hi"
3795 (const_int 4) (const_int 5) 4198 (const_int 4) (const_int 5)
3796 (const_int 6) (const_int 7)])))] 4199 (const_int 6) (const_int 7)])))]
3797 "TARGET_AVX" 4200 "TARGET_AVX"
3798 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" 4201 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3799 [(set_attr "type" "sselog") 4202 [(set_attr "type" "sselog")
4203 (set_attr "prefix_extra" "1")
4204 (set_attr "length_immediate" "1")
3800 (set_attr "memory" "none,store") 4205 (set_attr "memory" "none,store")
3801 (set_attr "prefix" "vex") 4206 (set_attr "prefix" "vex")
3802 (set_attr "mode" "V8SF")]) 4207 (set_attr "mode" "V8SF")])
3803 4208
3804 (define_insn "vec_extract_hi_v16hi" 4209 (define_insn "vec_extract_hi_v16hi"
3810 (const_int 12) (const_int 13) 4215 (const_int 12) (const_int 13)
3811 (const_int 14) (const_int 15)])))] 4216 (const_int 14) (const_int 15)])))]
3812 "TARGET_AVX" 4217 "TARGET_AVX"
3813 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" 4218 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3814 [(set_attr "type" "sselog") 4219 [(set_attr "type" "sselog")
4220 (set_attr "prefix_extra" "1")
4221 (set_attr "length_immediate" "1")
3815 (set_attr "memory" "none,store") 4222 (set_attr "memory" "none,store")
3816 (set_attr "prefix" "vex") 4223 (set_attr "prefix" "vex")
3817 (set_attr "mode" "V8SF")]) 4224 (set_attr "mode" "V8SF")])
3818 4225
3819 (define_insn "vec_extract_lo_v32qi" 4226 (define_insn "vec_extract_lo_v32qi"
3829 (const_int 12) (const_int 13) 4236 (const_int 12) (const_int 13)
3830 (const_int 14) (const_int 15)])))] 4237 (const_int 14) (const_int 15)])))]
3831 "TARGET_AVX" 4238 "TARGET_AVX"
3832 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" 4239 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3833 [(set_attr "type" "sselog") 4240 [(set_attr "type" "sselog")
4241 (set_attr "prefix_extra" "1")
4242 (set_attr "length_immediate" "1")
3834 (set_attr "memory" "none,store") 4243 (set_attr "memory" "none,store")
3835 (set_attr "prefix" "vex") 4244 (set_attr "prefix" "vex")
3836 (set_attr "mode" "V8SF")]) 4245 (set_attr "mode" "V8SF")])
3837 4246
3838 (define_insn "vec_extract_hi_v32qi" 4247 (define_insn "vec_extract_hi_v32qi"
3848 (const_int 28) (const_int 29) 4257 (const_int 28) (const_int 29)
3849 (const_int 30) (const_int 31)])))] 4258 (const_int 30) (const_int 31)])))]
3850 "TARGET_AVX" 4259 "TARGET_AVX"
3851 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}" 4260 "vextractf128\t{$0x1, %1, %0|%0, %1, 0x1}"
3852 [(set_attr "type" "sselog") 4261 [(set_attr "type" "sselog")
4262 (set_attr "prefix_extra" "1")
4263 (set_attr "length_immediate" "1")
3853 (set_attr "memory" "none,store") 4264 (set_attr "memory" "none,store")
3854 (set_attr "prefix" "vex") 4265 (set_attr "prefix" "vex")
3855 (set_attr "mode" "V8SF")]) 4266 (set_attr "mode" "V8SF")])
3856 4267
3857 (define_insn "*sse4_1_extractps" 4268 (define_insn "*sse4_1_extractps"
3860 (match_operand:V4SF 1 "register_operand" "x") 4271 (match_operand:V4SF 1 "register_operand" "x")
3861 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))] 4272 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
3862 "TARGET_SSE4_1" 4273 "TARGET_SSE4_1"
3863 "%vextractps\t{%2, %1, %0|%0, %1, %2}" 4274 "%vextractps\t{%2, %1, %0|%0, %1, %2}"
3864 [(set_attr "type" "sselog") 4275 [(set_attr "type" "sselog")
3865 (set_attr "prefix_extra" "1") 4276 (set_attr "prefix_data16" "1")
4277 (set_attr "prefix_extra" "1")
4278 (set_attr "length_immediate" "1")
3866 (set_attr "prefix" "maybe_vex") 4279 (set_attr "prefix" "maybe_vex")
3867 (set_attr "mode" "V4SF")]) 4280 (set_attr "mode" "V4SF")])
3868 4281
3869 (define_insn_and_split "*vec_extract_v4sf_mem" 4282 (define_insn_and_split "*vec_extract_v4sf_mem"
3870 [(set (match_operand:SF 0 "register_operand" "=x*rf") 4283 [(set (match_operand:SF 0 "register_operand" "=x*rf")
3897 ;; 4310 ;;
3898 ;; Parallel double-precision floating point element swizzling 4311 ;; Parallel double-precision floating point element swizzling
3899 ;; 4312 ;;
3900 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 4313 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
3901 4314
4315 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
3902 (define_insn "avx_unpckhpd256" 4316 (define_insn "avx_unpckhpd256"
3903 [(set (match_operand:V4DF 0 "register_operand" "=x") 4317 [(set (match_operand:V4DF 0 "register_operand" "=x")
3904 (vec_select:V4DF 4318 (vec_select:V4DF
3905 (vec_concat:V8DF 4319 (vec_concat:V8DF
3906 (match_operand:V4DF 1 "register_operand" "x") 4320 (match_operand:V4DF 1 "register_operand" "x")
3911 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}" 4325 "vunpckhpd\t{%2, %1, %0|%0, %1, %2}"
3912 [(set_attr "type" "sselog") 4326 [(set_attr "type" "sselog")
3913 (set_attr "prefix" "vex") 4327 (set_attr "prefix" "vex")
3914 (set_attr "mode" "V4DF")]) 4328 (set_attr "mode" "V4DF")])
3915 4329
3916 (define_expand "sse2_unpckhpd_exp" 4330 (define_expand "vec_interleave_highv2df"
3917 [(set (match_operand:V2DF 0 "nonimmediate_operand" "") 4331 [(set (match_operand:V2DF 0 "register_operand" "")
3918 (vec_select:V2DF 4332 (vec_select:V2DF
3919 (vec_concat:V4DF 4333 (vec_concat:V4DF
3920 (match_operand:V2DF 1 "nonimmediate_operand" "") 4334 (match_operand:V2DF 1 "nonimmediate_operand" "")
3921 (match_operand:V2DF 2 "nonimmediate_operand" "")) 4335 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3922 (parallel [(const_int 1) 4336 (parallel [(const_int 1)
3923 (const_int 3)])))] 4337 (const_int 3)])))]
3924 "TARGET_SSE2" 4338 "TARGET_SSE2"
3925 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);") 4339 {
3926 4340 if (!ix86_vec_interleave_v2df_operator_ok (operands, 1))
3927 (define_insn "*avx_unpckhpd" 4341 operands[2] = force_reg (V2DFmode, operands[2]);
3928 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") 4342 })
4343
4344 (define_insn "*avx_interleave_highv2df"
4345 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
3929 (vec_select:V2DF 4346 (vec_select:V2DF
3930 (vec_concat:V4DF 4347 (vec_concat:V4DF
3931 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,x") 4348 (match_operand:V2DF 1 "nonimmediate_operand" " x,o,o,x")
3932 (match_operand:V2DF 2 "nonimmediate_operand" " x,x,0")) 4349 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,x,0"))
3933 (parallel [(const_int 1) 4350 (parallel [(const_int 1)
3934 (const_int 3)])))] 4351 (const_int 3)])))]
3935 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 4352 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3936 "@ 4353 "@
3937 vunpckhpd\t{%2, %1, %0|%0, %1, %2} 4354 vunpckhpd\t{%2, %1, %0|%0, %1, %2}
4355 vmovddup\t{%H1, %0|%0, %H1}
3938 vmovlpd\t{%H1, %2, %0|%0, %2, %H1} 4356 vmovlpd\t{%H1, %2, %0|%0, %2, %H1}
3939 vmovhpd\t{%1, %0|%0, %1}" 4357 vmovhpd\t{%1, %0|%0, %1}"
3940 [(set_attr "type" "sselog,ssemov,ssemov") 4358 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
3941 (set_attr "prefix" "vex") 4359 (set_attr "prefix" "vex")
3942 (set_attr "mode" "V2DF,V1DF,V1DF")]) 4360 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
3943 4361
3944 (define_insn "sse2_unpckhpd" 4362 (define_insn "*sse3_interleave_highv2df"
4363 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,m")
4364 (vec_select:V2DF
4365 (vec_concat:V4DF
4366 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,o,x")
4367 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,0,0"))
4368 (parallel [(const_int 1)
4369 (const_int 3)])))]
4370 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
4371 "@
4372 unpckhpd\t{%2, %0|%0, %2}
4373 movddup\t{%H1, %0|%0, %H1}
4374 movlpd\t{%H1, %0|%0, %H1}
4375 movhpd\t{%1, %0|%0, %1}"
4376 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4377 (set_attr "prefix_data16" "*,*,1,1")
4378 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
4379
4380 (define_insn "*sse2_interleave_highv2df"
3945 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m") 4381 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,m")
3946 (vec_select:V2DF 4382 (vec_select:V2DF
3947 (vec_concat:V4DF 4383 (vec_concat:V4DF
3948 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x") 4384 (match_operand:V2DF 1 "nonimmediate_operand" " 0,o,x")
3949 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0")) 4385 (match_operand:V2DF 2 "nonimmediate_operand" " x,0,0"))
3950 (parallel [(const_int 1) 4386 (parallel [(const_int 1)
3951 (const_int 3)])))] 4387 (const_int 3)])))]
3952 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))" 4388 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 1)"
3953 "@ 4389 "@
3954 unpckhpd\t{%2, %0|%0, %2} 4390 unpckhpd\t{%2, %0|%0, %2}
3955 movlpd\t{%H1, %0|%0, %H1} 4391 movlpd\t{%H1, %0|%0, %H1}
3956 movhpd\t{%1, %0|%0, %1}" 4392 movhpd\t{%1, %0|%0, %1}"
3957 [(set_attr "type" "sselog,ssemov,ssemov") 4393 [(set_attr "type" "sselog,ssemov,ssemov")
4394 (set_attr "prefix_data16" "*,1,1")
3958 (set_attr "mode" "V2DF,V1DF,V1DF")]) 4395 (set_attr "mode" "V2DF,V1DF,V1DF")])
3959 4396
3960 (define_insn "avx_movddup256" 4397 ;; Recall that the 256-bit unpck insns only shuffle within their lanes.
3961 [(set (match_operand:V4DF 0 "register_operand" "=x") 4398 (define_expand "avx_movddup256"
4399 [(set (match_operand:V4DF 0 "register_operand" "")
3962 (vec_select:V4DF 4400 (vec_select:V4DF
3963 (vec_concat:V8DF 4401 (vec_concat:V8DF
3964 (match_operand:V4DF 1 "nonimmediate_operand" "xm") 4402 (match_operand:V4DF 1 "nonimmediate_operand" "")
3965 (match_dup 1)) 4403 (match_dup 1))
3966 (parallel [(const_int 0) (const_int 2) 4404 (parallel [(const_int 0) (const_int 4)
3967 (const_int 4) (const_int 6)])))] 4405 (const_int 2) (const_int 6)])))]
3968 "TARGET_AVX" 4406 "TARGET_AVX"
3969 "vmovddup\t{%1, %0|%0, %1}" 4407 "")
3970 [(set_attr "type" "sselog1") 4408
4409 (define_expand "avx_unpcklpd256"
4410 [(set (match_operand:V4DF 0 "register_operand" "")
4411 (vec_select:V4DF
4412 (vec_concat:V8DF
4413 (match_operand:V4DF 1 "register_operand" "")
4414 (match_operand:V4DF 2 "nonimmediate_operand" ""))
4415 (parallel [(const_int 0) (const_int 4)
4416 (const_int 2) (const_int 6)])))]
4417 "TARGET_AVX"
4418 "")
4419
4420 (define_insn "*avx_unpcklpd256"
4421 [(set (match_operand:V4DF 0 "register_operand" "=x,x")
4422 (vec_select:V4DF
4423 (vec_concat:V8DF
4424 (match_operand:V4DF 1 "nonimmediate_operand" "xm,x")
4425 (match_operand:V4DF 2 "nonimmediate_operand" " 1,xm"))
4426 (parallel [(const_int 0) (const_int 4)
4427 (const_int 2) (const_int 6)])))]
4428 "TARGET_AVX
4429 && (!MEM_P (operands[1]) || rtx_equal_p (operands[1], operands[2]))"
4430 "@
4431 vmovddup\t{%1, %0|%0, %1}
4432 vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4433 [(set_attr "type" "sselog")
3971 (set_attr "prefix" "vex") 4434 (set_attr "prefix" "vex")
3972 (set_attr "mode" "V4DF")]) 4435 (set_attr "mode" "V4DF")])
3973 4436
3974 (define_insn "*avx_movddup" 4437 (define_expand "vec_interleave_lowv2df"
3975 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o") 4438 [(set (match_operand:V2DF 0 "register_operand" "")
3976 (vec_select:V2DF 4439 (vec_select:V2DF
3977 (vec_concat:V4DF 4440 (vec_concat:V4DF
3978 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x") 4441 (match_operand:V2DF 1 "nonimmediate_operand" "")
3979 (match_dup 1)) 4442 (match_operand:V2DF 2 "nonimmediate_operand" ""))
3980 (parallel [(const_int 0) 4443 (parallel [(const_int 0)
3981 (const_int 2)])))] 4444 (const_int 2)])))]
3982 "TARGET_AVX && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4445 "TARGET_SSE2"
3983 "@ 4446 {
3984 vmovddup\t{%1, %0|%0, %1} 4447 if (!ix86_vec_interleave_v2df_operator_ok (operands, 0))
3985 #" 4448 operands[1] = force_reg (V2DFmode, operands[1]);
3986 [(set_attr "type" "sselog1,ssemov") 4449 })
3987 (set_attr "prefix" "vex") 4450
3988 (set_attr "mode" "V2DF")]) 4451 (define_insn "*avx_interleave_lowv2df"
3989 4452 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
3990 (define_insn "*sse3_movddup"
3991 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,o")
3992 (vec_select:V2DF 4453 (vec_select:V2DF
3993 (vec_concat:V4DF 4454 (vec_concat:V4DF
3994 (match_operand:V2DF 1 "nonimmediate_operand" "xm,x") 4455 (match_operand:V2DF 1 "nonimmediate_operand" " x,m,x,0")
3995 (match_dup 1)) 4456 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
3996 (parallel [(const_int 0) 4457 (parallel [(const_int 0)
3997 (const_int 2)])))] 4458 (const_int 2)])))]
3998 "TARGET_SSE3 && !(MEM_P (operands[0]) && MEM_P (operands[1]))" 4459 "TARGET_AVX && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
3999 "@ 4460 "@
4461 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4462 vmovddup\t{%1, %0|%0, %1}
4463 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4464 vmovlpd\t{%2, %H0|%H0, %2}"
4465 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4466 (set_attr "prefix" "vex")
4467 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
4468
4469 (define_insn "*sse3_interleave_lowv2df"
4470 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,x,o")
4471 (vec_select:V2DF
4472 (vec_concat:V4DF
4473 (match_operand:V2DF 1 "nonimmediate_operand" " 0,m,0,0")
4474 (match_operand:V2DF 2 "nonimmediate_operand" " x,1,m,x"))
4475 (parallel [(const_int 0)
4476 (const_int 2)])))]
4477 "TARGET_SSE3 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4478 "@
4479 unpcklpd\t{%2, %0|%0, %2}
4000 movddup\t{%1, %0|%0, %1} 4480 movddup\t{%1, %0|%0, %1}
4001 #" 4481 movhpd\t{%2, %0|%0, %2}
4002 [(set_attr "type" "sselog1,ssemov") 4482 movlpd\t{%2, %H0|%H0, %2}"
4003 (set_attr "mode" "V2DF")]) 4483 [(set_attr "type" "sselog,sselog,ssemov,ssemov")
4484 (set_attr "prefix_data16" "*,*,1,1")
4485 (set_attr "mode" "V2DF,V2DF,V1DF,V1DF")])
4486
4487 (define_insn "*sse2_interleave_lowv2df"
4488 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4489 (vec_select:V2DF
4490 (vec_concat:V4DF
4491 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4492 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4493 (parallel [(const_int 0)
4494 (const_int 2)])))]
4495 "TARGET_SSE2 && ix86_vec_interleave_v2df_operator_ok (operands, 0)"
4496 "@
4497 unpcklpd\t{%2, %0|%0, %2}
4498 movhpd\t{%2, %0|%0, %2}
4499 movlpd\t{%2, %H0|%H0, %2}"
4500 [(set_attr "type" "sselog,ssemov,ssemov")
4501 (set_attr "prefix_data16" "*,1,1")
4502 (set_attr "mode" "V2DF,V1DF,V1DF")])
4004 4503
4005 (define_split 4504 (define_split
4006 [(set (match_operand:V2DF 0 "memory_operand" "") 4505 [(set (match_operand:V2DF 0 "memory_operand" "")
4007 (vec_select:V2DF 4506 (vec_select:V2DF
4008 (vec_concat:V4DF 4507 (vec_concat:V4DF
4017 emit_move_insn (adjust_address (operands[0], DFmode, 0), low); 4516 emit_move_insn (adjust_address (operands[0], DFmode, 0), low);
4018 emit_move_insn (adjust_address (operands[0], DFmode, 8), low); 4517 emit_move_insn (adjust_address (operands[0], DFmode, 8), low);
4019 DONE; 4518 DONE;
4020 }) 4519 })
4021 4520
4022 (define_insn "avx_unpcklpd256" 4521 (define_split
4023 [(set (match_operand:V4DF 0 "register_operand" "=x") 4522 [(set (match_operand:V2DF 0 "register_operand" "")
4024 (vec_select:V4DF
4025 (vec_concat:V8DF
4026 (match_operand:V4DF 1 "register_operand" "x")
4027 (match_operand:V4DF 2 "nonimmediate_operand" "xm"))
4028 (parallel [(const_int 0) (const_int 4)
4029 (const_int 2) (const_int 6)])))]
4030 "TARGET_AVX"
4031 "vunpcklpd\t{%2, %1, %0|%0, %1, %2}"
4032 [(set_attr "type" "sselog")
4033 (set_attr "prefix" "vex")
4034 (set_attr "mode" "V4DF")])
4035
4036 (define_expand "sse2_unpcklpd_exp"
4037 [(set (match_operand:V2DF 0 "nonimmediate_operand" "")
4038 (vec_select:V2DF 4523 (vec_select:V2DF
4039 (vec_concat:V4DF 4524 (vec_concat:V4DF
4040 (match_operand:V2DF 1 "nonimmediate_operand" "") 4525 (match_operand:V2DF 1 "memory_operand" "")
4041 (match_operand:V2DF 2 "nonimmediate_operand" "")) 4526 (match_dup 1))
4042 (parallel [(const_int 0) 4527 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "")
4043 (const_int 2)])))] 4528 (match_operand:SI 3 "const_int_operand" "")])))]
4044 "TARGET_SSE2" 4529 "TARGET_SSE3 && INTVAL (operands[2]) + 2 == INTVAL (operands[3])"
4045 "ix86_fixup_binary_operands (UNKNOWN, V2DFmode, operands);") 4530 [(set (match_dup 0) (vec_duplicate:V2DF (match_dup 1)))]
4046 4531 {
4047 (define_insn "*avx_unpcklpd" 4532 operands[1] = adjust_address (operands[1], DFmode, INTVAL (operands[2]) * 8);
4048 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o") 4533 })
4049 (vec_select:V2DF
4050 (vec_concat:V4DF
4051 (match_operand:V2DF 1 "nonimmediate_operand" " x,x,0")
4052 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4053 (parallel [(const_int 0)
4054 (const_int 2)])))]
4055 "TARGET_AVX && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4056 "@
4057 vunpcklpd\t{%2, %1, %0|%0, %1, %2}
4058 vmovhpd\t{%2, %1, %0|%0, %1, %2}
4059 vmovlpd\t{%2, %H0|%H0, %2}"
4060 [(set_attr "type" "sselog,ssemov,ssemov")
4061 (set_attr "prefix" "vex")
4062 (set_attr "mode" "V2DF,V1DF,V1DF")])
4063
4064 (define_insn "sse2_unpcklpd"
4065 [(set (match_operand:V2DF 0 "nonimmediate_operand" "=x,x,o")
4066 (vec_select:V2DF
4067 (vec_concat:V4DF
4068 (match_operand:V2DF 1 "nonimmediate_operand" " 0,0,0")
4069 (match_operand:V2DF 2 "nonimmediate_operand" " x,m,x"))
4070 (parallel [(const_int 0)
4071 (const_int 2)])))]
4072 "TARGET_SSE2 && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
4073 "@
4074 unpcklpd\t{%2, %0|%0, %2}
4075 movhpd\t{%2, %0|%0, %2}
4076 movlpd\t{%2, %H0|%H0, %2}"
4077 [(set_attr "type" "sselog,ssemov,ssemov")
4078 (set_attr "mode" "V2DF,V1DF,V1DF")])
4079 4534
4080 (define_expand "avx_shufpd256" 4535 (define_expand "avx_shufpd256"
4081 [(match_operand:V4DF 0 "register_operand" "") 4536 [(match_operand:V4DF 0 "register_operand" "")
4082 (match_operand:V4DF 1 "register_operand" "") 4537 (match_operand:V4DF 1 "register_operand" "")
4083 (match_operand:V4DF 2 "nonimmediate_operand" "") 4538 (match_operand:V4DF 2 "nonimmediate_operand" "")
4113 operands[3] = GEN_INT (mask); 4568 operands[3] = GEN_INT (mask);
4114 4569
4115 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 4570 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4116 } 4571 }
4117 [(set_attr "type" "sselog") 4572 [(set_attr "type" "sselog")
4573 (set_attr "length_immediate" "1")
4118 (set_attr "prefix" "vex") 4574 (set_attr "prefix" "vex")
4119 (set_attr "mode" "V4DF")]) 4575 (set_attr "mode" "V4DF")])
4120 4576
4121 (define_expand "sse2_shufpd" 4577 (define_expand "sse2_shufpd"
4122 [(match_operand:V2DF 0 "register_operand" "") 4578 [(match_operand:V2DF 0 "register_operand" "")
4131 GEN_INT (mask & 2 ? 3 : 2))); 4587 GEN_INT (mask & 2 ? 3 : 2)));
4132 DONE; 4588 DONE;
4133 }) 4589 })
4134 4590
4135 (define_expand "vec_extract_even<mode>" 4591 (define_expand "vec_extract_even<mode>"
4136 [(set (match_operand:SSEMODE4S 0 "register_operand" "") 4592 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4137 (vec_select:SSEMODE4S 4593 (match_operand:SSEMODE_EO 1 "register_operand" "")
4138 (vec_concat:<ssedoublesizemode> 4594 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4139 (match_operand:SSEMODE4S 1 "register_operand" "") 4595 ""
4140 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "")) 4596 {
4141 (parallel [(const_int 0) 4597 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 0);
4142 (const_int 2) 4598 DONE;
4143 (const_int 4) 4599 })
4144 (const_int 6)])))]
4145 "TARGET_SSE")
4146 4600
4147 (define_expand "vec_extract_odd<mode>" 4601 (define_expand "vec_extract_odd<mode>"
4148 [(set (match_operand:SSEMODE4S 0 "register_operand" "") 4602 [(match_operand:SSEMODE_EO 0 "register_operand" "")
4149 (vec_select:SSEMODE4S 4603 (match_operand:SSEMODE_EO 1 "register_operand" "")
4150 (vec_concat:<ssedoublesizemode> 4604 (match_operand:SSEMODE_EO 2 "register_operand" "")]
4151 (match_operand:SSEMODE4S 1 "register_operand" "") 4605 ""
4152 (match_operand:SSEMODE4S 2 "nonimmediate_operand" "")) 4606 {
4153 (parallel [(const_int 1) 4607 ix86_expand_vec_extract_even_odd (operands[0], operands[1], operands[2], 1);
4154 (const_int 3) 4608 DONE;
4155 (const_int 5) 4609 })
4156 (const_int 7)])))]
4157 "TARGET_SSE")
4158
4159 (define_expand "vec_extract_even<mode>"
4160 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4161 (vec_select:SSEMODE2D
4162 (vec_concat:<ssedoublesizemode>
4163 (match_operand:SSEMODE2D 1 "register_operand" "")
4164 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4165 (parallel [(const_int 0)
4166 (const_int 2)])))]
4167 "TARGET_SSE2")
4168
4169 (define_expand "vec_extract_odd<mode>"
4170 [(set (match_operand:SSEMODE2D 0 "register_operand" "")
4171 (vec_select:SSEMODE2D
4172 (vec_concat:<ssedoublesizemode>
4173 (match_operand:SSEMODE2D 1 "register_operand" "")
4174 (match_operand:SSEMODE2D 2 "nonimmediate_operand" ""))
4175 (parallel [(const_int 1)
4176 (const_int 3)])))]
4177 "TARGET_SSE2")
4178 4610
4179 ;; punpcklqdq and punpckhqdq are shorter than shufpd. 4611 ;; punpcklqdq and punpckhqdq are shorter than shufpd.
4180 (define_insn "*avx_punpckhqdq" 4612 (define_insn "*avx_interleave_highv2di"
4181 [(set (match_operand:V2DI 0 "register_operand" "=x") 4613 [(set (match_operand:V2DI 0 "register_operand" "=x")
4182 (vec_select:V2DI 4614 (vec_select:V2DI
4183 (vec_concat:V4DI 4615 (vec_concat:V4DI
4184 (match_operand:V2DI 1 "register_operand" "x") 4616 (match_operand:V2DI 1 "register_operand" "x")
4185 (match_operand:V2DI 2 "nonimmediate_operand" "xm")) 4617 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4189 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}" 4621 "vpunpckhqdq\t{%2, %1, %0|%0, %1, %2}"
4190 [(set_attr "type" "sselog") 4622 [(set_attr "type" "sselog")
4191 (set_attr "prefix" "vex") 4623 (set_attr "prefix" "vex")
4192 (set_attr "mode" "TI")]) 4624 (set_attr "mode" "TI")])
4193 4625
4194 (define_insn "sse2_punpckhqdq" 4626 (define_insn "vec_interleave_highv2di"
4195 [(set (match_operand:V2DI 0 "register_operand" "=x") 4627 [(set (match_operand:V2DI 0 "register_operand" "=x")
4196 (vec_select:V2DI 4628 (vec_select:V2DI
4197 (vec_concat:V4DI 4629 (vec_concat:V4DI
4198 (match_operand:V2DI 1 "register_operand" "0") 4630 (match_operand:V2DI 1 "register_operand" "0")
4199 (match_operand:V2DI 2 "nonimmediate_operand" "xm")) 4631 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4203 "punpckhqdq\t{%2, %0|%0, %2}" 4635 "punpckhqdq\t{%2, %0|%0, %2}"
4204 [(set_attr "type" "sselog") 4636 [(set_attr "type" "sselog")
4205 (set_attr "prefix_data16" "1") 4637 (set_attr "prefix_data16" "1")
4206 (set_attr "mode" "TI")]) 4638 (set_attr "mode" "TI")])
4207 4639
4208 (define_insn "*avx_punpcklqdq" 4640 (define_insn "*avx_interleave_lowv2di"
4209 [(set (match_operand:V2DI 0 "register_operand" "=x") 4641 [(set (match_operand:V2DI 0 "register_operand" "=x")
4210 (vec_select:V2DI 4642 (vec_select:V2DI
4211 (vec_concat:V4DI 4643 (vec_concat:V4DI
4212 (match_operand:V2DI 1 "register_operand" "x") 4644 (match_operand:V2DI 1 "register_operand" "x")
4213 (match_operand:V2DI 2 "nonimmediate_operand" "xm")) 4645 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4217 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}" 4649 "vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}"
4218 [(set_attr "type" "sselog") 4650 [(set_attr "type" "sselog")
4219 (set_attr "prefix" "vex") 4651 (set_attr "prefix" "vex")
4220 (set_attr "mode" "TI")]) 4652 (set_attr "mode" "TI")])
4221 4653
4222 (define_insn "sse2_punpcklqdq" 4654 (define_insn "vec_interleave_lowv2di"
4223 [(set (match_operand:V2DI 0 "register_operand" "=x") 4655 [(set (match_operand:V2DI 0 "register_operand" "=x")
4224 (vec_select:V2DI 4656 (vec_select:V2DI
4225 (vec_concat:V4DI 4657 (vec_concat:V4DI
4226 (match_operand:V2DI 1 "register_operand" "0") 4658 (match_operand:V2DI 1 "register_operand" "0")
4227 (match_operand:V2DI 2 "nonimmediate_operand" "xm")) 4659 (match_operand:V2DI 2 "nonimmediate_operand" "xm"))
4249 operands[3] = GEN_INT (mask); 4681 operands[3] = GEN_INT (mask);
4250 4682
4251 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 4683 return "vshufpd\t{%3, %2, %1, %0|%0, %1, %2, %3}";
4252 } 4684 }
4253 [(set_attr "type" "sselog") 4685 [(set_attr "type" "sselog")
4686 (set_attr "length_immediate" "1")
4254 (set_attr "prefix" "vex") 4687 (set_attr "prefix" "vex")
4255 (set_attr "mode" "V2DF")]) 4688 (set_attr "mode" "V2DF")])
4256 4689
4257 (define_insn "sse2_shufpd_<mode>" 4690 (define_insn "sse2_shufpd_<mode>"
4258 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x") 4691 [(set (match_operand:SSEMODE2D 0 "register_operand" "=x")
4270 operands[3] = GEN_INT (mask); 4703 operands[3] = GEN_INT (mask);
4271 4704
4272 return "shufpd\t{%3, %2, %0|%0, %2, %3}"; 4705 return "shufpd\t{%3, %2, %0|%0, %2, %3}";
4273 } 4706 }
4274 [(set_attr "type" "sselog") 4707 [(set_attr "type" "sselog")
4708 (set_attr "length_immediate" "1")
4275 (set_attr "mode" "V2DF")]) 4709 (set_attr "mode" "V2DF")])
4276 4710
4277 ;; Avoid combining registers from different units in a single alternative, 4711 ;; Avoid combining registers from different units in a single alternative,
4278 ;; see comment above inline_secondary_memory_needed function in i386.c 4712 ;; see comment above inline_secondary_memory_needed function in i386.c
4279 (define_insn "*avx_storehpd" 4713 (define_insn "*avx_storehpd"
4303 unpckhpd\t%0, %0 4737 unpckhpd\t%0, %0
4304 # 4738 #
4305 # 4739 #
4306 #" 4740 #"
4307 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov") 4741 [(set_attr "type" "ssemov,sselog1,ssemov,fmov,imov")
4742 (set_attr "prefix_data16" "1,*,*,*,*")
4308 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")]) 4743 (set_attr "mode" "V1DF,V2DF,DF,DF,DF")])
4309 4744
4310 (define_split 4745 (define_split
4311 [(set (match_operand:DF 0 "register_operand" "") 4746 [(set (match_operand:DF 0 "register_operand" "")
4312 (vec_select:DF 4747 (vec_select:DF
4331 # 4766 #
4332 # 4767 #
4333 # 4768 #
4334 #" 4769 #"
4335 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov") 4770 [(set_attr "type" "ssemov,ssemov,ssemov,fmov,imov")
4771 (set_attr "prefix_data16" "1,*,*,*,*")
4336 (set_attr "prefix" "maybe_vex") 4772 (set_attr "prefix" "maybe_vex")
4337 (set_attr "mode" "V1DF,DF,DF,DF,DF")]) 4773 (set_attr "mode" "V1DF,DF,DF,DF,DF")])
4338 4774
4339 (define_split 4775 (define_split
4340 [(set (match_operand:DF 0 "register_operand" "") 4776 [(set (match_operand:DF 0 "register_operand" "")
4397 shufpd\t{$1, %1, %0|%0, %1, 1} 4833 shufpd\t{$1, %1, %0|%0, %1, 1}
4398 # 4834 #
4399 # 4835 #
4400 #" 4836 #"
4401 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov") 4837 [(set_attr "type" "ssemov,sselog,sselog,ssemov,fmov,imov")
4838 (set_attr "prefix_data16" "1,*,*,*,*,*")
4839 (set_attr "length_immediate" "*,*,1,*,*,*")
4402 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")]) 4840 (set_attr "mode" "V1DF,V2DF,V2DF,DF,DF,DF")])
4403 4841
4404 (define_split 4842 (define_split
4405 [(set (match_operand:V2DF 0 "memory_operand" "") 4843 [(set (match_operand:V2DF 0 "memory_operand" "")
4406 (vec_concat:V2DF 4844 (vec_concat:V2DF
4460 movhpd\t{%H1, %0|%0, %H1} 4898 movhpd\t{%H1, %0|%0, %H1}
4461 # 4899 #
4462 # 4900 #
4463 #" 4901 #"
4464 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov") 4902 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov,fmov,imov")
4903 (set_attr "prefix_data16" "*,1,*,*,1,*,*,*")
4904 (set_attr "length_immediate" "*,*,*,1,*,*,*,*")
4465 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")]) 4905 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,DF,DF,DF")])
4466 4906
4467 (define_split 4907 (define_split
4468 [(set (match_operand:V2DF 0 "memory_operand" "") 4908 [(set (match_operand:V2DF 0 "memory_operand" "")
4469 (vec_concat:V2DF 4909 (vec_concat:V2DF
4535 movlpd\t{%2, %0|%0, %2} 4975 movlpd\t{%2, %0|%0, %2}
4536 shufpd\t{$2, %2, %0|%0, %2, 2} 4976 shufpd\t{$2, %2, %0|%0, %2, 2}
4537 movhps\t{%H1, %0|%0, %H1} 4977 movhps\t{%H1, %0|%0, %H1}
4538 movhps\t{%1, %H0|%H0, %1}" 4978 movhps\t{%1, %H0|%H0, %1}"
4539 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov") 4979 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
4980 (set_attr "prefix_data16" "*,1,1,*,*,*")
4981 (set_attr "length_immediate" "*,*,*,1,*,*")
4540 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")]) 4982 (set_attr "mode" "DF,V1DF,V1DF,V2DF,V1DF,V1DF")])
4541 4983
4542 (define_insn "*vec_dupv2df_sse3" 4984 (define_insn "*vec_dupv2df_sse3"
4543 [(set (match_operand:V2DF 0 "register_operand" "=x") 4985 [(set (match_operand:V2DF 0 "register_operand" "=x")
4544 (vec_duplicate:V2DF 4986 (vec_duplicate:V2DF
4594 movhpd\t{%2, %0|%0, %2} 5036 movhpd\t{%2, %0|%0, %2}
4595 movsd\t{%1, %0|%0, %1} 5037 movsd\t{%1, %0|%0, %1}
4596 movlhps\t{%2, %0|%0, %2} 5038 movlhps\t{%2, %0|%0, %2}
4597 movhps\t{%2, %0|%0, %2}" 5039 movhps\t{%2, %0|%0, %2}"
4598 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov") 5040 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,ssemov")
5041 (set_attr "prefix_data16" "*,1,*,*,*")
4599 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")]) 5042 (set_attr "mode" "V2DF,V1DF,DF,V4SF,V2SF")])
4600 5043
4601 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 5044 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
4602 ;; 5045 ;;
4603 ;; Parallel integral arithmetic 5046 ;; Parallel integral arithmetic
4675 (define_insn_and_split "mulv16qi3" 5118 (define_insn_and_split "mulv16qi3"
4676 [(set (match_operand:V16QI 0 "register_operand" "") 5119 [(set (match_operand:V16QI 0 "register_operand" "")
4677 (mult:V16QI (match_operand:V16QI 1 "register_operand" "") 5120 (mult:V16QI (match_operand:V16QI 1 "register_operand" "")
4678 (match_operand:V16QI 2 "register_operand" "")))] 5121 (match_operand:V16QI 2 "register_operand" "")))]
4679 "TARGET_SSE2 5122 "TARGET_SSE2
4680 && !(reload_completed || reload_in_progress)" 5123 && can_create_pseudo_p ()"
4681 "#" 5124 "#"
4682 "&& 1" 5125 "&& 1"
4683 [(const_int 0)] 5126 [(const_int 0)]
4684 { 5127 {
4685 rtx t[12], op0, op[3]; 5128 rtx t[6];
4686 int i; 5129 int i;
4687 5130
4688 if (TARGET_SSE5) 5131 for (i = 0; i < 6; ++i)
4689 {
4690 /* On SSE5, we can take advantage of the pperm instruction to pack and
4691 unpack the bytes. Unpack data such that we've got a source byte in
4692 each low byte of each word. We don't care what goes into the high
4693 byte, so put 0 there. */
4694 for (i = 0; i < 6; ++i)
4695 t[i] = gen_reg_rtx (V8HImode);
4696
4697 for (i = 0; i < 2; i++)
4698 {
4699 op[0] = t[i];
4700 op[1] = operands[i+1];
4701 ix86_expand_sse5_unpack (op, true, true); /* high bytes */
4702
4703 op[0] = t[i+2];
4704 ix86_expand_sse5_unpack (op, true, false); /* low bytes */
4705 }
4706
4707 /* Multiply words. */
4708 emit_insn (gen_mulv8hi3 (t[4], t[0], t[1])); /* high bytes */
4709 emit_insn (gen_mulv8hi3 (t[5], t[2], t[3])); /* low bytes */
4710
4711 /* Pack the low byte of each word back into a single xmm */
4712 op[0] = operands[0];
4713 op[1] = t[5];
4714 op[2] = t[4];
4715 ix86_expand_sse5_pack (op);
4716 DONE;
4717 }
4718
4719 for (i = 0; i < 12; ++i)
4720 t[i] = gen_reg_rtx (V16QImode); 5132 t[i] = gen_reg_rtx (V16QImode);
4721 5133
4722 /* Unpack data such that we've got a source byte in each low byte of 5134 /* Unpack data such that we've got a source byte in each low byte of
4723 each word. We don't care what goes into the high byte of each word. 5135 each word. We don't care what goes into the high byte of each word.
4724 Rather than trying to get zero in there, most convenient is to let 5136 Rather than trying to get zero in there, most convenient is to let
4725 it be a copy of the low byte. */ 5137 it be a copy of the low byte. */
4726 emit_insn (gen_sse2_punpckhbw (t[0], operands[1], operands[1])); 5138 emit_insn (gen_vec_interleave_highv16qi (t[0], operands[1], operands[1]));
4727 emit_insn (gen_sse2_punpckhbw (t[1], operands[2], operands[2])); 5139 emit_insn (gen_vec_interleave_highv16qi (t[1], operands[2], operands[2]));
4728 emit_insn (gen_sse2_punpcklbw (t[2], operands[1], operands[1])); 5140 emit_insn (gen_vec_interleave_lowv16qi (t[2], operands[1], operands[1]));
4729 emit_insn (gen_sse2_punpcklbw (t[3], operands[2], operands[2])); 5141 emit_insn (gen_vec_interleave_lowv16qi (t[3], operands[2], operands[2]));
4730 5142
4731 /* Multiply words. The end-of-line annotations here give a picture of what 5143 /* Multiply words. The end-of-line annotations here give a picture of what
4732 the output of that instruction looks like. Dot means don't care; the 5144 the output of that instruction looks like. Dot means don't care; the
4733 letters are the bytes of the result with A being the most significant. */ 5145 letters are the bytes of the result with A being the most significant. */
4734 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */ 5146 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[4]), /* .A.B.C.D.E.F.G.H */
4736 gen_lowpart (V8HImode, t[1]))); 5148 gen_lowpart (V8HImode, t[1])));
4737 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */ 5149 emit_insn (gen_mulv8hi3 (gen_lowpart (V8HImode, t[5]), /* .I.J.K.L.M.N.O.P */
4738 gen_lowpart (V8HImode, t[2]), 5150 gen_lowpart (V8HImode, t[2]),
4739 gen_lowpart (V8HImode, t[3]))); 5151 gen_lowpart (V8HImode, t[3])));
4740 5152
4741 /* Extract the relevant bytes and merge them back together. */ 5153 /* Extract the even bytes and merge them back together. */
4742 emit_insn (gen_sse2_punpckhbw (t[6], t[5], t[4])); /* ..AI..BJ..CK..DL */ 5154 ix86_expand_vec_extract_even_odd (operands[0], t[5], t[4], 0);
4743 emit_insn (gen_sse2_punpcklbw (t[7], t[5], t[4])); /* ..EM..FN..GO..HP */
4744 emit_insn (gen_sse2_punpckhbw (t[8], t[7], t[6])); /* ....AEIM....BFJN */
4745 emit_insn (gen_sse2_punpcklbw (t[9], t[7], t[6])); /* ....CGKO....DHLP */
4746 emit_insn (gen_sse2_punpckhbw (t[10], t[9], t[8])); /* ........ACEGIKMO */
4747 emit_insn (gen_sse2_punpcklbw (t[11], t[9], t[8])); /* ........BDFHJLNP */
4748
4749 op0 = operands[0];
4750 emit_insn (gen_sse2_punpcklbw (op0, t[11], t[10])); /* ABCDEFGHIJKLMNOP */
4751 DONE; 5155 DONE;
4752 }) 5156 })
4753 5157
4754 (define_expand "mulv8hi3" 5158 (define_expand "mulv8hi3"
4755 [(set (match_operand:V8HI 0 "register_operand" "") 5159 [(set (match_operand:V8HI 0 "register_operand" "")
4942 (match_operand:V4SI 2 "nonimmediate_operand" "xm") 5346 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
4943 (parallel [(const_int 0) (const_int 2)])))))] 5347 (parallel [(const_int 0) (const_int 2)])))))]
4944 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)" 5348 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
4945 "vpmuldq\t{%2, %1, %0|%0, %1, %2}" 5349 "vpmuldq\t{%2, %1, %0|%0, %1, %2}"
4946 [(set_attr "type" "sseimul") 5350 [(set_attr "type" "sseimul")
5351 (set_attr "prefix_extra" "1")
4947 (set_attr "prefix" "vex") 5352 (set_attr "prefix" "vex")
4948 (set_attr "mode" "TI")]) 5353 (set_attr "mode" "TI")])
4949 5354
4950 (define_insn "*sse4_1_mulv2siv2di3" 5355 (define_insn "*sse4_1_mulv2siv2di3"
4951 [(set (match_operand:V2DI 0 "register_operand" "=x") 5356 [(set (match_operand:V2DI 0 "register_operand" "=x")
5067 (const_int 5) 5472 (const_int 5)
5068 (const_int 7)]))))))] 5473 (const_int 7)]))))))]
5069 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)" 5474 "TARGET_SSE2 && ix86_binary_operator_ok (MULT, V8HImode, operands)"
5070 "pmaddwd\t{%2, %0|%0, %2}" 5475 "pmaddwd\t{%2, %0|%0, %2}"
5071 [(set_attr "type" "sseiadd") 5476 [(set_attr "type" "sseiadd")
5477 (set_attr "atom_unit" "simul")
5072 (set_attr "prefix_data16" "1") 5478 (set_attr "prefix_data16" "1")
5073 (set_attr "mode" "TI")]) 5479 (set_attr "mode" "TI")])
5074 5480
5075 (define_expand "mulv4si3" 5481 (define_expand "mulv4si3"
5076 [(set (match_operand:V4SI 0 "register_operand" "") 5482 [(set (match_operand:V4SI 0 "register_operand" "")
5077 (mult:V4SI (match_operand:V4SI 1 "register_operand" "") 5483 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5078 (match_operand:V4SI 2 "register_operand" "")))] 5484 (match_operand:V4SI 2 "register_operand" "")))]
5079 "TARGET_SSE2" 5485 "TARGET_SSE2"
5080 { 5486 {
5081 if (TARGET_SSE4_1 || TARGET_SSE5) 5487 if (TARGET_SSE4_1 || TARGET_AVX)
5082 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands); 5488 ix86_fixup_binary_operands_no_copy (MULT, V4SImode, operands);
5083 }) 5489 })
5084 5490
5085 (define_insn "*avx_mulv4si3" 5491 (define_insn "*avx_mulv4si3"
5086 [(set (match_operand:V4SI 0 "register_operand" "=x") 5492 [(set (match_operand:V4SI 0 "register_operand" "=x")
5087 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x") 5493 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
5088 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))] 5494 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5089 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)" 5495 "TARGET_AVX && ix86_binary_operator_ok (MULT, V4SImode, operands)"
5090 "vpmulld\t{%2, %1, %0|%0, %1, %2}" 5496 "vpmulld\t{%2, %1, %0|%0, %1, %2}"
5091 [(set_attr "type" "sseimul") 5497 [(set_attr "type" "sseimul")
5498 (set_attr "prefix_extra" "1")
5092 (set_attr "prefix" "vex") 5499 (set_attr "prefix" "vex")
5093 (set_attr "mode" "TI")]) 5500 (set_attr "mode" "TI")])
5094 5501
5095 (define_insn "*sse4_1_mulv4si3" 5502 (define_insn "*sse4_1_mulv4si3"
5096 [(set (match_operand:V4SI 0 "register_operand" "=x") 5503 [(set (match_operand:V4SI 0 "register_operand" "=x")
5100 "pmulld\t{%2, %0|%0, %2}" 5507 "pmulld\t{%2, %0|%0, %2}"
5101 [(set_attr "type" "sseimul") 5508 [(set_attr "type" "sseimul")
5102 (set_attr "prefix_extra" "1") 5509 (set_attr "prefix_extra" "1")
5103 (set_attr "mode" "TI")]) 5510 (set_attr "mode" "TI")])
5104 5511
5105 ;; We don't have a straight 32-bit parallel multiply on SSE5, so fake it with a
5106 ;; multiply/add. In general, we expect the define_split to occur before
5107 ;; register allocation, so we have to handle the corner case where the target
5108 ;; is the same as one of the inputs.
5109 (define_insn_and_split "*sse5_mulv4si3"
5110 [(set (match_operand:V4SI 0 "register_operand" "=&x")
5111 (mult:V4SI (match_operand:V4SI 1 "register_operand" "%x")
5112 (match_operand:V4SI 2 "nonimmediate_operand" "xm")))]
5113 "TARGET_SSE5"
5114 "#"
5115 "&& (reload_completed
5116 || (!reg_mentioned_p (operands[0], operands[1])
5117 && !reg_mentioned_p (operands[0], operands[2])))"
5118 [(set (match_dup 0)
5119 (match_dup 3))
5120 (set (match_dup 0)
5121 (plus:V4SI (mult:V4SI (match_dup 1)
5122 (match_dup 2))
5123 (match_dup 0)))]
5124 {
5125 operands[3] = CONST0_RTX (V4SImode);
5126 }
5127 [(set_attr "type" "ssemuladd")
5128 (set_attr "mode" "TI")])
5129
5130 (define_insn_and_split "*sse2_mulv4si3" 5512 (define_insn_and_split "*sse2_mulv4si3"
5131 [(set (match_operand:V4SI 0 "register_operand" "") 5513 [(set (match_operand:V4SI 0 "register_operand" "")
5132 (mult:V4SI (match_operand:V4SI 1 "register_operand" "") 5514 (mult:V4SI (match_operand:V4SI 1 "register_operand" "")
5133 (match_operand:V4SI 2 "register_operand" "")))] 5515 (match_operand:V4SI 2 "register_operand" "")))]
5134 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_SSE5 5516 "TARGET_SSE2 && !TARGET_SSE4_1 && !TARGET_AVX
5135 && !(reload_completed || reload_in_progress)" 5517 && can_create_pseudo_p ()"
5136 "#" 5518 "#"
5137 "&& 1" 5519 "&& 1"
5138 [(const_int 0)] 5520 [(const_int 0)]
5139 { 5521 {
5140 rtx t1, t2, t3, t4, t5, t6, thirtytwo; 5522 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5156 op1, op2)); 5538 op1, op2));
5157 5539
5158 /* Shift both input vectors down one element, so that elements 3 5540 /* Shift both input vectors down one element, so that elements 3
5159 and 1 are now in the slots for elements 2 and 0. For K8, at 5541 and 1 are now in the slots for elements 2 and 0. For K8, at
5160 least, this is faster than using a shuffle. */ 5542 least, this is faster than using a shuffle. */
5161 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2), 5543 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5162 gen_lowpart (TImode, op1), 5544 gen_lowpart (V1TImode, op1),
5163 thirtytwo)); 5545 thirtytwo));
5164 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3), 5546 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5165 gen_lowpart (TImode, op2), 5547 gen_lowpart (V1TImode, op2),
5166 thirtytwo)); 5548 thirtytwo));
5167 /* Multiply elements 3 and 1. */ 5549 /* Multiply elements 3 and 1. */
5168 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4), 5550 emit_insn (gen_sse2_umulv2siv2di3 (gen_lowpart (V2DImode, t4),
5169 t2, t3)); 5551 t2, t3));
5170 5552
5171 /* Move the results in element 2 down to element 1; we don't care 5553 /* Move the results in element 2 down to element 1; we don't care
5174 const0_rtx, const0_rtx)); 5556 const0_rtx, const0_rtx));
5175 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx, 5557 emit_insn (gen_sse2_pshufd_1 (t6, t4, const0_rtx, const2_rtx,
5176 const0_rtx, const0_rtx)); 5558 const0_rtx, const0_rtx));
5177 5559
5178 /* Merge the parts back together. */ 5560 /* Merge the parts back together. */
5179 emit_insn (gen_sse2_punpckldq (op0, t5, t6)); 5561 emit_insn (gen_vec_interleave_lowv4si (op0, t5, t6));
5180 DONE; 5562 DONE;
5181 }) 5563 })
5182 5564
5183 (define_insn_and_split "mulv2di3" 5565 (define_insn_and_split "mulv2di3"
5184 [(set (match_operand:V2DI 0 "register_operand" "") 5566 [(set (match_operand:V2DI 0 "register_operand" "")
5185 (mult:V2DI (match_operand:V2DI 1 "register_operand" "") 5567 (mult:V2DI (match_operand:V2DI 1 "register_operand" "")
5186 (match_operand:V2DI 2 "register_operand" "")))] 5568 (match_operand:V2DI 2 "register_operand" "")))]
5187 "TARGET_SSE2 5569 "TARGET_SSE2
5188 && !(reload_completed || reload_in_progress)" 5570 && can_create_pseudo_p ()"
5189 "#" 5571 "#"
5190 "&& 1" 5572 "&& 1"
5191 [(const_int 0)] 5573 [(const_int 0)]
5192 { 5574 {
5193 rtx t1, t2, t3, t4, t5, t6, thirtytwo; 5575 rtx t1, t2, t3, t4, t5, t6, thirtytwo;
5194 rtx op0, op1, op2; 5576 rtx op0, op1, op2;
5195 5577
5196 if (TARGET_SSE5) 5578 op0 = operands[0];
5579 op1 = operands[1];
5580 op2 = operands[2];
5581
5582 if (TARGET_XOP)
5197 { 5583 {
5198 /* op1: A,B,C,D, op2: E,F,G,H */ 5584 /* op1: A,B,C,D, op2: E,F,G,H */
5199 op0 = operands[0]; 5585 op1 = gen_lowpart (V4SImode, op1);
5200 op1 = gen_lowpart (V4SImode, operands[1]); 5586 op2 = gen_lowpart (V4SImode, op2);
5201 op2 = gen_lowpart (V4SImode, operands[2]); 5587
5202 t1 = gen_reg_rtx (V4SImode); 5588 t1 = gen_reg_rtx (V4SImode);
5203 t2 = gen_reg_rtx (V4SImode); 5589 t2 = gen_reg_rtx (V4SImode);
5204 t3 = gen_reg_rtx (V4SImode); 5590 t3 = gen_reg_rtx (V2DImode);
5205 t4 = gen_reg_rtx (V2DImode); 5591 t4 = gen_reg_rtx (V2DImode);
5206 t5 = gen_reg_rtx (V2DImode);
5207 5592
5208 /* t1: B,A,D,C */ 5593 /* t1: B,A,D,C */
5209 emit_insn (gen_sse2_pshufd_1 (t1, op1, 5594 emit_insn (gen_sse2_pshufd_1 (t1, op1,
5210 GEN_INT (1), 5595 GEN_INT (1),
5211 GEN_INT (0), 5596 GEN_INT (0),
5212 GEN_INT (3), 5597 GEN_INT (3),
5213 GEN_INT (2))); 5598 GEN_INT (2)));
5214 5599
5215 /* t2: 0 */ 5600 /* t2: (B*E),(A*F),(D*G),(C*H) */
5216 emit_move_insn (t2, CONST0_RTX (V4SImode)); 5601 emit_insn (gen_mulv4si3 (t2, t1, op2));
5217
5218 /* t3: (B*E),(A*F),(D*G),(C*H) */
5219 emit_insn (gen_sse5_pmacsdd (t3, t1, op2, t2));
5220 5602
5221 /* t4: (B*E)+(A*F), (D*G)+(C*H) */ 5603 /* t3: (B*E)+(A*F), (D*G)+(C*H) */
5222 emit_insn (gen_sse5_phadddq (t4, t3)); 5604 emit_insn (gen_xop_phadddq (t3, t2));
5223 5605
5224 /* t5: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */ 5606 /* t4: ((B*E)+(A*F))<<32, ((D*G)+(C*H))<<32 */
5225 emit_insn (gen_ashlv2di3 (t5, t4, GEN_INT (32))); 5607 emit_insn (gen_ashlv2di3 (t4, t3, GEN_INT (32)));
5226 5608
5227 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */ 5609 /* op0: (((B*E)+(A*F))<<32)+(B*F), (((D*G)+(C*H))<<32)+(D*H) */
5228 emit_insn (gen_sse5_pmacsdql (op0, op1, op2, t5)); 5610 emit_insn (gen_xop_pmacsdql (op0, op1, op2, t4));
5229 DONE;
5230 } 5611 }
5231 5612 else
5232 op0 = operands[0]; 5613 {
5233 op1 = operands[1]; 5614 t1 = gen_reg_rtx (V2DImode);
5234 op2 = operands[2]; 5615 t2 = gen_reg_rtx (V2DImode);
5235 t1 = gen_reg_rtx (V2DImode); 5616 t3 = gen_reg_rtx (V2DImode);
5236 t2 = gen_reg_rtx (V2DImode); 5617 t4 = gen_reg_rtx (V2DImode);
5237 t3 = gen_reg_rtx (V2DImode); 5618 t5 = gen_reg_rtx (V2DImode);
5238 t4 = gen_reg_rtx (V2DImode); 5619 t6 = gen_reg_rtx (V2DImode);
5239 t5 = gen_reg_rtx (V2DImode); 5620 thirtytwo = GEN_INT (32);
5240 t6 = gen_reg_rtx (V2DImode); 5621
5241 thirtytwo = GEN_INT (32); 5622 /* Multiply low parts. */
5242 5623 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1),
5243 /* Multiply low parts. */ 5624 gen_lowpart (V4SImode, op2)));
5244 emit_insn (gen_sse2_umulv2siv2di3 (t1, gen_lowpart (V4SImode, op1), 5625
5245 gen_lowpart (V4SImode, op2))); 5626 /* Shift input vectors right 32 bits so we can multiply high parts. */
5246 5627 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo));
5247 /* Shift input vectors right 32 bits so we can multiply high parts. */ 5628 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo));
5248 emit_insn (gen_lshrv2di3 (t2, op1, thirtytwo)); 5629
5249 emit_insn (gen_lshrv2di3 (t3, op2, thirtytwo)); 5630 /* Multiply high parts by low parts. */
5250 5631 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1),
5251 /* Multiply high parts by low parts. */ 5632 gen_lowpart (V4SImode, t3)));
5252 emit_insn (gen_sse2_umulv2siv2di3 (t4, gen_lowpart (V4SImode, op1), 5633 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2),
5253 gen_lowpart (V4SImode, t3))); 5634 gen_lowpart (V4SImode, t2)));
5254 emit_insn (gen_sse2_umulv2siv2di3 (t5, gen_lowpart (V4SImode, op2), 5635
5255 gen_lowpart (V4SImode, t2))); 5636 /* Shift them back. */
5256 5637 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo));
5257 /* Shift them back. */ 5638 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo));
5258 emit_insn (gen_ashlv2di3 (t4, t4, thirtytwo)); 5639
5259 emit_insn (gen_ashlv2di3 (t5, t5, thirtytwo)); 5640 /* Add the three parts together. */
5260 5641 emit_insn (gen_addv2di3 (t6, t1, t4));
5261 /* Add the three parts together. */ 5642 emit_insn (gen_addv2di3 (op0, t6, t5));
5262 emit_insn (gen_addv2di3 (t6, t1, t4)); 5643 }
5263 emit_insn (gen_addv2di3 (op0, t6, t5));
5264 DONE; 5644 DONE;
5265 }) 5645 })
5266 5646
5267 (define_expand "vec_widen_smult_hi_v8hi" 5647 (define_expand "vec_widen_smult_hi_v8hi"
5268 [(match_operand:V4SI 0 "register_operand" "") 5648 [(match_operand:V4SI 0 "register_operand" "")
5346 5726
5347 (define_expand "vec_widen_smult_hi_v4si" 5727 (define_expand "vec_widen_smult_hi_v4si"
5348 [(match_operand:V2DI 0 "register_operand" "") 5728 [(match_operand:V2DI 0 "register_operand" "")
5349 (match_operand:V4SI 1 "register_operand" "") 5729 (match_operand:V4SI 1 "register_operand" "")
5350 (match_operand:V4SI 2 "register_operand" "")] 5730 (match_operand:V4SI 2 "register_operand" "")]
5351 "TARGET_SSE5" 5731 "TARGET_XOP"
5352 { 5732 {
5353 rtx t1, t2; 5733 rtx t1, t2;
5354 5734
5355 t1 = gen_reg_rtx (V4SImode); 5735 t1 = gen_reg_rtx (V4SImode);
5356 t2 = gen_reg_rtx (V4SImode); 5736 t2 = gen_reg_rtx (V4SImode);
5363 emit_insn (gen_sse2_pshufd_1 (t2, operands[2], 5743 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5364 GEN_INT (0), 5744 GEN_INT (0),
5365 GEN_INT (2), 5745 GEN_INT (2),
5366 GEN_INT (1), 5746 GEN_INT (1),
5367 GEN_INT (3))); 5747 GEN_INT (3)));
5368 emit_insn (gen_sse5_mulv2div2di3_high (operands[0], t1, t2)); 5748 emit_insn (gen_xop_mulv2div2di3_high (operands[0], t1, t2));
5369 DONE; 5749 DONE;
5370 }) 5750 })
5371 5751
5372 (define_expand "vec_widen_smult_lo_v4si" 5752 (define_expand "vec_widen_smult_lo_v4si"
5373 [(match_operand:V2DI 0 "register_operand" "") 5753 [(match_operand:V2DI 0 "register_operand" "")
5374 (match_operand:V4SI 1 "register_operand" "") 5754 (match_operand:V4SI 1 "register_operand" "")
5375 (match_operand:V4SI 2 "register_operand" "")] 5755 (match_operand:V4SI 2 "register_operand" "")]
5376 "TARGET_SSE5" 5756 "TARGET_XOP"
5377 { 5757 {
5378 rtx t1, t2; 5758 rtx t1, t2;
5379 5759
5380 t1 = gen_reg_rtx (V4SImode); 5760 t1 = gen_reg_rtx (V4SImode);
5381 t2 = gen_reg_rtx (V4SImode); 5761 t2 = gen_reg_rtx (V4SImode);
5388 emit_insn (gen_sse2_pshufd_1 (t2, operands[2], 5768 emit_insn (gen_sse2_pshufd_1 (t2, operands[2],
5389 GEN_INT (0), 5769 GEN_INT (0),
5390 GEN_INT (2), 5770 GEN_INT (2),
5391 GEN_INT (1), 5771 GEN_INT (1),
5392 GEN_INT (3))); 5772 GEN_INT (3)));
5393 emit_insn (gen_sse5_mulv2div2di3_low (operands[0], t1, t2)); 5773 emit_insn (gen_xop_mulv2div2di3_low (operands[0], t1, t2));
5394 DONE;
5395 DONE; 5774 DONE;
5396 }) 5775 })
5397 5776
5398 (define_expand "vec_widen_umult_hi_v4si" 5777 (define_expand "vec_widen_umult_hi_v4si"
5399 [(match_operand:V2DI 0 "register_operand" "") 5778 [(match_operand:V2DI 0 "register_operand" "")
5459 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2])); 5838 emit_insn (gen_sse2_umulv2siv2di3 (t1, operands[1], operands[2]));
5460 emit_insn (gen_addv2di3 (t1, t1, operands[3])); 5839 emit_insn (gen_addv2di3 (t1, t1, operands[3]));
5461 5840
5462 t2 = gen_reg_rtx (V4SImode); 5841 t2 = gen_reg_rtx (V4SImode);
5463 t3 = gen_reg_rtx (V4SImode); 5842 t3 = gen_reg_rtx (V4SImode);
5464 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t2), 5843 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t2),
5465 gen_lowpart (TImode, operands[1]), 5844 gen_lowpart (V1TImode, operands[1]),
5466 GEN_INT (32))); 5845 GEN_INT (32)));
5467 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, t3), 5846 emit_insn (gen_sse2_lshrv1ti3 (gen_lowpart (V1TImode, t3),
5468 gen_lowpart (TImode, operands[2]), 5847 gen_lowpart (V1TImode, operands[2]),
5469 GEN_INT (32))); 5848 GEN_INT (32)));
5470 5849
5471 t4 = gen_reg_rtx (V2DImode); 5850 t4 = gen_reg_rtx (V2DImode);
5472 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3)); 5851 emit_insn (gen_sse2_umulv2siv2di3 (t4, t2, t3));
5473 5852
5474 emit_insn (gen_addv2di3 (operands[0], t1, t4)); 5853 emit_insn (gen_addv2di3 (operands[0], t1, t4));
5482 (match_operand:SI 2 "nonmemory_operand" "xN")))] 5861 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5483 "TARGET_AVX" 5862 "TARGET_AVX"
5484 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" 5863 "vpsra<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5485 [(set_attr "type" "sseishft") 5864 [(set_attr "type" "sseishft")
5486 (set_attr "prefix" "vex") 5865 (set_attr "prefix" "vex")
5866 (set (attr "length_immediate")
5867 (if_then_else (match_operand 2 "const_int_operand" "")
5868 (const_string "1")
5869 (const_string "0")))
5487 (set_attr "mode" "TI")]) 5870 (set_attr "mode" "TI")])
5488 5871
5489 (define_insn "ashr<mode>3" 5872 (define_insn "ashr<mode>3"
5490 [(set (match_operand:SSEMODE24 0 "register_operand" "=x") 5873 [(set (match_operand:SSEMODE24 0 "register_operand" "=x")
5491 (ashiftrt:SSEMODE24 5874 (ashiftrt:SSEMODE24
5493 (match_operand:SI 2 "nonmemory_operand" "xN")))] 5876 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5494 "TARGET_SSE2" 5877 "TARGET_SSE2"
5495 "psra<ssevecsize>\t{%2, %0|%0, %2}" 5878 "psra<ssevecsize>\t{%2, %0|%0, %2}"
5496 [(set_attr "type" "sseishft") 5879 [(set_attr "type" "sseishft")
5497 (set_attr "prefix_data16" "1") 5880 (set_attr "prefix_data16" "1")
5881 (set (attr "length_immediate")
5882 (if_then_else (match_operand 2 "const_int_operand" "")
5883 (const_string "1")
5884 (const_string "0")))
5885 (set_attr "mode" "TI")])
5886
5887 (define_insn "*avx_lshrv1ti3"
5888 [(set (match_operand:V1TI 0 "register_operand" "=x")
5889 (lshiftrt:V1TI
5890 (match_operand:V1TI 1 "register_operand" "x")
5891 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5892 "TARGET_AVX"
5893 {
5894 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5895 return "vpsrldq\t{%2, %1, %0|%0, %1, %2}";
5896 }
5897 [(set_attr "type" "sseishft")
5898 (set_attr "prefix" "vex")
5899 (set_attr "length_immediate" "1")
5498 (set_attr "mode" "TI")]) 5900 (set_attr "mode" "TI")])
5499 5901
5500 (define_insn "*avx_lshr<mode>3" 5902 (define_insn "*avx_lshr<mode>3"
5501 [(set (match_operand:SSEMODE248 0 "register_operand" "=x") 5903 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5502 (lshiftrt:SSEMODE248 5904 (lshiftrt:SSEMODE248
5504 (match_operand:SI 2 "nonmemory_operand" "xN")))] 5906 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5505 "TARGET_AVX" 5907 "TARGET_AVX"
5506 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" 5908 "vpsrl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5507 [(set_attr "type" "sseishft") 5909 [(set_attr "type" "sseishft")
5508 (set_attr "prefix" "vex") 5910 (set_attr "prefix" "vex")
5911 (set (attr "length_immediate")
5912 (if_then_else (match_operand 2 "const_int_operand" "")
5913 (const_string "1")
5914 (const_string "0")))
5915 (set_attr "mode" "TI")])
5916
5917 (define_insn "sse2_lshrv1ti3"
5918 [(set (match_operand:V1TI 0 "register_operand" "=x")
5919 (lshiftrt:V1TI
5920 (match_operand:V1TI 1 "register_operand" "0")
5921 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5922 "TARGET_SSE2"
5923 {
5924 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5925 return "psrldq\t{%2, %0|%0, %2}";
5926 }
5927 [(set_attr "type" "sseishft")
5928 (set_attr "prefix_data16" "1")
5929 (set_attr "length_immediate" "1")
5509 (set_attr "mode" "TI")]) 5930 (set_attr "mode" "TI")])
5510 5931
5511 (define_insn "lshr<mode>3" 5932 (define_insn "lshr<mode>3"
5512 [(set (match_operand:SSEMODE248 0 "register_operand" "=x") 5933 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5513 (lshiftrt:SSEMODE248 5934 (lshiftrt:SSEMODE248
5515 (match_operand:SI 2 "nonmemory_operand" "xN")))] 5936 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5516 "TARGET_SSE2" 5937 "TARGET_SSE2"
5517 "psrl<ssevecsize>\t{%2, %0|%0, %2}" 5938 "psrl<ssevecsize>\t{%2, %0|%0, %2}"
5518 [(set_attr "type" "sseishft") 5939 [(set_attr "type" "sseishft")
5519 (set_attr "prefix_data16" "1") 5940 (set_attr "prefix_data16" "1")
5941 (set (attr "length_immediate")
5942 (if_then_else (match_operand 2 "const_int_operand" "")
5943 (const_string "1")
5944 (const_string "0")))
5945 (set_attr "mode" "TI")])
5946
5947 (define_insn "*avx_ashlv1ti3"
5948 [(set (match_operand:V1TI 0 "register_operand" "=x")
5949 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "x")
5950 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5951 "TARGET_AVX"
5952 {
5953 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5954 return "vpslldq\t{%2, %1, %0|%0, %1, %2}";
5955 }
5956 [(set_attr "type" "sseishft")
5957 (set_attr "prefix" "vex")
5958 (set_attr "length_immediate" "1")
5520 (set_attr "mode" "TI")]) 5959 (set_attr "mode" "TI")])
5521 5960
5522 (define_insn "*avx_ashl<mode>3" 5961 (define_insn "*avx_ashl<mode>3"
5523 [(set (match_operand:SSEMODE248 0 "register_operand" "=x") 5962 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5524 (ashift:SSEMODE248 5963 (ashift:SSEMODE248
5526 (match_operand:SI 2 "nonmemory_operand" "xN")))] 5965 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5527 "TARGET_AVX" 5966 "TARGET_AVX"
5528 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" 5967 "vpsll<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5529 [(set_attr "type" "sseishft") 5968 [(set_attr "type" "sseishft")
5530 (set_attr "prefix" "vex") 5969 (set_attr "prefix" "vex")
5970 (set (attr "length_immediate")
5971 (if_then_else (match_operand 2 "const_int_operand" "")
5972 (const_string "1")
5973 (const_string "0")))
5974 (set_attr "mode" "TI")])
5975
5976 (define_insn "sse2_ashlv1ti3"
5977 [(set (match_operand:V1TI 0 "register_operand" "=x")
5978 (ashift:V1TI (match_operand:V1TI 1 "register_operand" "0")
5979 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "n")))]
5980 "TARGET_SSE2"
5981 {
5982 operands[2] = GEN_INT (INTVAL (operands[2]) / 8);
5983 return "pslldq\t{%2, %0|%0, %2}";
5984 }
5985 [(set_attr "type" "sseishft")
5986 (set_attr "prefix_data16" "1")
5987 (set_attr "length_immediate" "1")
5531 (set_attr "mode" "TI")]) 5988 (set_attr "mode" "TI")])
5532 5989
5533 (define_insn "ashl<mode>3" 5990 (define_insn "ashl<mode>3"
5534 [(set (match_operand:SSEMODE248 0 "register_operand" "=x") 5991 [(set (match_operand:SSEMODE248 0 "register_operand" "=x")
5535 (ashift:SSEMODE248 5992 (ashift:SSEMODE248
5537 (match_operand:SI 2 "nonmemory_operand" "xN")))] 5994 (match_operand:SI 2 "nonmemory_operand" "xN")))]
5538 "TARGET_SSE2" 5995 "TARGET_SSE2"
5539 "psll<ssevecsize>\t{%2, %0|%0, %2}" 5996 "psll<ssevecsize>\t{%2, %0|%0, %2}"
5540 [(set_attr "type" "sseishft") 5997 [(set_attr "type" "sseishft")
5541 (set_attr "prefix_data16" "1") 5998 (set_attr "prefix_data16" "1")
5999 (set (attr "length_immediate")
6000 (if_then_else (match_operand 2 "const_int_operand" "")
6001 (const_string "1")
6002 (const_string "0")))
5542 (set_attr "mode" "TI")]) 6003 (set_attr "mode" "TI")])
5543 6004
5544 (define_expand "vec_shl_<mode>" 6005 (define_expand "vec_shl_<mode>"
5545 [(set (match_operand:SSEMODEI 0 "register_operand" "") 6006 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5546 (ashift:TI (match_operand:SSEMODEI 1 "register_operand" "") 6007 (ashift:V1TI
5547 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))] 6008 (match_operand:SSEMODEI 1 "register_operand" "")
5548 "TARGET_SSE2" 6009 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5549 { 6010 "TARGET_SSE2"
5550 operands[0] = gen_lowpart (TImode, operands[0]); 6011 {
5551 operands[1] = gen_lowpart (TImode, operands[1]); 6012 operands[0] = gen_lowpart (V1TImode, operands[0]);
6013 operands[1] = gen_lowpart (V1TImode, operands[1]);
5552 }) 6014 })
5553 6015
5554 (define_expand "vec_shr_<mode>" 6016 (define_expand "vec_shr_<mode>"
5555 [(set (match_operand:SSEMODEI 0 "register_operand" "") 6017 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5556 (lshiftrt:TI (match_operand:SSEMODEI 1 "register_operand" "") 6018 (lshiftrt:V1TI
5557 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))] 6019 (match_operand:SSEMODEI 1 "register_operand" "")
5558 "TARGET_SSE2" 6020 (match_operand:SI 2 "const_0_to_255_mul_8_operand" "")))]
5559 { 6021 "TARGET_SSE2"
5560 operands[0] = gen_lowpart (TImode, operands[0]); 6022 {
5561 operands[1] = gen_lowpart (TImode, operands[1]); 6023 operands[0] = gen_lowpart (V1TImode, operands[0]);
6024 operands[1] = gen_lowpart (V1TImode, operands[1]);
5562 }) 6025 })
5563 6026
5564 (define_insn "*avx_<code><mode>3" 6027 (define_insn "*avx_<code><mode>3"
5565 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") 6028 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5566 (maxmin:SSEMODE124 6029 (maxmin:SSEMODE124
5567 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x") 6030 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%x")
5568 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] 6031 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5569 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 6032 "TARGET_AVX && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5570 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}" 6033 "vp<maxminiprefix><ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5571 [(set_attr "type" "sseiadd") 6034 [(set_attr "type" "sseiadd")
6035 (set (attr "prefix_extra")
6036 (if_then_else
6037 (ne (symbol_ref "<MODE>mode != ((<CODE> == SMAX || <CODE> == SMIN) ? V8HImode : V16QImode)")
6038 (const_int 0))
6039 (const_string "1")
6040 (const_string "0")))
5572 (set_attr "prefix" "vex") 6041 (set_attr "prefix" "vex")
5573 (set_attr "mode" "TI")]) 6042 (set_attr "mode" "TI")])
5574 6043
5575 (define_expand "<code>v16qi3" 6044 (define_expand "<code>v16qi3"
5576 [(set (match_operand:V16QI 0 "register_operand" "") 6045 [(set (match_operand:V16QI 0 "register_operand" "")
5760 (define_expand "sse2_eq<mode>3" 6229 (define_expand "sse2_eq<mode>3"
5761 [(set (match_operand:SSEMODE124 0 "register_operand" "") 6230 [(set (match_operand:SSEMODE124 0 "register_operand" "")
5762 (eq:SSEMODE124 6231 (eq:SSEMODE124
5763 (match_operand:SSEMODE124 1 "nonimmediate_operand" "") 6232 (match_operand:SSEMODE124 1 "nonimmediate_operand" "")
5764 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))] 6233 (match_operand:SSEMODE124 2 "nonimmediate_operand" "")))]
5765 "TARGET_SSE2 && !TARGET_SSE5" 6234 "TARGET_SSE2 && !TARGET_XOP "
5766 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);") 6235 "ix86_fixup_binary_operands_no_copy (EQ, <MODE>mode, operands);")
5767 6236
5768 (define_insn "*avx_eq<mode>3" 6237 (define_insn "*avx_eq<mode>3"
5769 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") 6238 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
5770 (eq:SSEMODE1248 6239 (eq:SSEMODE1248
5771 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x") 6240 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "%x")
5772 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))] 6241 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5773 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" 6242 "TARGET_AVX && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5774 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" 6243 "vpcmpeq<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5775 [(set_attr "type" "ssecmp") 6244 [(set_attr "type" "ssecmp")
6245 (set (attr "prefix_extra")
6246 (if_then_else (match_operand:V2DI 0 "" "")
6247 (const_string "1")
6248 (const_string "*")))
5776 (set_attr "prefix" "vex") 6249 (set_attr "prefix" "vex")
5777 (set_attr "mode" "TI")]) 6250 (set_attr "mode" "TI")])
5778 6251
5779 (define_insn "*sse2_eq<mode>3" 6252 (define_insn "*sse2_eq<mode>3"
5780 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") 6253 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5781 (eq:SSEMODE124 6254 (eq:SSEMODE124
5782 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0") 6255 (match_operand:SSEMODE124 1 "nonimmediate_operand" "%0")
5783 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] 6256 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5784 "TARGET_SSE2 && !TARGET_SSE5 6257 "TARGET_SSE2 && !TARGET_XOP
5785 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)" 6258 && ix86_binary_operator_ok (EQ, <MODE>mode, operands)"
5786 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}" 6259 "pcmpeq<ssevecsize>\t{%2, %0|%0, %2}"
5787 [(set_attr "type" "ssecmp") 6260 [(set_attr "type" "ssecmp")
5788 (set_attr "prefix_data16" "1") 6261 (set_attr "prefix_data16" "1")
5789 (set_attr "mode" "TI")]) 6262 (set_attr "mode" "TI")])
5813 (match_operand:SSEMODE1248 1 "register_operand" "x") 6286 (match_operand:SSEMODE1248 1 "register_operand" "x")
5814 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))] 6287 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")))]
5815 "TARGET_AVX" 6288 "TARGET_AVX"
5816 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" 6289 "vpcmpgt<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
5817 [(set_attr "type" "ssecmp") 6290 [(set_attr "type" "ssecmp")
6291 (set (attr "prefix_extra")
6292 (if_then_else (match_operand:V2DI 0 "" "")
6293 (const_string "1")
6294 (const_string "*")))
5818 (set_attr "prefix" "vex") 6295 (set_attr "prefix" "vex")
5819 (set_attr "mode" "TI")]) 6296 (set_attr "mode" "TI")])
5820 6297
5821 (define_insn "sse2_gt<mode>3" 6298 (define_insn "sse2_gt<mode>3"
5822 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") 6299 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
5823 (gt:SSEMODE124 6300 (gt:SSEMODE124
5824 (match_operand:SSEMODE124 1 "register_operand" "0") 6301 (match_operand:SSEMODE124 1 "register_operand" "0")
5825 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))] 6302 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")))]
5826 "TARGET_SSE2 && !TARGET_SSE5" 6303 "TARGET_SSE2 && !TARGET_XOP"
5827 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}" 6304 "pcmpgt<ssevecsize>\t{%2, %0|%0, %2}"
5828 [(set_attr "type" "ssecmp") 6305 [(set_attr "type" "ssecmp")
5829 (set_attr "prefix_data16" "1") 6306 (set_attr "prefix_data16" "1")
5830 (set_attr "mode" "TI")]) 6307 (set_attr "mode" "TI")])
5831 6308
5835 (match_operand:V2DI 1 "register_operand" "0") 6312 (match_operand:V2DI 1 "register_operand" "0")
5836 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))] 6313 (match_operand:V2DI 2 "nonimmediate_operand" "xm")))]
5837 "TARGET_SSE4_2" 6314 "TARGET_SSE4_2"
5838 "pcmpgtq\t{%2, %0|%0, %2}" 6315 "pcmpgtq\t{%2, %0|%0, %2}"
5839 [(set_attr "type" "ssecmp") 6316 [(set_attr "type" "ssecmp")
6317 (set_attr "prefix_extra" "1")
5840 (set_attr "mode" "TI")]) 6318 (set_attr "mode" "TI")])
5841 6319
5842 (define_expand "vcond<mode>" 6320 (define_expand "vcond<mode>"
5843 [(set (match_operand:SSEMODE124C8 0 "register_operand" "") 6321 [(set (match_operand:SSEMODE124C8 0 "register_operand" "")
5844 (if_then_else:SSEMODE124C8 6322 (if_then_else:SSEMODE124C8
5944 (set_attr "prefix_data16" "1") 6422 (set_attr "prefix_data16" "1")
5945 (set_attr "mode" "TI")]) 6423 (set_attr "mode" "TI")])
5946 6424
5947 (define_expand "<code><mode>3" 6425 (define_expand "<code><mode>3"
5948 [(set (match_operand:SSEMODEI 0 "register_operand" "") 6426 [(set (match_operand:SSEMODEI 0 "register_operand" "")
5949 (plogic:SSEMODEI 6427 (any_logic:SSEMODEI
5950 (match_operand:SSEMODEI 1 "nonimmediate_operand" "") 6428 (match_operand:SSEMODEI 1 "nonimmediate_operand" "")
5951 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))] 6429 (match_operand:SSEMODEI 2 "nonimmediate_operand" "")))]
5952 "TARGET_SSE" 6430 "TARGET_SSE"
5953 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);") 6431 "ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
5954 6432
;; AVX logical operation on the 32-byte integer modes (AVX256MODEI).
;; Emitted as the three-operand "v<...>ps" form, i.e. the packed-single
;; mnemonic is used even though the operands are integer vectors; the "mode"
;; attribute is <avxvecpsmode> accordingly.
;; Operand 1 is marked commutative ("%x"); operand 2 may be memory.
;; Left column: plogic / <plogicprefix>; right column: any_logic / <logicprefix>.
;; NOTE(review): presumably the ps form is used because no 256-bit integer
;; logical instruction is available under TARGET_AVX -- confirm.
5955 (define_insn "*avx_<code><mode>3" 6433 (define_insn "*avx_<code><mode>3"
5956 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x") 6434 [(set (match_operand:AVX256MODEI 0 "register_operand" "=x")
5957 (plogic:AVX256MODEI 6435 (any_logic:AVX256MODEI
5958 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x") 6436 (match_operand:AVX256MODEI 1 "nonimmediate_operand" "%x")
5959 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))] 6437 (match_operand:AVX256MODEI 2 "nonimmediate_operand" "xm")))]
5960 "TARGET_AVX 6438 "TARGET_AVX
5961 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 6439 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5962 "v<plogicprefix>ps\t{%2, %1, %0|%0, %1, %2}" 6440 "v<logicprefix>ps\t{%2, %1, %0|%0, %1, %2}"
5963 [(set_attr "type" "sselog") 6441 [(set_attr "type" "sselog")
5964 (set_attr "prefix" "vex") 6442 (set_attr "prefix" "vex")
5965 (set_attr "mode" "<avxvecpsmode>")]) 6443 (set_attr "mode" "<avxvecpsmode>")])
5966 6444
;; Logical operation on the SSEMODEI integer modes for targets that have SSE
;; but not SSE2 (condition: TARGET_SSE && !TARGET_SSE2).
;; Emitted as the two-operand "<...>ps" form with operand 1 tied to the
;; destination ("%0", commutative); the "mode" attribute is V4SF.
;; Left column: plogic / <plogicprefix>; right column: any_logic / <logicprefix>.
5967 (define_insn "*sse_<code><mode>3" 6445 (define_insn "*sse_<code><mode>3"
5968 [(set (match_operand:SSEMODEI 0 "register_operand" "=x") 6446 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5969 (plogic:SSEMODEI 6447 (any_logic:SSEMODEI
5970 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") 6448 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
5971 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] 6449 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5972 "(TARGET_SSE && !TARGET_SSE2) 6450 "(TARGET_SSE && !TARGET_SSE2)
5973 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 6451 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5974 "<plogicprefix>ps\t{%2, %0|%0, %2}" 6452 "<logicprefix>ps\t{%2, %0|%0, %2}"
5975 [(set_attr "type" "sselog") 6453 [(set_attr "type" "sselog")
5976 (set_attr "mode" "V4SF")]) 6454 (set_attr "mode" "V4SF")])
5977 6455
;; AVX logical operation on the 16-byte integer modes (SSEMODEI).
;; Emitted as the three-operand "vp<...>" integer form with a VEX prefix;
;; operand 1 is a commutative register-class operand ("%x") rather than being
;; tied to the destination.
;; Left column: plogic / <plogicprefix>; right column: any_logic / <logicprefix>.
5978 (define_insn "*avx_<code><mode>3" 6456 (define_insn "*avx_<code><mode>3"
5979 [(set (match_operand:SSEMODEI 0 "register_operand" "=x") 6457 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5980 (plogic:SSEMODEI 6458 (any_logic:SSEMODEI
5981 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x") 6459 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%x")
5982 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] 6460 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5983 "TARGET_AVX 6461 "TARGET_AVX
5984 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 6462 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5985 "vp<plogicprefix>\t{%2, %1, %0|%0, %1, %2}" 6463 "vp<logicprefix>\t{%2, %1, %0|%0, %1, %2}"
5986 [(set_attr "type" "sselog") 6464 [(set_attr "type" "sselog")
5987 (set_attr "prefix" "vex") 6465 (set_attr "prefix" "vex")
5988 (set_attr "mode" "TI")]) 6466 (set_attr "mode" "TI")])
5989 6467
;; SSE2 logical operation on the 16-byte integer modes (SSEMODEI).
;; Emitted as the two-operand "p<...>" integer form with operand 1 tied to
;; the destination ("%0", commutative); carries prefix_data16 = 1.
;; Left column: plogic / <plogicprefix>; right column: any_logic / <logicprefix>.
5990 (define_insn "*sse2_<code><mode>3" 6468 (define_insn "*sse2_<code><mode>3"
5991 [(set (match_operand:SSEMODEI 0 "register_operand" "=x") 6469 [(set (match_operand:SSEMODEI 0 "register_operand" "=x")
5992 (plogic:SSEMODEI 6470 (any_logic:SSEMODEI
5993 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0") 6471 (match_operand:SSEMODEI 1 "nonimmediate_operand" "%0")
5994 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))] 6472 (match_operand:SSEMODEI 2 "nonimmediate_operand" "xm")))]
5995 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)" 6473 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
5996 "p<plogicprefix>\t{%2, %0|%0, %2}" 6474 "p<logicprefix>\t{%2, %0|%0, %2}"
5997 [(set_attr "type" "sselog") 6475 [(set_attr "type" "sselog")
5998 (set_attr "prefix_data16" "1") 6476 (set_attr "prefix_data16" "1")
5999 (set_attr "mode" "TI")]) 6477 (set_attr "mode" "TI")])
6000 6478
;; Expander for the logical operations on TFmode values, enabled under
;; TARGET_SSE2.  The preparation statement passes the operands through
;; ix86_fixup_binary_operands_no_copy with TFmode.
;; Left column: plogic; right column: any_logic.
6001 (define_expand "<code>tf3" 6479 (define_expand "<code>tf3"
6002 [(set (match_operand:TF 0 "register_operand" "") 6480 [(set (match_operand:TF 0 "register_operand" "")
6003 (plogic:TF 6481 (any_logic:TF
6004 (match_operand:TF 1 "nonimmediate_operand" "") 6482 (match_operand:TF 1 "nonimmediate_operand" "")
6005 (match_operand:TF 2 "nonimmediate_operand" "")))] 6483 (match_operand:TF 2 "nonimmediate_operand" "")))]
6006 "TARGET_SSE2" 6484 "TARGET_SSE2"
6007 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);") 6485 "ix86_fixup_binary_operands_no_copy (<CODE>, TFmode, operands);")
6008 6486
;; Insn pattern for the TFmode logical operations under TARGET_SSE2.
;; Uses the same two-operand "p<...>" integer template as the SSE2 vector
;; pattern: operand 1 is tied to the destination ("%0", commutative) and
;; operand 2 may be a register or memory.
;; Left column: plogic / <plogicprefix>; right column: any_logic / <logicprefix>.
6009 (define_insn "*<code>tf3" 6487 (define_insn "*<code>tf3"
6010 [(set (match_operand:TF 0 "register_operand" "=x") 6488 [(set (match_operand:TF 0 "register_operand" "=x")
6011 (plogic:TF 6489 (any_logic:TF
6012 (match_operand:TF 1 "nonimmediate_operand" "%0") 6490 (match_operand:TF 1 "nonimmediate_operand" "%0")
6013 (match_operand:TF 2 "nonimmediate_operand" "xm")))] 6491 (match_operand:TF 2 "nonimmediate_operand" "xm")))]
6014 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)" 6492 "TARGET_SSE2 && ix86_binary_operator_ok (<CODE>, TFmode, operands)"
6015 "p<plogicprefix>\t{%2, %0|%0, %2}" 6493 "p<logicprefix>\t{%2, %0|%0, %2}"
6016 [(set_attr "type" "sselog") 6494 [(set_attr "type" "sselog")
6017 (set_attr "prefix_data16" "1") 6495 (set_attr "prefix_data16" "1")
6018 (set_attr "mode" "TI")]) 6496 (set_attr "mode" "TI")])
6019 6497
6020 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6498 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6021 ;; 6499 ;;
6022 ;; Parallel integral element swizzling 6500 ;; Parallel integral element swizzling
6023 ;; 6501 ;;
6024 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 6502 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
6025 6503
;; vec_pack_trunc_v8hi: produce a V16QI result from two V8HI inputs.
;; Side-by-side comparison; lines carrying a single number exist only in the
;; left (old) column.
;;   Left column:  when TARGET_SSE5 is set, delegates to ix86_expand_sse5_pack;
;;                 otherwise views both inputs as V16QI and applies the seven
;;                 high/low byte interleaves traced in the "Reduce" comment.
;;   Right column: views both inputs as V16QI with gen_lowpart and makes one
;;                 call to ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably selects the even-indexed
;; elements -- confirm against ix86_expand_vec_extract_even_odd.
6026 ;; Reduce:
6027 ;; op1 = abcdefghijklmnop
6028 ;; op2 = qrstuvwxyz012345
6029 ;; h1 = aqbrcsdteufvgwhx
6030 ;; l1 = iyjzk0l1m2n3o4p5
6031 ;; h2 = aiqybjrzcks0dlt1
6032 ;; l2 = emu2fnv3gow4hpx5
6033 ;; h3 = aeimquy2bfjnrvz3
6034 ;; l3 = cgkosw04dhlptx15
6035 ;; result = bdfhjlnprtvxz135
6036 (define_expand "vec_pack_trunc_v8hi" 6504 (define_expand "vec_pack_trunc_v8hi"
6037 [(match_operand:V16QI 0 "register_operand" "") 6505 [(match_operand:V16QI 0 "register_operand" "")
6038 (match_operand:V8HI 1 "register_operand" "") 6506 (match_operand:V8HI 1 "register_operand" "")
6039 (match_operand:V8HI 2 "register_operand" "")] 6507 (match_operand:V8HI 2 "register_operand" "")]
6040 "TARGET_SSE2" 6508 "TARGET_SSE2"
6041 { 6509 {
6042 rtx op1, op2, h1, l1, h2, l2, h3, l3; 6510 rtx op1 = gen_lowpart (V16QImode, operands[1]);
6043 6511 rtx op2 = gen_lowpart (V16QImode, operands[2]);
6044 if (TARGET_SSE5) 6512 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
6045 {
6046 ix86_expand_sse5_pack (operands);
6047 DONE;
6048 }
6049
6050 op1 = gen_lowpart (V16QImode, operands[1]);
6051 op2 = gen_lowpart (V16QImode, operands[2]);
6052 h1 = gen_reg_rtx (V16QImode);
6053 l1 = gen_reg_rtx (V16QImode);
6054 h2 = gen_reg_rtx (V16QImode);
6055 l2 = gen_reg_rtx (V16QImode);
6056 h3 = gen_reg_rtx (V16QImode);
6057 l3 = gen_reg_rtx (V16QImode);
6058
6059 emit_insn (gen_vec_interleave_highv16qi (h1, op1, op2));
6060 emit_insn (gen_vec_interleave_lowv16qi (l1, op1, op2));
6061 emit_insn (gen_vec_interleave_highv16qi (h2, l1, h1));
6062 emit_insn (gen_vec_interleave_lowv16qi (l2, l1, h1));
6063 emit_insn (gen_vec_interleave_highv16qi (h3, l2, h2));
6064 emit_insn (gen_vec_interleave_lowv16qi (l3, l2, h2));
6065 emit_insn (gen_vec_interleave_lowv16qi (operands[0], l3, h3));
6066 DONE; 6513 DONE;
6067 }) 6514 })
6068 6515
;; vec_pack_trunc_v4si: produce a V8HI result from two V4SI inputs.
;; Side-by-side comparison; lines carrying a single number exist only in the
;; left (old) column.
;;   Left column:  when TARGET_SSE5 is set, delegates to ix86_expand_sse5_pack;
;;                 otherwise views both inputs as V8HI and applies the five
;;                 high/low word interleaves traced in the "Reduce" comment.
;;   Right column: views both inputs as V8HI with gen_lowpart and makes one
;;                 call to ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably selects the even-indexed
;; elements -- confirm against ix86_expand_vec_extract_even_odd.
6069 ;; Reduce:
6070 ;; op1 = abcdefgh
6071 ;; op2 = ijklmnop
6072 ;; h1 = aibjckdl
6073 ;; l1 = emfngohp
6074 ;; h2 = aeimbfjn
6075 ;; l2 = cgkodhlp
6076 ;; result = bdfhjlnp
6077 (define_expand "vec_pack_trunc_v4si" 6516 (define_expand "vec_pack_trunc_v4si"
6078 [(match_operand:V8HI 0 "register_operand" "") 6517 [(match_operand:V8HI 0 "register_operand" "")
6079 (match_operand:V4SI 1 "register_operand" "") 6518 (match_operand:V4SI 1 "register_operand" "")
6080 (match_operand:V4SI 2 "register_operand" "")] 6519 (match_operand:V4SI 2 "register_operand" "")]
6081 "TARGET_SSE2" 6520 "TARGET_SSE2"
6082 { 6521 {
6083 rtx op1, op2, h1, l1, h2, l2; 6522 rtx op1 = gen_lowpart (V8HImode, operands[1]);
6084 6523 rtx op2 = gen_lowpart (V8HImode, operands[2]);
6085 if (TARGET_SSE5) 6524 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
6086 {
6087 ix86_expand_sse5_pack (operands);
6088 DONE;
6089 }
6090
6091 op1 = gen_lowpart (V8HImode, operands[1]);
6092 op2 = gen_lowpart (V8HImode, operands[2]);
6093 h1 = gen_reg_rtx (V8HImode);
6094 l1 = gen_reg_rtx (V8HImode);
6095 h2 = gen_reg_rtx (V8HImode);
6096 l2 = gen_reg_rtx (V8HImode);
6097
6098 emit_insn (gen_vec_interleave_highv8hi (h1, op1, op2));
6099 emit_insn (gen_vec_interleave_lowv8hi (l1, op1, op2));
6100 emit_insn (gen_vec_interleave_highv8hi (h2, l1, h1));
6101 emit_insn (gen_vec_interleave_lowv8hi (l2, l1, h1));
6102 emit_insn (gen_vec_interleave_lowv8hi (operands[0], l2, h2));
6103 DONE; 6525 DONE;
6104 }) 6526 })
6105 6527
;; vec_pack_trunc_v2di: produce a V4SI result from two V2DI inputs.
;; Side-by-side comparison; lines carrying a single number exist only in the
;; left (old) column.
;;   Left column:  when TARGET_SSE5 is set, delegates to ix86_expand_sse5_pack;
;;                 otherwise views both inputs as V4SI and applies the three
;;                 high/low dword interleaves traced in the "Reduce" comment.
;;   Right column: views both inputs as V4SI with gen_lowpart and makes one
;;                 call to ix86_expand_vec_extract_even_odd.
;; NOTE(review): the final argument 0 presumably selects the even-indexed
;; elements -- confirm against ix86_expand_vec_extract_even_odd.
6106 ;; Reduce:
6107 ;; op1 = abcd
6108 ;; op2 = efgh
6109 ;; h1 = aebf
6110 ;; l1 = cgdh
6111 ;; result = bdfh
6112 (define_expand "vec_pack_trunc_v2di" 6528 (define_expand "vec_pack_trunc_v2di"
6113 [(match_operand:V4SI 0 "register_operand" "") 6529 [(match_operand:V4SI 0 "register_operand" "")
6114 (match_operand:V2DI 1 "register_operand" "") 6530 (match_operand:V2DI 1 "register_operand" "")
6115 (match_operand:V2DI 2 "register_operand" "")] 6531 (match_operand:V2DI 2 "register_operand" "")]
6116 "TARGET_SSE2" 6532 "TARGET_SSE2"
6117 { 6533 {
6118 rtx op1, op2, h1, l1; 6534 rtx op1 = gen_lowpart (V4SImode, operands[1]);
6119 6535 rtx op2 = gen_lowpart (V4SImode, operands[2]);
6120 if (TARGET_SSE5) 6536 ix86_expand_vec_extract_even_odd (operands[0], op1, op2, 0);
6121 {
6122 ix86_expand_sse5_pack (operands);
6123 DONE;
6124 }
6125
6126 op1 = gen_lowpart (V4SImode, operands[1]);
6127 op2 = gen_lowpart (V4SImode, operands[2]);
6128 h1 = gen_reg_rtx (V4SImode);
6129 l1 = gen_reg_rtx (V4SImode);
6130
6131 emit_insn (gen_vec_interleave_highv4si (h1, op1, op2));
6132 emit_insn (gen_vec_interleave_lowv4si (l1, op1, op2));
6133 emit_insn (gen_vec_interleave_lowv4si (operands[0], l1, h1));
6134 DONE; 6537 DONE;
6135 }) 6538 })
6136
;; Left (old) column only: this expander has no right-column counterpart.
;; Interleaves the high halves of two V16QI vectors: elements 8..15 of
;; operand 1 alternate with elements 24..31 of the V32QI concatenation
;; (i.e. elements 8..15 of operand 2).
;; The preparation statement emits gen_sse2_punpckhbw and ends with DONE, so
;; only that insn is generated.  Further down in this comparison the
;; define_insn "sse2_punpckhbw" is renamed to "vec_interleave_highv16qi" in
;; the right column.
6137 (define_expand "vec_interleave_highv16qi"
6138 [(set (match_operand:V16QI 0 "register_operand" "")
6139 (vec_select:V16QI
6140 (vec_concat:V32QI
6141 (match_operand:V16QI 1 "register_operand" "")
6142 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6143 (parallel [(const_int 8) (const_int 24)
6144 (const_int 9) (const_int 25)
6145 (const_int 10) (const_int 26)
6146 (const_int 11) (const_int 27)
6147 (const_int 12) (const_int 28)
6148 (const_int 13) (const_int 29)
6149 (const_int 14) (const_int 30)
6150 (const_int 15) (const_int 31)])))]
6151 "TARGET_SSE2"
6152 {
6153 emit_insn (gen_sse2_punpckhbw (operands[0], operands[1], operands[2]));
6154 DONE;
6155 })
6156
;; Left (old) column only: this expander has no right-column counterpart.
;; Interleaves the low halves of two V16QI vectors: elements 0..7 of
;; operand 1 alternate with elements 16..23 of the V32QI concatenation
;; (i.e. elements 0..7 of operand 2).
;; The preparation statement emits gen_sse2_punpcklbw and ends with DONE, so
;; only that insn is generated.  Further down in this comparison the
;; define_insn "sse2_punpcklbw" is renamed to "vec_interleave_lowv16qi" in
;; the right column.
6157 (define_expand "vec_interleave_lowv16qi"
6158 [(set (match_operand:V16QI 0 "register_operand" "")
6159 (vec_select:V16QI
6160 (vec_concat:V32QI
6161 (match_operand:V16QI 1 "register_operand" "")
6162 (match_operand:V16QI 2 "nonimmediate_operand" ""))
6163 (parallel [(const_int 0) (const_int 16)
6164 (const_int 1) (const_int 17)
6165 (const_int 2) (const_int 18)
6166 (const_int 3) (const_int 19)
6167 (const_int 4) (const_int 20)
6168 (const_int 5) (const_int 21)
6169 (const_int 6) (const_int 22)
6170 (const_int 7) (const_int 23)])))]
6171 "TARGET_SSE2"
6172 {
6173 emit_insn (gen_sse2_punpcklbw (operands[0], operands[1], operands[2]));
6174 DONE;
6175 })
6176
;; Left (old) column only: this expander has no right-column counterpart.
;; Interleaves the high halves of two V8HI vectors: elements 4..7 of
;; operand 1 alternate with elements 12..15 of the V16HI concatenation
;; (i.e. elements 4..7 of operand 2).
;; The preparation statement emits gen_sse2_punpckhwd and ends with DONE, so
;; only that insn is generated.  Further down in this comparison the
;; define_insn "sse2_punpckhwd" is renamed to "vec_interleave_highv8hi" in
;; the right column.
;; NOTE(review): operand 0 carries the constraint string "=" where the
;; sibling expanders use ""; looks like a stray character -- confirm.
6177 (define_expand "vec_interleave_highv8hi"
6178 [(set (match_operand:V8HI 0 "register_operand" "=")
6179 (vec_select:V8HI
6180 (vec_concat:V16HI
6181 (match_operand:V8HI 1 "register_operand" "")
6182 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6183 (parallel [(const_int 4) (const_int 12)
6184 (const_int 5) (const_int 13)
6185 (const_int 6) (const_int 14)
6186 (const_int 7) (const_int 15)])))]
6187 "TARGET_SSE2"
6188 {
6189 emit_insn (gen_sse2_punpckhwd (operands[0], operands[1], operands[2]));
6190 DONE;
6191 })
6192
;; Left (old) column only: this expander has no right-column counterpart.
;; Interleaves the low halves of two V8HI vectors: elements 0..3 of
;; operand 1 alternate with elements 8..11 of the V16HI concatenation
;; (i.e. elements 0..3 of operand 2).
;; The preparation statement emits gen_sse2_punpcklwd and ends with DONE, so
;; only that insn is generated.  Further down in this comparison the
;; define_insn "sse2_punpcklwd" is renamed to "vec_interleave_lowv8hi" in
;; the right column.
6193 (define_expand "vec_interleave_lowv8hi"
6194 [(set (match_operand:V8HI 0 "register_operand" "")
6195 (vec_select:V8HI
6196 (vec_concat:V16HI
6197 (match_operand:V8HI 1 "register_operand" "")
6198 (match_operand:V8HI 2 "nonimmediate_operand" ""))
6199 (parallel [(const_int 0) (const_int 8)
6200 (const_int 1) (const_int 9)
6201 (const_int 2) (const_int 10)
6202 (const_int 3) (const_int 11)])))]
6203 "TARGET_SSE2"
6204 {
6205 emit_insn (gen_sse2_punpcklwd (operands[0], operands[1], operands[2]));
6206 DONE;
6207 })
6208
;; Left (old) column only: this expander has no right-column counterpart.
;; Interleaves the high halves of two V4SI vectors: elements 2..3 of
;; operand 1 alternate with elements 6..7 of the V8SI concatenation
;; (i.e. elements 2..3 of operand 2).
;; The preparation statement emits gen_sse2_punpckhdq and ends with DONE, so
;; only that insn is generated.  Further down in this comparison the
;; define_insn "sse2_punpckhdq" is renamed to "vec_interleave_highv4si" in
;; the right column.
6209 (define_expand "vec_interleave_highv4si"
6210 [(set (match_operand:V4SI 0 "register_operand" "")
6211 (vec_select:V4SI
6212 (vec_concat:V8SI
6213 (match_operand:V4SI 1 "register_operand" "")
6214 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6215 (parallel [(const_int 2) (const_int 6)
6216 (const_int 3) (const_int 7)])))]
6217 "TARGET_SSE2"
6218 {
6219 emit_insn (gen_sse2_punpckhdq (operands[0], operands[1], operands[2]));
6220 DONE;
6221 })
6222
;; Left (old) column only: this expander has no right-column counterpart.
;; Interleaves the low halves of two V4SI vectors: elements 0..1 of
;; operand 1 alternate with elements 4..5 of the V8SI concatenation
;; (i.e. elements 0..1 of operand 2).
;; The preparation statement emits gen_sse2_punpckldq and ends with DONE, so
;; only that insn is generated.  Further down in this comparison the
;; define_insn "sse2_punpckldq" is renamed to "vec_interleave_lowv4si" in
;; the right column.
6223 (define_expand "vec_interleave_lowv4si"
6224 [(set (match_operand:V4SI 0 "register_operand" "")
6225 (vec_select:V4SI
6226 (vec_concat:V8SI
6227 (match_operand:V4SI 1 "register_operand" "")
6228 (match_operand:V4SI 2 "nonimmediate_operand" ""))
6229 (parallel [(const_int 0) (const_int 4)
6230 (const_int 1) (const_int 5)])))]
6231 "TARGET_SSE2"
6232 {
6233 emit_insn (gen_sse2_punpckldq (operands[0], operands[1], operands[2]));
6234 DONE;
6235 })
6236
;; Left (old) column only: this expander has no right-column counterpart.
;; Selects element 1 of operand 1 and element 3 of the V4DI concatenation
;; (i.e. element 1 of operand 2): the high quadword of each input.
;; The preparation statement emits gen_sse2_punpckhqdq and ends with DONE, so
;; only that insn is generated.
;; NOTE(review): the matching define_insn is not visible in this part of the
;; comparison; presumably it was renamed like the other modes -- confirm.
6237 (define_expand "vec_interleave_highv2di"
6238 [(set (match_operand:V2DI 0 "register_operand" "")
6239 (vec_select:V2DI
6240 (vec_concat:V4DI
6241 (match_operand:V2DI 1 "register_operand" "")
6242 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6243 (parallel [(const_int 1)
6244 (const_int 3)])))]
6245 "TARGET_SSE2"
6246 {
6247 emit_insn (gen_sse2_punpckhqdq (operands[0], operands[1], operands[2]));
6248 DONE;
6249 })
6250
;; Left (old) column only: this expander has no right-column counterpart.
;; Selects element 0 of operand 1 and element 2 of the V4DI concatenation
;; (i.e. element 0 of operand 2): the low quadword of each input.
;; The preparation statement emits gen_sse2_punpcklqdq and ends with DONE, so
;; only that insn is generated.
;; NOTE(review): the matching define_insn is not visible in this part of the
;; comparison; presumably it was renamed like the other modes -- confirm.
6251 (define_expand "vec_interleave_lowv2di"
6252 [(set (match_operand:V2DI 0 "register_operand" "")
6253 (vec_select:V2DI
6254 (vec_concat:V4DI
6255 (match_operand:V2DI 1 "register_operand" "")
6256 (match_operand:V2DI 2 "nonimmediate_operand" ""))
6257 (parallel [(const_int 0)
6258 (const_int 2)])))]
6259 "TARGET_SSE2"
6260 {
6261 emit_insn (gen_sse2_punpcklqdq (operands[0], operands[1], operands[2]));
6262 DONE;
6263 })
6264
;; Left (old) column only: this expander has no right-column counterpart.
;; Interleaves the high halves of two V4SF vectors: elements 2..3 of
;; operand 1 alternate with elements 6..7 of the V8SF concatenation
;; (i.e. elements 2..3 of operand 2).  Enabled under TARGET_SSE.
;; There are no preparation statements, so the vec_select template itself is
;; generated.
;; NOTE(review): the insn pattern that matches it is outside this view.
6265 (define_expand "vec_interleave_highv4sf"
6266 [(set (match_operand:V4SF 0 "register_operand" "")
6267 (vec_select:V4SF
6268 (vec_concat:V8SF
6269 (match_operand:V4SF 1 "register_operand" "")
6270 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6271 (parallel [(const_int 2) (const_int 6)
6272 (const_int 3) (const_int 7)])))]
6273 "TARGET_SSE")
6274
;; Left (old) column only: this expander has no right-column counterpart.
;; Interleaves the low halves of two V4SF vectors: elements 0..1 of
;; operand 1 alternate with elements 4..5 of the V8SF concatenation
;; (i.e. elements 0..1 of operand 2).  Enabled under TARGET_SSE.
;; There are no preparation statements, so the vec_select template itself is
;; generated.
;; NOTE(review): the insn pattern that matches it is outside this view.
6275 (define_expand "vec_interleave_lowv4sf"
6276 [(set (match_operand:V4SF 0 "register_operand" "")
6277 (vec_select:V4SF
6278 (vec_concat:V8SF
6279 (match_operand:V4SF 1 "register_operand" "")
6280 (match_operand:V4SF 2 "nonimmediate_operand" ""))
6281 (parallel [(const_int 0) (const_int 4)
6282 (const_int 1) (const_int 5)])))]
6283 "TARGET_SSE")
6284
;; Left (old) column only: this expander has no right-column counterpart.
;; Selects element 1 of operand 1 and element 3 of the V4DF concatenation
;; (i.e. element 1 of operand 2): the high double of each input.
;; Enabled under TARGET_SSE2.  There are no preparation statements, so the
;; vec_select template itself is generated.
;; NOTE(review): the insn pattern that matches it is outside this view.
6285 (define_expand "vec_interleave_highv2df"
6286 [(set (match_operand:V2DF 0 "register_operand" "")
6287 (vec_select:V2DF
6288 (vec_concat:V4DF
6289 (match_operand:V2DF 1 "register_operand" "")
6290 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6291 (parallel [(const_int 1)
6292 (const_int 3)])))]
6293 "TARGET_SSE2")
6294
;; Left (old) column only: this expander has no right-column counterpart.
;; Selects element 0 of operand 1 and element 2 of the V4DF concatenation
;; (i.e. element 0 of operand 2): the low double of each input.
;; Enabled under TARGET_SSE2.  There are no preparation statements, so the
;; vec_select template itself is generated.
;; NOTE(review): the insn pattern that matches it is outside this view.
6295 (define_expand "vec_interleave_lowv2df"
6296 [(set (match_operand:V2DF 0 "register_operand" "")
6297 (vec_select:V2DF
6298 (vec_concat:V4DF
6299 (match_operand:V2DF 1 "register_operand" "")
6300 (match_operand:V2DF 2 "nonimmediate_operand" ""))
6301 (parallel [(const_int 0)
6302 (const_int 2)])))]
6303 "TARGET_SSE2")
6304 6539
6305 (define_insn "*avx_packsswb" 6540 (define_insn "*avx_packsswb"
6306 [(set (match_operand:V16QI 0 "register_operand" "=x") 6541 [(set (match_operand:V16QI 0 "register_operand" "=x")
6307 (vec_concat:V16QI 6542 (vec_concat:V16QI
6308 (ss_truncate:V8QI 6543 (ss_truncate:V8QI
6378 "packuswb\t{%2, %0|%0, %2}" 6613 "packuswb\t{%2, %0|%0, %2}"
6379 [(set_attr "type" "sselog") 6614 [(set_attr "type" "sselog")
6380 (set_attr "prefix_data16" "1") 6615 (set_attr "prefix_data16" "1")
6381 (set_attr "mode" "TI")]) 6616 (set_attr "mode" "TI")])
6382 6617
6383 (define_insn "*avx_punpckhbw" 6618 (define_insn "*avx_interleave_highv16qi"
6384 [(set (match_operand:V16QI 0 "register_operand" "=x") 6619 [(set (match_operand:V16QI 0 "register_operand" "=x")
6385 (vec_select:V16QI 6620 (vec_select:V16QI
6386 (vec_concat:V32QI 6621 (vec_concat:V32QI
6387 (match_operand:V16QI 1 "register_operand" "x") 6622 (match_operand:V16QI 1 "register_operand" "x")
6388 (match_operand:V16QI 2 "nonimmediate_operand" "xm")) 6623 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6398 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}" 6633 "vpunpckhbw\t{%2, %1, %0|%0, %1, %2}"
6399 [(set_attr "type" "sselog") 6634 [(set_attr "type" "sselog")
6400 (set_attr "prefix" "vex") 6635 (set_attr "prefix" "vex")
6401 (set_attr "mode" "TI")]) 6636 (set_attr "mode" "TI")])
6402 6637
6403 (define_insn "sse2_punpckhbw" 6638 (define_insn "vec_interleave_highv16qi"
6404 [(set (match_operand:V16QI 0 "register_operand" "=x") 6639 [(set (match_operand:V16QI 0 "register_operand" "=x")
6405 (vec_select:V16QI 6640 (vec_select:V16QI
6406 (vec_concat:V32QI 6641 (vec_concat:V32QI
6407 (match_operand:V16QI 1 "register_operand" "0") 6642 (match_operand:V16QI 1 "register_operand" "0")
6408 (match_operand:V16QI 2 "nonimmediate_operand" "xm")) 6643 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6418 "punpckhbw\t{%2, %0|%0, %2}" 6653 "punpckhbw\t{%2, %0|%0, %2}"
6419 [(set_attr "type" "sselog") 6654 [(set_attr "type" "sselog")
6420 (set_attr "prefix_data16" "1") 6655 (set_attr "prefix_data16" "1")
6421 (set_attr "mode" "TI")]) 6656 (set_attr "mode" "TI")])
6422 6657
6423 (define_insn "*avx_punpcklbw" 6658 (define_insn "*avx_interleave_lowv16qi"
6424 [(set (match_operand:V16QI 0 "register_operand" "=x") 6659 [(set (match_operand:V16QI 0 "register_operand" "=x")
6425 (vec_select:V16QI 6660 (vec_select:V16QI
6426 (vec_concat:V32QI 6661 (vec_concat:V32QI
6427 (match_operand:V16QI 1 "register_operand" "x") 6662 (match_operand:V16QI 1 "register_operand" "x")
6428 (match_operand:V16QI 2 "nonimmediate_operand" "xm")) 6663 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6438 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}" 6673 "vpunpcklbw\t{%2, %1, %0|%0, %1, %2}"
6439 [(set_attr "type" "sselog") 6674 [(set_attr "type" "sselog")
6440 (set_attr "prefix" "vex") 6675 (set_attr "prefix" "vex")
6441 (set_attr "mode" "TI")]) 6676 (set_attr "mode" "TI")])
6442 6677
6443 (define_insn "sse2_punpcklbw" 6678 (define_insn "vec_interleave_lowv16qi"
6444 [(set (match_operand:V16QI 0 "register_operand" "=x") 6679 [(set (match_operand:V16QI 0 "register_operand" "=x")
6445 (vec_select:V16QI 6680 (vec_select:V16QI
6446 (vec_concat:V32QI 6681 (vec_concat:V32QI
6447 (match_operand:V16QI 1 "register_operand" "0") 6682 (match_operand:V16QI 1 "register_operand" "0")
6448 (match_operand:V16QI 2 "nonimmediate_operand" "xm")) 6683 (match_operand:V16QI 2 "nonimmediate_operand" "xm"))
6458 "punpcklbw\t{%2, %0|%0, %2}" 6693 "punpcklbw\t{%2, %0|%0, %2}"
6459 [(set_attr "type" "sselog") 6694 [(set_attr "type" "sselog")
6460 (set_attr "prefix_data16" "1") 6695 (set_attr "prefix_data16" "1")
6461 (set_attr "mode" "TI")]) 6696 (set_attr "mode" "TI")])
6462 6697
6463 (define_insn "*avx_punpckhwd" 6698 (define_insn "*avx_interleave_highv8hi"
6464 [(set (match_operand:V8HI 0 "register_operand" "=x") 6699 [(set (match_operand:V8HI 0 "register_operand" "=x")
6465 (vec_select:V8HI 6700 (vec_select:V8HI
6466 (vec_concat:V16HI 6701 (vec_concat:V16HI
6467 (match_operand:V8HI 1 "register_operand" "x") 6702 (match_operand:V8HI 1 "register_operand" "x")
6468 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) 6703 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6474 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}" 6709 "vpunpckhwd\t{%2, %1, %0|%0, %1, %2}"
6475 [(set_attr "type" "sselog") 6710 [(set_attr "type" "sselog")
6476 (set_attr "prefix" "vex") 6711 (set_attr "prefix" "vex")
6477 (set_attr "mode" "TI")]) 6712 (set_attr "mode" "TI")])
6478 6713
6479 (define_insn "sse2_punpckhwd" 6714 (define_insn "vec_interleave_highv8hi"
6480 [(set (match_operand:V8HI 0 "register_operand" "=x") 6715 [(set (match_operand:V8HI 0 "register_operand" "=x")
6481 (vec_select:V8HI 6716 (vec_select:V8HI
6482 (vec_concat:V16HI 6717 (vec_concat:V16HI
6483 (match_operand:V8HI 1 "register_operand" "0") 6718 (match_operand:V8HI 1 "register_operand" "0")
6484 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) 6719 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6490 "punpckhwd\t{%2, %0|%0, %2}" 6725 "punpckhwd\t{%2, %0|%0, %2}"
6491 [(set_attr "type" "sselog") 6726 [(set_attr "type" "sselog")
6492 (set_attr "prefix_data16" "1") 6727 (set_attr "prefix_data16" "1")
6493 (set_attr "mode" "TI")]) 6728 (set_attr "mode" "TI")])
6494 6729
6495 (define_insn "*avx_punpcklwd" 6730 (define_insn "*avx_interleave_lowv8hi"
6496 [(set (match_operand:V8HI 0 "register_operand" "=x") 6731 [(set (match_operand:V8HI 0 "register_operand" "=x")
6497 (vec_select:V8HI 6732 (vec_select:V8HI
6498 (vec_concat:V16HI 6733 (vec_concat:V16HI
6499 (match_operand:V8HI 1 "register_operand" "x") 6734 (match_operand:V8HI 1 "register_operand" "x")
6500 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) 6735 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6506 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}" 6741 "vpunpcklwd\t{%2, %1, %0|%0, %1, %2}"
6507 [(set_attr "type" "sselog") 6742 [(set_attr "type" "sselog")
6508 (set_attr "prefix" "vex") 6743 (set_attr "prefix" "vex")
6509 (set_attr "mode" "TI")]) 6744 (set_attr "mode" "TI")])
6510 6745
6511 (define_insn "sse2_punpcklwd" 6746 (define_insn "vec_interleave_lowv8hi"
6512 [(set (match_operand:V8HI 0 "register_operand" "=x") 6747 [(set (match_operand:V8HI 0 "register_operand" "=x")
6513 (vec_select:V8HI 6748 (vec_select:V8HI
6514 (vec_concat:V16HI 6749 (vec_concat:V16HI
6515 (match_operand:V8HI 1 "register_operand" "0") 6750 (match_operand:V8HI 1 "register_operand" "0")
6516 (match_operand:V8HI 2 "nonimmediate_operand" "xm")) 6751 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
6522 "punpcklwd\t{%2, %0|%0, %2}" 6757 "punpcklwd\t{%2, %0|%0, %2}"
6523 [(set_attr "type" "sselog") 6758 [(set_attr "type" "sselog")
6524 (set_attr "prefix_data16" "1") 6759 (set_attr "prefix_data16" "1")
6525 (set_attr "mode" "TI")]) 6760 (set_attr "mode" "TI")])
6526 6761
6527 (define_insn "*avx_punpckhdq" 6762 (define_insn "*avx_interleave_highv4si"
6528 [(set (match_operand:V4SI 0 "register_operand" "=x") 6763 [(set (match_operand:V4SI 0 "register_operand" "=x")
6529 (vec_select:V4SI 6764 (vec_select:V4SI
6530 (vec_concat:V8SI 6765 (vec_concat:V8SI
6531 (match_operand:V4SI 1 "register_operand" "x") 6766 (match_operand:V4SI 1 "register_operand" "x")
6532 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) 6767 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6536 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}" 6771 "vpunpckhdq\t{%2, %1, %0|%0, %1, %2}"
6537 [(set_attr "type" "sselog") 6772 [(set_attr "type" "sselog")
6538 (set_attr "prefix" "vex") 6773 (set_attr "prefix" "vex")
6539 (set_attr "mode" "TI")]) 6774 (set_attr "mode" "TI")])
6540 6775
6541 (define_insn "sse2_punpckhdq" 6776 (define_insn "vec_interleave_highv4si"
6542 [(set (match_operand:V4SI 0 "register_operand" "=x") 6777 [(set (match_operand:V4SI 0 "register_operand" "=x")
6543 (vec_select:V4SI 6778 (vec_select:V4SI
6544 (vec_concat:V8SI 6779 (vec_concat:V8SI
6545 (match_operand:V4SI 1 "register_operand" "0") 6780 (match_operand:V4SI 1 "register_operand" "0")
6546 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) 6781 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6550 "punpckhdq\t{%2, %0|%0, %2}" 6785 "punpckhdq\t{%2, %0|%0, %2}"
6551 [(set_attr "type" "sselog") 6786 [(set_attr "type" "sselog")
6552 (set_attr "prefix_data16" "1") 6787 (set_attr "prefix_data16" "1")
6553 (set_attr "mode" "TI")]) 6788 (set_attr "mode" "TI")])
6554 6789
6555 (define_insn "*avx_punpckldq" 6790 (define_insn "*avx_interleave_lowv4si"
6556 [(set (match_operand:V4SI 0 "register_operand" "=x") 6791 [(set (match_operand:V4SI 0 "register_operand" "=x")
6557 (vec_select:V4SI 6792 (vec_select:V4SI
6558 (vec_concat:V8SI 6793 (vec_concat:V8SI
6559 (match_operand:V4SI 1 "register_operand" "x") 6794 (match_operand:V4SI 1 "register_operand" "x")
6560 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) 6795 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6564 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}" 6799 "vpunpckldq\t{%2, %1, %0|%0, %1, %2}"
6565 [(set_attr "type" "sselog") 6800 [(set_attr "type" "sselog")
6566 (set_attr "prefix" "vex") 6801 (set_attr "prefix" "vex")
6567 (set_attr "mode" "TI")]) 6802 (set_attr "mode" "TI")])
6568 6803
6569 (define_insn "sse2_punpckldq" 6804 (define_insn "vec_interleave_lowv4si"
6570 [(set (match_operand:V4SI 0 "register_operand" "=x") 6805 [(set (match_operand:V4SI 0 "register_operand" "=x")
6571 (vec_select:V4SI 6806 (vec_select:V4SI
6572 (vec_concat:V8SI 6807 (vec_concat:V8SI
6573 (match_operand:V4SI 1 "register_operand" "0") 6808 (match_operand:V4SI 1 "register_operand" "0")
6574 (match_operand:V4SI 2 "nonimmediate_operand" "xm")) 6809 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
6578 "punpckldq\t{%2, %0|%0, %2}" 6813 "punpckldq\t{%2, %0|%0, %2}"
6579 [(set_attr "type" "sselog") 6814 [(set_attr "type" "sselog")
6580 (set_attr "prefix_data16" "1") 6815 (set_attr "prefix_data16" "1")
6581 (set_attr "mode" "TI")]) 6816 (set_attr "mode" "TI")])
6582 6817
;; AVX element insert for the SSEMODE124 modes: operand 2 (a scalar in a
;; general register or memory) is duplicated and merged into operand 1 under
;; the single-bit mask in operand 3.
;; The output code first replaces operand 3 by exact_log2 of its value, i.e.
;; it turns the power-of-two merge mask into the element index printed as the
;; immediate.  A memory source is printed as %2; a register source is printed
;; with the %k2 modifier.
;; Side-by-side comparison: the left column names the mnemonic suffix
;; <avxmodesuffixs>, the right column <ssevecsize>.  Lines carrying a single
;; number exist only in the right column; they add a "prefix_extra" attribute
;; that is "0" when operand 0 matches a V8HI register_operand and "1"
;; otherwise, plus "length_immediate" = "1".
6583 (define_insn "*avx_pinsr<avxmodesuffixs>" 6818 (define_insn "*avx_pinsr<ssevecsize>"
6584 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") 6819 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
6585 (vec_merge:SSEMODE124 6820 (vec_merge:SSEMODE124
6586 (vec_duplicate:SSEMODE124 6821 (vec_duplicate:SSEMODE124
6587 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm")) 6822 (match_operand:<avxscalarmode> 2 "nonimmediate_operand" "rm"))
6588 (match_operand:SSEMODE124 1 "register_operand" "x") 6823 (match_operand:SSEMODE124 1 "register_operand" "x")
6589 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))] 6824 (match_operand:SI 3 "const_pow2_1_to_<pinsrbits>_operand" "n")))]
6590 "TARGET_AVX" 6825 "TARGET_AVX"
6591 { 6826 {
6592 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); 6827 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6593 if (MEM_P (operands[2])) 6828 if (MEM_P (operands[2]))
6594 return "vpinsr<avxmodesuffixs>\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 6829 return "vpinsr<ssevecsize>\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6595 else 6830 else
6596 return "vpinsr<avxmodesuffixs>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}"; 6831 return "vpinsr<ssevecsize>\t{%3, %k2, %1, %0|%0, %1, %k2, %3}";
6597 } 6832 }
6598 [(set_attr "type" "sselog") 6833 [(set_attr "type" "sselog")
6834 (set (attr "prefix_extra")
6835 (if_then_else (match_operand:V8HI 0 "register_operand" "")
6836 (const_string "0")
6837 (const_string "1")))
6838 (set_attr "length_immediate" "1")
6599 (set_attr "prefix" "vex") 6839 (set_attr "prefix" "vex")
6600 (set_attr "mode" "TI")]) 6840 (set_attr "mode" "TI")])
6601 6841
6602 (define_insn "*sse4_1_pinsrb" 6842 (define_insn "*sse4_1_pinsrb"
6603 [(set (match_operand:V16QI 0 "register_operand" "=x") 6843 [(set (match_operand:V16QI 0 "register_operand" "=x")
6614 else 6854 else
6615 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}"; 6855 return "pinsrb\t{%3, %k2, %0|%0, %k2, %3}";
6616 } 6856 }
6617 [(set_attr "type" "sselog") 6857 [(set_attr "type" "sselog")
6618 (set_attr "prefix_extra" "1") 6858 (set_attr "prefix_extra" "1")
6859 (set_attr "length_immediate" "1")
6619 (set_attr "mode" "TI")]) 6860 (set_attr "mode" "TI")])
6620 6861
6621 (define_insn "*sse2_pinsrw" 6862 (define_insn "*sse2_pinsrw"
6622 [(set (match_operand:V8HI 0 "register_operand" "=x") 6863 [(set (match_operand:V8HI 0 "register_operand" "=x")
6623 (vec_merge:V8HI 6864 (vec_merge:V8HI
6633 else 6874 else
6634 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}"; 6875 return "pinsrw\t{%3, %k2, %0|%0, %k2, %3}";
6635 } 6876 }
6636 [(set_attr "type" "sselog") 6877 [(set_attr "type" "sselog")
6637 (set_attr "prefix_data16" "1") 6878 (set_attr "prefix_data16" "1")
6879 (set_attr "length_immediate" "1")
6638 (set_attr "mode" "TI")]) 6880 (set_attr "mode" "TI")])
6639 6881
6640 ;; It must come before sse2_loadld since it is preferred. 6882 ;; It must come before sse2_loadld since it is preferred.
6641 (define_insn "*sse4_1_pinsrd" 6883 (define_insn "*sse4_1_pinsrd"
6642 [(set (match_operand:V4SI 0 "register_operand" "=x") 6884 [(set (match_operand:V4SI 0 "register_operand" "=x")
6650 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); 6892 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6651 return "pinsrd\t{%3, %2, %0|%0, %2, %3}"; 6893 return "pinsrd\t{%3, %2, %0|%0, %2, %3}";
6652 } 6894 }
6653 [(set_attr "type" "sselog") 6895 [(set_attr "type" "sselog")
6654 (set_attr "prefix_extra" "1") 6896 (set_attr "prefix_extra" "1")
6897 (set_attr "length_immediate" "1")
6655 (set_attr "mode" "TI")]) 6898 (set_attr "mode" "TI")])
6656 6899
6657 (define_insn "*avx_pinsrq" 6900 (define_insn "*avx_pinsrq"
6658 [(set (match_operand:V2DI 0 "register_operand" "=x") 6901 [(set (match_operand:V2DI 0 "register_operand" "=x")
6659 (vec_merge:V2DI 6902 (vec_merge:V2DI
6665 { 6908 {
6666 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); 6909 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6667 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 6910 return "vpinsrq\t{%3, %2, %1, %0|%0, %1, %2, %3}";
6668 } 6911 }
6669 [(set_attr "type" "sselog") 6912 [(set_attr "type" "sselog")
6913 (set_attr "prefix_extra" "1")
6914 (set_attr "length_immediate" "1")
6670 (set_attr "prefix" "vex") 6915 (set_attr "prefix" "vex")
6671 (set_attr "mode" "TI")]) 6916 (set_attr "mode" "TI")])
6672 6917
6673 (define_insn "*sse4_1_pinsrq" 6918 (define_insn "*sse4_1_pinsrq"
6674 [(set (match_operand:V2DI 0 "register_operand" "=x") 6919 [(set (match_operand:V2DI 0 "register_operand" "=x")
6681 { 6926 {
6682 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3]))); 6927 operands[3] = GEN_INT (exact_log2 (INTVAL (operands[3])));
6683 return "pinsrq\t{%3, %2, %0|%0, %2, %3}"; 6928 return "pinsrq\t{%3, %2, %0|%0, %2, %3}";
6684 } 6929 }
6685 [(set_attr "type" "sselog") 6930 [(set_attr "type" "sselog")
6686 (set_attr "prefix_extra" "1") 6931 (set_attr "prefix_rex" "1")
6932 (set_attr "prefix_extra" "1")
6933 (set_attr "length_immediate" "1")
6687 (set_attr "mode" "TI")]) 6934 (set_attr "mode" "TI")])
6688 6935
6689 (define_insn "*sse4_1_pextrb" 6936 (define_insn "*sse4_1_pextrb"
6690 [(set (match_operand:SI 0 "register_operand" "=r") 6937 [(set (match_operand:SI 0 "register_operand" "=r")
6691 (zero_extend:SI 6938 (zero_extend:SI
6694 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))] 6941 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")]))))]
6695 "TARGET_SSE4_1" 6942 "TARGET_SSE4_1"
6696 "%vpextrb\t{%2, %1, %0|%0, %1, %2}" 6943 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6697 [(set_attr "type" "sselog") 6944 [(set_attr "type" "sselog")
6698 (set_attr "prefix_extra" "1") 6945 (set_attr "prefix_extra" "1")
6946 (set_attr "length_immediate" "1")
6699 (set_attr "prefix" "maybe_vex") 6947 (set_attr "prefix" "maybe_vex")
6700 (set_attr "mode" "TI")]) 6948 (set_attr "mode" "TI")])
6701 6949
6702 (define_insn "*sse4_1_pextrb_memory" 6950 (define_insn "*sse4_1_pextrb_memory"
6703 [(set (match_operand:QI 0 "memory_operand" "=m") 6951 [(set (match_operand:QI 0 "memory_operand" "=m")
6706 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))] 6954 (parallel [(match_operand:SI 2 "const_0_to_15_operand" "n")])))]
6707 "TARGET_SSE4_1" 6955 "TARGET_SSE4_1"
6708 "%vpextrb\t{%2, %1, %0|%0, %1, %2}" 6956 "%vpextrb\t{%2, %1, %0|%0, %1, %2}"
6709 [(set_attr "type" "sselog") 6957 [(set_attr "type" "sselog")
6710 (set_attr "prefix_extra" "1") 6958 (set_attr "prefix_extra" "1")
6959 (set_attr "length_immediate" "1")
6711 (set_attr "prefix" "maybe_vex") 6960 (set_attr "prefix" "maybe_vex")
6712 (set_attr "mode" "TI")]) 6961 (set_attr "mode" "TI")])
6713 6962
6714 (define_insn "*sse2_pextrw" 6963 (define_insn "*sse2_pextrw"
6715 [(set (match_operand:SI 0 "register_operand" "=r") 6964 [(set (match_operand:SI 0 "register_operand" "=r")
6719 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))] 6968 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")]))))]
6720 "TARGET_SSE2" 6969 "TARGET_SSE2"
6721 "%vpextrw\t{%2, %1, %0|%0, %1, %2}" 6970 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6722 [(set_attr "type" "sselog") 6971 [(set_attr "type" "sselog")
6723 (set_attr "prefix_data16" "1") 6972 (set_attr "prefix_data16" "1")
6973 (set_attr "length_immediate" "1")
6724 (set_attr "prefix" "maybe_vex") 6974 (set_attr "prefix" "maybe_vex")
6725 (set_attr "mode" "TI")]) 6975 (set_attr "mode" "TI")])
6726 6976
6727 (define_insn "*sse4_1_pextrw_memory" 6977 (define_insn "*sse4_1_pextrw_memory"
6728 [(set (match_operand:HI 0 "memory_operand" "=m") 6978 [(set (match_operand:HI 0 "memory_operand" "=m")
6731 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))] 6981 (parallel [(match_operand:SI 2 "const_0_to_7_operand" "n")])))]
6732 "TARGET_SSE4_1" 6982 "TARGET_SSE4_1"
6733 "%vpextrw\t{%2, %1, %0|%0, %1, %2}" 6983 "%vpextrw\t{%2, %1, %0|%0, %1, %2}"
6734 [(set_attr "type" "sselog") 6984 [(set_attr "type" "sselog")
6735 (set_attr "prefix_extra" "1") 6985 (set_attr "prefix_extra" "1")
6986 (set_attr "length_immediate" "1")
6736 (set_attr "prefix" "maybe_vex") 6987 (set_attr "prefix" "maybe_vex")
6737 (set_attr "mode" "TI")]) 6988 (set_attr "mode" "TI")])
6738 6989
6739 (define_insn "*sse4_1_pextrd" 6990 (define_insn "*sse4_1_pextrd"
6740 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm") 6991 [(set (match_operand:SI 0 "nonimmediate_operand" "=rm")
6743 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))] 6994 (parallel [(match_operand:SI 2 "const_0_to_3_operand" "n")])))]
6744 "TARGET_SSE4_1" 6995 "TARGET_SSE4_1"
6745 "%vpextrd\t{%2, %1, %0|%0, %1, %2}" 6996 "%vpextrd\t{%2, %1, %0|%0, %1, %2}"
6746 [(set_attr "type" "sselog") 6997 [(set_attr "type" "sselog")
6747 (set_attr "prefix_extra" "1") 6998 (set_attr "prefix_extra" "1")
6999 (set_attr "length_immediate" "1")
6748 (set_attr "prefix" "maybe_vex") 7000 (set_attr "prefix" "maybe_vex")
6749 (set_attr "mode" "TI")]) 7001 (set_attr "mode" "TI")])
6750 7002
6751 ;; It must come before *vec_extractv2di_1_sse since it is preferred. 7003 ;; It must come before *vec_extractv2di_1_sse since it is preferred.
6752 (define_insn "*sse4_1_pextrq" 7004 (define_insn "*sse4_1_pextrq"
6755 (match_operand:V2DI 1 "register_operand" "x") 7007 (match_operand:V2DI 1 "register_operand" "x")
6756 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))] 7008 (parallel [(match_operand:SI 2 "const_0_to_1_operand" "n")])))]
6757 "TARGET_SSE4_1 && TARGET_64BIT" 7009 "TARGET_SSE4_1 && TARGET_64BIT"
6758 "%vpextrq\t{%2, %1, %0|%0, %1, %2}" 7010 "%vpextrq\t{%2, %1, %0|%0, %1, %2}"
6759 [(set_attr "type" "sselog") 7011 [(set_attr "type" "sselog")
6760 (set_attr "prefix_extra" "1") 7012 (set_attr "prefix_rex" "1")
7013 (set_attr "prefix_extra" "1")
7014 (set_attr "length_immediate" "1")
6761 (set_attr "prefix" "maybe_vex") 7015 (set_attr "prefix" "maybe_vex")
6762 (set_attr "mode" "TI")]) 7016 (set_attr "mode" "TI")])
6763 7017
6764 (define_expand "sse2_pshufd" 7018 (define_expand "sse2_pshufd"
6765 [(match_operand:V4SI 0 "register_operand" "") 7019 [(match_operand:V4SI 0 "register_operand" "")
6795 7049
6796 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}"; 7050 return "%vpshufd\t{%2, %1, %0|%0, %1, %2}";
6797 } 7051 }
6798 [(set_attr "type" "sselog1") 7052 [(set_attr "type" "sselog1")
6799 (set_attr "prefix_data16" "1") 7053 (set_attr "prefix_data16" "1")
6800 (set_attr "prefix" "vex") 7054 (set_attr "prefix" "maybe_vex")
7055 (set_attr "length_immediate" "1")
6801 (set_attr "mode" "TI")]) 7056 (set_attr "mode" "TI")])
6802 7057
6803 (define_expand "sse2_pshuflw" 7058 (define_expand "sse2_pshuflw"
6804 [(match_operand:V8HI 0 "register_operand" "") 7059 [(match_operand:V8HI 0 "register_operand" "")
6805 (match_operand:V8HI 1 "nonimmediate_operand" "") 7060 (match_operand:V8HI 1 "nonimmediate_operand" "")
6837 operands[2] = GEN_INT (mask); 7092 operands[2] = GEN_INT (mask);
6838 7093
6839 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}"; 7094 return "%vpshuflw\t{%2, %1, %0|%0, %1, %2}";
6840 } 7095 }
6841 [(set_attr "type" "sselog") 7096 [(set_attr "type" "sselog")
7097 (set_attr "prefix_data16" "0")
6842 (set_attr "prefix_rep" "1") 7098 (set_attr "prefix_rep" "1")
6843 (set_attr "prefix" "maybe_vex") 7099 (set_attr "prefix" "maybe_vex")
7100 (set_attr "length_immediate" "1")
6844 (set_attr "mode" "TI")]) 7101 (set_attr "mode" "TI")])
6845 7102
6846 (define_expand "sse2_pshufhw" 7103 (define_expand "sse2_pshufhw"
6847 [(match_operand:V8HI 0 "register_operand" "") 7104 [(match_operand:V8HI 0 "register_operand" "")
6848 (match_operand:V8HI 1 "nonimmediate_operand" "") 7105 (match_operand:V8HI 1 "nonimmediate_operand" "")
6881 7138
6882 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}"; 7139 return "%vpshufhw\t{%2, %1, %0|%0, %1, %2}";
6883 } 7140 }
6884 [(set_attr "type" "sselog") 7141 [(set_attr "type" "sselog")
6885 (set_attr "prefix_rep" "1") 7142 (set_attr "prefix_rep" "1")
7143 (set_attr "prefix_data16" "0")
6886 (set_attr "prefix" "maybe_vex") 7144 (set_attr "prefix" "maybe_vex")
7145 (set_attr "length_immediate" "1")
6887 (set_attr "mode" "TI")]) 7146 (set_attr "mode" "TI")])
6888 7147
6889 (define_expand "sse2_loadd" 7148 (define_expand "sse2_loadd"
6890 [(set (match_operand:V4SI 0 "register_operand" "") 7149 [(set (match_operand:V4SI 0 "register_operand" "")
6891 (vec_merge:V4SI 7150 (vec_merge:V4SI
7017 vmovhps\t{%1, %0|%0, %1} 7276 vmovhps\t{%1, %0|%0, %1}
7018 vpsrldq\t{$8, %1, %0|%0, %1, 8} 7277 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7019 vmovq\t{%H1, %0|%0, %H1} 7278 vmovq\t{%H1, %0|%0, %H1}
7020 vmov{q}\t{%H1, %0|%0, %H1}" 7279 vmov{q}\t{%H1, %0|%0, %H1}"
7021 [(set_attr "type" "ssemov,sseishft,ssemov,imov") 7280 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7281 (set_attr "length_immediate" "*,1,*,*")
7022 (set_attr "memory" "*,none,*,*") 7282 (set_attr "memory" "*,none,*,*")
7023 (set_attr "prefix" "vex") 7283 (set_attr "prefix" "vex")
7024 (set_attr "mode" "V2SF,TI,TI,DI")]) 7284 (set_attr "mode" "V2SF,TI,TI,DI")])
7025 7285
7026 (define_insn "*vec_extractv2di_1_rex64" 7286 (define_insn "*vec_extractv2di_1_rex64"
7033 movhps\t{%1, %0|%0, %1} 7293 movhps\t{%1, %0|%0, %1}
7034 psrldq\t{$8, %0|%0, 8} 7294 psrldq\t{$8, %0|%0, 8}
7035 movq\t{%H1, %0|%0, %H1} 7295 movq\t{%H1, %0|%0, %H1}
7036 mov{q}\t{%H1, %0|%0, %H1}" 7296 mov{q}\t{%H1, %0|%0, %H1}"
7037 [(set_attr "type" "ssemov,sseishft,ssemov,imov") 7297 [(set_attr "type" "ssemov,sseishft,ssemov,imov")
7298 (set_attr "length_immediate" "*,1,*,*")
7299 (set_attr "atom_unit" "*,sishuf,*,*")
7038 (set_attr "memory" "*,none,*,*") 7300 (set_attr "memory" "*,none,*,*")
7039 (set_attr "mode" "V2SF,TI,TI,DI")]) 7301 (set_attr "mode" "V2SF,TI,TI,DI")])
7040 7302
7041 (define_insn "*vec_extractv2di_1_avx" 7303 (define_insn "*vec_extractv2di_1_avx"
7042 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x") 7304 [(set (match_operand:DI 0 "nonimmediate_operand" "=m,x,x")
7049 "@ 7311 "@
7050 vmovhps\t{%1, %0|%0, %1} 7312 vmovhps\t{%1, %0|%0, %1}
7051 vpsrldq\t{$8, %1, %0|%0, %1, 8} 7313 vpsrldq\t{$8, %1, %0|%0, %1, 8}
7052 vmovq\t{%H1, %0|%0, %H1}" 7314 vmovq\t{%H1, %0|%0, %H1}"
7053 [(set_attr "type" "ssemov,sseishft,ssemov") 7315 [(set_attr "type" "ssemov,sseishft,ssemov")
7316 (set_attr "length_immediate" "*,1,*")
7054 (set_attr "memory" "*,none,*") 7317 (set_attr "memory" "*,none,*")
7055 (set_attr "prefix" "vex") 7318 (set_attr "prefix" "vex")
7056 (set_attr "mode" "V2SF,TI,TI")]) 7319 (set_attr "mode" "V2SF,TI,TI")])
7057 7320
7058 (define_insn "*vec_extractv2di_1_sse2" 7321 (define_insn "*vec_extractv2di_1_sse2"
7065 "@ 7328 "@
7066 movhps\t{%1, %0|%0, %1} 7329 movhps\t{%1, %0|%0, %1}
7067 psrldq\t{$8, %0|%0, 8} 7330 psrldq\t{$8, %0|%0, 8}
7068 movq\t{%H1, %0|%0, %H1}" 7331 movq\t{%H1, %0|%0, %H1}"
7069 [(set_attr "type" "ssemov,sseishft,ssemov") 7332 [(set_attr "type" "ssemov,sseishft,ssemov")
7333 (set_attr "length_immediate" "*,1,*")
7334 (set_attr "atom_unit" "*,sishuf,*")
7070 (set_attr "memory" "*,none,*") 7335 (set_attr "memory" "*,none,*")
7071 (set_attr "mode" "V2SF,TI,TI")]) 7336 (set_attr "mode" "V2SF,TI,TI")])
7072 7337
7073 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva 7338 ;; Not sure this is ever used, but it doesn't hurt to have it. -aoliva
7074 (define_insn "*vec_extractv2di_1_sse" 7339 (define_insn "*vec_extractv2di_1_sse"
7083 movhlps\t{%1, %0|%0, %1} 7348 movhlps\t{%1, %0|%0, %1}
7084 movlps\t{%H1, %0|%0, %H1}" 7349 movlps\t{%H1, %0|%0, %H1}"
7085 [(set_attr "type" "ssemov") 7350 [(set_attr "type" "ssemov")
7086 (set_attr "mode" "V2SF,V4SF,V2SF")]) 7351 (set_attr "mode" "V2SF,V4SF,V2SF")])
7087 7352
7353 (define_insn "*vec_dupv4si_avx"
7354 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
7355 (vec_duplicate:V4SI
7356 (match_operand:SI 1 "register_operand" "x,m")))]
7357 "TARGET_AVX"
7358 "@
7359 vpshufd\t{$0, %1, %0|%0, %1, 0}
7360 vbroadcastss\t{%1, %0|%0, %1}"
7361 [(set_attr "type" "sselog1,ssemov")
7362 (set_attr "length_immediate" "1,0")
7363 (set_attr "prefix_extra" "0,1")
7364 (set_attr "prefix" "vex")
7365 (set_attr "mode" "TI,V4SF")])
7366
7088 (define_insn "*vec_dupv4si" 7367 (define_insn "*vec_dupv4si"
7089 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x") 7368 [(set (match_operand:V4SI 0 "register_operand" "=Y2,x")
7090 (vec_duplicate:V4SI 7369 (vec_duplicate:V4SI
7091 (match_operand:SI 1 "register_operand" " Y2,0")))] 7370 (match_operand:SI 1 "register_operand" " Y2,0")))]
7092 "TARGET_SSE" 7371 "TARGET_SSE"
7093 "@ 7372 "@
7094 %vpshufd\t{$0, %1, %0|%0, %1, 0} 7373 %vpshufd\t{$0, %1, %0|%0, %1, 0}
7095 shufps\t{$0, %0, %0|%0, %0, 0}" 7374 shufps\t{$0, %0, %0|%0, %0, 0}"
7096 [(set_attr "type" "sselog1") 7375 [(set_attr "type" "sselog1")
7097 (set_attr "prefix" "maybe_vex,orig") 7376 (set_attr "length_immediate" "1")
7098 (set_attr "mode" "TI,V4SF")]) 7377 (set_attr "mode" "TI,V4SF")])
7099 7378
7100 (define_insn "*vec_dupv2di_avx" 7379 (define_insn "*vec_dupv2di_avx"
7101 [(set (match_operand:V2DI 0 "register_operand" "=x") 7380 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7102 (vec_duplicate:V2DI 7381 (vec_duplicate:V2DI
7103 (match_operand:DI 1 "register_operand" "x")))] 7382 (match_operand:DI 1 "nonimmediate_operand" " x,m")))]
7104 "TARGET_AVX" 7383 "TARGET_AVX"
7105 "vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}" 7384 "@
7385 vpunpcklqdq\t{%1, %1, %0|%0, %1, %1}
7386 vmovddup\t{%1, %0|%0, %1}"
7106 [(set_attr "type" "sselog1") 7387 [(set_attr "type" "sselog1")
7107 (set_attr "prefix" "vex") 7388 (set_attr "prefix" "vex")
7108 (set_attr "mode" "TI")]) 7389 (set_attr "mode" "TI,DF")])
7390
7391 (define_insn "*vec_dupv2di_sse3"
7392 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
7393 (vec_duplicate:V2DI
7394 (match_operand:DI 1 "nonimmediate_operand" " 0,m")))]
7395 "TARGET_SSE3"
7396 "@
7397 punpcklqdq\t%0, %0
7398 movddup\t{%1, %0|%0, %1}"
7399 [(set_attr "type" "sselog1")
7400 (set_attr "mode" "TI,DF")])
7109 7401
7110 (define_insn "*vec_dupv2di" 7402 (define_insn "*vec_dupv2di"
7111 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x") 7403 [(set (match_operand:V2DI 0 "register_operand" "=Y2,x")
7112 (vec_duplicate:V2DI 7404 (vec_duplicate:V2DI
7113 (match_operand:DI 1 "register_operand" " 0 ,0")))] 7405 (match_operand:DI 1 "register_operand" " 0 ,0")))]
7129 vpunpckldq\t{%2, %1, %0|%0, %1, %2} 7421 vpunpckldq\t{%2, %1, %0|%0, %1, %2}
7130 vmovd\t{%1, %0|%0, %1} 7422 vmovd\t{%1, %0|%0, %1}
7131 punpckldq\t{%2, %0|%0, %2} 7423 punpckldq\t{%2, %0|%0, %2}
7132 movd\t{%1, %0|%0, %1}" 7424 movd\t{%1, %0|%0, %1}"
7133 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") 7425 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7426 (set_attr "prefix_extra" "1,*,*,*,*")
7427 (set_attr "length_immediate" "1,*,*,*,*")
7134 (set (attr "prefix") 7428 (set (attr "prefix")
7135 (if_then_else (eq_attr "alternative" "3,4") 7429 (if_then_else (eq_attr "alternative" "3,4")
7136 (const_string "orig") 7430 (const_string "orig")
7137 (const_string "vex"))) 7431 (const_string "vex")))
7138 (set_attr "mode" "TI,TI,TI,DI,DI")]) 7432 (set_attr "mode" "TI,TI,TI,DI,DI")])
7149 movd\t{%1, %0|%0, %1} 7443 movd\t{%1, %0|%0, %1}
7150 punpckldq\t{%2, %0|%0, %2} 7444 punpckldq\t{%2, %0|%0, %2}
7151 movd\t{%1, %0|%0, %1}" 7445 movd\t{%1, %0|%0, %1}"
7152 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov") 7446 [(set_attr "type" "sselog,sselog,ssemov,mmxcvt,mmxmov")
7153 (set_attr "prefix_extra" "1,*,*,*,*") 7447 (set_attr "prefix_extra" "1,*,*,*,*")
7448 (set_attr "length_immediate" "1,*,*,*,*")
7154 (set_attr "mode" "TI,TI,TI,DI,DI")]) 7449 (set_attr "mode" "TI,TI,TI,DI,DI")])
7155 7450
7156 ;; ??? In theory we can match memory for the MMX alternative, but allowing 7451 ;; ??? In theory we can match memory for the MMX alternative, but allowing
7157 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE 7452 ;; nonimmediate_operand for operand 2 and *not* allowing memory for the SSE
7158 ;; alternatives pretty much forces the MMX alternative to be chosen. 7453 ;; alternatives pretty much forces the MMX alternative to be chosen.
7255 vmovq\t{%1, %0|%0, %1} 7550 vmovq\t{%1, %0|%0, %1}
7256 movq2dq\t{%1, %0|%0, %1} 7551 movq2dq\t{%1, %0|%0, %1}
7257 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2} 7552 vpunpcklqdq\t{%2, %1, %0|%0, %1, %2}
7258 vmovhps\t{%2, %1, %0|%0, %1, %2}" 7553 vmovhps\t{%2, %1, %0|%0, %1, %2}"
7259 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov") 7554 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov")
7555 (set_attr "prefix_extra" "1,*,*,*,*,*")
7556 (set_attr "length_immediate" "1,*,*,*,*,*")
7260 (set (attr "prefix") 7557 (set (attr "prefix")
7261 (if_then_else (eq_attr "alternative" "3") 7558 (if_then_else (eq_attr "alternative" "3")
7262 (const_string "orig") 7559 (const_string "orig")
7263 (const_string "vex"))) 7560 (const_string "vex")))
7264 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")]) 7561 (set_attr "mode" "TI,TI,TI,TI,TI,V2SF")])
7276 movq2dq\t{%1, %0|%0, %1} 7573 movq2dq\t{%1, %0|%0, %1}
7277 punpcklqdq\t{%2, %0|%0, %2} 7574 punpcklqdq\t{%2, %0|%0, %2}
7278 movlhps\t{%2, %0|%0, %2} 7575 movlhps\t{%2, %0|%0, %2}
7279 movhps\t{%2, %0|%0, %2}" 7576 movhps\t{%2, %0|%0, %2}"
7280 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov") 7577 [(set_attr "type" "sselog,ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7578 (set_attr "prefix_rex" "1,*,1,*,*,*,*")
7281 (set_attr "prefix_extra" "1,*,*,*,*,*,*") 7579 (set_attr "prefix_extra" "1,*,*,*,*,*,*")
7580 (set_attr "length_immediate" "1,*,*,*,*,*,*")
7282 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")]) 7581 (set_attr "mode" "TI,TI,TI,TI,TI,V4SF,V2SF")])
7283 7582
7284 (define_insn "*vec_concatv2di_rex64_sse" 7583 (define_insn "*vec_concatv2di_rex64_sse"
7285 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x") 7584 [(set (match_operand:V2DI 0 "register_operand" "=Y2 ,Yi,!Y2,Y2,x,x")
7286 (vec_concat:V2DI 7585 (vec_concat:V2DI
7293 movq2dq\t{%1, %0|%0, %1} 7592 movq2dq\t{%1, %0|%0, %1}
7294 punpcklqdq\t{%2, %0|%0, %2} 7593 punpcklqdq\t{%2, %0|%0, %2}
7295 movlhps\t{%2, %0|%0, %2} 7594 movlhps\t{%2, %0|%0, %2}
7296 movhps\t{%2, %0|%0, %2}" 7595 movhps\t{%2, %0|%0, %2}"
7297 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov") 7596 [(set_attr "type" "ssemov,ssemov,ssemov,sselog,ssemov,ssemov")
7597 (set_attr "prefix_rex" "*,1,*,*,*,*")
7298 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")]) 7598 (set_attr "mode" "TI,TI,TI,TI,V4SF,V2SF")])
7299 7599
7300 (define_expand "vec_unpacku_hi_v16qi" 7600 (define_expand "vec_unpacku_hi_v16qi"
7301 [(match_operand:V8HI 0 "register_operand" "") 7601 [(match_operand:V8HI 0 "register_operand" "")
7302 (match_operand:V16QI 1 "register_operand" "")] 7602 (match_operand:V16QI 1 "register_operand" "")]
7303 "TARGET_SSE2" 7603 "TARGET_SSE2"
7304 { 7604 {
7305 if (TARGET_SSE4_1) 7605 if (TARGET_SSE4_1)
7306 ix86_expand_sse4_unpack (operands, true, true); 7606 ix86_expand_sse4_unpack (operands, true, true);
7307 else if (TARGET_SSE5)
7308 ix86_expand_sse5_unpack (operands, true, true);
7309 else 7607 else
7310 ix86_expand_sse_unpack (operands, true, true); 7608 ix86_expand_sse_unpack (operands, true, true);
7311 DONE; 7609 DONE;
7312 }) 7610 })
7313 7611
7316 (match_operand:V16QI 1 "register_operand" "")] 7614 (match_operand:V16QI 1 "register_operand" "")]
7317 "TARGET_SSE2" 7615 "TARGET_SSE2"
7318 { 7616 {
7319 if (TARGET_SSE4_1) 7617 if (TARGET_SSE4_1)
7320 ix86_expand_sse4_unpack (operands, false, true); 7618 ix86_expand_sse4_unpack (operands, false, true);
7321 else if (TARGET_SSE5)
7322 ix86_expand_sse5_unpack (operands, false, true);
7323 else 7619 else
7324 ix86_expand_sse_unpack (operands, false, true); 7620 ix86_expand_sse_unpack (operands, false, true);
7325 DONE; 7621 DONE;
7326 }) 7622 })
7327 7623
7330 (match_operand:V16QI 1 "register_operand" "")] 7626 (match_operand:V16QI 1 "register_operand" "")]
7331 "TARGET_SSE2" 7627 "TARGET_SSE2"
7332 { 7628 {
7333 if (TARGET_SSE4_1) 7629 if (TARGET_SSE4_1)
7334 ix86_expand_sse4_unpack (operands, true, false); 7630 ix86_expand_sse4_unpack (operands, true, false);
7335 else if (TARGET_SSE5)
7336 ix86_expand_sse5_unpack (operands, true, false);
7337 else 7631 else
7338 ix86_expand_sse_unpack (operands, true, false); 7632 ix86_expand_sse_unpack (operands, true, false);
7339 DONE; 7633 DONE;
7340 }) 7634 })
7341 7635
7344 (match_operand:V16QI 1 "register_operand" "")] 7638 (match_operand:V16QI 1 "register_operand" "")]
7345 "TARGET_SSE2" 7639 "TARGET_SSE2"
7346 { 7640 {
7347 if (TARGET_SSE4_1) 7641 if (TARGET_SSE4_1)
7348 ix86_expand_sse4_unpack (operands, false, false); 7642 ix86_expand_sse4_unpack (operands, false, false);
7349 else if (TARGET_SSE5)
7350 ix86_expand_sse5_unpack (operands, false, false);
7351 else 7643 else
7352 ix86_expand_sse_unpack (operands, false, false); 7644 ix86_expand_sse_unpack (operands, false, false);
7353 DONE; 7645 DONE;
7354 }) 7646 })
7355 7647
7358 (match_operand:V8HI 1 "register_operand" "")] 7650 (match_operand:V8HI 1 "register_operand" "")]
7359 "TARGET_SSE2" 7651 "TARGET_SSE2"
7360 { 7652 {
7361 if (TARGET_SSE4_1) 7653 if (TARGET_SSE4_1)
7362 ix86_expand_sse4_unpack (operands, true, true); 7654 ix86_expand_sse4_unpack (operands, true, true);
7363 else if (TARGET_SSE5)
7364 ix86_expand_sse5_unpack (operands, true, true);
7365 else 7655 else
7366 ix86_expand_sse_unpack (operands, true, true); 7656 ix86_expand_sse_unpack (operands, true, true);
7367 DONE; 7657 DONE;
7368 }) 7658 })
7369 7659
7372 (match_operand:V8HI 1 "register_operand" "")] 7662 (match_operand:V8HI 1 "register_operand" "")]
7373 "TARGET_SSE2" 7663 "TARGET_SSE2"
7374 { 7664 {
7375 if (TARGET_SSE4_1) 7665 if (TARGET_SSE4_1)
7376 ix86_expand_sse4_unpack (operands, false, true); 7666 ix86_expand_sse4_unpack (operands, false, true);
7377 else if (TARGET_SSE5)
7378 ix86_expand_sse5_unpack (operands, false, true);
7379 else 7667 else
7380 ix86_expand_sse_unpack (operands, false, true); 7668 ix86_expand_sse_unpack (operands, false, true);
7381 DONE; 7669 DONE;
7382 }) 7670 })
7383 7671
7386 (match_operand:V8HI 1 "register_operand" "")] 7674 (match_operand:V8HI 1 "register_operand" "")]
7387 "TARGET_SSE2" 7675 "TARGET_SSE2"
7388 { 7676 {
7389 if (TARGET_SSE4_1) 7677 if (TARGET_SSE4_1)
7390 ix86_expand_sse4_unpack (operands, true, false); 7678 ix86_expand_sse4_unpack (operands, true, false);
7391 else if (TARGET_SSE5)
7392 ix86_expand_sse5_unpack (operands, true, false);
7393 else 7679 else
7394 ix86_expand_sse_unpack (operands, true, false); 7680 ix86_expand_sse_unpack (operands, true, false);
7395 DONE; 7681 DONE;
7396 }) 7682 })
7397 7683
7400 (match_operand:V8HI 1 "register_operand" "")] 7686 (match_operand:V8HI 1 "register_operand" "")]
7401 "TARGET_SSE2" 7687 "TARGET_SSE2"
7402 { 7688 {
7403 if (TARGET_SSE4_1) 7689 if (TARGET_SSE4_1)
7404 ix86_expand_sse4_unpack (operands, false, false); 7690 ix86_expand_sse4_unpack (operands, false, false);
7405 else if (TARGET_SSE5)
7406 ix86_expand_sse5_unpack (operands, false, false);
7407 else 7691 else
7408 ix86_expand_sse_unpack (operands, false, false); 7692 ix86_expand_sse_unpack (operands, false, false);
7409 DONE; 7693 DONE;
7410 }) 7694 })
7411 7695
7414 (match_operand:V4SI 1 "register_operand" "")] 7698 (match_operand:V4SI 1 "register_operand" "")]
7415 "TARGET_SSE2" 7699 "TARGET_SSE2"
7416 { 7700 {
7417 if (TARGET_SSE4_1) 7701 if (TARGET_SSE4_1)
7418 ix86_expand_sse4_unpack (operands, true, true); 7702 ix86_expand_sse4_unpack (operands, true, true);
7419 else if (TARGET_SSE5)
7420 ix86_expand_sse5_unpack (operands, true, true);
7421 else 7703 else
7422 ix86_expand_sse_unpack (operands, true, true); 7704 ix86_expand_sse_unpack (operands, true, true);
7423 DONE; 7705 DONE;
7424 }) 7706 })
7425 7707
7428 (match_operand:V4SI 1 "register_operand" "")] 7710 (match_operand:V4SI 1 "register_operand" "")]
7429 "TARGET_SSE2" 7711 "TARGET_SSE2"
7430 { 7712 {
7431 if (TARGET_SSE4_1) 7713 if (TARGET_SSE4_1)
7432 ix86_expand_sse4_unpack (operands, false, true); 7714 ix86_expand_sse4_unpack (operands, false, true);
7433 else if (TARGET_SSE5)
7434 ix86_expand_sse5_unpack (operands, false, true);
7435 else 7715 else
7436 ix86_expand_sse_unpack (operands, false, true); 7716 ix86_expand_sse_unpack (operands, false, true);
7437 DONE; 7717 DONE;
7438 }) 7718 })
7439 7719
7442 (match_operand:V4SI 1 "register_operand" "")] 7722 (match_operand:V4SI 1 "register_operand" "")]
7443 "TARGET_SSE2" 7723 "TARGET_SSE2"
7444 { 7724 {
7445 if (TARGET_SSE4_1) 7725 if (TARGET_SSE4_1)
7446 ix86_expand_sse4_unpack (operands, true, false); 7726 ix86_expand_sse4_unpack (operands, true, false);
7447 else if (TARGET_SSE5)
7448 ix86_expand_sse5_unpack (operands, true, false);
7449 else 7727 else
7450 ix86_expand_sse_unpack (operands, true, false); 7728 ix86_expand_sse_unpack (operands, true, false);
7451 DONE; 7729 DONE;
7452 }) 7730 })
7453 7731
7456 (match_operand:V4SI 1 "register_operand" "")] 7734 (match_operand:V4SI 1 "register_operand" "")]
7457 "TARGET_SSE2" 7735 "TARGET_SSE2"
7458 { 7736 {
7459 if (TARGET_SSE4_1) 7737 if (TARGET_SSE4_1)
7460 ix86_expand_sse4_unpack (operands, false, false); 7738 ix86_expand_sse4_unpack (operands, false, false);
7461 else if (TARGET_SSE5)
7462 ix86_expand_sse5_unpack (operands, false, false);
7463 else 7739 else
7464 ix86_expand_sse_unpack (operands, false, false); 7740 ix86_expand_sse_unpack (operands, false, false);
7465 DONE; 7741 DONE;
7466 }) 7742 })
7467 7743
7622 (match_operand:V16QI 2 "nonimmediate_operand" "xm")] 7898 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
7623 UNSPEC_PSADBW))] 7899 UNSPEC_PSADBW))]
7624 "TARGET_SSE2" 7900 "TARGET_SSE2"
7625 "psadbw\t{%2, %0|%0, %2}" 7901 "psadbw\t{%2, %0|%0, %2}"
7626 [(set_attr "type" "sseiadd") 7902 [(set_attr "type" "sseiadd")
7903 (set_attr "atom_unit" "simul")
7627 (set_attr "prefix_data16" "1") 7904 (set_attr "prefix_data16" "1")
7628 (set_attr "mode" "TI")]) 7905 (set_attr "mode" "TI")])
7629 7906
7630 (define_insn "avx_movmskp<avxmodesuffixf2c>256" 7907 (define_insn "avx_movmskp<avxmodesuffixf2c>256"
7631 [(set (match_operand:SI 0 "register_operand" "=r") 7908 [(set (match_operand:SI 0 "register_operand" "=r")
7643 (unspec:SI 7920 (unspec:SI
7644 [(match_operand:SSEMODEF2P 1 "register_operand" "x")] 7921 [(match_operand:SSEMODEF2P 1 "register_operand" "x")]
7645 UNSPEC_MOVMSK))] 7922 UNSPEC_MOVMSK))]
7646 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)" 7923 "SSE_VEC_FLOAT_MODE_P (<MODE>mode)"
7647 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}" 7924 "%vmovmskp<ssemodesuffixf2c>\t{%1, %0|%0, %1}"
7648 [(set_attr "type" "ssecvt") 7925 [(set_attr "type" "ssemov")
7649 (set_attr "prefix" "maybe_vex") 7926 (set_attr "prefix" "maybe_vex")
7650 (set_attr "mode" "<MODE>")]) 7927 (set_attr "mode" "<MODE>")])
7651 7928
7652 (define_insn "sse2_pmovmskb" 7929 (define_insn "sse2_pmovmskb"
7653 [(set (match_operand:SI 0 "register_operand" "=r") 7930 [(set (match_operand:SI 0 "register_operand" "=r")
7654 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")] 7931 (unspec:SI [(match_operand:V16QI 1 "register_operand" "x")]
7655 UNSPEC_MOVMSK))] 7932 UNSPEC_MOVMSK))]
7656 "TARGET_SSE2" 7933 "TARGET_SSE2"
7657 "%vpmovmskb\t{%1, %0|%0, %1}" 7934 "%vpmovmskb\t{%1, %0|%0, %1}"
7658 [(set_attr "type" "ssecvt") 7935 [(set_attr "type" "ssemov")
7659 (set_attr "prefix_data16" "1") 7936 (set_attr "prefix_data16" "1")
7660 (set_attr "prefix" "maybe_vex") 7937 (set_attr "prefix" "maybe_vex")
7661 (set_attr "mode" "SI")]) 7938 (set_attr "mode" "SI")])
7662 7939
7663 (define_expand "sse2_maskmovdqu" 7940 (define_expand "sse2_maskmovdqu"
7676 (mem:V16QI (match_dup 0))] 7953 (mem:V16QI (match_dup 0))]
7677 UNSPEC_MASKMOV))] 7954 UNSPEC_MASKMOV))]
7678 "TARGET_SSE2 && !TARGET_64BIT" 7955 "TARGET_SSE2 && !TARGET_64BIT"
7679 ;; @@@ check ordering of operands in intel/nonintel syntax 7956 ;; @@@ check ordering of operands in intel/nonintel syntax
7680 "%vmaskmovdqu\t{%2, %1|%1, %2}" 7957 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7681 [(set_attr "type" "ssecvt") 7958 [(set_attr "type" "ssemov")
7682 (set_attr "prefix_data16" "1") 7959 (set_attr "prefix_data16" "1")
7960 ;; The implicit %rdi operand confuses default length_vex computation.
7961 (set_attr "length_vex" "3")
7683 (set_attr "prefix" "maybe_vex") 7962 (set_attr "prefix" "maybe_vex")
7684 (set_attr "mode" "TI")]) 7963 (set_attr "mode" "TI")])
7685 7964
7686 (define_insn "*sse2_maskmovdqu_rex64" 7965 (define_insn "*sse2_maskmovdqu_rex64"
7687 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D")) 7966 [(set (mem:V16QI (match_operand:DI 0 "register_operand" "D"))
7690 (mem:V16QI (match_dup 0))] 7969 (mem:V16QI (match_dup 0))]
7691 UNSPEC_MASKMOV))] 7970 UNSPEC_MASKMOV))]
7692 "TARGET_SSE2 && TARGET_64BIT" 7971 "TARGET_SSE2 && TARGET_64BIT"
7693 ;; @@@ check ordering of operands in intel/nonintel syntax 7972 ;; @@@ check ordering of operands in intel/nonintel syntax
7694 "%vmaskmovdqu\t{%2, %1|%1, %2}" 7973 "%vmaskmovdqu\t{%2, %1|%1, %2}"
7695 [(set_attr "type" "ssecvt") 7974 [(set_attr "type" "ssemov")
7696 (set_attr "prefix_data16" "1") 7975 (set_attr "prefix_data16" "1")
7976 ;; The implicit %rdi operand confuses default length_vex computation.
7977 (set (attr "length_vex")
7978 (symbol_ref ("REGNO (operands[2]) >= FIRST_REX_SSE_REG ? 3 + 1 : 2 + 1")))
7697 (set_attr "prefix" "maybe_vex") 7979 (set_attr "prefix" "maybe_vex")
7698 (set_attr "mode" "TI")]) 7980 (set_attr "mode" "TI")])
7699 7981
7700 (define_insn "sse_ldmxcsr" 7982 (define_insn "sse_ldmxcsr"
7701 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")] 7983 [(unspec_volatile [(match_operand:SI 0 "memory_operand" "m")]
7702 UNSPECV_LDMXCSR)] 7984 UNSPECV_LDMXCSR)]
7703 "TARGET_SSE" 7985 "TARGET_SSE"
7704 "%vldmxcsr\t%0" 7986 "%vldmxcsr\t%0"
7705 [(set_attr "type" "sse") 7987 [(set_attr "type" "sse")
7988 (set_attr "atom_sse_attr" "mxcsr")
7706 (set_attr "prefix" "maybe_vex") 7989 (set_attr "prefix" "maybe_vex")
7707 (set_attr "memory" "load")]) 7990 (set_attr "memory" "load")])
7708 7991
7709 (define_insn "sse_stmxcsr" 7992 (define_insn "sse_stmxcsr"
7710 [(set (match_operand:SI 0 "memory_operand" "=m") 7993 [(set (match_operand:SI 0 "memory_operand" "=m")
7711 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))] 7994 (unspec_volatile:SI [(const_int 0)] UNSPECV_STMXCSR))]
7712 "TARGET_SSE" 7995 "TARGET_SSE"
7713 "%vstmxcsr\t%0" 7996 "%vstmxcsr\t%0"
7714 [(set_attr "type" "sse") 7997 [(set_attr "type" "sse")
7998 (set_attr "atom_sse_attr" "mxcsr")
7715 (set_attr "prefix" "maybe_vex") 7999 (set_attr "prefix" "maybe_vex")
7716 (set_attr "memory" "store")]) 8000 (set_attr "memory" "store")])
7717 8001
7718 (define_expand "sse_sfence" 8002 (define_expand "sse_sfence"
7719 [(set (match_dup 0) 8003 [(set (match_dup 0)
7728 [(set (match_operand:BLK 0 "" "") 8012 [(set (match_operand:BLK 0 "" "")
7729 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))] 8013 (unspec:BLK [(match_dup 0)] UNSPEC_SFENCE))]
7730 "TARGET_SSE || TARGET_3DNOW_A" 8014 "TARGET_SSE || TARGET_3DNOW_A"
7731 "sfence" 8015 "sfence"
7732 [(set_attr "type" "sse") 8016 [(set_attr "type" "sse")
8017 (set_attr "length_address" "0")
8018 (set_attr "atom_sse_attr" "fence")
7733 (set_attr "memory" "unknown")]) 8019 (set_attr "memory" "unknown")])
7734 8020
7735 (define_insn "sse2_clflush" 8021 (define_insn "sse2_clflush"
7736 [(unspec_volatile [(match_operand 0 "address_operand" "p")] 8022 [(unspec_volatile [(match_operand 0 "address_operand" "p")]
7737 UNSPECV_CLFLUSH)] 8023 UNSPECV_CLFLUSH)]
7738 "TARGET_SSE2" 8024 "TARGET_SSE2"
7739 "clflush\t%a0" 8025 "clflush\t%a0"
7740 [(set_attr "type" "sse") 8026 [(set_attr "type" "sse")
8027 (set_attr "atom_sse_attr" "fence")
7741 (set_attr "memory" "unknown")]) 8028 (set_attr "memory" "unknown")])
7742 8029
7743 (define_expand "sse2_mfence" 8030 (define_expand "sse2_mfence"
7744 [(set (match_dup 0) 8031 [(set (match_dup 0)
7745 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] 8032 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7753 [(set (match_operand:BLK 0 "" "") 8040 [(set (match_operand:BLK 0 "" "")
7754 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))] 8041 (unspec:BLK [(match_dup 0)] UNSPEC_MFENCE))]
7755 "TARGET_64BIT || TARGET_SSE2" 8042 "TARGET_64BIT || TARGET_SSE2"
7756 "mfence" 8043 "mfence"
7757 [(set_attr "type" "sse") 8044 [(set_attr "type" "sse")
8045 (set_attr "length_address" "0")
8046 (set_attr "atom_sse_attr" "fence")
7758 (set_attr "memory" "unknown")]) 8047 (set_attr "memory" "unknown")])
7759 8048
7760 (define_expand "sse2_lfence" 8049 (define_expand "sse2_lfence"
7761 [(set (match_dup 0) 8050 [(set (match_dup 0)
7762 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] 8051 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7770 [(set (match_operand:BLK 0 "" "") 8059 [(set (match_operand:BLK 0 "" "")
7771 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))] 8060 (unspec:BLK [(match_dup 0)] UNSPEC_LFENCE))]
7772 "TARGET_SSE2" 8061 "TARGET_SSE2"
7773 "lfence" 8062 "lfence"
7774 [(set_attr "type" "sse") 8063 [(set_attr "type" "sse")
8064 (set_attr "length_address" "0")
8065 (set_attr "atom_sse_attr" "lfence")
7775 (set_attr "memory" "unknown")]) 8066 (set_attr "memory" "unknown")])
7776 8067
7777 (define_insn "sse3_mwait" 8068 (define_insn "sse3_mwait"
7778 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a") 8069 [(unspec_volatile [(match_operand:SI 0 "register_operand" "a")
7779 (match_operand:SI 1 "register_operand" "c")] 8070 (match_operand:SI 1 "register_operand" "c")]
7850 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) 8141 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7851 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] 8142 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7852 "TARGET_AVX" 8143 "TARGET_AVX"
7853 "vphaddw\t{%2, %1, %0|%0, %1, %2}" 8144 "vphaddw\t{%2, %1, %0|%0, %1, %2}"
7854 [(set_attr "type" "sseiadd") 8145 [(set_attr "type" "sseiadd")
8146 (set_attr "prefix_extra" "1")
7855 (set_attr "prefix" "vex") 8147 (set_attr "prefix" "vex")
7856 (set_attr "mode" "TI")]) 8148 (set_attr "mode" "TI")])
7857 8149
7858 (define_insn "ssse3_phaddwv8hi3" 8150 (define_insn "ssse3_phaddwv8hi3"
7859 [(set (match_operand:V8HI 0 "register_operand" "=x") 8151 [(set (match_operand:V8HI 0 "register_operand" "=x")
7893 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) 8185 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
7894 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] 8186 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
7895 "TARGET_SSSE3" 8187 "TARGET_SSSE3"
7896 "phaddw\t{%2, %0|%0, %2}" 8188 "phaddw\t{%2, %0|%0, %2}"
7897 [(set_attr "type" "sseiadd") 8189 [(set_attr "type" "sseiadd")
8190 (set_attr "atom_unit" "complex")
7898 (set_attr "prefix_data16" "1") 8191 (set_attr "prefix_data16" "1")
7899 (set_attr "prefix_extra" "1") 8192 (set_attr "prefix_extra" "1")
7900 (set_attr "mode" "TI")]) 8193 (set_attr "mode" "TI")])
7901 8194
7902 (define_insn "ssse3_phaddwv4hi3" 8195 (define_insn "ssse3_phaddwv4hi3"
7921 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) 8214 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
7922 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] 8215 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
7923 "TARGET_SSSE3" 8216 "TARGET_SSSE3"
7924 "phaddw\t{%2, %0|%0, %2}" 8217 "phaddw\t{%2, %0|%0, %2}"
7925 [(set_attr "type" "sseiadd") 8218 [(set_attr "type" "sseiadd")
7926 (set_attr "prefix_extra" "1") 8219 (set_attr "atom_unit" "complex")
8220 (set_attr "prefix_extra" "1")
8221 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
7927 (set_attr "mode" "DI")]) 8222 (set_attr "mode" "DI")])
7928 8223
7929 (define_insn "*avx_phadddv4si3" 8224 (define_insn "*avx_phadddv4si3"
7930 [(set (match_operand:V4SI 0 "register_operand" "=x") 8225 [(set (match_operand:V4SI 0 "register_operand" "=x")
7931 (vec_concat:V4SI 8226 (vec_concat:V4SI
7948 (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) 8243 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7949 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] 8244 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7950 "TARGET_AVX" 8245 "TARGET_AVX"
7951 "vphaddd\t{%2, %1, %0|%0, %1, %2}" 8246 "vphaddd\t{%2, %1, %0|%0, %1, %2}"
7952 [(set_attr "type" "sseiadd") 8247 [(set_attr "type" "sseiadd")
8248 (set_attr "prefix_extra" "1")
7953 (set_attr "prefix" "vex") 8249 (set_attr "prefix" "vex")
7954 (set_attr "mode" "TI")]) 8250 (set_attr "mode" "TI")])
7955 8251
7956 (define_insn "ssse3_phadddv4si3" 8252 (define_insn "ssse3_phadddv4si3"
7957 [(set (match_operand:V4SI 0 "register_operand" "=x") 8253 [(set (match_operand:V4SI 0 "register_operand" "=x")
7975 (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) 8271 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
7976 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] 8272 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
7977 "TARGET_SSSE3" 8273 "TARGET_SSSE3"
7978 "phaddd\t{%2, %0|%0, %2}" 8274 "phaddd\t{%2, %0|%0, %2}"
7979 [(set_attr "type" "sseiadd") 8275 [(set_attr "type" "sseiadd")
8276 (set_attr "atom_unit" "complex")
7980 (set_attr "prefix_data16" "1") 8277 (set_attr "prefix_data16" "1")
7981 (set_attr "prefix_extra" "1") 8278 (set_attr "prefix_extra" "1")
7982 (set_attr "mode" "TI")]) 8279 (set_attr "mode" "TI")])
7983 8280
7984 (define_insn "ssse3_phadddv2si3" 8281 (define_insn "ssse3_phadddv2si3"
7995 (parallel [(const_int 0)])) 8292 (parallel [(const_int 0)]))
7996 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] 8293 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
7997 "TARGET_SSSE3" 8294 "TARGET_SSSE3"
7998 "phaddd\t{%2, %0|%0, %2}" 8295 "phaddd\t{%2, %0|%0, %2}"
7999 [(set_attr "type" "sseiadd") 8296 [(set_attr "type" "sseiadd")
8000 (set_attr "prefix_extra" "1") 8297 (set_attr "atom_unit" "complex")
8298 (set_attr "prefix_extra" "1")
8299 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8001 (set_attr "mode" "DI")]) 8300 (set_attr "mode" "DI")])
8002 8301
8003 (define_insn "*avx_phaddswv8hi3" 8302 (define_insn "*avx_phaddswv8hi3"
8004 [(set (match_operand:V8HI 0 "register_operand" "=x") 8303 [(set (match_operand:V8HI 0 "register_operand" "=x")
8005 (vec_concat:V8HI 8304 (vec_concat:V8HI
8038 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) 8337 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8039 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] 8338 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8040 "TARGET_AVX" 8339 "TARGET_AVX"
8041 "vphaddsw\t{%2, %1, %0|%0, %1, %2}" 8340 "vphaddsw\t{%2, %1, %0|%0, %1, %2}"
8042 [(set_attr "type" "sseiadd") 8341 [(set_attr "type" "sseiadd")
8342 (set_attr "prefix_extra" "1")
8043 (set_attr "prefix" "vex") 8343 (set_attr "prefix" "vex")
8044 (set_attr "mode" "TI")]) 8344 (set_attr "mode" "TI")])
8045 8345
8046 (define_insn "ssse3_phaddswv8hi3" 8346 (define_insn "ssse3_phaddswv8hi3"
8047 [(set (match_operand:V8HI 0 "register_operand" "=x") 8347 [(set (match_operand:V8HI 0 "register_operand" "=x")
8081 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) 8381 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8082 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] 8382 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8083 "TARGET_SSSE3" 8383 "TARGET_SSSE3"
8084 "phaddsw\t{%2, %0|%0, %2}" 8384 "phaddsw\t{%2, %0|%0, %2}"
8085 [(set_attr "type" "sseiadd") 8385 [(set_attr "type" "sseiadd")
8386 (set_attr "atom_unit" "complex")
8086 (set_attr "prefix_data16" "1") 8387 (set_attr "prefix_data16" "1")
8087 (set_attr "prefix_extra" "1") 8388 (set_attr "prefix_extra" "1")
8088 (set_attr "mode" "TI")]) 8389 (set_attr "mode" "TI")])
8089 8390
8090 (define_insn "ssse3_phaddswv4hi3" 8391 (define_insn "ssse3_phaddswv4hi3"
8109 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) 8410 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8110 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] 8411 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8111 "TARGET_SSSE3" 8412 "TARGET_SSSE3"
8112 "phaddsw\t{%2, %0|%0, %2}" 8413 "phaddsw\t{%2, %0|%0, %2}"
8113 [(set_attr "type" "sseiadd") 8414 [(set_attr "type" "sseiadd")
8114 (set_attr "prefix_extra" "1") 8415 (set_attr "atom_unit" "complex")
8416 (set_attr "prefix_extra" "1")
8417 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8115 (set_attr "mode" "DI")]) 8418 (set_attr "mode" "DI")])
8116 8419
8117 (define_insn "*avx_phsubwv8hi3" 8420 (define_insn "*avx_phsubwv8hi3"
8118 [(set (match_operand:V8HI 0 "register_operand" "=x") 8421 [(set (match_operand:V8HI 0 "register_operand" "=x")
8119 (vec_concat:V8HI 8422 (vec_concat:V8HI
8152 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) 8455 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8153 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] 8456 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8154 "TARGET_AVX" 8457 "TARGET_AVX"
8155 "vphsubw\t{%2, %1, %0|%0, %1, %2}" 8458 "vphsubw\t{%2, %1, %0|%0, %1, %2}"
8156 [(set_attr "type" "sseiadd") 8459 [(set_attr "type" "sseiadd")
8460 (set_attr "prefix_extra" "1")
8157 (set_attr "prefix" "vex") 8461 (set_attr "prefix" "vex")
8158 (set_attr "mode" "TI")]) 8462 (set_attr "mode" "TI")])
8159 8463
8160 (define_insn "ssse3_phsubwv8hi3" 8464 (define_insn "ssse3_phsubwv8hi3"
8161 [(set (match_operand:V8HI 0 "register_operand" "=x") 8465 [(set (match_operand:V8HI 0 "register_operand" "=x")
8195 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) 8499 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8196 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] 8500 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8197 "TARGET_SSSE3" 8501 "TARGET_SSSE3"
8198 "phsubw\t{%2, %0|%0, %2}" 8502 "phsubw\t{%2, %0|%0, %2}"
8199 [(set_attr "type" "sseiadd") 8503 [(set_attr "type" "sseiadd")
8504 (set_attr "atom_unit" "complex")
8200 (set_attr "prefix_data16" "1") 8505 (set_attr "prefix_data16" "1")
8201 (set_attr "prefix_extra" "1") 8506 (set_attr "prefix_extra" "1")
8202 (set_attr "mode" "TI")]) 8507 (set_attr "mode" "TI")])
8203 8508
8204 (define_insn "ssse3_phsubwv4hi3" 8509 (define_insn "ssse3_phsubwv4hi3"
8223 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) 8528 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8224 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] 8529 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8225 "TARGET_SSSE3" 8530 "TARGET_SSSE3"
8226 "phsubw\t{%2, %0|%0, %2}" 8531 "phsubw\t{%2, %0|%0, %2}"
8227 [(set_attr "type" "sseiadd") 8532 [(set_attr "type" "sseiadd")
8228 (set_attr "prefix_extra" "1") 8533 (set_attr "atom_unit" "complex")
8534 (set_attr "prefix_extra" "1")
8535 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8229 (set_attr "mode" "DI")]) 8536 (set_attr "mode" "DI")])
8230 8537
8231 (define_insn "*avx_phsubdv4si3" 8538 (define_insn "*avx_phsubdv4si3"
8232 [(set (match_operand:V4SI 0 "register_operand" "=x") 8539 [(set (match_operand:V4SI 0 "register_operand" "=x")
8233 (vec_concat:V4SI 8540 (vec_concat:V4SI
8250 (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) 8557 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8251 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] 8558 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8252 "TARGET_AVX" 8559 "TARGET_AVX"
8253 "vphsubd\t{%2, %1, %0|%0, %1, %2}" 8560 "vphsubd\t{%2, %1, %0|%0, %1, %2}"
8254 [(set_attr "type" "sseiadd") 8561 [(set_attr "type" "sseiadd")
8562 (set_attr "prefix_extra" "1")
8255 (set_attr "prefix" "vex") 8563 (set_attr "prefix" "vex")
8256 (set_attr "mode" "TI")]) 8564 (set_attr "mode" "TI")])
8257 8565
8258 (define_insn "ssse3_phsubdv4si3" 8566 (define_insn "ssse3_phsubdv4si3"
8259 [(set (match_operand:V4SI 0 "register_operand" "=x") 8567 [(set (match_operand:V4SI 0 "register_operand" "=x")
8277 (vec_select:SI (match_dup 2) (parallel [(const_int 2)])) 8585 (vec_select:SI (match_dup 2) (parallel [(const_int 2)]))
8278 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))] 8586 (vec_select:SI (match_dup 2) (parallel [(const_int 3)]))))))]
8279 "TARGET_SSSE3" 8587 "TARGET_SSSE3"
8280 "phsubd\t{%2, %0|%0, %2}" 8588 "phsubd\t{%2, %0|%0, %2}"
8281 [(set_attr "type" "sseiadd") 8589 [(set_attr "type" "sseiadd")
8590 (set_attr "atom_unit" "complex")
8282 (set_attr "prefix_data16" "1") 8591 (set_attr "prefix_data16" "1")
8283 (set_attr "prefix_extra" "1") 8592 (set_attr "prefix_extra" "1")
8284 (set_attr "mode" "TI")]) 8593 (set_attr "mode" "TI")])
8285 8594
8286 (define_insn "ssse3_phsubdv2si3" 8595 (define_insn "ssse3_phsubdv2si3"
8297 (parallel [(const_int 0)])) 8606 (parallel [(const_int 0)]))
8298 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))] 8607 (vec_select:SI (match_dup 2) (parallel [(const_int 1)])))))]
8299 "TARGET_SSSE3" 8608 "TARGET_SSSE3"
8300 "phsubd\t{%2, %0|%0, %2}" 8609 "phsubd\t{%2, %0|%0, %2}"
8301 [(set_attr "type" "sseiadd") 8610 [(set_attr "type" "sseiadd")
8302 (set_attr "prefix_extra" "1") 8611 (set_attr "atom_unit" "complex")
8612 (set_attr "prefix_extra" "1")
8613 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8303 (set_attr "mode" "DI")]) 8614 (set_attr "mode" "DI")])
8304 8615
8305 (define_insn "*avx_phsubswv8hi3" 8616 (define_insn "*avx_phsubswv8hi3"
8306 [(set (match_operand:V8HI 0 "register_operand" "=x") 8617 [(set (match_operand:V8HI 0 "register_operand" "=x")
8307 (vec_concat:V8HI 8618 (vec_concat:V8HI
8340 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) 8651 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8341 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] 8652 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8342 "TARGET_AVX" 8653 "TARGET_AVX"
8343 "vphsubsw\t{%2, %1, %0|%0, %1, %2}" 8654 "vphsubsw\t{%2, %1, %0|%0, %1, %2}"
8344 [(set_attr "type" "sseiadd") 8655 [(set_attr "type" "sseiadd")
8656 (set_attr "prefix_extra" "1")
8345 (set_attr "prefix" "vex") 8657 (set_attr "prefix" "vex")
8346 (set_attr "mode" "TI")]) 8658 (set_attr "mode" "TI")])
8347 8659
8348 (define_insn "ssse3_phsubswv8hi3" 8660 (define_insn "ssse3_phsubswv8hi3"
8349 [(set (match_operand:V8HI 0 "register_operand" "=x") 8661 [(set (match_operand:V8HI 0 "register_operand" "=x")
8383 (vec_select:HI (match_dup 2) (parallel [(const_int 6)])) 8695 (vec_select:HI (match_dup 2) (parallel [(const_int 6)]))
8384 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))] 8696 (vec_select:HI (match_dup 2) (parallel [(const_int 7)])))))))]
8385 "TARGET_SSSE3" 8697 "TARGET_SSSE3"
8386 "phsubsw\t{%2, %0|%0, %2}" 8698 "phsubsw\t{%2, %0|%0, %2}"
8387 [(set_attr "type" "sseiadd") 8699 [(set_attr "type" "sseiadd")
8700 (set_attr "atom_unit" "complex")
8388 (set_attr "prefix_data16" "1") 8701 (set_attr "prefix_data16" "1")
8389 (set_attr "prefix_extra" "1") 8702 (set_attr "prefix_extra" "1")
8390 (set_attr "mode" "TI")]) 8703 (set_attr "mode" "TI")])
8391 8704
8392 (define_insn "ssse3_phsubswv4hi3" 8705 (define_insn "ssse3_phsubswv4hi3"
8411 (vec_select:HI (match_dup 2) (parallel [(const_int 2)])) 8724 (vec_select:HI (match_dup 2) (parallel [(const_int 2)]))
8412 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))] 8725 (vec_select:HI (match_dup 2) (parallel [(const_int 3)]))))))]
8413 "TARGET_SSSE3" 8726 "TARGET_SSSE3"
8414 "phsubsw\t{%2, %0|%0, %2}" 8727 "phsubsw\t{%2, %0|%0, %2}"
8415 [(set_attr "type" "sseiadd") 8728 [(set_attr "type" "sseiadd")
8416 (set_attr "prefix_extra" "1") 8729 (set_attr "atom_unit" "complex")
8730 (set_attr "prefix_extra" "1")
8731 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8417 (set_attr "mode" "DI")]) 8732 (set_attr "mode" "DI")])
8418 8733
8419 (define_insn "*avx_pmaddubsw128" 8734 (define_insn "*avx_pmaddubsw128"
8420 [(set (match_operand:V8HI 0 "register_operand" "=x") 8735 [(set (match_operand:V8HI 0 "register_operand" "=x")
8421 (ss_plus:V8HI 8736 (ss_plus:V8HI
8464 (const_int 13) 8779 (const_int 13)
8465 (const_int 15)]))))))] 8780 (const_int 15)]))))))]
8466 "TARGET_AVX" 8781 "TARGET_AVX"
8467 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}" 8782 "vpmaddubsw\t{%2, %1, %0|%0, %1, %2}"
8468 [(set_attr "type" "sseiadd") 8783 [(set_attr "type" "sseiadd")
8784 (set_attr "prefix_extra" "1")
8469 (set_attr "prefix" "vex") 8785 (set_attr "prefix" "vex")
8470 (set_attr "mode" "TI")]) 8786 (set_attr "mode" "TI")])
8471 8787
8472 (define_insn "ssse3_pmaddubsw128" 8788 (define_insn "ssse3_pmaddubsw128"
8473 [(set (match_operand:V8HI 0 "register_operand" "=x") 8789 [(set (match_operand:V8HI 0 "register_operand" "=x")
8517 (const_int 13) 8833 (const_int 13)
8518 (const_int 15)]))))))] 8834 (const_int 15)]))))))]
8519 "TARGET_SSSE3" 8835 "TARGET_SSSE3"
8520 "pmaddubsw\t{%2, %0|%0, %2}" 8836 "pmaddubsw\t{%2, %0|%0, %2}"
8521 [(set_attr "type" "sseiadd") 8837 [(set_attr "type" "sseiadd")
8838 (set_attr "atom_unit" "simul")
8522 (set_attr "prefix_data16" "1") 8839 (set_attr "prefix_data16" "1")
8523 (set_attr "prefix_extra" "1") 8840 (set_attr "prefix_extra" "1")
8524 (set_attr "mode" "TI")]) 8841 (set_attr "mode" "TI")])
8525 8842
8526 (define_insn "ssse3_pmaddubsw" 8843 (define_insn "ssse3_pmaddubsw"
8555 (const_int 5) 8872 (const_int 5)
8556 (const_int 7)]))))))] 8873 (const_int 7)]))))))]
8557 "TARGET_SSSE3" 8874 "TARGET_SSSE3"
8558 "pmaddubsw\t{%2, %0|%0, %2}" 8875 "pmaddubsw\t{%2, %0|%0, %2}"
8559 [(set_attr "type" "sseiadd") 8876 [(set_attr "type" "sseiadd")
8560 (set_attr "prefix_extra" "1") 8877 (set_attr "atom_unit" "simul")
8878 (set_attr "prefix_extra" "1")
8879 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8561 (set_attr "mode" "DI")]) 8880 (set_attr "mode" "DI")])
8562 8881
8563 (define_expand "ssse3_pmulhrswv8hi3" 8882 (define_expand "ssse3_pmulhrswv8hi3"
8564 [(set (match_operand:V8HI 0 "register_operand" "") 8883 [(set (match_operand:V8HI 0 "register_operand" "")
8565 (truncate:V8HI 8884 (truncate:V8HI
8598 (const_int 1) (const_int 1)])) 8917 (const_int 1) (const_int 1)]))
8599 (const_int 1))))] 8918 (const_int 1))))]
8600 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)" 8919 "TARGET_AVX && ix86_binary_operator_ok (MULT, V8HImode, operands)"
8601 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}" 8920 "vpmulhrsw\t{%2, %1, %0|%0, %1, %2}"
8602 [(set_attr "type" "sseimul") 8921 [(set_attr "type" "sseimul")
8922 (set_attr "prefix_extra" "1")
8603 (set_attr "prefix" "vex") 8923 (set_attr "prefix" "vex")
8604 (set_attr "mode" "TI")]) 8924 (set_attr "mode" "TI")])
8605 8925
8606 (define_insn "*ssse3_pmulhrswv8hi3" 8926 (define_insn "*ssse3_pmulhrswv8hi3"
8607 [(set (match_operand:V8HI 0 "register_operand" "=x") 8927 [(set (match_operand:V8HI 0 "register_operand" "=x")
8662 (const_int 1))))] 8982 (const_int 1))))]
8663 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)" 8983 "TARGET_SSSE3 && ix86_binary_operator_ok (MULT, V4HImode, operands)"
8664 "pmulhrsw\t{%2, %0|%0, %2}" 8984 "pmulhrsw\t{%2, %0|%0, %2}"
8665 [(set_attr "type" "sseimul") 8985 [(set_attr "type" "sseimul")
8666 (set_attr "prefix_extra" "1") 8986 (set_attr "prefix_extra" "1")
8987 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8667 (set_attr "mode" "DI")]) 8988 (set_attr "mode" "DI")])
8668 8989
8669 (define_insn "*avx_pshufbv16qi3" 8990 (define_insn "*avx_pshufbv16qi3"
8670 [(set (match_operand:V16QI 0 "register_operand" "=x") 8991 [(set (match_operand:V16QI 0 "register_operand" "=x")
8671 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x") 8992 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "x")
8672 (match_operand:V16QI 2 "nonimmediate_operand" "xm")] 8993 (match_operand:V16QI 2 "nonimmediate_operand" "xm")]
8673 UNSPEC_PSHUFB))] 8994 UNSPEC_PSHUFB))]
8674 "TARGET_AVX" 8995 "TARGET_AVX"
8675 "vpshufb\t{%2, %1, %0|%0, %1, %2}"; 8996 "vpshufb\t{%2, %1, %0|%0, %1, %2}";
8676 [(set_attr "type" "sselog1") 8997 [(set_attr "type" "sselog1")
8998 (set_attr "prefix_extra" "1")
8677 (set_attr "prefix" "vex") 8999 (set_attr "prefix" "vex")
8678 (set_attr "mode" "TI")]) 9000 (set_attr "mode" "TI")])
8679 9001
8680 (define_insn "ssse3_pshufbv16qi3" 9002 (define_insn "ssse3_pshufbv16qi3"
8681 [(set (match_operand:V16QI 0 "register_operand" "=x") 9003 [(set (match_operand:V16QI 0 "register_operand" "=x")
8696 UNSPEC_PSHUFB))] 9018 UNSPEC_PSHUFB))]
8697 "TARGET_SSSE3" 9019 "TARGET_SSSE3"
8698 "pshufb\t{%2, %0|%0, %2}"; 9020 "pshufb\t{%2, %0|%0, %2}";
8699 [(set_attr "type" "sselog1") 9021 [(set_attr "type" "sselog1")
8700 (set_attr "prefix_extra" "1") 9022 (set_attr "prefix_extra" "1")
9023 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8701 (set_attr "mode" "DI")]) 9024 (set_attr "mode" "DI")])
8702 9025
8703 (define_insn "*avx_psign<mode>3" 9026 (define_insn "*avx_psign<mode>3"
8704 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") 9027 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8705 (unspec:SSEMODE124 9028 (unspec:SSEMODE124
8707 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")] 9030 (match_operand:SSEMODE124 2 "nonimmediate_operand" "xm")]
8708 UNSPEC_PSIGN))] 9031 UNSPEC_PSIGN))]
8709 "TARGET_AVX" 9032 "TARGET_AVX"
8710 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"; 9033 "vpsign<ssevecsize>\t{%2, %1, %0|%0, %1, %2}";
8711 [(set_attr "type" "sselog1") 9034 [(set_attr "type" "sselog1")
9035 (set_attr "prefix_extra" "1")
8712 (set_attr "prefix" "vex") 9036 (set_attr "prefix" "vex")
8713 (set_attr "mode" "TI")]) 9037 (set_attr "mode" "TI")])
8714 9038
8715 (define_insn "ssse3_psign<mode>3" 9039 (define_insn "ssse3_psign<mode>3"
8716 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") 9040 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8733 UNSPEC_PSIGN))] 9057 UNSPEC_PSIGN))]
8734 "TARGET_SSSE3" 9058 "TARGET_SSSE3"
8735 "psign<mmxvecsize>\t{%2, %0|%0, %2}"; 9059 "psign<mmxvecsize>\t{%2, %0|%0, %2}";
8736 [(set_attr "type" "sselog1") 9060 [(set_attr "type" "sselog1")
8737 (set_attr "prefix_extra" "1") 9061 (set_attr "prefix_extra" "1")
9062 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8738 (set_attr "mode" "DI")]) 9063 (set_attr "mode" "DI")])
8739 9064
8740 (define_insn "*avx_palignrti" 9065 (define_insn "*avx_palignrti"
8741 [(set (match_operand:TI 0 "register_operand" "=x") 9066 [(set (match_operand:TI 0 "register_operand" "=x")
8742 (unspec:TI [(match_operand:TI 1 "register_operand" "x") 9067 (unspec:TI [(match_operand:TI 1 "register_operand" "x")
8747 { 9072 {
8748 operands[3] = GEN_INT (INTVAL (operands[3]) / 8); 9073 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8749 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}"; 9074 return "vpalignr\t{%3, %2, %1, %0|%0, %1, %2, %3}";
8750 } 9075 }
8751 [(set_attr "type" "sseishft") 9076 [(set_attr "type" "sseishft")
9077 (set_attr "prefix_extra" "1")
9078 (set_attr "length_immediate" "1")
8752 (set_attr "prefix" "vex") 9079 (set_attr "prefix" "vex")
8753 (set_attr "mode" "TI")]) 9080 (set_attr "mode" "TI")])
8754 9081
8755 (define_insn "ssse3_palignrti" 9082 (define_insn "ssse3_palignrti"
8756 [(set (match_operand:TI 0 "register_operand" "=x") 9083 [(set (match_operand:TI 0 "register_operand" "=x")
8762 { 9089 {
8763 operands[3] = GEN_INT (INTVAL (operands[3]) / 8); 9090 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8764 return "palignr\t{%3, %2, %0|%0, %2, %3}"; 9091 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8765 } 9092 }
8766 [(set_attr "type" "sseishft") 9093 [(set_attr "type" "sseishft")
9094 (set_attr "atom_unit" "sishuf")
8767 (set_attr "prefix_data16" "1") 9095 (set_attr "prefix_data16" "1")
8768 (set_attr "prefix_extra" "1") 9096 (set_attr "prefix_extra" "1")
9097 (set_attr "length_immediate" "1")
8769 (set_attr "mode" "TI")]) 9098 (set_attr "mode" "TI")])
8770 9099
8771 (define_insn "ssse3_palignrdi" 9100 (define_insn "ssse3_palignrdi"
8772 [(set (match_operand:DI 0 "register_operand" "=y") 9101 [(set (match_operand:DI 0 "register_operand" "=y")
8773 (unspec:DI [(match_operand:DI 1 "register_operand" "0") 9102 (unspec:DI [(match_operand:DI 1 "register_operand" "0")
8778 { 9107 {
8779 operands[3] = GEN_INT (INTVAL (operands[3]) / 8); 9108 operands[3] = GEN_INT (INTVAL (operands[3]) / 8);
8780 return "palignr\t{%3, %2, %0|%0, %2, %3}"; 9109 return "palignr\t{%3, %2, %0|%0, %2, %3}";
8781 } 9110 }
8782 [(set_attr "type" "sseishft") 9111 [(set_attr "type" "sseishft")
8783 (set_attr "prefix_extra" "1") 9112 (set_attr "atom_unit" "sishuf")
9113 (set_attr "prefix_extra" "1")
9114 (set_attr "length_immediate" "1")
9115 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8784 (set_attr "mode" "DI")]) 9116 (set_attr "mode" "DI")])
8785 9117
8786 (define_insn "abs<mode>2" 9118 (define_insn "abs<mode>2"
8787 [(set (match_operand:SSEMODE124 0 "register_operand" "=x") 9119 [(set (match_operand:SSEMODE124 0 "register_operand" "=x")
8788 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))] 9120 (abs:SSEMODE124 (match_operand:SSEMODE124 1 "nonimmediate_operand" "xm")))]
8798 [(set (match_operand:MMXMODEI 0 "register_operand" "=y") 9130 [(set (match_operand:MMXMODEI 0 "register_operand" "=y")
8799 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))] 9131 (abs:MMXMODEI (match_operand:MMXMODEI 1 "nonimmediate_operand" "ym")))]
8800 "TARGET_SSSE3" 9132 "TARGET_SSSE3"
8801 "pabs<mmxvecsize>\t{%1, %0|%0, %1}"; 9133 "pabs<mmxvecsize>\t{%1, %0|%0, %1}";
8802 [(set_attr "type" "sselog1") 9134 [(set_attr "type" "sselog1")
8803 (set_attr "prefix_extra" "1") 9135 (set_attr "prefix_rep" "0")
9136 (set_attr "prefix_extra" "1")
9137 (set (attr "prefix_rex") (symbol_ref "x86_extended_reg_mentioned_p (insn)"))
8804 (set_attr "mode" "DI")]) 9138 (set_attr "mode" "DI")])
8805 9139
8806 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 9140 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8807 ;; 9141 ;;
8808 ;; AMD SSE4A instructions 9142 ;; AMD SSE4A instructions
8839 UNSPEC_EXTRQI))] 9173 UNSPEC_EXTRQI))]
8840 "TARGET_SSE4A" 9174 "TARGET_SSE4A"
8841 "extrq\t{%3, %2, %0|%0, %2, %3}" 9175 "extrq\t{%3, %2, %0|%0, %2, %3}"
8842 [(set_attr "type" "sse") 9176 [(set_attr "type" "sse")
8843 (set_attr "prefix_data16" "1") 9177 (set_attr "prefix_data16" "1")
9178 (set_attr "length_immediate" "2")
8844 (set_attr "mode" "TI")]) 9179 (set_attr "mode" "TI")])
8845 9180
8846 (define_insn "sse4a_extrq" 9181 (define_insn "sse4a_extrq"
8847 [(set (match_operand:V2DI 0 "register_operand" "=x") 9182 [(set (match_operand:V2DI 0 "register_operand" "=x")
8848 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") 9183 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8862 (match_operand 4 "const_int_operand" "")] 9197 (match_operand 4 "const_int_operand" "")]
8863 UNSPEC_INSERTQI))] 9198 UNSPEC_INSERTQI))]
8864 "TARGET_SSE4A" 9199 "TARGET_SSE4A"
8865 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}" 9200 "insertq\t{%4, %3, %2, %0|%0, %2, %3, %4}"
8866 [(set_attr "type" "sseins") 9201 [(set_attr "type" "sseins")
9202 (set_attr "prefix_data16" "0")
8867 (set_attr "prefix_rep" "1") 9203 (set_attr "prefix_rep" "1")
9204 (set_attr "length_immediate" "2")
8868 (set_attr "mode" "TI")]) 9205 (set_attr "mode" "TI")])
8869 9206
8870 (define_insn "sse4a_insertq" 9207 (define_insn "sse4a_insertq"
8871 [(set (match_operand:V2DI 0 "register_operand" "=x") 9208 [(set (match_operand:V2DI 0 "register_operand" "=x")
8872 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") 9209 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
8873 (match_operand:V2DI 2 "register_operand" "x")] 9210 (match_operand:V2DI 2 "register_operand" "x")]
8874 UNSPEC_INSERTQ))] 9211 UNSPEC_INSERTQ))]
8875 "TARGET_SSE4A" 9212 "TARGET_SSE4A"
8876 "insertq\t{%2, %0|%0, %2}" 9213 "insertq\t{%2, %0|%0, %2}"
8877 [(set_attr "type" "sseins") 9214 [(set_attr "type" "sseins")
9215 (set_attr "prefix_data16" "0")
8878 (set_attr "prefix_rep" "1") 9216 (set_attr "prefix_rep" "1")
8879 (set_attr "mode" "TI")]) 9217 (set_attr "mode" "TI")])
8880 9218
8881 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 9219 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
8882 ;; 9220 ;;
8891 (match_operand:AVXMODEF2P 1 "register_operand" "x") 9229 (match_operand:AVXMODEF2P 1 "register_operand" "x")
8892 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))] 9230 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8893 "TARGET_AVX" 9231 "TARGET_AVX"
8894 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9232 "vblendp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8895 [(set_attr "type" "ssemov") 9233 [(set_attr "type" "ssemov")
9234 (set_attr "prefix_extra" "1")
9235 (set_attr "length_immediate" "1")
8896 (set_attr "prefix" "vex") 9236 (set_attr "prefix" "vex")
8897 (set_attr "mode" "<avxvecmode>")]) 9237 (set_attr "mode" "<avxvecmode>")])
8898 9238
8899 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>" 9239 (define_insn "avx_blendvp<avxmodesuffixf2c><avxmodesuffix>"
8900 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") 9240 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8904 (match_operand:AVXMODEF2P 3 "register_operand" "x")] 9244 (match_operand:AVXMODEF2P 3 "register_operand" "x")]
8905 UNSPEC_BLENDV))] 9245 UNSPEC_BLENDV))]
8906 "TARGET_AVX" 9246 "TARGET_AVX"
8907 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9247 "vblendvp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8908 [(set_attr "type" "ssemov") 9248 [(set_attr "type" "ssemov")
9249 (set_attr "prefix_extra" "1")
9250 (set_attr "length_immediate" "1")
8909 (set_attr "prefix" "vex") 9251 (set_attr "prefix" "vex")
8910 (set_attr "mode" "<avxvecmode>")]) 9252 (set_attr "mode" "<avxvecmode>")])
8911 9253
8912 (define_insn "sse4_1_blendp<ssemodesuffixf2c>" 9254 (define_insn "sse4_1_blendp<ssemodesuffixf2c>"
8913 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") 9255 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8916 (match_operand:SSEMODEF2P 1 "register_operand" "0") 9258 (match_operand:SSEMODEF2P 1 "register_operand" "0")
8917 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))] 9259 (match_operand:SI 3 "const_0_to_<blendbits>_operand" "n")))]
8918 "TARGET_SSE4_1" 9260 "TARGET_SSE4_1"
8919 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" 9261 "blendp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8920 [(set_attr "type" "ssemov") 9262 [(set_attr "type" "ssemov")
8921 (set_attr "prefix_extra" "1") 9263 (set_attr "prefix_data16" "1")
9264 (set_attr "prefix_extra" "1")
9265 (set_attr "length_immediate" "1")
8922 (set_attr "mode" "<MODE>")]) 9266 (set_attr "mode" "<MODE>")])
8923 9267
8924 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>" 9268 (define_insn "sse4_1_blendvp<ssemodesuffixf2c>"
8925 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x") 9269 [(set (match_operand:SSEMODEF2P 0 "reg_not_xmm0_operand" "=x")
8926 (unspec:SSEMODEF2P 9270 (unspec:SSEMODEF2P
8929 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")] 9273 (match_operand:SSEMODEF2P 3 "register_operand" "Yz")]
8930 UNSPEC_BLENDV))] 9274 UNSPEC_BLENDV))]
8931 "TARGET_SSE4_1" 9275 "TARGET_SSE4_1"
8932 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" 9276 "blendvp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8933 [(set_attr "type" "ssemov") 9277 [(set_attr "type" "ssemov")
9278 (set_attr "prefix_data16" "1")
8934 (set_attr "prefix_extra" "1") 9279 (set_attr "prefix_extra" "1")
8935 (set_attr "mode" "<MODE>")]) 9280 (set_attr "mode" "<MODE>")])
8936 9281
8937 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>" 9282 (define_insn "avx_dpp<avxmodesuffixf2c><avxmodesuffix>"
8938 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") 9283 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
8943 UNSPEC_DP))] 9288 UNSPEC_DP))]
8944 "TARGET_AVX" 9289 "TARGET_AVX"
8945 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9290 "vdpp<avxmodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8946 [(set_attr "type" "ssemul") 9291 [(set_attr "type" "ssemul")
8947 (set_attr "prefix" "vex") 9292 (set_attr "prefix" "vex")
9293 (set_attr "prefix_extra" "1")
9294 (set_attr "length_immediate" "1")
8948 (set_attr "mode" "<avxvecmode>")]) 9295 (set_attr "mode" "<avxvecmode>")])
8949 9296
8950 (define_insn "sse4_1_dpp<ssemodesuffixf2c>" 9297 (define_insn "sse4_1_dpp<ssemodesuffixf2c>"
8951 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") 9298 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
8952 (unspec:SSEMODEF2P 9299 (unspec:SSEMODEF2P
8955 (match_operand:SI 3 "const_0_to_255_operand" "n")] 9302 (match_operand:SI 3 "const_0_to_255_operand" "n")]
8956 UNSPEC_DP))] 9303 UNSPEC_DP))]
8957 "TARGET_SSE4_1" 9304 "TARGET_SSE4_1"
8958 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" 9305 "dpp<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
8959 [(set_attr "type" "ssemul") 9306 [(set_attr "type" "ssemul")
8960 (set_attr "prefix_extra" "1") 9307 (set_attr "prefix_data16" "1")
9308 (set_attr "prefix_extra" "1")
9309 (set_attr "length_immediate" "1")
8961 (set_attr "mode" "<MODE>")]) 9310 (set_attr "mode" "<MODE>")])
8962 9311
8963 (define_insn "sse4_1_movntdqa" 9312 (define_insn "sse4_1_movntdqa"
8964 [(set (match_operand:V2DI 0 "register_operand" "=x") 9313 [(set (match_operand:V2DI 0 "register_operand" "=x")
8965 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")] 9314 (unspec:V2DI [(match_operand:V2DI 1 "memory_operand" "m")]
8966 UNSPEC_MOVNTDQA))] 9315 UNSPEC_MOVNTDQA))]
8967 "TARGET_SSE4_1" 9316 "TARGET_SSE4_1"
8968 "%vmovntdqa\t{%1, %0|%0, %1}" 9317 "%vmovntdqa\t{%1, %0|%0, %1}"
8969 [(set_attr "type" "ssecvt") 9318 [(set_attr "type" "ssemov")
8970 (set_attr "prefix_extra" "1") 9319 (set_attr "prefix_extra" "1")
8971 (set_attr "prefix" "maybe_vex") 9320 (set_attr "prefix" "maybe_vex")
8972 (set_attr "mode" "TI")]) 9321 (set_attr "mode" "TI")])
8973 9322
8974 (define_insn "*avx_mpsadbw" 9323 (define_insn "*avx_mpsadbw"
8979 UNSPEC_MPSADBW))] 9328 UNSPEC_MPSADBW))]
8980 "TARGET_AVX" 9329 "TARGET_AVX"
8981 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9330 "vmpsadbw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
8982 [(set_attr "type" "sselog1") 9331 [(set_attr "type" "sselog1")
8983 (set_attr "prefix" "vex") 9332 (set_attr "prefix" "vex")
9333 (set_attr "prefix_extra" "1")
9334 (set_attr "length_immediate" "1")
8984 (set_attr "mode" "TI")]) 9335 (set_attr "mode" "TI")])
8985 9336
8986 (define_insn "sse4_1_mpsadbw" 9337 (define_insn "sse4_1_mpsadbw"
8987 [(set (match_operand:V16QI 0 "register_operand" "=x") 9338 [(set (match_operand:V16QI 0 "register_operand" "=x")
8988 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") 9339 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0")
8991 UNSPEC_MPSADBW))] 9342 UNSPEC_MPSADBW))]
8992 "TARGET_SSE4_1" 9343 "TARGET_SSE4_1"
8993 "mpsadbw\t{%3, %2, %0|%0, %2, %3}" 9344 "mpsadbw\t{%3, %2, %0|%0, %2, %3}"
8994 [(set_attr "type" "sselog1") 9345 [(set_attr "type" "sselog1")
8995 (set_attr "prefix_extra" "1") 9346 (set_attr "prefix_extra" "1")
9347 (set_attr "length_immediate" "1")
8996 (set_attr "mode" "TI")]) 9348 (set_attr "mode" "TI")])
8997 9349
8998 (define_insn "*avx_packusdw" 9350 (define_insn "*avx_packusdw"
8999 [(set (match_operand:V8HI 0 "register_operand" "=x") 9351 [(set (match_operand:V8HI 0 "register_operand" "=x")
9000 (vec_concat:V8HI 9352 (vec_concat:V8HI
9003 (us_truncate:V4HI 9355 (us_truncate:V4HI
9004 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))] 9356 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))))]
9005 "TARGET_AVX" 9357 "TARGET_AVX"
9006 "vpackusdw\t{%2, %1, %0|%0, %1, %2}" 9358 "vpackusdw\t{%2, %1, %0|%0, %1, %2}"
9007 [(set_attr "type" "sselog") 9359 [(set_attr "type" "sselog")
9360 (set_attr "prefix_extra" "1")
9008 (set_attr "prefix" "vex") 9361 (set_attr "prefix" "vex")
9009 (set_attr "mode" "TI")]) 9362 (set_attr "mode" "TI")])
9010 9363
9011 (define_insn "sse4_1_packusdw" 9364 (define_insn "sse4_1_packusdw"
9012 [(set (match_operand:V8HI 0 "register_operand" "=x") 9365 [(set (match_operand:V8HI 0 "register_operand" "=x")
9028 (match_operand:V16QI 3 "register_operand" "x")] 9381 (match_operand:V16QI 3 "register_operand" "x")]
9029 UNSPEC_BLENDV))] 9382 UNSPEC_BLENDV))]
9030 "TARGET_AVX" 9383 "TARGET_AVX"
9031 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9384 "vpblendvb\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9032 [(set_attr "type" "ssemov") 9385 [(set_attr "type" "ssemov")
9386 (set_attr "prefix_extra" "1")
9387 (set_attr "length_immediate" "1")
9033 (set_attr "prefix" "vex") 9388 (set_attr "prefix" "vex")
9034 (set_attr "mode" "TI")]) 9389 (set_attr "mode" "TI")])
9035 9390
9036 (define_insn "sse4_1_pblendvb" 9391 (define_insn "sse4_1_pblendvb"
9037 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x") 9392 [(set (match_operand:V16QI 0 "reg_not_xmm0_operand" "=x")
9053 (match_operand:SI 3 "const_0_to_255_operand" "n")))] 9408 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9054 "TARGET_AVX" 9409 "TARGET_AVX"
9055 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9410 "vpblendw\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9056 [(set_attr "type" "ssemov") 9411 [(set_attr "type" "ssemov")
9057 (set_attr "prefix" "vex") 9412 (set_attr "prefix" "vex")
9413 (set_attr "prefix_extra" "1")
9414 (set_attr "length_immediate" "1")
9058 (set_attr "mode" "TI")]) 9415 (set_attr "mode" "TI")])
9059 9416
9060 (define_insn "sse4_1_pblendw" 9417 (define_insn "sse4_1_pblendw"
9061 [(set (match_operand:V8HI 0 "register_operand" "=x") 9418 [(set (match_operand:V8HI 0 "register_operand" "=x")
9062 (vec_merge:V8HI 9419 (vec_merge:V8HI
9065 (match_operand:SI 3 "const_0_to_255_operand" "n")))] 9422 (match_operand:SI 3 "const_0_to_255_operand" "n")))]
9066 "TARGET_SSE4_1" 9423 "TARGET_SSE4_1"
9067 "pblendw\t{%3, %2, %0|%0, %2, %3}" 9424 "pblendw\t{%3, %2, %0|%0, %2, %3}"
9068 [(set_attr "type" "ssemov") 9425 [(set_attr "type" "ssemov")
9069 (set_attr "prefix_extra" "1") 9426 (set_attr "prefix_extra" "1")
9427 (set_attr "length_immediate" "1")
9070 (set_attr "mode" "TI")]) 9428 (set_attr "mode" "TI")])
9071 9429
9072 (define_insn "sse4_1_phminposuw" 9430 (define_insn "sse4_1_phminposuw"
9073 [(set (match_operand:V8HI 0 "register_operand" "=x") 9431 [(set (match_operand:V8HI 0 "register_operand" "=x")
9074 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")] 9432 (unspec:V8HI [(match_operand:V8HI 1 "nonimmediate_operand" "xm")]
9476 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")] 9834 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")]
9477 UNSPEC_VTESTP))] 9835 UNSPEC_VTESTP))]
9478 "TARGET_AVX" 9836 "TARGET_AVX"
9479 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}" 9837 "vtestp<avxmodesuffixf2c>\t{%1, %0|%0, %1}"
9480 [(set_attr "type" "ssecomi") 9838 [(set_attr "type" "ssecomi")
9839 (set_attr "prefix_extra" "1")
9481 (set_attr "prefix" "vex") 9840 (set_attr "prefix" "vex")
9482 (set_attr "mode" "<MODE>")]) 9841 (set_attr "mode" "<MODE>")])
9483 9842
9484 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG. 9843 ;; ptest is very similar to comiss and ucomiss when setting FLAGS_REG.
9485 ;; But it is not a really compare instruction. 9844 ;; But it is not a really compare instruction.
9489 (match_operand:V4DI 1 "nonimmediate_operand" "xm")] 9848 (match_operand:V4DI 1 "nonimmediate_operand" "xm")]
9490 UNSPEC_PTEST))] 9849 UNSPEC_PTEST))]
9491 "TARGET_AVX" 9850 "TARGET_AVX"
9492 "vptest\t{%1, %0|%0, %1}" 9851 "vptest\t{%1, %0|%0, %1}"
9493 [(set_attr "type" "ssecomi") 9852 [(set_attr "type" "ssecomi")
9853 (set_attr "prefix_extra" "1")
9494 (set_attr "prefix" "vex") 9854 (set_attr "prefix" "vex")
9495 (set_attr "mode" "OI")]) 9855 (set_attr "mode" "OI")])
9496 9856
9497 (define_insn "sse4_1_ptest" 9857 (define_insn "sse4_1_ptest"
9498 [(set (reg:CC FLAGS_REG) 9858 [(set (reg:CC FLAGS_REG)
9513 (match_operand:SI 2 "const_0_to_15_operand" "n")] 9873 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9514 UNSPEC_ROUND))] 9874 UNSPEC_ROUND))]
9515 "TARGET_AVX" 9875 "TARGET_AVX"
9516 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" 9876 "vroundp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9517 [(set_attr "type" "ssecvt") 9877 [(set_attr "type" "ssecvt")
9878 (set_attr "prefix_extra" "1")
9879 (set_attr "length_immediate" "1")
9518 (set_attr "prefix" "vex") 9880 (set_attr "prefix" "vex")
9519 (set_attr "mode" "<MODE>")]) 9881 (set_attr "mode" "<MODE>")])
9520 9882
9521 (define_insn "sse4_1_roundp<ssemodesuffixf2c>" 9883 (define_insn "sse4_1_roundp<ssemodesuffixf2c>"
9522 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") 9884 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9525 (match_operand:SI 2 "const_0_to_15_operand" "n")] 9887 (match_operand:SI 2 "const_0_to_15_operand" "n")]
9526 UNSPEC_ROUND))] 9888 UNSPEC_ROUND))]
9527 "TARGET_ROUND" 9889 "TARGET_ROUND"
9528 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" 9890 "%vroundp<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
9529 [(set_attr "type" "ssecvt") 9891 [(set_attr "type" "ssecvt")
9530 (set_attr "prefix_extra" "1") 9892 (set_attr "prefix_data16" "1")
9893 (set_attr "prefix_extra" "1")
9894 (set_attr "length_immediate" "1")
9531 (set_attr "prefix" "maybe_vex") 9895 (set_attr "prefix" "maybe_vex")
9532 (set_attr "mode" "<MODE>")]) 9896 (set_attr "mode" "<MODE>")])
9533 9897
9534 (define_insn "*avx_rounds<ssemodesuffixf2c>" 9898 (define_insn "*avx_rounds<ssemodesuffixf2c>"
9535 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") 9899 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9541 (match_operand:SSEMODEF2P 1 "register_operand" "x") 9905 (match_operand:SSEMODEF2P 1 "register_operand" "x")
9542 (const_int 1)))] 9906 (const_int 1)))]
9543 "TARGET_AVX" 9907 "TARGET_AVX"
9544 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}" 9908 "vrounds<ssemodesuffixf2c>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9545 [(set_attr "type" "ssecvt") 9909 [(set_attr "type" "ssecvt")
9910 (set_attr "prefix_extra" "1")
9911 (set_attr "length_immediate" "1")
9546 (set_attr "prefix" "vex") 9912 (set_attr "prefix" "vex")
9547 (set_attr "mode" "<MODE>")]) 9913 (set_attr "mode" "<MODE>")])
9548 9914
9549 (define_insn "sse4_1_rounds<ssemodesuffixf2c>" 9915 (define_insn "sse4_1_rounds<ssemodesuffixf2c>"
9550 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") 9916 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
9556 (match_operand:SSEMODEF2P 1 "register_operand" "0") 9922 (match_operand:SSEMODEF2P 1 "register_operand" "0")
9557 (const_int 1)))] 9923 (const_int 1)))]
9558 "TARGET_ROUND" 9924 "TARGET_ROUND"
9559 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}" 9925 "rounds<ssemodesuffixf2c>\t{%3, %2, %0|%0, %2, %3}"
9560 [(set_attr "type" "ssecvt") 9926 [(set_attr "type" "ssecvt")
9561 (set_attr "prefix_extra" "1") 9927 (set_attr "prefix_data16" "1")
9928 (set_attr "prefix_extra" "1")
9929 (set_attr "length_immediate" "1")
9562 (set_attr "mode" "<MODE>")]) 9930 (set_attr "mode" "<MODE>")])
9563 9931
9564 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 9932 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9565 ;; 9933 ;;
9566 ;; Intel SSE4.2 string/text processing instructions 9934 ;; Intel SSE4.2 string/text processing instructions
9591 (match_dup 4) 9959 (match_dup 4)
9592 (match_dup 5) 9960 (match_dup 5)
9593 (match_dup 6)] 9961 (match_dup 6)]
9594 UNSPEC_PCMPESTR))] 9962 UNSPEC_PCMPESTR))]
9595 "TARGET_SSE4_2 9963 "TARGET_SSE4_2
9596 && !(reload_completed || reload_in_progress)" 9964 && can_create_pseudo_p ()"
9597 "#" 9965 "#"
9598 "&& 1" 9966 "&& 1"
9599 [(const_int 0)] 9967 [(const_int 0)]
9600 { 9968 {
9601 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); 9969 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9618 DONE; 9986 DONE;
9619 } 9987 }
9620 [(set_attr "type" "sselog") 9988 [(set_attr "type" "sselog")
9621 (set_attr "prefix_data16" "1") 9989 (set_attr "prefix_data16" "1")
9622 (set_attr "prefix_extra" "1") 9990 (set_attr "prefix_extra" "1")
9991 (set_attr "length_immediate" "1")
9623 (set_attr "memory" "none,load") 9992 (set_attr "memory" "none,load")
9624 (set_attr "mode" "TI")]) 9993 (set_attr "mode" "TI")])
9625 9994
9626 (define_insn "sse4_2_pcmpestri" 9995 (define_insn "sse4_2_pcmpestri"
9627 [(set (match_operand:SI 0 "register_operand" "=c,c") 9996 [(set (match_operand:SI 0 "register_operand" "=c,c")
9644 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}" 10013 "%vpcmpestri\t{%5, %3, %1|%1, %3, %5}"
9645 [(set_attr "type" "sselog") 10014 [(set_attr "type" "sselog")
9646 (set_attr "prefix_data16" "1") 10015 (set_attr "prefix_data16" "1")
9647 (set_attr "prefix_extra" "1") 10016 (set_attr "prefix_extra" "1")
9648 (set_attr "prefix" "maybe_vex") 10017 (set_attr "prefix" "maybe_vex")
10018 (set_attr "length_immediate" "1")
9649 (set_attr "memory" "none,load") 10019 (set_attr "memory" "none,load")
9650 (set_attr "mode" "TI")]) 10020 (set_attr "mode" "TI")])
9651 10021
9652 (define_insn "sse4_2_pcmpestrm" 10022 (define_insn "sse4_2_pcmpestrm"
9653 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz") 10023 [(set (match_operand:V16QI 0 "register_operand" "=Yz,Yz")
9669 "TARGET_SSE4_2" 10039 "TARGET_SSE4_2"
9670 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}" 10040 "%vpcmpestrm\t{%5, %3, %1|%1, %3, %5}"
9671 [(set_attr "type" "sselog") 10041 [(set_attr "type" "sselog")
9672 (set_attr "prefix_data16" "1") 10042 (set_attr "prefix_data16" "1")
9673 (set_attr "prefix_extra" "1") 10043 (set_attr "prefix_extra" "1")
10044 (set_attr "length_immediate" "1")
9674 (set_attr "prefix" "maybe_vex") 10045 (set_attr "prefix" "maybe_vex")
9675 (set_attr "memory" "none,load") 10046 (set_attr "memory" "none,load")
9676 (set_attr "mode" "TI")]) 10047 (set_attr "mode" "TI")])
9677 10048
9678 (define_insn "sse4_2_pcmpestr_cconly" 10049 (define_insn "sse4_2_pcmpestr_cconly"
9693 %vpcmpestri\t{%6, %4, %2|%2, %4, %6} 10064 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}
9694 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}" 10065 %vpcmpestri\t{%6, %4, %2|%2, %4, %6}"
9695 [(set_attr "type" "sselog") 10066 [(set_attr "type" "sselog")
9696 (set_attr "prefix_data16" "1") 10067 (set_attr "prefix_data16" "1")
9697 (set_attr "prefix_extra" "1") 10068 (set_attr "prefix_extra" "1")
10069 (set_attr "length_immediate" "1")
9698 (set_attr "memory" "none,load,none,load") 10070 (set_attr "memory" "none,load,none,load")
9699 (set_attr "prefix" "maybe_vex") 10071 (set_attr "prefix" "maybe_vex")
9700 (set_attr "mode" "TI")]) 10072 (set_attr "mode" "TI")])
9701 10073
9702 (define_insn_and_split "sse4_2_pcmpistr" 10074 (define_insn_and_split "sse4_2_pcmpistr"
9717 [(match_dup 2) 10089 [(match_dup 2)
9718 (match_dup 3) 10090 (match_dup 3)
9719 (match_dup 4)] 10091 (match_dup 4)]
9720 UNSPEC_PCMPISTR))] 10092 UNSPEC_PCMPISTR))]
9721 "TARGET_SSE4_2 10093 "TARGET_SSE4_2
9722 && !(reload_completed || reload_in_progress)" 10094 && can_create_pseudo_p ()"
9723 "#" 10095 "#"
9724 "&& 1" 10096 "&& 1"
9725 [(const_int 0)] 10097 [(const_int 0)]
9726 { 10098 {
9727 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0])); 10099 int ecx = !find_regno_note (curr_insn, REG_UNUSED, REGNO (operands[0]));
9741 DONE; 10113 DONE;
9742 } 10114 }
9743 [(set_attr "type" "sselog") 10115 [(set_attr "type" "sselog")
9744 (set_attr "prefix_data16" "1") 10116 (set_attr "prefix_data16" "1")
9745 (set_attr "prefix_extra" "1") 10117 (set_attr "prefix_extra" "1")
10118 (set_attr "length_immediate" "1")
9746 (set_attr "memory" "none,load") 10119 (set_attr "memory" "none,load")
9747 (set_attr "mode" "TI")]) 10120 (set_attr "mode" "TI")])
9748 10121
9749 (define_insn "sse4_2_pcmpistri" 10122 (define_insn "sse4_2_pcmpistri"
9750 [(set (match_operand:SI 0 "register_operand" "=c,c") 10123 [(set (match_operand:SI 0 "register_operand" "=c,c")
9762 "TARGET_SSE4_2" 10135 "TARGET_SSE4_2"
9763 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}" 10136 "%vpcmpistri\t{%3, %2, %1|%1, %2, %3}"
9764 [(set_attr "type" "sselog") 10137 [(set_attr "type" "sselog")
9765 (set_attr "prefix_data16" "1") 10138 (set_attr "prefix_data16" "1")
9766 (set_attr "prefix_extra" "1") 10139 (set_attr "prefix_extra" "1")
10140 (set_attr "length_immediate" "1")
9767 (set_attr "prefix" "maybe_vex") 10141 (set_attr "prefix" "maybe_vex")
9768 (set_attr "memory" "none,load") 10142 (set_attr "memory" "none,load")
9769 (set_attr "mode" "TI")]) 10143 (set_attr "mode" "TI")])
9770 10144
9771 (define_insn "sse4_2_pcmpistrm" 10145 (define_insn "sse4_2_pcmpistrm"
9784 "TARGET_SSE4_2" 10158 "TARGET_SSE4_2"
9785 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}" 10159 "%vpcmpistrm\t{%3, %2, %1|%1, %2, %3}"
9786 [(set_attr "type" "sselog") 10160 [(set_attr "type" "sselog")
9787 (set_attr "prefix_data16" "1") 10161 (set_attr "prefix_data16" "1")
9788 (set_attr "prefix_extra" "1") 10162 (set_attr "prefix_extra" "1")
10163 (set_attr "length_immediate" "1")
9789 (set_attr "prefix" "maybe_vex") 10164 (set_attr "prefix" "maybe_vex")
9790 (set_attr "memory" "none,load") 10165 (set_attr "memory" "none,load")
9791 (set_attr "mode" "TI")]) 10166 (set_attr "mode" "TI")])
9792 10167
9793 (define_insn "sse4_2_pcmpistr_cconly" 10168 (define_insn "sse4_2_pcmpistr_cconly"
9806 %vpcmpistri\t{%4, %3, %2|%2, %3, %4} 10181 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}
9807 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}" 10182 %vpcmpistri\t{%4, %3, %2|%2, %3, %4}"
9808 [(set_attr "type" "sselog") 10183 [(set_attr "type" "sselog")
9809 (set_attr "prefix_data16" "1") 10184 (set_attr "prefix_data16" "1")
9810 (set_attr "prefix_extra" "1") 10185 (set_attr "prefix_extra" "1")
10186 (set_attr "length_immediate" "1")
9811 (set_attr "memory" "none,load,none,load") 10187 (set_attr "memory" "none,load,none,load")
9812 (set_attr "prefix" "maybe_vex") 10188 (set_attr "prefix" "maybe_vex")
9813 (set_attr "mode" "TI")]) 10189 (set_attr "mode" "TI")])
9814 10190
9815 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 10191 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9816 ;; 10192 ;;
9817 ;; SSE5 instructions 10193 ;; XOP instructions
9818 ;; 10194 ;;
9819 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; 10195 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
9820 10196
9821 ;; SSE5 parallel integer multiply/add instructions. 10197 ;; XOP parallel integer multiply/add instructions.
9822 ;; Note the instruction does not allow the value being added to be a memory 10198 ;; Note the XOP multiply/add instructions
9823 ;; operation. However by pretending via the nonimmediate_operand predicate 10199 ;; a[i] = b[i] * c[i] + d[i];
9824 ;; that it does and splitting it later allows the following to be recognized: 10200 ;; do not allow the value being added to be a memory operation.
9825 ;; a[i] = b[i] * c[i] + d[i]; 10201 (define_insn "xop_pmacsww"
9826 (define_insn "sse5_pmacsww" 10202 [(set (match_operand:V8HI 0 "register_operand" "=x")
9827 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
9828 (plus:V8HI 10203 (plus:V8HI
9829 (mult:V8HI 10204 (mult:V8HI
9830 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,xm") 10205 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9831 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,x")) 10206 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
9832 (match_operand:V8HI 3 "register_operand" "0,0,0")))] 10207 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
9833 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)" 10208 "TARGET_XOP"
9834 "@ 10209 "vpmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9835 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9836 pmacsww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9837 pmacsww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9838 [(set_attr "type" "ssemuladd") 10210 [(set_attr "type" "ssemuladd")
9839 (set_attr "mode" "TI")]) 10211 (set_attr "mode" "TI")])
9840 10212
9841 ;; Split pmacsww with two memory operands into a load and the pmacsww. 10213 (define_insn "xop_pmacssww"
9842 (define_split 10214 [(set (match_operand:V8HI 0 "register_operand" "=x")
9843 [(set (match_operand:V8HI 0 "register_operand" "")
9844 (plus:V8HI
9845 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "")
9846 (match_operand:V8HI 2 "nonimmediate_operand" ""))
9847 (match_operand:V8HI 3 "nonimmediate_operand" "")))]
9848 "TARGET_SSE5
9849 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
9850 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
9851 && !reg_mentioned_p (operands[0], operands[1])
9852 && !reg_mentioned_p (operands[0], operands[2])
9853 && !reg_mentioned_p (operands[0], operands[3])"
9854 [(const_int 0)]
9855 {
9856 ix86_expand_sse5_multiple_memory (operands, 4, V8HImode);
9857 emit_insn (gen_sse5_pmacsww (operands[0], operands[1], operands[2],
9858 operands[3]));
9859 DONE;
9860 })
9861
9862 (define_insn "sse5_pmacssww"
9863 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x")
9864 (ss_plus:V8HI 10215 (ss_plus:V8HI
9865 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") 10216 (mult:V8HI (match_operand:V8HI 1 "nonimmediate_operand" "%x")
9866 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x")) 10217 (match_operand:V8HI 2 "nonimmediate_operand" "xm"))
9867 (match_operand:V8HI 3 "register_operand" "0,0,0")))] 10218 (match_operand:V8HI 3 "nonimmediate_operand" "x")))]
9868 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" 10219 "TARGET_XOP"
9869 "@ 10220 "vpmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9870 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9871 pmacssww\t{%3, %2, %1, %0|%0, %1, %2, %3}
9872 pmacssww\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9873 [(set_attr "type" "ssemuladd") 10221 [(set_attr "type" "ssemuladd")
9874 (set_attr "mode" "TI")]) 10222 (set_attr "mode" "TI")])
9875 10223
9876 ;; Note the instruction does not allow the value being added to be a memory 10224 (define_insn "xop_pmacsdd"
9877 ;; operation. However by pretending via the nonimmediate_operand predicate 10225 [(set (match_operand:V4SI 0 "register_operand" "=x")
9878 ;; that it does and splitting it later allows the following to be recognized:
9879 ;; a[i] = b[i] * c[i] + d[i];
9880 (define_insn "sse5_pmacsdd"
9881 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
9882 (plus:V4SI 10226 (plus:V4SI
9883 (mult:V4SI 10227 (mult:V4SI
9884 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") 10228 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9885 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) 10229 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
9886 (match_operand:V4SI 3 "register_operand" "0,0,0")))] 10230 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
9887 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)" 10231 "TARGET_XOP"
9888 "@ 10232 "vpmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9889 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9890 pmacsdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9891 pmacsdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9892 [(set_attr "type" "ssemuladd") 10233 [(set_attr "type" "ssemuladd")
9893 (set_attr "mode" "TI")]) 10234 (set_attr "mode" "TI")])
9894 10235
9895 ;; Split pmacsdd with two memory operands into a load and the pmacsdd. 10236 (define_insn "xop_pmacssdd"
9896 (define_split 10237 [(set (match_operand:V4SI 0 "register_operand" "=x")
9897 [(set (match_operand:V4SI 0 "register_operand" "")
9898 (plus:V4SI
9899 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "")
9900 (match_operand:V4SI 2 "nonimmediate_operand" ""))
9901 (match_operand:V4SI 3 "nonimmediate_operand" "")))]
9902 "TARGET_SSE5
9903 && !ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)
9904 && ix86_sse5_valid_op_p (operands, insn, 4, false, 2, true)
9905 && !reg_mentioned_p (operands[0], operands[1])
9906 && !reg_mentioned_p (operands[0], operands[2])
9907 && !reg_mentioned_p (operands[0], operands[3])"
9908 [(const_int 0)]
9909 {
9910 ix86_expand_sse5_multiple_memory (operands, 4, V4SImode);
9911 emit_insn (gen_sse5_pmacsdd (operands[0], operands[1], operands[2],
9912 operands[3]));
9913 DONE;
9914 })
9915
9916 (define_insn "sse5_pmacssdd"
9917 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x")
9918 (ss_plus:V4SI 10238 (ss_plus:V4SI
9919 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") 10239 (mult:V4SI (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9920 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")) 10240 (match_operand:V4SI 2 "nonimmediate_operand" "xm"))
9921 (match_operand:V4SI 3 "register_operand" "0,0,0")))] 10241 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
9922 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" 10242 "TARGET_XOP"
9923 "@ 10243 "vpmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9924 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9925 pmacssdd\t{%3, %2, %1, %0|%0, %1, %2, %3}
9926 pmacssdd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9927 [(set_attr "type" "ssemuladd") 10244 [(set_attr "type" "ssemuladd")
9928 (set_attr "mode" "TI")]) 10245 (set_attr "mode" "TI")])
9929 10246
9930 (define_insn "sse5_pmacssdql" 10247 (define_insn "xop_pmacssdql"
9931 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") 10248 [(set (match_operand:V2DI 0 "register_operand" "=x")
9932 (ss_plus:V2DI 10249 (ss_plus:V2DI
9933 (mult:V2DI 10250 (mult:V2DI
9934 (sign_extend:V2DI 10251 (sign_extend:V2DI
9935 (vec_select:V2SI 10252 (vec_select:V2SI
9936 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") 10253 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9937 (parallel [(const_int 1) 10254 (parallel [(const_int 1)
9938 (const_int 3)]))) 10255 (const_int 3)])))
9939 (vec_select:V2SI 10256 (vec_select:V2SI
9940 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") 10257 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9941 (parallel [(const_int 1) 10258 (parallel [(const_int 1)
9942 (const_int 3)]))) 10259 (const_int 3)])))
9943 (match_operand:V2DI 3 "register_operand" "0,0,0")))] 10260 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
9944 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" 10261 "TARGET_XOP"
9945 "@ 10262 "vpmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9946 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9947 pmacssdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9948 pmacssdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9949 [(set_attr "type" "ssemuladd") 10263 [(set_attr "type" "ssemuladd")
9950 (set_attr "mode" "TI")]) 10264 (set_attr "mode" "TI")])
9951 10265
9952 (define_insn "sse5_pmacssdqh" 10266 (define_insn "xop_pmacssdqh"
9953 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") 10267 [(set (match_operand:V2DI 0 "register_operand" "=x")
9954 (ss_plus:V2DI 10268 (ss_plus:V2DI
9955 (mult:V2DI 10269 (mult:V2DI
9956 (sign_extend:V2DI 10270 (sign_extend:V2DI
9957 (vec_select:V2SI 10271 (vec_select:V2SI
9958 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") 10272 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9959 (parallel [(const_int 0) 10273 (parallel [(const_int 0)
9960 (const_int 2)]))) 10274 (const_int 2)])))
9961 (sign_extend:V2DI 10275 (sign_extend:V2DI
9962 (vec_select:V2SI 10276 (vec_select:V2SI
9963 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") 10277 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9964 (parallel [(const_int 0) 10278 (parallel [(const_int 0)
9965 (const_int 2)])))) 10279 (const_int 2)]))))
9966 (match_operand:V2DI 3 "register_operand" "0,0,0")))] 10280 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
9967 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" 10281 "TARGET_XOP"
9968 "@ 10282 "vpmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9969 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9970 pmacssdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
9971 pmacssdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9972 [(set_attr "type" "ssemuladd") 10283 [(set_attr "type" "ssemuladd")
9973 (set_attr "mode" "TI")]) 10284 (set_attr "mode" "TI")])
9974 10285
9975 (define_insn "sse5_pmacsdql" 10286 (define_insn "xop_pmacsdql"
9976 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") 10287 [(set (match_operand:V2DI 0 "register_operand" "=x")
9977 (plus:V2DI 10288 (plus:V2DI
9978 (mult:V2DI 10289 (mult:V2DI
9979 (sign_extend:V2DI 10290 (sign_extend:V2DI
9980 (vec_select:V2SI 10291 (vec_select:V2SI
9981 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") 10292 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
9982 (parallel [(const_int 1) 10293 (parallel [(const_int 1)
9983 (const_int 3)]))) 10294 (const_int 3)])))
9984 (sign_extend:V2DI 10295 (sign_extend:V2DI
9985 (vec_select:V2SI 10296 (vec_select:V2SI
9986 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") 10297 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
9987 (parallel [(const_int 1) 10298 (parallel [(const_int 1)
9988 (const_int 3)])))) 10299 (const_int 3)]))))
9989 (match_operand:V2DI 3 "register_operand" "0,0,0")))] 10300 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
9990 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" 10301 "TARGET_XOP"
9991 "@ 10302 "vpmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}"
9992 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9993 pmacsdql\t{%3, %2, %1, %0|%0, %1, %2, %3}
9994 pmacsdql\t{%3, %1, %2, %0|%0, %2, %1, %3}"
9995 [(set_attr "type" "ssemuladd") 10303 [(set_attr "type" "ssemuladd")
9996 (set_attr "mode" "TI")]) 10304 (set_attr "mode" "TI")])
9997 10305
9998 (define_insn_and_split "*sse5_pmacsdql_mem" 10306 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
9999 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
10000 (plus:V2DI
10001 (mult:V2DI
10002 (sign_extend:V2DI
10003 (vec_select:V2SI
10004 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10005 (parallel [(const_int 1)
10006 (const_int 3)])))
10007 (sign_extend:V2DI
10008 (vec_select:V2SI
10009 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10010 (parallel [(const_int 1)
10011 (const_int 3)]))))
10012 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
10013 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
10014 "#"
10015 "&& (reload_completed
10016 || (!reg_mentioned_p (operands[0], operands[1])
10017 && !reg_mentioned_p (operands[0], operands[2])))"
10018 [(set (match_dup 0)
10019 (match_dup 3))
10020 (set (match_dup 0)
10021 (plus:V2DI
10022 (mult:V2DI
10023 (sign_extend:V2DI
10024 (vec_select:V2SI
10025 (match_dup 1)
10026 (parallel [(const_int 1)
10027 (const_int 3)])))
10028 (sign_extend:V2DI
10029 (vec_select:V2SI
10030 (match_dup 2)
10031 (parallel [(const_int 1)
10032 (const_int 3)]))))
10033 (match_dup 0)))])
10034
10035 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10036 ;; fake it with a multiply/add. In general, we expect the define_split to 10307 ;; fake it with a multiply/add. In general, we expect the define_split to
10037 ;; occur before register allocation, so we have to handle the corner case where 10308 ;; occur before register allocation, so we have to handle the corner case where
10038 ;; the target is the same as operands 1/2 10309 ;; the target is the same as operands 1/2
10039 (define_insn_and_split "sse5_mulv2div2di3_low" 10310 (define_insn_and_split "xop_mulv2div2di3_low"
10040 [(set (match_operand:V2DI 0 "register_operand" "=&x") 10311 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10041 (mult:V2DI 10312 (mult:V2DI
10042 (sign_extend:V2DI 10313 (sign_extend:V2DI
10043 (vec_select:V2SI 10314 (vec_select:V2SI
10044 (match_operand:V4SI 1 "nonimmediate_operand" "%x") 10315 (match_operand:V4SI 1 "register_operand" "%x")
10045 (parallel [(const_int 1) 10316 (parallel [(const_int 1)
10046 (const_int 3)]))) 10317 (const_int 3)])))
10047 (sign_extend:V2DI 10318 (sign_extend:V2DI
10048 (vec_select:V2SI 10319 (vec_select:V2SI
10049 (match_operand:V4SI 2 "nonimmediate_operand" "xm") 10320 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10050 (parallel [(const_int 1) 10321 (parallel [(const_int 1)
10051 (const_int 3)])))))] 10322 (const_int 3)])))))]
10052 "TARGET_SSE5" 10323 "TARGET_XOP"
10053 "#" 10324 "#"
10054 "&& (reload_completed 10325 "&& reload_completed"
10055 || (!reg_mentioned_p (operands[0], operands[1])
10056 && !reg_mentioned_p (operands[0], operands[2])))"
10057 [(set (match_dup 0) 10326 [(set (match_dup 0)
10058 (match_dup 3)) 10327 (match_dup 3))
10059 (set (match_dup 0) 10328 (set (match_dup 0)
10060 (plus:V2DI 10329 (plus:V2DI
10061 (mult:V2DI 10330 (mult:V2DI
10074 operands[3] = CONST0_RTX (V2DImode); 10343 operands[3] = CONST0_RTX (V2DImode);
10075 } 10344 }
10076 [(set_attr "type" "ssemuladd") 10345 [(set_attr "type" "ssemuladd")
10077 (set_attr "mode" "TI")]) 10346 (set_attr "mode" "TI")])
10078 10347
10079 (define_insn "sse5_pmacsdqh" 10348 (define_insn "xop_pmacsdqh"
10080 [(set (match_operand:V2DI 0 "register_operand" "=x,x,x") 10349 [(set (match_operand:V2DI 0 "register_operand" "=x")
10081 (plus:V2DI 10350 (plus:V2DI
10082 (mult:V2DI 10351 (mult:V2DI
10083 (sign_extend:V2DI 10352 (sign_extend:V2DI
10084 (vec_select:V2SI 10353 (vec_select:V2SI
10085 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m") 10354 (match_operand:V4SI 1 "nonimmediate_operand" "%x")
10086 (parallel [(const_int 0) 10355 (parallel [(const_int 0)
10087 (const_int 2)]))) 10356 (const_int 2)])))
10088 (sign_extend:V2DI 10357 (sign_extend:V2DI
10089 (vec_select:V2SI 10358 (vec_select:V2SI
10090 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x") 10359 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10091 (parallel [(const_int 0) 10360 (parallel [(const_int 0)
10092 (const_int 2)])))) 10361 (const_int 2)]))))
10093 (match_operand:V2DI 3 "register_operand" "0,0,0")))] 10362 (match_operand:V2DI 3 "nonimmediate_operand" "x")))]
10094 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" 10363 "TARGET_XOP"
10095 "@ 10364 "vpmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10096 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10097 pmacsdqh\t{%3, %2, %1, %0|%0, %1, %2, %3}
10098 pmacsdqh\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10099 [(set_attr "type" "ssemuladd") 10365 [(set_attr "type" "ssemuladd")
10100 (set_attr "mode" "TI")]) 10366 (set_attr "mode" "TI")])
10101 10367
10102 (define_insn_and_split "*sse5_pmacsdqh_mem" 10368 ;; We don't have a straight 32-bit parallel multiply and extend on XOP, so
10103 [(set (match_operand:V2DI 0 "register_operand" "=&x,&x,&x")
10104 (plus:V2DI
10105 (mult:V2DI
10106 (sign_extend:V2DI
10107 (vec_select:V2SI
10108 (match_operand:V4SI 1 "nonimmediate_operand" "%x,x,m")
10109 (parallel [(const_int 0)
10110 (const_int 2)])))
10111 (sign_extend:V2DI
10112 (vec_select:V2SI
10113 (match_operand:V4SI 2 "nonimmediate_operand" "x,m,x")
10114 (parallel [(const_int 0)
10115 (const_int 2)]))))
10116 (match_operand:V2DI 3 "memory_operand" "m,m,m")))]
10117 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, -1, true)"
10118 "#"
10119 "&& (reload_completed
10120 || (!reg_mentioned_p (operands[0], operands[1])
10121 && !reg_mentioned_p (operands[0], operands[2])))"
10122 [(set (match_dup 0)
10123 (match_dup 3))
10124 (set (match_dup 0)
10125 (plus:V2DI
10126 (mult:V2DI
10127 (sign_extend:V2DI
10128 (vec_select:V2SI
10129 (match_dup 1)
10130 (parallel [(const_int 0)
10131 (const_int 2)])))
10132 (sign_extend:V2DI
10133 (vec_select:V2SI
10134 (match_dup 2)
10135 (parallel [(const_int 0)
10136 (const_int 2)]))))
10137 (match_dup 0)))])
10138
10139 ;; We don't have a straight 32-bit parallel multiply and extend on SSE5, so
10140 ;; fake it with a multiply/add. In general, we expect the define_split to 10369 ;; fake it with a multiply/add. In general, we expect the define_split to
10141 ;; occur before register allocation, so we have to handle the corner case where 10370 ;; occur before register allocation, so we have to handle the corner case where
10142 ;; the target is the same as either operands[1] or operands[2] 10371 ;; the target is the same as either operands[1] or operands[2]
10143 (define_insn_and_split "sse5_mulv2div2di3_high" 10372 (define_insn_and_split "xop_mulv2div2di3_high"
10144 [(set (match_operand:V2DI 0 "register_operand" "=&x") 10373 [(set (match_operand:V2DI 0 "register_operand" "=&x")
10145 (mult:V2DI 10374 (mult:V2DI
10146 (sign_extend:V2DI 10375 (sign_extend:V2DI
10147 (vec_select:V2SI 10376 (vec_select:V2SI
10148 (match_operand:V4SI 1 "nonimmediate_operand" "%x") 10377 (match_operand:V4SI 1 "register_operand" "%x")
10149 (parallel [(const_int 0) 10378 (parallel [(const_int 0)
10150 (const_int 2)]))) 10379 (const_int 2)])))
10151 (sign_extend:V2DI 10380 (sign_extend:V2DI
10152 (vec_select:V2SI 10381 (vec_select:V2SI
10153 (match_operand:V4SI 2 "nonimmediate_operand" "xm") 10382 (match_operand:V4SI 2 "nonimmediate_operand" "xm")
10154 (parallel [(const_int 0) 10383 (parallel [(const_int 0)
10155 (const_int 2)])))))] 10384 (const_int 2)])))))]
10156 "TARGET_SSE5" 10385 "TARGET_XOP"
10157 "#" 10386 "#"
10158 "&& (reload_completed 10387 "&& reload_completed"
10159 || (!reg_mentioned_p (operands[0], operands[1])
10160 && !reg_mentioned_p (operands[0], operands[2])))"
10161 [(set (match_dup 0) 10388 [(set (match_dup 0)
10162 (match_dup 3)) 10389 (match_dup 3))
10163 (set (match_dup 0) 10390 (set (match_dup 0)
10164 (plus:V2DI 10391 (plus:V2DI
10165 (mult:V2DI 10392 (mult:V2DI
10178 operands[3] = CONST0_RTX (V2DImode); 10405 operands[3] = CONST0_RTX (V2DImode);
10179 } 10406 }
10180 [(set_attr "type" "ssemuladd") 10407 [(set_attr "type" "ssemuladd")
10181 (set_attr "mode" "TI")]) 10408 (set_attr "mode" "TI")])
10182 10409
10183 ;; SSE5 parallel integer multiply/add instructions for the intrinsics 10410 ;; XOP parallel integer multiply/add instructions for the intrinsics
10184 (define_insn "sse5_pmacsswd" 10411 (define_insn "xop_pmacsswd"
10185 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") 10412 [(set (match_operand:V4SI 0 "register_operand" "=x")
10186 (ss_plus:V4SI 10413 (ss_plus:V4SI
10187 (mult:V4SI 10414 (mult:V4SI
10188 (sign_extend:V4SI 10415 (sign_extend:V4SI
10189 (vec_select:V4HI 10416 (vec_select:V4HI
10190 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") 10417 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10191 (parallel [(const_int 1) 10418 (parallel [(const_int 1)
10192 (const_int 3) 10419 (const_int 3)
10193 (const_int 5) 10420 (const_int 5)
10194 (const_int 7)]))) 10421 (const_int 7)])))
10195 (sign_extend:V4SI 10422 (sign_extend:V4SI
10196 (vec_select:V4HI 10423 (vec_select:V4HI
10197 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") 10424 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10198 (parallel [(const_int 1) 10425 (parallel [(const_int 1)
10199 (const_int 3) 10426 (const_int 3)
10200 (const_int 5) 10427 (const_int 5)
10201 (const_int 7)])))) 10428 (const_int 7)]))))
10202 (match_operand:V4SI 3 "register_operand" "0,0,0")))] 10429 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10203 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" 10430 "TARGET_XOP"
10204 "@ 10431 "vpmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10205 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10206 pmacsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10207 pmacsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10208 [(set_attr "type" "ssemuladd") 10432 [(set_attr "type" "ssemuladd")
10209 (set_attr "mode" "TI")]) 10433 (set_attr "mode" "TI")])
10210 10434
10211 (define_insn "sse5_pmacswd" 10435 (define_insn "xop_pmacswd"
10212 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") 10436 [(set (match_operand:V4SI 0 "register_operand" "=x")
10213 (plus:V4SI 10437 (plus:V4SI
10214 (mult:V4SI 10438 (mult:V4SI
10215 (sign_extend:V4SI 10439 (sign_extend:V4SI
10216 (vec_select:V4HI 10440 (vec_select:V4HI
10217 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") 10441 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10218 (parallel [(const_int 1) 10442 (parallel [(const_int 1)
10219 (const_int 3) 10443 (const_int 3)
10220 (const_int 5) 10444 (const_int 5)
10221 (const_int 7)]))) 10445 (const_int 7)])))
10222 (sign_extend:V4SI 10446 (sign_extend:V4SI
10223 (vec_select:V4HI 10447 (vec_select:V4HI
10224 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") 10448 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10225 (parallel [(const_int 1) 10449 (parallel [(const_int 1)
10226 (const_int 3) 10450 (const_int 3)
10227 (const_int 5) 10451 (const_int 5)
10228 (const_int 7)])))) 10452 (const_int 7)]))))
10229 (match_operand:V4SI 3 "register_operand" "0,0,0")))] 10453 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10230 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" 10454 "TARGET_XOP"
10231 "@ 10455 "vpmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10232 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10233 pmacswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10234 pmacswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10235 [(set_attr "type" "ssemuladd") 10456 [(set_attr "type" "ssemuladd")
10236 (set_attr "mode" "TI")]) 10457 (set_attr "mode" "TI")])
10237 10458
10238 (define_insn "sse5_pmadcsswd" 10459 (define_insn "xop_pmadcsswd"
10239 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") 10460 [(set (match_operand:V4SI 0 "register_operand" "=x")
10240 (ss_plus:V4SI 10461 (ss_plus:V4SI
10241 (plus:V4SI 10462 (plus:V4SI
10242 (mult:V4SI 10463 (mult:V4SI
10243 (sign_extend:V4SI 10464 (sign_extend:V4SI
10244 (vec_select:V4HI 10465 (vec_select:V4HI
10245 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") 10466 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10246 (parallel [(const_int 0) 10467 (parallel [(const_int 0)
10247 (const_int 2) 10468 (const_int 2)
10248 (const_int 4) 10469 (const_int 4)
10249 (const_int 6)]))) 10470 (const_int 6)])))
10250 (sign_extend:V4SI 10471 (sign_extend:V4SI
10251 (vec_select:V4HI 10472 (vec_select:V4HI
10252 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") 10473 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10253 (parallel [(const_int 0) 10474 (parallel [(const_int 0)
10254 (const_int 2) 10475 (const_int 2)
10255 (const_int 4) 10476 (const_int 4)
10256 (const_int 6)])))) 10477 (const_int 6)]))))
10257 (mult:V4SI 10478 (mult:V4SI
10267 (match_dup 2) 10488 (match_dup 2)
10268 (parallel [(const_int 1) 10489 (parallel [(const_int 1)
10269 (const_int 3) 10490 (const_int 3)
10270 (const_int 5) 10491 (const_int 5)
10271 (const_int 7)]))))) 10492 (const_int 7)])))))
10272 (match_operand:V4SI 3 "register_operand" "0,0,0")))] 10493 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10273 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" 10494 "TARGET_XOP"
10274 "@ 10495 "vpmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10275 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10276 pmadcsswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10277 pmadcsswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10278 [(set_attr "type" "ssemuladd") 10496 [(set_attr "type" "ssemuladd")
10279 (set_attr "mode" "TI")]) 10497 (set_attr "mode" "TI")])
10280 10498
10281 (define_insn "sse5_pmadcswd" 10499 (define_insn "xop_pmadcswd"
10282 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x") 10500 [(set (match_operand:V4SI 0 "register_operand" "=x")
10283 (plus:V4SI 10501 (plus:V4SI
10284 (plus:V4SI 10502 (plus:V4SI
10285 (mult:V4SI 10503 (mult:V4SI
10286 (sign_extend:V4SI 10504 (sign_extend:V4SI
10287 (vec_select:V4HI 10505 (vec_select:V4HI
10288 (match_operand:V8HI 1 "nonimmediate_operand" "%x,x,m") 10506 (match_operand:V8HI 1 "nonimmediate_operand" "%x")
10289 (parallel [(const_int 0) 10507 (parallel [(const_int 0)
10290 (const_int 2) 10508 (const_int 2)
10291 (const_int 4) 10509 (const_int 4)
10292 (const_int 6)]))) 10510 (const_int 6)])))
10293 (sign_extend:V4SI 10511 (sign_extend:V4SI
10294 (vec_select:V4HI 10512 (vec_select:V4HI
10295 (match_operand:V8HI 2 "nonimmediate_operand" "x,m,x") 10513 (match_operand:V8HI 2 "nonimmediate_operand" "xm")
10296 (parallel [(const_int 0) 10514 (parallel [(const_int 0)
10297 (const_int 2) 10515 (const_int 2)
10298 (const_int 4) 10516 (const_int 4)
10299 (const_int 6)])))) 10517 (const_int 6)]))))
10300 (mult:V4SI 10518 (mult:V4SI
10310 (match_dup 2) 10528 (match_dup 2)
10311 (parallel [(const_int 1) 10529 (parallel [(const_int 1)
10312 (const_int 3) 10530 (const_int 3)
10313 (const_int 5) 10531 (const_int 5)
10314 (const_int 7)]))))) 10532 (const_int 7)])))))
10315 (match_operand:V4SI 3 "register_operand" "0,0,0")))] 10533 (match_operand:V4SI 3 "nonimmediate_operand" "x")))]
10316 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, false, 1, true)" 10534 "TARGET_XOP"
10317 "@ 10535 "vpmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10318 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10319 pmadcswd\t{%3, %2, %1, %0|%0, %1, %2, %3}
10320 pmadcswd\t{%3, %1, %2, %0|%0, %2, %1, %3}"
10321 [(set_attr "type" "ssemuladd") 10536 [(set_attr "type" "ssemuladd")
10322 (set_attr "mode" "TI")]) 10537 (set_attr "mode" "TI")])
10323 10538
10324 ;; SSE5 parallel XMM conditional moves 10539 ;; XOP parallel XMM conditional moves
10325 (define_insn "sse5_pcmov_<mode>" 10540 (define_insn "xop_pcmov_<mode>"
10326 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x,x,x") 10541 [(set (match_operand:SSEMODE 0 "register_operand" "=x,x")
10327 (if_then_else:SSEMODE 10542 (if_then_else:SSEMODE
10328 (match_operand:SSEMODE 3 "nonimmediate_operand" "0,0,xm,x") 10543 (match_operand:SSEMODE 3 "nonimmediate_operand" "x,m")
10329 (match_operand:SSEMODE 1 "vector_move_operand" "x,xm,0,0") 10544 (match_operand:SSEMODE 1 "vector_move_operand" "x,x")
10330 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x,x,xm")))] 10545 (match_operand:SSEMODE 2 "vector_move_operand" "xm,x")))]
10331 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" 10546 "TARGET_XOP"
10332 "@ 10547 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10333 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10334 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10335 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}
10336 pcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10337 [(set_attr "type" "sse4arg")]) 10548 [(set_attr "type" "sse4arg")])
10338 10549
10339 ;; SSE5 horizontal add/subtract instructions 10550 (define_insn "xop_pcmov_<mode>256"
10340 (define_insn "sse5_phaddbw" 10551 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x")
10552 (if_then_else:AVX256MODE
10553 (match_operand:AVX256MODE 3 "nonimmediate_operand" "x,m")
10554 (match_operand:AVX256MODE 1 "vector_move_operand" "x,x")
10555 (match_operand:AVX256MODE 2 "vector_move_operand" "xm,x")))]
10556 "TARGET_XOP"
10557 "vpcmov\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10558 [(set_attr "type" "sse4arg")])
10559
10560 ;; XOP horizontal add/subtract instructions
10561 (define_insn "xop_phaddbw"
10341 [(set (match_operand:V8HI 0 "register_operand" "=x") 10562 [(set (match_operand:V8HI 0 "register_operand" "=x")
10342 (plus:V8HI 10563 (plus:V8HI
10343 (sign_extend:V8HI 10564 (sign_extend:V8HI
10344 (vec_select:V8QI 10565 (vec_select:V8QI
10345 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 10566 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10360 (const_int 7) 10581 (const_int 7)
10361 (const_int 9) 10582 (const_int 9)
10362 (const_int 11) 10583 (const_int 11)
10363 (const_int 13) 10584 (const_int 13)
10364 (const_int 15)])))))] 10585 (const_int 15)])))))]
10365 "TARGET_SSE5" 10586 "TARGET_XOP"
10366 "phaddbw\t{%1, %0|%0, %1}" 10587 "vphaddbw\t{%1, %0|%0, %1}"
10367 [(set_attr "type" "sseiadd1")]) 10588 [(set_attr "type" "sseiadd1")])
10368 10589
10369 (define_insn "sse5_phaddbd" 10590 (define_insn "xop_phaddbd"
10370 [(set (match_operand:V4SI 0 "register_operand" "=x") 10591 [(set (match_operand:V4SI 0 "register_operand" "=x")
10371 (plus:V4SI 10592 (plus:V4SI
10372 (plus:V4SI 10593 (plus:V4SI
10373 (sign_extend:V4SI 10594 (sign_extend:V4SI
10374 (vec_select:V4QI 10595 (vec_select:V4QI
10397 (match_dup 1) 10618 (match_dup 1)
10398 (parallel [(const_int 3) 10619 (parallel [(const_int 3)
10399 (const_int 7) 10620 (const_int 7)
10400 (const_int 11) 10621 (const_int 11)
10401 (const_int 15)]))))))] 10622 (const_int 15)]))))))]
10402 "TARGET_SSE5" 10623 "TARGET_XOP"
10403 "phaddbd\t{%1, %0|%0, %1}" 10624 "vphaddbd\t{%1, %0|%0, %1}"
10404 [(set_attr "type" "sseiadd1")]) 10625 [(set_attr "type" "sseiadd1")])
10405 10626
10406 (define_insn "sse5_phaddbq" 10627 (define_insn "xop_phaddbq"
10407 [(set (match_operand:V2DI 0 "register_operand" "=x") 10628 [(set (match_operand:V2DI 0 "register_operand" "=x")
10408 (plus:V2DI 10629 (plus:V2DI
10409 (plus:V2DI 10630 (plus:V2DI
10410 (plus:V2DI 10631 (plus:V2DI
10411 (sign_extend:V2DI 10632 (sign_extend:V2DI
10450 (sign_extend:V2DI 10671 (sign_extend:V2DI
10451 (vec_select:V2QI 10672 (vec_select:V2QI
10452 (match_dup 1) 10673 (match_dup 1)
10453 (parallel [(const_int 11) 10674 (parallel [(const_int 11)
10454 (const_int 15)])))))))] 10675 (const_int 15)])))))))]
10455 "TARGET_SSE5" 10676 "TARGET_XOP"
10456 "phaddbq\t{%1, %0|%0, %1}" 10677 "vphaddbq\t{%1, %0|%0, %1}"
10457 [(set_attr "type" "sseiadd1")]) 10678 [(set_attr "type" "sseiadd1")])
10458 10679
10459 (define_insn "sse5_phaddwd" 10680 (define_insn "xop_phaddwd"
10460 [(set (match_operand:V4SI 0 "register_operand" "=x") 10681 [(set (match_operand:V4SI 0 "register_operand" "=x")
10461 (plus:V4SI 10682 (plus:V4SI
10462 (sign_extend:V4SI 10683 (sign_extend:V4SI
10463 (vec_select:V4HI 10684 (vec_select:V4HI
10464 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 10685 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10471 (match_dup 1) 10692 (match_dup 1)
10472 (parallel [(const_int 1) 10693 (parallel [(const_int 1)
10473 (const_int 3) 10694 (const_int 3)
10474 (const_int 5) 10695 (const_int 5)
10475 (const_int 7)])))))] 10696 (const_int 7)])))))]
10476 "TARGET_SSE5" 10697 "TARGET_XOP"
10477 "phaddwd\t{%1, %0|%0, %1}" 10698 "vphaddwd\t{%1, %0|%0, %1}"
10478 [(set_attr "type" "sseiadd1")]) 10699 [(set_attr "type" "sseiadd1")])
10479 10700
10480 (define_insn "sse5_phaddwq" 10701 (define_insn "xop_phaddwq"
10481 [(set (match_operand:V2DI 0 "register_operand" "=x") 10702 [(set (match_operand:V2DI 0 "register_operand" "=x")
10482 (plus:V2DI 10703 (plus:V2DI
10483 (plus:V2DI 10704 (plus:V2DI
10484 (sign_extend:V2DI 10705 (sign_extend:V2DI
10485 (vec_select:V2HI 10706 (vec_select:V2HI
10500 (sign_extend:V2DI 10721 (sign_extend:V2DI
10501 (vec_select:V2HI 10722 (vec_select:V2HI
10502 (match_dup 1) 10723 (match_dup 1)
10503 (parallel [(const_int 3) 10724 (parallel [(const_int 3)
10504 (const_int 7)]))))))] 10725 (const_int 7)]))))))]
10505 "TARGET_SSE5" 10726 "TARGET_XOP"
10506 "phaddwq\t{%1, %0|%0, %1}" 10727 "vphaddwq\t{%1, %0|%0, %1}"
10507 [(set_attr "type" "sseiadd1")]) 10728 [(set_attr "type" "sseiadd1")])
10508 10729
10509 (define_insn "sse5_phadddq" 10730 (define_insn "xop_phadddq"
10510 [(set (match_operand:V2DI 0 "register_operand" "=x") 10731 [(set (match_operand:V2DI 0 "register_operand" "=x")
10511 (plus:V2DI 10732 (plus:V2DI
10512 (sign_extend:V2DI 10733 (sign_extend:V2DI
10513 (vec_select:V2SI 10734 (vec_select:V2SI
10514 (match_operand:V4SI 1 "nonimmediate_operand" "xm") 10735 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10517 (sign_extend:V2DI 10738 (sign_extend:V2DI
10518 (vec_select:V2SI 10739 (vec_select:V2SI
10519 (match_dup 1) 10740 (match_dup 1)
10520 (parallel [(const_int 1) 10741 (parallel [(const_int 1)
10521 (const_int 3)])))))] 10742 (const_int 3)])))))]
10522 "TARGET_SSE5" 10743 "TARGET_XOP"
10523 "phadddq\t{%1, %0|%0, %1}" 10744 "vphadddq\t{%1, %0|%0, %1}"
10524 [(set_attr "type" "sseiadd1")]) 10745 [(set_attr "type" "sseiadd1")])
10525 10746
10526 (define_insn "sse5_phaddubw" 10747 (define_insn "xop_phaddubw"
10527 [(set (match_operand:V8HI 0 "register_operand" "=x") 10748 [(set (match_operand:V8HI 0 "register_operand" "=x")
10528 (plus:V8HI 10749 (plus:V8HI
10529 (zero_extend:V8HI 10750 (zero_extend:V8HI
10530 (vec_select:V8QI 10751 (vec_select:V8QI
10531 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 10752 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10546 (const_int 7) 10767 (const_int 7)
10547 (const_int 9) 10768 (const_int 9)
10548 (const_int 11) 10769 (const_int 11)
10549 (const_int 13) 10770 (const_int 13)
10550 (const_int 15)])))))] 10771 (const_int 15)])))))]
10551 "TARGET_SSE5" 10772 "TARGET_XOP"
10552 "phaddubw\t{%1, %0|%0, %1}" 10773 "vphaddubw\t{%1, %0|%0, %1}"
10553 [(set_attr "type" "sseiadd1")]) 10774 [(set_attr "type" "sseiadd1")])
10554 10775
10555 (define_insn "sse5_phaddubd" 10776 (define_insn "xop_phaddubd"
10556 [(set (match_operand:V4SI 0 "register_operand" "=x") 10777 [(set (match_operand:V4SI 0 "register_operand" "=x")
10557 (plus:V4SI 10778 (plus:V4SI
10558 (plus:V4SI 10779 (plus:V4SI
10559 (zero_extend:V4SI 10780 (zero_extend:V4SI
10560 (vec_select:V4QI 10781 (vec_select:V4QI
10583 (match_dup 1) 10804 (match_dup 1)
10584 (parallel [(const_int 3) 10805 (parallel [(const_int 3)
10585 (const_int 7) 10806 (const_int 7)
10586 (const_int 11) 10807 (const_int 11)
10587 (const_int 15)]))))))] 10808 (const_int 15)]))))))]
10588 "TARGET_SSE5" 10809 "TARGET_XOP"
10589 "phaddubd\t{%1, %0|%0, %1}" 10810 "vphaddubd\t{%1, %0|%0, %1}"
10590 [(set_attr "type" "sseiadd1")]) 10811 [(set_attr "type" "sseiadd1")])
10591 10812
10592 (define_insn "sse5_phaddubq" 10813 (define_insn "xop_phaddubq"
10593 [(set (match_operand:V2DI 0 "register_operand" "=x") 10814 [(set (match_operand:V2DI 0 "register_operand" "=x")
10594 (plus:V2DI 10815 (plus:V2DI
10595 (plus:V2DI 10816 (plus:V2DI
10596 (plus:V2DI 10817 (plus:V2DI
10597 (zero_extend:V2DI 10818 (zero_extend:V2DI
10636 (zero_extend:V2DI 10857 (zero_extend:V2DI
10637 (vec_select:V2QI 10858 (vec_select:V2QI
10638 (match_dup 1) 10859 (match_dup 1)
10639 (parallel [(const_int 11) 10860 (parallel [(const_int 11)
10640 (const_int 15)])))))))] 10861 (const_int 15)])))))))]
10641 "TARGET_SSE5" 10862 "TARGET_XOP"
10642 "phaddubq\t{%1, %0|%0, %1}" 10863 "vphaddubq\t{%1, %0|%0, %1}"
10643 [(set_attr "type" "sseiadd1")]) 10864 [(set_attr "type" "sseiadd1")])
10644 10865
10645 (define_insn "sse5_phadduwd" 10866 (define_insn "xop_phadduwd"
10646 [(set (match_operand:V4SI 0 "register_operand" "=x") 10867 [(set (match_operand:V4SI 0 "register_operand" "=x")
10647 (plus:V4SI 10868 (plus:V4SI
10648 (zero_extend:V4SI 10869 (zero_extend:V4SI
10649 (vec_select:V4HI 10870 (vec_select:V4HI
10650 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 10871 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10657 (match_dup 1) 10878 (match_dup 1)
10658 (parallel [(const_int 1) 10879 (parallel [(const_int 1)
10659 (const_int 3) 10880 (const_int 3)
10660 (const_int 5) 10881 (const_int 5)
10661 (const_int 7)])))))] 10882 (const_int 7)])))))]
10662 "TARGET_SSE5" 10883 "TARGET_XOP"
10663 "phadduwd\t{%1, %0|%0, %1}" 10884 "vphadduwd\t{%1, %0|%0, %1}"
10664 [(set_attr "type" "sseiadd1")]) 10885 [(set_attr "type" "sseiadd1")])
10665 10886
10666 (define_insn "sse5_phadduwq" 10887 (define_insn "xop_phadduwq"
10667 [(set (match_operand:V2DI 0 "register_operand" "=x") 10888 [(set (match_operand:V2DI 0 "register_operand" "=x")
10668 (plus:V2DI 10889 (plus:V2DI
10669 (plus:V2DI 10890 (plus:V2DI
10670 (zero_extend:V2DI 10891 (zero_extend:V2DI
10671 (vec_select:V2HI 10892 (vec_select:V2HI
10686 (zero_extend:V2DI 10907 (zero_extend:V2DI
10687 (vec_select:V2HI 10908 (vec_select:V2HI
10688 (match_dup 1) 10909 (match_dup 1)
10689 (parallel [(const_int 3) 10910 (parallel [(const_int 3)
10690 (const_int 7)]))))))] 10911 (const_int 7)]))))))]
10691 "TARGET_SSE5" 10912 "TARGET_XOP"
10692 "phadduwq\t{%1, %0|%0, %1}" 10913 "vphadduwq\t{%1, %0|%0, %1}"
10693 [(set_attr "type" "sseiadd1")]) 10914 [(set_attr "type" "sseiadd1")])
10694 10915
10695 (define_insn "sse5_phaddudq" 10916 (define_insn "xop_phaddudq"
10696 [(set (match_operand:V2DI 0 "register_operand" "=x") 10917 [(set (match_operand:V2DI 0 "register_operand" "=x")
10697 (plus:V2DI 10918 (plus:V2DI
10698 (zero_extend:V2DI 10919 (zero_extend:V2DI
10699 (vec_select:V2SI 10920 (vec_select:V2SI
10700 (match_operand:V4SI 1 "nonimmediate_operand" "xm") 10921 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10703 (zero_extend:V2DI 10924 (zero_extend:V2DI
10704 (vec_select:V2SI 10925 (vec_select:V2SI
10705 (match_dup 1) 10926 (match_dup 1)
10706 (parallel [(const_int 1) 10927 (parallel [(const_int 1)
10707 (const_int 3)])))))] 10928 (const_int 3)])))))]
10708 "TARGET_SSE5" 10929 "TARGET_XOP"
10709 "phaddudq\t{%1, %0|%0, %1}" 10930 "vphaddudq\t{%1, %0|%0, %1}"
10710 [(set_attr "type" "sseiadd1")]) 10931 [(set_attr "type" "sseiadd1")])
10711 10932
10712 (define_insn "sse5_phsubbw" 10933 (define_insn "xop_phsubbw"
10713 [(set (match_operand:V8HI 0 "register_operand" "=x") 10934 [(set (match_operand:V8HI 0 "register_operand" "=x")
10714 (minus:V8HI 10935 (minus:V8HI
10715 (sign_extend:V8HI 10936 (sign_extend:V8HI
10716 (vec_select:V8QI 10937 (vec_select:V8QI
10717 (match_operand:V16QI 1 "nonimmediate_operand" "xm") 10938 (match_operand:V16QI 1 "nonimmediate_operand" "xm")
10732 (const_int 7) 10953 (const_int 7)
10733 (const_int 9) 10954 (const_int 9)
10734 (const_int 11) 10955 (const_int 11)
10735 (const_int 13) 10956 (const_int 13)
10736 (const_int 15)])))))] 10957 (const_int 15)])))))]
10737 "TARGET_SSE5" 10958 "TARGET_XOP"
10738 "phsubbw\t{%1, %0|%0, %1}" 10959 "vphsubbw\t{%1, %0|%0, %1}"
10739 [(set_attr "type" "sseiadd1")]) 10960 [(set_attr "type" "sseiadd1")])
10740 10961
10741 (define_insn "sse5_phsubwd" 10962 (define_insn "xop_phsubwd"
10742 [(set (match_operand:V4SI 0 "register_operand" "=x") 10963 [(set (match_operand:V4SI 0 "register_operand" "=x")
10743 (minus:V4SI 10964 (minus:V4SI
10744 (sign_extend:V4SI 10965 (sign_extend:V4SI
10745 (vec_select:V4HI 10966 (vec_select:V4HI
10746 (match_operand:V8HI 1 "nonimmediate_operand" "xm") 10967 (match_operand:V8HI 1 "nonimmediate_operand" "xm")
10753 (match_dup 1) 10974 (match_dup 1)
10754 (parallel [(const_int 1) 10975 (parallel [(const_int 1)
10755 (const_int 3) 10976 (const_int 3)
10756 (const_int 5) 10977 (const_int 5)
10757 (const_int 7)])))))] 10978 (const_int 7)])))))]
10758 "TARGET_SSE5" 10979 "TARGET_XOP"
10759 "phsubwd\t{%1, %0|%0, %1}" 10980 "vphsubwd\t{%1, %0|%0, %1}"
10760 [(set_attr "type" "sseiadd1")]) 10981 [(set_attr "type" "sseiadd1")])
10761 10982
10762 (define_insn "sse5_phsubdq" 10983 (define_insn "xop_phsubdq"
10763 [(set (match_operand:V2DI 0 "register_operand" "=x") 10984 [(set (match_operand:V2DI 0 "register_operand" "=x")
10764 (minus:V2DI 10985 (minus:V2DI
10765 (sign_extend:V2DI 10986 (sign_extend:V2DI
10766 (vec_select:V2SI 10987 (vec_select:V2SI
10767 (match_operand:V4SI 1 "nonimmediate_operand" "xm") 10988 (match_operand:V4SI 1 "nonimmediate_operand" "xm")
10770 (sign_extend:V2DI 10991 (sign_extend:V2DI
10771 (vec_select:V2SI 10992 (vec_select:V2SI
10772 (match_dup 1) 10993 (match_dup 1)
10773 (parallel [(const_int 1) 10994 (parallel [(const_int 1)
10774 (const_int 3)])))))] 10995 (const_int 3)])))))]
10775 "TARGET_SSE5" 10996 "TARGET_XOP"
10776 "phsubdq\t{%1, %0|%0, %1}" 10997 "vphsubdq\t{%1, %0|%0, %1}"
10777 [(set_attr "type" "sseiadd1")]) 10998 [(set_attr "type" "sseiadd1")])
10778 10999
10779 ;; SSE5 permute instructions 11000 ;; XOP permute instructions
10780 (define_insn "sse5_pperm" 11001 (define_insn "xop_pperm"
10781 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x") 11002 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10782 (unspec:V16QI 11003 (unspec:V16QI
10783 [(match_operand:V16QI 1 "nonimmediate_operand" "0,0,x,xm") 11004 [(match_operand:V16QI 1 "register_operand" "x,x")
10784 (match_operand:V16QI 2 "nonimmediate_operand" "x,xm,xm,x") 11005 (match_operand:V16QI 2 "nonimmediate_operand" "x,m")
10785 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")] 11006 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x")]
10786 UNSPEC_SSE5_PERMUTE))] 11007 UNSPEC_XOP_PERMUTE))]
10787 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" 11008 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10788 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11009 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10789 [(set_attr "type" "sse4arg") 11010 [(set_attr "type" "sse4arg")
10790 (set_attr "mode" "TI")]) 11011 (set_attr "mode" "TI")])
10791 11012
10792 ;; The following are for the various unpack insns which don't need the first 11013 ;; XOP pack instructions that combine two vectors into a smaller vector
10793 ;; source operand, so we can just use the output operand for the first operand. 11014 (define_insn "xop_pperm_pack_v2di_v4si"
10794 ;; This allows either of the other two operands to be a memory operand. We
10795 ;; can't just use the first operand as an argument to the normal pperm because
10796 ;; then an output-only argument suddenly becomes an input operand.
10797 (define_insn "sse5_pperm_zero_v16qi_v8hi"
10798 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10799 (zero_extend:V8HI
10800 (vec_select:V8QI
10801 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
10802 (match_operand 2 "" "")))) ;; parallel with const_int's
10803 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10804 "TARGET_SSE5
10805 && (register_operand (operands[1], V16QImode)
10806 || register_operand (operands[2], V16QImode))"
10807 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10808 [(set_attr "type" "sseadd")
10809 (set_attr "mode" "TI")])
10810
10811 (define_insn "sse5_pperm_sign_v16qi_v8hi"
10812 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10813 (sign_extend:V8HI
10814 (vec_select:V8QI
10815 (match_operand:V16QI 1 "nonimmediate_operand" "xm,x")
10816 (match_operand 2 "" "")))) ;; parallel with const_int's
10817 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10818 "TARGET_SSE5
10819 && (register_operand (operands[1], V16QImode)
10820 || register_operand (operands[2], V16QImode))"
10821 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10822 [(set_attr "type" "sseadd")
10823 (set_attr "mode" "TI")])
10824
10825 (define_insn "sse5_pperm_zero_v8hi_v4si"
10826 [(set (match_operand:V4SI 0 "register_operand" "=x,x") 11015 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10827 (zero_extend:V4SI
10828 (vec_select:V4HI
10829 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
10830 (match_operand 2 "" "")))) ;; parallel with const_int's
10831 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10832 "TARGET_SSE5
10833 && (register_operand (operands[1], V8HImode)
10834 || register_operand (operands[2], V16QImode))"
10835 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10836 [(set_attr "type" "sseadd")
10837 (set_attr "mode" "TI")])
10838
10839 (define_insn "sse5_pperm_sign_v8hi_v4si"
10840 [(set (match_operand:V4SI 0 "register_operand" "=x,x")
10841 (sign_extend:V4SI
10842 (vec_select:V4HI
10843 (match_operand:V8HI 1 "nonimmediate_operand" "xm,x")
10844 (match_operand 2 "" "")))) ;; parallel with const_int's
10845 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10846 "TARGET_SSE5
10847 && (register_operand (operands[1], V8HImode)
10848 || register_operand (operands[2], V16QImode))"
10849 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10850 [(set_attr "type" "sseadd")
10851 (set_attr "mode" "TI")])
10852
10853 (define_insn "sse5_pperm_zero_v4si_v2di"
10854 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10855 (zero_extend:V2DI
10856 (vec_select:V2SI
10857 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
10858 (match_operand 2 "" "")))) ;; parallel with const_int's
10859 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10860 "TARGET_SSE5
10861 && (register_operand (operands[1], V4SImode)
10862 || register_operand (operands[2], V16QImode))"
10863 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10864 [(set_attr "type" "sseadd")
10865 (set_attr "mode" "TI")])
10866
10867 (define_insn "sse5_pperm_sign_v4si_v2di"
10868 [(set (match_operand:V2DI 0 "register_operand" "=x,x")
10869 (sign_extend:V2DI
10870 (vec_select:V2SI
10871 (match_operand:V4SI 1 "nonimmediate_operand" "xm,x")
10872 (match_operand 2 "" "")))) ;; parallel with const_int's
10873 (use (match_operand:V16QI 3 "nonimmediate_operand" "x,xm"))]
10874 "TARGET_SSE5
10875 && (register_operand (operands[1], V4SImode)
10876 || register_operand (operands[2], V16QImode))"
10877 "pperm\t{%3, %1, %0, %0|%0, %0, %1, %3}"
10878 [(set_attr "type" "sseadd")
10879 (set_attr "mode" "TI")])
10880
10881 ;; SSE5 pack instructions that combine two vectors into a smaller vector
10882 (define_insn "sse5_pperm_pack_v2di_v4si"
10883 [(set (match_operand:V4SI 0 "register_operand" "=x,x,x,x")
10884 (vec_concat:V4SI 11016 (vec_concat:V4SI
10885 (truncate:V2SI 11017 (truncate:V2SI
10886 (match_operand:V2DI 1 "nonimmediate_operand" "0,0,x,xm")) 11018 (match_operand:V2DI 1 "register_operand" "x,x"))
10887 (truncate:V2SI 11019 (truncate:V2SI
10888 (match_operand:V2DI 2 "nonimmediate_operand" "x,xm,xm,x")))) 11020 (match_operand:V2DI 2 "nonimmediate_operand" "x,m"))))
10889 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))] 11021 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10890 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" 11022 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10891 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11023 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10892 [(set_attr "type" "sse4arg") 11024 [(set_attr "type" "sse4arg")
10893 (set_attr "mode" "TI")]) 11025 (set_attr "mode" "TI")])
10894 11026
10895 (define_insn "sse5_pperm_pack_v4si_v8hi" 11027 (define_insn "xop_pperm_pack_v4si_v8hi"
10896 [(set (match_operand:V8HI 0 "register_operand" "=x,x,x,x") 11028 [(set (match_operand:V8HI 0 "register_operand" "=x,x")
10897 (vec_concat:V8HI 11029 (vec_concat:V8HI
10898 (truncate:V4HI 11030 (truncate:V4HI
10899 (match_operand:V4SI 1 "nonimmediate_operand" "0,0,x,xm")) 11031 (match_operand:V4SI 1 "register_operand" "x,x"))
10900 (truncate:V4HI 11032 (truncate:V4HI
10901 (match_operand:V4SI 2 "nonimmediate_operand" "x,xm,xm,x")))) 11033 (match_operand:V4SI 2 "nonimmediate_operand" "x,m"))))
10902 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))] 11034 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10903 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" 11035 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10904 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11036 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10905 [(set_attr "type" "sse4arg") 11037 [(set_attr "type" "sse4arg")
10906 (set_attr "mode" "TI")]) 11038 (set_attr "mode" "TI")])
10907 11039
10908 (define_insn "sse5_pperm_pack_v8hi_v16qi" 11040 (define_insn "xop_pperm_pack_v8hi_v16qi"
10909 [(set (match_operand:V16QI 0 "register_operand" "=x,x,x,x") 11041 [(set (match_operand:V16QI 0 "register_operand" "=x,x")
10910 (vec_concat:V16QI 11042 (vec_concat:V16QI
10911 (truncate:V8QI 11043 (truncate:V8QI
10912 (match_operand:V8HI 1 "nonimmediate_operand" "0,0,x,xm")) 11044 (match_operand:V8HI 1 "register_operand" "x,x"))
10913 (truncate:V8QI 11045 (truncate:V8QI
10914 (match_operand:V8HI 2 "nonimmediate_operand" "x,xm,xm,x")))) 11046 (match_operand:V8HI 2 "nonimmediate_operand" "x,m"))))
10915 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0"))] 11047 (use (match_operand:V16QI 3 "nonimmediate_operand" "xm,x"))]
10916 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)" 11048 "TARGET_XOP && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
10917 "pperm\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11049 "vpperm\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10918 [(set_attr "type" "sse4arg") 11050 [(set_attr "type" "sse4arg")
10919 (set_attr "mode" "TI")]) 11051 (set_attr "mode" "TI")])
10920 11052
10921 ;; Floating point permutation (permps, permpd) 11053 ;; XOP packed rotate instructions
10922 (define_insn "sse5_perm<mode>"
10923 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x,x,x,x")
10924 (unspec:SSEMODEF2P
10925 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "0,0,x,xm")
10926 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "x,xm,xm,x")
10927 (match_operand:V16QI 3 "nonimmediate_operand" "xm,x,0,0")]
10928 UNSPEC_SSE5_PERMUTE))]
10929 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 4, true, 1, false)"
10930 "perm<ssemodesuffixf4>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
10931 [(set_attr "type" "sse4arg")
10932 (set_attr "mode" "<MODE>")])
10933
10934 ;; SSE5 packed rotate instructions
10935 (define_expand "rotl<mode>3" 11054 (define_expand "rotl<mode>3"
10936 [(set (match_operand:SSEMODE1248 0 "register_operand" "") 11055 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10937 (rotate:SSEMODE1248 11056 (rotate:SSEMODE1248
10938 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") 11057 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10939 (match_operand:SI 2 "general_operand")))] 11058 (match_operand:SI 2 "general_operand")))]
10940 "TARGET_SSE5" 11059 "TARGET_XOP"
10941 { 11060 {
10942 /* If we were given a scalar, convert it to parallel */ 11061 /* If we were given a scalar, convert it to parallel */
10943 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) 11062 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10944 { 11063 {
10945 rtvec vs = rtvec_alloc (<ssescalarnum>); 11064 rtvec vs = rtvec_alloc (<ssescalarnum>);
10956 11075
10957 for (i = 0; i < <ssescalarnum>; i++) 11076 for (i = 0; i < <ssescalarnum>; i++)
10958 RTVEC_ELT (vs, i) = op2; 11077 RTVEC_ELT (vs, i) = op2;
10959 11078
10960 emit_insn (gen_vec_init<mode> (reg, par)); 11079 emit_insn (gen_vec_init<mode> (reg, par));
10961 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg)); 11080 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
10962 DONE; 11081 DONE;
10963 } 11082 }
10964 }) 11083 })
10965 11084
10966 (define_expand "rotr<mode>3" 11085 (define_expand "rotr<mode>3"
10967 [(set (match_operand:SSEMODE1248 0 "register_operand" "") 11086 [(set (match_operand:SSEMODE1248 0 "register_operand" "")
10968 (rotatert:SSEMODE1248 11087 (rotatert:SSEMODE1248
10969 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "") 11088 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "")
10970 (match_operand:SI 2 "general_operand")))] 11089 (match_operand:SI 2 "general_operand")))]
10971 "TARGET_SSE5" 11090 "TARGET_XOP"
10972 { 11091 {
10973 /* If we were given a scalar, convert it to parallel */ 11092 /* If we were given a scalar, convert it to parallel */
10974 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode)) 11093 if (! const_0_to_<sserotatemax>_operand (operands[2], SImode))
10975 { 11094 {
10976 rtvec vs = rtvec_alloc (<ssescalarnum>); 11095 rtvec vs = rtvec_alloc (<ssescalarnum>);
10989 for (i = 0; i < <ssescalarnum>; i++) 11108 for (i = 0; i < <ssescalarnum>; i++)
10990 RTVEC_ELT (vs, i) = op2; 11109 RTVEC_ELT (vs, i) = op2;
10991 11110
10992 emit_insn (gen_vec_init<mode> (reg, par)); 11111 emit_insn (gen_vec_init<mode> (reg, par));
10993 emit_insn (gen_neg<mode>2 (neg, reg)); 11112 emit_insn (gen_neg<mode>2 (neg, reg));
10994 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], neg)); 11113 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], neg));
10995 DONE; 11114 DONE;
10996 } 11115 }
10997 }) 11116 })
10998 11117
10999 (define_insn "sse5_rotl<mode>3" 11118 (define_insn "xop_rotl<mode>3"
11000 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") 11119 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11001 (rotate:SSEMODE1248 11120 (rotate:SSEMODE1248
11002 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") 11121 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11003 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] 11122 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11004 "TARGET_SSE5" 11123 "TARGET_XOP"
11005 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" 11124 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11006 [(set_attr "type" "sseishft") 11125 [(set_attr "type" "sseishft")
11007 (set_attr "mode" "TI")]) 11126 (set_attr "length_immediate" "1")
11008 11127 (set_attr "mode" "TI")])
11009 (define_insn "sse5_rotr<mode>3" 11128
11129 (define_insn "xop_rotr<mode>3"
11010 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") 11130 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11011 (rotatert:SSEMODE1248 11131 (rotatert:SSEMODE1248
11012 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm") 11132 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm")
11013 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))] 11133 (match_operand:SI 2 "const_0_to_<sserotatemax>_operand" "n")))]
11014 "TARGET_SSE5" 11134 "TARGET_XOP"
11015 { 11135 {
11016 operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2])); 11136 operands[3] = GEN_INT ((<ssescalarnum> * 8) - INTVAL (operands[2]));
11017 return \"prot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\"; 11137 return \"vprot<ssevecsize>\t{%3, %1, %0|%0, %1, %3}\";
11018 } 11138 }
11019 [(set_attr "type" "sseishft") 11139 [(set_attr "type" "sseishft")
11140 (set_attr "length_immediate" "1")
11020 (set_attr "mode" "TI")]) 11141 (set_attr "mode" "TI")])
11021 11142
11022 (define_expand "vrotr<mode>3" 11143 (define_expand "vrotr<mode>3"
11023 [(match_operand:SSEMODE1248 0 "register_operand" "") 11144 [(match_operand:SSEMODE1248 0 "register_operand" "")
11024 (match_operand:SSEMODE1248 1 "register_operand" "") 11145 (match_operand:SSEMODE1248 1 "register_operand" "")
11025 (match_operand:SSEMODE1248 2 "register_operand" "")] 11146 (match_operand:SSEMODE1248 2 "register_operand" "")]
11026 "TARGET_SSE5" 11147 "TARGET_XOP"
11027 { 11148 {
11028 rtx reg = gen_reg_rtx (<MODE>mode); 11149 rtx reg = gen_reg_rtx (<MODE>mode);
11029 emit_insn (gen_neg<mode>2 (reg, operands[2])); 11150 emit_insn (gen_neg<mode>2 (reg, operands[2]));
11030 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], reg)); 11151 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], reg));
11031 DONE; 11152 DONE;
11032 }) 11153 })
11033 11154
11034 (define_expand "vrotl<mode>3" 11155 (define_expand "vrotl<mode>3"
11035 [(match_operand:SSEMODE1248 0 "register_operand" "") 11156 [(match_operand:SSEMODE1248 0 "register_operand" "")
11036 (match_operand:SSEMODE1248 1 "register_operand" "") 11157 (match_operand:SSEMODE1248 1 "register_operand" "")
11037 (match_operand:SSEMODE1248 2 "register_operand" "")] 11158 (match_operand:SSEMODE1248 2 "register_operand" "")]
11038 "TARGET_SSE5" 11159 "TARGET_XOP"
11039 { 11160 {
11040 emit_insn (gen_sse5_vrotl<mode>3 (operands[0], operands[1], operands[2])); 11161 emit_insn (gen_xop_vrotl<mode>3 (operands[0], operands[1], operands[2]));
11041 DONE; 11162 DONE;
11042 }) 11163 })
11043 11164
11044 (define_insn "sse5_vrotl<mode>3" 11165 (define_insn "xop_vrotl<mode>3"
11045 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") 11166 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11046 (if_then_else:SSEMODE1248 11167 (if_then_else:SSEMODE1248
11047 (ge:SSEMODE1248 11168 (ge:SSEMODE1248
11048 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") 11169 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11049 (const_int 0)) 11170 (const_int 0))
11050 (rotate:SSEMODE1248 11171 (rotate:SSEMODE1248
11051 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") 11172 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11052 (match_dup 2)) 11173 (match_dup 2))
11053 (rotatert:SSEMODE1248 11174 (rotatert:SSEMODE1248
11054 (match_dup 1) 11175 (match_dup 1)
11055 (neg:SSEMODE1248 (match_dup 2)))))] 11176 (neg:SSEMODE1248 (match_dup 2)))))]
11056 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)" 11177 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11057 "prot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" 11178 "vprot<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11058 [(set_attr "type" "sseishft") 11179 [(set_attr "type" "sseishft")
11059 (set_attr "mode" "TI")]) 11180 (set_attr "prefix_data16" "0")
11060 11181 (set_attr "prefix_extra" "2")
11061 ;; SSE5 packed shift instructions. 11182 (set_attr "mode" "TI")])
11183
11184 ;; XOP packed shift instructions.
11062 ;; FIXME: add V2DI back in 11185 ;; FIXME: add V2DI back in
11063 (define_expand "vlshr<mode>3" 11186 (define_expand "vlshr<mode>3"
11064 [(match_operand:SSEMODE124 0 "register_operand" "") 11187 [(match_operand:SSEMODE124 0 "register_operand" "")
11065 (match_operand:SSEMODE124 1 "register_operand" "") 11188 (match_operand:SSEMODE124 1 "register_operand" "")
11066 (match_operand:SSEMODE124 2 "register_operand" "")] 11189 (match_operand:SSEMODE124 2 "register_operand" "")]
11067 "TARGET_SSE5" 11190 "TARGET_XOP"
11068 { 11191 {
11069 rtx neg = gen_reg_rtx (<MODE>mode); 11192 rtx neg = gen_reg_rtx (<MODE>mode);
11070 emit_insn (gen_neg<mode>2 (neg, operands[2])); 11193 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11071 emit_insn (gen_sse5_lshl<mode>3 (operands[0], operands[1], neg)); 11194 emit_insn (gen_xop_lshl<mode>3 (operands[0], operands[1], neg));
11072 DONE; 11195 DONE;
11073 }) 11196 })
11074 11197
11075 (define_expand "vashr<mode>3" 11198 (define_expand "vashr<mode>3"
11076 [(match_operand:SSEMODE124 0 "register_operand" "") 11199 [(match_operand:SSEMODE124 0 "register_operand" "")
11077 (match_operand:SSEMODE124 1 "register_operand" "") 11200 (match_operand:SSEMODE124 1 "register_operand" "")
11078 (match_operand:SSEMODE124 2 "register_operand" "")] 11201 (match_operand:SSEMODE124 2 "register_operand" "")]
11079 "TARGET_SSE5" 11202 "TARGET_XOP"
11080 { 11203 {
11081 rtx neg = gen_reg_rtx (<MODE>mode); 11204 rtx neg = gen_reg_rtx (<MODE>mode);
11082 emit_insn (gen_neg<mode>2 (neg, operands[2])); 11205 emit_insn (gen_neg<mode>2 (neg, operands[2]));
11083 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], neg)); 11206 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], neg));
11084 DONE; 11207 DONE;
11085 }) 11208 })
11086 11209
11087 (define_expand "vashl<mode>3" 11210 (define_expand "vashl<mode>3"
11088 [(match_operand:SSEMODE124 0 "register_operand" "") 11211 [(match_operand:SSEMODE124 0 "register_operand" "")
11089 (match_operand:SSEMODE124 1 "register_operand" "") 11212 (match_operand:SSEMODE124 1 "register_operand" "")
11090 (match_operand:SSEMODE124 2 "register_operand" "")] 11213 (match_operand:SSEMODE124 2 "register_operand" "")]
11091 "TARGET_SSE5" 11214 "TARGET_XOP"
11092 { 11215 {
11093 emit_insn (gen_sse5_ashl<mode>3 (operands[0], operands[1], operands[2])); 11216 emit_insn (gen_xop_ashl<mode>3 (operands[0], operands[1], operands[2]));
11094 DONE; 11217 DONE;
11095 }) 11218 })
11096 11219
11097 (define_insn "sse5_ashl<mode>3" 11220 (define_insn "xop_ashl<mode>3"
11098 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") 11221 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11099 (if_then_else:SSEMODE1248 11222 (if_then_else:SSEMODE1248
11100 (ge:SSEMODE1248 11223 (ge:SSEMODE1248
11101 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") 11224 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11102 (const_int 0)) 11225 (const_int 0))
11103 (ashift:SSEMODE1248 11226 (ashift:SSEMODE1248
11104 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") 11227 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11105 (match_dup 2)) 11228 (match_dup 2))
11106 (ashiftrt:SSEMODE1248 11229 (ashiftrt:SSEMODE1248
11107 (match_dup 1) 11230 (match_dup 1)
11108 (neg:SSEMODE1248 (match_dup 2)))))] 11231 (neg:SSEMODE1248 (match_dup 2)))))]
11109 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)" 11232 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11110 "psha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" 11233 "vpsha<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11111 [(set_attr "type" "sseishft") 11234 [(set_attr "type" "sseishft")
11112 (set_attr "mode" "TI")]) 11235 (set_attr "prefix_data16" "0")
11113 11236 (set_attr "prefix_extra" "2")
11114 (define_insn "sse5_lshl<mode>3" 11237 (set_attr "mode" "TI")])
11238
11239 (define_insn "xop_lshl<mode>3"
11115 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x") 11240 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x,x")
11116 (if_then_else:SSEMODE1248 11241 (if_then_else:SSEMODE1248
11117 (ge:SSEMODE1248 11242 (ge:SSEMODE1248
11118 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm,x") 11243 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "x,m")
11119 (const_int 0)) 11244 (const_int 0))
11120 (ashift:SSEMODE1248 11245 (ashift:SSEMODE1248
11121 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "x,xm") 11246 (match_operand:SSEMODE1248 1 "nonimmediate_operand" "xm,x")
11122 (match_dup 2)) 11247 (match_dup 2))
11123 (lshiftrt:SSEMODE1248 11248 (lshiftrt:SSEMODE1248
11124 (match_dup 1) 11249 (match_dup 1)
11125 (neg:SSEMODE1248 (match_dup 2)))))] 11250 (neg:SSEMODE1248 (match_dup 2)))))]
11126 "TARGET_SSE5 && ix86_sse5_valid_op_p (operands, insn, 3, true, 1, false)" 11251 "TARGET_XOP && !(MEM_P (operands[1]) && MEM_P (operands[2]))"
11127 "pshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" 11252 "vpshl<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11128 [(set_attr "type" "sseishft") 11253 [(set_attr "type" "sseishft")
11129 (set_attr "mode" "TI")]) 11254 (set_attr "prefix_data16" "0")
11130 11255 (set_attr "prefix_extra" "2")
11131 ;; SSE2 doesn't have some shift variants, so define versions for SSE5 11256 (set_attr "mode" "TI")])
11257
11258 ;; SSE2 doesn't have some shift variants, so define versions for XOP
11132 (define_expand "ashlv16qi3" 11259 (define_expand "ashlv16qi3"
11133 [(match_operand:V16QI 0 "register_operand" "") 11260 [(match_operand:V16QI 0 "register_operand" "")
11134 (match_operand:V16QI 1 "register_operand" "") 11261 (match_operand:V16QI 1 "register_operand" "")
11135 (match_operand:SI 2 "nonmemory_operand" "")] 11262 (match_operand:SI 2 "nonmemory_operand" "")]
11136 "TARGET_SSE5" 11263 "TARGET_XOP"
11137 { 11264 {
11138 rtvec vs = rtvec_alloc (16); 11265 rtvec vs = rtvec_alloc (16);
11139 rtx par = gen_rtx_PARALLEL (V16QImode, vs); 11266 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11140 rtx reg = gen_reg_rtx (V16QImode); 11267 rtx reg = gen_reg_rtx (V16QImode);
11141 int i; 11268 int i;
11142 for (i = 0; i < 16; i++) 11269 for (i = 0; i < 16; i++)
11143 RTVEC_ELT (vs, i) = operands[2]; 11270 RTVEC_ELT (vs, i) = operands[2];
11144 11271
11145 emit_insn (gen_vec_initv16qi (reg, par)); 11272 emit_insn (gen_vec_initv16qi (reg, par));
11146 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg)); 11273 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
11147 DONE; 11274 DONE;
11148 }) 11275 })
11149 11276
11150 (define_expand "lshlv16qi3" 11277 (define_expand "lshlv16qi3"
11151 [(match_operand:V16QI 0 "register_operand" "") 11278 [(match_operand:V16QI 0 "register_operand" "")
11152 (match_operand:V16QI 1 "register_operand" "") 11279 (match_operand:V16QI 1 "register_operand" "")
11153 (match_operand:SI 2 "nonmemory_operand" "")] 11280 (match_operand:SI 2 "nonmemory_operand" "")]
11154 "TARGET_SSE5" 11281 "TARGET_XOP"
11155 { 11282 {
11156 rtvec vs = rtvec_alloc (16); 11283 rtvec vs = rtvec_alloc (16);
11157 rtx par = gen_rtx_PARALLEL (V16QImode, vs); 11284 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11158 rtx reg = gen_reg_rtx (V16QImode); 11285 rtx reg = gen_reg_rtx (V16QImode);
11159 int i; 11286 int i;
11160 for (i = 0; i < 16; i++) 11287 for (i = 0; i < 16; i++)
11161 RTVEC_ELT (vs, i) = operands[2]; 11288 RTVEC_ELT (vs, i) = operands[2];
11162 11289
11163 emit_insn (gen_vec_initv16qi (reg, par)); 11290 emit_insn (gen_vec_initv16qi (reg, par));
11164 emit_insn (gen_sse5_lshlv16qi3 (operands[0], operands[1], reg)); 11291 emit_insn (gen_xop_lshlv16qi3 (operands[0], operands[1], reg));
11165 DONE; 11292 DONE;
11166 }) 11293 })
11167 11294
11168 (define_expand "ashrv16qi3" 11295 (define_expand "ashrv16qi3"
11169 [(match_operand:V16QI 0 "register_operand" "") 11296 [(match_operand:V16QI 0 "register_operand" "")
11170 (match_operand:V16QI 1 "register_operand" "") 11297 (match_operand:V16QI 1 "register_operand" "")
11171 (match_operand:SI 2 "nonmemory_operand" "")] 11298 (match_operand:SI 2 "nonmemory_operand" "")]
11172 "TARGET_SSE5" 11299 "TARGET_XOP"
11173 { 11300 {
11174 rtvec vs = rtvec_alloc (16); 11301 rtvec vs = rtvec_alloc (16);
11175 rtx par = gen_rtx_PARALLEL (V16QImode, vs); 11302 rtx par = gen_rtx_PARALLEL (V16QImode, vs);
11176 rtx reg = gen_reg_rtx (V16QImode); 11303 rtx reg = gen_reg_rtx (V16QImode);
11177 int i; 11304 int i;
11178 rtx ele = ((GET_CODE (operands[2]) == CONST_INT) 11305 rtx ele = ((CONST_INT_P (operands[2]))
11179 ? GEN_INT (- INTVAL (operands[2])) 11306 ? GEN_INT (- INTVAL (operands[2]))
11180 : operands[2]); 11307 : operands[2]);
11181 11308
11182 for (i = 0; i < 16; i++) 11309 for (i = 0; i < 16; i++)
11183 RTVEC_ELT (vs, i) = ele; 11310 RTVEC_ELT (vs, i) = ele;
11184 11311
11185 emit_insn (gen_vec_initv16qi (reg, par)); 11312 emit_insn (gen_vec_initv16qi (reg, par));
11186 11313
11187 if (GET_CODE (operands[2]) != CONST_INT) 11314 if (!CONST_INT_P (operands[2]))
11188 { 11315 {
11189 rtx neg = gen_reg_rtx (V16QImode); 11316 rtx neg = gen_reg_rtx (V16QImode);
11190 emit_insn (gen_negv16qi2 (neg, reg)); 11317 emit_insn (gen_negv16qi2 (neg, reg));
11191 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], neg)); 11318 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], neg));
11192 } 11319 }
11193 else 11320 else
11194 emit_insn (gen_sse5_ashlv16qi3 (operands[0], operands[1], reg)); 11321 emit_insn (gen_xop_ashlv16qi3 (operands[0], operands[1], reg));
11195 11322
11196 DONE; 11323 DONE;
11197 }) 11324 })
11198 11325
11199 (define_expand "ashrv2di3" 11326 (define_expand "ashrv2di3"
11200 [(match_operand:V2DI 0 "register_operand" "") 11327 [(match_operand:V2DI 0 "register_operand" "")
11201 (match_operand:V2DI 1 "register_operand" "") 11328 (match_operand:V2DI 1 "register_operand" "")
11202 (match_operand:DI 2 "nonmemory_operand" "")] 11329 (match_operand:DI 2 "nonmemory_operand" "")]
11203 "TARGET_SSE5" 11330 "TARGET_XOP"
11204 { 11331 {
11205 rtvec vs = rtvec_alloc (2); 11332 rtvec vs = rtvec_alloc (2);
11206 rtx par = gen_rtx_PARALLEL (V2DImode, vs); 11333 rtx par = gen_rtx_PARALLEL (V2DImode, vs);
11207 rtx reg = gen_reg_rtx (V2DImode); 11334 rtx reg = gen_reg_rtx (V2DImode);
11208 rtx ele; 11335 rtx ele;
11209 11336
11210 if (GET_CODE (operands[2]) == CONST_INT) 11337 if (CONST_INT_P (operands[2]))
11211 ele = GEN_INT (- INTVAL (operands[2])); 11338 ele = GEN_INT (- INTVAL (operands[2]));
11212 else if (GET_MODE (operands[2]) != DImode) 11339 else if (GET_MODE (operands[2]) != DImode)
11213 { 11340 {
11214 rtx move = gen_reg_rtx (DImode); 11341 rtx move = gen_reg_rtx (DImode);
11215 ele = gen_reg_rtx (DImode); 11342 ele = gen_reg_rtx (DImode);
11223 } 11350 }
11224 11351
11225 RTVEC_ELT (vs, 0) = ele; 11352 RTVEC_ELT (vs, 0) = ele;
11226 RTVEC_ELT (vs, 1) = ele; 11353 RTVEC_ELT (vs, 1) = ele;
11227 emit_insn (gen_vec_initv2di (reg, par)); 11354 emit_insn (gen_vec_initv2di (reg, par));
11228 emit_insn (gen_sse5_ashlv2di3 (operands[0], operands[1], reg)); 11355 emit_insn (gen_xop_ashlv2di3 (operands[0], operands[1], reg));
11229 DONE; 11356 DONE;
11230 }) 11357 })
11231 11358
11232 ;; SSE5 FRCZ support 11359 ;; XOP FRCZ support
11233 ;; parallel insns 11360 ;; parallel insns
11234 (define_insn "sse5_frcz<mode>2" 11361 (define_insn "xop_frcz<mode>2"
11235 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") 11362 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11236 (unspec:SSEMODEF2P 11363 (unspec:SSEMODEF2P
11237 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")] 11364 [(match_operand:SSEMODEF2P 1 "nonimmediate_operand" "xm")]
11238 UNSPEC_FRCZ))] 11365 UNSPEC_FRCZ))]
11239 "TARGET_SSE5" 11366 "TARGET_XOP"
11240 "frcz<ssemodesuffixf4>\t{%1, %0|%0, %1}" 11367 "vfrcz<ssemodesuffixf4>\t{%1, %0|%0, %1}"
11241 [(set_attr "type" "ssecvt1") 11368 [(set_attr "type" "ssecvt1")
11242 (set_attr "prefix_extra" "1")
11243 (set_attr "mode" "<MODE>")]) 11369 (set_attr "mode" "<MODE>")])
11244 11370
11245 ;; scalar insns 11371 ;; scalar insns
11246 (define_insn "sse5_vmfrcz<mode>2" 11372 (define_insn "xop_vmfrcz<mode>2"
11247 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x") 11373 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11248 (vec_merge:SSEMODEF2P 11374 (vec_merge:SSEMODEF2P
11249 (unspec:SSEMODEF2P 11375 (unspec:SSEMODEF2P
11250 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")] 11376 [(match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")]
11251 UNSPEC_FRCZ) 11377 UNSPEC_FRCZ)
11252 (match_operand:SSEMODEF2P 1 "register_operand" "0") 11378 (match_operand:SSEMODEF2P 1 "register_operand" "0")
11253 (const_int 1)))] 11379 (const_int 1)))]
11254 "TARGET_SSE5" 11380 "TARGET_XOP"
11255 "frcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}" 11381 "vfrcz<ssemodesuffixf2s>\t{%2, %0|%0, %2}"
11256 [(set_attr "type" "ssecvt1") 11382 [(set_attr "type" "ssecvt1")
11257 (set_attr "prefix_extra" "1")
11258 (set_attr "mode" "<MODE>")]) 11383 (set_attr "mode" "<MODE>")])
11259 11384
11260 (define_insn "sse5_cvtph2ps" 11385 (define_insn "xop_frcz<mode>2256"
11261 [(set (match_operand:V4SF 0 "register_operand" "=x") 11386 [(set (match_operand:FMA4MODEF4 0 "register_operand" "=x")
11262 (unspec:V4SF [(match_operand:V4HI 1 "nonimmediate_operand" "xm")] 11387 (unspec:FMA4MODEF4
11263 UNSPEC_CVTPH2PS))] 11388 [(match_operand:FMA4MODEF4 1 "nonimmediate_operand" "xm")]
11264 "TARGET_SSE5" 11389 UNSPEC_FRCZ))]
11265 "cvtph2ps\t{%1, %0|%0, %1}" 11390 "TARGET_XOP"
11266 [(set_attr "type" "ssecvt") 11391 "vfrcz<fma4modesuffixf4>\t{%1, %0|%0, %1}"
11267 (set_attr "mode" "V4SF")]) 11392 [(set_attr "type" "ssecvt1")
11268
11269 (define_insn "sse5_cvtps2ph"
11270 [(set (match_operand:V4HI 0 "nonimmediate_operand" "=xm")
11271 (unspec:V4HI [(match_operand:V4SF 1 "register_operand" "x")]
11272 UNSPEC_CVTPS2PH))]
11273 "TARGET_SSE5"
11274 "cvtps2ph\t{%1, %0|%0, %1}"
11275 [(set_attr "type" "ssecvt")
11276 (set_attr "mode" "V4SF")])
11277
11278 ;; Scalar versions of the com instructions that use vector types that are
11279 ;; called from the intrinsics. Unlike the other s{s,d} instructions, the
11280 ;; com instructions fill in 0's in the upper bits instead of leaving them
11281 ;; unmodified, so we use const_vector of 0 instead of match_dup.
11282 (define_expand "sse5_vmmaskcmp<mode>3"
11283 [(set (match_operand:SSEMODEF2P 0 "register_operand" "")
11284 (vec_merge:SSEMODEF2P
11285 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11286 [(match_operand:SSEMODEF2P 2 "register_operand" "")
11287 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "")])
11288 (match_dup 4)
11289 (const_int 1)))]
11290 "TARGET_SSE5"
11291 {
11292 operands[4] = CONST0_RTX (<MODE>mode);
11293 })
11294
11295 (define_insn "*sse5_vmmaskcmp<mode>3"
11296 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11297 (vec_merge:SSEMODEF2P
11298 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11299 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11300 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")])
11301 (match_operand:SSEMODEF2P 4 "")
11302 (const_int 1)))]
11303 "TARGET_SSE5"
11304 "com%Y1<ssemodesuffixf2s>\t{%3, %2, %0|%0, %2, %3}"
11305 [(set_attr "type" "sse4arg")
11306 (set_attr "mode" "<ssescalarmode>")])
11307
11308 ;; We don't have a comparison operator that always returns true/false, so
11309 ;; handle comfalse and comtrue specially.
11310 (define_insn "sse5_com_tf<mode>3"
11311 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11312 (unspec:SSEMODEF2P
11313 [(match_operand:SSEMODEF2P 1 "register_operand" "x")
11314 (match_operand:SSEMODEF2P 2 "nonimmediate_operand" "xm")
11315 (match_operand:SI 3 "const_int_operand" "n")]
11316 UNSPEC_SSE5_TRUEFALSE))]
11317 "TARGET_SSE5"
11318 {
11319 const char *ret = NULL;
11320
11321 switch (INTVAL (operands[3]))
11322 {
11323 case COM_FALSE_S:
11324 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11325 break;
11326
11327 case COM_FALSE_P:
11328 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11329 break;
11330
11331 case COM_TRUE_S:
11332 ret = \"comfalses<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11333 break;
11334
11335 case COM_TRUE_P:
11336 ret = \"comfalsep<ssemodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}\";
11337 break;
11338
11339 default:
11340 gcc_unreachable ();
11341 }
11342
11343 return ret;
11344 }
11345 [(set_attr "type" "ssecmp")
11346 (set_attr "mode" "<MODE>")]) 11393 (set_attr "mode" "<MODE>")])
11347 11394
11348 (define_insn "sse5_maskcmp<mode>3" 11395 (define_insn "xop_maskcmp<mode>3"
11349 [(set (match_operand:SSEMODEF2P 0 "register_operand" "=x")
11350 (match_operator:SSEMODEF2P 1 "sse5_comparison_float_operator"
11351 [(match_operand:SSEMODEF2P 2 "register_operand" "x")
11352 (match_operand:SSEMODEF2P 3 "nonimmediate_operand" "xm")]))]
11353 "TARGET_SSE5"
11354 "com%Y1<ssemodesuffixf4>\t{%3, %2, %0|%0, %2, %3}"
11355 [(set_attr "type" "ssecmp")
11356 (set_attr "mode" "<MODE>")])
11357
11358 (define_insn "sse5_maskcmp<mode>3"
11359 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") 11396 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11360 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator" 11397 (match_operator:SSEMODE1248 1 "ix86_comparison_int_operator"
11361 [(match_operand:SSEMODE1248 2 "register_operand" "x") 11398 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11362 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] 11399 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11363 "TARGET_SSE5" 11400 "TARGET_XOP"
11364 "pcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" 11401 "vpcom%Y1<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11365 [(set_attr "type" "sse4arg") 11402 [(set_attr "type" "sse4arg")
11366 (set_attr "mode" "TI")]) 11403 (set_attr "prefix_data16" "0")
11367 11404 (set_attr "prefix_rep" "0")
11368 (define_insn "sse5_maskcmp_uns<mode>3" 11405 (set_attr "prefix_extra" "2")
11406 (set_attr "length_immediate" "1")
11407 (set_attr "mode" "TI")])
11408
11409 (define_insn "xop_maskcmp_uns<mode>3"
11369 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") 11410 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11370 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" 11411 (match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11371 [(match_operand:SSEMODE1248 2 "register_operand" "x") 11412 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11372 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))] 11413 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")]))]
11373 "TARGET_SSE5" 11414 "TARGET_XOP"
11374 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" 11415 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11375 [(set_attr "type" "ssecmp") 11416 [(set_attr "type" "ssecmp")
11417 (set_attr "prefix_data16" "0")
11418 (set_attr "prefix_rep" "0")
11419 (set_attr "prefix_extra" "2")
11420 (set_attr "length_immediate" "1")
11376 (set_attr "mode" "TI")]) 11421 (set_attr "mode" "TI")])
11377 11422
11378 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ* 11423 ;; Version of pcom*u* that is called from the intrinsics that allows pcomequ*
11379 ;; and pcomneu* not to be converted to the signed ones in case somebody needs 11424 ;; and pcomneu* not to be converted to the signed ones in case somebody needs
11380 ;; the exact instruction generated for the intrinsic. 11425 ;; the exact instruction generated for the intrinsic.
11381 (define_insn "sse5_maskcmp_uns2<mode>3" 11426 (define_insn "xop_maskcmp_uns2<mode>3"
11382 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") 11427 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11383 (unspec:SSEMODE1248 11428 (unspec:SSEMODE1248
11384 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator" 11429 [(match_operator:SSEMODE1248 1 "ix86_comparison_uns_operator"
11385 [(match_operand:SSEMODE1248 2 "register_operand" "x") 11430 [(match_operand:SSEMODE1248 2 "register_operand" "x")
11386 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])] 11431 (match_operand:SSEMODE1248 3 "nonimmediate_operand" "xm")])]
11387 UNSPEC_SSE5_UNSIGNED_CMP))] 11432 UNSPEC_XOP_UNSIGNED_CMP))]
11388 "TARGET_SSE5" 11433 "TARGET_XOP"
11389 "pcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}" 11434 "vpcom%Y1u<ssevecsize>\t{%3, %2, %0|%0, %2, %3}"
11390 [(set_attr "type" "ssecmp") 11435 [(set_attr "type" "ssecmp")
11436 (set_attr "prefix_data16" "0")
11437 (set_attr "prefix_extra" "2")
11438 (set_attr "length_immediate" "1")
11391 (set_attr "mode" "TI")]) 11439 (set_attr "mode" "TI")])
11392 11440
11393 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are 11441 ;; Pcomtrue and pcomfalse support. These are useless instructions, but are
11394 ;; being added here to be complete. 11442 ;; being added here to be complete.
11395 (define_insn "sse5_pcom_tf<mode>3" 11443 (define_insn "xop_pcom_tf<mode>3"
11396 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x") 11444 [(set (match_operand:SSEMODE1248 0 "register_operand" "=x")
11397 (unspec:SSEMODE1248 11445 (unspec:SSEMODE1248
11398 [(match_operand:SSEMODE1248 1 "register_operand" "x") 11446 [(match_operand:SSEMODE1248 1 "register_operand" "x")
11399 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm") 11447 (match_operand:SSEMODE1248 2 "nonimmediate_operand" "xm")
11400 (match_operand:SI 3 "const_int_operand" "n")] 11448 (match_operand:SI 3 "const_int_operand" "n")]
11401 UNSPEC_SSE5_TRUEFALSE))] 11449 UNSPEC_XOP_TRUEFALSE))]
11402 "TARGET_SSE5" 11450 "TARGET_XOP"
11403 { 11451 {
11404 return ((INTVAL (operands[3]) != 0) 11452 return ((INTVAL (operands[3]) != 0)
11405 ? "pcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}" 11453 ? "vpcomtrue<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"
11406 : "pcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}"); 11454 : "vpcomfalse<ssevecsize>\t{%2, %1, %0|%0, %1, %2}");
11407 } 11455 }
11408 [(set_attr "type" "ssecmp") 11456 [(set_attr "type" "ssecmp")
11409 (set_attr "mode" "TI")]) 11457 (set_attr "prefix_data16" "0")
11410 11458 (set_attr "prefix_extra" "2")
11459 (set_attr "length_immediate" "1")
11460 (set_attr "mode" "TI")])
11461
11462 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; VEX-prefixed AESENC.  Only enabled when both TARGET_AES and TARGET_AVX
;; hold; emits the three-operand "vaesenc" with operand 0 as destination
;; and operands 1 and 2 as sources (operand 2 may be in memory).
11411 (define_insn "*avx_aesenc" 11463 (define_insn "*avx_aesenc"
11412 [(set (match_operand:V2DI 0 "register_operand" "=x") 11464 [(set (match_operand:V2DI 0 "register_operand" "=x")
11413 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x") 11465 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "x")
11414 (match_operand:V2DI 2 "nonimmediate_operand" "xm")] 11466 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11415 UNSPEC_AESENC))] 11467 UNSPEC_AESENC))]
11416 "TARGET_AES && TARGET_AVX" 11468 "TARGET_AES && TARGET_AVX"
11417 "vaesenc\t{%2, %1, %0|%0, %1, %2}" 11469 "vaesenc\t{%2, %1, %0|%0, %1, %2}"
11418 [(set_attr "type" "sselog1") 11470 [(set_attr "type" "sselog1")
11471 (set_attr "prefix_extra" "1")
11419 (set_attr "prefix" "vex") 11472 (set_attr "prefix" "vex")
11420 (set_attr "mode" "TI")]) 11473 (set_attr "mode" "TI")])
11421 11474
11422 (define_insn "aesenc" 11475 (define_insn "aesenc"
11423 [(set (match_operand:V2DI 0 "register_operand" "=x") 11476 [(set (match_operand:V2DI 0 "register_operand" "=x")
11436 (match_operand:V2DI 2 "nonimmediate_operand" "xm")] 11489 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11437 UNSPEC_AESENCLAST))] 11490 UNSPEC_AESENCLAST))]
11438 "TARGET_AES && TARGET_AVX" 11491 "TARGET_AES && TARGET_AVX"
11439 "vaesenclast\t{%2, %1, %0|%0, %1, %2}" 11492 "vaesenclast\t{%2, %1, %0|%0, %1, %2}"
11440 [(set_attr "type" "sselog1") 11493 [(set_attr "type" "sselog1")
11494 (set_attr "prefix_extra" "1")
11441 (set_attr "prefix" "vex") 11495 (set_attr "prefix" "vex")
11442 (set_attr "mode" "TI")]) 11496 (set_attr "mode" "TI")])
11443 11497
11444 (define_insn "aesenclast" 11498 (define_insn "aesenclast"
11445 [(set (match_operand:V2DI 0 "register_operand" "=x") 11499 [(set (match_operand:V2DI 0 "register_operand" "=x")
11458 (match_operand:V2DI 2 "nonimmediate_operand" "xm")] 11512 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11459 UNSPEC_AESDEC))] 11513 UNSPEC_AESDEC))]
11460 "TARGET_AES && TARGET_AVX" 11514 "TARGET_AES && TARGET_AVX"
11461 "vaesdec\t{%2, %1, %0|%0, %1, %2}" 11515 "vaesdec\t{%2, %1, %0|%0, %1, %2}"
11462 [(set_attr "type" "sselog1") 11516 [(set_attr "type" "sselog1")
11517 (set_attr "prefix_extra" "1")
11463 (set_attr "prefix" "vex") 11518 (set_attr "prefix" "vex")
11464 (set_attr "mode" "TI")]) 11519 (set_attr "mode" "TI")])
11465 11520
11466 (define_insn "aesdec" 11521 (define_insn "aesdec"
11467 [(set (match_operand:V2DI 0 "register_operand" "=x") 11522 [(set (match_operand:V2DI 0 "register_operand" "=x")
11480 (match_operand:V2DI 2 "nonimmediate_operand" "xm")] 11535 (match_operand:V2DI 2 "nonimmediate_operand" "xm")]
11481 UNSPEC_AESDECLAST))] 11536 UNSPEC_AESDECLAST))]
11482 "TARGET_AES && TARGET_AVX" 11537 "TARGET_AES && TARGET_AVX"
11483 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}" 11538 "vaesdeclast\t{%2, %1, %0|%0, %1, %2}"
11484 [(set_attr "type" "sselog1") 11539 [(set_attr "type" "sselog1")
11540 (set_attr "prefix_extra" "1")
11485 (set_attr "prefix" "vex") 11541 (set_attr "prefix" "vex")
11486 (set_attr "mode" "TI")]) 11542 (set_attr "mode" "TI")])
11487 11543
11488 (define_insn "aesdeclast" 11544 (define_insn "aesdeclast"
11489 [(set (match_operand:V2DI 0 "register_operand" "=x") 11545 [(set (match_operand:V2DI 0 "register_operand" "=x")
11514 UNSPEC_AESKEYGENASSIST))] 11570 UNSPEC_AESKEYGENASSIST))]
11515 "TARGET_AES" 11571 "TARGET_AES"
11516 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}" 11572 "%vaeskeygenassist\t{%2, %1, %0|%0, %1, %2}"
11517 [(set_attr "type" "sselog1") 11573 [(set_attr "type" "sselog1")
11518 (set_attr "prefix_extra" "1") 11574 (set_attr "prefix_extra" "1")
11575 (set_attr "length_immediate" "1")
11519 (set_attr "prefix" "maybe_vex") 11576 (set_attr "prefix" "maybe_vex")
11520 (set_attr "mode" "TI")]) 11577 (set_attr "mode" "TI")])
11521 11578
11522 (define_insn "*vpclmulqdq" 11579 (define_insn "*vpclmulqdq"
11523 [(set (match_operand:V2DI 0 "register_operand" "=x") 11580 [(set (match_operand:V2DI 0 "register_operand" "=x")
11526 (match_operand:SI 3 "const_0_to_255_operand" "n")] 11583 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11527 UNSPEC_PCLMUL))] 11584 UNSPEC_PCLMUL))]
11528 "TARGET_PCLMUL && TARGET_AVX" 11585 "TARGET_PCLMUL && TARGET_AVX"
11529 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11586 "vpclmulqdq\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11530 [(set_attr "type" "sselog1") 11587 [(set_attr "type" "sselog1")
11588 (set_attr "prefix_extra" "1")
11589 (set_attr "length_immediate" "1")
11531 (set_attr "prefix" "vex") 11590 (set_attr "prefix" "vex")
11532 (set_attr "mode" "TI")]) 11591 (set_attr "mode" "TI")])
11533 11592
11534 (define_insn "pclmulqdq" 11593 (define_insn "pclmulqdq"
11535 [(set (match_operand:V2DI 0 "register_operand" "=x") 11594 [(set (match_operand:V2DI 0 "register_operand" "=x")
11539 UNSPEC_PCLMUL))] 11598 UNSPEC_PCLMUL))]
11540 "TARGET_PCLMUL" 11599 "TARGET_PCLMUL"
11541 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}" 11600 "pclmulqdq\t{%3, %2, %0|%0, %2, %3}"
11542 [(set_attr "type" "sselog1") 11601 [(set_attr "type" "sselog1")
11543 (set_attr "prefix_extra" "1") 11602 (set_attr "prefix_extra" "1")
11603 (set_attr "length_immediate" "1")
11544 (set_attr "mode" "TI")]) 11604 (set_attr "mode" "TI")])
11545 11605
11546 (define_expand "avx_vzeroall" 11606 (define_expand "avx_vzeroall"
11547 [(match_par_dup 0 [(const_int 0)])] 11607 [(match_par_dup 0 [(const_int 0)])]
11548 "TARGET_AVX" 11608 "TARGET_AVX"
11563 CONST0_RTX (V8SImode)); 11623 CONST0_RTX (V8SImode));
11564 }) 11624 })
11565 11625
;; Matches a PARALLEL accepted by the vzeroall_operation predicate whose
;; first element is the UNSPECV_VZEROALL unspec_volatile, and emits a
;; bare "vzeroall".  The remaining elements of the PARALLEL are checked
;; by the predicate rather than spelled out in this template.
11566 (define_insn "*avx_vzeroall" 11626 (define_insn "*avx_vzeroall"
11567 [(match_parallel 0 "vzeroall_operation" 11627 [(match_parallel 0 "vzeroall_operation"
11568 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL) 11628 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROALL)])]
11569 (set (match_operand 1 "register_operand" "=x")
11570 (match_operand 2 "const0_operand" "X"))])]
11571 "TARGET_AVX" 11629 "TARGET_AVX"
11572 "vzeroall" 11630 "vzeroall"
11573 [(set_attr "type" "sse") 11631 [(set_attr "type" "sse")
11632 (set_attr "modrm" "0")
11574 (set_attr "memory" "none") 11633 (set_attr "memory" "none")
11575 (set_attr "prefix" "vex") 11634 (set_attr "prefix" "vex")
11576 (set_attr "mode" "OI")]) 11635 (set_attr "mode" "OI")])
11577 11636
11578 ;; vzeroupper clobbers the upper 128 bits of AVX registers. 11637 ;; vzeroupper clobbers the upper 128 bits of AVX registers.
;; The expander builds the PARALLEL at expand time instead of listing
;; every register in the template: element 0 is the UNSPECV_VZEROUPPER
;; unspec_volatile, followed by one V8SImode CLOBBER per SSE register
;; (16 registers when TARGET_64BIT, otherwise 8).
11579 (define_insn "avx_vzeroupper" 11638 (define_expand "avx_vzeroupper"
11580 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER) 11639 [(match_par_dup 0 [(const_int 0)])]
11581 (clobber (reg:V8SI XMM0_REG)) 11640 "TARGET_AVX"
11582 (clobber (reg:V8SI XMM1_REG)) 11641 {
11583 (clobber (reg:V8SI XMM2_REG)) 11642 int nregs = TARGET_64BIT ? 16 : 8;
11584 (clobber (reg:V8SI XMM3_REG)) 11643 int regno;
11585 (clobber (reg:V8SI XMM4_REG)) 11644
11586 (clobber (reg:V8SI XMM5_REG)) 11645 operands[0] = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs + 1));
11587 (clobber (reg:V8SI XMM6_REG)) 11646
11588 (clobber (reg:V8SI XMM7_REG))] 11647 XVECEXP (operands[0], 0, 0)
11589 "TARGET_AVX && !TARGET_64BIT" 11648 = gen_rtx_UNSPEC_VOLATILE (VOIDmode, gen_rtvec (1, const0_rtx),
11649 UNSPECV_VZEROUPPER);
11650
11651 for (regno = 0; regno < nregs; regno++)
11652 XVECEXP (operands[0], 0, regno + 1)
11653 = gen_rtx_CLOBBER (VOIDmode,
11654 gen_rtx_REG (V8SImode, SSE_REGNO (regno)));
11655 })
11656
;; Matches a PARALLEL accepted by the vzeroupper_operation predicate
;; whose first element is the UNSPECV_VZEROUPPER unspec_volatile, and
;; emits a bare "vzeroupper".  A single TARGET_AVX condition covers both
;; register-file sizes.
11657 (define_insn "*avx_vzeroupper"
11658 [(match_parallel 0 "vzeroupper_operation"
11659 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER)])]
11660 "TARGET_AVX"
11590 "vzeroupper" 11661 "vzeroupper"
11591 [(set_attr "type" "sse") 11662 [(set_attr "type" "sse")
11663 (set_attr "modrm" "0")
11592 (set_attr "memory" "none") 11664 (set_attr "memory" "none")
11593 (set_attr "prefix" "vex") 11665 (set_attr "prefix" "vex")
11594 (set_attr "mode" "OI")]) 11666 (set_attr "mode" "OI")])
11595 11667
;; Duplicate a scalar into every element of an AVX256MODE24P vector.
;; Alternative 0 (memory source) is emitted directly as vbroadcasts*.
;; Alternative 1 (register source) is output as "#" and split after
;; reload: operand 2 is the destination register viewed in the half-width
;; vector mode; it first receives a half-width vec_duplicate of the
;; scalar and is then concatenated with itself into the full destination.
11596 (define_insn "avx_vzeroupper_rex64" 11668 (define_insn_and_split "vec_dup<mode>"
11597 [(unspec_volatile [(const_int 0)] UNSPECV_VZEROUPPER) 11669 [(set (match_operand:AVX256MODE24P 0 "register_operand" "=x,x")
11598 (clobber (reg:V8SI XMM0_REG)) 11670 (vec_duplicate:AVX256MODE24P
11599 (clobber (reg:V8SI XMM1_REG)) 11671 (match_operand:<avxscalarmode> 1 "nonimmediate_operand" "m,?x")))]
11600 (clobber (reg:V8SI XMM2_REG)) 11672 "TARGET_AVX"
11601 (clobber (reg:V8SI XMM3_REG)) 11673 "@
11602 (clobber (reg:V8SI XMM4_REG)) 11674 vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}
11603 (clobber (reg:V8SI XMM5_REG)) 11675 #"
11604 (clobber (reg:V8SI XMM6_REG)) 11676 "&& reload_completed && REG_P (operands[1])"
11605 (clobber (reg:V8SI XMM7_REG)) 11677 [(set (match_dup 2) (vec_duplicate:<avxhalfvecmode> (match_dup 1)))
11606 (clobber (reg:V8SI XMM8_REG)) 11678 (set (match_dup 0) (vec_concat:AVX256MODE24P (match_dup 2) (match_dup 2)))]
11607 (clobber (reg:V8SI XMM9_REG)) 11679 {
11608 (clobber (reg:V8SI XMM10_REG)) 11680 operands[2] = gen_rtx_REG (<avxhalfvecmode>mode, REGNO (operands[0]));
11609 (clobber (reg:V8SI XMM11_REG)) 11681 }
11610 (clobber (reg:V8SI XMM12_REG)) 11682 [(set_attr "type" "ssemov")
11611 (clobber (reg:V8SI XMM13_REG)) 11683 (set_attr "prefix_extra" "1")
11612 (clobber (reg:V8SI XMM14_REG)) 11684 (set_attr "prefix" "vex")
11613 (clobber (reg:V8SI XMM15_REG))] 11685 (set_attr "mode" "V8SF")])
11614 "TARGET_AVX && TARGET_64BIT" 11686
;; Copy a half-width vector into both halves of an AVX256MODE vector.
;; Three alternatives, chosen by where operand 1 lives:
;;   0: memory                    -> vbroadcastf128
;;   1: same register as operand 0 -> vinsertf128 with immediate 1
;;   2: any other register         -> vperm2f128 with immediate 0
11615 "vzeroupper" 11687 (define_insn "avx_vbroadcastf128_<mode>"
11616 [(set_attr "type" "sse") 11688 [(set (match_operand:AVX256MODE 0 "register_operand" "=x,x,x")
11617 (set_attr "memory" "none") 11689 (vec_concat:AVX256MODE
11618 (set_attr "prefix" "vex") 11690 (match_operand:<avxhalfvecmode> 1 "nonimmediate_operand" "m,0,?x")
11619 (set_attr "mode" "OI")]) 11691 (match_dup 1)))]
11620 11692 "TARGET_AVX"
11621 (define_insn "avx_vpermil<mode>" 11693 "@
11694 vbroadcastf128\t{%1, %0|%0, %1}
11695 vinsertf128\t{$1, %1, %0, %0|%0, %0, %1, 1}
11696 vperm2f128\t{$0, %t1, %t1, %0|%0, %t1, %t1, 0}"
11697 [(set_attr "type" "ssemov,sselog1,sselog1")
11698 (set_attr "prefix_extra" "1")
11699 (set_attr "length_immediate" "0,1,1")
11700 (set_attr "prefix" "vex")
11701 (set_attr "mode" "V4SF,V8SF,V8SF")])
11702
11703 ;; Recognize broadcast as a vec_select as produced by builtin_vec_perm.
11704 ;; If it so happens that the input is in memory, use vbroadcast.
11705 ;; Otherwise use vpermilp (and in the case of 256-bit modes, vperm2f128).
;; V4SF case.  Operand 3 is the index of the element being broadcast.
;; Alternatives 0 and 1 have a memory source: operand 1 is rewritten to
;; address just the selected SFmode element (byte offset elt * 4) and
;; vbroadcastss is used.  Alternative 2 has a register source and uses
;; vpermilps with the immediate elt * 0x55.
11706 (define_insn "*avx_vperm_broadcast_v4sf"
11707 [(set (match_operand:V4SF 0 "register_operand" "=x,x,x")
11708 (vec_select:V4SF
11709 (match_operand:V4SF 1 "nonimmediate_operand" "m,o,x")
11710 (match_parallel 2 "avx_vbroadcast_operand"
11711 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11712 "TARGET_AVX"
11713 {
11714 int elt = INTVAL (operands[3]);
11715 switch (which_alternative)
11716 {
11717 case 0:
11718 case 1:
11719 operands[1] = adjust_address_nv (operands[1], SFmode, elt * 4);
11720 return "vbroadcastss\t{%1, %0|%0, %1}";
11721 case 2:
11722 operands[2] = GEN_INT (elt * 0x55);
11723 return "vpermilps\t{%2, %1, %0|%0, %1, %2}";
11724 default:
11725 gcc_unreachable ();
11726 }
11727 }
11728 [(set_attr "type" "ssemov,ssemov,sselog1")
11729 (set_attr "prefix_extra" "1")
11730 (set_attr "length_immediate" "0,0,1")
11731 (set_attr "prefix" "vex")
11732 (set_attr "mode" "SF,SF,V4SF")])
11733
;; AVX256MODEF2P case.  Always output as "#" and split once reload has
;; completed.  Operand 3 is the index of the element being broadcast.
;;  - Register source: emit a vpermil that replicates the element within
;;    its 128-bit lane, then a vperm2f128 that copies that lane into both
;;    lanes of the destination, and finish with DONE.
;;  - Memory source: narrow operand 1 to the selected scalar element and
;;    let the vec_duplicate replacement pattern be emitted.
11734 (define_insn_and_split "*avx_vperm_broadcast_<mode>"
11735 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x,x,x")
11736 (vec_select:AVX256MODEF2P
11737 (match_operand:AVX256MODEF2P 1 "nonimmediate_operand" "m,o,?x")
11738 (match_parallel 2 "avx_vbroadcast_operand"
11739 [(match_operand 3 "const_int_operand" "C,n,n")])))]
11740 "TARGET_AVX"
11741 "#"
11742 "&& reload_completed"
11743 [(set (match_dup 0) (vec_duplicate:AVX256MODEF2P (match_dup 1)))]
11744 {
11745 rtx op0 = operands[0], op1 = operands[1];
11746 int elt = INTVAL (operands[3]);
11747
11748 if (REG_P (op1))
11749 {
11750 int mask;
11751
11752 /* Shuffle element we care about into all elements of the 128-bit lane.
11753 The other lane gets shuffled too, but we don't care. */
11754 if (<MODE>mode == V4DFmode)
11755 mask = (elt & 1 ? 15 : 0);
11756 else
11757 mask = (elt & 3) * 0x55;
11758 emit_insn (gen_avx_vpermil<mode> (op0, op1, GEN_INT (mask)));
11759
11760 /* Shuffle the lane we care about into both lanes of the dest. */
11761 mask = (elt / (<ssescalarnum> / 2)) * 0x11;
11762 emit_insn (gen_avx_vperm2f128<mode>3 (op0, op0, op0, GEN_INT (mask)));
11763 DONE;
11764 }
11765
11766 operands[1] = adjust_address_nv (op1, <avxscalarmode>mode,
11767 elt * GET_MODE_SIZE (<avxscalarmode>mode));
11768 })
11769
;; Immediate form of vpermil for the AVXMODEFDP modes.  Operand 2 arrives
;; as an 8-bit immediate and is replaced by a PARALLEL of element
;; indices so the operation can be expressed as a vec_select.  Each
;; element uses one selector bit; for V4DFmode the selectors for
;; elements 2 and 3 are offset by 2.
11770 (define_expand "avx_vpermil<mode>"
11771 [(set (match_operand:AVXMODEFDP 0 "register_operand" "")
11772 (vec_select:AVXMODEFDP
11773 (match_operand:AVXMODEFDP 1 "nonimmediate_operand" "")
11774 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11775 "TARGET_AVX"
11776 {
11777 int mask = INTVAL (operands[2]);
11778 rtx perm[<ssescalarnum>];
11779
11780 perm[0] = GEN_INT (mask & 1);
11781 perm[1] = GEN_INT ((mask >> 1) & 1);
11782 if (<MODE>mode == V4DFmode)
11783 {
11784 perm[2] = GEN_INT (((mask >> 2) & 1) + 2);
11785 perm[3] = GEN_INT (((mask >> 3) & 1) + 2);
11786 }
11787
11788 operands[2]
11789 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
11790 })
11791
;; Immediate form of vpermil for the AVXMODEFSP modes.  Operand 2 arrives
;; as an 8-bit immediate and is replaced by a PARALLEL of element
;; indices so the operation can be expressed as a vec_select.  Each of
;; the first four elements uses a two-bit selector; for V8SFmode the same
;; four selectors are reused for elements 4..7 with 4 added to each.
11792 (define_expand "avx_vpermil<mode>"
11793 [(set (match_operand:AVXMODEFSP 0 "register_operand" "")
11794 (vec_select:AVXMODEFSP
11795 (match_operand:AVXMODEFSP 1 "nonimmediate_operand" "")
11796 (match_operand:SI 2 "const_0_to_255_operand" "")))]
11797 "TARGET_AVX"
11798 {
11799 int mask = INTVAL (operands[2]);
11800 rtx perm[<ssescalarnum>];
11801
11802 perm[0] = GEN_INT (mask & 3);
11803 perm[1] = GEN_INT ((mask >> 2) & 3);
11804 perm[2] = GEN_INT ((mask >> 4) & 3);
11805 perm[3] = GEN_INT ((mask >> 6) & 3);
11806 if (<MODE>mode == V8SFmode)
11807 {
11808 perm[4] = GEN_INT ((mask & 3) + 4);
11809 perm[5] = GEN_INT (((mask >> 2) & 3) + 4);
11810 perm[6] = GEN_INT (((mask >> 4) & 3) + 4);
11811 perm[7] = GEN_INT (((mask >> 6) & 3) + 4);
11812 }
11813
11814 operands[2]
11815 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (<ssescalarnum>, perm));
11816 })
11817
;; Matches the vec_select form of vpermilp, where operand 2 is a PARALLEL
;; of element indices accepted by avx_vpermilp_<mode>_operand.  At output
;; time the value returned by avx_vpermilp_parallel, minus one, replaces
;; operand 2 and is printed as the instruction's immediate.
11818 (define_insn "*avx_vpermilp<mode>"
11622 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") 11819 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11623 (unspec:AVXMODEF2P 11820 (vec_select:AVXMODEF2P
11624 [(match_operand:AVXMODEF2P 1 "register_operand" "xm") 11821 (match_operand:AVXMODEF2P 1 "nonimmediate_operand" "xm")
11625 (match_operand:SI 2 "const_0_to_<vpermilbits>_operand" "n")] 11822 (match_parallel 2 "avx_vpermilp_<mode>_operand"
11626 UNSPEC_VPERMIL))] 11823 [(match_operand 3 "const_int_operand" "")])))]
11627 "TARGET_AVX" 11824 "TARGET_AVX"
11628 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" 11825 {
11826 int mask = avx_vpermilp_parallel (operands[2], <MODE>mode) - 1;
11827 operands[2] = GEN_INT (mask);
11828 return "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}";
11829 }
11629 [(set_attr "type" "sselog") 11830 [(set_attr "type" "sselog")
11831 (set_attr "prefix_extra" "1")
11832 (set_attr "length_immediate" "1")
11630 (set_attr "prefix" "vex") 11833 (set_attr "prefix" "vex")
11631 (set_attr "mode" "<MODE>")]) 11834 (set_attr "mode" "<MODE>")])
11632 11835
11633 (define_insn "avx_vpermilvar<mode>3" 11836 (define_insn "avx_vpermilvar<mode>3"
11634 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") 11837 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11637 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")] 11840 (match_operand:<avxpermvecmode> 2 "nonimmediate_operand" "xm")]
11638 UNSPEC_VPERMIL))] 11841 UNSPEC_VPERMIL))]
11639 "TARGET_AVX" 11842 "TARGET_AVX"
11640 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" 11843 "vpermilp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11641 [(set_attr "type" "sselog") 11844 [(set_attr "type" "sselog")
11845 (set_attr "prefix_extra" "1")
11642 (set_attr "prefix" "vex") 11846 (set_attr "prefix" "vex")
11643 (set_attr "mode" "<MODE>")]) 11847 (set_attr "mode" "<MODE>")])
11644 11848
;; Expander for vperm2f128.  Operands 1 and 2 are the two vector sources
;; and operand 3 is the 8-bit lane selector.  When neither zeroing bit
;; (bit 3 or bit 7) of the selector is set, the operation is rewritten as
;; a vec_select from the concatenation of the two sources; otherwise the
;; unspec template below is emitted unchanged.
11645 (define_insn "avx_vperm2f128<mode>3" 11849 (define_expand "avx_vperm2f128<mode>3"
11850 [(set (match_operand:AVX256MODE2P 0 "register_operand" "")
11851 (unspec:AVX256MODE2P
11852 [(match_operand:AVX256MODE2P 1 "register_operand" "")
11853 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "")
11854 (match_operand:SI 3 "const_0_to_255_operand" "")]
11855 UNSPEC_VPERMIL2F128))]
11856 "TARGET_AVX"
11857 {
/* The selector is operand 3; operand 2 is the second vector source and
   is not a CONST_INT, so INTVAL must not be applied to it. */
11858 int mask = INTVAL (operands[3]);
11859 if ((mask & 0x88) == 0)
11860 {
11861 rtx perm[<ssescalarnum>], t1, t2;
11862 int i, base, nelt = <ssescalarnum>, nelt2 = nelt / 2;
11863
/* Low half of the result: selector bits 1:0 choose the starting
   half-vector within the operand 1 / operand 2 concatenation. */
11864 base = (mask & 3) * nelt2;
11865 for (i = 0; i < nelt2; ++i)
11866 perm[i] = GEN_INT (base + i);
11867
/* High half of the result: same scheme using selector bits 5:4. */
11868 base = ((mask >> 4) & 3) * nelt2;
11869 for (i = 0; i < nelt2; ++i)
11870 perm[i + nelt2] = GEN_INT (base + i);
11871
11872 t2 = gen_rtx_VEC_CONCAT (<ssedoublesizemode>mode,
11873 operands[1], operands[2]);
11874 t1 = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nelt, perm));
11875 t2 = gen_rtx_VEC_SELECT (<MODE>mode, t2, t1);
11876 t2 = gen_rtx_SET (VOIDmode, operands[0], t2);
11877 emit_insn (t2);
11878 DONE;
11879 }
11880 })
11881
11882 ;; Note that bits 7 and 3 of the imm8 allow lanes to be zeroed, which
11883 ;; means that in order to represent this properly in rtl we'd have to
11884 ;; nest *another* vec_concat with a zero operand and do the select from
11885 ;; a 4x wide vector. That doesn't seem very nice.
;; Unspec form of vperm2f128: the 8-bit selector in operand 3 is kept
;; opaque and printed as-is, so every value from 0 to 255 is accepted.
11886 (define_insn "*avx_vperm2f128<mode>_full"
11646 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x") 11887 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11647 (unspec:AVX256MODE2P 11888 (unspec:AVX256MODE2P
11648 [(match_operand:AVX256MODE2P 1 "register_operand" "x") 11889 [(match_operand:AVX256MODE2P 1 "register_operand" "x")
11649 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm") 11890 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm")
11650 (match_operand:SI 3 "const_0_to_255_operand" "n")] 11891 (match_operand:SI 3 "const_0_to_255_operand" "n")]
11651 UNSPEC_VPERMIL2F128))] 11892 UNSPEC_VPERMIL2F128))]
11652 "TARGET_AVX" 11893 "TARGET_AVX"
11653 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}" 11894 "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}"
11654 [(set_attr "type" "sselog") 11895 [(set_attr "type" "sselog")
11896 (set_attr "prefix_extra" "1")
11897 (set_attr "length_immediate" "1")
11655 (set_attr "prefix" "vex") 11898 (set_attr "prefix" "vex")
11656 (set_attr "mode" "V8SF")]) 11899 (set_attr "mode" "V8SF")])
11657 11900
;; vec_select form of vperm2f128: selects from the concatenation of
;; operands 1 and 2 using the PARALLEL in operand 3, which must satisfy
;; avx_vperm2f128_<mode>_operand.  At output time the value returned by
;; avx_vperm2f128_parallel, minus one, replaces operand 3 and is printed
;; as the instruction's immediate.
11658 (define_insn "avx_vbroadcasts<avxmodesuffixf2c><avxmodesuffix>" 11901 (define_insn "*avx_vperm2f128<mode>_nozero"
11659 [(set (match_operand:AVXMODEF4P 0 "register_operand" "=x") 11902 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x")
11660 (vec_concat:AVXMODEF4P 11903 (vec_select:AVX256MODE2P
11661 (vec_concat:<avxhalfvecmode> 11904 (vec_concat:<ssedoublesizemode>
11662 (match_operand:<avxscalarmode> 1 "memory_operand" "m") 11905 (match_operand:AVX256MODE2P 1 "register_operand" "x")
11663 (match_dup 1)) 11906 (match_operand:AVX256MODE2P 2 "nonimmediate_operand" "xm"))
11664 (vec_concat:<avxhalfvecmode> 11907 (match_parallel 3 "avx_vperm2f128_<mode>_operand"
11665 (match_dup 1) 11908 [(match_operand 4 "const_int_operand" "")])))]
11666 (match_dup 1))))] 11909 "TARGET_AVX"
11667 "TARGET_AVX" 11910 {
11668 "vbroadcasts<avxmodesuffixf2c>\t{%1, %0|%0, %1}" 11911 int mask = avx_vperm2f128_parallel (operands[3], <MODE>mode) - 1;
11669 [(set_attr "type" "ssemov") 11912 operands[3] = GEN_INT (mask);
11670 (set_attr "prefix" "vex") 11913 return "vperm2f128\t{%3, %2, %1, %0|%0, %1, %2, %3}";
11671 (set_attr "mode" "<avxscalarmode>")]) 11914 }
11672 11915 [(set_attr "type" "sselog")
11673 (define_insn "avx_vbroadcastss256" 11916 (set_attr "prefix_extra" "1")
11674 [(set (match_operand:V8SF 0 "register_operand" "=x") 11917 (set_attr "length_immediate" "1")
11675 (vec_concat:V8SF 11918 (set_attr "prefix" "vex")
11676 (vec_concat:V4SF 11919 (set_attr "mode" "V8SF")])
11677 (vec_concat:V2SF
11678 (match_operand:SF 1 "memory_operand" "m")
11679 (match_dup 1))
11680 (vec_concat:V2SF
11681 (match_dup 1)
11682 (match_dup 1)))
11683 (vec_concat:V4SF
11684 (vec_concat:V2SF
11685 (match_dup 1)
11686 (match_dup 1))
11687 (vec_concat:V2SF
11688 (match_dup 1)
11689 (match_dup 1)))))]
11690 "TARGET_AVX"
11691 "vbroadcastss\t{%1, %0|%0, %1}"
11692 [(set_attr "type" "ssemov")
11693 (set_attr "prefix" "vex")
11694 (set_attr "mode" "SF")])
11695
11696 (define_insn "avx_vbroadcastf128_p<avxmodesuffixf2c>256"
11697 [(set (match_operand:AVX256MODEF2P 0 "register_operand" "=x")
11698 (vec_concat:AVX256MODEF2P
11699 (match_operand:<avxhalfvecmode> 1 "memory_operand" "m")
11700 (match_dup 1)))]
11701 "TARGET_AVX"
11702 "vbroadcastf128\t{%1, %0|%0, %1}"
11703 [(set_attr "type" "ssemov")
11704 (set_attr "prefix" "vex")
11705 (set_attr "mode" "V4SF")])
11706 11920
11707 (define_expand "avx_vinsertf128<mode>" 11921 (define_expand "avx_vinsertf128<mode>"
11708 [(match_operand:AVX256MODE 0 "register_operand" "") 11922 [(match_operand:AVX256MODE 0 "register_operand" "")
11709 (match_operand:AVX256MODE 1 "register_operand" "") 11923 (match_operand:AVX256MODE 1 "register_operand" "")
11710 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "") 11924 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "")
11735 (match_operand:AVX256MODE4P 1 "register_operand" "x") 11949 (match_operand:AVX256MODE4P 1 "register_operand" "x")
11736 (parallel [(const_int 2) (const_int 3)]))))] 11950 (parallel [(const_int 2) (const_int 3)]))))]
11737 "TARGET_AVX" 11951 "TARGET_AVX"
11738 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" 11952 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11739 [(set_attr "type" "sselog") 11953 [(set_attr "type" "sselog")
11954 (set_attr "prefix_extra" "1")
11955 (set_attr "length_immediate" "1")
11740 (set_attr "prefix" "vex") 11956 (set_attr "prefix" "vex")
11741 (set_attr "mode" "V8SF")]) 11957 (set_attr "mode" "V8SF")])
11742 11958
11743 (define_insn "vec_set_hi_<mode>" 11959 (define_insn "vec_set_hi_<mode>"
11744 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x") 11960 [(set (match_operand:AVX256MODE4P 0 "register_operand" "=x")
11748 (parallel [(const_int 0) (const_int 1)])) 11964 (parallel [(const_int 0) (const_int 1)]))
11749 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))] 11965 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11750 "TARGET_AVX" 11966 "TARGET_AVX"
11751 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" 11967 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11752 [(set_attr "type" "sselog") 11968 [(set_attr "type" "sselog")
11969 (set_attr "prefix_extra" "1")
11970 (set_attr "length_immediate" "1")
11753 (set_attr "prefix" "vex") 11971 (set_attr "prefix" "vex")
11754 (set_attr "mode" "V8SF")]) 11972 (set_attr "mode" "V8SF")])
11755 11973
11756 (define_insn "vec_set_lo_<mode>" 11974 (define_insn "vec_set_lo_<mode>"
11757 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x") 11975 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11762 (parallel [(const_int 4) (const_int 5) 11980 (parallel [(const_int 4) (const_int 5)
11763 (const_int 6) (const_int 7)]))))] 11981 (const_int 6) (const_int 7)]))))]
11764 "TARGET_AVX" 11982 "TARGET_AVX"
11765 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" 11983 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11766 [(set_attr "type" "sselog") 11984 [(set_attr "type" "sselog")
11985 (set_attr "prefix_extra" "1")
11986 (set_attr "length_immediate" "1")
11767 (set_attr "prefix" "vex") 11987 (set_attr "prefix" "vex")
11768 (set_attr "mode" "V8SF")]) 11988 (set_attr "mode" "V8SF")])
11769 11989
11770 (define_insn "vec_set_hi_<mode>" 11990 (define_insn "vec_set_hi_<mode>"
11771 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x") 11991 [(set (match_operand:AVX256MODE8P 0 "register_operand" "=x")
11776 (const_int 2) (const_int 3)])) 11996 (const_int 2) (const_int 3)]))
11777 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))] 11997 (match_operand:<avxhalfvecmode> 2 "nonimmediate_operand" "xm")))]
11778 "TARGET_AVX" 11998 "TARGET_AVX"
11779 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" 11999 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11780 [(set_attr "type" "sselog") 12000 [(set_attr "type" "sselog")
12001 (set_attr "prefix_extra" "1")
12002 (set_attr "length_immediate" "1")
11781 (set_attr "prefix" "vex") 12003 (set_attr "prefix" "vex")
11782 (set_attr "mode" "V8SF")]) 12004 (set_attr "mode" "V8SF")])
11783 12005
11784 (define_insn "vec_set_lo_v16hi" 12006 (define_insn "vec_set_lo_v16hi"
11785 [(set (match_operand:V16HI 0 "register_operand" "=x") 12007 [(set (match_operand:V16HI 0 "register_operand" "=x")
11792 (const_int 12) (const_int 13) 12014 (const_int 12) (const_int 13)
11793 (const_int 14) (const_int 15)]))))] 12015 (const_int 14) (const_int 15)]))))]
11794 "TARGET_AVX" 12016 "TARGET_AVX"
11795 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" 12017 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11796 [(set_attr "type" "sselog") 12018 [(set_attr "type" "sselog")
12019 (set_attr "prefix_extra" "1")
12020 (set_attr "length_immediate" "1")
11797 (set_attr "prefix" "vex") 12021 (set_attr "prefix" "vex")
11798 (set_attr "mode" "V8SF")]) 12022 (set_attr "mode" "V8SF")])
11799 12023
11800 (define_insn "vec_set_hi_v16hi" 12024 (define_insn "vec_set_hi_v16hi"
11801 [(set (match_operand:V16HI 0 "register_operand" "=x") 12025 [(set (match_operand:V16HI 0 "register_operand" "=x")
11808 (const_int 6) (const_int 7)])) 12032 (const_int 6) (const_int 7)]))
11809 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))] 12033 (match_operand:V8HI 2 "nonimmediate_operand" "xm")))]
11810 "TARGET_AVX" 12034 "TARGET_AVX"
11811 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" 12035 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11812 [(set_attr "type" "sselog") 12036 [(set_attr "type" "sselog")
12037 (set_attr "prefix_extra" "1")
12038 (set_attr "length_immediate" "1")
11813 (set_attr "prefix" "vex") 12039 (set_attr "prefix" "vex")
11814 (set_attr "mode" "V8SF")]) 12040 (set_attr "mode" "V8SF")])
11815 12041
11816 (define_insn "vec_set_lo_v32qi" 12042 (define_insn "vec_set_lo_v32qi"
11817 [(set (match_operand:V32QI 0 "register_operand" "=x") 12043 [(set (match_operand:V32QI 0 "register_operand" "=x")
11828 (const_int 28) (const_int 29) 12054 (const_int 28) (const_int 29)
11829 (const_int 30) (const_int 31)]))))] 12055 (const_int 30) (const_int 31)]))))]
11830 "TARGET_AVX" 12056 "TARGET_AVX"
11831 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}" 12057 "vinsertf128\t{$0x0, %2, %1, %0|%0, %1, %2, 0x0}"
11832 [(set_attr "type" "sselog") 12058 [(set_attr "type" "sselog")
12059 (set_attr "prefix_extra" "1")
12060 (set_attr "length_immediate" "1")
11833 (set_attr "prefix" "vex") 12061 (set_attr "prefix" "vex")
11834 (set_attr "mode" "V8SF")]) 12062 (set_attr "mode" "V8SF")])
11835 12063
11836 (define_insn "vec_set_hi_v32qi" 12064 (define_insn "vec_set_hi_v32qi"
11837 [(set (match_operand:V32QI 0 "register_operand" "=x") 12065 [(set (match_operand:V32QI 0 "register_operand" "=x")
11848 (const_int 14) (const_int 15)])) 12076 (const_int 14) (const_int 15)]))
11849 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))] 12077 (match_operand:V16QI 2 "nonimmediate_operand" "xm")))]
11850 "TARGET_AVX" 12078 "TARGET_AVX"
11851 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}" 12079 "vinsertf128\t{$0x1, %2, %1, %0|%0, %1, %2, 0x1}"
11852 [(set_attr "type" "sselog") 12080 [(set_attr "type" "sselog")
12081 (set_attr "prefix_extra" "1")
12082 (set_attr "length_immediate" "1")
11853 (set_attr "prefix" "vex") 12083 (set_attr "prefix" "vex")
11854 (set_attr "mode" "V8SF")]) 12084 (set_attr "mode" "V8SF")])
11855 12085
11856 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>" 12086 (define_insn "avx_maskloadp<avxmodesuffixf2c><avxmodesuffix>"
11857 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x") 12087 [(set (match_operand:AVXMODEF2P 0 "register_operand" "=x")
11861 (match_dup 0)] 12091 (match_dup 0)]
11862 UNSPEC_MASKLOAD))] 12092 UNSPEC_MASKLOAD))]
11863 "TARGET_AVX" 12093 "TARGET_AVX"
11864 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}" 12094 "vmaskmovp<avxmodesuffixf2c>\t{%1, %2, %0|%0, %2, %1}"
11865 [(set_attr "type" "sselog1") 12095 [(set_attr "type" "sselog1")
12096 (set_attr "prefix_extra" "1")
11866 (set_attr "prefix" "vex") 12097 (set_attr "prefix" "vex")
11867 (set_attr "mode" "<MODE>")]) 12098 (set_attr "mode" "<MODE>")])
11868 12099
11869 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>" 12100 (define_insn "avx_maskstorep<avxmodesuffixf2c><avxmodesuffix>"
11870 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m") 12101 [(set (match_operand:AVXMODEF2P 0 "memory_operand" "=m")
11874 (match_dup 0)] 12105 (match_dup 0)]
11875 UNSPEC_MASKSTORE))] 12106 UNSPEC_MASKSTORE))]
11876 "TARGET_AVX" 12107 "TARGET_AVX"
11877 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}" 12108 "vmaskmovp<avxmodesuffixf2c>\t{%2, %1, %0|%0, %1, %2}"
11878 [(set_attr "type" "sselog1") 12109 [(set_attr "type" "sselog1")
12110 (set_attr "prefix_extra" "1")
11879 (set_attr "prefix" "vex") 12111 (set_attr "prefix" "vex")
11880 (set_attr "mode" "<MODE>")]) 12112 (set_attr "mode" "<MODE>")])
11881 12113
11882 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>" 12114 (define_insn "avx_<avxmodesuffixp><avxmodesuffix>_<avxmodesuffixp>"
11883 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x") 12115 [(set (match_operand:AVX256MODE2P 0 "register_operand" "=x,x")
11984 default: 12216 default:
11985 gcc_unreachable (); 12217 gcc_unreachable ();
11986 } 12218 }
11987 } 12219 }
11988 [(set_attr "type" "sselog,ssemov") 12220 [(set_attr "type" "sselog,ssemov")
12221 (set_attr "prefix_extra" "1,*")
12222 (set_attr "length_immediate" "1,*")
11989 (set_attr "prefix" "vex") 12223 (set_attr "prefix" "vex")
11990 (set_attr "mode" "<avxvecmode>")]) 12224 (set_attr "mode" "<avxvecmode>")])