comparison gcc/config/aarch64/aarch64-simd.md @ 131:84e7813d76e9

gcc-8.2
author mir3636
date Thu, 25 Oct 2018 07:37:49 +0900
parents 04ced10e8804
children 1830386684a0
comparing revision 111:04ced10e8804 with revision 131:84e7813d76e9
1 ;; Machine description for AArch64 AdvSIMD architecture. 1 ;; Machine description for AArch64 AdvSIMD architecture.
2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc. 2 ;; Copyright (C) 2011-2018 Free Software Foundation, Inc.
3 ;; Contributed by ARM Ltd. 3 ;; Contributed by ARM Ltd.
4 ;; 4 ;;
5 ;; This file is part of GCC. 5 ;; This file is part of GCC.
6 ;; 6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify it 7 ;; GCC is free software; you can redistribute it and/or modify it
29 a stp in DI mode, so we check the validity of that. 29 a stp in DI mode, so we check the validity of that.
30 If the mode is 8 bytes wide, then we will do doing a 30 If the mode is 8 bytes wide, then we will do doing a
31 normal str, so the check need not apply. */ 31 normal str, so the check need not apply. */
32 if (GET_CODE (operands[0]) == MEM 32 if (GET_CODE (operands[0]) == MEM
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode) 33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode)
34 && ((GET_MODE_SIZE (<MODE>mode) == 16 34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16)
35 && aarch64_mem_pair_operand (operands[0], DImode)) 35 && aarch64_mem_pair_operand (operands[0], DImode))
36 || GET_MODE_SIZE (<MODE>mode) == 8))) 36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8))))
37 operands[1] = force_reg (<MODE>mode, operands[1]); 37 operands[1] = force_reg (<MODE>mode, operands[1]);
38 " 38 "
39 ) 39 )
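Reviewer note: the GET_MODE_SIZE comparisons above switch from plain == to known_eq because mode sizes may now scale with the SVE vector length; known_eq only succeeds when the size equals the constant for every possible vector length. A minimal stand-alone C sketch of that idea (poly_size, known_eq_model and the coefficients are illustrative assumptions, not GCC's actual poly_int types):

/* Reviewer sketch: mode sizes modelled as const + coeff * VL; the check
   only succeeds when the size is the given constant for every VL.
   These types and values are illustrative, not GCC's implementation.  */
#include <stdbool.h>
#include <stdio.h>

struct poly_size { long constant; long vl_coeff; };

static bool
known_eq_model (struct poly_size a, long b)
{
  return a.vl_coeff == 0 && a.constant == b;
}

int
main (void)
{
  struct poly_size v4si = { 16, 0 };   /* fixed 16-byte Advanced SIMD mode */
  struct poly_size sve = { 0, 16 };    /* scales with VL (illustrative) */
  printf ("%d %d\n", known_eq_model (v4si, 16), known_eq_model (sve, 16));
  return 0;
}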
40 40
41 (define_expand "movmisalign<mode>" 41 (define_expand "movmisalign<mode>"
78 (match_operand:VALL_F16 1 "register_operand" "w") 78 (match_operand:VALL_F16 1 "register_operand" "w")
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")]) 79 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
80 )))] 80 )))]
81 "TARGET_SIMD" 81 "TARGET_SIMD"
82 { 82 {
83 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); 83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"; 84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
85 } 85 }
86 [(set_attr "type" "neon_dup<q>")] 86 [(set_attr "type" "neon_dup<q>")]
87 ) 87 )
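Reviewer note: throughout this revision, open-coded GEN_INT (ENDIAN_LANE_N (...)) calls are replaced by the aarch64_endian_lane_rtx helper, and ENDIAN_LANE_N itself now takes an element count (<nunits>) rather than a mode. The lane renumbering involved is a big-endian reversal; a minimal stand-alone C model (endian_lane and the sample values are illustrative, not the GCC code):

/* Reviewer sketch: the big-endian lane renumbering behind ENDIAN_LANE_N.  */
#include <stdio.h>

static unsigned
endian_lane (unsigned nunits, unsigned n, int bytes_big_endian)
{
  return bytes_big_endian ? nunits - 1 - n : n;
}

int
main (void)
{
  /* Lane 1 of a 4-element vector stays lane 1 on LE, becomes lane 2 on BE.  */
  printf ("LE %u  BE %u\n", endian_lane (4, 1, 0), endian_lane (4, 1, 1));
  return 0;
}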
88 88
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w") 93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w")
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")]) 94 (parallel [(match_operand:SI 2 "immediate_operand" "i")])
95 )))] 95 )))]
96 "TARGET_SIMD" 96 "TARGET_SIMD"
97 { 97 {
98 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, 98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
99 INTVAL (operands[2])));
100 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"; 99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]";
101 } 100 }
102 [(set_attr "type" "neon_dup<q>")] 101 [(set_attr "type" "neon_dup<q>")]
103 ) 102 )
104 103
105 (define_insn "*aarch64_simd_mov<mode>" 104 (define_insn "*aarch64_simd_mov<VD:mode>"
106 [(set (match_operand:VD 0 "nonimmediate_operand" 105 [(set (match_operand:VD 0 "nonimmediate_operand"
107 "=w, m, m, w, ?r, ?w, ?r, w") 106 "=w, m, m, w, ?r, ?w, ?r, w")
108 (match_operand:VD 1 "general_operand" 107 (match_operand:VD 1 "general_operand"
109 "m, Dz, w, w, w, r, r, Dn"))] 108 "m, Dz, w, w, w, r, r, Dn"))]
110 "TARGET_SIMD 109 "TARGET_SIMD
119 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>"; 118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>";
120 case 4: return "umov\t%0, %1.d[0]"; 119 case 4: return "umov\t%0, %1.d[0]";
121 case 5: return "fmov\t%d0, %1"; 120 case 5: return "fmov\t%d0, %1";
122 case 6: return "mov\t%0, %1"; 121 case 6: return "mov\t%0, %1";
123 case 7: 122 case 7:
124 return aarch64_output_simd_mov_immediate (operands[1], 123 return aarch64_output_simd_mov_immediate (operands[1], 64);
125 <MODE>mode, 64);
126 default: gcc_unreachable (); 124 default: gcc_unreachable ();
127 } 125 }
128 } 126 }
129 [(set_attr "type" "neon_load1_1reg<q>, neon_stp, neon_store1_1reg<q>,\ 127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
130 neon_logic<q>, neon_to_gp<q>, f_mcr,\ 128 neon_logic<q>, neon_to_gp<q>, f_mcr,\
131 mov_reg, neon_move<q>")] 129 mov_reg, neon_move<q>")]
132 ) 130 )
133 131
134 (define_insn "*aarch64_simd_mov<mode>" 132 (define_insn "*aarch64_simd_mov<VQ:mode>"
135 [(set (match_operand:VQ 0 "nonimmediate_operand" 133 [(set (match_operand:VQ 0 "nonimmediate_operand"
136 "=w, Umq, m, w, ?r, ?w, ?r, w") 134 "=w, Umn, m, w, ?r, ?w, ?r, w")
137 (match_operand:VQ 1 "general_operand" 135 (match_operand:VQ 1 "general_operand"
138 "m, Dz, w, w, w, r, r, Dn"))] 136 "m, Dz, w, w, w, r, r, Dn"))]
139 "TARGET_SIMD 137 "TARGET_SIMD
140 && (register_operand (operands[0], <MODE>mode) 138 && (register_operand (operands[0], <MODE>mode)
141 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))" 139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
153 case 4: 151 case 4:
154 case 5: 152 case 5:
155 case 6: 153 case 6:
156 return "#"; 154 return "#";
157 case 7: 155 case 7:
158 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128); 156 return aarch64_output_simd_mov_immediate (operands[1], 128);
159 default: 157 default:
160 gcc_unreachable (); 158 gcc_unreachable ();
161 } 159 }
162 } 160 }
163 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\ 161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\
164 neon_stp, neon_logic<q>, multiple, multiple,\ 162 neon_logic<q>, multiple, multiple,\
165 multiple, neon_move<q>") 163 multiple, neon_move<q>")
166 (set_attr "length" "4,4,4,4,8,8,8,4")] 164 (set_attr "length" "4,4,4,4,8,8,8,4")]
167 ) 165 )
168 166
169 ;; When storing lane zero we can use the normal STR and its more permissive 167 ;; When storing lane zero we can use the normal STR and its more permissive
172 (define_insn "aarch64_store_lane0<mode>" 170 (define_insn "aarch64_store_lane0<mode>"
173 [(set (match_operand:<VEL> 0 "memory_operand" "=m") 171 [(set (match_operand:<VEL> 0 "memory_operand" "=m")
174 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w") 172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w")
175 (parallel [(match_operand 2 "const_int_operand" "n")])))] 173 (parallel [(match_operand 2 "const_int_operand" "n")])))]
176 "TARGET_SIMD 174 "TARGET_SIMD
177 && ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])) == 0" 175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0"
178 "str\\t%<Vetype>1, %0" 176 "str\\t%<Vetype>1, %0"
179 [(set_attr "type" "neon_store1_1reg<q>")] 177 [(set_attr "type" "neon_store1_1reg<q>")]
180 ) 178 )
181 179
182 (define_insn "load_pair<mode>" 180 (define_insn "load_pair<DREG:mode><DREG2:mode>"
183 [(set (match_operand:VD 0 "register_operand" "=w") 181 [(set (match_operand:DREG 0 "register_operand" "=w")
184 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump")) 182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
185 (set (match_operand:VD 2 "register_operand" "=w") 183 (set (match_operand:DREG2 2 "register_operand" "=w")
186 (match_operand:VD 3 "memory_operand" "m"))] 184 (match_operand:DREG2 3 "memory_operand" "m"))]
187 "TARGET_SIMD 185 "TARGET_SIMD
188 && rtx_equal_p (XEXP (operands[3], 0), 186 && rtx_equal_p (XEXP (operands[3], 0),
189 plus_constant (Pmode, 187 plus_constant (Pmode,
190 XEXP (operands[1], 0), 188 XEXP (operands[1], 0),
191 GET_MODE_SIZE (<MODE>mode)))" 189 GET_MODE_SIZE (<DREG:MODE>mode)))"
192 "ldp\\t%d0, %d2, %1" 190 "ldp\\t%d0, %d2, %1"
193 [(set_attr "type" "neon_ldp")] 191 [(set_attr "type" "neon_ldp")]
194 ) 192 )
195 193
196 (define_insn "store_pair<mode>" 194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
197 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump") 195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
198 (match_operand:VD 1 "register_operand" "w")) 196 (match_operand:DREG 1 "register_operand" "w"))
199 (set (match_operand:VD 2 "memory_operand" "=m") 197 (set (match_operand:DREG2 2 "memory_operand" "=m")
200 (match_operand:VD 3 "register_operand" "w"))] 198 (match_operand:DREG2 3 "register_operand" "w"))]
201 "TARGET_SIMD 199 "TARGET_SIMD
202 && rtx_equal_p (XEXP (operands[2], 0), 200 && rtx_equal_p (XEXP (operands[2], 0),
203 plus_constant (Pmode, 201 plus_constant (Pmode,
204 XEXP (operands[0], 0), 202 XEXP (operands[0], 0),
205 GET_MODE_SIZE (<MODE>mode)))" 203 GET_MODE_SIZE (<DREG:MODE>mode)))"
206 "stp\\t%d1, %d3, %0" 204 "stp\\t%d1, %d3, %0"
207 [(set_attr "type" "neon_stp")] 205 [(set_attr "type" "neon_stp")]
208 ) 206 )
207
208 (define_insn "load_pair<VQ:mode><VQ2:mode>"
209 [(set (match_operand:VQ 0 "register_operand" "=w")
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump"))
211 (set (match_operand:VQ2 2 "register_operand" "=w")
212 (match_operand:VQ2 3 "memory_operand" "m"))]
213 "TARGET_SIMD
214 && rtx_equal_p (XEXP (operands[3], 0),
215 plus_constant (Pmode,
216 XEXP (operands[1], 0),
217 GET_MODE_SIZE (<VQ:MODE>mode)))"
218 "ldp\\t%q0, %q2, %1"
219 [(set_attr "type" "neon_ldp_q")]
220 )
221
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>"
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump")
224 (match_operand:VQ 1 "register_operand" "w"))
225 (set (match_operand:VQ2 2 "memory_operand" "=m")
226 (match_operand:VQ2 3 "register_operand" "w"))]
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0),
228 plus_constant (Pmode,
229 XEXP (operands[0], 0),
230 GET_MODE_SIZE (<VQ:MODE>mode)))"
231 "stp\\t%q1, %q3, %0"
232 [(set_attr "type" "neon_stp_q")]
233 )
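Reviewer note: the new q-register load_pair/vec_store_pair patterns, like the d-register ones, only match when the two memory operands are adjacent; the rtx_equal_p/plus_constant condition requires the second address to be exactly the first plus one vector's worth of bytes. A small C sketch of that adjacency test (mem_refs_pair_p and the addresses are illustrative):

/* Reviewer sketch: the adjacency requirement the pair patterns encode.  */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool
mem_refs_pair_p (uintptr_t first, uintptr_t second, unsigned mode_size)
{
  /* Mirrors rtx_equal_p (addr2, plus_constant (Pmode, addr1, size)).  */
  return second == first + mode_size;
}

int
main (void)
{
  /* Two adjacent 16-byte (q-register) slots can become one ldp/stp.  */
  printf ("%d %d\n",
          mem_refs_pair_p (0x1000, 0x1010, 16),
          mem_refs_pair_p (0x1000, 0x1020, 16));
  return 0;
}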
234
209 235
210 (define_split 236 (define_split
211 [(set (match_operand:VQ 0 "register_operand" "") 237 [(set (match_operand:VQ 0 "register_operand" "")
212 (match_operand:VQ 1 "register_operand" ""))] 238 (match_operand:VQ 1 "register_operand" ""))]
213 "TARGET_SIMD && reload_completed 239 "TARGET_SIMD && reload_completed
229 { 255 {
230 aarch64_split_simd_move (operands[0], operands[1]); 256 aarch64_split_simd_move (operands[0], operands[1]);
231 DONE; 257 DONE;
232 }) 258 })
233 259
234 (define_expand "aarch64_split_simd_mov<mode>" 260 (define_expand "@aarch64_split_simd_mov<mode>"
235 [(set (match_operand:VQ 0) 261 [(set (match_operand:VQ 0)
236 (match_operand:VQ 1))] 262 (match_operand:VQ 1))]
237 "TARGET_SIMD" 263 "TARGET_SIMD"
238 { 264 {
239 rtx dst = operands[0]; 265 rtx dst = operands[0];
252 278
253 else 279 else
254 { 280 {
255 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst); 281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst);
256 rtx dst_high_part = gen_highpart (<VHALF>mode, dst); 282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst);
257 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); 283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
258 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
259 285
260 emit_insn 286 emit_insn
261 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo)); 287 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo));
262 emit_insn 288 emit_insn
263 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi)); 289 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi));
449 (match_operand:V8QI 3 "register_operand" "<h_con>") 475 (match_operand:V8QI 3 "register_operand" "<h_con>")
450 (match_operand:SI 4 "immediate_operand" "i")] 476 (match_operand:SI 4 "immediate_operand" "i")]
451 DOTPROD)))] 477 DOTPROD)))]
452 "TARGET_DOTPROD" 478 "TARGET_DOTPROD"
453 { 479 {
454 operands[4] 480 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4]));
455 = GEN_INT (ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
456 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]"; 481 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
457 } 482 }
458 [(set_attr "type" "neon_dot")] 483 [(set_attr "type" "neon_dot")]
459 ) 484 )
460 485
465 (match_operand:V16QI 3 "register_operand" "<h_con>") 490 (match_operand:V16QI 3 "register_operand" "<h_con>")
466 (match_operand:SI 4 "immediate_operand" "i")] 491 (match_operand:SI 4 "immediate_operand" "i")]
467 DOTPROD)))] 492 DOTPROD)))]
468 "TARGET_DOTPROD" 493 "TARGET_DOTPROD"
469 { 494 {
470 operands[4] 495 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4]));
471 = GEN_INT (ENDIAN_LANE_N (V16QImode, INTVAL (operands[4])));
472 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]"; 496 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]";
473 } 497 }
474 [(set_attr "type" "neon_dot")] 498 [(set_attr "type" "neon_dot")]
475 ) 499 )
476 500
500 (match_operand:VMUL 1 "register_operand" "<h_con>") 524 (match_operand:VMUL 1 "register_operand" "<h_con>")
501 (parallel [(match_operand:SI 2 "immediate_operand")]))) 525 (parallel [(match_operand:SI 2 "immediate_operand")])))
502 (match_operand:VMUL 3 "register_operand" "w")))] 526 (match_operand:VMUL 3 "register_operand" "w")))]
503 "TARGET_SIMD" 527 "TARGET_SIMD"
504 { 528 {
505 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); 529 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
506 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; 530 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
507 } 531 }
508 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")] 532 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
509 ) 533 )
510 534
516 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") 540 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
517 (parallel [(match_operand:SI 2 "immediate_operand")]))) 541 (parallel [(match_operand:SI 2 "immediate_operand")])))
518 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))] 542 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))]
519 "TARGET_SIMD" 543 "TARGET_SIMD"
520 { 544 {
521 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, 545 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
522 INTVAL (operands[2])));
523 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; 546 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]";
524 } 547 }
525 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")] 548 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")]
526 ) 549 )
527 550
534 "TARGET_SIMD" 557 "TARGET_SIMD"
535 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"; 558 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]";
536 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")] 559 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")]
537 ) 560 )
538 561
539 (define_insn "aarch64_rsqrte<mode>" 562 (define_insn "@aarch64_rsqrte<mode>"
540 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") 563 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
541 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")] 564 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
542 UNSPEC_RSQRTE))] 565 UNSPEC_RSQRTE))]
543 "TARGET_SIMD" 566 "TARGET_SIMD"
544 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>" 567 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>"
545 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")]) 568 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")])
546 569
547 (define_insn "aarch64_rsqrts<mode>" 570 (define_insn "@aarch64_rsqrts<mode>"
548 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") 571 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
549 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w") 572 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
550 (match_operand:VHSDF_HSDF 2 "register_operand" "w")] 573 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
551 UNSPEC_RSQRTS))] 574 UNSPEC_RSQRTS))]
552 "TARGET_SIMD" 575 "TARGET_SIMD"
570 (match_operand:V2DF 1 "register_operand" "w") 593 (match_operand:V2DF 1 "register_operand" "w")
571 (parallel [(match_operand:SI 2 "immediate_operand")])) 594 (parallel [(match_operand:SI 2 "immediate_operand")]))
572 (match_operand:DF 3 "register_operand" "w")))] 595 (match_operand:DF 3 "register_operand" "w")))]
573 "TARGET_SIMD" 596 "TARGET_SIMD"
574 { 597 {
575 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); 598 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
576 return "fmul\\t%0.2d, %3.2d, %1.d[%2]"; 599 return "fmul\\t%0.2d, %3.2d, %1.d[%2]";
577 } 600 }
578 [(set_attr "type" "neon_fp_mul_d_scalar_q")] 601 [(set_attr "type" "neon_fp_mul_d_scalar_q")]
579 ) 602 )
580 603
615 "TARGET_SIMD" 638 "TARGET_SIMD"
616 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 639 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
617 [(set_attr "type" "neon_abd<q>")] 640 [(set_attr "type" "neon_abd<q>")]
618 ) 641 )
619 642
643 (define_insn "aarch64_<sur>abdl2<mode>_3"
644 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
645 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
646 (match_operand:VDQV_S 2 "register_operand" "w")]
647 ABDL2))]
648 "TARGET_SIMD"
649 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
650 [(set_attr "type" "neon_abd<q>")]
651 )
652
653 (define_insn "aarch64_<sur>abal<mode>_4"
654 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
655 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
656 (match_operand:VDQV_S 2 "register_operand" "w")
657 (match_operand:<VDBLW> 3 "register_operand" "0")]
658 ABAL))]
659 "TARGET_SIMD"
660 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>"
661 [(set_attr "type" "neon_arith_acc<q>")]
662 )
663
664 (define_insn "aarch64_<sur>adalp<mode>_3"
665 [(set (match_operand:<VDBLW> 0 "register_operand" "=w")
666 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w")
667 (match_operand:<VDBLW> 2 "register_operand" "0")]
668 ADALP))]
669 "TARGET_SIMD"
670 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>"
671 [(set_attr "type" "neon_reduc_add<q>")]
672 )
673
674 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI
675 ;; inputs in operands 1 and 2. The sequence also has to perform a widening
676 ;; reduction of the difference into a V4SI vector and accumulate that into
677 ;; operand 3 before copying that into the result operand 0.
678 ;; Perform that with a sequence of:
679 ;; UABDL2 tmp.8h, op1.16b, op2.16b
680 ;; UABAL tmp.8h, op1.16b, op2.16b
681 ;; UADALP op3.4s, tmp.8h
682 ;; MOV op0, op3 // should be eliminated in later passes.
683 ;; The signed version just uses the signed variants of the above instructions.
684
685 (define_expand "<sur>sadv16qi"
686 [(use (match_operand:V4SI 0 "register_operand"))
687 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
688 (use (match_operand:V16QI 2 "register_operand"))] ABAL)
689 (use (match_operand:V4SI 3 "register_operand"))]
690 "TARGET_SIMD"
691 {
692 rtx reduc = gen_reg_rtx (V8HImode);
693 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1],
694 operands[2]));
695 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1],
696 operands[2], reduc));
697 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc,
698 operands[3]));
699 emit_move_insn (operands[0], operands[3]);
700 DONE;
701 }
702 )
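Reviewer note: the comment above describes the UABDL2/UABAL/UADALP sequence this <sur>sadv16qi expander emits for the vectorizer's sum-of-absolute-differences reduction. A plain C sketch of the kind of loop that reduction serves (sad_u8 is an illustrative name; whether a given compiler vectorizes it this way depends on options and cost models):

/* Reviewer sketch: a scalar sum-of-absolute-differences reduction of the
   shape the usadv16qi/ssadv16qi expanders are meant to serve.  */
#include <stdint.h>
#include <stdlib.h>

int
sad_u8 (const uint8_t *a, const uint8_t *b, int n)
{
  int sum = 0;
  for (int i = 0; i < n; i++)
    sum += abs (a[i] - b[i]);
  return sum;
}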
703
620 (define_insn "aba<mode>_3" 704 (define_insn "aba<mode>_3"
621 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") 705 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
622 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI 706 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI
623 (match_operand:VDQ_BHSI 1 "register_operand" "w") 707 (match_operand:VDQ_BHSI 1 "register_operand" "w")
624 (match_operand:VDQ_BHSI 2 "register_operand" "w"))) 708 (match_operand:VDQ_BHSI 2 "register_operand" "w")))
649 switch (which_alternative) 733 switch (which_alternative)
650 { 734 {
651 case 0: 735 case 0:
652 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"; 736 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
653 case 1: 737 case 1:
654 return aarch64_output_simd_mov_immediate (operands[2], 738 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
655 <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC); 739 AARCH64_CHECK_BIC);
656 default: 740 default:
657 gcc_unreachable (); 741 gcc_unreachable ();
658 } 742 }
659 } 743 }
660 [(set_attr "type" "neon_logic<q>")] 744 [(set_attr "type" "neon_logic<q>")]
670 switch (which_alternative) 754 switch (which_alternative)
671 { 755 {
672 case 0: 756 case 0:
673 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"; 757 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>";
674 case 1: 758 case 1:
675 return aarch64_output_simd_mov_immediate (operands[2], 759 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>,
676 <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR); 760 AARCH64_CHECK_ORR);
677 default: 761 default:
678 gcc_unreachable (); 762 gcc_unreachable ();
679 } 763 }
680 } 764 }
681 [(set_attr "type" "neon_logic<q>")] 765 [(set_attr "type" "neon_logic<q>")]
697 "not\t%0.<Vbtype>, %1.<Vbtype>" 781 "not\t%0.<Vbtype>, %1.<Vbtype>"
698 [(set_attr "type" "neon_logic<q>")] 782 [(set_attr "type" "neon_logic<q>")]
699 ) 783 )
700 784
701 (define_insn "aarch64_simd_vec_set<mode>" 785 (define_insn "aarch64_simd_vec_set<mode>"
702 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w") 786 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w")
703 (vec_merge:VDQ_BHSI 787 (vec_merge:VALL_F16
704 (vec_duplicate:VDQ_BHSI 788 (vec_duplicate:VALL_F16
705 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv")) 789 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv"))
706 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0") 790 (match_operand:VALL_F16 3 "register_operand" "0,0,0")
707 (match_operand:SI 2 "immediate_operand" "i,i,i")))] 791 (match_operand:SI 2 "immediate_operand" "i,i,i")))]
708 "TARGET_SIMD" 792 "TARGET_SIMD"
709 { 793 {
710 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2]))); 794 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
711 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); 795 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
712 switch (which_alternative) 796 switch (which_alternative)
713 { 797 {
714 case 0: 798 case 0:
715 return "ins\\t%0.<Vetype>[%p2], %w1"; 799 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
716 case 1: 800 case 1:
717 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]"; 801 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1";
718 case 2: 802 case 2:
719 return "ld1\\t{%0.<Vetype>}[%p2], %1"; 803 return "ld1\\t{%0.<Vetype>}[%p2], %1";
720 default: 804 default:
721 gcc_unreachable (); 805 gcc_unreachable ();
722 } 806 }
723 } 807 }
724 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")] 808 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")]
725 ) 809 )
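Reviewer note: aarch64_simd_vec_set<mode> now covers all vector modes (VALL_F16) and its first two alternatives are swapped, so a SIMD-register source uses a lane-to-lane ins and a general-register source uses ins from %<vwcore>1. From C this is the insert-lane operation exposed by the ACLE vset(q)_lane intrinsics, for example (set_lane1 is an illustrative name; the single-instruction outcome is the expected, not guaranteed, code generation):

/* Reviewer example: lane insertion from C.  With a general-register
   argument this is expected to become "ins v0.s[1], w0", and a
   lane-to-lane "ins" when the value is already in a SIMD register.  */
#include <arm_neon.h>

int32x4_t
set_lane1 (int32x4_t v, int32_t x)
{
  return vsetq_lane_s32 (x, v, 1);
}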
726 810
727 (define_insn "*aarch64_simd_vec_copy_lane<mode>" 811 (define_insn "*aarch64_simd_vec_copy_lane<mode>"
728 [(set (match_operand:VALL_F16 0 "register_operand" "=w") 812 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
729 (vec_merge:VALL_F16 813 (vec_merge:VALL_F16
734 [(match_operand:SI 4 "immediate_operand" "i")]))) 818 [(match_operand:SI 4 "immediate_operand" "i")])))
735 (match_operand:VALL_F16 1 "register_operand" "0") 819 (match_operand:VALL_F16 1 "register_operand" "0")
736 (match_operand:SI 2 "immediate_operand" "i")))] 820 (match_operand:SI 2 "immediate_operand" "i")))]
737 "TARGET_SIMD" 821 "TARGET_SIMD"
738 { 822 {
739 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2]))); 823 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
740 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt); 824 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
741 operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4]))); 825 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4]));
742 826
743 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]"; 827 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
744 } 828 }
745 [(set_attr "type" "neon_ins<q>")] 829 [(set_attr "type" "neon_ins<q>")]
746 ) 830 )
755 [(match_operand:SI 4 "immediate_operand" "i")]))) 839 [(match_operand:SI 4 "immediate_operand" "i")])))
756 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0") 840 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0")
757 (match_operand:SI 2 "immediate_operand" "i")))] 841 (match_operand:SI 2 "immediate_operand" "i")))]
758 "TARGET_SIMD" 842 "TARGET_SIMD"
759 { 843 {
760 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2]))); 844 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2])));
761 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt); 845 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt);
762 operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, 846 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode,
763 INTVAL (operands[4]))); 847 INTVAL (operands[4]));
764 848
765 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]"; 849 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]";
766 } 850 }
767 [(set_attr "type" "neon_ins<q>")] 851 [(set_attr "type" "neon_ins<q>")]
768 ) 852 )
1033 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2])); 1117 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2]));
1034 DONE; 1118 DONE;
1035 } 1119 }
1036 ) 1120 )
1037 1121
1038 (define_expand "vec_set<mode>"
1039 [(match_operand:VDQ_BHSI 0 "register_operand")
1040 (match_operand:<VEL> 1 "register_operand")
1041 (match_operand:SI 2 "immediate_operand")]
1042 "TARGET_SIMD"
1043 {
1044 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1045 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1],
1046 GEN_INT (elem), operands[0]));
1047 DONE;
1048 }
1049 )
1050
1051 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero. 1122 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero.
1052 (define_insn "vec_shr_<mode>" 1123 (define_insn "vec_shr_<mode>"
1053 [(set (match_operand:VD 0 "register_operand" "=w") 1124 [(set (match_operand:VD 0 "register_operand" "=w")
1054 (unspec:VD [(match_operand:VD 1 "register_operand" "w") 1125 (unspec:VD [(match_operand:VD 1 "register_operand" "w")
1055 (match_operand:SI 2 "immediate_operand" "i")] 1126 (match_operand:SI 2 "immediate_operand" "i")]
1062 return "ushr %d0, %d1, %2"; 1133 return "ushr %d0, %d1, %2";
1063 } 1134 }
1064 [(set_attr "type" "neon_shift_imm")] 1135 [(set_attr "type" "neon_shift_imm")]
1065 ) 1136 )
1066 1137
1067 (define_insn "aarch64_simd_vec_setv2di"
1068 [(set (match_operand:V2DI 0 "register_operand" "=w,w")
1069 (vec_merge:V2DI
1070 (vec_duplicate:V2DI
1071 (match_operand:DI 1 "register_operand" "r,w"))
1072 (match_operand:V2DI 3 "register_operand" "0,0")
1073 (match_operand:SI 2 "immediate_operand" "i,i")))]
1074 "TARGET_SIMD"
1075 {
1076 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2])));
1077 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt);
1078 switch (which_alternative)
1079 {
1080 case 0:
1081 return "ins\\t%0.d[%p2], %1";
1082 case 1:
1083 return "ins\\t%0.d[%p2], %1.d[0]";
1084 default:
1085 gcc_unreachable ();
1086 }
1087 }
1088 [(set_attr "type" "neon_from_gp, neon_ins_q")]
1089 )
1090
1091 (define_expand "vec_setv2di"
1092 [(match_operand:V2DI 0 "register_operand")
1093 (match_operand:DI 1 "register_operand")
1094 (match_operand:SI 2 "immediate_operand")]
1095 "TARGET_SIMD"
1096 {
1097 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1098 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1],
1099 GEN_INT (elem), operands[0]));
1100 DONE;
1101 }
1102 )
1103
1104 (define_insn "aarch64_simd_vec_set<mode>"
1105 [(set (match_operand:VDQF_F16 0 "register_operand" "=w")
1106 (vec_merge:VDQF_F16
1107 (vec_duplicate:VDQF_F16
1108 (match_operand:<VEL> 1 "register_operand" "w"))
1109 (match_operand:VDQF_F16 3 "register_operand" "0")
1110 (match_operand:SI 2 "immediate_operand" "i")))]
1111 "TARGET_SIMD"
1112 {
1113 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2])));
1114
1115 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt);
1116 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]";
1117 }
1118 [(set_attr "type" "neon_ins<q>")]
1119 )
1120
1121 (define_expand "vec_set<mode>" 1138 (define_expand "vec_set<mode>"
1122 [(match_operand:VDQF_F16 0 "register_operand" "+w") 1139 [(match_operand:VALL_F16 0 "register_operand" "+w")
1123 (match_operand:<VEL> 1 "register_operand" "w") 1140 (match_operand:<VEL> 1 "register_operand" "w")
1124 (match_operand:SI 2 "immediate_operand" "")] 1141 (match_operand:SI 2 "immediate_operand" "")]
1125 "TARGET_SIMD" 1142 "TARGET_SIMD"
1126 { 1143 {
1127 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); 1144 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]);
1153 (parallel [(match_operand:SI 2 "immediate_operand")]))) 1170 (parallel [(match_operand:SI 2 "immediate_operand")])))
1154 (match_operand:VDQHS 3 "register_operand" "w")) 1171 (match_operand:VDQHS 3 "register_operand" "w"))
1155 (match_operand:VDQHS 4 "register_operand" "0")))] 1172 (match_operand:VDQHS 4 "register_operand" "0")))]
1156 "TARGET_SIMD" 1173 "TARGET_SIMD"
1157 { 1174 {
1158 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); 1175 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1159 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; 1176 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1160 } 1177 }
1161 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] 1178 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1162 ) 1179 )
1163 1180
1171 (parallel [(match_operand:SI 2 "immediate_operand")]))) 1188 (parallel [(match_operand:SI 2 "immediate_operand")])))
1172 (match_operand:VDQHS 3 "register_operand" "w")) 1189 (match_operand:VDQHS 3 "register_operand" "w"))
1173 (match_operand:VDQHS 4 "register_operand" "0")))] 1190 (match_operand:VDQHS 4 "register_operand" "0")))]
1174 "TARGET_SIMD" 1191 "TARGET_SIMD"
1175 { 1192 {
1176 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, 1193 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1177 INTVAL (operands[2])));
1178 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; 1194 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1179 } 1195 }
1180 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] 1196 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1181 ) 1197 )
1182 1198
1212 (match_operand:VDQHS 1 "register_operand" "<h_con>") 1228 (match_operand:VDQHS 1 "register_operand" "<h_con>")
1213 (parallel [(match_operand:SI 2 "immediate_operand")]))) 1229 (parallel [(match_operand:SI 2 "immediate_operand")])))
1214 (match_operand:VDQHS 3 "register_operand" "w"))))] 1230 (match_operand:VDQHS 3 "register_operand" "w"))))]
1215 "TARGET_SIMD" 1231 "TARGET_SIMD"
1216 { 1232 {
1217 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); 1233 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1218 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; 1234 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1219 } 1235 }
1220 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] 1236 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1221 ) 1237 )
1222 1238
1230 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") 1246 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1231 (parallel [(match_operand:SI 2 "immediate_operand")]))) 1247 (parallel [(match_operand:SI 2 "immediate_operand")])))
1232 (match_operand:VDQHS 3 "register_operand" "w"))))] 1248 (match_operand:VDQHS 3 "register_operand" "w"))))]
1233 "TARGET_SIMD" 1249 "TARGET_SIMD"
1234 { 1250 {
1235 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, 1251 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1236 INTVAL (operands[2])));
1237 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; 1252 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1238 } 1253 }
1239 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] 1254 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")]
1240 ) 1255 )
1241 1256
1335 "@ 1350 "@
1336 dup\\t%d0, %1.d[0] 1351 dup\\t%d0, %1.d[0]
1337 fmov\\t%d0, %1 1352 fmov\\t%d0, %1
1338 dup\\t%d0, %1" 1353 dup\\t%d0, %1"
1339 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") 1354 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1340 (set_attr "simd" "yes,*,yes") 1355 (set_attr "length" "4")
1341 (set_attr "fp" "*,yes,*") 1356 (set_attr "arch" "simd,fp,simd")]
1342 (set_attr "length" "4")]
1343 ) 1357 )
1344 1358
1345 (define_insn "move_lo_quad_internal_<mode>" 1359 (define_insn "move_lo_quad_internal_<mode>"
1346 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w") 1360 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1347 (vec_concat:VQ_2E 1361 (vec_concat:VQ_2E
1351 "@ 1365 "@
1352 dup\\t%d0, %1.d[0] 1366 dup\\t%d0, %1.d[0]
1353 fmov\\t%d0, %1 1367 fmov\\t%d0, %1
1354 dup\\t%d0, %1" 1368 dup\\t%d0, %1"
1355 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") 1369 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1356 (set_attr "simd" "yes,*,yes") 1370 (set_attr "length" "4")
1357 (set_attr "fp" "*,yes,*") 1371 (set_attr "arch" "simd,fp,simd")]
1358 (set_attr "length" "4")]
1359 ) 1372 )
1360 1373
1361 (define_insn "move_lo_quad_internal_be_<mode>" 1374 (define_insn "move_lo_quad_internal_be_<mode>"
1362 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w") 1375 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w")
1363 (vec_concat:VQ_NO2E 1376 (vec_concat:VQ_NO2E
1367 "@ 1380 "@
1368 dup\\t%d0, %1.d[0] 1381 dup\\t%d0, %1.d[0]
1369 fmov\\t%d0, %1 1382 fmov\\t%d0, %1
1370 dup\\t%d0, %1" 1383 dup\\t%d0, %1"
1371 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") 1384 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1372 (set_attr "simd" "yes,*,yes") 1385 (set_attr "length" "4")
1373 (set_attr "fp" "*,yes,*") 1386 (set_attr "arch" "simd,fp,simd")]
1374 (set_attr "length" "4")]
1375 ) 1387 )
1376 1388
1377 (define_insn "move_lo_quad_internal_be_<mode>" 1389 (define_insn "move_lo_quad_internal_be_<mode>"
1378 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w") 1390 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w")
1379 (vec_concat:VQ_2E 1391 (vec_concat:VQ_2E
1383 "@ 1395 "@
1384 dup\\t%d0, %1.d[0] 1396 dup\\t%d0, %1.d[0]
1385 fmov\\t%d0, %1 1397 fmov\\t%d0, %1
1386 dup\\t%d0, %1" 1398 dup\\t%d0, %1"
1387 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") 1399 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>")
1388 (set_attr "simd" "yes,*,yes") 1400 (set_attr "length" "4")
1389 (set_attr "fp" "*,yes,*") 1401 (set_attr "arch" "simd,fp,simd")]
1390 (set_attr "length" "4")]
1391 ) 1402 )
1392 1403
1393 (define_expand "move_lo_quad_<mode>" 1404 (define_expand "move_lo_quad_<mode>"
1394 [(match_operand:VQ 0 "register_operand") 1405 [(match_operand:VQ 0 "register_operand")
1395 (match_operand:VQ 1 "register_operand")] 1406 (match_operand:VQ 1 "register_operand")]
1439 (define_expand "move_hi_quad_<mode>" 1450 (define_expand "move_hi_quad_<mode>"
1440 [(match_operand:VQ 0 "register_operand" "") 1451 [(match_operand:VQ 0 "register_operand" "")
1441 (match_operand:<VHALF> 1 "register_operand" "")] 1452 (match_operand:<VHALF> 1 "register_operand" "")]
1442 "TARGET_SIMD" 1453 "TARGET_SIMD"
1443 { 1454 {
1444 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); 1455 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1445 if (BYTES_BIG_ENDIAN) 1456 if (BYTES_BIG_ENDIAN)
1446 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0], 1457 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0],
1447 operands[1], p)); 1458 operands[1], p));
1448 else 1459 else
1449 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0], 1460 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0],
1503 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1514 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1504 (match_operand:VQW 1 "register_operand" "w") 1515 (match_operand:VQW 1 "register_operand" "w")
1505 (match_operand:VQW 2 "vect_par_cnst_lo_half" "") 1516 (match_operand:VQW 2 "vect_par_cnst_lo_half" "")
1506 )))] 1517 )))]
1507 "TARGET_SIMD" 1518 "TARGET_SIMD"
1508 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0" 1519 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>"
1509 [(set_attr "type" "neon_shift_imm_long")] 1520 [(set_attr "type" "neon_shift_imm_long")]
1510 ) 1521 )
1511 1522
1512 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>" 1523 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>"
1513 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 1524 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
1514 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> 1525 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
1515 (match_operand:VQW 1 "register_operand" "w") 1526 (match_operand:VQW 1 "register_operand" "w")
1516 (match_operand:VQW 2 "vect_par_cnst_hi_half" "") 1527 (match_operand:VQW 2 "vect_par_cnst_hi_half" "")
1517 )))] 1528 )))]
1518 "TARGET_SIMD" 1529 "TARGET_SIMD"
1519 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0" 1530 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>"
1520 [(set_attr "type" "neon_shift_imm_long")] 1531 [(set_attr "type" "neon_shift_imm_long")]
1521 ) 1532 )
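Reviewer note: the unpack templates now print the <su>xtl/<su>xtl2 aliases instead of <su>shll/<su>shll2 with a zero shift; uxtl is the architectural alias of ushll #0, so the effect of the emitted code is unchanged. A C example of the widening copy these patterns implement (widen_u8_to_u16 is an illustrative name; vectorization of it is option-dependent):

/* Reviewer sketch: the widening move served by the vec_unpack patterns.  */
#include <stdint.h>

void
widen_u8_to_u16 (uint16_t *dst, const uint8_t *src, int n)
{
  for (int i = 0; i < n; i++)
    dst[i] = src[i];
}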
1522 1533
1523 (define_expand "vec_unpack<su>_hi_<mode>" 1534 (define_expand "vec_unpack<su>_hi_<mode>"
1524 [(match_operand:<VWIDE> 0 "register_operand" "") 1535 [(match_operand:<VWIDE> 0 "register_operand" "")
1525 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))] 1536 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))]
1526 "TARGET_SIMD" 1537 "TARGET_SIMD"
1527 { 1538 {
1528 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 1539 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1529 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0], 1540 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0],
1530 operands[1], p)); 1541 operands[1], p));
1531 DONE; 1542 DONE;
1532 } 1543 }
1533 ) 1544 )
1535 (define_expand "vec_unpack<su>_lo_<mode>" 1546 (define_expand "vec_unpack<su>_lo_<mode>"
1536 [(match_operand:<VWIDE> 0 "register_operand" "") 1547 [(match_operand:<VWIDE> 0 "register_operand" "")
1537 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))] 1548 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))]
1538 "TARGET_SIMD" 1549 "TARGET_SIMD"
1539 { 1550 {
1540 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); 1551 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1541 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0], 1552 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0],
1542 operands[1], p)); 1553 operands[1], p));
1543 DONE; 1554 DONE;
1544 } 1555 }
1545 ) 1556 )
1655 [(match_operand:<VWIDE> 0 "register_operand" "") 1666 [(match_operand:<VWIDE> 0 "register_operand" "")
1656 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" "")) 1667 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1657 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))] 1668 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1658 "TARGET_SIMD" 1669 "TARGET_SIMD"
1659 { 1670 {
1660 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); 1671 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
1661 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0], 1672 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0],
1662 operands[1], 1673 operands[1],
1663 operands[2], p)); 1674 operands[2], p));
1664 DONE; 1675 DONE;
1665 } 1676 }
1682 [(match_operand:<VWIDE> 0 "register_operand" "") 1693 [(match_operand:<VWIDE> 0 "register_operand" "")
1683 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" "")) 1694 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))
1684 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))] 1695 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))]
1685 "TARGET_SIMD" 1696 "TARGET_SIMD"
1686 { 1697 {
1687 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 1698 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
1688 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0], 1699 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0],
1689 operands[1], 1700 operands[1],
1690 operands[2], p)); 1701 operands[2], p));
1691 DONE; 1702 DONE;
1692 1703
1801 (parallel [(match_operand:SI 2 "immediate_operand")]))) 1812 (parallel [(match_operand:SI 2 "immediate_operand")])))
1802 (match_operand:VDQF 3 "register_operand" "w") 1813 (match_operand:VDQF 3 "register_operand" "w")
1803 (match_operand:VDQF 4 "register_operand" "0")))] 1814 (match_operand:VDQF 4 "register_operand" "0")))]
1804 "TARGET_SIMD" 1815 "TARGET_SIMD"
1805 { 1816 {
1806 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); 1817 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1807 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; 1818 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1808 } 1819 }
1809 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] 1820 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1810 ) 1821 )
1811 1822
1818 (parallel [(match_operand:SI 2 "immediate_operand")]))) 1829 (parallel [(match_operand:SI 2 "immediate_operand")])))
1819 (match_operand:VDQSF 3 "register_operand" "w") 1830 (match_operand:VDQSF 3 "register_operand" "w")
1820 (match_operand:VDQSF 4 "register_operand" "0")))] 1831 (match_operand:VDQSF 4 "register_operand" "0")))]
1821 "TARGET_SIMD" 1832 "TARGET_SIMD"
1822 { 1833 {
1823 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, 1834 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1824 INTVAL (operands[2])));
1825 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; 1835 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1826 } 1836 }
1827 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] 1837 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1828 ) 1838 )
1829 1839
1847 (parallel [(match_operand:SI 2 "immediate_operand")])) 1857 (parallel [(match_operand:SI 2 "immediate_operand")]))
1848 (match_operand:DF 3 "register_operand" "w") 1858 (match_operand:DF 3 "register_operand" "w")
1849 (match_operand:DF 4 "register_operand" "0")))] 1859 (match_operand:DF 4 "register_operand" "0")))]
1850 "TARGET_SIMD" 1860 "TARGET_SIMD"
1851 { 1861 {
1852 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); 1862 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1853 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]"; 1863 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]";
1854 } 1864 }
1855 [(set_attr "type" "neon_fp_mla_d_scalar_q")] 1865 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1856 ) 1866 )
1857 1867
1858 (define_insn "fnma<mode>4" 1868 (define_insn "fnma<mode>4"
1859 [(set (match_operand:VHSDF 0 "register_operand" "=w") 1869 [(set (match_operand:VHSDF 0 "register_operand" "=w")
1860 (fma:VHSDF 1870 (fma:VHSDF
1861 (match_operand:VHSDF 1 "register_operand" "w") 1871 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
1862 (neg:VHSDF 1872 (match_operand:VHSDF 2 "register_operand" "w")
1863 (match_operand:VHSDF 2 "register_operand" "w"))
1864 (match_operand:VHSDF 3 "register_operand" "0")))] 1873 (match_operand:VHSDF 3 "register_operand" "0")))]
1865 "TARGET_SIMD" 1874 "TARGET_SIMD"
1866 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 1875 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
1867 [(set_attr "type" "neon_fp_mla_<stype><q>")] 1876 [(set_attr "type" "neon_fp_mla_<stype><q>")]
1868 ) 1877 )
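Reviewer note: the fnma<mode>4 change only moves the neg from the second multiplicand to the first, which leaves the computed value, acc - a * b per element, unchanged while matching the canonical RTL form. A plain C statement of that computation (fnma_f32 is an illustrative name; mapping it to a single fmls assumes fused multiply-add contraction is permitted):

/* Reviewer sketch: the per-element computation fnma<mode>4 (fmls) provides.  */
void
fnma_f32 (float *acc, const float *a, const float *b, int n)
{
  for (int i = 0; i < n; i++)
    acc[i] = acc[i] - a[i] * b[i];
}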
1877 (match_operand:VDQF 1 "register_operand" "<h_con>") 1886 (match_operand:VDQF 1 "register_operand" "<h_con>")
1878 (parallel [(match_operand:SI 2 "immediate_operand")]))) 1887 (parallel [(match_operand:SI 2 "immediate_operand")])))
1879 (match_operand:VDQF 4 "register_operand" "0")))] 1888 (match_operand:VDQF 4 "register_operand" "0")))]
1880 "TARGET_SIMD" 1889 "TARGET_SIMD"
1881 { 1890 {
1882 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); 1891 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
1883 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; 1892 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1884 } 1893 }
1885 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] 1894 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1886 ) 1895 )
1887 1896
1895 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") 1904 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>")
1896 (parallel [(match_operand:SI 2 "immediate_operand")]))) 1905 (parallel [(match_operand:SI 2 "immediate_operand")])))
1897 (match_operand:VDQSF 4 "register_operand" "0")))] 1906 (match_operand:VDQSF 4 "register_operand" "0")))]
1898 "TARGET_SIMD" 1907 "TARGET_SIMD"
1899 { 1908 {
1900 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, 1909 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2]));
1901 INTVAL (operands[2])));
1902 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; 1910 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]";
1903 } 1911 }
1904 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] 1912 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")]
1905 ) 1913 )
1906 1914
1926 (neg:DF 1934 (neg:DF
1927 (match_operand:DF 3 "register_operand" "w")) 1935 (match_operand:DF 3 "register_operand" "w"))
1928 (match_operand:DF 4 "register_operand" "0")))] 1936 (match_operand:DF 4 "register_operand" "0")))]
1929 "TARGET_SIMD" 1937 "TARGET_SIMD"
1930 { 1938 {
1931 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); 1939 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2]));
1932 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]"; 1940 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]";
1933 } 1941 }
1934 [(set_attr "type" "neon_fp_mla_d_scalar_q")] 1942 [(set_attr "type" "neon_fp_mla_d_scalar_q")]
1935 ) 1943 )
1936 1944
2088 (define_expand "vec_unpacks_lo_<mode>" 2096 (define_expand "vec_unpacks_lo_<mode>"
2089 [(match_operand:<VWIDE> 0 "register_operand" "") 2097 [(match_operand:<VWIDE> 0 "register_operand" "")
2090 (match_operand:VQ_HSF 1 "register_operand" "")] 2098 (match_operand:VQ_HSF 1 "register_operand" "")]
2091 "TARGET_SIMD" 2099 "TARGET_SIMD"
2092 { 2100 {
2093 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); 2101 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
2094 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0], 2102 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2095 operands[1], p)); 2103 operands[1], p));
2096 DONE; 2104 DONE;
2097 } 2105 }
2098 ) 2106 )
2111 (define_expand "vec_unpacks_hi_<mode>" 2119 (define_expand "vec_unpacks_hi_<mode>"
2112 [(match_operand:<VWIDE> 0 "register_operand" "") 2120 [(match_operand:<VWIDE> 0 "register_operand" "")
2113 (match_operand:VQ_HSF 1 "register_operand" "")] 2121 (match_operand:VQ_HSF 1 "register_operand" "")]
2114 "TARGET_SIMD" 2122 "TARGET_SIMD"
2115 { 2123 {
2116 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 2124 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
2117 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0], 2125 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0],
2118 operands[1], p)); 2126 operands[1], p));
2119 DONE; 2127 DONE;
2120 } 2128 }
2121 ) 2129 )
2219 2227
2220 ;; FP Max/Min 2228 ;; FP Max/Min
2221 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An 2229 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An
2222 ;; expression like: 2230 ;; expression like:
2223 ;; a = (b < c) ? b : c; 2231 ;; a = (b < c) ? b : c;
2224 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled 2232 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and
2225 ;; either explicitly or indirectly via -ffast-math. 2233 ;; -fno-signed-zeros are enabled either explicitly or indirectly via
2234 ;; -ffast-math.
2226 ;; 2235 ;;
2227 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL. 2236 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL.
2228 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which 2237 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which
2229 ;; operand will be returned when both operands are zero (i.e. they may not 2238 ;; operand will be returned when both operands are zero (i.e. they may not
2230 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC 2239 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC
2259 [(match_operand:<VEL> 0 "register_operand" "=w") 2268 [(match_operand:<VEL> 0 "register_operand" "=w")
2260 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")] 2269 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")]
2261 UNSPEC_ADDV)] 2270 UNSPEC_ADDV)]
2262 "TARGET_SIMD" 2271 "TARGET_SIMD"
2263 { 2272 {
2264 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); 2273 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2265 rtx scratch = gen_reg_rtx (<MODE>mode); 2274 rtx scratch = gen_reg_rtx (<MODE>mode);
2266 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1])); 2275 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1]));
2267 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); 2276 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2268 DONE; 2277 DONE;
2269 } 2278 }
2310 [(set (match_operand:SF 0 "register_operand") 2319 [(set (match_operand:SF 0 "register_operand")
2311 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")] 2320 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")]
2312 UNSPEC_FADDV))] 2321 UNSPEC_FADDV))]
2313 "TARGET_SIMD" 2322 "TARGET_SIMD"
2314 { 2323 {
2315 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0)); 2324 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0);
2316 rtx scratch = gen_reg_rtx (V4SFmode); 2325 rtx scratch = gen_reg_rtx (V4SFmode);
2317 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1])); 2326 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1]));
2318 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch)); 2327 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch));
2319 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt)); 2328 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt));
2320 DONE; 2329 DONE;
2345 ) 2354 )
2346 2355
2347 ;; 'across lanes' max and min ops. 2356 ;; 'across lanes' max and min ops.
2348 2357
2349 ;; Template for outputting a scalar, so we can create __builtins which can be 2358 ;; Template for outputting a scalar, so we can create __builtins which can be
2350 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin). 2359 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin).
2351 (define_expand "reduc_<maxmin_uns>_scal_<mode>" 2360 (define_expand "reduc_<maxmin_uns>_scal_<mode>"
2352 [(match_operand:<VEL> 0 "register_operand") 2361 [(match_operand:<VEL> 0 "register_operand")
2353 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] 2362 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")]
2354 FMAXMINV)] 2363 FMAXMINV)]
2355 "TARGET_SIMD" 2364 "TARGET_SIMD"
2356 { 2365 {
2357 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); 2366 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2358 rtx scratch = gen_reg_rtx (<MODE>mode); 2367 rtx scratch = gen_reg_rtx (<MODE>mode);
2359 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch, 2368 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2360 operands[1])); 2369 operands[1]));
2361 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); 2370 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2362 DONE; 2371 DONE;
2368 [(match_operand:<VEL> 0 "register_operand") 2377 [(match_operand:<VEL> 0 "register_operand")
2369 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")] 2378 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")]
2370 MAXMINV)] 2379 MAXMINV)]
2371 "TARGET_SIMD" 2380 "TARGET_SIMD"
2372 { 2381 {
2373 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); 2382 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0);
2374 rtx scratch = gen_reg_rtx (<MODE>mode); 2383 rtx scratch = gen_reg_rtx (<MODE>mode);
2375 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch, 2384 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch,
2376 operands[1])); 2385 operands[1]));
2377 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); 2386 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt));
2378 DONE; 2387 DONE;
2425 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander. 2434 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander.
2426 ;; Some forms of straight-line code may generate the equivalent form 2435 ;; Some forms of straight-line code may generate the equivalent form
2427 ;; in *aarch64_simd_bsl<mode>_alt. 2436 ;; in *aarch64_simd_bsl<mode>_alt.
2428 2437
2429 (define_insn "aarch64_simd_bsl<mode>_internal" 2438 (define_insn "aarch64_simd_bsl<mode>_internal"
2430 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w") 2439 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2431 (xor:VSDQ_I_DI 2440 (xor:VDQ_I
2432 (and:VSDQ_I_DI 2441 (and:VDQ_I
2433 (xor:VSDQ_I_DI 2442 (xor:VDQ_I
2434 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w") 2443 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w")
2435 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0")) 2444 (match_operand:VDQ_I 2 "register_operand" "w,w,0"))
2436 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w")) 2445 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2437 (match_dup:<V_INT_EQUIV> 3) 2446 (match_dup:<V_INT_EQUIV> 3)
2438 ))] 2447 ))]
2439 "TARGET_SIMD" 2448 "TARGET_SIMD"
2440 "@ 2449 "@
2441 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype> 2450 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
2449 ;; the outer XOR matches the second operand of the inner XOR rather than 2458 ;; the outer XOR matches the second operand of the inner XOR rather than
2450 ;; the first. The two are equivalent but since recog doesn't try all 2459 ;; the first. The two are equivalent but since recog doesn't try all
2451 ;; permutations of commutative operations, we have to have a separate pattern. 2460 ;; permutations of commutative operations, we have to have a separate pattern.
2452 2461
2453 (define_insn "*aarch64_simd_bsl<mode>_alt" 2462 (define_insn "*aarch64_simd_bsl<mode>_alt"
2454 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w") 2463 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w")
2455 (xor:VSDQ_I_DI 2464 (xor:VDQ_I
2456 (and:VSDQ_I_DI 2465 (and:VDQ_I
2457 (xor:VSDQ_I_DI 2466 (xor:VDQ_I
2458 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0") 2467 (match_operand:VDQ_I 3 "register_operand" "w,w,0")
2459 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w")) 2468 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w"))
2460 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w")) 2469 (match_operand:VDQ_I 1 "register_operand" "0,w,w"))
2461 (match_dup:VSDQ_I_DI 2)))] 2470 (match_dup:<V_INT_EQUIV> 2)))]
2462 "TARGET_SIMD" 2471 "TARGET_SIMD"
2463 "@ 2472 "@
2464 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype> 2473 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype>
2465 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype> 2474 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype>
2466 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>" 2475 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>"
2467 [(set_attr "type" "neon_bsl<q>")] 2476 [(set_attr "type" "neon_bsl<q>")]
2477 )
2478
2479 ;; DImode is special, we want to avoid computing operations which are
2480 ;; more naturally computed in general purpose registers in the vector
2481 ;; registers. If we do that, we need to move all three operands from general
2482 ;; purpose registers to vector registers, then back again. However, we
2483 ;; don't want to make this pattern an UNSPEC as we'd lose scope for
2484 ;; optimizations based on the component operations of a BSL.
2485 ;;
2486 ;; That means we need a splitter back to the individual operations, if they
2487 ;; would be better calculated on the integer side.
2488
2489 (define_insn_and_split "aarch64_simd_bsldi_internal"
2490 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2491 (xor:DI
2492 (and:DI
2493 (xor:DI
2494 (match_operand:DI 3 "register_operand" "w,0,w,r")
2495 (match_operand:DI 2 "register_operand" "w,w,0,r"))
2496 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2497 (match_dup:DI 3)
2498 ))]
2499 "TARGET_SIMD"
2500 "@
2501 bsl\\t%0.8b, %2.8b, %3.8b
2502 bit\\t%0.8b, %2.8b, %1.8b
2503 bif\\t%0.8b, %3.8b, %1.8b
2504 #"
2505 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2506 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)]
2507 {
2508 /* Split back to individual operations. If we're before reload, and
2509 able to create a temporary register, do so. If we're after reload,
2510 we've got an early-clobber destination register, so use that.
2511 Otherwise, we can't create pseudos and we can't yet guarantee that
2512 operands[0] is safe to write, so FAIL to split. */
2513
2514 rtx scratch;
2515 if (reload_completed)
2516 scratch = operands[0];
2517 else if (can_create_pseudo_p ())
2518 scratch = gen_reg_rtx (DImode);
2519 else
2520 FAIL;
2521
2522 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2523 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2524 emit_insn (gen_xordi3 (operands[0], scratch, operands[3]));
2525 DONE;
2526 }
2527 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2528 (set_attr "length" "4,4,4,12")]
2529 )
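
For the DImode case, a plain-C sketch of the three general-purpose-register operations the splitter above emits, avoiding the GP-to-SIMD round trip described in the comment (illustrative only):

    /* EOR, AND, EOR on the integer side, mirroring the gen_xordi3 /
       gen_anddi3 / gen_xordi3 sequence in the split body.  */
    static unsigned long long
    bsl_di_on_gp (unsigned long long op1, unsigned long long op2,
                  unsigned long long op3)
    {
      unsigned long long scratch = op2 ^ op3;
      scratch &= op1;
      return scratch ^ op3;
    }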
2530
2531 (define_insn_and_split "aarch64_simd_bsldi_alt"
2532 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r")
2533 (xor:DI
2534 (and:DI
2535 (xor:DI
2536 (match_operand:DI 3 "register_operand" "w,w,0,r")
2537 (match_operand:DI 2 "register_operand" "w,0,w,r"))
2538 (match_operand:DI 1 "register_operand" "0,w,w,r"))
2539 (match_dup:DI 2)
2540 ))]
2541 "TARGET_SIMD"
2542 "@
2543 bsl\\t%0.8b, %3.8b, %2.8b
2544 bit\\t%0.8b, %3.8b, %1.8b
2545 bif\\t%0.8b, %2.8b, %1.8b
2546 #"
2547 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))"
2548 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)]
2549 {
2550 /* Split back to individual operations. If we're before reload, and
2551 able to create a temporary register, do so. If we're after reload,
2552 we've got an early-clobber destination register, so use that.
2553 Otherwise, we can't create pseudos and we can't yet guarantee that
2554 operands[0] is safe to write, so FAIL to split. */
2555
2556 rtx scratch;
2557 if (reload_completed)
2558 scratch = operands[0];
2559 else if (can_create_pseudo_p ())
2560 scratch = gen_reg_rtx (DImode);
2561 else
2562 FAIL;
2563
2564 emit_insn (gen_xordi3 (scratch, operands[2], operands[3]));
2565 emit_insn (gen_anddi3 (scratch, scratch, operands[1]));
2566 emit_insn (gen_xordi3 (operands[0], scratch, operands[2]));
2567 DONE;
2568 }
2569 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple")
2570 (set_attr "length" "4,4,4,12")]
2468 ) 2571 )
2469 2572
2470 (define_expand "aarch64_simd_bsl<mode>" 2573 (define_expand "aarch64_simd_bsl<mode>"
2471 [(match_operand:VALLDIF 0 "register_operand") 2574 [(match_operand:VALLDIF 0 "register_operand")
2472 (match_operand:<V_INT_EQUIV> 1 "register_operand") 2575 (match_operand:<V_INT_EQUIV> 1 "register_operand")
2644 { 2747 {
2645 comparison = gen_aarch64_cmlt<mode>; 2748 comparison = gen_aarch64_cmlt<mode>;
2646 break; 2749 break;
2647 } 2750 }
2648 /* Fall through. */ 2751 /* Fall through. */
2649 case UNGE: 2752 case UNLT:
2650 std::swap (operands[2], operands[3]); 2753 std::swap (operands[2], operands[3]);
2651 /* Fall through. */ 2754 /* Fall through. */
2652 case UNLE: 2755 case UNGT:
2653 case GT: 2756 case GT:
2654 comparison = gen_aarch64_cmgt<mode>; 2757 comparison = gen_aarch64_cmgt<mode>;
2655 break; 2758 break;
2656 case LE: 2759 case LE:
2657 if (use_zero_form) 2760 if (use_zero_form)
2658 { 2761 {
2659 comparison = gen_aarch64_cmle<mode>; 2762 comparison = gen_aarch64_cmle<mode>;
2660 break; 2763 break;
2661 } 2764 }
2662 /* Fall through. */ 2765 /* Fall through. */
2663 case UNGT: 2766 case UNLE:
2664 std::swap (operands[2], operands[3]); 2767 std::swap (operands[2], operands[3]);
2665 /* Fall through. */ 2768 /* Fall through. */
2666 case UNLT: 2769 case UNGE:
2667 case GE: 2770 case GE:
2668 comparison = gen_aarch64_cmge<mode>; 2771 comparison = gen_aarch64_cmge<mode>;
2669 break; 2772 break;
2670 case NE: 2773 case NE:
2671 case EQ: 2774 case EQ:
2672 comparison = gen_aarch64_cmeq<mode>; 2775 comparison = gen_aarch64_cmeq<mode>;
2673 break; 2776 break;
2674 case UNEQ: 2777 case UNEQ:
2675 case ORDERED: 2778 case ORDERED:
2676 case UNORDERED: 2779 case UNORDERED:
2780 case LTGT:
2677 break; 2781 break;
2678 default: 2782 default:
2679 gcc_unreachable (); 2783 gcc_unreachable ();
2680 } 2784 }
2681 2785
2683 { 2787 {
2684 case UNGE: 2788 case UNGE:
2685 case UNGT: 2789 case UNGT:
2686 case UNLE: 2790 case UNLE:
2687 case UNLT: 2791 case UNLT:
2688 case NE: 2792 {
2689 /* FCM returns false for lanes which are unordered, so if we use 2793 /* All of the above must not raise any FP exceptions. Thus we first
2690 the inverse of the comparison we actually want to emit, then 2794 check each operand for NaNs and force any elements containing NaN to
2691 invert the result, we will end up with the correct result. 2795 zero before using them in the compare.
2692 Note that a NE NaN and NaN NE b are true for all a, b. 2796 Example: UN<cc> (a, b) -> UNORDERED (a, b) |
2693 2797 (cm<cc> (isnan (a) ? 0.0 : a,
2694 Our transformations are: 2798 isnan (b) ? 0.0 : b))
2695 a UNGE b -> !(b GT a) 2799 We use the following transformations for doing the comparisons:
2696 a UNGT b -> !(b GE a) 2800 a UNGE b -> a GE b
2697 a UNLE b -> !(a GT b) 2801 a UNGT b -> a GT b
2698 a UNLT b -> !(a GE b) 2802 a UNLE b -> b GE a
2699 a NE b -> !(a EQ b) */ 2803 a UNLT b -> b GT a. */
2700 gcc_assert (comparison != NULL); 2804
2701 emit_insn (comparison (operands[0], operands[2], operands[3])); 2805 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode);
2702 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); 2806 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode);
2807 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode);
2808 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2]));
2809 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3]));
2810 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1));
2811 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0,
2812 lowpart_subreg (<V_INT_EQUIV>mode,
2813 operands[2],
2814 <MODE>mode)));
2815 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1,
2816 lowpart_subreg (<V_INT_EQUIV>mode,
2817 operands[3],
2818 <MODE>mode)));
2819 gcc_assert (comparison != NULL);
2820 emit_insn (comparison (operands[0],
2821 lowpart_subreg (<MODE>mode,
2822 tmp0, <V_INT_EQUIV>mode),
2823 lowpart_subreg (<MODE>mode,
2824 tmp1, <V_INT_EQUIV>mode)));
2825 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0]));
2826 }
2703 break; 2827 break;
2704 2828
2705 case LT: 2829 case LT:
2706 case LE: 2830 case LE:
2707 case GT: 2831 case GT:
2708 case GE: 2832 case GE:
2709 case EQ: 2833 case EQ:
2834 case NE:
2710 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ. 2835 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ.
2711 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are: 2836 As a LT b <=> b GE a && a LE b <=> b GT a. Our transformations are:
2712 a GE b -> a GE b 2837 a GE b -> a GE b
2713 a GT b -> a GT b 2838 a GT b -> a GT b
2714 a LE b -> b GE a 2839 a LE b -> b GE a
2715 a LT b -> b GT a 2840 a LT b -> b GT a
2716 a EQ b -> a EQ b */ 2841 a EQ b -> a EQ b
2842 a NE b -> ~(a EQ b) */
2717 gcc_assert (comparison != NULL); 2843 gcc_assert (comparison != NULL);
2718 emit_insn (comparison (operands[0], operands[2], operands[3])); 2844 emit_insn (comparison (operands[0], operands[2], operands[3]));
2845 if (code == NE)
2846 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2719 break; 2847 break;
2720 2848
2721 case UNEQ: 2849 case LTGT:
2722 /* We first check (a > b || b > a), which is !UNEQ; inverting 2850 /* LTGT is not guaranteed not to generate an FP exception. So let's
2723 this result will then give us (a == b || a UNORDERED b). */ 2851 take the faster way: ((a > b) || (b > a)). */
2724 emit_insn (gen_aarch64_cmgt<mode> (operands[0], 2852 emit_insn (gen_aarch64_cmgt<mode> (operands[0],
2725 operands[2], operands[3])); 2853 operands[2], operands[3]));
2726 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2])); 2854 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2]));
2727 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp)); 2855 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp));
2728 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2729 break; 2856 break;
2730 2857
2858 case ORDERED:
2731 case UNORDERED: 2859 case UNORDERED:
2732 /* Operands are ORDERED iff (a > b || b >= a), so we can compute 2860 case UNEQ:
2733 UNORDERED as !ORDERED. */ 2861 /* cmeq (a, a) & cmeq (b, b). */
2734 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3])); 2862 emit_insn (gen_aarch64_cmeq<mode> (operands[0],
2735 emit_insn (gen_aarch64_cmge<mode> (operands[0], 2863 operands[2], operands[2]));
2736 operands[3], operands[2])); 2864 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3]));
2737 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp)); 2865 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp));
2738 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); 2866
2739 break; 2867 if (code == UNORDERED)
2740 2868 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0]));
2741 case ORDERED: 2869 else if (code == UNEQ)
2742 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3])); 2870 {
2743 emit_insn (gen_aarch64_cmge<mode> (operands[0], 2871 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3]));
2744 operands[3], operands[2])); 2872 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp));
2745 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp)); 2873 }
2746 break; 2874 break;
2747 2875
2748 default: 2876 default:
2749 gcc_unreachable (); 2877 gcc_unreachable ();
2750 } 2878 }
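
A scalar C model of the lowering above (hedged: one lane only, for illustration; the expander works on whole vectors using CMEQ/AND/ORN) showing why the quiet unordered comparisons cannot raise Invalid, and the direct route taken for LTGT:

    #include <math.h>
    #include <stdbool.h>

    /* a UNGE b == unordered (a, b) || (a' >= b'), where NaN operands are
       first forced to zero so that the final comparison sees no NaNs.  */
    static bool
    quiet_unge (double a, double b)
    {
      bool unordered = isnan (a) || isnan (b);
      double a0 = isnan (a) ? 0.0 : a;
      double b0 = isnan (b) ? 0.0 : b;
      return unordered || (a0 >= b0);
    }

    /* LTGT takes the direct route (a > b) || (b > a); as the comment
       above notes, it is not required to be exception-free.  */
    static bool
    ltgt (double a, double b)
    {
      return (a > b) || (b > a);
    }
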
2893 (vec_select:<VEL> 3021 (vec_select:<VEL>
2894 (match_operand:VDQQH 1 "register_operand" "w") 3022 (match_operand:VDQQH 1 "register_operand" "w")
2895 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] 3023 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2896 "TARGET_SIMD" 3024 "TARGET_SIMD"
2897 { 3025 {
2898 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); 3026 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2899 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]"; 3027 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]";
2900 } 3028 }
2901 [(set_attr "type" "neon_to_gp<q>")] 3029 [(set_attr "type" "neon_to_gp<q>")]
2902 ) 3030 )
2903 3031
2904 (define_insn "*aarch64_get_lane_zero_extendsi<mode>" 3032 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>"
2905 [(set (match_operand:SI 0 "register_operand" "=r") 3033 [(set (match_operand:GPI 0 "register_operand" "=r")
2906 (zero_extend:SI 3034 (zero_extend:GPI
2907 (vec_select:<VEL> 3035 (vec_select:<VEL>
2908 (match_operand:VDQQH 1 "register_operand" "w") 3036 (match_operand:VDQQH 1 "register_operand" "w")
2909 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] 3037 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
2910 "TARGET_SIMD" 3038 "TARGET_SIMD"
2911 { 3039 {
2912 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); 3040 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode,
3041 INTVAL (operands[2]));
2913 return "umov\\t%w0, %1.<Vetype>[%2]"; 3042 return "umov\\t%w0, %1.<Vetype>[%2]";
2914 } 3043 }
2915 [(set_attr "type" "neon_to_gp<q>")] 3044 [(set_attr "type" "neon_to_gp<q>")]
2916 ) 3045 )
2917 3046
2918 ;; Lane extraction of a value, neither sign nor zero extension 3047 ;; Lane extraction of a value, neither sign nor zero extension
2919 ;; is guaranteed, so upper bits should be considered undefined. 3048 ;; is guaranteed, so upper bits should be considered undefined.
2920 ;; RTL uses GCC vector extension indices throughout so flip only for assembly. 3049 ;; RTL uses GCC vector extension indices throughout so flip only for assembly.
2921 (define_insn "aarch64_get_lane<mode>" 3050 (define_insn "aarch64_get_lane<mode>"
2922 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") 3051 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv")
2923 (vec_select:<VEL> 3052 (vec_select:<VEL>
2924 (match_operand:VALL_F16 1 "register_operand" "w, w, w") 3053 (match_operand:VALL_F16 1 "register_operand" "w, w, w")
2925 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))] 3054 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))]
2926 "TARGET_SIMD" 3055 "TARGET_SIMD"
2927 { 3056 {
2928 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); 3057 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
2929 switch (which_alternative) 3058 switch (which_alternative)
2930 { 3059 {
2931 case 0: 3060 case 0:
2932 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]"; 3061 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
2933 case 1: 3062 case 1:
2939 } 3068 }
2940 } 3069 }
2941 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")] 3070 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")]
2942 ) 3071 )
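
The lane-number flip mentioned in the comment above amounts to the following sketch (an assumption based on the usual definition of ENDIAN_LANE_N, which aarch64_endian_lane_rtx wraps):

    /* RTL uses GCC vector-extension lane numbers; only the printed
       assembly index is reversed on big-endian targets.  */
    static int
    endian_lane_n (int nunits, int n, int bytes_big_endian)
    {
      return bytes_big_endian ? nunits - 1 - n : n;
    }
    /* e.g. lane 1 of a 4-lane vector prints as [1] on little-endian
       and as [2] on big-endian.  */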
2943 3072
3073 (define_insn "load_pair_lanes<mode>"
3074 [(set (match_operand:<VDBL> 0 "register_operand" "=w")
3075 (vec_concat:<VDBL>
3076 (match_operand:VDC 1 "memory_operand" "Utq")
3077 (match_operand:VDC 2 "memory_operand" "m")))]
3078 "TARGET_SIMD && !STRICT_ALIGNMENT
3079 && rtx_equal_p (XEXP (operands[2], 0),
3080 plus_constant (Pmode,
3081 XEXP (operands[1], 0),
3082 GET_MODE_SIZE (<MODE>mode)))"
3083 "ldr\\t%q0, %1"
3084 [(set_attr "type" "neon_load1_1reg_q")]
3085 )
3086
3087 (define_insn "store_pair_lanes<mode>"
3088 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn")
3089 (vec_concat:<VDBL>
3090 (match_operand:VDC 1 "register_operand" "w, r")
3091 (match_operand:VDC 2 "register_operand" "w, r")))]
3092 "TARGET_SIMD"
3093 "@
3094 stp\\t%d1, %d2, %y0
3095 stp\\t%x1, %x2, %y0"
3096 [(set_attr "type" "neon_stp, store_16")]
3097 )
3098
2944 ;; In this insn, operand 1 should be low, and operand 2 the high part of the 3099 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
2945 ;; dest vector. 3100 ;; dest vector.
2946 3101
2947 (define_insn "*aarch64_combinez<mode>" 3102 (define_insn "*aarch64_combinez<mode>"
2948 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w") 3103 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2949 (vec_concat:<VDBL> 3104 (vec_concat:<VDBL>
2950 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m") 3105 (match_operand:VDC 1 "general_operand" "w,?r,m")
2951 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))] 3106 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))]
2952 "TARGET_SIMD && !BYTES_BIG_ENDIAN" 3107 "TARGET_SIMD && !BYTES_BIG_ENDIAN"
2953 "@ 3108 "@
2954 mov\\t%0.8b, %1.8b 3109 mov\\t%0.8b, %1.8b
2955 fmov\t%d0, %1 3110 fmov\t%d0, %1
2956 ldr\\t%d0, %1" 3111 ldr\\t%d0, %1"
2957 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg") 3112 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2958 (set_attr "simd" "yes,*,yes") 3113 (set_attr "arch" "simd,fp,simd")]
2959 (set_attr "fp" "*,yes,*")]
2960 ) 3114 )
2961 3115
2962 (define_insn "*aarch64_combinez_be<mode>" 3116 (define_insn "*aarch64_combinez_be<mode>"
2963 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w") 3117 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w")
2964 (vec_concat:<VDBL> 3118 (vec_concat:<VDBL>
2965 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz") 3119 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")
2966 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))] 3120 (match_operand:VDC 1 "general_operand" "w,?r,m")))]
2967 "TARGET_SIMD && BYTES_BIG_ENDIAN" 3121 "TARGET_SIMD && BYTES_BIG_ENDIAN"
2968 "@ 3122 "@
2969 mov\\t%0.8b, %1.8b 3123 mov\\t%0.8b, %1.8b
2970 fmov\t%d0, %1 3124 fmov\t%d0, %1
2971 ldr\\t%d0, %1" 3125 ldr\\t%d0, %1"
2972 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg") 3126 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg")
2973 (set_attr "simd" "yes,*,yes") 3127 (set_attr "arch" "simd,fp,simd")]
2974 (set_attr "fp" "*,yes,*")]
2975 ) 3128 )
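
A rough model of the combinez patterns using GCC vector extensions (illustrative; the real patterns cover every VDC mode, and the big-endian variant above swaps the halves):

    /* The low 64 bits come from the source, the high 64 bits are zero.  */
    typedef int v2si __attribute__ ((vector_size (8)));
    typedef int v4si __attribute__ ((vector_size (16)));

    static v4si
    combinez (v2si lo)
    {
      return (v4si) { lo[0], lo[1], 0, 0 };
    }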
2976 3129
2977 (define_expand "aarch64_combine<mode>" 3130 (define_expand "aarch64_combine<mode>"
2978 [(match_operand:<VDBL> 0 "register_operand") 3131 [(match_operand:<VDBL> 0 "register_operand")
2979 (match_operand:VDC 1 "register_operand") 3132 (match_operand:VDC 1 "register_operand")
2984 3137
2985 DONE; 3138 DONE;
2986 } 3139 }
2987 ) 3140 )
2988 3141
2989 (define_expand "aarch64_simd_combine<mode>" 3142 (define_expand "@aarch64_simd_combine<mode>"
2990 [(match_operand:<VDBL> 0 "register_operand") 3143 [(match_operand:<VDBL> 0 "register_operand")
2991 (match_operand:VDC 1 "register_operand") 3144 (match_operand:VDC 1 "register_operand")
2992 (match_operand:VDC 2 "register_operand")] 3145 (match_operand:VDC 2 "register_operand")]
2993 "TARGET_SIMD" 3146 "TARGET_SIMD"
2994 { 3147 {
3032 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3185 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3033 (match_operand:VQW 1 "register_operand" "w") 3186 (match_operand:VQW 1 "register_operand" "w")
3034 (match_operand:VQW 2 "register_operand" "w")] 3187 (match_operand:VQW 2 "register_operand" "w")]
3035 "TARGET_SIMD" 3188 "TARGET_SIMD"
3036 { 3189 {
3037 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3190 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3038 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1], 3191 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1],
3039 operands[2], p)); 3192 operands[2], p));
3040 DONE; 3193 DONE;
3041 }) 3194 })
3042 3195
3044 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3197 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3045 (match_operand:VQW 1 "register_operand" "w") 3198 (match_operand:VQW 1 "register_operand" "w")
3046 (match_operand:VQW 2 "register_operand" "w")] 3199 (match_operand:VQW 2 "register_operand" "w")]
3047 "TARGET_SIMD" 3200 "TARGET_SIMD"
3048 { 3201 {
3049 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3202 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3050 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1], 3203 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1],
3051 operands[2], p)); 3204 operands[2], p));
3052 DONE; 3205 DONE;
3053 }) 3206 })
3054 3207
3056 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3209 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3057 (match_operand:VQW 1 "register_operand" "w") 3210 (match_operand:VQW 1 "register_operand" "w")
3058 (match_operand:VQW 2 "register_operand" "w")] 3211 (match_operand:VQW 2 "register_operand" "w")]
3059 "TARGET_SIMD" 3212 "TARGET_SIMD"
3060 { 3213 {
3061 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3214 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3062 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1], 3215 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1],
3063 operands[2], p)); 3216 operands[2], p));
3064 DONE; 3217 DONE;
3065 }) 3218 })
3066 3219
3068 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3221 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3069 (match_operand:VQW 1 "register_operand" "w") 3222 (match_operand:VQW 1 "register_operand" "w")
3070 (match_operand:VQW 2 "register_operand" "w")] 3223 (match_operand:VQW 2 "register_operand" "w")]
3071 "TARGET_SIMD" 3224 "TARGET_SIMD"
3072 { 3225 {
3073 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3226 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3074 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1], 3227 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1],
3075 operands[2], p)); 3228 operands[2], p));
3076 DONE; 3229 DONE;
3077 }) 3230 })
3078 3231
3094 (plus:<VDBLW> (sign_extend:<VDBLW> 3247 (plus:<VDBLW> (sign_extend:<VDBLW>
3095 (match_operand:VQW 1 "register_operand" "")) 3248 (match_operand:VQW 1 "register_operand" ""))
3096 (match_operand:<VDBLW> 2 "register_operand" "")))] 3249 (match_operand:<VDBLW> 2 "register_operand" "")))]
3097 "TARGET_SIMD" 3250 "TARGET_SIMD"
3098 { 3251 {
3099 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); 3252 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3100 rtx temp = gen_reg_rtx (GET_MODE (operands[0])); 3253 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3101 3254
3102 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2], 3255 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2],
3103 operands[1], p)); 3256 operands[1], p));
3104 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1])); 3257 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1]));
3122 (plus:<VDBLW> (zero_extend:<VDBLW> 3275 (plus:<VDBLW> (zero_extend:<VDBLW>
3123 (match_operand:VQW 1 "register_operand" "")) 3276 (match_operand:VQW 1 "register_operand" ""))
3124 (match_operand:<VDBLW> 2 "register_operand" "")))] 3277 (match_operand:<VDBLW> 2 "register_operand" "")))]
3125 "TARGET_SIMD" 3278 "TARGET_SIMD"
3126 { 3279 {
3127 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); 3280 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false);
3128 rtx temp = gen_reg_rtx (GET_MODE (operands[0])); 3281 rtx temp = gen_reg_rtx (GET_MODE (operands[0]));
3129 3282
3130 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2], 3283 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2],
3131 operands[1], p)); 3284 operands[1], p));
3132 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1])); 3285 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1]));
3143 { 3296 {
3144 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1])); 3297 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1]));
3145 DONE; 3298 DONE;
3146 }) 3299 })
3147 3300
3148 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>" 3301 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>"
3149 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3302 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3150 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") 3303 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3151 (ANY_EXTEND:<VWIDE> 3304 (ANY_EXTEND:<VWIDE>
3152 (match_operand:VD_BHSI 2 "register_operand" "w"))))] 3305 (match_operand:VD_BHSI 2 "register_operand" "w"))))]
3153 "TARGET_SIMD" 3306 "TARGET_SIMD"
3154 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" 3307 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3155 [(set_attr "type" "neon_<ADDSUB:optab>_widen")] 3308 [(set_attr "type" "neon_sub_widen")]
3156 ) 3309 )
3157 3310
3158 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal" 3311 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal"
3159 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3312 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3160 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") 3313 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3161 (ANY_EXTEND:<VWIDE> 3314 (ANY_EXTEND:<VWIDE>
3162 (vec_select:<VHALF> 3315 (vec_select:<VHALF>
3163 (match_operand:VQW 2 "register_operand" "w") 3316 (match_operand:VQW 2 "register_operand" "w")
3164 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))] 3317 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))]
3165 "TARGET_SIMD" 3318 "TARGET_SIMD"
3166 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>" 3319 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3167 [(set_attr "type" "neon_<ADDSUB:optab>_widen")] 3320 [(set_attr "type" "neon_sub_widen")]
3168 ) 3321 )
3169 3322
3170 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal" 3323 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal"
3171 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") 3324 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3172 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") 3325 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w")
3173 (ANY_EXTEND:<VWIDE> 3326 (ANY_EXTEND:<VWIDE>
3174 (vec_select:<VHALF> 3327 (vec_select:<VHALF>
3175 (match_operand:VQW 2 "register_operand" "w") 3328 (match_operand:VQW 2 "register_operand" "w")
3176 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))] 3329 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))]
3177 "TARGET_SIMD" 3330 "TARGET_SIMD"
3178 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" 3331 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3179 [(set_attr "type" "neon_<ADDSUB:optab>_widen")] 3332 [(set_attr "type" "neon_sub_widen")]
3333 )
3334
3335 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>"
3336 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3337 (plus:<VWIDE>
3338 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w"))
3339 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3340 "TARGET_SIMD"
3341 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3342 [(set_attr "type" "neon_add_widen")]
3343 )
3344
3345 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal"
3346 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3347 (plus:<VWIDE>
3348 (ANY_EXTEND:<VWIDE>
3349 (vec_select:<VHALF>
3350 (match_operand:VQW 2 "register_operand" "w")
3351 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
3352 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3353 "TARGET_SIMD"
3354 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>"
3355 [(set_attr "type" "neon_add_widen")]
3356 )
3357
3358 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal"
3359 [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
3360 (plus:<VWIDE>
3361 (ANY_EXTEND:<VWIDE>
3362 (vec_select:<VHALF>
3363 (match_operand:VQW 2 "register_operand" "w")
3364 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
3365 (match_operand:<VWIDE> 1 "register_operand" "w")))]
3366 "TARGET_SIMD"
3367 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>"
3368 [(set_attr "type" "neon_add_widen")]
3180 ) 3369 )
3181 3370
3182 (define_expand "aarch64_saddw2<mode>" 3371 (define_expand "aarch64_saddw2<mode>"
3183 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3372 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3184 (match_operand:<VWIDE> 1 "register_operand" "w") 3373 (match_operand:<VWIDE> 1 "register_operand" "w")
3185 (match_operand:VQW 2 "register_operand" "w")] 3374 (match_operand:VQW 2 "register_operand" "w")]
3186 "TARGET_SIMD" 3375 "TARGET_SIMD"
3187 { 3376 {
3188 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3377 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3189 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1], 3378 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1],
3190 operands[2], p)); 3379 operands[2], p));
3191 DONE; 3380 DONE;
3192 }) 3381 })
3193 3382
3195 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3384 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3196 (match_operand:<VWIDE> 1 "register_operand" "w") 3385 (match_operand:<VWIDE> 1 "register_operand" "w")
3197 (match_operand:VQW 2 "register_operand" "w")] 3386 (match_operand:VQW 2 "register_operand" "w")]
3198 "TARGET_SIMD" 3387 "TARGET_SIMD"
3199 { 3388 {
3200 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3389 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3201 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1], 3390 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1],
3202 operands[2], p)); 3391 operands[2], p));
3203 DONE; 3392 DONE;
3204 }) 3393 })
3205 3394
3208 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3397 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3209 (match_operand:<VWIDE> 1 "register_operand" "w") 3398 (match_operand:<VWIDE> 1 "register_operand" "w")
3210 (match_operand:VQW 2 "register_operand" "w")] 3399 (match_operand:VQW 2 "register_operand" "w")]
3211 "TARGET_SIMD" 3400 "TARGET_SIMD"
3212 { 3401 {
3213 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3402 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3214 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1], 3403 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1],
3215 operands[2], p)); 3404 operands[2], p));
3216 DONE; 3405 DONE;
3217 }) 3406 })
3218 3407
3220 [(match_operand:<VWIDE> 0 "register_operand" "=w") 3409 [(match_operand:<VWIDE> 0 "register_operand" "=w")
3221 (match_operand:<VWIDE> 1 "register_operand" "w") 3410 (match_operand:<VWIDE> 1 "register_operand" "w")
3222 (match_operand:VQW 2 "register_operand" "w")] 3411 (match_operand:VQW 2 "register_operand" "w")]
3223 "TARGET_SIMD" 3412 "TARGET_SIMD"
3224 { 3413 {
3225 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3414 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3226 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1], 3415 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1],
3227 operands[2], p)); 3416 operands[2], p));
3228 DONE; 3417 DONE;
3229 }) 3418 })
3230 3419
3231 ;; <su><r>h<addsub>. 3420 ;; <su><r>h<addsub>.
3421
3422 (define_expand "<u>avg<mode>3_floor"
3423 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3424 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3425 (match_operand:VDQ_BHSI 2 "register_operand")]
3426 HADD))]
3427 "TARGET_SIMD"
3428 )
3429
3430 (define_expand "<u>avg<mode>3_ceil"
3431 [(set (match_operand:VDQ_BHSI 0 "register_operand")
3432 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")
3433 (match_operand:VDQ_BHSI 2 "register_operand")]
3434 RHADD))]
3435 "TARGET_SIMD"
3436 )
3232 3437
3233 (define_insn "aarch64_<sur>h<addsub><mode>" 3438 (define_insn "aarch64_<sur>h<addsub><mode>"
3234 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") 3439 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
3235 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w") 3440 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w")
3236 (match_operand:VDQ_BHSI 2 "register_operand" "w")] 3441 (match_operand:VDQ_BHSI 2 "register_operand" "w")]
3299 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w") 3504 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w")
3300 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))] 3505 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3301 UNSPEC_FMULX))] 3506 UNSPEC_FMULX))]
3302 "TARGET_SIMD" 3507 "TARGET_SIMD"
3303 { 3508 {
3304 operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, 3509 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3]));
3305 INTVAL (operands[3])));
3306 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 3510 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3307 } 3511 }
3308 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")] 3512 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")]
3309 ) 3513 )
3310 3514
3319 (match_operand:VDQF 2 "register_operand" "w") 3523 (match_operand:VDQF 2 "register_operand" "w")
3320 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))] 3524 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))]
3321 UNSPEC_FMULX))] 3525 UNSPEC_FMULX))]
3322 "TARGET_SIMD" 3526 "TARGET_SIMD"
3323 { 3527 {
3324 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); 3528 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3325 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 3529 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3326 } 3530 }
3327 [(set_attr "type" "neon_fp_mul_<Vetype><q>")] 3531 [(set_attr "type" "neon_fp_mul_<Vetype><q>")]
3328 ) 3532 )
3329 3533
3353 (match_operand:VDQF 2 "register_operand" "w") 3557 (match_operand:VDQF 2 "register_operand" "w")
3354 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] 3558 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3355 UNSPEC_FMULX))] 3559 UNSPEC_FMULX))]
3356 "TARGET_SIMD" 3560 "TARGET_SIMD"
3357 { 3561 {
3358 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); 3562 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
3359 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]"; 3563 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]";
3360 } 3564 }
3361 [(set_attr "type" "fmul<Vetype>")] 3565 [(set_attr "type" "fmul<Vetype>")]
3362 ) 3566 )
3363 ;; <su>q<addsub> 3567 ;; <su>q<addsub>
3439 (match_operand:<VCOND> 2 "register_operand" "<vwx>") 3643 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3440 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] 3644 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3441 VQDMULH))] 3645 VQDMULH))]
3442 "TARGET_SIMD" 3646 "TARGET_SIMD"
3443 "* 3647 "*
3444 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); 3648 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3445 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";" 3649 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3446 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] 3650 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3447 ) 3651 )
3448 3652
3449 (define_insn "aarch64_sq<r>dmulh_laneq<mode>" 3653 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3454 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") 3658 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3455 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] 3659 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3456 VQDMULH))] 3660 VQDMULH))]
3457 "TARGET_SIMD" 3661 "TARGET_SIMD"
3458 "* 3662 "*
3459 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); 3663 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3460 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";" 3664 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";"
3461 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] 3665 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3462 ) 3666 )
3463 3667
3464 (define_insn "aarch64_sq<r>dmulh_lane<mode>" 3668 (define_insn "aarch64_sq<r>dmulh_lane<mode>"
3469 (match_operand:<VCOND> 2 "register_operand" "<vwx>") 3673 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
3470 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] 3674 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3471 VQDMULH))] 3675 VQDMULH))]
3472 "TARGET_SIMD" 3676 "TARGET_SIMD"
3473 "* 3677 "*
3474 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); 3678 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3475 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";" 3679 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3476 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] 3680 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3477 ) 3681 )
3478 3682
3479 (define_insn "aarch64_sq<r>dmulh_laneq<mode>" 3683 (define_insn "aarch64_sq<r>dmulh_laneq<mode>"
3484 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") 3688 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
3485 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] 3689 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))]
3486 VQDMULH))] 3690 VQDMULH))]
3487 "TARGET_SIMD" 3691 "TARGET_SIMD"
3488 "* 3692 "*
3489 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); 3693 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3490 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";" 3694 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";"
3491 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] 3695 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")]
3492 ) 3696 )
3493 3697
3494 ;; sqrdml[as]h. 3698 ;; sqrdml[as]h.
3516 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3720 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3517 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] 3721 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3518 SQRDMLH_AS))] 3722 SQRDMLH_AS))]
3519 "TARGET_SIMD_RDMA" 3723 "TARGET_SIMD_RDMA"
3520 { 3724 {
3521 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); 3725 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3522 return 3726 return
3523 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; 3727 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3524 } 3728 }
3525 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3729 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3526 ) 3730 )
3534 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 3738 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3535 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] 3739 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3536 SQRDMLH_AS))] 3740 SQRDMLH_AS))]
3537 "TARGET_SIMD_RDMA" 3741 "TARGET_SIMD_RDMA"
3538 { 3742 {
3539 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); 3743 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3540 return 3744 return
3541 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]"; 3745 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]";
3542 } 3746 }
3543 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3747 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3544 ) 3748 )
3554 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 3758 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3555 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] 3759 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3556 SQRDMLH_AS))] 3760 SQRDMLH_AS))]
3557 "TARGET_SIMD_RDMA" 3761 "TARGET_SIMD_RDMA"
3558 { 3762 {
3559 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); 3763 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3560 return 3764 return
3561 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; 3765 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]";
3562 } 3766 }
3563 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3767 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3564 ) 3768 )
3572 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 3776 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3573 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] 3777 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))]
3574 SQRDMLH_AS))] 3778 SQRDMLH_AS))]
3575 "TARGET_SIMD_RDMA" 3779 "TARGET_SIMD_RDMA"
3576 { 3780 {
3577 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); 3781 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3578 return 3782 return
3579 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]"; 3783 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]";
3580 } 3784 }
3581 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3785 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3582 ) 3786 )
3616 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) 3820 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3617 )) 3821 ))
3618 (const_int 1))))] 3822 (const_int 1))))]
3619 "TARGET_SIMD" 3823 "TARGET_SIMD"
3620 { 3824 {
3621 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); 3825 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3622 return 3826 return
3623 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 3827 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3624 } 3828 }
3625 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3829 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3626 ) 3830 )
3640 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) 3844 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3641 )) 3845 ))
3642 (const_int 1))))] 3846 (const_int 1))))]
3643 "TARGET_SIMD" 3847 "TARGET_SIMD"
3644 { 3848 {
3645 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); 3849 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3646 return 3850 return
3647 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 3851 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3648 } 3852 }
3649 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3853 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3650 ) 3854 )
3663 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) 3867 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3664 ) 3868 )
3665 (const_int 1))))] 3869 (const_int 1))))]
3666 "TARGET_SIMD" 3870 "TARGET_SIMD"
3667 { 3871 {
3668 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); 3872 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3669 return 3873 return
3670 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 3874 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3671 } 3875 }
3672 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3876 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3673 ) 3877 )
3686 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) 3890 (parallel [(match_operand:SI 4 "immediate_operand" "i")])))
3687 ) 3891 )
3688 (const_int 1))))] 3892 (const_int 1))))]
3689 "TARGET_SIMD" 3893 "TARGET_SIMD"
3690 { 3894 {
3691 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); 3895 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3692 return 3896 return
3693 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 3897 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3694 } 3898 }
3695 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3899 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3696 ) 3900 )
3741 (match_operand:<VWIDE> 1 "register_operand" "w") 3945 (match_operand:<VWIDE> 1 "register_operand" "w")
3742 (match_operand:VQ_HSI 2 "register_operand" "w") 3946 (match_operand:VQ_HSI 2 "register_operand" "w")
3743 (match_operand:VQ_HSI 3 "register_operand" "w")] 3947 (match_operand:VQ_HSI 3 "register_operand" "w")]
3744 "TARGET_SIMD" 3948 "TARGET_SIMD"
3745 { 3949 {
3746 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3950 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3747 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1], 3951 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1],
3748 operands[2], operands[3], p)); 3952 operands[2], operands[3], p));
3749 DONE; 3953 DONE;
3750 }) 3954 })
3751 3955
3754 (match_operand:<VWIDE> 1 "register_operand" "w") 3958 (match_operand:<VWIDE> 1 "register_operand" "w")
3755 (match_operand:VQ_HSI 2 "register_operand" "w") 3959 (match_operand:VQ_HSI 2 "register_operand" "w")
3756 (match_operand:VQ_HSI 3 "register_operand" "w")] 3960 (match_operand:VQ_HSI 3 "register_operand" "w")]
3757 "TARGET_SIMD" 3961 "TARGET_SIMD"
3758 { 3962 {
3759 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 3963 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3760 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1], 3964 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1],
3761 operands[2], operands[3], p)); 3965 operands[2], operands[3], p));
3762 DONE; 3966 DONE;
3763 }) 3967 })
3764 3968
3781 (parallel [(match_operand:SI 4 "immediate_operand" "i")]) 3985 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3782 )))) 3986 ))))
3783 (const_int 1))))] 3987 (const_int 1))))]
3784 "TARGET_SIMD" 3988 "TARGET_SIMD"
3785 { 3989 {
3786 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); 3990 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4]));
3787 return 3991 return
3788 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 3992 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3789 } 3993 }
3790 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 3994 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3791 ) 3995 )
3807 (parallel [(match_operand:SI 4 "immediate_operand" "i")]) 4011 (parallel [(match_operand:SI 4 "immediate_operand" "i")])
3808 )))) 4012 ))))
3809 (const_int 1))))] 4013 (const_int 1))))]
3810 "TARGET_SIMD" 4014 "TARGET_SIMD"
3811 { 4015 {
3812 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); 4016 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4]));
3813 return 4017 return
3814 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; 4018 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]";
3815 } 4019 }
3816 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] 4020 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")]
3817 ) 4021 )
3822 (match_operand:VQ_HSI 2 "register_operand" "w") 4026 (match_operand:VQ_HSI 2 "register_operand" "w")
3823 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 4027 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3824 (match_operand:SI 4 "immediate_operand" "i")] 4028 (match_operand:SI 4 "immediate_operand" "i")]
3825 "TARGET_SIMD" 4029 "TARGET_SIMD"
3826 { 4030 {
3827 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 4031 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3828 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1], 4032 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1],
3829 operands[2], operands[3], 4033 operands[2], operands[3],
3830 operands[4], p)); 4034 operands[4], p));
3831 DONE; 4035 DONE;
3832 }) 4036 })
3837 (match_operand:VQ_HSI 2 "register_operand" "w") 4041 (match_operand:VQ_HSI 2 "register_operand" "w")
3838 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 4042 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3839 (match_operand:SI 4 "immediate_operand" "i")] 4043 (match_operand:SI 4 "immediate_operand" "i")]
3840 "TARGET_SIMD" 4044 "TARGET_SIMD"
3841 { 4045 {
3842 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 4046 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3843 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1], 4047 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1],
3844 operands[2], operands[3], 4048 operands[2], operands[3],
3845 operands[4], p)); 4049 operands[4], p));
3846 DONE; 4050 DONE;
3847 }) 4051 })
3852 (match_operand:VQ_HSI 2 "register_operand" "w") 4056 (match_operand:VQ_HSI 2 "register_operand" "w")
3853 (match_operand:<VCOND> 3 "register_operand" "<vwx>") 4057 (match_operand:<VCOND> 3 "register_operand" "<vwx>")
3854 (match_operand:SI 4 "immediate_operand" "i")] 4058 (match_operand:SI 4 "immediate_operand" "i")]
3855 "TARGET_SIMD" 4059 "TARGET_SIMD"
3856 { 4060 {
3857 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 4061 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3858 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1], 4062 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1],
3859 operands[2], operands[3], 4063 operands[2], operands[3],
3860 operands[4], p)); 4064 operands[4], p));
3861 DONE; 4065 DONE;
3862 }) 4066 })
3867 (match_operand:VQ_HSI 2 "register_operand" "w") 4071 (match_operand:VQ_HSI 2 "register_operand" "w")
3868 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") 4072 (match_operand:<VCONQ> 3 "register_operand" "<vwx>")
3869 (match_operand:SI 4 "immediate_operand" "i")] 4073 (match_operand:SI 4 "immediate_operand" "i")]
3870 "TARGET_SIMD" 4074 "TARGET_SIMD"
3871 { 4075 {
3872 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 4076 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3873 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1], 4077 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1],
3874 operands[2], operands[3], 4078 operands[2], operands[3],
3875 operands[4], p)); 4079 operands[4], p));
3876 DONE; 4080 DONE;
3877 }) 4081 })
3900 (match_operand:<VWIDE> 1 "register_operand" "w") 4104 (match_operand:<VWIDE> 1 "register_operand" "w")
3901 (match_operand:VQ_HSI 2 "register_operand" "w") 4105 (match_operand:VQ_HSI 2 "register_operand" "w")
3902 (match_operand:<VEL> 3 "register_operand" "w")] 4106 (match_operand:<VEL> 3 "register_operand" "w")]
3903 "TARGET_SIMD" 4107 "TARGET_SIMD"
3904 { 4108 {
3905 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 4109 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3906 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1], 4110 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1],
3907 operands[2], operands[3], 4111 operands[2], operands[3],
3908 p)); 4112 p));
3909 DONE; 4113 DONE;
3910 }) 4114 })
3914 (match_operand:<VWIDE> 1 "register_operand" "w") 4118 (match_operand:<VWIDE> 1 "register_operand" "w")
3915 (match_operand:VQ_HSI 2 "register_operand" "w") 4119 (match_operand:VQ_HSI 2 "register_operand" "w")
3916 (match_operand:<VEL> 3 "register_operand" "w")] 4120 (match_operand:<VEL> 3 "register_operand" "w")]
3917 "TARGET_SIMD" 4121 "TARGET_SIMD"
3918 { 4122 {
3919 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 4123 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
3920 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1], 4124 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1],
3921 operands[2], operands[3], 4125 operands[2], operands[3],
3922 p)); 4126 p));
3923 DONE; 4127 DONE;
3924 }) 4128 })
3954 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) 4158 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3955 )) 4159 ))
3956 (const_int 1)))] 4160 (const_int 1)))]
3957 "TARGET_SIMD" 4161 "TARGET_SIMD"
3958 { 4162 {
3959 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); 4163 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
3960 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 4164 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3961 } 4165 }
3962 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 4166 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3963 ) 4167 )
3964 4168
3975 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) 4179 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
3976 )) 4180 ))
3977 (const_int 1)))] 4181 (const_int 1)))]
3978 "TARGET_SIMD" 4182 "TARGET_SIMD"
3979 { 4183 {
3980 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); 4184 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
3981 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 4185 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
3982 } 4186 }
3983 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 4187 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
3984 ) 4188 )
3985 4189
3995 (parallel [(match_operand:SI 3 "immediate_operand" "i")])) 4199 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
3996 )) 4200 ))
3997 (const_int 1)))] 4201 (const_int 1)))]
3998 "TARGET_SIMD" 4202 "TARGET_SIMD"
3999 { 4203 {
4000 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); 4204 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4001 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 4205 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4002 } 4206 }
4003 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 4207 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4004 ) 4208 )
4005 4209
4015 (parallel [(match_operand:SI 3 "immediate_operand" "i")])) 4219 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))
4016 )) 4220 ))
4017 (const_int 1)))] 4221 (const_int 1)))]
4018 "TARGET_SIMD" 4222 "TARGET_SIMD"
4019 { 4223 {
4020 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); 4224 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4021 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 4225 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4022 } 4226 }
4023 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 4227 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4024 ) 4228 )
4025 4229
4068 [(match_operand:<VWIDE> 0 "register_operand" "=w") 4272 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4069 (match_operand:VQ_HSI 1 "register_operand" "w") 4273 (match_operand:VQ_HSI 1 "register_operand" "w")
4070 (match_operand:VQ_HSI 2 "register_operand" "w")] 4274 (match_operand:VQ_HSI 2 "register_operand" "w")]
4071 "TARGET_SIMD" 4275 "TARGET_SIMD"
4072 { 4276 {
4073 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 4277 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4074 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1], 4278 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1],
4075 operands[2], p)); 4279 operands[2], p));
4076 DONE; 4280 DONE;
4077 }) 4281 })
4078 4282
4093 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) 4297 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4094 )) 4298 ))
4095 (const_int 1)))] 4299 (const_int 1)))]
4096 "TARGET_SIMD" 4300 "TARGET_SIMD"
4097 { 4301 {
4098 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); 4302 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3]));
4099 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 4303 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4100 } 4304 }
4101 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 4305 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4102 ) 4306 )
4103 4307
4116 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) 4320 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))
4117 )) 4321 ))
4118 (const_int 1)))] 4322 (const_int 1)))]
4119 "TARGET_SIMD" 4323 "TARGET_SIMD"
4120 { 4324 {
4121 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); 4325 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3]));
4122 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; 4326 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]";
4123 } 4327 }
4124 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] 4328 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")]
4125 ) 4329 )
4126 4330
4129 (match_operand:VQ_HSI 1 "register_operand" "w") 4333 (match_operand:VQ_HSI 1 "register_operand" "w")
4130 (match_operand:<VCOND> 2 "register_operand" "<vwx>") 4334 (match_operand:<VCOND> 2 "register_operand" "<vwx>")
4131 (match_operand:SI 3 "immediate_operand" "i")] 4335 (match_operand:SI 3 "immediate_operand" "i")]
4132 "TARGET_SIMD" 4336 "TARGET_SIMD"
4133 { 4337 {
4134 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 4338 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4135 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1], 4339 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1],
4136 operands[2], operands[3], 4340 operands[2], operands[3],
4137 p)); 4341 p));
4138 DONE; 4342 DONE;
4139 }) 4343 })
4143 (match_operand:VQ_HSI 1 "register_operand" "w") 4347 (match_operand:VQ_HSI 1 "register_operand" "w")
4144 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") 4348 (match_operand:<VCONQ> 2 "register_operand" "<vwx>")
4145 (match_operand:SI 3 "immediate_operand" "i")] 4349 (match_operand:SI 3 "immediate_operand" "i")]
4146 "TARGET_SIMD" 4350 "TARGET_SIMD"
4147 { 4351 {
4148 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 4352 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4149 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1], 4353 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1],
4150 operands[2], operands[3], 4354 operands[2], operands[3],
4151 p)); 4355 p));
4152 DONE; 4356 DONE;
4153 }) 4357 })
4176 [(match_operand:<VWIDE> 0 "register_operand" "=w") 4380 [(match_operand:<VWIDE> 0 "register_operand" "=w")
4177 (match_operand:VQ_HSI 1 "register_operand" "w") 4381 (match_operand:VQ_HSI 1 "register_operand" "w")
4178 (match_operand:<VEL> 2 "register_operand" "w")] 4382 (match_operand:<VEL> 2 "register_operand" "w")]
4179 "TARGET_SIMD" 4383 "TARGET_SIMD"
4180 { 4384 {
4181 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); 4385 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true);
4182 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1], 4386 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1],
4183 operands[2], p)); 4387 operands[2], p));
4184 DONE; 4388 DONE;
4185 }) 4389 })
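;; The sqdmull2 expanders above operate on the high half of their inputs,
;; which is what aarch64_simd_vect_par_cnst_half (..., <nunits>, true)
;; selects.  A rough per-lane sketch, assuming the usual ACLE mapping
;; (e.g. vqdmull_high_s16; the intrinsic name is illustrative, not defined
;; in this file):
;;
;;   /* int32x4_t vqdmull_high_s16 (int16x8_t a, int16x8_t b)        */
;;   /* r[i] = sat32 ((int64_t) 2 * a[i + 4] * b[i + 4]), i = 0..3,  */
;;   /* where sat32 clamps the result to the int32_t range.          */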
4186 4390
4341 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r") 4545 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
4342 ))) 4546 )))
4343 (clobber (reg:CC CC_REGNUM))] 4547 (clobber (reg:CC CC_REGNUM))]
4344 "TARGET_SIMD" 4548 "TARGET_SIMD"
4345 "#" 4549 "#"
4346 "reload_completed" 4550 "&& reload_completed"
4347 [(set (match_operand:DI 0 "register_operand") 4551 [(set (match_operand:DI 0 "register_operand")
4348 (neg:DI 4552 (neg:DI
4349 (COMPARISONS:DI 4553 (COMPARISONS:DI
4350 (match_operand:DI 1 "register_operand") 4554 (match_operand:DI 1 "register_operand")
4351 (match_operand:DI 2 "aarch64_simd_reg_or_zero") 4555 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4404 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r") 4608 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
4405 ))) 4609 )))
4406 (clobber (reg:CC CC_REGNUM))] 4610 (clobber (reg:CC CC_REGNUM))]
4407 "TARGET_SIMD" 4611 "TARGET_SIMD"
4408 "#" 4612 "#"
4409 "reload_completed" 4613 "&& reload_completed"
4410 [(set (match_operand:DI 0 "register_operand") 4614 [(set (match_operand:DI 0 "register_operand")
4411 (neg:DI 4615 (neg:DI
4412 (UCOMPARISONS:DI 4616 (UCOMPARISONS:DI
4413 (match_operand:DI 1 "register_operand") 4617 (match_operand:DI 1 "register_operand")
4414 (match_operand:DI 2 "aarch64_simd_reg_or_zero") 4618 (match_operand:DI 2 "aarch64_simd_reg_or_zero")
4475 (match_operand:DI 2 "register_operand" "w,r")) 4679 (match_operand:DI 2 "register_operand" "w,r"))
4476 (const_int 0)))) 4680 (const_int 0))))
4477 (clobber (reg:CC CC_REGNUM))] 4681 (clobber (reg:CC CC_REGNUM))]
4478 "TARGET_SIMD" 4682 "TARGET_SIMD"
4479 "#" 4683 "#"
4480 "reload_completed" 4684 "&& reload_completed"
4481 [(set (match_operand:DI 0 "register_operand") 4685 [(set (match_operand:DI 0 "register_operand")
4482 (neg:DI 4686 (neg:DI
4483 (ne:DI 4687 (ne:DI
4484 (and:DI 4688 (and:DI
4485 (match_operand:DI 1 "register_operand") 4689 (match_operand:DI 1 "register_operand")
4622 (match_operand:SI 3 "immediate_operand" "i") 4826 (match_operand:SI 3 "immediate_operand" "i")
4623 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] 4827 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ]
4624 UNSPEC_LD2_LANE))] 4828 UNSPEC_LD2_LANE))]
4625 "TARGET_SIMD" 4829 "TARGET_SIMD"
4626 { 4830 {
4627 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); 4831 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4628 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1"; 4832 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1";
4629 } 4833 }
4630 [(set_attr "type" "neon_load2_one_lane")] 4834 [(set_attr "type" "neon_load2_one_lane")]
4631 ) 4835 )
4632 4836
4638 "TARGET_SIMD" 4842 "TARGET_SIMD"
4639 { 4843 {
4640 if (BYTES_BIG_ENDIAN) 4844 if (BYTES_BIG_ENDIAN)
4641 { 4845 {
4642 rtx tmp = gen_reg_rtx (OImode); 4846 rtx tmp = gen_reg_rtx (OImode);
4643 rtx mask = aarch64_reverse_mask (<MODE>mode); 4847 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4644 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1])); 4848 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1]));
4645 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask)); 4849 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask));
4646 } 4850 }
4647 else 4851 else
4648 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1])); 4852 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1]));
4666 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) 4870 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4667 (match_operand:SI 2 "immediate_operand" "i")] 4871 (match_operand:SI 2 "immediate_operand" "i")]
4668 UNSPEC_ST2_LANE))] 4872 UNSPEC_ST2_LANE))]
4669 "TARGET_SIMD" 4873 "TARGET_SIMD"
4670 { 4874 {
4671 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); 4875 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4672 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"; 4876 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0";
4673 } 4877 }
4674 [(set_attr "type" "neon_store2_one_lane<q>")] 4878 [(set_attr "type" "neon_store2_one_lane<q>")]
4675 ) 4879 )
4676 4880
4682 "TARGET_SIMD" 4886 "TARGET_SIMD"
4683 { 4887 {
4684 if (BYTES_BIG_ENDIAN) 4888 if (BYTES_BIG_ENDIAN)
4685 { 4889 {
4686 rtx tmp = gen_reg_rtx (OImode); 4890 rtx tmp = gen_reg_rtx (OImode);
4687 rtx mask = aarch64_reverse_mask (<MODE>mode); 4891 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4688 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask)); 4892 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask));
4689 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp)); 4893 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp));
4690 } 4894 }
4691 else 4895 else
4692 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1])); 4896 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1]));
4720 (match_operand:SI 3 "immediate_operand" "i") 4924 (match_operand:SI 3 "immediate_operand" "i")
4721 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 4925 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4722 UNSPEC_LD3_LANE))] 4926 UNSPEC_LD3_LANE))]
4723 "TARGET_SIMD" 4927 "TARGET_SIMD"
4724 { 4928 {
4725 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); 4929 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4726 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1"; 4930 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1";
4727 } 4931 }
4728 [(set_attr "type" "neon_load3_one_lane")] 4932 [(set_attr "type" "neon_load3_one_lane")]
4729 ) 4933 )
4730 4934
4736 "TARGET_SIMD" 4940 "TARGET_SIMD"
4737 { 4941 {
4738 if (BYTES_BIG_ENDIAN) 4942 if (BYTES_BIG_ENDIAN)
4739 { 4943 {
4740 rtx tmp = gen_reg_rtx (CImode); 4944 rtx tmp = gen_reg_rtx (CImode);
4741 rtx mask = aarch64_reverse_mask (<MODE>mode); 4945 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4742 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1])); 4946 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1]));
4743 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask)); 4947 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask));
4744 } 4948 }
4745 else 4949 else
4746 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1])); 4950 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1]));
4764 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) 4968 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4765 (match_operand:SI 2 "immediate_operand" "i")] 4969 (match_operand:SI 2 "immediate_operand" "i")]
4766 UNSPEC_ST3_LANE))] 4970 UNSPEC_ST3_LANE))]
4767 "TARGET_SIMD" 4971 "TARGET_SIMD"
4768 { 4972 {
4769 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); 4973 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4770 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0"; 4974 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0";
4771 } 4975 }
4772 [(set_attr "type" "neon_store3_one_lane<q>")] 4976 [(set_attr "type" "neon_store3_one_lane<q>")]
4773 ) 4977 )
4774 4978
4780 "TARGET_SIMD" 4984 "TARGET_SIMD"
4781 { 4985 {
4782 if (BYTES_BIG_ENDIAN) 4986 if (BYTES_BIG_ENDIAN)
4783 { 4987 {
4784 rtx tmp = gen_reg_rtx (CImode); 4988 rtx tmp = gen_reg_rtx (CImode);
4785 rtx mask = aarch64_reverse_mask (<MODE>mode); 4989 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4786 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask)); 4990 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask));
4787 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp)); 4991 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp));
4788 } 4992 }
4789 else 4993 else
4790 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1])); 4994 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1]));
4818 (match_operand:SI 3 "immediate_operand" "i") 5022 (match_operand:SI 3 "immediate_operand" "i")
4819 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] 5023 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
4820 UNSPEC_LD4_LANE))] 5024 UNSPEC_LD4_LANE))]
4821 "TARGET_SIMD" 5025 "TARGET_SIMD"
4822 { 5026 {
4823 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); 5027 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3]));
4824 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1"; 5028 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1";
4825 } 5029 }
4826 [(set_attr "type" "neon_load4_one_lane")] 5030 [(set_attr "type" "neon_load4_one_lane")]
4827 ) 5031 )
4828 5032
4834 "TARGET_SIMD" 5038 "TARGET_SIMD"
4835 { 5039 {
4836 if (BYTES_BIG_ENDIAN) 5040 if (BYTES_BIG_ENDIAN)
4837 { 5041 {
4838 rtx tmp = gen_reg_rtx (XImode); 5042 rtx tmp = gen_reg_rtx (XImode);
4839 rtx mask = aarch64_reverse_mask (<MODE>mode); 5043 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4840 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1])); 5044 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1]));
4841 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask)); 5045 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask));
4842 } 5046 }
4843 else 5047 else
4844 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1])); 5048 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1]));
4862 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) 5066 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)
4863 (match_operand:SI 2 "immediate_operand" "i")] 5067 (match_operand:SI 2 "immediate_operand" "i")]
4864 UNSPEC_ST4_LANE))] 5068 UNSPEC_ST4_LANE))]
4865 "TARGET_SIMD" 5069 "TARGET_SIMD"
4866 { 5070 {
4867 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); 5071 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2]));
4868 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0"; 5072 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0";
4869 } 5073 }
4870 [(set_attr "type" "neon_store4_one_lane<q>")] 5074 [(set_attr "type" "neon_store4_one_lane<q>")]
4871 ) 5075 )
4872 5076
4878 "TARGET_SIMD" 5082 "TARGET_SIMD"
4879 { 5083 {
4880 if (BYTES_BIG_ENDIAN) 5084 if (BYTES_BIG_ENDIAN)
4881 { 5085 {
4882 rtx tmp = gen_reg_rtx (XImode); 5086 rtx tmp = gen_reg_rtx (XImode);
4883 rtx mask = aarch64_reverse_mask (<MODE>mode); 5087 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>);
4884 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask)); 5088 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask));
4885 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp)); 5089 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp));
4886 } 5090 }
4887 else 5091 else
4888 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1])); 5092 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1]));
4925 { 5129 {
4926 if (GET_CODE (operands[0]) != REG) 5130 if (GET_CODE (operands[0]) != REG)
4927 operands[1] = force_reg (<MODE>mode, operands[1]); 5131 operands[1] = force_reg (<MODE>mode, operands[1]);
4928 } 5132 }
4929 }) 5133 })
5134
5135
5136 (define_expand "aarch64_ld1x3<VALLDIF:mode>"
5137 [(match_operand:CI 0 "register_operand" "=w")
5138 (match_operand:DI 1 "register_operand" "r")
5139 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5140 "TARGET_SIMD"
5141 {
5142 rtx mem = gen_rtx_MEM (CImode, operands[1]);
5143 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem));
5144 DONE;
5145 })
5146
5147 (define_insn "aarch64_ld1_x3_<mode>"
5148 [(set (match_operand:CI 0 "register_operand" "=w")
5149 (unspec:CI
5150 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv")
5151 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))]
5152 "TARGET_SIMD"
5153 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1"
5154 [(set_attr "type" "neon_load1_3reg<q>")]
5155 )
5156
5157 (define_expand "aarch64_st1x2<VALLDIF:mode>"
5158 [(match_operand:DI 0 "register_operand" "")
5159 (match_operand:OI 1 "register_operand" "")
5160 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5161 "TARGET_SIMD"
5162 {
5163 rtx mem = gen_rtx_MEM (OImode, operands[0]);
5164 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1]));
5165 DONE;
5166 })
5167
5168 (define_insn "aarch64_st1_x2_<mode>"
5169 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv")
5170 (unspec:OI
5171 [(match_operand:OI 1 "register_operand" "w")
5172 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5173 "TARGET_SIMD"
5174 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0"
5175 [(set_attr "type" "neon_store1_2reg<q>")]
5176 )
5177
5178 (define_expand "aarch64_st1x3<VALLDIF:mode>"
5179 [(match_operand:DI 0 "register_operand" "")
5180 (match_operand:CI 1 "register_operand" "")
5181 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5182 "TARGET_SIMD"
5183 {
5184 rtx mem = gen_rtx_MEM (CImode, operands[0]);
5185 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1]));
5186 DONE;
5187 })
5188
5189 (define_insn "aarch64_st1_x3_<mode>"
5190 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv")
5191 (unspec:CI
5192 [(match_operand:CI 1 "register_operand" "w")
5193 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))]
5194 "TARGET_SIMD"
5195 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0"
5196 [(set_attr "type" "neon_store1_3reg<q>")]
5197 )
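;; A minimal usage sketch for the ld1/st1 multi-register expanders above,
;; assuming the usual arm_neon.h mapping (the intrinsic names below are
;; illustrative; they are not defined in this file):
;;
;;   float32x4x3_t v3 = vld1q_f32_x3 (src);  /* -> aarch64_ld1x3, LD1 {3 regs} */
;;   vst1q_f32_x2 (dst, v2);                 /* -> aarch64_st1x2, ST1 {2 regs} */
;;   vst1q_f32_x3 (dst, v3);                 /* -> aarch64_st1x3, ST1 {3 regs} */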
4930 5198
4931 (define_insn "*aarch64_mov<mode>" 5199 (define_insn "*aarch64_mov<mode>"
4932 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w") 5200 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w")
4933 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))] 5201 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))]
4934 "TARGET_SIMD && !BYTES_BIG_ENDIAN 5202 "TARGET_SIMD && !BYTES_BIG_ENDIAN
5174 5442
5175 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem)); 5443 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem));
5176 DONE; 5444 DONE;
5177 }) 5445 })
5178 5446
5447 (define_expand "aarch64_ld1x2<VQ:mode>"
5448 [(match_operand:OI 0 "register_operand" "=w")
5449 (match_operand:DI 1 "register_operand" "r")
5450 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5451 "TARGET_SIMD"
5452 {
5453 machine_mode mode = OImode;
5454 rtx mem = gen_rtx_MEM (mode, operands[1]);
5455
5456 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem));
5457 DONE;
5458 })
5459
5460 (define_expand "aarch64_ld1x2<VDC:mode>"
5461 [(match_operand:OI 0 "register_operand" "=w")
5462 (match_operand:DI 1 "register_operand" "r")
5463 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5464 "TARGET_SIMD"
5465 {
5466 machine_mode mode = OImode;
5467 rtx mem = gen_rtx_MEM (mode, operands[1]);
5468
5469 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem));
5470 DONE;
5471 })
5472
5473
5179 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>" 5474 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>"
5180 [(match_operand:VSTRUCT 0 "register_operand" "=w") 5475 [(match_operand:VSTRUCT 0 "register_operand" "=w")
5181 (match_operand:DI 1 "register_operand" "w") 5476 (match_operand:DI 1 "register_operand" "w")
5182 (match_operand:VSTRUCT 2 "register_operand" "0") 5477 (match_operand:VSTRUCT 2 "register_operand" "0")
5183 (match_operand:SI 3 "immediate_operand" "i") 5478 (match_operand:SI 3 "immediate_operand" "i")
5186 { 5481 {
5187 rtx mem = gen_rtx_MEM (BLKmode, operands[1]); 5482 rtx mem = gen_rtx_MEM (BLKmode, operands[1]);
5188 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode)) 5483 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode))
5189 * <VSTRUCT:nregs>); 5484 * <VSTRUCT:nregs>);
5190 5485
5191 aarch64_simd_lane_bounds (operands[3], 0, 5486 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL);
5192 GET_MODE_NUNITS (<VALLDIF:MODE>mode),
5193 NULL);
5194 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> ( 5487 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> (
5195 operands[0], mem, operands[2], operands[3])); 5488 operands[0], mem, operands[2], operands[3]));
5196 DONE; 5489 DONE;
5197 }) 5490 })
5198 5491
5235 ;; Permuted-store expanders for neon intrinsics. 5528 ;; Permuted-store expanders for neon intrinsics.
5236 5529
5237 ;; Permute instructions 5530 ;; Permute instructions
5238 5531
5239 ;; vec_perm support 5532 ;; vec_perm support
5240
5241 (define_expand "vec_perm_const<mode>"
5242 [(match_operand:VALL_F16 0 "register_operand")
5243 (match_operand:VALL_F16 1 "register_operand")
5244 (match_operand:VALL_F16 2 "register_operand")
5245 (match_operand:<V_INT_EQUIV> 3)]
5246 "TARGET_SIMD"
5247 {
5248 if (aarch64_expand_vec_perm_const (operands[0], operands[1],
5249 operands[2], operands[3]))
5250 DONE;
5251 else
5252 FAIL;
5253 })
5254 5533
5255 (define_expand "vec_perm<mode>" 5534 (define_expand "vec_perm<mode>"
5256 [(match_operand:VB 0 "register_operand") 5535 [(match_operand:VB 0 "register_operand")
5257 (match_operand:VB 1 "register_operand") 5536 (match_operand:VB 1 "register_operand")
5258 (match_operand:VB 2 "register_operand") 5537 (match_operand:VB 2 "register_operand")
5259 (match_operand:VB 3 "register_operand")] 5538 (match_operand:VB 3 "register_operand")]
5260 "TARGET_SIMD" 5539 "TARGET_SIMD"
5261 { 5540 {
5262 aarch64_expand_vec_perm (operands[0], operands[1], 5541 aarch64_expand_vec_perm (operands[0], operands[1],
5263 operands[2], operands[3]); 5542 operands[2], operands[3], <nunits>);
5264 DONE; 5543 DONE;
5265 }) 5544 })
5266 5545
5267 (define_insn "aarch64_tbl1<mode>" 5546 (define_insn "aarch64_tbl1<mode>"
5268 [(set (match_operand:VB 0 "register_operand" "=w") 5547 [(set (match_operand:VB 0 "register_operand" "=w")
5367 DONE; 5646 DONE;
5368 } 5647 }
5369 [(set_attr "type" "multiple")] 5648 [(set_attr "type" "multiple")]
5370 ) 5649 )
5371 5650
5651 ;; This instruction's pattern is generated directly by
5652 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5653 ;; need corresponding changes there.
5372 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>" 5654 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>"
5373 [(set (match_operand:VALL_F16 0 "register_operand" "=w") 5655 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5374 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") 5656 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5375 (match_operand:VALL_F16 2 "register_operand" "w")] 5657 (match_operand:VALL_F16 2 "register_operand" "w")]
5376 PERMUTE))] 5658 PERMUTE))]
5377 "TARGET_SIMD" 5659 "TARGET_SIMD"
5378 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" 5660 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
5379 [(set_attr "type" "neon_permute<q>")] 5661 [(set_attr "type" "neon_permute<q>")]
5380 ) 5662 )
5381 5663
5382 ;; Note immediate (third) operand is lane index not byte index. 5664 ;; This instruction's pattern is generated directly by
5665 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5666 ;; need corresponding changes there. Note that the immediate (third)
5667 ;; operand is a lane index, not a byte index.
5383 (define_insn "aarch64_ext<mode>" 5668 (define_insn "aarch64_ext<mode>"
5384 [(set (match_operand:VALL_F16 0 "register_operand" "=w") 5669 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5385 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") 5670 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")
5386 (match_operand:VALL_F16 2 "register_operand" "w") 5671 (match_operand:VALL_F16 2 "register_operand" "w")
5387 (match_operand:SI 3 "immediate_operand" "i")] 5672 (match_operand:SI 3 "immediate_operand" "i")]
5393 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3"; 5678 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3";
5394 } 5679 }
5395 [(set_attr "type" "neon_ext<q>")] 5680 [(set_attr "type" "neon_ext<q>")]
5396 ) 5681 )
5397 5682
5683 ;; This instruction's pattern is generated directly by
5684 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would
5685 ;; need corresponding changes there.
5398 (define_insn "aarch64_rev<REVERSE:rev_op><mode>" 5686 (define_insn "aarch64_rev<REVERSE:rev_op><mode>"
5399 [(set (match_operand:VALL_F16 0 "register_operand" "=w") 5687 [(set (match_operand:VALL_F16 0 "register_operand" "=w")
5400 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")] 5688 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")]
5401 REVERSE))] 5689 REVERSE))]
5402 "TARGET_SIMD" 5690 "TARGET_SIMD"
5561 "TARGET_SIMD" 5849 "TARGET_SIMD"
5562 "ld1r\\t{%0.<Vtype>}, %1" 5850 "ld1r\\t{%0.<Vtype>}, %1"
5563 [(set_attr "type" "neon_load1_all_lanes")] 5851 [(set_attr "type" "neon_load1_all_lanes")]
5564 ) 5852 )
5565 5853
5566 (define_insn "aarch64_frecpe<mode>" 5854 (define_insn "aarch64_simd_ld1<mode>_x2"
5567 [(set (match_operand:VHSDF 0 "register_operand" "=w") 5855 [(set (match_operand:OI 0 "register_operand" "=w")
5568 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] 5856 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5857 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5858 UNSPEC_LD1))]
5859 "TARGET_SIMD"
5860 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5861 [(set_attr "type" "neon_load1_2reg<q>")]
5862 )
5863
5864 (define_insn "aarch64_simd_ld1<mode>_x2"
5865 [(set (match_operand:OI 0 "register_operand" "=w")
5866 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv")
5867 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5868 UNSPEC_LD1))]
5869 "TARGET_SIMD"
5870 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1"
5871 [(set_attr "type" "neon_load1_2reg<q>")]
5872 )
5873
5874
5875 (define_insn "@aarch64_frecpe<mode>"
5876 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5877 (unspec:VHSDF_HSDF
5878 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")]
5569 UNSPEC_FRECPE))] 5879 UNSPEC_FRECPE))]
5570 "TARGET_SIMD" 5880 "TARGET_SIMD"
5571 "frecpe\\t%0.<Vtype>, %1.<Vtype>" 5881 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>"
5572 [(set_attr "type" "neon_fp_recpe_<stype><q>")] 5882 [(set_attr "type" "neon_fp_recpe_<stype><q>")]
5573 ) 5883 )
5574 5884
5575 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>" 5885 (define_insn "aarch64_frecpx<mode>"
5576 [(set (match_operand:GPF_F16 0 "register_operand" "=w") 5886 [(set (match_operand:GPF_F16 0 "register_operand" "=w")
5577 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")] 5887 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")]
5578 FRECP))] 5888 UNSPEC_FRECPX))]
5579 "TARGET_SIMD" 5889 "TARGET_SIMD"
5580 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1" 5890 "frecpx\t%<s>0, %<s>1"
5581 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")] 5891 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")]
5582 ) 5892 )
5583 5893
5584 (define_insn "aarch64_frecps<mode>" 5894 (define_insn "@aarch64_frecps<mode>"
5585 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") 5895 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w")
5586 (unspec:VHSDF_HSDF 5896 (unspec:VHSDF_HSDF
5587 [(match_operand:VHSDF_HSDF 1 "register_operand" "w") 5897 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")
5588 (match_operand:VHSDF_HSDF 2 "register_operand" "w")] 5898 (match_operand:VHSDF_HSDF 2 "register_operand" "w")]
5589 UNSPEC_FRECPS))] 5899 UNSPEC_FRECPS))]
5615 5925
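;; FRECPE above returns an initial estimate of 1/x and FRECPS computes
;; 2.0 - a * b, i.e. one Newton-Raphson correction factor.  An illustrative
;; scalar sketch, assuming the ACLE scalar intrinsics map to these patterns:
;;
;;   float approx_recip (float x)
;;   {
;;     float est = vrecpes_f32 (x);     /* frecpe: initial estimate of 1/x */
;;     est *= vrecpss_f32 (x, est);     /* frecps: est * (2.0f - x * est)  */
;;     est *= vrecpss_f32 (x, est);     /* second refinement step          */
;;     return est;
;;   }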
5616 ;; aes 5926 ;; aes
5617 5927
5618 (define_insn "aarch64_crypto_aes<aes_op>v16qi" 5928 (define_insn "aarch64_crypto_aes<aes_op>v16qi"
5619 [(set (match_operand:V16QI 0 "register_operand" "=w") 5929 [(set (match_operand:V16QI 0 "register_operand" "=w")
5620 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") 5930 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0")
5621 (match_operand:V16QI 2 "register_operand" "w")] 5931 (match_operand:V16QI 2 "register_operand" "w")]
5622 CRYPTO_AES))] 5932 CRYPTO_AES))]
5623 "TARGET_SIMD && TARGET_CRYPTO" 5933 "TARGET_SIMD && TARGET_AES"
5934 "aes<aes_op>\\t%0.16b, %2.16b"
5935 [(set_attr "type" "crypto_aese")]
5936 )
5937
5938 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5939 [(set (match_operand:V16QI 0 "register_operand" "=w")
5940 (unspec:V16QI [(xor:V16QI
5941 (match_operand:V16QI 1 "register_operand" "%0")
5942 (match_operand:V16QI 2 "register_operand" "w"))
5943 (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")]
5944 CRYPTO_AES))]
5945 "TARGET_SIMD && TARGET_AES"
5946 "aes<aes_op>\\t%0.16b, %2.16b"
5947 [(set_attr "type" "crypto_aese")]
5948 )
5949
5950 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine"
5951 [(set (match_operand:V16QI 0 "register_operand" "=w")
5952 (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "")
5953 (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0")
5954 (match_operand:V16QI 2 "register_operand" "w"))]
5955 CRYPTO_AES))]
5956 "TARGET_SIMD && TARGET_AES"
5624 "aes<aes_op>\\t%0.16b, %2.16b" 5957 "aes<aes_op>\\t%0.16b, %2.16b"
5625 [(set_attr "type" "crypto_aese")] 5958 [(set_attr "type" "crypto_aese")]
5626 ) 5959 )
5627 5960
5628 ;; When AES/AESMC fusion is enabled we want the register allocation to 5961 ;; When AES/AESMC fusion is enabled we want the register allocation to
5633 5966
5634 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi" 5967 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi"
5635 [(set (match_operand:V16QI 0 "register_operand" "=w,w") 5968 [(set (match_operand:V16QI 0 "register_operand" "=w,w")
5636 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")] 5969 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")]
5637 CRYPTO_AESMC))] 5970 CRYPTO_AESMC))]
5638 "TARGET_SIMD && TARGET_CRYPTO" 5971 "TARGET_SIMD && TARGET_AES"
5639 "aes<aesmc_op>\\t%0.16b, %1.16b" 5972 "aes<aesmc_op>\\t%0.16b, %1.16b"
5640 [(set_attr "type" "crypto_aesmc") 5973 [(set_attr "type" "crypto_aesmc")
5641 (set_attr_alternative "enabled" 5974 (set_attr_alternative "enabled"
5642 [(if_then_else (match_test 5975 [(if_then_else (match_test
5643 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)") 5976 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)")
5644 (const_string "yes" ) 5977 (const_string "yes" )
5645 (const_string "no")) 5978 (const_string "no"))
5646 (const_string "yes")])] 5979 (const_string "yes")])]
5647 ) 5980 )
5648 5981
5982 ;; When AESE/AESMC fusion is enabled we really want to keep the two together
5983 ;; and enforce the register dependency without scheduling or register
5984 ;; allocation messing up the order or introducing moves in between.
5985 ;; Mash the two together during combine.
5986
5987 (define_insn "*aarch64_crypto_aese_fused"
5988 [(set (match_operand:V16QI 0 "register_operand" "=&w")
5989 (unspec:V16QI
5990 [(unspec:V16QI
5991 [(match_operand:V16QI 1 "register_operand" "0")
5992 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE)
5993 ] UNSPEC_AESMC))]
5994 "TARGET_SIMD && TARGET_AES
5995 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
5996 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b"
5997 [(set_attr "type" "crypto_aese")
5998 (set_attr "length" "8")]
5999 )
6000
6001 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together
6002 ;; and enforce the register dependency without scheduling or register
6003 ;; allocation messing up the order or introducing moves in between.
6004 ;; Mash the two together during combine.
6005
6006 (define_insn "*aarch64_crypto_aesd_fused"
6007 [(set (match_operand:V16QI 0 "register_operand" "=&w")
6008 (unspec:V16QI
6009 [(unspec:V16QI
6010 [(match_operand:V16QI 1 "register_operand" "0")
6011 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD)
6012 ] UNSPEC_AESIMC))]
6013 "TARGET_SIMD && TARGET_AES
6014 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
6015 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b"
6016 [(set_attr "type" "crypto_aese")
6017 (set_attr "length" "8")]
6018 )
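;; For reference, the fused patterns above correspond to back-to-back ACLE
;; intrinsic calls such as the following (illustrative, assuming the usual
;; arm_neon.h names):
;;
;;   state = vaesmcq_u8 (vaeseq_u8 (state, roundkey));    /* aese + aesmc  */
;;   state = vaesimcq_u8 (vaesdq_u8 (state, roundkey));   /* aesd + aesimc */
;;
;; Combine can then match the nested unspecs as a single fused insn.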
6019
5649 ;; sha1 6020 ;; sha1
5650 6021
5651 (define_insn "aarch64_crypto_sha1hsi" 6022 (define_insn "aarch64_crypto_sha1hsi"
5652 [(set (match_operand:SI 0 "register_operand" "=w") 6023 [(set (match_operand:SI 0 "register_operand" "=w")
5653 (unspec:SI [(match_operand:SI 1 6024 (unspec:SI [(match_operand:SI 1
5654 "register_operand" "w")] 6025 "register_operand" "w")]
5655 UNSPEC_SHA1H))] 6026 UNSPEC_SHA1H))]
5656 "TARGET_SIMD && TARGET_CRYPTO" 6027 "TARGET_SIMD && TARGET_SHA2"
5657 "sha1h\\t%s0, %s1" 6028 "sha1h\\t%s0, %s1"
5658 [(set_attr "type" "crypto_sha1_fast")] 6029 [(set_attr "type" "crypto_sha1_fast")]
5659 ) 6030 )
5660 6031
5661 (define_insn "aarch64_crypto_sha1hv4si" 6032 (define_insn "aarch64_crypto_sha1hv4si"
5662 [(set (match_operand:SI 0 "register_operand" "=w") 6033 [(set (match_operand:SI 0 "register_operand" "=w")
5663 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w") 6034 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5664 (parallel [(const_int 0)]))] 6035 (parallel [(const_int 0)]))]
5665 UNSPEC_SHA1H))] 6036 UNSPEC_SHA1H))]
5666 "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN" 6037 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN"
5667 "sha1h\\t%s0, %s1" 6038 "sha1h\\t%s0, %s1"
5668 [(set_attr "type" "crypto_sha1_fast")] 6039 [(set_attr "type" "crypto_sha1_fast")]
5669 ) 6040 )
5670 6041
5671 (define_insn "aarch64_be_crypto_sha1hv4si" 6042 (define_insn "aarch64_be_crypto_sha1hv4si"
5672 [(set (match_operand:SI 0 "register_operand" "=w") 6043 [(set (match_operand:SI 0 "register_operand" "=w")
5673 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w") 6044 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w")
5674 (parallel [(const_int 3)]))] 6045 (parallel [(const_int 3)]))]
5675 UNSPEC_SHA1H))] 6046 UNSPEC_SHA1H))]
5676 "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN" 6047 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN"
5677 "sha1h\\t%s0, %s1" 6048 "sha1h\\t%s0, %s1"
5678 [(set_attr "type" "crypto_sha1_fast")] 6049 [(set_attr "type" "crypto_sha1_fast")]
5679 ) 6050 )
5680 6051
5681 (define_insn "aarch64_crypto_sha1su1v4si" 6052 (define_insn "aarch64_crypto_sha1su1v4si"
5682 [(set (match_operand:V4SI 0 "register_operand" "=w") 6053 [(set (match_operand:V4SI 0 "register_operand" "=w")
5683 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") 6054 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5684 (match_operand:V4SI 2 "register_operand" "w")] 6055 (match_operand:V4SI 2 "register_operand" "w")]
5685 UNSPEC_SHA1SU1))] 6056 UNSPEC_SHA1SU1))]
5686 "TARGET_SIMD && TARGET_CRYPTO" 6057 "TARGET_SIMD && TARGET_SHA2"
5687 "sha1su1\\t%0.4s, %2.4s" 6058 "sha1su1\\t%0.4s, %2.4s"
5688 [(set_attr "type" "crypto_sha1_fast")] 6059 [(set_attr "type" "crypto_sha1_fast")]
5689 ) 6060 )
5690 6061
5691 (define_insn "aarch64_crypto_sha1<sha1_op>v4si" 6062 (define_insn "aarch64_crypto_sha1<sha1_op>v4si"
5692 [(set (match_operand:V4SI 0 "register_operand" "=w") 6063 [(set (match_operand:V4SI 0 "register_operand" "=w")
5693 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") 6064 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5694 (match_operand:SI 2 "register_operand" "w") 6065 (match_operand:SI 2 "register_operand" "w")
5695 (match_operand:V4SI 3 "register_operand" "w")] 6066 (match_operand:V4SI 3 "register_operand" "w")]
5696 CRYPTO_SHA1))] 6067 CRYPTO_SHA1))]
5697 "TARGET_SIMD && TARGET_CRYPTO" 6068 "TARGET_SIMD && TARGET_SHA2"
5698 "sha1<sha1_op>\\t%q0, %s2, %3.4s" 6069 "sha1<sha1_op>\\t%q0, %s2, %3.4s"
5699 [(set_attr "type" "crypto_sha1_slow")] 6070 [(set_attr "type" "crypto_sha1_slow")]
5700 ) 6071 )
5701 6072
5702 (define_insn "aarch64_crypto_sha1su0v4si" 6073 (define_insn "aarch64_crypto_sha1su0v4si"
5703 [(set (match_operand:V4SI 0 "register_operand" "=w") 6074 [(set (match_operand:V4SI 0 "register_operand" "=w")
5704 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") 6075 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5705 (match_operand:V4SI 2 "register_operand" "w") 6076 (match_operand:V4SI 2 "register_operand" "w")
5706 (match_operand:V4SI 3 "register_operand" "w")] 6077 (match_operand:V4SI 3 "register_operand" "w")]
5707 UNSPEC_SHA1SU0))] 6078 UNSPEC_SHA1SU0))]
5708 "TARGET_SIMD && TARGET_CRYPTO" 6079 "TARGET_SIMD && TARGET_SHA2"
5709 "sha1su0\\t%0.4s, %2.4s, %3.4s" 6080 "sha1su0\\t%0.4s, %2.4s, %3.4s"
5710 [(set_attr "type" "crypto_sha1_xor")] 6081 [(set_attr "type" "crypto_sha1_xor")]
5711 ) 6082 )
5712 6083
5713 ;; sha256 6084 ;; sha256
5716 [(set (match_operand:V4SI 0 "register_operand" "=w") 6087 [(set (match_operand:V4SI 0 "register_operand" "=w")
5717 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") 6088 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5718 (match_operand:V4SI 2 "register_operand" "w") 6089 (match_operand:V4SI 2 "register_operand" "w")
5719 (match_operand:V4SI 3 "register_operand" "w")] 6090 (match_operand:V4SI 3 "register_operand" "w")]
5720 CRYPTO_SHA256))] 6091 CRYPTO_SHA256))]
5721 "TARGET_SIMD && TARGET_CRYPTO" 6092 "TARGET_SIMD && TARGET_SHA2"
5722 "sha256h<sha256_op>\\t%q0, %q2, %3.4s" 6093 "sha256h<sha256_op>\\t%q0, %q2, %3.4s"
5723 [(set_attr "type" "crypto_sha256_slow")] 6094 [(set_attr "type" "crypto_sha256_slow")]
5724 ) 6095 )
5725 6096
5726 (define_insn "aarch64_crypto_sha256su0v4si" 6097 (define_insn "aarch64_crypto_sha256su0v4si"
5727 [(set (match_operand:V4SI 0 "register_operand" "=w") 6098 [(set (match_operand:V4SI 0 "register_operand" "=w")
5728 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") 6099 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5729 (match_operand:V4SI 2 "register_operand" "w")] 6100 (match_operand:V4SI 2 "register_operand" "w")]
5730 UNSPEC_SHA256SU0))] 6101 UNSPEC_SHA256SU0))]
5731 "TARGET_SIMD &&TARGET_CRYPTO" 6102 "TARGET_SIMD && TARGET_SHA2"
5732 "sha256su0\\t%0.4s, %2.4s" 6103 "sha256su0\\t%0.4s, %2.4s"
5733 [(set_attr "type" "crypto_sha256_fast")] 6104 [(set_attr "type" "crypto_sha256_fast")]
5734 ) 6105 )
5735 6106
5736 (define_insn "aarch64_crypto_sha256su1v4si" 6107 (define_insn "aarch64_crypto_sha256su1v4si"
5737 [(set (match_operand:V4SI 0 "register_operand" "=w") 6108 [(set (match_operand:V4SI 0 "register_operand" "=w")
5738 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") 6109 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
5739 (match_operand:V4SI 2 "register_operand" "w") 6110 (match_operand:V4SI 2 "register_operand" "w")
5740 (match_operand:V4SI 3 "register_operand" "w")] 6111 (match_operand:V4SI 3 "register_operand" "w")]
5741 UNSPEC_SHA256SU1))] 6112 UNSPEC_SHA256SU1))]
5742 "TARGET_SIMD &&TARGET_CRYPTO" 6113 "TARGET_SIMD && TARGET_SHA2"
5743 "sha256su1\\t%0.4s, %2.4s, %3.4s" 6114 "sha256su1\\t%0.4s, %2.4s, %3.4s"
5744 [(set_attr "type" "crypto_sha256_slow")] 6115 [(set_attr "type" "crypto_sha256_slow")]
6116 )
6117
6118 ;; sha512
6119
6120 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di"
6121 [(set (match_operand:V2DI 0 "register_operand" "=w")
6122 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6123 (match_operand:V2DI 2 "register_operand" "w")
6124 (match_operand:V2DI 3 "register_operand" "w")]
6125 CRYPTO_SHA512))]
6126 "TARGET_SIMD && TARGET_SHA3"
6127 "sha512h<sha512_op>\\t%q0, %q2, %3.2d"
6128 [(set_attr "type" "crypto_sha512")]
6129 )
6130
6131 (define_insn "aarch64_crypto_sha512su0qv2di"
6132 [(set (match_operand:V2DI 0 "register_operand" "=w")
6133 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6134 (match_operand:V2DI 2 "register_operand" "w")]
6135 UNSPEC_SHA512SU0))]
6136 "TARGET_SIMD && TARGET_SHA3"
6137 "sha512su0\\t%0.2d, %2.2d"
6138 [(set_attr "type" "crypto_sha512")]
6139 )
6140
6141 (define_insn "aarch64_crypto_sha512su1qv2di"
6142 [(set (match_operand:V2DI 0 "register_operand" "=w")
6143 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0")
6144 (match_operand:V2DI 2 "register_operand" "w")
6145 (match_operand:V2DI 3 "register_operand" "w")]
6146 UNSPEC_SHA512SU1))]
6147 "TARGET_SIMD && TARGET_SHA3"
6148 "sha512su1\\t%0.2d, %2.2d, %3.2d"
6149 [(set_attr "type" "crypto_sha512")]
6150 )
6151
6152 ;; sha3
6153
6154 (define_insn "eor3q<mode>4"
6155 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6156 (xor:VQ_I
6157 (xor:VQ_I
6158 (match_operand:VQ_I 2 "register_operand" "w")
6159 (match_operand:VQ_I 3 "register_operand" "w"))
6160 (match_operand:VQ_I 1 "register_operand" "w")))]
6161 "TARGET_SIMD && TARGET_SHA3"
6162 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b"
6163 [(set_attr "type" "crypto_sha3")]
6164 )
6165
6166 (define_insn "aarch64_rax1qv2di"
6167 [(set (match_operand:V2DI 0 "register_operand" "=w")
6168 (xor:V2DI
6169 (rotate:V2DI
6170 (match_operand:V2DI 2 "register_operand" "w")
6171 (const_int 1))
6172 (match_operand:V2DI 1 "register_operand" "w")))]
6173 "TARGET_SIMD && TARGET_SHA3"
6174 "rax1\\t%0.2d, %1.2d, %2.2d"
6175 [(set_attr "type" "crypto_sha3")]
6176 )
6177
6178 (define_insn "aarch64_xarqv2di"
6179 [(set (match_operand:V2DI 0 "register_operand" "=w")
6180 (rotatert:V2DI
6181 (xor:V2DI
6182 (match_operand:V2DI 1 "register_operand" "%w")
6183 (match_operand:V2DI 2 "register_operand" "w"))
6184 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
6185 "TARGET_SIMD && TARGET_SHA3"
6186 "xar\\t%0.2d, %1.2d, %2.2d, %3"
6187 [(set_attr "type" "crypto_sha3")]
6188 )
6189
6190 (define_insn "bcaxq<mode>4"
6191 [(set (match_operand:VQ_I 0 "register_operand" "=w")
6192 (xor:VQ_I
6193 (and:VQ_I
6194 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w"))
6195 (match_operand:VQ_I 2 "register_operand" "w"))
6196 (match_operand:VQ_I 1 "register_operand" "w")))]
6197 "TARGET_SIMD && TARGET_SHA3"
6198 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b"
6199 [(set_attr "type" "crypto_sha3")]
6200 )
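;; Per-lane semantics of the SHA3 patterns above, as a 64-bit C sketch
;; (illustrative only; n is the XAR rotate immediate, 0..63):
;;
;;   uint64_t eor3 (uint64_t a, uint64_t b, uint64_t c) { return a ^ b ^ c; }
;;   uint64_t rax1 (uint64_t a, uint64_t b)
;;     { return a ^ ((b << 1) | (b >> 63)); }
;;   uint64_t xar (uint64_t a, uint64_t b, unsigned n)
;;     { uint64_t t = a ^ b; return (t >> n) | (t << ((64 - n) & 63)); }
;;   uint64_t bcax (uint64_t a, uint64_t b, uint64_t c) { return a ^ (b & ~c); }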
6201
6202 ;; SM3
6203
6204 (define_insn "aarch64_sm3ss1qv4si"
6205 [(set (match_operand:V4SI 0 "register_operand" "=w")
6206 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6207 (match_operand:V4SI 2 "register_operand" "w")
6208 (match_operand:V4SI 3 "register_operand" "w")]
6209 UNSPEC_SM3SS1))]
6210 "TARGET_SIMD && TARGET_SM4"
6211 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s"
6212 [(set_attr "type" "crypto_sm3")]
6213 )
6214
6215
6216 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si"
6217 [(set (match_operand:V4SI 0 "register_operand" "=w")
6218 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6219 (match_operand:V4SI 2 "register_operand" "w")
6220 (match_operand:V4SI 3 "register_operand" "w")
6221 (match_operand:SI 4 "aarch64_imm2" "Ui2")]
6222 CRYPTO_SM3TT))]
6223 "TARGET_SIMD && TARGET_SM4"
6224 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]"
6225 [(set_attr "type" "crypto_sm3")]
6226 )
6227
6228 (define_insn "aarch64_sm3partw<sm3part_op>qv4si"
6229 [(set (match_operand:V4SI 0 "register_operand" "=w")
6230 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6231 (match_operand:V4SI 2 "register_operand" "w")
6232 (match_operand:V4SI 3 "register_operand" "w")]
6233 CRYPTO_SM3PART))]
6234 "TARGET_SIMD && TARGET_SM4"
6235 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s"
6236 [(set_attr "type" "crypto_sm3")]
6237 )
6238
6239 ;; SM4
6240
6241 (define_insn "aarch64_sm4eqv4si"
6242 [(set (match_operand:V4SI 0 "register_operand" "=w")
6243 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0")
6244 (match_operand:V4SI 2 "register_operand" "w")]
6245 UNSPEC_SM4E))]
6246 "TARGET_SIMD && TARGET_SM4"
6247 "sm4e\\t%0.4s, %2.4s"
6248 [(set_attr "type" "crypto_sm4")]
6249 )
6250
6251 (define_insn "aarch64_sm4ekeyqv4si"
6252 [(set (match_operand:V4SI 0 "register_operand" "=w")
6253 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w")
6254 (match_operand:V4SI 2 "register_operand" "w")]
6255 UNSPEC_SM4EKEY))]
6256 "TARGET_SIMD && TARGET_SM4"
6257 "sm4ekey\\t%0.4s, %1.4s, %2.4s"
6258 [(set_attr "type" "crypto_sm4")]
6259 )
6260
6261 ;; fp16fml
6262
6263 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>"
6264 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6265 (unspec:VDQSF
6266 [(match_operand:VDQSF 1 "register_operand" "0")
6267 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6268 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6269 VFMLA16_LOW))]
6270 "TARGET_F16FML"
6271 {
6272 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6273 <nunits> * 2, false);
6274 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode,
6275 <nunits> * 2, false);
6276
6277 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0],
6278 operands[1],
6279 operands[2],
6280 operands[3],
6281 p1, p2));
6282 DONE;
6283
6284 })
6285
6286 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>"
6287 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6288 (unspec:VDQSF
6289 [(match_operand:VDQSF 1 "register_operand" "0")
6290 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6291 (match_operand:<VFMLA_W> 3 "register_operand" "w")]
6292 VFMLA16_HIGH))]
6293 "TARGET_F16FML"
6294 {
6295 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6296 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true);
6297
6298 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0],
6299 operands[1],
6300 operands[2],
6301 operands[3],
6302 p1, p2));
6303 DONE;
6304 })
6305
6306 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>"
6307 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6308 (fma:VDQSF
6309 (float_extend:VDQSF
6310 (vec_select:<VFMLA_SEL_W>
6311 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6312 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))
6313 (float_extend:VDQSF
6314 (vec_select:<VFMLA_SEL_W>
6315 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6316 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6317 (match_operand:VDQSF 1 "register_operand" "0")))]
6318 "TARGET_F16FML"
6319 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6320 [(set_attr "type" "neon_fp_mul_s")]
6321 )
6322
6323 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>"
6324 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6325 (fma:VDQSF
6326 (float_extend:VDQSF
6327 (neg:<VFMLA_SEL_W>
6328 (vec_select:<VFMLA_SEL_W>
6329 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6330 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))))
6331 (float_extend:VDQSF
6332 (vec_select:<VFMLA_SEL_W>
6333 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6334 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" "")))
6335 (match_operand:VDQSF 1 "register_operand" "0")))]
6336 "TARGET_F16FML"
6337 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6338 [(set_attr "type" "neon_fp_mul_s")]
6339 )
6340
6341 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>"
6342 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6343 (fma:VDQSF
6344 (float_extend:VDQSF
6345 (vec_select:<VFMLA_SEL_W>
6346 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6347 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))
6348 (float_extend:VDQSF
6349 (vec_select:<VFMLA_SEL_W>
6350 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6351 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6352 (match_operand:VDQSF 1 "register_operand" "0")))]
6353 "TARGET_F16FML"
6354 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6355 [(set_attr "type" "neon_fp_mul_s")]
6356 )
6357
6358 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>"
6359 [(set (match_operand:VDQSF 0 "register_operand" "=w")
6360 (fma:VDQSF
6361 (float_extend:VDQSF
6362 (neg:<VFMLA_SEL_W>
6363 (vec_select:<VFMLA_SEL_W>
6364 (match_operand:<VFMLA_W> 2 "register_operand" "w")
6365 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))))
6366 (float_extend:VDQSF
6367 (vec_select:<VFMLA_SEL_W>
6368 (match_operand:<VFMLA_W> 3 "register_operand" "w")
6369 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" "")))
6370 (match_operand:VDQSF 1 "register_operand" "0")))]
6371 "TARGET_F16FML"
6372 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h"
6373 [(set_attr "type" "neon_fp_mul_s")]
6374 )
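;; FMLAL/FMLSL above widen half-precision elements to single precision and
;; accumulate; the _low forms read the lower half of each source vector and
;; the _high forms (fmlal2/fmlsl2) read the upper half.  Roughly, per lane
;; (illustrative, for a float32xN accumulator acc and float16x2N sources a, b):
;;
;;   acc[i] += (float) a[i] * (float) b[i];          /* fmlal,  low half  */
;;   acc[i] += (float) a[i + N] * (float) b[i + N];  /* fmlal2, high half */
;;   /* fmlsl/fmlsl2 subtract the product instead of adding it.  */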
6375
6376 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf"
6377 [(set (match_operand:V2SF 0 "register_operand" "")
6378 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6379 (match_operand:V4HF 2 "register_operand" "")
6380 (match_operand:V4HF 3 "register_operand" "")
6381 (match_operand:SI 4 "aarch64_imm2" "")]
6382 VFMLA16_LOW))]
6383 "TARGET_F16FML"
6384 {
6385 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6386 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6387
6388 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0],
6389 operands[1],
6390 operands[2],
6391 operands[3],
6392 p1, lane));
6393 DONE;
6394 }
6395 )
6396
6397 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf"
6398 [(set (match_operand:V2SF 0 "register_operand" "")
6399 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6400 (match_operand:V4HF 2 "register_operand" "")
6401 (match_operand:V4HF 3 "register_operand" "")
6402 (match_operand:SI 4 "aarch64_imm2" "")]
6403 VFMLA16_HIGH))]
6404 "TARGET_F16FML"
6405 {
6406 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6407 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6408
6409 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0],
6410 operands[1],
6411 operands[2],
6412 operands[3],
6413 p1, lane));
6414 DONE;
6415 })
6416
6417 (define_insn "aarch64_simd_fmlal_lane_lowv2sf"
6418 [(set (match_operand:V2SF 0 "register_operand" "=w")
6419 (fma:V2SF
6420 (float_extend:V2SF
6421 (vec_select:V2HF
6422 (match_operand:V4HF 2 "register_operand" "w")
6423 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6424 (float_extend:V2SF
6425 (vec_duplicate:V2HF
6426 (vec_select:HF
6427 (match_operand:V4HF 3 "register_operand" "x")
6428 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6429 (match_operand:V2SF 1 "register_operand" "0")))]
6430 "TARGET_F16FML"
6431 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6432 [(set_attr "type" "neon_fp_mul_s")]
6433 )
6434
6435 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf"
6436 [(set (match_operand:V2SF 0 "register_operand" "=w")
6437 (fma:V2SF
6438 (float_extend:V2SF
6439 (neg:V2HF
6440 (vec_select:V2HF
6441 (match_operand:V4HF 2 "register_operand" "w")
6442 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6443 (float_extend:V2SF
6444 (vec_duplicate:V2HF
6445 (vec_select:HF
6446 (match_operand:V4HF 3 "register_operand" "x")
6447 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6448 (match_operand:V2SF 1 "register_operand" "0")))]
6449 "TARGET_F16FML"
6450 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6451 [(set_attr "type" "neon_fp_mul_s")]
6452 )
6453
6454 (define_insn "aarch64_simd_fmlal_lane_highv2sf"
6455 [(set (match_operand:V2SF 0 "register_operand" "=w")
6456 (fma:V2SF
6457 (float_extend:V2SF
6458 (vec_select:V2HF
6459 (match_operand:V4HF 2 "register_operand" "w")
6460 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6461 (float_extend:V2SF
6462 (vec_duplicate:V2HF
6463 (vec_select:HF
6464 (match_operand:V4HF 3 "register_operand" "x")
6465 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6466 (match_operand:V2SF 1 "register_operand" "0")))]
6467 "TARGET_F16FML"
6468 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6469 [(set_attr "type" "neon_fp_mul_s")]
6470 )
6471
6472 (define_insn "aarch64_simd_fmlsl_lane_highv2sf"
6473 [(set (match_operand:V2SF 0 "register_operand" "=w")
6474 (fma:V2SF
6475 (float_extend:V2SF
6476 (neg:V2HF
6477 (vec_select:V2HF
6478 (match_operand:V4HF 2 "register_operand" "w")
6479 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6480 (float_extend:V2SF
6481 (vec_duplicate:V2HF
6482 (vec_select:HF
6483 (match_operand:V4HF 3 "register_operand" "x")
6484 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6485 (match_operand:V2SF 1 "register_operand" "0")))]
6486 "TARGET_F16FML"
6487 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6488 [(set_attr "type" "neon_fp_mul_s")]
6489 )
6490
6491 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf"
6492 [(set (match_operand:V4SF 0 "register_operand" "")
6493 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6494 (match_operand:V8HF 2 "register_operand" "")
6495 (match_operand:V8HF 3 "register_operand" "")
6496 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6497 VFMLA16_LOW))]
6498 "TARGET_F16FML"
6499 {
6500 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6501 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6502
6503 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0],
6504 operands[1],
6505 operands[2],
6506 operands[3],
6507 p1, lane));
6508 DONE;
6509 })
6510
6511 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf"
6512 [(set (match_operand:V4SF 0 "register_operand" "")
6513 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6514 (match_operand:V8HF 2 "register_operand" "")
6515 (match_operand:V8HF 3 "register_operand" "")
6516 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6517 VFMLA16_HIGH))]
6518 "TARGET_F16FML"
6519 {
6520 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6521 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6522
6523 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0],
6524 operands[1],
6525 operands[2],
6526 operands[3],
6527 p1, lane));
6528 DONE;
6529 })
6530
6531 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf"
6532 [(set (match_operand:V4SF 0 "register_operand" "=w")
6533 (fma:V4SF
6534 (float_extend:V4SF
6535 (vec_select:V4HF
6536 (match_operand:V8HF 2 "register_operand" "w")
6537 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6538 (float_extend:V4SF
6539 (vec_duplicate:V4HF
6540 (vec_select:HF
6541 (match_operand:V8HF 3 "register_operand" "x")
6542 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6543 (match_operand:V4SF 1 "register_operand" "0")))]
6544 "TARGET_F16FML"
6545 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6546 [(set_attr "type" "neon_fp_mul_s")]
6547 )
6548
6549 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf"
6550 [(set (match_operand:V4SF 0 "register_operand" "=w")
6551 (fma:V4SF
6552 (float_extend:V4SF
6553 (neg:V4HF
6554 (vec_select:V4HF
6555 (match_operand:V8HF 2 "register_operand" "w")
6556 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6557 (float_extend:V4SF
6558 (vec_duplicate:V4HF
6559 (vec_select:HF
6560 (match_operand:V8HF 3 "register_operand" "x")
6561 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6562 (match_operand:V4SF 1 "register_operand" "0")))]
6563 "TARGET_F16FML"
6564 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6565 [(set_attr "type" "neon_fp_mul_s")]
6566 )
6567
6568 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf"
6569 [(set (match_operand:V4SF 0 "register_operand" "=w")
6570 (fma:V4SF
6571 (float_extend:V4SF
6572 (vec_select:V4HF
6573 (match_operand:V8HF 2 "register_operand" "w")
6574 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6575 (float_extend:V4SF
6576 (vec_duplicate:V4HF
6577 (vec_select:HF
6578 (match_operand:V8HF 3 "register_operand" "x")
6579 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6580 (match_operand:V4SF 1 "register_operand" "0")))]
6581 "TARGET_F16FML"
6582 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6583 [(set_attr "type" "neon_fp_mul_s")]
6584 )
6585
6586 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf"
6587 [(set (match_operand:V4SF 0 "register_operand" "=w")
6588 (fma:V4SF
6589 (float_extend:V4SF
6590 (neg:V4HF
6591 (vec_select:V4HF
6592 (match_operand:V8HF 2 "register_operand" "w")
6593 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6594 (float_extend:V4SF
6595 (vec_duplicate:V4HF
6596 (vec_select:HF
6597 (match_operand:V8HF 3 "register_operand" "x")
6598 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6599 (match_operand:V4SF 1 "register_operand" "0")))]
6600 "TARGET_F16FML"
6601 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6602 [(set_attr "type" "neon_fp_mul_s")]
6603 )
6604
6605 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf"
6606 [(set (match_operand:V2SF 0 "register_operand" "")
6607 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6608 (match_operand:V4HF 2 "register_operand" "")
6609 (match_operand:V8HF 3 "register_operand" "")
6610 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6611 VFMLA16_LOW))]
6612 "TARGET_F16FML"
6613 {
6614 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false);
6615 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6616
6617 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0],
6618 operands[1],
6619 operands[2],
6620 operands[3],
6621 p1, lane));
6622 DONE;
6623
6624 })
6625
6626 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf"
6627 [(set (match_operand:V2SF 0 "register_operand" "")
6628 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "")
6629 (match_operand:V4HF 2 "register_operand" "")
6630 (match_operand:V8HF 3 "register_operand" "")
6631 (match_operand:SI 4 "aarch64_lane_imm3" "")]
6632 VFMLA16_HIGH))]
6633 "TARGET_F16FML"
6634 {
6635 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true);
6636 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4]));
6637
6638 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0],
6639 operands[1],
6640 operands[2],
6641 operands[3],
6642 p1, lane));
6643 DONE;
6644
6645 })
6646
6647 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf"
6648 [(set (match_operand:V2SF 0 "register_operand" "=w")
6649 (fma:V2SF
6650 (float_extend:V2SF
6651 (vec_select:V2HF
6652 (match_operand:V4HF 2 "register_operand" "w")
6653 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))
6654 (float_extend:V2SF
6655 (vec_duplicate:V2HF
6656 (vec_select:HF
6657 (match_operand:V8HF 3 "register_operand" "x")
6658 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6659 (match_operand:V2SF 1 "register_operand" "0")))]
6660 "TARGET_F16FML"
6661 "fmlal\\t%0.2s, %2.2h, %3.h[%5]"
6662 [(set_attr "type" "neon_fp_mul_s")]
6663 )
6664
6665 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf"
6666 [(set (match_operand:V2SF 0 "register_operand" "=w")
6667 (fma:V2SF
6668 (float_extend:V2SF
6669 (neg:V2HF
6670 (vec_select:V2HF
6671 (match_operand:V4HF 2 "register_operand" "w")
6672 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))))
6673 (float_extend:V2SF
6674 (vec_duplicate:V2HF
6675 (vec_select:HF
6676 (match_operand:V8HF 3 "register_operand" "x")
6677 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6678 (match_operand:V2SF 1 "register_operand" "0")))]
6679 "TARGET_F16FML"
6680 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]"
6681 [(set_attr "type" "neon_fp_mul_s")]
6682 )
6683
6684 (define_insn "aarch64_simd_fmlal_laneq_highv2sf"
6685 [(set (match_operand:V2SF 0 "register_operand" "=w")
6686 (fma:V2SF
6687 (float_extend:V2SF
6688 (vec_select:V2HF
6689 (match_operand:V4HF 2 "register_operand" "w")
6690 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))
6691 (float_extend:V2SF
6692 (vec_duplicate:V2HF
6693 (vec_select:HF
6694 (match_operand:V8HF 3 "register_operand" "x")
6695 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6696 (match_operand:V2SF 1 "register_operand" "0")))]
6697 "TARGET_F16FML"
6698 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]"
6699 [(set_attr "type" "neon_fp_mul_s")]
6700 )
6701
6702 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf"
6703 [(set (match_operand:V2SF 0 "register_operand" "=w")
6704 (fma:V2SF
6705 (float_extend:V2SF
6706 (neg:V2HF
6707 (vec_select:V2HF
6708 (match_operand:V4HF 2 "register_operand" "w")
6709 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))))
6710 (float_extend:V2SF
6711 (vec_duplicate:V2HF
6712 (vec_select:HF
6713 (match_operand:V8HF 3 "register_operand" "x")
6714 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")]))))
6715 (match_operand:V2SF 1 "register_operand" "0")))]
6716 "TARGET_F16FML"
6717 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]"
6718 [(set_attr "type" "neon_fp_mul_s")]
6719 )
6720
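;; The q_lane variants repeat the same scheme for a V4SF accumulator: the low
;; or high half is taken from a V8HF multiplicand, while the multiplier lane
;; comes from a 64-bit V4HF register, so the lane index is restricted to two
;; bits (aarch64_imm2 / "Ui2").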
6721 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf"
6722 [(set (match_operand:V4SF 0 "register_operand" "")
6723 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6724 (match_operand:V8HF 2 "register_operand" "")
6725 (match_operand:V4HF 3 "register_operand" "")
6726 (match_operand:SI 4 "aarch64_imm2" "")]
6727 VFMLA16_LOW))]
6728 "TARGET_F16FML"
6729 {
6730 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false);
6731 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6732
6733 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0],
6734 operands[1],
6735 operands[2],
6736 operands[3],
6737 p1, lane));
6738 DONE;
6739 })
6740
6741 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf"
6742 [(set (match_operand:V4SF 0 "register_operand" "")
6743 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "")
6744 (match_operand:V8HF 2 "register_operand" "")
6745 (match_operand:V4HF 3 "register_operand" "")
6746 (match_operand:SI 4 "aarch64_imm2" "")]
6747 VFMLA16_HIGH))]
6748 "TARGET_F16FML"
6749 {
6750 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true);
6751 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4]));
6752
6753 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0],
6754 operands[1],
6755 operands[2],
6756 operands[3],
6757 p1, lane));
6758 DONE;
6759 })
6760
6761 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf"
6762 [(set (match_operand:V4SF 0 "register_operand" "=w")
6763 (fma:V4SF
6764 (float_extend:V4SF
6765 (vec_select:V4HF
6766 (match_operand:V8HF 2 "register_operand" "w")
6767 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))
6768 (float_extend:V4SF
6769 (vec_duplicate:V4HF
6770 (vec_select:HF
6771 (match_operand:V4HF 3 "register_operand" "x")
6772 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6773 (match_operand:V4SF 1 "register_operand" "0")))]
6774 "TARGET_F16FML"
6775 "fmlal\\t%0.4s, %2.4h, %3.h[%5]"
6776 [(set_attr "type" "neon_fp_mul_s")]
6777 )
6778
6779 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf"
6780 [(set (match_operand:V4SF 0 "register_operand" "=w")
6781 (fma:V4SF
6782 (float_extend:V4SF
6783 (neg:V4HF
6784 (vec_select:V4HF
6785 (match_operand:V8HF 2 "register_operand" "w")
6786 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))))
6787 (float_extend:V4SF
6788 (vec_duplicate:V4HF
6789 (vec_select:HF
6790 (match_operand:V4HF 3 "register_operand" "x")
6791 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6792 (match_operand:V4SF 1 "register_operand" "0")))]
6793 "TARGET_F16FML"
6794 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]"
6795 [(set_attr "type" "neon_fp_mul_s")]
6796 )
6797
6798 (define_insn "aarch64_simd_fmlalq_lane_highv4sf"
6799 [(set (match_operand:V4SF 0 "register_operand" "=w")
6800 (fma:V4SF
6801 (float_extend:V4SF
6802 (vec_select:V4HF
6803 (match_operand:V8HF 2 "register_operand" "w")
6804 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))
6805 (float_extend:V4SF
6806 (vec_duplicate:V4HF
6807 (vec_select:HF
6808 (match_operand:V4HF 3 "register_operand" "x")
6809 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6810 (match_operand:V4SF 1 "register_operand" "0")))]
6811 "TARGET_F16FML"
6812 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]"
6813 [(set_attr "type" "neon_fp_mul_s")]
6814 )
6815
6816 (define_insn "aarch64_simd_fmlslq_lane_highv4sf"
6817 [(set (match_operand:V4SF 0 "register_operand" "=w")
6818 (fma:V4SF
6819 (float_extend:V4SF
6820 (neg:V4HF
6821 (vec_select:V4HF
6822 (match_operand:V8HF 2 "register_operand" "w")
6823 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))))
6824 (float_extend:V4SF
6825 (vec_duplicate:V4HF
6826 (vec_select:HF
6827 (match_operand:V4HF 3 "register_operand" "x")
6828 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")]))))
6829 (match_operand:V4SF 1 "register_operand" "0")))]
6830 "TARGET_F16FML"
6831 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]"
6832 [(set_attr "type" "neon_fp_mul_s")]
5745 ) 6833 )
5746 6834
5747 ;; pmull 6835 ;; pmull
5748 6836
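;; PMULL performs a carry-less (polynomial) multiply of two 64-bit values,
;; yielding a 128-bit product; pmull operates on the low doublewords of its
;; sources and pmull2 on the high doublewords.  This revision gates both
;; patterns on TARGET_AES rather than TARGET_CRYPTO, matching the split of
;; the crypto extension into the finer-grained +aes and +sha2 features.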
5749 (define_insn "aarch64_crypto_pmulldi" 6837 (define_insn "aarch64_crypto_pmulldi"
5750 [(set (match_operand:TI 0 "register_operand" "=w") 6838 [(set (match_operand:TI 0 "register_operand" "=w")
5751 (unspec:TI [(match_operand:DI 1 "register_operand" "w") 6839 (unspec:TI [(match_operand:DI 1 "register_operand" "w")
5752 (match_operand:DI 2 "register_operand" "w")] 6840 (match_operand:DI 2 "register_operand" "w")]
5753 UNSPEC_PMULL))] 6841 UNSPEC_PMULL))]
5754 "TARGET_SIMD && TARGET_CRYPTO" 6842 "TARGET_SIMD && TARGET_AES"
5755 "pmull\\t%0.1q, %1.1d, %2.1d" 6843 "pmull\\t%0.1q, %1.1d, %2.1d"
5756 [(set_attr "type" "crypto_pmull")] 6844 [(set_attr "type" "crypto_pmull")]
5757 ) 6845 )
5758 6846
5759 (define_insn "aarch64_crypto_pmullv2di" 6847 (define_insn "aarch64_crypto_pmullv2di"
5760 [(set (match_operand:TI 0 "register_operand" "=w") 6848 [(set (match_operand:TI 0 "register_operand" "=w")
5761 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w") 6849 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w")
5762 (match_operand:V2DI 2 "register_operand" "w")] 6850 (match_operand:V2DI 2 "register_operand" "w")]
5763 UNSPEC_PMULL2))] 6851 UNSPEC_PMULL2))]
5764 "TARGET_SIMD && TARGET_CRYPTO" 6852 "TARGET_SIMD && TARGET_AES"
5765 "pmull2\\t%0.1q, %1.2d, %2.2d" 6853 "pmull2\\t%0.1q, %1.2d, %2.2d"
5766 [(set_attr "type" "crypto_pmull")] 6854 [(set_attr "type" "crypto_pmull")]
5767 ) 6855 )
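;; A minimal C sketch of how the two pmull patterns are normally reached
;; (assuming arm_neon.h and e.g. -march=armv8-a+aes; the mapping from the
;; intrinsics to these insn names is assumed for illustration, not taken
;; from this file):
;;
;;   #include <arm_neon.h>
;;
;;   /* Expected to expand to pmull (aarch64_crypto_pmulldi).  */
;;   poly128_t clmul_lo (poly64_t a, poly64_t b)
;;   {
;;     return vmull_p64 (a, b);
;;   }
;;
;;   /* Expected to expand to pmull2 (aarch64_crypto_pmullv2di).  */
;;   poly128_t clmul_hi (poly64x2_t a, poly64x2_t b)
;;   {
;;     return vmull_high_p64 (a, b);
;;   }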