Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/aarch64/aarch64-simd.md @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | 04ced10e8804 |
children | 1830386684a0 |
comparison
equal
deleted
inserted
replaced
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 ;; Machine description for AArch64 AdvSIMD architecture. | 1 ;; Machine description for AArch64 AdvSIMD architecture. |
2 ;; Copyright (C) 2011-2017 Free Software Foundation, Inc. | 2 ;; Copyright (C) 2011-2018 Free Software Foundation, Inc. |
3 ;; Contributed by ARM Ltd. | 3 ;; Contributed by ARM Ltd. |
4 ;; | 4 ;; |
5 ;; This file is part of GCC. | 5 ;; This file is part of GCC. |
6 ;; | 6 ;; |
7 ;; GCC is free software; you can redistribute it and/or modify it | 7 ;; GCC is free software; you can redistribute it and/or modify it |
29 a stp in DI mode, so we check the validity of that. | 29 a stp in DI mode, so we check the validity of that. |
30 If the mode is 8 bytes wide, then we will be doing a | 30 If the mode is 8 bytes wide, then we will be doing a |
31 normal str, so the check need not apply. */ | 31 normal str, so the check need not apply. */ |
32 if (GET_CODE (operands[0]) == MEM | 32 if (GET_CODE (operands[0]) == MEM |
33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode) | 33 && !(aarch64_simd_imm_zero (operands[1], <MODE>mode) |
34 && ((GET_MODE_SIZE (<MODE>mode) == 16 | 34 && ((known_eq (GET_MODE_SIZE (<MODE>mode), 16) |
35 && aarch64_mem_pair_operand (operands[0], DImode)) | 35 && aarch64_mem_pair_operand (operands[0], DImode)) |
36 || GET_MODE_SIZE (<MODE>mode) == 8))) | 36 || known_eq (GET_MODE_SIZE (<MODE>mode), 8)))) |
37 operands[1] = force_reg (<MODE>mode, operands[1]); | 37 operands[1] = force_reg (<MODE>mode, operands[1]); |
38 " | 38 " |
39 ) | 39 ) |
40 | 40 |
41 (define_expand "movmisalign<mode>" | 41 (define_expand "movmisalign<mode>" |
78 (match_operand:VALL_F16 1 "register_operand" "w") | 78 (match_operand:VALL_F16 1 "register_operand" "w") |
79 (parallel [(match_operand:SI 2 "immediate_operand" "i")]) | 79 (parallel [(match_operand:SI 2 "immediate_operand" "i")]) |
80 )))] | 80 )))] |
81 "TARGET_SIMD" | 81 "TARGET_SIMD" |
82 { | 82 { |
83 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | 83 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); |
84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"; | 84 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"; |
85 } | 85 } |
86 [(set_attr "type" "neon_dup<q>")] | 86 [(set_attr "type" "neon_dup<q>")] |
87 ) | 87 ) |
88 | 88 |
93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w") | 93 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "w") |
94 (parallel [(match_operand:SI 2 "immediate_operand" "i")]) | 94 (parallel [(match_operand:SI 2 "immediate_operand" "i")]) |
95 )))] | 95 )))] |
96 "TARGET_SIMD" | 96 "TARGET_SIMD" |
97 { | 97 { |
98 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, | 98 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2])); |
99 INTVAL (operands[2]))); | |
100 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"; | 99 return "dup\\t%0.<Vtype>, %1.<Vetype>[%2]"; |
101 } | 100 } |
102 [(set_attr "type" "neon_dup<q>")] | 101 [(set_attr "type" "neon_dup<q>")] |
103 ) | 102 ) |
104 | 103 |
105 (define_insn "*aarch64_simd_mov<mode>" | 104 (define_insn "*aarch64_simd_mov<VD:mode>" |
106 [(set (match_operand:VD 0 "nonimmediate_operand" | 105 [(set (match_operand:VD 0 "nonimmediate_operand" |
107 "=w, m, m, w, ?r, ?w, ?r, w") | 106 "=w, m, m, w, ?r, ?w, ?r, w") |
108 (match_operand:VD 1 "general_operand" | 107 (match_operand:VD 1 "general_operand" |
109 "m, Dz, w, w, w, r, r, Dn"))] | 108 "m, Dz, w, w, w, r, r, Dn"))] |
110 "TARGET_SIMD | 109 "TARGET_SIMD |
119 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>"; | 118 case 3: return "mov\t%0.<Vbtype>, %1.<Vbtype>"; |
120 case 4: return "umov\t%0, %1.d[0]"; | 119 case 4: return "umov\t%0, %1.d[0]"; |
121 case 5: return "fmov\t%d0, %1"; | 120 case 5: return "fmov\t%d0, %1"; |
122 case 6: return "mov\t%0, %1"; | 121 case 6: return "mov\t%0, %1"; |
123 case 7: | 122 case 7: |
124 return aarch64_output_simd_mov_immediate (operands[1], | 123 return aarch64_output_simd_mov_immediate (operands[1], 64); |
125 <MODE>mode, 64); | |
126 default: gcc_unreachable (); | 124 default: gcc_unreachable (); |
127 } | 125 } |
128 } | 126 } |
129 [(set_attr "type" "neon_load1_1reg<q>, neon_stp, neon_store1_1reg<q>,\ | 127 [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\ |
130 neon_logic<q>, neon_to_gp<q>, f_mcr,\ | 128 neon_logic<q>, neon_to_gp<q>, f_mcr,\ |
131 mov_reg, neon_move<q>")] | 129 mov_reg, neon_move<q>")] |
132 ) | 130 ) |
133 | 131 |
134 (define_insn "*aarch64_simd_mov<mode>" | 132 (define_insn "*aarch64_simd_mov<VQ:mode>" |
135 [(set (match_operand:VQ 0 "nonimmediate_operand" | 133 [(set (match_operand:VQ 0 "nonimmediate_operand" |
136 "=w, Umq, m, w, ?r, ?w, ?r, w") | 134 "=w, Umn, m, w, ?r, ?w, ?r, w") |
137 (match_operand:VQ 1 "general_operand" | 135 (match_operand:VQ 1 "general_operand" |
138 "m, Dz, w, w, w, r, r, Dn"))] | 136 "m, Dz, w, w, w, r, r, Dn"))] |
139 "TARGET_SIMD | 137 "TARGET_SIMD |
140 && (register_operand (operands[0], <MODE>mode) | 138 && (register_operand (operands[0], <MODE>mode) |
141 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))" | 139 || aarch64_simd_reg_or_zero (operands[1], <MODE>mode))" |
153 case 4: | 151 case 4: |
154 case 5: | 152 case 5: |
155 case 6: | 153 case 6: |
156 return "#"; | 154 return "#"; |
157 case 7: | 155 case 7: |
158 return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128); | 156 return aarch64_output_simd_mov_immediate (operands[1], 128); |
159 default: | 157 default: |
160 gcc_unreachable (); | 158 gcc_unreachable (); |
161 } | 159 } |
162 } | 160 } |
163 [(set_attr "type" "neon_load1_1reg<q>, neon_store1_1reg<q>,\ | 161 [(set_attr "type" "neon_load1_1reg<q>, store_16, neon_store1_1reg<q>,\ |
164 neon_stp, neon_logic<q>, multiple, multiple,\ | 162 neon_logic<q>, multiple, multiple,\ |
165 multiple, neon_move<q>") | 163 multiple, neon_move<q>") |
166 (set_attr "length" "4,4,4,4,8,8,8,4")] | 164 (set_attr "length" "4,4,4,4,8,8,8,4")] |
167 ) | 165 ) |
168 | 166 |
169 ;; When storing lane zero we can use the normal STR and its more permissive | 167 ;; When storing lane zero we can use the normal STR and its more permissive |
172 (define_insn "aarch64_store_lane0<mode>" | 170 (define_insn "aarch64_store_lane0<mode>" |
173 [(set (match_operand:<VEL> 0 "memory_operand" "=m") | 171 [(set (match_operand:<VEL> 0 "memory_operand" "=m") |
174 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w") | 172 (vec_select:<VEL> (match_operand:VALL_F16 1 "register_operand" "w") |
175 (parallel [(match_operand 2 "const_int_operand" "n")])))] | 173 (parallel [(match_operand 2 "const_int_operand" "n")])))] |
176 "TARGET_SIMD | 174 "TARGET_SIMD |
177 && ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2])) == 0" | 175 && ENDIAN_LANE_N (<nunits>, INTVAL (operands[2])) == 0" |
178 "str\\t%<Vetype>1, %0" | 176 "str\\t%<Vetype>1, %0" |
179 [(set_attr "type" "neon_store1_1reg<q>")] | 177 [(set_attr "type" "neon_store1_1reg<q>")] |
180 ) | 178 ) |
181 | 179 |
182 (define_insn "load_pair<mode>" | 180 (define_insn "load_pair<DREG:mode><DREG2:mode>" |
183 [(set (match_operand:VD 0 "register_operand" "=w") | 181 [(set (match_operand:DREG 0 "register_operand" "=w") |
184 (match_operand:VD 1 "aarch64_mem_pair_operand" "Ump")) | 182 (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump")) |
185 (set (match_operand:VD 2 "register_operand" "=w") | 183 (set (match_operand:DREG2 2 "register_operand" "=w") |
186 (match_operand:VD 3 "memory_operand" "m"))] | 184 (match_operand:DREG2 3 "memory_operand" "m"))] |
187 "TARGET_SIMD | 185 "TARGET_SIMD |
188 && rtx_equal_p (XEXP (operands[3], 0), | 186 && rtx_equal_p (XEXP (operands[3], 0), |
189 plus_constant (Pmode, | 187 plus_constant (Pmode, |
190 XEXP (operands[1], 0), | 188 XEXP (operands[1], 0), |
191 GET_MODE_SIZE (<MODE>mode)))" | 189 GET_MODE_SIZE (<DREG:MODE>mode)))" |
192 "ldp\\t%d0, %d2, %1" | 190 "ldp\\t%d0, %d2, %1" |
193 [(set_attr "type" "neon_ldp")] | 191 [(set_attr "type" "neon_ldp")] |
194 ) | 192 ) |
195 | 193 |
196 (define_insn "store_pair<mode>" | 194 (define_insn "vec_store_pair<DREG:mode><DREG2:mode>" |
197 [(set (match_operand:VD 0 "aarch64_mem_pair_operand" "=Ump") | 195 [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump") |
198 (match_operand:VD 1 "register_operand" "w")) | 196 (match_operand:DREG 1 "register_operand" "w")) |
199 (set (match_operand:VD 2 "memory_operand" "=m") | 197 (set (match_operand:DREG2 2 "memory_operand" "=m") |
200 (match_operand:VD 3 "register_operand" "w"))] | 198 (match_operand:DREG2 3 "register_operand" "w"))] |
201 "TARGET_SIMD | 199 "TARGET_SIMD |
202 && rtx_equal_p (XEXP (operands[2], 0), | 200 && rtx_equal_p (XEXP (operands[2], 0), |
203 plus_constant (Pmode, | 201 plus_constant (Pmode, |
204 XEXP (operands[0], 0), | 202 XEXP (operands[0], 0), |
205 GET_MODE_SIZE (<MODE>mode)))" | 203 GET_MODE_SIZE (<DREG:MODE>mode)))" |
206 "stp\\t%d1, %d3, %0" | 204 "stp\\t%d1, %d3, %0" |
207 [(set_attr "type" "neon_stp")] | 205 [(set_attr "type" "neon_stp")] |
208 ) | 206 ) |
207 | |
208 (define_insn "load_pair<VQ:mode><VQ2:mode>" | |
209 [(set (match_operand:VQ 0 "register_operand" "=w") | |
210 (match_operand:VQ 1 "aarch64_mem_pair_operand" "Ump")) | |
211 (set (match_operand:VQ2 2 "register_operand" "=w") | |
212 (match_operand:VQ2 3 "memory_operand" "m"))] | |
213 "TARGET_SIMD | |
214 && rtx_equal_p (XEXP (operands[3], 0), | |
215 plus_constant (Pmode, | |
216 XEXP (operands[1], 0), | |
217 GET_MODE_SIZE (<VQ:MODE>mode)))" | |
218 "ldp\\t%q0, %q2, %1" | |
219 [(set_attr "type" "neon_ldp_q")] | |
220 ) | |
221 | |
222 (define_insn "vec_store_pair<VQ:mode><VQ2:mode>" | |
223 [(set (match_operand:VQ 0 "aarch64_mem_pair_operand" "=Ump") | |
224 (match_operand:VQ 1 "register_operand" "w")) | |
225 (set (match_operand:VQ2 2 "memory_operand" "=m") | |
226 (match_operand:VQ2 3 "register_operand" "w"))] | |
227 "TARGET_SIMD && rtx_equal_p (XEXP (operands[2], 0), | |
228 plus_constant (Pmode, | |
229 XEXP (operands[0], 0), | |
230 GET_MODE_SIZE (<VQ:MODE>mode)))" | |
231 "stp\\t%q1, %q3, %0" | |
232 [(set_attr "type" "neon_stp_q")] | |
233 ) | |
234 | |
209 | 235 |
210 (define_split | 236 (define_split |
211 [(set (match_operand:VQ 0 "register_operand" "") | 237 [(set (match_operand:VQ 0 "register_operand" "") |
212 (match_operand:VQ 1 "register_operand" ""))] | 238 (match_operand:VQ 1 "register_operand" ""))] |
213 "TARGET_SIMD && reload_completed | 239 "TARGET_SIMD && reload_completed |
229 { | 255 { |
230 aarch64_split_simd_move (operands[0], operands[1]); | 256 aarch64_split_simd_move (operands[0], operands[1]); |
231 DONE; | 257 DONE; |
232 }) | 258 }) |
233 | 259 |
234 (define_expand "aarch64_split_simd_mov<mode>" | 260 (define_expand "@aarch64_split_simd_mov<mode>" |
235 [(set (match_operand:VQ 0) | 261 [(set (match_operand:VQ 0) |
236 (match_operand:VQ 1))] | 262 (match_operand:VQ 1))] |
237 "TARGET_SIMD" | 263 "TARGET_SIMD" |
238 { | 264 { |
239 rtx dst = operands[0]; | 265 rtx dst = operands[0]; |
252 | 278 |
253 else | 279 else |
254 { | 280 { |
255 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst); | 281 rtx dst_low_part = gen_lowpart (<VHALF>mode, dst); |
256 rtx dst_high_part = gen_highpart (<VHALF>mode, dst); | 282 rtx dst_high_part = gen_highpart (<VHALF>mode, dst); |
257 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); | 283 rtx lo = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); |
258 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 284 rtx hi = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
259 | 285 |
260 emit_insn | 286 emit_insn |
261 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo)); | 287 (gen_aarch64_simd_mov_from_<mode>low (dst_low_part, src, lo)); |
262 emit_insn | 288 emit_insn |
263 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi)); | 289 (gen_aarch64_simd_mov_from_<mode>high (dst_high_part, src, hi)); |
449 (match_operand:V8QI 3 "register_operand" "<h_con>") | 475 (match_operand:V8QI 3 "register_operand" "<h_con>") |
450 (match_operand:SI 4 "immediate_operand" "i")] | 476 (match_operand:SI 4 "immediate_operand" "i")] |
451 DOTPROD)))] | 477 DOTPROD)))] |
452 "TARGET_DOTPROD" | 478 "TARGET_DOTPROD" |
453 { | 479 { |
454 operands[4] | 480 operands[4] = aarch64_endian_lane_rtx (V8QImode, INTVAL (operands[4])); |
455 = GEN_INT (ENDIAN_LANE_N (V8QImode, INTVAL (operands[4]))); | |
456 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]"; | 481 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]"; |
457 } | 482 } |
458 [(set_attr "type" "neon_dot")] | 483 [(set_attr "type" "neon_dot")] |
459 ) | 484 ) |
460 | 485 |
465 (match_operand:V16QI 3 "register_operand" "<h_con>") | 490 (match_operand:V16QI 3 "register_operand" "<h_con>") |
466 (match_operand:SI 4 "immediate_operand" "i")] | 491 (match_operand:SI 4 "immediate_operand" "i")] |
467 DOTPROD)))] | 492 DOTPROD)))] |
468 "TARGET_DOTPROD" | 493 "TARGET_DOTPROD" |
469 { | 494 { |
470 operands[4] | 495 operands[4] = aarch64_endian_lane_rtx (V16QImode, INTVAL (operands[4])); |
471 = GEN_INT (ENDIAN_LANE_N (V16QImode, INTVAL (operands[4]))); | |
472 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]"; | 496 return "<sur>dot\\t%0.<Vtype>, %2.<Vdottype>, %3.4b[%4]"; |
473 } | 497 } |
474 [(set_attr "type" "neon_dot")] | 498 [(set_attr "type" "neon_dot")] |
475 ) | 499 ) |
476 | 500 |
500 (match_operand:VMUL 1 "register_operand" "<h_con>") | 524 (match_operand:VMUL 1 "register_operand" "<h_con>") |
501 (parallel [(match_operand:SI 2 "immediate_operand")]))) | 525 (parallel [(match_operand:SI 2 "immediate_operand")]))) |
502 (match_operand:VMUL 3 "register_operand" "w")))] | 526 (match_operand:VMUL 3 "register_operand" "w")))] |
503 "TARGET_SIMD" | 527 "TARGET_SIMD" |
504 { | 528 { |
505 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | 529 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); |
506 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; | 530 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; |
507 } | 531 } |
508 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")] | 532 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")] |
509 ) | 533 ) |
510 | 534 |
516 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") | 540 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") |
517 (parallel [(match_operand:SI 2 "immediate_operand")]))) | 541 (parallel [(match_operand:SI 2 "immediate_operand")]))) |
518 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))] | 542 (match_operand:VMUL_CHANGE_NLANES 3 "register_operand" "w")))] |
519 "TARGET_SIMD" | 543 "TARGET_SIMD" |
520 { | 544 { |
521 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, | 545 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2])); |
522 INTVAL (operands[2]))); | |
523 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; | 546 return "<f>mul\\t%0.<Vtype>, %3.<Vtype>, %1.<Vetype>[%2]"; |
524 } | 547 } |
525 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")] | 548 [(set_attr "type" "neon<fp>_mul_<Vetype>_scalar<q>")] |
526 ) | 549 ) |
527 | 550 |
534 "TARGET_SIMD" | 557 "TARGET_SIMD" |
535 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"; | 558 "<f>mul\t%0.<Vtype>, %2.<Vtype>, %1.<Vetype>[0]"; |
536 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")] | 559 [(set_attr "type" "neon<fp>_mul_<stype>_scalar<q>")] |
537 ) | 560 ) |
538 | 561 |
539 (define_insn "aarch64_rsqrte<mode>" | 562 (define_insn "@aarch64_rsqrte<mode>" |
540 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") | 563 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") |
541 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")] | 564 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w")] |
542 UNSPEC_RSQRTE))] | 565 UNSPEC_RSQRTE))] |
543 "TARGET_SIMD" | 566 "TARGET_SIMD" |
544 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>" | 567 "frsqrte\\t%<v>0<Vmtype>, %<v>1<Vmtype>" |
545 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")]) | 568 [(set_attr "type" "neon_fp_rsqrte_<stype><q>")]) |
546 | 569 |
547 (define_insn "aarch64_rsqrts<mode>" | 570 (define_insn "@aarch64_rsqrts<mode>" |
548 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") | 571 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") |
549 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w") | 572 (unspec:VHSDF_HSDF [(match_operand:VHSDF_HSDF 1 "register_operand" "w") |
550 (match_operand:VHSDF_HSDF 2 "register_operand" "w")] | 573 (match_operand:VHSDF_HSDF 2 "register_operand" "w")] |
551 UNSPEC_RSQRTS))] | 574 UNSPEC_RSQRTS))] |
552 "TARGET_SIMD" | 575 "TARGET_SIMD" |
570 (match_operand:V2DF 1 "register_operand" "w") | 593 (match_operand:V2DF 1 "register_operand" "w") |
571 (parallel [(match_operand:SI 2 "immediate_operand")])) | 594 (parallel [(match_operand:SI 2 "immediate_operand")])) |
572 (match_operand:DF 3 "register_operand" "w")))] | 595 (match_operand:DF 3 "register_operand" "w")))] |
573 "TARGET_SIMD" | 596 "TARGET_SIMD" |
574 { | 597 { |
575 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); | 598 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2])); |
576 return "fmul\\t%0.2d, %3.2d, %1.d[%2]"; | 599 return "fmul\\t%0.2d, %3.2d, %1.d[%2]"; |
577 } | 600 } |
578 [(set_attr "type" "neon_fp_mul_d_scalar_q")] | 601 [(set_attr "type" "neon_fp_mul_d_scalar_q")] |
579 ) | 602 ) |
580 | 603 |
615 "TARGET_SIMD" | 638 "TARGET_SIMD" |
616 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | 639 "sabd\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" |
617 [(set_attr "type" "neon_abd<q>")] | 640 [(set_attr "type" "neon_abd<q>")] |
618 ) | 641 ) |
619 | 642 |
643 (define_insn "aarch64_<sur>abdl2<mode>_3" | |
644 [(set (match_operand:<VDBLW> 0 "register_operand" "=w") | |
645 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w") | |
646 (match_operand:VDQV_S 2 "register_operand" "w")] | |
647 ABDL2))] | |
648 "TARGET_SIMD" | |
649 "<sur>abdl2\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>" | |
650 [(set_attr "type" "neon_abd<q>")] | |
651 ) | |
652 | |
653 (define_insn "aarch64_<sur>abal<mode>_4" | |
654 [(set (match_operand:<VDBLW> 0 "register_operand" "=w") | |
655 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w") | |
656 (match_operand:VDQV_S 2 "register_operand" "w") | |
657 (match_operand:<VDBLW> 3 "register_operand" "0")] | |
658 ABAL))] | |
659 "TARGET_SIMD" | |
660 "<sur>abal\t%0.<Vwtype>, %1.<Vhalftype>, %2.<Vhalftype>" | |
661 [(set_attr "type" "neon_arith_acc<q>")] | |
662 ) | |
663 | |
664 (define_insn "aarch64_<sur>adalp<mode>_3" | |
665 [(set (match_operand:<VDBLW> 0 "register_operand" "=w") | |
666 (unspec:<VDBLW> [(match_operand:VDQV_S 1 "register_operand" "w") | |
667 (match_operand:<VDBLW> 2 "register_operand" "0")] | |
668 ADALP))] | |
669 "TARGET_SIMD" | |
670 "<sur>adalp\t%0.<Vwtype>, %1.<Vtype>" | |
671 [(set_attr "type" "neon_reduc_add<q>")] | |
672 ) | |
673 | |
674 ;; Emit a sequence to produce a sum-of-absolute-differences of the V16QI | |
675 ;; inputs in operands 1 and 2. The sequence also has to perform a widening | |
676 ;; reduction of the difference into a V4SI vector and accumulate that into | |
677 ;; operand 3 before copying that into the result operand 0. | |
678 ;; Perform that with a sequence of: | |
679 ;; UABDL2 tmp.8h, op1.16b, op2.16b | |
680 ;; UABAL tmp.8h, op1.8b, op2.8b | |
681 ;; UADALP op3.4s, tmp.8h | |
682 ;; MOV op0, op3 // should be eliminated in later passes. | |
683 ;; The signed version just uses the signed variants of the above instructions. | |
684 | |
685 (define_expand "<sur>sadv16qi" | |
686 [(use (match_operand:V4SI 0 "register_operand")) | |
687 (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand")) | |
688 (use (match_operand:V16QI 2 "register_operand"))] ABAL) | |
689 (use (match_operand:V4SI 3 "register_operand"))] | |
690 "TARGET_SIMD" | |
691 { | |
692 rtx reduc = gen_reg_rtx (V8HImode); | |
693 emit_insn (gen_aarch64_<sur>abdl2v16qi_3 (reduc, operands[1], | |
694 operands[2])); | |
695 emit_insn (gen_aarch64_<sur>abalv16qi_4 (reduc, operands[1], | |
696 operands[2], reduc)); | |
697 emit_insn (gen_aarch64_<sur>adalpv8hi_3 (operands[3], reduc, | |
698 operands[3])); | |
699 emit_move_insn (operands[0], operands[3]); | |
700 DONE; | |
701 } | |
702 ) | |
703 | |
620 (define_insn "aba<mode>_3" | 704 (define_insn "aba<mode>_3" |
621 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") | 705 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") |
622 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI | 706 (plus:VDQ_BHSI (abs:VDQ_BHSI (minus:VDQ_BHSI |
623 (match_operand:VDQ_BHSI 1 "register_operand" "w") | 707 (match_operand:VDQ_BHSI 1 "register_operand" "w") |
624 (match_operand:VDQ_BHSI 2 "register_operand" "w"))) | 708 (match_operand:VDQ_BHSI 2 "register_operand" "w"))) |
649 switch (which_alternative) | 733 switch (which_alternative) |
650 { | 734 { |
651 case 0: | 735 case 0: |
652 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"; | 736 return "and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"; |
653 case 1: | 737 case 1: |
654 return aarch64_output_simd_mov_immediate (operands[2], | 738 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>, |
655 <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_BIC); | 739 AARCH64_CHECK_BIC); |
656 default: | 740 default: |
657 gcc_unreachable (); | 741 gcc_unreachable (); |
658 } | 742 } |
659 } | 743 } |
660 [(set_attr "type" "neon_logic<q>")] | 744 [(set_attr "type" "neon_logic<q>")] |
670 switch (which_alternative) | 754 switch (which_alternative) |
671 { | 755 { |
672 case 0: | 756 case 0: |
673 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"; | 757 return "orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"; |
674 case 1: | 758 case 1: |
675 return aarch64_output_simd_mov_immediate (operands[2], | 759 return aarch64_output_simd_mov_immediate (operands[2], <bitsize>, |
676 <MODE>mode, GET_MODE_BITSIZE (<MODE>mode), AARCH64_CHECK_ORR); | 760 AARCH64_CHECK_ORR); |
677 default: | 761 default: |
678 gcc_unreachable (); | 762 gcc_unreachable (); |
679 } | 763 } |
680 } | 764 } |
681 [(set_attr "type" "neon_logic<q>")] | 765 [(set_attr "type" "neon_logic<q>")] |
697 "not\t%0.<Vbtype>, %1.<Vbtype>" | 781 "not\t%0.<Vbtype>, %1.<Vbtype>" |
698 [(set_attr "type" "neon_logic<q>")] | 782 [(set_attr "type" "neon_logic<q>")] |
699 ) | 783 ) |
700 | 784 |
701 (define_insn "aarch64_simd_vec_set<mode>" | 785 (define_insn "aarch64_simd_vec_set<mode>" |
702 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w,w,w") | 786 [(set (match_operand:VALL_F16 0 "register_operand" "=w,w,w") |
703 (vec_merge:VDQ_BHSI | 787 (vec_merge:VALL_F16 |
704 (vec_duplicate:VDQ_BHSI | 788 (vec_duplicate:VALL_F16 |
705 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "r,w,Utv")) | 789 (match_operand:<VEL> 1 "aarch64_simd_general_operand" "w,?r,Utv")) |
706 (match_operand:VDQ_BHSI 3 "register_operand" "0,0,0") | 790 (match_operand:VALL_F16 3 "register_operand" "0,0,0") |
707 (match_operand:SI 2 "immediate_operand" "i,i,i")))] | 791 (match_operand:SI 2 "immediate_operand" "i,i,i")))] |
708 "TARGET_SIMD" | 792 "TARGET_SIMD" |
709 { | 793 { |
710 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2]))); | 794 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2]))); |
711 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); | 795 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); |
712 switch (which_alternative) | 796 switch (which_alternative) |
713 { | 797 { |
714 case 0: | 798 case 0: |
715 return "ins\\t%0.<Vetype>[%p2], %w1"; | 799 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]"; |
716 case 1: | 800 case 1: |
717 return "ins\\t%0.<Vetype>[%p2], %1.<Vetype>[0]"; | 801 return "ins\\t%0.<Vetype>[%p2], %<vwcore>1"; |
718 case 2: | 802 case 2: |
719 return "ld1\\t{%0.<Vetype>}[%p2], %1"; | 803 return "ld1\\t{%0.<Vetype>}[%p2], %1"; |
720 default: | 804 default: |
721 gcc_unreachable (); | 805 gcc_unreachable (); |
722 } | 806 } |
723 } | 807 } |
724 [(set_attr "type" "neon_from_gp<q>, neon_ins<q>, neon_load1_one_lane<q>")] | 808 [(set_attr "type" "neon_ins<q>, neon_from_gp<q>, neon_load1_one_lane<q>")] |
725 ) | 809 ) |
726 | 810 |
727 (define_insn "*aarch64_simd_vec_copy_lane<mode>" | 811 (define_insn "*aarch64_simd_vec_copy_lane<mode>" |
728 [(set (match_operand:VALL_F16 0 "register_operand" "=w") | 812 [(set (match_operand:VALL_F16 0 "register_operand" "=w") |
729 (vec_merge:VALL_F16 | 813 (vec_merge:VALL_F16 |
734 [(match_operand:SI 4 "immediate_operand" "i")]))) | 818 [(match_operand:SI 4 "immediate_operand" "i")]))) |
735 (match_operand:VALL_F16 1 "register_operand" "0") | 819 (match_operand:VALL_F16 1 "register_operand" "0") |
736 (match_operand:SI 2 "immediate_operand" "i")))] | 820 (match_operand:SI 2 "immediate_operand" "i")))] |
737 "TARGET_SIMD" | 821 "TARGET_SIMD" |
738 { | 822 { |
739 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2]))); | 823 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2]))); |
740 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt); | 824 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt); |
741 operands[4] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[4]))); | 825 operands[4] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[4])); |
742 | 826 |
743 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]"; | 827 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]"; |
744 } | 828 } |
745 [(set_attr "type" "neon_ins<q>")] | 829 [(set_attr "type" "neon_ins<q>")] |
746 ) | 830 ) |
755 [(match_operand:SI 4 "immediate_operand" "i")]))) | 839 [(match_operand:SI 4 "immediate_operand" "i")]))) |
756 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0") | 840 (match_operand:VALL_F16_NO_V2Q 1 "register_operand" "0") |
757 (match_operand:SI 2 "immediate_operand" "i")))] | 841 (match_operand:SI 2 "immediate_operand" "i")))] |
758 "TARGET_SIMD" | 842 "TARGET_SIMD" |
759 { | 843 { |
760 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2]))); | 844 int elt = ENDIAN_LANE_N (<nunits>, exact_log2 (INTVAL (operands[2]))); |
761 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt); | 845 operands[2] = GEN_INT (HOST_WIDE_INT_1 << elt); |
762 operands[4] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, | 846 operands[4] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, |
763 INTVAL (operands[4]))); | 847 INTVAL (operands[4])); |
764 | 848 |
765 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]"; | 849 return "ins\t%0.<Vetype>[%p2], %3.<Vetype>[%4]"; |
766 } | 850 } |
767 [(set_attr "type" "neon_ins<q>")] | 851 [(set_attr "type" "neon_ins<q>")] |
768 ) | 852 ) |
1033 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2])); | 1117 emit_insn (gen_lshrdi3 (operands[0], operands[1], operands[2])); |
1034 DONE; | 1118 DONE; |
1035 } | 1119 } |
1036 ) | 1120 ) |
1037 | 1121 |
1038 (define_expand "vec_set<mode>" | |
1039 [(match_operand:VDQ_BHSI 0 "register_operand") | |
1040 (match_operand:<VEL> 1 "register_operand") | |
1041 (match_operand:SI 2 "immediate_operand")] | |
1042 "TARGET_SIMD" | |
1043 { | |
1044 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); | |
1045 emit_insn (gen_aarch64_simd_vec_set<mode> (operands[0], operands[1], | |
1046 GEN_INT (elem), operands[0])); | |
1047 DONE; | |
1048 } | |
1049 ) | |
1050 | |
1051 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero. | 1122 ;; For 64-bit modes we use ushl/r, as this does not require a SIMD zero. |
1052 (define_insn "vec_shr_<mode>" | 1123 (define_insn "vec_shr_<mode>" |
1053 [(set (match_operand:VD 0 "register_operand" "=w") | 1124 [(set (match_operand:VD 0 "register_operand" "=w") |
1054 (unspec:VD [(match_operand:VD 1 "register_operand" "w") | 1125 (unspec:VD [(match_operand:VD 1 "register_operand" "w") |
1055 (match_operand:SI 2 "immediate_operand" "i")] | 1126 (match_operand:SI 2 "immediate_operand" "i")] |
1062 return "ushr %d0, %d1, %2"; | 1133 return "ushr %d0, %d1, %2"; |
1063 } | 1134 } |
1064 [(set_attr "type" "neon_shift_imm")] | 1135 [(set_attr "type" "neon_shift_imm")] |
1065 ) | 1136 ) |
1066 | 1137 |
1067 (define_insn "aarch64_simd_vec_setv2di" | |
1068 [(set (match_operand:V2DI 0 "register_operand" "=w,w") | |
1069 (vec_merge:V2DI | |
1070 (vec_duplicate:V2DI | |
1071 (match_operand:DI 1 "register_operand" "r,w")) | |
1072 (match_operand:V2DI 3 "register_operand" "0,0") | |
1073 (match_operand:SI 2 "immediate_operand" "i,i")))] | |
1074 "TARGET_SIMD" | |
1075 { | |
1076 int elt = ENDIAN_LANE_N (V2DImode, exact_log2 (INTVAL (operands[2]))); | |
1077 operands[2] = GEN_INT ((HOST_WIDE_INT) 1 << elt); | |
1078 switch (which_alternative) | |
1079 { | |
1080 case 0: | |
1081 return "ins\\t%0.d[%p2], %1"; | |
1082 case 1: | |
1083 return "ins\\t%0.d[%p2], %1.d[0]"; | |
1084 default: | |
1085 gcc_unreachable (); | |
1086 } | |
1087 } | |
1088 [(set_attr "type" "neon_from_gp, neon_ins_q")] | |
1089 ) | |
1090 | |
1091 (define_expand "vec_setv2di" | |
1092 [(match_operand:V2DI 0 "register_operand") | |
1093 (match_operand:DI 1 "register_operand") | |
1094 (match_operand:SI 2 "immediate_operand")] | |
1095 "TARGET_SIMD" | |
1096 { | |
1097 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); | |
1098 emit_insn (gen_aarch64_simd_vec_setv2di (operands[0], operands[1], | |
1099 GEN_INT (elem), operands[0])); | |
1100 DONE; | |
1101 } | |
1102 ) | |
1103 | |
1104 (define_insn "aarch64_simd_vec_set<mode>" | |
1105 [(set (match_operand:VDQF_F16 0 "register_operand" "=w") | |
1106 (vec_merge:VDQF_F16 | |
1107 (vec_duplicate:VDQF_F16 | |
1108 (match_operand:<VEL> 1 "register_operand" "w")) | |
1109 (match_operand:VDQF_F16 3 "register_operand" "0") | |
1110 (match_operand:SI 2 "immediate_operand" "i")))] | |
1111 "TARGET_SIMD" | |
1112 { | |
1113 int elt = ENDIAN_LANE_N (<MODE>mode, exact_log2 (INTVAL (operands[2]))); | |
1114 | |
1115 operands[2] = GEN_INT ((HOST_WIDE_INT)1 << elt); | |
1116 return "ins\t%0.<Vetype>[%p2], %1.<Vetype>[0]"; | |
1117 } | |
1118 [(set_attr "type" "neon_ins<q>")] | |
1119 ) | |
1120 | |
1121 (define_expand "vec_set<mode>" | 1138 (define_expand "vec_set<mode>" |
1122 [(match_operand:VDQF_F16 0 "register_operand" "+w") | 1139 [(match_operand:VALL_F16 0 "register_operand" "+w") |
1123 (match_operand:<VEL> 1 "register_operand" "w") | 1140 (match_operand:<VEL> 1 "register_operand" "w") |
1124 (match_operand:SI 2 "immediate_operand" "")] | 1141 (match_operand:SI 2 "immediate_operand" "")] |
1125 "TARGET_SIMD" | 1142 "TARGET_SIMD" |
1126 { | 1143 { |
1127 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); | 1144 HOST_WIDE_INT elem = (HOST_WIDE_INT) 1 << INTVAL (operands[2]); |
1153 (parallel [(match_operand:SI 2 "immediate_operand")]))) | 1170 (parallel [(match_operand:SI 2 "immediate_operand")]))) |
1154 (match_operand:VDQHS 3 "register_operand" "w")) | 1171 (match_operand:VDQHS 3 "register_operand" "w")) |
1155 (match_operand:VDQHS 4 "register_operand" "0")))] | 1172 (match_operand:VDQHS 4 "register_operand" "0")))] |
1156 "TARGET_SIMD" | 1173 "TARGET_SIMD" |
1157 { | 1174 { |
1158 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | 1175 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); |
1159 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | 1176 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; |
1160 } | 1177 } |
1161 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] | 1178 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] |
1162 ) | 1179 ) |
1163 | 1180 |
1171 (parallel [(match_operand:SI 2 "immediate_operand")]))) | 1188 (parallel [(match_operand:SI 2 "immediate_operand")]))) |
1172 (match_operand:VDQHS 3 "register_operand" "w")) | 1189 (match_operand:VDQHS 3 "register_operand" "w")) |
1173 (match_operand:VDQHS 4 "register_operand" "0")))] | 1190 (match_operand:VDQHS 4 "register_operand" "0")))] |
1174 "TARGET_SIMD" | 1191 "TARGET_SIMD" |
1175 { | 1192 { |
1176 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, | 1193 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2])); |
1177 INTVAL (operands[2]))); | |
1178 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | 1194 return "mla\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; |
1179 } | 1195 } |
1180 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] | 1196 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] |
1181 ) | 1197 ) |
1182 | 1198 |
1212 (match_operand:VDQHS 1 "register_operand" "<h_con>") | 1228 (match_operand:VDQHS 1 "register_operand" "<h_con>") |
1213 (parallel [(match_operand:SI 2 "immediate_operand")]))) | 1229 (parallel [(match_operand:SI 2 "immediate_operand")]))) |
1214 (match_operand:VDQHS 3 "register_operand" "w"))))] | 1230 (match_operand:VDQHS 3 "register_operand" "w"))))] |
1215 "TARGET_SIMD" | 1231 "TARGET_SIMD" |
1216 { | 1232 { |
1217 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | 1233 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); |
1218 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | 1234 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; |
1219 } | 1235 } |
1220 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] | 1236 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] |
1221 ) | 1237 ) |
1222 | 1238 |
1230 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") | 1246 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") |
1231 (parallel [(match_operand:SI 2 "immediate_operand")]))) | 1247 (parallel [(match_operand:SI 2 "immediate_operand")]))) |
1232 (match_operand:VDQHS 3 "register_operand" "w"))))] | 1248 (match_operand:VDQHS 3 "register_operand" "w"))))] |
1233 "TARGET_SIMD" | 1249 "TARGET_SIMD" |
1234 { | 1250 { |
1235 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, | 1251 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2])); |
1236 INTVAL (operands[2]))); | |
1237 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | 1252 return "mls\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; |
1238 } | 1253 } |
1239 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] | 1254 [(set_attr "type" "neon_mla_<Vetype>_scalar<q>")] |
1240 ) | 1255 ) |
1241 | 1256 |
1335 "@ | 1350 "@ |
1336 dup\\t%d0, %1.d[0] | 1351 dup\\t%d0, %1.d[0] |
1337 fmov\\t%d0, %1 | 1352 fmov\\t%d0, %1 |
1338 dup\\t%d0, %1" | 1353 dup\\t%d0, %1" |
1339 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") | 1354 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") |
1340 (set_attr "simd" "yes,*,yes") | 1355 (set_attr "length" "4") |
1341 (set_attr "fp" "*,yes,*") | 1356 (set_attr "arch" "simd,fp,simd")] |
1342 (set_attr "length" "4")] | |
1343 ) | 1357 ) |
1344 | 1358 |
1345 (define_insn "move_lo_quad_internal_<mode>" | 1359 (define_insn "move_lo_quad_internal_<mode>" |
1346 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w") | 1360 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w") |
1347 (vec_concat:VQ_2E | 1361 (vec_concat:VQ_2E |
1351 "@ | 1365 "@ |
1352 dup\\t%d0, %1.d[0] | 1366 dup\\t%d0, %1.d[0] |
1353 fmov\\t%d0, %1 | 1367 fmov\\t%d0, %1 |
1354 dup\\t%d0, %1" | 1368 dup\\t%d0, %1" |
1355 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") | 1369 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") |
1356 (set_attr "simd" "yes,*,yes") | 1370 (set_attr "length" "4") |
1357 (set_attr "fp" "*,yes,*") | 1371 (set_attr "arch" "simd,fp,simd")] |
1358 (set_attr "length" "4")] | |
1359 ) | 1372 ) |
1360 | 1373 |
1361 (define_insn "move_lo_quad_internal_be_<mode>" | 1374 (define_insn "move_lo_quad_internal_be_<mode>" |
1362 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w") | 1375 [(set (match_operand:VQ_NO2E 0 "register_operand" "=w,w,w") |
1363 (vec_concat:VQ_NO2E | 1376 (vec_concat:VQ_NO2E |
1367 "@ | 1380 "@ |
1368 dup\\t%d0, %1.d[0] | 1381 dup\\t%d0, %1.d[0] |
1369 fmov\\t%d0, %1 | 1382 fmov\\t%d0, %1 |
1370 dup\\t%d0, %1" | 1383 dup\\t%d0, %1" |
1371 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") | 1384 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") |
1372 (set_attr "simd" "yes,*,yes") | 1385 (set_attr "length" "4") |
1373 (set_attr "fp" "*,yes,*") | 1386 (set_attr "arch" "simd,fp,simd")] |
1374 (set_attr "length" "4")] | |
1375 ) | 1387 ) |
1376 | 1388 |
1377 (define_insn "move_lo_quad_internal_be_<mode>" | 1389 (define_insn "move_lo_quad_internal_be_<mode>" |
1378 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w") | 1390 [(set (match_operand:VQ_2E 0 "register_operand" "=w,w,w") |
1379 (vec_concat:VQ_2E | 1391 (vec_concat:VQ_2E |
1383 "@ | 1395 "@ |
1384 dup\\t%d0, %1.d[0] | 1396 dup\\t%d0, %1.d[0] |
1385 fmov\\t%d0, %1 | 1397 fmov\\t%d0, %1 |
1386 dup\\t%d0, %1" | 1398 dup\\t%d0, %1" |
1387 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") | 1399 [(set_attr "type" "neon_dup<q>,f_mcr,neon_dup<q>") |
1388 (set_attr "simd" "yes,*,yes") | 1400 (set_attr "length" "4") |
1389 (set_attr "fp" "*,yes,*") | 1401 (set_attr "arch" "simd,fp,simd")] |
1390 (set_attr "length" "4")] | |
1391 ) | 1402 ) |
1392 | 1403 |
1393 (define_expand "move_lo_quad_<mode>" | 1404 (define_expand "move_lo_quad_<mode>" |
1394 [(match_operand:VQ 0 "register_operand") | 1405 [(match_operand:VQ 0 "register_operand") |
1395 (match_operand:VQ 1 "register_operand")] | 1406 (match_operand:VQ 1 "register_operand")] |
1439 (define_expand "move_hi_quad_<mode>" | 1450 (define_expand "move_hi_quad_<mode>" |
1440 [(match_operand:VQ 0 "register_operand" "") | 1451 [(match_operand:VQ 0 "register_operand" "") |
1441 (match_operand:<VHALF> 1 "register_operand" "")] | 1452 (match_operand:<VHALF> 1 "register_operand" "")] |
1442 "TARGET_SIMD" | 1453 "TARGET_SIMD" |
1443 { | 1454 { |
1444 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); | 1455 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); |
1445 if (BYTES_BIG_ENDIAN) | 1456 if (BYTES_BIG_ENDIAN) |
1446 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0], | 1457 emit_insn (gen_aarch64_simd_move_hi_quad_be_<mode> (operands[0], |
1447 operands[1], p)); | 1458 operands[1], p)); |
1448 else | 1459 else |
1449 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0], | 1460 emit_insn (gen_aarch64_simd_move_hi_quad_<mode> (operands[0], |
1503 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | 1514 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> |
1504 (match_operand:VQW 1 "register_operand" "w") | 1515 (match_operand:VQW 1 "register_operand" "w") |
1505 (match_operand:VQW 2 "vect_par_cnst_lo_half" "") | 1516 (match_operand:VQW 2 "vect_par_cnst_lo_half" "") |
1506 )))] | 1517 )))] |
1507 "TARGET_SIMD" | 1518 "TARGET_SIMD" |
1508 "<su>shll\t%0.<Vwtype>, %1.<Vhalftype>, 0" | 1519 "<su>xtl\t%0.<Vwtype>, %1.<Vhalftype>" |
1509 [(set_attr "type" "neon_shift_imm_long")] | 1520 [(set_attr "type" "neon_shift_imm_long")] |
1510 ) | 1521 ) |
1511 | 1522 |
1512 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>" | 1523 (define_insn "aarch64_simd_vec_unpack<su>_hi_<mode>" |
1513 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | 1524 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
1514 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> | 1525 (ANY_EXTEND:<VWIDE> (vec_select:<VHALF> |
1515 (match_operand:VQW 1 "register_operand" "w") | 1526 (match_operand:VQW 1 "register_operand" "w") |
1516 (match_operand:VQW 2 "vect_par_cnst_hi_half" "") | 1527 (match_operand:VQW 2 "vect_par_cnst_hi_half" "") |
1517 )))] | 1528 )))] |
1518 "TARGET_SIMD" | 1529 "TARGET_SIMD" |
1519 "<su>shll2\t%0.<Vwtype>, %1.<Vtype>, 0" | 1530 "<su>xtl2\t%0.<Vwtype>, %1.<Vtype>" |
1520 [(set_attr "type" "neon_shift_imm_long")] | 1531 [(set_attr "type" "neon_shift_imm_long")] |
1521 ) | 1532 ) |
1522 | 1533 |
1523 (define_expand "vec_unpack<su>_hi_<mode>" | 1534 (define_expand "vec_unpack<su>_hi_<mode>" |
1524 [(match_operand:<VWIDE> 0 "register_operand" "") | 1535 [(match_operand:<VWIDE> 0 "register_operand" "") |
1525 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))] | 1536 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand"))] |
1526 "TARGET_SIMD" | 1537 "TARGET_SIMD" |
1527 { | 1538 { |
1528 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 1539 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
1529 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0], | 1540 emit_insn (gen_aarch64_simd_vec_unpack<su>_hi_<mode> (operands[0], |
1530 operands[1], p)); | 1541 operands[1], p)); |
1531 DONE; | 1542 DONE; |
1532 } | 1543 } |
1533 ) | 1544 ) |
1535 (define_expand "vec_unpack<su>_lo_<mode>" | 1546 (define_expand "vec_unpack<su>_lo_<mode>" |
1536 [(match_operand:<VWIDE> 0 "register_operand" "") | 1547 [(match_operand:<VWIDE> 0 "register_operand" "") |
1537 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))] | 1548 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" ""))] |
1538 "TARGET_SIMD" | 1549 "TARGET_SIMD" |
1539 { | 1550 { |
1540 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); | 1551 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); |
1541 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0], | 1552 emit_insn (gen_aarch64_simd_vec_unpack<su>_lo_<mode> (operands[0], |
1542 operands[1], p)); | 1553 operands[1], p)); |
1543 DONE; | 1554 DONE; |
1544 } | 1555 } |
1545 ) | 1556 ) |
1655 [(match_operand:<VWIDE> 0 "register_operand" "") | 1666 [(match_operand:<VWIDE> 0 "register_operand" "") |
1656 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" "")) | 1667 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" "")) |
1657 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))] | 1668 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))] |
1658 "TARGET_SIMD" | 1669 "TARGET_SIMD" |
1659 { | 1670 { |
1660 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); | 1671 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); |
1661 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0], | 1672 emit_insn (gen_aarch64_simd_vec_<su>mult_lo_<mode> (operands[0], |
1662 operands[1], | 1673 operands[1], |
1663 operands[2], p)); | 1674 operands[2], p)); |
1664 DONE; | 1675 DONE; |
1665 } | 1676 } |
1682 [(match_operand:<VWIDE> 0 "register_operand" "") | 1693 [(match_operand:<VWIDE> 0 "register_operand" "") |
1683 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" "")) | 1694 (ANY_EXTEND:<VWIDE> (match_operand:VQW 1 "register_operand" "")) |
1684 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))] | 1695 (ANY_EXTEND:<VWIDE> (match_operand:VQW 2 "register_operand" ""))] |
1685 "TARGET_SIMD" | 1696 "TARGET_SIMD" |
1686 { | 1697 { |
1687 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 1698 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
1688 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0], | 1699 emit_insn (gen_aarch64_simd_vec_<su>mult_hi_<mode> (operands[0], |
1689 operands[1], | 1700 operands[1], |
1690 operands[2], p)); | 1701 operands[2], p)); |
1691 DONE; | 1702 DONE; |
1692 | 1703 |
1801 (parallel [(match_operand:SI 2 "immediate_operand")]))) | 1812 (parallel [(match_operand:SI 2 "immediate_operand")]))) |
1802 (match_operand:VDQF 3 "register_operand" "w") | 1813 (match_operand:VDQF 3 "register_operand" "w") |
1803 (match_operand:VDQF 4 "register_operand" "0")))] | 1814 (match_operand:VDQF 4 "register_operand" "0")))] |
1804 "TARGET_SIMD" | 1815 "TARGET_SIMD" |
1805 { | 1816 { |
1806 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | 1817 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); |
1807 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | 1818 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; |
1808 } | 1819 } |
1809 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] | 1820 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] |
1810 ) | 1821 ) |
1811 | 1822 |
1818 (parallel [(match_operand:SI 2 "immediate_operand")]))) | 1829 (parallel [(match_operand:SI 2 "immediate_operand")]))) |
1819 (match_operand:VDQSF 3 "register_operand" "w") | 1830 (match_operand:VDQSF 3 "register_operand" "w") |
1820 (match_operand:VDQSF 4 "register_operand" "0")))] | 1831 (match_operand:VDQSF 4 "register_operand" "0")))] |
1821 "TARGET_SIMD" | 1832 "TARGET_SIMD" |
1822 { | 1833 { |
1823 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, | 1834 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2])); |
1824 INTVAL (operands[2]))); | |
1825 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | 1835 return "fmla\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; |
1826 } | 1836 } |
1827 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] | 1837 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] |
1828 ) | 1838 ) |
1829 | 1839 |
1847 (parallel [(match_operand:SI 2 "immediate_operand")])) | 1857 (parallel [(match_operand:SI 2 "immediate_operand")])) |
1848 (match_operand:DF 3 "register_operand" "w") | 1858 (match_operand:DF 3 "register_operand" "w") |
1849 (match_operand:DF 4 "register_operand" "0")))] | 1859 (match_operand:DF 4 "register_operand" "0")))] |
1850 "TARGET_SIMD" | 1860 "TARGET_SIMD" |
1851 { | 1861 { |
1852 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); | 1862 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2])); |
1853 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]"; | 1863 return "fmla\\t%0.2d, %3.2d, %1.2d[%2]"; |
1854 } | 1864 } |
1855 [(set_attr "type" "neon_fp_mla_d_scalar_q")] | 1865 [(set_attr "type" "neon_fp_mla_d_scalar_q")] |
1856 ) | 1866 ) |
1857 | 1867 |
1858 (define_insn "fnma<mode>4" | 1868 (define_insn "fnma<mode>4" |
1859 [(set (match_operand:VHSDF 0 "register_operand" "=w") | 1869 [(set (match_operand:VHSDF 0 "register_operand" "=w") |
1860 (fma:VHSDF | 1870 (fma:VHSDF |
1861 (match_operand:VHSDF 1 "register_operand" "w") | 1871 (neg:VHSDF (match_operand:VHSDF 1 "register_operand" "w")) |
1862 (neg:VHSDF | 1872 (match_operand:VHSDF 2 "register_operand" "w") |
1863 (match_operand:VHSDF 2 "register_operand" "w")) | |
1864 (match_operand:VHSDF 3 "register_operand" "0")))] | 1873 (match_operand:VHSDF 3 "register_operand" "0")))] |
1865 "TARGET_SIMD" | 1874 "TARGET_SIMD" |
1866 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | 1875 "fmls\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" |
1867 [(set_attr "type" "neon_fp_mla_<stype><q>")] | 1876 [(set_attr "type" "neon_fp_mla_<stype><q>")] |
1868 ) | 1877 ) |
1877 (match_operand:VDQF 1 "register_operand" "<h_con>") | 1886 (match_operand:VDQF 1 "register_operand" "<h_con>") |
1878 (parallel [(match_operand:SI 2 "immediate_operand")]))) | 1887 (parallel [(match_operand:SI 2 "immediate_operand")]))) |
1879 (match_operand:VDQF 4 "register_operand" "0")))] | 1888 (match_operand:VDQF 4 "register_operand" "0")))] |
1880 "TARGET_SIMD" | 1889 "TARGET_SIMD" |
1881 { | 1890 { |
1882 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | 1891 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); |
1883 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | 1892 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; |
1884 } | 1893 } |
1885 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] | 1894 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] |
1886 ) | 1895 ) |
1887 | 1896 |
1895 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") | 1904 (match_operand:<VSWAP_WIDTH> 1 "register_operand" "<h_con>") |
1896 (parallel [(match_operand:SI 2 "immediate_operand")]))) | 1905 (parallel [(match_operand:SI 2 "immediate_operand")]))) |
1897 (match_operand:VDQSF 4 "register_operand" "0")))] | 1906 (match_operand:VDQSF 4 "register_operand" "0")))] |
1898 "TARGET_SIMD" | 1907 "TARGET_SIMD" |
1899 { | 1908 { |
1900 operands[2] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, | 1909 operands[2] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[2])); |
1901 INTVAL (operands[2]))); | |
1902 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; | 1910 return "fmls\\t%0.<Vtype>, %3.<Vtype>, %1.<Vtype>[%2]"; |
1903 } | 1911 } |
1904 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] | 1912 [(set_attr "type" "neon_fp_mla_<Vetype>_scalar<q>")] |
1905 ) | 1913 ) |
1906 | 1914 |
1926 (neg:DF | 1934 (neg:DF |
1927 (match_operand:DF 3 "register_operand" "w")) | 1935 (match_operand:DF 3 "register_operand" "w")) |
1928 (match_operand:DF 4 "register_operand" "0")))] | 1936 (match_operand:DF 4 "register_operand" "0")))] |
1929 "TARGET_SIMD" | 1937 "TARGET_SIMD" |
1930 { | 1938 { |
1931 operands[2] = GEN_INT (ENDIAN_LANE_N (V2DFmode, INTVAL (operands[2]))); | 1939 operands[2] = aarch64_endian_lane_rtx (V2DFmode, INTVAL (operands[2])); |
1932 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]"; | 1940 return "fmls\\t%0.2d, %3.2d, %1.2d[%2]"; |
1933 } | 1941 } |
1934 [(set_attr "type" "neon_fp_mla_d_scalar_q")] | 1942 [(set_attr "type" "neon_fp_mla_d_scalar_q")] |
1935 ) | 1943 ) |
1936 | 1944 |
2088 (define_expand "vec_unpacks_lo_<mode>" | 2096 (define_expand "vec_unpacks_lo_<mode>" |
2089 [(match_operand:<VWIDE> 0 "register_operand" "") | 2097 [(match_operand:<VWIDE> 0 "register_operand" "") |
2090 (match_operand:VQ_HSF 1 "register_operand" "")] | 2098 (match_operand:VQ_HSF 1 "register_operand" "")] |
2091 "TARGET_SIMD" | 2099 "TARGET_SIMD" |
2092 { | 2100 { |
2093 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); | 2101 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); |
2094 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0], | 2102 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0], |
2095 operands[1], p)); | 2103 operands[1], p)); |
2096 DONE; | 2104 DONE; |
2097 } | 2105 } |
2098 ) | 2106 ) |
2111 (define_expand "vec_unpacks_hi_<mode>" | 2119 (define_expand "vec_unpacks_hi_<mode>" |
2112 [(match_operand:<VWIDE> 0 "register_operand" "") | 2120 [(match_operand:<VWIDE> 0 "register_operand" "") |
2113 (match_operand:VQ_HSF 1 "register_operand" "")] | 2121 (match_operand:VQ_HSF 1 "register_operand" "")] |
2114 "TARGET_SIMD" | 2122 "TARGET_SIMD" |
2115 { | 2123 { |
2116 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 2124 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
2117 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0], | 2125 emit_insn (gen_aarch64_simd_vec_unpacks_lo_<mode> (operands[0], |
2118 operands[1], p)); | 2126 operands[1], p)); |
2119 DONE; | 2127 DONE; |
2120 } | 2128 } |
2121 ) | 2129 ) |
2219 | 2227 |
2220 ;; FP Max/Min | 2228 ;; FP Max/Min |
2221 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An | 2229 ;; Max/Min are introduced by idiom recognition by GCC's mid-end. An |
2222 ;; expression like: | 2230 ;; expression like: |
2223 ;; a = (b < c) ? b : c; | 2231 ;; a = (b < c) ? b : c; |
2224 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only is enabled | 2232 ;; is idiom-matched as MIN_EXPR<b,c> only if -ffinite-math-only and |
2225 ;; either explicitly or indirectly via -ffast-math. | 2233 ;; -fno-signed-zeros are enabled either explicitly or indirectly via |
2234 ;; -ffast-math. | |
2226 ;; | 2235 ;; |
2227 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL. | 2236 ;; MIN_EXPR and MAX_EXPR eventually map to 'smin' and 'smax' in RTL. |
2228 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which | 2237 ;; The 'smax' and 'smin' RTL standard pattern names do not specify which |
2229 ;; operand will be returned when both operands are zero (i.e. they may not | 2238 ;; operand will be returned when both operands are zero (i.e. they may not |
2230 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC | 2239 ;; honour signed zeroes), or when either operand is NaN. Therefore GCC |
2259 [(match_operand:<VEL> 0 "register_operand" "=w") | 2268 [(match_operand:<VEL> 0 "register_operand" "=w") |
2260 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")] | 2269 (unspec:VDQ_I [(match_operand:VDQ_I 1 "register_operand" "w")] |
2261 UNSPEC_ADDV)] | 2270 UNSPEC_ADDV)] |
2262 "TARGET_SIMD" | 2271 "TARGET_SIMD" |
2263 { | 2272 { |
2264 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); | 2273 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0); |
2265 rtx scratch = gen_reg_rtx (<MODE>mode); | 2274 rtx scratch = gen_reg_rtx (<MODE>mode); |
2266 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1])); | 2275 emit_insn (gen_aarch64_reduc_plus_internal<mode> (scratch, operands[1])); |
2267 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); | 2276 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); |
2268 DONE; | 2277 DONE; |
2269 } | 2278 } |
2310 [(set (match_operand:SF 0 "register_operand") | 2319 [(set (match_operand:SF 0 "register_operand") |
2311 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")] | 2320 (unspec:V4SF [(match_operand:V4SF 1 "register_operand")] |
2312 UNSPEC_FADDV))] | 2321 UNSPEC_FADDV))] |
2313 "TARGET_SIMD" | 2322 "TARGET_SIMD" |
2314 { | 2323 { |
2315 rtx elt = GEN_INT (ENDIAN_LANE_N (V4SFmode, 0)); | 2324 rtx elt = aarch64_endian_lane_rtx (V4SFmode, 0); |
2316 rtx scratch = gen_reg_rtx (V4SFmode); | 2325 rtx scratch = gen_reg_rtx (V4SFmode); |
2317 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1])); | 2326 emit_insn (gen_aarch64_faddpv4sf (scratch, operands[1], operands[1])); |
2318 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch)); | 2327 emit_insn (gen_aarch64_faddpv4sf (scratch, scratch, scratch)); |
2319 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt)); | 2328 emit_insn (gen_aarch64_get_lanev4sf (operands[0], scratch, elt)); |
2320 DONE; | 2329 DONE; |
2345 ) | 2354 ) |
2346 | 2355 |
2347 ;; 'across lanes' max and min ops. | 2356 ;; 'across lanes' max and min ops. |
2348 | 2357 |
2349 ;; Template for outputting a scalar, so we can create __builtins which can be | 2358 ;; Template for outputting a scalar, so we can create __builtins which can be |
2350 ;; gimple_fold'd to the REDUC_(MAX|MIN)_EXPR tree code. (This is FP smax/smin). | 2359 ;; gimple_fold'd to the IFN_REDUC_(MAX|MIN) function. (This is FP smax/smin). |
2351 (define_expand "reduc_<maxmin_uns>_scal_<mode>" | 2360 (define_expand "reduc_<maxmin_uns>_scal_<mode>" |
2352 [(match_operand:<VEL> 0 "register_operand") | 2361 [(match_operand:<VEL> 0 "register_operand") |
2353 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] | 2362 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand")] |
2354 FMAXMINV)] | 2363 FMAXMINV)] |
2355 "TARGET_SIMD" | 2364 "TARGET_SIMD" |
2356 { | 2365 { |
2357 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); | 2366 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0); |
2358 rtx scratch = gen_reg_rtx (<MODE>mode); | 2367 rtx scratch = gen_reg_rtx (<MODE>mode); |
2359 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch, | 2368 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch, |
2360 operands[1])); | 2369 operands[1])); |
2361 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); | 2370 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); |
2362 DONE; | 2371 DONE; |
2368 [(match_operand:<VEL> 0 "register_operand") | 2377 [(match_operand:<VEL> 0 "register_operand") |
2369 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")] | 2378 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand")] |
2370 MAXMINV)] | 2379 MAXMINV)] |
2371 "TARGET_SIMD" | 2380 "TARGET_SIMD" |
2372 { | 2381 { |
2373 rtx elt = GEN_INT (ENDIAN_LANE_N (<MODE>mode, 0)); | 2382 rtx elt = aarch64_endian_lane_rtx (<MODE>mode, 0); |
2374 rtx scratch = gen_reg_rtx (<MODE>mode); | 2383 rtx scratch = gen_reg_rtx (<MODE>mode); |
2375 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch, | 2384 emit_insn (gen_aarch64_reduc_<maxmin_uns>_internal<mode> (scratch, |
2376 operands[1])); | 2385 operands[1])); |
2377 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); | 2386 emit_insn (gen_aarch64_get_lane<mode> (operands[0], scratch, elt)); |
2378 DONE; | 2387 DONE; |
2425 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander. | 2434 ;; This pattern is expanded to by the aarch64_simd_bsl<mode> expander. |
2426 ;; Some forms of straight-line code may generate the equivalent form | 2435 ;; Some forms of straight-line code may generate the equivalent form |
2427 ;; in *aarch64_simd_bsl<mode>_alt. | 2436 ;; in *aarch64_simd_bsl<mode>_alt. |
2428 | 2437 |
2429 (define_insn "aarch64_simd_bsl<mode>_internal" | 2438 (define_insn "aarch64_simd_bsl<mode>_internal" |
2430 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w") | 2439 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w") |
2431 (xor:VSDQ_I_DI | 2440 (xor:VDQ_I |
2432 (and:VSDQ_I_DI | 2441 (and:VDQ_I |
2433 (xor:VSDQ_I_DI | 2442 (xor:VDQ_I |
2434 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w") | 2443 (match_operand:<V_INT_EQUIV> 3 "register_operand" "w,0,w") |
2435 (match_operand:VSDQ_I_DI 2 "register_operand" "w,w,0")) | 2444 (match_operand:VDQ_I 2 "register_operand" "w,w,0")) |
2436 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w")) | 2445 (match_operand:VDQ_I 1 "register_operand" "0,w,w")) |
2437 (match_dup:<V_INT_EQUIV> 3) | 2446 (match_dup:<V_INT_EQUIV> 3) |
2438 ))] | 2447 ))] |
2439 "TARGET_SIMD" | 2448 "TARGET_SIMD" |
2440 "@ | 2449 "@ |
2441 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype> | 2450 bsl\\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype> |
2449 ;; the outer XOR matches the second operand of the inner XOR rather than | 2458 ;; the outer XOR matches the second operand of the inner XOR rather than |
2450 ;; the first. The two are equivalent but since recog doesn't try all | 2459 ;; the first. The two are equivalent but since recog doesn't try all |
2451 ;; permutations of commutative operations, we have to have a separate pattern. | 2460 ;; permutations of commutative operations, we have to have a separate pattern. |
2452 | 2461 |
2453 (define_insn "*aarch64_simd_bsl<mode>_alt" | 2462 (define_insn "*aarch64_simd_bsl<mode>_alt" |
2454 [(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w,w,w") | 2463 [(set (match_operand:VDQ_I 0 "register_operand" "=w,w,w") |
2455 (xor:VSDQ_I_DI | 2464 (xor:VDQ_I |
2456 (and:VSDQ_I_DI | 2465 (and:VDQ_I |
2457 (xor:VSDQ_I_DI | 2466 (xor:VDQ_I |
2458 (match_operand:VSDQ_I_DI 3 "register_operand" "w,w,0") | 2467 (match_operand:VDQ_I 3 "register_operand" "w,w,0") |
2459 (match_operand:VSDQ_I_DI 2 "register_operand" "w,0,w")) | 2468 (match_operand:<V_INT_EQUIV> 2 "register_operand" "w,0,w")) |
2460 (match_operand:VSDQ_I_DI 1 "register_operand" "0,w,w")) | 2469 (match_operand:VDQ_I 1 "register_operand" "0,w,w")) |
2461 (match_dup:VSDQ_I_DI 2)))] | 2470 (match_dup:<V_INT_EQUIV> 2)))] |
2462 "TARGET_SIMD" | 2471 "TARGET_SIMD" |
2463 "@ | 2472 "@ |
2464 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype> | 2473 bsl\\t%0.<Vbtype>, %3.<Vbtype>, %2.<Vbtype> |
2465 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype> | 2474 bit\\t%0.<Vbtype>, %3.<Vbtype>, %1.<Vbtype> |
2466 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>" | 2475 bif\\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>" |
2467 [(set_attr "type" "neon_bsl<q>")] | 2476 [(set_attr "type" "neon_bsl<q>")] |
2477 ) | |
2478 | |
2479 ;; DImode is special, we want to avoid computing operations which are | |
2480 ;; more naturally computed in general purpose registers in the vector | |
2481 ;; registers. If we do that, we need to move all three operands from general | |
2482 ;; purpose registers to vector registers, then back again. However, we | |
2483 ;; don't want to make this pattern an UNSPEC as we'd lose scope for | |
2484 ;; optimizations based on the component operations of a BSL. | |
2485 ;; | |
2486 ;; That means we need a splitter back to the individual operations, if they | |
2487 ;; would be better calculated on the integer side. | |
2488 | |
2489 (define_insn_and_split "aarch64_simd_bsldi_internal" | |
2490 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r") | |
2491 (xor:DI | |
2492 (and:DI | |
2493 (xor:DI | |
2494 (match_operand:DI 3 "register_operand" "w,0,w,r") | |
2495 (match_operand:DI 2 "register_operand" "w,w,0,r")) | |
2496 (match_operand:DI 1 "register_operand" "0,w,w,r")) | |
2497 (match_dup:DI 3) | |
2498 ))] | |
2499 "TARGET_SIMD" | |
2500 "@ | |
2501 bsl\\t%0.8b, %2.8b, %3.8b | |
2502 bit\\t%0.8b, %2.8b, %1.8b | |
2503 bif\\t%0.8b, %3.8b, %1.8b | |
2504 #" | |
2505 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" | |
2506 [(match_dup 1) (match_dup 1) (match_dup 2) (match_dup 3)] | |
2507 { | |
2508 /* Split back to individual operations. If we're before reload, and | |
2509 able to create a temporary register, do so. If we're after reload, | |
2510 we've got an early-clobber destination register, so use that. | |
2511 Otherwise, we can't create pseudos and we can't yet guarantee that | |
2512 operands[0] is safe to write, so FAIL to split. */ | |
2513 | |
2514 rtx scratch; | |
2515 if (reload_completed) | |
2516 scratch = operands[0]; | |
2517 else if (can_create_pseudo_p ()) | |
2518 scratch = gen_reg_rtx (DImode); | |
2519 else | |
2520 FAIL; | |
2521 | |
2522 emit_insn (gen_xordi3 (scratch, operands[2], operands[3])); | |
2523 emit_insn (gen_anddi3 (scratch, scratch, operands[1])); | |
2524 emit_insn (gen_xordi3 (operands[0], scratch, operands[3])); | |
2525 DONE; | |
2526 } | |
2527 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple") | |
2528 (set_attr "length" "4,4,4,12")] | |
2529 ) | |
2530 | |
2531 (define_insn_and_split "aarch64_simd_bsldi_alt" | |
2532 [(set (match_operand:DI 0 "register_operand" "=w,w,w,&r") | |
2533 (xor:DI | |
2534 (and:DI | |
2535 (xor:DI | |
2536 (match_operand:DI 3 "register_operand" "w,w,0,r") | |
2537 (match_operand:DI 2 "register_operand" "w,0,w,r")) | |
2538 (match_operand:DI 1 "register_operand" "0,w,w,r")) | |
2539 (match_dup:DI 2) | |
2540 ))] | |
2541 "TARGET_SIMD" | |
2542 "@ | |
2543 bsl\\t%0.8b, %3.8b, %2.8b | |
2544 bit\\t%0.8b, %3.8b, %1.8b | |
2545 bif\\t%0.8b, %2.8b, %1.8b | |
2546 #" | |
2547 "&& REG_P (operands[0]) && GP_REGNUM_P (REGNO (operands[0]))" | |
2548 [(match_dup 0) (match_dup 1) (match_dup 2) (match_dup 3)] | |
2549 { | |
2550 /* Split back to individual operations. If we're before reload, and | |
2551 able to create a temporary register, do so. If we're after reload, | |
2552 we've got an early-clobber destination register, so use that. | |
2553 Otherwise, we can't create pseudos and we can't yet guarantee that | |
2554 operands[0] is safe to write, so FAIL to split. */ | |
2555 | |
2556 rtx scratch; | |
2557 if (reload_completed) | |
2558 scratch = operands[0]; | |
2559 else if (can_create_pseudo_p ()) | |
2560 scratch = gen_reg_rtx (DImode); | |
2561 else | |
2562 FAIL; | |
2563 | |
2564 emit_insn (gen_xordi3 (scratch, operands[2], operands[3])); | |
2565 emit_insn (gen_anddi3 (scratch, scratch, operands[1])); | |
2566 emit_insn (gen_xordi3 (operands[0], scratch, operands[2])); | |
2567 DONE; | |
2568 } | |
2569 [(set_attr "type" "neon_bsl,neon_bsl,neon_bsl,multiple") | |
2570 (set_attr "length" "4,4,4,12")] | |
2468 ) | 2571 ) |
2469 | 2572 |
2470 (define_expand "aarch64_simd_bsl<mode>" | 2573 (define_expand "aarch64_simd_bsl<mode>" |
2471 [(match_operand:VALLDIF 0 "register_operand") | 2574 [(match_operand:VALLDIF 0 "register_operand") |
2472 (match_operand:<V_INT_EQUIV> 1 "register_operand") | 2575 (match_operand:<V_INT_EQUIV> 1 "register_operand") |
2644 { | 2747 { |
2645 comparison = gen_aarch64_cmlt<mode>; | 2748 comparison = gen_aarch64_cmlt<mode>; |
2646 break; | 2749 break; |
2647 } | 2750 } |
2648 /* Fall through. */ | 2751 /* Fall through. */ |
2649 case UNGE: | 2752 case UNLT: |
2650 std::swap (operands[2], operands[3]); | 2753 std::swap (operands[2], operands[3]); |
2651 /* Fall through. */ | 2754 /* Fall through. */ |
2652 case UNLE: | 2755 case UNGT: |
2653 case GT: | 2756 case GT: |
2654 comparison = gen_aarch64_cmgt<mode>; | 2757 comparison = gen_aarch64_cmgt<mode>; |
2655 break; | 2758 break; |
2656 case LE: | 2759 case LE: |
2657 if (use_zero_form) | 2760 if (use_zero_form) |
2658 { | 2761 { |
2659 comparison = gen_aarch64_cmle<mode>; | 2762 comparison = gen_aarch64_cmle<mode>; |
2660 break; | 2763 break; |
2661 } | 2764 } |
2662 /* Fall through. */ | 2765 /* Fall through. */ |
2663 case UNGT: | 2766 case UNLE: |
2664 std::swap (operands[2], operands[3]); | 2767 std::swap (operands[2], operands[3]); |
2665 /* Fall through. */ | 2768 /* Fall through. */ |
2666 case UNLT: | 2769 case UNGE: |
2667 case GE: | 2770 case GE: |
2668 comparison = gen_aarch64_cmge<mode>; | 2771 comparison = gen_aarch64_cmge<mode>; |
2669 break; | 2772 break; |
2670 case NE: | 2773 case NE: |
2671 case EQ: | 2774 case EQ: |
2672 comparison = gen_aarch64_cmeq<mode>; | 2775 comparison = gen_aarch64_cmeq<mode>; |
2673 break; | 2776 break; |
2674 case UNEQ: | 2777 case UNEQ: |
2675 case ORDERED: | 2778 case ORDERED: |
2676 case UNORDERED: | 2779 case UNORDERED: |
2780 case LTGT: | |
2677 break; | 2781 break; |
2678 default: | 2782 default: |
2679 gcc_unreachable (); | 2783 gcc_unreachable (); |
2680 } | 2784 } |
2681 | 2785 |
2683 { | 2787 { |
2684 case UNGE: | 2788 case UNGE: |
2685 case UNGT: | 2789 case UNGT: |
2686 case UNLE: | 2790 case UNLE: |
2687 case UNLT: | 2791 case UNLT: |
2688 case NE: | 2792 { |
2689 /* FCM returns false for lanes which are unordered, so if we use | 2793 /* All of the above must not raise any FP exceptions. Thus we first |
2690 the inverse of the comparison we actually want to emit, then | 2794 check each operand for NaNs and force any elements containing NaN to |
2691 invert the result, we will end up with the correct result. | 2795 zero before using them in the compare. |
2692 Note that a NE NaN and NaN NE b are true for all a, b. | 2796 Example: UN<cc> (a, b) -> UNORDERED (a, b) | |
2693 | 2797 (cm<cc> (isnan (a) ? 0.0 : a, |
2694 Our transformations are: | 2798 isnan (b) ? 0.0 : b)) |
2695 a UNGE b -> !(b GT a) | 2799 We use the following transformations for doing the comparisons: |
2696 a UNGT b -> !(b GE a) | 2800 a UNGE b -> a GE b |
2697 a UNLE b -> !(a GT b) | 2801 a UNGT b -> a GT b |
2698 a UNLT b -> !(a GE b) | 2802 a UNLE b -> b GE a |
2699 a NE b -> !(a EQ b) */ | 2803 a UNLT b -> b GT a. */ |
2700 gcc_assert (comparison != NULL); | 2804 |
2701 emit_insn (comparison (operands[0], operands[2], operands[3])); | 2805 rtx tmp0 = gen_reg_rtx (<V_INT_EQUIV>mode); |
2702 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); | 2806 rtx tmp1 = gen_reg_rtx (<V_INT_EQUIV>mode); |
2807 rtx tmp2 = gen_reg_rtx (<V_INT_EQUIV>mode); | |
2808 emit_insn (gen_aarch64_cmeq<mode> (tmp0, operands[2], operands[2])); | |
2809 emit_insn (gen_aarch64_cmeq<mode> (tmp1, operands[3], operands[3])); | |
2810 emit_insn (gen_and<v_int_equiv>3 (tmp2, tmp0, tmp1)); | |
2811 emit_insn (gen_and<v_int_equiv>3 (tmp0, tmp0, | |
2812 lowpart_subreg (<V_INT_EQUIV>mode, | |
2813 operands[2], | |
2814 <MODE>mode))); | |
2815 emit_insn (gen_and<v_int_equiv>3 (tmp1, tmp1, | |
2816 lowpart_subreg (<V_INT_EQUIV>mode, | |
2817 operands[3], | |
2818 <MODE>mode))); | |
2819 gcc_assert (comparison != NULL); | |
2820 emit_insn (comparison (operands[0], | |
2821 lowpart_subreg (<MODE>mode, | |
2822 tmp0, <V_INT_EQUIV>mode), | |
2823 lowpart_subreg (<MODE>mode, | |
2824 tmp1, <V_INT_EQUIV>mode))); | |
2825 emit_insn (gen_orn<v_int_equiv>3 (operands[0], tmp2, operands[0])); | |
2826 } | |
2703 break; | 2827 break; |
2704 | 2828 |
2705 case LT: | 2829 case LT: |
2706 case LE: | 2830 case LE: |
2707 case GT: | 2831 case GT: |
2708 case GE: | 2832 case GE: |
2709 case EQ: | 2833 case EQ: |
2834 case NE: | |
2710 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ. | 2835 /* The easy case. Here we emit one of FCMGE, FCMGT or FCMEQ. |
2711 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are: | 2836 As a LT b <=> b GT a && a LE b <=> b GE a. Our transformations are: |
2712 a GE b -> a GE b | 2837 a GE b -> a GE b |
2713 a GT b -> a GT b | 2838 a GT b -> a GT b |
2714 a LE b -> b GE a | 2839 a LE b -> b GE a |
2715 a LT b -> b GT a | 2840 a LT b -> b GT a |
2716 a EQ b -> a EQ b */ | 2841 a EQ b -> a EQ b |
2842 a NE b -> ~(a EQ b) */ | |
2717 gcc_assert (comparison != NULL); | 2843 gcc_assert (comparison != NULL); |
2718 emit_insn (comparison (operands[0], operands[2], operands[3])); | 2844 emit_insn (comparison (operands[0], operands[2], operands[3])); |
2845 if (code == NE) | |
2846 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); | |
2719 break; | 2847 break; |
2720 | 2848 |
2721 case UNEQ: | 2849 case LTGT: |
2722 /* We first check (a > b || b > a) which is !UNEQ, inverting | 2850 /* LTGT is not guaranteed to not generate a FP exception. So let's |
2723 this result will then give us (a == b || a UNORDERED b). */ | 2851 go the faster way : ((a > b) || (b > a)). */ |
2724 emit_insn (gen_aarch64_cmgt<mode> (operands[0], | 2852 emit_insn (gen_aarch64_cmgt<mode> (operands[0], |
2725 operands[2], operands[3])); | 2853 operands[2], operands[3])); |
2726 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2])); | 2854 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[3], operands[2])); |
2727 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp)); | 2855 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp)); |
2728 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); | |
2729 break; | 2856 break; |
2730 | 2857 |
2858 case ORDERED: | |
2731 case UNORDERED: | 2859 case UNORDERED: |
2732 /* Operands are ORDERED iff (a > b || b >= a), so we can compute | 2860 case UNEQ: |
2733 UNORDERED as !ORDERED. */ | 2861 /* cmeq (a, a) & cmeq (b, b). */ |
2734 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3])); | 2862 emit_insn (gen_aarch64_cmeq<mode> (operands[0], |
2735 emit_insn (gen_aarch64_cmge<mode> (operands[0], | 2863 operands[2], operands[2])); |
2736 operands[3], operands[2])); | 2864 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[3], operands[3])); |
2737 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp)); | 2865 emit_insn (gen_and<v_int_equiv>3 (operands[0], operands[0], tmp)); |
2738 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); | 2866 |
2739 break; | 2867 if (code == UNORDERED) |
2740 | 2868 emit_insn (gen_one_cmpl<v_int_equiv>2 (operands[0], operands[0])); |
2741 case ORDERED: | 2869 else if (code == UNEQ) |
2742 emit_insn (gen_aarch64_cmgt<mode> (tmp, operands[2], operands[3])); | 2870 { |
2743 emit_insn (gen_aarch64_cmge<mode> (operands[0], | 2871 emit_insn (gen_aarch64_cmeq<mode> (tmp, operands[2], operands[3])); |
2744 operands[3], operands[2])); | 2872 emit_insn (gen_orn<v_int_equiv>3 (operands[0], operands[0], tmp)); |
2745 emit_insn (gen_ior<v_int_equiv>3 (operands[0], operands[0], tmp)); | 2873 } |
2746 break; | 2874 break; |
2747 | 2875 |
2748 default: | 2876 default: |
2749 gcc_unreachable (); | 2877 gcc_unreachable (); |
2750 } | 2878 } |
2893 (vec_select:<VEL> | 3021 (vec_select:<VEL> |
2894 (match_operand:VDQQH 1 "register_operand" "w") | 3022 (match_operand:VDQQH 1 "register_operand" "w") |
2895 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] | 3023 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] |
2896 "TARGET_SIMD" | 3024 "TARGET_SIMD" |
2897 { | 3025 { |
2898 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | 3026 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); |
2899 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]"; | 3027 return "smov\\t%<GPI:w>0, %1.<VDQQH:Vetype>[%2]"; |
2900 } | 3028 } |
2901 [(set_attr "type" "neon_to_gp<q>")] | 3029 [(set_attr "type" "neon_to_gp<q>")] |
2902 ) | 3030 ) |
2903 | 3031 |
2904 (define_insn "*aarch64_get_lane_zero_extendsi<mode>" | 3032 (define_insn "*aarch64_get_lane_zero_extend<GPI:mode><VDQQH:mode>" |
2905 [(set (match_operand:SI 0 "register_operand" "=r") | 3033 [(set (match_operand:GPI 0 "register_operand" "=r") |
2906 (zero_extend:SI | 3034 (zero_extend:GPI |
2907 (vec_select:<VEL> | 3035 (vec_select:<VEL> |
2908 (match_operand:VDQQH 1 "register_operand" "w") | 3036 (match_operand:VDQQH 1 "register_operand" "w") |
2909 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] | 3037 (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] |
2910 "TARGET_SIMD" | 3038 "TARGET_SIMD" |
2911 { | 3039 { |
2912 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | 3040 operands[2] = aarch64_endian_lane_rtx (<VDQQH:MODE>mode, |
3041 INTVAL (operands[2])); | |
2913 return "umov\\t%w0, %1.<Vetype>[%2]"; | 3042 return "umov\\t%w0, %1.<Vetype>[%2]"; |
2914 } | 3043 } |
2915 [(set_attr "type" "neon_to_gp<q>")] | 3044 [(set_attr "type" "neon_to_gp<q>")] |
2916 ) | 3045 ) |
2917 | 3046 |
2918 ;; Lane extraction of a value, neither sign nor zero extension | 3047 ;; Lane extraction of a value, neither sign nor zero extension |
2919 ;; is guaranteed so upper bits should be considered undefined. | 3048 ;; is guaranteed so upper bits should be considered undefined. |
2920 ;; RTL uses GCC vector extension indices throughout so flip only for assembly. | 3049 ;; RTL uses GCC vector extension indices throughout so flip only for assembly. |
2921 (define_insn "aarch64_get_lane<mode>" | 3050 (define_insn "aarch64_get_lane<mode>" |
2922 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv") | 3051 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=?r, w, Utv") |
2923 (vec_select:<VEL> | 3052 (vec_select:<VEL> |
2924 (match_operand:VALL_F16 1 "register_operand" "w, w, w") | 3053 (match_operand:VALL_F16 1 "register_operand" "w, w, w") |
2925 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))] | 3054 (parallel [(match_operand:SI 2 "immediate_operand" "i, i, i")])))] |
2926 "TARGET_SIMD" | 3055 "TARGET_SIMD" |
2927 { | 3056 { |
2928 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | 3057 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); |
2929 switch (which_alternative) | 3058 switch (which_alternative) |
2930 { | 3059 { |
2931 case 0: | 3060 case 0: |
2932 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]"; | 3061 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]"; |
2933 case 1: | 3062 case 1: |
2939 } | 3068 } |
2940 } | 3069 } |
2941 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")] | 3070 [(set_attr "type" "neon_to_gp<q>, neon_dup<q>, neon_store1_one_lane<q>")] |
2942 ) | 3071 ) |
2943 | 3072 |
3073 (define_insn "load_pair_lanes<mode>" | |
3074 [(set (match_operand:<VDBL> 0 "register_operand" "=w") | |
3075 (vec_concat:<VDBL> | |
3076 (match_operand:VDC 1 "memory_operand" "Utq") | |
3077 (match_operand:VDC 2 "memory_operand" "m")))] | |
3078 "TARGET_SIMD && !STRICT_ALIGNMENT | |
3079 && rtx_equal_p (XEXP (operands[2], 0), | |
3080 plus_constant (Pmode, | |
3081 XEXP (operands[1], 0), | |
3082 GET_MODE_SIZE (<MODE>mode)))" | |
3083 "ldr\\t%q0, %1" | |
3084 [(set_attr "type" "neon_load1_1reg_q")] | |
3085 ) | |
3086 | |
3087 (define_insn "store_pair_lanes<mode>" | |
3088 [(set (match_operand:<VDBL> 0 "aarch64_mem_pair_lanes_operand" "=Umn, Umn") | |
3089 (vec_concat:<VDBL> | |
3090 (match_operand:VDC 1 "register_operand" "w, r") | |
3091 (match_operand:VDC 2 "register_operand" "w, r")))] | |
3092 "TARGET_SIMD" | |
3093 "@ | |
3094 stp\\t%d1, %d2, %y0 | |
3095 stp\\t%x1, %x2, %y0" | |
3096 [(set_attr "type" "neon_stp, store_16")] | |
3097 ) | |
3098 | |
2944 ;; In this insn, operand 1 should be low, and operand 2 the high part of the | 3099 ;; In this insn, operand 1 should be low, and operand 2 the high part of the |
2945 ;; dest vector. | 3100 ;; dest vector. |
2946 | 3101 |
2947 (define_insn "*aarch64_combinez<mode>" | 3102 (define_insn "*aarch64_combinez<mode>" |
2948 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w") | 3103 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w") |
2949 (vec_concat:<VDBL> | 3104 (vec_concat:<VDBL> |
2950 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m") | 3105 (match_operand:VDC 1 "general_operand" "w,?r,m") |
2951 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz")))] | 3106 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero")))] |
2952 "TARGET_SIMD && !BYTES_BIG_ENDIAN" | 3107 "TARGET_SIMD && !BYTES_BIG_ENDIAN" |
2953 "@ | 3108 "@ |
2954 mov\\t%0.8b, %1.8b | 3109 mov\\t%0.8b, %1.8b |
2955 fmov\t%d0, %1 | 3110 fmov\t%d0, %1 |
2956 ldr\\t%d0, %1" | 3111 ldr\\t%d0, %1" |
2957 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg") | 3112 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg") |
2958 (set_attr "simd" "yes,*,yes") | 3113 (set_attr "arch" "simd,fp,simd")] |
2959 (set_attr "fp" "*,yes,*")] | |
2960 ) | 3114 ) |
2961 | 3115 |
2962 (define_insn "*aarch64_combinez_be<mode>" | 3116 (define_insn "*aarch64_combinez_be<mode>" |
2963 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w") | 3117 [(set (match_operand:<VDBL> 0 "register_operand" "=w,w,w") |
2964 (vec_concat:<VDBL> | 3118 (vec_concat:<VDBL> |
2965 (match_operand:VD_BHSI 2 "aarch64_simd_imm_zero" "Dz,Dz,Dz") | 3119 (match_operand:VDC 2 "aarch64_simd_or_scalar_imm_zero") |
2966 (match_operand:VD_BHSI 1 "general_operand" "w,?r,m")))] | 3120 (match_operand:VDC 1 "general_operand" "w,?r,m")))] |
2967 "TARGET_SIMD && BYTES_BIG_ENDIAN" | 3121 "TARGET_SIMD && BYTES_BIG_ENDIAN" |
2968 "@ | 3122 "@ |
2969 mov\\t%0.8b, %1.8b | 3123 mov\\t%0.8b, %1.8b |
2970 fmov\t%d0, %1 | 3124 fmov\t%d0, %1 |
2971 ldr\\t%d0, %1" | 3125 ldr\\t%d0, %1" |
2972 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg") | 3126 [(set_attr "type" "neon_move<q>, neon_from_gp, neon_load1_1reg") |
2973 (set_attr "simd" "yes,*,yes") | 3127 (set_attr "arch" "simd,fp,simd")] |
2974 (set_attr "fp" "*,yes,*")] | |
2975 ) | 3128 ) |
2976 | 3129 |
2977 (define_expand "aarch64_combine<mode>" | 3130 (define_expand "aarch64_combine<mode>" |
2978 [(match_operand:<VDBL> 0 "register_operand") | 3131 [(match_operand:<VDBL> 0 "register_operand") |
2979 (match_operand:VDC 1 "register_operand") | 3132 (match_operand:VDC 1 "register_operand") |
2984 | 3137 |
2985 DONE; | 3138 DONE; |
2986 } | 3139 } |
2987 ) | 3140 ) |
2988 | 3141 |
2989 (define_expand "aarch64_simd_combine<mode>" | 3142 (define_expand "@aarch64_simd_combine<mode>" |
2990 [(match_operand:<VDBL> 0 "register_operand") | 3143 [(match_operand:<VDBL> 0 "register_operand") |
2991 (match_operand:VDC 1 "register_operand") | 3144 (match_operand:VDC 1 "register_operand") |
2992 (match_operand:VDC 2 "register_operand")] | 3145 (match_operand:VDC 2 "register_operand")] |
2993 "TARGET_SIMD" | 3146 "TARGET_SIMD" |
2994 { | 3147 { |
3032 [(match_operand:<VWIDE> 0 "register_operand" "=w") | 3185 [(match_operand:<VWIDE> 0 "register_operand" "=w") |
3033 (match_operand:VQW 1 "register_operand" "w") | 3186 (match_operand:VQW 1 "register_operand" "w") |
3034 (match_operand:VQW 2 "register_operand" "w")] | 3187 (match_operand:VQW 2 "register_operand" "w")] |
3035 "TARGET_SIMD" | 3188 "TARGET_SIMD" |
3036 { | 3189 { |
3037 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 3190 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3038 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1], | 3191 emit_insn (gen_aarch64_saddl<mode>_hi_internal (operands[0], operands[1], |
3039 operands[2], p)); | 3192 operands[2], p)); |
3040 DONE; | 3193 DONE; |
3041 }) | 3194 }) |
3042 | 3195 |
3044 [(match_operand:<VWIDE> 0 "register_operand" "=w") | 3197 [(match_operand:<VWIDE> 0 "register_operand" "=w") |
3045 (match_operand:VQW 1 "register_operand" "w") | 3198 (match_operand:VQW 1 "register_operand" "w") |
3046 (match_operand:VQW 2 "register_operand" "w")] | 3199 (match_operand:VQW 2 "register_operand" "w")] |
3047 "TARGET_SIMD" | 3200 "TARGET_SIMD" |
3048 { | 3201 { |
3049 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 3202 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3050 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1], | 3203 emit_insn (gen_aarch64_uaddl<mode>_hi_internal (operands[0], operands[1], |
3051 operands[2], p)); | 3204 operands[2], p)); |
3052 DONE; | 3205 DONE; |
3053 }) | 3206 }) |
3054 | 3207 |
3056 [(match_operand:<VWIDE> 0 "register_operand" "=w") | 3209 [(match_operand:<VWIDE> 0 "register_operand" "=w") |
3057 (match_operand:VQW 1 "register_operand" "w") | 3210 (match_operand:VQW 1 "register_operand" "w") |
3058 (match_operand:VQW 2 "register_operand" "w")] | 3211 (match_operand:VQW 2 "register_operand" "w")] |
3059 "TARGET_SIMD" | 3212 "TARGET_SIMD" |
3060 { | 3213 { |
3061 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 3214 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3062 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1], | 3215 emit_insn (gen_aarch64_ssubl<mode>_hi_internal (operands[0], operands[1], |
3063 operands[2], p)); | 3216 operands[2], p)); |
3064 DONE; | 3217 DONE; |
3065 }) | 3218 }) |
3066 | 3219 |
3068 [(match_operand:<VWIDE> 0 "register_operand" "=w") | 3221 [(match_operand:<VWIDE> 0 "register_operand" "=w") |
3069 (match_operand:VQW 1 "register_operand" "w") | 3222 (match_operand:VQW 1 "register_operand" "w") |
3070 (match_operand:VQW 2 "register_operand" "w")] | 3223 (match_operand:VQW 2 "register_operand" "w")] |
3071 "TARGET_SIMD" | 3224 "TARGET_SIMD" |
3072 { | 3225 { |
3073 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 3226 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3074 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1], | 3227 emit_insn (gen_aarch64_usubl<mode>_hi_internal (operands[0], operands[1], |
3075 operands[2], p)); | 3228 operands[2], p)); |
3076 DONE; | 3229 DONE; |
3077 }) | 3230 }) |
3078 | 3231 |
3094 (plus:<VDBLW> (sign_extend:<VDBLW> | 3247 (plus:<VDBLW> (sign_extend:<VDBLW> |
3095 (match_operand:VQW 1 "register_operand" "")) | 3248 (match_operand:VQW 1 "register_operand" "")) |
3096 (match_operand:<VDBLW> 2 "register_operand" "")))] | 3249 (match_operand:<VDBLW> 2 "register_operand" "")))] |
3097 "TARGET_SIMD" | 3250 "TARGET_SIMD" |
3098 { | 3251 { |
3099 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); | 3252 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); |
3100 rtx temp = gen_reg_rtx (GET_MODE (operands[0])); | 3253 rtx temp = gen_reg_rtx (GET_MODE (operands[0])); |
3101 | 3254 |
3102 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2], | 3255 emit_insn (gen_aarch64_saddw<mode>_internal (temp, operands[2], |
3103 operands[1], p)); | 3256 operands[1], p)); |
3104 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1])); | 3257 emit_insn (gen_aarch64_saddw2<mode> (operands[0], temp, operands[1])); |
3122 (plus:<VDBLW> (zero_extend:<VDBLW> | 3275 (plus:<VDBLW> (zero_extend:<VDBLW> |
3123 (match_operand:VQW 1 "register_operand" "")) | 3276 (match_operand:VQW 1 "register_operand" "")) |
3124 (match_operand:<VDBLW> 2 "register_operand" "")))] | 3277 (match_operand:<VDBLW> 2 "register_operand" "")))] |
3125 "TARGET_SIMD" | 3278 "TARGET_SIMD" |
3126 { | 3279 { |
3127 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, false); | 3280 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, false); |
3128 rtx temp = gen_reg_rtx (GET_MODE (operands[0])); | 3281 rtx temp = gen_reg_rtx (GET_MODE (operands[0])); |
3129 | 3282 |
3130 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2], | 3283 emit_insn (gen_aarch64_uaddw<mode>_internal (temp, operands[2], |
3131 operands[1], p)); | 3284 operands[1], p)); |
3132 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1])); | 3285 emit_insn (gen_aarch64_uaddw2<mode> (operands[0], temp, operands[1])); |
3143 { | 3296 { |
3144 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1])); | 3297 emit_insn (gen_aarch64_uaddw<mode> (operands[0], operands[2], operands[1])); |
3145 DONE; | 3298 DONE; |
3146 }) | 3299 }) |
3147 | 3300 |
3148 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>" | 3301 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>" |
3149 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | 3302 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
3150 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") | 3303 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") |
3151 (ANY_EXTEND:<VWIDE> | 3304 (ANY_EXTEND:<VWIDE> |
3152 (match_operand:VD_BHSI 2 "register_operand" "w"))))] | 3305 (match_operand:VD_BHSI 2 "register_operand" "w"))))] |
3153 "TARGET_SIMD" | 3306 "TARGET_SIMD" |
3154 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" | 3307 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" |
3155 [(set_attr "type" "neon_<ADDSUB:optab>_widen")] | 3308 [(set_attr "type" "neon_sub_widen")] |
3156 ) | 3309 ) |
3157 | 3310 |
3158 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w<mode>_internal" | 3311 (define_insn "aarch64_<ANY_EXTEND:su>subw<mode>_internal" |
3159 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | 3312 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
3160 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") | 3313 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") |
3161 (ANY_EXTEND:<VWIDE> | 3314 (ANY_EXTEND:<VWIDE> |
3162 (vec_select:<VHALF> | 3315 (vec_select:<VHALF> |
3163 (match_operand:VQW 2 "register_operand" "w") | 3316 (match_operand:VQW 2 "register_operand" "w") |
3164 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))] | 3317 (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))))] |
3165 "TARGET_SIMD" | 3318 "TARGET_SIMD" |
3166 "<ANY_EXTEND:su><ADDSUB:optab>w\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>" | 3319 "<ANY_EXTEND:su>subw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>" |
3167 [(set_attr "type" "neon_<ADDSUB:optab>_widen")] | 3320 [(set_attr "type" "neon_sub_widen")] |
3168 ) | 3321 ) |
3169 | 3322 |
3170 (define_insn "aarch64_<ANY_EXTEND:su><ADDSUB:optab>w2<mode>_internal" | 3323 (define_insn "aarch64_<ANY_EXTEND:su>subw2<mode>_internal" |
3171 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | 3324 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") |
3172 (ADDSUB:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") | 3325 (minus:<VWIDE> (match_operand:<VWIDE> 1 "register_operand" "w") |
3173 (ANY_EXTEND:<VWIDE> | 3326 (ANY_EXTEND:<VWIDE> |
3174 (vec_select:<VHALF> | 3327 (vec_select:<VHALF> |
3175 (match_operand:VQW 2 "register_operand" "w") | 3328 (match_operand:VQW 2 "register_operand" "w") |
3176 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))] | 3329 (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))))] |
3177 "TARGET_SIMD" | 3330 "TARGET_SIMD" |
3178 "<ANY_EXTEND:su><ADDSUB:optab>w2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" | 3331 "<ANY_EXTEND:su>subw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" |
3179 [(set_attr "type" "neon_<ADDSUB:optab>_widen")] | 3332 [(set_attr "type" "neon_sub_widen")] |
3333 ) | |
3334 | |
3335 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>" | |
3336 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3337 (plus:<VWIDE> | |
3338 (ANY_EXTEND:<VWIDE> (match_operand:VD_BHSI 2 "register_operand" "w")) | |
3339 (match_operand:<VWIDE> 1 "register_operand" "w")))] | |
3340 "TARGET_SIMD" | |
3341 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" | |
3342 [(set_attr "type" "neon_add_widen")] | |
3343 ) | |
3344 | |
3345 (define_insn "aarch64_<ANY_EXTEND:su>addw<mode>_internal" | |
3346 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3347 (plus:<VWIDE> | |
3348 (ANY_EXTEND:<VWIDE> | |
3349 (vec_select:<VHALF> | |
3350 (match_operand:VQW 2 "register_operand" "w") | |
3351 (match_operand:VQW 3 "vect_par_cnst_lo_half" ""))) | |
3352 (match_operand:<VWIDE> 1 "register_operand" "w")))] | |
3353 "TARGET_SIMD" | |
3354 "<ANY_EXTEND:su>addw\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vhalftype>" | |
3355 [(set_attr "type" "neon_add_widen")] | |
3356 ) | |
3357 | |
3358 (define_insn "aarch64_<ANY_EXTEND:su>addw2<mode>_internal" | |
3359 [(set (match_operand:<VWIDE> 0 "register_operand" "=w") | |
3360 (plus:<VWIDE> | |
3361 (ANY_EXTEND:<VWIDE> | |
3362 (vec_select:<VHALF> | |
3363 (match_operand:VQW 2 "register_operand" "w") | |
3364 (match_operand:VQW 3 "vect_par_cnst_hi_half" ""))) | |
3365 (match_operand:<VWIDE> 1 "register_operand" "w")))] | |
3366 "TARGET_SIMD" | |
3367 "<ANY_EXTEND:su>addw2\\t%0.<Vwtype>, %1.<Vwtype>, %2.<Vtype>" | |
3368 [(set_attr "type" "neon_add_widen")] | |
3180 ) | 3369 ) |
3181 | 3370 |
3182 (define_expand "aarch64_saddw2<mode>" | 3371 (define_expand "aarch64_saddw2<mode>" |
3183 [(match_operand:<VWIDE> 0 "register_operand" "=w") | 3372 [(match_operand:<VWIDE> 0 "register_operand" "=w") |
3184 (match_operand:<VWIDE> 1 "register_operand" "w") | 3373 (match_operand:<VWIDE> 1 "register_operand" "w") |
3185 (match_operand:VQW 2 "register_operand" "w")] | 3374 (match_operand:VQW 2 "register_operand" "w")] |
3186 "TARGET_SIMD" | 3375 "TARGET_SIMD" |
3187 { | 3376 { |
3188 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 3377 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3189 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1], | 3378 emit_insn (gen_aarch64_saddw2<mode>_internal (operands[0], operands[1], |
3190 operands[2], p)); | 3379 operands[2], p)); |
3191 DONE; | 3380 DONE; |
3192 }) | 3381 }) |
3193 | 3382 |
3195 [(match_operand:<VWIDE> 0 "register_operand" "=w") | 3384 [(match_operand:<VWIDE> 0 "register_operand" "=w") |
3196 (match_operand:<VWIDE> 1 "register_operand" "w") | 3385 (match_operand:<VWIDE> 1 "register_operand" "w") |
3197 (match_operand:VQW 2 "register_operand" "w")] | 3386 (match_operand:VQW 2 "register_operand" "w")] |
3198 "TARGET_SIMD" | 3387 "TARGET_SIMD" |
3199 { | 3388 { |
3200 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 3389 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3201 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1], | 3390 emit_insn (gen_aarch64_uaddw2<mode>_internal (operands[0], operands[1], |
3202 operands[2], p)); | 3391 operands[2], p)); |
3203 DONE; | 3392 DONE; |
3204 }) | 3393 }) |
3205 | 3394 |
3208 [(match_operand:<VWIDE> 0 "register_operand" "=w") | 3397 [(match_operand:<VWIDE> 0 "register_operand" "=w") |
3209 (match_operand:<VWIDE> 1 "register_operand" "w") | 3398 (match_operand:<VWIDE> 1 "register_operand" "w") |
3210 (match_operand:VQW 2 "register_operand" "w")] | 3399 (match_operand:VQW 2 "register_operand" "w")] |
3211 "TARGET_SIMD" | 3400 "TARGET_SIMD" |
3212 { | 3401 { |
3213 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 3402 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3214 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1], | 3403 emit_insn (gen_aarch64_ssubw2<mode>_internal (operands[0], operands[1], |
3215 operands[2], p)); | 3404 operands[2], p)); |
3216 DONE; | 3405 DONE; |
3217 }) | 3406 }) |
3218 | 3407 |
3220 [(match_operand:<VWIDE> 0 "register_operand" "=w") | 3409 [(match_operand:<VWIDE> 0 "register_operand" "=w") |
3221 (match_operand:<VWIDE> 1 "register_operand" "w") | 3410 (match_operand:<VWIDE> 1 "register_operand" "w") |
3222 (match_operand:VQW 2 "register_operand" "w")] | 3411 (match_operand:VQW 2 "register_operand" "w")] |
3223 "TARGET_SIMD" | 3412 "TARGET_SIMD" |
3224 { | 3413 { |
3225 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 3414 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3226 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1], | 3415 emit_insn (gen_aarch64_usubw2<mode>_internal (operands[0], operands[1], |
3227 operands[2], p)); | 3416 operands[2], p)); |
3228 DONE; | 3417 DONE; |
3229 }) | 3418 }) |
3230 | 3419 |
3231 ;; <su><r>h<addsub>. | 3420 ;; <su><r>h<addsub>. |
3421 | |
3422 (define_expand "<u>avg<mode>3_floor" | |
3423 [(set (match_operand:VDQ_BHSI 0 "register_operand") | |
3424 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand") | |
3425 (match_operand:VDQ_BHSI 2 "register_operand")] | |
3426 HADD))] | |
3427 "TARGET_SIMD" | |
3428 ) | |
3429 | |
3430 (define_expand "<u>avg<mode>3_ceil" | |
3431 [(set (match_operand:VDQ_BHSI 0 "register_operand") | |
3432 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand") | |
3433 (match_operand:VDQ_BHSI 2 "register_operand")] | |
3434 RHADD))] | |
3435 "TARGET_SIMD" | |
3436 ) | |
3232 | 3437 |
3233 (define_insn "aarch64_<sur>h<addsub><mode>" | 3438 (define_insn "aarch64_<sur>h<addsub><mode>" |
3234 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") | 3439 [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w") |
3235 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w") | 3440 (unspec:VDQ_BHSI [(match_operand:VDQ_BHSI 1 "register_operand" "w") |
3236 (match_operand:VDQ_BHSI 2 "register_operand" "w")] | 3441 (match_operand:VDQ_BHSI 2 "register_operand" "w")] |
3299 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w") | 3504 (match_operand:<VSWAP_WIDTH> 2 "register_operand" "w") |
3300 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))] | 3505 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))] |
3301 UNSPEC_FMULX))] | 3506 UNSPEC_FMULX))] |
3302 "TARGET_SIMD" | 3507 "TARGET_SIMD" |
3303 { | 3508 { |
3304 operands[3] = GEN_INT (ENDIAN_LANE_N (<VSWAP_WIDTH>mode, | 3509 operands[3] = aarch64_endian_lane_rtx (<VSWAP_WIDTH>mode, INTVAL (operands[3])); |
3305 INTVAL (operands[3]))); | |
3306 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | 3510 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; |
3307 } | 3511 } |
3308 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")] | 3512 [(set_attr "type" "neon_fp_mul_<Vetype>_scalar<q>")] |
3309 ) | 3513 ) |
3310 | 3514 |
3319 (match_operand:VDQF 2 "register_operand" "w") | 3523 (match_operand:VDQF 2 "register_operand" "w") |
3320 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))] | 3524 (parallel [(match_operand:SI 3 "immediate_operand" "i")])))] |
3321 UNSPEC_FMULX))] | 3525 UNSPEC_FMULX))] |
3322 "TARGET_SIMD" | 3526 "TARGET_SIMD" |
3323 { | 3527 { |
3324 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); | 3528 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3])); |
3325 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | 3529 return "fmulx\t%<v>0<Vmtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; |
3326 } | 3530 } |
3327 [(set_attr "type" "neon_fp_mul_<Vetype><q>")] | 3531 [(set_attr "type" "neon_fp_mul_<Vetype><q>")] |
3328 ) | 3532 ) |
3329 | 3533 |
3353 (match_operand:VDQF 2 "register_operand" "w") | 3557 (match_operand:VDQF 2 "register_operand" "w") |
3354 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] | 3558 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] |
3355 UNSPEC_FMULX))] | 3559 UNSPEC_FMULX))] |
3356 "TARGET_SIMD" | 3560 "TARGET_SIMD" |
3357 { | 3561 { |
3358 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); | 3562 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3])); |
3359 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]"; | 3563 return "fmulx\t%<Vetype>0, %<Vetype>1, %2.<Vetype>[%3]"; |
3360 } | 3564 } |
3361 [(set_attr "type" "fmul<Vetype>")] | 3565 [(set_attr "type" "fmul<Vetype>")] |
3362 ) | 3566 ) |
3363 ;; <su>q<addsub> | 3567 ;; <su>q<addsub> |
3439 (match_operand:<VCOND> 2 "register_operand" "<vwx>") | 3643 (match_operand:<VCOND> 2 "register_operand" "<vwx>") |
3440 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] | 3644 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] |
3441 VQDMULH))] | 3645 VQDMULH))] |
3442 "TARGET_SIMD" | 3646 "TARGET_SIMD" |
3443 "* | 3647 "* |
3444 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); | 3648 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3])); |
3445 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";" | 3649 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";" |
3446 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] | 3650 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] |
3447 ) | 3651 ) |
3448 | 3652 |
3449 (define_insn "aarch64_sq<r>dmulh_laneq<mode>" | 3653 (define_insn "aarch64_sq<r>dmulh_laneq<mode>" |
3454 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") | 3658 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") |
3455 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] | 3659 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] |
3456 VQDMULH))] | 3660 VQDMULH))] |
3457 "TARGET_SIMD" | 3661 "TARGET_SIMD" |
3458 "* | 3662 "* |
3459 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); | 3663 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3])); |
3460 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";" | 3664 return \"sq<r>dmulh\\t%0.<Vtype>, %1.<Vtype>, %2.<Vetype>[%3]\";" |
3461 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] | 3665 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] |
3462 ) | 3666 ) |
3463 | 3667 |
3464 (define_insn "aarch64_sq<r>dmulh_lane<mode>" | 3668 (define_insn "aarch64_sq<r>dmulh_lane<mode>" |
3469 (match_operand:<VCOND> 2 "register_operand" "<vwx>") | 3673 (match_operand:<VCOND> 2 "register_operand" "<vwx>") |
3470 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] | 3674 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] |
3471 VQDMULH))] | 3675 VQDMULH))] |
3472 "TARGET_SIMD" | 3676 "TARGET_SIMD" |
3473 "* | 3677 "* |
3474 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); | 3678 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3])); |
3475 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";" | 3679 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";" |
3476 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] | 3680 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] |
3477 ) | 3681 ) |
3478 | 3682 |
3479 (define_insn "aarch64_sq<r>dmulh_laneq<mode>" | 3683 (define_insn "aarch64_sq<r>dmulh_laneq<mode>" |
3484 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") | 3688 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") |
3485 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] | 3689 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))] |
3486 VQDMULH))] | 3690 VQDMULH))] |
3487 "TARGET_SIMD" | 3691 "TARGET_SIMD" |
3488 "* | 3692 "* |
3489 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); | 3693 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3])); |
3490 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";" | 3694 return \"sq<r>dmulh\\t%<v>0, %<v>1, %2.<v>[%3]\";" |
3491 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] | 3695 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar<q>")] |
3492 ) | 3696 ) |
3493 | 3697 |
3494 ;; sqrdml[as]h. | 3698 ;; sqrdml[as]h. |
3516 (match_operand:<VCOND> 3 "register_operand" "<vwx>") | 3720 (match_operand:<VCOND> 3 "register_operand" "<vwx>") |
3517 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] | 3721 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] |
3518 SQRDMLH_AS))] | 3722 SQRDMLH_AS))] |
3519 "TARGET_SIMD_RDMA" | 3723 "TARGET_SIMD_RDMA" |
3520 { | 3724 { |
3521 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); | 3725 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); |
3522 return | 3726 return |
3523 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; | 3727 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; |
3524 } | 3728 } |
3525 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | 3729 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
3526 ) | 3730 ) |
3534 (match_operand:<VCOND> 3 "register_operand" "<vwx>") | 3738 (match_operand:<VCOND> 3 "register_operand" "<vwx>") |
3535 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] | 3739 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] |
3536 SQRDMLH_AS))] | 3740 SQRDMLH_AS))] |
3537 "TARGET_SIMD_RDMA" | 3741 "TARGET_SIMD_RDMA" |
3538 { | 3742 { |
3539 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); | 3743 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); |
3540 return | 3744 return |
3541 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]"; | 3745 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<Vetype>[%4]"; |
3542 } | 3746 } |
3543 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | 3747 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
3544 ) | 3748 ) |
3554 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") | 3758 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") |
3555 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] | 3759 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] |
3556 SQRDMLH_AS))] | 3760 SQRDMLH_AS))] |
3557 "TARGET_SIMD_RDMA" | 3761 "TARGET_SIMD_RDMA" |
3558 { | 3762 { |
3559 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); | 3763 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); |
3560 return | 3764 return |
3561 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; | 3765 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%0.<Vtype>, %2.<Vtype>, %3.<Vetype>[%4]"; |
3562 } | 3766 } |
3563 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | 3767 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
3564 ) | 3768 ) |
3572 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") | 3776 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") |
3573 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] | 3777 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))] |
3574 SQRDMLH_AS))] | 3778 SQRDMLH_AS))] |
3575 "TARGET_SIMD_RDMA" | 3779 "TARGET_SIMD_RDMA" |
3576 { | 3780 { |
3577 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); | 3781 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); |
3578 return | 3782 return |
3579 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]"; | 3783 "sqrdml<SQRDMLH_AS:rdma_as>h\\t%<v>0, %<v>2, %3.<v>[%4]"; |
3580 } | 3784 } |
3581 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | 3785 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
3582 ) | 3786 ) |
3616 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) | 3820 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) |
3617 )) | 3821 )) |
3618 (const_int 1))))] | 3822 (const_int 1))))] |
3619 "TARGET_SIMD" | 3823 "TARGET_SIMD" |
3620 { | 3824 { |
3621 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); | 3825 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); |
3622 return | 3826 return |
3623 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; | 3827 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; |
3624 } | 3828 } |
3625 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | 3829 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
3626 ) | 3830 ) |
3640 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) | 3844 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) |
3641 )) | 3845 )) |
3642 (const_int 1))))] | 3846 (const_int 1))))] |
3643 "TARGET_SIMD" | 3847 "TARGET_SIMD" |
3644 { | 3848 { |
3645 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); | 3849 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); |
3646 return | 3850 return |
3647 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; | 3851 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; |
3648 } | 3852 } |
3649 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | 3853 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
3650 ) | 3854 ) |
3663 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) | 3867 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) |
3664 ) | 3868 ) |
3665 (const_int 1))))] | 3869 (const_int 1))))] |
3666 "TARGET_SIMD" | 3870 "TARGET_SIMD" |
3667 { | 3871 { |
3668 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); | 3872 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); |
3669 return | 3873 return |
3670 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; | 3874 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; |
3671 } | 3875 } |
3672 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | 3876 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
3673 ) | 3877 ) |
3686 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) | 3890 (parallel [(match_operand:SI 4 "immediate_operand" "i")]))) |
3687 ) | 3891 ) |
3688 (const_int 1))))] | 3892 (const_int 1))))] |
3689 "TARGET_SIMD" | 3893 "TARGET_SIMD" |
3690 { | 3894 { |
3691 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); | 3895 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); |
3692 return | 3896 return |
3693 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; | 3897 "sqdml<SBINQOPS:as>l\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; |
3694 } | 3898 } |
3695 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | 3899 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
3696 ) | 3900 ) |
3741 (match_operand:<VWIDE> 1 "register_operand" "w") | 3945 (match_operand:<VWIDE> 1 "register_operand" "w") |
3742 (match_operand:VQ_HSI 2 "register_operand" "w") | 3946 (match_operand:VQ_HSI 2 "register_operand" "w") |
3743 (match_operand:VQ_HSI 3 "register_operand" "w")] | 3947 (match_operand:VQ_HSI 3 "register_operand" "w")] |
3744 "TARGET_SIMD" | 3948 "TARGET_SIMD" |
3745 { | 3949 { |
3746 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 3950 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3747 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1], | 3951 emit_insn (gen_aarch64_sqdmlal2<mode>_internal (operands[0], operands[1], |
3748 operands[2], operands[3], p)); | 3952 operands[2], operands[3], p)); |
3749 DONE; | 3953 DONE; |
3750 }) | 3954 }) |
3751 | 3955 |
3754 (match_operand:<VWIDE> 1 "register_operand" "w") | 3958 (match_operand:<VWIDE> 1 "register_operand" "w") |
3755 (match_operand:VQ_HSI 2 "register_operand" "w") | 3959 (match_operand:VQ_HSI 2 "register_operand" "w") |
3756 (match_operand:VQ_HSI 3 "register_operand" "w")] | 3960 (match_operand:VQ_HSI 3 "register_operand" "w")] |
3757 "TARGET_SIMD" | 3961 "TARGET_SIMD" |
3758 { | 3962 { |
3759 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 3963 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3760 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1], | 3964 emit_insn (gen_aarch64_sqdmlsl2<mode>_internal (operands[0], operands[1], |
3761 operands[2], operands[3], p)); | 3965 operands[2], operands[3], p)); |
3762 DONE; | 3966 DONE; |
3763 }) | 3967 }) |
3764 | 3968 |
3781 (parallel [(match_operand:SI 4 "immediate_operand" "i")]) | 3985 (parallel [(match_operand:SI 4 "immediate_operand" "i")]) |
3782 )))) | 3986 )))) |
3783 (const_int 1))))] | 3987 (const_int 1))))] |
3784 "TARGET_SIMD" | 3988 "TARGET_SIMD" |
3785 { | 3989 { |
3786 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[4]))); | 3990 operands[4] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[4])); |
3787 return | 3991 return |
3788 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; | 3992 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; |
3789 } | 3993 } |
3790 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | 3994 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
3791 ) | 3995 ) |
3807 (parallel [(match_operand:SI 4 "immediate_operand" "i")]) | 4011 (parallel [(match_operand:SI 4 "immediate_operand" "i")]) |
3808 )))) | 4012 )))) |
3809 (const_int 1))))] | 4013 (const_int 1))))] |
3810 "TARGET_SIMD" | 4014 "TARGET_SIMD" |
3811 { | 4015 { |
3812 operands[4] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[4]))); | 4016 operands[4] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[4])); |
3813 return | 4017 return |
3814 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; | 4018 "sqdml<SBINQOPS:as>l2\\t%<vw2>0<Vmwtype>, %<v>2<Vmtype>, %3.<Vetype>[%4]"; |
3815 } | 4019 } |
3816 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] | 4020 [(set_attr "type" "neon_sat_mla_<Vetype>_scalar_long")] |
3817 ) | 4021 ) |
3822 (match_operand:VQ_HSI 2 "register_operand" "w") | 4026 (match_operand:VQ_HSI 2 "register_operand" "w") |
3823 (match_operand:<VCOND> 3 "register_operand" "<vwx>") | 4027 (match_operand:<VCOND> 3 "register_operand" "<vwx>") |
3824 (match_operand:SI 4 "immediate_operand" "i")] | 4028 (match_operand:SI 4 "immediate_operand" "i")] |
3825 "TARGET_SIMD" | 4029 "TARGET_SIMD" |
3826 { | 4030 { |
3827 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 4031 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3828 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1], | 4032 emit_insn (gen_aarch64_sqdmlal2_lane<mode>_internal (operands[0], operands[1], |
3829 operands[2], operands[3], | 4033 operands[2], operands[3], |
3830 operands[4], p)); | 4034 operands[4], p)); |
3831 DONE; | 4035 DONE; |
3832 }) | 4036 }) |
3837 (match_operand:VQ_HSI 2 "register_operand" "w") | 4041 (match_operand:VQ_HSI 2 "register_operand" "w") |
3838 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") | 4042 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") |
3839 (match_operand:SI 4 "immediate_operand" "i")] | 4043 (match_operand:SI 4 "immediate_operand" "i")] |
3840 "TARGET_SIMD" | 4044 "TARGET_SIMD" |
3841 { | 4045 { |
3842 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 4046 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3843 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1], | 4047 emit_insn (gen_aarch64_sqdmlal2_laneq<mode>_internal (operands[0], operands[1], |
3844 operands[2], operands[3], | 4048 operands[2], operands[3], |
3845 operands[4], p)); | 4049 operands[4], p)); |
3846 DONE; | 4050 DONE; |
3847 }) | 4051 }) |
3852 (match_operand:VQ_HSI 2 "register_operand" "w") | 4056 (match_operand:VQ_HSI 2 "register_operand" "w") |
3853 (match_operand:<VCOND> 3 "register_operand" "<vwx>") | 4057 (match_operand:<VCOND> 3 "register_operand" "<vwx>") |
3854 (match_operand:SI 4 "immediate_operand" "i")] | 4058 (match_operand:SI 4 "immediate_operand" "i")] |
3855 "TARGET_SIMD" | 4059 "TARGET_SIMD" |
3856 { | 4060 { |
3857 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 4061 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3858 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1], | 4062 emit_insn (gen_aarch64_sqdmlsl2_lane<mode>_internal (operands[0], operands[1], |
3859 operands[2], operands[3], | 4063 operands[2], operands[3], |
3860 operands[4], p)); | 4064 operands[4], p)); |
3861 DONE; | 4065 DONE; |
3862 }) | 4066 }) |
3867 (match_operand:VQ_HSI 2 "register_operand" "w") | 4071 (match_operand:VQ_HSI 2 "register_operand" "w") |
3868 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") | 4072 (match_operand:<VCONQ> 3 "register_operand" "<vwx>") |
3869 (match_operand:SI 4 "immediate_operand" "i")] | 4073 (match_operand:SI 4 "immediate_operand" "i")] |
3870 "TARGET_SIMD" | 4074 "TARGET_SIMD" |
3871 { | 4075 { |
3872 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 4076 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3873 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1], | 4077 emit_insn (gen_aarch64_sqdmlsl2_laneq<mode>_internal (operands[0], operands[1], |
3874 operands[2], operands[3], | 4078 operands[2], operands[3], |
3875 operands[4], p)); | 4079 operands[4], p)); |
3876 DONE; | 4080 DONE; |
3877 }) | 4081 }) |
3900 (match_operand:<VWIDE> 1 "register_operand" "w") | 4104 (match_operand:<VWIDE> 1 "register_operand" "w") |
3901 (match_operand:VQ_HSI 2 "register_operand" "w") | 4105 (match_operand:VQ_HSI 2 "register_operand" "w") |
3902 (match_operand:<VEL> 3 "register_operand" "w")] | 4106 (match_operand:<VEL> 3 "register_operand" "w")] |
3903 "TARGET_SIMD" | 4107 "TARGET_SIMD" |
3904 { | 4108 { |
3905 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 4109 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3906 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1], | 4110 emit_insn (gen_aarch64_sqdmlal2_n<mode>_internal (operands[0], operands[1], |
3907 operands[2], operands[3], | 4111 operands[2], operands[3], |
3908 p)); | 4112 p)); |
3909 DONE; | 4113 DONE; |
3910 }) | 4114 }) |
3914 (match_operand:<VWIDE> 1 "register_operand" "w") | 4118 (match_operand:<VWIDE> 1 "register_operand" "w") |
3915 (match_operand:VQ_HSI 2 "register_operand" "w") | 4119 (match_operand:VQ_HSI 2 "register_operand" "w") |
3916 (match_operand:<VEL> 3 "register_operand" "w")] | 4120 (match_operand:<VEL> 3 "register_operand" "w")] |
3917 "TARGET_SIMD" | 4121 "TARGET_SIMD" |
3918 { | 4122 { |
3919 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 4123 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
3920 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1], | 4124 emit_insn (gen_aarch64_sqdmlsl2_n<mode>_internal (operands[0], operands[1], |
3921 operands[2], operands[3], | 4125 operands[2], operands[3], |
3922 p)); | 4126 p)); |
3923 DONE; | 4127 DONE; |
3924 }) | 4128 }) |
3954 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) | 4158 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) |
3955 )) | 4159 )) |
3956 (const_int 1)))] | 4160 (const_int 1)))] |
3957 "TARGET_SIMD" | 4161 "TARGET_SIMD" |
3958 { | 4162 { |
3959 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); | 4163 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3])); |
3960 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | 4164 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; |
3961 } | 4165 } |
3962 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] | 4166 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] |
3963 ) | 4167 ) |
3964 | 4168 |
3975 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) | 4179 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) |
3976 )) | 4180 )) |
3977 (const_int 1)))] | 4181 (const_int 1)))] |
3978 "TARGET_SIMD" | 4182 "TARGET_SIMD" |
3979 { | 4183 { |
3980 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); | 4184 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3])); |
3981 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | 4185 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; |
3982 } | 4186 } |
3983 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] | 4187 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] |
3984 ) | 4188 ) |
3985 | 4189 |
3995 (parallel [(match_operand:SI 3 "immediate_operand" "i")])) | 4199 (parallel [(match_operand:SI 3 "immediate_operand" "i")])) |
3996 )) | 4200 )) |
3997 (const_int 1)))] | 4201 (const_int 1)))] |
3998 "TARGET_SIMD" | 4202 "TARGET_SIMD" |
3999 { | 4203 { |
4000 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); | 4204 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3])); |
4001 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | 4205 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; |
4002 } | 4206 } |
4003 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] | 4207 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] |
4004 ) | 4208 ) |
4005 | 4209 |
4015 (parallel [(match_operand:SI 3 "immediate_operand" "i")])) | 4219 (parallel [(match_operand:SI 3 "immediate_operand" "i")])) |
4016 )) | 4220 )) |
4017 (const_int 1)))] | 4221 (const_int 1)))] |
4018 "TARGET_SIMD" | 4222 "TARGET_SIMD" |
4019 { | 4223 { |
4020 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); | 4224 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3])); |
4021 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | 4225 return "sqdmull\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; |
4022 } | 4226 } |
4023 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] | 4227 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] |
4024 ) | 4228 ) |
4025 | 4229 |
4068 [(match_operand:<VWIDE> 0 "register_operand" "=w") | 4272 [(match_operand:<VWIDE> 0 "register_operand" "=w") |
4069 (match_operand:VQ_HSI 1 "register_operand" "w") | 4273 (match_operand:VQ_HSI 1 "register_operand" "w") |
4070 (match_operand:VQ_HSI 2 "register_operand" "w")] | 4274 (match_operand:VQ_HSI 2 "register_operand" "w")] |
4071 "TARGET_SIMD" | 4275 "TARGET_SIMD" |
4072 { | 4276 { |
4073 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 4277 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
4074 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1], | 4278 emit_insn (gen_aarch64_sqdmull2<mode>_internal (operands[0], operands[1], |
4075 operands[2], p)); | 4279 operands[2], p)); |
4076 DONE; | 4280 DONE; |
4077 }) | 4281 }) |
4078 | 4282 |
4093 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) | 4297 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) |
4094 )) | 4298 )) |
4095 (const_int 1)))] | 4299 (const_int 1)))] |
4096 "TARGET_SIMD" | 4300 "TARGET_SIMD" |
4097 { | 4301 { |
4098 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCOND>mode, INTVAL (operands[3]))); | 4302 operands[3] = aarch64_endian_lane_rtx (<VCOND>mode, INTVAL (operands[3])); |
4099 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | 4303 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; |
4100 } | 4304 } |
4101 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] | 4305 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] |
4102 ) | 4306 ) |
4103 | 4307 |
4116 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) | 4320 (parallel [(match_operand:SI 3 "immediate_operand" "i")]))) |
4117 )) | 4321 )) |
4118 (const_int 1)))] | 4322 (const_int 1)))] |
4119 "TARGET_SIMD" | 4323 "TARGET_SIMD" |
4120 { | 4324 { |
4121 operands[3] = GEN_INT (ENDIAN_LANE_N (<VCONQ>mode, INTVAL (operands[3]))); | 4325 operands[3] = aarch64_endian_lane_rtx (<VCONQ>mode, INTVAL (operands[3])); |
4122 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; | 4326 return "sqdmull2\\t%<vw2>0<Vmwtype>, %<v>1<Vmtype>, %2.<Vetype>[%3]"; |
4123 } | 4327 } |
4124 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] | 4328 [(set_attr "type" "neon_sat_mul_<Vetype>_scalar_long")] |
4125 ) | 4329 ) |
4126 | 4330 |
4129 (match_operand:VQ_HSI 1 "register_operand" "w") | 4333 (match_operand:VQ_HSI 1 "register_operand" "w") |
4130 (match_operand:<VCOND> 2 "register_operand" "<vwx>") | 4334 (match_operand:<VCOND> 2 "register_operand" "<vwx>") |
4131 (match_operand:SI 3 "immediate_operand" "i")] | 4335 (match_operand:SI 3 "immediate_operand" "i")] |
4132 "TARGET_SIMD" | 4336 "TARGET_SIMD" |
4133 { | 4337 { |
4134 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 4338 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
4135 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1], | 4339 emit_insn (gen_aarch64_sqdmull2_lane<mode>_internal (operands[0], operands[1], |
4136 operands[2], operands[3], | 4340 operands[2], operands[3], |
4137 p)); | 4341 p)); |
4138 DONE; | 4342 DONE; |
4139 }) | 4343 }) |
4143 (match_operand:VQ_HSI 1 "register_operand" "w") | 4347 (match_operand:VQ_HSI 1 "register_operand" "w") |
4144 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") | 4348 (match_operand:<VCONQ> 2 "register_operand" "<vwx>") |
4145 (match_operand:SI 3 "immediate_operand" "i")] | 4349 (match_operand:SI 3 "immediate_operand" "i")] |
4146 "TARGET_SIMD" | 4350 "TARGET_SIMD" |
4147 { | 4351 { |
4148 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 4352 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
4149 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1], | 4353 emit_insn (gen_aarch64_sqdmull2_laneq<mode>_internal (operands[0], operands[1], |
4150 operands[2], operands[3], | 4354 operands[2], operands[3], |
4151 p)); | 4355 p)); |
4152 DONE; | 4356 DONE; |
4153 }) | 4357 }) |
4176 [(match_operand:<VWIDE> 0 "register_operand" "=w") | 4380 [(match_operand:<VWIDE> 0 "register_operand" "=w") |
4177 (match_operand:VQ_HSI 1 "register_operand" "w") | 4381 (match_operand:VQ_HSI 1 "register_operand" "w") |
4178 (match_operand:<VEL> 2 "register_operand" "w")] | 4382 (match_operand:<VEL> 2 "register_operand" "w")] |
4179 "TARGET_SIMD" | 4383 "TARGET_SIMD" |
4180 { | 4384 { |
4181 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, true); | 4385 rtx p = aarch64_simd_vect_par_cnst_half (<MODE>mode, <nunits>, true); |
4182 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1], | 4386 emit_insn (gen_aarch64_sqdmull2_n<mode>_internal (operands[0], operands[1], |
4183 operands[2], p)); | 4387 operands[2], p)); |
4184 DONE; | 4388 DONE; |
4185 }) | 4389 }) |
4186 | 4390 |
4341 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r") | 4545 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r") |
4342 ))) | 4546 ))) |
4343 (clobber (reg:CC CC_REGNUM))] | 4547 (clobber (reg:CC CC_REGNUM))] |
4344 "TARGET_SIMD" | 4548 "TARGET_SIMD" |
4345 "#" | 4549 "#" |
4346 "reload_completed" | 4550 "&& reload_completed" |
4347 [(set (match_operand:DI 0 "register_operand") | 4551 [(set (match_operand:DI 0 "register_operand") |
4348 (neg:DI | 4552 (neg:DI |
4349 (COMPARISONS:DI | 4553 (COMPARISONS:DI |
4350 (match_operand:DI 1 "register_operand") | 4554 (match_operand:DI 1 "register_operand") |
4351 (match_operand:DI 2 "aarch64_simd_reg_or_zero") | 4555 (match_operand:DI 2 "aarch64_simd_reg_or_zero") |
4404 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r") | 4608 (match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r") |
4405 ))) | 4609 ))) |
4406 (clobber (reg:CC CC_REGNUM))] | 4610 (clobber (reg:CC CC_REGNUM))] |
4407 "TARGET_SIMD" | 4611 "TARGET_SIMD" |
4408 "#" | 4612 "#" |
4409 "reload_completed" | 4613 "&& reload_completed" |
4410 [(set (match_operand:DI 0 "register_operand") | 4614 [(set (match_operand:DI 0 "register_operand") |
4411 (neg:DI | 4615 (neg:DI |
4412 (UCOMPARISONS:DI | 4616 (UCOMPARISONS:DI |
4413 (match_operand:DI 1 "register_operand") | 4617 (match_operand:DI 1 "register_operand") |
4414 (match_operand:DI 2 "aarch64_simd_reg_or_zero") | 4618 (match_operand:DI 2 "aarch64_simd_reg_or_zero") |
4475 (match_operand:DI 2 "register_operand" "w,r")) | 4679 (match_operand:DI 2 "register_operand" "w,r")) |
4476 (const_int 0)))) | 4680 (const_int 0)))) |
4477 (clobber (reg:CC CC_REGNUM))] | 4681 (clobber (reg:CC CC_REGNUM))] |
4478 "TARGET_SIMD" | 4682 "TARGET_SIMD" |
4479 "#" | 4683 "#" |
4480 "reload_completed" | 4684 "&& reload_completed" |
4481 [(set (match_operand:DI 0 "register_operand") | 4685 [(set (match_operand:DI 0 "register_operand") |
4482 (neg:DI | 4686 (neg:DI |
4483 (ne:DI | 4687 (ne:DI |
4484 (and:DI | 4688 (and:DI |
4485 (match_operand:DI 1 "register_operand") | 4689 (match_operand:DI 1 "register_operand") |
4622 (match_operand:SI 3 "immediate_operand" "i") | 4826 (match_operand:SI 3 "immediate_operand" "i") |
4623 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] | 4827 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) ] |
4624 UNSPEC_LD2_LANE))] | 4828 UNSPEC_LD2_LANE))] |
4625 "TARGET_SIMD" | 4829 "TARGET_SIMD" |
4626 { | 4830 { |
4627 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); | 4831 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3])); |
4628 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1"; | 4832 return "ld2\\t{%S0.<Vetype> - %T0.<Vetype>}[%3], %1"; |
4629 } | 4833 } |
4630 [(set_attr "type" "neon_load2_one_lane")] | 4834 [(set_attr "type" "neon_load2_one_lane")] |
4631 ) | 4835 ) |
4632 | 4836 |
4638 "TARGET_SIMD" | 4842 "TARGET_SIMD" |
4639 { | 4843 { |
4640 if (BYTES_BIG_ENDIAN) | 4844 if (BYTES_BIG_ENDIAN) |
4641 { | 4845 { |
4642 rtx tmp = gen_reg_rtx (OImode); | 4846 rtx tmp = gen_reg_rtx (OImode); |
4643 rtx mask = aarch64_reverse_mask (<MODE>mode); | 4847 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>); |
4644 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1])); | 4848 emit_insn (gen_aarch64_simd_ld2<mode> (tmp, operands[1])); |
4645 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask)); | 4849 emit_insn (gen_aarch64_rev_reglistoi (operands[0], tmp, mask)); |
4646 } | 4850 } |
4647 else | 4851 else |
4648 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1])); | 4852 emit_insn (gen_aarch64_simd_ld2<mode> (operands[0], operands[1])); |
4666 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) | 4870 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) |
4667 (match_operand:SI 2 "immediate_operand" "i")] | 4871 (match_operand:SI 2 "immediate_operand" "i")] |
4668 UNSPEC_ST2_LANE))] | 4872 UNSPEC_ST2_LANE))] |
4669 "TARGET_SIMD" | 4873 "TARGET_SIMD" |
4670 { | 4874 { |
4671 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | 4875 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); |
4672 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"; | 4876 return "st2\\t{%S1.<Vetype> - %T1.<Vetype>}[%2], %0"; |
4673 } | 4877 } |
4674 [(set_attr "type" "neon_store2_one_lane<q>")] | 4878 [(set_attr "type" "neon_store2_one_lane<q>")] |
4675 ) | 4879 ) |
4676 | 4880 |
4682 "TARGET_SIMD" | 4886 "TARGET_SIMD" |
4683 { | 4887 { |
4684 if (BYTES_BIG_ENDIAN) | 4888 if (BYTES_BIG_ENDIAN) |
4685 { | 4889 { |
4686 rtx tmp = gen_reg_rtx (OImode); | 4890 rtx tmp = gen_reg_rtx (OImode); |
4687 rtx mask = aarch64_reverse_mask (<MODE>mode); | 4891 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>); |
4688 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask)); | 4892 emit_insn (gen_aarch64_rev_reglistoi (tmp, operands[1], mask)); |
4689 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp)); | 4893 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], tmp)); |
4690 } | 4894 } |
4691 else | 4895 else |
4692 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1])); | 4896 emit_insn (gen_aarch64_simd_st2<mode> (operands[0], operands[1])); |
4720 (match_operand:SI 3 "immediate_operand" "i") | 4924 (match_operand:SI 3 "immediate_operand" "i") |
4721 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | 4925 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
4722 UNSPEC_LD3_LANE))] | 4926 UNSPEC_LD3_LANE))] |
4723 "TARGET_SIMD" | 4927 "TARGET_SIMD" |
4724 { | 4928 { |
4725 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); | 4929 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3])); |
4726 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1"; | 4930 return "ld3\\t{%S0.<Vetype> - %U0.<Vetype>}[%3], %1"; |
4727 } | 4931 } |
4728 [(set_attr "type" "neon_load3_one_lane")] | 4932 [(set_attr "type" "neon_load3_one_lane")] |
4729 ) | 4933 ) |
4730 | 4934 |
4736 "TARGET_SIMD" | 4940 "TARGET_SIMD" |
4737 { | 4941 { |
4738 if (BYTES_BIG_ENDIAN) | 4942 if (BYTES_BIG_ENDIAN) |
4739 { | 4943 { |
4740 rtx tmp = gen_reg_rtx (CImode); | 4944 rtx tmp = gen_reg_rtx (CImode); |
4741 rtx mask = aarch64_reverse_mask (<MODE>mode); | 4945 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>); |
4742 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1])); | 4946 emit_insn (gen_aarch64_simd_ld3<mode> (tmp, operands[1])); |
4743 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask)); | 4947 emit_insn (gen_aarch64_rev_reglistci (operands[0], tmp, mask)); |
4744 } | 4948 } |
4745 else | 4949 else |
4746 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1])); | 4950 emit_insn (gen_aarch64_simd_ld3<mode> (operands[0], operands[1])); |
4764 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) | 4968 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) |
4765 (match_operand:SI 2 "immediate_operand" "i")] | 4969 (match_operand:SI 2 "immediate_operand" "i")] |
4766 UNSPEC_ST3_LANE))] | 4970 UNSPEC_ST3_LANE))] |
4767 "TARGET_SIMD" | 4971 "TARGET_SIMD" |
4768 { | 4972 { |
4769 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | 4973 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); |
4770 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0"; | 4974 return "st3\\t{%S1.<Vetype> - %U1.<Vetype>}[%2], %0"; |
4771 } | 4975 } |
4772 [(set_attr "type" "neon_store3_one_lane<q>")] | 4976 [(set_attr "type" "neon_store3_one_lane<q>")] |
4773 ) | 4977 ) |
4774 | 4978 |
4780 "TARGET_SIMD" | 4984 "TARGET_SIMD" |
4781 { | 4985 { |
4782 if (BYTES_BIG_ENDIAN) | 4986 if (BYTES_BIG_ENDIAN) |
4783 { | 4987 { |
4784 rtx tmp = gen_reg_rtx (CImode); | 4988 rtx tmp = gen_reg_rtx (CImode); |
4785 rtx mask = aarch64_reverse_mask (<MODE>mode); | 4989 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>); |
4786 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask)); | 4990 emit_insn (gen_aarch64_rev_reglistci (tmp, operands[1], mask)); |
4787 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp)); | 4991 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], tmp)); |
4788 } | 4992 } |
4789 else | 4993 else |
4790 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1])); | 4994 emit_insn (gen_aarch64_simd_st3<mode> (operands[0], operands[1])); |
4818 (match_operand:SI 3 "immediate_operand" "i") | 5022 (match_operand:SI 3 "immediate_operand" "i") |
4819 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | 5023 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] |
4820 UNSPEC_LD4_LANE))] | 5024 UNSPEC_LD4_LANE))] |
4821 "TARGET_SIMD" | 5025 "TARGET_SIMD" |
4822 { | 5026 { |
4823 operands[3] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]))); | 5027 operands[3] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[3])); |
4824 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1"; | 5028 return "ld4\\t{%S0.<Vetype> - %V0.<Vetype>}[%3], %1"; |
4825 } | 5029 } |
4826 [(set_attr "type" "neon_load4_one_lane")] | 5030 [(set_attr "type" "neon_load4_one_lane")] |
4827 ) | 5031 ) |
4828 | 5032 |
4834 "TARGET_SIMD" | 5038 "TARGET_SIMD" |
4835 { | 5039 { |
4836 if (BYTES_BIG_ENDIAN) | 5040 if (BYTES_BIG_ENDIAN) |
4837 { | 5041 { |
4838 rtx tmp = gen_reg_rtx (XImode); | 5042 rtx tmp = gen_reg_rtx (XImode); |
4839 rtx mask = aarch64_reverse_mask (<MODE>mode); | 5043 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>); |
4840 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1])); | 5044 emit_insn (gen_aarch64_simd_ld4<mode> (tmp, operands[1])); |
4841 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask)); | 5045 emit_insn (gen_aarch64_rev_reglistxi (operands[0], tmp, mask)); |
4842 } | 5046 } |
4843 else | 5047 else |
4844 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1])); | 5048 emit_insn (gen_aarch64_simd_ld4<mode> (operands[0], operands[1])); |
4862 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) | 5066 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY) |
4863 (match_operand:SI 2 "immediate_operand" "i")] | 5067 (match_operand:SI 2 "immediate_operand" "i")] |
4864 UNSPEC_ST4_LANE))] | 5068 UNSPEC_ST4_LANE))] |
4865 "TARGET_SIMD" | 5069 "TARGET_SIMD" |
4866 { | 5070 { |
4867 operands[2] = GEN_INT (ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[2]))); | 5071 operands[2] = aarch64_endian_lane_rtx (<MODE>mode, INTVAL (operands[2])); |
4868 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0"; | 5072 return "st4\\t{%S1.<Vetype> - %V1.<Vetype>}[%2], %0"; |
4869 } | 5073 } |
4870 [(set_attr "type" "neon_store4_one_lane<q>")] | 5074 [(set_attr "type" "neon_store4_one_lane<q>")] |
4871 ) | 5075 ) |
4872 | 5076 |
4878 "TARGET_SIMD" | 5082 "TARGET_SIMD" |
4879 { | 5083 { |
4880 if (BYTES_BIG_ENDIAN) | 5084 if (BYTES_BIG_ENDIAN) |
4881 { | 5085 { |
4882 rtx tmp = gen_reg_rtx (XImode); | 5086 rtx tmp = gen_reg_rtx (XImode); |
4883 rtx mask = aarch64_reverse_mask (<MODE>mode); | 5087 rtx mask = aarch64_reverse_mask (<MODE>mode, <nunits>); |
4884 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask)); | 5088 emit_insn (gen_aarch64_rev_reglistxi (tmp, operands[1], mask)); |
4885 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp)); | 5089 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], tmp)); |
4886 } | 5090 } |
4887 else | 5091 else |
4888 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1])); | 5092 emit_insn (gen_aarch64_simd_st4<mode> (operands[0], operands[1])); |
4925 { | 5129 { |
4926 if (GET_CODE (operands[0]) != REG) | 5130 if (GET_CODE (operands[0]) != REG) |
4927 operands[1] = force_reg (<MODE>mode, operands[1]); | 5131 operands[1] = force_reg (<MODE>mode, operands[1]); |
4928 } | 5132 } |
4929 }) | 5133 }) |
5134 | |
5135 | |
5136 (define_expand "aarch64_ld1x3<VALLDIF:mode>" | |
5137 [(match_operand:CI 0 "register_operand" "=w") | |
5138 (match_operand:DI 1 "register_operand" "r") | |
5139 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5140 "TARGET_SIMD" | |
5141 { | |
5142 rtx mem = gen_rtx_MEM (CImode, operands[1]); | |
5143 emit_insn (gen_aarch64_ld1_x3_<VALLDIF:mode> (operands[0], mem)); | |
5144 DONE; | |
5145 }) | |
5146 | |
5147 (define_insn "aarch64_ld1_x3_<mode>" | |
5148 [(set (match_operand:CI 0 "register_operand" "=w") | |
5149 (unspec:CI | |
5150 [(match_operand:CI 1 "aarch64_simd_struct_operand" "Utv") | |
5151 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_LD1))] | |
5152 "TARGET_SIMD" | |
5153 "ld1\\t{%S0.<Vtype> - %U0.<Vtype>}, %1" | |
5154 [(set_attr "type" "neon_load1_3reg<q>")] | |
5155 ) | |
5156 | |
5157 (define_expand "aarch64_st1x2<VALLDIF:mode>" | |
5158 [(match_operand:DI 0 "register_operand" "") | |
5159 (match_operand:OI 1 "register_operand" "") | |
5160 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5161 "TARGET_SIMD" | |
5162 { | |
5163 rtx mem = gen_rtx_MEM (OImode, operands[0]); | |
5164 emit_insn (gen_aarch64_st1_x2_<VALLDIF:mode> (mem, operands[1])); | |
5165 DONE; | |
5166 }) | |
5167 | |
5168 (define_insn "aarch64_st1_x2_<mode>" | |
5169 [(set (match_operand:OI 0 "aarch64_simd_struct_operand" "=Utv") | |
5170 (unspec:OI | |
5171 [(match_operand:OI 1 "register_operand" "w") | |
5172 (unspec:VALLDIF [(const_int 2)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))] | |
5173 "TARGET_SIMD" | |
5174 "st1\\t{%S1.<Vtype> - %T1.<Vtype>}, %0" | |
5175 [(set_attr "type" "neon_store1_2reg<q>")] | |
5176 ) | |
5177 | |
5178 (define_expand "aarch64_st1x3<VALLDIF:mode>" | |
5179 [(match_operand:DI 0 "register_operand" "") | |
5180 (match_operand:CI 1 "register_operand" "") | |
5181 (unspec:VALLDIF [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5182 "TARGET_SIMD" | |
5183 { | |
5184 rtx mem = gen_rtx_MEM (CImode, operands[0]); | |
5185 emit_insn (gen_aarch64_st1_x3_<VALLDIF:mode> (mem, operands[1])); | |
5186 DONE; | |
5187 }) | |
5188 | |
5189 (define_insn "aarch64_st1_x3_<mode>" | |
5190 [(set (match_operand:CI 0 "aarch64_simd_struct_operand" "=Utv") | |
5191 (unspec:CI | |
5192 [(match_operand:CI 1 "register_operand" "w") | |
5193 (unspec:VALLDIF [(const_int 3)] UNSPEC_VSTRUCTDUMMY)] UNSPEC_ST1))] | |
5194 "TARGET_SIMD" | |
5195 "st1\\t{%S1.<Vtype> - %U1.<Vtype>}, %0" | |
5196 [(set_attr "type" "neon_store1_3reg<q>")] | |
5197 ) | |
4930 | 5198 |
4931 (define_insn "*aarch64_mov<mode>" | 5199 (define_insn "*aarch64_mov<mode>" |
4932 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w") | 5200 [(set (match_operand:VSTRUCT 0 "aarch64_simd_nonimmediate_operand" "=w,Utv,w") |
4933 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))] | 5201 (match_operand:VSTRUCT 1 "aarch64_simd_general_operand" " w,w,Utv"))] |
4934 "TARGET_SIMD && !BYTES_BIG_ENDIAN | 5202 "TARGET_SIMD && !BYTES_BIG_ENDIAN |
5174 | 5442 |
5175 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem)); | 5443 emit_insn (gen_aarch64_simd_ld<VSTRUCT:nregs><VQ:mode> (operands[0], mem)); |
5176 DONE; | 5444 DONE; |
5177 }) | 5445 }) |
5178 | 5446 |
5447 (define_expand "aarch64_ld1x2<VQ:mode>" | |
5448 [(match_operand:OI 0 "register_operand" "=w") | |
5449 (match_operand:DI 1 "register_operand" "r") | |
5450 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5451 "TARGET_SIMD" | |
5452 { | |
5453 machine_mode mode = OImode; | |
5454 rtx mem = gen_rtx_MEM (mode, operands[1]); | |
5455 | |
5456 emit_insn (gen_aarch64_simd_ld1<VQ:mode>_x2 (operands[0], mem)); | |
5457 DONE; | |
5458 }) | |
5459 | |
5460 (define_expand "aarch64_ld1x2<VDC:mode>" | |
5461 [(match_operand:OI 0 "register_operand" "=w") | |
5462 (match_operand:DI 1 "register_operand" "r") | |
5463 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5464 "TARGET_SIMD" | |
5465 { | |
5466 machine_mode mode = OImode; | |
5467 rtx mem = gen_rtx_MEM (mode, operands[1]); | |
5468 | |
5469 emit_insn (gen_aarch64_simd_ld1<VDC:mode>_x2 (operands[0], mem)); | |
5470 DONE; | |
5471 }) | |
5472 | |
5473 | |
5179 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>" | 5474 (define_expand "aarch64_ld<VSTRUCT:nregs>_lane<VALLDIF:mode>" |
5180 [(match_operand:VSTRUCT 0 "register_operand" "=w") | 5475 [(match_operand:VSTRUCT 0 "register_operand" "=w") |
5181 (match_operand:DI 1 "register_operand" "w") | 5476 (match_operand:DI 1 "register_operand" "w") |
5182 (match_operand:VSTRUCT 2 "register_operand" "0") | 5477 (match_operand:VSTRUCT 2 "register_operand" "0") |
5183 (match_operand:SI 3 "immediate_operand" "i") | 5478 (match_operand:SI 3 "immediate_operand" "i") |
5186 { | 5481 { |
5187 rtx mem = gen_rtx_MEM (BLKmode, operands[1]); | 5482 rtx mem = gen_rtx_MEM (BLKmode, operands[1]); |
5188 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode)) | 5483 set_mem_size (mem, GET_MODE_SIZE (GET_MODE_INNER (<VALLDIF:MODE>mode)) |
5189 * <VSTRUCT:nregs>); | 5484 * <VSTRUCT:nregs>); |
5190 | 5485 |
5191 aarch64_simd_lane_bounds (operands[3], 0, | 5486 aarch64_simd_lane_bounds (operands[3], 0, <VALLDIF:nunits>, NULL); |
5192 GET_MODE_NUNITS (<VALLDIF:MODE>mode), | |
5193 NULL); | |
5194 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> ( | 5487 emit_insn (gen_aarch64_vec_load_lanes<VSTRUCT:mode>_lane<VALLDIF:mode> ( |
5195 operands[0], mem, operands[2], operands[3])); | 5488 operands[0], mem, operands[2], operands[3])); |
5196 DONE; | 5489 DONE; |
5197 }) | 5490 }) |
5198 | 5491 |
5235 ;; Permuted-store expanders for neon intrinsics. | 5528 ;; Permuted-store expanders for neon intrinsics. |
5236 | 5529 |
5237 ;; Permute instructions | 5530 ;; Permute instructions |
5238 | 5531 |
5239 ;; vec_perm support | 5532 ;; vec_perm support |
5240 | |
5241 (define_expand "vec_perm_const<mode>" | |
5242 [(match_operand:VALL_F16 0 "register_operand") | |
5243 (match_operand:VALL_F16 1 "register_operand") | |
5244 (match_operand:VALL_F16 2 "register_operand") | |
5245 (match_operand:<V_INT_EQUIV> 3)] | |
5246 "TARGET_SIMD" | |
5247 { | |
5248 if (aarch64_expand_vec_perm_const (operands[0], operands[1], | |
5249 operands[2], operands[3])) | |
5250 DONE; | |
5251 else | |
5252 FAIL; | |
5253 }) | |
5254 | 5533 |
5255 (define_expand "vec_perm<mode>" | 5534 (define_expand "vec_perm<mode>" |
5256 [(match_operand:VB 0 "register_operand") | 5535 [(match_operand:VB 0 "register_operand") |
5257 (match_operand:VB 1 "register_operand") | 5536 (match_operand:VB 1 "register_operand") |
5258 (match_operand:VB 2 "register_operand") | 5537 (match_operand:VB 2 "register_operand") |
5259 (match_operand:VB 3 "register_operand")] | 5538 (match_operand:VB 3 "register_operand")] |
5260 "TARGET_SIMD" | 5539 "TARGET_SIMD" |
5261 { | 5540 { |
5262 aarch64_expand_vec_perm (operands[0], operands[1], | 5541 aarch64_expand_vec_perm (operands[0], operands[1], |
5263 operands[2], operands[3]); | 5542 operands[2], operands[3], <nunits>); |
5264 DONE; | 5543 DONE; |
5265 }) | 5544 }) |
5266 | 5545 |
5267 (define_insn "aarch64_tbl1<mode>" | 5546 (define_insn "aarch64_tbl1<mode>" |
5268 [(set (match_operand:VB 0 "register_operand" "=w") | 5547 [(set (match_operand:VB 0 "register_operand" "=w") |
5367 DONE; | 5646 DONE; |
5368 } | 5647 } |
5369 [(set_attr "type" "multiple")] | 5648 [(set_attr "type" "multiple")] |
5370 ) | 5649 ) |
5371 | 5650 |
5651 ;; This instruction's pattern is generated directly by | |
5652 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would | |
5653 ;; need corresponding changes there. | |
5372 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>" | 5654 (define_insn "aarch64_<PERMUTE:perm_insn><PERMUTE:perm_hilo><mode>" |
5373 [(set (match_operand:VALL_F16 0 "register_operand" "=w") | 5655 [(set (match_operand:VALL_F16 0 "register_operand" "=w") |
5374 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") | 5656 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") |
5375 (match_operand:VALL_F16 2 "register_operand" "w")] | 5657 (match_operand:VALL_F16 2 "register_operand" "w")] |
5376 PERMUTE))] | 5658 PERMUTE))] |
5377 "TARGET_SIMD" | 5659 "TARGET_SIMD" |
5378 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" | 5660 "<PERMUTE:perm_insn><PERMUTE:perm_hilo>\\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>" |
5379 [(set_attr "type" "neon_permute<q>")] | 5661 [(set_attr "type" "neon_permute<q>")] |
5380 ) | 5662 ) |
5381 | 5663 |
5382 ;; Note immediate (third) operand is lane index not byte index. | 5664 ;; This instruction's pattern is generated directly by |
5665 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would | |
5666 ;; need corresponding changes there. Note that the immediate (third) | |
5667 ;; operand is a lane index not a byte index. | |
5383 (define_insn "aarch64_ext<mode>" | 5668 (define_insn "aarch64_ext<mode>" |
5384 [(set (match_operand:VALL_F16 0 "register_operand" "=w") | 5669 [(set (match_operand:VALL_F16 0 "register_operand" "=w") |
5385 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") | 5670 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w") |
5386 (match_operand:VALL_F16 2 "register_operand" "w") | 5671 (match_operand:VALL_F16 2 "register_operand" "w") |
5387 (match_operand:SI 3 "immediate_operand" "i")] | 5672 (match_operand:SI 3 "immediate_operand" "i")] |
5393 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3"; | 5678 return "ext\\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>, #%3"; |
5394 } | 5679 } |
5395 [(set_attr "type" "neon_ext<q>")] | 5680 [(set_attr "type" "neon_ext<q>")] |
5396 ) | 5681 ) |
5397 | 5682 |
5683 ;; This instruction's pattern is generated directly by | |
5684 ;; aarch64_expand_vec_perm_const, so any changes to the pattern would | |
5685 ;; need corresponding changes there. | |
5398 (define_insn "aarch64_rev<REVERSE:rev_op><mode>" | 5686 (define_insn "aarch64_rev<REVERSE:rev_op><mode>" |
5399 [(set (match_operand:VALL_F16 0 "register_operand" "=w") | 5687 [(set (match_operand:VALL_F16 0 "register_operand" "=w") |
5400 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")] | 5688 (unspec:VALL_F16 [(match_operand:VALL_F16 1 "register_operand" "w")] |
5401 REVERSE))] | 5689 REVERSE))] |
5402 "TARGET_SIMD" | 5690 "TARGET_SIMD" |
5561 "TARGET_SIMD" | 5849 "TARGET_SIMD" |
5562 "ld1r\\t{%0.<Vtype>}, %1" | 5850 "ld1r\\t{%0.<Vtype>}, %1" |
5563 [(set_attr "type" "neon_load1_all_lanes")] | 5851 [(set_attr "type" "neon_load1_all_lanes")] |
5564 ) | 5852 ) |
5565 | 5853 |
5566 (define_insn "aarch64_frecpe<mode>" | 5854 (define_insn "aarch64_simd_ld1<mode>_x2" |
5567 [(set (match_operand:VHSDF 0 "register_operand" "=w") | 5855 [(set (match_operand:OI 0 "register_operand" "=w") |
5568 (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")] | 5856 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv") |
5857 (unspec:VQ [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5858 UNSPEC_LD1))] | |
5859 "TARGET_SIMD" | |
5860 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" | |
5861 [(set_attr "type" "neon_load1_2reg<q>")] | |
5862 ) | |
5863 | |
5864 (define_insn "aarch64_simd_ld1<mode>_x2" | |
5865 [(set (match_operand:OI 0 "register_operand" "=w") | |
5866 (unspec:OI [(match_operand:OI 1 "aarch64_simd_struct_operand" "Utv") | |
5867 (unspec:VDC [(const_int 0)] UNSPEC_VSTRUCTDUMMY)] | |
5868 UNSPEC_LD1))] | |
5869 "TARGET_SIMD" | |
5870 "ld1\\t{%S0.<Vtype> - %T0.<Vtype>}, %1" | |
5871 [(set_attr "type" "neon_load1_2reg<q>")] | |
5872 ) | |
5873 | |
5874 | |
5875 (define_insn "@aarch64_frecpe<mode>" | |
5876 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") | |
5877 (unspec:VHSDF_HSDF | |
5878 [(match_operand:VHSDF_HSDF 1 "register_operand" "w")] | |
5569 UNSPEC_FRECPE))] | 5879 UNSPEC_FRECPE))] |
5570 "TARGET_SIMD" | 5880 "TARGET_SIMD" |
5571 "frecpe\\t%0.<Vtype>, %1.<Vtype>" | 5881 "frecpe\t%<v>0<Vmtype>, %<v>1<Vmtype>" |
5572 [(set_attr "type" "neon_fp_recpe_<stype><q>")] | 5882 [(set_attr "type" "neon_fp_recpe_<stype><q>")] |
5573 ) | 5883 ) |
5574 | 5884 |
5575 (define_insn "aarch64_frecp<FRECP:frecp_suffix><mode>" | 5885 (define_insn "aarch64_frecpx<mode>" |
5576 [(set (match_operand:GPF_F16 0 "register_operand" "=w") | 5886 [(set (match_operand:GPF_F16 0 "register_operand" "=w") |
5577 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")] | 5887 (unspec:GPF_F16 [(match_operand:GPF_F16 1 "register_operand" "w")] |
5578 FRECP))] | 5888 UNSPEC_FRECPX))] |
5579 "TARGET_SIMD" | 5889 "TARGET_SIMD" |
5580 "frecp<FRECP:frecp_suffix>\\t%<s>0, %<s>1" | 5890 "frecpx\t%<s>0, %<s>1" |
5581 [(set_attr "type" "neon_fp_recp<FRECP:frecp_suffix>_<GPF_F16:stype>")] | 5891 [(set_attr "type" "neon_fp_recpx_<GPF_F16:stype>")] |
5582 ) | 5892 ) |
5583 | 5893 |
5584 (define_insn "aarch64_frecps<mode>" | 5894 (define_insn "@aarch64_frecps<mode>" |
5585 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") | 5895 [(set (match_operand:VHSDF_HSDF 0 "register_operand" "=w") |
5586 (unspec:VHSDF_HSDF | 5896 (unspec:VHSDF_HSDF |
5587 [(match_operand:VHSDF_HSDF 1 "register_operand" "w") | 5897 [(match_operand:VHSDF_HSDF 1 "register_operand" "w") |
5588 (match_operand:VHSDF_HSDF 2 "register_operand" "w")] | 5898 (match_operand:VHSDF_HSDF 2 "register_operand" "w")] |
5589 UNSPEC_FRECPS))] | 5899 UNSPEC_FRECPS))] |
5615 | 5925 |
5616 ;; aes | 5926 ;; aes |
5617 | 5927 |
5618 (define_insn "aarch64_crypto_aes<aes_op>v16qi" | 5928 (define_insn "aarch64_crypto_aes<aes_op>v16qi" |
5619 [(set (match_operand:V16QI 0 "register_operand" "=w") | 5929 [(set (match_operand:V16QI 0 "register_operand" "=w") |
5620 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0") | 5930 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "%0") |
5621 (match_operand:V16QI 2 "register_operand" "w")] | 5931 (match_operand:V16QI 2 "register_operand" "w")] |
5622 CRYPTO_AES))] | 5932 CRYPTO_AES))] |
5623 "TARGET_SIMD && TARGET_CRYPTO" | 5933 "TARGET_SIMD && TARGET_AES" |
5934 "aes<aes_op>\\t%0.16b, %2.16b" | |
5935 [(set_attr "type" "crypto_aese")] | |
5936 ) | |
5937 | |
5938 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine" | |
5939 [(set (match_operand:V16QI 0 "register_operand" "=w") | |
5940 (unspec:V16QI [(xor:V16QI | |
5941 (match_operand:V16QI 1 "register_operand" "%0") | |
5942 (match_operand:V16QI 2 "register_operand" "w")) | |
5943 (match_operand:V16QI 3 "aarch64_simd_imm_zero" "")] | |
5944 CRYPTO_AES))] | |
5945 "TARGET_SIMD && TARGET_AES" | |
5946 "aes<aes_op>\\t%0.16b, %2.16b" | |
5947 [(set_attr "type" "crypto_aese")] | |
5948 ) | |
5949 | |
5950 (define_insn "*aarch64_crypto_aes<aes_op>v16qi_xor_combine" | |
5951 [(set (match_operand:V16QI 0 "register_operand" "=w") | |
5952 (unspec:V16QI [(match_operand:V16QI 3 "aarch64_simd_imm_zero" "") | |
5953 (xor:V16QI (match_operand:V16QI 1 "register_operand" "%0") | |
5954 (match_operand:V16QI 2 "register_operand" "w"))] | |
5955 CRYPTO_AES))] | |
5956 "TARGET_SIMD && TARGET_AES" | |
5624 "aes<aes_op>\\t%0.16b, %2.16b" | 5957 "aes<aes_op>\\t%0.16b, %2.16b" |
5625 [(set_attr "type" "crypto_aese")] | 5958 [(set_attr "type" "crypto_aese")] |
5626 ) | 5959 ) |
5627 | 5960 |
5628 ;; When AES/AESMC fusion is enabled we want the register allocation to | 5961 ;; When AES/AESMC fusion is enabled we want the register allocation to |
5633 | 5966 |
5634 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi" | 5967 (define_insn "aarch64_crypto_aes<aesmc_op>v16qi" |
5635 [(set (match_operand:V16QI 0 "register_operand" "=w,w") | 5968 [(set (match_operand:V16QI 0 "register_operand" "=w,w") |
5636 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")] | 5969 (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "0,w")] |
5637 CRYPTO_AESMC))] | 5970 CRYPTO_AESMC))] |
5638 "TARGET_SIMD && TARGET_CRYPTO" | 5971 "TARGET_SIMD && TARGET_AES" |
5639 "aes<aesmc_op>\\t%0.16b, %1.16b" | 5972 "aes<aesmc_op>\\t%0.16b, %1.16b" |
5640 [(set_attr "type" "crypto_aesmc") | 5973 [(set_attr "type" "crypto_aesmc") |
5641 (set_attr_alternative "enabled" | 5974 (set_attr_alternative "enabled" |
5642 [(if_then_else (match_test | 5975 [(if_then_else (match_test |
5643 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)") | 5976 "aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)") |
5644 (const_string "yes" ) | 5977 (const_string "yes" ) |
5645 (const_string "no")) | 5978 (const_string "no")) |
5646 (const_string "yes")])] | 5979 (const_string "yes")])] |
5647 ) | 5980 ) |
5648 | 5981 |
5982 ;; When AESE/AESMC fusion is enabled we really want to keep the two together | |
5983 ;; and enforce the register dependency without scheduling or register | |
5984 ;; allocation messing up the order or introducing moves in between. | |
5985 ;; Mash the two together during combine. | |
5986 | |
5987 (define_insn "*aarch64_crypto_aese_fused" | |
5988 [(set (match_operand:V16QI 0 "register_operand" "=&w") | |
5989 (unspec:V16QI | |
5990 [(unspec:V16QI | |
5991 [(match_operand:V16QI 1 "register_operand" "0") | |
5992 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESE) | |
5993 ] UNSPEC_AESMC))] | |
5994 "TARGET_SIMD && TARGET_AES | |
5995 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" | |
5996 "aese\\t%0.16b, %2.16b\;aesmc\\t%0.16b, %0.16b" | |
5997 [(set_attr "type" "crypto_aese") | |
5998 (set_attr "length" "8")] | |
5999 ) | |
6000 | |
6001 ;; When AESD/AESIMC fusion is enabled we really want to keep the two together | |
6002 ;; and enforce the register dependency without scheduling or register | |
6003 ;; allocation messing up the order or introducing moves in between. | |
6004 ;; Mash the two together during combine. | |
6005 | |
6006 (define_insn "*aarch64_crypto_aesd_fused" | |
6007 [(set (match_operand:V16QI 0 "register_operand" "=&w") | |
6008 (unspec:V16QI | |
6009 [(unspec:V16QI | |
6010 [(match_operand:V16QI 1 "register_operand" "0") | |
6011 (match_operand:V16QI 2 "register_operand" "w")] UNSPEC_AESD) | |
6012 ] UNSPEC_AESIMC))] | |
6013 "TARGET_SIMD && TARGET_AES | |
6014 && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)" | |
6015 "aesd\\t%0.16b, %2.16b\;aesimc\\t%0.16b, %0.16b" | |
6016 [(set_attr "type" "crypto_aese") | |
6017 (set_attr "length" "8")] | |
6018 ) | |
6019 | |
5649 ;; sha1 | 6020 ;; sha1 |
5650 | 6021 |
5651 (define_insn "aarch64_crypto_sha1hsi" | 6022 (define_insn "aarch64_crypto_sha1hsi" |
5652 [(set (match_operand:SI 0 "register_operand" "=w") | 6023 [(set (match_operand:SI 0 "register_operand" "=w") |
5653 (unspec:SI [(match_operand:SI 1 | 6024 (unspec:SI [(match_operand:SI 1 |
5654 "register_operand" "w")] | 6025 "register_operand" "w")] |
5655 UNSPEC_SHA1H))] | 6026 UNSPEC_SHA1H))] |
5656 "TARGET_SIMD && TARGET_CRYPTO" | 6027 "TARGET_SIMD && TARGET_SHA2" |
5657 "sha1h\\t%s0, %s1" | 6028 "sha1h\\t%s0, %s1" |
5658 [(set_attr "type" "crypto_sha1_fast")] | 6029 [(set_attr "type" "crypto_sha1_fast")] |
5659 ) | 6030 ) |
5660 | 6031 |
5661 (define_insn "aarch64_crypto_sha1hv4si" | 6032 (define_insn "aarch64_crypto_sha1hv4si" |
5662 [(set (match_operand:SI 0 "register_operand" "=w") | 6033 [(set (match_operand:SI 0 "register_operand" "=w") |
5663 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w") | 6034 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w") |
5664 (parallel [(const_int 0)]))] | 6035 (parallel [(const_int 0)]))] |
5665 UNSPEC_SHA1H))] | 6036 UNSPEC_SHA1H))] |
5666 "TARGET_SIMD && TARGET_CRYPTO && !BYTES_BIG_ENDIAN" | 6037 "TARGET_SIMD && TARGET_SHA2 && !BYTES_BIG_ENDIAN" |
5667 "sha1h\\t%s0, %s1" | 6038 "sha1h\\t%s0, %s1" |
5668 [(set_attr "type" "crypto_sha1_fast")] | 6039 [(set_attr "type" "crypto_sha1_fast")] |
5669 ) | 6040 ) |
5670 | 6041 |
5671 (define_insn "aarch64_be_crypto_sha1hv4si" | 6042 (define_insn "aarch64_be_crypto_sha1hv4si" |
5672 [(set (match_operand:SI 0 "register_operand" "=w") | 6043 [(set (match_operand:SI 0 "register_operand" "=w") |
5673 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w") | 6044 (unspec:SI [(vec_select:SI (match_operand:V4SI 1 "register_operand" "w") |
5674 (parallel [(const_int 3)]))] | 6045 (parallel [(const_int 3)]))] |
5675 UNSPEC_SHA1H))] | 6046 UNSPEC_SHA1H))] |
5676 "TARGET_SIMD && TARGET_CRYPTO && BYTES_BIG_ENDIAN" | 6047 "TARGET_SIMD && TARGET_SHA2 && BYTES_BIG_ENDIAN" |
5677 "sha1h\\t%s0, %s1" | 6048 "sha1h\\t%s0, %s1" |
5678 [(set_attr "type" "crypto_sha1_fast")] | 6049 [(set_attr "type" "crypto_sha1_fast")] |
5679 ) | 6050 ) |
5680 | 6051 |
5681 (define_insn "aarch64_crypto_sha1su1v4si" | 6052 (define_insn "aarch64_crypto_sha1su1v4si" |
5682 [(set (match_operand:V4SI 0 "register_operand" "=w") | 6053 [(set (match_operand:V4SI 0 "register_operand" "=w") |
5683 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | 6054 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") |
5684 (match_operand:V4SI 2 "register_operand" "w")] | 6055 (match_operand:V4SI 2 "register_operand" "w")] |
5685 UNSPEC_SHA1SU1))] | 6056 UNSPEC_SHA1SU1))] |
5686 "TARGET_SIMD && TARGET_CRYPTO" | 6057 "TARGET_SIMD && TARGET_SHA2" |
5687 "sha1su1\\t%0.4s, %2.4s" | 6058 "sha1su1\\t%0.4s, %2.4s" |
5688 [(set_attr "type" "crypto_sha1_fast")] | 6059 [(set_attr "type" "crypto_sha1_fast")] |
5689 ) | 6060 ) |
5690 | 6061 |
5691 (define_insn "aarch64_crypto_sha1<sha1_op>v4si" | 6062 (define_insn "aarch64_crypto_sha1<sha1_op>v4si" |
5692 [(set (match_operand:V4SI 0 "register_operand" "=w") | 6063 [(set (match_operand:V4SI 0 "register_operand" "=w") |
5693 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | 6064 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") |
5694 (match_operand:SI 2 "register_operand" "w") | 6065 (match_operand:SI 2 "register_operand" "w") |
5695 (match_operand:V4SI 3 "register_operand" "w")] | 6066 (match_operand:V4SI 3 "register_operand" "w")] |
5696 CRYPTO_SHA1))] | 6067 CRYPTO_SHA1))] |
5697 "TARGET_SIMD && TARGET_CRYPTO" | 6068 "TARGET_SIMD && TARGET_SHA2" |
5698 "sha1<sha1_op>\\t%q0, %s2, %3.4s" | 6069 "sha1<sha1_op>\\t%q0, %s2, %3.4s" |
5699 [(set_attr "type" "crypto_sha1_slow")] | 6070 [(set_attr "type" "crypto_sha1_slow")] |
5700 ) | 6071 ) |
5701 | 6072 |
5702 (define_insn "aarch64_crypto_sha1su0v4si" | 6073 (define_insn "aarch64_crypto_sha1su0v4si" |
5703 [(set (match_operand:V4SI 0 "register_operand" "=w") | 6074 [(set (match_operand:V4SI 0 "register_operand" "=w") |
5704 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | 6075 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") |
5705 (match_operand:V4SI 2 "register_operand" "w") | 6076 (match_operand:V4SI 2 "register_operand" "w") |
5706 (match_operand:V4SI 3 "register_operand" "w")] | 6077 (match_operand:V4SI 3 "register_operand" "w")] |
5707 UNSPEC_SHA1SU0))] | 6078 UNSPEC_SHA1SU0))] |
5708 "TARGET_SIMD && TARGET_CRYPTO" | 6079 "TARGET_SIMD && TARGET_SHA2" |
5709 "sha1su0\\t%0.4s, %2.4s, %3.4s" | 6080 "sha1su0\\t%0.4s, %2.4s, %3.4s" |
5710 [(set_attr "type" "crypto_sha1_xor")] | 6081 [(set_attr "type" "crypto_sha1_xor")] |
5711 ) | 6082 ) |
5712 | 6083 |
5713 ;; sha256 | 6084 ;; sha256 |
5716 [(set (match_operand:V4SI 0 "register_operand" "=w") | 6087 [(set (match_operand:V4SI 0 "register_operand" "=w") |
5717 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | 6088 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") |
5718 (match_operand:V4SI 2 "register_operand" "w") | 6089 (match_operand:V4SI 2 "register_operand" "w") |
5719 (match_operand:V4SI 3 "register_operand" "w")] | 6090 (match_operand:V4SI 3 "register_operand" "w")] |
5720 CRYPTO_SHA256))] | 6091 CRYPTO_SHA256))] |
5721 "TARGET_SIMD && TARGET_CRYPTO" | 6092 "TARGET_SIMD && TARGET_SHA2" |
5722 "sha256h<sha256_op>\\t%q0, %q2, %3.4s" | 6093 "sha256h<sha256_op>\\t%q0, %q2, %3.4s" |
5723 [(set_attr "type" "crypto_sha256_slow")] | 6094 [(set_attr "type" "crypto_sha256_slow")] |
5724 ) | 6095 ) |
5725 | 6096 |
5726 (define_insn "aarch64_crypto_sha256su0v4si" | 6097 (define_insn "aarch64_crypto_sha256su0v4si" |
5727 [(set (match_operand:V4SI 0 "register_operand" "=w") | 6098 [(set (match_operand:V4SI 0 "register_operand" "=w") |
5728 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | 6099 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") |
5729 (match_operand:V4SI 2 "register_operand" "w")] | 6100 (match_operand:V4SI 2 "register_operand" "w")] |
5730 UNSPEC_SHA256SU0))] | 6101 UNSPEC_SHA256SU0))] |
5731 "TARGET_SIMD &&TARGET_CRYPTO" | 6102 "TARGET_SIMD && TARGET_SHA2" |
5732 "sha256su0\\t%0.4s, %2.4s" | 6103 "sha256su0\\t%0.4s, %2.4s" |
5733 [(set_attr "type" "crypto_sha256_fast")] | 6104 [(set_attr "type" "crypto_sha256_fast")] |
5734 ) | 6105 ) |
5735 | 6106 |
5736 (define_insn "aarch64_crypto_sha256su1v4si" | 6107 (define_insn "aarch64_crypto_sha256su1v4si" |
5737 [(set (match_operand:V4SI 0 "register_operand" "=w") | 6108 [(set (match_operand:V4SI 0 "register_operand" "=w") |
5738 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | 6109 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") |
5739 (match_operand:V4SI 2 "register_operand" "w") | 6110 (match_operand:V4SI 2 "register_operand" "w") |
5740 (match_operand:V4SI 3 "register_operand" "w")] | 6111 (match_operand:V4SI 3 "register_operand" "w")] |
5741 UNSPEC_SHA256SU1))] | 6112 UNSPEC_SHA256SU1))] |
5742 "TARGET_SIMD &&TARGET_CRYPTO" | 6113 "TARGET_SIMD && TARGET_SHA2" |
5743 "sha256su1\\t%0.4s, %2.4s, %3.4s" | 6114 "sha256su1\\t%0.4s, %2.4s, %3.4s" |
5744 [(set_attr "type" "crypto_sha256_slow")] | 6115 [(set_attr "type" "crypto_sha256_slow")] |
6116 ) | |
6117 | |
6118 ;; sha512 | |
6119 | |
;; Emits "sha512h<sha512_op> %q0, %q2, %3.2d" for each unspec in the
;; CRYPTO_SHA512 iterator, on V2DI registers.  Operand 1 is tied to the
;; destination (constraint "0").  Enabled under TARGET_SIMD && TARGET_SHA3.
;; This pattern exists only in the new revision.
6120 (define_insn "aarch64_crypto_sha512h<sha512_op>qv2di" | |
6121 [(set (match_operand:V2DI 0 "register_operand" "=w") | |
6122 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") | |
6123 (match_operand:V2DI 2 "register_operand" "w") | |
6124 (match_operand:V2DI 3 "register_operand" "w")] | |
6125 CRYPTO_SHA512))] | |
6126 "TARGET_SIMD && TARGET_SHA3" | |
6127 "sha512h<sha512_op>\\t%q0, %q2, %3.2d" | |
6128 [(set_attr "type" "crypto_sha512")] | |
6129 ) | |
6130 | |
;; Emits "sha512su0 %0.2d, %2.2d" for UNSPEC_SHA512SU0 on V2DI registers.
;; Operand 1 is tied to the destination (constraint "0").  Enabled under
;; TARGET_SIMD && TARGET_SHA3.
6131 (define_insn "aarch64_crypto_sha512su0qv2di" | |
6132 [(set (match_operand:V2DI 0 "register_operand" "=w") | |
6133 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") | |
6134 (match_operand:V2DI 2 "register_operand" "w")] | |
6135 UNSPEC_SHA512SU0))] | |
6136 "TARGET_SIMD && TARGET_SHA3" | |
6137 "sha512su0\\t%0.2d, %2.2d" | |
6138 [(set_attr "type" "crypto_sha512")] | |
6139 ) | |
6140 | |
;; Emits "sha512su1 %0.2d, %2.2d, %3.2d" for UNSPEC_SHA512SU1 on V2DI
;; registers.  Operand 1 is tied to the destination (constraint "0").
;; Enabled under TARGET_SIMD && TARGET_SHA3.
6141 (define_insn "aarch64_crypto_sha512su1qv2di" | |
6142 [(set (match_operand:V2DI 0 "register_operand" "=w") | |
6143 (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "0") | |
6144 (match_operand:V2DI 2 "register_operand" "w") | |
6145 (match_operand:V2DI 3 "register_operand" "w")] | |
6146 UNSPEC_SHA512SU1))] | |
6147 "TARGET_SIMD && TARGET_SHA3" | |
6148 "sha512su1\\t%0.2d, %2.2d, %3.2d" | |
6149 [(set_attr "type" "crypto_sha512")] | |
6150 ) | |
6151 | |
6152 ;; sha3 | |
6153 | |
;; Three-way exclusive OR over the VQ_I modes: op0 = (op2 ^ op3) ^ op1.
;; Written as plain RTL xor (no unspec) and emitted as
;; "eor3 %0.16b, %1.16b, %2.16b, %3.16b" regardless of the element mode.
;; Enabled under TARGET_SIMD && TARGET_SHA3.
6154 (define_insn "eor3q<mode>4" | |
6155 [(set (match_operand:VQ_I 0 "register_operand" "=w") | |
6156 (xor:VQ_I | |
6157 (xor:VQ_I | |
6158 (match_operand:VQ_I 2 "register_operand" "w") | |
6159 (match_operand:VQ_I 3 "register_operand" "w")) | |
6160 (match_operand:VQ_I 1 "register_operand" "w")))] | |
6161 "TARGET_SIMD && TARGET_SHA3" | |
6162 "eor3\\t%0.16b, %1.16b, %2.16b, %3.16b" | |
6163 [(set_attr "type" "crypto_sha3")] | |
6164 ) | |
6165 | |
;; Rotate-and-xor on V2DI: op0 = rotate (op2, 1) ^ op1, using the RTL
;; "rotate" code with a constant amount of 1.  Emitted as
;; "rax1 %0.2d, %1.2d, %2.2d".  Enabled under TARGET_SIMD && TARGET_SHA3.
6166 (define_insn "aarch64_rax1qv2di" | |
6167 [(set (match_operand:V2DI 0 "register_operand" "=w") | |
6168 (xor:V2DI | |
6169 (rotate:V2DI | |
6170 (match_operand:V2DI 2 "register_operand" "w") | |
6171 (const_int 1)) | |
6172 (match_operand:V2DI 1 "register_operand" "w")))] | |
6173 "TARGET_SIMD && TARGET_SHA3" | |
6174 "rax1\\t%0.2d, %1.2d, %2.2d" | |
6175 [(set_attr "type" "crypto_sha3")] | |
6176 ) | |
6177 | |
;; Xor-then-rotate on V2DI: op0 = rotatert (op1 ^ op2, op3).  Operand 3 is
;; an SImode immediate accepted by aarch64_simd_shift_imm_di (constraint
;; "Usd") and is printed as the last operand of "xar".  Operand 1 carries
;; the "%" commutativity marker.  Enabled under TARGET_SIMD && TARGET_SHA3.
6178 (define_insn "aarch64_xarqv2di" | |
6179 [(set (match_operand:V2DI 0 "register_operand" "=w") | |
6180 (rotatert:V2DI | |
6181 (xor:V2DI | |
6182 (match_operand:V2DI 1 "register_operand" "%w") | |
6183 (match_operand:V2DI 2 "register_operand" "w")) | |
6184 (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))] | |
6185 "TARGET_SIMD && TARGET_SHA3" | |
6186 "xar\\t%0.2d, %1.2d, %2.2d, %3" | |
6187 [(set_attr "type" "crypto_sha3")] | |
6188 ) | |
6189 | |
;; Bit-clear-and-xor over the VQ_I modes: op0 = (~op3 & op2) ^ op1.
;; Written as plain RTL and emitted as
;; "bcax %0.16b, %1.16b, %2.16b, %3.16b" regardless of the element mode.
;; Enabled under TARGET_SIMD && TARGET_SHA3.
6190 (define_insn "bcaxq<mode>4" | |
6191 [(set (match_operand:VQ_I 0 "register_operand" "=w") | |
6192 (xor:VQ_I | |
6193 (and:VQ_I | |
6194 (not:VQ_I (match_operand:VQ_I 3 "register_operand" "w")) | |
6195 (match_operand:VQ_I 2 "register_operand" "w")) | |
6196 (match_operand:VQ_I 1 "register_operand" "w")))] | |
6197 "TARGET_SIMD && TARGET_SHA3" | |
6198 "bcax\\t%0.16b, %1.16b, %2.16b, %3.16b" | |
6199 [(set_attr "type" "crypto_sha3")] | |
6200 ) | |
6201 | |
6202 ;; SM3 | |
6203 | |
;; Emits "sm3ss1 %0.4s, %1.4s, %2.4s, %3.4s" for UNSPEC_SM3SS1 on V4SI
;; registers.  All three inputs are independent registers (none is tied to
;; the destination).  Enabled under TARGET_SIMD && TARGET_SM4.
6204 (define_insn "aarch64_sm3ss1qv4si" | |
6205 [(set (match_operand:V4SI 0 "register_operand" "=w") | |
6206 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w") | |
6207 (match_operand:V4SI 2 "register_operand" "w") | |
6208 (match_operand:V4SI 3 "register_operand" "w")] | |
6209 UNSPEC_SM3SS1))] | |
6210 "TARGET_SIMD && TARGET_SM4" | |
6211 "sm3ss1\\t%0.4s, %1.4s, %2.4s, %3.4s" | |
6212 [(set_attr "type" "crypto_sm3")] | |
6213 ) | |
6214 | |
6215 | |
;; Emits "sm3tt<sm3tt_op> %0.4s, %2.4s, %3.4s[%4]" for each unspec in the
;; CRYPTO_SM3TT iterator.  Operand 1 is tied to the destination
;; (constraint "0"); operand 4 is an SImode immediate accepted by
;; aarch64_imm2 (constraint "Ui2") and printed as the element index of
;; operand 3.  Enabled under TARGET_SIMD && TARGET_SM4.
6216 (define_insn "aarch64_sm3tt<sm3tt_op>qv4si" | |
6217 [(set (match_operand:V4SI 0 "register_operand" "=w") | |
6218 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | |
6219 (match_operand:V4SI 2 "register_operand" "w") | |
6220 (match_operand:V4SI 3 "register_operand" "w") | |
6221 (match_operand:SI 4 "aarch64_imm2" "Ui2")] | |
6222 CRYPTO_SM3TT))] | |
6223 "TARGET_SIMD && TARGET_SM4" | |
6224 "sm3tt<sm3tt_op>\\t%0.4s, %2.4s, %3.4s[%4]" | |
6225 [(set_attr "type" "crypto_sm3")] | |
6226 ) | |
6227 | |
;; Emits "sm3partw<sm3part_op> %0.4s, %2.4s, %3.4s" for each unspec in the
;; CRYPTO_SM3PART iterator.  Operand 1 is tied to the destination
;; (constraint "0").  Enabled under TARGET_SIMD && TARGET_SM4.
6228 (define_insn "aarch64_sm3partw<sm3part_op>qv4si" | |
6229 [(set (match_operand:V4SI 0 "register_operand" "=w") | |
6230 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | |
6231 (match_operand:V4SI 2 "register_operand" "w") | |
6232 (match_operand:V4SI 3 "register_operand" "w")] | |
6233 CRYPTO_SM3PART))] | |
6234 "TARGET_SIMD && TARGET_SM4" | |
6235 "sm3partw<sm3part_op>\\t%0.4s, %2.4s, %3.4s" | |
6236 [(set_attr "type" "crypto_sm3")] | |
6237 ) | |
6238 | |
6239 ;; SM4 | |
6240 | |
;; Emits "sm4e %0.4s, %2.4s" for UNSPEC_SM4E on V4SI registers.  Operand 1
;; is tied to the destination (constraint "0").  Enabled under
;; TARGET_SIMD && TARGET_SM4.
6241 (define_insn "aarch64_sm4eqv4si" | |
6242 [(set (match_operand:V4SI 0 "register_operand" "=w") | |
6243 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "0") | |
6244 (match_operand:V4SI 2 "register_operand" "w")] | |
6245 UNSPEC_SM4E))] | |
6246 "TARGET_SIMD && TARGET_SM4" | |
6247 "sm4e\\t%0.4s, %2.4s" | |
6248 [(set_attr "type" "crypto_sm4")] | |
6249 ) | |
6250 | |
;; Emits "sm4ekey %0.4s, %1.4s, %2.4s" for UNSPEC_SM4EKEY on V4SI
;; registers.  Both inputs are independent registers (neither is tied to
;; the destination).  Enabled under TARGET_SIMD && TARGET_SM4.
6251 (define_insn "aarch64_sm4ekeyqv4si" | |
6252 [(set (match_operand:V4SI 0 "register_operand" "=w") | |
6253 (unspec:V4SI [(match_operand:V4SI 1 "register_operand" "w") | |
6254 (match_operand:V4SI 2 "register_operand" "w")] | |
6255 UNSPEC_SM4EKEY))] | |
6256 "TARGET_SIMD && TARGET_SM4" | |
6257 "sm4ekey\\t%0.4s, %1.4s, %2.4s" | |
6258 [(set_attr "type" "crypto_sm4")] | |
6259 ) | |
6260 | |
6261 ;; fp16fml | |
6262 | |
;; Expander for the VFMLA16_LOW unspecs over the VDQSF modes.  The
;; preparation code builds two selector rtxes with
;; aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, false),
;; passes them as operands 4 and 5 of the matching
;; aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> insn, and ends with DONE.
;; NOTE(review): the final "false" argument presumably requests the low
;; half, matching the vect_par_cnst_lo_half predicate on that insn --
;; confirm against aarch64_simd_vect_par_cnst_half.
6263 (define_expand "aarch64_fml<f16mac1>l<f16quad>_low<mode>" | |
6264 [(set (match_operand:VDQSF 0 "register_operand" "=w") | |
6265 (unspec:VDQSF | |
6266 [(match_operand:VDQSF 1 "register_operand" "0") | |
6267 (match_operand:<VFMLA_W> 2 "register_operand" "w") | |
6268 (match_operand:<VFMLA_W> 3 "register_operand" "w")] | |
6269 VFMLA16_LOW))] | |
6270 "TARGET_F16FML" | |
6271 { | |
6272 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, | |
6273 <nunits> * 2, false); | |
6274 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, | |
6275 <nunits> * 2, false); | |
6276 | |
6277 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_low<mode> (operands[0], | |
6278 operands[1], | |
6279 operands[2], | |
6280 operands[3], | |
6281 p1, p2)); | |
6282 DONE; | |
6283 | |
6284 }) | |
6285 | |
;; Expander for the VFMLA16_HIGH unspecs over the VDQSF modes.  The
;; preparation code builds two selector rtxes with
;; aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true),
;; passes them as operands 4 and 5 of the matching
;; aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> insn, and ends with DONE.
;; NOTE(review): the final "true" argument presumably requests the high
;; half, matching the vect_par_cnst_hi_half predicate on that insn --
;; confirm against aarch64_simd_vect_par_cnst_half.
6286 (define_expand "aarch64_fml<f16mac1>l<f16quad>_high<mode>" | |
6287 [(set (match_operand:VDQSF 0 "register_operand" "=w") | |
6288 (unspec:VDQSF | |
6289 [(match_operand:VDQSF 1 "register_operand" "0") | |
6290 (match_operand:<VFMLA_W> 2 "register_operand" "w") | |
6291 (match_operand:<VFMLA_W> 3 "register_operand" "w")] | |
6292 VFMLA16_HIGH))] | |
6293 "TARGET_F16FML" | |
6294 { | |
6295 rtx p1 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true); | |
6296 rtx p2 = aarch64_simd_vect_par_cnst_half (<VFMLA_W>mode, <nunits> * 2, true); | |
6297 | |
6298 emit_insn (gen_aarch64_simd_fml<f16mac1>l<f16quad>_high<mode> (operands[0], | |
6299 operands[1], | |
6300 operands[2], | |
6301 operands[3], | |
6302 p1, p2)); | |
6303 DONE; | |
6304 }) | |
6305 | |
;; Widening fused multiply-add, emitted as "fmlal":
;;   op0 = fma (float_extend (op2 selected by op4),
;;              float_extend (op3 selected by op5), op1)
;; Operands 4 and 5 must satisfy vect_par_cnst_lo_half; the accumulator
;; (operand 1) is tied to the destination.  Enabled under TARGET_F16FML.
6306 (define_insn "aarch64_simd_fmlal<f16quad>_low<mode>" | |
6307 [(set (match_operand:VDQSF 0 "register_operand" "=w") | |
6308 (fma:VDQSF | |
6309 (float_extend:VDQSF | |
6310 (vec_select:<VFMLA_SEL_W> | |
6311 (match_operand:<VFMLA_W> 2 "register_operand" "w") | |
6312 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" ""))) | |
6313 (float_extend:VDQSF | |
6314 (vec_select:<VFMLA_SEL_W> | |
6315 (match_operand:<VFMLA_W> 3 "register_operand" "w") | |
6316 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" ""))) | |
6317 (match_operand:VDQSF 1 "register_operand" "0")))] | |
6318 "TARGET_F16FML" | |
6319 "fmlal\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h" | |
6320 [(set_attr "type" "neon_fp_mul_s")] | |
6321 ) | |
6322 | |
;; Widening fused multiply-subtract, emitted as "fmlsl".  Same shape as
;; the fmlal low pattern except that the selected half of operand 2 is
;; negated (neg:<VFMLA_SEL_W>) before the float_extend.  Operands 4 and 5
;; must satisfy vect_par_cnst_lo_half; operand 1 is tied to the
;; destination.  Enabled under TARGET_F16FML.
6323 (define_insn "aarch64_simd_fmlsl<f16quad>_low<mode>" | |
6324 [(set (match_operand:VDQSF 0 "register_operand" "=w") | |
6325 (fma:VDQSF | |
6326 (float_extend:VDQSF | |
6327 (neg:<VFMLA_SEL_W> | |
6328 (vec_select:<VFMLA_SEL_W> | |
6329 (match_operand:<VFMLA_W> 2 "register_operand" "w") | |
6330 (match_operand:<VFMLA_W> 4 "vect_par_cnst_lo_half" "")))) | |
6331 (float_extend:VDQSF | |
6332 (vec_select:<VFMLA_SEL_W> | |
6333 (match_operand:<VFMLA_W> 3 "register_operand" "w") | |
6334 (match_operand:<VFMLA_W> 5 "vect_par_cnst_lo_half" ""))) | |
6335 (match_operand:VDQSF 1 "register_operand" "0")))] | |
6336 "TARGET_F16FML" | |
6337 "fmlsl\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h" | |
6338 [(set_attr "type" "neon_fp_mul_s")] | |
6339 ) | |
6340 | |
;; Widening fused multiply-add, emitted as "fmlal2":
;;   op0 = fma (float_extend (op2 selected by op4),
;;              float_extend (op3 selected by op5), op1)
;; Operands 4 and 5 must satisfy vect_par_cnst_hi_half; the accumulator
;; (operand 1) is tied to the destination.  Enabled under TARGET_F16FML.
6341 (define_insn "aarch64_simd_fmlal<f16quad>_high<mode>" | |
6342 [(set (match_operand:VDQSF 0 "register_operand" "=w") | |
6343 (fma:VDQSF | |
6344 (float_extend:VDQSF | |
6345 (vec_select:<VFMLA_SEL_W> | |
6346 (match_operand:<VFMLA_W> 2 "register_operand" "w") | |
6347 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" ""))) | |
6348 (float_extend:VDQSF | |
6349 (vec_select:<VFMLA_SEL_W> | |
6350 (match_operand:<VFMLA_W> 3 "register_operand" "w") | |
6351 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" ""))) | |
6352 (match_operand:VDQSF 1 "register_operand" "0")))] | |
6353 "TARGET_F16FML" | |
6354 "fmlal2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h" | |
6355 [(set_attr "type" "neon_fp_mul_s")] | |
6356 ) | |
6357 | |
;; Widening fused multiply-subtract, emitted as "fmlsl2".  Same shape as
;; the fmlal high pattern except that the selected half of operand 2 is
;; negated (neg:<VFMLA_SEL_W>) before the float_extend.  Operands 4 and 5
;; must satisfy vect_par_cnst_hi_half; operand 1 is tied to the
;; destination.  Enabled under TARGET_F16FML.
6358 (define_insn "aarch64_simd_fmlsl<f16quad>_high<mode>" | |
6359 [(set (match_operand:VDQSF 0 "register_operand" "=w") | |
6360 (fma:VDQSF | |
6361 (float_extend:VDQSF | |
6362 (neg:<VFMLA_SEL_W> | |
6363 (vec_select:<VFMLA_SEL_W> | |
6364 (match_operand:<VFMLA_W> 2 "register_operand" "w") | |
6365 (match_operand:<VFMLA_W> 4 "vect_par_cnst_hi_half" "")))) | |
6366 (float_extend:VDQSF | |
6367 (vec_select:<VFMLA_SEL_W> | |
6368 (match_operand:<VFMLA_W> 3 "register_operand" "w") | |
6369 (match_operand:<VFMLA_W> 5 "vect_par_cnst_hi_half" ""))) | |
6370 (match_operand:VDQSF 1 "register_operand" "0")))] | |
6371 "TARGET_F16FML" | |
6372 "fmlsl2\\t%0.<nunits>s, %2.<nunits>h, %3.<nunits>h" | |
6373 [(set_attr "type" "neon_fp_mul_s")] | |
6374 ) | |
6375 | |
;; Expander for the VFMLA16_LOW unspecs, V2SF result with a V4HF lane
;; source (operand 3) and an aarch64_imm2 lane index (operand 4).  It
;; builds a V4HF half selector (p1) and converts the lane index with
;; aarch64_endian_lane_rtx (V4HFmode, ...), then emits the matching
;; aarch64_simd_fml<f16mac1>l_lane_lowv2sf insn and ends with DONE.
;; NOTE(review): "false" presumably selects the low half, and
;; aarch64_endian_lane_rtx presumably remaps the index for the target
;; endianness -- confirm against those helpers.
6376 (define_expand "aarch64_fml<f16mac1>l_lane_lowv2sf" | |
6377 [(set (match_operand:V2SF 0 "register_operand" "") | |
6378 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") | |
6379 (match_operand:V4HF 2 "register_operand" "") | |
6380 (match_operand:V4HF 3 "register_operand" "") | |
6381 (match_operand:SI 4 "aarch64_imm2" "")] | |
6382 VFMLA16_LOW))] | |
6383 "TARGET_F16FML" | |
6384 { | |
6385 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false); | |
6386 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4])); | |
6387 | |
6388 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_lowv2sf (operands[0], | |
6389 operands[1], | |
6390 operands[2], | |
6391 operands[3], | |
6392 p1, lane)); | |
6393 DONE; | |
6394 } | |
6395 ) | |
6396 | |
;; Expander for the VFMLA16_HIGH unspecs, V2SF result with a V4HF lane
;; source (operand 3) and an aarch64_imm2 lane index (operand 4).  It
;; builds a V4HF half selector (p1, last argument "true") and converts the
;; lane index with aarch64_endian_lane_rtx (V4HFmode, ...), then emits the
;; matching aarch64_simd_fml<f16mac1>l_lane_highv2sf insn and ends with
;; DONE.
;; NOTE(review): "true" presumably selects the high half -- confirm
;; against aarch64_simd_vect_par_cnst_half.
6397 (define_expand "aarch64_fml<f16mac1>l_lane_highv2sf" | |
6398 [(set (match_operand:V2SF 0 "register_operand" "") | |
6399 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") | |
6400 (match_operand:V4HF 2 "register_operand" "") | |
6401 (match_operand:V4HF 3 "register_operand" "") | |
6402 (match_operand:SI 4 "aarch64_imm2" "")] | |
6403 VFMLA16_HIGH))] | |
6404 "TARGET_F16FML" | |
6405 { | |
6406 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true); | |
6407 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4])); | |
6408 | |
6409 emit_insn (gen_aarch64_simd_fml<f16mac1>l_lane_highv2sf (operands[0], | |
6410 operands[1], | |
6411 operands[2], | |
6412 operands[3], | |
6413 p1, lane)); | |
6414 DONE; | |
6415 }) | |
6416 | |
;; By-element "fmlal", V2SF result: multiplies the op4-selected
;; (vect_par_cnst_lo_half) V2HF half of operand 2 by lane op5 of the V4HF
;; operand 3 duplicated across V2HF, both float-extended, and accumulates
;; into operand 1 (tied to the destination).  Operand 3 uses register
;; constraint "x"; the lane index is an aarch64_imm2 immediate ("Ui2").
6417 (define_insn "aarch64_simd_fmlal_lane_lowv2sf" | |
6418 [(set (match_operand:V2SF 0 "register_operand" "=w") | |
6419 (fma:V2SF | |
6420 (float_extend:V2SF | |
6421 (vec_select:V2HF | |
6422 (match_operand:V4HF 2 "register_operand" "w") | |
6423 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))) | |
6424 (float_extend:V2SF | |
6425 (vec_duplicate:V2HF | |
6426 (vec_select:HF | |
6427 (match_operand:V4HF 3 "register_operand" "x") | |
6428 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) | |
6429 (match_operand:V2SF 1 "register_operand" "0")))] | |
6430 "TARGET_F16FML" | |
6431 "fmlal\\t%0.2s, %2.2h, %3.h[%5]" | |
6432 [(set_attr "type" "neon_fp_mul_s")] | |
6433 ) | |
6434 | |
;; By-element "fmlsl", V2SF result: as the fmlal lane-low pattern, but the
;; op4-selected (vect_par_cnst_lo_half) half of operand 2 is negated
;; (neg:V2HF) before the float_extend.  Operand 1 is tied to the
;; destination; operand 3 (V4HF) uses register constraint "x"; the lane
;; index is an aarch64_imm2 immediate ("Ui2").
6435 (define_insn "aarch64_simd_fmlsl_lane_lowv2sf" | |
6436 [(set (match_operand:V2SF 0 "register_operand" "=w") | |
6437 (fma:V2SF | |
6438 (float_extend:V2SF | |
6439 (neg:V2HF | |
6440 (vec_select:V2HF | |
6441 (match_operand:V4HF 2 "register_operand" "w") | |
6442 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))) | |
6443 (float_extend:V2SF | |
6444 (vec_duplicate:V2HF | |
6445 (vec_select:HF | |
6446 (match_operand:V4HF 3 "register_operand" "x") | |
6447 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) | |
6448 (match_operand:V2SF 1 "register_operand" "0")))] | |
6449 "TARGET_F16FML" | |
6450 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]" | |
6451 [(set_attr "type" "neon_fp_mul_s")] | |
6452 ) | |
6453 | |
;; By-element "fmlal2", V2SF result: multiplies the op4-selected
;; (vect_par_cnst_hi_half) V2HF half of operand 2 by lane op5 of the V4HF
;; operand 3 duplicated across V2HF, both float-extended, and accumulates
;; into operand 1 (tied to the destination).  Operand 3 uses register
;; constraint "x"; the lane index is an aarch64_imm2 immediate ("Ui2").
6454 (define_insn "aarch64_simd_fmlal_lane_highv2sf" | |
6455 [(set (match_operand:V2SF 0 "register_operand" "=w") | |
6456 (fma:V2SF | |
6457 (float_extend:V2SF | |
6458 (vec_select:V2HF | |
6459 (match_operand:V4HF 2 "register_operand" "w") | |
6460 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))) | |
6461 (float_extend:V2SF | |
6462 (vec_duplicate:V2HF | |
6463 (vec_select:HF | |
6464 (match_operand:V4HF 3 "register_operand" "x") | |
6465 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) | |
6466 (match_operand:V2SF 1 "register_operand" "0")))] | |
6467 "TARGET_F16FML" | |
6468 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]" | |
6469 [(set_attr "type" "neon_fp_mul_s")] | |
6470 ) | |
6471 | |
;; By-element "fmlsl2", V2SF result: as the fmlal lane-high pattern, but
;; the op4-selected (vect_par_cnst_hi_half) half of operand 2 is negated
;; (neg:V2HF) before the float_extend.  Operand 1 is tied to the
;; destination; operand 3 (V4HF) uses register constraint "x"; the lane
;; index is an aarch64_imm2 immediate ("Ui2").
6472 (define_insn "aarch64_simd_fmlsl_lane_highv2sf" | |
6473 [(set (match_operand:V2SF 0 "register_operand" "=w") | |
6474 (fma:V2SF | |
6475 (float_extend:V2SF | |
6476 (neg:V2HF | |
6477 (vec_select:V2HF | |
6478 (match_operand:V4HF 2 "register_operand" "w") | |
6479 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))) | |
6480 (float_extend:V2SF | |
6481 (vec_duplicate:V2HF | |
6482 (vec_select:HF | |
6483 (match_operand:V4HF 3 "register_operand" "x") | |
6484 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) | |
6485 (match_operand:V2SF 1 "register_operand" "0")))] | |
6486 "TARGET_F16FML" | |
6487 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]" | |
6488 [(set_attr "type" "neon_fp_mul_s")] | |
6489 ) | |
6490 | |
;; Expander for the VFMLA16_LOW unspecs, V4SF result with a V8HF lane
;; source (operand 3) and an aarch64_lane_imm3 lane index (operand 4).
;; It builds a V8HF half selector (p1, last argument "false") and converts
;; the lane index with aarch64_endian_lane_rtx (V8HFmode, ...), then emits
;; the matching aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf insn and ends
;; with DONE.
;; NOTE(review): "false" presumably selects the low half -- confirm
;; against aarch64_simd_vect_par_cnst_half.
6491 (define_expand "aarch64_fml<f16mac1>lq_laneq_lowv4sf" | |
6492 [(set (match_operand:V4SF 0 "register_operand" "") | |
6493 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") | |
6494 (match_operand:V8HF 2 "register_operand" "") | |
6495 (match_operand:V8HF 3 "register_operand" "") | |
6496 (match_operand:SI 4 "aarch64_lane_imm3" "")] | |
6497 VFMLA16_LOW))] | |
6498 "TARGET_F16FML" | |
6499 { | |
6500 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false); | |
6501 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4])); | |
6502 | |
6503 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_lowv4sf (operands[0], | |
6504 operands[1], | |
6505 operands[2], | |
6506 operands[3], | |
6507 p1, lane)); | |
6508 DONE; | |
6509 }) | |
6510 | |
;; Expander for the VFMLA16_HIGH unspecs, V4SF result with a V8HF lane
;; source (operand 3) and an aarch64_lane_imm3 lane index (operand 4).
;; It builds a V8HF half selector (p1, last argument "true") and converts
;; the lane index with aarch64_endian_lane_rtx (V8HFmode, ...), then emits
;; the matching aarch64_simd_fml<f16mac1>lq_laneq_highv4sf insn and ends
;; with DONE.
;; NOTE(review): "true" presumably selects the high half -- confirm
;; against aarch64_simd_vect_par_cnst_half.
6511 (define_expand "aarch64_fml<f16mac1>lq_laneq_highv4sf" | |
6512 [(set (match_operand:V4SF 0 "register_operand" "") | |
6513 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") | |
6514 (match_operand:V8HF 2 "register_operand" "") | |
6515 (match_operand:V8HF 3 "register_operand" "") | |
6516 (match_operand:SI 4 "aarch64_lane_imm3" "")] | |
6517 VFMLA16_HIGH))] | |
6518 "TARGET_F16FML" | |
6519 { | |
6520 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true); | |
6521 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4])); | |
6522 | |
6523 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_laneq_highv4sf (operands[0], | |
6524 operands[1], | |
6525 operands[2], | |
6526 operands[3], | |
6527 p1, lane)); | |
6528 DONE; | |
6529 }) | |
6530 | |
;; By-element "fmlal", V4SF result: multiplies the op4-selected
;; (vect_par_cnst_lo_half) V4HF half of the V8HF operand 2 by lane op5 of
;; the V8HF operand 3 duplicated across V4HF, both float-extended, and
;; accumulates into operand 1 (tied to the destination).  Operand 3 uses
;; register constraint "x"; the lane index is an aarch64_lane_imm3
;; immediate ("Ui7").
6531 (define_insn "aarch64_simd_fmlalq_laneq_lowv4sf" | |
6532 [(set (match_operand:V4SF 0 "register_operand" "=w") | |
6533 (fma:V4SF | |
6534 (float_extend:V4SF | |
6535 (vec_select:V4HF | |
6536 (match_operand:V8HF 2 "register_operand" "w") | |
6537 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))) | |
6538 (float_extend:V4SF | |
6539 (vec_duplicate:V4HF | |
6540 (vec_select:HF | |
6541 (match_operand:V8HF 3 "register_operand" "x") | |
6542 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) | |
6543 (match_operand:V4SF 1 "register_operand" "0")))] | |
6544 "TARGET_F16FML" | |
6545 "fmlal\\t%0.4s, %2.4h, %3.h[%5]" | |
6546 [(set_attr "type" "neon_fp_mul_s")] | |
6547 ) | |
6548 | |
;; By-element "fmlsl", V4SF result: as the fmlalq laneq-low pattern, but
;; the op4-selected (vect_par_cnst_lo_half) half of operand 2 is negated
;; (neg:V4HF) before the float_extend.  Operand 1 is tied to the
;; destination; operand 3 (V8HF) uses register constraint "x"; the lane
;; index is an aarch64_lane_imm3 immediate ("Ui7").
6549 (define_insn "aarch64_simd_fmlslq_laneq_lowv4sf" | |
6550 [(set (match_operand:V4SF 0 "register_operand" "=w") | |
6551 (fma:V4SF | |
6552 (float_extend:V4SF | |
6553 (neg:V4HF | |
6554 (vec_select:V4HF | |
6555 (match_operand:V8HF 2 "register_operand" "w") | |
6556 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))) | |
6557 (float_extend:V4SF | |
6558 (vec_duplicate:V4HF | |
6559 (vec_select:HF | |
6560 (match_operand:V8HF 3 "register_operand" "x") | |
6561 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) | |
6562 (match_operand:V4SF 1 "register_operand" "0")))] | |
6563 "TARGET_F16FML" | |
6564 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]" | |
6565 [(set_attr "type" "neon_fp_mul_s")] | |
6566 ) | |
6567 | |
;; By-element "fmlal2", V4SF result: multiplies the op4-selected
;; (vect_par_cnst_hi_half) V4HF half of the V8HF operand 2 by lane op5 of
;; the V8HF operand 3 duplicated across V4HF, both float-extended, and
;; accumulates into operand 1 (tied to the destination).  Operand 3 uses
;; register constraint "x"; the lane index is an aarch64_lane_imm3
;; immediate ("Ui7").
6568 (define_insn "aarch64_simd_fmlalq_laneq_highv4sf" | |
6569 [(set (match_operand:V4SF 0 "register_operand" "=w") | |
6570 (fma:V4SF | |
6571 (float_extend:V4SF | |
6572 (vec_select:V4HF | |
6573 (match_operand:V8HF 2 "register_operand" "w") | |
6574 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))) | |
6575 (float_extend:V4SF | |
6576 (vec_duplicate:V4HF | |
6577 (vec_select:HF | |
6578 (match_operand:V8HF 3 "register_operand" "x") | |
6579 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) | |
6580 (match_operand:V4SF 1 "register_operand" "0")))] | |
6581 "TARGET_F16FML" | |
6582 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]" | |
6583 [(set_attr "type" "neon_fp_mul_s")] | |
6584 ) | |
6585 | |
;; By-element "fmlsl2", V4SF result: as the fmlalq laneq-high pattern, but
;; the op4-selected (vect_par_cnst_hi_half) half of operand 2 is negated
;; (neg:V4HF) before the float_extend.  Operand 1 is tied to the
;; destination; operand 3 (V8HF) uses register constraint "x"; the lane
;; index is an aarch64_lane_imm3 immediate ("Ui7").
6586 (define_insn "aarch64_simd_fmlslq_laneq_highv4sf" | |
6587 [(set (match_operand:V4SF 0 "register_operand" "=w") | |
6588 (fma:V4SF | |
6589 (float_extend:V4SF | |
6590 (neg:V4HF | |
6591 (vec_select:V4HF | |
6592 (match_operand:V8HF 2 "register_operand" "w") | |
6593 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))) | |
6594 (float_extend:V4SF | |
6595 (vec_duplicate:V4HF | |
6596 (vec_select:HF | |
6597 (match_operand:V8HF 3 "register_operand" "x") | |
6598 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) | |
6599 (match_operand:V4SF 1 "register_operand" "0")))] | |
6600 "TARGET_F16FML" | |
6601 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]" | |
6602 [(set_attr "type" "neon_fp_mul_s")] | |
6603 ) | |
6604 | |
;; Expander for the VFMLA16_LOW unspecs, V2SF result with a V4HF
;; multiplicand (operand 2), a V8HF lane source (operand 3) and an
;; aarch64_lane_imm3 lane index (operand 4).  The half selector p1 is
;; built for V4HFmode (operand 2's mode) while the lane index is converted
;; for V8HFmode (operand 3's mode); both are passed to the matching
;; aarch64_simd_fml<f16mac1>l_laneq_lowv2sf insn before DONE.
;; NOTE(review): "false" presumably selects the low half -- confirm
;; against aarch64_simd_vect_par_cnst_half.
6605 (define_expand "aarch64_fml<f16mac1>l_laneq_lowv2sf" | |
6606 [(set (match_operand:V2SF 0 "register_operand" "") | |
6607 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") | |
6608 (match_operand:V4HF 2 "register_operand" "") | |
6609 (match_operand:V8HF 3 "register_operand" "") | |
6610 (match_operand:SI 4 "aarch64_lane_imm3" "")] | |
6611 VFMLA16_LOW))] | |
6612 "TARGET_F16FML" | |
6613 { | |
6614 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, false); | |
6615 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4])); | |
6616 | |
6617 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_lowv2sf (operands[0], | |
6618 operands[1], | |
6619 operands[2], | |
6620 operands[3], | |
6621 p1, lane)); | |
6622 DONE; | |
6623 | |
6624 }) | |
6625 | |
;; Expander for the VFMLA16_HIGH unspecs, V2SF result with a V4HF
;; multiplicand (operand 2), a V8HF lane source (operand 3) and an
;; aarch64_lane_imm3 lane index (operand 4).  The half selector p1 is
;; built for V4HFmode (operand 2's mode) while the lane index is converted
;; for V8HFmode (operand 3's mode); both are passed to the matching
;; aarch64_simd_fml<f16mac1>l_laneq_highv2sf insn before DONE.
;; NOTE(review): "true" presumably selects the high half -- confirm
;; against aarch64_simd_vect_par_cnst_half.
6626 (define_expand "aarch64_fml<f16mac1>l_laneq_highv2sf" | |
6627 [(set (match_operand:V2SF 0 "register_operand" "") | |
6628 (unspec:V2SF [(match_operand:V2SF 1 "register_operand" "") | |
6629 (match_operand:V4HF 2 "register_operand" "") | |
6630 (match_operand:V8HF 3 "register_operand" "") | |
6631 (match_operand:SI 4 "aarch64_lane_imm3" "")] | |
6632 VFMLA16_HIGH))] | |
6633 "TARGET_F16FML" | |
6634 { | |
6635 rtx p1 = aarch64_simd_vect_par_cnst_half (V4HFmode, 4, true); | |
6636 rtx lane = aarch64_endian_lane_rtx (V8HFmode, INTVAL (operands[4])); | |
6637 | |
6638 emit_insn (gen_aarch64_simd_fml<f16mac1>l_laneq_highv2sf (operands[0], | |
6639 operands[1], | |
6640 operands[2], | |
6641 operands[3], | |
6642 p1, lane)); | |
6643 DONE; | |
6644 | |
6645 }) | |
6646 | |
;; By-element "fmlal", V2SF result with a V8HF lane source: multiplies the
;; op4-selected (vect_par_cnst_lo_half) V2HF half of the V4HF operand 2 by
;; lane op5 of the V8HF operand 3 duplicated across V2HF, both
;; float-extended, and accumulates into operand 1 (tied to the
;; destination).  Operand 3 uses register constraint "x"; the lane index
;; is an aarch64_lane_imm3 immediate ("Ui7").
6647 (define_insn "aarch64_simd_fmlal_laneq_lowv2sf" | |
6648 [(set (match_operand:V2SF 0 "register_operand" "=w") | |
6649 (fma:V2SF | |
6650 (float_extend:V2SF | |
6651 (vec_select:V2HF | |
6652 (match_operand:V4HF 2 "register_operand" "w") | |
6653 (match_operand:V4HF 4 "vect_par_cnst_lo_half" ""))) | |
6654 (float_extend:V2SF | |
6655 (vec_duplicate:V2HF | |
6656 (vec_select:HF | |
6657 (match_operand:V8HF 3 "register_operand" "x") | |
6658 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) | |
6659 (match_operand:V2SF 1 "register_operand" "0")))] | |
6660 "TARGET_F16FML" | |
6661 "fmlal\\t%0.2s, %2.2h, %3.h[%5]" | |
6662 [(set_attr "type" "neon_fp_mul_s")] | |
6663 ) | |
6664 | |
;; By-element "fmlsl", V2SF result with a V8HF lane source: as the fmlal
;; laneq-low pattern, but the op4-selected (vect_par_cnst_lo_half) half of
;; operand 2 is negated (neg:V2HF) before the float_extend.  Operand 1 is
;; tied to the destination; operand 3 uses register constraint "x"; the
;; lane index is an aarch64_lane_imm3 immediate ("Ui7").
6665 (define_insn "aarch64_simd_fmlsl_laneq_lowv2sf" | |
6666 [(set (match_operand:V2SF 0 "register_operand" "=w") | |
6667 (fma:V2SF | |
6668 (float_extend:V2SF | |
6669 (neg:V2HF | |
6670 (vec_select:V2HF | |
6671 (match_operand:V4HF 2 "register_operand" "w") | |
6672 (match_operand:V4HF 4 "vect_par_cnst_lo_half" "")))) | |
6673 (float_extend:V2SF | |
6674 (vec_duplicate:V2HF | |
6675 (vec_select:HF | |
6676 (match_operand:V8HF 3 "register_operand" "x") | |
6677 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) | |
6678 (match_operand:V2SF 1 "register_operand" "0")))] | |
6679 "TARGET_F16FML" | |
6680 "fmlsl\\t%0.2s, %2.2h, %3.h[%5]" | |
6681 [(set_attr "type" "neon_fp_mul_s")] | |
6682 ) | |
6683 | |
;; By-element "fmlal2", V2SF result with a V8HF lane source: multiplies
;; the op4-selected (vect_par_cnst_hi_half) V2HF half of the V4HF operand
;; 2 by lane op5 of the V8HF operand 3 duplicated across V2HF, both
;; float-extended, and accumulates into operand 1 (tied to the
;; destination).  Operand 3 uses register constraint "x"; the lane index
;; is an aarch64_lane_imm3 immediate ("Ui7").
6684 (define_insn "aarch64_simd_fmlal_laneq_highv2sf" | |
6685 [(set (match_operand:V2SF 0 "register_operand" "=w") | |
6686 (fma:V2SF | |
6687 (float_extend:V2SF | |
6688 (vec_select:V2HF | |
6689 (match_operand:V4HF 2 "register_operand" "w") | |
6690 (match_operand:V4HF 4 "vect_par_cnst_hi_half" ""))) | |
6691 (float_extend:V2SF | |
6692 (vec_duplicate:V2HF | |
6693 (vec_select:HF | |
6694 (match_operand:V8HF 3 "register_operand" "x") | |
6695 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) | |
6696 (match_operand:V2SF 1 "register_operand" "0")))] | |
6697 "TARGET_F16FML" | |
6698 "fmlal2\\t%0.2s, %2.2h, %3.h[%5]" | |
6699 [(set_attr "type" "neon_fp_mul_s")] | |
6700 ) | |
6701 | |
;; By-element "fmlsl2", V2SF result with a V8HF lane source: as the fmlal
;; laneq-high pattern, but the op4-selected (vect_par_cnst_hi_half) half
;; of operand 2 is negated (neg:V2HF) before the float_extend.  Operand 1
;; is tied to the destination; operand 3 uses register constraint "x";
;; the lane index is an aarch64_lane_imm3 immediate ("Ui7").
6702 (define_insn "aarch64_simd_fmlsl_laneq_highv2sf" | |
6703 [(set (match_operand:V2SF 0 "register_operand" "=w") | |
6704 (fma:V2SF | |
6705 (float_extend:V2SF | |
6706 (neg:V2HF | |
6707 (vec_select:V2HF | |
6708 (match_operand:V4HF 2 "register_operand" "w") | |
6709 (match_operand:V4HF 4 "vect_par_cnst_hi_half" "")))) | |
6710 (float_extend:V2SF | |
6711 (vec_duplicate:V2HF | |
6712 (vec_select:HF | |
6713 (match_operand:V8HF 3 "register_operand" "x") | |
6714 (parallel [(match_operand:SI 5 "aarch64_lane_imm3" "Ui7")])))) | |
6715 (match_operand:V2SF 1 "register_operand" "0")))] | |
6716 "TARGET_F16FML" | |
6717 "fmlsl2\\t%0.2s, %2.2h, %3.h[%5]" | |
6718 [(set_attr "type" "neon_fp_mul_s")] | |
6719 ) | |
6720 | |
;; Expander for the VFMLA16_LOW unspecs, V4SF result with a V8HF
;; multiplicand (operand 2), a V4HF lane source (operand 3) and an
;; aarch64_imm2 lane index (operand 4).  The half selector p1 is built for
;; V8HFmode (operand 2's mode) while the lane index is converted for
;; V4HFmode (operand 3's mode); both are passed to the matching
;; aarch64_simd_fml<f16mac1>lq_lane_lowv4sf insn before DONE.
;; NOTE(review): "false" presumably selects the low half -- confirm
;; against aarch64_simd_vect_par_cnst_half.
6721 (define_expand "aarch64_fml<f16mac1>lq_lane_lowv4sf" | |
6722 [(set (match_operand:V4SF 0 "register_operand" "") | |
6723 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") | |
6724 (match_operand:V8HF 2 "register_operand" "") | |
6725 (match_operand:V4HF 3 "register_operand" "") | |
6726 (match_operand:SI 4 "aarch64_imm2" "")] | |
6727 VFMLA16_LOW))] | |
6728 "TARGET_F16FML" | |
6729 { | |
6730 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, false); | |
6731 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4])); | |
6732 | |
6733 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_lowv4sf (operands[0], | |
6734 operands[1], | |
6735 operands[2], | |
6736 operands[3], | |
6737 p1, lane)); | |
6738 DONE; | |
6739 }) | |
6740 | |
;; Expander for the VFMLA16_HIGH unspecs, V4SF result with a V8HF
;; multiplicand (operand 2), a V4HF lane source (operand 3) and an
;; aarch64_imm2 lane index (operand 4).  The half selector p1 is built for
;; V8HFmode (operand 2's mode) while the lane index is converted for
;; V4HFmode (operand 3's mode); both are passed to the matching
;; aarch64_simd_fml<f16mac1>lq_lane_highv4sf insn before DONE.
;; NOTE(review): "true" presumably selects the high half -- confirm
;; against aarch64_simd_vect_par_cnst_half.
6741 (define_expand "aarch64_fml<f16mac1>lq_lane_highv4sf" | |
6742 [(set (match_operand:V4SF 0 "register_operand" "") | |
6743 (unspec:V4SF [(match_operand:V4SF 1 "register_operand" "") | |
6744 (match_operand:V8HF 2 "register_operand" "") | |
6745 (match_operand:V4HF 3 "register_operand" "") | |
6746 (match_operand:SI 4 "aarch64_imm2" "")] | |
6747 VFMLA16_HIGH))] | |
6748 "TARGET_F16FML" | |
6749 { | |
6750 rtx p1 = aarch64_simd_vect_par_cnst_half (V8HFmode, 8, true); | |
6751 rtx lane = aarch64_endian_lane_rtx (V4HFmode, INTVAL (operands[4])); | |
6752 | |
6753 emit_insn (gen_aarch64_simd_fml<f16mac1>lq_lane_highv4sf (operands[0], | |
6754 operands[1], | |
6755 operands[2], | |
6756 operands[3], | |
6757 p1, lane)); | |
6758 DONE; | |
6759 }) | |
6760 | |
;; By-element "fmlal", V4SF result with a V4HF lane source: multiplies the
;; op4-selected (vect_par_cnst_lo_half) V4HF half of the V8HF operand 2 by
;; lane op5 of the V4HF operand 3 duplicated across V4HF, both
;; float-extended, and accumulates into operand 1 (tied to the
;; destination).  Operand 3 uses register constraint "x"; the lane index
;; is an aarch64_imm2 immediate ("Ui2").
6761 (define_insn "aarch64_simd_fmlalq_lane_lowv4sf" | |
6762 [(set (match_operand:V4SF 0 "register_operand" "=w") | |
6763 (fma:V4SF | |
6764 (float_extend:V4SF | |
6765 (vec_select:V4HF | |
6766 (match_operand:V8HF 2 "register_operand" "w") | |
6767 (match_operand:V8HF 4 "vect_par_cnst_lo_half" ""))) | |
6768 (float_extend:V4SF | |
6769 (vec_duplicate:V4HF | |
6770 (vec_select:HF | |
6771 (match_operand:V4HF 3 "register_operand" "x") | |
6772 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) | |
6773 (match_operand:V4SF 1 "register_operand" "0")))] | |
6774 "TARGET_F16FML" | |
6775 "fmlal\\t%0.4s, %2.4h, %3.h[%5]" | |
6776 [(set_attr "type" "neon_fp_mul_s")] | |
6777 ) | |
6778 | |
;; By-element "fmlsl", V4SF result with a V4HF lane source: as the fmlalq
;; lane-low pattern, but the op4-selected (vect_par_cnst_lo_half) half of
;; operand 2 is negated (neg:V4HF) before the float_extend.  Operand 1 is
;; tied to the destination; operand 3 uses register constraint "x"; the
;; lane index is an aarch64_imm2 immediate ("Ui2").
6779 (define_insn "aarch64_simd_fmlslq_lane_lowv4sf" | |
6780 [(set (match_operand:V4SF 0 "register_operand" "=w") | |
6781 (fma:V4SF | |
6782 (float_extend:V4SF | |
6783 (neg:V4HF | |
6784 (vec_select:V4HF | |
6785 (match_operand:V8HF 2 "register_operand" "w") | |
6786 (match_operand:V8HF 4 "vect_par_cnst_lo_half" "")))) | |
6787 (float_extend:V4SF | |
6788 (vec_duplicate:V4HF | |
6789 (vec_select:HF | |
6790 (match_operand:V4HF 3 "register_operand" "x") | |
6791 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) | |
6792 (match_operand:V4SF 1 "register_operand" "0")))] | |
6793 "TARGET_F16FML" | |
6794 "fmlsl\\t%0.4s, %2.4h, %3.h[%5]" | |
6795 [(set_attr "type" "neon_fp_mul_s")] | |
6796 ) | |
6797 | |
;; By-element "fmlal2", V4SF result with a V4HF lane source: multiplies
;; the op4-selected (vect_par_cnst_hi_half) V4HF half of the V8HF operand
;; 2 by lane op5 of the V4HF operand 3 duplicated across V4HF, both
;; float-extended, and accumulates into operand 1 (tied to the
;; destination).  Operand 3 uses register constraint "x"; the lane index
;; is an aarch64_imm2 immediate ("Ui2").
6798 (define_insn "aarch64_simd_fmlalq_lane_highv4sf" | |
6799 [(set (match_operand:V4SF 0 "register_operand" "=w") | |
6800 (fma:V4SF | |
6801 (float_extend:V4SF | |
6802 (vec_select:V4HF | |
6803 (match_operand:V8HF 2 "register_operand" "w") | |
6804 (match_operand:V8HF 4 "vect_par_cnst_hi_half" ""))) | |
6805 (float_extend:V4SF | |
6806 (vec_duplicate:V4HF | |
6807 (vec_select:HF | |
6808 (match_operand:V4HF 3 "register_operand" "x") | |
6809 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) | |
6810 (match_operand:V4SF 1 "register_operand" "0")))] | |
6811 "TARGET_F16FML" | |
6812 "fmlal2\\t%0.4s, %2.4h, %3.h[%5]" | |
6813 [(set_attr "type" "neon_fp_mul_s")] | |
6814 ) | |
6815 | |
;; By-element "fmlsl2", V4SF result with a V4HF lane source: as the fmlalq
;; lane-high pattern, but the op4-selected (vect_par_cnst_hi_half) half of
;; operand 2 is negated (neg:V4HF) before the float_extend.  Operand 1 is
;; tied to the destination; operand 3 uses register constraint "x"; the
;; lane index is an aarch64_imm2 immediate ("Ui2").
6816 (define_insn "aarch64_simd_fmlslq_lane_highv4sf" | |
6817 [(set (match_operand:V4SF 0 "register_operand" "=w") | |
6818 (fma:V4SF | |
6819 (float_extend:V4SF | |
6820 (neg:V4HF | |
6821 (vec_select:V4HF | |
6822 (match_operand:V8HF 2 "register_operand" "w") | |
6823 (match_operand:V8HF 4 "vect_par_cnst_hi_half" "")))) | |
6824 (float_extend:V4SF | |
6825 (vec_duplicate:V4HF | |
6826 (vec_select:HF | |
6827 (match_operand:V4HF 3 "register_operand" "x") | |
6828 (parallel [(match_operand:SI 5 "aarch64_imm2" "Ui2")])))) | |
6829 (match_operand:V4SF 1 "register_operand" "0")))] | |
6830 "TARGET_F16FML" | |
6831 "fmlsl2\\t%0.4s, %2.4h, %3.h[%5]" | |
6832 [(set_attr "type" "neon_fp_mul_s")] | |
5745 ) | 6833 ) |
5746 | 6834 |
5747 ;; pmull | 6835 ;; pmull |
5748 | 6836 |
;; Emits "pmull %0.1q, %1.1d, %2.1d" for UNSPEC_PMULL: two DImode register
;; inputs producing a TImode result.  The new revision enables the pattern
;; under TARGET_AES; the old revision used TARGET_CRYPTO.
5749 (define_insn "aarch64_crypto_pmulldi" | 6837 (define_insn "aarch64_crypto_pmulldi" |
5750 [(set (match_operand:TI 0 "register_operand" "=w") | 6838 [(set (match_operand:TI 0 "register_operand" "=w") |
5751 (unspec:TI [(match_operand:DI 1 "register_operand" "w") | 6839 (unspec:TI [(match_operand:DI 1 "register_operand" "w") |
5752 (match_operand:DI 2 "register_operand" "w")] | 6840 (match_operand:DI 2 "register_operand" "w")] |
5753 UNSPEC_PMULL))] | 6841 UNSPEC_PMULL))] |
5754 "TARGET_SIMD && TARGET_CRYPTO" | 6842 "TARGET_SIMD && TARGET_AES" |
5755 "pmull\\t%0.1q, %1.1d, %2.1d" | 6843 "pmull\\t%0.1q, %1.1d, %2.1d" |
5756 [(set_attr "type" "crypto_pmull")] | 6844 [(set_attr "type" "crypto_pmull")] |
5757 ) | 6845 ) |
5758 | 6846 |
;; Emits "pmull2 %0.1q, %1.2d, %2.2d" for UNSPEC_PMULL2: two V2DI register
;; inputs producing a TImode result.  The new revision enables the pattern
;; under TARGET_AES; the old revision used TARGET_CRYPTO.
5759 (define_insn "aarch64_crypto_pmullv2di" | 6847 (define_insn "aarch64_crypto_pmullv2di" |
5760 [(set (match_operand:TI 0 "register_operand" "=w") | 6848 [(set (match_operand:TI 0 "register_operand" "=w") |
5761 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w") | 6849 (unspec:TI [(match_operand:V2DI 1 "register_operand" "w") |
5762 (match_operand:V2DI 2 "register_operand" "w")] | 6850 (match_operand:V2DI 2 "register_operand" "w")] |
5763 UNSPEC_PMULL2))] | 6851 UNSPEC_PMULL2))] |
5764 "TARGET_SIMD && TARGET_CRYPTO" | 6852 "TARGET_SIMD && TARGET_AES" |
5765 "pmull2\\t%0.1q, %1.2d, %2.2d" | 6853 "pmull2\\t%0.1q, %1.2d, %2.2d" |
5766 [(set_attr "type" "crypto_pmull")] | 6854 [(set_attr "type" "crypto_pmull")] |
5767 ) | 6855 ) |