comparison gcc/config/arm/neon.ml @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
parents
children 77e2b8dfacca
comparison
equal deleted inserted replaced
-1:000000000000 0:a06113de4d67
1 (* Common code for ARM NEON header file, documentation and test case
2 generators.
3
4 Copyright (C) 2006, 2007 Free Software Foundation, Inc.
5 Contributed by CodeSourcery.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. *)
22
(* Shorthand types for vector elements.  S/U/F/P prefixes denote signed,
   unsigned, floating-point and polynomial element classes; I is a
   signedness-agnostic integer and B an untyped bit pattern of the given
   width.  [Conv]/[Cast] pair destination and source element types for
   conversions and reinterpret-casts. *)
type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
          | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
          | Cast of elts * elts | NoElts
27
(* The class of an element type, without its width.  [ConvClass] pairs the
   destination and source classes of a conversion/cast element. *)
type eltclass = Signed | Unsigned | Float | Poly | Int | Bits
              | ConvClass of eltclass * eltclass | NoType
30
(* These vector types correspond directly to C types (as used in arm_neon.h).
   The NxM forms are 64-bit (D-register) or 128-bit (Q-register) vectors;
   T_immediate carries an inclusive value range; T_intQI..T_intDI name the
   GCC machine-mode scalar types. *)
type vectype = T_int8x8    | T_int8x16
             | T_int16x4   | T_int16x8
             | T_int32x2   | T_int32x4
             | T_int64x1   | T_int64x2
             | T_uint8x8   | T_uint8x16
             | T_uint16x4  | T_uint16x8
             | T_uint32x2  | T_uint32x4
             | T_uint64x1  | T_uint64x2
             | T_float32x2 | T_float32x4
             | T_poly8x8   | T_poly8x16
             | T_poly16x4  | T_poly16x8
             | T_immediate of int * int  (* Inclusive range of the immediate. *)
             | T_int8      | T_int16
             | T_int32     | T_int64
             | T_uint8     | T_uint16
             | T_uint32    | T_uint64
             | T_poly8     | T_poly16
             | T_float32   | T_arrayof of int * vectype
             | T_ptrto of vectype | T_const of vectype
             | T_void      | T_intQI
             | T_intHI     | T_intSI
             | T_intDI
54
(* Opaque integer modes used for multi-register structure values.
   The meanings of the following are:
     TImode : "Tetra", two registers (four words).
     EImode : "hExa", three registers (six words).
     OImode : "Octa", four registers (eight words).
     CImode : "dodeCa", six registers (twelve words).
     XImode : "heXadeca", eight registers (sixteen words).
*)
type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode
64
(* One operand slot in an instruction shape: a D or Q vector register, a core
   (scalar) register, an immediate, an array of registers, or a (possibly
   const) pointer to one of these. *)
type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
               | PtrTo of shape_elt | CstPtrTo of shape_elt
             (* These next ones are used only in the test generator. *)
               | Element_of_dreg        (* Used for "lane" variants. *)
               | Element_of_qreg        (* Likewise. *)
               | All_elements_of_dreg   (* Used for "dup" variants. *)
71
(* Overall shape of an instruction: how many operands and what kind of
   register each uses.  [All (n, elt)] means n identical operands;
   Long/Wide/Narrow describe widening/narrowing patterns (see [regmap]);
   [Use_operands] spells out every operand slot explicitly. *)
type shape_form = All of int * shape_elt
                | Long
                | Long_noreg of shape_elt
                | Wide
                | Wide_noreg of shape_elt
                | Narrow
                | Long_imm
                | Narrow_imm
                | Binary_imm of shape_elt
                | Use_operands of shape_elt array
                | By_scalar of shape_elt
                | Unary_scalar of shape_elt
                | Wide_lane
                | Wide_scalar
                | Pair_result of shape_elt
87
(* C-level prototype of an intrinsic: the return type followed by zero to
   four argument types. *)
type arity = Arity0 of vectype
           | Arity1 of vectype * vectype
           | Arity2 of vectype * vectype * vectype
           | Arity3 of vectype * vectype * vectype * vectype
           | Arity4 of vectype * vectype * vectype * vectype * vectype
93
(* GCC machine modes used to select the builtin: 64-bit vectors, 128-bit
   vectors, and scalar modes. *)
type vecmode = V8QI | V4HI | V2SI | V2SF | DI
             | V16QI | V8HI | V4SI | V4SF | V2DI
             | QI | HI | SI | SF
97
(* One constructor per NEON intrinsic family; the [features] list attached to
   each entry in [ops] further distinguishes variants (saturating, rounding,
   etc.).  Constructors carrying an [int] are parameterized by the structure
   count (e.g. VLD2 is [Vldx 2]). *)
type opcode =
  (* Binary ops. *)
    Vadd
  | Vmul
  | Vmla
  | Vmls
  | Vsub
  | Vceq
  | Vcge
  | Vcgt
  | Vcle
  | Vclt
  | Vcage
  | Vcagt
  | Vcale
  | Vcalt
  | Vtst
  | Vabd
  | Vaba
  | Vmax
  | Vmin
  | Vpadd
  | Vpada
  | Vpmax
  | Vpmin
  | Vrecps
  | Vrsqrts
  | Vshl
  | Vshr_n
  | Vshl_n
  | Vsra_n
  | Vsri
  | Vsli
  (* Logic binops. *)
  | Vand
  | Vorr
  | Veor
  | Vbic
  | Vorn
  | Vbsl
  (* Ops with scalar. *)
  | Vmul_lane
  | Vmla_lane
  | Vmls_lane
  | Vmul_n
  | Vmla_n
  | Vmls_n
  | Vmull_n
  | Vmull_lane
  | Vqdmull_n
  | Vqdmull_lane
  | Vqdmulh_n
  | Vqdmulh_lane
  (* Unary ops. *)
  | Vabs
  | Vneg
  | Vcls
  | Vclz
  | Vcnt
  | Vrecpe
  | Vrsqrte
  | Vmvn
  (* Vector extract. *)
  | Vext
  (* Reverse elements. *)
  | Vrev64
  | Vrev32
  | Vrev16
  (* Transposition ops. *)
  | Vtrn
  | Vzip
  | Vuzp
  (* Loads and stores (VLD1/VST1/VLD2...), elements and structures. *)
  | Vldx of int
  | Vstx of int
  | Vldx_lane of int
  | Vldx_dup of int
  | Vstx_lane of int
  (* Set/extract lanes from a vector. *)
  | Vget_lane
  | Vset_lane
  (* Initialize vector from bit pattern. *)
  | Vcreate
  (* Set all lanes to same value. *)
  | Vdup_n
  | Vmov_n  (* Is this the same? *)
  (* Duplicate scalar to all lanes of vector. *)
  | Vdup_lane
  (* Combine vectors. *)
  | Vcombine
  (* Get quadword high/low parts. *)
  | Vget_high
  | Vget_low
  (* Convert vectors. *)
  | Vcvt
  | Vcvt_n
  (* Narrow/lengthen vectors. *)
  | Vmovn
  | Vmovl
  (* Table lookup. *)
  | Vtbl of int
  | Vtbx of int
  (* Reinterpret casts. *)
  | Vreinterp
202
(* Features used for documentation, to distinguish between some instruction
   variants, and to signal special requirements (e.g. swapping arguments). *)

type features =
    Halving
  | Rounding
  | Saturating
  | Dst_unsign
  | High_half
  | Doubling
  | Flipped of string  (* Builtin name to use with flipped arguments. *)
  | InfoWord  (* Pass an extra word for signage/rounding etc. (always passed
                 for All _, Long, Wide, Narrow shape_forms). *)
  | ReturnPtr  (* Pass explicit pointer to return value as first argument. *)
    (* A specification as to the shape of instruction expected upon
       disassembly, used if it differs from the shape used to build the
       intrinsic prototype.  Multiple entries in the constructor's argument
       indicate that the intrinsic expands to more than one assembly
       instruction, each with a corresponding shape specified here. *)
  | Disassembles_as of shape_form list
  | Builtin_name of string  (* Override the name of the builtin. *)
    (* Override the name of the instruction.  If more than one name
       is specified, it means that the instruction can have any of those
       names. *)
  | Instruction_name of string list
    (* Mark that the intrinsic yields no instructions, or expands to yield
       behavior that the test generator cannot test. *)
  | No_op
    (* Mark that the intrinsic has constant arguments that cannot be set
       to the defaults (zero for pointers and one otherwise) in the test
       cases.  The function supplied must return the integer to be written
       into the testcase for the argument number (0-based) supplied to it. *)
  | Const_valuator of (int -> int)
236
237 exception MixedMode of elts * elts
238
(* Width in bits of a vector element.  A [Conv] must not mix widths; a
   [Cast] always raises [MixedMode]; [NoElts] is an error. *)
let rec elt_width elt =
  match elt with
  | S8 | U8 | P8 | I8 | B8 -> 8
  | S16 | U16 | P16 | I16 | B16 -> 16
  | S32 | F32 | U32 | I32 | B32 -> 32
  | S64 | U64 | I64 | B64 -> 64
  | Conv (dst, src) ->
      let dst_w = elt_width dst in
      let src_w = elt_width src in
      if dst_w <> src_w then failwith "element width?" else dst_w
  | Cast (dst, src) -> raise (MixedMode (dst, src))
  | NoElts -> failwith "No elts"
249
(* Classify an element type (signedness/float/poly/etc.), discarding its
   width.  Conversion and cast elements classify both sides. *)
let rec elt_class elt =
  match elt with
  | S8 | S16 | S32 | S64 -> Signed
  | U8 | U16 | U32 | U64 -> Unsigned
  | P8 | P16 -> Poly
  | F32 -> Float
  | I8 | I16 | I32 | I64 -> Int
  | B8 | B16 | B32 | B64 -> Bits
  | Conv (dst, src) | Cast (dst, src) ->
      ConvClass (elt_class dst, elt_class src)
  | NoElts -> NoType
259
(* Map an element class and a bit-width back to the concrete element type.
   Fails for unsupported combinations (e.g. [Float] at any width other
   than 32, or [Poly] wider than 16). *)
let elt_of_class_width cls width =
  match cls, width with
  | Signed, 8 -> S8
  | Signed, 16 -> S16
  | Signed, 32 -> S32
  | Signed, 64 -> S64
  | Unsigned, 8 -> U8
  | Unsigned, 16 -> U16
  | Unsigned, 32 -> U32
  | Unsigned, 64 -> U64
  | Float, 32 -> F32
  | Poly, 8 -> P8
  | Poly, 16 -> P16
  | Int, 8 -> I8
  | Int, 16 -> I16
  | Int, 32 -> I32
  | Int, 64 -> I64
  | Bits, 8 -> B8
  | Bits, 16 -> B16
  | Bits, 32 -> B32
  | Bits, 64 -> B64
  | _, _ -> failwith "Bad element type"
282
(* Return the unsigned integer element with the same width as [elt]. *)
let unsigned_of_elt elt =
  let w = elt_width elt in
  elt_of_class_width Unsigned w
286
(* Return the signed integer element with the same width as [elt]. *)
let signed_of_elt elt =
  let w = elt_width elt in
  elt_of_class_width Signed w
289
(* Return the untyped bits element with the same width as [elt]. *)
let bits_of_elt elt =
  let w = elt_width elt in
  elt_of_class_width Bits w
293
(* Erase signedness: map signed and unsigned integer elements to the
   corresponding signedness-agnostic I* element; leave everything else
   untouched. *)
let non_signed_variant = function
  | S8 | U8 -> I8
  | S16 | U16 -> I16
  | S32 | U32 -> I32
  | S64 | U64 -> I64
  | other -> other
304
(* Treat polynomial elements as unsigned of the same width; all other
   element classes pass through unchanged. *)
let poly_unsigned_variant v =
  let cls =
    match elt_class v with
    | Poly -> Unsigned
    | c -> c
  in
  elt_of_class_width cls (elt_width v)
310
(* Double the width of an element, keeping its class.  Fails if the class
   has no element at the doubled width. *)
let widen_elt elt =
  elt_of_class_width (elt_class elt) (elt_width elt * 2)
315
(* Halve the width of an element, keeping its class.  Fails if the class
   has no element at the halved width. *)
let narrow_elt elt =
  elt_of_class_width (elt_class elt) (elt_width elt / 2)
320
(* If we're trying to find a mode from a "Use_operands" instruction, use the
   last vector operand as the dominant mode used to invoke the correct
   builtin.  We must stick to this rule in neon.md.  Scans backwards from
   the last operand; raises Invalid_argument if no D/Q operand exists. *)
let find_key_operand operands =
  let rec from idx =
    match operands.(idx) with
    | Qreg | VecArray (_, Qreg) -> Qreg
    | Dreg | VecArray (_, Dreg) -> Dreg
    | _ -> from (idx - 1)
  in
  from (Array.length operands - 1)
334
(* Machine mode used to invoke the builtin for element type [elt] under
   instruction shape [shape].  The mode tables are indexed by element width
   (8/16/32/64 -> 0..3); float elements select the SF-based modes at the
   32-bit slot.
   @raise Failure for an unsupported width or shape. *)
let rec mode_of_elt elt shape =
  let flt = match elt_class elt with
    Float | ConvClass(_, Float) -> true | _ -> false in
  let idx =
    match elt_width elt with
      8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
    | _ -> failwith "Bad element width"
  in match shape with
    All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
  | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
      (* 64-bit (D-register) vector modes. *)
      [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
  | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
  | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
      (* 128-bit (Q-register) vector modes. *)
      [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
  | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
      (* Scalar modes for core registers and memory operands. *)
      [| QI; HI; if flt then SF else SI; DI |].(idx)
  | Long | Wide | Wide_lane | Wide_scalar
  | Long_imm ->
      [| V8QI; V4HI; V2SI; DI |].(idx)
  | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
  (* Explicit operand lists defer to the last vector operand. *)
  | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
  | _ -> failwith "invalid shape"
357
(* Modify an element type dependent on the shape of the instruction and the
   operand number.  Returns a transformer applied to the nominal element
   type: widening shapes widen the result (and, for Wide, the first source);
   narrowing shapes narrow the result; operand 0 is always the result.
   @raise Invalid_argument if [no] is out of range for the shape. *)

let shapemap shape no =
  let ident = fun x -> x in
  match shape with
    All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
  | Binary_imm _ -> ident
  | Long | Long_noreg _ | Wide_scalar | Long_imm ->
      [| widen_elt; ident; ident |].(no)
  | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no)
  | Wide_lane -> [| widen_elt; ident; ident; ident |].(no)
  | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no)
371
(* Register type (D/Q) of an operand, based on shape and operand number.
   Operand 0 is the result; subsequent indices are the sources.
   @raise Invalid_argument if [no] is out of range for the shape. *)

let regmap shape no =
  match shape with
    All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
  | Long -> [| Qreg; Dreg; Dreg |].(no)
  | Wide -> [| Qreg; Qreg; Dreg |].(no)
  | Narrow -> [| Dreg; Qreg; Qreg |].(no)
  | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no)
  | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no)
  | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no)
  | Unary_scalar reg -> [| reg; Dreg; Immed |].(no)
  | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no)
  | Binary_imm reg -> [| reg; reg; Immed |].(no)
  | Long_imm -> [| Qreg; Dreg; Immed |].(no)
  | Narrow_imm -> [| Dreg; Qreg; Immed |].(no)
  | Use_operands these -> these.(no)
389
(* C-level [vectype] of operand [no] for element type [elt] under [shape].
   The element is first adjusted by [shapemap] (widened/narrowed as the
   shape demands), then combined with the operand's register class from
   [regmap].
   @raise Failure on an element/register combination with no C type. *)
let type_for_elt shape elt no =
  let elt = (shapemap shape no) elt in
  let reg = regmap shape no in
  let rec type_for_reg_elt reg elt =
    match reg with
      Dreg ->
        (* 64-bit vector types. *)
        begin match elt with
          S8 -> T_int8x8
        | S16 -> T_int16x4
        | S32 -> T_int32x2
        | S64 -> T_int64x1
        | U8 -> T_uint8x8
        | U16 -> T_uint16x4
        | U32 -> T_uint32x2
        | U64 -> T_uint64x1
        | F32 -> T_float32x2
        | P8 -> T_poly8x8
        | P16 -> T_poly16x4
        | _ -> failwith "Bad elt type"
        end
    | Qreg ->
        (* 128-bit vector types. *)
        begin match elt with
          S8 -> T_int8x16
        | S16 -> T_int16x8
        | S32 -> T_int32x4
        | S64 -> T_int64x2
        | U8 -> T_uint8x16
        | U16 -> T_uint16x8
        | U32 -> T_uint32x4
        | U64 -> T_uint64x2
        | F32 -> T_float32x4
        | P8 -> T_poly8x16
        | P16 -> T_poly16x8
        | _ -> failwith "Bad elt type"
        end
    | Corereg ->
        (* Scalar types. *)
        begin match elt with
          S8 -> T_int8
        | S16 -> T_int16
        | S32 -> T_int32
        | S64 -> T_int64
        | U8 -> T_uint8
        | U16 -> T_uint16
        | U32 -> T_uint32
        | U64 -> T_uint64
        | P8 -> T_poly8
        | P16 -> T_poly16
        | F32 -> T_float32
        | _ -> failwith "Bad elt type"
        end
    | Immed ->
        (* Placeholder range; refined later by the shift helpers. *)
        T_immediate (0, 0)
    | VecArray (num, sub) ->
        T_arrayof (num, type_for_reg_elt sub elt)
    | PtrTo x ->
        T_ptrto (type_for_reg_elt x elt)
    | CstPtrTo x ->
        T_ptrto (T_const (type_for_reg_elt x elt))
    (* Anything else is solely for the use of the test generator. *)
    | _ -> assert false
  in
    type_for_reg_elt reg elt
452
(* Return size of a vector type, in bits: 64 for D-register types, 128 for
   Q-register types.
   @raise Not_found for scalar, pointer and other non-vector types. *)
let vectype_size = function
  | T_int8x16 | T_uint8x16 | T_poly8x16
  | T_int16x8 | T_uint16x8 | T_poly16x8
  | T_int32x4 | T_uint32x4 | T_float32x4
  | T_int64x2 | T_uint64x2 -> 128
  | T_int8x8 | T_uint8x8 | T_poly8x8
  | T_int16x4 | T_uint16x4 | T_poly16x4
  | T_int32x2 | T_uint32x2 | T_float32x2
  | T_int64x1 | T_uint64x1 -> 64
  | _ -> raise Not_found
462
(* Opaque integer mode for an array of [num] vectors of type [elttype],
   chosen by total size in 32-bit words.
   @raise Not_found if [elttype] is not a vector type.
   @raise Failure if the total size has no matching mode. *)
let inttype_for_array num elttype =
  let total_words = num * vectype_size elttype / 32 in
  match total_words with
  | 4 -> B_TImode
  | 6 -> B_EImode
  | 8 -> B_OImode
  | 12 -> B_CImode
  | 16 -> B_XImode
  | n -> failwith ("no int type for size " ^ string_of_int n)
473
474 (* These functions return pairs of (internal, external) types, where "internal"
475 types are those seen by GCC, and "external" are those seen by the assembler.
476 These types aren't necessarily the same, since the intrinsics can munge more
477 than one C type into each assembler opcode. *)
478
(* Wrap a type-generating function so the resulting element type has its
   signedness erased (see [non_signed_variant]). *)
let make_sign_invariant func shape elt =
  let arity, res_elt = func shape elt in
  arity, non_signed_variant res_elt
482
(* Don't restrict any types: build the arity from the per-operand type
   function and keep the element unchanged. *)

let elts_same make_arity shape elt =
  make_arity (type_for_elt shape elt), elt
488
(* As sign_invar_*, but when sign matters.  Naming: "_io" variants take the
   result type again as an in/out (accumulator) argument; "_lane" variants
   append a lane-index operand; the trailing number is the source-operand
   count. *)
let elts_same_io_lane =
  elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_io =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2))

let elts_same_2_lane =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3))

(* Three plain sources: structurally identical to the 2-source + lane case. *)
let elts_same_3 = elts_same_2_lane

let elts_same_2 =
  elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2))

let elts_same_1 =
  elts_same (fun vtype -> Arity1 (vtype 0, vtype 1))
506
(* Use for signed/unsigned invariant operations (i.e. where the operation
   doesn't depend on the sign of the data). *)

let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
let sign_invar_io = make_sign_invariant elts_same_io
let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
let sign_invar_2 = make_sign_invariant elts_same_2
let sign_invar_1 = make_sign_invariant elts_same_1
515
(* Sign-sensitive comparison: two same-typed operands yield an unsigned
   mask of the same element width. *)

let cmp_sign_matters shape elt =
  let operand = type_for_elt shape elt in
  let result = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (result, operand 1, operand 2), elt
522
(* Signed/unsigned invariant comparison: as [cmp_sign_matters] but with the
   element's signedness erased, and polynomial P8 folded to I8. *)

let cmp_sign_invar shape elt =
  let arity, elt' = cmp_sign_matters shape elt in
  let folded =
    match non_signed_variant elt' with
    | P8 -> I8
    | x -> x
  in
  arity, folded
533
(* Comparison (VTST) where only the element width matters: unsigned result
   mask, element reduced to raw bits. *)

let cmp_bits shape elt =
  let operand = type_for_elt shape elt in
  let result = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (result, operand 1, operand 2), bits_of_elt elt
541
(* Register-controlled shift: the shift-count operand (operand 2) is always
   a signed element of the same width. *)
let reg_shift shape elt =
  let vt = type_for_elt shape elt in
  let count_t = type_for_elt shape (signed_of_elt elt) 2 in
  Arity2 (vt 0, vt 1, count_t), elt
546
(* Genericised constant-shift type-generating function.
   [mkimm] builds the immediate-operand type from the (shapemapped) element
   width; [?arity] overrides the default two-operand arity; [?result]
   optionally transforms the element type used for the result (e.g. to force
   an unsigned result). *)

let const_shift mkimm ?arity ?result shape elt =
  let op2type = (shapemap shape 2) elt in
  let op2width = elt_width op2type in
  let op2 = mkimm op2width
  and op1 = type_for_elt shape elt 1
  and r_elt =
    match result with
      None -> elt
    | Some restriction -> restriction elt in
  let rtype = type_for_elt shape r_elt 0 in
    match arity with
      None -> Arity2 (rtype, op1, op2), elt
    | Some mkarity -> mkarity rtype op1 op2, elt
562
(* Use for immediate right-shifts.  Valid shift counts are 1..width, hence
   T_immediate (1, width). *)

let shift_right shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) shape elt

(* Right-shift and accumulate: destination doubles as an extra source. *)
let shift_right_acc shape elt =
  const_shift (fun imm -> T_immediate (1, imm))
    ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt

(* Use for immediate right-shifts when the operation doesn't care about
   signedness. *)

let shift_right_sign_invar =
  make_sign_invariant shift_right

(* Immediate right-shift; result is unsigned even when operand is signed. *)

let shift_right_to_uns shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt
    shape elt

(* Immediate left-shift.  Valid shift counts are 0..width-1, hence
   T_immediate (0, width - 1). *)

let shift_left shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt

(* Immediate left-shift, unsigned result. *)

let shift_left_to_uns shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt
    shape elt

(* Immediate left-shift, don't care about signs. *)

let shift_left_sign_invar =
  make_sign_invariant shift_left

(* Shift left/right and insert: only element size matters, so the element
   collapses to raw bits. *)

let shift_insert shape elt =
  let arity, elt =
    const_shift (fun imm -> T_immediate (1, imm))
      ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in
  arity, bits_of_elt elt
607
(* Get/set lane.  For vget_lane, polynomial elements are extracted as
   unsigned and 32-bit elements as raw bits; other elements pass through. *)

let get_lane shape elt =
  let vtype = type_for_elt shape elt in
    Arity2 (vtype 0, vtype 1, vtype 2),
    (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x)

(* vset_lane: value, vector, lane index; only the bit pattern matters. *)
let set_lane shape elt =
  let vtype = type_for_elt shape elt in
    Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt

(* As [set_lane], but without recording any element type at all. *)
let set_lane_notype shape elt =
  let vtype = type_for_elt shape elt in
    Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts
622
(* vcreate: build a vector of element type [elt] from a U64 bit pattern. *)
let create_vector shape elt =
  let src = type_for_elt shape U64 1 in
  let dst = type_for_elt shape elt 0 in
  Arity1 (dst, src), elt
627
(* Conversions.  The element must be a [Conv] or [Cast] carrying both the
   destination and source element types.
   @raise Failure on a non-conversion element. *)
let conv make_arity shape elt =
  let edest, esrc = match elt with
    Conv (edest, esrc) | Cast (edest, esrc) -> edest, esrc
  | _ -> failwith "Non-conversion element in conversion" in
  let vtype = type_for_elt shape esrc
  and rtype = type_for_elt shape edest 0 in
  make_arity rtype vtype, elt

(* One-operand conversion (vcvt) and two-operand form with an extra
   fixed-point fraction-bits immediate (vcvt_n). *)
let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1))
let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2))
638
(* Operation has an unsigned result even if operands are signed. *)

let dst_unsign make_arity shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  make_arity rtype vtype, elt

let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1))
647
(* Wrap a type-generating function so the resulting element type is reduced
   to a raw bit pattern of the same width. *)
let make_bits_only func shape elt =
  let arity, res_elt = func shape elt in
  arity, bits_of_elt res_elt
651
(* Extend operation (vext): two vectors plus an immediate; only the element
   bit pattern matters. *)

let extend shape elt =
  let vt = type_for_elt shape elt in
  Arity3 (vt 0, vt 1, vt 2, vt 3), bits_of_elt elt
657
(* Table look-up operations.  Operand 2 is signed/unsigned for signed/unsigned
   integer ops respectively, or unsigned for polynomial ops. *)

let table mkarity shape elt =
  let vtype = type_for_elt shape elt in
  let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in
  mkarity vtype op2, bits_of_elt elt

(* vtbl (fresh result) and vtbx (result is also an in/out operand). *)
let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2))
let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2))
668
(* Operations where only bits matter: like elts_same_N but the recorded
   element collapses to a raw bit pattern. *)

let bits_1 = make_bits_only elts_same_1
let bits_2 = make_bits_only elts_same_2
let bits_3 = make_bits_only elts_same_3
674
(* Store insns: void result; operand 0 is the destination address. *)
let store_1 shape elt =
  let vtype = type_for_elt shape elt in
  Arity2 (T_void, vtype 0, vtype 1), bits_of_elt elt

(* Store with an extra (e.g. lane-index) operand. *)
let store_3 shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (T_void, vtype 0, vtype 1, vtype 2), bits_of_elt elt
683
(* Wrap a type-generating function, discarding the element type entirely
   (for ops whose builtin is not distinguished by element). *)
let make_notype func shape elt =
  let arity, _ = func shape elt in
  arity, NoElts

let notype_1 = make_notype elts_same_1
let notype_2 = make_notype elts_same_2
let notype_3 = make_notype elts_same_3
691
(* Bit-select operations (vbsl): the selector (first source operand) is an
   unsigned-integer vector of the same width; no element type recorded. *)

let bit_select shape elt =
  let vt = type_for_elt shape elt in
  let sel = type_for_elt shape (unsigned_of_elt elt) in
  Arity3 (vt 0, sel 1, vt 2, vt 3), NoElts
698
(* Common lists of supported element types.  Naming: s=signed, u=unsigned,
   p=poly, f=float, followed by the inclusive bit-width range. *)

let su_8_32 = [S8; S16; S32; U8; U16; U32]
let su_8_64 = S64 :: U64 :: su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32
let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
706
707 let ops =
708 [
709 (* Addition. *)
710 Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_64;
711 Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64;
712 Vadd, [], Long, "vaddl", elts_same_2, su_8_32;
713 Vadd, [], Wide, "vaddw", elts_same_2, su_8_32;
714 Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32;
715 Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32;
716 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
717 All (3, Dreg), "vRhadd", elts_same_2, su_8_32;
718 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
719 All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32;
720 Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64;
721 Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64;
722 Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64;
723 Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half],
724 Narrow, "vRaddhn", sign_invar_2, su_16_64;
725
726 (* Multiplication. *)
727 Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32;
728 Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32;
729 Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh",
730 elts_same_2, [S16; S32];
731 Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ",
732 elts_same_2, [S16; S32];
733 Vmul,
734 [Saturating; Rounding; Doubling; High_half;
735 Instruction_name ["vqrdmulh"]],
736 All (3, Dreg), "vqRdmulh",
737 elts_same_2, [S16; S32];
738 Vmul,
739 [Saturating; Rounding; Doubling; High_half;
740 Instruction_name ["vqrdmulh"]],
741 All (3, Qreg), "vqRdmulhQ",
742 elts_same_2, [S16; S32];
743 Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32;
744 Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32];
745
746 (* Multiply-accumulate. *)
747 Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32;
748 Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32;
749 Vmla, [], Long, "vmlal", elts_same_io, su_8_32;
750 Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32];
751
752 (* Multiply-subtract. *)
753 Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32;
754 Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32;
755 Vmls, [], Long, "vmlsl", elts_same_io, su_8_32;
756 Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];
757
758 (* Subtraction. *)
759 Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_64;
760 Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64;
761 Vsub, [], Long, "vsubl", elts_same_2, su_8_32;
762 Vsub, [], Wide, "vsubw", elts_same_2, su_8_32;
763 Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32;
764 Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32;
765 Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64;
766 Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64;
767 Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64;
768 Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half],
769 Narrow, "vRsubhn", sign_invar_2, su_16_64;
770
771 (* Comparison, equal. *)
772 Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32;
773 Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;
774
775 (* Comparison, greater-than or equal. *)
776 Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32;
777 Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32;
778
779 (* Comparison, less-than or equal. *)
780 Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
781 F32 :: su_8_32;
782 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
783 All (3, Qreg), "vcleQ", cmp_sign_matters,
784 F32 :: su_8_32;
785
786 (* Comparison, greater-than. *)
787 Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32;
788 Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32;
789
790 (* Comparison, less-than. *)
791 Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
792 F32 :: su_8_32;
793 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
794 All (3, Qreg), "vcltQ", cmp_sign_matters,
795 F32 :: su_8_32;
796
797 (* Compare absolute greater-than or equal. *)
798 Vcage, [Instruction_name ["vacge"]],
799 All (3, Dreg), "vcage", cmp_sign_matters, [F32];
800 Vcage, [Instruction_name ["vacge"]],
801 All (3, Qreg), "vcageQ", cmp_sign_matters, [F32];
802
803 (* Compare absolute less-than or equal. *)
804 Vcale, [Instruction_name ["vacge"]; Flipped "vcage"],
805 All (3, Dreg), "vcale", cmp_sign_matters, [F32];
806 Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"],
807 All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32];
808
809 (* Compare absolute greater-than or equal. *)
810 Vcagt, [Instruction_name ["vacgt"]],
811 All (3, Dreg), "vcagt", cmp_sign_matters, [F32];
812 Vcagt, [Instruction_name ["vacgt"]],
813 All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32];
814
815 (* Compare absolute less-than or equal. *)
816 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"],
817 All (3, Dreg), "vcalt", cmp_sign_matters, [F32];
818 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"],
819 All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32];
820
821 (* Test bits. *)
822 Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32;
823 Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32;
824
825 (* Absolute difference. *)
826 Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32;
827 Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32;
828 Vabd, [], Long, "vabdl", elts_same_2, su_8_32;
829
830 (* Absolute difference and accumulate. *)
831 Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32;
832 Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32;
833 Vaba, [], Long, "vabal", elts_same_io, su_8_32;
834
835 (* Max. *)
836 Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32;
837 Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32;
838
839 (* Min. *)
840 Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32;
841 Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32;
842
843 (* Pairwise add. *)
844 Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32;
845 Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32;
846 Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32;
847
848 (* Pairwise add, widen and accumulate. *)
849 Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32;
850 Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32;
851
852 (* Folding maximum, minimum. *)
853 Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32;
854 Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32;
855
856 (* Reciprocal step. *)
857 Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32];
858 Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32];
859 Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32];
860 Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32];
861
862 (* Vector shift left. *)
863 Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64;
864 Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64;
865 Vshl, [Instruction_name ["vrshl"]; Rounding],
866 All (3, Dreg), "vRshl", reg_shift, su_8_64;
867 Vshl, [Instruction_name ["vrshl"]; Rounding],
868 All (3, Qreg), "vRshlQ", reg_shift, su_8_64;
869 Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64;
870 Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64;
871 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
872 All (3, Dreg), "vqRshl", reg_shift, su_8_64;
873 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
874 All (3, Qreg), "vqRshlQ", reg_shift, su_8_64;
875
876 (* Vector shift right by constant. *)
877 Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64;
878 Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64;
879 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg,
880 "vRshr_n", shift_right, su_8_64;
881 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg,
882 "vRshrQ_n", shift_right, su_8_64;
883 Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64;
884 Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n",
885 shift_right_sign_invar, su_16_64;
886 Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64;
887 Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm,
888 "vqRshrn_n", shift_right, su_16_64;
889 Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n",
890 shift_right_to_uns, [S16; S32; S64];
891 Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding],
892 Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64];
893
894 (* Vector shift left by constant. *)
895 Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64;
896 Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64;
897 Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64;
898 Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64;
899 Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n",
900 shift_left_to_uns, [S8; S16; S32; S64];
901 Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n",
902 shift_left_to_uns, [S8; S16; S32; S64];
903 Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32;
904
905 (* Vector shift right by constant and accumulate. *)
906 Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64;
907 Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64;
908 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg,
909 "vRsra_n", shift_right_acc, su_8_64;
910 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg,
911 "vRsraQ_n", shift_right_acc, su_8_64;
912
913 (* Vector shift right and insert. *)
914 Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert,
915 P8 :: P16 :: su_8_64;
916 Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert,
917 P8 :: P16 :: su_8_64;
918
919 (* Vector shift left and insert. *)
920 Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert,
921 P8 :: P16 :: su_8_64;
922 Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert,
923 P8 :: P16 :: su_8_64;
924
925 (* Absolute value. *)
926 Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32];
927 Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32];
928 Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32];
929 Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32];
930
931 (* Negate. *)
932 Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32];
933 Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32];
934 Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32];
935 Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32];
936
937 (* Bitwise not. *)
938 Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32;
939 Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32;
940
941 (* Count leading sign bits. *)
942 Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32];
943 Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32];
944
945 (* Count leading zeros. *)
946 Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32;
947 Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32;
948
949 (* Count number of set bits. *)
950 Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8];
951 Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8];
952
953 (* Reciprocal estimate. *)
954 Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32];
955 Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32];
956
957 (* Reciprocal square-root estimate. *)
958 Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32];
959 Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32];
960
961 (* Get lanes from a vector. *)
962 Vget_lane,
963 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
964 Instruction_name ["vmov"]],
965 Use_operands [| Corereg; Dreg; Immed |],
966 "vget_lane", get_lane, pf_su_8_32;
967 Vget_lane,
968 [InfoWord;
969 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
970 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
971 Use_operands [| Corereg; Dreg; Immed |],
972 "vget_lane", notype_2, [S64; U64];
973 Vget_lane,
974 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
975 Instruction_name ["vmov"]],
976 Use_operands [| Corereg; Qreg; Immed |],
977 "vgetQ_lane", get_lane, pf_su_8_32;
978 Vget_lane,
979 [InfoWord;
980 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
981 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
982 Use_operands [| Corereg; Qreg; Immed |],
983 "vgetQ_lane", notype_2, [S64; U64];
984
985 (* Set lanes in a vector. *)
986 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
987 Instruction_name ["vmov"]],
988 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
989 set_lane, pf_su_8_32;
990 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
991 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
992 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
993 set_lane_notype, [S64; U64];
994 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
995 Instruction_name ["vmov"]],
996 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
997 set_lane, pf_su_8_32;
998 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
999 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
1000 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
1001 set_lane_notype, [S64; U64];
1002
1003 (* Create vector from literal bit pattern. *)
1004 Vcreate,
1005 [No_op], (* Not really, but it can yield various things that are too
1006 hard for the test generator at this time. *)
1007 Use_operands [| Dreg; Corereg |], "vcreate", create_vector,
1008 pf_su_8_64;
1009
1010 (* Set all lanes to the same value. *)
1011 Vdup_n, [],
1012 Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
1013 pf_su_8_32;
1014 Vdup_n,
1015 [Instruction_name ["vmov"];
1016 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1017 Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
1018 [S64; U64];
1019 Vdup_n, [],
1020 Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
1021 pf_su_8_32;
1022 Vdup_n,
1023 [Instruction_name ["vmov"];
1024 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1025 Use_operands [| Dreg; Corereg; Corereg |]]],
1026 Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1,
1027 [S64; U64];
1028
1029 (* These are just aliases for the above. *)
1030 Vmov_n,
1031 [Builtin_name "vdup_n"],
1032 Use_operands [| Dreg; Corereg |],
1033 "vmov_n", bits_1, pf_su_8_32;
1034 Vmov_n,
1035 [Builtin_name "vdup_n";
1036 Instruction_name ["vmov"];
1037 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
1038 Use_operands [| Dreg; Corereg |],
1039 "vmov_n", notype_1, [S64; U64];
1040 Vmov_n,
1041 [Builtin_name "vdupQ_n"],
1042 Use_operands [| Qreg; Corereg |],
1043 "vmovQ_n", bits_1, pf_su_8_32;
1044 Vmov_n,
1045 [Builtin_name "vdupQ_n";
1046 Instruction_name ["vmov"];
1047 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
1048 Use_operands [| Dreg; Corereg; Corereg |]]],
1049 Use_operands [| Qreg; Corereg |],
1050 "vmovQ_n", notype_1, [S64; U64];
1051
1052 (* Duplicate, lane version. We can't use Use_operands here because the
1053 rightmost register (always Dreg) would be picked up by find_key_operand,
1054 when we want the leftmost register to be used in this case (otherwise
1055 the modes are indistinguishable in neon.md, etc.). *)
1056 Vdup_lane,
1057 [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]],
1058 Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32;
1059 Vdup_lane,
1060 [No_op; Const_valuator (fun _ -> 0)],
1061 Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64];
1062 Vdup_lane,
1063 [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]],
1064 Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32;
1065 Vdup_lane,
1066 [No_op; Const_valuator (fun _ -> 0)],
1067 Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64];
1068
1069 (* Combining vectors. *)
1070 Vcombine, [No_op],
1071 Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2,
1072 pf_su_8_64;
1073
1074 (* Splitting vectors. *)
1075 Vget_high, [No_op],
1076 Use_operands [| Dreg; Qreg |], "vget_high",
1077 notype_1, pf_su_8_64;
1078 Vget_low, [Instruction_name ["vmov"];
1079 Disassembles_as [Use_operands [| Dreg; Dreg |]]],
1080 Use_operands [| Dreg; Qreg |], "vget_low",
1081 notype_1, pf_su_8_64;
1082
1083 (* Conversions. *)
1084 Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1,
1085 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1086 Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
1087 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1088 Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
1089 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1090 Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
1091 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
1092
1093 (* Move, narrowing. *)
1094 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]],
1095 Narrow, "vmovn", sign_invar_1, su_16_64;
1096 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating],
1097 Narrow, "vqmovn", elts_same_1, su_16_64;
1098 Vmovn,
1099 [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign],
1100 Narrow, "vqmovun", dst_unsign_1,
1101 [S16; S32; S64];
1102
1103 (* Move, long. *)
1104 Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]],
1105 Long, "vmovl", elts_same_1, su_8_32;
1106
1107 (* Table lookup. *)
1108 Vtbl 1,
1109 [Instruction_name ["vtbl"];
1110 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1111 Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8];
1112 Vtbl 2, [Instruction_name ["vtbl"]],
1113 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2,
1114 [U8; S8; P8];
1115 Vtbl 3, [Instruction_name ["vtbl"]],
1116 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2,
1117 [U8; S8; P8];
1118 Vtbl 4, [Instruction_name ["vtbl"]],
1119 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2,
1120 [U8; S8; P8];
1121
1122 (* Extended table lookup. *)
1123 Vtbx 1,
1124 [Instruction_name ["vtbx"];
1125 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
1126 Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8];
1127 Vtbx 2, [Instruction_name ["vtbx"]],
1128 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io,
1129 [U8; S8; P8];
1130 Vtbx 3, [Instruction_name ["vtbx"]],
1131 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io,
1132 [U8; S8; P8];
1133 Vtbx 4, [Instruction_name ["vtbx"]],
1134 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io,
1135 [U8; S8; P8];
1136
1137 (* Multiply, lane. (note: these were undocumented at the time of
1138 writing). *)
1139 Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane,
1140 [S16; S32; U16; U32; F32];
1141 Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane,
1142 [S16; S32; U16; U32; F32];
1143
1144 (* Multiply-accumulate, lane. *)
1145 Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane,
1146 [S16; S32; U16; U32; F32];
1147 Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane,
1148 [S16; S32; U16; U32; F32];
1149 Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane,
1150 [S16; S32; U16; U32];
1151 Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane",
1152 elts_same_io_lane, [S16; S32];
1153
1154 (* Multiply-subtract, lane. *)
1155 Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane,
1156 [S16; S32; U16; U32; F32];
1157 Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane,
1158 [S16; S32; U16; U32; F32];
1159 Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane,
1160 [S16; S32; U16; U32];
1161 Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane",
1162 elts_same_io_lane, [S16; S32];
1163
1164 (* Long multiply, lane. *)
1165 Vmull_lane, [],
1166 Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32];
1167
1168 (* Saturating doubling long multiply, lane. *)
1169 Vqdmull_lane, [Saturating; Doubling],
1170 Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32];
1171
1172 (* Saturating doubling long multiply high, lane. *)
1173 Vqdmulh_lane, [Saturating; Halving],
1174 By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32];
1175 Vqdmulh_lane, [Saturating; Halving],
1176 By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32];
1177 Vqdmulh_lane, [Saturating; Halving; Rounding;
1178 Instruction_name ["vqrdmulh"]],
1179 By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32];
1180 Vqdmulh_lane, [Saturating; Halving; Rounding;
1181 Instruction_name ["vqrdmulh"]],
1182 By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32];
1183
1184 (* Vector multiply by scalar. *)
1185 Vmul_n, [InfoWord;
1186 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1187 Use_operands [| Dreg; Dreg; Corereg |], "vmul_n",
1188 sign_invar_2, [S16; S32; U16; U32; F32];
1189 Vmul_n, [InfoWord;
1190 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1191 Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n",
1192 sign_invar_2, [S16; S32; U16; U32; F32];
1193
1194 (* Vector long multiply by scalar. *)
1195 Vmull_n, [Instruction_name ["vmull"];
1196 Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]],
1197 Wide_scalar, "vmull_n",
1198 elts_same_2, [S16; S32; U16; U32];
1199
1200 (* Vector saturating doubling long multiply by scalar. *)
1201 Vqdmull_n, [Saturating; Doubling;
1202 Disassembles_as [Use_operands [| Qreg; Dreg;
1203 Element_of_dreg |]]],
1204 Wide_scalar, "vqdmull_n",
1205 elts_same_2, [S16; S32];
1206
1207 (* Vector saturating doubling long multiply high by scalar. *)
1208 Vqdmulh_n,
1209 [Saturating; Halving; InfoWord;
1210 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1211 Use_operands [| Qreg; Qreg; Corereg |],
1212 "vqdmulhQ_n", elts_same_2, [S16; S32];
1213 Vqdmulh_n,
1214 [Saturating; Halving; InfoWord;
1215 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1216 Use_operands [| Dreg; Dreg; Corereg |],
1217 "vqdmulh_n", elts_same_2, [S16; S32];
1218 Vqdmulh_n,
1219 [Saturating; Halving; Rounding; InfoWord;
1220 Instruction_name ["vqrdmulh"];
1221 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1222 Use_operands [| Qreg; Qreg; Corereg |],
1223 "vqRdmulhQ_n", elts_same_2, [S16; S32];
1224 Vqdmulh_n,
1225 [Saturating; Halving; Rounding; InfoWord;
1226 Instruction_name ["vqrdmulh"];
1227 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1228 Use_operands [| Dreg; Dreg; Corereg |],
1229 "vqRdmulh_n", elts_same_2, [S16; S32];
1230
1231 (* Vector multiply-accumulate by scalar. *)
1232 Vmla_n, [InfoWord;
1233 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1234 Use_operands [| Dreg; Dreg; Corereg |], "vmla_n",
1235 sign_invar_io, [S16; S32; U16; U32; F32];
1236 Vmla_n, [InfoWord;
1237 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1238 Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n",
1239 sign_invar_io, [S16; S32; U16; U32; F32];
1240 Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32];
1241 Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io,
1242 [S16; S32];
1243
1244 (* Vector multiply subtract by scalar. *)
1245 Vmls_n, [InfoWord;
1246 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
1247 Use_operands [| Dreg; Dreg; Corereg |], "vmls_n",
1248 sign_invar_io, [S16; S32; U16; U32; F32];
1249 Vmls_n, [InfoWord;
1250 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
1251 Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n",
1252 sign_invar_io, [S16; S32; U16; U32; F32];
1253 Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32];
1254 Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io,
1255 [S16; S32];
1256
1257 (* Vector extract. *)
1258 Vext, [Const_valuator (fun _ -> 0)],
1259 Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend,
1260 pf_su_8_64;
1261 Vext, [Const_valuator (fun _ -> 0)],
1262 Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend,
1263 pf_su_8_64;
1264
1265 (* Reverse elements. *)
1266 Vrev64, [], All (2, Dreg), "vrev64", bits_1, P8 :: P16 :: F32 :: su_8_32;
1267 Vrev64, [], All (2, Qreg), "vrev64Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1268 Vrev32, [], All (2, Dreg), "vrev32", bits_1, [P8; P16; S8; U8; S16; U16];
1269 Vrev32, [], All (2, Qreg), "vrev32Q", bits_1, [P8; P16; S8; U8; S16; U16];
1270 Vrev16, [], All (2, Dreg), "vrev16", bits_1, [P8; S8; U8];
1271 Vrev16, [], All (2, Qreg), "vrev16Q", bits_1, [P8; S8; U8];
1272
1273 (* Bit selection. *)
1274 Vbsl,
1275 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1276 Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]],
1277 Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select,
1278 pf_su_8_64;
1279 Vbsl,
1280 [Instruction_name ["vbsl"; "vbit"; "vbif"];
1281 Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]],
1282 Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
1283 pf_su_8_64;
1284
1285 (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards
1286 generating good code for intrinsics which return structure types --
1287 builtins work well by themselves (and understand that the values being
1288 stored on e.g. the stack also reside in registers, so can optimise the
1289 stores away entirely if the results are used immediately), but
1290 intrinsics are very much less efficient. Maybe something can be improved
1291 re: inlining, or tweaking the ABI used for intrinsics (a special call
1292 attribute?).
1293 *)
1294 Vtrn, [ReturnPtr], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32;
1295 Vtrn, [ReturnPtr], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32;
1296
1297 (* Zip elements. *)
1298 Vzip, [ReturnPtr], Pair_result Dreg, "vzip", bits_2, pf_su_8_32;
1299 Vzip, [ReturnPtr], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32;
1300
1301 (* Unzip elements. *)
1302 Vuzp, [ReturnPtr], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32;
1303 Vuzp, [ReturnPtr], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32;
1304
1305 (* Element/structure loads. VLD1 variants. *)
1306 Vldx 1,
1307 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1308 CstPtrTo Corereg |]]],
1309 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1,
1310 pf_su_8_64;
1311 Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1312 CstPtrTo Corereg |]]],
1313 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1,
1314 pf_su_8_64;
1315
1316 Vldx_lane 1,
1317 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1318 CstPtrTo Corereg |]]],
1319 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1320 "vld1_lane", bits_3, pf_su_8_32;
1321 Vldx_lane 1,
1322 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1323 CstPtrTo Corereg |]];
1324 Const_valuator (fun _ -> 0)],
1325 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
1326 "vld1_lane", bits_3, [S64; U64];
1327 Vldx_lane 1,
1328 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1329 CstPtrTo Corereg |]]],
1330 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1331 "vld1Q_lane", bits_3, pf_su_8_32;
1332 Vldx_lane 1,
1333 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1334 CstPtrTo Corereg |]]],
1335 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
1336 "vld1Q_lane", bits_3, [S64; U64];
1337
1338 Vldx_dup 1,
1339 [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg);
1340 CstPtrTo Corereg |]]],
1341 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1342 bits_1, pf_su_8_32;
1343 Vldx_dup 1,
1344 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1345 CstPtrTo Corereg |]]],
1346 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
1347 bits_1, [S64; U64];
1348 Vldx_dup 1,
1349 [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg);
1350 CstPtrTo Corereg |]]],
1351 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1352 bits_1, pf_su_8_32;
1353 Vldx_dup 1,
1354 [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1355 CstPtrTo Corereg |]]],
1356 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
1357 bits_1, [S64; U64];
1358
1359 (* VST1 variants. *)
1360 Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1361 PtrTo Corereg |]]],
1362 Use_operands [| PtrTo Corereg; Dreg |], "vst1",
1363 store_1, pf_su_8_64;
1364 Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1365 PtrTo Corereg |]]],
1366 Use_operands [| PtrTo Corereg; Qreg |], "vst1Q",
1367 store_1, pf_su_8_64;
1368
1369 Vstx_lane 1,
1370 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1371 CstPtrTo Corereg |]]],
1372 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1373 "vst1_lane", store_3, pf_su_8_32;
1374 Vstx_lane 1,
1375 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1376 CstPtrTo Corereg |]];
1377 Const_valuator (fun _ -> 0)],
1378 Use_operands [| PtrTo Corereg; Dreg; Immed |],
1379 "vst1_lane", store_3, [U64; S64];
1380 Vstx_lane 1,
1381 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
1382 CstPtrTo Corereg |]]],
1383 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1384 "vst1Q_lane", store_3, pf_su_8_32;
1385 Vstx_lane 1,
1386 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
1387 CstPtrTo Corereg |]]],
1388 Use_operands [| PtrTo Corereg; Qreg; Immed |],
1389 "vst1Q_lane", store_3, [U64; S64];
1390
1391 (* VLD2 variants. *)
1392 Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1393 "vld2", bits_1, pf_su_8_32;
1394 Vldx 2, [Instruction_name ["vld1"]],
1395 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1396 "vld2", bits_1, [S64; U64];
1397 Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1398 CstPtrTo Corereg |];
1399 Use_operands [| VecArray (2, Dreg);
1400 CstPtrTo Corereg |]]],
1401 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |],
1402 "vld2Q", bits_1, pf_su_8_32;
1403
1404 Vldx_lane 2,
1405 [Disassembles_as [Use_operands
1406 [| VecArray (2, Element_of_dreg);
1407 CstPtrTo Corereg |]]],
1408 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg;
1409 VecArray (2, Dreg); Immed |],
1410 "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1411 Vldx_lane 2,
1412 [Disassembles_as [Use_operands
1413 [| VecArray (2, Element_of_dreg);
1414 CstPtrTo Corereg |]]],
1415 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg;
1416 VecArray (2, Qreg); Immed |],
1417 "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1418
1419 Vldx_dup 2,
1420 [Disassembles_as [Use_operands
1421 [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]],
1422 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1423 "vld2_dup", bits_1, pf_su_8_32;
1424 Vldx_dup 2,
1425 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1426 [| VecArray (2, Dreg); CstPtrTo Corereg |]]],
1427 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
1428 "vld2_dup", bits_1, [S64; U64];
1429
1430 (* VST2 variants. *)
1431 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1432 PtrTo Corereg |]]],
1433 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1434 store_1, pf_su_8_32;
1435 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1436 PtrTo Corereg |]];
1437 Instruction_name ["vst1"]],
1438 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
1439 store_1, [S64; U64];
1440 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
1441 PtrTo Corereg |];
1442 Use_operands [| VecArray (2, Dreg);
1443 PtrTo Corereg |]]],
1444 Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q",
1445 store_1, pf_su_8_32;
1446
1447 Vstx_lane 2,
1448 [Disassembles_as [Use_operands
1449 [| VecArray (2, Element_of_dreg);
1450 CstPtrTo Corereg |]]],
1451 Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane",
1452 store_3, P8 :: P16 :: F32 :: su_8_32;
1453 Vstx_lane 2,
1454 [Disassembles_as [Use_operands
1455 [| VecArray (2, Element_of_dreg);
1456 CstPtrTo Corereg |]]],
1457 Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane",
1458 store_3, [P16; F32; U16; U32; S16; S32];
1459
1460 (* VLD3 variants. *)
1461 Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1462 "vld3", bits_1, pf_su_8_32;
1463 Vldx 3, [Instruction_name ["vld1"]],
1464 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1465 "vld3", bits_1, [S64; U64];
1466 Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1467 CstPtrTo Corereg |];
1468 Use_operands [| VecArray (3, Dreg);
1469 CstPtrTo Corereg |]]],
1470 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |],
1471 "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1472
1473 Vldx_lane 3,
1474 [Disassembles_as [Use_operands
1475 [| VecArray (3, Element_of_dreg);
1476 CstPtrTo Corereg |]]],
1477 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg;
1478 VecArray (3, Dreg); Immed |],
1479 "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1480 Vldx_lane 3,
1481 [Disassembles_as [Use_operands
1482 [| VecArray (3, Element_of_dreg);
1483 CstPtrTo Corereg |]]],
1484 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg;
1485 VecArray (3, Qreg); Immed |],
1486 "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1487
1488 Vldx_dup 3,
1489 [Disassembles_as [Use_operands
1490 [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]],
1491 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1492 "vld3_dup", bits_1, pf_su_8_32;
1493 Vldx_dup 3,
1494 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1495 [| VecArray (3, Dreg); CstPtrTo Corereg |]]],
1496 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
1497 "vld3_dup", bits_1, [S64; U64];
1498
1499 (* VST3 variants. *)
1500 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1501 PtrTo Corereg |]]],
1502 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1503 store_1, pf_su_8_32;
1504 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1505 PtrTo Corereg |]];
1506 Instruction_name ["vst1"]],
1507 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
1508 store_1, [S64; U64];
1509 Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
1510 PtrTo Corereg |];
1511 Use_operands [| VecArray (3, Dreg);
1512 PtrTo Corereg |]]],
1513 Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q",
1514 store_1, pf_su_8_32;
1515
1516 Vstx_lane 3,
1517 [Disassembles_as [Use_operands
1518 [| VecArray (3, Element_of_dreg);
1519 CstPtrTo Corereg |]]],
1520 Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane",
1521 store_3, P8 :: P16 :: F32 :: su_8_32;
1522 Vstx_lane 3,
1523 [Disassembles_as [Use_operands
1524 [| VecArray (3, Element_of_dreg);
1525 CstPtrTo Corereg |]]],
1526 Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane",
1527 store_3, [P16; F32; U16; U32; S16; S32];
1528
1529 (* VLD4/VST4 variants. *)
1530 Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1531 "vld4", bits_1, pf_su_8_32;
1532 Vldx 4, [Instruction_name ["vld1"]],
1533 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1534 "vld4", bits_1, [S64; U64];
1535 Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1536 CstPtrTo Corereg |];
1537 Use_operands [| VecArray (4, Dreg);
1538 CstPtrTo Corereg |]]],
1539 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |],
1540 "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
1541
1542 Vldx_lane 4,
1543 [Disassembles_as [Use_operands
1544 [| VecArray (4, Element_of_dreg);
1545 CstPtrTo Corereg |]]],
1546 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg;
1547 VecArray (4, Dreg); Immed |],
1548 "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
1549 Vldx_lane 4,
1550 [Disassembles_as [Use_operands
1551 [| VecArray (4, Element_of_dreg);
1552 CstPtrTo Corereg |]]],
1553 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg;
1554 VecArray (4, Qreg); Immed |],
1555 "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
1556
1557 Vldx_dup 4,
1558 [Disassembles_as [Use_operands
1559 [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]],
1560 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1561 "vld4_dup", bits_1, pf_su_8_32;
1562 Vldx_dup 4,
1563 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
1564 [| VecArray (4, Dreg); CstPtrTo Corereg |]]],
1565 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
1566 "vld4_dup", bits_1, [S64; U64];
1567
1568 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1569 PtrTo Corereg |]]],
1570 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1571 store_1, pf_su_8_32;
1572 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1573 PtrTo Corereg |]];
1574 Instruction_name ["vst1"]],
1575 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
1576 store_1, [S64; U64];
1577 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
1578 PtrTo Corereg |];
1579 Use_operands [| VecArray (4, Dreg);
1580 PtrTo Corereg |]]],
1581 Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q",
1582 store_1, pf_su_8_32;
1583
1584 Vstx_lane 4,
1585 [Disassembles_as [Use_operands
1586 [| VecArray (4, Element_of_dreg);
1587 CstPtrTo Corereg |]]],
1588 Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane",
1589 store_3, P8 :: P16 :: F32 :: su_8_32;
1590 Vstx_lane 4,
1591 [Disassembles_as [Use_operands
1592 [| VecArray (4, Element_of_dreg);
1593 CstPtrTo Corereg |]]],
1594 Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane",
1595 store_3, [P16; F32; U16; U32; S16; S32];
1596
1597 (* Logical operations. And. *)
1598 Vand, [], All (3, Dreg), "vand", notype_2, su_8_64;
1599 Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64;
1600
1601 (* Or. *)
1602 Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_64;
1603 Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64;
1604
1605 (* Eor. *)
1606 Veor, [], All (3, Dreg), "veor", notype_2, su_8_64;
1607 Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64;
1608
1609 (* Bic (And-not). *)
1610 Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_64;
1611 Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64;
1612
1613 (* Or-not. *)
1614 Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_64;
1615 Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64;
1616 ]
1617
(* One D-register and one Q-register "vreinterpret" entry per destination
   element type.  Each entry carries a Cast from every *other* element type
   in [elems]; the identity cast (same source and destination) is omitted.  *)
let reinterp =
  let elems = P8 :: P16 :: F32 :: su_8_64 in
  (* All casts whose destination is [convto], excluding the identity cast. *)
  let casts_to convto =
    List.map (fun convfrom -> Cast (convto, convfrom))
             (List.filter (fun convfrom -> convfrom <> convto) elems)
  in
  (* The D-reg and Q-reg intrinsic descriptions for one destination type. *)
  let entries convto =
    let types = casts_to convto in
    [Vreinterp, [No_op], Use_operands [| Dreg; Dreg |],
       "vreinterpret", conv_1, types;
     Vreinterp, [No_op], Use_operands [| Qreg; Qreg |],
       "vreinterpretQ", conv_1, types]
  in
  List.flatten (List.map entries elems)
1638
1639 (* Output routines. *)
1640
(* Printable suffix for an element type, e.g. "s8", "u32" or "f32".  A
   conversion or cast is rendered as "dst_src", e.g. "s32_f32".  *)
let rec string_of_elt elt =
  match elt with
    Conv (dst, src) | Cast (dst, src) ->
      string_of_elt dst ^ "_" ^ string_of_elt src
  | S8 -> "s8" | S16 -> "s16" | S32 -> "s32" | S64 -> "s64"
  | U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
  | I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
  | B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64"
  | P8 -> "p8" | P16 -> "p16"
  | F32 -> "f32"
  | NoElts -> failwith "No elts"
1649
(* As string_of_elt, but a conversion/cast is rendered "dst.src" (with a
   dot rather than an underscore between the two element names).  *)
let string_of_elt_dots = function
    Conv (dst, src) | Cast (dst, src) ->
      string_of_elt dst ^ "." ^ string_of_elt src
  | elt -> string_of_elt elt
1654
(* C-level name of a vector (or scalar, pointer, array, ...) type.  Vector
   and scalar element types receive a "_t" suffix via [wrap]; immediates,
   void and the raw builtin integer types are emitted verbatim.  Array types
   get an "x<num>" suffix before the wrapper is applied.  *)
let string_of_vectype vt =
  let rec describe wrap = function
      T_int8x8 -> wrap "int8x8"
    | T_int8x16 -> wrap "int8x16"
    | T_int16x4 -> wrap "int16x4"
    | T_int16x8 -> wrap "int16x8"
    | T_int32x2 -> wrap "int32x2"
    | T_int32x4 -> wrap "int32x4"
    | T_int64x1 -> wrap "int64x1"
    | T_int64x2 -> wrap "int64x2"
    | T_uint8x8 -> wrap "uint8x8"
    | T_uint8x16 -> wrap "uint8x16"
    | T_uint16x4 -> wrap "uint16x4"
    | T_uint16x8 -> wrap "uint16x8"
    | T_uint32x2 -> wrap "uint32x2"
    | T_uint32x4 -> wrap "uint32x4"
    | T_uint64x1 -> wrap "uint64x1"
    | T_uint64x2 -> wrap "uint64x2"
    | T_float32x2 -> wrap "float32x2"
    | T_float32x4 -> wrap "float32x4"
    | T_poly8x8 -> wrap "poly8x8"
    | T_poly8x16 -> wrap "poly8x16"
    | T_poly16x4 -> wrap "poly16x4"
    | T_poly16x8 -> wrap "poly16x8"
    | T_int8 -> wrap "int8"
    | T_int16 -> wrap "int16"
    | T_int32 -> wrap "int32"
    | T_int64 -> wrap "int64"
    | T_uint8 -> wrap "uint8"
    | T_uint16 -> wrap "uint16"
    | T_uint32 -> wrap "uint32"
    | T_uint64 -> wrap "uint64"
    | T_poly8 -> wrap "poly8"
    | T_poly16 -> wrap "poly16"
    | T_float32 -> wrap "float32"
    | T_immediate _ -> "const int"
    | T_void -> "void"
    | T_intQI -> "__builtin_neon_qi"
    | T_intHI -> "__builtin_neon_hi"
    | T_intSI -> "__builtin_neon_si"
    | T_intDI -> "__builtin_neon_di"
    | T_arrayof (num, base) ->
        (* The element name is built without the suffix, which is then
           applied to the whole "<base>x<num>" spelling.  *)
        let basename = describe (fun x -> x) base in
        wrap (Printf.sprintf "%sx%d" basename num)
    | T_ptrto x ->
        let basename = describe wrap x in
        Printf.sprintf "%s *" basename
    | T_const x ->
        let basename = describe wrap x in
        Printf.sprintf "const %s" basename
  in
  describe (fun x -> x ^ "_t") vt
1707
(* Name of the internal builtin type used for an oversized integer mode.
   All share the "__builtin_neon_" prefix; only the mode suffix varies.  *)
let string_of_inttype it =
  "__builtin_neon_"
  ^ (match it with
       B_TImode -> "ti"
     | B_EImode -> "ei"
     | B_OImode -> "oi"
     | B_CImode -> "ci"
     | B_XImode -> "xi")
1714
(* Lower-case textual name of a machine mode.  *)
let string_of_mode = function
    V8QI -> "v8qi"
  | V4HI -> "v4hi"
  | V2SI -> "v2si"
  | V2SF -> "v2sf"
  | DI -> "di"
  | V16QI -> "v16qi"
  | V8HI -> "v8hi"
  | V4SI -> "v4si"
  | V4SF -> "v4sf"
  | V2DI -> "v2di"
  | QI -> "qi"
  | HI -> "hi"
  | SI -> "si"
  | SF -> "sf"
1720
(* Use uppercase chars for letters which form part of the intrinsic name, but
   should be omitted from the builtin name (the info is passed in an extra
   argument, instead).  The intrinsic itself is spelt all lower-case.  *)
let intrinsic_name = String.lowercase
1725
(* Allow the name of the builtin to be overridden by things (e.g. Flipped)
   found in the features list.  *)
let builtin_name features name =
  (* The leftmost Flipped/Builtin_name entry wins: fold_right applies it
     last, discarding whatever the entries to its right produced.  *)
  let override feature acc =
    match feature with
      Flipped replacement | Builtin_name replacement -> replacement
    | _ -> acc in
  let name = List.fold_right override features name in
  (* Keep only characters already lower-case: upper-case letters mark
     information passed to the builtin as an extra argument instead.  *)
  let keep ch =
    let s = String.make 1 ch in
    String.lowercase s = s in
  let buf = Buffer.create (String.length name) in
  String.iter (fun ch -> if keep ch then Buffer.add_char buf ch) name;
  Buffer.contents buf
1739
(* Transform an arity into a list of strings: the return type's name
   followed by the names of the argument types, in order.  *)
let strings_of_arity a =
  let vectypes =
    match a with
    | Arity0 r -> [r]
    | Arity1 (r, a1) -> [r; a1]
    | Arity2 (r, a1, a2) -> [r; a1; a2]
    | Arity3 (r, a1, a2, a3) -> [r; a1; a2; a3]
    | Arity4 (r, a1, a2, a3, a4) -> [r; a1; a2; a3; a4]
  in
    List.map string_of_vectype vectypes
1757
(* Suffixes on the end of builtin names that are to be stripped in order
   to obtain the name used as an instruction. They are only stripped if
   preceded immediately by an underscore (e.g. "vshl_n" -> "vshl"); see
   get_insn_names below for the stripping logic. *)
let suffixes_to_strip = [ "n"; "lane"; "dup" ]
1762
(* Get the possible names of an instruction corresponding to a "name" from the
   ops table. This is done by getting the equivalent builtin name and
   stripping any suffixes from the list at the top of this file, unless
   the features list presents with an Instruction_name entry, in which
   case that is used; or unless the features list presents with a Flipped
   entry, in which case that is used. If both such entries are present,
   the first in the list will be chosen. *)
let get_insn_names features name =
  let overriding = function
      Instruction_name _ | Flipped _ -> true
    | _ -> false in
  let base_names =
    try
      match List.find overriding features with
        Instruction_name names -> names
      | Flipped name -> [name]
      | _ -> assert false
    with Not_found -> [builtin_name features name] in
  (* Split a candidate at its last underscore; if the trailing part is a
     known strippable suffix, drop it together with the underscore.
     Names without an underscore (String.rindex raises Not_found) pass
     through unchanged.  *)
  let strip name' =
    try
      let underscore = String.rindex name' '_' in
      let suffix = String.sub name' (underscore + 1)
                     (String.length name' - underscore - 1) in
      if List.mem suffix suffixes_to_strip then
        String.sub name' 0 underscore
      else
        name'
    with Not_found | Invalid_argument _ -> name' in
  List.map strip base_names
1800
(* Apply a function to each element of a list and then comma-separate
   the resulting strings, appending them to [acc].  An empty list yields
   [acc] unchanged.  *)
let commas f elts acc =
  acc ^ String.concat ", " (List.map f elts)
1809
(* Given a list of features and the shape specified in the "ops" table, apply
   a function to each possible shape that the instruction may have.
   By default, this is the "shape" entry in "ops". If the features list
   contains a Disassembles_as entry, the shapes contained in that entry are
   mapped to corresponding outputs and returned in a list. If there is more
   than one Disassembles_as entry, only the first is used. *)
let analyze_all_shapes features shape f =
  let disassembly = function
      Disassembles_as _ -> true
    | _ -> false in
  try
    match List.find disassembly features with
      Disassembles_as shapes -> List.map f shapes
    | _ -> assert false
  with Not_found -> [f shape]
1825