(* Common code for ARM NEON header file, documentation and test case
   generators.

   Copyright (C) 2006, 2007 Free Software Foundation, Inc.
   Contributed by CodeSourcery.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3, or (at your option) any later
   version.

   GCC is distributed in the hope that it will be useful, but WITHOUT ANY
   WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  *)
(* Shorthand types for vector elements.  S = signed integer, U = unsigned
   integer, F = float, P = polynomial, I = integer of unspecified
   signedness, B = untyped bits; the number is the element width in bits.
   Conv (dest, src) is a value-changing conversion and Cast (dest, src) a
   reinterpreting cast (destination element first -- see the conv function
   below).  NoElts marks operations with no meaningful element type.  *)
type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
          | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
          | Cast of elts * elts | NoElts
(* Classes grouping the element types above (see elt_class below).
   ConvClass pairs the classes of a conversion's two sides; NoType
   corresponds to NoElts.  *)
type eltclass = Signed | Unsigned | Float | Poly | Int | Bits
              | ConvClass of eltclass * eltclass | NoType
(* These vector types correspond directly to C types.  *)
type vectype = T_int8x8 | T_int8x16
             | T_int16x4 | T_int16x8
             | T_int32x2 | T_int32x4
             | T_int64x1 | T_int64x2
             | T_uint8x8 | T_uint8x16
             | T_uint16x4 | T_uint16x8
             | T_uint32x2 | T_uint32x4
             | T_uint64x1 | T_uint64x2
             | T_float32x2 | T_float32x4
             | T_poly8x8 | T_poly8x16
             | T_poly16x4 | T_poly16x8
               (* Immediate operand.  The two ints appear to be bounds on
                  the accepted value (e.g. shifts use (1, width) and
                  (0, width - 1)) -- NOTE(review): confirm against the
                  generators that consume T_immediate.  *)
             | T_immediate of int * int
             | T_int8 | T_int16
             | T_int32 | T_int64
             | T_uint8 | T_uint16
             | T_uint32 | T_uint64
             | T_poly8 | T_poly16
               (* Aggregate/derived types: fixed-size array, pointer,
                  const-qualified type.  *)
             | T_float32 | T_arrayof of int * vectype
             | T_ptrto of vectype | T_const of vectype
             | T_void | T_intQI
             | T_intHI | T_intSI
             | T_intDI
(* The meanings of the following are:
     TImode : "Tetra", two registers (four words).
     EImode : "hExa", three registers (six words).
     OImode : "Octa", four registers (eight words).
     CImode : "dodeCa", six registers (twelve words).
     XImode : "heXadeca", eight registers (sixteen words).
   These are the opaque integer modes used to hold multi-register
   structures (see inttype_for_array below).  *)

type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode
(* One operand slot of an instruction shape: D register, Q register, ARM
   core register, immediate, array of registers, or (const) pointer.  *)
type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
               | PtrTo of shape_elt | CstPtrTo of shape_elt
               (* These next ones are used only in the test generator.  *)
               | Element_of_dreg        (* Used for "lane" variants.  *)
               | Element_of_qreg        (* Likewise.  *)
               | All_elements_of_dreg   (* Used for "dup" variants.  *)
(* Overall shape of an instruction: how many operands there are, which
   register classes they live in, and how element widths relate between the
   result (operand 0) and the sources.  shapemap and regmap below give each
   form its concrete per-operand meaning.  *)
type shape_form = All of int * shape_elt      (* N operands, all alike.  *)
                | Long                        (* Q result, D sources; result
                                                 elements twice as wide.  *)
                | Long_noreg of shape_elt
                | Wide                        (* Q result and first source,
                                                 D second source.  *)
                | Wide_noreg of shape_elt
                | Narrow                      (* D result, Q sources; result
                                                 elements half as wide.  *)
                | Long_imm
                | Narrow_imm
                | Binary_imm of shape_elt     (* Reg, reg, immediate.  *)
                | Use_operands of shape_elt array  (* Explicit operand list. *)
                | By_scalar of shape_elt      (* Vector by scalar (lane).  *)
                | Unary_scalar of shape_elt
                | Wide_lane
                | Wide_scalar
                | Pair_result of shape_elt    (* Returns a 2-element array.  *)
(* Arity of an intrinsic: ArityN (return_type, arg1, ..., argN).  *)
type arity = Arity0 of vectype
           | Arity1 of vectype * vectype
           | Arity2 of vectype * vectype * vectype
           | Arity3 of vectype * vectype * vectype * vectype
           | Arity4 of vectype * vectype * vectype * vectype * vectype
(* Machine modes: 64-bit (D-register) vector modes, 128-bit (Q-register)
   vector modes, and scalar modes.  See mode_of_elt below.  *)
type vecmode = V8QI | V4HI | V2SI | V2SF | DI
             | V16QI | V8HI | V4SI | V4SF | V2DI
             | QI | HI | SI | SF
(* NEON instruction kinds, one constructor per intrinsic family.  The int
   argument on load/store and table-lookup constructors is the structure
   count (e.g. Vldx 2 for VLD2).  *)
type opcode =
  (* Binary ops.  *)
    Vadd
  | Vmul
  | Vmla
  | Vmls
  | Vsub
  | Vceq
  | Vcge
  | Vcgt
  | Vcle
  | Vclt
  | Vcage
  | Vcagt
  | Vcale
  | Vcalt
  | Vtst
  | Vabd
  | Vaba
  | Vmax
  | Vmin
  | Vpadd
  | Vpada
  | Vpmax
  | Vpmin
  | Vrecps
  | Vrsqrts
  | Vshl
  | Vshr_n
  | Vshl_n
  | Vsra_n
  | Vsri
  | Vsli
  (* Logic binops.  *)
  | Vand
  | Vorr
  | Veor
  | Vbic
  | Vorn
  | Vbsl
  (* Ops with scalar.  *)
  | Vmul_lane
  | Vmla_lane
  | Vmls_lane
  | Vmul_n
  | Vmla_n
  | Vmls_n
  | Vmull_n
  | Vmull_lane
  | Vqdmull_n
  | Vqdmull_lane
  | Vqdmulh_n
  | Vqdmulh_lane
  (* Unary ops.  *)
  | Vabs
  | Vneg
  | Vcls
  | Vclz
  | Vcnt
  | Vrecpe
  | Vrsqrte
  | Vmvn
  (* Vector extract.  *)
  | Vext
  (* Reverse elements.  *)
  | Vrev64
  | Vrev32
  | Vrev16
  (* Transposition ops.  *)
  | Vtrn
  | Vzip
  | Vuzp
  (* Loads and stores (VLD1/VST1/VLD2...), elements and structures.  *)
  | Vldx of int
  | Vstx of int
  | Vldx_lane of int
  | Vldx_dup of int
  | Vstx_lane of int
  (* Set/extract lanes from a vector.  *)
  | Vget_lane
  | Vset_lane
  (* Initialize vector from bit pattern.  *)
  | Vcreate
  (* Set all lanes to same value.  *)
  | Vdup_n
  | Vmov_n  (* Is this the same?  *)
  (* Duplicate scalar to all lanes of vector.  *)
  | Vdup_lane
  (* Combine vectors.  *)
  | Vcombine
  (* Get quadword high/low parts.  *)
  | Vget_high
  | Vget_low
  (* Convert vectors.  *)
  | Vcvt
  | Vcvt_n
  (* Narrow/lengthen vectors.  *)
  | Vmovn
  | Vmovl
  (* Table lookup.  *)
  | Vtbl of int
  | Vtbx of int
  (* Reinterpret casts.  *)
  | Vreinterp
(* Features used for documentation, to distinguish between some instruction
   variants, and to signal special requirements (e.g. swapping arguments).  *)

type features =
    Halving
  | Rounding
  | Saturating
  | Dst_unsign
  | High_half
  | Doubling
  | Flipped of string  (* Builtin name to use with flipped arguments.  *)
  | InfoWord  (* Pass an extra word for signage/rounding etc. (always passed
                 for All _, Long, Wide, Narrow shape_forms).  *)
  | ReturnPtr  (* Pass explicit pointer to return value as first argument.  *)
    (* A specification as to the shape of instruction expected upon
       disassembly, used if it differs from the shape used to build the
       intrinsic prototype.  Multiple entries in the constructor's argument
       indicate that the intrinsic expands to more than one assembly
       instruction, each with a corresponding shape specified here.  *)
  | Disassembles_as of shape_form list
  | Builtin_name of string  (* Override the name of the builtin.  *)
    (* Override the name of the instruction.  If more than one name
       is specified, it means that the instruction can have any of those
       names.  *)
  | Instruction_name of string list
    (* Mark that the intrinsic yields no instructions, or expands to yield
       behavior that the test generator cannot test.  *)
  | No_op
    (* Mark that the intrinsic has constant arguments that cannot be set
       to the defaults (zero for pointers and one otherwise) in the test
       cases.  The function supplied must return the integer to be written
       into the testcase for the argument number (0-based) supplied to it.  *)
  | Const_valuator of (int -> int)
(* Raised by elt_width (below) when asked for the single width of a Cast,
   whose two sides may legitimately differ.  Carries both elements.  *)
exception MixedMode of elts * elts
(* Width in bits of a single element.  For a Conv both sides must agree on
   width; a Cast has no single width, so MixedMode is raised instead.  *)
let rec elt_width = function
    S8 | U8 | P8 | I8 | B8 -> 8
  | S16 | U16 | P16 | I16 | B16 -> 16
  | S32 | F32 | U32 | I32 | B32 -> 32
  | S64 | U64 | I64 | B64 -> 64
  | Conv (a, b) ->
      (* A value conversion only has a width when both sides match.  *)
      let wa = elt_width a and wb = elt_width b in
      if wa = wb then wa else failwith "element width?"
  | Cast (a, b) -> raise (MixedMode (a, b))
  | NoElts -> failwith "No elts"
(* Class (signedness/kind) of an element, dropping its width.  Conversions
   and casts map to the pair of their sides' classes.  *)
let rec elt_class = function
    S8 | S16 | S32 | S64 -> Signed
  | U8 | U16 | U32 | U64 -> Unsigned
  | P8 | P16 -> Poly
  | F32 -> Float
  | I8 | I16 | I32 | I64 -> Int
  | B8 | B16 | B32 | B64 -> Bits
  | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
  | NoElts -> NoType
(* Map a class and a width (in bits) back to the corresponding element
   type -- the inverse of elt_class/elt_width for the simple element types.
   Fails for conversion classes, NoType, Float of width <> 32, or any
   unsupported width.  *)
let elt_of_class_width c w =
  match c, w with
    Signed, 8 -> S8
  | Signed, 16 -> S16
  | Signed, 32 -> S32
  | Signed, 64 -> S64
  | Float, 32 -> F32
  | Unsigned, 8 -> U8
  | Unsigned, 16 -> U16
  | Unsigned, 32 -> U32
  | Unsigned, 64 -> U64
  | Poly, 8 -> P8
  | Poly, 16 -> P16
  | Int, 8 -> I8
  | Int, 16 -> I16
  | Int, 32 -> I32
  | Int, 64 -> I64
  | Bits, 8 -> B8
  | Bits, 16 -> B16
  | Bits, 32 -> B32
  | Bits, 64 -> B64
  | _ -> failwith "Bad element type"
(* Return unsigned integer element the same width as argument.  *)
let unsigned_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Unsigned width

(* Return signed integer element the same width as argument.  *)
let signed_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Signed width

(* Return untyped bits element the same width as argument.  *)
let bits_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Bits width
(* Map signed and unsigned integer elements to the signless I<n> element of
   the same width; every other element is returned unchanged.  *)
let non_signed_variant elt =
  match elt_class elt with
    Signed | Unsigned -> elt_of_class_width Int (elt_width elt)
  | _ -> elt
(* Reclassify polynomial elements as unsigned integers of the same width;
   elements of any other class are rebuilt with their class unchanged.  *)
let poly_unsigned_variant v =
  let cls = elt_class v in
  let cls' = if cls = Poly then Unsigned else cls in
  elt_of_class_width cls' (elt_width v)
(* Element of the same class as ELT but twice the width.  *)
let widen_elt elt =
  elt_of_class_width (elt_class elt) (elt_width elt * 2)

(* Element of the same class as ELT but half the width.  *)
let narrow_elt elt =
  elt_of_class_width (elt_class elt) (elt_width elt / 2)
(* If we're trying to find a mode from a "Use_operands" instruction, use the
   last vector operand as the dominant mode used to invoke the correct builtin.
   We must stick to this rule in neon.md.  *)
let find_key_operand operands =
  (* Scan backwards from index OPNO for the first D/Q register (or array of
     D/Q registers).  If no operand qualifies the recursion walks off the
     front of the array and OPERANDS.(-1) raises Invalid_argument.  *)
  let rec scan opno =
    match operands.(opno) with
      Qreg -> Qreg
    | Dreg -> Dreg
    | VecArray (_, Qreg) -> Qreg
    | VecArray (_, Dreg) -> Dreg
    | _ -> scan (opno-1)
  in
    scan ((Array.length operands) - 1)
(* Machine mode used to name the builtin for an (element, shape) pair.
   FLT selects the float variant of the 32-bit table entry; IDX indexes the
   mode tables by element width (8/16/32/64 -> 0/1/2/3).  D-register shapes
   use 64-bit vector modes, Q-register shapes 128-bit ones, core-register
   and pointer shapes scalar modes.  For Use_operands the mode follows the
   key (last vector) operand, per find_key_operand.  *)
let rec mode_of_elt elt shape =
  let flt = match elt_class elt with
    Float | ConvClass(_, Float) -> true | _ -> false in
  let idx =
    match elt_width elt with
      8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
    | _ -> failwith "Bad element width"
  in match shape with
    All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
  | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
      [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
  | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
  | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
      [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
  | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
      [| QI; HI; if flt then SF else SI; DI |].(idx)
  | Long | Wide | Wide_lane | Wide_scalar
  | Long_imm ->
      (* Long/wide shapes key the mode on the narrow (D-register) side.  *)
      [| V8QI; V4HI; V2SI; DI |].(idx)
  | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
  | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
  | _ -> failwith "invalid shape"
(* Modify an element type dependent on the shape of the instruction and the
   operand number.  Returns the function to apply to operand NO's element
   type: identity where all operands share a width, widen_elt/narrow_elt on
   the operands whose width differs (operand 0 is the result).  *)
let shapemap shape no =
  let ident = fun x -> x in
  match shape with
    All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
  | Binary_imm _ -> ident
  | Long | Long_noreg _ | Wide_scalar | Long_imm ->
      [| widen_elt; ident; ident |].(no)
  | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no)
  | Wide_lane -> [| widen_elt; ident; ident; ident |].(no)
  | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no)
(* Register type (D/Q) of an operand, based on shape and operand number.
   Index 0 is the result; immediates and core registers appear where the
   shape calls for them.  *)
let regmap shape no =
  match shape with
    All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
  | Long -> [| Qreg; Dreg; Dreg |].(no)
  | Wide -> [| Qreg; Qreg; Dreg |].(no)
  | Narrow -> [| Dreg; Qreg; Qreg |].(no)
  | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no)
  | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no)
  | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no)
  | Unary_scalar reg -> [| reg; Dreg; Immed |].(no)
  | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no)
  | Binary_imm reg -> [| reg; reg; Immed |].(no)
  | Long_imm -> [| Qreg; Dreg; Immed |].(no)
  | Narrow_imm -> [| Dreg; Qreg; Immed |].(no)
  | Use_operands these -> these.(no)
(* C-level vectype for operand NO of an instruction with the given SHAPE and
   element type ELT.  The element is first adjusted per-operand by shapemap
   (widening/narrowing) and the register class chosen by regmap; the inner
   recursion then translates (register class, element) pairs into concrete
   C types.  Often partially applied as (type_for_elt shape elt) : int ->
   vectype.  *)
let type_for_elt shape elt no =
  let elt = (shapemap shape no) elt in
  let reg = regmap shape no in
  let rec type_for_reg_elt reg elt =
    match reg with
      Dreg ->
        begin match elt with
          S8 -> T_int8x8
        | S16 -> T_int16x4
        | S32 -> T_int32x2
        | S64 -> T_int64x1
        | U8 -> T_uint8x8
        | U16 -> T_uint16x4
        | U32 -> T_uint32x2
        | U64 -> T_uint64x1
        | F32 -> T_float32x2
        | P8 -> T_poly8x8
        | P16 -> T_poly16x4
        | _ -> failwith "Bad elt type"
        end
    | Qreg ->
        begin match elt with
          S8 -> T_int8x16
        | S16 -> T_int16x8
        | S32 -> T_int32x4
        | S64 -> T_int64x2
        | U8 -> T_uint8x16
        | U16 -> T_uint16x8
        | U32 -> T_uint32x4
        | U64 -> T_uint64x2
        | F32 -> T_float32x4
        | P8 -> T_poly8x16
        | P16 -> T_poly16x8
        | _ -> failwith "Bad elt type"
        end
    | Corereg ->
        begin match elt with
          S8 -> T_int8
        | S16 -> T_int16
        | S32 -> T_int32
        | S64 -> T_int64
        | U8 -> T_uint8
        | U16 -> T_uint16
        | U32 -> T_uint32
        | U64 -> T_uint64
        | P8 -> T_poly8
        | P16 -> T_poly16
        | F32 -> T_float32
        | _ -> failwith "Bad elt type"
        end
    | Immed ->
        (* Placeholder bounds; real bounds are supplied by the const_shift
           family via T_immediate directly.  *)
        T_immediate (0, 0)
    | VecArray (num, sub) ->
        T_arrayof (num, type_for_reg_elt sub elt)
    | PtrTo x ->
        T_ptrto (type_for_reg_elt x elt)
    | CstPtrTo x ->
        T_ptrto (T_const (type_for_reg_elt x elt))
    (* Anything else is solely for the use of the test generator.  *)
    | _ -> assert false
  in
    type_for_reg_elt reg elt
(* Return size of a vector type, in bits: 64 for D-register types, 128 for
   Q-register types.
   @raise Not_found for scalar and other non-vector types.  *)
let vectype_size = function
    T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
  | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
  | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
  | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
  | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
  | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
  | _ -> raise Not_found
(* Opaque integer mode wide enough for NUM vectors of type ELTTYPE, keyed
   on the total size in 32-bit words (see the inttype comment above).
   Fails for sizes with no corresponding mode.  *)
let inttype_for_array num elttype =
  let numwords = num * vectype_size elttype / 32 in
  try
    List.assoc numwords
      [4, B_TImode; 6, B_EImode; 8, B_OImode; 12, B_CImode; 16, B_XImode]
  with Not_found ->
    failwith ("no int type for size " ^ string_of_int numwords)
(* These functions return pairs of (internal, external) types, where "internal"
   types are those seen by GCC, and "external" are those seen by the assembler.
   These types aren't necessarily the same, since the intrinsics can munge more
   than one C type into each assembler opcode.  *)

(* Wrap FUNC (a type-generating function), replacing the element it reports
   with its signless variant so that signed/unsigned inputs share one
   builtin (see non_signed_variant).  *)
let make_sign_invariant func shape elt =
  let arity, elt' = func shape elt in
  arity, non_signed_variant elt'
(* Don't restrict any types.  MAKE_ARITY receives the per-operand type
   function (int -> vectype) for this shape/element and builds the arity;
   the element is passed through unchanged.  *)
let elts_same make_arity shape elt =
  (make_arity (type_for_elt shape elt), elt)
(* As sign_invar_*, but when sign matters.  Each variant builds an arity
   from the per-operand type function VTYPE: index 0 is the result, 1..N
   the sources.  The "io" variants repeat the result type as the first
   source (read-write destination); "lane" variants take a trailing lane
   index operand.  *)
let elts_same_io_lane =
  elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_io =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2))

let elts_same_2_lane =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3))

(* Three-operand form coincides with the two-operand-plus-lane form.  *)
let elts_same_3 = elts_same_2_lane

let elts_same_2 =
  elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2))

let elts_same_1 =
  elts_same (fun vtype -> Arity1 (vtype 0, vtype 1))
(* Use for signed/unsigned invariant operations (i.e. where the operation
   doesn't depend on the sign of the data).  *)

let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
let sign_invar_io = make_sign_invariant elts_same_io
let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
let sign_invar_2 = make_sign_invariant elts_same_2
let sign_invar_1 = make_sign_invariant elts_same_1
(* Sign-sensitive comparison: sources keep the requested element type while
   the result is the unsigned (mask) element of the same width.  *)
let cmp_sign_matters shape elt =
  let optype = type_for_elt shape elt in
  let rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (rtype, optype 1, optype 2), elt
(* Signed/unsigned invariant comparison: like cmp_sign_matters, but the
   recorded element drops its signedness (with P8 additionally folded to
   I8).  *)
let cmp_sign_invar shape elt =
  let shape', elt' = cmp_sign_matters shape elt in
  shape',
  (match non_signed_variant elt' with
     P8 -> I8
   | x -> x)
(* Comparison (VTST) where only the element width matters.  Result type is
   the unsigned mask of the same width; the recorded element is the untyped
   bits variant.  *)
let cmp_bits shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0
  and bits_only = bits_of_elt elt in
  Arity2 (rtype, vtype 1, vtype 2), bits_only
(* Shift by a register: the shift-count operand (no. 2) is always the
   signed element of the same width, whatever the data's signedness.  *)
let reg_shift shape elt =
  let vt = type_for_elt shape elt in
  let count = type_for_elt shape (signed_of_elt elt) 2 in
  Arity2 (vt 0, vt 1, count), elt
(* Genericised constant-shift type-generating function.  MKIMM builds the
   immediate-operand type from the (shape-adjusted) element width; ?arity
   overrides the default two-operand arity and ?result derives the result
   element from the input element (defaulting to identity).  *)
let const_shift mkimm ?arity ?result shape elt =
  (* Width is taken from operand 2's element, after any per-operand
     widening/narrowing the shape implies.  *)
  let op2type = (shapemap shape 2) elt in
  let op2width = elt_width op2type in
  let op2 = mkimm op2width
  and op1 = type_for_elt shape elt 1
  and r_elt =
    match result with
      None -> elt
    | Some restriction -> restriction elt in
  let rtype = type_for_elt shape r_elt 0 in
  match arity with
    None -> Arity2 (rtype, op1, op2), elt
  | Some mkarity -> mkarity rtype op1 op2, elt
(* Use for immediate right-shifts.  The immediate type is T_immediate
   (1, width).  *)

let shift_right shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) shape elt

(* Right-shift and accumulate: the destination doubles as the first
   source.  *)
let shift_right_acc shape elt =
  const_shift (fun imm -> T_immediate (1, imm))
    ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt

(* Use for immediate right-shifts when the operation doesn't care about
   signedness.  *)

let shift_right_sign_invar =
  make_sign_invariant shift_right

(* Immediate right-shift; result is unsigned even when operand is signed.  *)

let shift_right_to_uns shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt
    shape elt

(* Immediate left-shift.  The immediate type is T_immediate
   (0, width - 1).  *)

let shift_left shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt

(* Immediate left-shift, unsigned result.  *)

let shift_left_to_uns shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt
    shape elt

(* Immediate left-shift, don't care about signs.  *)

let shift_left_sign_invar =
  make_sign_invariant shift_left

(* Shift left/right and insert: only element size matters.  *)

let shift_insert shape elt =
  let arity, elt =
    const_shift (fun imm -> T_immediate (1, imm))
      ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in
  arity, bits_of_elt elt
(* Get/set lane.  *)

(* Extract one lane.  Poly elements are recorded as the unsigned element of
   the same width, and 32-bit elements as untyped bits.  *)
let get_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity2 (vtype 0, vtype 1, vtype 2),
  (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x)

(* Insert one lane; the recorded element is the untyped bits variant.  *)
let set_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt

(* As set_lane, but records no element type at all.  *)
let set_lane_notype shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts
(* Create a vector from a 64-bit bit pattern: the single source operand is
   always the U64 scalar type, whatever element the result carries.  *)
let create_vector shape elt =
  let rtype = type_for_elt shape elt 0 in
  let srctype = type_for_elt shape U64 1 in
  Arity1 (rtype, srctype), elt
(* Conversions.  ELT must be a Conv or Cast pair (destination element
   first); the result type comes from the destination element and the
   source operand(s) from the source element.  *)
let conv make_arity shape elt =
  let edest, esrc = match elt with
    Conv (edest, esrc) | Cast (edest, esrc) -> edest, esrc
  | _ -> failwith "Non-conversion element in conversion" in
  let vtype = type_for_elt shape esrc
  and rtype = type_for_elt shape edest 0 in
  make_arity rtype vtype, elt

(* One-operand conversion (e.g. vcvt).  *)
let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1))
(* Two-operand conversion (e.g. vcvt_n, with fraction-bits operand).  *)
let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2))
(* Operation has an unsigned result even if operands are signed.  *)

let dst_unsign make_arity shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  make_arity rtype vtype, elt

let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1))
(* Wrap FUNC, replacing the element it reports with the untyped bits
   element of the same width (see bits_of_elt).  *)
let make_bits_only func shape elt =
  let arity, elt' = func shape elt in
  arity, bits_of_elt elt'

(* Extend operation.  *)

let extend shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt
(* Table look-up operations.  Operand 2 is signed/unsigned for signed/unsigned
   integer ops respectively, or unsigned for polynomial ops.  *)

let table mkarity shape elt =
  let vtype = type_for_elt shape elt in
  let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in
  mkarity vtype op2, bits_of_elt elt

(* Plain lookup (vtbl).  *)
let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2))
(* Lookup with read-write destination (vtbx).  *)
let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2))
(* Operations where only bits matter.  *)

let bits_1 = make_bits_only elts_same_1
let bits_2 = make_bits_only elts_same_2
let bits_3 = make_bits_only elts_same_3
(* Store insns: no return value (T_void result); only element bits are
   recorded.  *)
let store_1 shape elt =
  let optype = type_for_elt shape elt in
  Arity2 (T_void, optype 0, optype 1), bits_of_elt elt

let store_3 shape elt =
  let optype = type_for_elt shape elt in
  Arity3 (T_void, optype 0, optype 1, optype 2), bits_of_elt elt
(* Wrap FUNC, discarding the element type it reports entirely (NoElts).  *)
let make_notype func shape elt =
  let arity, _ = func shape elt in
  arity, NoElts

let notype_1 = make_notype elts_same_1
let notype_2 = make_notype elts_same_2
let notype_3 = make_notype elts_same_3
(* Bit-select operations (first operand is unsigned int).  No element type
   is recorded.  *)

let bit_select shape elt =
  let vtype = type_for_elt shape elt
  and itype = type_for_elt shape (unsigned_of_elt elt) in
  Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts
(* Common lists of supported element types.  *)

(* Signed and unsigned 8/16/32-bit integers.  *)
let su_8_32 = [S8; S16; S32; U8; U16; U32]
(* As su_8_32, plus 64-bit.  *)
let su_8_64 = S64 :: U64 :: su_8_32
(* Signed and unsigned 16/32/64-bit integers.  *)
let su_16_64 = [S16; S32; S64; U16; U32; U64]
(* As su_8_32 / su_8_64, plus polynomial and float.  *)
let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32
let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
707 let ops =
|
|
708 [
|
|
709 (* Addition. *)
|
|
710 Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_64;
|
|
711 Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64;
|
|
712 Vadd, [], Long, "vaddl", elts_same_2, su_8_32;
|
|
713 Vadd, [], Wide, "vaddw", elts_same_2, su_8_32;
|
|
714 Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32;
|
|
715 Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32;
|
|
716 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
|
|
717 All (3, Dreg), "vRhadd", elts_same_2, su_8_32;
|
|
718 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving],
|
|
719 All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32;
|
|
720 Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64;
|
|
721 Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64;
|
|
722 Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64;
|
|
723 Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half],
|
|
724 Narrow, "vRaddhn", sign_invar_2, su_16_64;
|
|
725
|
|
726 (* Multiplication. *)
|
|
727 Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32;
|
|
728 Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32;
|
|
729 Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh",
|
|
730 elts_same_2, [S16; S32];
|
|
731 Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ",
|
|
732 elts_same_2, [S16; S32];
|
|
733 Vmul,
|
|
734 [Saturating; Rounding; Doubling; High_half;
|
|
735 Instruction_name ["vqrdmulh"]],
|
|
736 All (3, Dreg), "vqRdmulh",
|
|
737 elts_same_2, [S16; S32];
|
|
738 Vmul,
|
|
739 [Saturating; Rounding; Doubling; High_half;
|
|
740 Instruction_name ["vqrdmulh"]],
|
|
741 All (3, Qreg), "vqRdmulhQ",
|
|
742 elts_same_2, [S16; S32];
|
|
743 Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32;
|
|
744 Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32];
|
|
745
|
|
746 (* Multiply-accumulate. *)
|
|
747 Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32;
|
|
748 Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32;
|
|
749 Vmla, [], Long, "vmlal", elts_same_io, su_8_32;
|
|
750 Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32];
|
|
751
|
|
752 (* Multiply-subtract. *)
|
|
753 Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32;
|
|
754 Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32;
|
|
755 Vmls, [], Long, "vmlsl", elts_same_io, su_8_32;
|
|
756 Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32];
|
|
757
|
|
758 (* Subtraction. *)
|
|
759 Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_64;
|
|
760 Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64;
|
|
761 Vsub, [], Long, "vsubl", elts_same_2, su_8_32;
|
|
762 Vsub, [], Wide, "vsubw", elts_same_2, su_8_32;
|
|
763 Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32;
|
|
764 Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32;
|
|
765 Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64;
|
|
766 Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64;
|
|
767 Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64;
|
|
768 Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half],
|
|
769 Narrow, "vRsubhn", sign_invar_2, su_16_64;
|
|
770
|
|
771 (* Comparison, equal. *)
|
|
772 Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32;
|
|
773 Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32;
|
|
774
|
|
775 (* Comparison, greater-than or equal. *)
|
|
776 Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32;
|
|
777 Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32;
|
|
778
|
|
779 (* Comparison, less-than or equal. *)
|
|
780 Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters,
|
|
781 F32 :: su_8_32;
|
|
782 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"],
|
|
783 All (3, Qreg), "vcleQ", cmp_sign_matters,
|
|
784 F32 :: su_8_32;
|
|
785
|
|
786 (* Comparison, greater-than. *)
|
|
787 Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32;
|
|
788 Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32;
|
|
789
|
|
790 (* Comparison, less-than. *)
|
|
791 Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters,
|
|
792 F32 :: su_8_32;
|
|
793 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"],
|
|
794 All (3, Qreg), "vcltQ", cmp_sign_matters,
|
|
795 F32 :: su_8_32;
|
|
796
|
|
797 (* Compare absolute greater-than or equal. *)
|
|
798 Vcage, [Instruction_name ["vacge"]],
|
|
799 All (3, Dreg), "vcage", cmp_sign_matters, [F32];
|
|
800 Vcage, [Instruction_name ["vacge"]],
|
|
801 All (3, Qreg), "vcageQ", cmp_sign_matters, [F32];
|
|
802
|
|
803 (* Compare absolute less-than or equal. *)
|
|
804 Vcale, [Instruction_name ["vacge"]; Flipped "vcage"],
|
|
805 All (3, Dreg), "vcale", cmp_sign_matters, [F32];
|
|
806 Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"],
|
|
807 All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32];
|
|
808
|
|
809 (* Compare absolute greater-than or equal. *)
|
|
810 Vcagt, [Instruction_name ["vacgt"]],
|
|
811 All (3, Dreg), "vcagt", cmp_sign_matters, [F32];
|
|
812 Vcagt, [Instruction_name ["vacgt"]],
|
|
813 All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32];
|
|
814
|
|
815 (* Compare absolute less-than or equal. *)
|
|
816 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"],
|
|
817 All (3, Dreg), "vcalt", cmp_sign_matters, [F32];
|
|
818 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"],
|
|
819 All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32];
|
|
820
|
|
821 (* Test bits. *)
|
|
822 Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32;
|
|
823 Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32;
|
|
824
|
|
825 (* Absolute difference. *)
|
|
826 Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32;
|
|
827 Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32;
|
|
828 Vabd, [], Long, "vabdl", elts_same_2, su_8_32;
|
|
829
|
|
830 (* Absolute difference and accumulate. *)
|
|
831 Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32;
|
|
832 Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32;
|
|
833 Vaba, [], Long, "vabal", elts_same_io, su_8_32;
|
|
834
|
|
835 (* Max. *)
|
|
836 Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32;
|
|
837 Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32;
|
|
838
|
|
839 (* Min. *)
|
|
840 Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32;
|
|
841 Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32;
|
|
842
|
|
843 (* Pairwise add. *)
|
|
844 Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32;
|
|
845 Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32;
|
|
846 Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32;
|
|
847
|
|
848 (* Pairwise add, widen and accumulate. *)
|
|
849 Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32;
|
|
850 Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32;
|
|
851
|
|
852 (* Folding maximum, minimum. *)
|
|
853 Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32;
|
|
854 Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32;
|
|
855
|
|
856 (* Reciprocal step. *)
|
|
857 Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32];
|
|
858 Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32];
|
|
859 Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32];
|
|
860 Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32];
|
|
861
|
|
862 (* Vector shift left. *)
|
|
863 Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64;
|
|
864 Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64;
|
|
865 Vshl, [Instruction_name ["vrshl"]; Rounding],
|
|
866 All (3, Dreg), "vRshl", reg_shift, su_8_64;
|
|
867 Vshl, [Instruction_name ["vrshl"]; Rounding],
|
|
868 All (3, Qreg), "vRshlQ", reg_shift, su_8_64;
|
|
869 Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64;
|
|
870 Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64;
|
|
871 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
|
|
872 All (3, Dreg), "vqRshl", reg_shift, su_8_64;
|
|
873 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding],
|
|
874 All (3, Qreg), "vqRshlQ", reg_shift, su_8_64;
|
|
875
|
|
876 (* Vector shift right by constant. *)
|
|
877 Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64;
|
|
878 Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64;
|
|
879 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg,
|
|
880 "vRshr_n", shift_right, su_8_64;
|
|
881 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg,
|
|
882 "vRshrQ_n", shift_right, su_8_64;
|
|
883 Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64;
|
|
884 Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n",
|
|
885 shift_right_sign_invar, su_16_64;
|
|
886 Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64;
|
|
887 Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm,
|
|
888 "vqRshrn_n", shift_right, su_16_64;
|
|
889 Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n",
|
|
890 shift_right_to_uns, [S16; S32; S64];
|
|
891 Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding],
|
|
892 Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64];
|
|
893
|
|
894 (* Vector shift left by constant. *)
|
|
895 Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64;
|
|
896 Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64;
|
|
897 Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64;
|
|
898 Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64;
|
|
899 Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n",
|
|
900 shift_left_to_uns, [S8; S16; S32; S64];
|
|
901 Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n",
|
|
902 shift_left_to_uns, [S8; S16; S32; S64];
|
|
903 Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32;
|
|
904
|
|
905 (* Vector shift right by constant and accumulate. *)
|
|
906 Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64;
|
|
907 Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64;
|
|
908 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg,
|
|
909 "vRsra_n", shift_right_acc, su_8_64;
|
|
910 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg,
|
|
911 "vRsraQ_n", shift_right_acc, su_8_64;
|
|
912
|
|
913 (* Vector shift right and insert. *)
|
|
914 Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert,
|
|
915 P8 :: P16 :: su_8_64;
|
|
916 Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert,
|
|
917 P8 :: P16 :: su_8_64;
|
|
918
|
|
919 (* Vector shift left and insert. *)
|
|
920 Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert,
|
|
921 P8 :: P16 :: su_8_64;
|
|
922 Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert,
|
|
923 P8 :: P16 :: su_8_64;
|
|
924
|
|
925 (* Absolute value. *)
|
|
926 Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32];
|
|
927 Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32];
|
|
928 Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32];
|
|
929 Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32];
|
|
930
|
|
931 (* Negate. *)
|
|
932 Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32];
|
|
933 Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32];
|
|
934 Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32];
|
|
935 Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32];
|
|
936
|
|
937 (* Bitwise not. *)
|
|
938 Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32;
|
|
939 Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32;
|
|
940
|
|
941 (* Count leading sign bits. *)
|
|
942 Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32];
|
|
943 Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32];
|
|
944
|
|
945 (* Count leading zeros. *)
|
|
946 Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32;
|
|
947 Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32;
|
|
948
|
|
949 (* Count number of set bits. *)
|
|
950 Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8];
|
|
951 Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8];
|
|
952
|
|
953 (* Reciprocal estimate. *)
|
|
954 Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32];
|
|
955 Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32];
|
|
956
|
|
957 (* Reciprocal square-root estimate. *)
|
|
958 Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32];
|
|
959 Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32];
|
|
960
|
|
961 (* Get lanes from a vector. *)
|
|
962 Vget_lane,
|
|
963 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
|
|
964 Instruction_name ["vmov"]],
|
|
965 Use_operands [| Corereg; Dreg; Immed |],
|
|
966 "vget_lane", get_lane, pf_su_8_32;
|
|
967 Vget_lane,
|
|
968 [InfoWord;
|
|
969 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
|
|
970 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
|
|
971 Use_operands [| Corereg; Dreg; Immed |],
|
|
972 "vget_lane", notype_2, [S64; U64];
|
|
973 Vget_lane,
|
|
974 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]];
|
|
975 Instruction_name ["vmov"]],
|
|
976 Use_operands [| Corereg; Qreg; Immed |],
|
|
977 "vgetQ_lane", get_lane, pf_su_8_32;
|
|
978 Vget_lane,
|
|
979 [InfoWord;
|
|
980 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]];
|
|
981 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
|
|
982 Use_operands [| Corereg; Qreg; Immed |],
|
|
983 "vgetQ_lane", notype_2, [S64; U64];
|
|
984
|
|
985 (* Set lanes in a vector. *)
|
|
986 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
|
|
987 Instruction_name ["vmov"]],
|
|
988 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
|
|
989 set_lane, pf_su_8_32;
|
|
990 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
|
|
991 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
|
|
992 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane",
|
|
993 set_lane_notype, [S64; U64];
|
|
994 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]];
|
|
995 Instruction_name ["vmov"]],
|
|
996 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
|
|
997 set_lane, pf_su_8_32;
|
|
998 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]];
|
|
999 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)],
|
|
1000 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane",
|
|
1001 set_lane_notype, [S64; U64];
|
|
1002
|
|
1003 (* Create vector from literal bit pattern. *)
|
|
1004 Vcreate,
|
|
1005 [No_op], (* Not really, but it can yield various things that are too
|
|
1006 hard for the test generator at this time. *)
|
|
1007 Use_operands [| Dreg; Corereg |], "vcreate", create_vector,
|
|
1008 pf_su_8_64;
|
|
1009
|
|
1010 (* Set all lanes to the same value. *)
|
|
1011 Vdup_n, [],
|
|
1012 Use_operands [| Dreg; Corereg |], "vdup_n", bits_1,
|
|
1013 pf_su_8_32;
|
|
1014 Vdup_n,
|
|
1015 [Instruction_name ["vmov"];
|
|
1016 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
|
|
1017 Use_operands [| Dreg; Corereg |], "vdup_n", notype_1,
|
|
1018 [S64; U64];
|
|
1019 Vdup_n, [],
|
|
1020 Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1,
|
|
1021 pf_su_8_32;
|
|
1022 Vdup_n,
|
|
1023 [Instruction_name ["vmov"];
|
|
1024 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
|
|
1025 Use_operands [| Dreg; Corereg; Corereg |]]],
|
|
1026 Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1,
|
|
1027 [S64; U64];
|
|
1028
|
|
1029 (* These are just aliases for the above. *)
|
|
1030 Vmov_n,
|
|
1031 [Builtin_name "vdup_n"],
|
|
1032 Use_operands [| Dreg; Corereg |],
|
|
1033 "vmov_n", bits_1, pf_su_8_32;
|
|
1034 Vmov_n,
|
|
1035 [Builtin_name "vdup_n";
|
|
1036 Instruction_name ["vmov"];
|
|
1037 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]],
|
|
1038 Use_operands [| Dreg; Corereg |],
|
|
1039 "vmov_n", notype_1, [S64; U64];
|
|
1040 Vmov_n,
|
|
1041 [Builtin_name "vdupQ_n"],
|
|
1042 Use_operands [| Qreg; Corereg |],
|
|
1043 "vmovQ_n", bits_1, pf_su_8_32;
|
|
1044 Vmov_n,
|
|
1045 [Builtin_name "vdupQ_n";
|
|
1046 Instruction_name ["vmov"];
|
|
1047 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |];
|
|
1048 Use_operands [| Dreg; Corereg; Corereg |]]],
|
|
1049 Use_operands [| Qreg; Corereg |],
|
|
1050 "vmovQ_n", notype_1, [S64; U64];
|
|
1051
|
|
1052 (* Duplicate, lane version. We can't use Use_operands here because the
|
|
1053 rightmost register (always Dreg) would be picked up by find_key_operand,
|
|
1054 when we want the leftmost register to be used in this case (otherwise
|
|
1055 the modes are indistinguishable in neon.md, etc.) *)
|
|
1056 Vdup_lane,
|
|
1057 [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]],
|
|
1058 Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32;
|
|
1059 Vdup_lane,
|
|
1060 [No_op; Const_valuator (fun _ -> 0)],
|
|
1061 Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64];
|
|
1062 Vdup_lane,
|
|
1063 [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]],
|
|
1064 Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32;
|
|
1065 Vdup_lane,
|
|
1066 [No_op; Const_valuator (fun _ -> 0)],
|
|
1067 Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64];
|
|
1068
|
|
1069 (* Combining vectors. *)
|
|
1070 Vcombine, [No_op],
|
|
1071 Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2,
|
|
1072 pf_su_8_64;
|
|
1073
|
|
1074 (* Splitting vectors. *)
|
|
1075 Vget_high, [No_op],
|
|
1076 Use_operands [| Dreg; Qreg |], "vget_high",
|
|
1077 notype_1, pf_su_8_64;
|
|
1078 Vget_low, [Instruction_name ["vmov"];
|
|
1079 Disassembles_as [Use_operands [| Dreg; Dreg |]]],
|
|
1080 Use_operands [| Dreg; Qreg |], "vget_low",
|
|
1081 notype_1, pf_su_8_64;
|
|
1082
|
|
1083 (* Conversions. *)
|
|
1084 Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1,
|
|
1085 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
|
|
1086 Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1,
|
|
1087 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
|
|
1088 Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2,
|
|
1089 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
|
|
1090 Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2,
|
|
1091 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)];
|
|
1092
|
|
1093 (* Move, narrowing. *)
|
|
1094 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]],
|
|
1095 Narrow, "vmovn", sign_invar_1, su_16_64;
|
|
1096 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating],
|
|
1097 Narrow, "vqmovn", elts_same_1, su_16_64;
|
|
1098 Vmovn,
|
|
1099 [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign],
|
|
1100 Narrow, "vqmovun", dst_unsign_1,
|
|
1101 [S16; S32; S64];
|
|
1102
|
|
1103 (* Move, long. *)
|
|
1104 Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]],
|
|
1105 Long, "vmovl", elts_same_1, su_8_32;
|
|
1106
|
|
1107 (* Table lookup. *)
|
|
1108 Vtbl 1,
|
|
1109 [Instruction_name ["vtbl"];
|
|
1110 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
|
|
1111 Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8];
|
|
1112 Vtbl 2, [Instruction_name ["vtbl"]],
|
|
1113 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2,
|
|
1114 [U8; S8; P8];
|
|
1115 Vtbl 3, [Instruction_name ["vtbl"]],
|
|
1116 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2,
|
|
1117 [U8; S8; P8];
|
|
1118 Vtbl 4, [Instruction_name ["vtbl"]],
|
|
1119 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2,
|
|
1120 [U8; S8; P8];
|
|
1121
|
|
1122 (* Extended table lookup. *)
|
|
1123 Vtbx 1,
|
|
1124 [Instruction_name ["vtbx"];
|
|
1125 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]],
|
|
1126 Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8];
|
|
1127 Vtbx 2, [Instruction_name ["vtbx"]],
|
|
1128 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io,
|
|
1129 [U8; S8; P8];
|
|
1130 Vtbx 3, [Instruction_name ["vtbx"]],
|
|
1131 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io,
|
|
1132 [U8; S8; P8];
|
|
1133 Vtbx 4, [Instruction_name ["vtbx"]],
|
|
1134 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io,
|
|
1135 [U8; S8; P8];
|
|
1136
|
|
1137 (* Multiply, lane. (note: these were undocumented at the time of
|
|
1138 writing). *)
|
|
1139 Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane,
|
|
1140 [S16; S32; U16; U32; F32];
|
|
1141 Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane,
|
|
1142 [S16; S32; U16; U32; F32];
|
|
1143
|
|
1144 (* Multiply-accumulate, lane. *)
|
|
1145 Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane,
|
|
1146 [S16; S32; U16; U32; F32];
|
|
1147 Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane,
|
|
1148 [S16; S32; U16; U32; F32];
|
|
1149 Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane,
|
|
1150 [S16; S32; U16; U32];
|
|
1151 Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane",
|
|
1152 elts_same_io_lane, [S16; S32];
|
|
1153
|
|
1154 (* Multiply-subtract, lane. *)
|
|
1155 Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane,
|
|
1156 [S16; S32; U16; U32; F32];
|
|
1157 Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane,
|
|
1158 [S16; S32; U16; U32; F32];
|
|
1159 Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane,
|
|
1160 [S16; S32; U16; U32];
|
|
1161 Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane",
|
|
1162 elts_same_io_lane, [S16; S32];
|
|
1163
|
|
1164 (* Long multiply, lane. *)
|
|
1165 Vmull_lane, [],
|
|
1166 Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32];
|
|
1167
|
|
1168 (* Saturating doubling long multiply, lane. *)
|
|
1169 Vqdmull_lane, [Saturating; Doubling],
|
|
1170 Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32];
|
|
1171
|
|
1172 (* Saturating doubling long multiply high, lane. *)
|
|
1173 Vqdmulh_lane, [Saturating; Halving],
|
|
1174 By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32];
|
|
1175 Vqdmulh_lane, [Saturating; Halving],
|
|
1176 By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32];
|
|
1177 Vqdmulh_lane, [Saturating; Halving; Rounding;
|
|
1178 Instruction_name ["vqrdmulh"]],
|
|
1179 By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32];
|
|
1180 Vqdmulh_lane, [Saturating; Halving; Rounding;
|
|
1181 Instruction_name ["vqrdmulh"]],
|
|
1182 By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32];
|
|
1183
|
|
1184 (* Vector multiply by scalar. *)
|
|
1185 Vmul_n, [InfoWord;
|
|
1186 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
|
|
1187 Use_operands [| Dreg; Dreg; Corereg |], "vmul_n",
|
|
1188 sign_invar_2, [S16; S32; U16; U32; F32];
|
|
1189 Vmul_n, [InfoWord;
|
|
1190 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
|
|
1191 Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n",
|
|
1192 sign_invar_2, [S16; S32; U16; U32; F32];
|
|
1193
|
|
1194 (* Vector long multiply by scalar. *)
|
|
1195 Vmull_n, [Instruction_name ["vmull"];
|
|
1196 Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]],
|
|
1197 Wide_scalar, "vmull_n",
|
|
1198 elts_same_2, [S16; S32; U16; U32];
|
|
1199
|
|
1200 (* Vector saturating doubling long multiply by scalar. *)
|
|
1201 Vqdmull_n, [Saturating; Doubling;
|
|
1202 Disassembles_as [Use_operands [| Qreg; Dreg;
|
|
1203 Element_of_dreg |]]],
|
|
1204 Wide_scalar, "vqdmull_n",
|
|
1205 elts_same_2, [S16; S32];
|
|
1206
|
|
1207 (* Vector saturating doubling long multiply high by scalar. *)
|
|
1208 Vqdmulh_n,
|
|
1209 [Saturating; Halving; InfoWord;
|
|
1210 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
|
|
1211 Use_operands [| Qreg; Qreg; Corereg |],
|
|
1212 "vqdmulhQ_n", elts_same_2, [S16; S32];
|
|
1213 Vqdmulh_n,
|
|
1214 [Saturating; Halving; InfoWord;
|
|
1215 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
|
|
1216 Use_operands [| Dreg; Dreg; Corereg |],
|
|
1217 "vqdmulh_n", elts_same_2, [S16; S32];
|
|
1218 Vqdmulh_n,
|
|
1219 [Saturating; Halving; Rounding; InfoWord;
|
|
1220 Instruction_name ["vqrdmulh"];
|
|
1221 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
|
|
1222 Use_operands [| Qreg; Qreg; Corereg |],
|
|
1223 "vqRdmulhQ_n", elts_same_2, [S16; S32];
|
|
1224 Vqdmulh_n,
|
|
1225 [Saturating; Halving; Rounding; InfoWord;
|
|
1226 Instruction_name ["vqrdmulh"];
|
|
1227 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
|
|
1228 Use_operands [| Dreg; Dreg; Corereg |],
|
|
1229 "vqRdmulh_n", elts_same_2, [S16; S32];
|
|
1230
|
|
1231 (* Vector multiply-accumulate by scalar. *)
|
|
1232 Vmla_n, [InfoWord;
|
|
1233 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
|
|
1234 Use_operands [| Dreg; Dreg; Corereg |], "vmla_n",
|
|
1235 sign_invar_io, [S16; S32; U16; U32; F32];
|
|
1236 Vmla_n, [InfoWord;
|
|
1237 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
|
|
1238 Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n",
|
|
1239 sign_invar_io, [S16; S32; U16; U32; F32];
|
|
1240 Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32];
|
|
1241 Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io,
|
|
1242 [S16; S32];
|
|
1243
|
|
1244 (* Vector multiply subtract by scalar. *)
|
|
1245 Vmls_n, [InfoWord;
|
|
1246 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]],
|
|
1247 Use_operands [| Dreg; Dreg; Corereg |], "vmls_n",
|
|
1248 sign_invar_io, [S16; S32; U16; U32; F32];
|
|
1249 Vmls_n, [InfoWord;
|
|
1250 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]],
|
|
1251 Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n",
|
|
1252 sign_invar_io, [S16; S32; U16; U32; F32];
|
|
1253 Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32];
|
|
1254 Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io,
|
|
1255 [S16; S32];
|
|
1256
|
|
1257 (* Vector extract. *)
|
|
1258 Vext, [Const_valuator (fun _ -> 0)],
|
|
1259 Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend,
|
|
1260 pf_su_8_64;
|
|
1261 Vext, [Const_valuator (fun _ -> 0)],
|
|
1262 Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend,
|
|
1263 pf_su_8_64;
|
|
1264
|
|
1265 (* Reverse elements. *)
|
|
1266 Vrev64, [], All (2, Dreg), "vrev64", bits_1, P8 :: P16 :: F32 :: su_8_32;
|
|
1267 Vrev64, [], All (2, Qreg), "vrev64Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
|
|
1268 Vrev32, [], All (2, Dreg), "vrev32", bits_1, [P8; P16; S8; U8; S16; U16];
|
|
1269 Vrev32, [], All (2, Qreg), "vrev32Q", bits_1, [P8; P16; S8; U8; S16; U16];
|
|
1270 Vrev16, [], All (2, Dreg), "vrev16", bits_1, [P8; S8; U8];
|
|
1271 Vrev16, [], All (2, Qreg), "vrev16Q", bits_1, [P8; S8; U8];
|
|
1272
|
|
1273 (* Bit selection. *)
|
|
1274 Vbsl,
|
|
1275 [Instruction_name ["vbsl"; "vbit"; "vbif"];
|
|
1276 Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]],
|
|
1277 Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select,
|
|
1278 pf_su_8_64;
|
|
1279 Vbsl,
|
|
1280 [Instruction_name ["vbsl"; "vbit"; "vbif"];
|
|
1281 Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]],
|
|
1282 Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select,
|
|
1283 pf_su_8_64;
|
|
1284
|
|
1285 (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards
|
|
1286 generating good code for intrinsics which return structure types --
|
|
1287 builtins work well by themselves (and understand that the values being
|
|
1288 stored on e.g. the stack also reside in registers, so can optimise the
|
|
1289 stores away entirely if the results are used immediately), but
|
|
1290 intrinsics are very much less efficient. Maybe something can be improved
|
|
1291 re: inlining, or tweaking the ABI used for intrinsics (a special call
|
|
1292 attribute?).
|
|
1293 *)
|
|
1294 Vtrn, [ReturnPtr], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32;
|
|
1295 Vtrn, [ReturnPtr], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32;
|
|
1296
|
|
1297 (* Zip elements. *)
|
|
1298 Vzip, [ReturnPtr], Pair_result Dreg, "vzip", bits_2, pf_su_8_32;
|
|
1299 Vzip, [ReturnPtr], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32;
|
|
1300
|
|
1301 (* Unzip elements. *)
|
|
1302 Vuzp, [ReturnPtr], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32;
|
|
1303 Vuzp, [ReturnPtr], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32;
|
|
1304
|
|
1305 (* Element/structure loads. VLD1 variants. *)
|
|
1306 Vldx 1,
|
|
1307 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
|
|
1308 CstPtrTo Corereg |]]],
|
|
1309 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1,
|
|
1310 pf_su_8_64;
|
|
1311 Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
|
|
1312 CstPtrTo Corereg |]]],
|
|
1313 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1,
|
|
1314 pf_su_8_64;
|
|
1315
|
|
1316 Vldx_lane 1,
|
|
1317 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
|
|
1318 CstPtrTo Corereg |]]],
|
|
1319 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
|
|
1320 "vld1_lane", bits_3, pf_su_8_32;
|
|
1321 Vldx_lane 1,
|
|
1322 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
|
|
1323 CstPtrTo Corereg |]];
|
|
1324 Const_valuator (fun _ -> 0)],
|
|
1325 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |],
|
|
1326 "vld1_lane", bits_3, [S64; U64];
|
|
1327 Vldx_lane 1,
|
|
1328 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
|
|
1329 CstPtrTo Corereg |]]],
|
|
1330 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
|
|
1331 "vld1Q_lane", bits_3, pf_su_8_32;
|
|
1332 Vldx_lane 1,
|
|
1333 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
|
|
1334 CstPtrTo Corereg |]]],
|
|
1335 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |],
|
|
1336 "vld1Q_lane", bits_3, [S64; U64];
|
|
1337
|
|
1338 Vldx_dup 1,
|
|
1339 [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg);
|
|
1340 CstPtrTo Corereg |]]],
|
|
1341 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
|
|
1342 bits_1, pf_su_8_32;
|
|
1343 Vldx_dup 1,
|
|
1344 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
|
|
1345 CstPtrTo Corereg |]]],
|
|
1346 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup",
|
|
1347 bits_1, [S64; U64];
|
|
1348 Vldx_dup 1,
|
|
1349 [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg);
|
|
1350 CstPtrTo Corereg |]]],
|
|
1351 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
|
|
1352 bits_1, pf_su_8_32;
|
|
1353 Vldx_dup 1,
|
|
1354 [Disassembles_as [Use_operands [| VecArray (2, Dreg);
|
|
1355 CstPtrTo Corereg |]]],
|
|
1356 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup",
|
|
1357 bits_1, [S64; U64];
|
|
1358
|
|
1359 (* VST1 variants. *)
|
|
1360 Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg);
|
|
1361 PtrTo Corereg |]]],
|
|
1362 Use_operands [| PtrTo Corereg; Dreg |], "vst1",
|
|
1363 store_1, pf_su_8_64;
|
|
1364 Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
|
|
1365 PtrTo Corereg |]]],
|
|
1366 Use_operands [| PtrTo Corereg; Qreg |], "vst1Q",
|
|
1367 store_1, pf_su_8_64;
|
|
1368
|
|
1369 Vstx_lane 1,
|
|
1370 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
|
|
1371 CstPtrTo Corereg |]]],
|
|
1372 Use_operands [| PtrTo Corereg; Dreg; Immed |],
|
|
1373 "vst1_lane", store_3, pf_su_8_32;
|
|
1374 Vstx_lane 1,
|
|
1375 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
|
|
1376 CstPtrTo Corereg |]];
|
|
1377 Const_valuator (fun _ -> 0)],
|
|
1378 Use_operands [| PtrTo Corereg; Dreg; Immed |],
|
|
1379 "vst1_lane", store_3, [U64; S64];
|
|
1380 Vstx_lane 1,
|
|
1381 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg);
|
|
1382 CstPtrTo Corereg |]]],
|
|
1383 Use_operands [| PtrTo Corereg; Qreg; Immed |],
|
|
1384 "vst1Q_lane", store_3, pf_su_8_32;
|
|
1385 Vstx_lane 1,
|
|
1386 [Disassembles_as [Use_operands [| VecArray (1, Dreg);
|
|
1387 CstPtrTo Corereg |]]],
|
|
1388 Use_operands [| PtrTo Corereg; Qreg; Immed |],
|
|
1389 "vst1Q_lane", store_3, [U64; S64];
|
|
1390
|
|
1391 (* VLD2 variants. *)
|
|
1392 Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
|
|
1393 "vld2", bits_1, pf_su_8_32;
|
|
1394 Vldx 2, [Instruction_name ["vld1"]],
|
|
1395 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
|
|
1396 "vld2", bits_1, [S64; U64];
|
|
1397 Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
|
|
1398 CstPtrTo Corereg |];
|
|
1399 Use_operands [| VecArray (2, Dreg);
|
|
1400 CstPtrTo Corereg |]]],
|
|
1401 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |],
|
|
1402 "vld2Q", bits_1, pf_su_8_32;
|
|
1403
|
|
1404 Vldx_lane 2,
|
|
1405 [Disassembles_as [Use_operands
|
|
1406 [| VecArray (2, Element_of_dreg);
|
|
1407 CstPtrTo Corereg |]]],
|
|
1408 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg;
|
|
1409 VecArray (2, Dreg); Immed |],
|
|
1410 "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
|
|
1411 Vldx_lane 2,
|
|
1412 [Disassembles_as [Use_operands
|
|
1413 [| VecArray (2, Element_of_dreg);
|
|
1414 CstPtrTo Corereg |]]],
|
|
1415 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg;
|
|
1416 VecArray (2, Qreg); Immed |],
|
|
1417 "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
|
|
1418
|
|
1419 Vldx_dup 2,
|
|
1420 [Disassembles_as [Use_operands
|
|
1421 [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]],
|
|
1422 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
|
|
1423 "vld2_dup", bits_1, pf_su_8_32;
|
|
1424 Vldx_dup 2,
|
|
1425 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
|
|
1426 [| VecArray (2, Dreg); CstPtrTo Corereg |]]],
|
|
1427 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |],
|
|
1428 "vld2_dup", bits_1, [S64; U64];
|
|
1429
|
|
1430 (* VST2 variants. *)
|
|
1431 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
|
|
1432 PtrTo Corereg |]]],
|
|
1433 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
|
|
1434 store_1, pf_su_8_32;
|
|
1435 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
|
|
1436 PtrTo Corereg |]];
|
|
1437 Instruction_name ["vst1"]],
|
|
1438 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2",
|
|
1439 store_1, [S64; U64];
|
|
1440 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg);
|
|
1441 PtrTo Corereg |];
|
|
1442 Use_operands [| VecArray (2, Dreg);
|
|
1443 PtrTo Corereg |]]],
|
|
1444 Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q",
|
|
1445 store_1, pf_su_8_32;
|
|
1446
|
|
1447 Vstx_lane 2,
|
|
1448 [Disassembles_as [Use_operands
|
|
1449 [| VecArray (2, Element_of_dreg);
|
|
1450 CstPtrTo Corereg |]]],
|
|
1451 Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane",
|
|
1452 store_3, P8 :: P16 :: F32 :: su_8_32;
|
|
1453 Vstx_lane 2,
|
|
1454 [Disassembles_as [Use_operands
|
|
1455 [| VecArray (2, Element_of_dreg);
|
|
1456 CstPtrTo Corereg |]]],
|
|
1457 Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane",
|
|
1458 store_3, [P16; F32; U16; U32; S16; S32];
|
|
1459
|
|
1460 (* VLD3 variants. *)
|
|
1461 Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
|
|
1462 "vld3", bits_1, pf_su_8_32;
|
|
1463 Vldx 3, [Instruction_name ["vld1"]],
|
|
1464 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
|
|
1465 "vld3", bits_1, [S64; U64];
|
|
1466 Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
|
|
1467 CstPtrTo Corereg |];
|
|
1468 Use_operands [| VecArray (3, Dreg);
|
|
1469 CstPtrTo Corereg |]]],
|
|
1470 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |],
|
|
1471 "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
|
|
1472
|
|
1473 Vldx_lane 3,
|
|
1474 [Disassembles_as [Use_operands
|
|
1475 [| VecArray (3, Element_of_dreg);
|
|
1476 CstPtrTo Corereg |]]],
|
|
1477 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg;
|
|
1478 VecArray (3, Dreg); Immed |],
|
|
1479 "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
|
|
1480 Vldx_lane 3,
|
|
1481 [Disassembles_as [Use_operands
|
|
1482 [| VecArray (3, Element_of_dreg);
|
|
1483 CstPtrTo Corereg |]]],
|
|
1484 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg;
|
|
1485 VecArray (3, Qreg); Immed |],
|
|
1486 "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
|
|
1487
|
|
1488 Vldx_dup 3,
|
|
1489 [Disassembles_as [Use_operands
|
|
1490 [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]],
|
|
1491 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
|
|
1492 "vld3_dup", bits_1, pf_su_8_32;
|
|
1493 Vldx_dup 3,
|
|
1494 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
|
|
1495 [| VecArray (3, Dreg); CstPtrTo Corereg |]]],
|
|
1496 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |],
|
|
1497 "vld3_dup", bits_1, [S64; U64];
|
|
1498
|
|
1499 (* VST3 variants. *)
|
|
1500 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
|
|
1501 PtrTo Corereg |]]],
|
|
1502 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
|
|
1503 store_1, pf_su_8_32;
|
|
1504 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
|
|
1505 PtrTo Corereg |]];
|
|
1506 Instruction_name ["vst1"]],
|
|
1507 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3",
|
|
1508 store_1, [S64; U64];
|
|
1509 Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg);
|
|
1510 PtrTo Corereg |];
|
|
1511 Use_operands [| VecArray (3, Dreg);
|
|
1512 PtrTo Corereg |]]],
|
|
1513 Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q",
|
|
1514 store_1, pf_su_8_32;
|
|
1515
|
|
1516 Vstx_lane 3,
|
|
1517 [Disassembles_as [Use_operands
|
|
1518 [| VecArray (3, Element_of_dreg);
|
|
1519 CstPtrTo Corereg |]]],
|
|
1520 Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane",
|
|
1521 store_3, P8 :: P16 :: F32 :: su_8_32;
|
|
1522 Vstx_lane 3,
|
|
1523 [Disassembles_as [Use_operands
|
|
1524 [| VecArray (3, Element_of_dreg);
|
|
1525 CstPtrTo Corereg |]]],
|
|
1526 Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane",
|
|
1527 store_3, [P16; F32; U16; U32; S16; S32];
|
|
1528
|
|
1529 (* VLD4/VST4 variants. *)
|
|
1530 Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
|
|
1531 "vld4", bits_1, pf_su_8_32;
|
|
1532 Vldx 4, [Instruction_name ["vld1"]],
|
|
1533 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
|
|
1534 "vld4", bits_1, [S64; U64];
|
|
1535 Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
|
|
1536 CstPtrTo Corereg |];
|
|
1537 Use_operands [| VecArray (4, Dreg);
|
|
1538 CstPtrTo Corereg |]]],
|
|
1539 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |],
|
|
1540 "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32;
|
|
1541
|
|
1542 Vldx_lane 4,
|
|
1543 [Disassembles_as [Use_operands
|
|
1544 [| VecArray (4, Element_of_dreg);
|
|
1545 CstPtrTo Corereg |]]],
|
|
1546 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg;
|
|
1547 VecArray (4, Dreg); Immed |],
|
|
1548 "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32;
|
|
1549 Vldx_lane 4,
|
|
1550 [Disassembles_as [Use_operands
|
|
1551 [| VecArray (4, Element_of_dreg);
|
|
1552 CstPtrTo Corereg |]]],
|
|
1553 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg;
|
|
1554 VecArray (4, Qreg); Immed |],
|
|
1555 "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32];
|
|
1556
|
|
1557 Vldx_dup 4,
|
|
1558 [Disassembles_as [Use_operands
|
|
1559 [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]],
|
|
1560 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
|
|
1561 "vld4_dup", bits_1, pf_su_8_32;
|
|
1562 Vldx_dup 4,
|
|
1563 [Instruction_name ["vld1"]; Disassembles_as [Use_operands
|
|
1564 [| VecArray (4, Dreg); CstPtrTo Corereg |]]],
|
|
1565 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |],
|
|
1566 "vld4_dup", bits_1, [S64; U64];
|
|
1567
|
|
1568 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
|
|
1569 PtrTo Corereg |]]],
|
|
1570 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
|
|
1571 store_1, pf_su_8_32;
|
|
1572 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
|
|
1573 PtrTo Corereg |]];
|
|
1574 Instruction_name ["vst1"]],
|
|
1575 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4",
|
|
1576 store_1, [S64; U64];
|
|
1577 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg);
|
|
1578 PtrTo Corereg |];
|
|
1579 Use_operands [| VecArray (4, Dreg);
|
|
1580 PtrTo Corereg |]]],
|
|
1581 Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q",
|
|
1582 store_1, pf_su_8_32;
|
|
1583
|
|
1584 Vstx_lane 4,
|
|
1585 [Disassembles_as [Use_operands
|
|
1586 [| VecArray (4, Element_of_dreg);
|
|
1587 CstPtrTo Corereg |]]],
|
|
1588 Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane",
|
|
1589 store_3, P8 :: P16 :: F32 :: su_8_32;
|
|
1590 Vstx_lane 4,
|
|
1591 [Disassembles_as [Use_operands
|
|
1592 [| VecArray (4, Element_of_dreg);
|
|
1593 CstPtrTo Corereg |]]],
|
|
1594 Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane",
|
|
1595 store_3, [P16; F32; U16; U32; S16; S32];
|
|
1596
|
|
1597 (* Logical operations. And. *)
|
|
1598 Vand, [], All (3, Dreg), "vand", notype_2, su_8_64;
|
|
1599 Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64;
|
|
1600
|
|
1601 (* Or. *)
|
|
1602 Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_64;
|
|
1603 Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64;
|
|
1604
|
|
1605 (* Eor. *)
|
|
1606 Veor, [], All (3, Dreg), "veor", notype_2, su_8_64;
|
|
1607 Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64;
|
|
1608
|
|
1609 (* Bic (And-not). *)
|
|
1610 Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_64;
|
|
1611 Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64;
|
|
1612
|
|
1613 (* Or-not. *)
|
|
1614 Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_64;
|
|
1615 Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64;
|
|
1616 ]
|
|
1617
|
|
(* Table of vreinterpret/vreinterpretQ intrinsics: one D-reg and one Q-reg
   entry per destination element type, each carrying a Cast to that type
   from every *other* element type in ELEMS.  *)
let reinterp =
  let elems = P8 :: P16 :: F32 :: su_8_64 in
  List.fold_right
    (fun convto acc ->
      (* Casts to CONVTO from each distinct source element type, in the
         same order as ELEMS.  *)
      let others = List.filter (fun convfrom -> convfrom <> convto) elems in
      let types = List.map (fun convfrom -> Cast (convto, convfrom)) others in
      let dconv = Vreinterp, [No_op], Use_operands [| Dreg; Dreg |],
                  "vreinterpret", conv_1, types
      and qconv = Vreinterp, [No_op], Use_operands [| Qreg; Qreg |],
                  "vreinterpretQ", conv_1, types in
      dconv :: qconv :: acc)
    elems
    []
|
|
1638
|
|
1639 (* Output routines. *)
|
|
1640
|
|
(* Printable name of an element type, as used in intrinsic suffixes
   (e.g. "s8", "u32", "f32"); the bits-only B* types print just the width.
   Conv/Cast pairs join the two component names with an underscore.
   @raise Failure on NoElts, which has no printable name.  *)
let rec string_of_elt elt =
  match elt with
  | S8 -> "s8" | S16 -> "s16" | S32 -> "s32" | S64 -> "s64"
  | U8 -> "u8" | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
  | I8 -> "i8" | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
  | B8 -> "8" | B16 -> "16" | B32 -> "32" | B64 -> "64"
  | P8 -> "p8" | P16 -> "p16" | F32 -> "f32"
  | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
  | NoElts -> failwith "No elts"
|
|
1649
|
|
(* Like string_of_elt, but Conv/Cast pairs are joined with a dot rather
   than an underscore (the form used in assembler mnemonic suffixes).  *)
let string_of_elt_dots = function
    Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." ^ string_of_elt b
  | elt -> string_of_elt elt
|
|
1654
|
|
(* C type name corresponding to a vectype.  The local helper threads a
   WRAP function through the recursion: the top-level call appends "_t"
   to user-visible type names, while nested positions (such as the base
   type of an array, which gets the "_t" on the outer "...x%d" name
   instead) pass the name through unchanged.  Builtin/special types
   ignore WRAP entirely.  *)
let string_of_vectype vt =
  let rec name wrap t =
    match t with
      T_int8x8 -> wrap "int8x8"
    | T_int8x16 -> wrap "int8x16"
    | T_int16x4 -> wrap "int16x4"
    | T_int16x8 -> wrap "int16x8"
    | T_int32x2 -> wrap "int32x2"
    | T_int32x4 -> wrap "int32x4"
    | T_int64x1 -> wrap "int64x1"
    | T_int64x2 -> wrap "int64x2"
    | T_uint8x8 -> wrap "uint8x8"
    | T_uint8x16 -> wrap "uint8x16"
    | T_uint16x4 -> wrap "uint16x4"
    | T_uint16x8 -> wrap "uint16x8"
    | T_uint32x2 -> wrap "uint32x2"
    | T_uint32x4 -> wrap "uint32x4"
    | T_uint64x1 -> wrap "uint64x1"
    | T_uint64x2 -> wrap "uint64x2"
    | T_float32x2 -> wrap "float32x2"
    | T_float32x4 -> wrap "float32x4"
    | T_poly8x8 -> wrap "poly8x8"
    | T_poly8x16 -> wrap "poly8x16"
    | T_poly16x4 -> wrap "poly16x4"
    | T_poly16x8 -> wrap "poly16x8"
    | T_int8 -> wrap "int8"
    | T_int16 -> wrap "int16"
    | T_int32 -> wrap "int32"
    | T_int64 -> wrap "int64"
    | T_uint8 -> wrap "uint8"
    | T_uint16 -> wrap "uint16"
    | T_uint32 -> wrap "uint32"
    | T_uint64 -> wrap "uint64"
    | T_poly8 -> wrap "poly8"
    | T_poly16 -> wrap "poly16"
    | T_float32 -> wrap "float32"
    | T_immediate _ -> "const int"
    | T_void -> "void"
    | T_intQI -> "__builtin_neon_qi"
    | T_intHI -> "__builtin_neon_hi"
    | T_intSI -> "__builtin_neon_si"
    | T_intDI -> "__builtin_neon_di"
    | T_arrayof (num, base) ->
        (* The element type name is used bare; WRAP applies to the
           whole "<base>x<num>" array name.  *)
        let basename = name (fun x -> x) base in
        wrap (Printf.sprintf "%sx%d" basename num)
    | T_ptrto x ->
        name wrap x ^ " *"
    | T_const x ->
        "const " ^ name wrap x
  in
  name (fun x -> x ^ "_t") vt
|
|
1707
|
|
(* Builtin C type name for an opaque (large) integer mode.  The mode
   letters follow GCC machine-mode naming — TODO confirm widths against
   GCC's mode definitions.  *)
let string_of_inttype = function
    B_TImode -> "__builtin_neon_ti"
  | B_EImode -> "__builtin_neon_ei"
  | B_OImode -> "__builtin_neon_oi"
  | B_CImode -> "__builtin_neon_ci"
  | B_XImode -> "__builtin_neon_xi"
|
|
1714
|
|
(* Lowercase machine-mode name, as spelled in GCC builtin tables.  *)
let string_of_mode = function
    V8QI -> "v8qi"
  | V4HI -> "v4hi"
  | V2SI -> "v2si"
  | V2SF -> "v2sf"
  | DI -> "di"
  | V16QI -> "v16qi"
  | V8HI -> "v8hi"
  | V4SI -> "v4si"
  | V4SF -> "v4sf"
  | V2DI -> "v2di"
  | QI -> "qi"
  | HI -> "hi"
  | SI -> "si"
  | SF -> "sf"
|
|
1720
|
|
(* Use uppercase chars for letters which form part of the intrinsic name, but
   should be omitted from the builtin name (the info is passed in an extra
   argument, instead).  The public intrinsic name is the all-lowercase
   rendering.  *)
let intrinsic_name = String.lowercase
|
|
1725
|
|
(* Builtin name for an intrinsic: NAME, unless a Flipped or Builtin_name
   entry in FEATURES overrides it (the leftmost such entry wins, because
   fold_right applies it last).  Characters that are unchanged by
   lowercasing are kept; uppercase letters — which mark parts of the
   intrinsic name that do not belong in the builtin name — are dropped.  *)
let builtin_name features name =
  let override el name =
    match el with
      Flipped x | Builtin_name x -> x
    | _ -> name in
  let name = List.fold_right override features name in
  (* A char is kept iff lowercasing its one-char string is a no-op.  *)
  let keep c =
    let s = String.make 1 c in
    String.lowercase s = s in
  let buf = Buffer.create (String.length name) in
  String.iter (fun c -> if keep c then Buffer.add_char buf c) name;
  Buffer.contents buf
|
|
1739
|
|
(* Transform an arity into the list of its C type names, in signature
   order (return type first, then arguments).  *)
let strings_of_arity a =
  let vts =
    match a with
    | Arity0 vt -> [vt]
    | Arity1 (vt1, vt2) -> [vt1; vt2]
    | Arity2 (vt1, vt2, vt3) -> [vt1; vt2; vt3]
    | Arity3 (vt1, vt2, vt3, vt4) -> [vt1; vt2; vt3; vt4]
    | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [vt1; vt2; vt3; vt4; vt5]
  in
  List.map string_of_vectype vts
|
|
1757
|
|
(* Suffixes on the end of builtin names that are to be stripped in order
   to obtain the name used as an instruction.  They are only stripped if
   preceded immediately by an underscore.  *)
let suffixes_to_strip = ["n"; "lane"; "dup"]
|
|
1762
|
|
(* Get the possible names of an instruction corresponding to a "name" from the
   ops table.  An Instruction_name or Flipped entry in FEATURES takes
   precedence (whichever appears first in the list); otherwise the builtin
   name is used.  Each candidate then has a recognized suffix ("_n",
   "_lane", "_dup") stripped, if present after its final underscore.  *)
let get_insn_names features name =
  let explicit feature =
    match feature with
      Instruction_name _ | Flipped _ -> true
    | _ -> false in
  let names =
    try
      begin match List.find explicit features with
        Instruction_name names -> names
      | Flipped name -> [name]
      | _ -> assert false
      end
    with Not_found -> [builtin_name features name]
  in
  List.map
    (fun name' ->
      try
        let underscore = String.rindex name' '_' in
        let tail_len = String.length name' - underscore - 1 in
        let our_suffix = String.sub name' (underscore + 1) tail_len in
        if List.mem our_suffix suffixes_to_strip then
          String.sub name' 0 underscore
        else
          name'
      (* No underscore (Not_found), or a malformed substring request
         (Invalid_argument): leave the name as-is.  *)
      with Not_found | Invalid_argument _ -> name')
    names
|
|
1800
|
|
(* Apply [f] to each element of [elts] and append the comma-separated
   results to the accumulator [acc].  Tail-recursive.  *)
let rec commas f elts acc =
  match elts with
    [] -> acc
  | [last] -> acc ^ f last
  | first :: rest -> commas f rest (acc ^ f first ^ ", ")
|
|
1809
|
|
(* Given a list of features and the shape specified in the "ops" table, apply
   a function to each possible shape that the instruction may have.
   By default, this is the "shape" entry in "ops".  If the features list
   contains a Disassembles_as entry, the shapes contained in that entry are
   mapped to corresponding outputs and returned in a list.  If there is more
   than one Disassembles_as entry, only the first is used.  *)
let analyze_all_shapes features shape f =
  let has_disassembly = function
      Disassembles_as _ -> true
    | _ -> false in
  try
    match List.find has_disassembly features with
      Disassembles_as shapes -> List.map f shapes
    | _ -> assert false
  with Not_found -> [f shape]
|
|
1825
|