Mercurial > hg > CbC > CbC_gcc
annotate gcc/config/arm/neon.ml @ 63:b7f97abdc517 gcc-4.6-20100522
update gcc from gcc-4.5.0 to gcc-4.6
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Mon, 24 May 2010 12:47:05 +0900 |
parents | 77e2b8dfacca |
children | f6334be47118 |
rev | line source |
---|---|
0 | 1 (* Common code for ARM NEON header file, documentation and test case |
2 generators. | |
3 | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
4 Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. |
0 | 5 Contributed by CodeSourcery. |
6 | |
7 This file is part of GCC. | |
8 | |
9 GCC is free software; you can redistribute it and/or modify it under | |
10 the terms of the GNU General Public License as published by the Free | |
11 Software Foundation; either version 3, or (at your option) any later | |
12 version. | |
13 | |
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 for more details. | |
18 | |
19 You should have received a copy of the GNU General Public License | |
20 along with GCC; see the file COPYING3. If not see | |
21 <http://www.gnu.org/licenses/>. *) | |
22 | |
(* Shorthand names for vector element types.  The letter selects the
   element class (see elt_class) and the number is the element width in
   bits (see elt_width).  Conv and Cast each pair two element types; NoElts
   means there is no element type at all.  *)
type elts =
    S8 | S16 | S32 | S64
  | F32
  | U8 | U16 | U32 | U64
  | P8 | P16
  | I8 | I16 | I32 | I64
  | B8 | B16 | B32 | B64
  | Conv of elts * elts
  | Cast of elts * elts
  | NoElts
27 | |
(* Class of an element type, independent of its width.  ConvClass pairs the
   classes of the two halves of a Conv or Cast element; NoType is the class
   of NoElts.  *)
type eltclass =
    Signed
  | Unsigned
  | Float
  | Poly
  | Int
  | Bits
  | ConvClass of eltclass * eltclass
  | NoType
30 | |
(* Types as they appear in intrinsic prototypes.  The vector types
   correspond directly to C types.  *)
type vectype =
  (* Signed integer vectors (64-bit form, then 128-bit form).  *)
    T_int8x8 | T_int8x16
  | T_int16x4 | T_int16x8
  | T_int32x2 | T_int32x4
  | T_int64x1 | T_int64x2
  (* Unsigned integer vectors.  *)
  | T_uint8x8 | T_uint8x16
  | T_uint16x4 | T_uint16x8
  | T_uint32x2 | T_uint32x4
  | T_uint64x1 | T_uint64x2
  (* Floating-point vectors.  *)
  | T_float32x2 | T_float32x4
  (* Polynomial vectors.  *)
  | T_poly8x8 | T_poly8x16
  | T_poly16x4 | T_poly16x8
  (* Immediate operand; the two integers are presumably the inclusive
     bounds of the permitted range (see the shift helpers) -- TODO
     confirm.  *)
  | T_immediate of int * int
  (* Scalar element types.  *)
  | T_int8 | T_int16
  | T_int32 | T_int64
  | T_uint8 | T_uint16
  | T_uint32 | T_uint64
  | T_poly8 | T_poly16
  | T_float32
  (* Derived types: array of N vectors, pointer, const-qualified.  *)
  | T_arrayof of int * vectype
  | T_ptrto of vectype
  | T_const of vectype
  | T_void
  (* Types named after machine modes.  *)
  | T_intQI
  | T_intHI
  | T_intSI
  | T_intDI
  | T_floatSF
0 | 54 |
(* Wide integer modes used for arrays of vectors:
     B_TImode : "Tetra", two registers (four words).
     B_EImode : "hExa", three registers (six words).
     B_OImode : "Octa", four registers (eight words).
     B_CImode : "dodeCa", six registers (twelve words).
     B_XImode : "heXadeca", eight registers (sixteen words).  *)
type inttype =
    B_TImode
  | B_EImode
  | B_OImode
  | B_CImode
  | B_XImode
64 | |
(* Kind of a single operand of an instruction.  *)
type shape_elt =
    Dreg
  | Qreg
  | Corereg
  | Immed
  | VecArray of int * shape_elt
  | PtrTo of shape_elt
  | CstPtrTo of shape_elt
  (* The remaining constructors are used only in the test generator.  *)
  | Element_of_dreg               (* Used for "lane" variants.  *)
  | Element_of_qreg               (* Likewise.  *)
  | All_elements_of_dreg          (* Used for "dup" variants.  *)
  | Alternatives of shape_elt list (* Used for multiple valid operands.  *)
0 | 72 |
(* Overall operand layout of an instruction.  The register kind of each
   operand is given by regmap, and the per-operand adjustment of the element
   type by shapemap.  *)
type shape_form =
    All of int * shape_elt   (* Every operand has the given kind; the
                                integer is presumably the operand count
                                -- TODO confirm.  *)
  | Long
  | Long_noreg of shape_elt
  | Wide
  | Wide_noreg of shape_elt
  | Narrow
  | Long_imm
  | Narrow_imm
  | Binary_imm of shape_elt
  | Use_operands of shape_elt array  (* Operand kinds listed explicitly.  *)
  | By_scalar of shape_elt
  | Unary_scalar of shape_elt
  | Wide_lane
  | Wide_scalar
  | Pair_result of shape_elt
88 | |
(* Prototype of an intrinsic: ArityN carries N + 1 types.  The helpers in
   this file put the result type first, followed by the N argument
   types.  *)
type arity =
    Arity0 of vectype
  | Arity1 of vectype * vectype
  | Arity2 of vectype * vectype * vectype
  | Arity3 of vectype * vectype * vectype * vectype
  | Arity4 of vectype * vectype * vectype * vectype * vectype
94 | |
(* Machine modes, as selected by mode_of_elt.  *)
type vecmode =
  (* Modes chosen for D-register operands.  *)
    V8QI | V4HI | V2SI | V2SF | DI
  (* Modes chosen for Q-register operands.  *)
  | V16QI | V8HI | V4SI | V4SF | V2DI
  (* Modes chosen for core-register and pointer operands.  *)
  | QI | HI | SI | SF
98 | |
(* Operation performed by an intrinsic.  *)
type opcode =
  (* Binary ops.  *)
    Vadd | Vmul | Vmla | Vmls | Vsub
  | Vceq | Vcge | Vcgt | Vcle | Vclt
  | Vcage | Vcagt | Vcale | Vcalt
  | Vtst
  | Vabd | Vaba
  | Vmax | Vmin
  | Vpadd | Vpada | Vpmax | Vpmin
  | Vrecps | Vrsqrts
  | Vshl | Vshr_n | Vshl_n | Vsra_n | Vsri | Vsli
  (* Logic binops.  *)
  | Vand | Vorr | Veor | Vbic | Vorn | Vbsl
  (* Ops with scalar.  *)
  | Vmul_lane | Vmla_lane | Vmls_lane
  | Vmul_n | Vmla_n | Vmls_n
  | Vmull_n | Vmull_lane
  | Vqdmull_n | Vqdmull_lane
  | Vqdmulh_n | Vqdmulh_lane
  (* Unary ops.  *)
  | Vabs | Vneg | Vcls | Vclz | Vcnt | Vrecpe | Vrsqrte | Vmvn
  (* Vector extract.  *)
  | Vext
  (* Reverse elements.  *)
  | Vrev64 | Vrev32 | Vrev16
  (* Transposition ops.  *)
  | Vtrn | Vzip | Vuzp
  (* Loads and stores (VLD1/VST1/VLD2...), elements and structures.  *)
  | Vldx of int
  | Vstx of int
  | Vldx_lane of int
  | Vldx_dup of int
  | Vstx_lane of int
  (* Set/extract lanes from a vector.  *)
  | Vget_lane | Vset_lane
  (* Initialize vector from bit pattern.  *)
  | Vcreate
  (* Set all lanes to same value.  *)
  | Vdup_n
  | Vmov_n  (* Is this the same?  *)
  (* Duplicate scalar to all lanes of vector.  *)
  | Vdup_lane
  (* Combine vectors.  *)
  | Vcombine
  (* Get quadword high/low parts.  *)
  | Vget_high | Vget_low
  (* Convert vectors.  *)
  | Vcvt | Vcvt_n
  (* Narrow/lengthen vectors.  *)
  | Vmovn | Vmovl
  (* Table lookup.  *)
  | Vtbl of int
  | Vtbx of int
  (* Reinterpret casts.  *)
  | Vreinterp
203 | |
(* Features used for documentation, to distinguish between some instruction
   variants, and to signal special requirements (e.g. swapping
   arguments).  *)
type features =
    Halving
  | Rounding
  | Saturating
  | Dst_unsign
  | High_half
  | Doubling
  (* Builtin name to use with flipped arguments.  *)
  | Flipped of string
  (* Pass an extra word for signage/rounding etc. (always passed for
     All _, Long, Wide, Narrow shape_forms).  *)
  | InfoWord
  (* Pass explicit pointer to return value as first argument.  *)
  | ReturnPtr
  (* The shape of instruction expected upon disassembly, used if it differs
     from the shape used to build the intrinsic prototype.  Several entries
     mean that the intrinsic expands to more than one assembly instruction,
     each with a corresponding shape given here.  *)
  | Disassembles_as of shape_form list
  (* Override the name of the builtin.  *)
  | Builtin_name of string
  (* Override the name of the instruction.  If more than one name is given,
     the instruction can have any of those names.  *)
  | Instruction_name of string list
  (* The intrinsic yields no instructions, or expands to yield behavior
     that the test generator cannot test.  *)
  | No_op
  (* The intrinsic has constant arguments that cannot be set to the
     defaults (zero for pointers and one otherwise) in the test cases.  The
     function supplied must return the integer to be written into the
     testcase for the argument number (0-based) supplied to it.  *)
  | Const_valuator of (int -> int)
  | Fixed_return_reg
0 | 238 |
(* Raised by elt_width when asked for the width of a Cast element; carries
   the two element types of the cast.  *)
exception MixedMode of elts * elts

(* Width in bits of element type ELT.  A Conv element has a width only when
   both of its halves agree; otherwise Failure "element width?" is raised.
   Raises MixedMode for a Cast and Failure "No elts" for NoElts.  *)
let rec elt_width elt =
  match elt with
    S8 | U8 | P8 | I8 | B8 -> 8
  | S16 | U16 | P16 | I16 | B16 -> 16
  | S32 | F32 | U32 | I32 | B32 -> 32
  | S64 | U64 | I64 | B64 -> 64
  | Conv (first, second) ->
      let first_width = elt_width first
      and second_width = elt_width second in
      if first_width = second_width then first_width
      else failwith "element width?"
  | Cast (first, second) -> raise (MixedMode (first, second))
  | NoElts -> failwith "No elts"
251 | |
(* Class of element type ELT, ignoring its width.  Conv and Cast elements
   map to a ConvClass of the classes of their two halves.  *)
let rec elt_class elt =
  match elt with
    S8 | S16 | S32 | S64 -> Signed
  | U8 | U16 | U32 | U64 -> Unsigned
  | P8 | P16 -> Poly
  | F32 -> Float
  | I8 | I16 | I32 | I64 -> Int
  | B8 | B16 | B32 | B64 -> Bits
  | Conv (first, second) | Cast (first, second) ->
      ConvClass (elt_class first, elt_class second)
  | NoElts -> NoType
261 | |
(* Element type of class C that is W bits wide.  Raises
   Failure "Bad element type" when no such element type exists.  *)
let elt_of_class_width c w =
  let bad () = failwith "Bad element type" in
  match c with
    Signed ->
      (match w with 8 -> S8 | 16 -> S16 | 32 -> S32 | 64 -> S64
                  | _ -> bad ())
  | Float ->
      (match w with 32 -> F32 | _ -> bad ())
  | Unsigned ->
      (match w with 8 -> U8 | 16 -> U16 | 32 -> U32 | 64 -> U64
                  | _ -> bad ())
  | Poly ->
      (match w with 8 -> P8 | 16 -> P16 | _ -> bad ())
  | Int ->
      (match w with 8 -> I8 | 16 -> I16 | 32 -> I32 | 64 -> I64
                  | _ -> bad ())
  | Bits ->
      (match w with 8 -> B8 | 16 -> B16 | 32 -> B32 | 64 -> B64
                  | _ -> bad ())
  | ConvClass _ | NoType -> bad ()
284 | |
(* Unsigned integer element with the same width as ELT.  *)
let unsigned_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Unsigned width

(* Signed integer element with the same width as ELT.  *)
let signed_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Signed width

(* Untyped bits element with the same width as ELT.  *)
let bits_of_elt elt =
  let width = elt_width elt in
  elt_of_class_width Bits width
295 | |
(* Replace a signed or unsigned integer element by the sign-agnostic integer
   element of the same width; every other element is returned unchanged.  *)
let non_signed_variant elt =
  match elt with
    S8 | U8 -> I8
  | S16 | U16 -> I16
  | S32 | U32 -> I32
  | S64 | U64 -> I64
  | other -> other
306 | |
(* Map a polynomial element to the unsigned element of the same width.
   Elements of any other class are rebuilt from their own class and
   width.  *)
let poly_unsigned_variant v =
  let target_class =
    match elt_class v with
      Poly -> Unsigned
    | other -> other in
  elt_of_class_width target_class (elt_width v)
312 | |
(* Element of the same class as ELT but twice as wide.  *)
let widen_elt elt =
  let width = elt_width elt in
  elt_of_class_width (elt_class elt) (width * 2)

(* Element of the same class as ELT but half as wide.  *)
let narrow_elt elt =
  let width = elt_width elt in
  elt_of_class_width (elt_class elt) (width / 2)
322 | |
(* If we're trying to find a mode from a "Use_operands" instruction, use the
   last vector operand as the dominant mode used to invoke the correct
   builtin.  We must stick to this rule in neon.md.

   OPERANDS is scanned from its last entry backwards.  The first D or Q
   register found, either bare or as the element kind of a VecArray, is
   returned as Dreg or Qreg.  Raises Invalid_argument when OPERANDS holds no
   such operand (including when it is empty), rather than running off the
   front of the array.  *)
let find_key_operand operands =
  let rec scan opno =
    if opno < 0 then
      invalid_arg "find_key_operand: no D or Q register operand"
    else
      match operands.(opno) with
        Qreg | VecArray (_, Qreg) -> Qreg
      | Dreg | VecArray (_, Dreg) -> Dreg
      | _ -> scan (opno - 1)
  in
  scan (Array.length operands - 1)
336 | |
(* Machine mode for elements of type ELT in an instruction of shape SHAPE.
   The element width is validated before SHAPE is examined, so a bad width
   is reported as Failure "Bad element width" even for an unsupported shape.
   Use_operands shapes are resolved through find_key_operand.  Raises
   Failure "invalid shape" for shapes with no mode table.  *)
let rec mode_of_elt elt shape =
  let is_float =
    match elt_class elt with
      Float | ConvClass (_, Float) -> true
    | _ -> false in
  (* Index of the element width within each four-entry mode table.  *)
  let slot =
    match elt_width elt with
      8 -> 0
    | 16 -> 1
    | 32 -> 2
    | 64 -> 3
    | _ -> failwith "Bad element width" in
  let pick table = table.(slot) in
  match shape with
    All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
  | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
      pick [| V8QI; V4HI; (if is_float then V2SF else V2SI); DI |]
  | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
  | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
      pick [| V16QI; V8HI; (if is_float then V4SF else V4SI); V2DI |]
  | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
      pick [| QI; HI; (if is_float then SF else SI); DI |]
  | Long | Wide | Wide_lane | Wide_scalar | Long_imm ->
      pick [| V8QI; V4HI; V2SI; DI |]
  | Narrow | Narrow_imm ->
      pick [| V16QI; V8HI; V4SI; V2DI |]
  | Use_operands operands ->
      mode_of_elt elt (All (0, find_key_operand operands))
  | _ -> failwith "invalid shape"
359 | |
(* Return the function that adjusts an element type for operand number NO of
   an instruction of shape SHAPE: widening, narrowing, or leaving the
   element as it is.  For the shapes with a per-operand table, an operand
   number outside the table raises Invalid_argument.  *)

let shapemap shape no =
  let keep = fun elt -> elt in
  let nth table = table.(no) in
  match shape with
    All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
  | Binary_imm _ ->
      keep
  | Long | Long_noreg _ | Wide_scalar | Long_imm ->
      nth [| widen_elt; keep; keep |]
  | Wide | Wide_noreg _ ->
      nth [| widen_elt; widen_elt; keep |]
  | Wide_lane ->
      nth [| widen_elt; keep; keep; keep |]
  | Narrow | Narrow_imm ->
      nth [| narrow_elt; keep; keep |]
373 | |
(* Operand kind (D/Q register, core register, immediate, ...) of operand
   number NO in an instruction of shape SHAPE.  For the shapes with a
   per-operand table, an operand number outside the table raises
   Invalid_argument.  *)

let regmap shape no =
  let nth table = table.(no) in
  match shape with
    All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
  | Long -> nth [| Qreg; Dreg; Dreg |]
  | Wide -> nth [| Qreg; Qreg; Dreg |]
  | Narrow -> nth [| Dreg; Qreg; Qreg |]
  | Wide_lane -> nth [| Qreg; Dreg; Dreg; Immed |]
  | Wide_scalar -> nth [| Qreg; Dreg; Corereg |]
  | By_scalar reg -> nth [| reg; reg; Dreg; Immed |]
  | Unary_scalar reg -> nth [| reg; Dreg; Immed |]
  | Pair_result reg -> nth [| VecArray (2, reg); reg; reg |]
  | Binary_imm reg -> nth [| reg; reg; Immed |]
  | Long_imm -> nth [| Qreg; Dreg; Immed |]
  | Narrow_imm -> nth [| Dreg; Qreg; Immed |]
  | Use_operands kinds -> nth kinds
391 | |
(* Prototype type of operand number NO of an instruction of shape SHAPE
   operating on elements of type ELT.  The element is first adjusted by
   shapemap and the operand kind is taken from regmap.  Raises
   Failure "Bad elt type" when the adjusted element has no matching type.  *)
let type_for_elt shape elt no =
  let bad_elt () = failwith "Bad elt type" in
  (* 64-bit vector holding elements of the given type.  *)
  let dreg_type = function
      S8 -> T_int8x8
    | S16 -> T_int16x4
    | S32 -> T_int32x2
    | S64 -> T_int64x1
    | U8 -> T_uint8x8
    | U16 -> T_uint16x4
    | U32 -> T_uint32x2
    | U64 -> T_uint64x1
    | F32 -> T_float32x2
    | P8 -> T_poly8x8
    | P16 -> T_poly16x4
    | _ -> bad_elt () in
  (* 128-bit vector holding elements of the given type.  *)
  let qreg_type = function
      S8 -> T_int8x16
    | S16 -> T_int16x8
    | S32 -> T_int32x4
    | S64 -> T_int64x2
    | U8 -> T_uint8x16
    | U16 -> T_uint16x8
    | U32 -> T_uint32x4
    | U64 -> T_uint64x2
    | F32 -> T_float32x4
    | P8 -> T_poly8x16
    | P16 -> T_poly16x8
    | _ -> bad_elt () in
  (* Scalar of the given element type.  *)
  let scalar_type = function
      S8 -> T_int8
    | S16 -> T_int16
    | S32 -> T_int32
    | S64 -> T_int64
    | U8 -> T_uint8
    | U16 -> T_uint16
    | U32 -> T_uint32
    | U64 -> T_uint64
    | P8 -> T_poly8
    | P16 -> T_poly16
    | F32 -> T_float32
    | _ -> bad_elt () in
  let rec build kind elt =
    match kind with
      Dreg -> dreg_type elt
    | Qreg -> qreg_type elt
    | Corereg -> scalar_type elt
    | Immed -> T_immediate (0, 0)
    | VecArray (count, inner) -> T_arrayof (count, build inner elt)
    | PtrTo target -> T_ptrto (build target elt)
    | CstPtrTo target -> T_ptrto (T_const (build target elt))
    (* Anything else is solely for the use of the test generator.  *)
    | _ -> assert false
  in
  let adjusted_elt = (shapemap shape no) elt in
  let kind = regmap shape no in
  build kind adjusted_elt
454 | |
(* Size in bits of vector type TY: 64 for the D-register vector types and
   128 for the Q-register ones.  Raises Not_found for any other type.  *)
let vectype_size ty =
  match ty with
    T_int8x8 | T_uint8x8 | T_poly8x8
  | T_int16x4 | T_uint16x4 | T_poly16x4
  | T_int32x2 | T_uint32x2 | T_float32x2
  | T_int64x1 | T_uint64x1 -> 64
  | T_int8x16 | T_uint8x16 | T_poly8x16
  | T_int16x8 | T_uint16x8 | T_poly16x8
  | T_int32x4 | T_uint32x4 | T_float32x4
  | T_int64x2 | T_uint64x2 -> 128
  | _ -> raise Not_found
464 | |
(* Wide integer mode covering an array of NUM vectors of type ELTTYPE,
   chosen by the total size in 32-bit words.  Raises Failure when no mode
   has that size, and Not_found (from vectype_size) when ELTTYPE is not a
   vector type.  *)
let inttype_for_array num elttype =
  let total_bits = num * vectype_size elttype in
  let numwords = total_bits / 32 in
  match numwords with
    4 -> B_TImode
  | 6 -> B_EImode
  | 8 -> B_OImode
  | 12 -> B_CImode
  | 16 -> B_XImode
  | _ -> failwith ("no int type for size " ^ string_of_int numwords)
475 | |
(* The functions below return pairs of (internal, external) types, where
   "internal" types are those seen by GCC, and "external" are those seen by
   the assembler.  These types aren't necessarily the same, since the
   intrinsics can munge more than one C type into each assembler opcode.  *)

(* Wrap FUNC so that the external element type it returns is replaced by
   its sign-agnostic variant; the arity is passed through untouched.  *)
let make_sign_invariant func shape elt =
  match func shape elt with
    (arity, asm_elt) -> (arity, non_signed_variant asm_elt)
484 | |
(* Don't restrict any types: MAKE_ARITY receives the operand-number-to-type
   function for SHAPE and ELT, and ELT itself is the external element
   type.  *)

let elts_same make_arity shape elt =
  (make_arity (type_for_elt shape elt), elt)
490 | |
(* Prototype builders for operations where the sign of the data matters;
   the sign_invar family is the sign-agnostic counterpart.  In each one,
   operand 0 is the result.  *)

(* Accumulator-style operation with a lane argument: operand 0 is both the
   result and the first argument.  *)
let elts_same_io_lane shape elt =
  elts_same (fun ty -> Arity4 (ty 0, ty 0, ty 1, ty 2, ty 3)) shape elt

(* Accumulator-style operation: operand 0 is both the result and the first
   argument.  *)
let elts_same_io shape elt =
  elts_same (fun ty -> Arity3 (ty 0, ty 0, ty 1, ty 2)) shape elt

(* Two vector operands plus a lane argument.  *)
let elts_same_2_lane shape elt =
  elts_same (fun ty -> Arity3 (ty 0, ty 1, ty 2, ty 3)) shape elt

(* Three arguments; same prototype as the two-operand lane form.  *)
let elts_same_3 = elts_same_2_lane

(* Two arguments.  *)
let elts_same_2 shape elt =
  elts_same (fun ty -> Arity2 (ty 0, ty 1, ty 2)) shape elt

(* One argument.  *)
let elts_same_1 shape elt =
  elts_same (fun ty -> Arity1 (ty 0, ty 1)) shape elt
508 | |
(* Use for signed/unsigned invariant operations (i.e. where the operation
   doesn't depend on the sign of the data).  *)

let sign_invar_io_lane shape elt =
  make_sign_invariant elts_same_io_lane shape elt

let sign_invar_io shape elt =
  make_sign_invariant elts_same_io shape elt

let sign_invar_2_lane shape elt =
  make_sign_invariant elts_same_2_lane shape elt

let sign_invar_2 shape elt =
  make_sign_invariant elts_same_2 shape elt

let sign_invar_1 shape elt =
  make_sign_invariant elts_same_1 shape elt
517 | |
(* Sign-sensitive comparison: the result has the unsigned element type of
   the same width as ELT, while both arguments keep ELT.  *)

let cmp_sign_matters shape elt =
  let operand_ty = type_for_elt shape elt in
  let result_ty = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (result_ty, operand_ty 1, operand_ty 2), elt
524 | |
(* Signed/unsigned invariant comparison: same prototype as
   cmp_sign_matters, but the external element type is made sign-agnostic,
   and P8 is likewise reported as I8.  *)

let cmp_sign_invar shape elt =
  let arity, cmp_elt = cmp_sign_matters shape elt in
  let asm_elt =
    match non_signed_variant cmp_elt with
      P8 -> I8
    | other -> other
  in
  arity, asm_elt
535 | |
(* Comparison (VTST) where only the element width matters: the result is
   unsigned and the external element type is the untyped bits element of
   the same width as ELT.  *)

let cmp_bits shape elt =
  let operand_ty = type_for_elt shape elt in
  let result_ty = type_for_elt shape (unsigned_of_elt elt) 0
  and asm_elt = bits_of_elt elt in
  Arity2 (result_ty, operand_ty 1, operand_ty 2), asm_elt
543 | |
(* Shift by a register: the result and the first argument use ELT, while the
   second argument (the shift counts) uses the signed element of the same
   width.  *)
let reg_shift shape elt =
  let operand_ty = type_for_elt shape elt in
  let count_ty = type_for_elt shape (signed_of_elt elt) 2 in
  Arity2 (operand_ty 0, operand_ty 1, count_ty), elt
548 | |
(* Genericised constant-shift type-generating function.

   MKIMM builds the type of the immediate from the bit width of operand 2's
   element.  The optional RESULT maps ELT to the element type used for the
   result (default: ELT itself).  The optional ARITY builds the prototype
   from the result, source and immediate types (default: a two-argument
   prototype).  The external element type is always ELT.  *)

let const_shift mkimm ?arity ?result shape elt =
  let shift_elt = (shapemap shape 2) elt in
  let shift_width = elt_width shift_elt in
  let imm_ty = mkimm shift_width
  and src_ty = type_for_elt shape elt 1
  and result_elt =
    match result with
      None -> elt
    | Some restrict -> restrict elt in
  let result_ty = type_for_elt shape result_elt 0 in
  match arity with
    None -> Arity2 (result_ty, src_ty, imm_ty), elt
  | Some build -> build result_ty src_ty imm_ty, elt
564 | |
(* Use for immediate right-shifts.  The immediate is typed as
   T_immediate (1, width), where width is the bit width of operand 2's
   element.  *)

let shift_right shape elt =
  const_shift (fun width -> T_immediate (1, width)) shape elt

(* Immediate right-shift and accumulate: the destination is also passed as
   the first argument.  *)
let shift_right_acc shape elt =
  const_shift
    (fun width -> T_immediate (1, width))
    ~arity:(fun acc src imm -> Arity3 (acc, acc, src, imm))
    shape elt

(* Use for immediate right-shifts when the operation doesn't care about
   signedness.  *)

let shift_right_sign_invar shape elt =
  make_sign_invariant shift_right shape elt

(* Immediate right-shift; result is unsigned even when operand is signed.  *)

let shift_right_to_uns shape elt =
  const_shift
    (fun width -> T_immediate (1, width))
    ~result:unsigned_of_elt
    shape elt
585 | |
(* Immediate left-shift.  The immediate is typed as
   T_immediate (0, width - 1), where width is the bit width of operand 2's
   element.  *)

let shift_left shape elt =
  const_shift (fun width -> T_immediate (0, width - 1)) shape elt

(* Immediate left-shift, unsigned result.  *)

let shift_left_to_uns shape elt =
  const_shift
    (fun width -> T_immediate (0, width - 1))
    ~result:unsigned_of_elt
    shape elt

(* Immediate left-shift, don't care about signs.  *)

let shift_left_sign_invar shape elt =
  make_sign_invariant shift_left shape elt

(* Shift left/right and insert: only element size matters.  The destination
   is also passed as the first argument, and the external element type is
   the untyped bits element of the same width.  *)

let shift_insert shape elt =
  let arity, shifted_elt =
    const_shift
      (fun width -> T_immediate (1, width))
      ~arity:(fun acc src imm -> Arity3 (acc, acc, src, imm))
      shape elt
  in
  arity, bits_of_elt shifted_elt
609 | |
(* Get/set lane.  *)

(* Extract a lane.  For the external element type, polynomial elements are
   reported as unsigned and every 32-bit element as B32; other elements are
   kept.  *)
let get_lane shape elt =
  let asm_elt =
    match elt with
      P8 -> U8
    | P16 -> U16
    | S32 | U32 | F32 -> B32
    | other -> other in
  let operand_ty = type_for_elt shape elt in
  Arity2 (operand_ty 0, operand_ty 1, operand_ty 2), asm_elt

(* Set a lane; the external element type is the untyped bits element of the
   same width as ELT.  *)
let set_lane shape elt =
  let operand_ty = type_for_elt shape elt in
  Arity3 (operand_ty 0, operand_ty 1, operand_ty 2, operand_ty 3),
  bits_of_elt elt

(* As set_lane, but with no external element type at all.  *)
let set_lane_notype shape elt =
  let operand_ty = type_for_elt shape elt in
  Arity3 (operand_ty 0, operand_ty 1, operand_ty 2, operand_ty 3), NoElts
624 | |
(* Build a vector of ELT from a bit pattern: the single argument is typed as
   operand 1 with U64 elements, the result as operand 0 with ELT.  *)
let create_vector shape elt =
  let pattern_ty = type_for_elt shape U64 1
  and result_ty = type_for_elt shape elt 0 in
  Arity1 (result_ty, pattern_ty), elt
629 | |
(* Conversion between element types.  ELT must be a Conv or Cast whose first
   component is the destination element and whose second is the source;
   anything else raises Failure.  MAKE_ARITY receives the result type and
   the operand-number-to-type function for the source element.  The
   external element type is ELT itself.  *)
let conv make_arity shape elt =
  let dest_elt, src_elt =
    match elt with
      Conv (dest, src) | Cast (dest, src) -> dest, src
    | _ -> failwith "Non-conversion element in conversion" in
  let src_ty = type_for_elt shape src_elt in
  let result_ty = type_for_elt shape dest_elt 0 in
  make_arity result_ty src_ty, elt

(* Conversion taking one argument.  *)
let conv_1 shape elt =
  conv (fun result_ty src_ty -> Arity1 (result_ty, src_ty 1)) shape elt

(* Conversion taking two arguments.  *)
let conv_2 shape elt =
  conv (fun result_ty src_ty -> Arity2 (result_ty, src_ty 1, src_ty 2))
    shape elt
640 | |
(* Operation has an unsigned result even if operands are signed.
   MAKE_ARITY receives the unsigned result type and the
   operand-number-to-type function for ELT.  *)

let dst_unsign make_arity shape elt =
  let operand_ty = type_for_elt shape elt in
  let result_ty = type_for_elt shape (unsigned_of_elt elt) 0 in
  make_arity result_ty operand_ty, elt

(* One-argument operation with an unsigned result.  *)
let dst_unsign_1 shape elt =
  dst_unsign (fun result_ty operand_ty -> Arity1 (result_ty, operand_ty 1))
    shape elt
649 | |
(* Wrap FUNC so that the external element type it returns is replaced by
   the untyped bits element of the same width.  *)
let make_bits_only func shape elt =
  match func shape elt with
    (arity, asm_elt) -> (arity, bits_of_elt asm_elt)
653 | |
(* Extend operation: three arguments, with the untyped bits element of the
   same width as ELT as the external element type.  *)

let extend shape elt =
  let operand_ty = type_for_elt shape elt in
  Arity3 (operand_ty 0, operand_ty 1, operand_ty 2, operand_ty 3),
  bits_of_elt elt
659 | |
(* Table look-up operations.  Operand 2 is signed/unsigned for
   signed/unsigned integer ops respectively, or unsigned for polynomial ops.
   MKARITY receives the operand-number-to-type function for ELT and the
   type of operand 2.  *)

let table mkarity shape elt =
  let operand_ty = type_for_elt shape elt in
  let index_ty = type_for_elt shape (poly_unsigned_variant elt) 2 in
  mkarity operand_ty index_ty, bits_of_elt elt

(* Look-up taking two arguments.  *)
let table_2 shape elt =
  table
    (fun operand_ty index_ty ->
       Arity2 (operand_ty 0, operand_ty 1, index_ty))
    shape elt

(* Look-up whose destination is also passed as the first argument.  *)
let table_io shape elt =
  table
    (fun operand_ty index_ty ->
       Arity3 (operand_ty 0, operand_ty 0, operand_ty 1, index_ty))
    shape elt
670 | |
(* Operations where only bits matter, taking one, two or three
   arguments.  *)

let bits_1 shape elt = make_bits_only elts_same_1 shape elt
let bits_2 shape elt = make_bits_only elts_same_2 shape elt
let bits_3 shape elt = make_bits_only elts_same_3 shape elt
676 | |
(* Store insns.  These return void, and the external element type is the
   untyped bits element of the same width as ELT.  *)

(* Store taking operands 0 and 1 as its arguments.  *)
let store_1 shape elt =
  let operand_ty = type_for_elt shape elt in
  Arity2 (T_void, operand_ty 0, operand_ty 1), bits_of_elt elt

(* Store taking operands 0, 1 and 2 as its arguments.  *)
let store_3 shape elt =
  let operand_ty = type_for_elt shape elt in
  Arity3 (T_void, operand_ty 0, operand_ty 1, operand_ty 2), bits_of_elt elt
685 | |
(* Wrap FUNC so that its external element type is discarded and NoElts is
   reported instead.  *)
let make_notype func shape elt =
  match func shape elt with
    (arity, _) -> (arity, NoElts)

(* Untyped operations taking one, two or three arguments.  *)
let notype_1 shape elt = make_notype elts_same_1 shape elt
let notype_2 shape elt = make_notype elts_same_2 shape elt
let notype_3 shape elt = make_notype elts_same_3 shape elt
693 | |
(* Bit-select operations (first operand is unsigned int).  The remaining
   operands and the result use ELT, and there is no external element
   type.  *)

let bit_select shape elt =
  let mask_elt = unsigned_of_elt elt in
  let operand_ty = type_for_elt shape elt in
  let mask_ty = type_for_elt shape mask_elt in
  Arity3 (operand_ty 0, mask_ty 1, operand_ty 2, operand_ty 3), NoElts
700 | |
(* Common lists of supported element types.  The names give the classes
   covered (s = signed, u = unsigned, p = polynomial, f = float) and the
   range of integer element widths.  *)

let su_8_32 = [S8; S16; S32; U8; U16; U32]
let su_8_64 = [S64; U64] @ su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
let pf_su_8_32 = [P8; P16; F32] @ su_8_32
let pf_su_8_64 = [P8; P16; F32] @ su_8_64
708 | |
709 let ops = | |
710 [ | |
711 (* Addition. *) | |
712 Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_64; | |
713 Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64; | |
714 Vadd, [], Long, "vaddl", elts_same_2, su_8_32; | |
715 Vadd, [], Wide, "vaddw", elts_same_2, su_8_32; | |
716 Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32; | |
717 Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32; | |
718 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving], | |
719 All (3, Dreg), "vRhadd", elts_same_2, su_8_32; | |
720 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving], | |
721 All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32; | |
722 Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64; | |
723 Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64; | |
724 Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64; | |
725 Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half], | |
726 Narrow, "vRaddhn", sign_invar_2, su_16_64; | |
727 | |
728 (* Multiplication. *) | |
729 Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32; | |
730 Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32; | |
731 Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh", | |
732 elts_same_2, [S16; S32]; | |
733 Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ", | |
734 elts_same_2, [S16; S32]; | |
735 Vmul, | |
736 [Saturating; Rounding; Doubling; High_half; | |
737 Instruction_name ["vqrdmulh"]], | |
738 All (3, Dreg), "vqRdmulh", | |
739 elts_same_2, [S16; S32]; | |
740 Vmul, | |
741 [Saturating; Rounding; Doubling; High_half; | |
742 Instruction_name ["vqrdmulh"]], | |
743 All (3, Qreg), "vqRdmulhQ", | |
744 elts_same_2, [S16; S32]; | |
745 Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32; | |
746 Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32]; | |
747 | |
748 (* Multiply-accumulate. *) | |
749 Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32; | |
750 Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32; | |
751 Vmla, [], Long, "vmlal", elts_same_io, su_8_32; | |
752 Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32]; | |
753 | |
754 (* Multiply-subtract. *) | |
755 Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32; | |
756 Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32; | |
757 Vmls, [], Long, "vmlsl", elts_same_io, su_8_32; | |
758 Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32]; | |
759 | |
760 (* Subtraction. *) | |
761 Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_64; | |
762 Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64; | |
763 Vsub, [], Long, "vsubl", elts_same_2, su_8_32; | |
764 Vsub, [], Wide, "vsubw", elts_same_2, su_8_32; | |
765 Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32; | |
766 Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32; | |
767 Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64; | |
768 Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64; | |
769 Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64; | |
770 Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half], | |
771 Narrow, "vRsubhn", sign_invar_2, su_16_64; | |
772 | |
773 (* Comparison, equal. *) | |
774 Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32; | |
775 Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32; | |
776 | |
777 (* Comparison, greater-than or equal. *) | |
778 Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32; | |
779 Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32; | |
780 | |
781 (* Comparison, less-than or equal. *) | |
782 Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters, | |
783 F32 :: su_8_32; | |
784 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"], | |
785 All (3, Qreg), "vcleQ", cmp_sign_matters, | |
786 F32 :: su_8_32; | |
787 | |
788 (* Comparison, greater-than. *) | |
789 Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32; | |
790 Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32; | |
791 | |
792 (* Comparison, less-than. *) | |
793 Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters, | |
794 F32 :: su_8_32; | |
795 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"], | |
796 All (3, Qreg), "vcltQ", cmp_sign_matters, | |
797 F32 :: su_8_32; | |
798 | |
799 (* Compare absolute greater-than or equal. *) | |
800 Vcage, [Instruction_name ["vacge"]], | |
801 All (3, Dreg), "vcage", cmp_sign_matters, [F32]; | |
802 Vcage, [Instruction_name ["vacge"]], | |
803 All (3, Qreg), "vcageQ", cmp_sign_matters, [F32]; | |
804 | |
805 (* Compare absolute less-than or equal. *) | |
806 Vcale, [Instruction_name ["vacge"]; Flipped "vcage"], | |
807 All (3, Dreg), "vcale", cmp_sign_matters, [F32]; | |
808 Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"], | |
809 All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32]; | |
810 | |
811 (* Compare absolute greater-than. *) | |
812 Vcagt, [Instruction_name ["vacgt"]], | |
813 All (3, Dreg), "vcagt", cmp_sign_matters, [F32]; | |
814 Vcagt, [Instruction_name ["vacgt"]], | |
815 All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32]; | |
816 | |
817 (* Compare absolute less-than. *) | |
818 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"], | |
819 All (3, Dreg), "vcalt", cmp_sign_matters, [F32]; | |
820 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"], | |
821 All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32]; | |
822 | |
823 (* Test bits. *) | |
824 Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32; | |
825 Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32; | |
826 | |
827 (* Absolute difference. *) | |
828 Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32; | |
829 Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32; | |
830 Vabd, [], Long, "vabdl", elts_same_2, su_8_32; | |
831 | |
832 (* Absolute difference and accumulate. *) | |
833 Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32; | |
834 Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32; | |
835 Vaba, [], Long, "vabal", elts_same_io, su_8_32; | |
836 | |
837 (* Max. *) | |
838 Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32; | |
839 Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32; | |
840 | |
841 (* Min. *) | |
842 Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32; | |
843 Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32; | |
844 | |
845 (* Pairwise add. *) | |
846 Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32; | |
847 Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32; | |
848 Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32; | |
849 | |
850 (* Pairwise add, widen and accumulate. *) | |
851 Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32; | |
852 Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32; | |
853 | |
854 (* Folding maximum, minimum. *) | |
855 Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32; | |
856 Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32; | |
857 | |
858 (* Reciprocal step and reciprocal square-root step. *) | |
859 Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32]; | |
860 Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32]; | |
861 Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32]; | |
862 Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32]; | |
863 | |
864 (* Vector shift left. *) | |
865 Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64; | |
866 Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64; | |
867 Vshl, [Instruction_name ["vrshl"]; Rounding], | |
868 All (3, Dreg), "vRshl", reg_shift, su_8_64; | |
869 Vshl, [Instruction_name ["vrshl"]; Rounding], | |
870 All (3, Qreg), "vRshlQ", reg_shift, su_8_64; | |
871 Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64; | |
872 Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64; | |
873 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding], | |
874 All (3, Dreg), "vqRshl", reg_shift, su_8_64; | |
875 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding], | |
876 All (3, Qreg), "vqRshlQ", reg_shift, su_8_64; | |
877 | |
878 (* Vector shift right by constant. *) | |
879 Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64; | |
880 Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64; | |
881 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg, | |
882 "vRshr_n", shift_right, su_8_64; | |
883 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg, | |
884 "vRshrQ_n", shift_right, su_8_64; | |
885 Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64; | |
886 Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n", | |
887 shift_right_sign_invar, su_16_64; | |
888 Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64; | |
889 Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm, | |
890 "vqRshrn_n", shift_right, su_16_64; | |
891 Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n", | |
892 shift_right_to_uns, [S16; S32; S64]; | |
893 Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding], | |
894 Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64]; | |
895 | |
896 (* Vector shift left by constant. *) | |
897 Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64; | |
898 Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64; | |
899 Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64; | |
900 Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64; | |
901 Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n", | |
902 shift_left_to_uns, [S8; S16; S32; S64]; | |
903 Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n", | |
904 shift_left_to_uns, [S8; S16; S32; S64]; | |
905 Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32; | |
906 | |
907 (* Vector shift right by constant and accumulate. *) | |
908 Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64; | |
909 Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64; | |
910 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg, | |
911 "vRsra_n", shift_right_acc, su_8_64; | |
912 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg, | |
913 "vRsraQ_n", shift_right_acc, su_8_64; | |
914 | |
915 (* Vector shift right and insert. *) | |
916 Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert, | |
917 P8 :: P16 :: su_8_64; | |
918 Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert, | |
919 P8 :: P16 :: su_8_64; | |
920 | |
921 (* Vector shift left and insert. *) | |
922 Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert, | |
923 P8 :: P16 :: su_8_64; | |
924 Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert, | |
925 P8 :: P16 :: su_8_64; | |
926 | |
927 (* Absolute value. *) | |
928 Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32]; | |
929 Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32]; | |
930 Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32]; | |
931 Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32]; | |
932 | |
933 (* Negate. *) | |
934 Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32]; | |
935 Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32]; | |
936 Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32]; | |
937 Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32]; | |
938 | |
939 (* Bitwise not. *) | |
940 Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32; | |
941 Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32; | |
942 | |
943 (* Count leading sign bits. *) | |
944 Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32]; | |
945 Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32]; | |
946 | |
947 (* Count leading zeros. *) | |
948 Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32; | |
949 Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32; | |
950 | |
951 (* Count number of set bits. *) | |
952 Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8]; | |
953 Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8]; | |
954 | |
955 (* Reciprocal estimate. *) | |
956 Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32]; | |
957 Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32]; | |
958 | |
959 (* Reciprocal square-root estimate. *) | |
960 Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32]; | |
961 Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32]; | |
962 | |
963 (* Get lanes from a vector. *) | |
964 Vget_lane, | |
965 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]]; | |
966 Instruction_name ["vmov"]], | |
967 Use_operands [| Corereg; Dreg; Immed |], | |
968 "vget_lane", get_lane, pf_su_8_32; | |
969 Vget_lane, | |
970 [InfoWord; | |
971 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]]; | |
972 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], | |
973 Use_operands [| Corereg; Dreg; Immed |], | |
974 "vget_lane", notype_2, [S64; U64]; | |
975 Vget_lane, | |
976 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]]; | |
977 Instruction_name ["vmov"]], | |
978 Use_operands [| Corereg; Qreg; Immed |], | |
979 "vgetQ_lane", get_lane, pf_su_8_32; | |
980 Vget_lane, | |
981 [InfoWord; | |
982 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]]; | |
983 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], | |
984 Use_operands [| Corereg; Qreg; Immed |], | |
985 "vgetQ_lane", notype_2, [S64; U64]; | |
986 | |
987 (* Set lanes in a vector. *) | |
988 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]]; | |
989 Instruction_name ["vmov"]], | |
990 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane", | |
991 set_lane, pf_su_8_32; | |
992 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]; | |
993 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], | |
994 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane", | |
995 set_lane_notype, [S64; U64]; | |
996 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]]; | |
997 Instruction_name ["vmov"]], | |
998 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane", | |
999 set_lane, pf_su_8_32; | |
1000 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]; | |
1001 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], | |
1002 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane", | |
1003 set_lane_notype, [S64; U64]; | |
1004 | |
1005 (* Create vector from literal bit pattern. *) | |
1006 Vcreate, | |
1007 [No_op], (* Not really, but it can yield various things that are too | |
1008 hard for the test generator at this time. *) | |
1009 Use_operands [| Dreg; Corereg |], "vcreate", create_vector, | |
1010 pf_su_8_64; | |
1011 | |
1012 (* Set all lanes to the same value. *) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1013 Vdup_n, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1014 [Disassembles_as [Use_operands [| Dreg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1015 Alternatives [ Corereg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1016 Element_of_dreg ] |]]], |
0 | 1017 Use_operands [| Dreg; Corereg |], "vdup_n", bits_1, |
1018 pf_su_8_32; | |
1019 Vdup_n, | |
1020 [Instruction_name ["vmov"]; | |
1021 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], | |
1022 Use_operands [| Dreg; Corereg |], "vdup_n", notype_1, | |
1023 [S64; U64]; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1024 Vdup_n, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1025 [Disassembles_as [Use_operands [| Qreg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1026 Alternatives [ Corereg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1027 Element_of_dreg ] |]]], |
0 | 1028 Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1, |
1029 pf_su_8_32; | |
1030 Vdup_n, | |
1031 [Instruction_name ["vmov"]; | |
1032 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]; | |
1033 Use_operands [| Dreg; Corereg; Corereg |]]], | |
1034 Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1, | |
1035 [S64; U64]; | |
1036 | |
1037 (* These are just aliases for the above. *) | |
1038 Vmov_n, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1039 [Builtin_name "vdup_n"; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1040 Disassembles_as [Use_operands [| Dreg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1041 Alternatives [ Corereg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1042 Element_of_dreg ] |]]], |
0 | 1043 Use_operands [| Dreg; Corereg |], |
1044 "vmov_n", bits_1, pf_su_8_32; | |
1045 Vmov_n, | |
1046 [Builtin_name "vdup_n"; | |
1047 Instruction_name ["vmov"]; | |
1048 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], | |
1049 Use_operands [| Dreg; Corereg |], | |
1050 "vmov_n", notype_1, [S64; U64]; | |
1051 Vmov_n, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1052 [Builtin_name "vdupQ_n"; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1053 Disassembles_as [Use_operands [| Qreg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1054 Alternatives [ Corereg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1055 Element_of_dreg ] |]]], |
0 | 1056 Use_operands [| Qreg; Corereg |], |
1057 "vmovQ_n", bits_1, pf_su_8_32; | |
1058 Vmov_n, | |
1059 [Builtin_name "vdupQ_n"; | |
1060 Instruction_name ["vmov"]; | |
1061 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]; | |
1062 Use_operands [| Dreg; Corereg; Corereg |]]], | |
1063 Use_operands [| Qreg; Corereg |], | |
1064 "vmovQ_n", notype_1, [S64; U64]; | |
1065 | |
1066 (* Duplicate, lane version. We can't use Use_operands here because the | |
1067 rightmost register (always Dreg) would be picked up by find_key_operand, | |
1068 when we want the leftmost register to be used in this case (otherwise | |
1069 the modes are indistinguishable in neon.md, etc.). *) | |
1070 Vdup_lane, | |
1071 [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]], | |
1072 Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32; | |
1073 Vdup_lane, | |
1074 [No_op; Const_valuator (fun _ -> 0)], | |
1075 Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64]; | |
1076 Vdup_lane, | |
1077 [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]], | |
1078 Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32; | |
1079 Vdup_lane, | |
1080 [No_op; Const_valuator (fun _ -> 0)], | |
1081 Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64]; | |
1082 | |
1083 (* Combining vectors. *) | |
1084 Vcombine, [No_op], | |
1085 Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2, | |
1086 pf_su_8_64; | |
1087 | |
1088 (* Splitting vectors. *) | |
1089 Vget_high, [No_op], | |
1090 Use_operands [| Dreg; Qreg |], "vget_high", | |
1091 notype_1, pf_su_8_64; | |
1092 Vget_low, [Instruction_name ["vmov"]; | |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1093 Disassembles_as [Use_operands [| Dreg; Dreg |]]; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1094 Fixed_return_reg], |
0 | 1095 Use_operands [| Dreg; Qreg |], "vget_low", |
63
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1096 notype_1, pf_su_8_32; |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1097 Vget_low, [No_op], |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1098 Use_operands [| Dreg; Qreg |], "vget_low", |
b7f97abdc517
update gcc from gcc-4.5.0 to gcc-4.6
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
55
diff
changeset
|
1099 notype_1, [S64; U64]; |
0 | 1100 |
1101 (* Conversions. *) | |
1102 Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1, | |
1103 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; | |
1104 Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1, | |
1105 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; | |
1106 Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2, | |
1107 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; | |
1108 Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2, | |
1109 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; | |
1110 | |
1111 (* Move, narrowing. *) | |
1112 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]], | |
1113 Narrow, "vmovn", sign_invar_1, su_16_64; | |
1114 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating], | |
1115 Narrow, "vqmovn", elts_same_1, su_16_64; | |
1116 Vmovn, | |
1117 [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign], | |
1118 Narrow, "vqmovun", dst_unsign_1, | |
1119 [S16; S32; S64]; | |
1120 | |
1121 (* Move, long. *) | |
1122 Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]], | |
1123 Long, "vmovl", elts_same_1, su_8_32; | |
1124 | |
1125 (* Table lookup. *) | |
1126 Vtbl 1, | |
1127 [Instruction_name ["vtbl"]; | |
1128 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]], | |
1129 Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8]; | |
1130 Vtbl 2, [Instruction_name ["vtbl"]], | |
1131 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2, | |
1132 [U8; S8; P8]; | |
1133 Vtbl 3, [Instruction_name ["vtbl"]], | |
1134 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2, | |
1135 [U8; S8; P8]; | |
1136 Vtbl 4, [Instruction_name ["vtbl"]], | |
1137 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2, | |
1138 [U8; S8; P8]; | |
1139 | |
1140 (* Extended table lookup. *) | |
1141 Vtbx 1, | |
1142 [Instruction_name ["vtbx"]; | |
1143 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]], | |
1144 Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8]; | |
1145 Vtbx 2, [Instruction_name ["vtbx"]], | |
1146 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io, | |
1147 [U8; S8; P8]; | |
1148 Vtbx 3, [Instruction_name ["vtbx"]], | |
1149 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io, | |
1150 [U8; S8; P8]; | |
1151 Vtbx 4, [Instruction_name ["vtbx"]], | |
1152 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io, | |
1153 [U8; S8; P8]; | |
1154 | |
1155 (* Multiply, lane. (note: these were undocumented at the time of | |
1156 writing). *) | |
1157 Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane, | |
1158 [S16; S32; U16; U32; F32]; | |
1159 Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane, | |
1160 [S16; S32; U16; U32; F32]; | |
1161 | |
1162 (* Multiply-accumulate, lane. *) | |
1163 Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane, | |
1164 [S16; S32; U16; U32; F32]; | |
1165 Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane, | |
1166 [S16; S32; U16; U32; F32]; | |
1167 Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane, | |
1168 [S16; S32; U16; U32]; | |
1169 Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane", | |
1170 elts_same_io_lane, [S16; S32]; | |
1171 | |
1172 (* Multiply-subtract, lane. *) | |
1173 Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane, | |
1174 [S16; S32; U16; U32; F32]; | |
1175 Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane, | |
1176 [S16; S32; U16; U32; F32]; | |
1177 Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane, | |
1178 [S16; S32; U16; U32]; | |
1179 Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane", | |
1180 elts_same_io_lane, [S16; S32]; | |
1181 | |
1182 (* Long multiply, lane. *) | |
1183 Vmull_lane, [], | |
1184 Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32]; | |
1185 | |
1186 (* Saturating doubling long multiply, lane. *) | |
1187 Vqdmull_lane, [Saturating; Doubling], | |
1188 Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32]; | |
1189 | |
1190 (* Saturating doubling long multiply high, lane. *) | |
1191 Vqdmulh_lane, [Saturating; Halving], | |
1192 By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32]; | |
1193 Vqdmulh_lane, [Saturating; Halving], | |
1194 By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32]; | |
1195 Vqdmulh_lane, [Saturating; Halving; Rounding; | |
1196 Instruction_name ["vqrdmulh"]], | |
1197 By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32]; | |
1198 Vqdmulh_lane, [Saturating; Halving; Rounding; | |
1199 Instruction_name ["vqrdmulh"]], | |
1200 By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32]; | |
1201 | |
1202 (* Vector multiply by scalar. *) | |
1203 Vmul_n, [InfoWord; | |
1204 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], | |
1205 Use_operands [| Dreg; Dreg; Corereg |], "vmul_n", | |
1206 sign_invar_2, [S16; S32; U16; U32; F32]; | |
1207 Vmul_n, [InfoWord; | |
1208 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], | |
1209 Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n", | |
1210 sign_invar_2, [S16; S32; U16; U32; F32]; | |
1211 | |
1212 (* Vector long multiply by scalar. *) | |
1213 Vmull_n, [Instruction_name ["vmull"]; | |
1214 Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]], | |
1215 Wide_scalar, "vmull_n", | |
1216 elts_same_2, [S16; S32; U16; U32]; | |
1217 | |
1218 (* Vector saturating doubling long multiply by scalar. *) | |
1219 Vqdmull_n, [Saturating; Doubling; | |
1220 Disassembles_as [Use_operands [| Qreg; Dreg; | |
1221 Element_of_dreg |]]], | |
1222 Wide_scalar, "vqdmull_n", | |
1223 elts_same_2, [S16; S32]; | |
1224 | |
1225 (* Vector saturating doubling long multiply high by scalar. *) | |
1226 Vqdmulh_n, | |
1227 [Saturating; Halving; InfoWord; | |
1228 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], | |
1229 Use_operands [| Qreg; Qreg; Corereg |], | |
1230 "vqdmulhQ_n", elts_same_2, [S16; S32]; | |
1231 Vqdmulh_n, | |
1232 [Saturating; Halving; InfoWord; | |
1233 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], | |
1234 Use_operands [| Dreg; Dreg; Corereg |], | |
1235 "vqdmulh_n", elts_same_2, [S16; S32]; | |
1236 Vqdmulh_n, | |
1237 [Saturating; Halving; Rounding; InfoWord; | |
1238 Instruction_name ["vqrdmulh"]; | |
1239 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], | |
1240 Use_operands [| Qreg; Qreg; Corereg |], | |
1241 "vqRdmulhQ_n", elts_same_2, [S16; S32]; | |
1242 Vqdmulh_n, | |
1243 [Saturating; Halving; Rounding; InfoWord; | |
1244 Instruction_name ["vqrdmulh"]; | |
1245 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], | |
1246 Use_operands [| Dreg; Dreg; Corereg |], | |
1247 "vqRdmulh_n", elts_same_2, [S16; S32]; | |
1248 | |
1249 (* Vector multiply-accumulate by scalar. *) | |
1250 Vmla_n, [InfoWord; | |
1251 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], | |
1252 Use_operands [| Dreg; Dreg; Corereg |], "vmla_n", | |
1253 sign_invar_io, [S16; S32; U16; U32; F32]; | |
1254 Vmla_n, [InfoWord; | |
1255 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], | |
1256 Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n", | |
1257 sign_invar_io, [S16; S32; U16; U32; F32]; | |
1258 Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32]; | |
1259 Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io, | |
1260 [S16; S32]; | |
1261 | |
1262 (* Vector multiply subtract by scalar. *) | |
1263 Vmls_n, [InfoWord; | |
1264 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], | |
1265 Use_operands [| Dreg; Dreg; Corereg |], "vmls_n", | |
1266 sign_invar_io, [S16; S32; U16; U32; F32]; | |
1267 Vmls_n, [InfoWord; | |
1268 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], | |
1269 Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n", | |
1270 sign_invar_io, [S16; S32; U16; U32; F32]; | |
1271 Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32]; | |
1272 Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io, | |
1273 [S16; S32]; | |
1274 | |
1275 (* Vector extract. *) | |
1276 Vext, [Const_valuator (fun _ -> 0)], | |
1277 Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend, | |
1278 pf_su_8_64; | |
1279 Vext, [Const_valuator (fun _ -> 0)], | |
1280 Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend, | |
1281 pf_su_8_64; | |
1282 | |
1283 (* Reverse elements. *) | |
1284 Vrev64, [], All (2, Dreg), "vrev64", bits_1, P8 :: P16 :: F32 :: su_8_32; | |
1285 Vrev64, [], All (2, Qreg), "vrev64Q", bits_1, P8 :: P16 :: F32 :: su_8_32; | |
1286 Vrev32, [], All (2, Dreg), "vrev32", bits_1, [P8; P16; S8; U8; S16; U16]; | |
1287 Vrev32, [], All (2, Qreg), "vrev32Q", bits_1, [P8; P16; S8; U8; S16; U16]; | |
1288 Vrev16, [], All (2, Dreg), "vrev16", bits_1, [P8; S8; U8]; | |
1289 Vrev16, [], All (2, Qreg), "vrev16Q", bits_1, [P8; S8; U8]; | |
1290 | |
1291 (* Bit selection. *) | |
1292 Vbsl, | |
1293 [Instruction_name ["vbsl"; "vbit"; "vbif"]; | |
1294 Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]], | |
1295 Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select, | |
1296 pf_su_8_64; | |
1297 Vbsl, | |
1298 [Instruction_name ["vbsl"; "vbit"; "vbif"]; | |
1299 Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]], | |
1300 Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select, | |
1301 pf_su_8_64; | |
1302 | |
1303 (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards | |
1304 generating good code for intrinsics which return structure types -- | |
1305 builtins work well by themselves (and understand that the values being | |
1306 stored on e.g. the stack also reside in registers, so can optimise the | |
1307 stores away entirely if the results are used immediately), but | |
1308 intrinsics are very much less efficient. Maybe something can be improved | |
1309 re: inlining, or tweaking the ABI used for intrinsics (a special call | |
1310 attribute?). | |
1311 *) | |
1312 Vtrn, [ReturnPtr], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32; | |
1313 Vtrn, [ReturnPtr], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32; | |
1314 | |
1315 (* Zip elements. *) | |
1316 Vzip, [ReturnPtr], Pair_result Dreg, "vzip", bits_2, pf_su_8_32; | |
1317 Vzip, [ReturnPtr], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32; | |
1318 | |
1319 (* Unzip elements. *) | |
1320 Vuzp, [ReturnPtr], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32; | |
1321 Vuzp, [ReturnPtr], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32; | |
1322 | |
1323 (* Element/structure loads. VLD1 variants. *) | |
1324 Vldx 1, | |
1325 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1326 CstPtrTo Corereg |]]], | |
1327 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1, | |
1328 pf_su_8_64; | |
1329 Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1330 CstPtrTo Corereg |]]], | |
1331 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1, | |
1332 pf_su_8_64; | |
1333 | |
1334 Vldx_lane 1, | |
1335 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); | |
1336 CstPtrTo Corereg |]]], | |
1337 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], | |
1338 "vld1_lane", bits_3, pf_su_8_32; | |
1339 Vldx_lane 1, | |
1340 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1341 CstPtrTo Corereg |]]; | |
1342 Const_valuator (fun _ -> 0)], | |
1343 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], | |
1344 "vld1_lane", bits_3, [S64; U64]; | |
1345 Vldx_lane 1, | |
1346 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); | |
1347 CstPtrTo Corereg |]]], | |
1348 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], | |
1349 "vld1Q_lane", bits_3, pf_su_8_32; | |
1350 Vldx_lane 1, | |
1351 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1352 CstPtrTo Corereg |]]], | |
1353 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], | |
1354 "vld1Q_lane", bits_3, [S64; U64]; | |
1355 | |
1356 Vldx_dup 1, | |
1357 [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg); | |
1358 CstPtrTo Corereg |]]], | |
1359 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", | |
1360 bits_1, pf_su_8_32; | |
1361 Vldx_dup 1, | |
1362 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1363 CstPtrTo Corereg |]]], | |
1364 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", | |
1365 bits_1, [S64; U64]; | |
1366 Vldx_dup 1, | |
1367 [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg); | |
1368 CstPtrTo Corereg |]]], | |
1369 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", | |
1370 bits_1, pf_su_8_32; | |
1371 Vldx_dup 1, | |
1372 [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1373 CstPtrTo Corereg |]]], | |
1374 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", | |
1375 bits_1, [S64; U64]; | |
1376 | |
1377 (* VST1 variants. *) | |
1378 Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1379 PtrTo Corereg |]]], | |
1380 Use_operands [| PtrTo Corereg; Dreg |], "vst1", | |
1381 store_1, pf_su_8_64; | |
1382 Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1383 PtrTo Corereg |]]], | |
1384 Use_operands [| PtrTo Corereg; Qreg |], "vst1Q", | |
1385 store_1, pf_su_8_64; | |
1386 | |
1387 Vstx_lane 1, | |
1388 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); | |
1389 CstPtrTo Corereg |]]], | |
1390 Use_operands [| PtrTo Corereg; Dreg; Immed |], | |
1391 "vst1_lane", store_3, pf_su_8_32; | |
1392 Vstx_lane 1, | |
1393 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1394 CstPtrTo Corereg |]]; | |
1395 Const_valuator (fun _ -> 0)], | |
1396 Use_operands [| PtrTo Corereg; Dreg; Immed |], | |
1397 "vst1_lane", store_3, [U64; S64]; | |
1398 Vstx_lane 1, | |
1399 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); | |
1400 CstPtrTo Corereg |]]], | |
1401 Use_operands [| PtrTo Corereg; Qreg; Immed |], | |
1402 "vst1Q_lane", store_3, pf_su_8_32; | |
1403 Vstx_lane 1, | |
1404 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1405 CstPtrTo Corereg |]]], | |
1406 Use_operands [| PtrTo Corereg; Qreg; Immed |], | |
1407 "vst1Q_lane", store_3, [U64; S64]; | |
1408 | |
1409 (* VLD2 variants. *) | |
1410 Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], | |
1411 "vld2", bits_1, pf_su_8_32; | |
1412 Vldx 2, [Instruction_name ["vld1"]], | |
1413 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], | |
1414 "vld2", bits_1, [S64; U64]; | |
1415 Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1416 CstPtrTo Corereg |]; | |
1417 Use_operands [| VecArray (2, Dreg); | |
1418 CstPtrTo Corereg |]]], | |
1419 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |], | |
1420 "vld2Q", bits_1, pf_su_8_32; | |
1421 | |
1422 Vldx_lane 2, | |
1423 [Disassembles_as [Use_operands | |
1424 [| VecArray (2, Element_of_dreg); | |
1425 CstPtrTo Corereg |]]], | |
1426 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg; | |
1427 VecArray (2, Dreg); Immed |], | |
1428 "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; | |
1429 Vldx_lane 2, | |
1430 [Disassembles_as [Use_operands | |
1431 [| VecArray (2, Element_of_dreg); | |
1432 CstPtrTo Corereg |]]], | |
1433 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg; | |
1434 VecArray (2, Qreg); Immed |], | |
1435 "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; | |
1436 | |
1437 Vldx_dup 2, | |
1438 [Disassembles_as [Use_operands | |
1439 [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]], | |
1440 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], | |
1441 "vld2_dup", bits_1, pf_su_8_32; | |
1442 Vldx_dup 2, | |
1443 [Instruction_name ["vld1"]; Disassembles_as [Use_operands | |
1444 [| VecArray (2, Dreg); CstPtrTo Corereg |]]], | |
1445 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], | |
1446 "vld2_dup", bits_1, [S64; U64]; | |
1447 | |
1448 (* VST2 variants. *) | |
1449 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1450 PtrTo Corereg |]]], | |
1451 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", | |
1452 store_1, pf_su_8_32; | |
1453 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1454 PtrTo Corereg |]]; | |
1455 Instruction_name ["vst1"]], | |
1456 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", | |
1457 store_1, [S64; U64]; | |
1458 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1459 PtrTo Corereg |]; | |
1460 Use_operands [| VecArray (2, Dreg); | |
1461 PtrTo Corereg |]]], | |
1462 Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q", | |
1463 store_1, pf_su_8_32; | |
1464 | |
1465 Vstx_lane 2, | |
1466 [Disassembles_as [Use_operands | |
1467 [| VecArray (2, Element_of_dreg); | |
1468 CstPtrTo Corereg |]]], | |
1469 Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane", | |
1470 store_3, P8 :: P16 :: F32 :: su_8_32; | |
1471 Vstx_lane 2, | |
1472 [Disassembles_as [Use_operands | |
1473 [| VecArray (2, Element_of_dreg); | |
1474 CstPtrTo Corereg |]]], | |
1475 Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane", | |
1476 store_3, [P16; F32; U16; U32; S16; S32]; | |
1477 | |
1478 (* VLD3 variants. *) | |
1479 Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], | |
1480 "vld3", bits_1, pf_su_8_32; | |
1481 Vldx 3, [Instruction_name ["vld1"]], | |
1482 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], | |
1483 "vld3", bits_1, [S64; U64]; | |
1484 Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg); | |
1485 CstPtrTo Corereg |]; | |
1486 Use_operands [| VecArray (3, Dreg); | |
1487 CstPtrTo Corereg |]]], | |
1488 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |], | |
1489 "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32; | |
1490 | |
1491 Vldx_lane 3, | |
1492 [Disassembles_as [Use_operands | |
1493 [| VecArray (3, Element_of_dreg); | |
1494 CstPtrTo Corereg |]]], | |
1495 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg; | |
1496 VecArray (3, Dreg); Immed |], | |
1497 "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; | |
1498 Vldx_lane 3, | |
1499 [Disassembles_as [Use_operands | |
1500 [| VecArray (3, Element_of_dreg); | |
1501 CstPtrTo Corereg |]]], | |
1502 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg; | |
1503 VecArray (3, Qreg); Immed |], | |
1504 "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; | |
1505 | |
1506 Vldx_dup 3, | |
1507 [Disassembles_as [Use_operands | |
1508 [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]], | |
1509 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], | |
1510 "vld3_dup", bits_1, pf_su_8_32; | |
1511 Vldx_dup 3, | |
1512 [Instruction_name ["vld1"]; Disassembles_as [Use_operands | |
1513 [| VecArray (3, Dreg); CstPtrTo Corereg |]]], | |
1514 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], | |
1515 "vld3_dup", bits_1, [S64; U64]; | |
1516 | |
1517 (* VST3 variants. *) | |
1518 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1519 PtrTo Corereg |]]], | |
1520 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", | |
1521 store_1, pf_su_8_32; | |
1522 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1523 PtrTo Corereg |]]; | |
1524 Instruction_name ["vst1"]], | |
1525 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", | |
1526 store_1, [S64; U64]; | |
1527 Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg); | |
1528 PtrTo Corereg |]; | |
1529 Use_operands [| VecArray (3, Dreg); | |
1530 PtrTo Corereg |]]], | |
1531 Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q", | |
1532 store_1, pf_su_8_32; | |
1533 | |
1534 Vstx_lane 3, | |
1535 [Disassembles_as [Use_operands | |
1536 [| VecArray (3, Element_of_dreg); | |
1537 CstPtrTo Corereg |]]], | |
1538 Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane", | |
1539 store_3, P8 :: P16 :: F32 :: su_8_32; | |
1540 Vstx_lane 3, | |
1541 [Disassembles_as [Use_operands | |
1542 [| VecArray (3, Element_of_dreg); | |
1543 CstPtrTo Corereg |]]], | |
1544 Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane", | |
1545 store_3, [P16; F32; U16; U32; S16; S32]; | |
1546 | |
1547 (* VLD4/VST4 variants. *) | |
1548 Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], | |
1549 "vld4", bits_1, pf_su_8_32; | |
1550 Vldx 4, [Instruction_name ["vld1"]], | |
1551 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], | |
1552 "vld4", bits_1, [S64; U64]; | |
1553 Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1554 CstPtrTo Corereg |]; | |
1555 Use_operands [| VecArray (4, Dreg); | |
1556 CstPtrTo Corereg |]]], | |
1557 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |], | |
1558 "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32; | |
1559 | |
1560 Vldx_lane 4, | |
1561 [Disassembles_as [Use_operands | |
1562 [| VecArray (4, Element_of_dreg); | |
1563 CstPtrTo Corereg |]]], | |
1564 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg; | |
1565 VecArray (4, Dreg); Immed |], | |
1566 "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; | |
1567 Vldx_lane 4, | |
1568 [Disassembles_as [Use_operands | |
1569 [| VecArray (4, Element_of_dreg); | |
1570 CstPtrTo Corereg |]]], | |
1571 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg; | |
1572 VecArray (4, Qreg); Immed |], | |
1573 "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; | |
1574 | |
1575 Vldx_dup 4, | |
1576 [Disassembles_as [Use_operands | |
1577 [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]], | |
1578 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], | |
1579 "vld4_dup", bits_1, pf_su_8_32; | |
1580 Vldx_dup 4, | |
1581 [Instruction_name ["vld1"]; Disassembles_as [Use_operands | |
1582 [| VecArray (4, Dreg); CstPtrTo Corereg |]]], | |
1583 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], | |
1584 "vld4_dup", bits_1, [S64; U64]; | |
1585 | |
1586 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1587 PtrTo Corereg |]]], | |
1588 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", | |
1589 store_1, pf_su_8_32; | |
1590 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1591 PtrTo Corereg |]]; | |
1592 Instruction_name ["vst1"]], | |
1593 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", | |
1594 store_1, [S64; U64]; | |
1595 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1596 PtrTo Corereg |]; | |
1597 Use_operands [| VecArray (4, Dreg); | |
1598 PtrTo Corereg |]]], | |
1599 Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q", | |
1600 store_1, pf_su_8_32; | |
1601 | |
1602 Vstx_lane 4, | |
1603 [Disassembles_as [Use_operands | |
1604 [| VecArray (4, Element_of_dreg); | |
1605 CstPtrTo Corereg |]]], | |
1606 Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane", | |
1607 store_3, P8 :: P16 :: F32 :: su_8_32; | |
1608 Vstx_lane 4, | |
1609 [Disassembles_as [Use_operands | |
1610 [| VecArray (4, Element_of_dreg); | |
1611 CstPtrTo Corereg |]]], | |
1612 Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane", | |
1613 store_3, [P16; F32; U16; U32; S16; S32]; | |
1614 | |
1615 (* Logical operations. And. *) | |
1616 Vand, [], All (3, Dreg), "vand", notype_2, su_8_64; | |
1617 Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64; | |
1618 | |
1619 (* Or. *) | |
1620 Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_64; | |
1621 Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64; | |
1622 | |
1623 (* Eor. *) | |
1624 Veor, [], All (3, Dreg), "veor", notype_2, su_8_64; | |
1625 Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64; | |
1626 | |
1627 (* Bic (And-not). *) | |
1628 Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_64; | |
1629 Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64; | |
1630 | |
1631 (* Or-not. *) | |
1632 Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_64; | |
1633 Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64; | |
1634 ] | |
1635 | |
(* Table of vreinterpret intrinsics.  For each element type in [elems]
   (taken as the cast target) two entries are produced, one operating on
   D registers and one on Q registers.  Both share the same type list:
   a Cast (target, source) for every element type of [elems] that differs
   from the target, in [elems] order. *)
let reinterp =
  let elems = P8 :: P16 :: F32 :: su_8_64 in
  (* Every cast into [target] from a different element type. *)
  let casts_into target =
    List.map
      (fun source -> Cast (target, source))
      (List.filter (fun source -> source <> target) elems)
  in
  (* The D-register entry followed by the Q-register entry for [target]. *)
  let entries_for target =
    let types = casts_into target in
    [ (Vreinterp, [No_op], Use_operands [| Dreg; Dreg |],
       "vreinterpret", conv_1, types);
      (Vreinterp, [No_op], Use_operands [| Qreg; Qreg |],
       "vreinterpretQ", conv_1, types) ]
  in
  List.concat (List.map entries_for elems)
1656 | |
1657 (* Output routines. *) | |
1658 | |
(* Textual form of an element type, e.g. S8 -> "s8", B16 -> "16".
   Conv and Cast pairs are rendered as the two component names joined by
   an underscore.  Raises Failure "No elts" for NoElts. *)
let rec string_of_elt elt =
  match elt with
  | S8 -> "s8"
  | S16 -> "s16"
  | S32 -> "s32"
  | S64 -> "s64"
  | U8 -> "u8"
  | U16 -> "u16"
  | U32 -> "u32"
  | U64 -> "u64"
  | I8 -> "i8"
  | I16 -> "i16"
  | I32 -> "i32"
  | I64 -> "i64"
  | B8 -> "8"
  | B16 -> "16"
  | B32 -> "32"
  | B64 -> "64"
  | F32 -> "f32"
  | P8 -> "p8"
  | P16 -> "p16"
  | Conv (first, second) | Cast (first, second) ->
      String.concat "_" [string_of_elt first; string_of_elt second]
  | NoElts -> failwith "No elts"
1667 | |
(* Like string_of_elt, except that the two halves of a top-level Conv or
   Cast are separated by a dot instead of an underscore. *)
let string_of_elt_dots = function
  | Conv (first, second) | Cast (first, second) ->
      Printf.sprintf "%s.%s" (string_of_elt first) (string_of_elt second)
  | simple -> string_of_elt simple
1672 | |
(* Spell a vectype as a type name string.

   Vector and scalar element types get the "_t" suffix (e.g. "int8x8_t").
   Immediates, void and the __builtin_neon_* types are returned as-is.
   For T_arrayof the base name is computed without any suffix, "x<num>" is
   appended, and only then is the suffix applied.  T_ptrto appends " *" and
   T_const prepends "const " to the rendering of the inner type. *)
let string_of_vectype vt =
  (* [decorate] is applied to the bare name of a named vector/scalar type. *)
  let rec render decorate ty =
    match ty with
    | T_int8x8 -> decorate "int8x8"
    | T_int8x16 -> decorate "int8x16"
    | T_int16x4 -> decorate "int16x4"
    | T_int16x8 -> decorate "int16x8"
    | T_int32x2 -> decorate "int32x2"
    | T_int32x4 -> decorate "int32x4"
    | T_int64x1 -> decorate "int64x1"
    | T_int64x2 -> decorate "int64x2"
    | T_uint8x8 -> decorate "uint8x8"
    | T_uint8x16 -> decorate "uint8x16"
    | T_uint16x4 -> decorate "uint16x4"
    | T_uint16x8 -> decorate "uint16x8"
    | T_uint32x2 -> decorate "uint32x2"
    | T_uint32x4 -> decorate "uint32x4"
    | T_uint64x1 -> decorate "uint64x1"
    | T_uint64x2 -> decorate "uint64x2"
    | T_float32x2 -> decorate "float32x2"
    | T_float32x4 -> decorate "float32x4"
    | T_poly8x8 -> decorate "poly8x8"
    | T_poly8x16 -> decorate "poly8x16"
    | T_poly16x4 -> decorate "poly16x4"
    | T_poly16x8 -> decorate "poly16x8"
    | T_int8 -> decorate "int8"
    | T_int16 -> decorate "int16"
    | T_int32 -> decorate "int32"
    | T_int64 -> decorate "int64"
    | T_uint8 -> decorate "uint8"
    | T_uint16 -> decorate "uint16"
    | T_uint32 -> decorate "uint32"
    | T_uint64 -> decorate "uint64"
    | T_poly8 -> decorate "poly8"
    | T_poly16 -> decorate "poly16"
    | T_float32 -> decorate "float32"
    | T_immediate _ -> "const int"
    | T_void -> "void"
    | T_intQI -> "__builtin_neon_qi"
    | T_intHI -> "__builtin_neon_hi"
    | T_intSI -> "__builtin_neon_si"
    | T_intDI -> "__builtin_neon_di"
    | T_floatSF -> "__builtin_neon_sf"
    | T_arrayof (count, elem) ->
        (* The element name is taken undecorated; the decoration goes on
           the combined "<elem>x<count>" name. *)
        let plain = render (fun s -> s) elem in
        decorate (plain ^ "x" ^ string_of_int count)
    | T_ptrto target -> render decorate target ^ " *"
    | T_const target -> "const " ^ render decorate target
  in
  render (fun s -> s ^ "_t") vt
1726 | |
(* Name of the __builtin_neon_* type corresponding to an opaque integer
   mode constructor. *)
let string_of_inttype ity =
  match ity with
  | B_TImode -> "__builtin_neon_ti"
  | B_EImode -> "__builtin_neon_ei"
  | B_OImode -> "__builtin_neon_oi"
  | B_CImode -> "__builtin_neon_ci"
  | B_XImode -> "__builtin_neon_xi"
1733 | |
(* Lower-case spelling of a machine mode constructor. *)
let string_of_mode mode =
  match mode with
  | V8QI -> "v8qi"
  | V4HI -> "v4hi"
  | V2SI -> "v2si"
  | V2SF -> "v2sf"
  | DI -> "di"
  | V16QI -> "v16qi"
  | V8HI -> "v8hi"
  | V4SI -> "v4si"
  | V4SF -> "v4sf"
  | V2DI -> "v2di"
  | QI -> "qi"
  | HI -> "hi"
  | SI -> "si"
  | SF -> "sf"
1739 | |
(* Names in the ops table use uppercase characters for letters which form
   part of the intrinsic name but should be omitted from the builtin name
   (the info is passed in an extra argument, instead).  The intrinsic name
   is therefore simply the table name folded to lower case.

   String.lowercase_ascii is used because String.lowercase was deprecated
   in OCaml 4.03 and removed in OCaml 5.0; the two agree on ASCII input. *)
let intrinsic_name name = String.lowercase_ascii name
1744 | |
(* Compute the builtin name for an ops-table entry.

   The name may be overridden by a Flipped or Builtin_name entry found in
   the features list; because the list is folded from the right, the
   leftmost such entry is the one that takes effect.  Every uppercase
   letter is then dropped from the chosen name, while all other characters
   (lowercase letters, digits, underscores, ...) are kept.

   Char.lowercase_ascii replaces the former String.lowercase call, which
   was removed in OCaml 5.0 and also allocated a one-character string for
   each character tested; the two agree on ASCII input. *)
let builtin_name features name =
  let name =
    List.fold_right
      (fun el name ->
        match el with
          Flipped x | Builtin_name x -> x
        | _ -> name)
      features name
  in
  (* A character is kept when lowercasing leaves it unchanged. *)
  let keep c = Char.lowercase_ascii c = c in
  let buf = Buffer.create (String.length name) in
  String.iter (fun c -> if keep c then Buffer.add_char buf c) name;
  Buffer.contents buf
1758 | |
(* Transform an arity into the list of its type names, one string per
   vectype carried by the arity and in the same order. *)
let strings_of_arity a =
  let vectypes =
    match a with
    | Arity0 t0 -> [t0]
    | Arity1 (t0, t1) -> [t0; t1]
    | Arity2 (t0, t1, t2) -> [t0; t1; t2]
    | Arity3 (t0, t1, t2, t3) -> [t0; t1; t2; t3]
    | Arity4 (t0, t1, t2, t3, t4) -> [t0; t1; t2; t3; t4]
  in
  List.map string_of_vectype vectypes
1776 | |
(* Trailing components of a builtin name that are removed to obtain the
   name used as an instruction.  A suffix is only removed when it is
   immediately preceded by an underscore (the underscore goes too). *)
let suffixes_to_strip =
  "n" :: "lane" :: "dup" :: []
1781 | |
(* Get the possible names of an instruction corresponding to a "name" from
   the ops table.

   The candidate names come from the first Instruction_name or Flipped
   entry in the features list, whichever appears first: an Instruction_name
   contributes its whole list, a Flipped contributes its single name.  When
   neither is present the candidate is the equivalent builtin name.

   Each candidate is then cut at its last underscore if the text following
   that underscore is one of suffixes_to_strip; otherwise it is returned
   unchanged. *)
let get_insn_names features name =
  (* Names given explicitly by the features list, if any. *)
  let rec explicit_names = function
    | [] -> None
    | Instruction_name names :: _ -> Some names
    | Flipped flipped :: _ -> Some [flipped]
    | _ :: rest -> explicit_names rest
  in
  let candidates =
    match explicit_names features with
    | Some names -> names
    | None -> [builtin_name features name]
  in
  let last_underscore candidate =
    try Some (String.rindex candidate '_') with Not_found -> None
  in
  let strip_suffix candidate =
    match last_underscore candidate with
    | None -> candidate
    | Some underscore ->
        let suffix_len = String.length candidate - underscore - 1 in
        let suffix = String.sub candidate (underscore + 1) suffix_len in
        if List.mem suffix suffixes_to_strip then
          String.sub candidate 0 underscore
        else
          candidate
  in
  List.map strip_suffix candidates
1819 | |
(* Apply [f] to each element of [elts], comma-separate the resulting
   strings with ", " and append the result to [acc].  With an empty list
   [acc] is returned unchanged; there is no trailing separator.

   The join is done with String.concat rather than by repeatedly growing
   the accumulator, which copied the whole string once per element.  [f]
   is still applied to the elements from left to right. *)
let commas f elts acc =
  acc ^ String.concat ", " (List.map f elts)
1828 | |
(* Given a list of features and the shape specified in the "ops" table,
   apply a function to each possible shape that the instruction may have.
   By default, this is the "shape" entry in "ops".  If the features list
   contains a Disassembles_as entry, the shapes contained in that entry are
   mapped to corresponding outputs and returned in a list.  If there is more
   than one Disassembles_as entry, only the first is used.

   The search for the Disassembles_as entry is kept separate from the
   application of [f].  Previously both sat inside a single
   "try ... with Not_found", so a Not_found raised by [f] itself was
   swallowed and silently turned into the "[f shape]" fallback.  Any
   exception raised by [f] now propagates to the caller. *)
let analyze_all_shapes features shape f =
  (* Shapes of the first Disassembles_as entry, if there is one. *)
  let rec first_disassembly = function
      [] -> None
    | Disassembles_as shapes :: _ -> Some shapes
    | _ :: rest -> first_disassembly rest
  in
  match first_disassembly features with
    Some shapes -> List.map f shapes
  | None -> [f shape]
1844 |