Mercurial > hg > CbC > CbC_gcc
comparison gcc/config/arm/neon.ml @ 0:a06113de4d67
first commit
author | kent <kent@cr.ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 17 Jul 2009 14:47:48 +0900 |
parents | |
children | 77e2b8dfacca |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a06113de4d67 |
---|---|
1 (* Common code for ARM NEON header file, documentation and test case | |
2 generators. | |
3 | |
4 Copyright (C) 2006, 2007 Free Software Foundation, Inc. | |
5 Contributed by CodeSourcery. | |
6 | |
7 This file is part of GCC. | |
8 | |
9 GCC is free software; you can redistribute it and/or modify it under | |
10 the terms of the GNU General Public License as published by the Free | |
11 Software Foundation; either version 3, or (at your option) any later | |
12 version. | |
13 | |
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 for more details. | |
18 | |
19 You should have received a copy of the GNU General Public License | |
20 along with GCC; see the file COPYING3. If not see | |
21 <http://www.gnu.org/licenses/>. *) | |
22 | |
(* Shorthand types for vector elements.
   S/U = signed/unsigned integer, F = float, P = polynomial, I = sign-less
   integer, B = untyped bits -- each suffixed with its width in bits.
   [Conv (dest, src)] is a value conversion and [Cast (dest, src)] a
   reinterpretation between two element types; [NoElts] means no element
   type applies. *)
type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
          | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
          | Cast of elts * elts | NoElts

(* Classification of element types (see [elt_class] below, which maps each
   [elts] constructor to one of these). *)
type eltclass = Signed | Unsigned | Float | Poly | Int | Bits
              | ConvClass of eltclass * eltclass | NoType
30 | |
(* These vector types correspond directly to C types.  The xN forms are
   NEON vector types (element width x lane count); the plain forms are
   scalars.  [T_immediate (lo, hi)] is an immediate constrained to an
   inclusive range; [T_arrayof], [T_ptrto] and [T_const] build composite
   types; the T_int?I forms name GCC's QI/HI/SI/DI integer modes. *)
type vectype = T_int8x8 | T_int8x16
             | T_int16x4 | T_int16x8
             | T_int32x2 | T_int32x4
             | T_int64x1 | T_int64x2
             | T_uint8x8 | T_uint8x16
             | T_uint16x4 | T_uint16x8
             | T_uint32x2 | T_uint32x4
             | T_uint64x1 | T_uint64x2
             | T_float32x2 | T_float32x4
             | T_poly8x8 | T_poly8x16
             | T_poly16x4 | T_poly16x8
             | T_immediate of int * int
             | T_int8 | T_int16
             | T_int32 | T_int64
             | T_uint8 | T_uint16
             | T_uint32 | T_uint64
             | T_poly8 | T_poly16
             | T_float32 | T_arrayof of int * vectype
             | T_ptrto of vectype | T_const of vectype
             | T_void | T_intQI
             | T_intHI | T_intSI
             | T_intDI
54 | |
(* The meanings of the following are:
     TImode : "Tetra", two registers (four words).
     EImode : "hExa", three registers (six words).
     OImode : "Octa", four registers (eight words).
     CImode : "dodeCa", six registers (twelve words).
     XImode : "heXadeca", eight registers (sixteen words).
*)

(* Opaque integer modes covering multi-register values; selected by total
   size in 32-bit words by [inttype_for_array] below. *)
type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode
64 | |
(* One operand position in an instruction shape: a 64-bit D register, a
   128-bit Q register, an ARM core register, an immediate, an array of
   registers, or a (possibly const) pointer to one of these. *)
type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
               | PtrTo of shape_elt | CstPtrTo of shape_elt
               (* These next ones are used only in the test generator. *)
               | Element_of_dreg       (* Used for "lane" variants. *)
               | Element_of_qreg       (* Likewise. *)
               | All_elements_of_dreg  (* Used for "dup" variants. *)
71 | |
(* Overall shape of an instruction: how many operands it has and which
   register classes they use.  Interpreted by [shapemap] (element
   transformation per operand) and [regmap] (register class per operand)
   below.  Operand 0 is always the result. *)
type shape_form = All of int * shape_elt  (* [n] operands, all of the same class. *)
               | Long                     (* Widening: Q = D op D. *)
               | Long_noreg of shape_elt
               | Wide                     (* Widening: Q = Q op D. *)
               | Wide_noreg of shape_elt
               | Narrow                   (* Narrowing: D = Q op Q. *)
               | Long_imm
               | Narrow_imm
               | Binary_imm of shape_elt  (* Binary op with an immediate operand. *)
               | Use_operands of shape_elt array  (* Explicit per-operand classes. *)
               | By_scalar of shape_elt
               | Unary_scalar of shape_elt
               | Wide_lane
               | Wide_scalar
               | Pair_result of shape_elt (* Result is an array of two registers. *)
87 | |
(* Arity of an intrinsic: the first [vectype] is the result type, the
   remaining ones (zero to four) are the argument types. *)
type arity = Arity0 of vectype
           | Arity1 of vectype * vectype
           | Arity2 of vectype * vectype * vectype
           | Arity3 of vectype * vectype * vectype * vectype
           | Arity4 of vectype * vectype * vectype * vectype * vectype
93 | |
(* GCC machine modes: 64-bit vectors (V8QI..DI), 128-bit vectors
   (V16QI..V2DI), and scalar modes (QI/HI/SI/SF). *)
type vecmode = V8QI | V4HI | V2SI | V2SF | DI
             | V16QI | V8HI | V4SI | V4SF | V2DI
             | QI | HI | SI | SF
97 | |
(* One constructor per supported intrinsic family.  The [of int]
   constructors carry a count -- e.g. the structure count for the
   load/store families (vld1/vld2/...) and table size for vtbl/vtbx. *)
type opcode =
  (* Binary ops. *)
    Vadd
  | Vmul
  | Vmla
  | Vmls
  | Vsub
  | Vceq
  | Vcge
  | Vcgt
  | Vcle
  | Vclt
  | Vcage
  | Vcagt
  | Vcale
  | Vcalt
  | Vtst
  | Vabd
  | Vaba
  | Vmax
  | Vmin
  | Vpadd
  | Vpada
  | Vpmax
  | Vpmin
  | Vrecps
  | Vrsqrts
  | Vshl
  | Vshr_n
  | Vshl_n
  | Vsra_n
  | Vsri
  | Vsli
  (* Logic binops. *)
  | Vand
  | Vorr
  | Veor
  | Vbic
  | Vorn
  | Vbsl
  (* Ops with scalar. *)
  | Vmul_lane
  | Vmla_lane
  | Vmls_lane
  | Vmul_n
  | Vmla_n
  | Vmls_n
  | Vmull_n
  | Vmull_lane
  | Vqdmull_n
  | Vqdmull_lane
  | Vqdmulh_n
  | Vqdmulh_lane
  (* Unary ops. *)
  | Vabs
  | Vneg
  | Vcls
  | Vclz
  | Vcnt
  | Vrecpe
  | Vrsqrte
  | Vmvn
  (* Vector extract. *)
  | Vext
  (* Reverse elements. *)
  | Vrev64
  | Vrev32
  | Vrev16
  (* Transposition ops. *)
  | Vtrn
  | Vzip
  | Vuzp
  (* Loads and stores (VLD1/VST1/VLD2...), elements and structures. *)
  | Vldx of int
  | Vstx of int
  | Vldx_lane of int
  | Vldx_dup of int
  | Vstx_lane of int
  (* Set/extract lanes from a vector. *)
  | Vget_lane
  | Vset_lane
  (* Initialize vector from bit pattern. *)
  | Vcreate
  (* Set all lanes to same value. *)
  | Vdup_n
  | Vmov_n  (* Is this the same? *)
  (* Duplicate scalar to all lanes of vector. *)
  | Vdup_lane
  (* Combine vectors. *)
  | Vcombine
  (* Get quadword high/low parts. *)
  | Vget_high
  | Vget_low
  (* Convert vectors. *)
  | Vcvt
  | Vcvt_n
  (* Narrow/lengthen vectors. *)
  | Vmovn
  | Vmovl
  (* Table lookup. *)
  | Vtbl of int
  | Vtbx of int
  (* Reinterpret casts. *)
  | Vreinterp
202 | |
(* Features used for documentation, to distinguish between some instruction
   variants, and to signal special requirements (e.g. swapping arguments). *)

type features =
    Halving      (* Halving variant, e.g. vhadd. *)
  | Rounding     (* Rounding variant, e.g. vrhadd / vrshl. *)
  | Saturating   (* Saturating variant, e.g. vqadd / vqshl. *)
  | Dst_unsign   (* NOTE(review): presumably marks an unsigned destination
                    (cf. [dst_unsign] below) -- not used in the visible ops. *)
  | High_half    (* High-half variant, e.g. vaddhn / vqdmulh. *)
  | Doubling     (* Doubling variant, e.g. vqdmulh. *)
  | Flipped of string  (* Builtin name to use with flipped arguments. *)
  | InfoWord  (* Pass an extra word for signage/rounding etc. (always passed
                 for All _, Long, Wide, Narrow shape_forms. *)
  | ReturnPtr  (* Pass explicit pointer to return value as first argument. *)
    (* A specification as to the shape of instruction expected upon
       disassembly, used if it differs from the shape used to build the
       intrinsic prototype.  Multiple entries in the constructor's argument
       indicate that the intrinsic expands to more than one assembly
       instruction, each with a corresponding shape specified here. *)
  | Disassembles_as of shape_form list
  | Builtin_name of string  (* Override the name of the builtin. *)
    (* Override the name of the instruction.  If more than one name
       is specified, it means that the instruction can have any of those
       names. *)
  | Instruction_name of string list
    (* Mark that the intrinsic yields no instructions, or expands to yield
       behavior that the test generator cannot test. *)
  | No_op
    (* Mark that the intrinsic has constant arguments that cannot be set
       to the defaults (zero for pointers and one otherwise) in the test
       cases.  The function supplied must return the integer to be written
       into the testcase for the argument number (0-based) supplied to it. *)
  | Const_valuator of (int -> int)
236 | |
237 exception MixedMode of elts * elts | |
238 | |
(* Width in bits of an element type.  A [Conv] must have equal widths on
   both sides; a [Cast] has no single width and raises [MixedMode]. *)
let rec elt_width = function
    S8 | U8 | P8 | I8 | B8 -> 8
  | S16 | U16 | P16 | I16 | B16 -> 16
  | S32 | F32 | U32 | I32 | B32 -> 32
  | S64 | U64 | I64 | B64 -> 64
  | Conv (a, b) ->
      let wa = elt_width a and wb = elt_width b in
      (* Conversions only have a well-defined width when both sides agree. *)
      if wa = wb then wa else failwith "element width?"
  | Cast (a, b) -> raise (MixedMode (a, b))
  | NoElts -> failwith "No elts"
249 | |
(* Classification of an element type ([eltclass]); conversions and casts
   are classified pairwise as [ConvClass (dest class, src class)]. *)
let rec elt_class = function
    S8 | S16 | S32 | S64 -> Signed
  | U8 | U16 | U32 | U64 -> Unsigned
  | P8 | P16 -> Poly
  | F32 -> Float
  | I8 | I16 | I32 | I64 -> Int
  | B8 | B16 | B32 | B64 -> Bits
  | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
  | NoElts -> NoType
259 | |
(* Inverse of ([elt_class], [elt_width]): the element of class [c] and
   width [w].  Fails for combinations with no element (e.g. Float at any
   width other than 32, Poly above 16). *)
let elt_of_class_width c w =
  match c, w with
    Signed, 8 -> S8
  | Signed, 16 -> S16
  | Signed, 32 -> S32
  | Signed, 64 -> S64
  | Float, 32 -> F32
  | Unsigned, 8 -> U8
  | Unsigned, 16 -> U16
  | Unsigned, 32 -> U32
  | Unsigned, 64 -> U64
  | Poly, 8 -> P8
  | Poly, 16 -> P16
  | Int, 8 -> I8
  | Int, 16 -> I16
  | Int, 32 -> I32
  | Int, 64 -> I64
  | Bits, 8 -> B8
  | Bits, 16 -> B16
  | Bits, 32 -> B32
  | Bits, 64 -> B64
  | _ -> failwith "Bad element type"
282 | |
(* Width-preserving reclassification: map [elt] to the element of class
   [cls] with the same bit width. *)
let same_width_as cls elt = elt_of_class_width cls (elt_width elt)

(* Return unsigned integer element the same width as argument. *)
let unsigned_of_elt elt = same_width_as Unsigned elt

(* Return signed integer element the same width as argument. *)
let signed_of_elt elt = same_width_as Signed elt

(* Return untyped bits element the same width as argument. *)
let bits_of_elt elt = same_width_as Bits elt
293 | |
(* Collapse signed and unsigned integer elements to the sign-less [I*]
   variant of the same width; all other elements pass through unchanged. *)
let non_signed_variant = function
    S8 -> I8
  | S16 -> I16
  | S32 -> I32
  | S64 -> I64
  | U8 -> I8
  | U16 -> I16
  | U32 -> I32
  | U64 -> I64
  | x -> x
304 | |
(* Map polynomial elements to the unsigned element of the same width;
   elements of every other class keep their class (and width). *)
let poly_unsigned_variant v =
  let cls =
    match elt_class v with
    | Poly -> Unsigned
    | other -> other
  in
  elt_of_class_width cls (elt_width v)
310 | |
(* Element of the same class but twice the width (e.g. S16 -> S32).
   Fails via [elt_of_class_width] if no such element exists. *)
let widen_elt elt =
  elt_of_class_width (elt_class elt) (elt_width elt * 2)

(* Element of the same class but half the width (e.g. S32 -> S16). *)
let narrow_elt elt =
  elt_of_class_width (elt_class elt) (elt_width elt / 2)
320 | |
(* If we're trying to find a mode from a "Use_operands" instruction, use the
   last vector operand as the dominant mode used to invoke the correct builtin.
   We must stick to this rule in neon.md.
   Scans [operands] from the end; raises [Invalid_argument] (array bounds)
   if no D/Q register operand is present. *)
let find_key_operand operands =
  let rec from_end idx =
    match operands.(idx) with
    | Qreg | VecArray (_, Qreg) -> Qreg
    | Dreg | VecArray (_, Dreg) -> Dreg
    | _ -> from_end (idx - 1)
  in
  from_end (Array.length operands - 1)
334 | |
(* GCC machine mode used to invoke the builtin for element [elt] under
   [shape].  The mode is picked from a per-register-class table indexed by
   element width (8/16/32/64); [flt] selects the float mode in the 32-bit
   slot.  For [Use_operands] the last vector operand dominates (see
   [find_key_operand] above). *)
let rec mode_of_elt elt shape =
  let flt = match elt_class elt with
    Float | ConvClass(_, Float) -> true | _ -> false in
  let idx =
    match elt_width elt with
      8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
    | _ -> failwith "Bad element width"
  in match shape with
    All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
  | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
      (* 64-bit (double-register) modes. *)
      [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
  | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
  | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
      (* 128-bit (quad-register) modes. *)
      [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
  | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
      (* Scalar modes for core registers and pointed-to data. *)
      [| QI; HI; if flt then SF else SI; DI |].(idx)
  | Long | Wide | Wide_lane | Wide_scalar
  | Long_imm ->
      [| V8QI; V4HI; V2SI; DI |].(idx)
  | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
  | Use_operands ops -> mode_of_elt elt (All (0, (find_key_operand ops)))
  | _ -> failwith "invalid shape"
357 | |
(* Modify an element type dependent on the shape of the instruction and the
   operand number. *)

(* Returns an [elts -> elts] function for operand [no]: widening shapes
   widen the result (and, for Wide, the first input); narrowing shapes
   narrow the result; every other shape leaves the element unchanged. *)
let shapemap shape no =
  let ident = fun x -> x in
  match shape with
    All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
  | Binary_imm _ -> ident
  | Long | Long_noreg _ | Wide_scalar | Long_imm ->
      [| widen_elt; ident; ident |].(no)
  | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no)
  | Wide_lane -> [| widen_elt; ident; ident; ident |].(no)
  | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no)
371 | |
(* Register type (D/Q) of an operand, based on shape and operand number. *)

(* Operand 0 is the result; out-of-range [no] raises [Invalid_argument]
   (array bounds). *)
let regmap shape no =
  match shape with
    All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
  | Long -> [| Qreg; Dreg; Dreg |].(no)
  | Wide -> [| Qreg; Qreg; Dreg |].(no)
  | Narrow -> [| Dreg; Qreg; Qreg |].(no)
  | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no)
  | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no)
  | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no)
  | Unary_scalar reg -> [| reg; Dreg; Immed |].(no)
  | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no)
  | Binary_imm reg -> [| reg; reg; Immed |].(no)
  | Long_imm -> [| Qreg; Dreg; Immed |].(no)
  | Narrow_imm -> [| Dreg; Qreg; Immed |].(no)
  | Use_operands these -> these.(no)
389 | |
(* C type ([vectype]) of operand [no] for element [elt] under [shape]:
   the element is first adjusted by [shapemap], then mapped through the
   register class from [regmap].  Fails for element/register combinations
   with no C type; test-generator-only register classes are rejected by
   assertion. *)
let type_for_elt shape elt no =
  let elt = (shapemap shape no) elt in
  let reg = regmap shape no in
  let rec type_for_reg_elt reg elt =
    match reg with
      Dreg ->
        (* 64-bit vector types. *)
        begin match elt with
          S8 -> T_int8x8
        | S16 -> T_int16x4
        | S32 -> T_int32x2
        | S64 -> T_int64x1
        | U8 -> T_uint8x8
        | U16 -> T_uint16x4
        | U32 -> T_uint32x2
        | U64 -> T_uint64x1
        | F32 -> T_float32x2
        | P8 -> T_poly8x8
        | P16 -> T_poly16x4
        | _ -> failwith "Bad elt type"
        end
    | Qreg ->
        (* 128-bit vector types. *)
        begin match elt with
          S8 -> T_int8x16
        | S16 -> T_int16x8
        | S32 -> T_int32x4
        | S64 -> T_int64x2
        | U8 -> T_uint8x16
        | U16 -> T_uint16x8
        | U32 -> T_uint32x4
        | U64 -> T_uint64x2
        | F32 -> T_float32x4
        | P8 -> T_poly8x16
        | P16 -> T_poly16x8
        | _ -> failwith "Bad elt type"
        end
    | Corereg ->
        (* Scalar types held in ARM core registers. *)
        begin match elt with
          S8 -> T_int8
        | S16 -> T_int16
        | S32 -> T_int32
        | S64 -> T_int64
        | U8 -> T_uint8
        | U16 -> T_uint16
        | U32 -> T_uint32
        | U64 -> T_uint64
        | P8 -> T_poly8
        | P16 -> T_poly16
        | F32 -> T_float32
        | _ -> failwith "Bad elt type"
        end
    | Immed ->
        (* Immediate range is a placeholder here; real ranges come from the
           const-shift generators below. *)
        T_immediate (0, 0)
    | VecArray (num, sub) ->
        T_arrayof (num, type_for_reg_elt sub elt)
    | PtrTo x ->
        T_ptrto (type_for_reg_elt x elt)
    | CstPtrTo x ->
        T_ptrto (T_const (type_for_reg_elt x elt))
    (* Anything else is solely for the use of the test generator. *)
    | _ -> assert false
  in
    type_for_reg_elt reg elt
452 | |
(* Return size of a vector type, in bits.  Raises [Not_found] for
   non-vector (scalar, pointer, array, ...) types. *)
let vectype_size = function
    T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
  | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
  | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
  | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
  | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
  | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
  | _ -> raise Not_found
462 | |
(* Opaque integer mode for an array of [num] vectors of type [elttype],
   chosen by the total size in 32-bit words (see the mode-naming comment
   above [inttype]).  Fails if the size matches no mode. *)
let inttype_for_array num elttype =
  let eltsize = vectype_size elttype in
  let numwords = (num * eltsize) / 32 in
  match numwords with
    4 -> B_TImode
  | 6 -> B_EImode
  | 8 -> B_OImode
  | 12 -> B_CImode
  | 16 -> B_XImode
  | _ -> failwith ("no int type for size " ^ string_of_int numwords)
473 | |
(* These functions return pairs of (internal, external) types, where "internal"
   types are those seen by GCC, and "external" are those seen by the assembler.
   These types aren't necessarily the same, since the intrinsics can munge more
   than one C type into each assembler opcode. *)

(* Wrap a type-generating function so that the element it reports is
   replaced by its sign-less variant; the computed arity is unchanged. *)
let make_sign_invariant func shape elt =
  let arity, elt' = func shape elt in
  arity, non_signed_variant elt'
482 | |
(* Don't restrict any types. *)

(* All operand types derive from the one element type [elt]; [make_arity]
   receives a function from operand number to its [vectype]. *)
let elts_same make_arity shape elt =
  let vtype = type_for_elt shape elt in
  make_arity vtype, elt
488 | |
(* As sign_invar_*, but when sign matters. *)

(* The _io variants repeat operand 0 as an input (in/out accumulator
   ops); the _lane variants carry one extra operand (the lane selector). *)
let elts_same_io_lane =
  elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_io =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2))

let elts_same_2_lane =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3))

(* Three-input ops type exactly like the two-input lane variants. *)
let elts_same_3 = elts_same_2_lane

let elts_same_2 =
  elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2))

let elts_same_1 =
  elts_same (fun vtype -> Arity1 (vtype 0, vtype 1))
506 | |
(* Use for signed/unsigned invariant operations (i.e. where the operation
   doesn't depend on the sign of the data). *)

let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
let sign_invar_io = make_sign_invariant elts_same_io
let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
let sign_invar_2 = make_sign_invariant elts_same_2
let sign_invar_1 = make_sign_invariant elts_same_1
515 | |
(* Sign-sensitive comparison.  The result (operand 0) is an unsigned mask
   with the same element width as the compared operands. *)

let cmp_sign_matters shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (rtype, vtype 1, vtype 2), elt
522 | |
(* Signed/unsigned invariant comparison: same typing as
   [cmp_sign_matters], but the reported element is the sign-less variant,
   with 8-bit polynomial additionally collapsed to plain [I8]. *)

let cmp_sign_invar shape elt =
  let arity, chosen = cmp_sign_matters shape elt in
  let normalized =
    match non_signed_variant chosen with
    | P8 -> I8
    | other -> other
  in
  arity, normalized
533 | |
(* Comparison (VTST) where only the element width matters. *)

(* Result is an unsigned mask; the reported element is the untyped-bits
   variant of the operand element. *)
let cmp_bits shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0
  and bits_only = bits_of_elt elt in
  Arity2 (rtype, vtype 1, vtype 2), bits_only
541 | |
(* Register shift: the shift-count operand (operand 2) always uses the
   signed element variant, regardless of the data's signedness. *)
let reg_shift shape elt =
  let vtype = type_for_elt shape elt
  and op2type = type_for_elt shape (signed_of_elt elt) 2 in
  Arity2 (vtype 0, vtype 1, op2type), elt
546 | |
(* Genericised constant-shift type-generating function. *)

(* [const_shift mkimm ?arity ?result shape elt]:
   - the immediate operand type is built by [mkimm] from the width of
     operand 2's element (after [shapemap] adjustment);
   - [?result] optionally rewrites the element used for the result type;
   - [?arity] overrides the default [Arity2 (result, op1, immediate)]. *)
let const_shift mkimm ?arity ?result shape elt =
  let op2type = (shapemap shape 2) elt in
  let op2width = elt_width op2type in
  let op2 = mkimm op2width
  and op1 = type_for_elt shape elt 1
  and r_elt =
    match result with
      None -> elt
    | Some restriction -> restriction elt in
  let rtype = type_for_elt shape r_elt 0 in
  match arity with
    None -> Arity2 (rtype, op1, op2), elt
  | Some mkarity -> mkarity rtype op1 op2, elt
562 | |
(* Use for immediate right-shifts. *)

(* Right shift by constant: immediate range is 1..(element width). *)
let shift_right shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) shape elt

(* Shift-right and accumulate: operand 0 is also an input. *)
let shift_right_acc shape elt =
  const_shift (fun imm -> T_immediate (1, imm))
    ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt
571 | |
(* Use for immediate right-shifts when the operation doesn't care about
   signedness. *)

let shift_right_sign_invar =
  make_sign_invariant shift_right

(* Immediate right-shift; result is unsigned even when operand is signed. *)

let shift_right_to_uns shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt
    shape elt
583 | |
(* Immediate left-shift.  Immediate range is 0..(element width - 1). *)

let shift_left shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt

(* Immediate left-shift, unsigned result. *)

let shift_left_to_uns shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt
    shape elt
594 | |
(* Immediate left-shift, don't care about signs. *)

let shift_left_sign_invar =
  make_sign_invariant shift_left

(* Shift left/right and insert: only element size matters. *)

(* Operand 0 is also an input; the reported element is bits-only. *)
let shift_insert shape elt =
  let arity, elt =
    const_shift (fun imm -> T_immediate (1, imm))
      ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt in
  arity, bits_of_elt elt
607 | |
(* Get/set lane. *)

(* Extract one lane.  Polynomial elements are reported as unsigned of the
   same width; all 32-bit elements as untyped bits. *)
let get_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity2 (vtype 0, vtype 1, vtype 2),
  (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x)

(* Insert into one lane; only element size matters (bits-only element). *)
let set_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt

(* As [set_lane] but reporting no element type at all. *)
let set_lane_notype shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts
622 | |
(* Build a vector from a 64-bit bit pattern: the single input operand is
   always typed as a U64 scalar, the result uses the requested element. *)
let create_vector shape elt =
  let rtype = type_for_elt shape elt 0 in
  let srctype = type_for_elt shape U64 1 in
  Arity1 (rtype, srctype), elt
627 | |
(* Conversion ops: [elt] must be [Conv] or [Cast]; input types derive from
   the source element, the result type from the destination element. *)
let conv make_arity shape elt =
  let edest, esrc = match elt with
    Conv (edest, esrc) | Cast (edest, esrc) -> edest, esrc
  | _ -> failwith "Non-conversion element in conversion" in
  let vtype = type_for_elt shape esrc
  and rtype = type_for_elt shape edest 0 in
  make_arity rtype vtype, elt

let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1))
let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2))
638 | |
(* Operation has an unsigned result even if operands are signed. *)

let dst_unsign make_arity shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  make_arity rtype vtype, elt

let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1))
647 | |
(* Wrap a type-generating function so that the element it reports is
   reduced to the untyped-bits element of the same width. *)
let make_bits_only func shape elt =
  let arity, elt' = func shape elt in
  arity, bits_of_elt elt'
651 | |
(* Extend operation. *)

(* Three-input op where only element size matters: the reported element is
   the untyped-bits variant. *)
let extend shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt
657 | |
(* Table look-up operations. Operand 2 is signed/unsigned for signed/unsigned
   integer ops respectively, or unsigned for polynomial ops. *)

let table mkarity shape elt =
  let vtype = type_for_elt shape elt in
  let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in
  mkarity vtype op2, bits_of_elt elt

let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2))
(* The _io variant repeats operand 0 as an input (in/out table lookup). *)
let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2))
668 | |
(* Operations where only bits matter. *)

let bits_1 = make_bits_only elts_same_1
let bits_2 = make_bits_only elts_same_2
let bits_3 = make_bits_only elts_same_3
674 | |
(* Store insns.  The result type is void; only element size matters, so
   the reported element is the untyped-bits variant. *)
let store_1 shape elt =
  let operand = type_for_elt shape elt in
  Arity2 (T_void, operand 0, operand 1), bits_of_elt elt

let store_3 shape elt =
  let operand = type_for_elt shape elt in
  Arity3 (T_void, operand 0, operand 1, operand 2), bits_of_elt elt
683 | |
(* Wrap a type-generating function, discarding the element it reports and
   substituting [NoElts]. *)
let make_notype func shape elt =
  let arity, _ = func shape elt in
  arity, NoElts

let notype_1 = make_notype elts_same_1
let notype_2 = make_notype elts_same_2
let notype_3 = make_notype elts_same_3
691 | |
(* Bit-select operations (first operand is unsigned int). *)

let bit_select shape elt =
  let vtype = type_for_elt shape elt
  and itype = type_for_elt shape (unsigned_of_elt elt) in
  Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts
698 | |
(* Common lists of supported element types. *)

let su_8_32 = [S8; S16; S32; U8; U16; U32]     (* 8/16/32-bit signed + unsigned. *)
let su_8_64 = S64 :: U64 :: su_8_32            (* ... plus the 64-bit variants. *)
let su_16_64 = [S16; S32; S64; U16; U32; U64]  (* 16/32/64-bit signed + unsigned. *)
let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32   (* ... plus poly and float. *)
let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
706 | |
707 let ops = | |
708 [ | |
709 (* Addition. *) | |
710 Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_64; | |
711 Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64; | |
712 Vadd, [], Long, "vaddl", elts_same_2, su_8_32; | |
713 Vadd, [], Wide, "vaddw", elts_same_2, su_8_32; | |
714 Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32; | |
715 Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32; | |
716 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving], | |
717 All (3, Dreg), "vRhadd", elts_same_2, su_8_32; | |
718 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving], | |
719 All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32; | |
720 Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64; | |
721 Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64; | |
722 Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64; | |
723 Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half], | |
724 Narrow, "vRaddhn", sign_invar_2, su_16_64; | |
725 | |
726 (* Multiplication. *) | |
727 Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32; | |
728 Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32; | |
729 Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh", | |
730 elts_same_2, [S16; S32]; | |
731 Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ", | |
732 elts_same_2, [S16; S32]; | |
733 Vmul, | |
734 [Saturating; Rounding; Doubling; High_half; | |
735 Instruction_name ["vqrdmulh"]], | |
736 All (3, Dreg), "vqRdmulh", | |
737 elts_same_2, [S16; S32]; | |
738 Vmul, | |
739 [Saturating; Rounding; Doubling; High_half; | |
740 Instruction_name ["vqrdmulh"]], | |
741 All (3, Qreg), "vqRdmulhQ", | |
742 elts_same_2, [S16; S32]; | |
743 Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32; | |
744 Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32]; | |
745 | |
746 (* Multiply-accumulate. *) | |
747 Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32; | |
748 Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32; | |
749 Vmla, [], Long, "vmlal", elts_same_io, su_8_32; | |
750 Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32]; | |
751 | |
752 (* Multiply-subtract. *) | |
753 Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32; | |
754 Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32; | |
755 Vmls, [], Long, "vmlsl", elts_same_io, su_8_32; | |
756 Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32]; | |
757 | |
758 (* Subtraction. *) | |
759 Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_64; | |
760 Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64; | |
761 Vsub, [], Long, "vsubl", elts_same_2, su_8_32; | |
762 Vsub, [], Wide, "vsubw", elts_same_2, su_8_32; | |
763 Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32; | |
764 Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32; | |
765 Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64; | |
766 Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64; | |
767 Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64; | |
768 Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half], | |
769 Narrow, "vRsubhn", sign_invar_2, su_16_64; | |
770 | |
771 (* Comparison, equal. *) | |
772 Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32; | |
773 Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32; | |
774 | |
775 (* Comparison, greater-than or equal. *) | |
776 Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32; | |
777 Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32; | |
778 | |
779 (* Comparison, less-than or equal. *) | |
780 Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters, | |
781 F32 :: su_8_32; | |
782 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"], | |
783 All (3, Qreg), "vcleQ", cmp_sign_matters, | |
784 F32 :: su_8_32; | |
785 | |
786 (* Comparison, greater-than. *) | |
787 Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32; | |
788 Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32; | |
789 | |
790 (* Comparison, less-than. *) | |
791 Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters, | |
792 F32 :: su_8_32; | |
793 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"], | |
794 All (3, Qreg), "vcltQ", cmp_sign_matters, | |
795 F32 :: su_8_32; | |
796 | |
797 (* Compare absolute greater-than or equal. *) | |
798 Vcage, [Instruction_name ["vacge"]], | |
799 All (3, Dreg), "vcage", cmp_sign_matters, [F32]; | |
800 Vcage, [Instruction_name ["vacge"]], | |
801 All (3, Qreg), "vcageQ", cmp_sign_matters, [F32]; | |
802 | |
803 (* Compare absolute less-than or equal. *) | |
804 Vcale, [Instruction_name ["vacge"]; Flipped "vcage"], | |
805 All (3, Dreg), "vcale", cmp_sign_matters, [F32]; | |
806 Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"], | |
807 All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32]; | |
808 | |
809 (* Compare absolute greater-than or equal. *) | |
810 Vcagt, [Instruction_name ["vacgt"]], | |
811 All (3, Dreg), "vcagt", cmp_sign_matters, [F32]; | |
812 Vcagt, [Instruction_name ["vacgt"]], | |
813 All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32]; | |
814 | |
815 (* Compare absolute less-than or equal. *) | |
816 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"], | |
817 All (3, Dreg), "vcalt", cmp_sign_matters, [F32]; | |
818 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"], | |
819 All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32]; | |
820 | |
821 (* Test bits. *) | |
822 Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32; | |
823 Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32; | |
824 | |
825 (* Absolute difference. *) | |
826 Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32; | |
827 Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32; | |
828 Vabd, [], Long, "vabdl", elts_same_2, su_8_32; | |
829 | |
830 (* Absolute difference and accumulate. *) | |
831 Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32; | |
832 Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32; | |
833 Vaba, [], Long, "vabal", elts_same_io, su_8_32; | |
834 | |
835 (* Max. *) | |
836 Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32; | |
837 Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32; | |
838 | |
839 (* Min. *) | |
840 Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32; | |
841 Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32; | |
842 | |
843 (* Pairwise add. *) | |
844 Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32; | |
845 Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32; | |
846 Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32; | |
847 | |
848 (* Pairwise add, widen and accumulate. *) | |
849 Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32; | |
850 Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32; | |
851 | |
852 (* Folding maximum, minimum. *) | |
853 Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32; | |
854 Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32; | |
855 | |
856 (* Reciprocal step. *) | |
857 Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32]; | |
858 Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32]; | |
859 Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32]; | |
860 Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32]; | |
861 | |
862 (* Vector shift left. *) | |
863 Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64; | |
864 Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64; | |
865 Vshl, [Instruction_name ["vrshl"]; Rounding], | |
866 All (3, Dreg), "vRshl", reg_shift, su_8_64; | |
867 Vshl, [Instruction_name ["vrshl"]; Rounding], | |
868 All (3, Qreg), "vRshlQ", reg_shift, su_8_64; | |
869 Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64; | |
870 Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64; | |
871 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding], | |
872 All (3, Dreg), "vqRshl", reg_shift, su_8_64; | |
873 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding], | |
874 All (3, Qreg), "vqRshlQ", reg_shift, su_8_64; | |
875 | |
876 (* Vector shift right by constant. *) | |
877 Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64; | |
878 Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64; | |
879 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg, | |
880 "vRshr_n", shift_right, su_8_64; | |
881 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg, | |
882 "vRshrQ_n", shift_right, su_8_64; | |
883 Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64; | |
884 Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n", | |
885 shift_right_sign_invar, su_16_64; | |
886 Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64; | |
887 Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm, | |
888 "vqRshrn_n", shift_right, su_16_64; | |
889 Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n", | |
890 shift_right_to_uns, [S16; S32; S64]; | |
891 Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding], | |
892 Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64]; | |
893 | |
894 (* Vector shift left by constant. *) | |
895 Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64; | |
896 Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64; | |
897 Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64; | |
898 Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64; | |
899 Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n", | |
900 shift_left_to_uns, [S8; S16; S32; S64]; | |
901 Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n", | |
902 shift_left_to_uns, [S8; S16; S32; S64]; | |
903 Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32; | |
904 | |
905 (* Vector shift right by constant and accumulate. *) | |
906 Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64; | |
907 Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64; | |
908 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg, | |
909 "vRsra_n", shift_right_acc, su_8_64; | |
910 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg, | |
911 "vRsraQ_n", shift_right_acc, su_8_64; | |
912 | |
913 (* Vector shift right and insert. *) | |
914 Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert, | |
915 P8 :: P16 :: su_8_64; | |
916 Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert, | |
917 P8 :: P16 :: su_8_64; | |
918 | |
919 (* Vector shift left and insert. *) | |
920 Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert, | |
921 P8 :: P16 :: su_8_64; | |
922 Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert, | |
923 P8 :: P16 :: su_8_64; | |
924 | |
925 (* Absolute value. *) | |
926 Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32]; | |
927 Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32]; | |
928 Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32]; | |
929 Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32]; | |
930 | |
931 (* Negate. *) | |
932 Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32]; | |
933 Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32]; | |
934 Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32]; | |
935 Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32]; | |
936 | |
937 (* Bitwise not. *) | |
938 Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32; | |
939 Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32; | |
940 | |
941 (* Count leading sign bits. *) | |
942 Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32]; | |
943 Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32]; | |
944 | |
945 (* Count leading zeros. *) | |
946 Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32; | |
947 Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32; | |
948 | |
949 (* Count number of set bits. *) | |
950 Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8]; | |
951 Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8]; | |
952 | |
953 (* Reciprocal estimate. *) | |
954 Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32]; | |
955 Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32]; | |
956 | |
957 (* Reciprocal square-root estimate. *) | |
958 Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32]; | |
959 Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32]; | |
960 | |
961 (* Get lanes from a vector. *) | |
962 Vget_lane, | |
963 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]]; | |
964 Instruction_name ["vmov"]], | |
965 Use_operands [| Corereg; Dreg; Immed |], | |
966 "vget_lane", get_lane, pf_su_8_32; | |
967 Vget_lane, | |
968 [InfoWord; | |
969 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]]; | |
970 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], | |
971 Use_operands [| Corereg; Dreg; Immed |], | |
972 "vget_lane", notype_2, [S64; U64]; | |
973 Vget_lane, | |
974 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]]; | |
975 Instruction_name ["vmov"]], | |
976 Use_operands [| Corereg; Qreg; Immed |], | |
977 "vgetQ_lane", get_lane, pf_su_8_32; | |
978 Vget_lane, | |
979 [InfoWord; | |
980 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]]; | |
981 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], | |
982 Use_operands [| Corereg; Qreg; Immed |], | |
983 "vgetQ_lane", notype_2, [S64; U64]; | |
984 | |
985 (* Set lanes in a vector. *) | |
986 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]]; | |
987 Instruction_name ["vmov"]], | |
988 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane", | |
989 set_lane, pf_su_8_32; | |
990 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]; | |
991 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], | |
992 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane", | |
993 set_lane_notype, [S64; U64]; | |
994 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]]; | |
995 Instruction_name ["vmov"]], | |
996 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane", | |
997 set_lane, pf_su_8_32; | |
998 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]; | |
999 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], | |
1000 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane", | |
1001 set_lane_notype, [S64; U64]; | |
1002 | |
1003 (* Create vector from literal bit pattern. *) | |
1004 Vcreate, | |
1005 [No_op], (* Not really, but it can yield various things that are too | |
1006 hard for the test generator at this time. *) | |
1007 Use_operands [| Dreg; Corereg |], "vcreate", create_vector, | |
1008 pf_su_8_64; | |
1009 | |
1010 (* Set all lanes to the same value. *) | |
1011 Vdup_n, [], | |
1012 Use_operands [| Dreg; Corereg |], "vdup_n", bits_1, | |
1013 pf_su_8_32; | |
1014 Vdup_n, | |
1015 [Instruction_name ["vmov"]; | |
1016 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], | |
1017 Use_operands [| Dreg; Corereg |], "vdup_n", notype_1, | |
1018 [S64; U64]; | |
1019 Vdup_n, [], | |
1020 Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1, | |
1021 pf_su_8_32; | |
1022 Vdup_n, | |
1023 [Instruction_name ["vmov"]; | |
1024 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]; | |
1025 Use_operands [| Dreg; Corereg; Corereg |]]], | |
1026 Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1, | |
1027 [S64; U64]; | |
1028 | |
1029 (* These are just aliases for the above. *) | |
1030 Vmov_n, | |
1031 [Builtin_name "vdup_n"], | |
1032 Use_operands [| Dreg; Corereg |], | |
1033 "vmov_n", bits_1, pf_su_8_32; | |
1034 Vmov_n, | |
1035 [Builtin_name "vdup_n"; | |
1036 Instruction_name ["vmov"]; | |
1037 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], | |
1038 Use_operands [| Dreg; Corereg |], | |
1039 "vmov_n", notype_1, [S64; U64]; | |
1040 Vmov_n, | |
1041 [Builtin_name "vdupQ_n"], | |
1042 Use_operands [| Qreg; Corereg |], | |
1043 "vmovQ_n", bits_1, pf_su_8_32; | |
1044 Vmov_n, | |
1045 [Builtin_name "vdupQ_n"; | |
1046 Instruction_name ["vmov"]; | |
1047 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]; | |
1048 Use_operands [| Dreg; Corereg; Corereg |]]], | |
1049 Use_operands [| Qreg; Corereg |], | |
1050 "vmovQ_n", notype_1, [S64; U64]; | |
1051 | |
1052 (* Duplicate, lane version. We can't use Use_operands here because the | |
1053 rightmost register (always Dreg) would be picked up by find_key_operand, | |
1054 when we want the leftmost register to be used in this case (otherwise | |
1055 the modes are indistinguishable in neon.md, etc.). *) | |
1056 Vdup_lane, | |
1057 [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]], | |
1058 Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32; | |
1059 Vdup_lane, | |
1060 [No_op; Const_valuator (fun _ -> 0)], | |
1061 Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64]; | |
1062 Vdup_lane, | |
1063 [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]], | |
1064 Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32; | |
1065 Vdup_lane, | |
1066 [No_op; Const_valuator (fun _ -> 0)], | |
1067 Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64]; | |
1068 | |
1069 (* Combining vectors. *) | |
1070 Vcombine, [No_op], | |
1071 Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2, | |
1072 pf_su_8_64; | |
1073 | |
1074 (* Splitting vectors. *) | |
1075 Vget_high, [No_op], | |
1076 Use_operands [| Dreg; Qreg |], "vget_high", | |
1077 notype_1, pf_su_8_64; | |
1078 Vget_low, [Instruction_name ["vmov"]; | |
1079 Disassembles_as [Use_operands [| Dreg; Dreg |]]], | |
1080 Use_operands [| Dreg; Qreg |], "vget_low", | |
1081 notype_1, pf_su_8_64; | |
1082 | |
1083 (* Conversions. *) | |
1084 Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1, | |
1085 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; | |
1086 Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1, | |
1087 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; | |
1088 Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2, | |
1089 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; | |
1090 Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2, | |
1091 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; | |
1092 | |
1093 (* Move, narrowing. *) | |
1094 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]], | |
1095 Narrow, "vmovn", sign_invar_1, su_16_64; | |
1096 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating], | |
1097 Narrow, "vqmovn", elts_same_1, su_16_64; | |
1098 Vmovn, | |
1099 [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign], | |
1100 Narrow, "vqmovun", dst_unsign_1, | |
1101 [S16; S32; S64]; | |
1102 | |
1103 (* Move, long. *) | |
1104 Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]], | |
1105 Long, "vmovl", elts_same_1, su_8_32; | |
1106 | |
1107 (* Table lookup. *) | |
1108 Vtbl 1, | |
1109 [Instruction_name ["vtbl"]; | |
1110 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]], | |
1111 Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8]; | |
1112 Vtbl 2, [Instruction_name ["vtbl"]], | |
1113 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2, | |
1114 [U8; S8; P8]; | |
1115 Vtbl 3, [Instruction_name ["vtbl"]], | |
1116 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2, | |
1117 [U8; S8; P8]; | |
1118 Vtbl 4, [Instruction_name ["vtbl"]], | |
1119 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2, | |
1120 [U8; S8; P8]; | |
1121 | |
1122 (* Extended table lookup. *) | |
1123 Vtbx 1, | |
1124 [Instruction_name ["vtbx"]; | |
1125 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]], | |
1126 Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8]; | |
1127 Vtbx 2, [Instruction_name ["vtbx"]], | |
1128 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io, | |
1129 [U8; S8; P8]; | |
1130 Vtbx 3, [Instruction_name ["vtbx"]], | |
1131 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io, | |
1132 [U8; S8; P8]; | |
1133 Vtbx 4, [Instruction_name ["vtbx"]], | |
1134 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io, | |
1135 [U8; S8; P8]; | |
1136 | |
1137 (* Multiply, lane. (note: these were undocumented at the time of | |
1138 writing). *) | |
1139 Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane, | |
1140 [S16; S32; U16; U32; F32]; | |
1141 Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane, | |
1142 [S16; S32; U16; U32; F32]; | |
1143 | |
1144 (* Multiply-accumulate, lane. *) | |
1145 Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane, | |
1146 [S16; S32; U16; U32; F32]; | |
1147 Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane, | |
1148 [S16; S32; U16; U32; F32]; | |
1149 Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane, | |
1150 [S16; S32; U16; U32]; | |
1151 Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane", | |
1152 elts_same_io_lane, [S16; S32]; | |
1153 | |
1154 (* Multiply-subtract, lane. *) | |
1155 Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane, | |
1156 [S16; S32; U16; U32; F32]; | |
1157 Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane, | |
1158 [S16; S32; U16; U32; F32]; | |
1159 Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane, | |
1160 [S16; S32; U16; U32]; | |
1161 Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane", | |
1162 elts_same_io_lane, [S16; S32]; | |
1163 | |
1164 (* Long multiply, lane. *) | |
1165 Vmull_lane, [], | |
1166 Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32]; | |
1167 | |
1168 (* Saturating doubling long multiply, lane. *) | |
1169 Vqdmull_lane, [Saturating; Doubling], | |
1170 Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32]; | |
1171 | |
1172 (* Saturating doubling multiply high, lane. *) | |
1173 Vqdmulh_lane, [Saturating; Halving], | |
1174 By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32]; | |
1175 Vqdmulh_lane, [Saturating; Halving], | |
1176 By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32]; | |
1177 Vqdmulh_lane, [Saturating; Halving; Rounding; | |
1178 Instruction_name ["vqrdmulh"]], | |
1179 By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32]; | |
1180 Vqdmulh_lane, [Saturating; Halving; Rounding; | |
1181 Instruction_name ["vqrdmulh"]], | |
1182 By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32]; | |
1183 | |
1184 (* Vector multiply by scalar. *) | |
1185 Vmul_n, [InfoWord; | |
1186 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], | |
1187 Use_operands [| Dreg; Dreg; Corereg |], "vmul_n", | |
1188 sign_invar_2, [S16; S32; U16; U32; F32]; | |
1189 Vmul_n, [InfoWord; | |
1190 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], | |
1191 Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n", | |
1192 sign_invar_2, [S16; S32; U16; U32; F32]; | |
1193 | |
1194 (* Vector long multiply by scalar. *) | |
1195 Vmull_n, [Instruction_name ["vmull"]; | |
1196 Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]], | |
1197 Wide_scalar, "vmull_n", | |
1198 elts_same_2, [S16; S32; U16; U32]; | |
1199 | |
1200 (* Vector saturating doubling long multiply by scalar. *) | |
1201 Vqdmull_n, [Saturating; Doubling; | |
1202 Disassembles_as [Use_operands [| Qreg; Dreg; | |
1203 Element_of_dreg |]]], | |
1204 Wide_scalar, "vqdmull_n", | |
1205 elts_same_2, [S16; S32]; | |
1206 | |
1207 (* Vector saturating doubling multiply high by scalar. *) | |
1208 Vqdmulh_n, | |
1209 [Saturating; Halving; InfoWord; | |
1210 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], | |
1211 Use_operands [| Qreg; Qreg; Corereg |], | |
1212 "vqdmulhQ_n", elts_same_2, [S16; S32]; | |
1213 Vqdmulh_n, | |
1214 [Saturating; Halving; InfoWord; | |
1215 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], | |
1216 Use_operands [| Dreg; Dreg; Corereg |], | |
1217 "vqdmulh_n", elts_same_2, [S16; S32]; | |
1218 Vqdmulh_n, | |
1219 [Saturating; Halving; Rounding; InfoWord; | |
1220 Instruction_name ["vqrdmulh"]; | |
1221 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], | |
1222 Use_operands [| Qreg; Qreg; Corereg |], | |
1223 "vqRdmulhQ_n", elts_same_2, [S16; S32]; | |
1224 Vqdmulh_n, | |
1225 [Saturating; Halving; Rounding; InfoWord; | |
1226 Instruction_name ["vqrdmulh"]; | |
1227 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], | |
1228 Use_operands [| Dreg; Dreg; Corereg |], | |
1229 "vqRdmulh_n", elts_same_2, [S16; S32]; | |
1230 | |
1231 (* Vector multiply-accumulate by scalar. *) | |
1232 Vmla_n, [InfoWord; | |
1233 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], | |
1234 Use_operands [| Dreg; Dreg; Corereg |], "vmla_n", | |
1235 sign_invar_io, [S16; S32; U16; U32; F32]; | |
1236 Vmla_n, [InfoWord; | |
1237 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], | |
1238 Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n", | |
1239 sign_invar_io, [S16; S32; U16; U32; F32]; | |
1240 Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32]; | |
1241 Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io, | |
1242 [S16; S32]; | |
1243 | |
1244 (* Vector multiply subtract by scalar. *) | |
1245 Vmls_n, [InfoWord; | |
1246 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], | |
1247 Use_operands [| Dreg; Dreg; Corereg |], "vmls_n", | |
1248 sign_invar_io, [S16; S32; U16; U32; F32]; | |
1249 Vmls_n, [InfoWord; | |
1250 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], | |
1251 Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n", | |
1252 sign_invar_io, [S16; S32; U16; U32; F32]; | |
1253 Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32]; | |
1254 Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io, | |
1255 [S16; S32]; | |
1256 | |
1257 (* Vector extract. *) | |
1258 Vext, [Const_valuator (fun _ -> 0)], | |
1259 Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend, | |
1260 pf_su_8_64; | |
1261 Vext, [Const_valuator (fun _ -> 0)], | |
1262 Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend, | |
1263 pf_su_8_64; | |
1264 | |
1265 (* Reverse elements. *) | |
1266 Vrev64, [], All (2, Dreg), "vrev64", bits_1, P8 :: P16 :: F32 :: su_8_32; | |
1267 Vrev64, [], All (2, Qreg), "vrev64Q", bits_1, P8 :: P16 :: F32 :: su_8_32; | |
1268 Vrev32, [], All (2, Dreg), "vrev32", bits_1, [P8; P16; S8; U8; S16; U16]; | |
1269 Vrev32, [], All (2, Qreg), "vrev32Q", bits_1, [P8; P16; S8; U8; S16; U16]; | |
1270 Vrev16, [], All (2, Dreg), "vrev16", bits_1, [P8; S8; U8]; | |
1271 Vrev16, [], All (2, Qreg), "vrev16Q", bits_1, [P8; S8; U8]; | |
1272 | |
1273 (* Bit selection. *) | |
1274 Vbsl, | |
1275 [Instruction_name ["vbsl"; "vbit"; "vbif"]; | |
1276 Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]], | |
1277 Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select, | |
1278 pf_su_8_64; | |
1279 Vbsl, | |
1280 [Instruction_name ["vbsl"; "vbit"; "vbif"]; | |
1281 Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]], | |
1282 Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select, | |
1283 pf_su_8_64; | |
1284 | |
1285 (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards | |
1286 generating good code for intrinsics which return structure types -- | |
1287 builtins work well by themselves (and understand that the values being | |
1288 stored on e.g. the stack also reside in registers, so can optimise the | |
1289 stores away entirely if the results are used immediately), but | |
1290 intrinsics are very much less efficient. Maybe something can be improved | |
1291 re: inlining, or tweaking the ABI used for intrinsics (a special call | |
1292 attribute?). | |
1293 *) | |
1294 Vtrn, [ReturnPtr], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32; | |
1295 Vtrn, [ReturnPtr], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32; | |
1296 | |
1297 (* Zip elements. *) | |
1298 Vzip, [ReturnPtr], Pair_result Dreg, "vzip", bits_2, pf_su_8_32; | |
1299 Vzip, [ReturnPtr], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32; | |
1300 | |
1301 (* Unzip elements. *) | |
1302 Vuzp, [ReturnPtr], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32; | |
1303 Vuzp, [ReturnPtr], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32; | |
1304 | |
1305 (* Element/structure loads. VLD1 variants. *) | |
1306 Vldx 1, | |
1307 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1308 CstPtrTo Corereg |]]], | |
1309 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1, | |
1310 pf_su_8_64; | |
1311 Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1312 CstPtrTo Corereg |]]], | |
1313 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1, | |
1314 pf_su_8_64; | |
1315 | |
1316 Vldx_lane 1, | |
1317 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); | |
1318 CstPtrTo Corereg |]]], | |
1319 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], | |
1320 "vld1_lane", bits_3, pf_su_8_32; | |
1321 Vldx_lane 1, | |
1322 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1323 CstPtrTo Corereg |]]; | |
1324 Const_valuator (fun _ -> 0)], | |
1325 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], | |
1326 "vld1_lane", bits_3, [S64; U64]; | |
1327 Vldx_lane 1, | |
1328 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); | |
1329 CstPtrTo Corereg |]]], | |
1330 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], | |
1331 "vld1Q_lane", bits_3, pf_su_8_32; | |
1332 Vldx_lane 1, | |
1333 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1334 CstPtrTo Corereg |]]], | |
1335 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], | |
1336 "vld1Q_lane", bits_3, [S64; U64]; | |
1337 | |
1338 Vldx_dup 1, | |
1339 [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg); | |
1340 CstPtrTo Corereg |]]], | |
1341 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", | |
1342 bits_1, pf_su_8_32; | |
1343 Vldx_dup 1, | |
1344 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1345 CstPtrTo Corereg |]]], | |
1346 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", | |
1347 bits_1, [S64; U64]; | |
1348 Vldx_dup 1, | |
1349 [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg); | |
1350 CstPtrTo Corereg |]]], | |
1351 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", | |
1352 bits_1, pf_su_8_32; | |
1353 Vldx_dup 1, | |
1354 [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1355 CstPtrTo Corereg |]]], | |
1356 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", | |
1357 bits_1, [S64; U64]; | |
1358 | |
1359 (* VST1 variants. *) | |
1360 Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1361 PtrTo Corereg |]]], | |
1362 Use_operands [| PtrTo Corereg; Dreg |], "vst1", | |
1363 store_1, pf_su_8_64; | |
1364 Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1365 PtrTo Corereg |]]], | |
1366 Use_operands [| PtrTo Corereg; Qreg |], "vst1Q", | |
1367 store_1, pf_su_8_64; | |
1368 | |
1369 Vstx_lane 1, | |
1370 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); | |
1371 CstPtrTo Corereg |]]], | |
1372 Use_operands [| PtrTo Corereg; Dreg; Immed |], | |
1373 "vst1_lane", store_3, pf_su_8_32; | |
1374 Vstx_lane 1, | |
1375 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1376 CstPtrTo Corereg |]]; | |
1377 Const_valuator (fun _ -> 0)], | |
1378 Use_operands [| PtrTo Corereg; Dreg; Immed |], | |
1379 "vst1_lane", store_3, [U64; S64]; | |
1380 Vstx_lane 1, | |
1381 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); | |
1382 CstPtrTo Corereg |]]], | |
1383 Use_operands [| PtrTo Corereg; Qreg; Immed |], | |
1384 "vst1Q_lane", store_3, pf_su_8_32; | |
1385 Vstx_lane 1, | |
1386 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1387 CstPtrTo Corereg |]]], | |
1388 Use_operands [| PtrTo Corereg; Qreg; Immed |], | |
1389 "vst1Q_lane", store_3, [U64; S64]; | |
1390 | |
1391 (* VLD2 variants. *) | |
1392 Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], | |
1393 "vld2", bits_1, pf_su_8_32; | |
1394 Vldx 2, [Instruction_name ["vld1"]], | |
1395 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], | |
1396 "vld2", bits_1, [S64; U64]; | |
1397 Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1398 CstPtrTo Corereg |]; | |
1399 Use_operands [| VecArray (2, Dreg); | |
1400 CstPtrTo Corereg |]]], | |
1401 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |], | |
1402 "vld2Q", bits_1, pf_su_8_32; | |
1403 | |
1404 Vldx_lane 2, | |
1405 [Disassembles_as [Use_operands | |
1406 [| VecArray (2, Element_of_dreg); | |
1407 CstPtrTo Corereg |]]], | |
1408 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg; | |
1409 VecArray (2, Dreg); Immed |], | |
1410 "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; | |
1411 Vldx_lane 2, | |
1412 [Disassembles_as [Use_operands | |
1413 [| VecArray (2, Element_of_dreg); | |
1414 CstPtrTo Corereg |]]], | |
1415 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg; | |
1416 VecArray (2, Qreg); Immed |], | |
1417 "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; | |
1418 | |
1419 Vldx_dup 2, | |
1420 [Disassembles_as [Use_operands | |
1421 [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]], | |
1422 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], | |
1423 "vld2_dup", bits_1, pf_su_8_32; | |
1424 Vldx_dup 2, | |
1425 [Instruction_name ["vld1"]; Disassembles_as [Use_operands | |
1426 [| VecArray (2, Dreg); CstPtrTo Corereg |]]], | |
1427 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], | |
1428 "vld2_dup", bits_1, [S64; U64]; | |
1429 | |
1430 (* VST2 variants. *) | |
1431 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1432 PtrTo Corereg |]]], | |
1433 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", | |
1434 store_1, pf_su_8_32; | |
1435 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1436 PtrTo Corereg |]]; | |
1437 Instruction_name ["vst1"]], | |
1438 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", | |
1439 store_1, [S64; U64]; | |
1440 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1441 PtrTo Corereg |]; | |
1442 Use_operands [| VecArray (2, Dreg); | |
1443 PtrTo Corereg |]]], | |
1444 Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q", | |
1445 store_1, pf_su_8_32; | |
1446 | |
1447 Vstx_lane 2, | |
1448 [Disassembles_as [Use_operands | |
1449 [| VecArray (2, Element_of_dreg); | |
1450 CstPtrTo Corereg |]]], | |
1451 Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane", | |
1452 store_3, P8 :: P16 :: F32 :: su_8_32; | |
1453 Vstx_lane 2, | |
1454 [Disassembles_as [Use_operands | |
1455 [| VecArray (2, Element_of_dreg); | |
1456 CstPtrTo Corereg |]]], | |
1457 Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane", | |
1458 store_3, [P16; F32; U16; U32; S16; S32]; | |
1459 | |
1460 (* VLD3 variants. *) | |
1461 Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], | |
1462 "vld3", bits_1, pf_su_8_32; | |
1463 Vldx 3, [Instruction_name ["vld1"]], | |
1464 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], | |
1465 "vld3", bits_1, [S64; U64]; | |
1466 Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg); | |
1467 CstPtrTo Corereg |]; | |
1468 Use_operands [| VecArray (3, Dreg); | |
1469 CstPtrTo Corereg |]]], | |
1470 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |], | |
1471 "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32; | |
1472 | |
1473 Vldx_lane 3, | |
1474 [Disassembles_as [Use_operands | |
1475 [| VecArray (3, Element_of_dreg); | |
1476 CstPtrTo Corereg |]]], | |
1477 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg; | |
1478 VecArray (3, Dreg); Immed |], | |
1479 "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; | |
1480 Vldx_lane 3, | |
1481 [Disassembles_as [Use_operands | |
1482 [| VecArray (3, Element_of_dreg); | |
1483 CstPtrTo Corereg |]]], | |
1484 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg; | |
1485 VecArray (3, Qreg); Immed |], | |
1486 "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; | |
1487 | |
1488 Vldx_dup 3, | |
1489 [Disassembles_as [Use_operands | |
1490 [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]], | |
1491 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], | |
1492 "vld3_dup", bits_1, pf_su_8_32; | |
1493 Vldx_dup 3, | |
1494 [Instruction_name ["vld1"]; Disassembles_as [Use_operands | |
1495 [| VecArray (3, Dreg); CstPtrTo Corereg |]]], | |
1496 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], | |
1497 "vld3_dup", bits_1, [S64; U64]; | |
1498 | |
1499 (* VST3 variants. *) | |
1500 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1501 PtrTo Corereg |]]], | |
1502 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", | |
1503 store_1, pf_su_8_32; | |
1504 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1505 PtrTo Corereg |]]; | |
1506 Instruction_name ["vst1"]], | |
1507 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", | |
1508 store_1, [S64; U64]; | |
1509 Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg); | |
1510 PtrTo Corereg |]; | |
1511 Use_operands [| VecArray (3, Dreg); | |
1512 PtrTo Corereg |]]], | |
1513 Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q", | |
1514 store_1, pf_su_8_32; | |
1515 | |
1516 Vstx_lane 3, | |
1517 [Disassembles_as [Use_operands | |
1518 [| VecArray (3, Element_of_dreg); | |
1519 CstPtrTo Corereg |]]], | |
1520 Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane", | |
1521 store_3, P8 :: P16 :: F32 :: su_8_32; | |
1522 Vstx_lane 3, | |
1523 [Disassembles_as [Use_operands | |
1524 [| VecArray (3, Element_of_dreg); | |
1525 CstPtrTo Corereg |]]], | |
1526 Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane", | |
1527 store_3, [P16; F32; U16; U32; S16; S32]; | |
1528 | |
1529 (* VLD4/VST4 variants. *) | |
1530 Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], | |
1531 "vld4", bits_1, pf_su_8_32; | |
1532 Vldx 4, [Instruction_name ["vld1"]], | |
1533 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], | |
1534 "vld4", bits_1, [S64; U64]; | |
1535 Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1536 CstPtrTo Corereg |]; | |
1537 Use_operands [| VecArray (4, Dreg); | |
1538 CstPtrTo Corereg |]]], | |
1539 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |], | |
1540 "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32; | |
1541 | |
1542 Vldx_lane 4, | |
1543 [Disassembles_as [Use_operands | |
1544 [| VecArray (4, Element_of_dreg); | |
1545 CstPtrTo Corereg |]]], | |
1546 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg; | |
1547 VecArray (4, Dreg); Immed |], | |
1548 "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; | |
1549 Vldx_lane 4, | |
1550 [Disassembles_as [Use_operands | |
1551 [| VecArray (4, Element_of_dreg); | |
1552 CstPtrTo Corereg |]]], | |
1553 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg; | |
1554 VecArray (4, Qreg); Immed |], | |
1555 "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; | |
1556 | |
1557 Vldx_dup 4, | |
1558 [Disassembles_as [Use_operands | |
1559 [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]], | |
1560 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], | |
1561 "vld4_dup", bits_1, pf_su_8_32; | |
1562 Vldx_dup 4, | |
1563 [Instruction_name ["vld1"]; Disassembles_as [Use_operands | |
1564 [| VecArray (4, Dreg); CstPtrTo Corereg |]]], | |
1565 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], | |
1566 "vld4_dup", bits_1, [S64; U64]; | |
1567 | |
1568 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1569 PtrTo Corereg |]]], | |
1570 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", | |
1571 store_1, pf_su_8_32; | |
1572 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1573 PtrTo Corereg |]]; | |
1574 Instruction_name ["vst1"]], | |
1575 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", | |
1576 store_1, [S64; U64]; | |
1577 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1578 PtrTo Corereg |]; | |
1579 Use_operands [| VecArray (4, Dreg); | |
1580 PtrTo Corereg |]]], | |
1581 Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q", | |
1582 store_1, pf_su_8_32; | |
1583 | |
1584 Vstx_lane 4, | |
1585 [Disassembles_as [Use_operands | |
1586 [| VecArray (4, Element_of_dreg); | |
1587 CstPtrTo Corereg |]]], | |
1588 Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane", | |
1589 store_3, P8 :: P16 :: F32 :: su_8_32; | |
1590 Vstx_lane 4, | |
1591 [Disassembles_as [Use_operands | |
1592 [| VecArray (4, Element_of_dreg); | |
1593 CstPtrTo Corereg |]]], | |
1594 Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane", | |
1595 store_3, [P16; F32; U16; U32; S16; S32]; | |
1596 | |
1597 (* Logical operations. And. *) | |
1598 Vand, [], All (3, Dreg), "vand", notype_2, su_8_64; | |
1599 Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64; | |
1600 | |
1601 (* Or. *) | |
1602 Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_64; | |
1603 Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64; | |
1604 | |
1605 (* Eor. *) | |
1606 Veor, [], All (3, Dreg), "veor", notype_2, su_8_64; | |
1607 Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64; | |
1608 | |
1609 (* Bic (And-not). *) | |
1610 Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_64; | |
1611 Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64; | |
1612 | |
1613 (* Or-not. *) | |
1614 Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_64; | |
1615 Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64; | |
1616 ] | |
1617 | |
let reinterp =
  let elems = P8 :: P16 :: F32 :: su_8_64 in
  List.fold_right
    (fun convto entries ->
      (* Casts to [convto] from every *other* element type, in the order
         the types appear in [elems].  *)
      let types =
        List.map (fun convfrom -> Cast (convto, convfrom))
                 (List.filter (fun convfrom -> convfrom <> convto) elems)
      in
      let dconv = Vreinterp, [No_op], Use_operands [| Dreg; Dreg |],
                  "vreinterpret", conv_1, types
      and qconv = Vreinterp, [No_op], Use_operands [| Qreg; Qreg |],
                  "vreinterpretQ", conv_1, types in
      dconv :: qconv :: entries)
    elems
    []
1638 | |
1639 (* Output routines. *) | |
1640 | |
(* Render an element type as the suffix used in intrinsic names, e.g.
   [S8] -> "s8".  Conversions/casts join both halves with '_'.  *)
let rec string_of_elt elt =
  match elt with
    Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
  | S8 -> "s8"  | S16 -> "s16" | S32 -> "s32" | S64 -> "s64"
  | U8 -> "u8"  | U16 -> "u16" | U32 -> "u32" | U64 -> "u64"
  | I8 -> "i8"  | I16 -> "i16" | I32 -> "i32" | I64 -> "i64"
  | B8 -> "8"   | B16 -> "16"  | B32 -> "32"  | B64 -> "64"
  | F32 -> "f32" | P8 -> "p8"  | P16 -> "p16"
  | NoElts -> failwith "No elts"
1649 | |
(* Like [string_of_elt], but separate the two halves of a conversion or
   cast with '.' rather than '_'.  *)
let string_of_elt_dots = function
    Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." ^ string_of_elt b
  | elt -> string_of_elt elt
1654 | |
(* Map a vectype to its C spelling.  Plain vector/scalar names get a "_t"
   suffix; builtin and immediate types are emitted verbatim.  *)
let string_of_vectype vt =
  (* [wrap] adds the suffix; it is replaced by the identity when naming an
     array's element type (the suffix goes on the whole "...xN" name).  *)
  let rec render wrap = function
      T_int8x8 -> wrap "int8x8"
    | T_int8x16 -> wrap "int8x16"
    | T_int16x4 -> wrap "int16x4"
    | T_int16x8 -> wrap "int16x8"
    | T_int32x2 -> wrap "int32x2"
    | T_int32x4 -> wrap "int32x4"
    | T_int64x1 -> wrap "int64x1"
    | T_int64x2 -> wrap "int64x2"
    | T_uint8x8 -> wrap "uint8x8"
    | T_uint8x16 -> wrap "uint8x16"
    | T_uint16x4 -> wrap "uint16x4"
    | T_uint16x8 -> wrap "uint16x8"
    | T_uint32x2 -> wrap "uint32x2"
    | T_uint32x4 -> wrap "uint32x4"
    | T_uint64x1 -> wrap "uint64x1"
    | T_uint64x2 -> wrap "uint64x2"
    | T_float32x2 -> wrap "float32x2"
    | T_float32x4 -> wrap "float32x4"
    | T_poly8x8 -> wrap "poly8x8"
    | T_poly8x16 -> wrap "poly8x16"
    | T_poly16x4 -> wrap "poly16x4"
    | T_poly16x8 -> wrap "poly16x8"
    | T_int8 -> wrap "int8"
    | T_int16 -> wrap "int16"
    | T_int32 -> wrap "int32"
    | T_int64 -> wrap "int64"
    | T_uint8 -> wrap "uint8"
    | T_uint16 -> wrap "uint16"
    | T_uint32 -> wrap "uint32"
    | T_uint64 -> wrap "uint64"
    | T_poly8 -> wrap "poly8"
    | T_poly16 -> wrap "poly16"
    | T_float32 -> wrap "float32"
    | T_immediate _ -> "const int"
    | T_void -> "void"
    | T_intQI -> "__builtin_neon_qi"
    | T_intHI -> "__builtin_neon_hi"
    | T_intSI -> "__builtin_neon_si"
    | T_intDI -> "__builtin_neon_di"
    | T_arrayof (num, base) ->
        (* Element name carries no suffix; the array name as a whole does. *)
        let basename = render (fun x -> x) base in
        wrap (Printf.sprintf "%sx%d" basename num)
    | T_ptrto x -> Printf.sprintf "%s *" (render wrap x)
    | T_const x -> Printf.sprintf "const %s" (render wrap x)
  in
  render (fun x -> x ^ "_t") vt
1707 | |
(* Builtin names for the opaque wide integer modes.  *)
let string_of_inttype it =
  let suffix = match it with
      B_TImode -> "ti"
    | B_EImode -> "ei"
    | B_OImode -> "oi"
    | B_CImode -> "ci"
    | B_XImode -> "xi"
  in
  "__builtin_neon_" ^ suffix
1714 | |
(* Lower-case machine-mode names as used in builtin insn names.  *)
let string_of_mode m =
  match m with
    V8QI -> "v8qi"
  | V4HI -> "v4hi"
  | V2SI -> "v2si"
  | V2SF -> "v2sf"
  | DI -> "di"
  | V16QI -> "v16qi"
  | V8HI -> "v8hi"
  | V4SI -> "v4si"
  | V4SF -> "v4sf"
  | V2DI -> "v2di"
  | QI -> "qi"
  | HI -> "hi"
  | SI -> "si"
  | SF -> "sf"
1720 | |
1721 (* Use uppercase chars for letters which form part of the intrinsic name, but | |
1722 should be omitted from the builtin name (the info is passed in an extra | |
1723 argument, instead). *) | |
1724 let intrinsic_name name = String.lowercase name | |
1725 | |
1726 (* Allow the name of the builtin to be overridden by things (e.g. Flipped) | |
1727 found in the features list. *) | |
(* Allow the name of the builtin to be overridden by things (e.g. Flipped)
   found in the features list; then drop the characters that lower-casing
   would change (i.e. uppercase letters), keeping digits, '_' and
   lowercase letters.  *)
let builtin_name features name =
  (* fold_right means the leftmost Flipped/Builtin_name in [features] is
     applied last, so the first such entry in the list wins.  *)
  let name = List.fold_right
    (fun el name ->
      match el with
        Flipped x | Builtin_name x -> x
      | _ -> name)
    features name in
  (* A character survives iff it equals its own lower-case form.
     [Char.lowercase] has the same ISO-8859-1 semantics as the previous
     per-character [String.lowercase] call, but avoids allocating a fresh
     one-character string for every character of [name].  *)
  let buf = Buffer.create (String.length name) in
  String.iter (fun c -> if Char.lowercase c = c then Buffer.add_char buf c)
    name;
  Buffer.contents buf
1739 | |
1740 (* Transform an arity into a list of strings. *) | |
(* Transform an arity into a list of strings: the return type's name
   followed by each argument type's name, in order.  *)
let strings_of_arity a =
  let vectypes = match a with
      Arity0 r -> [r]
    | Arity1 (r, a1) -> [r; a1]
    | Arity2 (r, a1, a2) -> [r; a1; a2]
    | Arity3 (r, a1, a2, a3) -> [r; a1; a2; a3]
    | Arity4 (r, a1, a2, a3, a4) -> [r; a1; a2; a3; a4]
  in
  List.map string_of_vectype vectypes
1757 | |
1758 (* Suffixes on the end of builtin names that are to be stripped in order | |
1759 to obtain the name used as an instruction. They are only stripped if | |
1760 preceded immediately by an underscore. *) | |
1761 let suffixes_to_strip = [ "n"; "lane"; "dup" ] | |
1762 | |
1763 (* Get the possible names of an instruction corresponding to a "name" from the | |
1764 ops table. This is done by getting the equivalent builtin name and | |
1765 stripping any suffixes from the list at the top of this file, unless | |
1766 the features list presents with an Instruction_name entry, in which | |
1767 case that is used; or unless the features list presents with a Flipped | |
1768 entry, in which case that is used. If both such entries are present, | |
1769 the first in the list will be chosen. *) | |
(* Get the possible names of an instruction corresponding to a "name" from
   the ops table: either the names given by the first Instruction_name or
   Flipped feature found in [features], or else the builtin name with any
   suffix from [suffixes_to_strip] removed (when preceded by '_').  *)
let get_insn_names features name =
  let names =
    try
      match List.find (fun feature -> match feature with
                         Instruction_name _ -> true
                       | Flipped _ -> true
                       | _ -> false) features with
        Instruction_name names -> names
      | Flipped name -> [name]
      | _ -> assert false
    with Not_found -> [builtin_name features name]
  in
  List.map
    (fun name' ->
      try
        let underscore = String.rindex name' '_' in
        let our_suffix = String.sub name' (underscore + 1)
                           (String.length name' - underscore - 1)
        in
        (* [List.mem] replaces the previous hand-rolled recursive scan of
           [suffixes_to_strip] (which also left an unused binding in its
           matching arm).  *)
        if List.mem our_suffix suffixes_to_strip then
          String.sub name' 0 underscore
        else
          name'
      (* No '_' in the name (rindex raises Not_found): keep it unchanged. *)
      with (Not_found | Invalid_argument _) -> name')
    names
1800 | |
1801 (* Apply a function to each element of a list and then comma-separate | |
1802 the resulting strings. *) | |
(* Apply [f] to each element of [elts], join the results with ", ", and
   append the whole thing to [acc].  *)
let commas f elts acc =
  acc ^ String.concat ", " (List.map f elts)
1809 | |
1810 (* Given a list of features and the shape specified in the "ops" table, apply | |
1811 a function to each possible shape that the instruction may have. | |
1812 By default, this is the "shape" entry in "ops". If the features list | |
1813 contains a Disassembles_as entry, the shapes contained in that entry are | |
1814 mapped to corresponding outputs and returned in a list. If there is more | |
1815 than one Disassembles_as entry, only the first is used. *) | |
(* Apply [f] to each shape the instruction may disassemble as: the shapes
   of the first Disassembles_as feature if one is present, otherwise the
   single default [shape].  *)
let analyze_all_shapes features shape f =
  let is_disassembly = function
      Disassembles_as _ -> true
    | _ -> false
  in
  try
    match List.find is_disassembly features with
      Disassembles_as shapes -> List.map f shapes
    | _ -> assert false
  with Not_found -> [f shape]
1825 |