Mercurial > hg > CbC > CbC_gcc
annotate gcc/config/arm/neon.ml @ 55:77e2b8dfacca gcc-4.4.5
update it from 4.4.3 to 4.5.0
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 12 Feb 2010 23:39:51 +0900 |
parents | a06113de4d67 |
children | b7f97abdc517 |
rev | line source |
---|---|
0 | 1 (* Common code for ARM NEON header file, documentation and test case |
2 generators. | |
3 | |
4 Copyright (C) 2006, 2007 Free Software Foundation, Inc. | |
5 Contributed by CodeSourcery. | |
6 | |
7 This file is part of GCC. | |
8 | |
9 GCC is free software; you can redistribute it and/or modify it under | |
10 the terms of the GNU General Public License as published by the Free | |
11 Software Foundation; either version 3, or (at your option) any later | |
12 version. | |
13 | |
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
17 for more details. | |
18 | |
19 You should have received a copy of the GNU General Public License | |
20 along with GCC; see the file COPYING3. If not see | |
21 <http://www.gnu.org/licenses/>. *) | |
22 | |
(* Shorthand types for vector elements. *)
type elts = S8 | S16 | S32 | S64 | F32 | U8 | U16 | U32 | U64 | P8 | P16
          | I8 | I16 | I32 | I64 | B8 | B16 | B32 | B64 | Conv of elts * elts
          | Cast of elts * elts | NoElts

(* Broad classification of element types; ConvClass pairs the destination and
   source classes of a conversion. *)
type eltclass = Signed | Unsigned | Float | Poly | Int | Bits
              | ConvClass of eltclass * eltclass | NoType
30 | |
(* These vector types correspond directly to C types. *)
type vectype = T_int8x8 | T_int8x16
             | T_int16x4 | T_int16x8
             | T_int32x2 | T_int32x4
             | T_int64x1 | T_int64x2
             | T_uint8x8 | T_uint8x16
             | T_uint16x4 | T_uint16x8
             | T_uint32x2 | T_uint32x4
             | T_uint64x1 | T_uint64x2
             | T_float32x2 | T_float32x4
             | T_poly8x8 | T_poly8x16
             | T_poly16x4 | T_poly16x8
             | T_immediate of int * int   (* Inclusive range of the immediate. *)
             | T_int8 | T_int16
             | T_int32 | T_int64
             | T_uint8 | T_uint16
             | T_uint32 | T_uint64
             | T_poly8 | T_poly16
             | T_float32 | T_arrayof of int * vectype
             | T_ptrto of vectype | T_const of vectype
             | T_void | T_intQI
             | T_intHI | T_intSI
             | T_intDI | T_floatSF
0 | 54 |
(* The meanings of the following are:
     TImode : "Tetra", two registers (four words).
     EImode : "hExa", three registers (six words).
     OImode : "Octa", four registers (eight words).
     CImode : "dodeCa", six registers (twelve words).
     XImode : "heXadeca", eight registers (sixteen words).
*)

type inttype = B_TImode | B_EImode | B_OImode | B_CImode | B_XImode
64 | |
(* One operand slot in an instruction shape. *)
type shape_elt = Dreg | Qreg | Corereg | Immed | VecArray of int * shape_elt
               | PtrTo of shape_elt | CstPtrTo of shape_elt
               (* These next ones are used only in the test generator. *)
               | Element_of_dreg        (* Used for "lane" variants.  *)
               | Element_of_qreg        (* Likewise.  *)
               | All_elements_of_dreg   (* Used for "dup" variants.  *)
               | Alternatives of shape_elt list (* Used for multiple valid operands. *)
0 | 72 |
(* The overall "shape" of an instruction: how its register operands relate
   (same-size, widening, narrowing, by-scalar, ...). *)
type shape_form = All of int * shape_elt
                | Long
                | Long_noreg of shape_elt
                | Wide
                | Wide_noreg of shape_elt
                | Narrow
                | Long_imm
                | Narrow_imm
                | Binary_imm of shape_elt
                | Use_operands of shape_elt array
                | By_scalar of shape_elt
                | Unary_scalar of shape_elt
                | Wide_lane
                | Wide_scalar
                | Pair_result of shape_elt
88 | |
(* Return type plus zero to four argument types. *)
type arity = Arity0 of vectype
           | Arity1 of vectype * vectype
           | Arity2 of vectype * vectype * vectype
           | Arity3 of vectype * vectype * vectype * vectype
           | Arity4 of vectype * vectype * vectype * vectype * vectype

(* GCC machine modes used for NEON vector and scalar values. *)
type vecmode = V8QI | V4HI | V2SI | V2SF | DI
             | V16QI | V8HI | V4SI | V4SF | V2DI
             | QI | HI | SI | SF
98 | |
(* NEON intrinsic opcodes, grouped by operation category. *)
type opcode =
  (* Binary ops. *)
    Vadd
  | Vmul
  | Vmla
  | Vmls
  | Vsub
  | Vceq
  | Vcge
  | Vcgt
  | Vcle
  | Vclt
  | Vcage
  | Vcagt
  | Vcale
  | Vcalt
  | Vtst
  | Vabd
  | Vaba
  | Vmax
  | Vmin
  | Vpadd
  | Vpada
  | Vpmax
  | Vpmin
  | Vrecps
  | Vrsqrts
  | Vshl
  | Vshr_n
  | Vshl_n
  | Vsra_n
  | Vsri
  | Vsli
  (* Logic binops. *)
  | Vand
  | Vorr
  | Veor
  | Vbic
  | Vorn
  | Vbsl
  (* Ops with scalar. *)
  | Vmul_lane
  | Vmla_lane
  | Vmls_lane
  | Vmul_n
  | Vmla_n
  | Vmls_n
  | Vmull_n
  | Vmull_lane
  | Vqdmull_n
  | Vqdmull_lane
  | Vqdmulh_n
  | Vqdmulh_lane
  (* Unary ops. *)
  | Vabs
  | Vneg
  | Vcls
  | Vclz
  | Vcnt
  | Vrecpe
  | Vrsqrte
  | Vmvn
  (* Vector extract. *)
  | Vext
  (* Reverse elements. *)
  | Vrev64
  | Vrev32
  | Vrev16
  (* Transposition ops. *)
  | Vtrn
  | Vzip
  | Vuzp
  (* Loads and stores (VLD1/VST1/VLD2...), elements and structures.  The int
     argument is the structure count (the "x" in VLDx). *)
  | Vldx of int
  | Vstx of int
  | Vldx_lane of int
  | Vldx_dup of int
  | Vstx_lane of int
  (* Set/extract lanes from a vector. *)
  | Vget_lane
  | Vset_lane
  (* Initialize vector from bit pattern. *)
  | Vcreate
  (* Set all lanes to same value. *)
  | Vdup_n
  | Vmov_n  (* Is this the same? *)
  (* Duplicate scalar to all lanes of vector. *)
  | Vdup_lane
  (* Combine vectors. *)
  | Vcombine
  (* Get quadword high/low parts. *)
  | Vget_high
  | Vget_low
  (* Convert vectors. *)
  | Vcvt
  | Vcvt_n
  (* Narrow/lengthen vectors. *)
  | Vmovn
  | Vmovl
  (* Table lookup.  The int argument is the table size in D registers. *)
  | Vtbl of int
  | Vtbx of int
  (* Reinterpret casts. *)
  | Vreinterp
203 | |
(* Features used for documentation, to distinguish between some instruction
   variants, and to signal special requirements (e.g. swapping arguments). *)

type features =
    Halving
  | Rounding
  | Saturating
  | Dst_unsign
  | High_half
  | Doubling
  | Flipped of string  (* Builtin name to use with flipped arguments. *)
  | InfoWord  (* Pass an extra word for signage/rounding etc. (always passed
                 for All _, Long, Wide, Narrow shape_forms). *)
  | ReturnPtr  (* Pass explicit pointer to return value as first argument. *)
    (* A specification as to the shape of instruction expected upon
       disassembly, used if it differs from the shape used to build the
       intrinsic prototype.  Multiple entries in the constructor's argument
       indicate that the intrinsic expands to more than one assembly
       instruction, each with a corresponding shape specified here. *)
  | Disassembles_as of shape_form list
  | Builtin_name of string  (* Override the name of the builtin. *)
    (* Override the name of the instruction.  If more than one name
       is specified, it means that the instruction can have any of those
       names. *)
  | Instruction_name of string list
    (* Mark that the intrinsic yields no instructions, or expands to yield
       behavior that the test generator cannot test. *)
  | No_op
    (* Mark that the intrinsic has constant arguments that cannot be set
       to the defaults (zero for pointers and one otherwise) in the test
       cases.  The function supplied must return the integer to be written
       into the testcase for the argument number (0-based) supplied to it. *)
  | Const_valuator of (int -> int)
237 | |
(* Raised when an operation mixes two element types that cannot be given a
   single common interpretation (e.g. the two sides of a Cast). *)
exception MixedMode of elts * elts

(* Width in bits of one element.  A Conv is only well-defined when both of
   its sides have equal widths; a Cast is mixed-mode by construction. *)
let rec elt_width = function
    S8 | U8 | P8 | I8 | B8 -> 8
  | S16 | U16 | P16 | I16 | B16 -> 16
  | S32 | F32 | U32 | I32 | B32 -> 32
  | S64 | U64 | I64 | B64 -> 64
  | Conv (a, b) ->
      let wa = elt_width a and wb = elt_width b in
      if wa = wb then wa else failwith "element width?"
  | Cast (a, b) -> raise (MixedMode (a, b))
  | NoElts -> failwith "No elts"
250 | |
(* Classify an element type; conversions and casts classify both sides. *)
let rec elt_class = function
    S8 | S16 | S32 | S64 -> Signed
  | U8 | U16 | U32 | U64 -> Unsigned
  | P8 | P16 -> Poly
  | F32 -> Float
  | I8 | I16 | I32 | I64 -> Int
  | B8 | B16 | B32 | B64 -> Bits
  | Conv (a, b) | Cast (a, b) -> ConvClass (elt_class a, elt_class b)
  | NoElts -> NoType
260 | |
(* Inverse of (elt_class, elt_width): rebuild an element type from its class
   and bit width.  Fails for combinations with no representation (e.g.
   Float at any width other than 32). *)
let elt_of_class_width c w =
  match c, w with
    Signed, 8 -> S8
  | Signed, 16 -> S16
  | Signed, 32 -> S32
  | Signed, 64 -> S64
  | Float, 32 -> F32
  | Unsigned, 8 -> U8
  | Unsigned, 16 -> U16
  | Unsigned, 32 -> U32
  | Unsigned, 64 -> U64
  | Poly, 8 -> P8
  | Poly, 16 -> P16
  | Int, 8 -> I8
  | Int, 16 -> I16
  | Int, 32 -> I32
  | Int, 64 -> I64
  | Bits, 8 -> B8
  | Bits, 16 -> B16
  | Bits, 32 -> B32
  | Bits, 64 -> B64
  | _ -> failwith "Bad element type"
283 | |
(* Return unsigned integer element the same width as argument. *)
let unsigned_of_elt elt =
  elt_of_class_width Unsigned (elt_width elt)

(* Return signed integer element the same width as argument. *)
let signed_of_elt elt =
  elt_of_class_width Signed (elt_width elt)

(* Return untyped bits element the same width as argument. *)
let bits_of_elt elt =
  elt_of_class_width Bits (elt_width elt)
294 | |
(* Map both signed and unsigned integer elements onto the corresponding
   signless Int element; anything else is returned unchanged. *)
let non_signed_variant = function
    S8 | U8 -> I8
  | S16 | U16 -> I16
  | S32 | U32 -> I32
  | S64 | U64 -> I64
  | x -> x
305 | |
(* Replace a polynomial element by the unsigned element of equal width;
   other classes are kept as-is. *)
let poly_unsigned_variant v =
  let cls =
    match elt_class v with
      Poly -> Unsigned
    | x -> x
  in
  elt_of_class_width cls (elt_width v)
311 | |
(* Same class, double the width (e.g. S16 -> S32). *)
let widen_elt elt =
  elt_of_class_width (elt_class elt) (elt_width elt * 2)

(* Same class, half the width (e.g. S32 -> S16). *)
let narrow_elt elt =
  elt_of_class_width (elt_class elt) (elt_width elt / 2)
321 | |
(* If we're trying to find a mode from a "Use_operands" instruction, use the
   last vector operand as the dominant mode used to invoke the correct builtin.
   We must stick to this rule in neon.md. *)
let find_key_operand operands =
  (* Scan backwards for the last D/Q register operand, possibly wrapped in a
     vector array.  NOTE(review): if no such operand exists the index
     underflows and raises -- assumed unreachable for well-formed shapes. *)
  let rec scan opno =
    match operands.(opno) with
      Qreg | VecArray (_, Qreg) -> Qreg
    | Dreg | VecArray (_, Dreg) -> Dreg
    | _ -> scan (opno - 1)
  in
  scan (Array.length operands - 1)
335 | |
(* Map an element type plus an instruction shape onto the GCC machine mode
   used to select the builtin.  Floating-point elements pick the FP mode of
   the same width; Use_operands defers to its key (last vector) operand. *)
let rec mode_of_elt elt shape =
  let flt =
    match elt_class elt with
      Float | ConvClass (_, Float) -> true
    | _ -> false
  in
  let idx =
    match elt_width elt with
      8 -> 0 | 16 -> 1 | 32 -> 2 | 64 -> 3
    | _ -> failwith "Bad element width"
  in
  match shape with
    All (_, Dreg) | By_scalar Dreg | Pair_result Dreg | Unary_scalar Dreg
  | Binary_imm Dreg | Long_noreg Dreg | Wide_noreg Dreg ->
      [| V8QI; V4HI; if flt then V2SF else V2SI; DI |].(idx)
  | All (_, Qreg) | By_scalar Qreg | Pair_result Qreg | Unary_scalar Qreg
  | Binary_imm Qreg | Long_noreg Qreg | Wide_noreg Qreg ->
      [| V16QI; V8HI; if flt then V4SF else V4SI; V2DI |].(idx)
  | All (_, (Corereg | PtrTo _ | CstPtrTo _)) ->
      [| QI; HI; if flt then SF else SI; DI |].(idx)
  | Long | Wide | Wide_lane | Wide_scalar
  | Long_imm ->
      [| V8QI; V4HI; V2SI; DI |].(idx)
  | Narrow | Narrow_imm -> [| V16QI; V8HI; V4SI; V2DI |].(idx)
  | Use_operands ops -> mode_of_elt elt (All (0, find_key_operand ops))
  | _ -> failwith "invalid shape"
358 | |
(* Modify an element type dependent on the shape of the instruction and the
   operand number.  Returns a function applied to the element: operand 0 is
   the result, so e.g. Long shapes widen only operand 0, Wide shapes widen
   the result and the first source, Narrow shapes narrow the result. *)
let shapemap shape no =
  let ident = fun x -> x in
  match shape with
    All _ | Use_operands _ | By_scalar _ | Pair_result _ | Unary_scalar _
  | Binary_imm _ -> ident
  | Long | Long_noreg _ | Wide_scalar | Long_imm ->
      [| widen_elt; ident; ident |].(no)
  | Wide | Wide_noreg _ -> [| widen_elt; widen_elt; ident |].(no)
  | Wide_lane -> [| widen_elt; ident; ident; ident |].(no)
  | Narrow | Narrow_imm -> [| narrow_elt; ident; ident |].(no)
372 | |
(* Register type (D/Q) of an operand, based on shape and operand number.
   Operand 0 is the result. *)
let regmap shape no =
  match shape with
    All (_, reg) | Long_noreg reg | Wide_noreg reg -> reg
  | Long -> [| Qreg; Dreg; Dreg |].(no)
  | Wide -> [| Qreg; Qreg; Dreg |].(no)
  | Narrow -> [| Dreg; Qreg; Qreg |].(no)
  | Wide_lane -> [| Qreg; Dreg; Dreg; Immed |].(no)
  | Wide_scalar -> [| Qreg; Dreg; Corereg |].(no)
  | By_scalar reg -> [| reg; reg; Dreg; Immed |].(no)
  | Unary_scalar reg -> [| reg; Dreg; Immed |].(no)
  | Pair_result reg -> [| VecArray (2, reg); reg; reg |].(no)
  | Binary_imm reg -> [| reg; reg; Immed |].(no)
  | Long_imm -> [| Qreg; Dreg; Immed |].(no)
  | Narrow_imm -> [| Dreg; Qreg; Immed |].(no)
  | Use_operands these -> these.(no)
390 | |
(* C-level vectype for operand [no] of shape [shape] with element type [elt]:
   first adjust the element per the shape (widen/narrow), look up the operand's
   register class, then translate (register, element) into a vectype. *)
let type_for_elt shape elt no =
  let elt = (shapemap shape no) elt in
  let reg = regmap shape no in
  let rec type_for_reg_elt reg elt =
    match reg with
      Dreg ->
        begin match elt with
          S8 -> T_int8x8
        | S16 -> T_int16x4
        | S32 -> T_int32x2
        | S64 -> T_int64x1
        | U8 -> T_uint8x8
        | U16 -> T_uint16x4
        | U32 -> T_uint32x2
        | U64 -> T_uint64x1
        | F32 -> T_float32x2
        | P8 -> T_poly8x8
        | P16 -> T_poly16x4
        | _ -> failwith "Bad elt type"
        end
    | Qreg ->
        begin match elt with
          S8 -> T_int8x16
        | S16 -> T_int16x8
        | S32 -> T_int32x4
        | S64 -> T_int64x2
        | U8 -> T_uint8x16
        | U16 -> T_uint16x8
        | U32 -> T_uint32x4
        | U64 -> T_uint64x2
        | F32 -> T_float32x4
        | P8 -> T_poly8x16
        | P16 -> T_poly16x8
        | _ -> failwith "Bad elt type"
        end
    | Corereg ->
        begin match elt with
          S8 -> T_int8
        | S16 -> T_int16
        | S32 -> T_int32
        | S64 -> T_int64
        | U8 -> T_uint8
        | U16 -> T_uint16
        | U32 -> T_uint32
        | U64 -> T_uint64
        | P8 -> T_poly8
        | P16 -> T_poly16
        | F32 -> T_float32
        | _ -> failwith "Bad elt type"
        end
    | Immed ->
        (* Placeholder range; real bounds are filled in by the shift/lane
           type-generating functions. *)
        T_immediate (0, 0)
    | VecArray (num, sub) ->
        T_arrayof (num, type_for_reg_elt sub elt)
    | PtrTo x ->
        T_ptrto (type_for_reg_elt x elt)
    | CstPtrTo x ->
        T_ptrto (T_const (type_for_reg_elt x elt))
    (* Anything else is solely for the use of the test generator. *)
    | _ -> assert false
  in
  type_for_reg_elt reg elt
453 | |
(* Return size of a vector type, in bits.
   @raise Not_found for non-vector types. *)
let vectype_size = function
    T_int8x8 | T_int16x4 | T_int32x2 | T_int64x1
  | T_uint8x8 | T_uint16x4 | T_uint32x2 | T_uint64x1
  | T_float32x2 | T_poly8x8 | T_poly16x4 -> 64
  | T_int8x16 | T_int16x8 | T_int32x4 | T_int64x2
  | T_uint8x16 | T_uint16x8 | T_uint32x4 | T_uint64x2
  | T_float32x4 | T_poly8x16 | T_poly16x8 -> 128
  | _ -> raise Not_found
463 | |
(* Opaque integer mode wide enough to hold [num] vectors of type [elttype]:
   size is measured in 32-bit words, matching the B_*mode naming above. *)
let inttype_for_array num elttype =
  let eltsize = vectype_size elttype in
  match num * eltsize / 32 with
    4 -> B_TImode
  | 6 -> B_EImode
  | 8 -> B_OImode
  | 12 -> B_CImode
  | 16 -> B_XImode
  | n -> failwith ("no int type for size " ^ string_of_int n)
474 | |
(* These functions return pairs of (internal, external) types, where "internal"
   types are those seen by GCC, and "external" are those seen by the assembler.
   These types aren't necessarily the same, since the intrinsics can munge more
   than one C type into each assembler opcode. *)

(* Wrap a type-generating function so that the resulting element type is
   stripped of its sign (S*/U* become I*). *)
let make_sign_invariant func shape elt =
  let arity, elt' = func shape elt in
  arity, non_signed_variant elt'
483 | |
(* Don't restrict any types. *)

(* All operands share the element type; [make_arity] receives a function from
   operand number to vectype and assembles the arity. *)
let elts_same make_arity shape elt =
  let vtype = type_for_elt shape elt in
  make_arity vtype, elt

(* As sign_invar_*, but when sign matters.  The "_io" variants thread the
   destination in as an extra input (read-modify-write); "_lane" variants
   take a trailing lane operand. *)
let elts_same_io_lane =
  elts_same (fun vtype -> Arity4 (vtype 0, vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_io =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 0, vtype 1, vtype 2))

let elts_same_2_lane =
  elts_same (fun vtype -> Arity3 (vtype 0, vtype 1, vtype 2, vtype 3))

let elts_same_3 = elts_same_2_lane

let elts_same_2 =
  elts_same (fun vtype -> Arity2 (vtype 0, vtype 1, vtype 2))

let elts_same_1 =
  elts_same (fun vtype -> Arity1 (vtype 0, vtype 1))
507 | |
(* Use for signed/unsigned invariant operations (i.e. where the operation
   doesn't depend on the sign of the data). *)

let sign_invar_io_lane = make_sign_invariant elts_same_io_lane
let sign_invar_io = make_sign_invariant elts_same_io
let sign_invar_2_lane = make_sign_invariant elts_same_2_lane
let sign_invar_2 = make_sign_invariant elts_same_2
let sign_invar_1 = make_sign_invariant elts_same_1
516 | |
(* Sign-sensitive comparison: the result is the unsigned element of the same
   width (a mask), while the sources keep their original element type. *)
let cmp_sign_matters shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (rtype, vtype 1, vtype 2), elt
523 | |
(* Signed/unsigned invariant comparison: like cmp_sign_matters, but the
   external element loses its sign, and poly8 compares as int8. *)
let cmp_sign_invar shape elt =
  let arity, elt' = cmp_sign_matters shape elt in
  let elt'' =
    match non_signed_variant elt' with
      P8 -> I8
    | x -> x
  in
  arity, elt''
534 | |
(* Comparison (VTST) where only the element width matters. *)
let cmp_bits shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  Arity2 (rtype, vtype 1, vtype 2), bits_of_elt elt
542 | |
(* Register-controlled shift: the shift-count operand (operand 2) is always
   the signed element of the same width, whatever the data's sign. *)
let reg_shift shape elt =
  let vtype = type_for_elt shape elt
  and counttype = type_for_elt shape (signed_of_elt elt) 2 in
  Arity2 (vtype 0, vtype 1, counttype), elt
547 | |
(* Genericised constant-shift type-generating function.
   [mkimm] builds the immediate type from the (shape-adjusted) element width;
   [?result] optionally restricts the result element (e.g. force unsigned);
   [?arity] optionally overrides the default two-operand arity. *)
let const_shift mkimm ?arity ?result shape elt =
  let op2width = elt_width ((shapemap shape 2) elt) in
  let op2 = mkimm op2width
  and op1 = type_for_elt shape elt 1
  and r_elt =
    match result with
      None -> elt
    | Some restriction -> restriction elt
  in
  let rtype = type_for_elt shape r_elt 0 in
  match arity with
    None -> Arity2 (rtype, op1, op2), elt
  | Some mkarity -> mkarity rtype op1 op2, elt
563 | |
(* Use for immediate right-shifts.  Valid immediate range is 1..width. *)

let shift_right shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) shape elt

(* Right-shift and accumulate: destination is also the first source. *)
let shift_right_acc shape elt =
  const_shift (fun imm -> T_immediate (1, imm))
    ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt

(* Use for immediate right-shifts when the operation doesn't care about
   signedness. *)

let shift_right_sign_invar =
  make_sign_invariant shift_right

(* Immediate right-shift; result is unsigned even when operand is signed. *)

let shift_right_to_uns shape elt =
  const_shift (fun imm -> T_immediate (1, imm)) ~result:unsigned_of_elt
    shape elt
584 | |
(* Immediate left-shift.  Valid immediate range is 0..width-1. *)

let shift_left shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) shape elt

(* Immediate left-shift, unsigned result. *)

let shift_left_to_uns shape elt =
  const_shift (fun imm -> T_immediate (0, imm - 1)) ~result:unsigned_of_elt
    shape elt

(* Immediate left-shift, don't care about signs. *)

let shift_left_sign_invar =
  make_sign_invariant shift_left
600 | |
(* Shift left/right and insert: only element size matters.  Destination is
   read-modify-write, so it is also the first source. *)
let shift_insert shape elt =
  let arity, elt =
    const_shift (fun imm -> T_immediate (1, imm))
      ~arity:(fun dst op1 op2 -> Arity3 (dst, dst, op1, op2)) shape elt
  in
  arity, bits_of_elt elt
608 | |
(* Get/set lane. *)

(* Externally, poly lanes read as unsigned and 32-bit lanes as raw bits. *)
let get_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity2 (vtype 0, vtype 1, vtype 2),
  (match elt with P8 -> U8 | P16 -> U16 | S32 | U32 | F32 -> B32 | x -> x)

let set_lane shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt

(* As set_lane, but with no external element type at all. *)
let set_lane_notype shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), NoElts
623 | |
(* Build a vector from a 64-bit pattern: the source is always uint64. *)
let create_vector shape elt =
  let srctype = type_for_elt shape U64 1
  and rtype = type_for_elt shape elt 0 in
  Arity1 (rtype, srctype), elt
628 | |
(* Conversion ops: the element must be a Conv or Cast carrying destination
   and source element types. *)
let conv make_arity shape elt =
  let edest, esrc =
    match elt with
      Conv (edest, esrc) | Cast (edest, esrc) -> edest, esrc
    | _ -> failwith "Non-conversion element in conversion"
  in
  let vtype = type_for_elt shape esrc
  and rtype = type_for_elt shape edest 0 in
  make_arity rtype vtype, elt

let conv_1 = conv (fun rtype vtype -> Arity1 (rtype, vtype 1))
let conv_2 = conv (fun rtype vtype -> Arity2 (rtype, vtype 1, vtype 2))
639 | |
(* Operation has an unsigned result even if operands are signed. *)

let dst_unsign make_arity shape elt =
  let vtype = type_for_elt shape elt
  and rtype = type_for_elt shape (unsigned_of_elt elt) 0 in
  make_arity rtype vtype, elt

let dst_unsign_1 = dst_unsign (fun rtype vtype -> Arity1 (rtype, vtype 1))
648 | |
(* Wrap a type-generating function so the external element is width-only. *)
let make_bits_only func shape elt =
  let arity, elt' = func shape elt in
  arity, bits_of_elt elt'

(* Extend operation (three same-typed sources; width-only external elt). *)

let extend shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (vtype 0, vtype 1, vtype 2, vtype 3), bits_of_elt elt
658 | |
(* Table look-up operations.  Operand 2 is signed/unsigned for signed/unsigned
   integer ops respectively, or unsigned for polynomial ops. *)

let table mkarity shape elt =
  let vtype = type_for_elt shape elt in
  let op2 = type_for_elt shape (poly_unsigned_variant elt) 2 in
  mkarity vtype op2, bits_of_elt elt

let table_2 = table (fun vtype op2 -> Arity2 (vtype 0, vtype 1, op2))
let table_io = table (fun vtype op2 -> Arity3 (vtype 0, vtype 0, vtype 1, op2))
669 | |
(* Operations where only bits matter. *)

let bits_1 = make_bits_only elts_same_1
let bits_2 = make_bits_only elts_same_2
let bits_3 = make_bits_only elts_same_3
675 | |
(* Store insns: void result, width-only external element type. *)
let store_1 shape elt =
  let vtype = type_for_elt shape elt in
  Arity2 (T_void, vtype 0, vtype 1), bits_of_elt elt

let store_3 shape elt =
  let vtype = type_for_elt shape elt in
  Arity3 (T_void, vtype 0, vtype 1, vtype 2), bits_of_elt elt
684 | |
(* Wrap a type-generating function, discarding the external element type. *)
let make_notype func shape elt =
  let arity, _ = func shape elt in
  arity, NoElts

let notype_1 = make_notype elts_same_1
let notype_2 = make_notype elts_same_2
let notype_3 = make_notype elts_same_3
692 | |
(* Bit-select operations (first operand is unsigned int). *)

let bit_select shape elt =
  let vtype = type_for_elt shape elt
  and itype = type_for_elt shape (unsigned_of_elt elt) in
  Arity3 (vtype 0, itype 1, vtype 2, vtype 3), NoElts
699 | |
(* Common lists of supported element types. *)

let su_8_32 = [S8; S16; S32; U8; U16; U32]
let su_8_64 = S64 :: U64 :: su_8_32
let su_16_64 = [S16; S32; S64; U16; U32; U64]
let pf_su_8_32 = P8 :: P16 :: F32 :: su_8_32
let pf_su_8_64 = P8 :: P16 :: F32 :: su_8_64
707 | |
708 let ops = | |
709 [ | |
710 (* Addition. *) | |
711 Vadd, [], All (3, Dreg), "vadd", sign_invar_2, F32 :: su_8_64; | |
712 Vadd, [], All (3, Qreg), "vaddQ", sign_invar_2, F32 :: su_8_64; | |
713 Vadd, [], Long, "vaddl", elts_same_2, su_8_32; | |
714 Vadd, [], Wide, "vaddw", elts_same_2, su_8_32; | |
715 Vadd, [Halving], All (3, Dreg), "vhadd", elts_same_2, su_8_32; | |
716 Vadd, [Halving], All (3, Qreg), "vhaddQ", elts_same_2, su_8_32; | |
717 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving], | |
718 All (3, Dreg), "vRhadd", elts_same_2, su_8_32; | |
719 Vadd, [Instruction_name ["vrhadd"]; Rounding; Halving], | |
720 All (3, Qreg), "vRhaddQ", elts_same_2, su_8_32; | |
721 Vadd, [Saturating], All (3, Dreg), "vqadd", elts_same_2, su_8_64; | |
722 Vadd, [Saturating], All (3, Qreg), "vqaddQ", elts_same_2, su_8_64; | |
723 Vadd, [High_half], Narrow, "vaddhn", sign_invar_2, su_16_64; | |
724 Vadd, [Instruction_name ["vraddhn"]; Rounding; High_half], | |
725 Narrow, "vRaddhn", sign_invar_2, su_16_64; | |
726 | |
727 (* Multiplication. *) | |
728 Vmul, [], All (3, Dreg), "vmul", sign_invar_2, P8 :: F32 :: su_8_32; | |
729 Vmul, [], All (3, Qreg), "vmulQ", sign_invar_2, P8 :: F32 :: su_8_32; | |
730 Vmul, [Saturating; Doubling; High_half], All (3, Dreg), "vqdmulh", | |
731 elts_same_2, [S16; S32]; | |
732 Vmul, [Saturating; Doubling; High_half], All (3, Qreg), "vqdmulhQ", | |
733 elts_same_2, [S16; S32]; | |
734 Vmul, | |
735 [Saturating; Rounding; Doubling; High_half; | |
736 Instruction_name ["vqrdmulh"]], | |
737 All (3, Dreg), "vqRdmulh", | |
738 elts_same_2, [S16; S32]; | |
739 Vmul, | |
740 [Saturating; Rounding; Doubling; High_half; | |
741 Instruction_name ["vqrdmulh"]], | |
742 All (3, Qreg), "vqRdmulhQ", | |
743 elts_same_2, [S16; S32]; | |
744 Vmul, [], Long, "vmull", elts_same_2, P8 :: su_8_32; | |
745 Vmul, [Saturating; Doubling], Long, "vqdmull", elts_same_2, [S16; S32]; | |
746 | |
747 (* Multiply-accumulate. *) | |
748 Vmla, [], All (3, Dreg), "vmla", sign_invar_io, F32 :: su_8_32; | |
749 Vmla, [], All (3, Qreg), "vmlaQ", sign_invar_io, F32 :: su_8_32; | |
750 Vmla, [], Long, "vmlal", elts_same_io, su_8_32; | |
751 Vmla, [Saturating; Doubling], Long, "vqdmlal", elts_same_io, [S16; S32]; | |
752 | |
753 (* Multiply-subtract. *) | |
754 Vmls, [], All (3, Dreg), "vmls", sign_invar_io, F32 :: su_8_32; | |
755 Vmls, [], All (3, Qreg), "vmlsQ", sign_invar_io, F32 :: su_8_32; | |
756 Vmls, [], Long, "vmlsl", elts_same_io, su_8_32; | |
757 Vmls, [Saturating; Doubling], Long, "vqdmlsl", elts_same_io, [S16; S32]; | |
758 | |
759 (* Subtraction. *) | |
760 Vsub, [], All (3, Dreg), "vsub", sign_invar_2, F32 :: su_8_64; | |
761 Vsub, [], All (3, Qreg), "vsubQ", sign_invar_2, F32 :: su_8_64; | |
762 Vsub, [], Long, "vsubl", elts_same_2, su_8_32; | |
763 Vsub, [], Wide, "vsubw", elts_same_2, su_8_32; | |
764 Vsub, [Halving], All (3, Dreg), "vhsub", elts_same_2, su_8_32; | |
765 Vsub, [Halving], All (3, Qreg), "vhsubQ", elts_same_2, su_8_32; | |
766 Vsub, [Saturating], All (3, Dreg), "vqsub", elts_same_2, su_8_64; | |
767 Vsub, [Saturating], All (3, Qreg), "vqsubQ", elts_same_2, su_8_64; | |
768 Vsub, [High_half], Narrow, "vsubhn", sign_invar_2, su_16_64; | |
769 Vsub, [Instruction_name ["vrsubhn"]; Rounding; High_half], | |
770 Narrow, "vRsubhn", sign_invar_2, su_16_64; | |
771 | |
772 (* Comparison, equal. *) | |
773 Vceq, [], All (3, Dreg), "vceq", cmp_sign_invar, P8 :: F32 :: su_8_32; | |
774 Vceq, [], All (3, Qreg), "vceqQ", cmp_sign_invar, P8 :: F32 :: su_8_32; | |
775 | |
776 (* Comparison, greater-than or equal. *) | |
777 Vcge, [], All (3, Dreg), "vcge", cmp_sign_matters, F32 :: su_8_32; | |
778 Vcge, [], All (3, Qreg), "vcgeQ", cmp_sign_matters, F32 :: su_8_32; | |
779 | |
780 (* Comparison, less-than or equal. *) | |
781 Vcle, [Flipped "vcge"], All (3, Dreg), "vcle", cmp_sign_matters, | |
782 F32 :: su_8_32; | |
783 Vcle, [Instruction_name ["vcge"]; Flipped "vcgeQ"], | |
784 All (3, Qreg), "vcleQ", cmp_sign_matters, | |
785 F32 :: su_8_32; | |
786 | |
787 (* Comparison, greater-than. *) | |
788 Vcgt, [], All (3, Dreg), "vcgt", cmp_sign_matters, F32 :: su_8_32; | |
789 Vcgt, [], All (3, Qreg), "vcgtQ", cmp_sign_matters, F32 :: su_8_32; | |
790 | |
791 (* Comparison, less-than. *) | |
792 Vclt, [Flipped "vcgt"], All (3, Dreg), "vclt", cmp_sign_matters, | |
793 F32 :: su_8_32; | |
794 Vclt, [Instruction_name ["vcgt"]; Flipped "vcgtQ"], | |
795 All (3, Qreg), "vcltQ", cmp_sign_matters, | |
796 F32 :: su_8_32; | |
797 | |
798 (* Compare absolute greater-than or equal. *) | |
799 Vcage, [Instruction_name ["vacge"]], | |
800 All (3, Dreg), "vcage", cmp_sign_matters, [F32]; | |
801 Vcage, [Instruction_name ["vacge"]], | |
802 All (3, Qreg), "vcageQ", cmp_sign_matters, [F32]; | |
803 | |
804 (* Compare absolute less-than or equal. *) | |
805 Vcale, [Instruction_name ["vacge"]; Flipped "vcage"], | |
806 All (3, Dreg), "vcale", cmp_sign_matters, [F32]; | |
807 Vcale, [Instruction_name ["vacge"]; Flipped "vcageQ"], | |
808 All (3, Qreg), "vcaleQ", cmp_sign_matters, [F32]; | |
809 | |
810 (* Compare absolute greater-than or equal. *) | |
811 Vcagt, [Instruction_name ["vacgt"]], | |
812 All (3, Dreg), "vcagt", cmp_sign_matters, [F32]; | |
813 Vcagt, [Instruction_name ["vacgt"]], | |
814 All (3, Qreg), "vcagtQ", cmp_sign_matters, [F32]; | |
815 | |
816 (* Compare absolute less-than or equal. *) | |
817 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagt"], | |
818 All (3, Dreg), "vcalt", cmp_sign_matters, [F32]; | |
819 Vcalt, [Instruction_name ["vacgt"]; Flipped "vcagtQ"], | |
820 All (3, Qreg), "vcaltQ", cmp_sign_matters, [F32]; | |
821 | |
822 (* Test bits. *) | |
823 Vtst, [], All (3, Dreg), "vtst", cmp_bits, P8 :: su_8_32; | |
824 Vtst, [], All (3, Qreg), "vtstQ", cmp_bits, P8 :: su_8_32; | |
825 | |
826 (* Absolute difference. *) | |
827 Vabd, [], All (3, Dreg), "vabd", elts_same_2, F32 :: su_8_32; | |
828 Vabd, [], All (3, Qreg), "vabdQ", elts_same_2, F32 :: su_8_32; | |
829 Vabd, [], Long, "vabdl", elts_same_2, su_8_32; | |
830 | |
831 (* Absolute difference and accumulate. *) | |
832 Vaba, [], All (3, Dreg), "vaba", elts_same_io, su_8_32; | |
833 Vaba, [], All (3, Qreg), "vabaQ", elts_same_io, su_8_32; | |
834 Vaba, [], Long, "vabal", elts_same_io, su_8_32; | |
835 | |
836 (* Max. *) | |
837 Vmax, [], All (3, Dreg), "vmax", elts_same_2, F32 :: su_8_32; | |
838 Vmax, [], All (3, Qreg), "vmaxQ", elts_same_2, F32 :: su_8_32; | |
839 | |
840 (* Min. *) | |
841 Vmin, [], All (3, Dreg), "vmin", elts_same_2, F32 :: su_8_32; | |
842 Vmin, [], All (3, Qreg), "vminQ", elts_same_2, F32 :: su_8_32; | |
843 | |
844 (* Pairwise add. *) | |
845 Vpadd, [], All (3, Dreg), "vpadd", sign_invar_2, F32 :: su_8_32; | |
846 Vpadd, [], Long_noreg Dreg, "vpaddl", elts_same_1, su_8_32; | |
847 Vpadd, [], Long_noreg Qreg, "vpaddlQ", elts_same_1, su_8_32; | |
848 | |
849 (* Pairwise add, widen and accumulate. *) | |
850 Vpada, [], Wide_noreg Dreg, "vpadal", elts_same_2, su_8_32; | |
851 Vpada, [], Wide_noreg Qreg, "vpadalQ", elts_same_2, su_8_32; | |
852 | |
853 (* Folding maximum, minimum. *) | |
854 Vpmax, [], All (3, Dreg), "vpmax", elts_same_2, F32 :: su_8_32; | |
855 Vpmin, [], All (3, Dreg), "vpmin", elts_same_2, F32 :: su_8_32; | |
856 | |
857 (* Reciprocal step. *) | |
858 Vrecps, [], All (3, Dreg), "vrecps", elts_same_2, [F32]; | |
859 Vrecps, [], All (3, Qreg), "vrecpsQ", elts_same_2, [F32]; | |
860 Vrsqrts, [], All (3, Dreg), "vrsqrts", elts_same_2, [F32]; | |
861 Vrsqrts, [], All (3, Qreg), "vrsqrtsQ", elts_same_2, [F32]; | |
862 | |
863 (* Vector shift left. *) | |
864 Vshl, [], All (3, Dreg), "vshl", reg_shift, su_8_64; | |
865 Vshl, [], All (3, Qreg), "vshlQ", reg_shift, su_8_64; | |
866 Vshl, [Instruction_name ["vrshl"]; Rounding], | |
867 All (3, Dreg), "vRshl", reg_shift, su_8_64; | |
868 Vshl, [Instruction_name ["vrshl"]; Rounding], | |
869 All (3, Qreg), "vRshlQ", reg_shift, su_8_64; | |
870 Vshl, [Saturating], All (3, Dreg), "vqshl", reg_shift, su_8_64; | |
871 Vshl, [Saturating], All (3, Qreg), "vqshlQ", reg_shift, su_8_64; | |
872 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding], | |
873 All (3, Dreg), "vqRshl", reg_shift, su_8_64; | |
874 Vshl, [Instruction_name ["vqrshl"]; Saturating; Rounding], | |
875 All (3, Qreg), "vqRshlQ", reg_shift, su_8_64; | |
876 | |
877 (* Vector shift right by constant. *) | |
878 Vshr_n, [], Binary_imm Dreg, "vshr_n", shift_right, su_8_64; | |
879 Vshr_n, [], Binary_imm Qreg, "vshrQ_n", shift_right, su_8_64; | |
880 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Dreg, | |
881 "vRshr_n", shift_right, su_8_64; | |
882 Vshr_n, [Instruction_name ["vrshr"]; Rounding], Binary_imm Qreg, | |
883 "vRshrQ_n", shift_right, su_8_64; | |
884 Vshr_n, [], Narrow_imm, "vshrn_n", shift_right_sign_invar, su_16_64; | |
885 Vshr_n, [Instruction_name ["vrshrn"]; Rounding], Narrow_imm, "vRshrn_n", | |
886 shift_right_sign_invar, su_16_64; | |
887 Vshr_n, [Saturating], Narrow_imm, "vqshrn_n", shift_right, su_16_64; | |
888 Vshr_n, [Instruction_name ["vqrshrn"]; Saturating; Rounding], Narrow_imm, | |
889 "vqRshrn_n", shift_right, su_16_64; | |
890 Vshr_n, [Saturating; Dst_unsign], Narrow_imm, "vqshrun_n", | |
891 shift_right_to_uns, [S16; S32; S64]; | |
892 Vshr_n, [Instruction_name ["vqrshrun"]; Saturating; Dst_unsign; Rounding], | |
893 Narrow_imm, "vqRshrun_n", shift_right_to_uns, [S16; S32; S64]; | |
894 | |
895 (* Vector shift left by constant. *) | |
896 Vshl_n, [], Binary_imm Dreg, "vshl_n", shift_left_sign_invar, su_8_64; | |
897 Vshl_n, [], Binary_imm Qreg, "vshlQ_n", shift_left_sign_invar, su_8_64; | |
898 Vshl_n, [Saturating], Binary_imm Dreg, "vqshl_n", shift_left, su_8_64; | |
899 Vshl_n, [Saturating], Binary_imm Qreg, "vqshlQ_n", shift_left, su_8_64; | |
900 Vshl_n, [Saturating; Dst_unsign], Binary_imm Dreg, "vqshlu_n", | |
901 shift_left_to_uns, [S8; S16; S32; S64]; | |
902 Vshl_n, [Saturating; Dst_unsign], Binary_imm Qreg, "vqshluQ_n", | |
903 shift_left_to_uns, [S8; S16; S32; S64]; | |
904 Vshl_n, [], Long_imm, "vshll_n", shift_left, su_8_32; | |
905 | |
906 (* Vector shift right by constant and accumulate. *) | |
907 Vsra_n, [], Binary_imm Dreg, "vsra_n", shift_right_acc, su_8_64; | |
908 Vsra_n, [], Binary_imm Qreg, "vsraQ_n", shift_right_acc, su_8_64; | |
909 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Dreg, | |
910 "vRsra_n", shift_right_acc, su_8_64; | |
911 Vsra_n, [Instruction_name ["vrsra"]; Rounding], Binary_imm Qreg, | |
912 "vRsraQ_n", shift_right_acc, su_8_64; | |
913 | |
914 (* Vector shift right and insert. *) | |
915 Vsri, [], Use_operands [| Dreg; Dreg; Immed |], "vsri_n", shift_insert, | |
916 P8 :: P16 :: su_8_64; | |
917 Vsri, [], Use_operands [| Qreg; Qreg; Immed |], "vsriQ_n", shift_insert, | |
918 P8 :: P16 :: su_8_64; | |
919 | |
920 (* Vector shift left and insert. *) | |
921 Vsli, [], Use_operands [| Dreg; Dreg; Immed |], "vsli_n", shift_insert, | |
922 P8 :: P16 :: su_8_64; | |
923 Vsli, [], Use_operands [| Qreg; Qreg; Immed |], "vsliQ_n", shift_insert, | |
924 P8 :: P16 :: su_8_64; | |
925 | |
926 (* Absolute value. *) | |
927 Vabs, [], All (2, Dreg), "vabs", elts_same_1, [S8; S16; S32; F32]; | |
928 Vabs, [], All (2, Qreg), "vabsQ", elts_same_1, [S8; S16; S32; F32]; | |
929 Vabs, [Saturating], All (2, Dreg), "vqabs", elts_same_1, [S8; S16; S32]; | |
930 Vabs, [Saturating], All (2, Qreg), "vqabsQ", elts_same_1, [S8; S16; S32]; | |
931 | |
932 (* Negate. *) | |
933 Vneg, [], All (2, Dreg), "vneg", elts_same_1, [S8; S16; S32; F32]; | |
934 Vneg, [], All (2, Qreg), "vnegQ", elts_same_1, [S8; S16; S32; F32]; | |
935 Vneg, [Saturating], All (2, Dreg), "vqneg", elts_same_1, [S8; S16; S32]; | |
936 Vneg, [Saturating], All (2, Qreg), "vqnegQ", elts_same_1, [S8; S16; S32]; | |
937 | |
938 (* Bitwise not. *) | |
939 Vmvn, [], All (2, Dreg), "vmvn", notype_1, P8 :: su_8_32; | |
940 Vmvn, [], All (2, Qreg), "vmvnQ", notype_1, P8 :: su_8_32; | |
941 | |
942 (* Count leading sign bits. *) | |
943 Vcls, [], All (2, Dreg), "vcls", elts_same_1, [S8; S16; S32]; | |
944 Vcls, [], All (2, Qreg), "vclsQ", elts_same_1, [S8; S16; S32]; | |
945 | |
946 (* Count leading zeros. *) | |
947 Vclz, [], All (2, Dreg), "vclz", sign_invar_1, su_8_32; | |
948 Vclz, [], All (2, Qreg), "vclzQ", sign_invar_1, su_8_32; | |
949 | |
950 (* Count number of set bits. *) | |
951 Vcnt, [], All (2, Dreg), "vcnt", bits_1, [P8; S8; U8]; | |
952 Vcnt, [], All (2, Qreg), "vcntQ", bits_1, [P8; S8; U8]; | |
953 | |
954 (* Reciprocal estimate. *) | |
955 Vrecpe, [], All (2, Dreg), "vrecpe", elts_same_1, [U32; F32]; | |
956 Vrecpe, [], All (2, Qreg), "vrecpeQ", elts_same_1, [U32; F32]; | |
957 | |
958 (* Reciprocal square-root estimate. *) | |
959 Vrsqrte, [], All (2, Dreg), "vrsqrte", elts_same_1, [U32; F32]; | |
960 Vrsqrte, [], All (2, Qreg), "vrsqrteQ", elts_same_1, [U32; F32]; | |
961 | |
962 (* Get lanes from a vector. *) | |
963 Vget_lane, | |
964 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]]; | |
965 Instruction_name ["vmov"]], | |
966 Use_operands [| Corereg; Dreg; Immed |], | |
967 "vget_lane", get_lane, pf_su_8_32; | |
968 Vget_lane, | |
969 [InfoWord; | |
970 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]]; | |
971 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], | |
972 Use_operands [| Corereg; Dreg; Immed |], | |
973 "vget_lane", notype_2, [S64; U64]; | |
974 Vget_lane, | |
975 [InfoWord; Disassembles_as [Use_operands [| Corereg; Element_of_dreg |]]; | |
976 Instruction_name ["vmov"]], | |
977 Use_operands [| Corereg; Qreg; Immed |], | |
978 "vgetQ_lane", get_lane, pf_su_8_32; | |
979 Vget_lane, | |
980 [InfoWord; | |
981 Disassembles_as [Use_operands [| Corereg; Corereg; Dreg |]]; | |
982 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], | |
983 Use_operands [| Corereg; Qreg; Immed |], | |
984 "vgetQ_lane", notype_2, [S64; U64]; | |
985 | |
986 (* Set lanes in a vector. *) | |
987 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]]; | |
988 Instruction_name ["vmov"]], | |
989 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane", | |
990 set_lane, pf_su_8_32; | |
991 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]; | |
992 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], | |
993 Use_operands [| Dreg; Corereg; Dreg; Immed |], "vset_lane", | |
994 set_lane_notype, [S64; U64]; | |
995 Vset_lane, [Disassembles_as [Use_operands [| Element_of_dreg; Corereg |]]; | |
996 Instruction_name ["vmov"]], | |
997 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane", | |
998 set_lane, pf_su_8_32; | |
999 Vset_lane, [Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]; | |
1000 Instruction_name ["vmov"]; Const_valuator (fun _ -> 0)], | |
1001 Use_operands [| Qreg; Corereg; Qreg; Immed |], "vsetQ_lane", | |
1002 set_lane_notype, [S64; U64]; | |
1003 | |
1004 (* Create vector from literal bit pattern. *) | |
1005 Vcreate, | |
1006 [No_op], (* Not really, but it can yield various things that are too | |
1007 hard for the test generator at this time. *) | |
1008 Use_operands [| Dreg; Corereg |], "vcreate", create_vector, | |
1009 pf_su_8_64; | |
1010 | |
1011 (* Set all lanes to the same value. *) | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1012 Vdup_n, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1013 [Disassembles_as [Use_operands [| Dreg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1014 Alternatives [ Corereg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1015 Element_of_dreg ] |]]], |
0 | 1016 Use_operands [| Dreg; Corereg |], "vdup_n", bits_1, |
1017 pf_su_8_32; | |
1018 Vdup_n, | |
1019 [Instruction_name ["vmov"]; | |
1020 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], | |
1021 Use_operands [| Dreg; Corereg |], "vdup_n", notype_1, | |
1022 [S64; U64]; | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1023 Vdup_n, |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1024 [Disassembles_as [Use_operands [| Qreg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1025 Alternatives [ Corereg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1026 Element_of_dreg ] |]]], |
0 | 1027 Use_operands [| Qreg; Corereg |], "vdupQ_n", bits_1, |
1028 pf_su_8_32; | |
1029 Vdup_n, | |
1030 [Instruction_name ["vmov"]; | |
1031 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]; | |
1032 Use_operands [| Dreg; Corereg; Corereg |]]], | |
1033 Use_operands [| Qreg; Corereg |], "vdupQ_n", notype_1, | |
1034 [S64; U64]; | |
1035 | |
1036 (* These are just aliases for the above. *) | |
1037 Vmov_n, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1038 [Builtin_name "vdup_n"; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1039 Disassembles_as [Use_operands [| Dreg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1040 Alternatives [ Corereg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1041 Element_of_dreg ] |]]], |
0 | 1042 Use_operands [| Dreg; Corereg |], |
1043 "vmov_n", bits_1, pf_su_8_32; | |
1044 Vmov_n, | |
1045 [Builtin_name "vdup_n"; | |
1046 Instruction_name ["vmov"]; | |
1047 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]]], | |
1048 Use_operands [| Dreg; Corereg |], | |
1049 "vmov_n", notype_1, [S64; U64]; | |
1050 Vmov_n, | |
55
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1051 [Builtin_name "vdupQ_n"; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1052 Disassembles_as [Use_operands [| Qreg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1053 Alternatives [ Corereg; |
77e2b8dfacca
update it from 4.4.3 to 4.5.0
ryoma <e075725@ie.u-ryukyu.ac.jp>
parents:
0
diff
changeset
|
1054 Element_of_dreg ] |]]], |
0 | 1055 Use_operands [| Qreg; Corereg |], |
1056 "vmovQ_n", bits_1, pf_su_8_32; | |
1057 Vmov_n, | |
1058 [Builtin_name "vdupQ_n"; | |
1059 Instruction_name ["vmov"]; | |
1060 Disassembles_as [Use_operands [| Dreg; Corereg; Corereg |]; | |
1061 Use_operands [| Dreg; Corereg; Corereg |]]], | |
1062 Use_operands [| Qreg; Corereg |], | |
1063 "vmovQ_n", notype_1, [S64; U64]; | |
1064 | |
1065 (* Duplicate, lane version. We can't use Use_operands here because the | |
1066 rightmost register (always Dreg) would be picked up by find_key_operand, | |
1067 when we want the leftmost register to be used in this case (otherwise | |
1068 the modes are indistinguishable in neon.md, etc. *) | |
1069 Vdup_lane, | |
1070 [Disassembles_as [Use_operands [| Dreg; Element_of_dreg |]]], | |
1071 Unary_scalar Dreg, "vdup_lane", bits_2, pf_su_8_32; | |
1072 Vdup_lane, | |
1073 [No_op; Const_valuator (fun _ -> 0)], | |
1074 Unary_scalar Dreg, "vdup_lane", bits_2, [S64; U64]; | |
1075 Vdup_lane, | |
1076 [Disassembles_as [Use_operands [| Qreg; Element_of_dreg |]]], | |
1077 Unary_scalar Qreg, "vdupQ_lane", bits_2, pf_su_8_32; | |
1078 Vdup_lane, | |
1079 [No_op; Const_valuator (fun _ -> 0)], | |
1080 Unary_scalar Qreg, "vdupQ_lane", bits_2, [S64; U64]; | |
1081 | |
1082 (* Combining vectors. *) | |
1083 Vcombine, [No_op], | |
1084 Use_operands [| Qreg; Dreg; Dreg |], "vcombine", notype_2, | |
1085 pf_su_8_64; | |
1086 | |
1087 (* Splitting vectors. *) | |
1088 Vget_high, [No_op], | |
1089 Use_operands [| Dreg; Qreg |], "vget_high", | |
1090 notype_1, pf_su_8_64; | |
1091 Vget_low, [Instruction_name ["vmov"]; | |
1092 Disassembles_as [Use_operands [| Dreg; Dreg |]]], | |
1093 Use_operands [| Dreg; Qreg |], "vget_low", | |
1094 notype_1, pf_su_8_64; | |
1095 | |
1096 (* Conversions. *) | |
1097 Vcvt, [InfoWord], All (2, Dreg), "vcvt", conv_1, | |
1098 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; | |
1099 Vcvt, [InfoWord], All (2, Qreg), "vcvtQ", conv_1, | |
1100 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; | |
1101 Vcvt_n, [InfoWord], Use_operands [| Dreg; Dreg; Immed |], "vcvt_n", conv_2, | |
1102 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; | |
1103 Vcvt_n, [InfoWord], Use_operands [| Qreg; Qreg; Immed |], "vcvtQ_n", conv_2, | |
1104 [Conv (S32, F32); Conv (U32, F32); Conv (F32, S32); Conv (F32, U32)]; | |
1105 | |
1106 (* Move, narrowing. *) | |
1107 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]], | |
1108 Narrow, "vmovn", sign_invar_1, su_16_64; | |
1109 Vmovn, [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating], | |
1110 Narrow, "vqmovn", elts_same_1, su_16_64; | |
1111 Vmovn, | |
1112 [Disassembles_as [Use_operands [| Dreg; Qreg |]]; Saturating; Dst_unsign], | |
1113 Narrow, "vqmovun", dst_unsign_1, | |
1114 [S16; S32; S64]; | |
1115 | |
1116 (* Move, long. *) | |
1117 Vmovl, [Disassembles_as [Use_operands [| Qreg; Dreg |]]], | |
1118 Long, "vmovl", elts_same_1, su_8_32; | |
1119 | |
1120 (* Table lookup. *) | |
1121 Vtbl 1, | |
1122 [Instruction_name ["vtbl"]; | |
1123 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]], | |
1124 Use_operands [| Dreg; Dreg; Dreg |], "vtbl1", table_2, [U8; S8; P8]; | |
1125 Vtbl 2, [Instruction_name ["vtbl"]], | |
1126 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbl2", table_2, | |
1127 [U8; S8; P8]; | |
1128 Vtbl 3, [Instruction_name ["vtbl"]], | |
1129 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbl3", table_2, | |
1130 [U8; S8; P8]; | |
1131 Vtbl 4, [Instruction_name ["vtbl"]], | |
1132 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbl4", table_2, | |
1133 [U8; S8; P8]; | |
1134 | |
1135 (* Extended table lookup. *) | |
1136 Vtbx 1, | |
1137 [Instruction_name ["vtbx"]; | |
1138 Disassembles_as [Use_operands [| Dreg; VecArray (1, Dreg); Dreg |]]], | |
1139 Use_operands [| Dreg; Dreg; Dreg |], "vtbx1", table_io, [U8; S8; P8]; | |
1140 Vtbx 2, [Instruction_name ["vtbx"]], | |
1141 Use_operands [| Dreg; VecArray (2, Dreg); Dreg |], "vtbx2", table_io, | |
1142 [U8; S8; P8]; | |
1143 Vtbx 3, [Instruction_name ["vtbx"]], | |
1144 Use_operands [| Dreg; VecArray (3, Dreg); Dreg |], "vtbx3", table_io, | |
1145 [U8; S8; P8]; | |
1146 Vtbx 4, [Instruction_name ["vtbx"]], | |
1147 Use_operands [| Dreg; VecArray (4, Dreg); Dreg |], "vtbx4", table_io, | |
1148 [U8; S8; P8]; | |
1149 | |
1150 (* Multiply, lane. (note: these were undocumented at the time of | |
1151 writing). *) | |
1152 Vmul_lane, [], By_scalar Dreg, "vmul_lane", sign_invar_2_lane, | |
1153 [S16; S32; U16; U32; F32]; | |
1154 Vmul_lane, [], By_scalar Qreg, "vmulQ_lane", sign_invar_2_lane, | |
1155 [S16; S32; U16; U32; F32]; | |
1156 | |
1157 (* Multiply-accumulate, lane. *) | |
1158 Vmla_lane, [], By_scalar Dreg, "vmla_lane", sign_invar_io_lane, | |
1159 [S16; S32; U16; U32; F32]; | |
1160 Vmla_lane, [], By_scalar Qreg, "vmlaQ_lane", sign_invar_io_lane, | |
1161 [S16; S32; U16; U32; F32]; | |
1162 Vmla_lane, [], Wide_lane, "vmlal_lane", elts_same_io_lane, | |
1163 [S16; S32; U16; U32]; | |
1164 Vmla_lane, [Saturating; Doubling], Wide_lane, "vqdmlal_lane", | |
1165 elts_same_io_lane, [S16; S32]; | |
1166 | |
1167 (* Multiply-subtract, lane. *) | |
1168 Vmls_lane, [], By_scalar Dreg, "vmls_lane", sign_invar_io_lane, | |
1169 [S16; S32; U16; U32; F32]; | |
1170 Vmls_lane, [], By_scalar Qreg, "vmlsQ_lane", sign_invar_io_lane, | |
1171 [S16; S32; U16; U32; F32]; | |
1172 Vmls_lane, [], Wide_lane, "vmlsl_lane", elts_same_io_lane, | |
1173 [S16; S32; U16; U32]; | |
1174 Vmls_lane, [Saturating; Doubling], Wide_lane, "vqdmlsl_lane", | |
1175 elts_same_io_lane, [S16; S32]; | |
1176 | |
1177 (* Long multiply, lane. *) | |
1178 Vmull_lane, [], | |
1179 Wide_lane, "vmull_lane", elts_same_2_lane, [S16; S32; U16; U32]; | |
1180 | |
1181 (* Saturating doubling long multiply, lane. *) | |
1182 Vqdmull_lane, [Saturating; Doubling], | |
1183 Wide_lane, "vqdmull_lane", elts_same_2_lane, [S16; S32]; | |
1184 | |
1185 (* Saturating doubling long multiply high, lane. *) | |
1186 Vqdmulh_lane, [Saturating; Halving], | |
1187 By_scalar Qreg, "vqdmulhQ_lane", elts_same_2_lane, [S16; S32]; | |
1188 Vqdmulh_lane, [Saturating; Halving], | |
1189 By_scalar Dreg, "vqdmulh_lane", elts_same_2_lane, [S16; S32]; | |
1190 Vqdmulh_lane, [Saturating; Halving; Rounding; | |
1191 Instruction_name ["vqrdmulh"]], | |
1192 By_scalar Qreg, "vqRdmulhQ_lane", elts_same_2_lane, [S16; S32]; | |
1193 Vqdmulh_lane, [Saturating; Halving; Rounding; | |
1194 Instruction_name ["vqrdmulh"]], | |
1195 By_scalar Dreg, "vqRdmulh_lane", elts_same_2_lane, [S16; S32]; | |
1196 | |
1197 (* Vector multiply by scalar. *) | |
1198 Vmul_n, [InfoWord; | |
1199 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], | |
1200 Use_operands [| Dreg; Dreg; Corereg |], "vmul_n", | |
1201 sign_invar_2, [S16; S32; U16; U32; F32]; | |
1202 Vmul_n, [InfoWord; | |
1203 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], | |
1204 Use_operands [| Qreg; Qreg; Corereg |], "vmulQ_n", | |
1205 sign_invar_2, [S16; S32; U16; U32; F32]; | |
1206 | |
1207 (* Vector long multiply by scalar. *) | |
1208 Vmull_n, [Instruction_name ["vmull"]; | |
1209 Disassembles_as [Use_operands [| Qreg; Dreg; Element_of_dreg |]]], | |
1210 Wide_scalar, "vmull_n", | |
1211 elts_same_2, [S16; S32; U16; U32]; | |
1212 | |
1213 (* Vector saturating doubling long multiply by scalar. *) | |
1214 Vqdmull_n, [Saturating; Doubling; | |
1215 Disassembles_as [Use_operands [| Qreg; Dreg; | |
1216 Element_of_dreg |]]], | |
1217 Wide_scalar, "vqdmull_n", | |
1218 elts_same_2, [S16; S32]; | |
1219 | |
1220 (* Vector saturating doubling long multiply high by scalar. *) | |
1221 Vqdmulh_n, | |
1222 [Saturating; Halving; InfoWord; | |
1223 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], | |
1224 Use_operands [| Qreg; Qreg; Corereg |], | |
1225 "vqdmulhQ_n", elts_same_2, [S16; S32]; | |
1226 Vqdmulh_n, | |
1227 [Saturating; Halving; InfoWord; | |
1228 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], | |
1229 Use_operands [| Dreg; Dreg; Corereg |], | |
1230 "vqdmulh_n", elts_same_2, [S16; S32]; | |
1231 Vqdmulh_n, | |
1232 [Saturating; Halving; Rounding; InfoWord; | |
1233 Instruction_name ["vqrdmulh"]; | |
1234 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], | |
1235 Use_operands [| Qreg; Qreg; Corereg |], | |
1236 "vqRdmulhQ_n", elts_same_2, [S16; S32]; | |
1237 Vqdmulh_n, | |
1238 [Saturating; Halving; Rounding; InfoWord; | |
1239 Instruction_name ["vqrdmulh"]; | |
1240 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], | |
1241 Use_operands [| Dreg; Dreg; Corereg |], | |
1242 "vqRdmulh_n", elts_same_2, [S16; S32]; | |
1243 | |
1244 (* Vector multiply-accumulate by scalar. *) | |
1245 Vmla_n, [InfoWord; | |
1246 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], | |
1247 Use_operands [| Dreg; Dreg; Corereg |], "vmla_n", | |
1248 sign_invar_io, [S16; S32; U16; U32; F32]; | |
1249 Vmla_n, [InfoWord; | |
1250 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], | |
1251 Use_operands [| Qreg; Qreg; Corereg |], "vmlaQ_n", | |
1252 sign_invar_io, [S16; S32; U16; U32; F32]; | |
1253 Vmla_n, [], Wide_scalar, "vmlal_n", elts_same_io, [S16; S32; U16; U32]; | |
1254 Vmla_n, [Saturating; Doubling], Wide_scalar, "vqdmlal_n", elts_same_io, | |
1255 [S16; S32]; | |
1256 | |
1257 (* Vector multiply subtract by scalar. *) | |
1258 Vmls_n, [InfoWord; | |
1259 Disassembles_as [Use_operands [| Dreg; Dreg; Element_of_dreg |]]], | |
1260 Use_operands [| Dreg; Dreg; Corereg |], "vmls_n", | |
1261 sign_invar_io, [S16; S32; U16; U32; F32]; | |
1262 Vmls_n, [InfoWord; | |
1263 Disassembles_as [Use_operands [| Qreg; Qreg; Element_of_dreg |]]], | |
1264 Use_operands [| Qreg; Qreg; Corereg |], "vmlsQ_n", | |
1265 sign_invar_io, [S16; S32; U16; U32; F32]; | |
1266 Vmls_n, [], Wide_scalar, "vmlsl_n", elts_same_io, [S16; S32; U16; U32]; | |
1267 Vmls_n, [Saturating; Doubling], Wide_scalar, "vqdmlsl_n", elts_same_io, | |
1268 [S16; S32]; | |
1269 | |
1270 (* Vector extract. *) | |
1271 Vext, [Const_valuator (fun _ -> 0)], | |
1272 Use_operands [| Dreg; Dreg; Dreg; Immed |], "vext", extend, | |
1273 pf_su_8_64; | |
1274 Vext, [Const_valuator (fun _ -> 0)], | |
1275 Use_operands [| Qreg; Qreg; Qreg; Immed |], "vextQ", extend, | |
1276 pf_su_8_64; | |
1277 | |
1278 (* Reverse elements. *) | |
1279 Vrev64, [], All (2, Dreg), "vrev64", bits_1, P8 :: P16 :: F32 :: su_8_32; | |
1280 Vrev64, [], All (2, Qreg), "vrev64Q", bits_1, P8 :: P16 :: F32 :: su_8_32; | |
1281 Vrev32, [], All (2, Dreg), "vrev32", bits_1, [P8; P16; S8; U8; S16; U16]; | |
1282 Vrev32, [], All (2, Qreg), "vrev32Q", bits_1, [P8; P16; S8; U8; S16; U16]; | |
1283 Vrev16, [], All (2, Dreg), "vrev16", bits_1, [P8; S8; U8]; | |
1284 Vrev16, [], All (2, Qreg), "vrev16Q", bits_1, [P8; S8; U8]; | |
1285 | |
1286 (* Bit selection. *) | |
1287 Vbsl, | |
1288 [Instruction_name ["vbsl"; "vbit"; "vbif"]; | |
1289 Disassembles_as [Use_operands [| Dreg; Dreg; Dreg |]]], | |
1290 Use_operands [| Dreg; Dreg; Dreg; Dreg |], "vbsl", bit_select, | |
1291 pf_su_8_64; | |
1292 Vbsl, | |
1293 [Instruction_name ["vbsl"; "vbit"; "vbif"]; | |
1294 Disassembles_as [Use_operands [| Qreg; Qreg; Qreg |]]], | |
1295 Use_operands [| Qreg; Qreg; Qreg; Qreg |], "vbslQ", bit_select, | |
1296 pf_su_8_64; | |
1297 | |
1298 (* Transpose elements. **NOTE** ReturnPtr goes some of the way towards | |
1299 generating good code for intrinsics which return structure types -- | |
1300 builtins work well by themselves (and understand that the values being | |
1301 stored on e.g. the stack also reside in registers, so can optimise the | |
1302 stores away entirely if the results are used immediately), but | |
1303 intrinsics are very much less efficient. Maybe something can be improved | |
1304 re: inlining, or tweaking the ABI used for intrinsics (a special call | |
1305 attribute?). | |
1306 *) | |
1307 Vtrn, [ReturnPtr], Pair_result Dreg, "vtrn", bits_2, pf_su_8_32; | |
1308 Vtrn, [ReturnPtr], Pair_result Qreg, "vtrnQ", bits_2, pf_su_8_32; | |
1309 | |
1310 (* Zip elements. *) | |
1311 Vzip, [ReturnPtr], Pair_result Dreg, "vzip", bits_2, pf_su_8_32; | |
1312 Vzip, [ReturnPtr], Pair_result Qreg, "vzipQ", bits_2, pf_su_8_32; | |
1313 | |
1314 (* Unzip elements. *) | |
1315 Vuzp, [ReturnPtr], Pair_result Dreg, "vuzp", bits_2, pf_su_8_32; | |
1316 Vuzp, [ReturnPtr], Pair_result Qreg, "vuzpQ", bits_2, pf_su_8_32; | |
1317 | |
1318 (* Element/structure loads. VLD1 variants. *) | |
1319 Vldx 1, | |
1320 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1321 CstPtrTo Corereg |]]], | |
1322 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1", bits_1, | |
1323 pf_su_8_64; | |
1324 Vldx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1325 CstPtrTo Corereg |]]], | |
1326 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q", bits_1, | |
1327 pf_su_8_64; | |
1328 | |
1329 Vldx_lane 1, | |
1330 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); | |
1331 CstPtrTo Corereg |]]], | |
1332 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], | |
1333 "vld1_lane", bits_3, pf_su_8_32; | |
1334 Vldx_lane 1, | |
1335 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1336 CstPtrTo Corereg |]]; | |
1337 Const_valuator (fun _ -> 0)], | |
1338 Use_operands [| Dreg; CstPtrTo Corereg; Dreg; Immed |], | |
1339 "vld1_lane", bits_3, [S64; U64]; | |
1340 Vldx_lane 1, | |
1341 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); | |
1342 CstPtrTo Corereg |]]], | |
1343 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], | |
1344 "vld1Q_lane", bits_3, pf_su_8_32; | |
1345 Vldx_lane 1, | |
1346 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1347 CstPtrTo Corereg |]]], | |
1348 Use_operands [| Qreg; CstPtrTo Corereg; Qreg; Immed |], | |
1349 "vld1Q_lane", bits_3, [S64; U64]; | |
1350 | |
1351 Vldx_dup 1, | |
1352 [Disassembles_as [Use_operands [| VecArray (1, All_elements_of_dreg); | |
1353 CstPtrTo Corereg |]]], | |
1354 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", | |
1355 bits_1, pf_su_8_32; | |
1356 Vldx_dup 1, | |
1357 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1358 CstPtrTo Corereg |]]], | |
1359 Use_operands [| Dreg; CstPtrTo Corereg |], "vld1_dup", | |
1360 bits_1, [S64; U64]; | |
1361 Vldx_dup 1, | |
1362 [Disassembles_as [Use_operands [| VecArray (2, All_elements_of_dreg); | |
1363 CstPtrTo Corereg |]]], | |
1364 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", | |
1365 bits_1, pf_su_8_32; | |
1366 Vldx_dup 1, | |
1367 [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1368 CstPtrTo Corereg |]]], | |
1369 Use_operands [| Qreg; CstPtrTo Corereg |], "vld1Q_dup", | |
1370 bits_1, [S64; U64]; | |
1371 | |
1372 (* VST1 variants. *) | |
1373 Vstx 1, [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1374 PtrTo Corereg |]]], | |
1375 Use_operands [| PtrTo Corereg; Dreg |], "vst1", | |
1376 store_1, pf_su_8_64; | |
1377 Vstx 1, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1378 PtrTo Corereg |]]], | |
1379 Use_operands [| PtrTo Corereg; Qreg |], "vst1Q", | |
1380 store_1, pf_su_8_64; | |
1381 | |
1382 Vstx_lane 1, | |
1383 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); | |
1384 CstPtrTo Corereg |]]], | |
1385 Use_operands [| PtrTo Corereg; Dreg; Immed |], | |
1386 "vst1_lane", store_3, pf_su_8_32; | |
1387 Vstx_lane 1, | |
1388 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1389 CstPtrTo Corereg |]]; | |
1390 Const_valuator (fun _ -> 0)], | |
1391 Use_operands [| PtrTo Corereg; Dreg; Immed |], | |
1392 "vst1_lane", store_3, [U64; S64]; | |
1393 Vstx_lane 1, | |
1394 [Disassembles_as [Use_operands [| VecArray (1, Element_of_dreg); | |
1395 CstPtrTo Corereg |]]], | |
1396 Use_operands [| PtrTo Corereg; Qreg; Immed |], | |
1397 "vst1Q_lane", store_3, pf_su_8_32; | |
1398 Vstx_lane 1, | |
1399 [Disassembles_as [Use_operands [| VecArray (1, Dreg); | |
1400 CstPtrTo Corereg |]]], | |
1401 Use_operands [| PtrTo Corereg; Qreg; Immed |], | |
1402 "vst1Q_lane", store_3, [U64; S64]; | |
1403 | |
1404 (* VLD2 variants. *) | |
1405 Vldx 2, [], Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], | |
1406 "vld2", bits_1, pf_su_8_32; | |
1407 Vldx 2, [Instruction_name ["vld1"]], | |
1408 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], | |
1409 "vld2", bits_1, [S64; U64]; | |
1410 Vldx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1411 CstPtrTo Corereg |]; | |
1412 Use_operands [| VecArray (2, Dreg); | |
1413 CstPtrTo Corereg |]]], | |
1414 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg |], | |
1415 "vld2Q", bits_1, pf_su_8_32; | |
1416 | |
1417 Vldx_lane 2, | |
1418 [Disassembles_as [Use_operands | |
1419 [| VecArray (2, Element_of_dreg); | |
1420 CstPtrTo Corereg |]]], | |
1421 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg; | |
1422 VecArray (2, Dreg); Immed |], | |
1423 "vld2_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; | |
1424 Vldx_lane 2, | |
1425 [Disassembles_as [Use_operands | |
1426 [| VecArray (2, Element_of_dreg); | |
1427 CstPtrTo Corereg |]]], | |
1428 Use_operands [| VecArray (2, Qreg); CstPtrTo Corereg; | |
1429 VecArray (2, Qreg); Immed |], | |
1430 "vld2Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; | |
1431 | |
1432 Vldx_dup 2, | |
1433 [Disassembles_as [Use_operands | |
1434 [| VecArray (2, All_elements_of_dreg); CstPtrTo Corereg |]]], | |
1435 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], | |
1436 "vld2_dup", bits_1, pf_su_8_32; | |
1437 Vldx_dup 2, | |
1438 [Instruction_name ["vld1"]; Disassembles_as [Use_operands | |
1439 [| VecArray (2, Dreg); CstPtrTo Corereg |]]], | |
1440 Use_operands [| VecArray (2, Dreg); CstPtrTo Corereg |], | |
1441 "vld2_dup", bits_1, [S64; U64]; | |
1442 | |
1443 (* VST2 variants. *) | |
1444 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1445 PtrTo Corereg |]]], | |
1446 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", | |
1447 store_1, pf_su_8_32; | |
1448 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1449 PtrTo Corereg |]]; | |
1450 Instruction_name ["vst1"]], | |
1451 Use_operands [| PtrTo Corereg; VecArray (2, Dreg) |], "vst2", | |
1452 store_1, [S64; U64]; | |
1453 Vstx 2, [Disassembles_as [Use_operands [| VecArray (2, Dreg); | |
1454 PtrTo Corereg |]; | |
1455 Use_operands [| VecArray (2, Dreg); | |
1456 PtrTo Corereg |]]], | |
1457 Use_operands [| PtrTo Corereg; VecArray (2, Qreg) |], "vst2Q", | |
1458 store_1, pf_su_8_32; | |
1459 | |
1460 Vstx_lane 2, | |
1461 [Disassembles_as [Use_operands | |
1462 [| VecArray (2, Element_of_dreg); | |
1463 CstPtrTo Corereg |]]], | |
1464 Use_operands [| PtrTo Corereg; VecArray (2, Dreg); Immed |], "vst2_lane", | |
1465 store_3, P8 :: P16 :: F32 :: su_8_32; | |
1466 Vstx_lane 2, | |
1467 [Disassembles_as [Use_operands | |
1468 [| VecArray (2, Element_of_dreg); | |
1469 CstPtrTo Corereg |]]], | |
1470 Use_operands [| PtrTo Corereg; VecArray (2, Qreg); Immed |], "vst2Q_lane", | |
1471 store_3, [P16; F32; U16; U32; S16; S32]; | |
1472 | |
1473 (* VLD3 variants. *) | |
1474 Vldx 3, [], Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], | |
1475 "vld3", bits_1, pf_su_8_32; | |
1476 Vldx 3, [Instruction_name ["vld1"]], | |
1477 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], | |
1478 "vld3", bits_1, [S64; U64]; | |
1479 Vldx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg); | |
1480 CstPtrTo Corereg |]; | |
1481 Use_operands [| VecArray (3, Dreg); | |
1482 CstPtrTo Corereg |]]], | |
1483 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg |], | |
1484 "vld3Q", bits_1, P8 :: P16 :: F32 :: su_8_32; | |
1485 | |
1486 Vldx_lane 3, | |
1487 [Disassembles_as [Use_operands | |
1488 [| VecArray (3, Element_of_dreg); | |
1489 CstPtrTo Corereg |]]], | |
1490 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg; | |
1491 VecArray (3, Dreg); Immed |], | |
1492 "vld3_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; | |
1493 Vldx_lane 3, | |
1494 [Disassembles_as [Use_operands | |
1495 [| VecArray (3, Element_of_dreg); | |
1496 CstPtrTo Corereg |]]], | |
1497 Use_operands [| VecArray (3, Qreg); CstPtrTo Corereg; | |
1498 VecArray (3, Qreg); Immed |], | |
1499 "vld3Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; | |
1500 | |
1501 Vldx_dup 3, | |
1502 [Disassembles_as [Use_operands | |
1503 [| VecArray (3, All_elements_of_dreg); CstPtrTo Corereg |]]], | |
1504 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], | |
1505 "vld3_dup", bits_1, pf_su_8_32; | |
1506 Vldx_dup 3, | |
1507 [Instruction_name ["vld1"]; Disassembles_as [Use_operands | |
1508 [| VecArray (3, Dreg); CstPtrTo Corereg |]]], | |
1509 Use_operands [| VecArray (3, Dreg); CstPtrTo Corereg |], | |
1510 "vld3_dup", bits_1, [S64; U64]; | |
1511 | |
1512 (* VST3 variants. *) | |
1513 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1514 PtrTo Corereg |]]], | |
1515 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", | |
1516 store_1, pf_su_8_32; | |
1517 Vstx 3, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1518 PtrTo Corereg |]]; | |
1519 Instruction_name ["vst1"]], | |
1520 Use_operands [| PtrTo Corereg; VecArray (3, Dreg) |], "vst3", | |
1521 store_1, [S64; U64]; | |
1522 Vstx 3, [Disassembles_as [Use_operands [| VecArray (3, Dreg); | |
1523 PtrTo Corereg |]; | |
1524 Use_operands [| VecArray (3, Dreg); | |
1525 PtrTo Corereg |]]], | |
1526 Use_operands [| PtrTo Corereg; VecArray (3, Qreg) |], "vst3Q", | |
1527 store_1, pf_su_8_32; | |
1528 | |
1529 Vstx_lane 3, | |
1530 [Disassembles_as [Use_operands | |
1531 [| VecArray (3, Element_of_dreg); | |
1532 CstPtrTo Corereg |]]], | |
1533 Use_operands [| PtrTo Corereg; VecArray (3, Dreg); Immed |], "vst3_lane", | |
1534 store_3, P8 :: P16 :: F32 :: su_8_32; | |
1535 Vstx_lane 3, | |
1536 [Disassembles_as [Use_operands | |
1537 [| VecArray (3, Element_of_dreg); | |
1538 CstPtrTo Corereg |]]], | |
1539 Use_operands [| PtrTo Corereg; VecArray (3, Qreg); Immed |], "vst3Q_lane", | |
1540 store_3, [P16; F32; U16; U32; S16; S32]; | |
1541 | |
1542 (* VLD4/VST4 variants. *) | |
1543 Vldx 4, [], Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], | |
1544 "vld4", bits_1, pf_su_8_32; | |
1545 Vldx 4, [Instruction_name ["vld1"]], | |
1546 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], | |
1547 "vld4", bits_1, [S64; U64]; | |
1548 Vldx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1549 CstPtrTo Corereg |]; | |
1550 Use_operands [| VecArray (4, Dreg); | |
1551 CstPtrTo Corereg |]]], | |
1552 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg |], | |
1553 "vld4Q", bits_1, P8 :: P16 :: F32 :: su_8_32; | |
1554 | |
1555 Vldx_lane 4, | |
1556 [Disassembles_as [Use_operands | |
1557 [| VecArray (4, Element_of_dreg); | |
1558 CstPtrTo Corereg |]]], | |
1559 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg; | |
1560 VecArray (4, Dreg); Immed |], | |
1561 "vld4_lane", bits_3, P8 :: P16 :: F32 :: su_8_32; | |
1562 Vldx_lane 4, | |
1563 [Disassembles_as [Use_operands | |
1564 [| VecArray (4, Element_of_dreg); | |
1565 CstPtrTo Corereg |]]], | |
1566 Use_operands [| VecArray (4, Qreg); CstPtrTo Corereg; | |
1567 VecArray (4, Qreg); Immed |], | |
1568 "vld4Q_lane", bits_3, [P16; F32; U16; U32; S16; S32]; | |
1569 | |
1570 Vldx_dup 4, | |
1571 [Disassembles_as [Use_operands | |
1572 [| VecArray (4, All_elements_of_dreg); CstPtrTo Corereg |]]], | |
1573 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], | |
1574 "vld4_dup", bits_1, pf_su_8_32; | |
1575 Vldx_dup 4, | |
1576 [Instruction_name ["vld1"]; Disassembles_as [Use_operands | |
1577 [| VecArray (4, Dreg); CstPtrTo Corereg |]]], | |
1578 Use_operands [| VecArray (4, Dreg); CstPtrTo Corereg |], | |
1579 "vld4_dup", bits_1, [S64; U64]; | |
1580 | |
1581 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1582 PtrTo Corereg |]]], | |
1583 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", | |
1584 store_1, pf_su_8_32; | |
1585 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1586 PtrTo Corereg |]]; | |
1587 Instruction_name ["vst1"]], | |
1588 Use_operands [| PtrTo Corereg; VecArray (4, Dreg) |], "vst4", | |
1589 store_1, [S64; U64]; | |
1590 Vstx 4, [Disassembles_as [Use_operands [| VecArray (4, Dreg); | |
1591 PtrTo Corereg |]; | |
1592 Use_operands [| VecArray (4, Dreg); | |
1593 PtrTo Corereg |]]], | |
1594 Use_operands [| PtrTo Corereg; VecArray (4, Qreg) |], "vst4Q", | |
1595 store_1, pf_su_8_32; | |
1596 | |
1597 Vstx_lane 4, | |
1598 [Disassembles_as [Use_operands | |
1599 [| VecArray (4, Element_of_dreg); | |
1600 CstPtrTo Corereg |]]], | |
1601 Use_operands [| PtrTo Corereg; VecArray (4, Dreg); Immed |], "vst4_lane", | |
1602 store_3, P8 :: P16 :: F32 :: su_8_32; | |
1603 Vstx_lane 4, | |
1604 [Disassembles_as [Use_operands | |
1605 [| VecArray (4, Element_of_dreg); | |
1606 CstPtrTo Corereg |]]], | |
1607 Use_operands [| PtrTo Corereg; VecArray (4, Qreg); Immed |], "vst4Q_lane", | |
1608 store_3, [P16; F32; U16; U32; S16; S32]; | |
1609 | |
1610 (* Logical operations. And. *) | |
1611 Vand, [], All (3, Dreg), "vand", notype_2, su_8_64; | |
1612 Vand, [], All (3, Qreg), "vandQ", notype_2, su_8_64; | |
1613 | |
1614 (* Or. *) | |
1615 Vorr, [], All (3, Dreg), "vorr", notype_2, su_8_64; | |
1616 Vorr, [], All (3, Qreg), "vorrQ", notype_2, su_8_64; | |
1617 | |
1618 (* Eor. *) | |
1619 Veor, [], All (3, Dreg), "veor", notype_2, su_8_64; | |
1620 Veor, [], All (3, Qreg), "veorQ", notype_2, su_8_64; | |
1621 | |
1622 (* Bic (And-not). *) | |
1623 Vbic, [], All (3, Dreg), "vbic", notype_2, su_8_64; | |
1624 Vbic, [], All (3, Qreg), "vbicQ", notype_2, su_8_64; | |
1625 | |
1626 (* Or-not. *) | |
1627 Vorn, [], All (3, Dreg), "vorn", notype_2, su_8_64; | |
1628 Vorn, [], All (3, Qreg), "vornQ", notype_2, su_8_64; | |
1629 ] | |
1630 | |
(* Table of vreinterpret/vreinterpretQ intrinsics.  For every destination
   element type in [elems] we emit one D-register and one Q-register entry,
   each carrying Cast (dst, src) for every *other* element type in the
   list (a type never reinterprets to itself).  *)
let reinterp =
  let elems = P8 :: P16 :: F32 :: su_8_64 in
  (* All casts into [dst] from the remaining element types, in list order. *)
  let casts_to dst =
    List.map (fun src -> Cast (dst, src))
             (List.filter (fun src -> src <> dst) elems)
  in
  List.fold_right
    (fun dst table ->
       let casts = casts_to dst in
       let dreg_entry = Vreinterp, [No_op], Use_operands [| Dreg; Dreg |],
                        "vreinterpret", conv_1, casts
       and qreg_entry = Vreinterp, [No_op], Use_operands [| Qreg; Qreg |],
                        "vreinterpretQ", conv_1, casts in
       dreg_entry :: qreg_entry :: table)
    elems
    []
1651 | |
1652 (* Output routines. *) | |
1653 | |
(* Printable suffix for a vector-element type, e.g. S8 -> "s8".
   Conversions and casts print both types joined by an underscore;
   NoElts has no printable form and fails.  *)
let rec string_of_elt = function
  | S8 -> "s8"
  | S16 -> "s16"
  | S32 -> "s32"
  | S64 -> "s64"
  | U8 -> "u8"
  | U16 -> "u16"
  | U32 -> "u32"
  | U64 -> "u64"
  | I8 -> "i8"
  | I16 -> "i16"
  | I32 -> "i32"
  | I64 -> "i64"
  | B8 -> "8"
  | B16 -> "16"
  | B32 -> "32"
  | B64 -> "64"
  | F32 -> "f32"
  | P8 -> "p8"
  | P16 -> "p16"
  | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "_" ^ string_of_elt b
  | NoElts -> failwith "No elts"
1662 | |
(* Like string_of_elt, but a conversion/cast joins its two element types
   with a '.' rather than an underscore.  *)
let string_of_elt_dots = function
  | Conv (a, b) | Cast (a, b) -> string_of_elt a ^ "." ^ string_of_elt b
  | other -> string_of_elt other
1667 | |
(* Map a vectype to the name of the corresponding C type, e.g.
   T_int8x8 -> "int8x8_t".  Vector and scalar element types receive the
   "_t" suffix via [affix]; immediate, void and raw builtin types are
   spelled out directly (no suffix).  Array, pointer and const wrappers
   recurse on their base type.
   NOTE(review): lines of Mercurial-annotate metadata were interleaved
   here by the web scrape (between the T_intDI and T_floatSF arms); they
   are not source text and have been removed.  *)
let string_of_vectype vt =
  let rec name affix = function
      T_int8x8 -> affix "int8x8"
    | T_int8x16 -> affix "int8x16"
    | T_int16x4 -> affix "int16x4"
    | T_int16x8 -> affix "int16x8"
    | T_int32x2 -> affix "int32x2"
    | T_int32x4 -> affix "int32x4"
    | T_int64x1 -> affix "int64x1"
    | T_int64x2 -> affix "int64x2"
    | T_uint8x8 -> affix "uint8x8"
    | T_uint8x16 -> affix "uint8x16"
    | T_uint16x4 -> affix "uint16x4"
    | T_uint16x8 -> affix "uint16x8"
    | T_uint32x2 -> affix "uint32x2"
    | T_uint32x4 -> affix "uint32x4"
    | T_uint64x1 -> affix "uint64x1"
    | T_uint64x2 -> affix "uint64x2"
    | T_float32x2 -> affix "float32x2"
    | T_float32x4 -> affix "float32x4"
    | T_poly8x8 -> affix "poly8x8"
    | T_poly8x16 -> affix "poly8x16"
    | T_poly16x4 -> affix "poly16x4"
    | T_poly16x8 -> affix "poly16x8"
    | T_int8 -> affix "int8"
    | T_int16 -> affix "int16"
    | T_int32 -> affix "int32"
    | T_int64 -> affix "int64"
    | T_uint8 -> affix "uint8"
    | T_uint16 -> affix "uint16"
    | T_uint32 -> affix "uint32"
    | T_uint64 -> affix "uint64"
    | T_poly8 -> affix "poly8"
    | T_poly16 -> affix "poly16"
    | T_float32 -> affix "float32"
    | T_immediate _ -> "const int"
    | T_void -> "void"
    | T_intQI -> "__builtin_neon_qi"
    | T_intHI -> "__builtin_neon_hi"
    | T_intSI -> "__builtin_neon_si"
    | T_intDI -> "__builtin_neon_di"
    | T_floatSF -> "__builtin_neon_sf"
    | T_arrayof (num, base) ->
        (* The element type is named without the suffix, then the whole
           "<base>x<num>" aggregate is suffixed, e.g. "int8x8x2_t".  *)
        let basename = name (fun x -> x) base in
        affix (Printf.sprintf "%sx%d" basename num)
    | T_ptrto x ->
        let basename = name affix x in
        Printf.sprintf "%s *" basename
    | T_const x ->
        let basename = name affix x in
        Printf.sprintf "const %s" basename
  in
    name (fun x -> x ^ "_t") vt
1721 | |
(* Name of the builtin C type standing in for a wide integer mode.  *)
let string_of_inttype itype =
  match itype with
  | B_TImode -> "__builtin_neon_ti"
  | B_EImode -> "__builtin_neon_ei"
  | B_OImode -> "__builtin_neon_oi"
  | B_CImode -> "__builtin_neon_ci"
  | B_XImode -> "__builtin_neon_xi"
1728 | |
(* Lowercase name of a machine mode, as embedded in builtin names.  *)
let string_of_mode m =
  match m with
  | V8QI -> "v8qi"
  | V4HI -> "v4hi"
  | V2SI -> "v2si"
  | V2SF -> "v2sf"
  | DI -> "di"
  | V16QI -> "v16qi"
  | V8HI -> "v8hi"
  | V4SI -> "v4si"
  | V4SF -> "v4sf"
  | V2DI -> "v2di"
  | QI -> "qi"
  | HI -> "hi"
  | SI -> "si"
  | SF -> "sf"
1734 | |
(* Intrinsic names in the ops table may carry uppercase letters that are
   part of the intrinsic's public name but omitted from the builtin name
   (that information travels in an extra argument instead).  The public
   intrinsic name is simply the all-lowercase form.  *)
let intrinsic_name = String.lowercase
1739 | |
(* Builtin name for an intrinsic: a Flipped or Builtin_name entry in the
   features list overrides [name] (the first such entry in the list wins);
   then every character that lowercasing would change -- i.e. the uppercase
   decoration letters -- is dropped.  Digits and underscores survive.  *)
let builtin_name features name =
  let rec overridden = function
    | (Flipped x | Builtin_name x) :: _ -> x
    | _ :: rest -> overridden rest
    | [] -> name
  in
  let base = overridden features in
  (* A character is kept iff lowercasing its 1-char string is the identity. *)
  let is_lower c =
    let s = String.make 1 c in
    String.lowercase s = s
  in
  let buf = Buffer.create (String.length base) in
  String.iter (fun c -> if is_lower c then Buffer.add_char buf c) base;
  Buffer.contents buf
1753 | |
(* Transform an arity into the list of C type names of its component
   vectypes, in declaration order.  *)
let strings_of_arity a =
  let vts =
    match a with
    | Arity0 vt -> [vt]
    | Arity1 (vt1, vt2) -> [vt1; vt2]
    | Arity2 (vt1, vt2, vt3) -> [vt1; vt2; vt3]
    | Arity3 (vt1, vt2, vt3, vt4) -> [vt1; vt2; vt3; vt4]
    | Arity4 (vt1, vt2, vt3, vt4, vt5) -> [vt1; vt2; vt3; vt4; vt5]
  in
  List.map string_of_vectype vts
1771 | |
(* Suffixes on the end of builtin names that are to be stripped in order
   to obtain the name used as an instruction.  They are only stripped if
   preceded immediately by an underscore, e.g. "vld1_dup" yields "vld1"
   (see get_insn_names below, which matches the text after the last
   underscore against this list).  *)
let suffixes_to_strip = [ "n"; "lane"; "dup" ]
1776 | |
(* Get the possible names of an instruction corresponding to a "name" from
   the ops table.  By default this is the equivalent builtin name with any
   suffix from suffixes_to_strip removed, where a suffix only counts if it
   immediately follows the last underscore.  An Instruction_name entry in
   the features list supplies the names directly, and a Flipped entry
   supplies a single name; if both are present, whichever comes first in
   the list is used.  *)
let get_insn_names features name =
  let candidates =
    try
      match List.find (function
                         | Instruction_name _ | Flipped _ -> true
                         | _ -> false) features
      with
      | Instruction_name names -> names
      | Flipped flipped -> [flipped]
      | _ -> assert false
    with Not_found -> [builtin_name features name]
  in
  (* Drop a recognised "_<suffix>" tail; names without an underscore
     (String.rindex raises Not_found) are returned unchanged.  *)
  let strip_suffix candidate =
    try
      let underscore = String.rindex candidate '_' in
      let tail_len = String.length candidate - underscore - 1 in
      let tail = String.sub candidate (underscore + 1) tail_len in
      if List.mem tail suffixes_to_strip then
        String.sub candidate 0 underscore
      else
        candidate
    with Not_found | Invalid_argument _ -> candidate
  in
  List.map strip_suffix candidates
1814 | |
(* Apply [f] to each element of [elts] and append the comma-separated
   results to [acc]; an empty list returns [acc] unchanged.  *)
let commas f elts acc =
  let rec loop acc = function
    | [] -> acc
    | [last] -> acc ^ f last
    | x :: rest -> loop (acc ^ f x ^ ", ") rest
  in
  loop acc elts
1823 | |
(* Given a list of features and the shape specified in the "ops" table,
   apply [f] to each possible shape the instruction may have.  If the
   features list contains a Disassembles_as entry, [f] is mapped over the
   shapes of the first such entry; otherwise it is applied to [shape]
   alone, yielding a singleton list.  *)
let analyze_all_shapes features shape f =
  let is_disassembly = function
    | Disassembles_as _ -> true
    | _ -> false
  in
  try
    match List.find is_disassembly features with
    | Disassembles_as shapes -> List.map f shapes
    | _ -> assert false
  with Not_found -> [f shape]
1839 |