Mercurial > hg > CbC > CbC_gcc
annotate gcc/config/arm/neon-gen.ml @ 55:77e2b8dfacca gcc-4.4.5
update it from 4.4.3 to 4.5.0
author | ryoma <e075725@ie.u-ryukyu.ac.jp> |
---|---|
date | Fri, 12 Feb 2010 23:39:51 +0900 |
parents | a06113de4d67 |
children |
rev | line source |
---|---|
0 | 1 (* Auto-generate ARM Neon intrinsics header file. |
2 Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc. | |
3 Contributed by CodeSourcery. | |
4 | |
5 This file is part of GCC. | |
6 | |
7 GCC is free software; you can redistribute it and/or modify it under | |
8 the terms of the GNU General Public License as published by the Free | |
9 Software Foundation; either version 3, or (at your option) any later | |
10 version. | |
11 | |
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY | |
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or | |
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License | |
15 for more details. | |
16 | |
17 You should have received a copy of the GNU General Public License | |
18 along with GCC; see the file COPYING3. If not see | |
19 <http://www.gnu.org/licenses/>. | |
20 | |
21 This is an O'Caml program. The O'Caml compiler is available from: | |
22 | |
23 http://caml.inria.fr/ | |
24 | |
25 Or from your favourite OS's friendly packaging system. Tested with version | |
26 3.09.2, though other versions will probably work too. | |
27 | |
28 Compile with: | |
29 ocamlc -c neon.ml | |
30 ocamlc -o neon-gen neon.cmo neon-gen.ml | |
31 | |
32 Run with: | |
33 ./neon-gen > arm_neon.h | |
34 *) | |
35 | |
36 open Neon | |
37 | |
38 (* The format codes used in the following functions are documented at: | |
39 http://caml.inria.fr/pub/docs/manual-ocaml/libref/Format.html\ | |
40 #6_printflikefunctionsforprettyprinting | |
41 (one line, remove the backslash.) | |
42 *) | |
43 | |
44 (* Following functions can be used to approximate GNU indentation style. *) | |
(* Open the outermost vertical box for a new top-level item and return a
   fresh nesting counter, initialised to zero, for the brace-block helpers. *)
let start_function () =
  let depth = ref 0 in
  Format.printf "@[<v 0>";
  depth
48 | |
(* Finish the item started by start_function.  Emits two break hints and
   closes the outermost box.  Fails if the nesting counter is not back at
   zero, i.e. a brace block opened with open_braceblock was never closed. *)
let end_function nesting =
  let depth = !nesting in
  if depth = 0 then
    Format.printf "@;@;@]"
  else
    failwith ("Bad nesting (ending function at level "
              ^ string_of_int depth ^ ")")
54 | |
(* Print an opening brace and open a vertical box indented by 2 for the block
   contents, then bump the nesting counter.  Blocks opened at a non-zero
   nesting level are additionally wrapped in an extra indented box. *)
let open_braceblock nesting =
  if !nesting = 0 then
    Format.printf "@,@<0>{@[<v 2>@,"
  else
    Format.printf "@,@[<v 2> @<0>{@[<v 2>@,";
  incr nesting
61 | |
(* Counterpart of open_braceblock: decrement the nesting counter, close the
   contents box and print the closing brace.  When the block was a nested one
   (counter still non-zero afterwards) the extra wrapping box is closed as
   well. *)
let close_braceblock nesting =
  decr nesting;
  if !nesting = 0 then
    Format.printf "@]@,@<0>}"
  else
    Format.printf "@]@,@<0>}@]"
67 | |
(* Print one complete inline function definition.
     arity  : return type and argument types of the function.
     fnname : name of the function being defined.
     body   : the lines making up the function body, printed one per line
              inside a brace block.
   Arguments are named __a, __b, __c, __d in order; a function without
   arguments gets a (void) parameter list. *)
let print_function arity fnname body =
  let ffmt = start_function () in
  Format.printf "__extension__ static __inline ";
  let inl = "__attribute__ ((__always_inline__))" in
  (* Split the arity into its return type and the list of argument types. *)
  let ret, argtypes =
    match arity with
      Arity0 r -> r, []
    | Arity1 (r, t0) -> r, [t0]
    | Arity2 (r, t0, t1) -> r, [t0; t1]
    | Arity3 (r, t0, t1, t2) -> r, [t0; t1; t2]
    | Arity4 (r, t0, t1, t2, t3) -> r, [t0; t1; t2; t3] in
  (* Pair each argument type with its parameter name. *)
  let rec declare tys names =
    match tys, names with
      ty :: tys', nm :: names' ->
        (string_of_vectype ty ^ " " ^ nm) :: declare tys' names'
    | _ -> [] in
  let arglist =
    match declare argtypes ["__a"; "__b"; "__c"; "__d"] with
      [] -> "void"
    | decls -> String.concat ", " decls in
  Format.printf "%s %s@,%s (%s)" (string_of_vectype ret) inl fnname arglist;
  open_braceblock ffmt;
  (* Body lines are separated by break hints; no hint follows the last one. *)
  let rec emit is_first = function
      [] -> ()
    | line :: rest ->
        if not is_first then Format.printf "@,";
        Format.printf "%s" line;
        emit false rest in
  emit true body;
  close_braceblock ffmt;
  end_function ffmt
100 | |
(* True when the feature list contains ReturnPtr. *)
let return_by_ptr features =
  List.exists (function ReturnPtr -> true | _ -> false) features
102 | |
(* Build the text of a union declaration named BASE whose __i member has the
   array-of-vectors type (NUM x ELTS) and whose __o member has the integer
   type chosen by inttype_for_array for that same array. *)
let union_string num elts base =
  let itype = inttype_for_array num elts in
  let iname = string_of_inttype itype in
  let sname = string_of_vectype (T_arrayof (num, elts)) in
  "union { " ^ sname ^ " __i; " ^ iname ^ " __o; } " ^ base
108 | |
(* Map a C type onto the type values are cast to before being handed to a
   builtin.  Array, pointer and const wrappers are converted recursively;
   any type not listed below is returned unchanged. *)
let rec signed_ctype ctype =
  match ctype with
    (* Wrappers: convert the wrapped type. *)
    T_arrayof (n, elt) -> T_arrayof (n, signed_ctype elt)
  | T_ptrto elt -> T_ptrto (signed_ctype elt)
  | T_const elt -> T_const (signed_ctype elt)
    (* Unsigned and polynomial vectors map to the signed vector of the same
       layout. *)
  | T_uint8x8 | T_poly8x8 -> T_int8x8
  | T_uint8x16 | T_poly8x16 -> T_int8x16
  | T_uint16x4 | T_poly16x4 -> T_int16x4
  | T_uint16x8 | T_poly16x8 -> T_int16x8
  | T_uint32x2 -> T_int32x2
  | T_uint32x4 -> T_int32x4
  | T_uint64x1 -> T_int64x1
  | T_uint64x2 -> T_int64x2
    (* Scalars: cast to types defined by mode in arm.c, not random types
       pulled in from the <stdint.h> header in use.  This fixes incompatible
       pointer errors when compiling with C++. *)
  | T_uint8 | T_int8 | T_poly8 -> T_intQI
  | T_uint16 | T_int16 | T_poly16 -> T_intHI
  | T_uint32 | T_int32 -> T_intSI
  | T_uint64 | T_int64 -> T_intDI
  | T_float32 -> T_floatSF
  | _ -> ctype
132 | |
(* Prefix CVAL with a C cast to the signed_ctype equivalent of CTYPE.  When
   the conversion leaves the type unchanged, CVAL is returned as is. *)
let add_cast ctype cval =
  let stype = signed_ctype ctype in
  if ctype = stype then
    cval
  else
    "(" ^ string_of_vectype stype ^ ") " ^ cval
139 | |
(* The C cast prefix "(type)" for TO_TY, with no trailing space. *)
let cast_for_return to_ty =
  Printf.sprintf "(%s)" (string_of_vectype to_ty)
141 | |
(* Work out how a function of the given ARITY hands back THING (the text of
   the builtin call).  The result is a pair: the declarations to place at the
   start of the function, and the statements that produce the result.
     - array results returned through a pointer: declare __rv, execute THING
       as a statement, return __rv;
     - other array results: go through a union named __rv, assigning THING
       to its __o member and returning its __i member;
     - void results: just execute THING;
     - everything else: return THING cast to the return type. *)
let return arity return_by_ptr thing =
  let ret =
    match arity with
      Arity0 r | Arity1 (r, _) | Arity2 (r, _, _) | Arity3 (r, _, _, _)
    | Arity4 (r, _, _, _, _) -> r in
  match ret with
    T_arrayof _ when return_by_ptr ->
      [string_of_vectype ret ^ " __rv;"],
      [thing ^ ";"; "return __rv;"]
  | T_arrayof (num, vec) ->
      [union_string num vec "__rv" ^ ";"],
      [Printf.sprintf "__rv.__o = %s;" thing; "return __rv.__i;"]
  | T_void ->
      [], [thing ^ ";"]
  | _ ->
      [], [Printf.sprintf "return %s%s;" (cast_for_return ret) thing]
160 | |
(* Strip every enclosing T_arrayof layer and return the innermost type. *)
let rec element_type = function
    T_arrayof (_, inner) -> element_type inner
  | other -> other
165 | |
(* Translate the arguments of the arity PS into the expressions passed to the
   builtin.  Returns a pair:
     - the union declarations needed for array-typed arguments, in parameter
       order;
     - the list of argument expressions, in parameter order, preceded by a
       pointer to __rv.val[0] when RETURN_BY_PTR is set.
   Array arguments are passed through a union named after the parameter with
   a "u" suffix; all other arguments go through add_cast.

   The conversions are sequenced explicitly so that the order of the
   declarations does not depend on the (unspecified) order in which OCaml
   evaluates the elements of a list expression. *)
let params return_by_ptr ps =
  let ret, argtypes =
    match ps with
      Arity0 r -> r, []
    | Arity1 (r, t1) -> r, [t1]
    | Arity2 (r, t1, t2) -> r, [t1; t2]
    | Arity3 (r, t1, t2, t3) -> r, [t1; t2; t3]
    | Arity4 (r, t1, t2, t3, t4) -> r, [t1; t2; t3; t4] in
  (* Convert one parameter P of type T: the declarations it needs (none or
     one) and the expression to pass. *)
  let ptype t p =
    match t with
      T_arrayof (num, elts) ->
        let uname = union_string num elts (p ^ "u") in
        [Printf.sprintf "%s = { %s };" uname p], p ^ "u.__o"
    | _ -> [], add_cast t p in
  (* Walk the argument types left to right, pairing them with their names. *)
  let rec convert tys names =
    match tys, names with
      t :: tys', p :: names' ->
        let decls, arg = ptype t p in
        let more_decls, more_args = convert tys' names' in
        decls @ more_decls, arg :: more_args
    | _ -> [], [] in
  let pdecls, plist = convert argtypes ["__a"; "__b"; "__c"; "__d"] in
  if return_by_ptr then
    pdecls, add_cast (T_ptrto (element_type ret)) "&__rv.val[0]" :: plist
  else
    pdecls, plist
190 | |
(* Swap the two entries of PLIST when the feature list contains a Flipped
   entry; otherwise return PLIST untouched.  Flipping is only defined for
   exactly two parameters and fails for any other count. *)
let modify_params features plist =
  let wants_flip = function Flipped _ -> true | _ -> false in
  if not (List.exists wants_flip features) then
    plist
  else
    match plist with
      [first; second] -> [second; first]
    | _ ->
        failwith ("Don't know how to flip args " ^ (String.concat ", " plist))
201 | |
(* !!! Decide whether to add an extra information word based on the shape
   form.  The shapes listed below always get one; any other shape gets one
   only when InfoWord is among the features.  The word (BITS, in decimal) is
   appended as the last parameter. *)
let extra_word shape features paramlist bits =
  let with_word () = paramlist @ [string_of_int bits] in
  match shape with
    All _ | Long | Long_noreg _ | Wide | Wide_noreg _ | Narrow
  | By_scalar _ | Wide_scalar | Wide_lane | Binary_imm _ | Long_imm
  | Narrow_imm -> with_word ()
  | _ ->
      if List.mem InfoWord features then with_word () else paramlist
216 | |
(* Compute the information word for an element type and feature list.
   Bit 0 represents signed (1) vs unsigned (0), or float (1) vs poly (0).
   Bit 1 represents floats & polynomials (1), or ordinary integers (0).
   Bit 2 represents rounding (1) vs none (0). *)
let infoword_value elttype features =
  let class_bits =
    match elt_class elttype with
      Signed | ConvClass (Signed, _) | ConvClass (_, Signed) -> 0b001
    | Poly -> 0b010
    | Float -> 0b011
    | _ -> 0b000 in
  if List.mem Rounding features then
    class_bits lor 0b100
  else
    class_bits
229 | |
(* Return the mode suffix used in the builtin name for ELTTYPE and SHAPE.
   "Cast" type operations will throw an exception in mode_of_elt (actually in
   elt_width, called from there).  Deal with that here, and generate a suffix
   with multiple modes (<to><from>).
   (This function does not call itself, so it is not declared "rec".) *)
let mode_suffix elttype shape =
  try
    string_of_mode (mode_of_elt elttype shape)
  with MixedMode (dst, src) ->
    let dstmode = mode_of_elt dst shape in
    let srcmode = mode_of_elt src shape in
    string_of_mode dstmode ^ string_of_mode srcmode
241 | |
(* Print the intrinsic for one variant of an operation: build the argument
   list for the builtin call, wrap the call in the statements needed to
   return its result, and print the whole function.  OPCODE and ASMTYPE are
   accepted but not used here. *)
let print_variant opcode features shape name (ctype, asmtype, elttype) =
  let infoword = infoword_value elttype features in
  let suffix = mode_suffix elttype shape in
  let by_ptr = return_by_ptr features in
  let arg_decls, args = params by_ptr ctype in
  (* Optionally flip the two arguments, then optionally append the info
     word. *)
  let args = modify_params features args in
  let args = extra_word shape features args infoword in
  let call =
    Printf.sprintf "__builtin_neon_%s%s (%s)"
      (builtin_name features name) suffix (String.concat ", " args) in
  let ret_decls, stmts = return ctype by_ptr call in
  print_function ctype
    ((intrinsic_name name) ^ "_" ^ (string_of_elt elttype))
    (arg_decls @ ret_decls @ stmts)
256 | |
257 (* When this function processes the element types in the ops table, it rewrites | |
258 them in a list of tuples (a,b,c): | |
259 a : C type as an "arity", e.g. Arity1 (T_poly8x8, T_poly8x8) | |
260 b : Asm type : a single, processed element type, e.g. P16. This is the | |
261 type which should be attached to the asm opcode. | |
262 c : Variant type : the unprocessed type for this variant (e.g. in add | |
263 instructions which don't care about the sign, b might be i16 and c | |
264 might be s16.) | |
265 *) | |
266 | |
(* Print every variant of one entry of the ops table.  The element types are
   sorted, each is run through the entry's munge function to obtain the
   (C type, asm type, variant type) triple, and only once all triples have
   been built are the variants printed. *)
let print_op (opcode, features, shape, name, munge, types) =
  let to_variant elt =
    let c, asm = munge shape elt in
    (c, asm, elt) in
  let variants = List.map to_variant (List.sort compare types) in
  List.iter
    (fun variant -> print_variant opcode features shape name variant)
    variants
274 | |
(* Print all the entries of an ops table, in order. *)
let rec print_ops = function
    [] -> ()
  | op :: rest ->
      print_op op;
      print_ops rest
277 | |
278 (* Output type definitions. Table entries are: | |
279 cbase : "C" name for the type. | |
280 abase : "ARM" base name for the type (i.e. int in int8x8_t). | |
281 esize : element size. | |
282 enum : element count. | |
283 *) | |
284 | |
(* Print the typedefs for the vector types, first the doubleword (64-bit)
   ones and then the quadword (128-bit) ones, followed by the scalar types
   that <stdint.h> does not provide. *)
let deftypes () =
  (* cbase, abase, esize; the element count follows from the register
     width. *)
  let bases = [
    "__builtin_neon_qi", "int", 8;
    "__builtin_neon_hi", "int", 16;
    "__builtin_neon_si", "int", 32;
    "__builtin_neon_di", "int", 64;
    "__builtin_neon_sf", "float", 32;
    "__builtin_neon_poly8", "poly", 8;
    "__builtin_neon_poly16", "poly", 16;
    "__builtin_neon_uqi", "uint", 8;
    "__builtin_neon_uhi", "uint", 16;
    "__builtin_neon_usi", "uint", 32;
    "__builtin_neon_udi", "uint", 64
  ] in
  let define regbits (cbase, abase, esize) =
    let enum = regbits / esize in
    (* Single-element types get no vector_size attribute. *)
    let attr =
      if enum = 1 then
        ""
      else
        Printf.sprintf "\t__attribute__ ((__vector_size__ (%d)))"
                       (esize * enum / 8) in
    Format.printf "typedef %s %s%dx%d_t%s;@\n" cbase abase esize enum attr in
  (* Doubleword vector types. *)
  List.iter (fun base -> define 64 base) bases;
  (* Quadword vector types. *)
  List.iter (fun base -> define 128 base) bases;
  Format.print_newline ();
  (* Extra types not in <stdint.h>. *)
  Format.printf "typedef float float32_t;\n";
  Format.printf "typedef __builtin_neon_poly8 poly8_t;\n";
  Format.printf "typedef __builtin_neon_poly16 poly16_t;\n"
327 | |
328 (* Output structs containing arrays, for load & store instructions etc. *) | |
329 | |
(* Print the struct typedefs wrapping arrays of 2, 3 and 4 vectors.  For each
   array length, every element type gets a struct for its 64-bit vector and
   then one for its 128-bit vector. *)
let arrtypes () =
  let typeinfo = [
    "int", 8;    "int", 16;
    "int", 32;   "int", 64;
    "uint", 8;   "uint", 16;
    "uint", 32;  "uint", 64;
    "float", 32; "poly", 8;
    "poly", 16
  ] in
  let writestruct elname elsize regsize arrsize =
    let elnum = regsize / elsize in
    (* Name of the vector type without its _t suffix, e.g. int8x8. *)
    let vecname = Printf.sprintf "%s%dx%d" elname elsize elnum in
    let structname = Printf.sprintf "%sx%d_t" vecname arrsize in
    let sfmt = start_function () in
    Format.printf "typedef struct %s" structname;
    open_braceblock sfmt;
    Format.printf "%s_t val[%d];" vecname arrsize;
    close_braceblock sfmt;
    Format.printf " %s;" structname;
    end_function sfmt in
  List.iter
    (fun n ->
      List.iter
        (fun (elname, elsize) ->
          List.iter
            (fun regsize -> writestruct elname elsize regsize n)
            [64; 128])
        typeinfo)
    [2; 3; 4]
358 | |
(* Print each string of the list followed by a forced newline. *)
let print_lines lines =
  List.iter (fun line -> Format.printf "%s@\n" line) lines
360 | |
361 (* Do it. *) | |
362 | |
(* Program entry point: print the header preamble, the type definitions, the
   intrinsics generated from the ops and reinterp tables, and finally the
   lines closing the preprocessor conditionals opened by the preamble. *)
let _ =
  let preamble = [
    "/* ARM NEON intrinsics include file. This file is generated automatically";
    " using neon-gen.ml. Please do not edit manually.";
    "";
    " Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.";
    " Contributed by CodeSourcery.";
    "";
    " This file is part of GCC.";
    "";
    " GCC is free software; you can redistribute it and/or modify it";
    " under the terms of the GNU General Public License as published";
    " by the Free Software Foundation; either version 3, or (at your";
    " option) any later version.";
    "";
    " GCC is distributed in the hope that it will be useful, but WITHOUT";
    " ANY WARRANTY; without even the implied warranty of MERCHANTABILITY";
    " or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public";
    " License for more details.";
    "";
    " Under Section 7 of GPL version 3, you are granted additional";
    " permissions described in the GCC Runtime Library Exception, version";
    " 3.1, as published by the Free Software Foundation.";
    "";
    " You should have received a copy of the GNU General Public License and";
    " a copy of the GCC Runtime Library Exception along with this program;";
    " see the files COPYING3 and COPYING.RUNTIME respectively. If not, see";
    " <http://www.gnu.org/licenses/>. */";
    "";
    "#ifndef _GCC_ARM_NEON_H";
    "#define _GCC_ARM_NEON_H 1";
    "";
    "#ifndef __ARM_NEON__";
    "#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h";
    "#else";
    "";
    "#ifdef __cplusplus";
    "extern \"C\" {";
    "#endif";
    "";
    "#include <stdint.h>";
    ""
  ] in
  (* Closes, in order: the extern "C" guard, the __ARM_NEON__ #else branch
     and the _GCC_ARM_NEON_H include guard. *)
  let postamble = [
    "#ifdef __cplusplus";
    "}";
    "#endif";
    "#endif";
    "#endif"
  ] in
  print_lines preamble;
  deftypes ();
  arrtypes ();
  Format.print_newline ();
  print_ops ops;
  Format.print_newline ();
  print_ops reinterp;
  print_lines postamble