comparison gcc/config/arm/neon-gen.ml @ 0:a06113de4d67

first commit
author kent <kent@cr.ie.u-ryukyu.ac.jp>
date Fri, 17 Jul 2009 14:47:48 +0900
parents
children 77e2b8dfacca
comparison
equal deleted inserted replaced
-1:000000000000 0:a06113de4d67
1 (* Auto-generate ARM Neon intrinsics header file.
2 Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.
3 Contributed by CodeSourcery.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>.
20
21 This is an O'Caml program. The O'Caml compiler is available from:
22
23 http://caml.inria.fr/
24
25 Or from your favourite OS's friendly packaging system. Tested with version
26 3.09.2, though other versions will probably work too.
27
28 Compile with:
29 ocamlc -c neon.ml
30 ocamlc -o neon-gen neon.cmo neon-gen.ml
31
32 Run with:
33 ./neon-gen > arm_neon.h
34 *)
35
36 open Neon
37
38 (* The format codes used in the following functions are documented at:
39 http://caml.inria.fr/pub/docs/manual-ocaml/libref/Format.html\
40 #6_printflikefunctionsforprettyprinting
41 (one line, remove the backslash.)
42 *)
43
44 (* Following functions can be used to approximate GNU indentation style. *)
(* Begin a new top-level item: open an outer vertical box with no extra
   indentation and return a fresh brace-nesting counter starting at 0.  The
   counter is what open_braceblock, close_braceblock and end_function take.  *)
let start_function () =
  let nesting = ref 0 in
  Format.printf "@[<v 0>";
  nesting
48
(* Finish the item begun by start_function: emit two break hints and close
   the current box.  Raises Failure if NESTING is not back at 0, i.e. a brace
   block has been opened without being closed.  *)
let end_function nesting =
  let level = !nesting in
  if level = 0 then
    Format.printf "@;@;@]"
  else
    failwith ("Bad nesting (ending function at level "
              ^ string_of_int level ^ ")")
54
(* Print an opening brace (declared with zero width to the formatter) and open
   a vertical box indented by 2 for the block's contents.  When already inside
   a block, an additional indented box is opened around the brace itself.
   NESTING is incremented afterwards.  *)
let open_braceblock nesting =
  let depth = !nesting in
  begin
    if depth = 0 then
      Format.printf "@,@<0>{@[<v 2>@,"
    else
      Format.printf "@,@[<v 2> @<0>{@[<v 2>@,"
  end;
  nesting := depth + 1
61
(* Counterpart of open_braceblock: decrement NESTING, close the contents box
   and print the closing brace.  For a nested block the extra box opened
   around the brace is closed as well.  *)
let close_braceblock nesting =
  let depth = !nesting - 1 in
  nesting := depth;
  if depth = 0 then
    Format.printf "@]@,@<0>}"
  else
    Format.printf "@]@,@<0>}@]"
67
(* Print one complete always-inline function definition.  ARITY supplies the
   return type followed by the argument types (arguments are named __a, __b,
   __c and __d in that order; no arguments prints "(void)"), FNNAME is the
   name of the function and BODY is the list of lines placed between the
   braces.  *)
let print_function arity fnname body =
  let nesting = start_function () in
  Format.printf "__extension__ static __inline ";
  let always_inline = "__attribute__ ((__always_inline__))" in
  let ty = string_of_vectype in
  (match arity with
   | Arity0 ret ->
       Format.printf "%s %s@,%s (void)" (ty ret) always_inline fnname
   | Arity1 (ret, a) ->
       Format.printf "%s %s@,%s (%s __a)" (ty ret) always_inline fnname
         (ty a)
   | Arity2 (ret, a, b) ->
       Format.printf "%s %s@,%s (%s __a, %s __b)"
         (ty ret) always_inline fnname (ty a)
         (ty b)
   | Arity3 (ret, a, b, c) ->
       Format.printf "%s %s@,%s (%s __a, %s __b, %s __c)"
         (ty ret) always_inline fnname (ty a)
         (ty b) (ty c)
   | Arity4 (ret, a, b, c, d) ->
       Format.printf "%s %s@,%s (%s __a, %s __b, %s __c, %s __d)"
         (ty ret) always_inline fnname (ty a)
         (ty b) (ty c)
         (ty d));
  open_braceblock nesting;
  (* Lines are separated by cut hints; no hint follows the last line.  *)
  let rec emit = function
    | [] -> ()
    | [only] -> Format.printf "%s" only
    | line :: rest ->
        Format.printf "%s@," line;
        emit rest
  in
  emit body;
  close_braceblock nesting;
  end_function nesting
100
(* True when the feature list FEATURES contains ReturnPtr.  *)
let return_by_ptr features =
  List.exists (function ReturnPtr -> true | _ -> false) features
102
(* Build the C text declaring a union named BASE with two members: __i, of the
   array-of-vectors type made from NUM and ELTS, and __o, of the integer type
   that inttype_for_array selects for the same NUM and ELTS.  No trailing
   semicolon is added.  *)
let union_string num elts base =
  let opaque = inttype_for_array num elts in
  let opaque_name = string_of_inttype opaque in
  let struct_name = string_of_vectype (T_arrayof (num, elts)) in
  Printf.sprintf "union { %s __i; %s __o; } %s" struct_name opaque_name base
108
(* Map a C type onto the type used when casting a value of that type:
   unsigned and polynomial vectors become the signed vector of the same
   shape, scalar integer and polynomial types become the mode-named integer
   types, and array, pointer and const wrappers are rewritten recursively.
   Every other type is returned unchanged.  *)
let rec signed_ctype ctype =
  match ctype with
  | T_uint8x8 | T_poly8x8 -> T_int8x8
  | T_uint8x16 | T_poly8x16 -> T_int8x16
  | T_uint16x4 | T_poly16x4 -> T_int16x4
  | T_uint16x8 | T_poly16x8 -> T_int16x8
  | T_uint32x2 -> T_int32x2
  | T_uint32x4 -> T_int32x4
  | T_uint64x1 -> T_int64x1
  | T_uint64x2 -> T_int64x2
  (* Cast to types defined by mode in arm.c, not random types pulled in from
     the <stdint.h> header in use. This fixes incompatible pointer errors when
     compiling with C++.  *)
  | T_uint8 | T_int8 | T_poly8 -> T_intQI
  | T_uint16 | T_int16 | T_poly16 -> T_intHI
  | T_uint32 | T_int32 -> T_intSI
  | T_uint64 | T_int64 -> T_intDI
  | T_arrayof (count, elt) -> T_arrayof (count, signed_ctype elt)
  | T_ptrto target -> T_ptrto (signed_ctype target)
  | T_const inner -> T_const (signed_ctype inner)
  | unchanged -> unchanged
131
(* Return the C expression CVAL, prefixed with a cast to signed_ctype CTYPE
   when that type differs from CTYPE itself; otherwise CVAL is returned
   untouched.  *)
let add_cast ctype cval =
  let target = signed_ctype ctype in
  if target = ctype then
    cval
  else
    Printf.sprintf "(%s) %s" (string_of_vectype target) cval
138
(* The C cast prefix for type TO_TY: its name wrapped in parentheses.  *)
let cast_for_return to_ty =
  String.concat "" ["("; string_of_vectype to_ty; ")"]
140
(* Return a pair: the list of declarations to go at the start of the function,
   and the list of statements needed to return THING.  The return type is the
   first component of ARITY:
     - array type, RETURN_BY_PTR set: declare __rv with that type, evaluate
       THING as a statement and return __rv;
     - array type otherwise: declare a union __rv, store THING in its __o
       member and return its __i member;
     - void: just evaluate THING;
     - anything else: return THING cast to the return type.  *)
let return arity return_by_ptr thing =
  let ret =
    match arity with
    | Arity0 r | Arity1 (r, _) | Arity2 (r, _, _) | Arity3 (r, _, _, _)
    | Arity4 (r, _, _, _, _) -> r
  in
  match ret with
  | T_void -> ([], [thing ^ ";"])
  | T_arrayof (num, vec) ->
      if return_by_ptr then
        ([Printf.sprintf "%s __rv;" (string_of_vectype ret)],
         [thing ^ ";"; "return __rv;"])
      else
        ([union_string num vec "__rv" ^ ";"],
         ["__rv.__o = " ^ thing ^ ";"; "return __rv.__i;"])
  | _ ->
      ([], ["return " ^ cast_for_return ret ^ thing ^ ";"])
159
(* Strip every enclosing T_arrayof layer from a type and return what is
   left.  *)
let rec element_type = function
  | T_arrayof (_, inner) -> element_type inner
  | other -> other
164
(* Compute what is needed to pass the arguments described by the arity PS on
   to a builtin.  Returns a pair: the declarations to put at the start of the
   function, and the list of argument expressions.

   An array-typed parameter <p> is passed through a union named <p>u which is
   initialised from the parameter; the expression handed on is <p>u.__o.  All
   other parameters go through add_cast.  When RETURN_BY_PTR is set, the
   address of __rv.val[0] (cast via add_cast to a pointer to the return type's
   element type) is prepended to the argument list.

   Declarations are collected explicitly in parameter order.  The previous
   implementation pushed them onto a reference from inside the elements of a
   list literal, so their order depended on the evaluation order of those
   elements, which OCaml leaves unspecified.  *)
let params return_by_ptr ps =
  (* Handle one (type, name) pair; both accumulators are built in reverse.  *)
  let add_param (rev_decls, rev_args) (t, p) =
    match t with
    | T_arrayof (num, elts) ->
        let uname = union_string num elts (p ^ "u") in
        let decl = Printf.sprintf "%s = { %s };" uname p in
        (decl :: rev_decls, (p ^ "u.__o") :: rev_args)
    | _ ->
        (rev_decls, add_cast t p :: rev_args)
  in
  let ret, named_args =
    match ps with
    | Arity0 ret -> (ret, [])
    | Arity1 (ret, t1) -> (ret, [(t1, "__a")])
    | Arity2 (ret, t1, t2) -> (ret, [(t1, "__a"); (t2, "__b")])
    | Arity3 (ret, t1, t2, t3) ->
        (ret, [(t1, "__a"); (t2, "__b"); (t3, "__c")])
    | Arity4 (ret, t1, t2, t3, t4) ->
        (ret, [(t1, "__a"); (t2, "__b"); (t3, "__c"); (t4, "__d")])
  in
  let rev_decls, rev_args = List.fold_left add_param ([], []) named_args in
  let pdecls = List.rev rev_decls in
  let plist = List.rev rev_args in
  if return_by_ptr then
    (pdecls, add_cast (T_ptrto (element_type ret)) "&__rv.val[0]" :: plist)
  else
    (pdecls, plist)
189
(* If FEATURES contains a Flipped entry, swap the two elements of PLIST;
   otherwise return PLIST unchanged.  Raises Failure when flipping is
   requested for a list that does not have exactly two elements.  *)
let modify_params features plist =
  let requests_flip = function Flipped _ -> true | _ -> false in
  match List.exists requests_flip features, plist with
  | false, _ -> plist
  | true, [first; second] -> [second; first]
  | true, _ ->
      failwith ("Don't know how to flip args " ^ String.concat ", " plist)
200
(* Decide whether an extra information word is passed, and if so append BITS
   (as a decimal string) to PARAMLIST.  The word is added for the shape forms
   listed below, and for any other shape when FEATURES contains InfoWord.  *)
let extra_word shape features paramlist bits =
  let shape_wants_word =
    match shape with
    | All _ | Long | Long_noreg _ | Wide | Wide_noreg _ | Narrow
    | By_scalar _ | Wide_scalar | Wide_lane | Binary_imm _ | Long_imm
    | Narrow_imm -> true
    | _ -> false
  in
  if shape_wants_word || List.mem InfoWord features then
    paramlist @ [string_of_int bits]
  else
    paramlist
215
(* Compute the value of the information word for element type ELTTYPE.
   Bit 0 represents signed (1) vs unsigned (0), or float (1) vs poly (0).
   Bit 1 represents floats & polynomials (1), or ordinary integers (0).
   Bit 2 represents rounding (1) vs none (0); it is set when FEATURES
   contains Rounding.  *)
let infoword_value elttype features =
  let class_bits =
    match elt_class elttype with
    | Signed | ConvClass (Signed, _) | ConvClass (_, Signed) -> 0b001
    | Poly -> 0b010
    | Float -> 0b011
    | _ -> 0b000
  in
  if List.mem Rounding features then
    class_bits lor 0b100
  else
    class_bits lor 0b000
228
(* Return the mode suffix for ELTTYPE under SHAPE.

   "Cast" type operations will throw an exception in mode_of_elt (actually in
   elt_width, called from there).  Deal with that here: when MixedMode (dst,
   src) is raised, generate a suffix with multiple modes (<to><from>) by
   concatenating the mode names of DST and SRC.

   This function does not call itself, so it is a plain `let' rather than
   `let rec'.  *)
let mode_suffix elttype shape =
  try
    string_of_mode (mode_of_elt elttype shape)
  with MixedMode (dst, src) ->
    let dstmode = mode_of_elt dst shape in
    let srcmode = mode_of_elt src shape in
    string_of_mode dstmode ^ string_of_mode srcmode
240
(* Print the intrinsic for one variant of an operation.  The variant is the
   triple (C type as an arity, asm type, element type).  The function body
   consists of any parameter declarations, any return-value declarations and
   the statements that call __builtin_neon_<builtin name><mode suffix> and
   return its result.  The function is named <intrinsic name>_<element
   type>.  OPCODE and the asm type are accepted but not used here.  *)
let print_variant opcode features shape name (ctype, asmtype, elttype) =
  let info_bits = infoword_value elttype features in
  let suffix = mode_suffix elttype shape in
  let by_ptr = return_by_ptr features in
  let param_decls, args = params by_ptr ctype in
  let args = modify_params features args in
  let args = extra_word shape features args info_bits in
  let arg_text = String.concat ", " args in
  let call =
    Printf.sprintf "__builtin_neon_%s%s (%s)"
      (builtin_name features name) suffix arg_text in
  let ret_decls, stmts = return ctype by_ptr call in
  let body = param_decls @ ret_decls @ stmts in
  let fnname = intrinsic_name name ^ "_" ^ string_of_elt elttype in
  print_function ctype fnname body
255
(* Print every variant of one entry of an ops table.  The entry's element
   types are sorted and each is rewritten, using the entry's munge function,
   into a tuple (a,b,c):
     a : C type as an "arity", e.g. Arity1 (T_poly8x8, T_poly8x8)
     b : Asm type : a single, processed element type, e.g. P16. This is the
         type which should be attached to the asm opcode.
     c : Variant type : the unprocessed type for this variant (e.g. in add
         instructions which don't care about the sign, b might be i16 and c
         might be s16.)
   All tuples are computed before the first variant is printed.  *)
let print_op (opcode, features, shape, name, munge, types) =
  let to_variant elt =
    let c, asm = munge shape elt in
    (c, asm, elt)
  in
  let ordered = List.sort compare types in
  let variants = List.map to_variant ordered in
  List.iter (print_variant opcode features shape name) variants
273
(* Print each entry of an ops table in turn, in list order.  *)
let rec print_ops = function
  | [] -> ()
  | op :: rest ->
      print_op op;
      print_ops rest
276
(* Output type definitions.  Each table entry is:
     cbase : "C" name for the type.
     abase : "ARM" base name for the type (i.e. int in int8x8_t).
     esize : element size.
   For every entry one typedef is printed per register width, first all the
   doubleword (64-bit) ones and then all the quadword (128-bit) ones.  The
   element count is the register width divided by the element size; typedefs
   with more than one element carry a __vector_size__ attribute giving the
   size in bytes.  Three scalar typedefs follow.  *)
let deftypes () =
  let elements = [
    ("__builtin_neon_qi", "int", 8);
    ("__builtin_neon_hi", "int", 16);
    ("__builtin_neon_si", "int", 32);
    ("__builtin_neon_di", "int", 64);
    ("__builtin_neon_sf", "float", 32);
    ("__builtin_neon_poly8", "poly", 8);
    ("__builtin_neon_poly16", "poly", 16);
    ("__builtin_neon_uqi", "uint", 8);
    ("__builtin_neon_uhi", "uint", 16);
    ("__builtin_neon_usi", "uint", 32);
    ("__builtin_neon_udi", "uint", 64)
  ] in
  let typedefs_for regsize =
    List.iter
      (fun (cbase, abase, esize) ->
         let enum = regsize / esize in
         let attr =
           if enum = 1 then
             ""
           else
             Printf.sprintf "\t__attribute__ ((__vector_size__ (%d)))"
               (esize * enum / 8)
         in
         Format.printf "typedef %s %s%dx%d_t%s;@\n" cbase abase esize enum attr)
      elements
  in
  (* Doubleword vector types.  *)
  typedefs_for 64;
  (* Quadword vector types.  *)
  typedefs_for 128;
  Format.print_newline ();
  (* Extra types not in <stdint.h>.  *)
  Format.printf "typedef __builtin_neon_sf float32_t;\n";
  Format.printf "typedef __builtin_neon_poly8 poly8_t;\n";
  Format.printf "typedef __builtin_neon_poly16 poly16_t;\n"
326
(* Output structs containing arrays, for load & store instructions etc.
   For each array length from 2 to 4, and for each element kind in the table,
   a struct typedef is printed for the 64-bit and then the 128-bit register
   size.  Each struct has a single member, val, an array of that many
   vectors.  *)
let arrtypes () =
  let elements = [
    ("int", 8); ("int", 16);
    ("int", 32); ("int", 64);
    ("uint", 8); ("uint", 16);
    ("uint", 32); ("uint", 64);
    ("float", 32); ("poly", 8);
    ("poly", 16)
  ] in
  let write_struct elname elsize regsize arrsize =
    let lanes = regsize / elsize in
    let struct_name =
      Printf.sprintf "%s%dx%dx%d_t" elname elsize lanes arrsize in
    let nesting = start_function () in
    Format.printf "typedef struct %s" struct_name;
    open_braceblock nesting;
    Format.printf "%s%dx%d_t val[%d];" elname elsize lanes arrsize;
    close_braceblock nesting;
    Format.printf " %s;" struct_name;
    end_function nesting
  in
  List.iter
    (fun arrsize ->
       List.iter
         (fun (elname, elsize) ->
            List.iter
              (fun regsize -> write_struct elname elsize regsize arrsize)
              [64; 128])
         elements)
    [2; 3; 4]
357
(* Print each string of LINES followed by a forced newline.  *)
let print_lines lines =
  List.iter (fun text -> Format.printf "%s@\n" text) lines
359
(* Do it: print the header prologue, the type definitions, the array structs,
   the intrinsics generated from the `ops' and `reinterp' tables, and finally
   the lines closing the prologue's preprocessor conditionals.  *)

let () =
  let prologue = [
    "/* ARM NEON intrinsics include file. This file is generated automatically";
    " using neon-gen.ml. Please do not edit manually.";
    "";
    " Copyright (C) 2006, 2007, 2009 Free Software Foundation, Inc.";
    " Contributed by CodeSourcery.";
    "";
    " This file is part of GCC.";
    "";
    " GCC is free software; you can redistribute it and/or modify it";
    " under the terms of the GNU General Public License as published";
    " by the Free Software Foundation; either version 3, or (at your";
    " option) any later version.";
    "";
    " GCC is distributed in the hope that it will be useful, but WITHOUT";
    " ANY WARRANTY; without even the implied warranty of MERCHANTABILITY";
    " or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public";
    " License for more details.";
    "";
    " Under Section 7 of GPL version 3, you are granted additional";
    " permissions described in the GCC Runtime Library Exception, version";
    " 3.1, as published by the Free Software Foundation.";
    "";
    " You should have received a copy of the GNU General Public License and";
    " a copy of the GCC Runtime Library Exception along with this program;";
    " see the files COPYING3 and COPYING.RUNTIME respectively. If not, see";
    " <http://www.gnu.org/licenses/>. */";
    "";
    "#ifndef _GCC_ARM_NEON_H";
    "#define _GCC_ARM_NEON_H 1";
    "";
    "#ifndef __ARM_NEON__";
    "#error You must enable NEON instructions (e.g. -mfloat-abi=softfp -mfpu=neon) to use arm_neon.h";
    "#else";
    "";
    "#ifdef __cplusplus";
    "extern \"C\" {";
    "#endif";
    "";
    "#include <stdint.h>";
    ""]
  in
  let epilogue = [
    "#ifdef __cplusplus";
    "}";
    "#endif";
    "#endif";
    "#endif"]
  in
  print_lines prologue;
  deftypes ();
  arrtypes ();
  Format.print_newline ();
  print_ops ops;
  Format.print_newline ();
  print_ops reinterp;
  print_lines epilogue