0
|
1 (* ARM NEON documentation generator.
|
|
2
|
|
3 Copyright (C) 2006, 2007 Free Software Foundation, Inc.
|
|
4 Contributed by CodeSourcery.
|
|
5
|
|
6 This file is part of GCC.
|
|
7
|
|
8 GCC is free software; you can redistribute it and/or modify it under
|
|
9 the terms of the GNU General Public License as published by the Free
|
|
10 Software Foundation; either version 3, or (at your option) any later
|
|
11 version.
|
|
12
|
|
13 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
|
|
14 WARRANTY; without even the implied warranty of MERCHANTABILITY or
|
|
15 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
|
|
16 for more details.
|
|
17
|
|
18 You should have received a copy of the GNU General Public License
|
|
19 along with GCC; see the file COPYING3. If not see
|
|
20 <http://www.gnu.org/licenses/>.
|
|
21
|
|
22 This is an O'Caml program. The O'Caml compiler is available from:
|
|
23
|
|
24 http://caml.inria.fr/
|
|
25
|
|
26 Or from your favourite OS's friendly packaging system. Tested with version
|
|
27 3.09.2, though other versions will probably work too.
|
|
28
|
|
29 Compile with:
|
|
30 ocamlc -c neon.ml
|
|
31 ocamlc -o neon-docgen neon.cmo neon-docgen.ml
|
|
32
|
|
33 Run with:
|
|
34 /path/to/neon-docgen /path/to/gcc/doc/arm-neon-intrinsics.texi
|
|
35 *)
|
|
36
|
|
37 open Neon
|
|
38
|
|
39 (* The combined "ops" and "reinterp" table. *)
|
|
40 let ops_reinterp = reinterp @ ops
|
|
41
|
|
42 (* Helper functions for extracting things from the "ops" table. *)
|
|
43 let single_opcode desired_opcode () =
|
|
44 List.fold_left (fun got_so_far ->
|
|
45 fun row ->
|
|
46 match row with
|
|
47 (opcode, _, _, _, _, _) ->
|
|
48 if opcode = desired_opcode then row :: got_so_far
|
|
49 else got_so_far
|
|
50 ) [] ops_reinterp
|
|
51
|
|
52 let multiple_opcodes desired_opcodes () =
|
|
53 List.fold_left (fun got_so_far ->
|
|
54 fun desired_opcode ->
|
|
55 (single_opcode desired_opcode ()) @ got_so_far)
|
|
56 [] desired_opcodes
|
|
57
|
|
58 let ldx_opcode number () =
|
|
59 List.fold_left (fun got_so_far ->
|
|
60 fun row ->
|
|
61 match row with
|
|
62 (opcode, _, _, _, _, _) ->
|
|
63 match opcode with
|
|
64 Vldx n | Vldx_lane n | Vldx_dup n when n = number ->
|
|
65 row :: got_so_far
|
|
66 | _ -> got_so_far
|
|
67 ) [] ops_reinterp
|
|
68
|
|
69 let stx_opcode number () =
|
|
70 List.fold_left (fun got_so_far ->
|
|
71 fun row ->
|
|
72 match row with
|
|
73 (opcode, _, _, _, _, _) ->
|
|
74 match opcode with
|
|
75 Vstx n | Vstx_lane n when n = number ->
|
|
76 row :: got_so_far
|
|
77 | _ -> got_so_far
|
|
78 ) [] ops_reinterp
|
|
79
|
|
80 let tbl_opcode () =
|
|
81 List.fold_left (fun got_so_far ->
|
|
82 fun row ->
|
|
83 match row with
|
|
84 (opcode, _, _, _, _, _) ->
|
|
85 match opcode with
|
|
86 Vtbl _ -> row :: got_so_far
|
|
87 | _ -> got_so_far
|
|
88 ) [] ops_reinterp
|
|
89
|
|
90 let tbx_opcode () =
|
|
91 List.fold_left (fun got_so_far ->
|
|
92 fun row ->
|
|
93 match row with
|
|
94 (opcode, _, _, _, _, _) ->
|
|
95 match opcode with
|
|
96 Vtbx _ -> row :: got_so_far
|
|
97 | _ -> got_so_far
|
|
98 ) [] ops_reinterp
|
|
99
|
|
100 (* The groups of intrinsics. *)
|
|
101 let intrinsic_groups =
|
|
102 [ "Addition", single_opcode Vadd;
|
|
103 "Multiplication", single_opcode Vmul;
|
|
104 "Multiply-accumulate", single_opcode Vmla;
|
|
105 "Multiply-subtract", single_opcode Vmls;
|
|
106 "Subtraction", single_opcode Vsub;
|
|
107 "Comparison (equal-to)", single_opcode Vceq;
|
|
108 "Comparison (greater-than-or-equal-to)", single_opcode Vcge;
|
|
109 "Comparison (less-than-or-equal-to)", single_opcode Vcle;
|
|
110 "Comparison (greater-than)", single_opcode Vcgt;
|
|
111 "Comparison (less-than)", single_opcode Vclt;
|
|
112 "Comparison (absolute greater-than-or-equal-to)", single_opcode Vcage;
|
|
113 "Comparison (absolute less-than-or-equal-to)", single_opcode Vcale;
|
|
114 "Comparison (absolute greater-than)", single_opcode Vcagt;
|
|
115 "Comparison (absolute less-than)", single_opcode Vcalt;
|
|
116 "Test bits", single_opcode Vtst;
|
|
117 "Absolute difference", single_opcode Vabd;
|
|
118 "Absolute difference and accumulate", single_opcode Vaba;
|
|
119 "Maximum", single_opcode Vmax;
|
|
120 "Minimum", single_opcode Vmin;
|
|
121 "Pairwise add", single_opcode Vpadd;
|
|
122 "Pairwise add, single_opcode widen and accumulate", single_opcode Vpada;
|
|
123 "Folding maximum", single_opcode Vpmax;
|
|
124 "Folding minimum", single_opcode Vpmin;
|
|
125 "Reciprocal step", multiple_opcodes [Vrecps; Vrsqrts];
|
|
126 "Vector shift left", single_opcode Vshl;
|
|
127 "Vector shift left by constant", single_opcode Vshl_n;
|
|
128 "Vector shift right by constant", single_opcode Vshr_n;
|
|
129 "Vector shift right by constant and accumulate", single_opcode Vsra_n;
|
|
130 "Vector shift right and insert", single_opcode Vsri;
|
|
131 "Vector shift left and insert", single_opcode Vsli;
|
|
132 "Absolute value", single_opcode Vabs;
|
|
133 "Negation", single_opcode Vneg;
|
|
134 "Bitwise not", single_opcode Vmvn;
|
|
135 "Count leading sign bits", single_opcode Vcls;
|
|
136 "Count leading zeros", single_opcode Vclz;
|
|
137 "Count number of set bits", single_opcode Vcnt;
|
|
138 "Reciprocal estimate", single_opcode Vrecpe;
|
|
139 "Reciprocal square-root estimate", single_opcode Vrsqrte;
|
|
140 "Get lanes from a vector", single_opcode Vget_lane;
|
|
141 "Set lanes in a vector", single_opcode Vset_lane;
|
|
142 "Create vector from literal bit pattern", single_opcode Vcreate;
|
|
143 "Set all lanes to the same value",
|
|
144 multiple_opcodes [Vdup_n; Vmov_n; Vdup_lane];
|
|
145 "Combining vectors", single_opcode Vcombine;
|
|
146 "Splitting vectors", multiple_opcodes [Vget_high; Vget_low];
|
|
147 "Conversions", multiple_opcodes [Vcvt; Vcvt_n];
|
|
148 "Move, single_opcode narrowing", single_opcode Vmovn;
|
|
149 "Move, single_opcode long", single_opcode Vmovl;
|
|
150 "Table lookup", tbl_opcode;
|
|
151 "Extended table lookup", tbx_opcode;
|
|
152 "Multiply, lane", single_opcode Vmul_lane;
|
|
153 "Long multiply, lane", single_opcode Vmull_lane;
|
|
154 "Saturating doubling long multiply, lane", single_opcode Vqdmull_lane;
|
|
155 "Saturating doubling multiply high, lane", single_opcode Vqdmulh_lane;
|
|
156 "Multiply-accumulate, lane", single_opcode Vmla_lane;
|
|
157 "Multiply-subtract, lane", single_opcode Vmls_lane;
|
|
158 "Vector multiply by scalar", single_opcode Vmul_n;
|
|
159 "Vector long multiply by scalar", single_opcode Vmull_n;
|
|
160 "Vector saturating doubling long multiply by scalar",
|
|
161 single_opcode Vqdmull_n;
|
|
162 "Vector saturating doubling multiply high by scalar",
|
|
163 single_opcode Vqdmulh_n;
|
|
164 "Vector multiply-accumulate by scalar", single_opcode Vmla_n;
|
|
165 "Vector multiply-subtract by scalar", single_opcode Vmls_n;
|
|
166 "Vector extract", single_opcode Vext;
|
|
167 "Reverse elements", multiple_opcodes [Vrev64; Vrev32; Vrev16];
|
|
168 "Bit selection", single_opcode Vbsl;
|
|
169 "Transpose elements", single_opcode Vtrn;
|
|
170 "Zip elements", single_opcode Vzip;
|
|
171 "Unzip elements", single_opcode Vuzp;
|
|
172 "Element/structure loads, VLD1 variants", ldx_opcode 1;
|
|
173 "Element/structure stores, VST1 variants", stx_opcode 1;
|
|
174 "Element/structure loads, VLD2 variants", ldx_opcode 2;
|
|
175 "Element/structure stores, VST2 variants", stx_opcode 2;
|
|
176 "Element/structure loads, VLD3 variants", ldx_opcode 3;
|
|
177 "Element/structure stores, VST3 variants", stx_opcode 3;
|
|
178 "Element/structure loads, VLD4 variants", ldx_opcode 4;
|
|
179 "Element/structure stores, VST4 variants", stx_opcode 4;
|
|
180 "Logical operations (AND)", single_opcode Vand;
|
|
181 "Logical operations (OR)", single_opcode Vorr;
|
|
182 "Logical operations (exclusive OR)", single_opcode Veor;
|
|
183 "Logical operations (AND-NOT)", single_opcode Vbic;
|
|
184 "Logical operations (OR-NOT)", single_opcode Vorn;
|
|
185 "Reinterpret casts", single_opcode Vreinterp ]
|
|
186
|
|
187 (* Given an intrinsic shape, produce a string to document the corresponding
|
|
188 operand shapes. *)
|
|
189 let rec analyze_shape shape =
|
|
190 let rec n_things n thing =
|
|
191 match n with
|
|
192 0 -> []
|
|
193 | n -> thing :: (n_things (n - 1) thing)
|
|
194 in
|
|
195 let rec analyze_shape_elt reg_no elt =
|
|
196 match elt with
|
|
197 Dreg -> "@var{d" ^ (string_of_int reg_no) ^ "}"
|
|
198 | Qreg -> "@var{q" ^ (string_of_int reg_no) ^ "}"
|
|
199 | Corereg -> "@var{r" ^ (string_of_int reg_no) ^ "}"
|
|
200 | Immed -> "#@var{0}"
|
|
201 | VecArray (1, elt) ->
|
|
202 let elt_regexp = analyze_shape_elt 0 elt in
|
|
203 "@{" ^ elt_regexp ^ "@}"
|
|
204 | VecArray (n, elt) ->
|
|
205 let rec f m =
|
|
206 match m with
|
|
207 0 -> []
|
|
208 | m -> (analyze_shape_elt (m - 1) elt) :: (f (m - 1))
|
|
209 in
|
|
210 let ops = List.rev (f n) in
|
|
211 "@{" ^ (commas (fun x -> x) ops "") ^ "@}"
|
|
212 | (PtrTo elt | CstPtrTo elt) ->
|
|
213 "[" ^ (analyze_shape_elt reg_no elt) ^ "]"
|
|
214 | Element_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[@var{0}]"
|
|
215 | Element_of_qreg -> (analyze_shape_elt reg_no Qreg) ^ "[@var{0}]"
|
|
216 | All_elements_of_dreg -> (analyze_shape_elt reg_no Dreg) ^ "[]"
|
|
217 in
|
|
218 match shape with
|
|
219 All (n, elt) -> commas (analyze_shape_elt 0) (n_things n elt) ""
|
|
220 | Long -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Dreg) ^
|
|
221 ", " ^ (analyze_shape_elt 0 Dreg)
|
|
222 | Long_noreg elt -> (analyze_shape_elt 0 elt) ^ ", " ^
|
|
223 (analyze_shape_elt 0 elt)
|
|
224 | Wide -> (analyze_shape_elt 0 Qreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^
|
|
225 ", " ^ (analyze_shape_elt 0 Dreg)
|
|
226 | Wide_noreg elt -> analyze_shape (Long_noreg elt)
|
|
227 | Narrow -> (analyze_shape_elt 0 Dreg) ^ ", " ^ (analyze_shape_elt 0 Qreg) ^
|
|
228 ", " ^ (analyze_shape_elt 0 Qreg)
|
|
229 | Use_operands elts -> commas (analyze_shape_elt 0) (Array.to_list elts) ""
|
|
230 | By_scalar Dreg ->
|
|
231 analyze_shape (Use_operands [| Dreg; Dreg; Element_of_dreg |])
|
|
232 | By_scalar Qreg ->
|
|
233 analyze_shape (Use_operands [| Qreg; Qreg; Element_of_dreg |])
|
|
234 | By_scalar _ -> assert false
|
|
235 | Wide_lane ->
|
|
236 analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
|
|
237 | Wide_scalar ->
|
|
238 analyze_shape (Use_operands [| Qreg; Dreg; Element_of_dreg |])
|
|
239 | Pair_result elt ->
|
|
240 let elt_regexp = analyze_shape_elt 0 elt in
|
|
241 let elt_regexp' = analyze_shape_elt 1 elt in
|
|
242 elt_regexp ^ ", " ^ elt_regexp'
|
|
243 | Unary_scalar _ -> "FIXME Unary_scalar"
|
|
244 | Binary_imm elt -> analyze_shape (Use_operands [| elt; elt; Immed |])
|
|
245 | Narrow_imm -> analyze_shape (Use_operands [| Dreg; Qreg; Immed |])
|
|
246 | Long_imm -> analyze_shape (Use_operands [| Qreg; Dreg; Immed |])
|
|
247
|
|
248 (* Document a single intrinsic. *)
|
|
249 let describe_intrinsic first chan
|
|
250 (elt_ty, (_, features, shape, name, munge, _)) =
|
|
251 let c_arity, new_elt_ty = munge shape elt_ty in
|
|
252 let c_types = strings_of_arity c_arity in
|
|
253 Printf.fprintf chan "@itemize @bullet\n";
|
|
254 let item_code = if first then "@item" else "@itemx" in
|
|
255 Printf.fprintf chan "%s %s %s_%s (" item_code (List.hd c_types)
|
|
256 (intrinsic_name name) (string_of_elt elt_ty);
|
|
257 Printf.fprintf chan "%s)\n" (commas (fun ty -> ty) (List.tl c_types) "");
|
|
258 if not (List.exists (fun feature -> feature = No_op) features) then
|
|
259 begin
|
|
260 let print_one_insn name =
|
|
261 Printf.fprintf chan "@code{";
|
|
262 let no_suffix = (new_elt_ty = NoElts) in
|
|
263 let name_with_suffix =
|
|
264 if no_suffix then name
|
|
265 else name ^ "." ^ (string_of_elt_dots new_elt_ty)
|
|
266 in
|
|
267 let possible_operands = analyze_all_shapes features shape
|
|
268 analyze_shape
|
|
269 in
|
|
270 let rec print_one_possible_operand op =
|
|
271 Printf.fprintf chan "%s %s}" name_with_suffix op
|
|
272 in
|
|
273 (* If the intrinsic expands to multiple instructions, we assume
|
|
274 they are all of the same form. *)
|
|
275 print_one_possible_operand (List.hd possible_operands)
|
|
276 in
|
|
277 let rec print_insns names =
|
|
278 match names with
|
|
279 [] -> ()
|
|
280 | [name] -> print_one_insn name
|
|
281 | name::names -> (print_one_insn name;
|
|
282 Printf.fprintf chan " @emph{or} ";
|
|
283 print_insns names)
|
|
284 in
|
|
285 let insn_names = get_insn_names features name in
|
|
286 Printf.fprintf chan "@*@emph{Form of expected instruction(s):} ";
|
|
287 print_insns insn_names;
|
|
288 Printf.fprintf chan "\n"
|
|
289 end;
|
|
290 Printf.fprintf chan "@end itemize\n";
|
|
291 Printf.fprintf chan "\n\n"
|
|
292
|
|
293 (* Document a group of intrinsics. *)
|
|
294 let document_group chan (group_title, group_extractor) =
|
|
295 (* Extract the rows in question from the ops table and then turn them
|
|
296 into a list of intrinsics. *)
|
|
297 let intrinsics =
|
|
298 List.fold_left (fun got_so_far ->
|
|
299 fun row ->
|
|
300 match row with
|
|
301 (_, _, _, _, _, elt_tys) ->
|
|
302 List.fold_left (fun got_so_far' ->
|
|
303 fun elt_ty ->
|
|
304 (elt_ty, row) :: got_so_far')
|
|
305 got_so_far elt_tys
|
|
306 ) [] (group_extractor ())
|
|
307 in
|
|
308 (* Emit the title for this group. *)
|
|
309 Printf.fprintf chan "@subsubsection %s\n\n" group_title;
|
|
310 (* Emit a description of each intrinsic. *)
|
|
311 List.iter (describe_intrinsic true chan) intrinsics;
|
|
312 (* Close this group. *)
|
|
313 Printf.fprintf chan "\n\n"
|
|
314
|
|
315 let gnu_header chan =
|
|
316 List.iter (fun s -> Printf.fprintf chan "%s\n" s) [
|
|
317 "@c Copyright (C) 2006 Free Software Foundation, Inc.";
|
|
318 "@c This is part of the GCC manual.";
|
|
319 "@c For copying conditions, see the file gcc.texi.";
|
|
320 "";
|
|
321 "@c This file is generated automatically using gcc/config/arm/neon-docgen.ml";
|
|
322 "@c Please do not edit manually."]
|
|
323
|
|
324 (* Program entry point. *)
|
|
325 let _ =
|
|
326 if Array.length Sys.argv <> 2 then
|
|
327 failwith "Usage: neon-docgen <output filename>"
|
|
328 else
|
|
329 let file = Sys.argv.(1) in
|
|
330 try
|
|
331 let chan = open_out file in
|
|
332 gnu_header chan;
|
|
333 List.iter (document_group chan) intrinsic_groups;
|
|
334 close_out chan
|
|
335 with Sys_error sys ->
|
|
336 failwith ("Could not create output file " ^ file ^ ": " ^ sys)
|