/* Builtins' description for AArch64 SIMD architecture.
   Copyright (C) 2011-2020 Free Software Foundation, Inc.
   Contributed by ARM Ltd.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3, or (at your option)
   any later version.

   GCC is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "function.h"
#include "basic-block.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "memmodel.h"
#include "tm_p.h"
#include "expmed.h"
#include "optabs.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "explow.h"
#include "expr.h"
#include "langhooks.h"
#include "gimple-iterator.h"
#include "case-cfn-macros.h"
#include "emit-rtl.h"

#define v8qi_UP E_V8QImode
#define v4hi_UP E_V4HImode
#define v4hf_UP E_V4HFmode
#define v2si_UP E_V2SImode
#define v2sf_UP E_V2SFmode
#define v1df_UP E_V1DFmode
#define di_UP E_DImode
#define df_UP E_DFmode
#define v16qi_UP E_V16QImode
#define v8hi_UP E_V8HImode
#define v8hf_UP E_V8HFmode
#define v4si_UP E_V4SImode
#define v4sf_UP E_V4SFmode
#define v2di_UP E_V2DImode
#define v2df_UP E_V2DFmode
#define ti_UP E_TImode
#define oi_UP E_OImode
#define ci_UP E_CImode
#define xi_UP E_XImode
#define si_UP E_SImode
#define sf_UP E_SFmode
#define hi_UP E_HImode
#define hf_UP E_HFmode
#define qi_UP E_QImode
#define bf_UP E_BFmode
#define v4bf_UP E_V4BFmode
#define v8bf_UP E_V8BFmode
#define UP(X) X##_UP
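
/* For example, UP (v8qi) token-pastes to v8qi_UP and hence expands to
   E_V8QImode via the table above.  */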

#define SIMD_MAX_BUILTIN_ARGS 5

enum aarch64_type_qualifiers
{
  /* T foo.  */
  qualifier_none = 0x0,
  /* unsigned T foo.  */
  qualifier_unsigned = 0x1, /* 1 << 0  */
  /* const T foo.  */
  qualifier_const = 0x2, /* 1 << 1  */
  /* T *foo.  */
  qualifier_pointer = 0x4, /* 1 << 2  */
  /* Used when expanding arguments if an operand could
     be an immediate.  */
  qualifier_immediate = 0x8, /* 1 << 3  */
  qualifier_maybe_immediate = 0x10, /* 1 << 4  */
  /* void foo (...).  */
  qualifier_void = 0x20, /* 1 << 5  */
  /* Some patterns may have internal operands; this qualifier is an
     instruction to the initialisation code to skip this operand.  */
  qualifier_internal = 0x40, /* 1 << 6  */
  /* Some builtins should use the T_*mode* encoded in a simd_builtin_datum
     rather than using the type of the operand.  */
  qualifier_map_mode = 0x80, /* 1 << 7  */
  /* qualifier_pointer | qualifier_map_mode  */
  qualifier_pointer_map_mode = 0x84,
  /* qualifier_const | qualifier_pointer | qualifier_map_mode  */
  qualifier_const_pointer_map_mode = 0x86,
  /* Polynomial types.  */
  qualifier_poly = 0x100,
  /* Lane indices - must be in range, and flipped for bigendian.  */
  qualifier_lane_index = 0x200,
  /* Lane indices for single lane structure loads and stores.  */
  qualifier_struct_load_store_lane_index = 0x400,
  /* Lane indices selected in pairs - must be in range, and flipped for
     bigendian.  */
  qualifier_lane_pair_index = 0x800,
  /* Lane indices selected in quadtuplets - must be in range, and flipped
     for bigendian.  */
  qualifier_lane_quadtup_index = 0x1000,
};
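
/* A builtin's signature is described by an array of these qualifiers:
   entry 0 gives the return type and entries 1..n give the argument types
   (see the TYPES_* arrays below).  For example,
   { qualifier_unsigned, qualifier_none, qualifier_none }
   describes a builtin of the form "unsigned T foo (T, T)".  */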

typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code code;
  unsigned int fcode;
  enum aarch64_type_qualifiers *qualifiers;
} aarch64_simd_builtin_datum;

static enum aarch64_type_qualifiers
aarch64_types_unop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none };
#define TYPES_UNOP (aarch64_types_unop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned };
#define TYPES_UNOPU (aarch64_types_unopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unopus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none };
#define TYPES_UNOPUS (aarch64_types_unopus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_maybe_immediate };
#define TYPES_BINOP (aarch64_types_binop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned };
#define TYPES_BINOPU (aarch64_types_binopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_uus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_none };
#define TYPES_BINOP_UUS (aarch64_types_binop_uus_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_ssu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned };
#define TYPES_BINOP_SSU (aarch64_types_binop_ssu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_uss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none, qualifier_none };
#define TYPES_BINOP_USS (aarch64_types_binop_uss_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binopp_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_BINOPP (aarch64_types_binopp_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_none };
#define TYPES_TERNOP (aarch64_types_ternop_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_lane_index };
#define TYPES_TERNOP_LANE (aarch64_types_ternop_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_unsigned };
#define TYPES_TERNOPU (aarch64_types_ternopu_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_immediate };
#define TYPES_TERNOPUI (aarch64_types_ternopu_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_ssus_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_none };
#define TYPES_TERNOP_SSUS (aarch64_types_ternop_ssus_qualifiers)


static enum aarch64_type_qualifiers
aarch64_types_quadop_lane_pair_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_none, qualifier_lane_pair_index };
#define TYPES_QUADOP_LANE_PAIR (aarch64_types_quadop_lane_pair_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadop_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_none, qualifier_lane_index };
#define TYPES_QUADOP_LANE (aarch64_types_quadop_lane_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadopu_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_lane_index };
#define TYPES_QUADOPU_LANE (aarch64_types_quadopu_lane_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_quadopssus_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_unsigned,
      qualifier_none, qualifier_lane_quadtup_index };
#define TYPES_QUADOPSSUS_LANE_QUADTUP \
  (aarch64_types_quadopssus_lane_quadtup_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_quadopsssu_lane_quadtup_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none,
      qualifier_unsigned, qualifier_lane_quadtup_index };
#define TYPES_QUADOPSSSU_LANE_QUADTUP \
  (aarch64_types_quadopsssu_lane_quadtup_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_quadopu_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_immediate };
#define TYPES_QUADOPUI (aarch64_types_quadopu_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_binop_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_none, qualifier_immediate };
#define TYPES_GETREGP (aarch64_types_binop_imm_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_binop_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_immediate };
#define TYPES_GETREG (aarch64_types_binop_imm_qualifiers)
#define TYPES_SHIFTIMM (aarch64_types_binop_imm_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_shift_to_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_none, qualifier_immediate };
#define TYPES_SHIFTIMM_USS (aarch64_types_shift_to_unsigned_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_fcvt_from_unsigned_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned, qualifier_immediate };
#define TYPES_FCVTIMM_SUS (aarch64_types_fcvt_from_unsigned_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_unsigned_shift_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_immediate };
#define TYPES_USHIFTIMM (aarch64_types_unsigned_shift_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_s_imm_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_poly, qualifier_immediate };
#define TYPES_SETREGP (aarch64_types_ternop_s_imm_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_ternop_s_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none, qualifier_immediate };
#define TYPES_SETREG (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFTINSERT (aarch64_types_ternop_s_imm_qualifiers)
#define TYPES_SHIFTACC (aarch64_types_ternop_s_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_ternop_p_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly, qualifier_immediate };
#define TYPES_SHIFTINSERTP (aarch64_types_ternop_p_imm_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_unsigned_shiftacc_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
      qualifier_immediate };
#define TYPES_USHIFTACC (aarch64_types_unsigned_shiftacc_qualifiers)


static enum aarch64_type_qualifiers
aarch64_types_combine_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_none, qualifier_none };
#define TYPES_COMBINE (aarch64_types_combine_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_combine_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_poly, qualifier_poly };
#define TYPES_COMBINEP (aarch64_types_combine_p_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_load1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_const_pointer_map_mode };
#define TYPES_LOAD1 (aarch64_types_load1_qualifiers)
#define TYPES_LOADSTRUCT (aarch64_types_load1_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_loadstruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_const_pointer_map_mode,
      qualifier_none, qualifier_struct_load_store_lane_index };
#define TYPES_LOADSTRUCT_LANE (aarch64_types_loadstruct_lane_qualifiers)

static enum aarch64_type_qualifiers
aarch64_types_bsl_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_poly, qualifier_unsigned,
      qualifier_poly, qualifier_poly };
#define TYPES_BSL_P (aarch64_types_bsl_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_bsl_s_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_none, qualifier_unsigned,
      qualifier_none, qualifier_none };
#define TYPES_BSL_S (aarch64_types_bsl_s_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_bsl_u_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_unsigned, qualifier_unsigned,
      qualifier_unsigned, qualifier_unsigned };
#define TYPES_BSL_U (aarch64_types_bsl_u_qualifiers)

/* The first argument (return type) of a store should be void type,
   which we represent with qualifier_void.  The first operand will be
   a DImode pointer to the location to store to, so we must use
   qualifier_map_mode | qualifier_pointer to build a pointer to the
   element type of the vector.  */
static enum aarch64_type_qualifiers
aarch64_types_store1_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_poly };
#define TYPES_STORE1P (aarch64_types_store1_p_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_store1_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode, qualifier_none };
#define TYPES_STORE1 (aarch64_types_store1_qualifiers)
#define TYPES_STORESTRUCT (aarch64_types_store1_qualifiers)
static enum aarch64_type_qualifiers
aarch64_types_storestruct_lane_qualifiers[SIMD_MAX_BUILTIN_ARGS]
  = { qualifier_void, qualifier_pointer_map_mode,
      qualifier_none, qualifier_struct_load_store_lane_index };
#define TYPES_STORESTRUCT_LANE (aarch64_types_storestruct_lane_qualifiers)

#define CF0(N, X) CODE_FOR_aarch64_##N##X
#define CF1(N, X) CODE_FOR_##N##X##1
#define CF2(N, X) CODE_FOR_##N##X##2
#define CF3(N, X) CODE_FOR_##N##X##3
#define CF4(N, X) CODE_FOR_##N##X##4
#define CF10(N, X) CODE_FOR_##N##X

#define VAR1(T, N, MAP, A) \
  {#N #A, UP (A), CF##MAP (N, A), 0, TYPES_##T},
#define VAR2(T, N, MAP, A, B) \
  VAR1 (T, N, MAP, A) \
  VAR1 (T, N, MAP, B)
#define VAR3(T, N, MAP, A, B, C) \
  VAR2 (T, N, MAP, A, B) \
  VAR1 (T, N, MAP, C)
#define VAR4(T, N, MAP, A, B, C, D) \
  VAR3 (T, N, MAP, A, B, C) \
  VAR1 (T, N, MAP, D)
#define VAR5(T, N, MAP, A, B, C, D, E) \
  VAR4 (T, N, MAP, A, B, C, D) \
  VAR1 (T, N, MAP, E)
#define VAR6(T, N, MAP, A, B, C, D, E, F) \
  VAR5 (T, N, MAP, A, B, C, D, E) \
  VAR1 (T, N, MAP, F)
#define VAR7(T, N, MAP, A, B, C, D, E, F, G) \
  VAR6 (T, N, MAP, A, B, C, D, E, F) \
  VAR1 (T, N, MAP, G)
#define VAR8(T, N, MAP, A, B, C, D, E, F, G, H) \
  VAR7 (T, N, MAP, A, B, C, D, E, F, G) \
  VAR1 (T, N, MAP, H)
#define VAR9(T, N, MAP, A, B, C, D, E, F, G, H, I) \
  VAR8 (T, N, MAP, A, B, C, D, E, F, G, H) \
  VAR1 (T, N, MAP, I)
#define VAR10(T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
  VAR9 (T, N, MAP, A, B, C, D, E, F, G, H, I) \
  VAR1 (T, N, MAP, J)
#define VAR11(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
  VAR10 (T, N, MAP, A, B, C, D, E, F, G, H, I, J) \
  VAR1 (T, N, MAP, K)
#define VAR12(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
  VAR11 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K) \
  VAR1 (T, N, MAP, L)
#define VAR13(T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
  VAR12 (T, N, MAP, A, B, C, D, E, F, G, H, I, J, K, L) \
  VAR1 (T, N, MAP, M)
#define VAR14(T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M, N) \
  VAR13 (T, X, MAP, A, B, C, D, E, F, G, H, I, J, K, L, M) \
  VAR1 (T, X, MAP, N)
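
/* As an illustration (the entry name is hypothetical), the expansion of
   VAR1 (BINOP, addp, 0, v8qi)
   is
   {"addpv8qi", E_V8QImode, CODE_FOR_aarch64_addpv8qi, 0, TYPES_BINOP},
   i.e. one aarch64_simd_builtin_datum initialiser per (name, mode) pair.  */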

#include "aarch64-builtin-iterators.h"

static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
#include "aarch64-simd-builtins.def"
};

/* There are only 8 CRC32 builtins.  Probably not worth their own .def
   file.  */
#define AARCH64_CRC32_BUILTINS \
  CRC32_BUILTIN (crc32b, QI) \
  CRC32_BUILTIN (crc32h, HI) \
  CRC32_BUILTIN (crc32w, SI) \
  CRC32_BUILTIN (crc32x, DI) \
  CRC32_BUILTIN (crc32cb, QI) \
  CRC32_BUILTIN (crc32ch, HI) \
  CRC32_BUILTIN (crc32cw, SI) \
  CRC32_BUILTIN (crc32cx, DI)

/* The next 8 FCMLA intrinsics require some special handling compared
   with the normal simd intrinsics.  */
#define AARCH64_SIMD_FCMLA_LANEQ_BUILTINS \
  FCMLA_LANEQ_BUILTIN (0, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (90, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (180, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (270, v2sf, fcmla, V2SF, false) \
  FCMLA_LANEQ_BUILTIN (0, v4hf, fcmla_laneq, V4HF, true) \
  FCMLA_LANEQ_BUILTIN (90, v4hf, fcmla_laneq, V4HF, true) \
  FCMLA_LANEQ_BUILTIN (180, v4hf, fcmla_laneq, V4HF, true) \
  FCMLA_LANEQ_BUILTIN (270, v4hf, fcmla_laneq, V4HF, true) \

typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code icode;
  unsigned int fcode;
} aarch64_crc_builtin_datum;

/* Hold information about how to expand the FCMLA_LANEQ builtins.  */
typedef struct
{
  const char *name;
  machine_mode mode;
  const enum insn_code icode;
  unsigned int fcode;
  bool lane;
} aarch64_fcmla_laneq_builtin_datum;

#define CRC32_BUILTIN(N, M) \
  AARCH64_BUILTIN_##N,

#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
  AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M,

#undef VAR1
#define VAR1(T, N, MAP, A) \
  AARCH64_SIMD_BUILTIN_##T##_##N##A,

enum aarch64_builtins
{
  AARCH64_BUILTIN_MIN,

  AARCH64_BUILTIN_GET_FPCR,
  AARCH64_BUILTIN_SET_FPCR,
  AARCH64_BUILTIN_GET_FPSR,
  AARCH64_BUILTIN_SET_FPSR,

  AARCH64_BUILTIN_RSQRT_DF,
  AARCH64_BUILTIN_RSQRT_SF,
  AARCH64_BUILTIN_RSQRT_V2DF,
  AARCH64_BUILTIN_RSQRT_V2SF,
  AARCH64_BUILTIN_RSQRT_V4SF,
  AARCH64_SIMD_BUILTIN_BASE,
  AARCH64_SIMD_BUILTIN_LANE_CHECK,
#include "aarch64-simd-builtins.def"
  /* The first enum element which is based on an insn_data pattern.  */
  AARCH64_SIMD_PATTERN_START = AARCH64_SIMD_BUILTIN_LANE_CHECK + 1,
  AARCH64_SIMD_BUILTIN_MAX = AARCH64_SIMD_PATTERN_START
    + ARRAY_SIZE (aarch64_simd_builtin_data) - 1,
  AARCH64_CRC32_BUILTIN_BASE,
  AARCH64_CRC32_BUILTINS
  AARCH64_CRC32_BUILTIN_MAX,
  /* Armv8.3-A Pointer Authentication builtins.  */
  AARCH64_PAUTH_BUILTIN_AUTIA1716,
  AARCH64_PAUTH_BUILTIN_PACIA1716,
  AARCH64_PAUTH_BUILTIN_AUTIB1716,
  AARCH64_PAUTH_BUILTIN_PACIB1716,
  AARCH64_PAUTH_BUILTIN_XPACLRI,
  /* Special-cased Armv8.3-A complex FMA by-lane quad builtins.  */
  AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE,
  AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
  /* Builtin for the Armv8.3-A JavaScript conversion instruction.  */
  AARCH64_JSCVT,
  /* TME builtins.  */
  AARCH64_TME_BUILTIN_TSTART,
  AARCH64_TME_BUILTIN_TCOMMIT,
  AARCH64_TME_BUILTIN_TTEST,
  AARCH64_TME_BUILTIN_TCANCEL,
  /* Armv8.5-A RNG instruction builtins.  */
  AARCH64_BUILTIN_RNG_RNDR,
  AARCH64_BUILTIN_RNG_RNDRRS,
  /* MEMTAG builtins.  */
  AARCH64_MEMTAG_BUILTIN_START,
  AARCH64_MEMTAG_BUILTIN_IRG,
  AARCH64_MEMTAG_BUILTIN_GMI,
  AARCH64_MEMTAG_BUILTIN_SUBP,
  AARCH64_MEMTAG_BUILTIN_INC_TAG,
  AARCH64_MEMTAG_BUILTIN_SET_TAG,
  AARCH64_MEMTAG_BUILTIN_GET_TAG,
  AARCH64_MEMTAG_BUILTIN_END,
  AARCH64_BUILTIN_MAX
};

#undef CRC32_BUILTIN
#define CRC32_BUILTIN(N, M) \
  {"__builtin_aarch64_"#N, E_##M##mode, CODE_FOR_aarch64_##N, AARCH64_BUILTIN_##N},

static aarch64_crc_builtin_datum aarch64_crc_builtin_data[] = {
  AARCH64_CRC32_BUILTINS
};
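
/* With the table entry macro above, CRC32_BUILTIN (crc32b, QI) expands to
   {"__builtin_aarch64_crc32b", E_QImode, CODE_FOR_aarch64_crc32b,
    AARCH64_BUILTIN_crc32b},
   so aarch64_crc_builtin_data holds one such record per CRC32 builtin.  */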

#undef FCMLA_LANEQ_BUILTIN
#define FCMLA_LANEQ_BUILTIN(I, N, X, M, T) \
  {"__builtin_aarch64_fcmla_laneq"#I#N, E_##M##mode, CODE_FOR_aarch64_##X##I##N, \
   AARCH64_SIMD_BUILTIN_FCMLA_LANEQ##I##_##M, T},

/* This structure describes how to map the builtin to the instruction to
   generate in the backend and how to invoke the instruction.  */
static aarch64_fcmla_laneq_builtin_datum aarch64_fcmla_lane_builtin_data[] = {
  AARCH64_SIMD_FCMLA_LANEQ_BUILTINS
};

#undef CRC32_BUILTIN

static GTY(()) tree aarch64_builtin_decls[AARCH64_BUILTIN_MAX];

#define NUM_DREG_TYPES 6
#define NUM_QREG_TYPES 6

/* Internal scalar builtin types.  These types are used to support
   neon intrinsic builtins.  They are _not_ user-visible types.  Therefore
   the mangling for these types is implementation-defined.  */
const char *aarch64_scalar_builtin_types[] = {
  "__builtin_aarch64_simd_qi",
  "__builtin_aarch64_simd_hi",
  "__builtin_aarch64_simd_si",
  "__builtin_aarch64_simd_hf",
  "__builtin_aarch64_simd_sf",
  "__builtin_aarch64_simd_di",
  "__builtin_aarch64_simd_df",
  "__builtin_aarch64_simd_poly8",
  "__builtin_aarch64_simd_poly16",
  "__builtin_aarch64_simd_poly64",
  "__builtin_aarch64_simd_poly128",
  "__builtin_aarch64_simd_ti",
  "__builtin_aarch64_simd_uqi",
  "__builtin_aarch64_simd_uhi",
  "__builtin_aarch64_simd_usi",
  "__builtin_aarch64_simd_udi",
  "__builtin_aarch64_simd_ei",
  "__builtin_aarch64_simd_oi",
  "__builtin_aarch64_simd_ci",
  "__builtin_aarch64_simd_xi",
  NULL
};

#define ENTRY(E, M, Q, G) E,
enum aarch64_simd_type
{
#include "aarch64-simd-builtin-types.def"
  ARM_NEON_H_TYPES_LAST
};
#undef ENTRY

struct aarch64_simd_type_info
{
  enum aarch64_simd_type type;

  /* Internal type name.  */
  const char *name;

  /* Internal type name (mangled).  The mangled names conform to the
     AAPCS64 (see "Procedure Call Standard for the ARM 64-bit Architecture",
     Appendix A).  To qualify for emission with the mangled names defined in
     that document, a vector type must not only be of the correct mode but
     also be of the correct internal AdvSIMD vector type (e.g. __Int8x8_t);
     these types are registered by aarch64_init_simd_builtin_types ().  In
     other words, vector types defined in other ways, e.g. via the
     vector_size attribute, will get default mangled names.  */
  const char *mangle;

  /* Internal type.  */
  tree itype;

  /* Element type.  */
  tree eltype;

  /* Machine mode the internal type maps to.  */
  enum machine_mode mode;

  /* Qualifiers.  */
  enum aarch64_type_qualifiers q;
};

#define ENTRY(E, M, Q, G) \
  {E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
static struct aarch64_simd_type_info aarch64_simd_types [] = {
#include "aarch64-simd-builtin-types.def"
};
#undef ENTRY
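
/* For illustration, a .def entry of the form ENTRY (Int8x8_t, V8QI, none, 10)
   would expand under the macro above to
   {Int8x8_t, "__Int8x8_t", "10__Int8x8_t", NULL_TREE, NULL_TREE,
    E_V8QImode, qualifier_none},
   where "10__Int8x8_t" is the AAPCS64 mangled name.  */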

static tree aarch64_simd_intOI_type_node = NULL_TREE;
static tree aarch64_simd_intCI_type_node = NULL_TREE;
static tree aarch64_simd_intXI_type_node = NULL_TREE;

/* The user-visible __fp16 type, and a pointer to that type.  Used
   across the back-end.  */
tree aarch64_fp16_type_node = NULL_TREE;
tree aarch64_fp16_ptr_type_node = NULL_TREE;

/* Back-end node type for brain float (bfloat) types.  */
tree aarch64_bf16_type_node = NULL_TREE;
tree aarch64_bf16_ptr_type_node = NULL_TREE;

/* Wrapper around add_builtin_function.  NAME is the name of the built-in
   function, TYPE is the function type, and CODE is the function subcode
   (relative to AARCH64_BUILTIN_GENERAL).  */
static tree
aarch64_general_add_builtin (const char *name, tree type, unsigned int code)
{
  code = (code << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL;
  return add_builtin_function (name, type, code, BUILT_IN_MD,
                               NULL, NULL_TREE);
}
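
/* For example, a subcode of 5 would be registered as
   (5 << AARCH64_BUILTIN_SHIFT) | AARCH64_BUILTIN_GENERAL, so that the
   builtin group can later be recovered from the packed code (the shift
   and group constants are defined elsewhere in the backend).  */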

static const char *
aarch64_mangle_builtin_scalar_type (const_tree type)
{
  int i = 0;

  while (aarch64_scalar_builtin_types[i] != NULL)
    {
      const char *name = aarch64_scalar_builtin_types[i];

      if (TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
          && DECL_NAME (TYPE_NAME (type))
          && !strcmp (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))), name))
        return aarch64_scalar_builtin_types[i];
      i++;
    }
  return NULL;
}

static const char *
aarch64_mangle_builtin_vector_type (const_tree type)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);

  for (i = 0; i < nelts; i++)
    if (aarch64_simd_types[i].mode == TYPE_MODE (type)
        && TYPE_NAME (type)
        && TREE_CODE (TYPE_NAME (type)) == TYPE_DECL
        && DECL_NAME (TYPE_NAME (type))
        && !strcmp
             (IDENTIFIER_POINTER (DECL_NAME (TYPE_NAME (type))),
              aarch64_simd_types[i].name))
      return aarch64_simd_types[i].mangle;

  return NULL;
}

const char *
aarch64_general_mangle_builtin_type (const_tree type)
{
  const char *mangle;
  /* Walk through all the AArch64 builtin type tables to find a match for
     the incoming type.  */
  if ((mangle = aarch64_mangle_builtin_vector_type (type))
      || (mangle = aarch64_mangle_builtin_scalar_type (type)))
    return mangle;

  return NULL;
}

static tree
aarch64_simd_builtin_std_type (machine_mode mode,
                               enum aarch64_type_qualifiers q)
{
#define QUAL_TYPE(M) \
  ((q == qualifier_none) ? int##M##_type_node : unsigned_int##M##_type_node);
  switch (mode)
    {
    case E_QImode:
      return QUAL_TYPE (QI);
    case E_HImode:
      return QUAL_TYPE (HI);
    case E_SImode:
      return QUAL_TYPE (SI);
    case E_DImode:
      return QUAL_TYPE (DI);
    case E_TImode:
      return QUAL_TYPE (TI);
    case E_OImode:
      return aarch64_simd_intOI_type_node;
    case E_CImode:
      return aarch64_simd_intCI_type_node;
    case E_XImode:
      return aarch64_simd_intXI_type_node;
    case E_HFmode:
      return aarch64_fp16_type_node;
    case E_SFmode:
      return float_type_node;
    case E_DFmode:
      return double_type_node;
    case E_BFmode:
      return aarch64_bf16_type_node;
    default:
      gcc_unreachable ();
    }
#undef QUAL_TYPE
}
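
/* For example, aarch64_simd_builtin_std_type (E_SImode, qualifier_unsigned)
   selects QUAL_TYPE (SI) and so returns unsigned_intSI_type_node.  */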

static tree
aarch64_lookup_simd_builtin_type (machine_mode mode,
                                  enum aarch64_type_qualifiers q)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);

  /* Non-poly scalar modes map to standard types not in the table.  */
  if (q != qualifier_poly && !VECTOR_MODE_P (mode))
    return aarch64_simd_builtin_std_type (mode, q);

  for (i = 0; i < nelts; i++)
    if (aarch64_simd_types[i].mode == mode
        && aarch64_simd_types[i].q == q)
      return aarch64_simd_types[i].itype;

  return NULL_TREE;
}

static tree
aarch64_simd_builtin_type (machine_mode mode,
                           bool unsigned_p, bool poly_p)
{
  if (poly_p)
    return aarch64_lookup_simd_builtin_type (mode, qualifier_poly);
  else if (unsigned_p)
    return aarch64_lookup_simd_builtin_type (mode, qualifier_unsigned);
  else
    return aarch64_lookup_simd_builtin_type (mode, qualifier_none);
}

static void
aarch64_init_simd_builtin_types (void)
{
  int i;
  int nelts = sizeof (aarch64_simd_types) / sizeof (aarch64_simd_types[0]);
  tree tdecl;

  /* Init all the element types built by the front-end.  */
  aarch64_simd_types[Int8x8_t].eltype = intQI_type_node;
  aarch64_simd_types[Int8x16_t].eltype = intQI_type_node;
  aarch64_simd_types[Int16x4_t].eltype = intHI_type_node;
  aarch64_simd_types[Int16x8_t].eltype = intHI_type_node;
  aarch64_simd_types[Int32x2_t].eltype = intSI_type_node;
  aarch64_simd_types[Int32x4_t].eltype = intSI_type_node;
  aarch64_simd_types[Int64x1_t].eltype = intDI_type_node;
  aarch64_simd_types[Int64x2_t].eltype = intDI_type_node;
  aarch64_simd_types[Uint8x8_t].eltype = unsigned_intQI_type_node;
  aarch64_simd_types[Uint8x16_t].eltype = unsigned_intQI_type_node;
  aarch64_simd_types[Uint16x4_t].eltype = unsigned_intHI_type_node;
  aarch64_simd_types[Uint16x8_t].eltype = unsigned_intHI_type_node;
  aarch64_simd_types[Uint32x2_t].eltype = unsigned_intSI_type_node;
  aarch64_simd_types[Uint32x4_t].eltype = unsigned_intSI_type_node;
  aarch64_simd_types[Uint64x1_t].eltype = unsigned_intDI_type_node;
  aarch64_simd_types[Uint64x2_t].eltype = unsigned_intDI_type_node;

  /* Poly types are a world of their own.  */
  aarch64_simd_types[Poly8_t].eltype = aarch64_simd_types[Poly8_t].itype =
    build_distinct_type_copy (unsigned_intQI_type_node);
  /* Prevent front-ends from transforming Poly8_t arrays into string
     literals.  */
  TYPE_STRING_FLAG (aarch64_simd_types[Poly8_t].eltype) = false;

  aarch64_simd_types[Poly16_t].eltype = aarch64_simd_types[Poly16_t].itype =
    build_distinct_type_copy (unsigned_intHI_type_node);
  aarch64_simd_types[Poly64_t].eltype = aarch64_simd_types[Poly64_t].itype =
    build_distinct_type_copy (unsigned_intDI_type_node);
  aarch64_simd_types[Poly128_t].eltype = aarch64_simd_types[Poly128_t].itype =
    build_distinct_type_copy (unsigned_intTI_type_node);
  /* Init poly vector element types with scalar poly types.  */
  aarch64_simd_types[Poly8x8_t].eltype = aarch64_simd_types[Poly8_t].itype;
  aarch64_simd_types[Poly8x16_t].eltype = aarch64_simd_types[Poly8_t].itype;
  aarch64_simd_types[Poly16x4_t].eltype = aarch64_simd_types[Poly16_t].itype;
  aarch64_simd_types[Poly16x8_t].eltype = aarch64_simd_types[Poly16_t].itype;
  aarch64_simd_types[Poly64x1_t].eltype = aarch64_simd_types[Poly64_t].itype;
  aarch64_simd_types[Poly64x2_t].eltype = aarch64_simd_types[Poly64_t].itype;

  /* Continue with standard types.  */
  aarch64_simd_types[Float16x4_t].eltype = aarch64_fp16_type_node;
  aarch64_simd_types[Float16x8_t].eltype = aarch64_fp16_type_node;
  aarch64_simd_types[Float32x2_t].eltype = float_type_node;
  aarch64_simd_types[Float32x4_t].eltype = float_type_node;
  aarch64_simd_types[Float64x1_t].eltype = double_type_node;
  aarch64_simd_types[Float64x2_t].eltype = double_type_node;

  /* Init Bfloat vector types with underlying __bf16 type.  */
  aarch64_simd_types[Bfloat16x4_t].eltype = aarch64_bf16_type_node;
  aarch64_simd_types[Bfloat16x8_t].eltype = aarch64_bf16_type_node;

  for (i = 0; i < nelts; i++)
    {
      tree eltype = aarch64_simd_types[i].eltype;
      machine_mode mode = aarch64_simd_types[i].mode;

      if (aarch64_simd_types[i].itype == NULL)
        {
          aarch64_simd_types[i].itype
            = build_distinct_type_copy
                (build_vector_type (eltype, GET_MODE_NUNITS (mode)));
          SET_TYPE_STRUCTURAL_EQUALITY (aarch64_simd_types[i].itype);
        }

      tdecl = add_builtin_type (aarch64_simd_types[i].name,
                                aarch64_simd_types[i].itype);
      TYPE_NAME (aarch64_simd_types[i].itype) = tdecl;
    }

#define AARCH64_BUILD_SIGNED_TYPE(mode) \
  make_signed_type (GET_MODE_PRECISION (mode));
  aarch64_simd_intOI_type_node = AARCH64_BUILD_SIGNED_TYPE (OImode);
  aarch64_simd_intCI_type_node = AARCH64_BUILD_SIGNED_TYPE (CImode);
  aarch64_simd_intXI_type_node = AARCH64_BUILD_SIGNED_TYPE (XImode);
#undef AARCH64_BUILD_SIGNED_TYPE

  tdecl = add_builtin_type
            ("__builtin_aarch64_simd_oi", aarch64_simd_intOI_type_node);
  TYPE_NAME (aarch64_simd_intOI_type_node) = tdecl;
  tdecl = add_builtin_type
            ("__builtin_aarch64_simd_ci", aarch64_simd_intCI_type_node);
  TYPE_NAME (aarch64_simd_intCI_type_node) = tdecl;
  tdecl = add_builtin_type
            ("__builtin_aarch64_simd_xi", aarch64_simd_intXI_type_node);
  TYPE_NAME (aarch64_simd_intXI_type_node) = tdecl;
}

static void
aarch64_init_simd_builtin_scalar_types (void)
{
  /* Define typedefs for all the standard scalar types.  */
  (*lang_hooks.types.register_builtin_type) (intQI_type_node,
                                             "__builtin_aarch64_simd_qi");
  (*lang_hooks.types.register_builtin_type) (intHI_type_node,
                                             "__builtin_aarch64_simd_hi");
  (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node,
                                             "__builtin_aarch64_simd_hf");
  (*lang_hooks.types.register_builtin_type) (intSI_type_node,
                                             "__builtin_aarch64_simd_si");
  (*lang_hooks.types.register_builtin_type) (float_type_node,
                                             "__builtin_aarch64_simd_sf");
  (*lang_hooks.types.register_builtin_type) (intDI_type_node,
                                             "__builtin_aarch64_simd_di");
  (*lang_hooks.types.register_builtin_type) (double_type_node,
                                             "__builtin_aarch64_simd_df");
  (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
                                             "__builtin_aarch64_simd_poly8");
  (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
                                             "__builtin_aarch64_simd_poly16");
  (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
                                             "__builtin_aarch64_simd_poly64");
  (*lang_hooks.types.register_builtin_type) (unsigned_intTI_type_node,
                                             "__builtin_aarch64_simd_poly128");
  (*lang_hooks.types.register_builtin_type) (intTI_type_node,
                                             "__builtin_aarch64_simd_ti");
  /* Unsigned integer types for various mode sizes.  */
  (*lang_hooks.types.register_builtin_type) (unsigned_intQI_type_node,
                                             "__builtin_aarch64_simd_uqi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intHI_type_node,
                                             "__builtin_aarch64_simd_uhi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intSI_type_node,
                                             "__builtin_aarch64_simd_usi");
  (*lang_hooks.types.register_builtin_type) (unsigned_intDI_type_node,
                                             "__builtin_aarch64_simd_udi");
}

static bool aarch64_simd_builtins_initialized_p = false;

/* The architecture does not provide a lane variant of the fcmla
   instructions, so we can't use the standard simd builtin expansion code,
   but we still want the majority of the validation that would normally
   be done.  */

void
aarch64_init_fcmla_laneq_builtins (void)
{
  unsigned int i = 0;

  for (i = 0; i < ARRAY_SIZE (aarch64_fcmla_lane_builtin_data); ++i)
    {
      aarch64_fcmla_laneq_builtin_datum* d
        = &aarch64_fcmla_lane_builtin_data[i];
      tree argtype = aarch64_lookup_simd_builtin_type (d->mode, qualifier_none);
      machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
      tree quadtype
        = aarch64_lookup_simd_builtin_type (quadmode, qualifier_none);
      tree lanetype
        = aarch64_simd_builtin_std_type (SImode, qualifier_lane_pair_index);
      tree ftype = build_function_type_list (argtype, argtype, argtype,
                                             quadtype, lanetype, NULL_TREE);
      tree fndecl = aarch64_general_add_builtin (d->name, ftype, d->fcode);

      aarch64_builtin_decls[d->fcode] = fndecl;
    }
}

void
aarch64_init_simd_builtins (void)
{
  unsigned int i, fcode = AARCH64_SIMD_PATTERN_START;

  if (aarch64_simd_builtins_initialized_p)
    return;

  aarch64_simd_builtins_initialized_p = true;

  aarch64_init_simd_builtin_types ();

  /* Strong-typing hasn't been implemented for all AdvSIMD builtin intrinsics.
     Therefore we need to preserve the old __builtin scalar types.  These can
     be removed once all the intrinsics become strongly typed using the
     qualifier system.  */
  aarch64_init_simd_builtin_scalar_types ();

  tree lane_check_fpr = build_function_type_list (void_type_node,
                                                  size_type_node,
                                                  size_type_node,
                                                  intSI_type_node,
                                                  NULL);
  aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_LANE_CHECK]
    = aarch64_general_add_builtin ("__builtin_aarch64_im_lane_boundsi",
                                   lane_check_fpr,
                                   AARCH64_SIMD_BUILTIN_LANE_CHECK);

  for (i = 0; i < ARRAY_SIZE (aarch64_simd_builtin_data); i++, fcode++)
    {
      bool print_type_signature_p = false;
      char type_signature[SIMD_MAX_BUILTIN_ARGS + 1] = { 0 };
      aarch64_simd_builtin_datum *d = &aarch64_simd_builtin_data[i];
      char namebuf[60];
      tree ftype = NULL;
      tree fndecl = NULL;

      d->fcode = fcode;

      /* We must track two variables here.  op_num is
         the operand number as in the RTL pattern.  This is
         required to access the mode (e.g. V4SF mode) of the
         argument, from which the base type can be derived.
         arg_num is an index into the qualifiers data, which
         gives qualifiers to the type (e.g. const unsigned).
         The reason these two variables may differ by one is the
         void return type.  While all return types take the 0th entry
         in the qualifiers array, there is no operand for them in the
         RTL pattern.  */
      int op_num = insn_data[d->code].n_operands - 1;
      int arg_num = d->qualifiers[0] & qualifier_void
                      ? op_num + 1
                      : op_num;
      tree return_type = void_type_node, args = void_list_node;
      tree eltype;

      /* Build a function type directly from the insn_data for this
         builtin.  The build_function_type () function takes care of
         removing duplicates for us.  */
      for (; op_num >= 0; arg_num--, op_num--)
        {
          machine_mode op_mode = insn_data[d->code].operand[op_num].mode;
          enum aarch64_type_qualifiers qualifiers = d->qualifiers[arg_num];

          if (qualifiers & qualifier_unsigned)
            {
              type_signature[op_num] = 'u';
              print_type_signature_p = true;
            }
          else if (qualifiers & qualifier_poly)
            {
              type_signature[op_num] = 'p';
              print_type_signature_p = true;
            }
          else
            type_signature[op_num] = 's';

          /* Skip an internal operand for vget_{low, high}.  */
          if (qualifiers & qualifier_internal)
            continue;

          /* Some builtins have different user-facing types
             for certain arguments, encoded in d->mode.  */
          if (qualifiers & qualifier_map_mode)
            op_mode = d->mode;

          /* For pointers, we want a pointer to the basic type
             of the vector.  */
          if (qualifiers & qualifier_pointer && VECTOR_MODE_P (op_mode))
            op_mode = GET_MODE_INNER (op_mode);

          eltype = aarch64_simd_builtin_type
                     (op_mode,
                      (qualifiers & qualifier_unsigned) != 0,
                      (qualifiers & qualifier_poly) != 0);
          gcc_assert (eltype != NULL);

          /* Add qualifiers.  */
          if (qualifiers & qualifier_const)
            eltype = build_qualified_type (eltype, TYPE_QUAL_CONST);

          if (qualifiers & qualifier_pointer)
            eltype = build_pointer_type (eltype);

          /* If we have reached arg_num == 0, we are at a non-void
             return type.  Otherwise, we are still processing
             arguments.  */
          if (arg_num == 0)
            return_type = eltype;
          else
            args = tree_cons (NULL_TREE, eltype, args);
        }

      ftype = build_function_type (return_type, args);

      gcc_assert (ftype != NULL);

      if (print_type_signature_p)
        snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s_%s",
                  d->name, type_signature);
      else
        snprintf (namebuf, sizeof (namebuf), "__builtin_aarch64_%s",
                  d->name);

      fndecl = aarch64_general_add_builtin (namebuf, ftype, fcode);
      aarch64_builtin_decls[fcode] = fndecl;
    }

  /* Initialize the remaining fcmla_laneq intrinsics.  */
  aarch64_init_fcmla_laneq_builtins ();
}
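
/* The loop above yields names of the form "__builtin_aarch64_<name><mode>",
   with a signature suffix such as "_uus" appended whenever any operand is
   unsigned or polynomial; e.g. a hypothetical addp/v8qi entry would be
   registered as "__builtin_aarch64_addpv8qi".  */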
|
1021
|
|
1022 static void
|
|
1023 aarch64_init_crc32_builtins ()
|
|
1024 {
|
|
1025 tree usi_type = aarch64_simd_builtin_std_type (SImode, qualifier_unsigned);
|
|
1026 unsigned int i = 0;
|
|
1027
|
|
1028 for (i = 0; i < ARRAY_SIZE (aarch64_crc_builtin_data); ++i)
|
|
1029 {
|
|
1030 aarch64_crc_builtin_datum* d = &aarch64_crc_builtin_data[i];
|
|
1031 tree argtype = aarch64_simd_builtin_std_type (d->mode,
|
|
1032 qualifier_unsigned);
|
|
1033 tree ftype = build_function_type_list (usi_type, usi_type, argtype, NULL_TREE);
|
145
|
1034 tree fndecl = aarch64_general_add_builtin (d->name, ftype, d->fcode);
|
111
|
1035
|
|
1036 aarch64_builtin_decls[d->fcode] = fndecl;
|
|
1037 }
|
|
1038 }
|
|
1039
|
|
1040 /* Add builtins for reciprocal square root. */
|
|
1041
|
|
1042 void
|
|
1043 aarch64_init_builtin_rsqrt (void)
|
|
1044 {
|
|
1045 tree fndecl = NULL;
|
|
1046 tree ftype = NULL;
|
|
1047
|
|
1048 tree V2SF_type_node = build_vector_type (float_type_node, 2);
|
|
1049 tree V2DF_type_node = build_vector_type (double_type_node, 2);
|
|
1050 tree V4SF_type_node = build_vector_type (float_type_node, 4);
|
|
1051
|
|
1052 struct builtin_decls_data
|
|
1053 {
|
|
1054 tree type_node;
|
|
1055 const char *builtin_name;
|
|
1056 int function_code;
|
|
1057 };
|
|
1058
|
|
1059 builtin_decls_data bdda[] =
|
|
1060 {
|
|
1061 { double_type_node, "__builtin_aarch64_rsqrt_df", AARCH64_BUILTIN_RSQRT_DF },
|
|
1062 { float_type_node, "__builtin_aarch64_rsqrt_sf", AARCH64_BUILTIN_RSQRT_SF },
|
|
1063 { V2DF_type_node, "__builtin_aarch64_rsqrt_v2df", AARCH64_BUILTIN_RSQRT_V2DF },
|
|
1064 { V2SF_type_node, "__builtin_aarch64_rsqrt_v2sf", AARCH64_BUILTIN_RSQRT_V2SF },
|
|
1065 { V4SF_type_node, "__builtin_aarch64_rsqrt_v4sf", AARCH64_BUILTIN_RSQRT_V4SF }
|
|
1066 };
|
|
1067
|
|
1068 builtin_decls_data *bdd = bdda;
|
|
1069 builtin_decls_data *bdd_end = bdd + (sizeof (bdda) / sizeof (builtin_decls_data));
|
|
1070
|
|
1071 for (; bdd < bdd_end; bdd++)
|
|
1072 {
|
|
1073 ftype = build_function_type_list (bdd->type_node, bdd->type_node, NULL_TREE);
|
145
|
1074 fndecl = aarch64_general_add_builtin (bdd->builtin_name,
|
|
1075 ftype, bdd->function_code);
|
111
|
1076 aarch64_builtin_decls[bdd->function_code] = fndecl;
|
|
1077 }
|
|
1078 }
|
|
1079
|
|
1080 /* Initialize the backend types that support the user-visible __fp16
|
|
1081 type, also initialize a pointer to that type, to be used when
|
|
1082 forming HFAs. */
|
|
1083
|
|
1084 static void
|
|
1085 aarch64_init_fp16_types (void)
|
|
1086 {
|
|
1087 aarch64_fp16_type_node = make_node (REAL_TYPE);
|
|
1088 TYPE_PRECISION (aarch64_fp16_type_node) = 16;
|
|
1089 layout_type (aarch64_fp16_type_node);
|
|
1090
|
|
1091 (*lang_hooks.types.register_builtin_type) (aarch64_fp16_type_node, "__fp16");
|
|
1092 aarch64_fp16_ptr_type_node = build_pointer_type (aarch64_fp16_type_node);
|
|
1093 }
|
|
1094
|
145
|
1095 /* Initialize the backend REAL_TYPE type supporting bfloat types. */
|
|
1096 static void
|
|
1097 aarch64_init_bf16_types (void)
|
|
1098 {
|
|
1099 aarch64_bf16_type_node = make_node (REAL_TYPE);
|
|
1100 TYPE_PRECISION (aarch64_bf16_type_node) = 16;
|
|
1101 SET_TYPE_MODE (aarch64_bf16_type_node, BFmode);
|
|
1102 layout_type (aarch64_bf16_type_node);
|
|
1103
|
|
1104 lang_hooks.types.register_builtin_type (aarch64_bf16_type_node, "__bf16");
|
|
1105 aarch64_bf16_ptr_type_node = build_pointer_type (aarch64_bf16_type_node);
|
|
1106 }
|
|
1107
|
111
|
1108 /* Pointer authentication builtins that will become NOP on legacy platform.
|
|
1109 Currently, these builtins are for internal use only (libgcc EH unwinder). */
|
|
1110
|
|
1111 void
|
|
1112 aarch64_init_pauth_hint_builtins (void)
|
|
1113 {
|
|
1114 /* Pointer Authentication builtins. */
|
|
1115 tree ftype_pointer_auth
|
|
1116 = build_function_type_list (ptr_type_node, ptr_type_node,
|
|
1117 unsigned_intDI_type_node, NULL_TREE);
|
|
1118 tree ftype_pointer_strip
|
|
1119 = build_function_type_list (ptr_type_node, ptr_type_node, NULL_TREE);
|
|
1120
|
|
1121 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIA1716]
|
145
|
1122 = aarch64_general_add_builtin ("__builtin_aarch64_autia1716",
|
|
1123 ftype_pointer_auth,
|
|
1124 AARCH64_PAUTH_BUILTIN_AUTIA1716);
|
111
|
1125 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIA1716]
|
145
|
1126 = aarch64_general_add_builtin ("__builtin_aarch64_pacia1716",
|
|
1127 ftype_pointer_auth,
|
|
1128 AARCH64_PAUTH_BUILTIN_PACIA1716);
|
|
1129 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_AUTIB1716]
|
|
1130 = aarch64_general_add_builtin ("__builtin_aarch64_autib1716",
|
|
1131 ftype_pointer_auth,
|
|
1132 AARCH64_PAUTH_BUILTIN_AUTIB1716);
|
|
1133 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_PACIB1716]
|
|
1134 = aarch64_general_add_builtin ("__builtin_aarch64_pacib1716",
|
|
1135 ftype_pointer_auth,
|
|
1136 AARCH64_PAUTH_BUILTIN_PACIB1716);
|
111
|
1137 aarch64_builtin_decls[AARCH64_PAUTH_BUILTIN_XPACLRI]
|
145
|
1138 = aarch64_general_add_builtin ("__builtin_aarch64_xpaclri",
|
|
1139 ftype_pointer_strip,
|
|
1140 AARCH64_PAUTH_BUILTIN_XPACLRI);
|
|
1141 }
|
|
1142
|
|
1143 /* Initialize the transactional memory extension (TME) builtins. */
|
|
1144 static void
|
|
1145 aarch64_init_tme_builtins (void)
|
|
1146 {
|
|
1147 tree ftype_uint64_void
|
|
1148 = build_function_type_list (uint64_type_node, NULL);
|
|
1149 tree ftype_void_void
|
|
1150 = build_function_type_list (void_type_node, NULL);
|
|
1151 tree ftype_void_uint64
|
|
1152 = build_function_type_list (void_type_node, uint64_type_node, NULL);
|
|
1153
|
|
1154 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TSTART]
|
|
1155 = aarch64_general_add_builtin ("__builtin_aarch64_tstart",
|
|
1156 ftype_uint64_void,
|
|
1157 AARCH64_TME_BUILTIN_TSTART);
|
|
1158 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TTEST]
|
|
1159 = aarch64_general_add_builtin ("__builtin_aarch64_ttest",
|
|
1160 ftype_uint64_void,
|
|
1161 AARCH64_TME_BUILTIN_TTEST);
|
|
1162 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCOMMIT]
|
|
1163 = aarch64_general_add_builtin ("__builtin_aarch64_tcommit",
|
|
1164 ftype_void_void,
|
|
1165 AARCH64_TME_BUILTIN_TCOMMIT);
|
|
1166 aarch64_builtin_decls[AARCH64_TME_BUILTIN_TCANCEL]
|
|
1167 = aarch64_general_add_builtin ("__builtin_aarch64_tcancel",
|
|
1168 ftype_void_uint64,
|
|
1169 AARCH64_TME_BUILTIN_TCANCEL);
|
111
|
1170 }
|
|
1171
|
145
|
1172 /* Add builtins for Random Number instructions. */
|
|
1173
|
|
1174 static void
|
|
1175 aarch64_init_rng_builtins (void)
|
|
1176 {
|
|
1177 tree unsigned_ptr_type = build_pointer_type (unsigned_intDI_type_node);
|
|
1178 tree ftype
|
|
1179 = build_function_type_list (integer_type_node, unsigned_ptr_type, NULL);
|
|
1180 aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDR]
|
|
1181 = aarch64_general_add_builtin ("__builtin_aarch64_rndr", ftype,
|
|
1182 AARCH64_BUILTIN_RNG_RNDR);
|
|
1183 aarch64_builtin_decls[AARCH64_BUILTIN_RNG_RNDRRS]
|
|
1184 = aarch64_general_add_builtin ("__builtin_aarch64_rndrrs", ftype,
|
|
1185 AARCH64_BUILTIN_RNG_RNDRRS);
|
|
1186 }
|
|
1187
|
|
1188 /* Initialize the memory tagging extension (MTE) builtins. */
|
|
1189 struct
|
|
1190 {
|
|
1191 tree ftype;
|
|
1192 enum insn_code icode;
|
|
1193 } aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_END -
|
|
1194 AARCH64_MEMTAG_BUILTIN_START - 1];
|
|
1195
|
|
1196 static void
|
|
1197 aarch64_init_memtag_builtins (void)
|
|
1198 {
|
|
1199 tree fntype = NULL;
|
|
1200
|
|
1201 #define AARCH64_INIT_MEMTAG_BUILTINS_DECL(F, N, I, T) \
|
|
1202 aarch64_builtin_decls[AARCH64_MEMTAG_BUILTIN_##F] \
|
|
1203 = aarch64_general_add_builtin ("__builtin_aarch64_memtag_"#N, \
|
|
1204 T, AARCH64_MEMTAG_BUILTIN_##F); \
|
|
1205 aarch64_memtag_builtin_data[AARCH64_MEMTAG_BUILTIN_##F - \
|
|
1206 AARCH64_MEMTAG_BUILTIN_START - 1] = \
|
|
1207 {T, CODE_FOR_##I};
|
|
1208
|
|
1209 fntype = build_function_type_list (ptr_type_node, ptr_type_node,
|
|
1210 uint64_type_node, NULL);
|
|
1211 AARCH64_INIT_MEMTAG_BUILTINS_DECL (IRG, irg, irg, fntype);
|
|
1212
|
|
1213 fntype = build_function_type_list (uint64_type_node, ptr_type_node,
|
|
1214 uint64_type_node, NULL);
|
|
1215 AARCH64_INIT_MEMTAG_BUILTINS_DECL (GMI, gmi, gmi, fntype);
|
|
1216
|
|
1217 fntype = build_function_type_list (ptrdiff_type_node, ptr_type_node,
|
|
1218 ptr_type_node, NULL);
|
|
1219 AARCH64_INIT_MEMTAG_BUILTINS_DECL (SUBP, subp, subp, fntype);
|
|
1220
|
|
1221 fntype = build_function_type_list (ptr_type_node, ptr_type_node,
|
|
1222 unsigned_type_node, NULL);
|
|
1223 AARCH64_INIT_MEMTAG_BUILTINS_DECL (INC_TAG, inc_tag, addg, fntype);
|
|
1224
|
|
1225 fntype = build_function_type_list (void_type_node, ptr_type_node, NULL);
|
|
1226 AARCH64_INIT_MEMTAG_BUILTINS_DECL (SET_TAG, set_tag, stg, fntype);
|
|
1227
|
|
1228 fntype = build_function_type_list (ptr_type_node, ptr_type_node, NULL);
|
|
1229 AARCH64_INIT_MEMTAG_BUILTINS_DECL (GET_TAG, get_tag, ldg, fntype);
|
|
1230
|
|
1231 #undef AARCH64_INIT_MEMTAG_BUILTINS_DECL
|
|
1232 }
|
|
1233
|
|
1234 /* Initialize all builtins in the AARCH64_BUILTIN_GENERAL group. */
|
|
1235
|
111
|
1236 void
|
145
|
1237 aarch64_general_init_builtins (void)
|
111
|
1238 {
|
|
1239 tree ftype_set_fpr
|
|
1240 = build_function_type_list (void_type_node, unsigned_type_node, NULL);
|
|
1241 tree ftype_get_fpr
|
|
1242 = build_function_type_list (unsigned_type_node, NULL);
|
|
1243
|
|
1244 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR]
|
145
|
1245 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpcr",
|
|
1246 ftype_get_fpr,
|
|
1247 AARCH64_BUILTIN_GET_FPCR);
|
111
|
1248 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR]
|
145
|
1249 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpcr",
|
|
1250 ftype_set_fpr,
|
|
1251 AARCH64_BUILTIN_SET_FPCR);
|
111
|
1252 aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR]
|
145
|
1253 = aarch64_general_add_builtin ("__builtin_aarch64_get_fpsr",
|
|
1254 ftype_get_fpr,
|
|
1255 AARCH64_BUILTIN_GET_FPSR);
|
111
|
1256 aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR]
|
145
|
1257 = aarch64_general_add_builtin ("__builtin_aarch64_set_fpsr",
|
|
1258 ftype_set_fpr,
|
|
1259 AARCH64_BUILTIN_SET_FPSR);
|
111
|
1260
|
|
1261 aarch64_init_fp16_types ();
|
|
1262
|
145
|
1263 aarch64_init_bf16_types ();
|
|
1264
|
111
|
1265 if (TARGET_SIMD)
|
|
1266 aarch64_init_simd_builtins ();
|
|
1267
|
|
1268 aarch64_init_crc32_builtins ();
|
|
1269 aarch64_init_builtin_rsqrt ();
|
145
|
1270 aarch64_init_rng_builtins ();
|
|
1271
|
|
1272 tree ftype_jcvt
|
|
1273 = build_function_type_list (intSI_type_node, double_type_node, NULL);
|
|
1274 aarch64_builtin_decls[AARCH64_JSCVT]
|
|
1275 = aarch64_general_add_builtin ("__builtin_aarch64_jcvtzs", ftype_jcvt,
|
|
1276 AARCH64_JSCVT);
|
111
|
1277
|
|
1278 /* Initialize pointer authentication builtins which are backed by instructions
|
|
1279 in NOP encoding space.
|
|
1280
|
|
1281 NOTE: these builtins are supposed to be used by libgcc unwinder only, as
|
|
1282 there is no support on return address signing under ILP32, we don't
|
|
1283 register them. */
|
|
1284 if (!TARGET_ILP32)
|
|
1285 aarch64_init_pauth_hint_builtins ();
|
145
|
1286
|
|
1287 if (TARGET_TME)
|
|
1288 aarch64_init_tme_builtins ();
|
|
1289
|
|
1290 if (TARGET_MEMTAG)
|
|
1291 aarch64_init_memtag_builtins ();
|
111
|
1292 }
|
|
1293
|
145
|
1294 /* Implement TARGET_BUILTIN_DECL for the AARCH64_BUILTIN_GENERAL group. */
|
111
|
1295 tree
|
145
|
1296 aarch64_general_builtin_decl (unsigned code, bool)
|
111
|
1297 {
|
|
1298 if (code >= AARCH64_BUILTIN_MAX)
|
|
1299 return error_mark_node;
|
|
1300
|
|
1301 return aarch64_builtin_decls[code];
|
|
1302 }
|
|
1303
|
|
1304 typedef enum
|
|
1305 {
|
|
1306 SIMD_ARG_COPY_TO_REG,
|
|
1307 SIMD_ARG_CONSTANT,
|
|
1308 SIMD_ARG_LANE_INDEX,
|
|
1309 SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX,
|
145
|
1310 SIMD_ARG_LANE_PAIR_INDEX,
|
|
1311 SIMD_ARG_LANE_QUADTUP_INDEX,
|
111
|
1312 SIMD_ARG_STOP
|
|
1313 } builtin_simd_arg;

/* Expand a call EXP to a builtin whose pattern has insn code ICODE.  The
   operand kinds are given by the SIMD_ARG_STOP-terminated array ARGS,
   HAVE_RETVAL is nonzero if the builtin returns a value, BUILTIN_MODE is
   the builtin's vector mode and TARGET is a suggested place for the
   result.  */
static rtx
aarch64_simd_expand_args (rtx target, int icode, int have_retval,
			  tree exp, builtin_simd_arg *args,
			  machine_mode builtin_mode)
{
  rtx pat;
  rtx op[SIMD_MAX_BUILTIN_ARGS + 1]; /* First element for result operand.  */
  int opc = 0;

  if (have_retval)
    {
      machine_mode tmode = insn_data[icode].operand[0].mode;
      if (!target
	  || GET_MODE (target) != tmode
	  || !(*insn_data[icode].operand[0].predicate) (target, tmode))
	target = gen_reg_rtx (tmode);
      op[opc++] = target;
    }

  for (;;)
    {
      builtin_simd_arg thisarg = args[opc - have_retval];

      if (thisarg == SIMD_ARG_STOP)
	break;
      else
	{
	  tree arg = CALL_EXPR_ARG (exp, opc - have_retval);
	  machine_mode mode = insn_data[icode].operand[opc].mode;
	  op[opc] = expand_normal (arg);

	  switch (thisarg)
	    {
	    case SIMD_ARG_COPY_TO_REG:
	      if (POINTER_TYPE_P (TREE_TYPE (arg)))
		op[opc] = convert_memory_address (Pmode, op[opc]);
	      /* gcc_assert (GET_MODE (op[opc]) == mode);  */
	      if (!(*insn_data[icode].operand[opc].predicate)
		  (op[opc], mode))
		op[opc] = copy_to_mode_reg (mode, op[opc]);
	      break;

	    case SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX:
	      gcc_assert (opc > 1);
	      if (CONST_INT_P (op[opc]))
		{
		  unsigned int nunits
		    = GET_MODE_NUNITS (builtin_mode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  op[opc] = aarch64_endian_lane_rtx (builtin_mode,
						     INTVAL (op[opc]));
		}
	      goto constant_arg;

	    case SIMD_ARG_LANE_INDEX:
	      /* Must be a previous operand into which this is an index.  */
	      gcc_assert (opc > 0);
	      if (CONST_INT_P (op[opc]))
		{
		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
		  unsigned int nunits
		    = GET_MODE_NUNITS (vmode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  op[opc] = aarch64_endian_lane_rtx (vmode, INTVAL (op[opc]));
		}
	      /* If the lane index isn't a constant then error out.  */
	      goto constant_arg;

	    case SIMD_ARG_LANE_PAIR_INDEX:
	      /* Must be a previous operand into which this is an index and
		 the index is restricted to nunits / 2.  */
	      gcc_assert (opc > 0);
	      if (CONST_INT_P (op[opc]))
		{
		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
		  unsigned int nunits
		    = GET_MODE_NUNITS (vmode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits / 2, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  int lane = INTVAL (op[opc]);
		  op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 2, lane),
					  SImode);
		}
	      /* If the lane index isn't a constant then error out.  */
	      goto constant_arg;

	    case SIMD_ARG_LANE_QUADTUP_INDEX:
	      /* Must be a previous operand into which this is an index and
		 the index is restricted to nunits / 4.  */
	      gcc_assert (opc > 0);
	      if (CONST_INT_P (op[opc]))
		{
		  machine_mode vmode = insn_data[icode].operand[opc - 1].mode;
		  unsigned int nunits
		    = GET_MODE_NUNITS (vmode).to_constant ();
		  aarch64_simd_lane_bounds (op[opc], 0, nunits / 4, exp);
		  /* Keep to GCC-vector-extension lane indices in the RTL.  */
		  int lane = INTVAL (op[opc]);
		  op[opc] = gen_int_mode (ENDIAN_LANE_N (nunits / 4, lane),
					  SImode);
		}
	      /* If the lane index isn't a constant then error out.  */
	      goto constant_arg;

	    case SIMD_ARG_CONSTANT:
	    constant_arg:
	      if (!(*insn_data[icode].operand[opc].predicate)
		  (op[opc], mode))
		{
		  error ("%Kargument %d must be a constant immediate",
			 exp, opc + 1 - have_retval);
		  return const0_rtx;
		}
	      break;

	    case SIMD_ARG_STOP:
	      gcc_unreachable ();
	    }

	  opc++;
	}
    }

  switch (opc)
    {
    case 1:
      pat = GEN_FCN (icode) (op[0]);
      break;

    case 2:
      pat = GEN_FCN (icode) (op[0], op[1]);
      break;

    case 3:
      pat = GEN_FCN (icode) (op[0], op[1], op[2]);
      break;

    case 4:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
      break;

    case 5:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
      break;

    case 6:
      pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
      break;

    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);

  return target;
}

/* Expand an AArch64 AdvSIMD builtin (intrinsic).  */
rtx
aarch64_simd_expand_builtin (int fcode, tree exp, rtx target)
{
  if (fcode == AARCH64_SIMD_BUILTIN_LANE_CHECK)
    {
      rtx totalsize = expand_normal (CALL_EXPR_ARG (exp, 0));
      rtx elementsize = expand_normal (CALL_EXPR_ARG (exp, 1));
      if (CONST_INT_P (totalsize) && CONST_INT_P (elementsize)
	  && UINTVAL (elementsize) != 0
	  && UINTVAL (totalsize) != 0)
	{
	  rtx lane_idx = expand_normal (CALL_EXPR_ARG (exp, 2));
	  if (CONST_INT_P (lane_idx))
	    aarch64_simd_lane_bounds (lane_idx, 0,
				      UINTVAL (totalsize)
				      / UINTVAL (elementsize),
				      exp);
	  else
	    error ("%Klane index must be a constant immediate", exp);
	}
      else
	error ("%Ktotal size and element size must be a non-zero "
	       "constant immediate", exp);
      /* Don't generate any RTL.  */
      return const0_rtx;
    }
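
  /* Illustrative sketch, not part of the original file: arm_neon.h-style
     lane checks funnel into the code above as a call of roughly this
     shape, where V is a vector value and IDX the lane number:

       __builtin_aarch64_im_lane_boundsi (sizeof (V), sizeof (V[0]), IDX);

     The exact spelling in the header is an assumption of the example; the
     point is that all three arguments must fold to non-zero constants for
     the bounds check to run.  */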

  aarch64_simd_builtin_datum *d =
    &aarch64_simd_builtin_data[fcode - AARCH64_SIMD_PATTERN_START];
  enum insn_code icode = d->code;
  builtin_simd_arg args[SIMD_MAX_BUILTIN_ARGS + 1];
  int num_args = insn_data[d->code].n_operands;
  int is_void = 0;
  int k;

  is_void = !!(d->qualifiers[0] & qualifier_void);

  num_args += is_void;

  for (k = 1; k < num_args; k++)
    {
      /* We have four arrays of data, each indexed in a different fashion.
	 qualifiers - element 0 always describes the function return type.
	 operands - element 0 is either the operand for return value (if
	 the function has a non-void return type) or the operand for the
	 first argument.
	 expr_args - element 0 always holds the first argument.
	 args - element 0 is always used for the return type.  */
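      /* Illustrative worked example, not in the original: for a void
	 builtin (is_void == 1) at k == 1, the first user argument,
	 qualifiers_k == 1, operands_k == 0 (the pattern's first operand)
	 and expr_args_k == 0 (the first call argument).  For a
	 value-returning builtin operands_k == k instead, because operand
	 0 of the pattern is the result.  */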
      int qualifiers_k = k;
      int operands_k = k - is_void;
      int expr_args_k = k - 1;

      if (d->qualifiers[qualifiers_k] & qualifier_lane_index)
	args[k] = SIMD_ARG_LANE_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_lane_pair_index)
	args[k] = SIMD_ARG_LANE_PAIR_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_lane_quadtup_index)
	args[k] = SIMD_ARG_LANE_QUADTUP_INDEX;
      else if (d->qualifiers[qualifiers_k]
	       & qualifier_struct_load_store_lane_index)
	args[k] = SIMD_ARG_STRUCT_LOAD_STORE_LANE_INDEX;
      else if (d->qualifiers[qualifiers_k] & qualifier_immediate)
	args[k] = SIMD_ARG_CONSTANT;
      else if (d->qualifiers[qualifiers_k] & qualifier_maybe_immediate)
	{
	  rtx arg
	    = expand_normal (CALL_EXPR_ARG (exp,
					    (expr_args_k)));
	  /* Handle constants only if the predicate allows it.  */
	  bool op_const_int_p =
	    (CONST_INT_P (arg)
	     && (*insn_data[icode].operand[operands_k].predicate)
		(arg, insn_data[icode].operand[operands_k].mode));
	  args[k] = op_const_int_p ? SIMD_ARG_CONSTANT : SIMD_ARG_COPY_TO_REG;
	}
      else
	args[k] = SIMD_ARG_COPY_TO_REG;
    }
  args[k] = SIMD_ARG_STOP;

  /* The interface to aarch64_simd_expand_args expects a 0 if
     the function is void, and a 1 if it is not.  */
  return aarch64_simd_expand_args
    (target, icode, !is_void, exp, &args[1], d->mode);
}

/* Expand a CRC32 builtin FCODE with call expression EXP, putting the
   result in TARGET if that is convenient.  */
rtx
aarch64_crc32_expand_builtin (int fcode, tree exp, rtx target)
{
  rtx pat;
  aarch64_crc_builtin_datum *d
    = &aarch64_crc_builtin_data[fcode - (AARCH64_CRC32_BUILTIN_BASE + 1)];
  enum insn_code icode = d->icode;
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  tree arg1 = CALL_EXPR_ARG (exp, 1);
  rtx op0 = expand_normal (arg0);
  rtx op1 = expand_normal (arg1);
  machine_mode tmode = insn_data[icode].operand[0].mode;
  machine_mode mode0 = insn_data[icode].operand[1].mode;
  machine_mode mode1 = insn_data[icode].operand[2].mode;

  if (! target
      || GET_MODE (target) != tmode
      || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
    target = gen_reg_rtx (tmode);

  gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
	      && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));

  if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
    op0 = copy_to_mode_reg (mode0, op0);
  if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
    op1 = copy_to_mode_reg (mode1, op1);

  pat = GEN_FCN (icode) (target, op0, op1);
  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}
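
/* Illustrative sketch, not part of the original file: a typical
   byte-at-a-time CRC loop served by the expander above, assuming the
   ACLE-style builtin name and user-supplied buf/len:

     unsigned int crc = 0xffffffffu;
     for (size_t i = 0; i < len; i++)
       crc = __builtin_aarch64_crc32b (crc, buf[i]);  */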

/* Function to expand reciprocal square root builtins.  */

static rtx
aarch64_expand_builtin_rsqrt (int fcode, tree exp, rtx target)
{
  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx op0 = expand_normal (arg0);

  rtx (*gen) (rtx, rtx);

  switch (fcode)
    {
    case AARCH64_BUILTIN_RSQRT_DF:
      gen = gen_rsqrtdf2;
      break;
    case AARCH64_BUILTIN_RSQRT_SF:
      gen = gen_rsqrtsf2;
      break;
    case AARCH64_BUILTIN_RSQRT_V2DF:
      gen = gen_rsqrtv2df2;
      break;
    case AARCH64_BUILTIN_RSQRT_V2SF:
      gen = gen_rsqrtv2sf2;
      break;
    case AARCH64_BUILTIN_RSQRT_V4SF:
      gen = gen_rsqrtv4sf2;
      break;
    default:
      gcc_unreachable ();
    }

  if (!target)
    target = gen_reg_rtx (GET_MODE (op0));

  emit_insn (gen (target, op0));

  return target;
}

/* Expand a FCMLA lane expression EXP with code FCODE and
   result going to TARGET if that is convenient.  */

rtx
aarch64_expand_fcmla_builtin (tree exp, rtx target, int fcode)
{
  int bcode = fcode - AARCH64_SIMD_FCMLA_LANEQ_BUILTIN_BASE - 1;
  aarch64_fcmla_laneq_builtin_datum* d
    = &aarch64_fcmla_lane_builtin_data[bcode];
  machine_mode quadmode = GET_MODE_2XWIDER_MODE (d->mode).require ();
  rtx op0 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 0)));
  rtx op1 = force_reg (d->mode, expand_normal (CALL_EXPR_ARG (exp, 1)));
  rtx op2 = force_reg (quadmode, expand_normal (CALL_EXPR_ARG (exp, 2)));
  tree tmp = CALL_EXPR_ARG (exp, 3);
  rtx lane_idx = expand_expr (tmp, NULL_RTX, VOIDmode, EXPAND_INITIALIZER);

  /* Validate that the lane index is a constant.  */
  if (!CONST_INT_P (lane_idx))
    {
      error ("%Kargument %d must be a constant immediate", exp, 4);
      return const0_rtx;
    }

  /* Validate that the index is within the expected range.  */
  int nunits = GET_MODE_NUNITS (quadmode).to_constant ();
  aarch64_simd_lane_bounds (lane_idx, 0, nunits / 2, exp);

  /* Generate the correct register and mode.  */
  int lane = INTVAL (lane_idx);

  if (lane < nunits / 4)
    op2 = simplify_gen_subreg (d->mode, op2, quadmode,
			       subreg_lowpart_offset (d->mode, quadmode));
  else
    {
      /* Select the upper 64 bits, either a V2SF or V4HF.  This is messy:
	 the operation, although conceptually simple, has no convenient
	 single RTL pattern, and the target-generic gen_highpart_mode
	 generates code that isn't optimal.  */
      rtx temp1 = gen_reg_rtx (d->mode);
      rtx temp2 = gen_reg_rtx (DImode);
      temp1 = simplify_gen_subreg (d->mode, op2, quadmode,
				   subreg_lowpart_offset (d->mode, quadmode));
      temp1 = simplify_gen_subreg (V2DImode, temp1, d->mode, 0);
      if (BYTES_BIG_ENDIAN)
	emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const0_rtx));
      else
	emit_insn (gen_aarch64_get_lanev2di (temp2, temp1, const1_rtx));
      op2 = simplify_gen_subreg (d->mode, temp2, GET_MODE (temp2), 0);

      /* And recalculate the index.  */
      lane -= nunits / 4;
    }

  /* Keep to GCC-vector-extension lane indices in the RTL.  Only nunits / 4
     lanes (the maximum left after the range check above) are valid, which
     means only lanes 0-1, so we only need to know the order within a V2
     mode.  */
  lane_idx = aarch64_endian_lane_rtx (V2DImode, lane);

  if (!target)
    target = gen_reg_rtx (d->mode);
  else
    target = force_reg (d->mode, target);

  rtx pat = NULL_RTX;

  if (d->lane)
    pat = GEN_FCN (d->icode) (target, op0, op1, op2, lane_idx);
  else
    pat = GEN_FCN (d->icode) (target, op0, op1, op2);

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}

/* Function to expand an expression EXP which calls one of the Transactional
   Memory Extension (TME) builtins FCODE with the result going to TARGET.  */
static rtx
aarch64_expand_builtin_tme (int fcode, tree exp, rtx target)
{
  switch (fcode)
    {
    case AARCH64_TME_BUILTIN_TSTART:
      target = gen_reg_rtx (DImode);
      emit_insn (GEN_FCN (CODE_FOR_tstart) (target));
      break;

    case AARCH64_TME_BUILTIN_TTEST:
      target = gen_reg_rtx (DImode);
      emit_insn (GEN_FCN (CODE_FOR_ttest) (target));
      break;

    case AARCH64_TME_BUILTIN_TCOMMIT:
      emit_insn (GEN_FCN (CODE_FOR_tcommit) ());
      break;

    case AARCH64_TME_BUILTIN_TCANCEL:
      {
	tree arg0 = CALL_EXPR_ARG (exp, 0);
	rtx op0 = expand_normal (arg0);
	/* TCANCEL takes a 16-bit immediate, so 65535 is the largest
	   valid value.  */
	if (CONST_INT_P (op0) && UINTVAL (op0) <= 65535)
	  emit_insn (GEN_FCN (CODE_FOR_tcancel) (op0));
	else
	  {
	    error ("%Kargument must be a 16-bit constant immediate", exp);
	    return const0_rtx;
	  }
      }
      break;

    default:
      gcc_unreachable ();
    }
  return target;
}
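
/* Illustrative sketch, not part of the original file: a minimal
   transaction in terms of the builtins expanded above.  TSTART returns
   zero when the transaction starts successfully; handle_failure () is a
   placeholder invented for the example:

     uint64_t status = __builtin_aarch64_tstart ();
     if (status == 0)
       {
	 ... transactional code ...
	 __builtin_aarch64_tcommit ();
       }
     else
       handle_failure (status);  */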

/* Expand a random number builtin EXP with code FCODE, putting the result
   in TARGET.  If IGNORE is true the return value is ignored.  */

rtx
aarch64_expand_rng_builtin (tree exp, rtx target, int fcode, int ignore)
{
  rtx pat;
  enum insn_code icode;
  if (fcode == AARCH64_BUILTIN_RNG_RNDR)
    icode = CODE_FOR_aarch64_rndr;
  else if (fcode == AARCH64_BUILTIN_RNG_RNDRRS)
    icode = CODE_FOR_aarch64_rndrrs;
  else
    gcc_unreachable ();

  rtx rand = gen_reg_rtx (DImode);
  pat = GEN_FCN (icode) (rand);
  if (!pat)
    return NULL_RTX;

  tree arg0 = CALL_EXPR_ARG (exp, 0);
  rtx res_addr = expand_normal (arg0);
  res_addr = convert_memory_address (Pmode, res_addr);
  rtx res_mem = gen_rtx_MEM (DImode, res_addr);
  emit_insn (pat);
  emit_move_insn (res_mem, rand);
  /* If the status result is unused don't generate the CSET code.  */
  if (ignore)
    return target;

  rtx cc_reg = gen_rtx_REG (CC_Zmode, CC_REGNUM);
  rtx cmp_rtx = gen_rtx_fmt_ee (NE, SImode, cc_reg, const0_rtx);
  emit_insn (gen_aarch64_cstoresi (target, cmp_rtx, cc_reg));
  return target;
}
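
/* Illustrative sketch, not part of the original file: the RNG builtins
   store the 64-bit random value through their pointer argument and
   return a status word; assuming the ACLE contract that the status is
   zero on success:

     uint64_t value;
     int failed = __builtin_aarch64_rndr (&value);  */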

/* Expand an expression EXP that calls a MEMTAG built-in FCODE
   with result going to TARGET.  */
static rtx
aarch64_expand_builtin_memtag (int fcode, tree exp, rtx target)
{
  if (TARGET_ILP32)
    {
      error ("Memory Tagging Extension does not support %<-mabi=ilp32%>");
      return const0_rtx;
    }

  rtx pat = NULL;
  enum insn_code icode = aarch64_memtag_builtin_data[fcode -
			   AARCH64_MEMTAG_BUILTIN_START - 1].icode;

  rtx op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
  machine_mode mode0 = GET_MODE (op0);
  op0 = force_reg (mode0 == VOIDmode ? DImode : mode0, op0);
  op0 = convert_to_mode (DImode, op0, true);

  switch (fcode)
    {
    case AARCH64_MEMTAG_BUILTIN_IRG:
    case AARCH64_MEMTAG_BUILTIN_GMI:
    case AARCH64_MEMTAG_BUILTIN_SUBP:
    case AARCH64_MEMTAG_BUILTIN_INC_TAG:
      {
	if (! target
	    || GET_MODE (target) != DImode
	    || ! (*insn_data[icode].operand[0].predicate) (target, DImode))
	  target = gen_reg_rtx (DImode);

	if (fcode == AARCH64_MEMTAG_BUILTIN_INC_TAG)
	  {
	    rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));

	    if ((*insn_data[icode].operand[3].predicate) (op1, QImode))
	      {
		pat = GEN_FCN (icode) (target, op0, const0_rtx, op1);
		break;
	      }
	    error ("%Kargument %d must be a constant immediate "
		   "in range [0,15]", exp, 2);
	    return const0_rtx;
	  }
	else
	  {
	    rtx op1 = expand_normal (CALL_EXPR_ARG (exp, 1));
	    machine_mode mode1 = GET_MODE (op1);
	    op1 = force_reg (mode1 == VOIDmode ? DImode : mode1, op1);
	    op1 = convert_to_mode (DImode, op1, true);
	    pat = GEN_FCN (icode) (target, op0, op1);
	  }
	break;
      }
    case AARCH64_MEMTAG_BUILTIN_GET_TAG:
      target = op0;
      pat = GEN_FCN (icode) (target, op0, const0_rtx);
      break;
    case AARCH64_MEMTAG_BUILTIN_SET_TAG:
      pat = GEN_FCN (icode) (op0, op0, const0_rtx);
      break;
    default:
      gcc_unreachable ();
    }

  if (!pat)
    return NULL_RTX;

  emit_insn (pat);
  return target;
}
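
/* Illustrative sketch, not part of the original file: user code normally
   reaches these builtins through the arm_acle.h MTE intrinsics.  A direct
   call that derives a tagged pointer and then colours its granule might
   look like this (the builtin spellings are an assumption of the
   example):

     void *p = __builtin_aarch64_memtag_irg (buf, 0);
     __builtin_aarch64_memtag_set_tag (p);  */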

/* Expand an expression EXP that calls built-in function FCODE,
   with result going to TARGET if that's convenient.  IGNORE is true
   if the result of the builtin is ignored.  */
rtx
aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
				int ignore)
{
  int icode;
  rtx pat, op0;
  tree arg0;

  switch (fcode)
    {
    case AARCH64_BUILTIN_GET_FPCR:
    case AARCH64_BUILTIN_SET_FPCR:
    case AARCH64_BUILTIN_GET_FPSR:
    case AARCH64_BUILTIN_SET_FPSR:
      if (fcode == AARCH64_BUILTIN_GET_FPCR
	  || fcode == AARCH64_BUILTIN_GET_FPSR)
	{
	  icode = (fcode == AARCH64_BUILTIN_GET_FPSR
		   ? CODE_FOR_get_fpsr : CODE_FOR_get_fpcr);
	  target = gen_reg_rtx (SImode);
	  pat = GEN_FCN (icode) (target);
	}
      else
	{
	  target = NULL_RTX;
	  icode = (fcode == AARCH64_BUILTIN_SET_FPSR
		   ? CODE_FOR_set_fpsr : CODE_FOR_set_fpcr);
	  arg0 = CALL_EXPR_ARG (exp, 0);
	  op0 = force_reg (SImode, expand_normal (arg0));
	  pat = GEN_FCN (icode) (op0);
	}
      emit_insn (pat);
      return target;

    case AARCH64_PAUTH_BUILTIN_AUTIA1716:
    case AARCH64_PAUTH_BUILTIN_PACIA1716:
    case AARCH64_PAUTH_BUILTIN_AUTIB1716:
    case AARCH64_PAUTH_BUILTIN_PACIB1716:
    case AARCH64_PAUTH_BUILTIN_XPACLRI:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = force_reg (Pmode, expand_normal (arg0));

      if (!target)
	target = gen_reg_rtx (Pmode);
      else
	target = force_reg (Pmode, target);

      emit_move_insn (target, op0);

      if (fcode == AARCH64_PAUTH_BUILTIN_XPACLRI)
	{
	  rtx lr = gen_rtx_REG (Pmode, R30_REGNUM);
	  icode = CODE_FOR_xpaclri;
	  emit_move_insn (lr, op0);
	  emit_insn (GEN_FCN (icode) ());
	  emit_move_insn (target, lr);
	}
      else
	{
	  tree arg1 = CALL_EXPR_ARG (exp, 1);
	  rtx op1 = force_reg (Pmode, expand_normal (arg1));
	  switch (fcode)
	    {
	    case AARCH64_PAUTH_BUILTIN_AUTIA1716:
	      icode = CODE_FOR_autia1716;
	      break;
	    case AARCH64_PAUTH_BUILTIN_AUTIB1716:
	      icode = CODE_FOR_autib1716;
	      break;
	    case AARCH64_PAUTH_BUILTIN_PACIA1716:
	      icode = CODE_FOR_pacia1716;
	      break;
	    case AARCH64_PAUTH_BUILTIN_PACIB1716:
	      icode = CODE_FOR_pacib1716;
	      break;
	    default:
	      icode = 0;
	      gcc_unreachable ();
	    }

	  rtx x16_reg = gen_rtx_REG (Pmode, R16_REGNUM);
	  rtx x17_reg = gen_rtx_REG (Pmode, R17_REGNUM);
	  emit_move_insn (x17_reg, op0);
	  emit_move_insn (x16_reg, op1);
	  emit_insn (GEN_FCN (icode) ());
	  emit_move_insn (target, x17_reg);
	}

      return target;

    case AARCH64_JSCVT:
      arg0 = CALL_EXPR_ARG (exp, 0);
      op0 = force_reg (DFmode, expand_normal (arg0));
      if (!target)
	target = gen_reg_rtx (SImode);
      else
	target = force_reg (SImode, target);
      emit_insn (GEN_FCN (CODE_FOR_aarch64_fjcvtzs) (target, op0));
      return target;

    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V2SF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ0_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ90_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ180_V4HF:
    case AARCH64_SIMD_BUILTIN_FCMLA_LANEQ270_V4HF:
      return aarch64_expand_fcmla_builtin (exp, target, fcode);
    case AARCH64_BUILTIN_RNG_RNDR:
    case AARCH64_BUILTIN_RNG_RNDRRS:
      return aarch64_expand_rng_builtin (exp, target, fcode, ignore);
    }

  if (fcode >= AARCH64_SIMD_BUILTIN_BASE && fcode <= AARCH64_SIMD_BUILTIN_MAX)
    return aarch64_simd_expand_builtin (fcode, exp, target);
  else if (fcode >= AARCH64_CRC32_BUILTIN_BASE
	   && fcode <= AARCH64_CRC32_BUILTIN_MAX)
    return aarch64_crc32_expand_builtin (fcode, exp, target);

  if (fcode == AARCH64_BUILTIN_RSQRT_DF
      || fcode == AARCH64_BUILTIN_RSQRT_SF
      || fcode == AARCH64_BUILTIN_RSQRT_V2DF
      || fcode == AARCH64_BUILTIN_RSQRT_V2SF
      || fcode == AARCH64_BUILTIN_RSQRT_V4SF)
    return aarch64_expand_builtin_rsqrt (fcode, exp, target);

  if (fcode == AARCH64_TME_BUILTIN_TSTART
      || fcode == AARCH64_TME_BUILTIN_TCOMMIT
      || fcode == AARCH64_TME_BUILTIN_TTEST
      || fcode == AARCH64_TME_BUILTIN_TCANCEL)
    return aarch64_expand_builtin_tme (fcode, exp, target);

  if (fcode >= AARCH64_MEMTAG_BUILTIN_START
      && fcode <= AARCH64_MEMTAG_BUILTIN_END)
    return aarch64_expand_builtin_memtag (fcode, exp, target);

  gcc_unreachable ();
}

/* Return the builtin decl that implements function FN on vectors of type
   TYPE_OUT when given inputs of type TYPE_IN, or NULL_TREE if no suitable
   builtin is available.  */
tree
aarch64_builtin_vectorized_function (unsigned int fn, tree type_out,
				     tree type_in)
{
  machine_mode in_mode, out_mode;

  if (TREE_CODE (type_out) != VECTOR_TYPE
      || TREE_CODE (type_in) != VECTOR_TYPE)
    return NULL_TREE;

  out_mode = TYPE_MODE (type_out);
  in_mode = TYPE_MODE (type_in);

#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) 1
#define AARCH64_FIND_FRINT_VARIANT(N) \
  (AARCH64_CHECK_BUILTIN_MODE (2, D) \
    ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2df] \
    : (AARCH64_CHECK_BUILTIN_MODE (4, S) \
	? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v4sf] \
	: (AARCH64_CHECK_BUILTIN_MODE (2, S) \
	   ? aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_##N##v2sf] \
	   : NULL_TREE)))
  switch (fn)
    {
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == V##C##N##Fmode && in_mode == V##C##N##Fmode)
    CASE_CFN_FLOOR:
      return AARCH64_FIND_FRINT_VARIANT (floor);
    CASE_CFN_CEIL:
      return AARCH64_FIND_FRINT_VARIANT (ceil);
    CASE_CFN_TRUNC:
      return AARCH64_FIND_FRINT_VARIANT (btrunc);
    CASE_CFN_ROUND:
      return AARCH64_FIND_FRINT_VARIANT (round);
    CASE_CFN_NEARBYINT:
      return AARCH64_FIND_FRINT_VARIANT (nearbyint);
    CASE_CFN_SQRT:
      return AARCH64_FIND_FRINT_VARIANT (sqrt);
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == V##C##SImode && in_mode == V##C##N##Imode)
    CASE_CFN_CLZ:
      {
	if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	  return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_clzv4si];
	return NULL_TREE;
      }
    CASE_CFN_CTZ:
      {
	if (AARCH64_CHECK_BUILTIN_MODE (2, S))
	  return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv2si];
	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	  return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_UNOP_ctzv4si];
	return NULL_TREE;
      }
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
  (out_mode == V##C##N##Imode && in_mode == V##C##N##Fmode)
    CASE_CFN_IFLOOR:
    CASE_CFN_LFLOOR:
    CASE_CFN_LLFLOOR:
      {
	enum aarch64_builtins builtin;
	if (AARCH64_CHECK_BUILTIN_MODE (2, D))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2dfv2di;
	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv4sfv4si;
	else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lfloorv2sfv2si;
	else
	  return NULL_TREE;

	return aarch64_builtin_decls[builtin];
      }
    CASE_CFN_ICEIL:
    CASE_CFN_LCEIL:
    CASE_CFN_LLCEIL:
      {
	enum aarch64_builtins builtin;
	if (AARCH64_CHECK_BUILTIN_MODE (2, D))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2dfv2di;
	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv4sfv4si;
	else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lceilv2sfv2si;
	else
	  return NULL_TREE;

	return aarch64_builtin_decls[builtin];
      }
    CASE_CFN_IROUND:
    CASE_CFN_LROUND:
    CASE_CFN_LLROUND:
      {
	enum aarch64_builtins builtin;
	if (AARCH64_CHECK_BUILTIN_MODE (2, D))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2dfv2di;
	else if (AARCH64_CHECK_BUILTIN_MODE (4, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv4sfv4si;
	else if (AARCH64_CHECK_BUILTIN_MODE (2, S))
	  builtin = AARCH64_SIMD_BUILTIN_UNOP_lroundv2sfv2si;
	else
	  return NULL_TREE;

	return aarch64_builtin_decls[builtin];
      }
    default:
      return NULL_TREE;
    }

  return NULL_TREE;
}

/* Return builtin for reciprocal square root.  */

tree
aarch64_general_builtin_rsqrt (unsigned int fn)
{
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2df)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2DF];
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv2sf)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V2SF];
  if (fn == AARCH64_SIMD_BUILTIN_UNOP_sqrtv4sf)
    return aarch64_builtin_decls[AARCH64_BUILTIN_RSQRT_V4SF];
  return NULL_TREE;
}

#undef VAR1
#define VAR1(T, N, MAP, A) \
  case AARCH64_SIMD_BUILTIN_##T##_##N##A:

/* Try to fold a call to the built-in function with subcode FCODE.  The
   function is passed the N_ARGS arguments in ARGS and it returns a value
   of type TYPE.  Return the new expression on success and NULL_TREE on
   failure.  */
tree
aarch64_general_fold_builtin (unsigned int fcode, tree type,
			      unsigned int n_args ATTRIBUTE_UNUSED, tree *args)
{
  switch (fcode)
    {
      BUILTIN_VDQF (UNOP, abs, 2)
	return fold_build1 (ABS_EXPR, type, args[0]);
      VAR1 (UNOP, floatv2si, 2, v2sf)
      VAR1 (UNOP, floatv4si, 2, v4sf)
      VAR1 (UNOP, floatv2di, 2, v2df)
	return fold_build1 (FLOAT_EXPR, type, args[0]);
      default:
	break;
    }

  return NULL_TREE;
}

/* Try to fold STMT, given that it's a call to the built-in function with
   subcode FCODE.  Return the new statement on success and null on
   failure.  */
gimple *
aarch64_general_gimple_fold_builtin (unsigned int fcode, gcall *stmt)
{
  gimple *new_stmt = NULL;
  unsigned nargs = gimple_call_num_args (stmt);
  tree *args = (nargs > 0
		? gimple_call_arg_ptr (stmt, 0)
		: &error_mark_node);

  /* We use gimple's IFN_REDUC_(PLUS|MIN|MAX) internal functions for
     float, signed int and unsigned int; the internal function picks the
     right variant according to the types of the arguments to the
     builtin.  */
  switch (fcode)
    {
      BUILTIN_VALL (UNOP, reduc_plus_scal_, 10)
	new_stmt = gimple_build_call_internal (IFN_REDUC_PLUS,
					       1, args[0]);
	gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
	break;
      BUILTIN_VDQIF (UNOP, reduc_smax_scal_, 10)
      BUILTIN_VDQ_BHSI (UNOPU, reduc_umax_scal_, 10)
	new_stmt = gimple_build_call_internal (IFN_REDUC_MAX,
					       1, args[0]);
	gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
	break;
      BUILTIN_VDQIF (UNOP, reduc_smin_scal_, 10)
      BUILTIN_VDQ_BHSI (UNOPU, reduc_umin_scal_, 10)
	new_stmt = gimple_build_call_internal (IFN_REDUC_MIN,
					       1, args[0]);
	gimple_call_set_lhs (new_stmt, gimple_call_lhs (stmt));
	break;
      BUILTIN_GPF (BINOP, fmulx, 0)
	{
	  gcc_assert (nargs == 2);
	  bool a0_cst_p = TREE_CODE (args[0]) == REAL_CST;
	  bool a1_cst_p = TREE_CODE (args[1]) == REAL_CST;
	  if (a0_cst_p || a1_cst_p)
	    {
	      if (a0_cst_p && a1_cst_p)
		{
		  tree t0 = TREE_TYPE (args[0]);
		  real_value a0 = (TREE_REAL_CST (args[0]));
		  real_value a1 = (TREE_REAL_CST (args[1]));
		  if (real_equal (&a1, &dconst0))
		    std::swap (a0, a1);
		  /* According to real_equal (), +0 equals -0.  */
		  if (real_equal (&a0, &dconst0) && real_isinf (&a1))
		    {
		      real_value res = dconst2;
		      res.sign = a0.sign ^ a1.sign;
		      new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
						      REAL_CST,
						      build_real (t0, res));
		    }
		  else
		    new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
						    MULT_EXPR,
						    args[0], args[1]);
		}
	      else /* a0_cst_p ^ a1_cst_p.  */
		{
		  real_value const_part = a0_cst_p
		    ? TREE_REAL_CST (args[0]) : TREE_REAL_CST (args[1]);
		  if (!real_equal (&const_part, &dconst0)
		      && !real_isinf (&const_part))
		    new_stmt = gimple_build_assign (gimple_call_lhs (stmt),
						    MULT_EXPR, args[0],
						    args[1]);
		}
	    }
	  if (new_stmt)
	    {
	      gimple_set_vuse (new_stmt, gimple_vuse (stmt));
	      gimple_set_vdef (new_stmt, gimple_vdef (stmt));
	    }
	  break;
	}
    default:
      break;
    }
  return new_stmt;
}
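
/* Illustrative note, not in the original: the fmulx folding above encodes
   the special case fmulx (+/-0.0, +/-inf) == +/-2.0, with the result sign
   being the XOR of the operand signs.  For example a call equivalent to
   __builtin_aarch64_fmulxdf (0.0, -INFINITY) folds to the constant -2.0,
   while any other constant operand degenerates to an ordinary
   multiplication.  */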

/* Build the trees for the atomic FP-environment sequence: *HOLD saves the
   current FP control and status registers and masks new exceptions,
   *CLEAR clears the exception flags, and *UPDATE restores the environment
   and re-raises any exceptions that occurred meanwhile.  */
void
aarch64_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  const unsigned AARCH64_FE_INVALID = 1;
  const unsigned AARCH64_FE_DIVBYZERO = 2;
  const unsigned AARCH64_FE_OVERFLOW = 4;
  const unsigned AARCH64_FE_UNDERFLOW = 8;
  const unsigned AARCH64_FE_INEXACT = 16;
  const unsigned HOST_WIDE_INT AARCH64_FE_ALL_EXCEPT = (AARCH64_FE_INVALID
							| AARCH64_FE_DIVBYZERO
							| AARCH64_FE_OVERFLOW
							| AARCH64_FE_UNDERFLOW
							| AARCH64_FE_INEXACT);
  const unsigned HOST_WIDE_INT AARCH64_FE_EXCEPT_SHIFT = 8;
  tree fenv_cr, fenv_sr, get_fpcr, set_fpcr, mask_cr, mask_sr;
  tree ld_fenv_cr, ld_fenv_sr, masked_fenv_cr, masked_fenv_sr, hold_fnclex_cr;
  tree hold_fnclex_sr, new_fenv_var, reload_fenv, restore_fnenv, get_fpsr;
  tree set_fpsr, update_call, atomic_feraiseexcept, hold_fnclex, masked_fenv;
  tree ld_fenv;

  /* Generate the equivalent of:
       unsigned int fenv_cr;
       fenv_cr = __builtin_aarch64_get_fpcr ();

       unsigned int fenv_sr;
       fenv_sr = __builtin_aarch64_get_fpsr ();

     Now set all exceptions to non-stop:
       unsigned int mask_cr
	 = ~(AARCH64_FE_ALL_EXCEPT << AARCH64_FE_EXCEPT_SHIFT);
       unsigned int masked_cr;
       masked_cr = fenv_cr & mask_cr;

     And clear all exception flags:
       unsigned int mask_sr = ~AARCH64_FE_ALL_EXCEPT;
       unsigned int masked_sr;
       masked_sr = fenv_sr & mask_sr;

       __builtin_aarch64_set_fpcr (masked_cr);
       __builtin_aarch64_set_fpsr (masked_sr);  */

  fenv_cr = create_tmp_var_raw (unsigned_type_node);
  fenv_sr = create_tmp_var_raw (unsigned_type_node);

  get_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPCR];
  set_fpcr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPCR];
  get_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_GET_FPSR];
  set_fpsr = aarch64_builtin_decls[AARCH64_BUILTIN_SET_FPSR];

  mask_cr = build_int_cst (unsigned_type_node,
			   ~(AARCH64_FE_ALL_EXCEPT
			     << AARCH64_FE_EXCEPT_SHIFT));
  mask_sr = build_int_cst (unsigned_type_node,
			   ~(AARCH64_FE_ALL_EXCEPT));

  ld_fenv_cr = build2 (MODIFY_EXPR, unsigned_type_node,
		       fenv_cr, build_call_expr (get_fpcr, 0));
  ld_fenv_sr = build2 (MODIFY_EXPR, unsigned_type_node,
		       fenv_sr, build_call_expr (get_fpsr, 0));

  masked_fenv_cr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_cr, mask_cr);
  masked_fenv_sr = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_sr, mask_sr);

  hold_fnclex_cr = build_call_expr (set_fpcr, 1, masked_fenv_cr);
  hold_fnclex_sr = build_call_expr (set_fpsr, 1, masked_fenv_sr);

  hold_fnclex = build2 (COMPOUND_EXPR, void_type_node, hold_fnclex_cr,
			hold_fnclex_sr);
  masked_fenv = build2 (COMPOUND_EXPR, void_type_node, masked_fenv_cr,
			masked_fenv_sr);
  ld_fenv = build2 (COMPOUND_EXPR, void_type_node, ld_fenv_cr, ld_fenv_sr);

  *hold = build2 (COMPOUND_EXPR, void_type_node,
		  build2 (COMPOUND_EXPR, void_type_node, masked_fenv, ld_fenv),
		  hold_fnclex);

  /* Store the value of masked_fenv to clear the exceptions:
       __builtin_aarch64_set_fpsr (masked_fenv_sr);  */

  *clear = build_call_expr (set_fpsr, 1, masked_fenv_sr);

  /* Generate the equivalent of:
       unsigned int new_fenv_var;
       new_fenv_var = __builtin_aarch64_get_fpsr ();

       __builtin_aarch64_set_fpsr (fenv_sr);

       __atomic_feraiseexcept (new_fenv_var);  */

  new_fenv_var = create_tmp_var_raw (unsigned_type_node);
  reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
			new_fenv_var, build_call_expr (get_fpsr, 0));
  restore_fnenv = build_call_expr (set_fpsr, 1, fenv_sr);
  atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  update_call = build_call_expr (atomic_feraiseexcept, 1,
				 fold_convert (integer_type_node,
					       new_fenv_var));
  *update = build2 (COMPOUND_EXPR, void_type_node,
		    build2 (COMPOUND_EXPR, void_type_node,
			    reload_fenv, restore_fnenv), update_call);
}

/* Resolve overloaded MEMTAG built-in functions.  */
#define AARCH64_BUILTIN_SUBCODE(F) \
  (DECL_MD_FUNCTION_CODE (F) >> AARCH64_BUILTIN_SHIFT)

static tree
aarch64_resolve_overloaded_memtag (location_t loc,
				   tree fndecl, void *pass_params)
{
  vec<tree, va_gc> *params = static_cast<vec<tree, va_gc> *> (pass_params);
  unsigned param_num = params ? params->length () : 0;
  unsigned int fcode = AARCH64_BUILTIN_SUBCODE (fndecl);
  tree inittype = aarch64_memtag_builtin_data[
		    fcode - AARCH64_MEMTAG_BUILTIN_START - 1].ftype;
  unsigned arg_num = list_length (TYPE_ARG_TYPES (inittype)) - 1;

  if (param_num != arg_num)
    {
      TREE_TYPE (fndecl) = inittype;
      return NULL_TREE;
    }
  tree retype = NULL;

  if (fcode == AARCH64_MEMTAG_BUILTIN_SUBP)
    {
      tree t0 = TREE_TYPE ((*params)[0]);
      tree t1 = TREE_TYPE ((*params)[1]);

      if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
	t0 = ptr_type_node;
      if (t1 == error_mark_node || TREE_CODE (t1) != POINTER_TYPE)
	t1 = ptr_type_node;

      if (TYPE_MODE (t0) != DImode)
	warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
		    (int) tree_to_shwi (DECL_SIZE ((*params)[0])));

      if (TYPE_MODE (t1) != DImode)
	warning_at (loc, 1, "expected 64-bit address but argument 2 is %d-bit",
		    (int) tree_to_shwi (DECL_SIZE ((*params)[1])));

      retype = build_function_type_list (ptrdiff_type_node, t0, t1, NULL);
    }
  else
    {
      tree t0 = TREE_TYPE ((*params)[0]);

      if (t0 == error_mark_node || TREE_CODE (t0) != POINTER_TYPE)
	{
	  TREE_TYPE (fndecl) = inittype;
	  return NULL_TREE;
	}

      if (TYPE_MODE (t0) != DImode)
	warning_at (loc, 1, "expected 64-bit address but argument 1 is %d-bit",
		    (int) tree_to_shwi (DECL_SIZE ((*params)[0])));

      switch (fcode)
	{
	case AARCH64_MEMTAG_BUILTIN_IRG:
	  retype = build_function_type_list (t0, t0, uint64_type_node, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_GMI:
	  retype = build_function_type_list (uint64_type_node, t0,
					     uint64_type_node, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_INC_TAG:
	  retype = build_function_type_list (t0, t0, unsigned_type_node, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_SET_TAG:
	  retype = build_function_type_list (void_type_node, t0, NULL);
	  break;
	case AARCH64_MEMTAG_BUILTIN_GET_TAG:
	  retype = build_function_type_list (t0, t0, NULL);
	  break;
	default:
	  return NULL_TREE;
	}
    }

  if (!retype || retype == error_mark_node)
    TREE_TYPE (fndecl) = inittype;
  else
    TREE_TYPE (fndecl) = retype;

  return NULL_TREE;
}

/* Called from aarch64_resolve_overloaded_builtin in aarch64-c.c.  */
tree
aarch64_resolve_overloaded_builtin_general (location_t loc, tree function,
					    void *pass_params)
{
  unsigned int fcode = AARCH64_BUILTIN_SUBCODE (function);

  if (fcode >= AARCH64_MEMTAG_BUILTIN_START
      && fcode <= AARCH64_MEMTAG_BUILTIN_END)
    return aarch64_resolve_overloaded_memtag (loc, function, pass_params);

  return NULL_TREE;
}

#undef AARCH64_CHECK_BUILTIN_MODE
#undef AARCH64_FIND_FRINT_VARIANT
#undef CF0
#undef CF1
#undef CF2
#undef CF3
#undef CF4
#undef CF10
#undef VAR1
#undef VAR2
#undef VAR3
#undef VAR4
#undef VAR5
#undef VAR6
#undef VAR7
#undef VAR8
#undef VAR9
#undef VAR10
#undef VAR11

#include "gt-aarch64-builtins.h"