Mercurial > hg > CbC > CbC_gcc
comparison gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_reinterpret.c @ 152:2b5abeee2509
update gcc11
author | anatofuz |
---|---|
date | Mon, 25 May 2020 07:50:57 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
145:1830386684a0 | 152:2b5abeee2509 |
---|---|
1 /* { dg-do assemble { target { aarch64*-*-* } } } */ | |
2 /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */ | |
3 /* { dg-add-options arm_v8_2a_bf16_neon } */ | |
4 /* { dg-additional-options "-save-temps" } */ | |
5 | |
6 #include <arm_neon.h> | |
7 | |
/* Reinterpret the int8x8_t inputs A and B as bfloat16x4_t and pass them,
   together with R, to vbfdot_f32.  */
float32x2_t test_vbfdot_f32_s8 (float32x2_t r, int8x8_t a, int8x8_t b)
{
  return vbfdot_f32 (r, vreinterpret_bf16_s8 (a), vreinterpret_bf16_s8 (b));
}
16 | |
/* Reinterpret the int16x4_t inputs A and B as bfloat16x4_t and pass them,
   together with R, to vbfdot_f32.  */
float32x2_t test_vbfdot_f32_s16 (float32x2_t r, int16x4_t a, int16x4_t b)
{
  return vbfdot_f32 (r, vreinterpret_bf16_s16 (a), vreinterpret_bf16_s16 (b));
}
25 | |
/* Reinterpret the int32x2_t inputs A and B as bfloat16x4_t and pass them,
   together with R, to vbfdot_f32.  */
float32x2_t test_vbfdot_f32_s32 (float32x2_t r, int32x2_t a, int32x2_t b)
{
  return vbfdot_f32 (r, vreinterpret_bf16_s32 (a), vreinterpret_bf16_s32 (b));
}
34 | |
/* Reinterpret the int64x1_t inputs A and B as bfloat16x4_t and pass them,
   together with R, to vbfdot_f32.  */
float32x2_t test_vbfdot_f32_s64 (float32x2_t r, int64x1_t a, int64x1_t b)
{
  return vbfdot_f32 (r, vreinterpret_bf16_s64 (a), vreinterpret_bf16_s64 (b));
}
43 | |
/* Reinterpret the uint8x8_t inputs A and B as bfloat16x4_t and pass them,
   together with R, to vbfdot_f32.  */
float32x2_t test_vbfdot_f32_u8 (float32x2_t r, uint8x8_t a, uint8x8_t b)
{
  return vbfdot_f32 (r, vreinterpret_bf16_u8 (a), vreinterpret_bf16_u8 (b));
}
52 | |
/* Reinterpret the uint16x4_t inputs A and B as bfloat16x4_t and pass them,
   together with R, to vbfdot_f32.  */
float32x2_t test_vbfdot_f32_u16 (float32x2_t r, uint16x4_t a, uint16x4_t b)
{
  return vbfdot_f32 (r, vreinterpret_bf16_u16 (a), vreinterpret_bf16_u16 (b));
}
61 | |
/* Reinterpret the uint32x2_t inputs A and B as bfloat16x4_t and pass them,
   together with R, to vbfdot_f32.  */
float32x2_t test_vbfdot_f32_u32 (float32x2_t r, uint32x2_t a, uint32x2_t b)
{
  return vbfdot_f32 (r, vreinterpret_bf16_u32 (a), vreinterpret_bf16_u32 (b));
}
70 | |
/* Reinterpret the uint64x1_t inputs A and B as bfloat16x4_t and pass them,
   together with R, to vbfdot_f32.  */
float32x2_t test_vbfdot_f32_u64 (float32x2_t r, uint64x1_t a, uint64x1_t b)
{
  return vbfdot_f32 (r, vreinterpret_bf16_u64 (a), vreinterpret_bf16_u64 (b));
}
79 | |
/* Reinterpret the poly8x8_t inputs A and B as bfloat16x4_t and pass them,
   together with R, to vbfdot_f32.  */
float32x2_t test_vbfdot_f32_p8 (float32x2_t r, poly8x8_t a, poly8x8_t b)
{
  return vbfdot_f32 (r, vreinterpret_bf16_p8 (a), vreinterpret_bf16_p8 (b));
}
88 | |
/* Reinterpret the poly16x4_t inputs A and B as bfloat16x4_t and pass them,
   together with R, to vbfdot_f32.  */
float32x2_t test_vbfdot_f32_p16 (float32x2_t r, poly16x4_t a, poly16x4_t b)
{
  return vbfdot_f32 (r, vreinterpret_bf16_p16 (a), vreinterpret_bf16_p16 (b));
}
97 | |
/* Reinterpret the poly64x1_t inputs A and B as bfloat16x4_t and pass them,
   together with R, to vbfdot_f32.  */
float32x2_t test_vbfdot_f32_p64 (float32x2_t r, poly64x1_t a, poly64x1_t b)
{
  return vbfdot_f32 (r, vreinterpret_bf16_p64 (a), vreinterpret_bf16_p64 (b));
}
106 | |
/* Reinterpret the float16x4_t inputs A and B as bfloat16x4_t and pass them,
   together with R, to vbfdot_f32.  */
float32x2_t test_vbfdot_f32_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
{
  return vbfdot_f32 (r, vreinterpret_bf16_f16 (a), vreinterpret_bf16_f16 (b));
}
115 | |
/* Reinterpret the float32x2_t inputs A and B as bfloat16x4_t and pass them,
   together with R, to vbfdot_f32.  */
float32x2_t test_vbfdot_f32_f32 (float32x2_t r, float32x2_t a, float32x2_t b)
{
  return vbfdot_f32 (r, vreinterpret_bf16_f32 (a), vreinterpret_bf16_f32 (b));
}
124 | |
/* Reinterpret the float64x1_t inputs A and B as bfloat16x4_t and pass them,
   together with R, to vbfdot_f32.  */
float32x2_t test_vbfdot_f32_f64 (float32x2_t r, float64x1_t a, float64x1_t b)
{
  return vbfdot_f32 (r, vreinterpret_bf16_f64 (a), vreinterpret_bf16_f64 (b));
}
133 | |
/* Reinterpret the int8x16_t inputs A and B as bfloat16x8_t and pass them,
   together with R, to vbfdotq_f32.  */
float32x4_t test_vbfdotq_f32_s8 (float32x4_t r, int8x16_t a, int8x16_t b)
{
  return vbfdotq_f32 (r, vreinterpretq_bf16_s8 (a),
                      vreinterpretq_bf16_s8 (b));
}
142 | |
/* Reinterpret the int16x8_t inputs A and B as bfloat16x8_t and pass them,
   together with R, to vbfdotq_f32.  */
float32x4_t test_vbfdotq_f32_s16 (float32x4_t r, int16x8_t a, int16x8_t b)
{
  return vbfdotq_f32 (r, vreinterpretq_bf16_s16 (a),
                      vreinterpretq_bf16_s16 (b));
}
151 | |
/* Reinterpret the int32x4_t inputs A and B as bfloat16x8_t and pass them,
   together with R, to vbfdotq_f32.  */
float32x4_t test_vbfdotq_f32_s32 (float32x4_t r, int32x4_t a, int32x4_t b)
{
  return vbfdotq_f32 (r, vreinterpretq_bf16_s32 (a),
                      vreinterpretq_bf16_s32 (b));
}
160 | |
/* Reinterpret the int64x2_t inputs A and B as bfloat16x8_t and pass them,
   together with R, to vbfdotq_f32.  */
float32x4_t test_vbfdotq_f32_s64 (float32x4_t r, int64x2_t a, int64x2_t b)
{
  return vbfdotq_f32 (r, vreinterpretq_bf16_s64 (a),
                      vreinterpretq_bf16_s64 (b));
}
169 | |
/* Reinterpret the uint8x16_t inputs A and B as bfloat16x8_t and pass them,
   together with R, to vbfdotq_f32.  */
float32x4_t test_vbfdotq_f32_u8 (float32x4_t r, uint8x16_t a, uint8x16_t b)
{
  return vbfdotq_f32 (r, vreinterpretq_bf16_u8 (a),
                      vreinterpretq_bf16_u8 (b));
}
178 | |
/* Reinterpret the uint16x8_t inputs A and B as bfloat16x8_t and pass them,
   together with R, to vbfdotq_f32.  */
float32x4_t test_vbfdotq_f32_u16 (float32x4_t r, uint16x8_t a, uint16x8_t b)
{
  return vbfdotq_f32 (r, vreinterpretq_bf16_u16 (a),
                      vreinterpretq_bf16_u16 (b));
}
187 | |
/* Reinterpret the uint32x4_t inputs A and B as bfloat16x8_t and pass them,
   together with R, to vbfdotq_f32.  */
float32x4_t test_vbfdotq_f32_u32 (float32x4_t r, uint32x4_t a, uint32x4_t b)
{
  return vbfdotq_f32 (r, vreinterpretq_bf16_u32 (a),
                      vreinterpretq_bf16_u32 (b));
}
196 | |
/* Reinterpret the uint64x2_t inputs A and B as bfloat16x8_t and pass them,
   together with R, to vbfdotq_f32.  */
float32x4_t test_vbfdotq_f32_u64 (float32x4_t r, uint64x2_t a, uint64x2_t b)
{
  return vbfdotq_f32 (r, vreinterpretq_bf16_u64 (a),
                      vreinterpretq_bf16_u64 (b));
}
205 | |
/* Reinterpret the poly8x16_t inputs A and B as bfloat16x8_t and pass them,
   together with R, to vbfdotq_f32.  */
float32x4_t test_vbfdotq_f32_p8 (float32x4_t r, poly8x16_t a, poly8x16_t b)
{
  return vbfdotq_f32 (r, vreinterpretq_bf16_p8 (a),
                      vreinterpretq_bf16_p8 (b));
}
214 | |
/* Reinterpret the poly16x8_t inputs A and B as bfloat16x8_t and pass them,
   together with R, to vbfdotq_f32.  */
float32x4_t test_vbfdotq_f32_p16 (float32x4_t r, poly16x8_t a, poly16x8_t b)
{
  return vbfdotq_f32 (r, vreinterpretq_bf16_p16 (a),
                      vreinterpretq_bf16_p16 (b));
}
223 | |
/* Reinterpret the poly64x2_t inputs A and B as bfloat16x8_t and pass them,
   together with R, to vbfdotq_f32.  */
float32x4_t test_vbfdotq_f32_p64 (float32x4_t r, poly64x2_t a, poly64x2_t b)
{
  return vbfdotq_f32 (r, vreinterpretq_bf16_p64 (a),
                      vreinterpretq_bf16_p64 (b));
}
232 | |
/* Reinterpret the poly128_t inputs A and B as bfloat16x8_t and pass them,
   together with R, to vbfdotq_f32.  */
float32x4_t test_vbfdotq_f32_p128 (float32x4_t r, poly128_t a, poly128_t b)
{
  return vbfdotq_f32 (r, vreinterpretq_bf16_p128 (a),
                      vreinterpretq_bf16_p128 (b));
}
241 | |
/* Reinterpret the float16x8_t inputs A and B as bfloat16x8_t and pass them,
   together with R, to vbfdotq_f32.  */
float32x4_t test_vbfdotq_f32_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
{
  return vbfdotq_f32 (r, vreinterpretq_bf16_f16 (a),
                      vreinterpretq_bf16_f16 (b));
}
250 | |
/* Reinterpret the float32x4_t inputs A and B as bfloat16x8_t and pass them,
   together with R, to vbfdotq_f32.  */
float32x4_t test_vbfdotq_f32_f32 (float32x4_t r, float32x4_t a, float32x4_t b)
{
  return vbfdotq_f32 (r, vreinterpretq_bf16_f32 (a),
                      vreinterpretq_bf16_f32 (b));
}
259 | |
/* Reinterpret the float64x2_t inputs A and B as bfloat16x8_t and pass them,
   together with R, to vbfdotq_f32.  */
float32x4_t test_vbfdotq_f32_f64 (float32x4_t r, float64x2_t a, float64x2_t b)
{
  return vbfdotq_f32 (r, vreinterpretq_bf16_f64 (a),
                      vreinterpretq_bf16_f64 (b));
}
268 | |
269 /* { dg-final { scan-assembler-times {bfdot\tv[0-9]+.2s, v[0-9]+.4h, v[0-9]+.4h} 14 } } */ | |
270 /* { dg-final { scan-assembler-times {bfdot\tv[0-9]+.4s, v[0-9]+.8h, v[0-9]+.8h} 15 } } */ | |
271 | |
/* Reinterpret A as int8x8_t and combine it with B through vadd_s8.  */
int8x8_t
test_vreinterpret_s8_bf16 (bfloat16x4_t a, int8x8_t b)
{
  return vadd_s8 (vreinterpret_s8_bf16 (a), b);
}
277 | |
/* Reinterpret A as int16x4_t and combine it with B through vadd_s16.  */
int16x4_t
test_vreinterpret_s16_bf16 (bfloat16x4_t a, int16x4_t b)
{
  return vadd_s16 (vreinterpret_s16_bf16 (a), b);
}
283 | |
/* Reinterpret A as int32x2_t and combine it with B through vadd_s32.  */
int32x2_t
test_vreinterpret_s32_bf16 (bfloat16x4_t a, int32x2_t b)
{
  return vadd_s32 (vreinterpret_s32_bf16 (a), b);
}
289 | |
/* Reinterpret A as int64x1_t and combine it with B through vrshl_s64.  */
int64x1_t
test_vreinterpret_s64_bf16 (bfloat16x4_t a, int64x1_t b)
{
  return vrshl_s64 (vreinterpret_s64_bf16 (a), b);
}
295 | |
/* Reinterpret A as uint8x8_t and combine it with B through vadd_u8.  */
uint8x8_t
test_vreinterpret_u8_bf16 (bfloat16x4_t a, uint8x8_t b)
{
  return vadd_u8 (vreinterpret_u8_bf16 (a), b);
}
301 | |
/* Reinterpret A as uint16x4_t and combine it with B through vadd_u16.  */
uint16x4_t
test_vreinterpret_u16_bf16 (bfloat16x4_t a, uint16x4_t b)
{
  return vadd_u16 (vreinterpret_u16_bf16 (a), b);
}
307 | |
/* Reinterpret A as uint32x2_t and combine it with B through vadd_u32.  */
uint32x2_t
test_vreinterpret_u32_bf16 (bfloat16x4_t a, uint32x2_t b)
{
  return vadd_u32 (vreinterpret_u32_bf16 (a), b);
}
313 | |
/* Reinterpret A as uint64x1_t and combine it with the signed vector B
   through vrshl_u64.  */
uint64x1_t
test_vreinterpret_u64_bf16 (bfloat16x4_t a, int64x1_t b)
{
  return vrshl_u64 (vreinterpret_u64_bf16 (a), b);
}
319 | |
/* Reinterpret A as poly8x8_t and combine it with B through vzip1_p8.  */
poly8x8_t
test_vreinterpret_p8_bf16 (bfloat16x4_t a, poly8x8_t b)
{
  return vzip1_p8 (vreinterpret_p8_bf16 (a), b);
}
325 | |
/* Reinterpret A as poly16x4_t and combine it with B through vzip1_p16.  */
poly16x4_t
test_vreinterpret_p16_bf16 (bfloat16x4_t a, poly16x4_t b)
{
  return vzip1_p16 (vreinterpret_p16_bf16 (a), b);
}
331 | |
/* Reinterpret A as poly64x1_t and pass it, B and the immediate 3 to
   vsli_n_p64.  */
poly64x1_t
test_vreinterpret_p64_bf16 (bfloat16x4_t a, poly64x1_t b)
{
  return vsli_n_p64 (vreinterpret_p64_bf16 (a), b, 3);
}
337 | |
/* Reinterpret A as float32x2_t and subtract B from it with vsub_f32.  */
float32x2_t
test_vreinterpret_f32_bf16 (bfloat16x4_t a, float32x2_t b)
{
  return vsub_f32 (vreinterpret_f32_bf16 (a), b);
}
343 | |
/* Reinterpret A as float64x1_t and subtract B from it with vsub_f64.  */
float64x1_t
test_vreinterpret_f64_bf16 (bfloat16x4_t a, float64x1_t b)
{
  return vsub_f64 (vreinterpret_f64_bf16 (a), b);
}
349 | |
/* Reinterpret A as int8x16_t and combine it with B through vaddq_s8.  */
int8x16_t
test_vreinterpretq_s8_bf16 (bfloat16x8_t a, int8x16_t b)
{
  return vaddq_s8 (vreinterpretq_s8_bf16 (a), b);
}
355 | |
/* Reinterpret A as int16x8_t and combine it with B through vaddq_s16.  */
int16x8_t
test_vreinterpretq_s16_bf16 (bfloat16x8_t a, int16x8_t b)
{
  return vaddq_s16 (vreinterpretq_s16_bf16 (a), b);
}
361 | |
/* Reinterpret A as int32x4_t and combine it with B through vaddq_s32.  */
int32x4_t
test_vreinterpretq_s32_bf16 (bfloat16x8_t a, int32x4_t b)
{
  return vaddq_s32 (vreinterpretq_s32_bf16 (a), b);
}
367 | |
/* Reinterpret A as int64x2_t and combine it with B through vaddq_s64.  */
int64x2_t
test_vreinterpretq_s64_bf16 (bfloat16x8_t a, int64x2_t b)
{
  return vaddq_s64 (vreinterpretq_s64_bf16 (a), b);
}
373 | |
/* Reinterpret A as uint8x16_t and combine it with B through vaddq_u8.  */
uint8x16_t
test_vreinterpretq_u8_bf16 (bfloat16x8_t a, uint8x16_t b)
{
  return vaddq_u8 (vreinterpretq_u8_bf16 (a), b);
}
379 | |
/* Reinterpret A as uint16x8_t and combine it with B through vaddq_u16.  */
uint16x8_t
test_vreinterpretq_u16_bf16 (bfloat16x8_t a, uint16x8_t b)
{
  return vaddq_u16 (vreinterpretq_u16_bf16 (a), b);
}
385 | |
/* Reinterpret A as uint32x4_t and combine it with B through vaddq_u32.  */
uint32x4_t
test_vreinterpretq_u32_bf16 (bfloat16x8_t a, uint32x4_t b)
{
  return vaddq_u32 (vreinterpretq_u32_bf16 (a), b);
}
391 | |
/* Reinterpret A as uint64x2_t and combine it with B through vaddq_u64.  */
uint64x2_t
test_vreinterpretq_u64_bf16 (bfloat16x8_t a, uint64x2_t b)
{
  return vaddq_u64 (vreinterpretq_u64_bf16 (a), b);
}
397 | |
/* Reinterpret A as poly8x16_t and combine it with B through vzip1q_p8.  */
poly8x16_t
test_vreinterpretq_p8_bf16 (bfloat16x8_t a, poly8x16_t b)
{
  return vzip1q_p8 (vreinterpretq_p8_bf16 (a), b);
}
403 | |
/* Reinterpret A as poly16x8_t and combine it with B through vzip1q_p16.  */
poly16x8_t
test_vreinterpretq_p16_bf16 (bfloat16x8_t a, poly16x8_t b)
{
  return vzip1q_p16 (vreinterpretq_p16_bf16 (a), b);
}
409 | |
/* Reinterpret A as poly64x2_t and pass it, B and the immediate 3 to
   vsliq_n_p64.  */
poly64x2_t
test_vreinterpretq_p64_bf16 (bfloat16x8_t a, poly64x2_t b)
{
  return vsliq_n_p64 (vreinterpretq_p64_bf16 (a), b, 3);
}
415 | |
/* Reinterpret A as poly128_t and return it unchanged.  B is accepted but
   never read.  */
poly128_t
test_vreinterpretq_p128_bf16 (bfloat16x8_t a, poly16x8_t b)
{
  return vreinterpretq_p128_bf16 (a);
}
421 | |
/* Reinterpret A as float32x4_t and subtract B from it with vsubq_f32.  */
float32x4_t
test_vreinterpretq_f32_bf16 (bfloat16x8_t a, float32x4_t b)
{
  return vsubq_f32 (vreinterpretq_f32_bf16 (a), b);
}
427 | |
/* Reinterpret A as float64x2_t and subtract B from it with vsubq_f64.  */
float64x2_t
test_vreinterpretq_f64_bf16 (bfloat16x8_t a, float64x2_t b)
{
  return vsubq_f64 (vreinterpretq_f64_bf16 (a), b);
}
433 | |
/* Return A reinterpreted as float16x4_t; no further operation is applied.  */
float16x4_t
test_vreinterpret_f16_bf16 (bfloat16x4_t a)
{
  float16x4_t as_f16 = vreinterpret_f16_bf16 (a);

  return as_f16;
}
438 | |
/* Return A reinterpreted as float16x8_t; no further operation is applied.  */
float16x8_t
test_vreinterpretq_f16_bf16 (bfloat16x8_t a)
{
  float16x8_t as_f16 = vreinterpretq_f16_bf16 (a);

  return as_f16;
}
443 | |
444 /* { dg-final { scan-assembler-times {add\tv[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s} 2 } } */ | |
445 /* { dg-final { scan-assembler-times {add\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h} 2 } } */ | |
446 /* { dg-final { scan-assembler-times {add\tv[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b} 2 } } */ | |
447 | |
448 /* { dg-final { scan-assembler-times {add\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s} 2 } } */ | |
449 /* { dg-final { scan-assembler-times {add\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h} 2 } } */ | |
450 /* { dg-final { scan-assembler-times {add\tv[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b} 2 } } */ | |
451 | |
452 /* { dg-final { scan-assembler {fsub\tv[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s} } } */ | |
453 /* { dg-final { scan-assembler {fsub\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s} } } */ | |
454 /* { dg-final { scan-assembler {fsub\tv[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d} } } */ | |
455 /* { dg-final { scan-assembler {fsub\td[0-9]+, d[0-9]+, d[0-9]+} } } */ | |
456 | |
457 /* { dg-final { scan-assembler {zip1\tv[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b} } } */ | |
458 /* { dg-final { scan-assembler {zip1\tv[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b} } } */ | |
459 /* { dg-final { scan-assembler {zip1\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h} } } */ | |
460 /* { dg-final { scan-assembler {zip1\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h} } } */ | |
461 | |
462 /* { dg-final { scan-assembler {sli\tv[0-9]+.2d, v[0-9]+.2d, 3} } } */ | |
463 /* { dg-final { scan-assembler {sli\td[0-9]+, d[0-9]+, 3} } } */ | |
464 | |
465 /* { dg-final { scan-assembler {urshl\td[0-9]+, d[0-9]+, d[0-9]+} } } */ | |
466 /* { dg-final { scan-assembler {srshl\td[0-9]+, d[0-9]+, d[0-9]+} } } */ |