comparison gcc/testsuite/gcc.target/aarch64/advsimd-intrinsics/bf16_reinterpret.c @ 152:2b5abeee2509

update gcc11
author anatofuz
date Mon, 25 May 2020 07:50:57 +0900
parents
children
comparison
equal deleted inserted replaced
145:1830386684a0 152:2b5abeee2509
1 /* { dg-do assemble { target { aarch64*-*-* } } } */
2 /* { dg-require-effective-target arm_v8_2a_bf16_neon_ok } */
3 /* { dg-add-options arm_v8_2a_bf16_neon } */
4 /* { dg-additional-options "-save-temps" } */
5
6 #include <arm_neon.h>
7
8 float32x2_t
9 test_vbfdot_f32_s8 (float32x2_t r, int8x8_t a, int8x8_t b)
10 {
11 bfloat16x4_t _a = vreinterpret_bf16_s8(a);
12 bfloat16x4_t _b = vreinterpret_bf16_s8(b);
13
14 return vbfdot_f32 (r, _a, _b);
15 }
16
17 float32x2_t
18 test_vbfdot_f32_s16 (float32x2_t r, int16x4_t a, int16x4_t b)
19 {
20 bfloat16x4_t _a = vreinterpret_bf16_s16(a);
21 bfloat16x4_t _b = vreinterpret_bf16_s16(b);
22
23 return vbfdot_f32 (r, _a, _b);
24 }
25
26 float32x2_t
27 test_vbfdot_f32_s32 (float32x2_t r, int32x2_t a, int32x2_t b)
28 {
29 bfloat16x4_t _a = vreinterpret_bf16_s32(a);
30 bfloat16x4_t _b = vreinterpret_bf16_s32(b);
31
32 return vbfdot_f32 (r, _a, _b);
33 }
34
35 float32x2_t
36 test_vbfdot_f32_s64 (float32x2_t r, int64x1_t a, int64x1_t b)
37 {
38 bfloat16x4_t _a = vreinterpret_bf16_s64(a);
39 bfloat16x4_t _b = vreinterpret_bf16_s64(b);
40
41 return vbfdot_f32 (r, _a, _b);
42 }
43
44 float32x2_t
45 test_vbfdot_f32_u8 (float32x2_t r, uint8x8_t a, uint8x8_t b)
46 {
47 bfloat16x4_t _a = vreinterpret_bf16_u8(a);
48 bfloat16x4_t _b = vreinterpret_bf16_u8(b);
49
50 return vbfdot_f32 (r, _a, _b);
51 }
52
53 float32x2_t
54 test_vbfdot_f32_u16 (float32x2_t r, uint16x4_t a, uint16x4_t b)
55 {
56 bfloat16x4_t _a = vreinterpret_bf16_u16(a);
57 bfloat16x4_t _b = vreinterpret_bf16_u16(b);
58
59 return vbfdot_f32 (r, _a, _b);
60 }
61
62 float32x2_t
63 test_vbfdot_f32_u32 (float32x2_t r, uint32x2_t a, uint32x2_t b)
64 {
65 bfloat16x4_t _a = vreinterpret_bf16_u32(a);
66 bfloat16x4_t _b = vreinterpret_bf16_u32(b);
67
68 return vbfdot_f32 (r, _a, _b);
69 }
70
71 float32x2_t
72 test_vbfdot_f32_u64 (float32x2_t r, uint64x1_t a, uint64x1_t b)
73 {
74 bfloat16x4_t _a = vreinterpret_bf16_u64(a);
75 bfloat16x4_t _b = vreinterpret_bf16_u64(b);
76
77 return vbfdot_f32 (r, _a, _b);
78 }
79
80 float32x2_t
81 test_vbfdot_f32_p8 (float32x2_t r, poly8x8_t a, poly8x8_t b)
82 {
83 bfloat16x4_t _a = vreinterpret_bf16_p8(a);
84 bfloat16x4_t _b = vreinterpret_bf16_p8(b);
85
86 return vbfdot_f32 (r, _a, _b);
87 }
88
89 float32x2_t
90 test_vbfdot_f32_p16 (float32x2_t r, poly16x4_t a, poly16x4_t b)
91 {
92 bfloat16x4_t _a = vreinterpret_bf16_p16(a);
93 bfloat16x4_t _b = vreinterpret_bf16_p16(b);
94
95 return vbfdot_f32 (r, _a, _b);
96 }
97
98 float32x2_t
99 test_vbfdot_f32_p64 (float32x2_t r, poly64x1_t a, poly64x1_t b)
100 {
101 bfloat16x4_t _a = vreinterpret_bf16_p64(a);
102 bfloat16x4_t _b = vreinterpret_bf16_p64(b);
103
104 return vbfdot_f32 (r, _a, _b);
105 }
106
107 float32x2_t
108 test_vbfdot_f32_f16 (float32x2_t r, float16x4_t a, float16x4_t b)
109 {
110 bfloat16x4_t _a = vreinterpret_bf16_f16(a);
111 bfloat16x4_t _b = vreinterpret_bf16_f16(b);
112
113 return vbfdot_f32 (r, _a, _b);
114 }
115
116 float32x2_t
117 test_vbfdot_f32_f32 (float32x2_t r, float32x2_t a, float32x2_t b)
118 {
119 bfloat16x4_t _a = vreinterpret_bf16_f32(a);
120 bfloat16x4_t _b = vreinterpret_bf16_f32(b);
121
122 return vbfdot_f32 (r, _a, _b);
123 }
124
125 float32x2_t
126 test_vbfdot_f32_f64 (float32x2_t r, float64x1_t a, float64x1_t b)
127 {
128 bfloat16x4_t _a = vreinterpret_bf16_f64(a);
129 bfloat16x4_t _b = vreinterpret_bf16_f64(b);
130
131 return vbfdot_f32 (r, _a, _b);
132 }
133
134 float32x4_t
135 test_vbfdotq_f32_s8 (float32x4_t r, int8x16_t a, int8x16_t b)
136 {
137 bfloat16x8_t _a = vreinterpretq_bf16_s8(a);
138 bfloat16x8_t _b = vreinterpretq_bf16_s8(b);
139
140 return vbfdotq_f32 (r, _a, _b);
141 }
142
143 float32x4_t
144 test_vbfdotq_f32_s16 (float32x4_t r, int16x8_t a, int16x8_t b)
145 {
146 bfloat16x8_t _a = vreinterpretq_bf16_s16(a);
147 bfloat16x8_t _b = vreinterpretq_bf16_s16(b);
148
149 return vbfdotq_f32 (r, _a, _b);
150 }
151
152 float32x4_t
153 test_vbfdotq_f32_s32 (float32x4_t r, int32x4_t a, int32x4_t b)
154 {
155 bfloat16x8_t _a = vreinterpretq_bf16_s32(a);
156 bfloat16x8_t _b = vreinterpretq_bf16_s32(b);
157
158 return vbfdotq_f32 (r, _a, _b);
159 }
160
161 float32x4_t
162 test_vbfdotq_f32_s64 (float32x4_t r, int64x2_t a, int64x2_t b)
163 {
164 bfloat16x8_t _a = vreinterpretq_bf16_s64(a);
165 bfloat16x8_t _b = vreinterpretq_bf16_s64(b);
166
167 return vbfdotq_f32 (r, _a, _b);
168 }
169
170 float32x4_t
171 test_vbfdotq_f32_u8 (float32x4_t r, uint8x16_t a, uint8x16_t b)
172 {
173 bfloat16x8_t _a = vreinterpretq_bf16_u8(a);
174 bfloat16x8_t _b = vreinterpretq_bf16_u8(b);
175
176 return vbfdotq_f32 (r, _a, _b);
177 }
178
179 float32x4_t
180 test_vbfdotq_f32_u16 (float32x4_t r, uint16x8_t a, uint16x8_t b)
181 {
182 bfloat16x8_t _a = vreinterpretq_bf16_u16(a);
183 bfloat16x8_t _b = vreinterpretq_bf16_u16(b);
184
185 return vbfdotq_f32 (r, _a, _b);
186 }
187
188 float32x4_t
189 test_vbfdotq_f32_u32 (float32x4_t r, uint32x4_t a, uint32x4_t b)
190 {
191 bfloat16x8_t _a = vreinterpretq_bf16_u32(a);
192 bfloat16x8_t _b = vreinterpretq_bf16_u32(b);
193
194 return vbfdotq_f32 (r, _a, _b);
195 }
196
197 float32x4_t
198 test_vbfdotq_f32_u64 (float32x4_t r, uint64x2_t a, uint64x2_t b)
199 {
200 bfloat16x8_t _a = vreinterpretq_bf16_u64(a);
201 bfloat16x8_t _b = vreinterpretq_bf16_u64(b);
202
203 return vbfdotq_f32 (r, _a, _b);
204 }
205
206 float32x4_t
207 test_vbfdotq_f32_p8 (float32x4_t r, poly8x16_t a, poly8x16_t b)
208 {
209 bfloat16x8_t _a = vreinterpretq_bf16_p8(a);
210 bfloat16x8_t _b = vreinterpretq_bf16_p8(b);
211
212 return vbfdotq_f32 (r, _a, _b);
213 }
214
215 float32x4_t
216 test_vbfdotq_f32_p16 (float32x4_t r, poly16x8_t a, poly16x8_t b)
217 {
218 bfloat16x8_t _a = vreinterpretq_bf16_p16(a);
219 bfloat16x8_t _b = vreinterpretq_bf16_p16(b);
220
221 return vbfdotq_f32 (r, _a, _b);
222 }
223
224 float32x4_t
225 test_vbfdotq_f32_p64 (float32x4_t r, poly64x2_t a, poly64x2_t b)
226 {
227 bfloat16x8_t _a = vreinterpretq_bf16_p64(a);
228 bfloat16x8_t _b = vreinterpretq_bf16_p64(b);
229
230 return vbfdotq_f32 (r, _a, _b);
231 }
232
233 float32x4_t
234 test_vbfdotq_f32_p128 (float32x4_t r, poly128_t a, poly128_t b)
235 {
236 bfloat16x8_t _a = vreinterpretq_bf16_p128(a);
237 bfloat16x8_t _b = vreinterpretq_bf16_p128(b);
238
239 return vbfdotq_f32 (r, _a, _b);
240 }
241
242 float32x4_t
243 test_vbfdotq_f32_f16 (float32x4_t r, float16x8_t a, float16x8_t b)
244 {
245 bfloat16x8_t _a = vreinterpretq_bf16_f16(a);
246 bfloat16x8_t _b = vreinterpretq_bf16_f16(b);
247
248 return vbfdotq_f32 (r, _a, _b);
249 }
250
251 float32x4_t
252 test_vbfdotq_f32_f32 (float32x4_t r, float32x4_t a, float32x4_t b)
253 {
254 bfloat16x8_t _a = vreinterpretq_bf16_f32(a);
255 bfloat16x8_t _b = vreinterpretq_bf16_f32(b);
256
257 return vbfdotq_f32 (r, _a, _b);
258 }
259
260 float32x4_t
261 test_vbfdotq_f32_f64 (float32x4_t r, float64x2_t a, float64x2_t b)
262 {
263 bfloat16x8_t _a = vreinterpretq_bf16_f64(a);
264 bfloat16x8_t _b = vreinterpretq_bf16_f64(b);
265
266 return vbfdotq_f32 (r, _a, _b);
267 }
268
269 /* { dg-final { scan-assembler-times {bfdot\tv[0-9]+.2s, v[0-9]+.4h, v[0-9]+.4h} 14 } } */
270 /* { dg-final { scan-assembler-times {bfdot\tv[0-9]+.4s, v[0-9]+.8h, v[0-9]+.8h} 15 } } */
271
272 int8x8_t test_vreinterpret_s8_bf16 (bfloat16x4_t a, int8x8_t b)
273 {
274 int8x8_t _a = vreinterpret_s8_bf16 (a);
275 return vadd_s8 (_a, b);
276 }
277
278 int16x4_t test_vreinterpret_s16_bf16 (bfloat16x4_t a, int16x4_t b)
279 {
280 int16x4_t _a = vreinterpret_s16_bf16 (a);
281 return vadd_s16 (_a, b);
282 }
283
284 int32x2_t test_vreinterpret_s32_bf16 (bfloat16x4_t a, int32x2_t b)
285 {
286 int32x2_t _a = vreinterpret_s32_bf16 (a);
287 return vadd_s32 (_a, b);
288 }
289
290 int64x1_t test_vreinterpret_s64_bf16 (bfloat16x4_t a, int64x1_t b)
291 {
292 int64x1_t _a = vreinterpret_s64_bf16 (a);
293 return vrshl_s64 (_a, b);
294 }
295
296 uint8x8_t test_vreinterpret_u8_bf16 (bfloat16x4_t a, uint8x8_t b)
297 {
298 uint8x8_t _a = vreinterpret_u8_bf16 (a);
299 return vadd_u8 (_a, b);
300 }
301
302 uint16x4_t test_vreinterpret_u16_bf16 (bfloat16x4_t a, uint16x4_t b)
303 {
304 uint16x4_t _a = vreinterpret_u16_bf16 (a);
305 return vadd_u16 (_a, b);
306 }
307
308 uint32x2_t test_vreinterpret_u32_bf16 (bfloat16x4_t a, uint32x2_t b)
309 {
310 uint32x2_t _a = vreinterpret_u32_bf16 (a);
311 return vadd_u32 (_a, b);
312 }
313
314 uint64x1_t test_vreinterpret_u64_bf16 (bfloat16x4_t a, int64x1_t b)
315 {
316 uint64x1_t _a = vreinterpret_u64_bf16 (a);
317 return vrshl_u64 (_a, b);
318 }
319
320 poly8x8_t test_vreinterpret_p8_bf16 (bfloat16x4_t a, poly8x8_t b)
321 {
322 poly8x8_t _a = vreinterpret_p8_bf16 (a);
323 return vzip1_p8 (_a, b);
324 }
325
326 poly16x4_t test_vreinterpret_p16_bf16 (bfloat16x4_t a, poly16x4_t b)
327 {
328 poly16x4_t _a = vreinterpret_p16_bf16 (a);
329 return vzip1_p16 (_a, b);
330 }
331
332 poly64x1_t test_vreinterpret_p64_bf16 (bfloat16x4_t a, poly64x1_t b)
333 {
334 poly64x1_t _a = vreinterpret_p64_bf16 (a);
335 return vsli_n_p64 (_a, b, 3);
336 }
337
338 float32x2_t test_vreinterpret_f32_bf16 (bfloat16x4_t a, float32x2_t b)
339 {
340 float32x2_t _a = vreinterpret_f32_bf16 (a);
341 return vsub_f32 (_a, b);
342 }
343
344 float64x1_t test_vreinterpret_f64_bf16 (bfloat16x4_t a, float64x1_t b)
345 {
346 float64x1_t _a = vreinterpret_f64_bf16 (a);
347 return vsub_f64 (_a, b);
348 }
349
350 int8x16_t test_vreinterpretq_s8_bf16 (bfloat16x8_t a, int8x16_t b)
351 {
352 int8x16_t _a = vreinterpretq_s8_bf16 (a);
353 return vaddq_s8 (_a, b);
354 }
355
356 int16x8_t test_vreinterpretq_s16_bf16 (bfloat16x8_t a, int16x8_t b)
357 {
358 int16x8_t _a = vreinterpretq_s16_bf16 (a);
359 return vaddq_s16 (_a, b);
360 }
361
362 int32x4_t test_vreinterpretq_s32_bf16 (bfloat16x8_t a, int32x4_t b)
363 {
364 int32x4_t _a = vreinterpretq_s32_bf16 (a);
365 return vaddq_s32 (_a, b);
366 }
367
368 int64x2_t test_vreinterpretq_s64_bf16 (bfloat16x8_t a, int64x2_t b)
369 {
370 int64x2_t _a = vreinterpretq_s64_bf16 (a);
371 return vaddq_s64 (_a, b);
372 }
373
374 uint8x16_t test_vreinterpretq_u8_bf16 (bfloat16x8_t a, uint8x16_t b)
375 {
376 uint8x16_t _a = vreinterpretq_u8_bf16 (a);
377 return vaddq_u8 (_a, b);
378 }
379
380 uint16x8_t test_vreinterpretq_u16_bf16 (bfloat16x8_t a, uint16x8_t b)
381 {
382 uint16x8_t _a = vreinterpretq_u16_bf16 (a);
383 return vaddq_u16 (_a, b);
384 }
385
386 uint32x4_t test_vreinterpretq_u32_bf16 (bfloat16x8_t a, uint32x4_t b)
387 {
388 uint32x4_t _a = vreinterpretq_u32_bf16 (a);
389 return vaddq_u32 (_a, b);
390 }
391
392 uint64x2_t test_vreinterpretq_u64_bf16 (bfloat16x8_t a, uint64x2_t b)
393 {
394 uint64x2_t _a = vreinterpretq_u64_bf16 (a);
395 return vaddq_u64 (_a, b);
396 }
397
398 poly8x16_t test_vreinterpretq_p8_bf16 (bfloat16x8_t a, poly8x16_t b)
399 {
400 poly8x16_t _a = vreinterpretq_p8_bf16 (a);
401 return vzip1q_p8 (_a, b);
402 }
403
404 poly16x8_t test_vreinterpretq_p16_bf16 (bfloat16x8_t a, poly16x8_t b)
405 {
406 poly16x8_t _a = vreinterpretq_p16_bf16 (a);
407 return vzip1q_p16 (_a, b);
408 }
409
410 poly64x2_t test_vreinterpretq_p64_bf16 (bfloat16x8_t a, poly64x2_t b)
411 {
412 poly64x2_t _a = vreinterpretq_p64_bf16 (a);
413 return vsliq_n_p64 (_a, b, 3);
414 }
415
416 poly128_t test_vreinterpretq_p128_bf16 (bfloat16x8_t a, poly16x8_t b)
417 {
418 poly128_t _a = vreinterpretq_p128_bf16 (a);
419 return _a;
420 }
421
422 float32x4_t test_vreinterpretq_f32_bf16 (bfloat16x8_t a, float32x4_t b)
423 {
424 float32x4_t _a = vreinterpretq_f32_bf16 (a);
425 return vsubq_f32 (_a, b);
426 }
427
428 float64x2_t test_vreinterpretq_f64_bf16 (bfloat16x8_t a, float64x2_t b)
429 {
430 float64x2_t _a = vreinterpretq_f64_bf16 (a);
431 return vsubq_f64 (_a, b);
432 }
433
434 float16x4_t test_vreinterpret_f16_bf16 (bfloat16x4_t a)
435 {
436 return vreinterpret_f16_bf16 (a);
437 }
438
439 float16x8_t test_vreinterpretq_f16_bf16 (bfloat16x8_t a)
440 {
441 return vreinterpretq_f16_bf16 (a);
442 }
443
444 /* { dg-final { scan-assembler-times {add\tv[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s} 2 } } */
445 /* { dg-final { scan-assembler-times {add\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h} 2 } } */
446 /* { dg-final { scan-assembler-times {add\tv[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b} 2 } } */
447
448 /* { dg-final { scan-assembler-times {add\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s} 2 } } */
449 /* { dg-final { scan-assembler-times {add\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h} 2 } } */
450 /* { dg-final { scan-assembler-times {add\tv[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b} 2 } } */
451
452 /* { dg-final { scan-assembler {fsub\tv[0-9]+.2s, v[0-9]+.2s, v[0-9]+.2s} } } */
453 /* { dg-final { scan-assembler {fsub\tv[0-9]+.4s, v[0-9]+.4s, v[0-9]+.4s} } } */
454 /* { dg-final { scan-assembler {fsub\tv[0-9]+.2d, v[0-9]+.2d, v[0-9]+.2d} } } */
455 /* { dg-final { scan-assembler {fsub\td[0-9]+, d[0-9]+, d[0-9]+} } } */
456
457 /* { dg-final { scan-assembler {zip1\tv[0-9]+.8b, v[0-9]+.8b, v[0-9]+.8b} } } */
458 /* { dg-final { scan-assembler {zip1\tv[0-9]+.16b, v[0-9]+.16b, v[0-9]+.16b} } } */
459 /* { dg-final { scan-assembler {zip1\tv[0-9]+.4h, v[0-9]+.4h, v[0-9]+.4h} } } */
460 /* { dg-final { scan-assembler {zip1\tv[0-9]+.8h, v[0-9]+.8h, v[0-9]+.8h} } } */
461
462 /* { dg-final { scan-assembler {sli\tv[0-9]+.2d, v[0-9]+.2d, 3} } } */
463 /* { dg-final { scan-assembler {sli\td[0-9]+, d[0-9]+, 3} } } */
464
465 /* { dg-final { scan-assembler {urshl\td[0-9]+, d[0-9]+, d[0-9]+} } } */
466 /* { dg-final { scan-assembler {srshl\td[0-9]+, d[0-9]+, d[0-9]+} } } */