/* { dg-do run { target arm*-*-* } } */
/* { dg-require-effective-target arm_neon } */
/* { dg-options "-O2" } */
/* { dg-add-options arm_neon } */
|
|
5
|
|
6 #include <arm_neon.h>
|
|
7 #include <stdlib.h>
|
|
8
|
|
9 struct __attribute__((aligned(16))) _v16u8_ {
|
|
10 uint8x16_t val;
|
|
11 _v16u8_() { }
|
|
12
|
|
13 _v16u8_( const uint8x16_t &src) { val = src; }
|
|
14 _v16u8_( const int16x8_t &src) { val = vreinterpretq_u8_s16(src); }
|
|
15 _v16u8_( const uint32x4_t &src) { val = vreinterpretq_u8_u32(src); }
|
|
16
|
|
17 operator uint8x16_t () const { return val; }
|
|
18 operator int8x16_t () const { return vreinterpretq_s8_u8 (val); }
|
|
19 operator int16x8_t () const { return vreinterpretq_s16_u8(val); }
|
|
20 operator uint32x4_t () const { return vreinterpretq_u32_u8(val); }
|
|
21 operator int32x4_t () const { return vreinterpretq_s32_u8(val); }
|
|
22 };
|
|
23 typedef struct _v16u8_ v16u8;
|
|
24 typedef const v16u8 cv16u8;
|
|
25
|
|
26 typedef v16u8 v16i8;
|
|
27 typedef v16u8 v8i16;
|
|
28 typedef v16u8 v4u32;
|
|
29
|
|
30 inline v16u8 __attribute__((always_inline)) mergelo( const v16u8 & s, const v16u8 & t )
|
|
31 {
|
|
32 uint8x8x2_t r = vzip_u8( vget_low_u8(s), vget_low_u8(t) );
|
|
33 return vcombine_u8( r.val[0], r.val[1] );
|
|
34 }
|
|
35
|
|
36 inline v8i16 __attribute__((always_inline)) unpacklo(const v16i8 & s)
|
|
37 {
|
|
38 return vmovl_s8( vget_low_s8( s ) );
|
|
39 }
|
|
40
|
|
/* Input tables for test_func: four 32-bit words each (16 bytes), aligned
   to 16 bytes.  test_func loads them as raw bytes.  */
const uint32_t __attribute__((aligned(16))) _InA [4] = {
    0xFF020001, 0xFF020001,
    0xFF000101, 0xFF000101
};
const uint32_t __attribute__((aligned(16))) _InB [4] = {
    0xFF050002, 0xFF050002,
    0xFF000303, 0xFF000203
};
|
|
43
|
|
44 __attribute__((noinline)) v16i8 test_func(void)
|
|
45 {
|
|
46 v16u8 A = vld1q_u8( (uint8_t*) _InA );
|
|
47 v16u8 B = vld1q_u8( (uint8_t*) _InB );
|
|
48 v8i16 r = vdupq_n_s16(2);
|
|
49
|
|
50 v16u8 _0 = mergelo( A, B );
|
|
51 v16u8 _1 = mergelo( B, A );
|
|
52
|
|
53 v16u8 _2 = mergelo( _0, _1 );
|
|
54 v16u8 _3 = mergelo( _1, _0 );
|
|
55
|
|
56 v8i16 _4 = vsubq_s16( unpacklo( _2 ), r );
|
|
57 v8i16 _5 = vsubq_s16( unpacklo( _3 ), r );
|
|
58
|
|
59 v8i16 ret = vaddq_s16( _4, _5 );
|
|
60
|
|
61 return ( ret );
|
|
62 }
|
|
63
|
|
64 int main (int argc, char **argv)
|
|
65 {
|
|
66 v16u8 val = test_func();
|
|
67
|
|
68 if (vgetq_lane_u32( val, 0 ) != 0xffffffff
|
|
69 || vgetq_lane_u32( val, 1 ) != 0xffffffff
|
|
70 || vgetq_lane_u32( val, 2 ) != 0xfffcfffc
|
|
71 || vgetq_lane_u32( val, 3 ) != 0xfffcfffc)
|
|
72 abort ();
|
|
73 exit (0);
|
|
74 }
|