/* { dg-require-effective-target vect_int } */
|
|
2
|
|
3 #include <stdarg.h>
|
|
4 #include "tree-vect.h"
|
|
5
|
|
/* Weights applied to each source element (A) and to its right-hand
   neighbour (B) in foo.  */
#define A 3
#define B 4
/* Number of elements in each of the global buffers below.  */
#define N 256

/* Global input and output buffers; main initialises src, clears dst and
   passes both to foo.  */
short src[N], dst[N];

/* Run h / 16 iterations.  Each iteration stores
       dst[k] = A*src[k] + B*src[k+1]   for k = 0 .. 7
   (so it reads src[0 .. 8]) and then advances both pointers by STRIDE
   elements.  Nothing is written when h < 16.

   DST and SRC are __restrict__-qualified, so callers must pass
   non-overlapping regions.  The parameters shadow the global buffers of
   the same name.

   The eight statements are deliberately written out one by one: the
   dg-final directive of this test scans the slp1 dump for
   "basic block vectorized", so keep this shape when editing.  */
void foo (short * __restrict__ dst, short * __restrict__ src, int h,
          int stride)
{
  int i;
  h /= 16;
  for (i = 0; i < h; i++)
    {
      dst[0] = A*src[0] + B*src[1];
      dst[1] = A*src[1] + B*src[2];
      dst[2] = A*src[2] + B*src[3];
      dst[3] = A*src[3] + B*src[4];
      dst[4] = A*src[4] + B*src[5];
      dst[5] = A*src[5] + B*src[6];
      dst[6] = A*src[6] + B*src[7];
      dst[7] = A*src[7] + B*src[8];
      dst += stride;
      src += stride;
      /* Empty asm with a "memory" clobber: a compiler-level barrier for
         memory accesses.  Presumably it is here to keep the iterations
         separate so the body is handled as a basic block -- confirm
         against the test's history.  Spelled __asm__ rather than asm so
         the file also compiles in strict ISO modes, where asm is not a
         keyword.  */
      __asm__ __volatile__ ("" ::: "memory");
    }
}
|
|
32
|
|
33
|
|
34 int main (void)
|
|
35 {
|
|
36 int i;
|
|
37
|
|
38 check_vect ();
|
|
39
|
|
40 for (i = 0; i < N; i++)
|
|
41 {
|
|
42 dst[i] = 0;
|
|
43 src[i] = i;
|
|
44 }
|
|
45
|
131
|
46 foo (dst, src, N, 8);
|
111
|
47
|
|
48 for (i = 0; i < N/2; i++)
|
|
49 {
|
|
50 if (dst[i] != A * src[i] + B * src[i+1])
|
|
51 abort ();
|
|
52 }
|
|
53
|
|
54 return 0;
|
|
55 }
|
|
56
|
|
/* Exclude POWER8 (the only POWER cpu for which vect_element_align is true)
   because there the loops have already been vectorized before SLP gets a
   shot.  */
/* { dg-final { scan-tree-dump-times "basic block vectorized" 1 "slp1" { target { { vect_int_mult && vect_element_align } && { ! powerpc*-*-* } } } } } */
|
|
60
|