/* { dg-require-effective-target vect_simd_clones } */
/* { dg-additional-options "-O3 -fopenmp-simd -ffast-math" } */
#include <stdlib.h>
#include "tree-vect.h"
/* Element count of every working array and trip count of the outer loops.  */
#define N 64

/* Coordinate arrays read inside bar and also used as the per-iteration
   scalar arguments in foo1/foo2.  */
float *px, *py;
/* Factors multiplied with the coordinate differences inside bar.  */
float *tx, *ty;
/* x1/z1 are the accumulators updated through bar; t1/t2 receive the
   results of the first run in main so the second run can be compared.  */
float *x1, *z1, *t1, *t2;
/* Filled with i + 1 by main; foo1 and foo2 read only the last entry to
   get the inner-loop trip count.  */
int bound[N];

12 static void inline bar(const float cx, float cy,
|
|
13 float *vx, float *vy, int n)
|
|
14 {
|
|
15 int j;
|
|
16 for (j = 0; j < n; ++j)
|
|
17 {
|
|
18 const float dx = cx - px[j];
|
|
19 const float dy = cy - py[j];
|
|
20 *vx -= dx * tx[j];
|
|
21 *vy -= dy * ty[j];
|
|
22 }
|
|
23 }
|
|
24
|
|
25 __attribute__((noinline, noclone)) void foo1 ()
|
|
26 {
|
|
27 int i;
|
|
28 int n = bound[63];
|
|
29 #pragma omp simd
|
|
30 for (i=0; i<N; i++)
|
|
31 bar(px[i], py[i], x1+i, z1+i, n);
|
|
32 }
|
|
33
|
|
34 __attribute__((noinline, noclone)) void foo2 ()
|
|
35 {
|
|
36 volatile int i;
|
|
37 int n = bound[63];
|
|
38 for (i=0; i<N; i++)
|
|
39 bar(px[i], py[i], x1+i, z1+i, n);
|
|
40 }
|
|
41
|
|
42
|
|
43 int main()
|
|
44 {
|
|
45 float *X = (float*)malloc(N * 8 * sizeof (float));
|
|
46 int i;
|
|
47 /* check_vect (); */
|
|
48 px = &X[0];
|
|
49 py = &X[N * 1];
|
|
50 tx = &X[N * 2];
|
|
51 ty = &X[N * 3];
|
|
52 x1 = &X[N * 4];
|
|
53 z1 = &X[N * 5];
|
|
54 t1 = &X[N * 6];
|
|
55 t2 = &X[N * 7];
|
|
56
|
|
57 for (i=0; i<N; i++)
|
|
58 {
|
|
59 px[i] = (float) (i+2);
|
|
60 tx[i] = (float) (i+1);
|
|
61 py[i] = (float) (i+4);
|
|
62 ty[i] = (float) (i+3);
|
|
63 x1[i] = z1[i] = 1.0f;
|
|
64 bound[i] = i + 1;
|
|
65 }
|
|
66 foo1 (); /* vector variant. */
|
|
67 for (i=0; i<N;i++)
|
|
68 {
|
|
69 t1[i] = x1[i]; x1[i] = 1.0f;
|
|
70 t2[i] = z1[i]; z1[i] = 1.0f;
|
|
71 }
|
|
72 foo2 (); /* scalar variant. */
|
|
73 for (i=0; i<N; i++)
|
|
74 if (x1[i] != t1[i] || z1[i] != t2[i])
|
|
75 abort ();
|
|
76 return 0;
|
|
77 }
|
|
/* { dg-final { scan-tree-dump "OUTER LOOP VECTORIZED" "vect" } } */