// { dg-require-effective-target size32plus }
// { dg-additional-options "-fopenmp-simd" }
// { dg-additional-options "-mavx" { target avx_runtime } }
// { dg-final { scan-tree-dump-times "vectorized \[1-3] loops" 2 "vect" { target i?86-*-* x86_64-*-* } } }
|
|
5
|
|
6 #include "../../gcc.dg/vect/tree-vect.h"
|
|
7
|
|
// Shared test state: `a` is the input array, `b` receives the scan results,
// and `r` / `q` are the scan accumulators (`r` is passed by reference to
// foo/baz, `q` is bound to a local reference inside bar/qux).
int r, a[1024], b[1024], q;

// User-defined reduction `foo` over int: partial results are combined by
// addition and every private copy starts at 0.
#pragma omp declare reduction (foo: int: omp_out += omp_in) initializer (omp_priv = 0)
|
|
11
|
|
// Exclusive scan of a[0..1023] into b[0..1023] using the user-defined
// reduction `foo`, with the accumulator passed by reference.
//
// b[i] is stored before the scan directive, so it receives the value of r
// from before a[i] is added; the statement after the directive adds a[i]
// into r.  main () checks that r ends up as its initial value plus the sum
// of all a[i].
//
// noipa keeps the function out of interprocedural optimization so the loop
// is compiled as written.
__attribute__((noipa)) void
foo (int *a, int *b, int &r)
{
  #pragma omp simd reduction (inscan, foo:r)
  for (int i = 0; i < 1024; i++)
    {
      b[i] = r;
      #pragma omp scan exclusive(r)
      r += a[i];
    }
}
|
|
23
|
|
24 __attribute__((noipa)) int
|
|
25 bar (void)
|
|
26 {
|
|
27 int &s = q;
|
|
28 q = 0;
|
|
29 #pragma omp simd reduction (inscan, foo:s)
|
|
30 for (int i = 0; i < 1024; i++)
|
|
31 {
|
|
32 b[i] = s;
|
|
33 #pragma omp scan exclusive(s)
|
|
34 s += 2 * a[i];
|
|
35 }
|
|
36 return s;
|
|
37 }
|
|
38
|
|
// Same exclusive scan as foo (), but the simd construct carries
// `if (simd: 0)`.
//
// NOTE(review): the false `if` clause presumably keeps this loop from being
// vectorized, so that only two functions match the dg-final dump scan --
// confirm against the OpenMP `if` clause semantics for simd.
__attribute__((noipa)) void
baz (int *a, int *b, int &r)
{
  #pragma omp simd reduction (inscan, foo:r) if (simd: 0)
  for (int i = 0; i < 1024; i++)
    {
      b[i] = r;
      #pragma omp scan exclusive(r)
      r += a[i];
    }
}
|
|
50
|
|
51 __attribute__((noipa)) int
|
|
52 qux (void)
|
|
53 {
|
|
54 int &s = q;
|
|
55 q = 0;
|
|
56 #pragma omp simd reduction (inscan, foo:s) simdlen (1)
|
|
57 for (int i = 0; i < 1024; i++)
|
|
58 {
|
|
59 b[i] = s;
|
|
60 #pragma omp scan exclusive(s)
|
|
61 s += 2 * a[i];
|
|
62 }
|
|
63 return s;
|
|
64 }
|
|
65
|
|
66 int
|
|
67 main ()
|
|
68 {
|
|
69 int s = 0;
|
|
70 check_vect ();
|
|
71 for (int i = 0; i < 1024; ++i)
|
|
72 {
|
|
73 a[i] = i;
|
|
74 b[i] = -1;
|
|
75 asm ("" : "+g" (i));
|
|
76 }
|
|
77 foo (a, b, r);
|
|
78 if (r != 1024 * 1023 / 2)
|
|
79 abort ();
|
|
80 for (int i = 0; i < 1024; ++i)
|
|
81 {
|
|
82 if (b[i] != s)
|
|
83 abort ();
|
|
84 else
|
|
85 b[i] = 25;
|
|
86 s += i;
|
|
87 }
|
|
88 if (bar () != 1024 * 1023)
|
|
89 abort ();
|
|
90 s = 0;
|
|
91 for (int i = 0; i < 1024; ++i)
|
|
92 {
|
|
93 if (b[i] != s)
|
|
94 abort ();
|
|
95 else
|
|
96 b[i] = -1;
|
|
97 s += 2 * i;
|
|
98 }
|
|
99 r = 0;
|
|
100 baz (a, b, r);
|
|
101 if (r != 1024 * 1023 / 2)
|
|
102 abort ();
|
|
103 s = 0;
|
|
104 for (int i = 0; i < 1024; ++i)
|
|
105 {
|
|
106 if (b[i] != s)
|
|
107 abort ();
|
|
108 else
|
|
109 b[i] = -25;
|
|
110 s += i;
|
|
111 }
|
|
112 if (qux () != 1024 * 1023)
|
|
113 abort ();
|
|
114 s = 0;
|
|
115 for (int i = 0; i < 1024; ++i)
|
|
116 {
|
|
117 if (b[i] != s)
|
|
118 abort ();
|
|
119 s += 2 * i;
|
|
120 }
|
|
121 return 0;
|
|
122 }
|