/* { dg-do compile } */
/* { dg-options "-O2 -ftree-parallelize-loops=4 -fdump-tree-parloops2-details -fdump-tree-optimized -fno-partial-inlining" } */

#include <stdio.h>

/* Array extents: T is MA x MB, A is MA x NA and B is MB x NA.  */
#define MB 100
#define NA 450
#define MA 400

/* File-scope arrays used by the loop nest under test.  T is the
   accumulator that is both read and written; A and B are only read.  */
int T[MA][MB];
int A[MA][NA];
int B[MB][NA];
10 void __attribute__((noinline))
|
|
11 MRTRBR(int MA_1, int NA_1, int MB_1)
|
111
|
12 {
|
|
13 int i,j, t,k;
|
|
14
|
|
15 /* At the moment we are not able to hoist the loop headers out of the loop
|
|
16 nest.
|
|
17 Partial inlining needs to be disabled so we do not optimize this out
|
|
18 of the function body. */
|
|
19 if (MA_1 < 4 || NA_1 < 4 || MB_1 < 4)
|
|
20 return;
|
|
21
|
|
22 /* The outer most loop is not parallel because for different k's there
|
|
23 is write-write dependency for T[i][j]. */
|
|
24
|
131
|
25 /* The innermost loop doesn't get parallelized due to low number of
|
111
|
26 iterations. */
|
|
27
|
|
28 for (k = 3; k < NA_1; k++)
|
|
29 for (i = 3; i < MA_1; i++)
|
|
30 for (j = 3; j < MB_1; j++)
|
|
31 {
|
|
32 t = T[i][j];
|
|
33 T[i][j] = t+2+A[i][k]*B[j][k];
|
|
34 }
|
|
35 }
|
|
36 void main ()
|
|
37 {
|
|
38 int j,i;
|
|
39
|
|
40 for (i = 3; i < MA; i++)
|
|
41 for (j = 3; j < MB; j++)
|
131
|
42 {
|
|
43 __asm__ volatile ("" : : : "memory");
|
|
44 T[i][j] = (i>j?i:j);
|
|
45 }
|
111
|
46
|
|
47 MRTRBR (MA,NA,MB);
|
|
48
|
|
49 for (i = MA-1; i < MA; i++)
|
|
50 for (j = MB-10; j < MB; j++)
|
|
51 printf ("i %d j %d T[i][j] = %d\n",i,j,T[i][j]);
|
|
52 }
|
|
53
|
|
54
|
131
|
/* Check that the outermost loop doesn't get parallelized.  */

/* { dg-final { scan-tree-dump-times "SUCCESS: may be parallelized" 1 "parloops2" } } */
/* { dg-final { scan-tree-dump-times "__builtin_GOMP_parallel" 1 "optimized" } } */