Mercurial > hg > CbC > CbC_gcc
view gcc/testsuite/c-c++-common/goacc/nested-reductions.c @ 145:1830386684a0
gcc-9.2.0
author | anatofuz |
---|---|
date | Thu, 13 Feb 2020 11:34:05 +0900 |
parents | |
children |
line wrap: on
line source
/* Test cases of nested reduction loops that should compile cleanly.

   The loop bodies only store a constant into the reduction variable;
   nothing is ever read back.  Layout matters in this file: every
   "#pragma acc" directive has to begin its own line, and a "//"
   comment swallows whatever follows it on the same line, so the
   test-harness annotations that trail some directives must remain the
   last thing on those lines.  */

/* Loop nests inside a "parallel" construct, with the reduction
   clauses written on the loop directives.  Covers a clause on the
   outermost loop only, collapse(2), clauses repeated on inner loops,
   and two reduction variables (sum, diff) handled by sibling inner
   nests under one outer loop.  */

void acc_parallel (void)
{
  int i, j, k, sum, diff;

  #pragma acc parallel
  {
    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      for (j = 0; j < 10; j++)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop collapse(2) reduction(+:sum)
    for (i = 0; i < 10; i++)
      for (j = 0; j < 10; j++)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      #pragma acc loop reduction(+:sum)
      for (j = 0; j < 10; j++)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      #pragma acc loop collapse(2) reduction(+:sum)
      for (j = 0; j < 10; j++)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      for (j = 0; j < 10; j++)
        #pragma acc loop reduction(+:sum)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      #pragma acc loop reduction(+:sum)
      for (j = 0; j < 10; j++)
        #pragma acc loop reduction(+:sum)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum) reduction(-:diff)
    for (i = 0; i < 10; i++)
      {
        #pragma acc loop reduction(+:sum)
        for (j = 0; j < 10; j++)
          #pragma acc loop reduction(+:sum)
          for (k = 0; k < 10; k++)
            sum = 1;

        #pragma acc loop reduction(-:diff)
        for (j = 0; j < 10; j++)
          #pragma acc loop reduction(-:diff)
          for (k = 0; k < 10; k++)
            diff = 1;
      }
  }
}

/* The same tests as above, but using a combined parallel loop
   construct.  The enclosing h loop carries the combined directive, so
   each nest below sits one loop level deeper than in acc_parallel;
   the nests with three annotated levels carry a warning expectation
   on their middle directive.  */

void acc_parallel_loop (void)
{
  int i, j, k, l, sum, diff;

  #pragma acc parallel loop
  for (int h = 0; h < 10; ++h)
    {
      #pragma acc loop reduction(+:sum)
      for (i = 0; i < 10; i++)
        for (j = 0; j < 10; j++)
          for (k = 0; k < 10; k++)
            sum = 1;

      #pragma acc loop collapse(2) reduction(+:sum)
      for (i = 0; i < 10; i++)
        for (j = 0; j < 10; j++)
          for (k = 0; k < 10; k++)
            sum = 1;

      #pragma acc loop reduction(+:sum)
      for (i = 0; i < 10; i++)
        #pragma acc loop reduction(+:sum)
        for (j = 0; j < 10; j++)
          for (k = 0; k < 10; k++)
            sum = 1;

      #pragma acc loop reduction(+:sum)
      for (i = 0; i < 10; i++)
        #pragma acc loop collapse(2) reduction(+:sum)
        for (j = 0; j < 10; j++)
          for (k = 0; k < 10; k++)
            sum = 1;

      #pragma acc loop reduction(+:sum)
      for (i = 0; i < 10; i++)
        for (j = 0; j < 10; j++)
          #pragma acc loop reduction(+:sum)
          for (k = 0; k < 10; k++)
            sum = 1;

      #pragma acc loop reduction(+:sum)
      for (i = 0; i < 10; i++)
        #pragma acc loop reduction(+:sum) // { dg-warning "insufficient partitioning available to parallelize loop" }
        for (j = 0; j < 10; j++)
          #pragma acc loop reduction(+:sum)
          for (k = 0; k < 10; k++)
            sum = 1;

      #pragma acc loop reduction(+:sum) reduction(-:diff)
      for (i = 0; i < 10; i++)
        {
          #pragma acc loop reduction(+:sum) // { dg-warning "insufficient partitioning available to parallelize loop" }
          for (j = 0; j < 10; j++)
            #pragma acc loop reduction(+:sum)
            for (k = 0; k < 10; k++)
              sum = 1;

          #pragma acc loop reduction(-:diff) // { dg-warning "insufficient partitioning available to parallelize loop" }
          for (j = 0; j < 10; j++)
            #pragma acc loop reduction(-:diff)
            for (k = 0; k < 10; k++)
              diff = 1;
        }
    }
}

/* The same tests as above, but now the outermost reduction clause is on
   the parallel region, not the outermost loop.  Some nests therefore
   have no loop directive at all, or a bare "loop" directive without a
   reduction clause, at one or more levels.  */

void acc_parallel_reduction (void)
{
  int i, j, k, sum, diff;

  #pragma acc parallel reduction(+:sum)
  {
    for (i = 0; i < 10; i++)
      for (j = 0; j < 10; j++)
        for (k = 0; k < 10; k++)
          sum = 1;

    for (i = 0; i < 10; i++)
      #pragma acc loop
      for (j = 0; j < 10; j++)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      for (j = 0; j < 10; j++)
        #pragma acc loop reduction(+:sum)
        for (k = 0; k < 10; k++)
          sum = 1;

    for (i = 0; i < 10; i++)
      for (j = 0; j < 10; j++)
        #pragma acc loop
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      #pragma acc loop reduction(+:sum)
      for (j = 0; j < 10; j++)
        #pragma acc loop reduction(+:sum)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum) reduction(-:diff)
    for (i = 0; i < 10; i++)
      {
        #pragma acc loop reduction(+:sum)
        for (j = 0; j < 10; j++)
          #pragma acc loop reduction(+:sum)
          for (k = 0; k < 10; k++)
            sum = 1;

        #pragma acc loop reduction(-:diff)
        for (j = 0; j < 10; j++)
          #pragma acc loop reduction(-:diff)
          for (k = 0; k < 10; k++)
            diff = 1;
      }

    /* As the previous nest, except that the outer loop names only sum;
       diff first appears on the inner directives.  */
    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      {
        #pragma acc loop reduction(+:sum)
        for (j = 0; j < 10; j++)
          #pragma acc loop reduction(+:sum)
          for (k = 0; k < 10; k++)
            sum = 1;

        #pragma acc loop reduction(-:diff)
        for (j = 0; j < 10; j++)
          #pragma acc loop reduction(-:diff)
          for (k = 0; k < 10; k++)
            diff = 1;
      }

    /* Here diff is named on the innermost directive only; the j loop
       above it has a bare "loop" directive.  */
    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      {
        #pragma acc loop reduction(+:sum)
        for (j = 0; j < 10; j++)
          #pragma acc loop reduction(+:sum)
          for (k = 0; k < 10; k++)
            sum = 1;

        #pragma acc loop
        for (j = 0; j < 10; j++)
          #pragma acc loop reduction(-:diff)
          for (k = 0; k < 10; k++)
            diff = 1;
      }
  }
}

/* The same tests as above, but using a combined parallel loop
   construct, and the outermost reduction clause is on that one, not the
   outermost loop.  As in acc_parallel_loop, the nests with three
   annotated levels carry a warning expectation on their middle
   directive.  */

void acc_parallel_loop_reduction (void)
{
  int i, j, k, sum, diff;

  #pragma acc parallel loop reduction(+:sum)
  for (int h = 0; h < 10; ++h)
    {
      for (i = 0; i < 10; i++)
        for (j = 0; j < 10; j++)
          for (k = 0; k < 10; k++)
            sum = 1;

      for (i = 0; i < 10; i++)
        #pragma acc loop
        for (j = 0; j < 10; j++)
          for (k = 0; k < 10; k++)
            sum = 1;

      #pragma acc loop reduction(+:sum)
      for (i = 0; i < 10; i++)
        for (j = 0; j < 10; j++)
          #pragma acc loop reduction(+:sum)
          for (k = 0; k < 10; k++)
            sum = 1;

      for (i = 0; i < 10; i++)
        for (j = 0; j < 10; j++)
          #pragma acc loop
          for (k = 0; k < 10; k++)
            sum = 1;

      #pragma acc loop reduction(+:sum)
      for (i = 0; i < 10; i++)
        #pragma acc loop reduction(+:sum) // { dg-warning "insufficient partitioning available to parallelize loop" }
        for (j = 0; j < 10; j++)
          #pragma acc loop reduction(+:sum)
          for (k = 0; k < 10; k++)
            sum = 1;

      #pragma acc loop reduction(+:sum) reduction(-:diff)
      for (i = 0; i < 10; i++)
        {
          #pragma acc loop reduction(+:sum) // { dg-warning "insufficient partitioning available to parallelize loop" }
          for (j = 0; j < 10; j++)
            #pragma acc loop reduction(+:sum)
            for (k = 0; k < 10; k++)
              sum = 1;

          #pragma acc loop reduction(-:diff) // { dg-warning "insufficient partitioning available to parallelize loop" }
          for (j = 0; j < 10; j++)
            #pragma acc loop reduction(-:diff)
            for (k = 0; k < 10; k++)
              diff = 1;
        }

      #pragma acc loop reduction(+:sum)
      for (i = 0; i < 10; i++)
        {
          #pragma acc loop reduction(+:sum) // { dg-warning "insufficient partitioning available to parallelize loop" }
          for (j = 0; j < 10; j++)
            #pragma acc loop reduction(+:sum)
            for (k = 0; k < 10; k++)
              sum = 1;

          #pragma acc loop reduction(-:diff) // { dg-warning "insufficient partitioning available to parallelize loop" }
          for (j = 0; j < 10; j++)
            #pragma acc loop reduction(-:diff)
            for (k = 0; k < 10; k++)
              diff = 1;
        }

      #pragma acc loop reduction(+:sum)
      for (i = 0; i < 10; i++)
        {
          #pragma acc loop reduction(+:sum) // { dg-warning "insufficient partitioning available to parallelize loop" }
          for (j = 0; j < 10; j++)
            #pragma acc loop reduction(+:sum)
            for (k = 0; k < 10; k++)
              sum = 1;

          #pragma acc loop // { dg-warning "insufficient partitioning available to parallelize loop" }
          for (j = 0; j < 10; j++)
            #pragma acc loop reduction(-:diff)
            for (k = 0; k < 10; k++)
              diff = 1;
        }
    }
}

/* The same tests as above, but inside a routine construct.  The nests
   match those of acc_parallel; a plain compound statement stands where
   the parallel region was.  */
#pragma acc routine gang
void acc_routine (void)
{
  int i, j, k, sum, diff;

  {
    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      for (j = 0; j < 10; j++)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop collapse(2) reduction(+:sum)
    for (i = 0; i < 10; i++)
      for (j = 0; j < 10; j++)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      #pragma acc loop reduction(+:sum)
      for (j = 0; j < 10; j++)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      #pragma acc loop collapse(2) reduction(+:sum)
      for (j = 0; j < 10; j++)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      for (j = 0; j < 10; j++)
        #pragma acc loop reduction(+:sum)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      #pragma acc loop reduction(+:sum)
      for (j = 0; j < 10; j++)
        #pragma acc loop reduction(+:sum)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum) reduction(-:diff)
    for (i = 0; i < 10; i++)
      {
        #pragma acc loop reduction(+:sum)
        for (j = 0; j < 10; j++)
          #pragma acc loop reduction(+:sum)
          for (k = 0; k < 10; k++)
            sum = 1;

        #pragma acc loop reduction(-:diff)
        for (j = 0; j < 10; j++)
          #pragma acc loop reduction(-:diff)
          for (k = 0; k < 10; k++)
            diff = 1;
      }
  }
}

/* Nested reductions on sum inside a "kernels" construct: a clause on
   the outermost loop only, and clauses repeated on inner loops.  */

void acc_kernels (void)
{
  int i, j, k, sum, diff;

  /* FIXME:  These tests are not meaningful yet because reductions in
     kernels regions are not supported yet.  */
  #pragma acc kernels
  {
    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      for (j = 0; j < 10; j++)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      #pragma acc loop reduction(+:sum)
      for (j = 0; j < 10; j++)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      for (j = 0; j < 10; j++)
        #pragma acc loop reduction(+:sum)
        for (k = 0; k < 10; k++)
          sum = 1;

    #pragma acc loop reduction(+:sum)
    for (i = 0; i < 10; i++)
      #pragma acc loop reduction(+:sum)
      for (j = 0; j < 10; j++)
        #pragma acc loop reduction(+:sum)
        for (k = 0; k < 10; k++)
          sum = 1;
  }
}