annotate gcc/testsuite/c-c++-common/gomp/gridify-2.c @ 111:04ced10e8804

gcc 7
author kono
date Fri, 27 Oct 2017 22:46:09 +0900
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
111
kono
parents:
diff changeset
1 /* { dg-do compile } */
kono
parents:
diff changeset
2 /* { dg-require-effective-target offload_hsa } */
kono
parents:
diff changeset
3 /* { dg-options "-fopenmp -fdump-tree-omplower-details" } */
kono
parents:
diff changeset
4
kono
parents:
diff changeset
5 #define BLOCK_SIZE 16
kono
parents:
diff changeset
6
kono
parents:
diff changeset
7
kono
parents:
diff changeset
/* tiled_sgemm_tt: blocked single-precision matrix multiply-accumulate.
   For every BLOCK_SIZE x BLOCK_SIZE tile of C the body stages the matching
   tiles of A and B into the local arrays As/Bs, accumulates their product
   into Cval, and finally stores alpha*Cval + beta*C back into C.

   M, N, K  - bounds checked for C rows, C columns and the shared dimension.
   alpha    - scale applied to the accumulated product.
   A, LDA   - input, read as A[row*LDA + k].
   B, LDB   - input, read as B[k*LDB + col].
   beta     - scale applied to the value already stored in C.
   C, LDC   - read and written as C[row*LDC + col].

   This file is a compile-only test whose dg-final directive scans the
   omplower dump for the gridification message, so the pragma/loop nesting
   below is the construct under test; keep the statement order as it is.  */
8 void tiled_sgemm_tt(const int M, const int N, const int K, const float alpha, const float*A, const int LDA,
kono
parents:
diff changeset
9 const float*B, const int LDB, const float beta, float*C, const int LDC){
kono
parents:
diff changeset
10
kono
parents:
diff changeset
/* Offload the tile loops.
   NOTE(review): the map list items are written in element form (A[M*K],
   B[K*N], C[M*N]) rather than as [lower:length] sections, and C is mapped
   "from" although its old value is read below (beta*C) -- presumably
   acceptable because the test is only compiled; confirm before reusing.  */
11 #pragma omp target teams map(to:A[M*K],B[K*N]) map(from:C[M*N])
kono
parents:
diff changeset
/* collapse(2): the two tile-start loops that follow form one iteration
   space for the distribute construct.  */
12 #pragma omp distribute collapse(2)
kono
parents:
diff changeset
13 for (int C_row_start=0 ; C_row_start < M ; C_row_start+=BLOCK_SIZE)
kono
parents:
diff changeset
14 for (int C_col_start=0 ; C_col_start < N ; C_col_start+=BLOCK_SIZE)
kono
parents:
diff changeset
15 {
kono
parents:
diff changeset
16 // Each team has a local copy of these mini matrices
kono
parents:
diff changeset
17 float As[BLOCK_SIZE][BLOCK_SIZE];
kono
parents:
diff changeset
18 float Bs[BLOCK_SIZE][BLOCK_SIZE];
kono
parents:
diff changeset
/* C_row, C_col and Cval are declared inside the parallel region, so every
   thread gets its own copy; As and Bs are declared outside it and are
   shared by the threads of the region.  */
19 #pragma omp parallel
kono
parents:
diff changeset
20 {
kono
parents:
diff changeset
21 int C_row, C_col;
kono
parents:
diff changeset
22 float Cval = 0.0;
kono
parents:
diff changeset
23
kono
parents:
diff changeset
/* Walk the shared dimension in steps of BLOCK_SIZE.  */
24 for (int kblock = 0; kblock < K ; kblock += BLOCK_SIZE )
kono
parents:
diff changeset
25 {
kono
parents:
diff changeset
/* Stage 1: load the current tiles of A and B into As/Bs, storing 0 for
   every position that falls outside the M, N or K bound.  */
26 #pragma omp for collapse(2)
kono
parents:
diff changeset
27 for (int row=0 ; row < BLOCK_SIZE ; row++)
kono
parents:
diff changeset
28 for (int col=0 ; col < BLOCK_SIZE ; col++)
kono
parents:
diff changeset
29 {
kono
parents:
diff changeset
30 C_row = C_row_start + row;
kono
parents:
diff changeset
31 C_col = C_col_start + col;
kono
parents:
diff changeset
32 if ((C_row < M) && (kblock + col < K))
kono
parents:
diff changeset
33 As[row][col] = A[(C_row*LDA)+ kblock + col];
kono
parents:
diff changeset
34 else
kono
parents:
diff changeset
35 As[row][col] = 0;
kono
parents:
diff changeset
36 if ((kblock + row < K) && C_col < N)
kono
parents:
diff changeset
37 Bs[row][col] = B[((kblock+row)*LDB)+ C_col];
kono
parents:
diff changeset
38 else
kono
parents:
diff changeset
39 Bs[row][col] = 0;
kono
parents:
diff changeset
40 }
kono
parents:
diff changeset
41
kono
parents:
diff changeset
/* Stage 2: accumulate the product of the staged tiles into Cval.
   NOTE(review): Cval is one scalar per thread, so it sums every (row, col)
   iteration this worksharing loop assigns to that thread -- presumably the
   code expects each thread to own a single (row, col); confirm.  */
42 #pragma omp for collapse(2)
kono
parents:
diff changeset
43 for (int row=0 ; row < BLOCK_SIZE ; row++)
kono
parents:
diff changeset
44 for (int col=0 ; col < BLOCK_SIZE ; col++)
kono
parents:
diff changeset
45 {
kono
parents:
diff changeset
46 for (int e = 0; e < BLOCK_SIZE; ++e)
kono
parents:
diff changeset
47 Cval += As[row][e] * Bs[e][col];
kono
parents:
diff changeset
48 }
kono
parents:
diff changeset
49 } /* End for kblock .. */
kono
parents:
diff changeset
50
kono
parents:
diff changeset
51
kono
parents:
diff changeset
/* Stage 3: write the scaled result back, skipping positions outside the
   M x N bounds of C.  */
52 #pragma omp for collapse(2)
kono
parents:
diff changeset
53 for (int row=0 ; row < BLOCK_SIZE ; row++)
kono
parents:
diff changeset
54 for (int col=0 ; col < BLOCK_SIZE ; col++)
kono
parents:
diff changeset
55 {
kono
parents:
diff changeset
56 C_row = C_row_start + row;
kono
parents:
diff changeset
57 C_col = C_col_start + col;
kono
parents:
diff changeset
58 if ((C_row < M) && (C_col < N))
kono
parents:
diff changeset
59 C[(C_row*LDC)+C_col] = alpha*Cval + beta*C[(C_row*LDC)+C_col];
kono
parents:
diff changeset
60
kono
parents:
diff changeset
61 }
kono
parents:
diff changeset
62 } /* end parallel */
kono
parents:
diff changeset
63 } /* end target teams distribute */
kono
parents:
diff changeset
64 }
kono
parents:
diff changeset
65
kono
parents:
diff changeset
66 /* { dg-final { scan-tree-dump "Target construct will be turned into a gridified HSA kernel" "omplower" } } */