111
|
1 /* { dg-do compile } */
|
|
2 /* { dg-require-effective-target size32plus } */
|
|
3
|
|
/* 64-bit unsigned integer type, declared locally rather than taken from
   <stdint.h>.  Nothing in the visible part of this file refers to it.  */
typedef unsigned long long uint64_t;

/* A double whose declared alignment is raised to 16 bytes.  */
typedef double adouble __attribute__((__aligned__(16)));

/* Matrix order and blocking factor.  n is an exact multiple of tilesize
   (4096 == 32 * 128), so tiles of tilesize x tilesize cover an n x n
   matrix with no remainder.  */
#define n 4096
#define tilesize 128

/* Three n x n matrices of double, each placed on a 16-byte boundary.
   foo () reads A and B and accumulates into C.  */
double A[n][n] __attribute__((__aligned__(16)));
double B[n][n] __attribute__((__aligned__(16)));
double C[n][n] __attribute__((__aligned__(16)));
|
|
14
|
|
15 void foo ()
|
|
16 {
|
|
17 int ih, jh, kh, il, kl, jl;
|
|
18 for (ih = 0; ih < n; ih += tilesize)
|
|
19 for (jh = 0; jh < n; jh += tilesize)
|
|
20 for (kh = 0; kh < n; kh += tilesize)
|
|
21 for (il = 0; il < tilesize; ++il)
|
|
22 {
|
|
23 adouble *Ap = (adouble *)&A[ih+il][kh];
|
|
24 for (kl = 0; kl < tilesize; ++kl)
|
|
25 for (jl = 0; jl < tilesize; ++jl)
|
|
26 C[ih+il][jh+jl] += Ap[kl] * B[kh+kl][jh+jl];
|
|
27 }
|
|
28 }
|