annotate libgfortran/m4/matmul.m4 @ 158:494b0b89df80 default tip

...
author Shinji KONO <kono@ie.u-ryukyu.ac.jp>
date Mon, 25 May 2020 18:13:55 +0900
parents 1830386684a0
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
111
kono
parents:
diff changeset
1 `/* Implementation of the MATMUL intrinsic
145
1830386684a0 gcc-9.2.0
anatofuz
parents: 131
diff changeset
2 Copyright (C) 2002-2020 Free Software Foundation, Inc.
111
kono
parents:
diff changeset
3 Contributed by Paul Brook <paul@nowt.org>
kono
parents:
diff changeset
4
kono
parents:
diff changeset
5 This file is part of the GNU Fortran runtime library (libgfortran).
kono
parents:
diff changeset
6
kono
parents:
diff changeset
7 Libgfortran is free software; you can redistribute it and/or
kono
parents:
diff changeset
8 modify it under the terms of the GNU General Public
kono
parents:
diff changeset
9 License as published by the Free Software Foundation; either
kono
parents:
diff changeset
10 version 3 of the License, or (at your option) any later version.
kono
parents:
diff changeset
11
kono
parents:
diff changeset
12 Libgfortran is distributed in the hope that it will be useful,
kono
parents:
diff changeset
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
kono
parents:
diff changeset
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
kono
parents:
diff changeset
15 GNU General Public License for more details.
kono
parents:
diff changeset
16
kono
parents:
diff changeset
17 Under Section 7 of GPL version 3, you are granted additional
kono
parents:
diff changeset
18 permissions described in the GCC Runtime Library Exception, version
kono
parents:
diff changeset
19 3.1, as published by the Free Software Foundation.
kono
parents:
diff changeset
20
kono
parents:
diff changeset
21 You should have received a copy of the GNU General Public License and
kono
parents:
diff changeset
22 a copy of the GCC Runtime Library Exception along with this program;
kono
parents:
diff changeset
23 see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
kono
parents:
diff changeset
24 <http://www.gnu.org/licenses/>. */
kono
parents:
diff changeset
25
kono
parents:
diff changeset
26 #include "libgfortran.h"
kono
parents:
diff changeset
27 #include <string.h>
kono
parents:
diff changeset
28 #include <assert.h>'
kono
parents:
diff changeset
29
kono
parents:
diff changeset
30 include(iparm.m4)dnl
kono
parents:
diff changeset
31
kono
parents:
diff changeset
32 `#if defined (HAVE_'rtype_name`)
kono
parents:
diff changeset
33
kono
parents:
diff changeset
34 /* Prototype for the BLAS ?gemm subroutine, a pointer to which can be
kono
parents:
diff changeset
35 passed to us by the front-end, in which case we call it for large
kono
parents:
diff changeset
36 matrices. */
kono
parents:
diff changeset
37
kono
parents:
diff changeset
38 typedef void (*blas_call)(const char *, const char *, const int *, const int *,
kono
parents:
diff changeset
39 const int *, const 'rtype_name` *, const 'rtype_name` *,
kono
parents:
diff changeset
40 const int *, const 'rtype_name` *, const int *,
kono
parents:
diff changeset
41 const 'rtype_name` *, 'rtype_name` *, const int *,
kono
parents:
diff changeset
42 int, int);
kono
parents:
diff changeset
43
kono
parents:
diff changeset
44 /* The order of loops is different in the case of plain matrix
kono
parents:
diff changeset
45 multiplication C=MATMUL(A,B), and in the frequent special case where
kono
parents:
diff changeset
46 the argument A is the temporary result of a TRANSPOSE intrinsic:
kono
parents:
diff changeset
47 C=MATMUL(TRANSPOSE(A),B). Transposed temporaries are detected by
kono
parents:
diff changeset
48 looking at their strides.
kono
parents:
diff changeset
49
kono
parents:
diff changeset
50 The equivalent Fortran pseudo-code is:
kono
parents:
diff changeset
51
kono
parents:
diff changeset
52 DIMENSION A(M,COUNT), B(COUNT,N), C(M,N)
kono
parents:
diff changeset
53 IF (.NOT.IS_TRANSPOSED(A)) THEN
kono
parents:
diff changeset
54 C = 0
kono
parents:
diff changeset
55 DO J=1,N
kono
parents:
diff changeset
56 DO K=1,COUNT
kono
parents:
diff changeset
57 DO I=1,M
kono
parents:
diff changeset
58 C(I,J) = C(I,J)+A(I,K)*B(K,J)
kono
parents:
diff changeset
59 ELSE
kono
parents:
diff changeset
60 DO J=1,N
kono
parents:
diff changeset
61 DO I=1,M
kono
parents:
diff changeset
62 S = 0
kono
parents:
diff changeset
63 DO K=1,COUNT
kono
parents:
diff changeset
64 S = S+A(I,K)*B(K,J)
kono
parents:
diff changeset
65 C(I,J) = S
kono
parents:
diff changeset
66 ENDIF
kono
parents:
diff changeset
67 */
kono
parents:
diff changeset
68
kono
parents:
diff changeset
69 /* If try_blas is set to a nonzero value, then the matmul function will
kono
parents:
diff changeset
70 see if there is a way to perform the matrix multiplication by a call
kono
parents:
diff changeset
71 to the BLAS gemm function. */
kono
parents:
diff changeset
72
kono
parents:
diff changeset
73 extern void matmul_'rtype_code` ('rtype` * const restrict retarray,
kono
parents:
diff changeset
74 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
kono
parents:
diff changeset
75 int blas_limit, blas_call gemm);
kono
parents:
diff changeset
76 export_proto(matmul_'rtype_code`);
kono
parents:
diff changeset
77
kono
parents:
diff changeset
78 /* Put exhaustive list of possible architectures here, ORed together. */
kono
parents:
diff changeset
79
kono
parents:
diff changeset
80 #if defined(HAVE_AVX) || defined(HAVE_AVX2) || defined(HAVE_AVX512F)
kono
parents:
diff changeset
81
kono
parents:
diff changeset
82 #ifdef HAVE_AVX
kono
parents:
diff changeset
83 'define(`matmul_name',`matmul_'rtype_code`_avx')dnl
kono
parents:
diff changeset
84 `static void
kono
parents:
diff changeset
85 'matmul_name` ('rtype` * const restrict retarray,
kono
parents:
diff changeset
86 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
kono
parents:
diff changeset
87 int blas_limit, blas_call gemm) __attribute__((__target__("avx")));
kono
parents:
diff changeset
88 static' include(matmul_internal.m4)dnl
kono
parents:
diff changeset
89 `#endif /* HAVE_AVX */
kono
parents:
diff changeset
90
kono
parents:
diff changeset
91 #ifdef HAVE_AVX2
kono
parents:
diff changeset
92 'define(`matmul_name',`matmul_'rtype_code`_avx2')dnl
kono
parents:
diff changeset
93 `static void
kono
parents:
diff changeset
94 'matmul_name` ('rtype` * const restrict retarray,
kono
parents:
diff changeset
95 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
kono
parents:
diff changeset
96 int blas_limit, blas_call gemm) __attribute__((__target__("avx2,fma")));
kono
parents:
diff changeset
97 static' include(matmul_internal.m4)dnl
kono
parents:
diff changeset
98 `#endif /* HAVE_AVX2 */
kono
parents:
diff changeset
99
kono
parents:
diff changeset
100 #ifdef HAVE_AVX512F
kono
parents:
diff changeset
101 'define(`matmul_name',`matmul_'rtype_code`_avx512f')dnl
kono
parents:
diff changeset
102 `static void
kono
parents:
diff changeset
103 'matmul_name` ('rtype` * const restrict retarray,
kono
parents:
diff changeset
104 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
kono
parents:
diff changeset
105 int blas_limit, blas_call gemm) __attribute__((__target__("avx512f")));
kono
parents:
diff changeset
106 static' include(matmul_internal.m4)dnl
kono
parents:
diff changeset
107 `#endif /* HAVE_AVX512F */
kono
parents:
diff changeset
108
kono
parents:
diff changeset
109 /* AMD-specific functions with AVX128 and FMA3/FMA4. */
kono
parents:
diff changeset
110
kono
parents:
diff changeset
111 #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
kono
parents:
diff changeset
112 'define(`matmul_name',`matmul_'rtype_code`_avx128_fma3')dnl
kono
parents:
diff changeset
113 `void
kono
parents:
diff changeset
114 'matmul_name` ('rtype` * const restrict retarray,
kono
parents:
diff changeset
115 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
kono
parents:
diff changeset
116 int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma")));
kono
parents:
diff changeset
117 internal_proto('matmul_name`);
kono
parents:
diff changeset
118 #endif
kono
parents:
diff changeset
119
kono
parents:
diff changeset
120 #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
kono
parents:
diff changeset
121 'define(`matmul_name',`matmul_'rtype_code`_avx128_fma4')dnl
kono
parents:
diff changeset
122 `void
kono
parents:
diff changeset
123 'matmul_name` ('rtype` * const restrict retarray,
kono
parents:
diff changeset
124 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
kono
parents:
diff changeset
125 int blas_limit, blas_call gemm) __attribute__((__target__("avx,fma4")));
kono
parents:
diff changeset
126 internal_proto('matmul_name`);
kono
parents:
diff changeset
127 #endif
kono
parents:
diff changeset
128
kono
parents:
diff changeset
129 /* Function to fall back to if there is no special processor-specific version. */
kono
parents:
diff changeset
130 'define(`matmul_name',`matmul_'rtype_code`_vanilla')dnl
kono
parents:
diff changeset
131 `static' include(matmul_internal.m4)dnl
kono
parents:
diff changeset
132
kono
parents:
diff changeset
133 `/* Compiling main function, with selection code for the processor. */
kono
parents:
diff changeset
134
kono
parents:
diff changeset
135 /* Currently, this is i386 only. Adjust for other architectures. */
kono
parents:
diff changeset
136
kono
parents:
diff changeset
137 #include <config/i386/cpuinfo.h>
kono
parents:
diff changeset
138 void matmul_'rtype_code` ('rtype` * const restrict retarray,
kono
parents:
diff changeset
139 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
kono
parents:
diff changeset
140 int blas_limit, blas_call gemm)
kono
parents:
diff changeset
141 {
kono
parents:
diff changeset
142 static void (*matmul_p) ('rtype` * const restrict retarray,
kono
parents:
diff changeset
143 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
kono
parents:
diff changeset
144 int blas_limit, blas_call gemm);
kono
parents:
diff changeset
145
kono
parents:
diff changeset
146 void (*matmul_fn) ('rtype` * const restrict retarray,
kono
parents:
diff changeset
147 'rtype` * const restrict a, 'rtype` * const restrict b, int try_blas,
kono
parents:
diff changeset
148 int blas_limit, blas_call gemm);
kono
parents:
diff changeset
149
kono
parents:
diff changeset
150 matmul_fn = __atomic_load_n (&matmul_p, __ATOMIC_RELAXED);
kono
parents:
diff changeset
151 if (matmul_fn == NULL)
kono
parents:
diff changeset
152 {
kono
parents:
diff changeset
153 matmul_fn = matmul_'rtype_code`_vanilla;
kono
parents:
diff changeset
154 if (__cpu_model.__cpu_vendor == VENDOR_INTEL)
kono
parents:
diff changeset
155 {
kono
parents:
diff changeset
156 /* Run down the available processors in order of preference. */
kono
parents:
diff changeset
157 #ifdef HAVE_AVX512F
kono
parents:
diff changeset
158 if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX512F))
kono
parents:
diff changeset
159 {
kono
parents:
diff changeset
160 matmul_fn = matmul_'rtype_code`_avx512f;
kono
parents:
diff changeset
161 goto store;
kono
parents:
diff changeset
162 }
kono
parents:
diff changeset
163
kono
parents:
diff changeset
164 #endif /* HAVE_AVX512F */
kono
parents:
diff changeset
165
kono
parents:
diff changeset
166 #ifdef HAVE_AVX2
kono
parents:
diff changeset
167 if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX2))
kono
parents:
diff changeset
168 && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
kono
parents:
diff changeset
169 {
kono
parents:
diff changeset
170 matmul_fn = matmul_'rtype_code`_avx2;
kono
parents:
diff changeset
171 goto store;
kono
parents:
diff changeset
172 }
kono
parents:
diff changeset
173
kono
parents:
diff changeset
174 #endif
kono
parents:
diff changeset
175
kono
parents:
diff changeset
176 #ifdef HAVE_AVX
kono
parents:
diff changeset
177 if (__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
kono
parents:
diff changeset
178 {
kono
parents:
diff changeset
179 matmul_fn = matmul_'rtype_code`_avx;
kono
parents:
diff changeset
180 goto store;
kono
parents:
diff changeset
181 }
kono
parents:
diff changeset
182 #endif /* HAVE_AVX */
kono
parents:
diff changeset
183 }
kono
parents:
diff changeset
184 else if (__cpu_model.__cpu_vendor == VENDOR_AMD)
kono
parents:
diff changeset
185 {
kono
parents:
diff changeset
186 #if defined(HAVE_AVX) && defined(HAVE_FMA3) && defined(HAVE_AVX128)
kono
parents:
diff changeset
187 if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
kono
parents:
diff changeset
188 && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA)))
kono
parents:
diff changeset
189 {
kono
parents:
diff changeset
190 matmul_fn = matmul_'rtype_code`_avx128_fma3;
kono
parents:
diff changeset
191 goto store;
kono
parents:
diff changeset
192 }
kono
parents:
diff changeset
193 #endif
kono
parents:
diff changeset
194 #if defined(HAVE_AVX) && defined(HAVE_FMA4) && defined(HAVE_AVX128)
kono
parents:
diff changeset
195 if ((__cpu_model.__cpu_features[0] & (1 << FEATURE_AVX))
kono
parents:
diff changeset
196 && (__cpu_model.__cpu_features[0] & (1 << FEATURE_FMA4)))
kono
parents:
diff changeset
197 {
kono
parents:
diff changeset
198 matmul_fn = matmul_'rtype_code`_avx128_fma4;
kono
parents:
diff changeset
199 goto store;
kono
parents:
diff changeset
200 }
kono
parents:
diff changeset
201 #endif
kono
parents:
diff changeset
202
kono
parents:
diff changeset
203 }
kono
parents:
diff changeset
204 store:
kono
parents:
diff changeset
205 __atomic_store_n (&matmul_p, matmul_fn, __ATOMIC_RELAXED);
kono
parents:
diff changeset
206 }
kono
parents:
diff changeset
207
kono
parents:
diff changeset
208 (*matmul_fn) (retarray, a, b, try_blas, blas_limit, gemm);
kono
parents:
diff changeset
209 }
kono
parents:
diff changeset
210
kono
parents:
diff changeset
211 #else /* Just the vanilla function. */
kono
parents:
diff changeset
212
kono
parents:
diff changeset
213 'define(`matmul_name',`matmul_'rtype_code)dnl
kono
parents:
diff changeset
214 define(`target_attribute',`')dnl
kono
parents:
diff changeset
215 include(matmul_internal.m4)dnl
kono
parents:
diff changeset
216 `#endif
kono
parents:
diff changeset
217 #endif
kono
parents:
diff changeset
218 '