/* FPU-related code for x86 and x86_64 processors.
   Copyright (C) 2005-2020 Free Software Foundation, Inc.
   Contributed by Francois-Xavier Coudert <coudert@clipper.ens.fr>

This file is part of the GNU Fortran 95 runtime library (libgfortran).

Libgfortran is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.

Libgfortran is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

Under Section 7 of GPL version 3, you are granted additional
permissions described in the GCC Runtime Library Exception, version
3.1, as published by the Free Software Foundation.

You should have received a copy of the GNU General Public License and
a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
<http://www.gnu.org/licenses/>.  */

26 #ifndef __SSE_MATH__
|
|
27 #include "cpuid.h"
|
|
28 #endif
|
|
29
|
|
/* Report whether the processor supports SSE.  When the compiler is
   already generating SSE math, support is implied and no runtime
   check is needed; otherwise query CPUID leaf 1 and test the SSE
   feature bit in EDX.  */
static int
has_sse (void)
{
#ifdef __SSE_MATH__
  /* SSE is a prerequisite of the ABI we were compiled for.  */
  return 1;
#else
  unsigned int eax, ebx, ecx, edx;

  /* CPUID leaf 1 not available: conservatively report no SSE.  */
  if (__get_cpuid (1, &eax, &ebx, &ecx, &edx) == 0)
    return 0;

  return edx & bit_SSE;
#endif
}
|
|
44
|
|
/* i387 exceptions -- see linux <fpu_control.h> header file for details.
   The same six bit positions serve both as control-word mask bits and
   as status-word exception flags.  */
#define _FPU_MASK_IM  0x01   /* Invalid operation.  */
#define _FPU_MASK_DM  0x02   /* Denormalized operand.  */
#define _FPU_MASK_ZM  0x04   /* Division by zero.  */
#define _FPU_MASK_OM  0x08   /* Overflow.  */
#define _FPU_MASK_UM  0x10   /* Underflow.  */
#define _FPU_MASK_PM  0x20   /* Precision (inexact result).  */
#define _FPU_MASK_ALL 0x3f   /* All six mask bits.  */

#define _FPU_EX_ALL   0x3f   /* All six exception flag bits.  */

/* i387 rounding modes, as encoded in the 2-bit RC field.  */

#define _FPU_RC_NEAREST 0x0
#define _FPU_RC_DOWN    0x1
#define _FPU_RC_UP      0x2
#define _FPU_RC_ZERO    0x3

#define _FPU_RC_MASK    0x3

/* Enable flush to zero mode.  */

#define MXCSR_FTZ (1 << 15)
|
|
68
|
|
69
|
|
/* This structure corresponds to the layout of the block
   written by FSTENV.  Field sizes and order are dictated by the
   hardware and must not be changed.  */
typedef struct
{
  unsigned short int __control_word;   /* x87 control word (masks, RC).  */
  unsigned short int __unused1;
  unsigned short int __status_word;    /* x87 status word (exception flags).  */
  unsigned short int __unused2;
  unsigned short int __tags;           /* Register-stack tag word.  */
  unsigned short int __unused3;
  unsigned int __eip;                  /* Address of last FP instruction.  */
  unsigned short int __cs_selector;
  unsigned short int __opcode;
  unsigned int __data_offset;          /* Address of last FP operand.  */
  unsigned short int __data_selector;
  unsigned short int __unused5;
  unsigned int __mxcsr;                /* SSE control/status register; not part
					  of the FSTENV image, filled in
					  separately via stmxcsr.  */
}
my_fenv_t;

/* Check we can actually store the FPU state in the allocated size.  */
_Static_assert (sizeof(my_fenv_t) <= (size_t) GFC_FPE_STATE_BUFFER_SIZE,
		"GFC_FPE_STATE_BUFFER_SIZE is too small");
|
|
93
|
|
94
|
|
/* Raise the supported floating-point exceptions from EXCEPTS (given as
   _FPU_MASK_* bits).  Other bits in EXCEPTS are ignored.  Invalid,
   zero-divide and inexact are raised by performing a faulting
   arithmetic operation; denormal, overflow and underflow are raised by
   setting the corresponding status-word flag directly and executing
   fwait to deliver the pending exception.  Code originally borrowed
   from libatomic/config/x86/fenv.c.  */

static void
local_feraiseexcept (int excepts)
{
  if (excepts & _FPU_MASK_IM)
    {
      /* Invalid operation: compute 0.0 / 0.0.  */
      float f = 0.0f;
#ifdef __SSE_MATH__
      volatile float r __attribute__ ((unused));
      __asm__ __volatile__ ("%vdivss\t{%0, %d0|%d0, %0}" : "+x" (f));
      r = f; /* Needed to trigger exception.   */
#else
      __asm__ __volatile__ ("fdiv\t{%y0, %0|%0, %y0}" : "+t" (f));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  if (excepts & _FPU_MASK_DM)
    {
      /* Denormal: set the DM flag in the saved x87 environment, reload
	 it, and fwait to make the exception visible.  */
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_DM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_ZM)
    {
      /* Division by zero: compute 1.0 / 0.0.  */
      float f = 1.0f, g = 0.0f;
#ifdef __SSE_MATH__
      volatile float r __attribute__ ((unused));
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
      r = f; /* Needed to trigger exception.  */
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
  if (excepts & _FPU_MASK_OM)
    {
      /* Overflow: set the OM status flag directly, as for denormal.  */
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_OM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_UM)
    {
      /* Underflow: set the UM status flag directly, as for denormal.  */
      my_fenv_t temp;
      __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
      temp.__status_word |= _FPU_MASK_UM;
      __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
      __asm__ __volatile__ ("fwait");
    }
  if (excepts & _FPU_MASK_PM)
    {
      /* Inexact: compute 1.0 / 3.0, which is not representable.  */
      float f = 1.0f, g = 3.0f;
#ifdef __SSE_MATH__
      volatile float r __attribute__ ((unused));
      __asm__ __volatile__ ("%vdivss\t{%1, %d0|%d0, %1}" : "+x" (f) : "xm" (g));
      r = f; /* Needed to trigger exception.  */
#else
      __asm__ __volatile__ ("fdivs\t%1" : "+t" (f) : "m" (g));
      /* No need for fwait, exception is triggered by emitted fstp.  */
#endif
    }
}
|
|
163
|
|
164
|
|
165 void
|
|
166 set_fpu_trap_exceptions (int trap, int notrap)
|
|
167 {
|
|
168 int exc_set = 0, exc_clr = 0;
|
|
169 unsigned short cw;
|
|
170
|
|
171 if (trap & GFC_FPE_INVALID) exc_set |= _FPU_MASK_IM;
|
|
172 if (trap & GFC_FPE_DENORMAL) exc_set |= _FPU_MASK_DM;
|
|
173 if (trap & GFC_FPE_ZERO) exc_set |= _FPU_MASK_ZM;
|
|
174 if (trap & GFC_FPE_OVERFLOW) exc_set |= _FPU_MASK_OM;
|
|
175 if (trap & GFC_FPE_UNDERFLOW) exc_set |= _FPU_MASK_UM;
|
|
176 if (trap & GFC_FPE_INEXACT) exc_set |= _FPU_MASK_PM;
|
|
177
|
|
178 if (notrap & GFC_FPE_INVALID) exc_clr |= _FPU_MASK_IM;
|
|
179 if (notrap & GFC_FPE_DENORMAL) exc_clr |= _FPU_MASK_DM;
|
|
180 if (notrap & GFC_FPE_ZERO) exc_clr |= _FPU_MASK_ZM;
|
|
181 if (notrap & GFC_FPE_OVERFLOW) exc_clr |= _FPU_MASK_OM;
|
|
182 if (notrap & GFC_FPE_UNDERFLOW) exc_clr |= _FPU_MASK_UM;
|
|
183 if (notrap & GFC_FPE_INEXACT) exc_clr |= _FPU_MASK_PM;
|
|
184
|
|
185 __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
|
|
186
|
|
187 cw |= exc_clr;
|
|
188 cw &= ~exc_set;
|
|
189
|
|
190 __asm__ __volatile__ ("fnclex\n\tfldcw\t%0" : : "m" (cw));
|
|
191
|
|
192 if (has_sse())
|
|
193 {
|
|
194 unsigned int cw_sse;
|
|
195
|
|
196 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
|
|
197
|
|
198 /* The SSE exception masks are shifted by 7 bits. */
|
|
199 cw_sse |= (exc_clr << 7);
|
|
200 cw_sse &= ~(exc_set << 7);
|
|
201
|
|
202 /* Clear stalled exception flags. */
|
|
203 cw_sse &= ~_FPU_EX_ALL;
|
|
204
|
|
205 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
|
|
206 }
|
|
207 }
|
|
208
|
|
/* Initialize the FPU trap state from the trap bits recorded in the
   global options structure, leaving all other settings untouched.  */
void
set_fpu (void)
{
  set_fpu_trap_exceptions (options.fpe, 0);
}
|
|
214
|
|
215 int
|
|
216 get_fpu_trap_exceptions (void)
|
|
217 {
|
|
218 unsigned short cw;
|
|
219 int mask;
|
|
220 int res = 0;
|
|
221
|
|
222 __asm__ __volatile__ ("fstcw\t%0" : "=m" (cw));
|
|
223 mask = cw;
|
|
224
|
|
225 if (has_sse())
|
|
226 {
|
|
227 unsigned int cw_sse;
|
|
228
|
|
229 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
|
|
230
|
|
231 /* The SSE exception masks are shifted by 7 bits. */
|
|
232 mask |= (cw_sse >> 7);
|
|
233 }
|
|
234
|
|
235 mask = ~mask & _FPU_MASK_ALL;
|
|
236
|
|
237 if (mask & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
|
|
238 if (mask & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
|
|
239 if (mask & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
|
|
240 if (mask & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
|
|
241 if (mask & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
|
|
242 if (mask & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
|
|
243
|
|
244 return res;
|
|
245 }
|
|
246
|
|
/* Report whether trapping of the exception FLAG is supported; on x86
   every exception handled here can be trapped, so always return 1.  */
int
support_fpu_trap (int flag __attribute__((unused)))
{
  return 1;
}
|
|
252
|
|
253 int
|
|
254 get_fpu_except_flags (void)
|
|
255 {
|
|
256 unsigned short cw;
|
|
257 int excepts;
|
|
258 int res = 0;
|
|
259
|
|
260 __asm__ __volatile__ ("fnstsw\t%0" : "=am" (cw));
|
|
261 excepts = cw;
|
|
262
|
|
263 if (has_sse())
|
|
264 {
|
|
265 unsigned int cw_sse;
|
|
266
|
|
267 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
|
|
268 excepts |= cw_sse;
|
|
269 }
|
|
270
|
|
271 excepts &= _FPU_EX_ALL;
|
|
272
|
|
273 if (excepts & _FPU_MASK_IM) res |= GFC_FPE_INVALID;
|
|
274 if (excepts & _FPU_MASK_DM) res |= GFC_FPE_DENORMAL;
|
|
275 if (excepts & _FPU_MASK_ZM) res |= GFC_FPE_ZERO;
|
|
276 if (excepts & _FPU_MASK_OM) res |= GFC_FPE_OVERFLOW;
|
|
277 if (excepts & _FPU_MASK_UM) res |= GFC_FPE_UNDERFLOW;
|
|
278 if (excepts & _FPU_MASK_PM) res |= GFC_FPE_INEXACT;
|
|
279
|
|
280 return res;
|
|
281 }
|
|
282
|
|
283 void
|
|
284 set_fpu_except_flags (int set, int clear)
|
|
285 {
|
|
286 my_fenv_t temp;
|
|
287 int exc_set = 0, exc_clr = 0;
|
|
288
|
|
289 /* Translate from GFC_PE_* values to _FPU_MASK_* values. */
|
|
290 if (set & GFC_FPE_INVALID)
|
|
291 exc_set |= _FPU_MASK_IM;
|
|
292 if (clear & GFC_FPE_INVALID)
|
|
293 exc_clr |= _FPU_MASK_IM;
|
|
294
|
|
295 if (set & GFC_FPE_DENORMAL)
|
|
296 exc_set |= _FPU_MASK_DM;
|
|
297 if (clear & GFC_FPE_DENORMAL)
|
|
298 exc_clr |= _FPU_MASK_DM;
|
|
299
|
|
300 if (set & GFC_FPE_ZERO)
|
|
301 exc_set |= _FPU_MASK_ZM;
|
|
302 if (clear & GFC_FPE_ZERO)
|
|
303 exc_clr |= _FPU_MASK_ZM;
|
|
304
|
|
305 if (set & GFC_FPE_OVERFLOW)
|
|
306 exc_set |= _FPU_MASK_OM;
|
|
307 if (clear & GFC_FPE_OVERFLOW)
|
|
308 exc_clr |= _FPU_MASK_OM;
|
|
309
|
|
310 if (set & GFC_FPE_UNDERFLOW)
|
|
311 exc_set |= _FPU_MASK_UM;
|
|
312 if (clear & GFC_FPE_UNDERFLOW)
|
|
313 exc_clr |= _FPU_MASK_UM;
|
|
314
|
|
315 if (set & GFC_FPE_INEXACT)
|
|
316 exc_set |= _FPU_MASK_PM;
|
|
317 if (clear & GFC_FPE_INEXACT)
|
|
318 exc_clr |= _FPU_MASK_PM;
|
|
319
|
|
320
|
|
321 /* Change the flags. This is tricky on 387 (unlike SSE), because we have
|
|
322 FNSTSW but no FLDSW instruction. */
|
|
323 __asm__ __volatile__ ("fnstenv\t%0" : "=m" (temp));
|
|
324 temp.__status_word &= ~exc_clr;
|
|
325 __asm__ __volatile__ ("fldenv\t%0" : : "m" (temp));
|
|
326
|
|
327 /* Change the flags on SSE. */
|
|
328
|
|
329 if (has_sse())
|
|
330 {
|
|
331 unsigned int cw_sse;
|
|
332
|
|
333 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
|
|
334 cw_sse &= ~exc_clr;
|
|
335 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
|
|
336 }
|
|
337
|
|
338 local_feraiseexcept (exc_set);
|
|
339 }
|
|
340
|
|
/* Report whether the exception flag FLAG can be queried and set; all
   flags handled here are supported, so always return 1.  */
int
support_fpu_flag (int flag __attribute__((unused)))
{
  return 1;
}
|
|
346
|
|
347 void
|
|
348 set_fpu_rounding_mode (int round)
|
|
349 {
|
|
350 int round_mode;
|
|
351 unsigned short cw;
|
|
352
|
|
353 switch (round)
|
|
354 {
|
|
355 case GFC_FPE_TONEAREST:
|
|
356 round_mode = _FPU_RC_NEAREST;
|
|
357 break;
|
|
358 case GFC_FPE_UPWARD:
|
|
359 round_mode = _FPU_RC_UP;
|
|
360 break;
|
|
361 case GFC_FPE_DOWNWARD:
|
|
362 round_mode = _FPU_RC_DOWN;
|
|
363 break;
|
|
364 case GFC_FPE_TOWARDZERO:
|
|
365 round_mode = _FPU_RC_ZERO;
|
|
366 break;
|
|
367 default:
|
|
368 return; /* Should be unreachable. */
|
|
369 }
|
|
370
|
|
371 __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
|
|
372
|
|
373 /* The x87 round control bits are shifted by 10 bits. */
|
|
374 cw &= ~(_FPU_RC_MASK << 10);
|
|
375 cw |= round_mode << 10;
|
|
376
|
|
377 __asm__ __volatile__ ("fldcw\t%0" : : "m" (cw));
|
|
378
|
|
379 if (has_sse())
|
|
380 {
|
|
381 unsigned int cw_sse;
|
|
382
|
|
383 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
|
|
384
|
|
385 /* The SSE round control bits are shifted by 13 bits. */
|
|
386 cw_sse &= ~(_FPU_RC_MASK << 13);
|
|
387 cw_sse |= round_mode << 13;
|
|
388
|
|
389 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
|
|
390 }
|
|
391 }
|
|
392
|
|
393 int
|
|
394 get_fpu_rounding_mode (void)
|
|
395 {
|
|
396 int round_mode;
|
|
397
|
|
398 #ifdef __SSE_MATH__
|
|
399 unsigned int cw;
|
|
400
|
|
401 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw));
|
|
402
|
|
403 /* The SSE round control bits are shifted by 13 bits. */
|
|
404 round_mode = cw >> 13;
|
|
405 #else
|
|
406 unsigned short cw;
|
|
407
|
|
408 __asm__ __volatile__ ("fnstcw\t%0" : "=m" (cw));
|
|
409
|
|
410 /* The x87 round control bits are shifted by 10 bits. */
|
|
411 round_mode = cw >> 10;
|
|
412 #endif
|
|
413
|
|
414 round_mode &= _FPU_RC_MASK;
|
|
415
|
|
416 switch (round_mode)
|
|
417 {
|
|
418 case _FPU_RC_NEAREST:
|
|
419 return GFC_FPE_TONEAREST;
|
|
420 case _FPU_RC_UP:
|
|
421 return GFC_FPE_UPWARD;
|
|
422 case _FPU_RC_DOWN:
|
|
423 return GFC_FPE_DOWNWARD;
|
|
424 case _FPU_RC_ZERO:
|
|
425 return GFC_FPE_TOWARDZERO;
|
|
426 default:
|
|
427 return 0; /* Should be unreachable. */
|
|
428 }
|
|
429 }
|
|
430
|
|
/* Report whether rounding mode MODE is supported; all four IEEE modes
   are available on x86, so always return 1.  */
int
support_fpu_rounding_mode (int mode __attribute__((unused)))
{
  return 1;
}
|
|
436
|
|
/* Save the current FPU state into STATE, which must point to a buffer
   of at least GFC_FPE_STATE_BUFFER_SIZE bytes (checked by the
   _Static_assert on my_fenv_t).  Saves the x87 environment and, when
   SSE is available, MXCSR.  */
void
get_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  __asm__ __volatile__ ("fnstenv\t%0" : "=m" (*envp));

  /* fnstenv has the side effect of masking all exceptions, so we need
     to restore the control word after that.  */
  __asm__ __volatile__ ("fldcw\t%0" : : "m" (envp->__control_word));

  if (has_sse())
    __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (envp->__mxcsr));
}
|
|
451
|
|
/* Restore the FPU state previously saved by get_fpu_state from STATE:
   reload the x87 environment and, when SSE is available, MXCSR.  */
void
set_fpu_state (void *state)
{
  my_fenv_t *envp = state;

  /* glibc sources (sysdeps/x86_64/fpu/fesetenv.c) do something more
     complex than this, but I think it suffices in our case.  */
  __asm__ __volatile__ ("fldenv\t%0" : : "m" (*envp));

  if (has_sse())
    __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (envp->__mxcsr));
}
|
|
464
|
|
465
|
|
/* Report whether abrupt/gradual underflow control is available for
   real kind KIND.  The FTZ bit lives in MXCSR, so SSE is required,
   and only kinds 4 and 8 are handled there.  */
int
support_fpu_underflow_control (int kind)
{
  if ((kind != 4 && kind != 8) || !has_sse ())
    return 0;

  return 1;
}
|
|
474
|
|
475
|
|
476 int
|
|
477 get_fpu_underflow_mode (void)
|
|
478 {
|
|
479 unsigned int cw_sse;
|
|
480
|
|
481 if (!has_sse())
|
|
482 return 1;
|
|
483
|
|
484 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
|
|
485
|
|
486 /* Return 0 for abrupt underflow (flush to zero), 1 for gradual underflow. */
|
|
487 return (cw_sse & MXCSR_FTZ) ? 0 : 1;
|
|
488 }
|
|
489
|
|
490
|
|
491 void
|
|
492 set_fpu_underflow_mode (int gradual)
|
|
493 {
|
|
494 unsigned int cw_sse;
|
|
495
|
|
496 if (!has_sse())
|
|
497 return;
|
|
498
|
|
499 __asm__ __volatile__ ("%vstmxcsr\t%0" : "=m" (cw_sse));
|
|
500
|
|
501 if (gradual)
|
|
502 cw_sse &= ~MXCSR_FTZ;
|
|
503 else
|
|
504 cw_sse |= MXCSR_FTZ;
|
|
505
|
|
506 __asm__ __volatile__ ("%vldmxcsr\t%0" : : "m" (cw_sse));
|
|
507 }
|
|
508
|