Mercurial > hg > CbC > CbC_gcc
comparison gcc/testsuite/g++.dg/pr80481.C @ 131:84e7813d76e9
gcc-8.2
author | mir3636 |
---|---|
date | Thu, 25 Oct 2018 07:37:49 +0900 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
111:04ced10e8804 | 131:84e7813d76e9 |
---|---|
1 // { dg-do compile { target { i?86-*-* x86_64-*-* } && { ! *-*-solaris* } } } | |
2 // { dg-options "-Ofast -funroll-loops -fopenmp -march=knl" } | |
3 // { dg-final { scan-assembler-not "vmovaps" } } | |
4 | |
5 #include <math.h> | |
6 | |
7 #include <xmmintrin.h> | |
8 | |
9 #define max(a, b) ( (a) > (b) ? (a) : (b) ) /* function-like macro, not a function: whichever argument is selected gets evaluated twice */ | |
10 | |
11 struct Sdata { /* One input record consumed by foo(); all five fields are floats. */ | |
12 float w; /* read as x3 in foo: the value the y2*y3 products are subtracted from */ | |
13 float s; /* read as x2 in foo: becomes y2[0], the start of the u-scaled sequence */ | |
14 float r; /* read as x1 in foo: multiplied by t/N2 inside exp() to form a and m */ | |
15 float t; /* read as x5 in foo: divided by N2 before any other use */ | |
16 float v; /* read as x4 in foo: multiplied by sqrt(t/N2) inside exp() to form u and d */ | |
17 }; | |
18 extern int N1, N2, N3; /* defined outside this file; in foo, N1 is the element count of each scratch buffer, N2 bounds the inner loops, N3 is the number of records processed */ | |
19 | |
20 #define func(p, up, down) ((p)*(up) + (1.0f-(p)) * (down)) /* weighted blend: weight p on up, weight (1 - p) on down; p is expanded twice */ | |
21 | |
22 void foo (Sdata *in, int idx, float *out) /* NOTE(review): the recurrence resembles a binomial-lattice pricing kernel; SOURCE does not name it, so confirm before relying on that reading */ | |
23 { /* For each k in [idx, idx + N3): derives u, d, a, m, p from in[k], runs a backward sweep over scratch arrays, and stores the final y1[0] in out[k]. */ | |
24 float* y1 = (float*)_mm_malloc(sizeof(float) * N1,16); /* four scratch buffers of N1 floats each, requested with alignment 16; none of the results is checked for NULL */ | |
25 float* y2 = (float*)_mm_malloc(sizeof(float) * N1,16); | |
26 float* y3 = (float*)_mm_malloc(sizeof(float) * N1,16); | |
27 float* y4 = (float*)_mm_malloc(sizeof(float) * N1,16); | |
28 /* NOTE(review): the loops below index y1, y2 and y3 up to N2 inclusive, so N1 > N2 is assumed; nothing here checks it. */ | |
29 for (int k = idx; k < idx + N3; k++) { /* k indexes both in[] and out[]; the scratch buffers are reused for every record */ | |
30 float x1 = in[k].r; | |
31 float x2 = in[k].s; | |
32 float x3 = in[k].w; | |
33 float x4 = in[k].v; | |
34 float x5 = in[k].t; | |
35 x5 /= N2; /* from here on x5 holds t / N2 */ | |
36 float u = exp(x4 * sqrt(x5)); | |
37 float d = exp(-x4 * sqrt(x5)); /* same exponent as u with the sign flipped, so mathematically d = 1 / u */ | |
38 float a = exp(x1 * x5); | |
39 float m = exp(-x1 * x5); /* mathematically 1 / a; applied as a scale factor to every blended value below */ | |
40 float p = (a - d) / (u - d); /* blend weight handed to func(); no guard against u == d */ | |
41 y2[0] = x2; | |
42 y3[0] = float(1.f); | |
43 for (int i = 1; i <= N2; i++) { /* running products: y2[i] = x2 * u^i and y3[i] = d^i; each step depends on the previous one */ | |
44 y2[i] = u * y2[i - 1]; | |
45 y3[i] = d * y3[i - 1]; | |
46 } | |
47 #pragma omp simd | |
48 for (int i = 0; i <= N2; i++) { /* seed y1[0..N2] for the backward sweep; values are floored at zero */ | |
49 y1[i] = | |
50 max((x3 - y2[N2 - i] * y3[i]), float(0.f)); | |
51 } | |
52 for (int i = N2 - 1; i >= 0; i--) { /* backward sweep: each pass rewrites y1[0..i] from the previous pass's y1[0..i+1] */ | |
53 #pragma omp simd | |
54 for (int j = 0; j <= i; j++) { /* first pass writes only y4, so every y1[j+1] read here still holds the previous pass's value */ | |
55 y4[j] = func(p,y1[j],y1[j+1]) * m; | |
56 } | |
57 #pragma omp simd | |
58 for (int j = 0; j <= i; j++) { /* second pass overwrites y1 once all blended values are in y4 */ | |
59 float t1 = y2[i - j] * y3[j]; | |
60 float t2 = max(x3 - t1, float(0.f)); | |
61 y1[j] = max(t2, y4[j]); /* keep the larger of the zero-floored difference and the blended value */ | |
62 } | |
63 } | |
64 out[k] = y1[0]; /* after the last pass (i == 0) only y1[0] is kept as the result for record k */ | |
65 } | |
66 _mm_free(y1); /* release the scratch buffers obtained from _mm_malloc above */ | |
67 _mm_free(y2); | |
68 _mm_free(y3); | |
69 _mm_free(y4); | |
70 } |