changeset 775:97a514cf6ad3 before-simple-task

simd
author yutaka@henri.cr.ie.u-ryukyu.ac.jp
date Tue, 16 Feb 2010 15:09:54 +0900
parents 5102e1751d6f
children 4455e7b0caf3
files Renderer/Engine/spe/DrawSpan.cc
diffstat 1 files changed, 43 insertions(+), 48 deletions(-) [+]
line wrap: on
line diff
--- a/Renderer/Engine/spe/DrawSpan.cc	Tue Feb 16 15:00:50 2010 +0900
+++ b/Renderer/Engine/spe/DrawSpan.cc	Tue Feb 16 15:09:54 2010 +0900
@@ -57,40 +57,40 @@
                                   SchedTask *smanager,int x, int y, float z, int world_x, int world_y, float world_z);
 
 
-void
-normalize(vector float *v0, vector float *v1)
+
+static inline void
+normalize(vector float& v0, vector float& v1)
 {
-    float norm;
-    float ret[4] __attribute__((aligned(16)));
-    vector float *vret = (vector float *) ret;
-    *vret = spu_mul(*v0,*v1);
+  float norm;
+  vector float ret __attribute__((aligned(16))) = {0,0,0,0};
 
-    norm = (ret[0] + ret[1] + ret[2]);
-    
-    *vret = (vector float)spu_splats(norm);
-    *vret = spu_rsqrte(*vret);
-    *v0 = spu_mul(*v1,*vret);
+  ret = spu_mul(v0,v1);
+  norm = (ret[0] + ret[1] + ret[2]);
+
+  ret = (vector float)spu_splats(norm);
+  ret = spu_rsqrte(ret);
+  v0 = spu_mul(v1,ret);
 }
 
-static float
-innerProduct1(vector float *v0, vector float *v1)
+static inline float
+innerProduct1(vector float& v0, vector float& v1)
 {
 
-    float ret[4] __attribute__((aligned(16)));
-    float inner;
-    vector float *vret = (vector float *) ret;
-    *vret = spu_mul(*v0,*v1);
-    
-    inner = (ret[0] + ret[1] + ret[2]);
-    if (inner < 0) {
-      inner = 0;
-    }
-    
-    return inner;
+  vector float ret __attribute__((aligned(16))) = {0,0,0,0};
+  float inner;
+  ret = spu_mul(v0,v1);
+
+  inner = (ret[0] + ret[1] + ret[2]);
+  if (inner < 0) {
+    inner = 0;
+  }
+
+  return inner;
 }
 
 
 
+
 /**
  * テクスチャは、TEXTURE_SPLIT_PIXEL^2 のブロックに分割する
  *
@@ -433,50 +433,45 @@
 #endif
 
 
+    vector float *light_xyz = (vector float*)smanager->global_get(Light);
+
     vector float v_rgb __attribute__((aligned(16))) = {(float)rgb[0],(float)rgb[1],(float)rgb[2],0};
-    int light_rgb;
-    float normal_vector[4] __attribute__((aligned(16))) = {normal_x,normal_y,normal_z,0};
-    vector float *vnormal_vector = (vector float *) normal_vector;
-    float light_vector[4];
-    vector float *vlight_vector = (vector float *) light_vector;
-    float inner_product;
-    float *light_xyz = (float*)smanager->global_get(Light);
-    vector float *vlight_xyz = (vector float *) light_xyz;
+    vector float normal_vector __attribute__((aligned(16))) = {normal_x,normal_y,normal_z,0};
+    vector float light_vector __attribute__((aligned(16))) = {0,0,0,0};
     vector float v_inner __attribute__((aligned(16)));
-
     vector float v_world[4] __attribute__((aligned(16))) = {{world_x, world_y, -world_z, 0},
                                                             {world_x, world_y, -world_z, 0},
                                                             {world_x, world_y, -world_z, 0},
                                                             {0,       0,        0,       0}};
 
 
-    normalize(vnormal_vector, vnormal_vector);
-
+    int light_rgb;
+    float inner_product;
 
-    float tmp_rgb[4] __attribute__((aligned(16))) = {0,0,0,0};
-    vector float *vtmp_rgb = (vector float *) tmp_rgb;
+    normalize(normal_vector, normal_vector);
+
+    vector float vtmp_rgb __attribute__((aligned(16))) = {0,0,0,0};
 
     int light_num = 4;
 
     for (int i = 0; i < light_num; i++) {
 
-      *vlight_vector = spu_sub(v_world[i],vlight_xyz[i]);
-      normalize(vlight_vector, vlight_vector);
-      inner_product = innerProduct1(vnormal_vector,vlight_vector);
+      light_vector = spu_sub(v_world[i],light_xyz[i]);
+      normalize(light_vector, light_vector);
+      inner_product = innerProduct1(normal_vector,light_vector);
       v_inner = spu_splats(inner_product);
-      *vtmp_rgb = spu_madd(v_rgb,v_inner,*vtmp_rgb);
+      vtmp_rgb = spu_madd(v_rgb,v_inner,vtmp_rgb);
 
     }
 
     vector unsigned int v_flag __attribute__((aligned(16)));
-    vector float max_rgb __attribute__((aligned(16))) = (vector float)spu_splats((float)255);
+    vector float max_rgb __attribute__((aligned(16))) = (vector float)spu_splats((float)255.0f);
 
-    v_flag = spu_cmpgt(max_rgb,*vtmp_rgb);
-    *vtmp_rgb = spu_sel(max_rgb,*vtmp_rgb,v_flag);
-    
-    vector unsigned int vlast_rgb __attribute__((aligned(16)));
-    vlast_rgb = spu_convtu(*vtmp_rgb,0); 
-    unsigned int *last_rgb = (unsigned int*) &vlast_rgb;
+    v_flag = spu_cmpgt(max_rgb,vtmp_rgb);
+    vtmp_rgb = spu_sel(max_rgb,vtmp_rgb,v_flag);
+
+    vector unsigned int last_rgb __attribute__((aligned(16)));
+    last_rgb = spu_convtu(vtmp_rgb,0);
 
 
     //計算した rgb を light_rgb にまとめる。