changeset 1977:126b28fdae50 draft

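Fix CUDA sample: reduce THREAD from 1000 to 100, and issue each host-to-device copy, kernel launch, and device-to-host copy on the same stream within a single loop (the kernel was previously launched on stream 0)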
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Tue, 04 Mar 2014 18:11:06 +0900
parents a8f4227d6a21
children 8fbe022126e1
files example/Cuda/main.cc example/Cuda/multiply.cu
diffstat 2 files changed, 18 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/example/Cuda/main.cc	Mon Mar 03 19:12:02 2014 +0900
+++ b/example/Cuda/main.cc	Tue Mar 04 18:11:06 2014 +0900
@@ -4,7 +4,7 @@
 #include <cuda.h>
 
 #define LENGTH 10000
-#define THREAD 1000
+#define THREAD 100
 
 static double
 getTime() {
@@ -81,16 +81,16 @@
 
     // Synchronous data transfer(host to device)
     cuMemcpyHtoD(devA, A, LENGTH*THREAD*sizeof(float));
-
+    
     // Asynchronous data transfer(host to device)
     int cur = 0;
 
-    for (int i=0;i<num_exec;i++,cur++) {
-        if (num_stream <= cur)
-            cur = 0;
-        B[i] = (float)(i+1);
-        cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]);
-    }
+    // for (int i=0;i<num_exec;i++,cur++) {
+    //     if (num_stream <= cur)
+    //         cur = 0;
+    //     B[i] = (float)(i+1);
+    //     cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]);
+    // }
 
     cur = 0;
 
@@ -98,22 +98,25 @@
     for (int i=0;i<num_exec;i++,cur++) {
         if (num_stream <= cur)
             cur=0;
+        B[i] = (float)(i+1);
+        cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]);
         void* args[] = {&devA, &devB[i], &devOut[i]};
         cuLaunchKernel(function,
                        LENGTH, 1, 1,
                        THREAD, 1, 1,
-                       0, 0, args, NULL);
+                       0, stream[cur], args, NULL);
+        cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]);
     }
 
     cur = 0;
 
     
     // Asynchronous data transfer(device to host)
-    for (int i=0;i<num_exec;i++,cur++) {
-        if (num_stream <= cur)
-            cur = 0;
-        cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]);
-    }
+    // for (int i=0;i<num_exec;i++,cur++) {
+    //     if (num_stream <= cur)
+    //         cur = 0;
+    //     cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]);
+    // }
     
     // wait for stream
     for (int i=0;i<num_stream;i++)
--- a/example/Cuda/multiply.cu	Mon Mar 03 19:12:02 2014 +0900
+++ b/example/Cuda/multiply.cu	Tue Mar 04 18:11:06 2014 +0900
@@ -4,3 +4,4 @@
         C[index] = A[index] * B[0];
     }
 }
+