Mercurial > hg > Game > Cerium
annotate example/Cuda/main.cc @ 2006:f6aa6d6a3fa2 draft
add fft using cuda, not running
author | Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp> |
---|---|
date | Tue, 03 Jun 2014 12:07:00 +0900 |
parents | c3b4083c4467 |
children | 0e2389a5ac4e |
rev | line source |
---|---|
1918
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
1 #include <stdio.h> |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
2 #include <sys/time.h> |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
3 #include <string.h> |
1918
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
4 #include <cuda.h> |
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
5 |
2006
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1983
diff
changeset
|
6 #define LENGTH 10 |
f6aa6d6a3fa2
add fft using cuda, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1983
diff
changeset
|
7 #define THREAD 10 |
1918
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
8 |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
/*
 * Return the current wall-clock time in seconds as a double.
 *
 * The value is read with gettimeofday(); the whole seconds and the
 * microsecond part (scaled by 1e-6) are summed into one double.
 */
static double
getTime() {
    struct timeval now;
    gettimeofday(&now, NULL);

    const double micros = (double)now.tv_usec;
    return now.tv_sec + micros*1e-6;
}
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
15 |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
16 void |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
17 check_data(float* A, float B, float* C) { |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
18 for (int i=0; i<LENGTH*THREAD; i++) { |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
19 if (A[i]*B!=C[i]) { |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
20 puts("multiply failure."); |
1918
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
21 return; |
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
22 } |
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
23 } |
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
24 puts("success."); |
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
25 } |
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
26 |
1919
d6e033734c12
running cuda sample
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1918
diff
changeset
|
27 void print_result(float* C) { |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
28 for (int i=0; i<LENGTH*THREAD; i++) { |
1919
d6e033734c12
running cuda sample
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1918
diff
changeset
|
29 printf("%f\n",C[i]); |
d6e033734c12
running cuda sample
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1918
diff
changeset
|
30 } |
d6e033734c12
running cuda sample
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1918
diff
changeset
|
31 } |
1918
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
32 |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
33 int main(int args, char* argv[]) { |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
34 int num_stream = 1; // number of stream |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
35 int num_exec = 16; // number of executed kernel |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
36 |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
37 for (int i=1;argv[i];i++) { |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
38 if (strcmp(argv[i], "--stream") == 0 || strcmp(argv[i], "-s") == 0) { |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
39 num_stream = atoi(argv[++i]); |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
40 } |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
41 } |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
42 |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
43 // initialize and load kernel |
1918
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
44 CUdevice device; |
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
45 CUcontext context; |
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
46 CUmodule module; |
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
47 CUfunction function; |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
48 CUstream stream[num_stream]; |
1918
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
49 |
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
50 cuInit(0); |
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
51 cuDeviceGet(&device, 0); |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
52 cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device); |
1919
d6e033734c12
running cuda sample
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1918
diff
changeset
|
53 cuModuleLoad(&module, "multiply.ptx"); |
1918
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
54 cuModuleGetFunction(&function, module, "multiply"); |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
55 for (int i=0;i<num_stream;i++) |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
56 cuStreamCreate(&stream[i],0); |
1935
67e50779feb4
CudaScheduler is runnig.
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1925
diff
changeset
|
57 |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
58 // memory allocate |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
59 CUdeviceptr devA; |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
60 CUdeviceptr devB[num_exec]; |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
61 CUdeviceptr devOut[num_exec]; |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
62 |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
63 cuMemAlloc(&devA, LENGTH*THREAD*sizeof(float)); |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
64 for (int i=0;i<num_exec;i++) { |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
65 cuMemAlloc(&devB[i], sizeof(float)); |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
66 cuMemAlloc(&devOut[i], LENGTH*THREAD*sizeof(float)); |
1935
67e50779feb4
CudaScheduler is runnig.
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1925
diff
changeset
|
67 } |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
68 |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
69 // input buffer |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
70 float* A = new float[LENGTH*THREAD]; |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
71 float* B = new float[num_exec]; |
1923 | 72 |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
73 for (int i=0; i<LENGTH*THREAD; i++) |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
74 A[i] = (float)(i+1000); |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
75 |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
76 // output buffer |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
77 float** result = new float* [num_exec]; |
1923 | 78 |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
79 for (int i=0;i<num_exec;i++) |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
80 result[i] = new float[LENGTH*THREAD]; |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
81 |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
82 // Synchronous data transfer(host to device) |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
83 cuMemcpyHtoD(devA, A, LENGTH*THREAD*sizeof(float)); |
1977 | 84 |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
85 // Asynchronous data transfer(host to device) |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
86 int cur = 0; |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
87 |
1983
c3b4083c4467
fix CudaScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1977
diff
changeset
|
88 for (int i=0;i<num_exec;i++,cur++) { |
c3b4083c4467
fix CudaScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1977
diff
changeset
|
89 if (num_stream <= cur) |
c3b4083c4467
fix CudaScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1977
diff
changeset
|
90 cur = 0; |
c3b4083c4467
fix CudaScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1977
diff
changeset
|
91 B[i] = (float)(i+1); |
c3b4083c4467
fix CudaScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1977
diff
changeset
|
92 cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]); |
c3b4083c4467
fix CudaScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1977
diff
changeset
|
93 } |
1918
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
94 |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
95 cur = 0; |
1918
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
96 |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
97 // Asynchronous launch kernel |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
98 for (int i=0;i<num_exec;i++,cur++) { |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
99 if (num_stream <= cur) |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
100 cur=0; |
1977 | 101 B[i] = (float)(i+1); |
1983
c3b4083c4467
fix CudaScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1977
diff
changeset
|
102 //cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]); |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
103 void* args[] = {&devA, &devB[i], &devOut[i]}; |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
104 cuLaunchKernel(function, |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
105 LENGTH, 1, 1, |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
106 THREAD, 1, 1, |
1977 | 107 0, stream[cur], args, NULL); |
1983
c3b4083c4467
fix CudaScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1977
diff
changeset
|
108 //cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]); |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
109 } |
1918
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
110 |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
111 cur = 0; |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
112 |
1935
67e50779feb4
CudaScheduler is runnig.
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1925
diff
changeset
|
113 |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
114 // Asynchronous data transfer(device to host) |
1983
c3b4083c4467
fix CudaScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1977
diff
changeset
|
115 for (int i=0;i<num_exec;i++,cur++) { |
c3b4083c4467
fix CudaScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1977
diff
changeset
|
116 if (num_stream <= cur) |
c3b4083c4467
fix CudaScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1977
diff
changeset
|
117 cur = 0; |
c3b4083c4467
fix CudaScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1977
diff
changeset
|
118 cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]); |
c3b4083c4467
fix CudaScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1977
diff
changeset
|
119 } |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
120 |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
121 // wait for stream |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
122 for (int i=0;i<num_stream;i++) |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
123 cuStreamSynchronize(stream[i]); |
1919
d6e033734c12
running cuda sample
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1918
diff
changeset
|
124 |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
125 //printf("%0.6f\n",getTime()-start); |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
126 |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
127 for (int i=0;i<num_exec;i++) |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
128 check_data(A,(float)(i+1),result[i]); |
1918
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
129 |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
130 // memory release |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
131 cuMemFree(devA); |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
132 for (int i=0;i<num_exec;i++) { |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
133 cuMemFree(devB[i]); |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
134 cuMemFree(devOut[i]); |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
135 } |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
136 for (int i=0;i<num_stream;i++) |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
137 cuStreamDestroy(stream[i]); |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
138 cuModuleUnload(module); |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
139 cuCtxDestroy(context); |
1918
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
140 |
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
141 delete[] A; |
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
142 delete[] B; |
1969
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
143 for (int i=0;i<num_exec;i++) |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
144 delete[] result[i]; |
a68dbdf9b429
fix GpuScheduler
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
1935
diff
changeset
|
145 delete[] result; |
1918
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
146 |
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
147 return 0; |
15e8c50ed570
add cuda sample, not running
Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
parents:
diff
changeset
|
148 } |