changeset 1918:15e8c50ed570 draft

add cuda sample, not running
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Fri, 24 Jan 2014 07:16:26 +0900
parents 64bd56aed386
children d6e033734c12
files example/Cuda/Makefile example/Cuda/Makefile.def example/Cuda/main.cc example/Cuda/multiply.cu
diffstat 4 files changed, 123 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/Cuda/Makefile	Fri Jan 24 07:16:26 2014 +0900
@@ -0,0 +1,34 @@
+# Build rules for the CUDA driver-API sample: compiles the host .cc files
+# into $(TARGET) and the .cu kernels into PTX modules loaded at run time.
+include ./Makefile.def
+
+# Host sources: every .cc file in this directory except those listed in
+# SRCS_EXCLUDE (list files to exclude there).
+SRCS_TMP = $(wildcard *.cc)
+SRCS_EXCLUDE =
+SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
+OBJS = $(SRCS:.cc=.o)
+
+# NOTE(review): TASK_DIR1, TASK_DIR2 and TASK_SRCS_EXCLUDE are not defined in
+# this Makefile; presumably they are leftovers from a shared template, so
+# TASK_OBJS is expected to be empty here -- confirm.
+TASK_SRCS_TMP = $(wildcard $(TASK_DIR2)/*.cc $(TASK_DIR1)/*.cc)
+TASK_SRCS = $(filter-out $(TASK_DIR1)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP))
+TASK_OBJS = $(TASK_SRCS:.cc=.o)
+
+# Device sources: each .cu file is compiled to a .ptx module, the form the
+# driver API can load with cuModuleLoad().
+KERNEL_SRCS = $(wildcard *.cu)
+KERNEL_PTX = $(KERNEL_SRCS:.cu=.ptx)
+
+CC += $(ABI)
+
+# Location of the CUDA toolkit; override on the command line if it differs.
+CUDA_PATH ?= /Developer/NVIDIA/CUDA-5.5
+NVCC = $(CUDA_PATH)/bin/nvcc
+
+LIBS = -I$(CUDA_PATH)/include -F/Library/Frameworks -framework CUDA
+
+.SUFFIXES: .cc .o .cu .ptx
+
+# These targets do not name files; keep make from being confused by a stray
+# file called "all", "clean", etc.
+.PHONY: all link debug clean
+
+.cc.o:
+	$(CC) $(CFLAGS) $(LIBS) $(INCLUDE) -c $< -o $@
+
+.cu.ptx:
+	$(NVCC) -ptx $< -o $@
+
+all: $(TARGET) $(KERNEL_PTX)
+
+$(TARGET): $(OBJS) $(TASK_OBJS)
+	$(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS)
+
+# Relink from the existing object files without checking dependencies.
+link:
+	$(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS)
+
+debug: $(TARGET)
+	sudo gdb ./$(TARGET)
+
+clean:
+	rm -f $(TARGET) $(OBJS) $(TASK_OBJS) $(KERNEL_PTX)
+	rm -f *~ \#*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/Cuda/Makefile.def	Fri Jan 24 07:16:26 2014 +0900
@@ -0,0 +1,8 @@
+# Shared build settings included by the Makefile in this directory.
+
+# Name of the executable to build.
+TARGET = multiply
+
+# Target word size. ABI is the flag the Makefile appends to CC; it was
+# previously left undefined, so ABIBIT had no effect.
+ABIBIT=64
+ABI = -m$(ABIBIT)
+
+# Debug build: keep symbols, disable optimization.
+OPT = -g -O0
+
+CC = clang++
+CFLAGS = -Wall $(OPT)
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/Cuda/main.cc	Fri Jan 24 07:16:26 2014 +0900
@@ -0,0 +1,76 @@
+#include <stdio.h>
+
+#include <cuda.h>
+
+#define LENGTH 1000
+
+// Verify that C holds the element-wise product of A and B for the first
+// LENGTH entries. Prints "failure." as soon as one entry differs, or
+// "success." once every entry has been compared.
+void check_data(float* A,float* B,float* C) {
+    int idx = 0;
+    // Advance while the products match; stops early at the first mismatch.
+    while (idx < LENGTH && A[idx] * B[idx] == C[idx]) {
+        ++idx;
+    }
+    puts(idx == LENGTH ? "success." : "failure.");
+}
+
+
+
+int main() {
+    CUdevice device;
+    CUcontext context;
+    CUmodule module;
+    CUfunction function;
+    //    CUresult result;
+
+    cuInit(0);
+    cuDeviceGet(&device, 0);
+    cuCtxCreate(&context, 0, device);
+    cuModuleLoad(&module, "multiply.cu");
+    cuModuleGetFunction(&function, module, "multiply");
+
+    float* A = new float[LENGTH];
+    float* B = new float[LENGTH];
+    float* C = new float[LENGTH];
+
+    for (int i=0; i<LENGTH; i++) {
+        A[i] = (float)(i+1000);
+        B[i] = (float)(i+1)/10.f;
+    }
+
+    CUdeviceptr devA,devB,devC;
+
+    cuMemAlloc(&devA, LENGTH*sizeof(float));
+    cuMemAlloc(&devB, LENGTH*sizeof(float));
+    cuMemAlloc(&devC, LENGTH*sizeof(float));
+
+    cuMemcpyHtoD(devA, A, LENGTH*sizeof(float));
+    cuMemcpyHtoD(devB, B, LENGTH*sizeof(float));
+    cuMemcpyHtoD(devC, C, LENGTH*sizeof(float));
+
+    cuParamSetv(function, 0, A, LENGTH*sizeof(float));
+    cuParamSetv(function, 0, B, LENGTH*sizeof(float));
+    cuParamSetv(function, 0, C, LENGTH*sizeof(float));
+
+    cuLaunchKernel(function,
+                   LENGTH, 1, 1,
+                   1, 1, 1,
+                   0, NULL, NULL, NULL);
+
+    cuMemcpyDtoH(C, devC, LENGTH*sizeof(float));
+
+    check_data(A, B, C);
+
+    delete[] A;
+    delete[] B;
+    delete[] C;
+    cuModuleUnload(module);
+    cuMemFree(devA);
+    cuMemFree(devB);
+    cuMemFree(devC);
+    
+
+    return 0;
+}
+    
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/Cuda/multiply.cu	Fri Jan 24 07:16:26 2014 +0900
@@ -0,0 +1,5 @@
+// Element-wise product kernel: C[i] = A[i] * B[i] for single-precision data.
+//
+// Each thread handles the one element at its flat global index. There is no
+// length parameter and no bounds check, so the launch must create exactly as
+// many threads (gridDim.x * blockDim.x) as the arrays have elements.
+//
+// extern "C" keeps the symbol name unmangled so a by-name lookup of
+// "multiply" in the compiled module can find it.
+extern "C" __global__ void multiply(const float* A, const float* B, float* C) {
+    int index = blockIdx.x * blockDim.x + threadIdx.x;
+
+    C[index] = A[index] * B[index];
+}