changeset 302:8e7926f3e271

fix CUDAWorker
author ikkun
date Mon, 13 Feb 2017 17:58:04 +0900
parents 609bf62768b9
children 1dbaef86593b
files src/parallel_execution/CMakeLists.txt src/parallel_execution/CUDAWorker.cbc src/parallel_execution/CUDAtwice.cbc src/parallel_execution/CUDAtwice.cu src/parallel_execution/GPUWorker.cbc src/parallel_execution/GPUtwice.cbc src/parallel_execution/GPUtwice.cu src/parallel_execution/context.h
diffstat 8 files changed, 112 insertions(+), 110 deletions(-) [+]
line wrap: on
line diff
--- a/src/parallel_execution/CMakeLists.txt	Sun Feb 12 12:35:11 2017 +0900
+++ b/src/parallel_execution/CMakeLists.txt	Mon Feb 13 17:58:04 2017 +0900
@@ -4,7 +4,6 @@
 
 # -DUSE_CUDA
 #  add_definitions("-Wall -g -O")
-add_definitions("-Wall -g")
 
 set(CMAKE_C_COMPILER $ENV{CBC_COMPILER})
 
@@ -12,8 +11,12 @@
     set(NVCCFLAG "-std=c++11" "-g" "-O0" )
     set(CUDA_LINK_FLAGS "-framework CUDA -lc++ -Wl,-search_paths_first -Wl,-headerpad_max_install_names /Developer/NVIDIA/CUDA-8.0/lib/libcudart_static.a -Wl,-rpath,/usr/local/cuda/lib") 
     find_package(CUDA REQUIRED)
+    add_definitions("-Wall -g -DUSE_CUDAWorker")
+else()                
+    add_definitions("-Wall -g")
 endif()
 
+
 macro( GearsCommand )
     set( _OPTIONS_ARGS )
     set( _ONE_VALUE_ARGS TARGET )
@@ -30,7 +33,7 @@
                 COMMAND  "perl" "generate_stub.pl" "-o" ${j} ${i}
             )
         elseif (${i} MATCHES "\\.cu")
-            string(REGEX REPLACE "(.*).cbc" "c/\\1.ptx" j ${i})
+            string(REGEX REPLACE "(.*).cu" "c/\\1.ptx" j ${i})
             add_custom_command (
                 OUTPUT    ${j} 
                 DEPENDS   ${i} 
@@ -61,9 +64,9 @@
 if (${USE_CUDA})
     GearsCommand(
       TARGET
-          GPUtwice
+          CUDAtwice
       SOURCES 
-          main.cbc RedBlackTree.cbc compare.c SingleLinkedStack.cbc CPUWorker.cbc time.cbc twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc SemaphoreImpl.cbc  GPUWorker.cbc GPUtwice.cu
+          main.cbc RedBlackTree.cbc compare.c SingleLinkedStack.cbc CPUWorker.cbc time.cbc twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc SemaphoreImpl.cbc  CUDAWorker.cbc CUDAtwice.cu
     )
 endif()
 
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/CUDAWorker.cbc	Mon Feb 13 17:58:04 2017 +0900
@@ -0,0 +1,67 @@
+#include <libkern/OSAtomic.h>
+
+#include "../context.h"
+
+static void start_worker(Worker* worker);
+
+union Data* createCUDAWorker(struct Context* context, int id, Queue* queue) {
+    struct Worker* worker = ALLOC(context, Worker);
+    struct CUDAWorker* CUDAWorker = ALLOC(context, CUDAWorker);
+    worker->worker = (union Data*)CUDAWorker;
+    worker->tasks = queue;
+    cpuWorker->id = id;
+    worker->taskReceive = C_taskReceiveCUDAWorker;
+    worker->shutdown = C_shutdownCUDAWorker;
+    pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&start_CUDAworker, worker);
+    return (union Data*)(worker);
+}
+
+static void start_worker(Worker* worker) {
+    CUDAWorker* CUDAWorker = (CUDAWorker*)worker->worker;
+    CUDAWorker->context = NEW(struct Context);
+    initContext(CUDAWorker->context);
+    Gearef(CUDAWorker->context, Worker)->worker = (union Data*)worker;
+    goto meta(CUDAWorker->context, C_taskReceiveCUDAWorker);
+}
+
+__code taskReceiveCUDAWorker(struct Context* context, Worker* worker, Queue* queue) {
+    queue->queue = (union Data*)worker->tasks;
+    queue->next = C_getTask;
+    goto meta(context, worker->tasks->take);
+}
+
+__code taskReceiveCUDAWorker_stub(struct Context* context) {
+    CUDAWorker* CUDAWorker = (CUDAWorker *)GearImpl(context, CUDAWorker, CUDAworker);
+    pthread_cond_wait(&CUDAWorker->cond, &CUDAWorker->mutex);
+    goto taskReceiveCUDAWorker(context, &Gearef(context, Worker)->worker->Worker, Gearef(context, Queue));
+}
+
+__code getCUDATask(struct Context* context, Worker* worker, struct Context* task) {
+    if (!task)
+        return; // end thread
+    task->worker = worker;
+    context->next = C_taskReceiveCUDAWorker; // set CG after task exec
+    goto meta(task, task->next);
+}
+
+__code getCUDATask_stub(struct Context* context) {
+    Worker* worker = &Gearef(context,Worker)->worker->Worker;
+    struct Context* task = &Gearef(context, Queue)->data->Context;
+    goto getCUDATask(context, worker, task);
+}
+
+#ifdef USE_CUDA
+__code twiceCUDA(struct Context* context) {
+    cuMemcpyHtoDAsync(context,context,context,context->stream);
+    cuLaunchkanel();
+    cuMemcpyDtoHAsync();
+}
+#endif
+
+__code shutdownWorker(struct Context* context, CPUWorker* worker) {
+}
+
+__code shutdownWorker_stub(struct Context* context) {
+    CPUWorker* worker = (CPUWorker *)GearImpl(context, Worker, worker);
+    goto shutdownWorker(context,worker);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/CUDAtwice.cbc	Mon Feb 13 17:58:04 2017 +0900
@@ -0,0 +1,27 @@
+#include <stdio.h>
+
+#include "context.h"
+#include "origin_cs.h"
+
+__code twice(struct Context* context, struct LoopCounter* loopCounter, int index, int prefix, int* array, struct Context* workerContext) {
+    int i = loopCounter->i;
+    if (i < prefix) {
+        array[i+index*prefix] = array[i+index*prefix]*2;
+        loopCounter->i++;
+
+        goto meta(context, C_twice);
+    }
+
+    loopCounter->i = 0;
+    goto meta(workerContext, workerContext->next);
+}
+
+__code twice_stub(struct Context* context) {
+    struct Context* workerContext = context->worker->worker->CPUWorker.context;
+    //入力のDataGearをGPUにbuffer経由で送る
+    //twiceカーネルが定義されてなければそれをロードする
+    //結果を取ってくるコマンドを入力する
+    //コマンドの終了待ちを行う
+    //continationにそってGPUworkerに戻る
+    goto twice(context, Gearef(context, LoopCounter), 0, 0, NULL, workerContext);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/parallel_execution/CUDAtwice.cu	Mon Feb 13 17:58:04 2017 +0900
@@ -0,0 +1,8 @@
+extern "C" {
+    __global__ void multiply(float* A, float* B, float* C) {
+//        printf("%d %d\n",i[0],i[1]);
+        int index = blockIdx.x * blockDim.x + threadIdx.x;
+        C[index] = A[index] * B[0];
+    }
+
+}
--- a/src/parallel_execution/GPUWorker.cbc	Sun Feb 12 12:35:11 2017 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-#include <libkern/OSAtomic.h>
-
-#include "context.h"
-#include "origin_cs.h"
-
-static void start_worker(Worker* worker);
-
-union Data* createGPUWorker(struct Context* context, int id, Queue* queue) {
-    struct Worker* worker = ALLOC(context, Worker);
-    struct GPUWorker* gpuWorker = ALLOC(context, GPUWorker);
-    worker->worker = (union Data*)gpuWorker;
-    worker->tasks = queue;
-    cpuWorker->id = id;
-    worker->taskReceive = C_taskReceiveGPUWorker;
-    worker->shutdown = C_shutdownGPUWorker;
-    pthread_create(&worker->worker->GPUWorker.thread, NULL, (void*)&start_GPUworker, worker);
-    return (union Data*)(worker);
-}
-
-static void start_worker(Worker* worker) {
-    GPUWorker* gpuWorker = (GPUWorker*)worker->worker;
-    gpuWorker->context = NEW(struct Context);
-    initContext(gpuWorker->context);
-    Gearef(gpuWorker->context, Worker)->worker = (union Data*)worker;
-    goto meta(gpuWorker->context, C_taskReceiveGPUWorker);
-}
-
-__code taskReceiveGPUWorker(struct Context* context, Worker* worker, Queue* queue) {
-    queue->queue = (union Data*)worker->tasks;
-    queue->next = C_getTask;
-    goto meta(context, worker->tasks->take);
-}
-
-__code taskReceiveGPUWorker_stub(struct Context* context) {
-    GPUWorker* gpuWorker = (GPUWorker *)GearImpl(context, GPUWorker, gPUworker);
-    pthread_cond_wait(&gpuWorker->cond, &gpuWorker->mutex);
-    goto taskReceiveGPUWorker(context, &Gearef(context, Worker)->worker->Worker, Gearef(context, Queue));
-}
-
-__code getGPUTask(struct Context* context, Worker* worker, struct Context* task) {
-    if (!task)
-        return; // end thread
-    task->worker = worker;
-    context->next = C_taskReceiveGPUWorker; // set CG after task exec
-    goto meta(task, task->next);
-}
-
-__code getGPUTask_stub(struct Context* context) {
-    Worker* worker = &Gearef(context,Worker)->worker->Worker;
-    struct Context* task = &Gearef(context, Queue)->data->Context;
-    goto getGPUTask(context, worker, task);
-}
-
-#ifdef USE_CUDA
-__code twiceGpu(struct Context* context) {
-    cuMemcpyHtoDAsync(context,context,context,context->stream);
-    cuLaunchkanel();
-    cuMemcpyDtoHAsync();
-}
-#endif
-
-__code shutdownWorker(struct Context* context, CPUWorker* worker) {
-}
-
-__code shutdownWorker_stub(struct Context* context) {
-    CPUWorker* worker = (CPUWorker *)GearImpl(context, Worker, worker);
-    goto shutdownWorker(context,worker);
-}
--- a/src/parallel_execution/GPUtwice.cbc	Sun Feb 12 12:35:11 2017 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-#include <stdio.h>
-
-#include "context.h"
-#include "origin_cs.h"
-
-__code twice(struct Context* context, struct LoopCounter* loopCounter, int index, int prefix, int* array, struct Context* workerContext) {
-    int i = loopCounter->i;
-    if (i < prefix) {
-        array[i+index*prefix] = array[i+index*prefix]*2;
-        loopCounter->i++;
-
-        goto meta(context, C_twice);
-    }
-
-    loopCounter->i = 0;
-    goto meta(workerContext, workerContext->next);
-}
-
-__code twice_stub(struct Context* context) {
-    struct Context* workerContext = context->worker->worker->CPUWorker.context;
-    //入力のDataGearをGPUにbuffer経由で送る
-    //twiceカーネルが定義されてなければそれをロードする
-    //結果を取ってくるコマンドを入力する
-    //コマンドの終了待ちを行う
-    //continationにそってGPUworkerに戻る
-    goto twice(context, Gearef(context, LoopCounter), 0, 0, NULL, workerContext);
-}
--- a/src/parallel_execution/GPUtwice.cu	Sun Feb 12 12:35:11 2017 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-extern "C" {
-    __global__ void multiply(float* A, float* B, float* C) {
-//        printf("%d %d\n",i[0],i[1]);
-        int index = blockIdx.x * blockDim.x + threadIdx.x;
-        C[index] = A[index] * B[0];
-    }
-
-}
--- a/src/parallel_execution/context.h	Sun Feb 12 12:35:11 2017 +0900
+++ b/src/parallel_execution/context.h	Mon Feb 13 17:58:04 2017 +0900
@@ -135,8 +135,8 @@
         struct Context* context;
         int id;
     } CPUWorker;
-#ifdef USE_CUDA
-    struct CudaWorker {
+#ifdef USE_CUDAWorker
+    struct CUDAWorker {
         pthread_t thread;
         struct Context* context;
         int id;
@@ -150,7 +150,7 @@
         CUstream stream;
     } CudaWorker;
 #else
-    struct CudaWorker {
+    struct CUDAWorker {
     } CudaWorker;
 #endif
     struct Main {