changeset 411:0eba9a04633f

Work CUDAtwice
author Tatsuki IHA <innparusu@cr.ie.u-ryukyu.ac.jp>
date Thu, 14 Sep 2017 18:26:52 +0900
parents 85b0ddbf458e
children 409e6b5fb775
files src/parallel_execution/CPUWorker.cbc src/parallel_execution/CUDAWorker.cbc src/parallel_execution/MultiDimIterator.cbc src/parallel_execution/TaskManagerImpl.cbc src/parallel_execution/context.h src/parallel_execution/cuda.c src/parallel_execution/examples/twice/CUDAtwice.cbc src/parallel_execution/examples/twice/CUDAtwice.cu src/parallel_execution/examples/twice/main.cbc
diffstat 9 files changed, 44 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/src/parallel_execution/CPUWorker.cbc	Thu Sep 14 02:35:20 2017 +0900
+++ b/src/parallel_execution/CPUWorker.cbc	Thu Sep 14 18:26:52 2017 +0900
@@ -10,7 +10,7 @@
     cpuWorker->id = id;
     worker->taskReceive = C_taskReceiveWorker;
     worker->shutdown = C_shutdownWorker;
-    pthread_create(&worker->worker->CPUWorker.thread, NULL, (void*)&startWorker, worker);
+    pthread_create(&worker->thread, NULL, (void*)&startWorker, worker);
     return worker;
 }
 
@@ -34,7 +34,7 @@
 
 __code getTask(struct Worker* worker, struct Context* task) {
     if (!task)
-        return; // end thread
+        goto meta(context, worker->shutdown); // end thread
     task->worker = worker;
     enum Code taskCg = task->next;
     if (task->iterate) {
--- a/src/parallel_execution/CUDAWorker.cbc	Thu Sep 14 02:35:20 2017 +0900
+++ b/src/parallel_execution/CUDAWorker.cbc	Thu Sep 14 18:26:52 2017 +0900
@@ -10,15 +10,12 @@
 int cuda_initialized = 0;
 
 Worker* createCUDAWorker(struct Context* context, int id, Queue* queue, TaskManagerImpl *im) {
-    struct Worker* worker = ALLOC(context, Worker);
+    struct Worker* worker = new Worker();
     struct CUDAWorker* cudaWorker = new CUDAWorker();
-
-    cudaInit(cudaWorker, 0);
-
     worker->worker = (union Data*)cudaWorker;
     worker->tasks = queue;
     cudaWorker->id = id;
-    worker->taskReceive = C_taskReceiveWorker;
+    worker->taskReceive = C_taskReceiveCUDAWorker;
     worker->shutdown = C_shutdownCUDAWorker;
 #ifndef USE_CUDA_MAIN_THREAD
     pthread_create(&worker->worker->CUDAWorker.thread, NULL, (void*)&startCUDAWorker, worker);
@@ -33,7 +30,8 @@
 }
 
 static void startCUDAWorker(Worker* worker) {
-    CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
+    struct CUDAWorker* cudaWorker = &worker->worker->CUDAWorker;
+    cudaInit(cudaWorker, 0);
     cudaWorker->context = NEW(struct Context);
     initContext(cudaWorker->context);
     Gearef(cudaWorker->context, Worker)->worker = (union Data*)worker;
@@ -52,7 +50,7 @@
 
 __code getTaskCUDA(struct Worker* worker, struct Context* task) {
     if (!task)
-        return; // end thread
+        goto meta(context, worker->shutdown); // end thread
     task->worker = worker;
     enum Code taskCg = task->next;
     if (task->iterate) {
@@ -156,10 +154,10 @@
             Gearef(context, LoopCounter));
 }
 
-extern void cudaShutdown( CUDAWorker *cudaWorker) ;
+extern void cudaShutdown( CUDAWorker *cudaWorker);
 
 __code shutdownCUDAWorker(struct Context* context, CUDAWorker* worker) {
-    cudaShutdown(worker) ;
+    cudaShutdown(worker);
 }
 
 __code shutdownCUDAWorker_stub(struct Context* context) {
--- a/src/parallel_execution/MultiDimIterator.cbc	Thu Sep 14 02:35:20 2017 +0900
+++ b/src/parallel_execution/MultiDimIterator.cbc	Thu Sep 14 18:26:52 2017 +0900
@@ -109,7 +109,7 @@
 }
 
 __code barrierMultiDimIterator(struct MultiDimIterator* iterator, struct Context* task, __code next(...), __code whenWait(...)) {
-    if (__sync_fetch_and_sub(&iterator->count, 1) == 1) {
+    if (task->gpu || __sync_fetch_and_sub(&iterator->count, 1) == 1) {
         goto next(...);
     }
     goto whenWait(...);
--- a/src/parallel_execution/TaskManagerImpl.cbc	Thu Sep 14 02:35:20 2017 +0900
+++ b/src/parallel_execution/TaskManagerImpl.cbc	Thu Sep 14 18:26:52 2017 +0900
@@ -221,36 +221,35 @@
             Gearef(context, TaskManager)->next);
 }
 
-__code shutdownTaskManagerImpl(struct TaskManagerImpl* taskManagerImpl, __code next(...), struct TaskManager* taskManager, struct Queue* queue) {
-    if (taskManagerImpl->taskCount != 0) {
-        sleep(1);
-        goto meta(context, taskManager->shutdown);
+__code shutdownTaskManagerImpl(struct TaskManagerImpl* taskManager, __code next(...), struct Queue* queue) {
+    if (taskManager->taskCount != 0) {
+        usleep(1000);
+        goto meta(context, C_shutdownTaskManagerImpl);
     }
-    int i = taskManagerImpl->loopCounter->i;
-    if (taskManager->cpu <= i && i < taskManager->maxCPU) {
-        struct Queue* tasks = taskManagerImpl->workers[i]->tasks;
+    int i = taskManager->loopCounter->i;
+    if (i < taskManager->numWorker) {
+        struct Queue* tasks = taskManager->workers[i]->tasks;
         queue->queue = (union Data*)tasks;
         queue->data = NULL;
         queue->next = C_shutdownTaskManagerImpl1;
         goto meta(context, tasks->put);
     }
 
-    taskManagerImpl->loopCounter->i = 0;
+    taskManager->loopCounter->i = 0;
     goto meta(context, next);
 }
 
 __code shutdownTaskManagerImpl_stub(struct Context* context) {
     TaskManagerImpl* taskManagerImpl = (TaskManagerImpl*)GearImpl(context, TaskManager, taskManager);
     goto shutdownTaskManagerImpl(context,
-            taskManagerImpl,
-            Gearef(context, TaskManager)->next,
-            &Gearef(context, TaskManager)->taskManager->TaskManager,
-            Gearef(context, Queue));
+                                 taskManagerImpl,
+                                 Gearef(context, TaskManager)->next,
+                                 Gearef(context, Queue));
 }
 
 __code shutdownTaskManagerImpl1(TaskManagerImpl* taskManager) {
     int i = taskManager->loopCounter->i;
-    pthread_join(taskManager->workers[i]->worker->CPUWorker.thread, NULL);
+    pthread_join(taskManager->workers[i]->thread, NULL);
     taskManager->loopCounter->i++;
     goto meta(context, C_shutdownTaskManagerImpl);
 }
--- a/src/parallel_execution/context.h	Thu Sep 14 02:35:20 2017 +0900
+++ b/src/parallel_execution/context.h	Thu Sep 14 18:26:52 2017 +0900
@@ -171,10 +171,10 @@
         enum Code shutdown;
         enum Code next;
         struct Queue* tasks;
+        pthread_t thread;
         struct TaskManager* taskManager;
     } Worker;
     struct CPUWorker {
-        pthread_t thread;
         pthread_mutex_t mutex;
         pthread_cond_t cond;
         struct Context* context;
--- a/src/parallel_execution/cuda.c	Thu Sep 14 02:35:20 2017 +0900
+++ b/src/parallel_execution/cuda.c	Thu Sep 14 18:26:52 2017 +0900
@@ -91,8 +91,10 @@
     // CUDAWorker* cudaWorker = (CUDAWorker*)worker->worker;
     // memory allocate
     CUdeviceptr devA;
+    CUdeviceptr devB;
 
     checkCudaErrors(cuMemAlloc(&devA, sizeof(int)*array->size));
+    checkCudaErrors(cuMemAlloc(&devB, sizeof(int)));
 
     //twiceカーネルが定義されてなければそれをロードする
     checkCudaErrors(cuModuleLoad(&context->module, "c/examples/twice/CUDAtwice.ptx"));
@@ -100,11 +102,12 @@
 
     //入力のDataGearをGPUにbuffer経由で送る
     // Synchronous data transfer(host to device)
-    checkCudaErrors(cuMemcpyHtoD(devA, array->array, array->size));
+    checkCudaErrors(cuMemcpyHtoD(devA, array->array, sizeof(int)*array->size));
+    checkCudaErrors(cuMemcpyHtoD(devB, &array->prefix, sizeof(int)));
 
     // Asynchronous launch kernel
     context->num_exec = 1;
-    void* args[] = {&devA};
+    void* args[] = {&devA, &devB};
     if (context->iterate) {
         struct MultiDimIterator* iterator = &context->iterator->iterator->MultiDimIterator;
         checkCudaErrors(cuLaunchKernel(context->function,
@@ -120,7 +123,7 @@
     }
     //結果を取ってくるコマンドを入力する
     //コマンドの終了待ちを行う   
-    checkCudaErrors(cuMemcpyDtoH(array->array, devA, array->size));
+    checkCudaErrors(cuMemcpyDtoH(array->array, devA, sizeof(int)*array->size));
 
     // wait for stream
     checkCudaErrors(cuCtxSynchronize());
--- a/src/parallel_execution/examples/twice/CUDAtwice.cbc	Thu Sep 14 02:35:20 2017 +0900
+++ b/src/parallel_execution/examples/twice/CUDAtwice.cbc	Thu Sep 14 18:26:52 2017 +0900
@@ -19,7 +19,7 @@
 
 __code CUDAtwice_stub(struct Context* context) {
 printf("CUDAtwice stub\n");
-    struct Array* array = &context->data[context->dataNum+1]->Array;
+    struct Array* array = &context->data[context->idg]->Array;
     CUDAExec(context,array);
 
     //continuationにそってGPUworkerに戻る
--- a/src/parallel_execution/examples/twice/CUDAtwice.cu	Thu Sep 14 02:35:20 2017 +0900
+++ b/src/parallel_execution/examples/twice/CUDAtwice.cu	Thu Sep 14 18:26:52 2017 +0900
@@ -1,8 +1,14 @@
 extern "C" {
 
 #include <stdio.h>
-    __global__ void twice(int* array) {
-        printf("array %p",array);
-        array[blockIdx.x] = array[blockIdx.x]*2;
+    __global__ void twice(int* array, int* prefixPtr) {
+        int i = 0;
+        int prefix = *prefixPtr;
+C_twice:
+        if (i < prefix) {
+            array[i+blockIdx.x*prefix] = array[i+blockIdx.x*prefix]*2;
+            i++;
+            goto C_twice;
+        }
     }
 }
--- a/src/parallel_execution/examples/twice/main.cbc	Thu Sep 14 02:35:20 2017 +0900
+++ b/src/parallel_execution/examples/twice/main.cbc	Thu Sep 14 18:26:52 2017 +0900
@@ -87,7 +87,6 @@
             goto meta(context, C_code3);
         } else
             puts("wrong result");
-
     }
 
     goto meta(context, C_exit_code);
@@ -98,7 +97,11 @@
 
     par goto createArray(array, __exit);
 
-    par goto CUDAtwice(array, iterate(split), __exit);
+    if(gpu_num) {
+        par goto CUDAtwice(array, iterate(split), __exit);
+    } else {
+        par goto twice(array, iterate(split), __exit);
+    }
     goto code2();
 }