# HG changeset patch
# User anatofuz <anatofuz@cr.ie.u-ryukyu.ac.jp>
# Date 1579155126 -32400
# Node ID 9146d6017f18d1ba80bda11aec661e7082bb9ca6
# Parent  a4cab67624f7c518b37b9485c5d418afede8d971
hg mv parallel_execution/* ..

diff -r a4cab67624f7 -r 9146d6017f18 src/Atomic.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Atomic.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,9 @@
+typedef struct Atomic<Impl>{ // interface: checkAndSet on the slot ptr points at, continuing to next or fail
+    union Data* atomic;
+    union Data** ptr;
+    union Data* oldData;
+    union Data* newData;
+    __code checkAndSet(Impl* atomic, union Data** ptr, union Data* oldData, union Data* newData, __code next(...), __code fail(...));
+    __code next(...);
+    __code fail(...);
+} Atomic;
diff -r a4cab67624f7 -r 9146d6017f18 src/AtomicReference.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/AtomicReference.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,18 @@
+#include "../context.h"
+#interface "Atomic.h"
+#include <stdio.h>
+
+Atomic* createAtomicReference(struct Context* context) {
+    struct Atomic* atomic = new Atomic();
+    struct AtomicReference* atomicReference = new AtomicReference();
+    atomic->atomic = (union Data*)atomicReference; // implementation object behind the Atomic interface
+    atomic->checkAndSet = C_checkAndSetAtomicReference; // code gear that runs when checkAndSet is invoked
+    return atomic;
+}
+
+__code checkAndSetAtomicReference(struct AtomicReference* atomic, union Data** ptr, union Data* oldData, union Data* newData, __code next(...), __code fail(...)) {
+    if (__sync_bool_compare_and_swap(ptr, oldData, newData)) { // atomically store newData in *ptr, but only if *ptr still equals oldData
+        goto next(...);
+    }
+    goto fail(...); // the swap did not happen
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/Buffer.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Buffer.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,7 @@
+typedef struct Buffer<Impl>{ // interface: put stores one data gear, take passes one to its continuation
+        union Data* buffer;
+        union Data* data;
+        __code put(Impl* buffer, union Data* data, __code next(...));
+        __code take(Impl* buffer, __code next(union Data*, ...));
+        __code next(...);
+} Buffer;
diff -r a4cab67624f7 -r 9146d6017f18 src/CMakeLists.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/CMakeLists.txt	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,142 @@
+cmake_minimum_required(VERSION 3.8)
+
+option(USE_CUDA "Build the CUDA targets and link against the CUDA libraries" OFF)
+# enable with: cmake -DUSE_CUDA=ON
+#  add_definitions("-Wall -g -O")
+
+set(CMAKE_C_COMPILER $ENV{CBC_COMPILER}) # the CbC compiler is taken from the environment
+add_definitions("-Wall -g")
+
+# -DCMAKE_BUILD_TYPE=Debug
+set(CMAKE_C_FLAGS_DEBUG "-O0")
+
+if (${USE_CUDA})
+    include_directories("/usr/local/cuda/include")
+    set(NVCCFLAG "-std=c++11" "-g" "-O0" )
+    if (UNIX AND NOT APPLE) # LINUX
+        set(CUDA_LINK_FLAGS "-L/usr/local/cuda/lib64 -lcuda -lcudart")
+    elseif (APPLE)
+        set(CUDA_LINK_FLAGS "-framework CUDA -lc++ -Wl,-search_paths_first -Wl,-headerpad_max_install_names /usr/local/cuda/lib/libcudart_static.a -Wl,-rpath,/usr/local/cuda/lib")
+    endif()
+    find_package(CUDA REQUIRED)
+    SET( CMAKE_EXE_LINKER_FLAGS  "${CMAKE_EXE_LINKER_FLAGS} ${CUDA_LINK_FLAGS}" )
+endif()
+
+macro( GearsCommand ) # GearsCommand(TARGET name SOURCES files...): build one executable
+    set( _OPTIONS_ARGS )
+    set( _ONE_VALUE_ARGS TARGET )
+    set( _MULTI_VALUE_ARGS SOURCES )
+    cmake_parse_arguments( _Gears "${_OPTIONS_ARGS}" "${_ONE_VALUE_ARGS}" "${_MULTI_VALUE_ARGS}" ${ARGN} )
+
+    set (_Gears_CSOURCES) # the sources after translation, in input order
+    foreach(i ${_Gears_SOURCES})
+        if (${i} MATCHES "\\.cbc$") # x.cbc is translated to c/x.c by generate_stub.pl
+            string(REGEX REPLACE "(.*)\\.cbc$" "c/\\1.c" j ${i})
+            add_custom_command (
+                OUTPUT    ${j}
+                DEPENDS   ${i}
+                COMMAND  "perl" "generate_stub.pl" "-o" ${j} ${i}
+            )
+        elseif (${i} MATCHES "\\.cu$") # x.cu is compiled to c/x.ptx by nvcc
+            string(REGEX REPLACE "(.*)\\.cu$" "c/\\1.ptx" j ${i})
+            add_custom_command (
+                OUTPUT    ${j}
+                DEPENDS   ${i}
+                COMMAND  nvcc ${NVCCFLAG} -c -ptx -o ${j} ${i}
+            )
+        else() # any other source is used unchanged
+            set(j ${i})
+        endif()
+        list(APPEND _Gears_CSOURCES ${j})
+    endforeach(i)
+
+    add_custom_command ( # generate_context.pl produces the per-target context source
+          OUTPUT    c/${_Gears_TARGET}-context.c
+          DEPENDS   ${_Gears_CSOURCES}
+          COMMAND  "perl" "generate_context.pl" "-o" ${_Gears_TARGET} "-w" ${_Gears_CSOURCES}
+    )
+    add_executable(${_Gears_TARGET} ${_Gears_CSOURCES} c/${_Gears_TARGET}-context.c)
+    target_link_libraries(${_Gears_TARGET} m pthread)
+endmacro()
+
+
+GearsCommand( # each call builds one executable named by TARGET from the listed SOURCES
+  TARGET
+      twice
+  SOURCES
+      examples/twice/main.cbc examples/twice/createArray.cbc examples/twice/twice.cbc examples/twice/printArray.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc TimerImpl.cbc MultiDimIterator.cbc AtomicReference.cbc
+)
+
+GearsCommand(
+  TARGET
+      calc
+  SOURCES
+      examples/calc/calc.cbc examples/calc/add.cbc examples/calc/mult.cbc examples/calc/initIntegerDataGears.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc AtomicReference.cbc
+)
+
+GearsCommand(
+  TARGET
+      bitonicSort
+  SOURCES
+      examples/bitonicSort/bitonicSort.cbc examples/bitonicSort/bitonicSwap.cbc examples/bitonicSort/makeArray.cbc examples/bitonicSort/printArray.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc MultiDimIterator.cbc TimerImpl.cbc AtomicReference.cbc
+)
+
+if (${USE_CUDA}) # CUDA variants: same examples plus the .cu kernels, CUDAWorker, CUDAExecutor and cuda.c
+    GearsCommand(
+      TARGET
+          CUDAtwice
+      SOURCES
+          examples/twice/main.cbc examples/twice/twice.cbc examples/twice/CUDAtwice.cu examples/twice/createArray.cbc examples/twice/printArray.cbc CPUWorker.cbc TimerImpl.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc CUDAWorker.cbc cuda.c MultiDimIterator.cbc CUDAExecutor.cbc AtomicReference.cbc
+    )
+    set_target_properties(CUDAtwice PROPERTIES COMPILE_FLAGS "-Wall -g -DUSE_CUDAWorker=1")
+
+    GearsCommand(
+      TARGET
+          CUDAbitonicSort
+      SOURCES
+          examples/bitonicSort/bitonicSort.cbc examples/bitonicSort/bitonicSwap.cbc examples/bitonicSort/CUDAbitonicSwap.cu examples/bitonicSort/makeArray.cbc examples/bitonicSort/printArray.cbc CPUWorker.cbc CUDAWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc cuda.c MultiDimIterator.cbc TimerImpl.cbc CUDAExecutor.cbc AtomicReference.cbc
+    )
+    set_target_properties(CUDAbitonicSort PROPERTIES COMPILE_FLAGS "-Wall -g -DUSE_CUDAWorker=1")
+endif()
+
+GearsCommand( # the remaining targets are built whether or not USE_CUDA is set
+  TARGET
+      queue_test
+  SOURCES 
+      test/queue_test.cbc SingleLinkedQueue.cbc
+)
+
+GearsCommand(
+  TARGET
+      stack_test
+  SOURCES 
+      test/stack_test.cbc SingleLinkedStack.cbc SingleLinkedQueue.cbc
+)
+
+GearsCommand(
+  TARGET
+      multiDimIterator_test
+  SOURCES
+      test/multiDimIterator_test.cbc test/printIterator.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc MultiDimIterator.cbc AtomicReference.cbc
+)
+
+#GearsCommand(
+#  TARGET
+#      sort
+#  SOURCES
+#      examples/bitonicSort/sort.cbc
+#)
+
+GearsCommand(
+  TARGET
+      rbtree
+  SOURCES
+      SingleLinkedQueue.cbc test/rbTree_test.cbc RedBlackTree.cbc SingleLinkedStack.cbc compare.c
+)
+
+GearsCommand(
+  TARGET
+      boundedBuffer
+  SOURCES
+  examples/boundedBuffer/main.cbc examples/boundedBuffer/initBuffer.cbc examples/boundedBuffer/SemaphoreImpl.cbc examples/boundedBuffer/BoundedBuffer.cbc examples/boundedBuffer/consumer.cbc examples/boundedBuffer/producer.cbc SpinLock.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc MultiDimIterator.cbc AtomicReference.cbc
+)
diff -r a4cab67624f7 -r 9146d6017f18 src/CPUWorker.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/CPUWorker.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,124 @@
+#include "../context.h"
+#interface "TaskManager.h"
+#interface "Worker.h"
+#interface "Iterator.h"
+#interface "Queue.h"
+
+static void startWorker(Worker* worker);
+
+Worker* createCPUWorker(struct Context* context, int id, Queue* queue) {
+    struct Worker* worker = new Worker();
+    struct CPUWorker* cpuWorker = new CPUWorker();
+    worker->worker = (union Data*)cpuWorker;
+    worker->tasks = queue; // queue this worker takes its tasks from
+    cpuWorker->id = id;
+    cpuWorker->loopCounter = 0; // offset of the output data gear being committed, see odgCommitCPUWorker1
+    worker->taskReceive = C_taskReceiveCPUWorker;
+    worker->shutdown = C_shutdownCPUWorker;
+    pthread_create(&worker->thread, NULL, (void*)&startWorker, worker); // startWorker runs on the new thread; the return value is not checked
+    return worker;
+}
+
+static void startWorker(struct Worker* worker) {
+    struct CPUWorker* cpuWorker = &worker->worker->CPUWorker;
+    cpuWorker->context = NEW(struct Context); // every worker thread allocates a context of its own
+    initContext(cpuWorker->context);
+    Gearef(cpuWorker->context, Worker)->worker = (union Data*)worker; // presumably the arguments the taskReceive stub reads - confirm in the generated stub
+    Gearef(cpuWorker->context, Worker)->tasks = worker->tasks;
+    goto meta(cpuWorker->context, worker->taskReceive); // enter the task loop
+}
+
+__code taskReceiveCPUWorker(struct CPUWorker* worker, struct Queue* tasks) {
+    goto tasks->take(getTaskCPUWorker); // take one entry from the task queue and continue in getTaskCPUWorker
+}
+
+__code getTaskCPUWorker(struct CPUWorker* cpuWorker, struct Context* task, struct Worker* worker) {
+    if (!task) { // a NULL task makes the worker shut down
+        goto worker->shutdown(); // end thread
+    }
+    task->worker = worker; // odgCommitCPUWorker_stub reaches the worker context through this field
+    enum Code taskCg = task->next; // the code gear the task itself wants to run
+    task->next = C_odgCommitCPUWorker; // commit outputDG after task exec
+    goto meta(task, taskCg); // switch task context
+}
+
+__code getTaskCPUWorker_stub(struct Context* context) {
+    CPUWorker* cpuWorker = (CPUWorker*)GearImpl(context, Worker, worker);
+    Worker* worker = &Gearef(context,Worker)->worker->Worker;
+    struct Context* task = &Gearef(context, Queue)->data->Context; // the Queue data slot viewed as a task context; presumably filled by take - confirm
+    goto getTaskCPUWorker(context, cpuWorker, task, worker);
+}
+
+__code odgCommitCPUWorker(struct CPUWorker* worker, struct Context* task) {
+    if (task->iterate) { // iterated tasks go through the iterator's barrier first
+        struct Iterator* iterator = task->iterator;
+        goto iterator->barrier(task, odgCommitCPUWorker1, odgCommitCPUWorker6); // next commits the outputs, whenWait returns to taskReceive
+    } else {
+        goto odgCommitCPUWorker1();
+    }
+}
+
+__code odgCommitCPUWorker_stub(struct Context* context) {
+    // switch worker context: context is the finished task, the commit runs on the worker's own context
+    struct Context* workerContext = context->worker->worker->CPUWorker.context;
+    Gearef(workerContext, Worker)->worker = (union Data*)context->worker;
+    Gearef(workerContext, Worker)->task = context; // the finished task becomes the task argument
+    CPUWorker* cpuWorker = (CPUWorker*)GearImpl(workerContext, Worker, worker);
+    goto odgCommitCPUWorker(workerContext,
+                            cpuWorker,
+                            context);
+}
+
+__code odgCommitCPUWorker1(struct CPUWorker* worker, struct Context* task) {
+    int i = worker->loopCounter; // offset of the output data gear to commit next
+    if (task->odg+i < task->maxOdg) { // outputs are task->data from index odg up to maxOdg
+        goto odgCommitCPUWorker2();
+    }
+    worker->loopCounter = 0; // every output was handled, reset for the next task
+    struct TaskManager* taskManager = task->taskManager;
+    goto taskManager->decrementTaskCount(odgCommitCPUWorker6);
+}
+
+__code odgCommitCPUWorker2(struct CPUWorker* worker, struct Context* task) {
+    int i = worker->loopCounter;
+    struct Queue* queue = GET_WAIT_LIST(task->data[task->odg+i]); // presumably the queue of tasks waiting on this output - confirm GET_WAIT_LIST
+    goto queue->isEmpty(odgCommitCPUWorker3, odgCommitCPUWorker5); // not empty: take a waiter, empty: advance to the next output
+}
+
+__code odgCommitCPUWorker3(struct CPUWorker* worker, struct Context* task) {
+    int i = worker->loopCounter;
+    struct Queue* queue = GET_WAIT_LIST(task->data[task->odg+i]); // same wait list that odgCommitCPUWorker2 just tested
+    goto queue->take(odgCommitCPUWorker4); // hand one waiting task to odgCommitCPUWorker4
+}
+
+__code odgCommitCPUWorker4(struct CPUWorker* worker, struct Context* task, struct Context* waitTask) {
+    if (__sync_fetch_and_sub(&waitTask->idgCount, 1) == 1) { // atomically decrement the idg counter; __sync_fetch_and_sub returns the value held before the subtraction, so 1 means the counter just reached 0
+        struct TaskManager* taskManager = waitTask->taskManager;
+        goto taskManager->spawn(waitTask, odgCommitCPUWorker2); // counter reached 0: spawn the waiting task, then keep draining
+    }
+    goto odgCommitCPUWorker2(); // look for further waiters on the same output
+}
+
+__code odgCommitCPUWorker4_stub(struct Context* context) {
+    CPUWorker* cpuWorker     = (CPUWorker*)GearImpl(context, Worker, worker);
+    struct Context* task     = Gearef(context, Worker)->task; // the task whose outputs are being committed
+    struct Context* waitTask = &Gearef(context, Queue)->data->Context; // the Queue data slot viewed as a context; presumably filled by take - confirm
+    goto odgCommitCPUWorker4(context,
+                             cpuWorker,
+                             task,
+                             waitTask);
+}
+
+__code odgCommitCPUWorker5(struct CPUWorker* worker, struct Context* task) {
+    worker->loopCounter++; // this output's wait list is empty, move on to the next output
+    goto odgCommitCPUWorker1();
+}
+
+__code odgCommitCPUWorker6(struct CPUWorker* worker, struct Context* task) {
+    struct Worker* taskWorker = task->worker; // the worker recorded on the task by getTaskCPUWorker
+    goto taskWorker->taskReceive(taskWorker->tasks); // back to taking the next task
+}
+
+__code shutdownCPUWorker(struct CPUWorker* worker) {
+    goto exit_code(); // nothing to release here; exit_code is defined outside this file
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/CUDAExecutor.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/CUDAExecutor.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,108 @@
+#include "../context.h"
+#interface "Executor.h"
+#interface "Timer.h"
+#include <stdio.h>
+#include <math.h>
+
+Executor* createCUDAExecutor(struct Context* context, CUdevice device) {
+    struct Executor* executor = new Executor();
+    struct CUDAExecutor* cudaExecutor = new CUDAExecutor();
+    cudaExecutor->timer = createTimerImpl(context); // started in readCUDAExecutor, ended in writeCUDAExecutor
+    checkCudaErrors(cuDeviceGetAttribute(&cudaExecutor->maxThreadPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device)); // per-block thread limit of this device
+    executor->executor = (union Data*)cudaExecutor; // NOTE(review): cudaExecutor->buffer is not set here although readCUDAExecutor reads it - confirm who assigns it
+    executor->read  = C_readCUDAExecutor;
+    executor->exec  = C_execCUDAExecutor;
+    executor->write = C_writeCUDAExecutor;
+    return executor;
+}
+
+__code readCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) {
+    struct CUDABuffer* buffer = executor->buffer;
+    int paramLen = buffer->inputLen + buffer->outputLen; // one kernel parameter per input and per output
+    executor->kernelParams = (CUdeviceptr**)ALLOCATE_PTR_ARRAY(context, CUdeviceptr, paramLen);
+    for (int i = 0; i < paramLen; i++) {
+        CUdeviceptr* deviceptr = new CUdeviceptr();
+        // allocate device memory of the same size as the host data gear
+        union Data* data = i < buffer->inputLen? buffer->inputData[i] : buffer->outputData[i-buffer->inputLen]; // inputs first, then outputs
+        checkCudaErrors(cuMemAlloc(deviceptr, GET_SIZE(data)));
+        checkCudaErrors(cuMemcpyHtoD(*deviceptr, data, GET_SIZE(data)));
+        // the copy above is a synchronous host to device transfer; outputs are copied as well
+        executor->kernelParams[i] = deviceptr;
+    }
+    // TODO: Implements pipeline
+    // goto next(...);
+    struct Timer* timer = executor->timer;
+    goto timer->start(execCUDAExecutor); // start the timer, then continue with the kernel launch
+}
+
+int computeblockDim(int count, int maxThreadPerBlock) {
+    return maxThreadPerBlock > count ? count : maxThreadPerBlock; // the smaller of the two: never more threads than elements
+}
+
+void calcBlockMaxThread(struct MultiDimIterator* iterator, struct CUDAExecutor* executor) {
+    // Split executor->maxThreadPerBlock over the axes the iterator uses, keeping
+    // maxThreadPerBlockX * maxThreadPerBlockY * maxThreadPerBlockZ <= maxThreadPerBlock
+    const int limit = executor->maxThreadPerBlock;
+    int perX = 1;
+    int perY = 1;
+    int perZ = 1;
+    if (iterator->x > 1 && iterator->y == 1 && iterator->z == 1) { // only x is used: x gets the whole limit
+        perX = limit;
+    } else {
+        int ln_2 = log2(limit);
+        if (iterator->x > 1 && iterator->y > 1 && iterator->z == 1) { // x and y are used: half of the bits each
+            perX = perY = 1 << (ln_2/2);
+        } else { // every other shape: a third of the bits each, x also takes the remainder
+            perY = perZ = 1 << (ln_2/3);
+            perX = perY * (1 << (ln_2%3));
+        }
+    }
+    // store the per-axis limits on the executor
+    executor->maxThreadPerBlockX = perX;
+    executor->maxThreadPerBlockY = perY;
+    executor->maxThreadPerBlockZ = perZ;
+}
+
+__code execCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) {
+    task->num_exec = 1;
+    if (task->iterate) {
+        struct MultiDimIterator* iterator = &task->iterator->iterator->MultiDimIterator;
+        calcBlockMaxThread(iterator, executor); // fills maxThreadPerBlockX/Y/Z for this iterator shape
+        int blockDimX = computeblockDim(iterator->x, executor->maxThreadPerBlockX);
+        int blockDimY = computeblockDim(iterator->y, executor->maxThreadPerBlockY);
+        int blockDimZ = computeblockDim(iterator->z, executor->maxThreadPerBlockZ);
+        // launch kernel with grid = extent / block per axis. NOTE(review): the integer division drops a partial last block when an extent is not a multiple of its block size - confirm extents always are
+        checkCudaErrors(cuLaunchKernel(task->function,
+                    iterator->x/blockDimX, iterator->y/blockDimY, iterator->z/blockDimZ,
+                    blockDimX, blockDimY, blockDimZ,
+                    0, NULL, (void**)executor->kernelParams, NULL));
+    } else {
+        checkCudaErrors(cuLaunchKernel(task->function,
+                    1, 1, 1,
+                    1, 1, 1,
+                    0, NULL, (void**)executor->kernelParams, NULL));
+    }
+    // TODO: Implements pipeline
+    // goto next(...);
+    goto writeCUDAExecutor(); // writeCUDAExecutor synchronizes with the launched kernel
+}
+
+__code writeCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) {
+    // the kernel launch is asynchronous, so wait here until the device has finished
+    checkCudaErrors(cuCtxSynchronize());
+    struct Timer* timer = executor->timer;
+    goto timer->end(writeCUDAExecutor1); // end the timer started in readCUDAExecutor, then copy the results back
+}
+
+__code writeCUDAExecutor1(struct CUDAExecutor* executor, struct Context* task, __code next(...)) {
+    // fetch the results: copy every buffer back to the host and release its device memory
+    struct CUDABuffer* buffer = executor->buffer;
+    int paramLen = buffer->inputLen + buffer->outputLen;
+    for (int i = 0; i < paramLen; i++) {
+        CUdeviceptr deviceptr =  *(executor->kernelParams[i]);
+        union Data* data = i < buffer->inputLen? buffer->inputData[i] : buffer->outputData[i-buffer->inputLen]; // same order as in readCUDAExecutor
+        checkCudaErrors(cuMemcpyDtoH(data, deviceptr, GET_SIZE(data)));
+        checkCudaErrors(cuMemFree(deviceptr)); // a failed free is reported like every other driver call in this file
+    }
+    goto next(...);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/CUDAWorker.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/CUDAWorker.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,131 @@
+#include "../context.h"
+#interface "TaskManager.h"
+#interface "Worker.h"
+#interface "Iterator.h"
+#interface "Queue.h"
+
+extern void cudaInit(struct CUDAWorker *cudaWorker,int phase, int deviceNum);
+extern void cudaShutdown(CUDAWorker *cudaWorker);
+
+static void startCUDAWorker(Worker* worker);
+
+Worker* createCUDAWorker(struct Context* context, int id, Queue* queue, int deviceNum) {
+    struct Worker* worker = new Worker();
+    struct CUDAWorker* cudaWorker = new CUDAWorker();
+    worker->worker = (union Data*)cudaWorker;
+    worker->tasks = queue; // queue this worker takes its tasks from
+    cudaWorker->id = id;
+    cudaWorker->loopCounter = 0; // offset of the output data gear being committed, see odgCommitCUDAWorker1
+    cudaWorker->deviceNum = deviceNum; // handed to cudaInit by startCUDAWorker
+    worker->taskReceive = C_taskReceiveCUDAWorker;
+    worker->shutdown = C_shutdownCUDAWorker;
+    pthread_create(&worker->thread, NULL, (void*)&startCUDAWorker, worker); // startCUDAWorker runs on the new thread; the return value is not checked
+    return worker;
+}
+
+static void startCUDAWorker(Worker* worker) {
+    struct CUDAWorker* cudaWorker = &worker->worker->CUDAWorker;
+    cudaInit(cudaWorker, 0, cudaWorker->deviceNum); // NOTE(review): presumably sets cudaWorker->device, which is read below - confirm in cuda.c
+    cudaWorker->context  = NEW(struct Context); // every worker thread allocates a context of its own
+    initContext(cudaWorker->context);
+    cudaWorker->executor = createCUDAExecutor(cudaWorker->context, cudaWorker->device);
+    Gearef(cudaWorker->context, Worker)->worker = (union Data*)worker;
+    Gearef(cudaWorker->context, Worker)->tasks = worker->tasks;
+    goto meta(cudaWorker->context, worker->taskReceive); // enter the task loop
+}
+
+__code taskReceiveCUDAWorker(struct Worker* worker, struct Queue* tasks) {
+    goto tasks->take(getTaskCUDAWorker); // take one entry from the task queue. NOTE(review): worker is typed struct Worker* here but struct CPUWorker* in the CPU twin - confirm this is intended
+}
+
+__code getTaskCUDAWorker(struct CUDAWorker* cudaWorker, struct Context* task, struct Worker* worker) {
+    if (!task) { // a NULL task makes the worker shut down
+        goto worker->shutdown(); // end thread
+    }
+    task->worker = worker; // odgCommitCUDAWorker_stub reaches the worker context through this field
+    enum Code taskCg = task->next; // the code gear the task itself wants to run
+    task->next = C_odgCommitCUDAWorker; // commit outputDG after task exec
+    goto meta(task, taskCg); // switch task context
+}
+
+__code getTaskCUDAWorker_stub(struct Context* context) {
+    CUDAWorker* cudaWorker = (CUDAWorker*)GearImpl(context, Worker, worker);
+    Worker* worker = &Gearef(context,Worker)->worker->Worker;
+    struct Context* task = &Gearef(context, Queue)->data->Context; // the Queue data slot viewed as a task context; presumably filled by take - confirm
+    goto getTaskCUDAWorker(context, cudaWorker, task, worker);
+}
+
+__code odgCommitCUDAWorker(struct CUDAWorker* worker, struct Context* task) {
+    if (task->iterate) { // iterated tasks go through the iterator's barrier first
+        struct Iterator* iterator = task->iterator;
+        goto iterator->barrier(task, odgCommitCUDAWorker1, odgCommitCUDAWorker6); // next commits the outputs, whenWait returns to taskReceive
+    } else {
+        goto odgCommitCUDAWorker1();
+    }
+}
+
+__code odgCommitCUDAWorker_stub(struct Context* context) {
+    // switch worker context: context is the finished task, the commit runs on the worker's own context
+    struct Context* workerContext = context->worker->worker->CUDAWorker.context;
+    Gearef(workerContext, Worker)->worker = (union Data*)context->worker;
+    Gearef(workerContext, Worker)->task = context; // the finished task becomes the task argument
+    CUDAWorker* cudaWorker = (CUDAWorker*)GearImpl(workerContext, Worker, worker);
+    goto odgCommitCUDAWorker(workerContext,
+                            cudaWorker,
+                            context);
+}
+
+__code odgCommitCUDAWorker1(struct CUDAWorker* worker, struct Context* task) {
+    int i = worker->loopCounter; // offset of the output data gear to commit next
+    if (task->odg+i < task->maxOdg) { // outputs are task->data from index odg up to maxOdg
+        goto odgCommitCUDAWorker2();
+    }
+    worker->loopCounter = 0; // every output was handled, reset for the next task
+    struct TaskManager* taskManager = task->taskManager;
+    goto taskManager->decrementTaskCount(odgCommitCUDAWorker6);
+}
+
+__code odgCommitCUDAWorker2(struct CUDAWorker* worker, struct Context* task) {
+    int i = worker->loopCounter;
+    struct Queue* queue = GET_WAIT_LIST(task->data[task->odg+i]); // presumably the queue of tasks waiting on this output - confirm GET_WAIT_LIST
+    goto queue->isEmpty(odgCommitCUDAWorker3, odgCommitCUDAWorker5); // not empty: take a waiter, empty: advance to the next output
+}
+
+__code odgCommitCUDAWorker3(struct CUDAWorker* worker, struct Context* task) {
+    int i = worker->loopCounter;
+    struct Queue* queue = GET_WAIT_LIST(task->data[task->odg+i]); // same wait list that odgCommitCUDAWorker2 just tested
+    goto queue->take(odgCommitCUDAWorker4); // hand one waiting task to odgCommitCUDAWorker4
+}
+
+__code odgCommitCUDAWorker4(struct CUDAWorker* worker, struct Context* task, struct Context* waitTask) {
+    if (__sync_fetch_and_sub(&waitTask->idgCount, 1) == 1) { // atomically decrement the idg counter; __sync_fetch_and_sub returns the value held before the subtraction, so 1 means the counter just reached 0
+        struct TaskManager* taskManager = waitTask->taskManager;
+        goto taskManager->spawn(waitTask, odgCommitCUDAWorker2); // counter reached 0: spawn the waiting task, then keep draining
+    }
+    goto odgCommitCUDAWorker2(); // look for further waiters on the same output
+}
+
+__code odgCommitCUDAWorker4_stub(struct Context* context) {
+    CUDAWorker* cudaWorker     = (CUDAWorker*)GearImpl(context, Worker, worker);
+    struct Context* task     = Gearef(context, Worker)->task; // the task whose outputs are being committed
+    struct Context* waitTask = &Gearef(context, Queue)->data->Context; // the Queue data slot viewed as a context; presumably filled by take - confirm
+    goto odgCommitCUDAWorker4(context,
+                             cudaWorker,
+                             task,
+                             waitTask);
+}
+
+__code odgCommitCUDAWorker5(struct CUDAWorker* worker, struct Context* task) {
+    worker->loopCounter++; // this output's wait list is empty, move on to the next output
+    goto odgCommitCUDAWorker1();
+}
+
+__code odgCommitCUDAWorker6(struct CUDAWorker* worker, struct Context* task) {
+    struct Worker* taskWorker = task->worker; // the worker recorded on the task by getTaskCUDAWorker
+    goto taskWorker->taskReceive(taskWorker->tasks); // back to taking the next task
+}
+
+__code shutdownCUDAWorker(struct CUDAWorker* worker) {
+    cudaShutdown(worker); // declared extern above; presumably tears down what cudaInit set up - confirm in cuda.c
+    goto meta(context, C_exit_code);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/CodeGear.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/CodeGear.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,8 @@
+typedef struct CodeGear<Impl>{ // interface describing a code gear. NOTE(review): code names both an enum Code field and a method - confirm the generator copes
+        union Data* codeGear;
+        enum Code code;
+        __code code(struct Integer* input1, struct Integer* input2, __code next(struct Integer* output, ...));
+        __code setInfo(struct Context* codeGear, union Data** dataGears, __code next(...));
+        union Data* dataGears[10];
+        __code next(...);
+} CodeGear;
diff -r a4cab67624f7 -r 9146d6017f18 src/Executor.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Executor.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,8 @@
+typedef struct Executor<Impl>{ // interface: read, exec and write stages, each given the task and a continuation
+    union Data* executor;
+    struct Context* task;
+    __code next(...);
+    __code read(Impl* executor, struct Context* task, __code next(...));
+    __code exec(Impl* executor, struct Context* task, __code next(...));
+    __code write(Impl* executor, struct Context* task, __code next(...));
+} Executor;
diff -r a4cab67624f7 -r 9146d6017f18 src/Iterator.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Iterator.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,9 @@
+typedef struct Iterator<Impl>{ // interface: exec starts an iterated task, barrier continues to next or whenWait
+        union Data* iterator;
+        struct Context* task;
+        int numGPU;
+        __code exec(Impl* iterator, struct Context* task, int numGPU, __code next(...));
+        __code barrier(Impl* iterator, struct Context* task, __code next(...), __code whenWait(...));
+        __code whenWait(...);
+        __code next(...);
+} Iterator;
diff -r a4cab67624f7 -r 9146d6017f18 src/Lock.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Lock.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,6 @@
+typedef struct Lock<Impl>{ // interface: doLock and doUnlock, each continuing to next
+        union Data* lock;
+        __code doLock(Impl* lock, __code next(...)); 
+        __code doUnlock(Impl* lock, __code next(...)); 
+        __code next(...);
+} Lock;
diff -r a4cab67624f7 -r 9146d6017f18 src/LockImpl.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/LockImpl.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,86 @@
+#include "../context.h"
+#interface "Queue.h"
+#interface "Atomic.h"
+#interface "Lock.h"
+#interface "Worker.h"
+#interface "TaskManager.h"
+
+Lock* createLockImpl(struct Context* context) {
+    struct Lock* lock = new Lock();
+    struct LockImpl* lockImpl = new LockImpl();
+    lockImpl->lock = NULL; // NULL means unlocked; doLockLockImpl swaps in 1 to take the lock
+    lockImpl->waitThreadQueue = createSynchronizedQueue(context); // contexts that found the lock taken are put here
+    lockImpl->atomic = createAtomicReference(context); // provides the checkAndSet used on the lock word
+    lock->lock = (union Data*)lockImpl;
+    lock->doLock = C_doLockLockImpl;
+    lock->doUnlock = C_doUnlockLockImpl;
+    return lock;
+}
+
+__code doLockLockImpl(struct LockImpl* lock, __code next(...)) {
+    struct Atomic* atomic = lock->atomic;
+    goto atomic->checkAndSet(&lock->lock, NULL, 1, doLockLockImpl1, doLockLockImpl2); // lock taken: doLockLockImpl1, already held: doLockLockImpl2
+}
+
+__code doLockLockImpl1(struct LockImpl* lock, __code next(...)) {
+    lock->lockContext = context; // remember the owner; doUnlockLockImpl only releases for this context
+    goto next(...);
+}
+
+__code doLockLockImpl2(struct LockImpl* lock, __code next(...)) {
+    struct Queue* queue = lock->waitThreadQueue;
+    context->next= C_doLockLockImpl; // once this context is spawned again it retries the lock
+    printf("Put task\n");
+    goto queue->put(context, doLockLockImpl3); // park this context on the wait queue
+}
+
+__code doLockLockImpl3(struct LockImpl* lock, struct Worker* worker, __code next(...)) {
+    goto worker->taskReceive(); // go back to the scheduler: the worker takes its next task instead of waiting for the lock
+}
+
+__code doLockLockImpl3_stub(struct Context* context) {
+    // switch worker context
+    struct Context* workerContext = context->worker->worker->CPUWorker.context; // NOTE(review): reads the CPUWorker member even if the worker is a CUDAWorker - confirm
+    LockImpl* lockImpl = (LockImpl*)GearImpl(context, Lock, lock);
+    goto doLockLockImpl3(workerContext,
+            lockImpl,
+            context->worker,
+            Gearef(context, Lock)->next);
+}
+
+__code doUnlockLockImpl(struct LockImpl* lock, __code next(...)) {
+    if (lock->lockContext == context) { // only the context recorded by doLockLockImpl1 releases the lock
+        struct Atomic* atomic = lock->atomic;
+        goto atomic->checkAndSet(&lock->lock, 1, NULL, doUnlockLockImpl1, doUnlockLockImpl); // released: wake a waiter, swap failed: retry the unlock
+    }
+    goto next(...); // not the owner: continue without touching the lock
+}
+
+__code doUnlockLockImpl1(struct LockImpl* lock, __code next(...)) {
+    struct Queue* queue = lock->waitThreadQueue;
+    goto queue->isEmpty(doUnlockLockImpl2, doUnlockLockImpl4); // a waiter exists: doUnlockLockImpl2, nobody waits: doUnlockLockImpl4
+}
+
+__code doUnlockLockImpl2(struct LockImpl* lock, __code next(...)) {
+    struct Queue* queue = lock->waitThreadQueue;
+    printf("%p: Take task\n", lock);
+    goto queue->take(doUnlockLockImpl3); // take one parked context off the wait queue
+}
+
+__code doUnlockLockImpl3(struct LockImpl* lock, struct Context* waitTask, __code next(...)) {
+    struct TaskManager* taskManager = waitTask->taskManager;
+    goto taskManager->spawn(waitTask, next(...)); // notify: spawn the waiting context again
+}
+
+__code doUnlockLockImpl3_stub(struct Context* context) {
+    LockImpl* lockImpl = (LockImpl*)GearImpl(context, Lock, lock);
+    struct Context* waitTask = &Gearef(context, Queue)->data->Context; // the Queue data slot viewed as a context; presumably filled by take - confirm
+    goto doUnlockLockImpl3(context,
+            lockImpl,
+            waitTask,
+            Gearef(context, Lock)->next);
+}
+
+__code doUnlockLockImpl4(struct LockImpl* lock, __code next(...)) {
+    goto next(...); // the wait queue was empty, nothing to wake
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/MultiDimIterator.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/MultiDimIterator.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,96 @@
+#include "../context.h"
+#interface "Iterator.h"
+#interface "TaskManager.h"
+#include <stdio.h>
+
+Iterator* createMultiDimIterator(struct Context* context, int x, int y, int z) {
+    struct Iterator* iterator = new Iterator();
+    struct MultiDimIterator* multiDimIterator = new MultiDimIterator();
+    iterator->iterator = (union Data*)multiDimIterator;
+    iterator->exec = C_execMultiDimIterator;
+    iterator->barrier = C_barrierMultiDimIterator;
+    multiDimIterator->x = x; // extent of each axis
+    multiDimIterator->y = y;
+    multiDimIterator->z = z;
+    multiDimIterator->count = x * y * z; // total number of indices; counted down in barrierMultiDimIterator
+    multiDimIterator->counterX = 0; // index of the next iterate task, advanced in execMultiDimIterator2
+    multiDimIterator->counterY = 0;
+    multiDimIterator->counterZ = 0;
+    return iterator;
+}
+
+/**
+ * Create an iterate task for one index: a copy of task plus a MultiDim data gear holding that index.
+ * @param task      task of the copy source
+ * @param x, y, z   index stored in the added MultiDim data gear
+ * @return created iterateTask
+ */
+struct Context* createMultiDimIterateTask(struct Context* task, int x, int y, int z) {
+    struct Context* task1 = NEW(struct Context);
+    initContext(task1);
+    task1->taskManager = task->taskManager;
+    task1->next     = task->next;
+    task1->iterate  = 1;
+    task1->iterator = task->iterator; // all iterate tasks share the source task's iterator
+    task1->idgCount = task->idgCount;
+    task1->idg      = task->idg;
+    task1->maxIdg   = task->maxIdg;
+    for(int i = task1->idg; i < task1->maxIdg; i++) {
+        task1->data[i] = task->data[i]; // same input data gears as the source task
+    }
+
+    // create index data gear and register input data to iterate task
+    struct MultiDim* multiDim = &ALLOCATE_DATA_GEAR(task1, MultiDim)->MultiDim;
+    multiDim->x = x;
+    multiDim->y = y;
+    multiDim->z = z;
+    task1->data[task1->maxIdg++] = (union Data*)multiDim; // the index is appended as one extra input
+    task1->odg      = task->odg + 1; // the output range moves up by one slot
+    task1->maxOdg   = task->maxOdg + 1;
+    for (int i = task1->odg; i < task1->maxOdg; i++) {
+        task1->data[i] = task->data[i-1]; // same output data gears as the source task, one slot higher
+    }
+
+    return task1;
+}
+
+__code execMultiDimIterator(struct MultiDimIterator* iterator, struct Context* task, int numGPU, __code next(...)) {
+    // No GPU device
+    if (numGPU == 0) {
+        goto execMultiDimIterator1(); // CPU path: one iterate task is spawned per index
+    }
+    task->iterate = 1;
+    task->gpu = 1; // with a GPU the task itself is spawned once; barrierMultiDimIterator lets gpu tasks pass at once
+    struct TaskManager* taskManager = task->taskManager;
+    goto taskManager->spawn(task, next(...));
+}
+
+__code execMultiDimIterator1(struct MultiDimIterator* iterator, struct Context* task, __code next(...)) {
+    int x = iterator->counterX; // current index on each axis
+    int y = iterator->counterY;
+    int z = iterator->counterZ;
+    struct Context* iterateTask = createMultiDimIterateTask(task, x, y, z); // copy of task carrying this index
+    struct TaskManager* taskManager = task->taskManager;
+    goto taskManager->spawn(iterateTask, execMultiDimIterator2); // spawn it, then advance the index
+}
+
+__code execMultiDimIterator2(struct MultiDimIterator* iterator, struct Context* task, __code next(...)) {
+    if (++iterator->counterX >= iterator->x) { // x advances fastest and carries into y, then z
+        iterator->counterX = 0;
+        if (++iterator->counterY >= iterator->y) {
+            iterator->counterY = 0;
+            if (++iterator->counterZ >= iterator->z) {
+                iterator->counterZ = 0; // all three counters are back at 0 for a later run
+                goto next(...); // z wrapped too: every index has been spawned
+            }
+        }
+    }
+    goto execMultiDimIterator1(); // spawn the task for the next index
+}
+
+__code barrierMultiDimIterator(struct MultiDimIterator* iterator, struct Context* task, __code next(...), __code whenWait(...)) {
+    if (task->gpu || __sync_fetch_and_sub(&iterator->count, 1) == 1) { // gpu tasks pass at once; otherwise only the call that takes count from 1 to 0 passes
+        goto next(...);
+    }
+    goto whenWait(...); // other iterate tasks are still outstanding
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/Queue.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Queue.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,10 @@
+typedef struct Queue<Impl>{
+        union Data* queue; // NOTE(review): presumably the implementation object passed as Impl* below - confirm
+        union Data* data; // argument slot for put
+        __code whenEmpty(...);
+        __code clear(Impl* queue, __code next(...));
+        __code put(Impl* queue, union Data* data, __code next(...));
+        __code take(Impl* queue, __code next(union Data*, ...));
+        __code isEmpty(Impl* queue, __code next(...), __code whenEmpty(...));
+        __code next(...);
+} Queue;
diff -r a4cab67624f7 -r 9146d6017f18 src/RedBlackTree.agda
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/RedBlackTree.agda	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,145 @@
+module RedBlackTree where
+
+open import stack
+open import Level
+
+record TreeMethods {n m : Level } {a : Set n } {t : Set m } (treeImpl : Set n ) : Set (m Level.⊔ n) where   -- method table for a tree implementation, in continuation-passing style
+  field
+    putImpl : treeImpl -> a -> (treeImpl -> t) -> t   -- takes an element and passes the resulting implementation to the continuation
+    getImpl  : treeImpl -> (treeImpl -> Maybe a -> t) -> t   -- passes the implementation and an optional element to the continuation
+open TreeMethods
+
+record Tree  {n m : Level } {a : Set n } {t : Set m } (treeImpl : Set n ) : Set (m Level.⊔ n) where   -- an implementation value bundled with its method table
+  field
+    tree : treeImpl
+    treeMethods : TreeMethods {n} {m} {a} {t} treeImpl
+  putTree : a -> (Tree treeImpl -> t) -> t
+  putTree d next = putImpl (treeMethods ) tree d (\t1 -> next (record {tree = t1 ; treeMethods = treeMethods} ))   -- delegates to putImpl and re-wraps the result as a Tree
+  getTree : (Tree treeImpl -> Maybe a -> t) -> t
+  getTree next = getImpl (treeMethods ) tree (\t1 d -> next (record {tree = t1 ; treeMethods = treeMethods} ) d )   -- delegates to getImpl and re-wraps before calling next
+
+open Tree
+
+data Color {n : Level } : Set n where   -- colour tag stored in each Node
+  Red   : Color
+  Black : Color
+
+data CompareResult {n : Level } : Set n where   -- three-way outcome produced by the tree's compare field
+  LT : CompareResult
+  GT : CompareResult
+  EQ : CompareResult
+
+record Node {n : Level } (a k : Set n) : Set n where   -- tree node holding a key of type k and a value of type a
+  inductive
+  field
+    key   : k
+    value : a
+    right : Maybe (Node a k)   -- Nothing when there is no right child
+    left  : Maybe (Node a k)   -- Nothing when there is no left child
+    color : Color {n}
+open Node
+
+record RedBlackTree {n m : Level } {t : Set m} (a k si : Set n) : Set (m Level.⊔ n) where   -- tree state: root, a node stack and the key ordering
+  field
+    root : Maybe (Node a k)   -- Nothing for an empty tree
+    nodeStack : Stack {n} {m} (Node a k) {t} si   -- handed to findNode by putRedBlackTree
+    compare : k -> k -> CompareResult {n}
+
+open RedBlackTree
+
+open Stack
+
+--
+-- put new node at parent node, and rebuild tree to the top
+--
+{-# TERMINATING #-}   -- https://agda.readthedocs.io/en/v2.5.3/language/termination-checking.html
+replaceNode : {n m : Level } {t : Set m } {a k si : Set n} -> RedBlackTree {n} {m} {t} a k si -> Stack (Node a k) si -> Node a k -> Node a k -> (RedBlackTree {n} {m} {t} a k si -> t) -> t
+replaceNode {n} {m} {t} {a} {k} {si} tree s parent n0 next = popStack s (   -- pop the next ancestor and compare parent's key with n0's key
+      \s grandParent -> replaceNode1 s grandParent ( compare tree (key parent) (key n0) ) )
+  where
+        replaceNode1 : Stack (Node a k) si -> Maybe ( Node a k ) -> CompareResult -> t
+        replaceNode1 s Nothing LT = next ( record tree { root = Just ( record parent { left = Just n0 ; color = Black } ) } )      -- stack exhausted: parent becomes the Black root with n0 on the left
+        replaceNode1 s Nothing GT = next ( record tree { root = Just ( record parent { right = Just n0 ; color = Black } ) } )      -- same, n0 on the right
+        replaceNode1 s Nothing EQ = next ( record tree { root = Just ( record parent { right = Just n0 ; color = Black } ) } )      -- EQ is handled exactly like GT here
+        replaceNode1 s (Just grandParent) result with result
+        ... | LT =  replaceNode tree s grandParent ( record parent { left = Just n0 } ) next   -- attach n0 and continue one level up
+        ... | GT =  replaceNode tree s grandParent ( record parent { right = Just n0 } ) next
+        ... | EQ =  next tree    -- with an ancestor present, EQ returns the tree unchanged
+
+rotateRight : {n m : Level } {t : Set m } {a k si : Set n} -> RedBlackTree {n} {m} {t} a k si -> Stack (Node  a k) {t} si -> Node a k -> Node a k -> Node a k -> (RedBlackTree {n} {m} {t} a k si -> t) -> t
+rotateRight {n} {m} {t} {a} {k} {si} tree s n0 parent grandParent next = {!!}   -- unfilled hole: not implemented yet
+
+rotateLeft : {n m : Level } {t : Set m } {a k si : Set n} -> RedBlackTree {n} {m} {t} a k si -> Stack (Node a k) {t} si -> Node a k -> Node a k -> Node a k -> (RedBlackTree {n} {m} {t} a k si -> t) -> t
+rotateLeft {n} {m} {t} {a} {k} {si} tree s n0 parent grandParent next = {!!}   -- unfilled hole: not implemented yet
+
+insertCase5 : {n m : Level } {t : Set m } {a k si : Set n} -> RedBlackTree {n} {m} {t} a k si -> Stack (Node a k) {t} si -> Node a k -> Node a k -> Node a k -> (RedBlackTree {n} {m} {t} a k si -> t) -> t
+insertCase5 {n} {m} {t} {a} {k} {si} tree s n0 parent grandParent next = {!!}   -- unfilled hole: not implemented yet
+
+insertCase4 : {n m : Level } {t : Set m } {a k si : Set n} -> RedBlackTree {n} {m} {t} a k si -> Stack (Node a k) {t} si -> Node a k -> Node a k -> Node a k -> (RedBlackTree {n} {m} {t} a k si -> t) -> t
+insertCase4 {n} {m} {t} {a} {k} {si} tree s n0 parent grandParent next = {!!}   -- unfilled hole: not implemented yet; insertNode below calls it
+
+{-# TERMINATING #-}
+insertNode : {n m : Level } {t : Set m } {a k si : Set n} -> RedBlackTree {n} {m} {t} a k si -> Stack (Node a k) {t} si -> Node a k -> (RedBlackTree {n} {m} {t} a k si -> t) -> t
+insertNode {n} {m} {t} {a} {k} {si} tree s n0 next = get2Stack s (\ s d1 d2 -> insertCase1 s n0 d1 d2 )   -- d1 and d2 are used as parent and grandparent of n0
+   where
+    insertCase1 : Stack (Node a k) si -> Node a k -> Maybe (Node a k) -> Maybe (Node a k) -> t    -- placed here to allow mutual recursion
+          -- http://agda.readthedocs.io/en/v2.5.2/language/mutual-recursion.html
+    insertCase3 : Stack (Node a k) si -> Node a k -> Node a k -> Node a k -> t
+    insertCase3 s n0 parent grandParent with left grandParent | right grandParent
+    ... | Nothing | Nothing = insertCase4 tree s n0 parent grandParent next
+    ... | Nothing | Just uncle  = insertCase4 tree s n0 parent grandParent next
+    ... | Just uncle | _  with compare tree ( key uncle ) ( key parent )   -- only the left child of grandParent is examined as a possible uncle
+    ...                   | EQ =  insertCase4 tree s n0 parent grandParent next   -- that child has parent's key, so it is not treated as an uncle
+    ...                   | _ with color uncle
+    ...                           | Red = pop2Stack s ( \s p0 p1 -> insertCase1 s (    -- Red uncle: recolour and repeat insertCase1 on grandParent
+           record grandParent { color = Red ; left = Just ( record parent { color = Black ; left = Just n0 } )  ; right = Just ( record uncle { color = Black } ) }) p0 p1 )
+    ...                           | Black = insertCase4 tree s n0 parent grandParent next
+    insertCase2 : Stack (Node a k) si -> Node a k -> Node a k -> Node a k -> t
+    insertCase2 s n0 parent grandParent with color parent
+    ... | Black = replaceNode tree s grandParent n0 next   -- Black parent: rebuild towards the root with replaceNode
+    ... | Red = insertCase3 s n0 parent grandParent
+    insertCase1 s n0 Nothing Nothing = next tree   -- no ancestors on the stack: the tree is returned unchanged
+    insertCase1 s n0 Nothing (Just grandParent) = replaceNode tree s grandParent n0 next
+    insertCase1 s n0 (Just grandParent) Nothing = replaceNode tree s grandParent n0 next   -- NOTE(review): this first argument is the d1 slot, elsewhere named parent
+    insertCase1 s n0 (Just parent) (Just grandParent) = insertCase2 s n0 parent grandParent
+      where
+
+findNode : {n m : Level } {a k si : Set n} {t : Set m} -> RedBlackTree {n} {m} {t} a k si -> Stack (Node a k) si -> (Node a k) -> (Node a k) -> (RedBlackTree {n} {m} {t} a k si -> Stack (Node a k) si -> Node a k -> t) -> t
+findNode {n} {m} {a} {k} {si} {t} tree s n0 n1 next = pushStack s n1 (\ s -> findNode1 s n1)   -- push the visited node n1, then decide where to descend
+  where
+    findNode2 : Stack (Node a k) si -> (Maybe (Node a k)) -> t
+    findNode2 s Nothing = next tree s n0   -- no child in that direction: stop, passing the stack of visited nodes
+    findNode2 s (Just n) = findNode tree s n0 n next
+    findNode1 : Stack (Node a k) si -> (Node a k)  -> t
+    findNode1 s n1 with (compare tree (key n0) (key n1))   -- compares n0's key against the visited node's key
+    ...                                | EQ = next tree s n0 
+    ...                                | GT = findNode2 s (right n1)
+    ...                                | LT = findNode2 s (left n1)
+
+
+leafNode : {n : Level } {a k : Set n}  -> k -> a -> Node a k
+leafNode k1 value = record {   -- childless Black node for the given key and value
+    key   = k1 ;
+    value = value ;
+    right = Nothing ;
+    left  = Nothing ;
+    color = Black 
+  }
+
+putRedBlackTree : {n m : Level } {a k si : Set n} {t : Set m} -> RedBlackTree {n} {m} {t} a k si -> k -> a -> (RedBlackTree {n} {m} {t} a k si -> t) -> t
+putRedBlackTree {n} {m} {a} {k} {si} {t} tree k1 value next with (root tree)
+...                                | Nothing = next (record tree {root = Just (leafNode k1 value) })   -- empty tree: the new leaf becomes the root
+...                                | Just n2  = findNode tree (nodeStack tree) (leafNode k1 value) n2 (\ tree1 s n1 -> insertNode tree1 s n1 next)   -- otherwise walk down from the root, then run insertNode
+
+getRedBlackTree : {n m : Level } {a k si : Set n} {t : Set m} -> RedBlackTree {n} {m} {t} a k si -> k -> (RedBlackTree {n} {m} {t} a k si -> (Maybe (Node a k)) -> t) -> t
+getRedBlackTree {_} {_} {a} {k} {_} {t} tree k1 cs = checkNode (root tree)   -- search from the root for key k1
+  where
+    checkNode : Maybe (Node a k) -> t
+    checkNode Nothing = cs tree Nothing   -- ran off the tree: report Nothing
+    checkNode (Just n) = search n
+      where
+        search : Node a k -> t
+        search n with compare tree k1 (key n)
+        search n | LT = checkNode (left n)
+        search n | GT = checkNode (right n)
+        search n | EQ = cs tree (Just n)   -- found: pass the matching node to the continuation
diff -r a4cab67624f7 -r 9146d6017f18 src/RedBlackTree.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/RedBlackTree.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,602 @@
+#include <stdio.h>
+
+#include "../context.h"
+#interface "Tree.h"
+#interface "Stack.h"
+
+extern enum Relational compare(struct Node* node1, struct Node* node2);
+
+Tree* createRedBlackTree(struct Context* context) {
+    struct Tree* tree = new Tree();
+    struct RedBlackTree* redBlackTree = new RedBlackTree();
+    tree->tree = (union Data*)redBlackTree; // the interface gear points at its RedBlackTree implementation
+    redBlackTree->root = NULL; // starts as an empty tree
+    redBlackTree->nodeStack = createSingleLinkedStack(context);
+    tree->put = C_putRedBlackTree; // interface methods are stored as code-gear identifiers
+    tree->get = C_getRedBlackTree;
+    tree->remove = C_removeRedBlackTree;
+    // tree->clear = C_clearRedBlackTree;
+    return tree;
+}
+
+void printTree1(union Data* data) {
+    struct Node* cur = &data->Node; /* view the gear as a Node */
+    if (cur == NULL) {
+        printf("NULL");
+        return;
+    }
+    printf("key = %d (", cur->key); /* key first, then right subtree, then left subtree */
+    printTree1((union Data*)(cur->right));
+    printf("), (");
+    printTree1((union Data*)(cur->left));
+    printf(")");
+}
+
+void printTree(union Data* data) {
+    printTree1(data); /* dump the whole tree on one line */
+    putchar('\n');    /* then terminate that line */
+}
+
+__code putRedBlackTree(struct RedBlackTree* tree, struct Node* node) {
+    struct Node* newNode = &ALLOCATE(context, Node)->Node;
+    struct Node* root = tree->root; // remember the old root before it is replaced below
+    printTree((union Data*)(tree->root));
+    tree->newNode = newNode;
+    tree->root = newNode; // this should be done at stackClear
+    tree->parent = NULL;
+    if (root) {
+        tree->current = root; // non-empty tree: walk down from the old root, then continue at insertNode
+        tree->result = compare(tree->current, node);
+        tree->findNodeNext = C_insertNode;
+        goto findNode(tree);
+    }
+    goto insertNode(tree, node);
+}
+
+__code findNode(struct RedBlackTree* tree) {
+    struct Stack* nodeStack = tree->nodeStack;
+    struct Node* oldNode = tree->current;
+    struct Node* newNode = tree->newNode;
+    tree->previous = newNode; // findNode1 reads the copy back from tree->previous
+    *newNode = *oldNode; // copy the visited node into the freshly allocated one
+    goto nodeStack->push((union Data*)newNode, findNode1);
+}
+
+__code findNode1(struct RedBlackTree* tree, struct Node* node, __code next(...)) {
+    struct Node* oldNode = tree->current;
+    struct Node* newNode = tree->previous; // the copy made by findNode
+    struct Node* newnewNode = &ALLOCATE(context, Node)->Node; // NOTE(review): allocated even on the EQ path, where it is not used
+    int result = tree->result;
+    if (result == EQ) {
+        newNode->value = node->value; // same key: only the value of the copy is replaced
+        // go to stack clear
+        goto next(...);
+    } else if (result == GT) {
+        tree->current = oldNode->right; // descend right; the copy's right link points at the next fresh node
+        newNode->right = newnewNode;
+    } else {
+        tree->current = oldNode->left;
+        newNode->left = newnewNode;
+    }
+    tree->newNode = newnewNode;
+    if (tree->current) {
+        tree->result = compare(tree->current, node);
+        goto findNode(tree);
+    }
+    goto meta(context, tree->findNodeNext); // no child in that direction: run the continuation chosen by the caller
+    //   gato tree->findNodeNext(tree, node);
+    
+}
+
+__code insertNode(struct RedBlackTree* tree, struct Node* node) {
+    struct Stack* nodeStack = tree->nodeStack;
+    struct Node* newNode = tree->newNode;
+    *newNode = *node; // fill the pre-allocated slot with the caller's node
+    newNode->color = Red; // the inserted node starts Red
+    tree->current = newNode;
+    goto nodeStack->get2(insertCase1);
+}
+
+__code insertCase1(struct RedBlackTree* tree, struct Node *parent, struct Node *grandparent) {
+    if (parent != NULL) {
+        tree->parent = parent; // record both ancestors for the following cases
+        tree->grandparent = grandparent;
+        goto insertCase2(tree);
+    }
+    tree->root->color = Black; // no parent: the root is coloured Black and insertion finishes
+    goto stackClear();
+}
+
+__code insertCase1_stub(struct Context* context) {
+    goto insertCase1(context, 
+        &Gearef(context, Tree)->tree->Tree.tree->RedBlackTree,
+        &context->data[D_Stack]->Stack.data->Node, // parent: the Stack gear's data slot
+        &context->data[D_Stack]->Stack.data1->Node); // grandparent: the Stack gear's data1 slot
+}
+
+__code insertCase2(struct RedBlackTree* tree) {
+    if (tree->parent->color == Black) { // Black parent: go straight to stackClear
+        goto stackClear();
+    }
+    goto insertCase3(tree);
+}
+
+__code insertCase3(struct RedBlackTree* tree) {
+    struct Stack* nodeStack = tree->nodeStack;
+    struct Node* uncle; // the grandparent's other child
+
+    if (tree->grandparent->left == tree->parent) {
+        uncle = tree->grandparent->right;
+    } else {
+        uncle = tree->grandparent->left;
+    }
+
+    if (uncle && (uncle->color == Red)) {
+        // do insertcase1 on grandparent, stack must be pop by two
+        tree->parent->color = Black; // Red uncle: recolour parent, uncle and grandparent
+        uncle->color = Black;
+        tree->grandparent->color = Red;
+        tree->current = tree->grandparent;
+        goto nodeStack->pop2(insertCase1);
+    }
+    goto insertCase4();
+}
+
+__code insertCase4(struct RedBlackTree* tree, struct RotateTree* rotateTree) {
+    struct Stack* nodeStack = tree->nodeStack;
+
+    if ((tree->current == tree->parent->right) && (tree->parent == tree->grandparent->left)) { // current is a right child of a left child
+        tree->current = tree->current->left;
+        tree->parent = tree->grandparent;
+
+        rotateTree->traverse = tree; // the rotation gears read the tree from here and continue at rotateTree->next
+        rotateTree->next = C_insertCase5;
+
+        goto nodeStack->pop(rotateLeft);
+    } else if ((tree->current == tree->parent->left) && (tree->parent == tree->grandparent->right)) { // mirror: left child of a right child
+        tree->parent = tree->grandparent;
+        tree->current = tree->current->right;
+
+        rotateTree->traverse = tree;
+        rotateTree->next = C_insertCase5;
+
+        goto nodeStack->pop(rotateRight);
+    }
+
+    goto insertCase5();
+}
+
+__code insertCase5(struct RedBlackTree* tree) {
+    struct Stack* nodeStack = tree->nodeStack; // pops two stack entries; insertCase51 receives them as parent and grandparent
+    goto nodeStack->pop2(insertCase51);
+}
+
+__code insertCase51(struct RedBlackTree* tree, struct RotateTree* rotateTree, struct Node* parent, struct Node* grandparent) {
+    struct Node* current = tree->current; // keep the inserted node before tree->current is moved
+    tree->parent = parent;
+    tree->grandparent = grandparent;
+
+    parent->color = Black; // recolour before rotating around the grandparent
+    grandparent->color = Red;
+
+    tree->current = grandparent; // the rotation stubs pass tree->current as the node to rotate
+
+    rotateTree->traverse = tree;
+    rotateTree->next = C_stackClear; // insertion ends after this rotation
+
+    if ((current == parent->left) && (parent == grandparent->left)){
+        goto rotateRight();
+    } else {
+        goto rotateLeft();
+    }
+}
+
+__code insertCase51_stub(struct Context* context) {
+    struct Node* parent = &context->data[D_Stack]->Stack.data->Node; // taken from the Stack gear's data slot
+    struct Node* grandparent = &context->data[D_Stack]->Stack.data1->Node; // taken from the Stack gear's data1 slot
+    goto insertCase51(context,
+                      &Gearef(context, Tree)->tree->Tree.tree->RedBlackTree,
+                      Gearef(context, RotateTree),
+                      parent,
+                      grandparent);
+}
+
+__code rotateLeft(struct RedBlackTree* tree) {
+    struct Stack* nodeStack = tree->nodeStack; // reads the stack top with get, then continues at rotateLeft1
+    goto nodeStack->get(rotateLeft1);
+}
+
+__code rotateLeft_stub(struct Context* context) {
+    struct RedBlackTree* traverse = context->data[D_RotateTree]->RotateTree.traverse; // tree stored in the RotateTree gear by the caller
+    goto rotateLeft(context, traverse);
+}
+    
+__code rotateLeft1(struct Node* node, struct RedBlackTree* tree, struct Node* parent, struct RotateTree* rotateTree) {
+    struct Node* tmp = node->right; // right child that moves up; NOTE(review): dereferenced below without a NULL check
+
+    if (parent) {
+        if (node == parent->left)
+            parent->left = tmp;
+        else
+            parent->right = tmp;
+    } else {
+        tree->root = tmp; // no parent: tmp becomes the new root
+    }
+
+    node->right = tmp->left; // node adopts tmp's left subtree and becomes tmp's left child
+    tmp->left = node;
+    tree->current = tmp;
+    
+    goto meta(context, rotateTree->next);
+}
+
+__code rotateLeft1_stub(struct Context* context) {
+    struct RedBlackTree* traverse = context->data[D_RotateTree]->RotateTree.traverse;
+    struct Node* parent = &context->data[D_Stack]->Stack.data->Node; // result slot of the preceding stack get
+    goto rotateLeft1(context,
+                    traverse->current, // the node rotated is the tree's current node
+                    traverse,
+                    parent,
+                    Gearef(context, RotateTree));
+}
+
+__code rotateRight(struct RedBlackTree* tree) {
+    struct Stack* nodeStack = tree->nodeStack; // reads the stack top with get, then continues at rotateRight1
+    goto nodeStack->get(rotateRight1);
+}
+
+__code rotateRight_stub(struct Context* context) {
+    struct RedBlackTree* traverse = context->data[D_RotateTree]->RotateTree.traverse; // tree stored in the RotateTree gear; the jump below used to target rotateLeft by mistake
+    goto rotateRight(context, traverse);
+}
+
+__code rotateRight1(struct Node* node, struct RedBlackTree* traverse,struct Node *parent,struct RotateTree *rotateTree) {
+    struct Node* tmp = node->left; // left child that moves up; NOTE(review): dereferenced below without a NULL check
+    
+    if (parent) {
+        if (node == parent->left)
+            parent->left = tmp;
+        else
+            parent->right = tmp;
+    } else {
+        traverse->root = tmp; // no parent: tmp becomes the new root
+    }
+
+    node->left = tmp->right; // node adopts tmp's right subtree and becomes tmp's right child
+    tmp->right = node;
+    traverse->current = tmp;
+    
+    goto meta(context, rotateTree->next);
+}
+
+__code rotateRight1_stub(struct Context* context) {
+    struct RedBlackTree* traverse = context->data[D_RotateTree]->RotateTree.traverse;
+    struct Node* parent = &context->data[D_Stack]->Stack.data->Node; // result slot of the preceding stack get
+    goto rotateRight1(context,
+                     traverse->current, // the node rotated is the tree's current node
+                     traverse,
+                     parent,
+                     Gearef(context, RotateTree));
+}
+
+__code stackClear(struct RedBlackTree* tree, struct Stack* nodeStack, __code next(...)) {
+    tree->current = 0;
+    nodeStack->stack = (union Data*)tree->nodeStack; // fill in the Stack gear's arguments by hand: target stack and continuation
+    nodeStack->next = next;
+    goto meta(context, tree->nodeStack->clear);
+}
+
+__code getRedBlackTree(struct RedBlackTree* tree, __code next(...)) {
+    if (tree->root) {
+        tree->current = tree->root; // start the lookup at the root
+
+        goto search();
+    }
+
+    goto next(...);
+}
+
+__code search(struct RedBlackTree* tree, struct Node* node, __code next(...)) {
+    // compare(context, traverse, traverse->current->key, node->key);
+    tree->result = compare(tree->current, node);
+    if (tree->result == EQ) {
+        *node = *tree->current; // found: the caller's node is overwritten with a copy of the match
+        
+        goto meta(context, next);
+    } else if (tree->result == GT) {
+        tree->current = tree->current->right;
+    } else {
+        tree->current = tree->current->left;
+    }
+        
+    if (tree->current) {
+        goto meta(context, C_search); // re-enter this gear for the chosen child
+    }
+
+    goto next(...);
+}
+
+
+__code removeRedBlackTree(struct RedBlackTree* tree, struct Node* node, __code next(...)) {
+    struct Node* newNode = &ALLOCATE(context, Node)->Node;
+    struct Node* root = tree->root; // remember the old root before it is replaced below
+    printTree((union Data*)(tree->root));
+    tree->newNode = newNode;
+    tree->root = newNode; // this should be done at stackClear; NOTE(review): also executed when the tree is empty
+    tree->parent = NULL;
+    if (root) {
+        tree->current = root; // walk down with findNode, then continue at replaceNodeForDelete2
+        tree->result = compare(tree->current, node);
+        tree->findNodeNext = C_replaceNodeForDelete2;
+        goto findNode(tree);
+    }
+    goto next(...);
+}
+
+
+
+__code delete2(struct Node* current) {
+    if (current->color == Black) {
+        struct Node* child = current->right == NULL ? current->left : current->right; // the right child if present, otherwise the left one
+        current->color = child == NULL ? Black : child->color; // a missing child counts as Black
+
+        goto deleteCase1(current);
+    }
+
+    goto delete3(tree, current); // NOTE(review): `tree` is not among this gear's parameters
+}
+
+
+
+__code delete3(struct RedBlackTree* tree, struct Node* current, __code next(...)) {
+    struct Node* tmp = current->right == NULL ? current->left : current->right; // replacement: right child if present, else left
+    struct Stack* nodeStack = tree->nodeStack;
+
+    if (tree->parent) {
+        if (current == tree->parent->left)
+            tree->parent->left = tmp;
+        else
+            tree->parent->right = tmp;
+    } else {
+        tree->root = tmp; // current had no parent, so its replacement becomes the root
+    }
+
+
+    if (tree->parent == NULL && tmp) {
+        tmp->color = Black; // a new root is always Black
+    }
+
+    if (tree->parent) { current == tree->parent->left ? (tree->parent->left = NULL) : (tree->parent->right = NULL); } // guarded: parent may be NULL here
+    // NOTE(review): after the relink above this clears parent->right unless current is still parent->left - confirm intent
+    Gearef(context, Stack)->stack = (union Data*) nodeStack;
+    Gearef(context, Stack)->next = next;
+    goto meta(context, nodeStack->pop);
+
+//    gato nodeStack->pop(next);
+}
+
+
+
+__code replaceNodeForDelete2(struct RedBlackTree* tree, struct Node* newNode) {
+    if (tree->current->left && tree->current->right) { // node with two children: continue in the left subtree via findMax1
+        tree->parent = newNode;
+        tree->current = newNode->left;
+        newNode->left = context->heap;
+
+
+        tree->parent = newNode; // repeats the assignment made three statements above
+        
+        goto findMax1(tree,oldNode, newNode); // NOTE(review): `oldNode` is not declared in this gear
+    }
+
+    goto delete2(current); // NOTE(review): `current` is not declared in this gear
+}
+
+
+__code findMax1(struct RedBlackTree* tree, struct Node* oldNode, struct Node* newNode) {
+    *newNode = *oldNode; // copy the visited node
+
+    if (newNode->right) {
+        goto findMax2(tree, oldNode, newNode);
+    }
+    
+    tree->current = newNode; // no right child: this copy is handed on to delete2
+
+    goto delete2(current); // NOTE(review): `current` is not declared in this gear
+}
+
+
+    
+
+__code findMax2(struct RedBlackTree* tree, struct Node* oldNode, struct Node* newNode) {
+    *newNode = *oldNode; // copy the visited node
+
+    if (newNode->right->right) {
+        tree->current = newNode->right;
+        newNode->right = context->heap;
+
+        tree->parent = newNode;
+        
+        goto findMax2(tree, oldNode, newNode); // NOTE(review): re-enters with the same oldNode and newNode - confirm the walk advances
+    }
+
+    tree->current = newNode;
+    
+    goto delete2(tree,current); // NOTE(review): `current` is not declared in this gear
+}
+    
+
+__code deleteCase1(struct RedBlackTree* tree, struct Node* current) {
+    if (tree->parent) { // a parent exists: continue with deleteCase2
+        goto deleteCase2(tree,current);
+    }
+
+    goto delete3(tree, current);
+}
+
+
+
+__code deleteCase2(struct RedBlackTree* tree, struct Node* current, struct RotateTree* rotateTree) {
+    struct Node* sibling = current == tree->parent->left ? tree->parent->right : tree->parent->left; // the parent's other child
+    struct Stack* nodeStack = tree->nodeStack;
+    
+    if ((sibling == NULL ? Black : sibling->color) == Red) { // a missing sibling counts as Black
+        tree->parent->color = Red;
+        sibling->color = Black;
+
+        current == tree->parent->left ? (tree->parent->left = context->heap) : (tree->parent->right = context->heap);
+
+        struct Node* node = sibling;
+        
+        tree->current = tree->parent; // rotate around the parent, then resume at deleteCase3
+
+        rotateTree->traverse = tree;
+        rotateTree->next = C_deleteCase3;
+
+        if (current == tree->parent->left) {
+            goto nodeStack->push((union Data*)node,rotateLeft);
+        } else {
+            goto nodeStack->push((union Data*)node,rotateRight);
+        }
+    }
+
+    goto deleteCase3(tree,current); // moved out of the if: the sibling-not-Red path previously fell off the end of the gear
+}
+
+
+
+__code deleteCase3(struct RedBlackTree* tree, struct Node* current) {
+    struct Node* sibling = current == tree->parent->left ? tree->parent->right : tree->parent->left; // the parent's other child
+    
+    if (tree->parent->color == Black &&
+        (sibling == NULL ? Black : sibling->color) == Black &&
+        (sibling->left == NULL ? Black : sibling->left->color) == Black && // NOTE(review): sibling is dereferenced here although the line above allows it to be NULL
+        (sibling->right == NULL ? Black : sibling->right->color) == Black) {
+        sibling->color = Red;
+
+        tree->current = tree->parent; // parent, sibling and nephews all Black: repeat from deleteCase1
+        goto deleteCase1(current);
+    }
+
+    goto deleteCase4(current);
+}
+
+
+
+__code deleteCase4(struct RedBlackTree* tree,struct Node* current) {
+    struct Node* sibling = current == tree->parent->left ? tree->parent->right : tree->parent->left; // the parent's other child
+    
+    if (tree->parent->color == Red &&
+        (sibling == NULL ? Black : sibling->color) == Black &&
+        (sibling->left == NULL ? Black : sibling->left->color) == Black && // NOTE(review): sibling is dereferenced here although the line above allows it to be NULL
+        (sibling->right == NULL ? Black : sibling->right->color) == Black) {
+        sibling->color = Red; // Red parent with an all-Black sibling side: swap the two colours and finish in delete3
+        tree->parent->color = Black;
+
+        goto delete3(tree,current);
+    }
+
+    goto deleteCase5(tree,current);
+}
+
+
+
+__code deleteCase5(struct RedBlackTree* tree, struct Node* current, struct RotateTree* rotateTree) {
+    struct Node* sibling = current == tree->parent->left ? tree->parent->right : tree->parent->left; // the parent's other child
+    struct Stack* nodeStack = tree->nodeStack;
+    // sibling->parent = tree->parent;
+    
+    if (current == tree->parent->left &&
+        (sibling == NULL ? Black : sibling->color) == Black &&
+        (sibling->left == NULL ? Black : sibling->left->color) == Red && // NOTE(review): sibling is dereferenced here although the line above allows it to be NULL
+        (sibling->right == NULL ? Black : sibling->right->color) == Black) {
+        sibling->color = Red; // Black sibling whose only Red child is the left one: recolour, then rotate right
+        sibling->left->color = Black;
+        
+        // sibling == sibling->parent->left ? (sibling->parent->left = context->heap) : (sibling->parent->right = context->heap);
+        sibling == tree->parent->left ? (tree->parent->left = context->heap) : (tree->parent->right = context->heap);
+
+        struct Node* node = new Node();
+        node = sibling->left; // NOTE(review): replaces the pointer just obtained from new Node(), so that allocation is never used
+
+        struct Node* tmp = node;
+        *tmp = *sibling; // tmp aliases sibling->left here, so this copies sibling over its own left child
+        tree->parent = current;
+        
+        tmp->left = context->heap;
+/*         struct Node* node = new Node(); */
+/*         node = *sibling->left; */
+        tree->parent = tmp; // overrides the assignment made three statements above
+
+        tree->current = tmp;
+        
+
+        rotateTree->traverse = tree;
+        rotateTree->next = C_deleteCase6;
+
+        goto nodeStack->push((union Data*)node,rotateRight);
+    } else if (current == tree->parent->right &&
+               (sibling == NULL ? Black : sibling->color) == Black &&
+               (sibling->left == NULL ? Black : sibling->left->color) == Black &&
+               (sibling->right == NULL ? Black : sibling->right->color) == Red) {
+        sibling->color = Red; // mirror case: the Red child is the right one, rotate left
+        sibling->right->color = Black;
+
+        sibling == tree->parent->left ? (tree->parent->left = context->heap) : (tree->parent->right = context->heap);
+
+        struct Node* node = new Node();
+        node = sibling->right; // NOTE(review): same overwrite of the fresh allocation as in the branch above
+
+        struct Node* tmp = node;
+        *tmp = *sibling;
+        // tmp->parent = current;
+
+        tmp->right = context->heap;
+/*         struct Node* node = new Node(); */
+/*         node = *sibling->right; */
+        //node->parent = tmp;
+
+        tree->current = tmp; // unlike the first branch, tree->parent is left untouched here
+        
+
+        rotateTree->traverse = tree;
+        rotateTree->next = C_deleteCase6;
+
+        goto nodeStack->push((union Data*)node,rotateLeft);
+    }
+
+    goto deleteCase6(tree,current);
+}
+
+
+__code deleteCase6(struct RedBlackTree* tree, struct Node* current, struct RotateTree* rotateTree) {
+    struct Node* sibling = current == tree->parent->left ? tree->parent->right : tree->parent->left; // the parent's other child
+    struct Stack* nodeStack = tree->nodeStack;
+    sibling == tree->parent->left ? (tree->parent->left = context->heap) : (tree->parent->right = context->heap);
+
+    struct Node* tmp = sibling;
+    // *tmp = *sibling;
+    tree->parent = current; // NOTE(review): from here on tree->parent is current itself - confirm this is intended
+
+    tmp->color = tree->parent->color; // sibling takes over the colour, parent turns Black
+    tree->parent->color = Black;
+    
+    
+    if (current == tree->parent->left) {
+        tmp->right->color = Black;
+        tree->current = tree->parent;
+
+        rotateTree->traverse = tree; // after the rotation the removal finishes in delete3
+        rotateTree->next = C_delete3;
+
+        goto nodeStack->push((union Data*)tmp,rotateLeft);
+    } else {
+        tmp->left->color = Black; // mirror branch: far nephew is the left one, so the rotation must go right
+        tree->current = tree->parent;
+
+        rotateTree->traverse = tree;
+        rotateTree->next = C_delete3;
+
+        goto nodeStack->push((union Data*)tmp,rotateRight);
+    }
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/RedBlackTreeReWright.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/RedBlackTreeReWright.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,269 @@
+#include <stdio.h>
+
+#include "../context.h"
+#include "../compare.c"
+#interface "Tree.h"
+#interface "Stack.h"
+
+extern enum Relational compare(struct Node* node1, struct Node* node2);
+
+
+Tree* createRedBlackTree(struct Context* context) {
+    struct Tree* tree = new Tree();
+    struct RedBlackTree* rbtree = new RedBlackTree();
+
+    tree->tree = (union Data*)rbtree; // the interface gear points at its RedBlackTree implementation
+     rbtree->root = NULL; // starts as an empty tree
+     rbtree->nodeStack = (union Data*)createSingleLinkedStack(context);
+     tree->put = C_putRedBlackTree; // only put is wired up in this rewrite; get, remove and clear stay commented out
+    // tree->get = C_getRedBlackTree;
+    // tree->remove = C_removeRedBlackTree;
+    // tree->clear = C_clearRedBlackTree;
+     return tree;
+}
+
+void printNode(struct Node* node) {
+  if (!node) {
+    printf("leaf");
+    return;
+  }
+  printf("((%d,%d (", node->color, node->key); // colour and key first, then right subtree, then left subtree
+  printNode(node->right);
+  printf(") (");
+  printNode(node->left);
+  printf(")");
+}
+
+void printTree(struct RedBlackTree* tree) {
+  putchar('\n');
+  struct Node* top = tree->current = tree->root; // side effect kept: tree->current is reset to the root
+  printNode(top);
+  puts(")");
+}
+
+__code putRedBlackTree(struct RedBlackTree* tree, struct Node* node, __code next(...)) {
+    printf("C_putRedBlackTree\n");
+    printf("value->%d,key->%d \n",node->value,node->key);
+    tree->previous = tree->newNode; // keep the node from the previous put before replacing it
+    tree->newNode = node;
+    tree->newNode->color = Red; // the inserted node starts Red
+    tree->current = tree->root; // the search for the insert position starts at the root
+    goto insertRBTree(node, tree);
+}
+
+__code stackClear(struct RedBlackTree* tree, struct Stack* nodeStack, __code next(...)) {
+   tree->current = 0;
+   nodeStack->stack = tree->nodeStack; // fill in the Stack gear's arguments by hand: target stack and continuation
+   nodeStack->next = next;
+   goto meta(context, tree->nodeStack->clear);
+  }
+
+__code getRedBlackTree(struct RedBlackTree* tree, __code next(...)) {
+    if (tree->root) {
+        tree->current = tree->root; // NOTE(review): a get that continues into insertRBTree looks like a placeholder - confirm
+        goto insertRBTree();
+        // goto deleteRBTree();
+      }
+    goto next(...);
+}
+
+__code insertRBTree(struct Node* node, struct RedBlackTree* tree, struct Stack* stack, __code next(...)) {
+  // first case tree->current = root;
+  printf("C_insertRBTree\n");
+  printf("value->%d,key->%d\n",node->value,node->key);
+  printf("newNode value->%d,newNode key->%d\n",tree->newNode->value,tree->newNode->key);
+
+  if (tree->root == NULL) {
+    printf("insertRBTree_root eq NULL\n");
+    tree->root = tree->newNode; // empty tree: the new node becomes the Black root and insertion is done
+    tree->root->color = Black;
+    printf("tree->root->key = %d, tree->root->color = %d \n",tree->root->key,tree->root->color);
+    printTree(tree);
+    goto next(tree,...);
+  } else {
+    goto searchInsertLocation(node, tree, stack);
+  }
+}
+
+__code insertRBTree_stub(struct Context* context) {
+	Node* node = Gearef(context, Tree)->node;
+	RedBlackTree* tree = (RedBlackTree*)GearImpl(context, Tree, tree);
+	Stack* stack = createSingleLinkedStack(context); // NOTE(review): builds a new stack on every entry instead of using tree->nodeStack - confirm
+	enum Code next = Gearef(context, Tree)->next;
+	goto insertRBTree(context, node, tree, stack, next);
+} 
+
+__code searchInsertLocation(struct Node* node, struct RedBlackTree* tree) {
+  // Walks down from tree->current looking for the insert position; insertRBTree handles the root == NULL case before this gear runs.
+  printf("C_searchInsertLocation\n");
+  printf("nownode->key %d , previous->key %d \n",tree->newNode->key,tree->previous->key);
+
+  Stack* stack = tree->nodeStack;
+
+  if (tree->current == NULL) { // checked first: the compare and the key printout below dereference tree->current
+    printf("goto insertLocationBackInsert stack->pop\n");
+    goto stack->pop(insertLocationBackInsert);
+  }
+  tree->result = compare(tree->current, node);
+  printf("tree->current->key = %d, node->key %d\n",tree->current->key,node->key);
+  printf("compare (%p,%p)\n",(void*)tree->current,(void*)node); // pointers need %p; %d was a format mismatch
+
+  if (tree->result == GT) {
+    printf("GT searchInsertLocation\n");
+    tree->current = tree->current->right; // NOTE(review): GT continues at insertLocationBackInsert while LT loops back here - confirm
+    goto stack->push(tree->newNode,insertLocationBackInsert);
+  } else if (tree->result == LT) {
+    printf("LT searchInsertLocation\n");
+    tree->current = tree->current->left;
+    goto stack->push(tree->newNode, searchInsertLocation);
+  } else if (tree->result == EQ) {
+    printf("already member this node : __code searchInsertLocation()\n");
+    goto meta(context, C_exit_code);
+  } else {
+    printf("$insert value tree : __code searchInsertLocation() \n");
+    goto meta(context, C_exit_code);
+  }
+}
+
+__code searchInsertLocation_stub(struct Context* context) {
+	Node* node = Gearef(context, Tree)->node;
+	RedBlackTree* tree = (RedBlackTree*)GearImpl(context, Tree, tree);
+  Stack* stack = (struct Stack*)Gearef(context, Stack)->stack; // fetched but not passed on: the gear takes only node and tree
+	goto searchInsertLocation(context, node, tree);
+}
+
+__code insertLocationBackInsert(struct RedBlackTree* tree, struct Node* node, struct Stack* stack) {
+  printf("C_insertLocationBackInsert\n");
+  struct Node* hoge = stack->data; // not used afterwards
+  printf("stackpopdata%d\n",stack->data); // NOTE(review): stack->data is assigned to a pointer above but printed with %d
+  tree->current = tree->previous;
+  // tree->current = nodeStack->data;
+  // this CS is ones only backTrace, and insert node
+  tree->result = compare(tree->previous,tree->newNode); // decides on which side of previous the new node is attached
+  printf("back,compare\n");
+  if (tree->result == GT) {
+    printf("GT\n");
+    tree->current->right = tree->newNode;
+    printTree(tree);
+    goto insertBalance(tree, stack, node, next); // NOTE(review): `next` is not declared in this gear
+  } else if (tree->result == LT) {
+    printf("LT\n");
+    tree->current->left = tree->newNode;
+    goto insertBalance(tree, stack, node, next);
+  } else {
+    printf("error : __code insertLocationBackTrace() \n");
+    goto meta(context, C_exit_code);
+  }
+}
+// Stub: gathers tree, node and the Stack interface from the context and jumps to insertLocationBackInsert; singleLinkedStack is fetched but unused.
+__code insertLocationBackInsert_stub(struct Context* context) {
+	RedBlackTree* tree = (RedBlackTree*)GearImpl(context, Tree, tree);
+  SingleLinkedStack* singleLinkedStack = (SingleLinkedStack*)GearImpl(context, Stack, stack);
+	Node* node = Gearef(context, Tree)->node;
+  Stack* stack = (struct Stack*)Gearef(context, Stack)->stack;
+	goto insertLocationBackInsert(context, tree, node, stack);
+}
+// Walk back up the recorded path: finish at the root, skip red nodes, and dispatch black nodes to the left/right rebalancing gears.
+__code insertBalance(struct RedBlackTree* tree, struct Node* nodeStack, struct Node* node, __code next(...)) {
+  printf("C_insertBalance\n");
+  struct Node* traceNode = tree->nodeStack->data;
+  tree->current = traceNode;
+  struct Stack* stack = tree->nodeStack;
+
+  // reached the root: insertion is finished, the root is always painted Black
+  if (tree->current == tree->root) {
+    tree->current->color = Black;
+    printTree(tree);
+    // hand the finished tree to the continuation
+    goto next(tree,...);
+  }
+
+  // a Red node cannot be the top of a violation handled here: keep walking up
+  if (tree->current->color == Red) {
+    goto stack->pop(insertBalance);
+  }
+
+  // Black node: a child is only inspected when it exists, so a missing left or right child no longer crashes
+  if (tree->current->left && (tree->current->left->left || tree->current->left->right)) {
+    goto insertBalanceLeft(tree,nodeStack);
+  } else if (tree->current->right && (tree->current->right->left || tree->current->right->right)) {
+    goto insertBalanceRight(tree,nodeStack);
+  } else {
+    goto stack->pop(insertBalance);
+  }
+}
+// Repair a red-red violation in the left subtree of the Black node tree->current, then keep walking up via insertBalance.
+__code insertBalanceLeft(struct RedBlackTree* tree, struct Node* nodeStack, struct Node* node) {
+  printf("C_insertBalanceLeft\n");
+  struct Stack* stack = tree->nodeStack;
+
+  struct Node* tmpCurrent = tree->current;
+  struct Node* tmpLeft    = tmpCurrent->left;
+
+  // NOTE(review): only tree->current is moved to the new subtree root; the parent link and tree->root are not updated here.
+  if (tmpCurrent->color == Black && tmpLeft && tmpLeft->color == Red && tmpLeft->left && tmpLeft->left->color == Red) {
+    // left-left: hand the inner subtree over before relinking, otherwise tmpCurrent->left ends up pointing at tmpCurrent
+    tmpCurrent->left = tmpLeft->right;
+    tmpLeft->right = tmpCurrent;
+    tree->current = tmpLeft;
+    tree->current->color = Red;
+    tree->current->left->color = Black;
+    tree->current->right->color = Black;
+    goto stack->pop(insertBalance);
+  } else if (tmpCurrent->color == Black && tmpLeft && tmpLeft->color == Red && tmpLeft->right && tmpLeft->right->color == Red) {
+    struct Node* tmpLeftRight = tmpLeft->right;
+    // left-right: the grandchild becomes the subtree root and its two subtrees are split between its new children
+    tmpLeft->right = tmpLeftRight->left;
+    tmpCurrent->left = tmpLeftRight->right;
+    tmpLeftRight->left = tmpLeft;
+    tmpLeftRight->right = tmpCurrent;
+    tree->current = tmpLeftRight;
+    tmpLeftRight->color = Red;
+    tmpLeft->color = Black;
+    tmpCurrent->color = Black;
+    goto stack->pop(insertBalance);
+  } else {
+    printf("unknown error : __code insertBalanceLeft() \n");
+    goto meta(context, C_exit_code);
+  }
+}
+// Repair a red-red violation in the right subtree of the Black node tree->current, then keep walking up via insertBalance.
+__code insertBalanceRight(struct RedBlackTree* tree, struct Node* nodeStack, struct Node* node) {
+  printf("C_insertBalanceRight\n");
+  struct Stack* stack = tree->nodeStack;
+
+  struct Node* tmpCurrent = tree->current;
+  struct Node* tmpRight   = tmpCurrent->right;
+
+  // NOTE(review): only tree->current is moved to the new subtree root; the parent link and tree->root are not updated here.
+  // Both cases leave a Red subtree root with two Black children.
+  if (tmpCurrent->color == Black && tmpRight && tmpRight->color == Red && tmpRight->right && tmpRight->right->color == Red) {
+    // right-right: hand the inner subtree over before relinking, otherwise tmpCurrent->right ends up pointing at tmpCurrent
+    tmpCurrent->right = tmpRight->left;
+    tmpRight->left = tmpCurrent;
+    tree->current = tmpRight;
+    tree->current->color = Red;
+    tree->current->left->color = Black;
+    tree->current->right->color = Black;
+    goto stack->pop(insertBalance);
+
+  } else if (tmpCurrent->color == Black && tmpRight && tmpRight->color == Red && tmpRight->left && tmpRight->left->color == Red) {
+    struct Node* tmpRightLeft = tmpRight->left;
+
+    // right-left: the grandchild becomes the subtree root and its two subtrees are split between its new children
+    tmpCurrent->right = tmpRightLeft->left;
+    tmpRight->left = tmpRightLeft->right;
+    tmpRightLeft->left = tmpCurrent;
+    tmpRightLeft->right = tmpRight;
+    tree->current = tmpRightLeft;
+    tmpRightLeft->color = Red;
+    tmpRight->color = Black;
+    tmpCurrent->color = Black;
+    goto stack->pop(insertBalance);
+
+  } else {
+    printf("unknown error : __code insertBalanceRight() \n");
+    goto meta(context, C_exit_code);
+  }
+}
+// insertCode end
diff -r a4cab67624f7 -r 9146d6017f18 src/Semaphore.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Semaphore.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,6 @@
+typedef struct Semaphore<Impl>{
+        union Data* semaphore;
+        __code p(Impl* semaphore, __code next(...)); 
+        __code v(Impl* semaphore, __code next(...)); 
+        __code next(...);
+} Semaphore;
diff -r a4cab67624f7 -r 9146d6017f18 src/SemaphoreImpl.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/SemaphoreImpl.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,37 @@
+#include "../context.h"
+#interface "semaphore.h"
+// Build a Semaphore interface backed by a SemaphoreImpl whose counter starts at n; initializes its mutex and condition variable and wires the p/v code gears.
+Semaphore* createSemaphoreImpl(struct Context* context, int n) {
+    struct Semaphore* semaphore = new Semaphore();
+    struct SemaphoreImpl* semaphoreImpl = new SemaphoreImpl();
+    semaphore->semaphore = (union Data*)semaphoreImpl;
+    semaphoreImpl->value =  n;
+    pthread_mutex_init(&semaphoreImpl->mutex, NULL);
+    pthread_cond_init(&semaphoreImpl->cond, NULL);
+    semaphore->p = C_pOperationSemaphoreImpl;
+    semaphore->v = C_vOperationSemaphoreImpl;
+    return semaphore;
+}
+// P, step 1: take the semaphore mutex and continue in pOperationSemaphoreImpl1 with the mutex still held.
+__code pOperationSemaphoreImpl(struct SemaphoreImpl* semaphore, __code next(...)) {
+    pthread_mutex_lock(&semaphore->mutex);
+    goto meta(context, C_pOperationSemaphoreImpl1);
+}
+// P, step 2: while value is 0, wait on the condition variable and re-enter this gear to re-check; otherwise decrement, release the mutex and continue to next.
+__code pOperationSemaphoreImpl1(struct SemaphoreImpl* semaphore, __code next(...)) {
+    if (semaphore->value == 0) {
+        pthread_cond_wait(&semaphore->cond, &semaphore->mutex);
+        goto meta(context, C_pOperationSemaphoreImpl1);
+    }
+    semaphore->value--;
+    pthread_mutex_unlock(&semaphore->mutex);
+    goto next(...);
+}
+// V: under the mutex, increment value and signal one waiter on the condition variable, then continue to next.
+__code vOperationSemaphoreImpl(struct SemaphoreImpl* semaphore, __code next(...)) {
+    pthread_mutex_lock(&semaphore->mutex);
+    semaphore->value++;
+    pthread_cond_signal(&semaphore->cond);
+    pthread_mutex_unlock(&semaphore->mutex);
+    goto next(...);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/SingleLinkedQueue.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/SingleLinkedQueue.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,68 @@
+#include "../context.h"
+#include <stdio.h>
+#interface "Queue.h"
+// use "Node.h"
+// use "Element.h"
+// Build a Queue interface backed by a SingleLinkedQueue; top and last both start at one freshly allocated Element, and the four code gears are wired in.
+Queue* createSingleLinkedQueue(struct Context* context) {
+    struct Queue* queue = new Queue();
+    struct SingleLinkedQueue* singleLinkedQueue = new SingleLinkedQueue();
+    queue->queue = (union Data*)singleLinkedQueue;
+    singleLinkedQueue->top  = new Element();
+    singleLinkedQueue->last = singleLinkedQueue->top;
+    queue->take  = C_takeSingleLinkedQueue;
+    queue->put  = C_putSingleLinkedQueue;
+    queue->isEmpty = C_isEmptySingleLinkedQueue;
+    queue->clear = C_clearSingleLinkedQueue;
+    return queue;
+}
+// Debug helper: print the Node key held by each Element from data onward, ending with "NULL".
+void printQueue1(union Data* data) {
+    // Stop at the end of the list as well as at an Element without data; previously a NULL data was dereferenced here.
+    if (data == NULL || data->Element.data == NULL) {
+        printf("NULL");
+    } else {
+        printf("key = %d ,", data->Element.data->Node.key);
+        printQueue1((union Data*)data->Element.next);
+    }
+}
+// Debug helper: print the list starting at data via printQueue1, followed by a newline.
+void printQueue(union Data* data) {
+    printQueue1(data);
+    printf("\n");
+}
+// Empty the queue by moving top onto last, so top == last again; top = NULL made take read through NULL and isEmpty report a non-empty queue.
+__code clearSingleLinkedQueue(struct SingleLinkedQueue* queue, __code next(...)) {
+    queue->top = queue->last;
+    goto next(...);
+}
+// Append a new Element holding data after last and advance last to it, then continue to next.
+__code putSingleLinkedQueue(struct SingleLinkedQueue* queue, union Data* data, __code next(...)) {
+    Element* element = new Element();
+    element->data = data;
+    element->next = NULL;
+    queue->last->next  = element;
+    queue->last = element;
+    goto next(...);
+}
+// Dequeue: when top == last the queue is empty and data is NULL; otherwise advance top to its successor and pass that element's data to next.
+__code takeSingleLinkedQueue(struct SingleLinkedQueue* queue, __code next(union Data* data, ...)) {
+    struct Element* top = queue->top;
+    struct Element* nextElement = top->next;
+    if (queue->top == queue->last) {
+        data = NULL;
+    } else {
+        queue->top = nextElement;
+        data = nextElement->data;
+    }
+    goto next(data, ...);
+}
+// Continue to whenEmpty when top == last, otherwise to next.
+__code isEmptySingleLinkedQueue(struct SingleLinkedQueue* queue, __code next(...), __code whenEmpty(...)) {
+    if (queue->top == queue->last) {
+        goto whenEmpty(...);
+    } else {
+        goto next(...);
+    }
+}
+
diff -r a4cab67624f7 -r 9146d6017f18 src/SingleLinkedStack.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/SingleLinkedStack.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,111 @@
+#include "../context.h"
+#interface "Stack.h"
+#include <stdio.h>
+
+// typedef struct SingleLinkedStack {
+//     struct Element* top;
+// } SingleLinkedStack;
+// Build a Stack interface backed by an empty SingleLinkedStack, whose top starts as NULL, and wire in all seven code gears.
+Stack* createSingleLinkedStack(struct Context* context) {
+    struct Stack* stack = new Stack();
+    struct SingleLinkedStack* singleLinkedStack = new SingleLinkedStack();
+    stack->stack = (union Data*)singleLinkedStack;
+    singleLinkedStack->top = NULL;
+    stack->push = C_pushSingleLinkedStack;
+    stack->pop  = C_popSingleLinkedStack;
+    stack->pop2  = C_pop2SingleLinkedStack;
+    stack->get  = C_getSingleLinkedStack;
+    stack->get2  = C_get2SingleLinkedStack;
+    stack->isEmpty = C_isEmptySingleLinkedStack;
+    stack->clear = C_clearSingleLinkedStack;
+    return stack;
+}
+// Debug helper: print the Node key held by each Element from data downward, ending with "NULL".
+void printStack1(union Data* data) {
+    // Stop at the bottom of the stack as well as at an Element without data; previously a NULL data was dereferenced here.
+    if (data == NULL || data->Element.data == NULL) {
+        printf("NULL");
+    } else {
+        printf("key = %d ,", data->Element.data->Node.key);
+        printStack1((union Data*)data->Element.next);
+    }
+}
+// Debug helper: print the stack starting at data via printStack1, followed by a newline.
+void printStack(union Data* data) {
+    printStack1(data);
+    printf("\n");
+}
+// Drop every element by resetting top to NULL, then continue to next.
+__code clearSingleLinkedStack(struct SingleLinkedStack* stack,__code next(...)) {
+    stack->top = NULL;
+    goto next(...);
+}
+// Push: allocate an Element holding data, link it in front of the current top and make it the new top.
+__code pushSingleLinkedStack(struct SingleLinkedStack* stack, union Data* data, __code next(...)) {
+    Element* element = new Element();
+    element->next = stack->top;
+    element->data = data;
+    stack->top = element;
+    goto next(...);
+}
+// Pop: remove the top element and pass its data to next; an empty stack passes NULL.
+__code popSingleLinkedStack(struct SingleLinkedStack* stack, __code next(union Data* data, ...)) {
+    if (stack->top) {
+        data = stack->top->data;
+        stack->top = stack->top->next;
+    } else {
+        data = NULL;
+    }
+    goto next(data, ...);
+}
+// Pop two: remove up to two elements and pass them to next as data and data1; each slot is NULL when the stack ran out.
+__code pop2SingleLinkedStack(struct SingleLinkedStack* stack, __code next(union Data* data, union Data* data1, ...)) {
+    if (stack->top) {
+        data = stack->top->data;
+        stack->top = stack->top->next;
+    } else {
+        data = NULL;
+    }
+    if (stack->top) {
+        data1 = stack->top->data;
+        stack->top = stack->top->next;
+    } else {
+        data1 = NULL;
+    }
+    goto next(data, data1, ...);
+}
+
+// Peek: pass the top element's data to next without removing it; an empty stack passes NULL.
+__code getSingleLinkedStack(struct SingleLinkedStack* stack, __code next(union Data* data, ...)) {
+    if (stack->top) {
+        data = stack->top->data;
+    } else {
+        data = NULL;
+    }
+    goto next(data, ...);
+}
+// Peek two: pass the data of the top two elements to next without removing them; a missing element is reported as NULL.
+__code get2SingleLinkedStack(struct SingleLinkedStack* stack, __code next(union Data* data, union Data* data1, ...)) {
+    if (stack->top) {
+        data = stack->top->data;
+        if (stack->top->next) {
+            data1 = stack->top->next->data;
+        } else {
+            data1 = NULL;
+        }
+    } else {
+        data = NULL;
+        data1 = NULL;
+    }
+    goto next(data, data1, ...);
+}
+// Continue to next when the stack holds at least one element, otherwise to whenEmpty.
+__code isEmptySingleLinkedStack(struct SingleLinkedStack* stack, __code next(...), __code whenEmpty(...)) {
+    if (stack->top) {
+        goto next(...);
+    } else {
+        goto whenEmpty(...);
+    }
+}
+
+
diff -r a4cab67624f7 -r 9146d6017f18 src/SpinLock.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/SpinLock.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,32 @@
+#include "../context.h"
+#interface "Atomic.h"
+#interface "Lock.h"
+// Build a Lock interface backed by a SpinLock: lock starts as NULL, an AtomicReference is created for it, and doLock/doUnlock are wired in.
+Lock* createSpinLock(struct Context* context) {
+    struct Lock* lock = new Lock();
+    struct SpinLock* spinLock = new SpinLock();
+    spinLock->lock = NULL;
+    spinLock->atomic = createAtomicReference(context);
+    lock->lock = (union Data*)spinLock;
+    lock->doLock = C_doLockSpinLock;
+    lock->doUnlock = C_doUnlockSpinLock;
+    return lock;
+}
+// Try to swap lock from NULL to 1 through checkAndSet; success continues in doLockSpinLock1, failure re-enters this gear and so spins.
+__code doLockSpinLock(struct SpinLock* lock, __code next(...)) {
+    struct Atomic* atomic = lock->atomic;
+    goto atomic->checkAndSet(&lock->lock, NULL, 1, doLockSpinLock1, doLockSpinLock);
+}
+// Lock acquired: record the current context as the owner, then continue to next.
+__code doLockSpinLock1(struct SpinLock* lock, __code next(...)) {
+    lock->lockContext = context;
+    goto next(...);
+}
+// When this context is the recorded owner, swap lock from 1 back to NULL, retrying on failure; a non-owner continues to next without touching the lock.
+__code doUnlockSpinLock(struct SpinLock* lock, __code next(...)) {
+    if (lock->lockContext == context) {
+        struct Atomic* atomic = lock->atomic;
+        goto atomic->checkAndSet(&lock->lock, 1, NULL, next(...), doUnlockSpinLock);
+    }
+    goto next(...);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/Stack.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Stack.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,17 @@
+typedef struct Stack<Type, Impl>{
+        union Data* stack;
+        union Data* data;
+        union Data* data1;
+        /* Type* stack; */
+        /* Type* data; */
+        /* Type* data1; */
+        __code whenEmpty(...);
+        __code clear(Impl* stack,__code next(...));
+        __code push(Impl* stack,Type* data, __code next(...));
+        __code pop(Impl* stack, __code next(Type* data, ...));
+        __code pop2(Impl* stack, __code next(Type* data, Type* data1, ...));
+        __code isEmpty(Impl* stack, __code next(...), __code whenEmpty(...));
+        __code get(Impl* stack, __code next(Type* data, ...));
+        __code get2(Impl* stack, __code next(Type* data, Type* data1, ...));
+        __code next(...);
+} Stack;
diff -r a4cab67624f7 -r 9146d6017f18 src/SynchronizedQueue.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/SynchronizedQueue.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,96 @@
+#include "../context.h"
+#interface "Queue.h"
+#interface "Atomic.h"
+
+#include <stdio.h>
+
+/*
+ * Non-blocking queue of Paper: Simple, Fast, and Practical Non-Blocking and Blocking Concurrent Queue Algorithms(https://www.research.ibm.com/people/m/michael/podc-1996.pdf).
+ */
+// Build a Queue interface backed by a SynchronizedQueue: top and last share one initial Element, and an AtomicReference performs the compare-and-set steps.
+Queue* createSynchronizedQueue(struct Context* context) {
+    struct Queue* queue = new Queue();
+    struct SynchronizedQueue* synchronizedQueue = new SynchronizedQueue();
+    synchronizedQueue->top = new Element(); // allocate a free node
+    synchronizedQueue->top->next = NULL;
+    synchronizedQueue->last = synchronizedQueue->top;
+    synchronizedQueue->atomic = createAtomicReference(context);
+    queue->queue = (union Data*)synchronizedQueue;
+    queue->take  = C_takeSynchronizedQueue;
+    queue->put  = C_putSynchronizedQueue;
+    queue->isEmpty = C_isEmptySynchronizedQueue;
+    queue->clear = C_clearSynchronizedQueue;
+    return queue;
+}
+// Replace top with NULL through checkAndSet, retrying on failure. NOTE(review): take and isEmpty read top->next unconditionally; confirm they never run after clear.
+__code clearSynchronizedQueue(struct SynchronizedQueue* queue, __code next(...)) {
+    struct Element* top = queue->top;
+    struct Atomic* atomic = queue->atomic;
+    goto atomic->checkAndSet(&queue->top, top, NULL, next(...), clearSynchronizedQueue);
+}
+// Enqueue step: allocate an Element for data, then link it after last when last->next is NULL, or else help advance last; every retry re-enters this gear and allocates again.
+__code putSynchronizedQueue(struct SynchronizedQueue* queue, union Data* data, __code next(...)) {
+    Element* element = new Element();
+    element->data = data;
+    element->next = NULL;
+    Element* last = queue->last;
+    Element* nextElement = last->next;
+    if (last != queue->last) {
+        goto putSynchronizedQueue();
+    }
+    if (nextElement == NULL) {
+        struct Atomic* atomic = queue->atomic;
+        goto atomic->checkAndSet(&last->next, nextElement, element, next(...), putSynchronizedQueue);
+    } else {
+        struct Atomic* atomic = queue->atomic;
+        goto atomic->checkAndSet(&queue->last, last, nextElement, putSynchronizedQueue, putSynchronizedQueue);
+    }
+}
+// Dequeue step: when top == last, help advance last if it lags and otherwise re-enter this gear, so an empty queue spins; else try to swing top to its successor and continue in takeSynchronizedQueue1.
+__code takeSynchronizedQueue(struct SynchronizedQueue* queue, __code next(union Data* data, ...)) {
+    struct Element* top = queue->top;
+    struct Element* last = queue->last;
+    struct Element* nextElement = top->next;
+    if (top != queue->top) {
+        goto takeSynchronizedQueue();
+    }
+    if (top == last) {
+        if (nextElement != NULL) {
+            struct Atomic* atomic = queue->atomic;
+            goto atomic->checkAndSet(&queue->last, last, nextElement, takeSynchronizedQueue, takeSynchronizedQueue);
+        }
+    } else {
+        struct Atomic* atomic = queue->atomic;
+        goto atomic->checkAndSet(&queue->top, top, nextElement, takeSynchronizedQueue1, takeSynchronizedQueue);
+    }
+    goto takeSynchronizedQueue();
+}
+// Runs after top was advanced: hand the data stored in nextElement to next.
+__code takeSynchronizedQueue1(struct SynchronizedQueue* queue, __code next(union Data* data, ...), struct Element* nextElement) {
+    data = nextElement->data;
+    goto next(data, ...);
+}
+// Stub: nextElement is read back from the Atomic gear's newData, i.e. the value the preceding checkAndSet was asked to install.
+__code takeSynchronizedQueue1_stub(struct Context* context) {
+	SynchronizedQueue* queue = (SynchronizedQueue*)GearImpl(context, Queue, queue);
+	enum Code next = Gearef(context, Queue)->next;
+	Data** O_data = &Gearef(context, Queue)->data;
+	goto takeSynchronizedQueue1(context,
+                                queue,
+                                next,
+                                O_data,
+                                (struct Element*)Gearef(context, Atomic)->newData);
+}
+// Re-enter when top changed while being read; continue to whenEmpty when top == last and top has no successor, otherwise to next.
+__code isEmptySynchronizedQueue(struct SynchronizedQueue* queue, __code next(...), __code whenEmpty(...)) {
+    struct Element* top = queue->top;
+    struct Element* last = queue->last;
+    struct Element* nextElement = top->next;
+    if (top != queue->top) {
+        goto isEmptySynchronizedQueue();
+    }
+    if (top == last && nextElement == NULL) {
+        goto whenEmpty(...);
+    }
+    goto next(...);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/TaskIterator.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/TaskIterator.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,29 @@
+#include "../context.h"
+#interface "Iterator.h"
+#include <stdio.h>
+// NOTE(review): looks like a draft constructor - no return type or return statement, unnamed by-value parameters, and the itearot field is presumably a typo; confirm before building.
+createTaskIterator(struct Context, struct List list) {
+    struct Iterator* iterator = new Iterator();
+    struct TaskIterator* taskIterator = new TaskIterator();
+    iterator->itearot = (union Data*)taskIterator;
+    iterator->exec = C_execTaskIterator;
+    iterator->barrier = C_barrierTaskIterator;
+    taskIterator->taskList = list;
+}
+// Step to the next list entry and spawn the task stored there, or continue to next at the end of the list. NOTE(review): task/taskManager are redeclared as locals and list/null are undeclared here - presumably a draft.
+__code execTaskIterator(struct taskIterator* iterator, struct TaskManager* taskManager, struct Context* task, __code next(...)) {
+    if (iterator->list->next == null) {
+        goto next(...);
+    }
+    iterator->list = list->next;
+    struct Context* task = (struct Context*)iterator->list->data;
+    struct TaskManager taskManager = task->taskManager;
+    taskManager->spawn(task, C_execTaskIterator);
+}
+// Atomically decrement iterator->count; the caller that takes it from 1 to 0 continues to next, every other caller to whenWait.
+__code barrierTaskIterator(struct MultiDimIterator* iterator, struct Context* task, __code next(...), __code whenWait(...)) {
+    if (__sync_fetch_and_sub(&iterator->count, 1) == 1) {
+        goto next(...);
+    }
+    goto whenWait(...);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/TaskManager.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/TaskManager.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,13 @@
+typedef struct TaskManager<Impl>{
+    union Data* taskManager;
+    struct Context* task;
+    struct Element* taskList;
+    __code spawn(Impl* taskManager, struct Context* task, __code next(...));
+    __code spawnTasks(Impl* taskManagerImpl, struct Element* taskList, __code next1(...));
+    __code setWaitTask(Impl* taskManagerImpl, struct Context* task, __code next(...));
+    __code shutdown(Impl* taskManagerImpl, __code next(...));
+    __code incrementTaskCount(Impl* taskManagerImpl, __code next(...));
+    __code decrementTaskCount(Impl* taskManagerImpl, __code next(...));
+    __code next(...);
+    __code next1(...);
+} TaskManager;
diff -r a4cab67624f7 -r 9146d6017f18 src/TaskManagerImpl.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/TaskManagerImpl.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,206 @@
+#include "../context.h"
+#interface "TaskManager.h"
+#interface "Iterator.h"
+#interface "Queue.h"
+#interface "Worker.h"
+
+#include <stdio.h>
+#include <unistd.h>
+
+void createWorkers(struct Context* context, TaskManagerImpl* taskManager);
+// Build a TaskManager interface and its TaskManagerImpl: wire the code gears, lay out the worker index ranges for IO, GPU and CPU, reset the counters and create the workers.
+TaskManager* createTaskManagerImpl(struct Context* context, int numCPU, int numGPU, int numIO) {
+    struct TaskManager* taskManager = new TaskManager();
+    taskManager->spawnTasks = C_spawnTasksTaskManagerImpl;
+    taskManager->spawn = C_spawnTaskManagerImpl;
+    taskManager->shutdown  = C_shutdownTaskManagerImpl;
+    taskManager->incrementTaskCount = C_incrementTaskCountTaskManagerImpl;
+    taskManager->decrementTaskCount = C_decrementTaskCountTaskManagerImpl;
+    taskManager->setWaitTask = C_setWaitTaskTaskManagerImpl;
+    struct TaskManagerImpl* taskManagerImpl = new TaskManagerImpl();
+    // 0...numIO-1 IOProcessor
+    // numIO...numIO+numGPU-1 GPUProcessor
+    // numIO+numGPU...numIO+numGPU+numCPU-1 CPUProcessor
+    taskManagerImpl->io = 0;
+    taskManagerImpl->gpu = numIO;
+    taskManagerImpl->cpu = numIO+numGPU;
+    taskManagerImpl->maxCPU = numIO+numGPU+numCPU;
+    taskManagerImpl->numWorker = taskManagerImpl->maxCPU;
+    taskManagerImpl->sendGPUWorkerIndex = taskManagerImpl->gpu;
+    taskManagerImpl->sendCPUWorkerIndex = taskManagerImpl->cpu;
+    taskManagerImpl->taskCount = 0;
+    taskManagerImpl->loopCounter = 0;
+    createWorkers(context, taskManagerImpl);
+    taskManager->taskManager = (union Data*)taskManagerImpl;
+    return taskManager;
+}
+// Allocate the workers array and create one worker with its own SynchronizedQueue per index: CPU workers for the IO and CPU ranges, CUDA workers for the GPU range when USE_CUDAWorker is defined.
+void createWorkers(struct Context* context, TaskManagerImpl* taskManager) {
+    int i = 0;
+    taskManager->workers = (Worker**)ALLOCATE_PTR_ARRAY(context, Worker, taskManager->maxCPU);
+    for (;i<taskManager->gpu;i++) {
+        Queue* queue = createSynchronizedQueue(context);
+        taskManager->workers[i] = (Worker*)createCPUWorker(context, i, queue);
+    }
+    for (;i<taskManager->cpu;i++) {
+        Queue* queue = createSynchronizedQueue(context);
+#ifdef USE_CUDAWorker
+        taskManager->workers[i] = (Worker*)createCUDAWorker(context, i, queue,0);
+#else
+        taskManager->workers[i] = (Worker*)createCPUWorker(context, i, queue);
+#endif
+    }
+    for (;i<taskManager->maxCPU;i++) {
+        Queue* queue = createSynchronizedQueue(context);
+        taskManager->workers[i] = (Worker*)createCPUWorker(context, i, queue);
+    }
+}
+// Entry point for spawning a task list: remember the list and continue in spawnTasksTaskManagerImpl1.
+__code spawnTasksTaskManagerImpl(struct TaskManagerImpl* taskManager, struct Element* taskList, __code next1(...)) {
+    taskManager->taskList = taskList;
+    goto spawnTasksTaskManagerImpl1();
+}
+// First pass: hand each task on the list to setWaitTask, one per re-entry; continue in spawnTasksTaskManagerImpl2 once the list is exhausted.
+__code spawnTasksTaskManagerImpl1(struct TaskManagerImpl* taskManagerImpl, struct TaskManager* taskManager) {
+    if (taskManagerImpl->taskList == NULL) {
+        goto spawnTasksTaskManagerImpl2();
+    }
+    struct Context* task = (struct Context*)taskManagerImpl->taskList->data;
+    taskManagerImpl->taskList = taskManagerImpl->taskList->next;
+    goto taskManager->setWaitTask(task, spawnTasksTaskManagerImpl1);
+}
+// Stub: passes both the implementation and the TaskManager interface, each taken from the TaskManager gear in context.
+__code spawnTasksTaskManagerImpl1_stub(struct Context* context) {
+    TaskManagerImpl* taskManagerImpl = (TaskManagerImpl*)GearImpl(context, TaskManager, taskManager);
+    TaskManager* taskManager = &Gearef(context, TaskManager)->taskManager->TaskManager;
+    goto spawnTasksTaskManagerImpl1(context, taskManagerImpl, taskManager);
+}
+// Between the passes: reload taskList from the gear argument and continue in spawnTasksTaskManagerImpl3.
+__code spawnTasksTaskManagerImpl2(struct TaskManagerImpl* taskManager, struct Element* taskList, __code next1(...)) {
+    taskManager->taskList = taskList;
+    goto spawnTasksTaskManagerImpl3();
+}
+// Second pass: spawn each task on the list, one per re-entry; continue to next1 once the list is exhausted.
+__code spawnTasksTaskManagerImpl3(struct TaskManagerImpl* taskManagerImpl, __code next1(...), struct TaskManager* taskManager) {
+    if (taskManagerImpl->taskList == NULL) {
+        goto next1(...);
+    }
+    struct Context* task = (struct Context*)taskManagerImpl->taskList->data;
+    taskManagerImpl->taskList = taskManagerImpl->taskList->next;
+    goto taskManager->spawn(task, spawnTasksTaskManagerImpl3);
+}
+// Stub: passes the implementation, the next1 continuation and the TaskManager interface, all taken from the TaskManager gear in context.
+__code spawnTasksTaskManagerImpl3_stub(struct Context* context) {
+    TaskManagerImpl* taskManagerImpl = (TaskManagerImpl*)GearImpl(context, TaskManager, taskManager);
+    enum Code next1 = Gearef(context, TaskManager)->next1;
+    TaskManager* taskManager = &Gearef(context, TaskManager)->taskManager->TaskManager;
+    goto spawnTasksTaskManagerImpl3(context, taskManagerImpl, next1, taskManager);
+}
+// Put task on the wait list of each data slot from idg up to maxIdg, one per re-entry with loopCounter as the offset; afterwards reset loopCounter and continue in incrementTaskCountTaskManagerImpl.
+__code setWaitTaskTaskManagerImpl(struct TaskManagerImpl* taskManager, struct Context* task, __code next(...)) {
+    int i = taskManager->loopCounter;
+    if (task->idg+i < task->maxIdg) {
+        struct Queue* queue = GET_WAIT_LIST(task->data[task->idg + i]);
+        taskManager->loopCounter++;
+        goto queue->put(task, setWaitTaskTaskManagerImpl);
+    }
+    taskManager->loopCounter = 0;
+    goto incrementTaskCountTaskManagerImpl();
+}
+// Atomically add 1 to taskCount, then continue to next.
+__code incrementTaskCountTaskManagerImpl(struct TaskManagerImpl* taskManager, __code next(...)) {
+    __sync_fetch_and_add(&taskManager->taskCount, 1);
+    goto next(...);
+}
+// Atomically subtract 1 from taskCount, then continue to next.
+__code decrementTaskCountTaskManagerImpl(struct TaskManagerImpl* taskManager, __code next(...)) {
+    __sync_fetch_and_sub(&taskManager->taskCount, 1);
+    goto next(...);
+}
+// Spawn task: when idgCount is 0, hand a not-yet-iterated iterator task to its iterator or send the task to a worker; otherwise continue to next. NOTE(review): the mutex is unlocked here but no matching lock is visible in this file.
+__code spawnTaskManagerImpl(struct TaskManagerImpl* taskManagerImpl, struct Context* task, __code next(...), struct TaskManager* taskManager) {
+    task->taskManager = taskManager;
+    if (task->idgCount == 0) {
+        // iterator task is normal task until spawned
+        if (task->iterator != NULL && task->iterate == 0) {
+            pthread_mutex_unlock(&taskManagerImpl->mutex);
+            struct Iterator* iterator = task->iterator;
+            goto iterator->exec(task, taskManagerImpl->cpu - taskManagerImpl->gpu, next(...));
+        }
+        goto taskSend();
+    }
+    pthread_mutex_unlock(&taskManagerImpl->mutex);
+    goto next(...);
+}
+// Stub: passes the implementation, the task, the next continuation and the TaskManager interface, all taken from the TaskManager gear in context.
+__code spawnTaskManagerImpl_stub(struct Context* context) {
+    TaskManagerImpl* taskManagerImpl = (TaskManagerImpl*)GearImpl(context, TaskManager, taskManager);
+    struct Context* task = Gearef(context, TaskManager)->task;
+    TaskManager* taskManager = &Gearef(context, TaskManager)->taskManager->TaskManager;
+    goto spawnTaskManagerImpl(context,
+                              taskManagerImpl,
+                              task,
+                              Gearef(context, TaskManager)->next,
+                              taskManager);
+}
+
+// Route the task to the GPU sender taskSend1 when task->gpu is set, otherwise to the CPU sender taskSend2.
+__code taskSend(struct TaskManagerImpl* taskManager, struct Context* task, __code next(...)) {
+    // set workerId
+    if (task->gpu) {
+        goto taskSend1();
+    } else {
+        goto taskSend2();
+    }
+}
+// Pick the next GPU worker index, wrapping from cpu back to gpu, unlock the mutex and put the task on that worker's queue.
+__code taskSend1(struct TaskManagerImpl* taskManager, struct Context* task, __code next(...)) {
+    int workerId = taskManager->sendGPUWorkerIndex;
+    if (++taskManager->sendGPUWorkerIndex >= taskManager->cpu) {
+        taskManager->sendGPUWorkerIndex = taskManager->gpu;
+    }
+    pthread_mutex_unlock(&taskManager->mutex);
+    struct Queue* queue = taskManager->workers[workerId]->tasks;
+    goto queue->put(task, next(...));
+}
+// Pick the next CPU worker index, wrapping from maxCPU back to cpu, unlock the mutex and put the task on that worker's queue.
+__code taskSend2(struct TaskManagerImpl* taskManager, struct Context* task, __code next(...)) {
+    int workerId = taskManager->sendCPUWorkerIndex;
+    if (++taskManager->sendCPUWorkerIndex >= taskManager->maxCPU) {
+        taskManager->sendCPUWorkerIndex = taskManager->cpu;
+    }
+    pthread_mutex_unlock(&taskManager->mutex);
+    struct Queue* queue = taskManager->workers[workerId]->tasks;
+    goto queue->put(task, next(...));
+}
+// Poll every 1000 microseconds until taskCount is 0; then put a NULL task on each worker queue, one per pass through shutdownTaskManagerImpl1, and finally continue in shutdownTaskManagerImpl2.
+__code shutdownTaskManagerImpl(struct TaskManagerImpl* taskManager, __code next(...)) {
+    if (taskManager->taskCount != 0) {
+        usleep(1000);
+        goto shutdownTaskManagerImpl();
+    }
+    int i = taskManager->loopCounter;
+    if (i < taskManager->numWorker) {
+        struct Queue* tasks = taskManager->workers[i]->tasks;
+        goto tasks->put(NULL, shutdownTaskManagerImpl1);
+    }
+
+    taskManager->loopCounter = 0;
+    goto shutdownTaskManagerImpl2();
+}
+// Advance loopCounter to the next worker and return to shutdownTaskManagerImpl.
+__code shutdownTaskManagerImpl1(struct TaskManagerImpl* taskManager, __code next(...)) {
+    taskManager->loopCounter++;
+    goto shutdownTaskManagerImpl();
+}
+// Join each worker thread in turn, one per re-entry with loopCounter as the index; then reset loopCounter and continue to next.
+__code shutdownTaskManagerImpl2(struct TaskManagerImpl* taskManager, __code next(...)) {
+    int i = taskManager->loopCounter;
+    if (i < taskManager->numWorker) {
+        pthread_join(taskManager->workers[i]->thread, NULL);
+        taskManager->loopCounter++;
+        goto shutdownTaskManagerImpl2();
+    }
+    taskManager->loopCounter = 0;
+    goto next(...);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/Timer.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Timer.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,7 @@
+typedef struct Timer<Impl>{
+        union Data* timer;
+        __code start(Impl* timer, __code next(...));
+        __code end(Impl* timer, __code next(...));
+        __code next(...);
+} Timer;
+
diff -r a4cab67624f7 -r 9146d6017f18 src/TimerImpl.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/TimerImpl.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,30 @@
+#include <stdio.h>
+#include <sys/time.h>
+
+#include "../context.h"
+#interface "Timer.h"
+// Build a Timer interface backed by a TimerImpl and wire in the start/end code gears.
+Timer* createTimerImpl(struct Context* context) {
+    struct Timer* timer = new Timer();
+    struct TimerImpl* timerImpl = new TimerImpl();
+    timer->timer = (union Data*)timerImpl;
+    timer->start = C_startTimer;
+    timer->end = C_endTimer;
+    return timer;
+}
+// Record the current gettimeofday time in timer->time as seconds, with the microseconds as the fractional part, then continue to next.
+__code startTimer(struct TimerImpl* timer, __code next(...)) {
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+
+    timer->time = tv.tv_sec + (double)tv.tv_usec*1e-6;
+
+    goto next(...);
+}
+// Print the seconds elapsed since timer->time was recorded, with six decimals, then continue to next.
+__code endTimer(struct TimerImpl* timer, __code next(...)) {
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    printf("%0.6f\n", (tv.tv_sec+(double)tv.tv_usec*1e-6) - timer->time);
+    goto next(...);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/Todo
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Todo	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,144 @@
+Fri May  4 20:06:48 JST 2018
+
+    par goto がある code segment は $inParGoto ではなく $hasParGoto にする
+    par goto があるばあいは goto meta ではなく goto parGotoMeta にする
+    taskList の処理も parGotoMeta で行う
+    
+    par goto の遅い理由を調べる
+        多分 Context と Synchronized Queue の生成に時間がかかってる？
+    
+    Perl スクリプトを一つにする
+    Context を生成するモジュールとstubを生成するモジュールをそれぞれオブジェクトとして作る
+    
+    Code Gear のプロトタイプを格納するオブジェクトをつくる
+
+Tue Aug  1 19:32:55 JST 2017
+ 
+    DataGear の待ち合わせ
+    DataGear の Commit
+     
+    これらは、stubとgoto meta の部分で行う
+    
+    どれに対して行うかを実行時あるいはコンパイル時に指定する必要がある
+
+    一つの解決策は、 typedefのときにannotation してあげる
+    もう一つの解決策は, Data Gear の allocation 時に指定する
+    Code Gearのプロトタイプのなかで指定する事も考えられる
+    
+    par goto時に渡す continuation で同期をとっても良い, このときにはこのcontinuation を作成するinterfaceを作る必要がある
+
+    実行時に指定してしまうと、毎回フラグのチェックが必要になる。
+    これを abstract model checking を事前に行うことで, static なコードに置き換える事はできる
+
+    例題としては, chat、dining philosophers, map reduce
+
+Fri Apr 14 18:44:09 JST 2017
+
+    struct B {
+        A a;
+        .....
+    }
+    struct A {
+        __code init(..., __code next(A a, ...));
+    }
+    par goto A->init(a);
+    // meta level
+    task->code = C_init_A;
+    task->data[idg] = ...;
+    task->data[idg + 1] = ...;
+    task->data[odg] = ...;
+    task->next = C_writeToa;
+    goto meta(context, context->TaskManager->spawn)
+
+    // lambda version?
+    par goto A->init(\A -> a = A)
+
+    // meta level
+    par goto A->init(next = \A -> a = A)
+
+Wed Mar  1 18:25:36 JST 2017
+
+    parallel_execution/test/ を .cbc に書き直す
+    rb_tree の stub をできるだけ取り外す
+    synchronizedQueue の meta部分を分離する
+    synchronizedQueue のバグをとる
+    GPU のバグとり
+    cbc++...?
+
+Sat Jan 28 16:10:28 JST 2017
+
+    stackからpopした後、呼び出される continuation は出力を受けとる。
+    出力を受けとる stub を生成する必要がある。
+    なので、CodeGear が、そのような interface で定義されたものかどうかを調べる必要がある。
+    Stackのnext(やisEmpty)に代入された時点でわかる。なので、あまり自明な見つける方法がない。 
+    引数の異なるnextは異なる名前を持つべきか? 持たなくてもできるが...
+
+         goto next(data, ...);                                       引数で渡された continuation に移動
+         goto nodeStack->push(newNode, replaceNode1);                Interface の呼び出し。(ここで replaceNode1 が stack の戻り値を受けることがわかる。)
+         goto replaceNode(traverse, traverse->current, newNode);     普通のgoto
+         goto rotateTree->next(...);                                 DataGearに格納された continuation
+
+    などをチェックする必要がある。これらの型チェックは CbC level では行われない。(CbCはmeta levelだから)
+
+     戻り値の部分は interface に記述させるという手もあるな。
+
+
+Sun Jan 22 20:11:28 JST 2017
+
+    TaskManagerから必要なCPUWorkerを生成する
+    WorkerはcreateWorker時に新しくthreadを作る
+    
+    TaskManager->createTaskで新しいContextを生成する
+    この時点でWorkerを番号で指定する
+    このContextにGearefで値を設定していく
+    待ち合わせ用のDSを設定する
+    taskManager->spawnでWorkerにcontextを送る
+
+Fri Jan 13 17:47:40 JST 2017
+
+    Task は contextを直接使うことにする
+        DS には, 待っているcontextのListを作る
+        context に実行中断中のCS の番号をいれるフィールドを用意する
+        待っているDS のcount
+    createTaskの手順
+        新しくcontextを作る
+            allocate 用のheap も用意
+            もとのcontextを全部copyする or 必要なものだけcopyする
+            待ち合わせのDS群を指定する
+            終わったあとの行き先を指定する(default は task_exit)
+            exception の行き先も必要なら指定する
+            待っているDSが全部揃っていたら active Queueに入れる
+    task の実行
+        taskの実行後、 goto meta する直前で code gear commit を呼んで, Reader list を消化する
+        複数から参照されるDSは一旦localに書き出して, その後atomic に書き出す
+        複数から参照されるDSは何かしら宣言が必要
+            つまり DS には 一つ一つ owner がいる
+
+Mon Nov 28 17:39:39 JST 2016
+
+    Task,TaskManager,Workerのインターフェースの実装を作成する
+    Taskを一旦Treeに入れずに直接Queueに入れる
+
+    Task
+        CodeGen
+            IDataSeg
+            IDataSeg
+            ...
+        idsCount
+        nextTask(can be C_exit)
+            ODataSeg?
+
+    TaskManager
+        createWorker
+        spawn (any,cpu,GPU)
+        taskSend
+        activeQueue
+        shutdown
+        deadlockDetectid
+
+    SynchronizedQueue * Workerの数だけ
+
+    Worker
+        execute
+        taskReceive
+        shutdown
diff -r a4cab67624f7 -r 9146d6017f18 src/Tree.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Tree.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,12 @@
+typedef struct Tree<Type, Impl>{ // Tree interface: put and remove code gears taking a node, each continuing to next
+    /* future Code */
+    /* Type* tree; */
+    /* Type* node; */
+    union Data* tree;
+    struct Node* node;
+    __code put(Impl* tree,Type* node, __code next(...));
+    // __code get(Impl* tree, __code next(...));
+    __code remove(Impl* tree,Type* node, __code next(...));
+    // __code clearRedBlackTree();
+    __code next(...);
+} Tree;
diff -r a4cab67624f7 -r 9146d6017f18 src/Worker.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/Worker.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,10 @@
+typedef struct Worker<Impl>{ // Worker interface: taskReceive and shutdown code gears plus the worker's thread handle and task queue
+    union Data* worker;
+    struct Queue* tasks;
+    struct Context* task;
+    pthread_t thread;
+    struct TaskManager* taskManager;
+    __code taskReceive(Impl* worker, struct Queue* tasks);
+    __code shutdown(Impl* worker);
+    __code next(...);
+} Worker;
diff -r a4cab67624f7 -r 9146d6017f18 src/auto_generate_context.pl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/auto_generate_context.pl	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,87 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use FindBin;
+use lib "$FindBin::Bin/lib";
+
+use Gears::Context;
+use Getopt::Std;
+my %opt;
+getopts("w" => \%opt);                            # -w: write context.h next to this script instead of using "stdout"
+
+my $h = find_target_from_camke_list();            # target name => SOURCES line, read from CMakeLists.txt
+
+my $target = shift;                               # first remaining argument is the target name
+unless (defined $target && exists $h->{$target}) { # a missing argument is rejected here without an "uninitialized" warning
+  print "$_\n" for keys %$h;                      # list the known targets to help the user
+  print "invalid target name\n";
+  exit 0;
+}
+
+my @cbc_files;                                    # source files of the chosen target
+
+for my $file_name (split / /, $h->{$target}) {
+  if ($file_name =~ /^\s*$/) {                    # indentation of the SOURCES line produces empty fields
+    next;
+  }
+  chomp $file_name;
+  push(@cbc_files,$file_name);
+}
+
+my $output     = $opt{w} ? "$FindBin::Bin/context.h" : "stdout";
+my $gears      = Gears::Context->new(compile_sources => \@cbc_files, find_root => $FindBin::Bin, output => $output);
+my $data_gears = $gears->extraction_dg_compile_sources();
+my $g          = $gears->set_data_gear_header_path();
+
+my $dg2path    = $gears->update_dg_each_header_path($data_gears,$g);
+
+my $tree = $gears->createImplTree_from_header($dg2path);
+$gears->tree2create_context_h($tree);             # emit context.h from the implementation tree
+
+
+sub find_target_from_camke_list {
+  open my $fh, '<', "CMakeLists.txt" or die "cannot open CMakeLists.txt: $!";  # path is relative to the current directory
+  my $in_gears = 0;            # 0: outside a GearsCommand( block, 1..4: line position inside it
+  my $target;
+  my %res;                     # returned by reference: target name => its source list line
+
+  while (my $line = <$fh>) {
+    if ($in_gears == 1) {      # line right after "GearsCommand(" is skipped unread; TARGET in the expected layout
+      $in_gears++;
+      next;
+    }
+
+    if ($in_gears == 2) {      # target name line, surrounding whitespace removed
+       $line =~ s/\s*(\w+)\s*/$1/g;
+       $target = $line;
+       chomp $target;
+       $in_gears++;
+       next;
+    }
+
+    if ($in_gears == 3) {      # skipped unread; SOURCES in the expected layout
+      $in_gears++;
+      next;
+    }
+
+    if ($in_gears == 4) {      # source list line, kept as is except for the newline
+      $res{$target} = $line;
+      chomp $res{$target};
+      $in_gears = 0;
+      next;
+    }
+
+    if ($line =~ /^GearsCommand\(/) {
+        $in_gears++;
+    }
+
+  }
+  return \%res;
+}
+
+#GearsCommand(
+#  TARGET
+#      rbtree
+#  SOURCES
+#      SingleLinkedQueue.cbc test/rbTree_test.cbc RedBlackTree.cbc SingleLinkedStack.cbc compare.c
+#)
diff -r a4cab67624f7 -r 9146d6017f18 src/compare.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/compare.c	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,13 @@
+#include "context.h"
+
+enum Relational compare(struct Node* node1, struct Node* node2) {
+    const int lhs = node1->key; /* key of the first node */
+    const int rhs = node2->key; /* key of the second node */
+    if (lhs < rhs) {
+        return GT; /* first key is the smaller one */
+    }
+    if (lhs > rhs) {
+        return LT; /* first key is the larger one */
+    }
+    return EQ; /* keys match */
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/cuda.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cuda.c	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,95 @@
+#include <stdio.h>
+#include <sys/time.h>
+#include <string.h>
+#include <stdlib.h>
+
+// includes, project
+#include <driver_types.h>
+#include <cuda_runtime.h>
+#include <cuda.h>
+#include "helper_cuda.h"
+#include "pthread.h"
+
+#include "context.h"
+
+/*
+struct Context {
+    int next;
+    struct Worker* worker;
+    struct TaskManager* taskManager;
+    int codeNum;
+    void  (**code) (struct Context*);
+    void* heapStart;
+    void* heap;
+    long heapLimit;
+    int dataNum;
+    int idgCount; //number of waiting dataGear
+    int idg;
+    int maxIdg;
+    int odg;
+    int maxOdg;
+    int workerId;
+    struct Context* task;
+    struct Queue* tasks;
+    int num_exec;
+    CUmodule module;
+    CUfunction function;
+    union Data **data;
+
+    // multi dimension parameter
+    int iterate;
+    struct Iterator* iterator;
+};
+
+struct CUDAWorker {
+    CUdevice device;
+    CUcontext cuCtx;
+    pthread_t thread;
+    struct Context* context;
+    int id;
+    struct Queue* tasks;
+    int runFlag;
+    int next;
+    int numStream;
+    CUstream *stream;
+} CUDAWorker;
+
+struct LoopCounter {
+    int i;
+} LoopCounter;
+
+struct Array {
+    int size;
+    int index;
+    int prefix;
+    int* array;
+} Array;
+*/
+
+void cudaInit(struct CUDAWorker *cudaWorker,int phase, int deviceNum) {
+    // Driver, device and context setup run only when phase is 0; numStream is set on every call.
+    cudaWorker->numStream = 1; // number of stream
+    //    cudaWorker->stream = NEWN(cudaWorker->numStream, CUstream );
+    if (phase == 0) {
+        checkCudaErrors(cuInit(0));
+        checkCudaErrors(cuDeviceGet(&cudaWorker->device, deviceNum));
+        checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device));
+    }
+    // Stream creation is currently disabled:
+    //    if (cudaWorker->num_stream) {
+    //        for (int i=0;i<cudaWorker->num_stream;i++)
+    //            checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0));
+    //    }
+    printf("cuda Init: Done\n");
+}
+
+void cudaLoadFunction(struct Context* context, char* filename, char* function) {
+    checkCudaErrors(cuModuleLoad(&context->module, filename)); // load the module file into context->module
+    checkCudaErrors(cuModuleGetFunction(&context->function, context->module, function)); // look the named kernel up and keep its handle in context->function
+}
+
+void cudaShutdown(struct CUDAWorker *worker) {
+    //    for (int i=0;i<worker->num_stream;i++)
+    //        checkCudaErrors(cuStreamDestroy(worker->stream[i]));
+    checkCudaErrors(cuCtxDestroy(worker->cuCtx)); // destroy the context stored in the worker by cudaInit; stream teardown above is disabled
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/bitonicSort/CUDAbitonicSwap.cu
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/bitonicSort/CUDAbitonicSwap.cu	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,32 @@
+extern "C" {
+    struct Integer {
+        int value;
+    };
+    struct SortArray {
+        struct Integer *array;
+        int loopCounter;
+        int block;
+        int first;
+        int prefix;
+    };
+    // One compare-exchange step: every thread walks `prefix` consecutive slots of the array.
+    __global__ void bitonicSwap(struct Integer* array, struct SortArray* sortArray) {
+        const int block = sortArray->block;
+        const int first = sortArray->first;
+        const int prefix = sortArray->prefix;
+        for (int i = 0; i < prefix; i++) {
+            // slot number -> lower element of its pair
+            int slot = i + (blockIdx.x * blockDim.x + threadIdx.x) * prefix;
+            int span = slot/block;
+            int lower = slot+block*span;
+            // first == 1 mirrors the partner inside the 2*block span, otherwise it lies block elements ahead
+            int partner = (first == 1)? ((block<<1)*(span+1))-(lower%block)-1 : lower+block;
+            if (array[lower].value > array[partner].value) {
+                // keep the smaller value at the lower index
+                struct Integer tmp = array[lower];
+                array[lower] = array[partner];
+                array[partner] = tmp;
+            }
+        }
+    }
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/bitonicSort/SortArray.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/bitonicSort/SortArray.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,8 @@
+typedef struct SortArray<Impl>{ // data gear of the bitonic sort example: the Integer array plus loop counter, block, first and prefix
+    union Data* sortArray;
+    struct Integer *array;
+    int loopCounter;
+    int block;
+    int first;
+    int prefix;
+} SortArray;
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/bitonicSort/bitonicSort.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/bitonicSort/bitonicSort.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,89 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <math.h>
+
+#include "../../../context.h"
+#interface "TaskManager.h"
+
+int cpu_num = 1; // overridden by -cpu N in init
+int length = 1024; // number of elements to sort, overridden by -l N
+int split  = 8; // iterate count per swap step, overridden by -s N
+int gpu_num = 0; // set to 1 by -cuda
+int CPU_ANY = -1;
+int CPU_CUDA = -1; // set to 0 by -cuda
+
+__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
+    // loopCounter->tree = createRedBlackTree(context);
+    loopCounter->i = 0; // start the loop counter gear at zero
+    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0); // task manager built from the cpu and gpu counts
+    goto code1();
+}
+
+__code code1(struct LoopCounter* loopCounter) {
+    printf("cpus:\t\t%d\n", cpu_num); // report the run configuration before any task is created
+    printf("gpus:\t\t%d\n", gpu_num);
+    printf("length:\t\t%d\n", length);
+    printf("length/task:\t%d\n", length/split); // integer division; a split of 0 divides by zero here
+    /* puts("queue"); */
+    /* print_queue(context->data[ActiveQueue]->queue.first); */
+    /* puts("tree"); */
+    /* print_tree(context->data[Tree]->tree.root); */
+    /* puts("result"); */
+
+    goto createTask1();
+}
+
+__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
+    struct SortArray* outputSortArray = new SortArray();
+    struct SortArray* inputSortArray = outputSortArray; // makeArray's output is the first swap step's input
+    struct Timer* timer = createTimerImpl(context);
+
+    par goto makeArray(outputSortArray, timer, __exit);
+
+    for (int i=2; i <= length; i=2*i) { // outer loop: i doubles from 2 up to length
+        int first = 1;
+        for (int j=i>>1; j > 0; j=j>>1) { // inner loop: j halves from i/2 down to 1 and becomes the step's block
+            outputSortArray = new SortArray();
+            inputSortArray->prefix = length/2/split; // slots each of the split iterations works through
+            inputSortArray->block = j;
+            inputSortArray->first = first;
+            par goto bitonicSwap(inputSortArray, outputSortArray, iterate(split), __exit);
+            first = 0; // only the first step of each outer pass runs with first == 1
+            inputSortArray = outputSortArray; // chain: this step's output feeds the next step
+        }
+    }
+
+    par goto printArray(inputSortArray, timer, __exit);
+
+    goto code2();
+}
+
+__code code2(struct TaskManager* taskManager) {
+    goto taskManager->shutdown(exit_code); // run the task manager shutdown, continuing to exit_code
+}
+
+__code code2_stub(struct Context* context) {
+    goto code2(context, &Gearef(context, TaskManager)->taskManager->TaskManager); // hand-written stub: passes the TaskManager referenced from the context's TaskManager gear
+}
+
+void init(int argc, char** argv) {
+    for (int i = 1; argv[i]; ++i) { // options: -cpu N, -l N, -s N, -cuda; unknown arguments are ignored
+        if (strcmp(argv[i], "-cpu") == 0 && argv[i+1]) // the value must exist: atoi on the NULL terminator is undefined
+            cpu_num = atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-l") == 0 && argv[i+1])
+            length = atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-s") == 0 && argv[i+1])
+            split = atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-cuda") == 0) {
+            gpu_num = 1;
+            CPU_CUDA = 0;
+        }
+    }
+}
+
+int main(int argc, char** argv) {
+    init(argc, argv); // apply the command line options to the globals first
+    goto initDataGears();
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/bitonicSort/bitonicSwap.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/bitonicSort/bitonicSwap.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,60 @@
+#include "../../../context.h"
+#include <stdio.h>
+
+#ifdef USE_CUDAWorker
+extern void cudaLoadFunction(struct Context* context, char* filename, char* function);
+#endif
+
+__code bitonicSwap(struct SortArray* inputArray, struct MultiDim* multiDim, __code next(struct SortArray* output, ...), struct LoopCounter* loopCounter) {
+    int block = inputArray->block;
+    int first = inputArray->first;
+    if (loopCounter->i < inputArray->prefix) { // one slot per pass, prefix slots in total
+        int index = loopCounter->i + multiDim->x * inputArray->prefix; // slot number: iteration x owns prefix consecutive slots
+        int position = index/block;
+        int index1 = index+block*position; // lower element of the pair
+        int index2 = (first == 1)? ((block<<1)*(position+1))-(index1%block)-1 : index1+block; // partner: mirrored inside the 2*block span when first == 1, otherwise block elements ahead
+        struct Integer* array = inputArray->array;
+        if (array[index2].value < array[index1].value) { // keep the smaller value at index1
+            struct Integer tmp = array[index1];
+            array[index1] = array[index2];
+            array[index2] = tmp;
+        }
+        loopCounter->i++;
+        goto bitonicSwap(); // loop until every slot of this iteration is done
+    }
+    loopCounter->i = 0;
+    output->array = inputArray->array; // the output gear shares the input's array, the swaps were done in place
+    goto next(output, ...);
+}
+
+__code bitonicSwap_stub(struct Context* context) {
+#ifdef USE_CUDAWorker
+    if (context->gpu) { // GPU path: hand the work to the CUDA executor instead of the CPU gear below
+        SortArray* inputSortArray  = &context->data[context->idg]->SortArray;
+        SortArray* outputSortArray = &context->data[context->odg]->SortArray;
+        CUDABuffer* buffer = &ALLOCATE(context, CUDABuffer)->CUDABuffer;
+        buffer->inputData = (union Data**)ALLOCATE_PTR_ARRAY(context, SortArray, 2);
+        buffer->inputData[0] = (union Data*)inputSortArray->array; // first input: the Integer array
+        buffer->inputData[1] = (union Data*)inputSortArray; // second input: the SortArray parameters
+        buffer->outputData = NULL; // no separate output buffers are registered
+        buffer->inputLen = 2;
+        buffer->outputLen = 0;
+        // return to the GPU worker along the continuation
+        outputSortArray->array = inputSortArray->array;
+        Executor* executor = context->worker->worker->CUDAWorker.executor;
+        executor->executor->CUDAExecutor.buffer = buffer;
+        cudaLoadFunction(context, "c/examples/bitonicSort/CUDAbitonicSwap.ptx", "bitonicSwap");
+        Gearef(context, Executor)->executor = (union Data*)executor;
+        Gearef(context, Executor)->task = context;
+        Gearef(context, Executor)->next = context->next;
+        goto meta(context, executor->read);
+    }
+#endif
+    SortArray** O_output = (struct SortArray **)&context->data[context->odg]; // CPU path: output gear slot passed on to bitonicSwap
+    goto bitonicSwap(context,
+            &context->data[context->idg]->SortArray,
+            &context->data[context->idg+1]->MultiDim,
+            context->next,
+            O_output,
+            Gearef(context, LoopCounter));
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/bitonicSort/makeArray.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/bitonicSort/makeArray.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,20 @@
+#include "../../../context.h"
+#include <stdio.h>
+#interface "Timer.h"
+
+extern int length;
+__code makeArray(__code next(struct SortArray* output, struct Timer* output1, ...)){
+    if (output->loopCounter == 0){ // first entry: allocate the array and seed rand
+        output->array = (Integer*)ALLOCATE_ARRAY(context, Integer, length);
+        srand((unsigned) time(NULL)); // NOTE(review): this file includes no time.h or stdlib.h itself, presumably context.h provides them — confirm
+    }
+    if (output->loopCounter == GET_LEN(output->array)){ // every slot has been filled
+        printf("created Array\n");
+        output->loopCounter = 0;
+        goto output1->start(next(...)); // start the timer, then continue to next
+    }
+    output->array[output->loopCounter].value = rand() % 1000; // remainder of rand by 1000
+    //printf("%d\n", output->array[output->loopCounter]->value);
+    output->loopCounter++;
+    goto makeArray(); // fill the next slot
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/bitonicSort/printArray.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/bitonicSort/printArray.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,22 @@
+#include "../../../context.h"
+#interface "Timer.h"
+#include <stdio.h>
+
+__code printArray(struct SortArray* inputArray, struct Timer* inputTimer, __code next(...)){
+    goto inputTimer->end(printArray1); // run the timer's end gear, then check the array in printArray1
+}
+
+__code printArray1(struct SortArray* inputArray, __code next(...)){
+    //printf("%d\n", inputArray->array[inputArray->loopCounter].value);
+    inputArray->loopCounter++; // now the index of the right element of the pair to compare
+    if (inputArray->loopCounter == GET_LEN(inputArray->array)){ // ran past the last element without finding a descending pair
+        printf("sort completed\n");
+        inputArray->loopCounter = 0;
+        goto next(...);
+    }
+    if (inputArray->array[inputArray->loopCounter-1].value > inputArray->array[inputArray->loopCounter].value) { // descending pair: the array is not sorted
+        printf("wrong result\n");
+        goto next(...); // NOTE(review): loopCounter is not reset on this path
+    }
+    goto printArray1(); // compare the next pair
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/bitonicSort/sort.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/bitonicSort/sort.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,95 @@
+#include<stdio.h>
+#include <stdlib.h>
+#include "../../../context.h"
+#define LOGN 5
+#define MAX (1 << LOGN) // parenthesized so the shift stays intact when MAX is used inside a larger expression
+
+int main(int argc, char const* argv[]) {
+    struct Context* main_context = NEW(struct Context);
+    initContext(main_context);
+    main_context->next = C_sort_start; // sort_start is recorded as the next code gear before entering start_code
+    goto start_code(main_context);
+}
+
+__code sort_start(struct SortArray* sortArray){
+    sortArray->sortArray = new SortArray();
+    sortArray->sortArray->array = (Integer**)ALLOCATE_PTR_ARRAY(context, Integer, MAX);// ALLOC_ARRAY builds an array of data segments, so it cannot be used for plain int
+    sortArray->sortArray->loop_counter = 0; // NOTE(review): SortArray.h in this change declares loopCounter and a struct Integer* array, these names look older — confirm this file still builds
+    sortArray->sortArray->loop_counter2 = 0;
+    sortArray->sortArray->loop_counter3 = 0;
+    srand((unsigned) time(NULL));
+    goto meta(context, C_make_array);
+}
+
+__code make_array(struct SortArray* sortArray){// fill the array with random numbers
+    if (sortArray->sortArray->loop_counter == MAX){// loop finished -> go print the array
+        sortArray->sortArray->loop_counter = 0;
+        goto meta(context, C_print);
+    }
+    struct Integer* integer = new Integer();
+    integer->value = rand() % 1000;
+    sortArray->sortArray->array[sortArray->sortArray->loop_counter] = integer; // one freshly allocated Integer per slot
+    sortArray->sortArray->loop_counter++;
+    goto meta(context, C_make_array);
+}
+
+__code print(struct SortArray* sortArray){// print the array
+    if (sortArray->sortArray->loop_counter == MAX){// loop finished -> go sort
+        printf("\n");
+        if (sortArray->sortArray->sort_finish == 1){// exit if the sort has already finished; NOTE(review): sort_finish is never set to 0 in this file, presumably the allocation is zeroed — confirm
+            goto meta(context, C_exit_code);
+        }
+        sortArray->sortArray->loop_counter = 0;
+        sortArray->sortArray->loop_counter2 = 0;
+        goto meta(context, C_bitonic_sort);
+    }
+
+    printf("%d, ", sortArray->sortArray->array[sortArray->sortArray->loop_counter]->value);
+    sortArray->sortArray->loop_counter++;
+    goto meta(context, C_print);
+}
+
+__code bitonic_sort(struct SortArray* sortArray){// outer loop of the sort
+    if (sortArray->sortArray->loop_counter >= LOGN){// loop finished -> go print the array
+        sortArray->sortArray->loop_counter = 0;
+        sortArray->sortArray->sort_finish = 1; // tells print to exit after this final listing
+        goto meta(context, C_print);
+    }
+    goto meta(context, C_kernel);
+}
+
+__code kernel(struct SortArray* sortArray){// second loop
+    if (sortArray->sortArray->loop_counter2 > sortArray->sortArray->loop_counter){// loop finished -> back to the outer loop
+        sortArray->sortArray->loop_counter++;
+        sortArray->sortArray->loop_counter2 = 0;
+        goto meta(context, C_bitonic_sort);
+    }
+
+    goto meta(context, C_kernel2);
+}
+
+__code kernel2(struct SortArray* sortArray){// body of the sort
+    int i = sortArray->sortArray->loop_counter3;
+
+    if (i >= GET_LEN(sortArray->sortArray->array)){// loop finished -> back to the enclosing loop
+        sortArray->sortArray->loop_counter2++;
+        sortArray->sortArray->loop_counter3 = 0;
+        goto meta(context, C_kernel);
+    }
+
+     goto meta(context, C_swap);
+}
+
+__code swap(struct SortArray* sortArray){// swap array elements
+    int i = sortArray->sortArray->loop_counter3; // element index handled on this pass
+    int d = 1 << (sortArray->sortArray->loop_counter - sortArray->sortArray->loop_counter2); // bit that separates i from its partner i | d
+    int up = ((i >> sortArray->sortArray->loop_counter) & 2) == 0; // 1 when bit 1 of the shifted index is clear; a pair is swapped when its greater-than result equals up
+
+    if ((i & d) == 0 && (sortArray->sortArray->array[i]->value > sortArray->sortArray->array[i | d]->value) == up) {
+        struct Integer *tmp = sortArray->sortArray->array[i];
+        sortArray->sortArray->array[i] = sortArray->sortArray->array[i | d];
+        sortArray->sortArray->array[i | d] = tmp;
+    }
+    sortArray->sortArray->loop_counter3++;
+    goto meta(context, C_kernel2);// back to the enclosing loop
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/boundedBuffer/BoundedBuffer.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/boundedBuffer/BoundedBuffer.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,77 @@
+#include "../../../context.h"
+#interface "Buffer.h"
+#interface "Semaphore.h"
+
+Buffer* createBoundedBuffer(struct Context* context, int size) {
+    struct Buffer* buffer = new Buffer();
+    struct BoundedBuffer* boundedBuffer = new BoundedBuffer();
+    boundedBuffer->top = new Element();
+    boundedBuffer->top->next = NULL; // top starts as a lone head element, take reads the data of the element after it
+    boundedBuffer->last = boundedBuffer->top;
+    boundedBuffer->fullCount = createSemaphoreImpl(context, 0); // starts at 0, raised by put and lowered by take
+    boundedBuffer->emptyCount = createSemaphoreImpl(context, size); // starts at size, lowered by put and raised by take
+    boundedBuffer->lock = createSemaphoreImpl(context, 1); // binary semaphore
+    buffer->buffer = (union Data*)boundedBuffer;
+    buffer->take = C_takeBoundedBuffer;
+    buffer->put = C_putBoundedBuffer;
+    return buffer;
+}
+
+__code putBoundedBuffer(struct BoundedBuffer* buffer, union Data* data, __code next(...)) {
+    struct Semaphore* semaphore = buffer->emptyCount; // put step 1: P on emptyCount
+    goto semaphore->p(putBoundedBuffer1);
+}
+
+__code putBoundedBuffer1(struct BoundedBuffer* buffer, union Data* data, __code next(...)) {
+    struct Semaphore* semaphore = buffer->lock; // put step 2: P on the binary lock before touching the list
+    goto semaphore->p(putBoundedBuffer2);
+}
+
+__code putBoundedBuffer2(struct BoundedBuffer* buffer, union Data* data, __code next(...)) {
+    struct Element* element = new Element();
+    element->data = data; // put step 3: wrap data in a new element
+    element->next = NULL;
+    struct Element* last = buffer->last;
+    last->next = element; // append behind the current last element
+    buffer->last = element;
+    struct Semaphore* semaphore = buffer->lock; // then V on the lock
+    goto semaphore->v(putBoundedBuffer3);
+}
+
+__code putBoundedBuffer3(struct BoundedBuffer* buffer, union Data* data, __code next(...)) {
+    struct Semaphore* semaphore = buffer->fullCount; // put step 4: V on fullCount for the element just appended
+    goto semaphore->v(putBoundedBuffer4);
+}
+
+__code putBoundedBuffer4(struct BoundedBuffer* buffer, union Data* data, __code next(...)) {
+    goto next(...); // put finished, continue with the caller's next
+}
+
+__code takeBoundedBuffer(struct BoundedBuffer* buffer, __code next(union Data* data, ...)) {
+    struct Semaphore* semaphore = buffer->fullCount; // take step 1: P on fullCount
+    goto semaphore->p(takeBoundedBuffer1);
+}
+
+__code takeBoundedBuffer1(struct BoundedBuffer* buffer, __code next(union Data* data, ...)) {
+    struct Semaphore* semaphore = buffer->lock; // take step 2: P on the binary lock before touching the list
+    goto semaphore->p(takeBoundedBuffer2);
+}
+
+__code takeBoundedBuffer2(struct BoundedBuffer* buffer, __code next(union Data* data, ...)) {
+    struct Element* top = buffer->top;
+    struct Element* nextElement = top->next; // take step 3: the element after top carries the data
+    data = nextElement->data;
+    *O_data =data; // publish the taken data through the output slot
+    buffer->top = nextElement; // the element just read becomes the new top
+    struct Semaphore* semaphore = buffer->lock; // then V on the lock
+    goto semaphore->v(takeBoundedBuffer3);
+}
+
+__code takeBoundedBuffer3(struct BoundedBuffer* buffer, __code next(union Data* data, ...)) {
+    struct Semaphore* semaphore = buffer->emptyCount; // take step 4: V on emptyCount for the slot just freed
+    goto semaphore->v(takeBoundedBuffer4);
+}
+
+__code takeBoundedBuffer4(struct BoundedBuffer* buffer, __code next(union Data* data, ...)) {
+    goto next(data, ...); // take finished, pass the data on to the caller's next
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/boundedBuffer/SemaphoreImpl.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/boundedBuffer/SemaphoreImpl.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,87 @@
+#include "../../../context.h"
+#interface "Semaphore.h"
+#interface "Queue.h"
+#interface "TaskManager.h"
+#interface "Lock.h"
+
+Semaphore* createSemaphoreImpl(struct Context* context, int n) {
+    struct Semaphore* semaphore = new Semaphore();
+    struct SemaphoreImpl* semaphoreImpl = new SemaphoreImpl();
+    semaphore->semaphore = (union Data*)semaphoreImpl;
+    semaphoreImpl->value =  n; // initial counter value
+    semaphoreImpl->waitThreadQueue = createSingleLinkedQueue(context); // contexts that found the value at 0 are queued here
+    semaphoreImpl->lock = createSpinLock(context); // taken around every P and V operation
+    semaphore->p = C_pOperationSemaphoreImpl;
+    semaphore->v = C_vOperationSemaphoreImpl;
+    return semaphore;
+}
+
+__code pOperationSemaphoreImpl(struct SemaphoreImpl* semaphore, __code next(...)) {
+    struct Lock* lock = semaphore->lock; // P step 1: take the semaphore's lock
+    goto lock->doLock(pOperationSemaphoreImpl1);
+}
+
+__code pOperationSemaphoreImpl1(struct SemaphoreImpl* semaphore, __code next(...)) {
+    if (semaphore->value == 0) { // nothing available: park this context
+        context->next= C_pOperationSemaphoreImpl; // the parked context restarts at the P operation, presumably once a V respawns it
+        struct Queue* queue = semaphore->waitThreadQueue;
+        goto queue->put(context, pOperationSemaphoreImpl2); // put this context(thread, process)
+    }
+    semaphore->value--; // available: take one unit
+    struct Lock* lock = semaphore->lock;
+    goto lock->doUnlock(next(...));
+}
+
+__code pOperationSemaphoreImpl2(struct SemaphoreImpl* semaphore, __code next(...)) {
+    struct Lock* lock = semaphore->lock; // the context is queued, release the lock before leaving
+    goto lock->doUnlock(pOperationSemaphoreImpl3);
+}
+
+__code pOperationSemaphoreImpl3(struct SemaphoreImpl* semaphore, struct Worker* worker, __code next(...)) {
+    goto worker->taskReceive(); // goto scheduler
+}
+
+__code pOperationSemaphoreImpl3_stub(struct Context* context) {
+    // switch worker context
+    struct Context* workerContext = context->worker->worker->CPUWorker.context; // the gear below runs on the worker's own context, not the parked task's
+    SemaphoreImpl* semaphoreImpl = (SemaphoreImpl*)GearImpl(context, Semaphore, semaphore);
+    goto pOperationSemaphoreImpl3(workerContext,
+                                  semaphoreImpl,
+                                  context->worker,
+                                  Gearef(context, Semaphore)->next);
+}
+
+__code vOperationSemaphoreImpl(struct SemaphoreImpl* semaphore, __code next(...)) {
+    struct Lock* lock = semaphore->lock; // V step 1: take the semaphore's lock
+    goto lock->doLock(vOperationSemaphoreImpl1);
+}
+
+__code vOperationSemaphoreImpl1(struct SemaphoreImpl* semaphore, __code next(...)) {
+    semaphore->value++; // give one unit back
+    struct Queue* queue = semaphore->waitThreadQueue;
+    goto queue->isEmpty(vOperationSemaphoreImpl2, vOperationSemaphoreImpl4); // NOTE(review): which of the two continuations is the empty case depends on Queue.h — confirm
+}
+
+__code vOperationSemaphoreImpl2(struct SemaphoreImpl* semaphore, __code next(...)) {
+    struct Queue* queue = semaphore->waitThreadQueue; // take one parked context off the wait queue
+    goto queue->take(vOperationSemaphoreImpl3);
+}
+
+__code vOperationSemaphoreImpl3(struct SemaphoreImpl* semaphore, struct Context* waitTask, __code next(...)) {
+    struct TaskManager* taskManager = waitTask->taskManager; // the parked context carries its own task manager
+    goto taskManager->spawn(waitTask, vOperationSemaphoreImpl4); //notify
+}
+
+__code vOperationSemaphoreImpl3_stub(struct Context* context) {
+    SemaphoreImpl* semaphoreImpl = (SemaphoreImpl*)GearImpl(context, Semaphore, semaphore);
+    struct Context* waitTask = &Gearef(context, Queue)->data->Context; // the context just taken from the queue, read from the Queue gear's data field
+    goto vOperationSemaphoreImpl3(context,
+                                  semaphoreImpl,
+                                  waitTask,
+                                  Gearef(context, Semaphore)->next);
+}
+
+__code vOperationSemaphoreImpl4(struct SemaphoreImpl* semaphore, __code next(...)) {
+    struct Lock* lock = semaphore->lock; // final V step: release the lock and continue to next
+    goto lock->doUnlock(next(...));
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/boundedBuffer/consumer.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/boundedBuffer/consumer.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,25 @@
+#include "../../../context.h"
+#include <stdio.h>
+#interface "Buffer.h"
+
+__code consumer(struct Buffer* buffer, struct Integer* length, __code next(...), struct LoopCounter* loopCounter) {
+    int i = loopCounter->i; // number of items requested so far
+    if (i < length->value) {
+        loopCounter->i++;
+        goto buffer->take(consumer1); // take one item, consumer1 prints it
+    }
+    goto next(...);
+}
+
+__code consumer1(struct Buffer* buffer, struct Integer* length, __code next(...), struct Node* node) {
+    printf("getData %d\n", node->value->Integer.value); // the taken item is read as a Node whose value is an Integer
+    goto consumer(); // back to the loop for the next item
+}
+
+__code consumer1_stub(struct Context* context) {
+    goto consumer1(context,
+                   &context->data[context->idg]->Buffer,
+                   &context->data[context->idg+1]->Integer,
+                   context->next,
+                   &Gearef(context, Buffer)->data->Node); // the taken item is read back from the Buffer gear's data field
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/boundedBuffer/initBuffer.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/boundedBuffer/initBuffer.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,5 @@
+#include "../../../context.h"
+
+__code initBuffer(__code next(struct Buffer* output, struct Integer* output1, ...)) {
+    goto next(output, output1, ...); // does no work of its own, both output gears go straight to next
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/boundedBuffer/main.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/boundedBuffer/main.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,71 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include "../../../context.h"
+#interface "TaskManager.h"
+
+int cpu_num = 1;
+int length = 100;
+int buffer_size = 10;
+int gpu_num = 0;
+int CPU_ANY = -1;
+int CPU_CUDA = -1;
+
+__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { // resets the loop counter and creates the task manager from cpu_num and gpu_num
+    // loopCounter->tree = createRedBlackTree(context);
+    loopCounter->i = 0;
+    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0);
+    goto code1();
+}
+
+__code code1(struct Timer* timer) { // prints the run configuration, then continues to createTask1; timer is not used here
+    printf("cpus:\t\t%d\n", cpu_num);
+    printf("gpus:\t\t%d\n", gpu_num);
+    printf("length:\t\t%d\n", length);
+    goto createTask1();
+}
+
+__code code2(struct TaskManager* taskManager) { // requests task manager shutdown with exit_code as the continuation
+    goto taskManager->shutdown(exit_code);
+}
+
+__code code2_stub(struct Context* context) { // hand-written stub: passes the gear stored in the TaskManager interface's taskManager field
+    goto code2(context, &Gearef(context, TaskManager)->taskManager->TaskManager);
+}
+
+__code createTask1(struct TaskManager* taskManager) { // launches three producers, three consumers and initBuffer, all on the same buffer and length
+    struct Buffer* buffer = createBoundedBuffer(context, buffer_size);
+    Integer* len = new Integer();
+    len->value = length; // every task receives this same item count
+    par goto producer(buffer, len, __exit);
+    par goto producer(buffer, len, __exit);
+    par goto producer(buffer, len, __exit);
+    par goto consumer(buffer, len, __exit);
+    par goto consumer(buffer, len, __exit);
+    par goto consumer(buffer, len, __exit);
+    par goto initBuffer(buffer, len, __exit);
+    goto code2();
+}
+
+void init(int argc, char** argv) { // parses -cpu N, -l N, -bufferSize N and -cuda into the file-level globals
+    for (int i = 1; argv[i]; ++i) {
+        if (strcmp(argv[i], "-cpu") == 0 && argv[i+1]) // a value-taking flag given last is ignored instead of handing NULL to atoi
+            cpu_num = (int)atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-l") == 0 && argv[i+1])
+            length = (int)atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-bufferSize") == 0 && argv[i+1])
+            buffer_size = (int)atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-cuda") == 0) {
+            gpu_num = 1;
+            CPU_CUDA = 0;
+        }
+    }
+}
+
+int main(int argc, char** argv) {
+    init(argc, argv); // command line options are applied to the globals before the first code gear runs
+    goto initDataGears();
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/boundedBuffer/producer.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/boundedBuffer/producer.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,14 @@
+#include "../../../context.h"
+#interface "Buffer.h"
+
+__code producer(struct Buffer* buffer, struct Integer* length, __code next(...), struct LoopCounter* loopCounter) { // puts one Node per pass while the counter is below length
+    int i = loopCounter->i;
+    if (i < length->value) {
+        Node* node = new Node();
+        node->value = (union Data*)new Integer();
+        node->value->Integer.value = i; // payload is the current counter value
+        loopCounter->i++;
+        goto buffer->put(node, producer);
+    }
+    goto next(...);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/calc/add.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/calc/add.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,7 @@
+#include "../../../context.h"
+#include <stdio.h>
+__code add(struct Integer* input1, struct Integer* input2, __code next(struct Integer* output, ...)) { // stores input1 + input2 in output, prints the equation, continues to next
+    output->value = input1->value + input2->value;
+    printf("%d + %d = %d\n", input1->value, input2->value, output->value);
+    goto next(output, ...);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/calc/calc.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/calc/calc.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,87 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include "../../../context.h"
+#interface "TaskManager.h"
+
+int cpu_num = 1;
+int length = 100;
+int gpu_num = 0;
+int CPU_ANY = -1;
+int CPU_CUDA = -1;
+
+__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { // resets the loop counter and creates the task manager from cpu_num and gpu_num
+    // loopCounter->tree = createRedBlackTree(context);
+    loopCounter->i = 0;
+    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0);
+    goto meta(context, C_code1);
+}
+
+__code code1(struct Timer* timer) { // prints the run configuration, then continues to createTask1; timer is not used here
+    printf("cpus:\t\t%d\n", cpu_num);
+    printf("gpus:\t\t%d\n", gpu_num);
+    printf("length:\t\t%d\n", length);
+    /* puts("queue"); */
+    /* print_queue(context->data[ActiveQueue]->queue.first); */
+    /* puts("tree"); */
+    /* print_tree(context->data[Tree]->tree.root); */
+    /* puts("result"); */
+
+    //time->next = C_code2;
+    goto meta(context, C_createTask1);
+    //goto meta(context, C_start_time);
+}
+
+__code code1_stub(struct Context* context) { // hand-written stub: passes the Timer interface gear itself
+    goto code1(context, Gearef(context, Timer));
+}
+
+
+__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { // runs createTask2 once per pass for length passes, then shuts the task manager down
+    int i = loopCounter->i;
+
+    if (i < length) {
+        loopCounter->i++;
+        goto meta(context, C_createTask2);
+    }
+
+    loopCounter->i = 0; // counter is reset once all passes are done
+    taskManager->next = C_exit_code; // continuation used after shutdown
+    goto meta(context, taskManager->taskManager->TaskManager.shutdown);
+}
+
+__code createTask2(struct LoopCounter* loopCounter) { // spawns mult, add and initIntegerDataGears wired through five fresh Integer gears
+    Integer* integer1 = new Integer();
+    Integer* integer2 = new Integer();
+    Integer* integer3 = new Integer();
+    par goto mult(integer1, integer2, integer3, __exit);
+
+    Integer* integer4 = new Integer();
+    Integer* integer5 = new Integer();
+    par goto add(integer4, integer5, integer1, __exit);
+
+    par goto initIntegerDataGears(integer2, integer4, integer5, __exit);
+
+    goto createTask1();
+}
+
+void init(int argc, char** argv) { // parses -cpu N, -l N and -cuda into the file-level globals
+    for (int i = 1; argv[i]; ++i) {
+        if (strcmp(argv[i], "-cpu") == 0 && argv[i+1]) // a value-taking flag given last is ignored instead of handing NULL to atoi
+            cpu_num = (int)atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-l") == 0 && argv[i+1])
+            length = (int)atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-cuda") == 0) {
+            gpu_num = 1;
+            CPU_CUDA = 0;
+        }
+    }
+}
+
+int main(int argc, char** argv) {
+    init(argc, argv); // command line options are applied to the globals before the first code gear runs
+    goto initDataGears();
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/calc/initIntegerDataGears.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/calc/initIntegerDataGears.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,8 @@
+#include "../../../context.h"
+#include <stdio.h>
+__code initIntegerDataGears(__code next(struct Integer* output1, struct Integer* output2, struct Integer* output3, ...)) { // fills the three output gears with 1, 2 and 3
+    output1->value = 1;
+    output2->value = 2;
+    output3->value = 3;
+    goto next(output1, output2, output3, ...);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/calc/mult.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/calc/mult.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,7 @@
+#include "../../../context.h"
+#include <stdio.h>
+__code mult(struct Integer* input1, struct Integer* input2, __code next(struct Integer* output, ...)) { // stores input1 times input2 in output, prints the equation, continues to next
+    output->value = input1->value * input2->value;
+    printf("%d * %d = %d\n", input1->value, input2->value, output->value);
+    goto next(output, ...);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/twice/CUDAtwice.cu
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/twice/CUDAtwice.cu	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,17 @@
+extern "C" {
+    struct Array {
+        int prefix;   // number of consecutive elements each thread processes
+        int* array;
+    } Array;
+    // Kernel: each thread doubles, in place, its own run of `prefix` ints inside `array`.
+    __global__ void twice(int* array, struct Array* inputArray) {
+        int i = 0;
+        int prefix = inputArray->prefix;
+C_twice:
+        if (i < prefix) {
+            array[i+(blockIdx.x*blockDim.x+threadIdx.x)*prefix] = array[i+(blockIdx.x*blockDim.x+threadIdx.x)*prefix]*2; // array holds plain ints, so no member access
+            i++;
+            goto C_twice;
+        }
+    }
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/twice/createArray.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/twice/createArray.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,22 @@
+#include <stdio.h>
+#include "../../../context.h"
+#interface "Timer.h"
+
+extern int length;
+extern int split;
+
+__code createArray(__code next(struct Array* output, struct Timer* output1, ...), struct LoopCounter* loopCounter) { // allocates the array on the first pass, then writes one element per pass
+    int i = loopCounter->i;
+    if (i == 0){
+        output->array = (Integer*)ALLOCATE_ARRAY(context, Integer, length);
+        output->prefix = length/split; // elements per task
+    }
+    if (i == GET_LEN(output->array)){
+        printf("created Array\n");
+        loopCounter->i = 0; // counter is reset before the timer is started
+        goto output1->start(next(...));
+    }
+    output->array[i].value = i; // element i initially holds its own index
+    loopCounter->i++;
+    goto createArray();
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/twice/main.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/twice/main.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,75 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../../../context.h"
+#interface "TaskManager.h"
+
+int cpu_num = 1;
+int length = 102400;
+int split = 8;
+int* array_ptr;
+int gpu_num = 0;
+int CPU_ANY = -1;
+int CPU_CUDA = -1;
+
+__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { // resets the loop counter and creates the task manager from cpu_num and gpu_num
+    // loopCounter->tree = createRedBlackTree(context);
+    loopCounter->i = 0;
+    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0);
+    goto code1();
+}
+
+__code code1(struct LoopCounter* loopCounter) { // prints the run configuration, then continues to createTask1; loopCounter is not used here
+    printf("cpus:\t\t%d\n", cpu_num);
+    printf("gpus:\t\t%d\n", gpu_num);
+    printf("length:\t\t%d\n", length);
+    printf("length/task:\t%d\n", length/split);
+    /* puts("queue"); */
+    /* print_queue(context->data[ActiveQueue]->queue.first); */
+    /* puts("tree"); */
+    /* print_tree(context->data[Tree]->tree.root); */
+    /* puts("result"); */
+    goto createTask1();
+}
+
+
+__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) { // spawns createArray, twice iterated over split, and printArray, then continues to code2
+    Array* array1 = new Array();
+    Array* array2 = new Array();
+    Timer* timer = createTimerImpl(context);
+
+    par goto createArray(array1, timer, __exit);
+    par goto twice(array1, array2, iterate(split), __exit);
+    par goto printArray(array2, timer, __exit);
+    goto code2();
+}
+
+__code code2(struct TaskManager* taskManager) { // requests task manager shutdown with exit_code as the continuation
+    goto taskManager->shutdown(exit_code);
+}
+
+__code code2_stub(struct Context* context) { // hand-written stub: passes the gear stored in the TaskManager interface's taskManager field
+    goto code2(context, &Gearef(context, TaskManager)->taskManager->TaskManager);
+}
+
+void init(int argc, char** argv) { // parses -cpu N, -l N, -s N and -cuda into the file-level globals
+    for (int i = 1; argv[i]; ++i) {
+        if (strcmp(argv[i], "-cpu") == 0 && argv[i+1]) // a value-taking flag given last is ignored instead of handing NULL to atoi
+            cpu_num = (int)atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-l") == 0 && argv[i+1])
+            length = (int)atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-s") == 0 && argv[i+1])
+            split = (int)atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-cuda") == 0) {
+            gpu_num = 1;
+            CPU_CUDA = 0;
+        }
+    }
+}
+
+int main(int argc, char** argv) {
+    init(argc, argv); // command line options are applied to the globals before the first code gear runs
+    goto initDataGears();
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/twice/printArray.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/twice/printArray.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,22 @@
+#include "../../../context.h"
+#interface "Timer.h"
+#include <stdio.h>
+
+__code printArray(struct Array* array, struct Timer* inputTimer, __code next(...)){ // ends the timer first, then continues to printArray1
+    goto inputTimer->end(printArray1);
+}
+
+__code printArray1(struct Array* array, __code next(...), struct LoopCounter* loopCounter){ // checks one element per pass against twice its index
+    int i = loopCounter->i;
+    //printf("%d\n", array->array[i]);
+    if (i < GET_LEN(array->array)) {
+        if (array->array[i].value == i*2) {
+            loopCounter->i++;
+            goto printArray1();
+        } else {
+            printf("wrong result\n"); // first mismatch ends the check; remaining elements are not examined
+        }
+    }
+   loopCounter->i = 0;
+    goto next(...);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/examples/twice/twice.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/examples/twice/twice.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,53 @@
+#include <stdio.h>
+
+#include "../../../context.h"
+
+#ifdef USE_CUDAWorker
+extern void cudaLoadFunction(struct Context* context, char* filename, char* function);
+#endif
+
+__code twice(struct Array* array, struct MultiDim* multiDim, __code next(struct Array* output, ...), struct LoopCounter* loopCounter) { // doubles one element per pass inside the slice selected by multiDim x
+    int i = loopCounter->i;
+    int index = multiDim->x; // slice number; each slice is prefix elements long
+    if (i < array->prefix) {
+        array->array[i+index*array->prefix].value = array->array[i+index*array->prefix].value*2;
+        loopCounter->i++;
+
+        goto meta(context, C_twice);
+    }
+
+    loopCounter->i = 0;
+    output->array = array->array; // output shares the input storage; the doubling was done in place
+    goto next(output, ...);
+}
+
+__code twice_stub(struct Context* context) { // hand-written stub: GPU launch when context->gpu is set, otherwise the CPU code gear
+#ifdef USE_CUDAWorker
+    if (context->gpu) {
+        Array* inputArray  = &context->data[context->idg]->Array;
+        Array* outputArray = &context->data[context->odg]->Array;
+        CUDABuffer* buffer = &ALLOCATE(context, CUDABuffer)->CUDABuffer;
+        buffer->inputData = (union Data**)ALLOCATE_PTR_ARRAY(context, Array, 2);
+        buffer->inputData[0] = (union Data*)inputArray->array; // first kernel argument: the element storage
+        buffer->inputData[1] = (union Data*)inputArray;        // second kernel argument: the Array gear itself
+        buffer->outputData = NULL;
+        buffer->inputLen = 2;
+        buffer->outputLen = 0;
+        Executor* executor = context->worker->worker->CUDAWorker.executor;
+        executor->executor->CUDAExecutor.buffer = buffer;
+        cudaLoadFunction(context, "c/examples/twice/CUDAtwice.ptx", "twice");
+        outputArray->array = inputArray->array; // output shares the input storage
+        Gearef(context, Executor)->executor = (union Data*)executor;
+        Gearef(context, Executor)->task = context;
+        Gearef(context, Executor)->next = context->next;
+        goto meta(context, executor->read);
+    }
+#endif
+    Array** O_output = (struct Array **)&context->data[context->odg]; // address of the output gear slot
+    goto twice(context,
+               &context->data[context->idg]->Array,
+               &context->data[context->idg+1]->MultiDim,
+               context->next,
+               O_output,
+               Gearef(context, LoopCounter));
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/generate_context.pl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/generate_context.pl	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,249 @@
+#!/usr/bin/perl
+
+use Getopt::Std;
+use strict;
+
+# 
+# generate Gears OS context header and initializer from CbC sources
+#
+# CodeGear
+# 
+# get stub information from # *.c
+#     __code taskManager_stub(struct Context* context) {
+# 
+# generate CodeGear index in context.h
+#     C_taskManager,
+# 
+# generate CodeGear stub reference in context.h
+#     extern __code taskManager_stub(struct Context*);
+# 
+# generate CodeGear stub reference in $name-context.h for each module
+#     context->code[C_taskManager]   = taskManager_stub;
+# 
+# DataGear
+# 
+# get DataGear information from context.h
+#     struct Worker {
+#         int id;
+#         struct Context* contexts;
+#         enum Code execute;
+#         enum Code taskSend;
+#         enum Code taskRecive;
+#         enum Code shutdown;
+#         struct Queue* tasks;
+#     } Worker;
+# 
+# generate typedefs and DataGear index in context.h
+#     typedef struct Worker Worker;
+#     D_Worker,
+# 
+# generate DataGear allocator in context.h
+#      ALLOC_DATA(context, Worker);
+#
+
+my $ddir = "c";
+
+our($opt_o,$opt_d,$opt_h,$opt_w);
+getopts('o:d:hw');
+
+# -h only prints the usage, so handle it before anything touches the file system
+if ($opt_h) {
+    print "$0  [-o name] [-d distdir] [-w] [-h]\n";
+    exit;
+}
+
+my $name = $opt_o?$opt_o:"gears";   # prefix of the generated <name>-context.c
+if ($opt_d) {
+    $ddir = $opt_d;                 # output directory, "c" by default
+}
+
+if ( ! -d $ddir) {
+    mkdir $ddir or die("can't mkdir $ddir $!");
+}
+
+my %codeGear;      # CodeGear name => file whose stub was seen
+my %dataGear;      # DataGear name => 'struct' or 'primitive'
+my %constructor;   # implementation name => [interface, constructor argument text]
+
+{
+  # Build context.h through Gears::Context, loaded from lib/ beside this script.
+  use FindBin;
+  use lib "$FindBin::Bin/lib";
+
+  use File::Spec;
+  use Gears::Context;
+  use Getopt::Std;
+
+  # -w selects context.h beside the script as output; otherwise "stdout" is passed.
+  my $destination = $opt_w ? "$FindBin::Bin/context.h" : "stdout";
+  my @sources     = map { File::Spec->rel2abs($_) } @ARGV;
+  my $context     = Gears::Context->new(compile_sources => \@sources, find_root => $FindBin::Bin, output => $destination);
+
+  my $used_gears   = $context->extraction_dg_compile_sources();
+  my $header_paths = $context->set_data_gear_header_path();
+  my $gear_to_path = $context->update_dg_each_header_path($used_gears,$header_paths);
+
+  #use DDP {deparse =>1};
+  my $impl_tree = $context->createImplTree_from_header($gear_to_path);
+  $context->tree2create_context_h($impl_tree);
+}
+
+# Pass 1: collect stub information from the files named on the command line.
+for (@ARGV) {
+    next if (/context.c/);   # skip names matching context.c
+    &getStubInfo($_);
+}
+
+my (%mCodeGear) = (%codeGear);   # snapshot taken here: only these gears get code-table entries in generateContext
+
+# Pass 2: additionally collect every *.c and test/*.c in the current directory.
+while (<*.c test/*.c>) {
+    next if (/context.c/);
+    &getStubInfo($_);
+}
+
+&generateContext();
+
+sub getStubInfo {
+    # Records stubs and create<Impl> constructors found in $filename, then DataGear names from context.h.
+    my ($filename) = @_;
+    open my $fd,"<",$filename or die("can't open $filename $!");
+    # Read into a lexical: callers invoke this from inside for/while loops over $_,
+    # and a bare while (<$fd>) would overwrite their aliased element.
+    while (my $line = <$fd>) {
+        if ($line =~ /^__code (\w+)_stub\(struct  *Context *\* *context\)/) {
+            $codeGear{$1} = $filename;
+        } elsif ($line =~ /^(\w+)(\*)+  *create(\w+)\(([^]]*)\)/) {
+            my ($interface, $implementation, $constructorArgs) = ($1, $3, $4);
+            $constructor{$implementation} = [$interface, $constructorArgs];
+        }
+    }
+
+    # DataGears are the entries between "union Data" and "union Data end" in context.h.
+    open my $cx,"<","context.h" or die("can't open context.h $!");
+    my $inUnionData = 0;
+    while (my $line = <$cx>) {
+        if (! $inUnionData) {
+            $inUnionData = 1 if ($line =~ /^union Data/);   # entries start after this line
+            next;
+        }
+        last if ($line =~ /union Data end/);
+        if ($line =~ /struct (\w+) \{/) {
+            $dataGear{$1} = 'struct';
+        } elsif ($line =~ /^\s{4}(\w+) (\w+);/) { # primitive type
+            $dataGear{$1} = 'primitive';
+        }
+        $dataGear{"Context"} = "struct";
+    }
+}
+
+sub generateContext {   # writes extern.h, enumCode.h, <name>-context.c, enumData.h, typedefData.h and dataGearInit.c under $ddir
+    $codeGear{"start_code"} = "$ddir/$name-context.c";   # start_code and exit_code are emitted by this script itself
+    $codeGear{"exit_code"} = "$ddir/$name-context.c";
+    $mCodeGear{"start_code"} = "$ddir/$name-context.c";
+    $mCodeGear{"exit_code"} = "$ddir/$name-context.c";
+    open my $fd,">","$ddir/extern.h" or die("can't open $ddir/extern.h $!");
+    for my $code ( sort keys %codeGear ) {   # one extern per known stub
+        print $fd "extern __code ${code}_stub(struct Context*);\n";
+    }
+    for my $impl ( sort keys %constructor ) {   # one extern per create<Impl> constructor
+        my ($interface, $constructorArgs) = @{$constructor{$impl}};
+        print $fd "extern ${interface}* create${impl}($constructorArgs);\n";
+    }
+    print $fd "\n";
+
+    open my $fd,">","$ddir/enumCode.h" or die("can't open $ddir/enumCode.h $!");   # $fd is re-declared for each output file
+    print $fd "enum Code {\n";
+    for my $code ( sort keys %codeGear ) {   # enum order is the sorted CodeGear name order
+        print $fd "    C_${code},\n";
+    }
+    print $fd "};\n";
+    # the code table below is filled from %mCodeGear only, not from every known gear
+    my $code_init = ''; # text spliced into initContext
+    for my $code ( sort keys %mCodeGear ) {
+        $code_init .=  "    context->code[C_${code}]    = ${code}_stub;\n";
+    }
+
+    my $data_num = keys(%dataGear);
+    $data_num++;   # presumably accounts for the extra D_Code entry written to enumData.h -- TODO confirm
+my $context_c = << "EOFEOF";
+#include <stdlib.h>
+
+#include "../context.h"
+
+void initContext(struct Context* context) {
+    context->heapLimit = sizeof(union Data)*ALLOCATE_SIZE;
+    context->code = (__code(**) (struct Context*)) NEWN(ALLOCATE_SIZE, void*);
+    context->data = NEWN(ALLOCATE_SIZE, union Data*);
+    context->heapStart = NEWN(context->heapLimit, char);
+    context->heap = context->heapStart;
+    // context->codeNum = Exit;
+
+$code_init
+
+#include "dataGearInit.c"
+    context->dataNum = $data_num;
+}
+EOFEOF
+
+    open my $fd,">","$ddir/$name-context.c" or die("can't open $ddir/$name-context.c $!");
+    print $fd $context_c;
+
+my $meta_call = <<"EOFEOF";
+__code meta(struct Context* context, enum Code next) {
+    // printf("meta %d\\n",next);
+    goto (context->code[next])(context);
+}
+
+__code parGotoMeta(struct Context* context, enum Code next) {
+    context->task     = NULL;
+    context->taskList = NULL;
+    goto (context->code[Gearef(context, TaskManager)->taskManager->TaskManager.spawnTasks])(context);
+}
+
+__code start_code(struct Context* context) {
+    goto meta(context, context->next);
+}
+
+__code start_code_stub(struct Context* context) {
+    goto start_code(context);
+}
+
+__code exit_code(struct Context* context) {
+    free(context->code);
+    free(context->data);
+    free(context->heapStart);
+    goto exit(0);
+}
+
+__code exit_code_stub(struct Context* context) {
+    goto exit_code(context);
+}    
+
+// end context_c
+EOFEOF
+
+print $fd $meta_call;   # appended to <name>-context.c after initContext
+
+open my $fd,">","$ddir/enumData.h" or die("can't open $ddir/enumData.h $!");
+print $fd "enum DataType {\n";
+print $fd "    D_Code,\n";   # D_Code is always the first enumerator
+for my $data ( sort keys %dataGear ) {
+    print $fd "    D_${data},\n";
+}
+print $fd "};\n\n";
+
+open my $fd,">","$ddir/typedefData.h" or die("can't open $ddir/typedefData.h $!");
+for my $data ( sort keys %dataGear ) {
+    if ($dataGear{$data} eq 'struct') {   # entries marked 'primitive' get no typedef
+        print $fd "typedef struct ${data} ${data};\n";
+    }
+}
+
+open my $fd,">","$ddir/dataGearInit.c" or die("can't open $ddir/dataGearInit.c $!");
+for my $data ( sort keys %dataGear ) {   # this file is #included inside initContext above
+    print $fd "    ALLOC_DATA(context, ${data});\n";
+}
+}
+
+# end
diff -r a4cab67624f7 -r 9146d6017f18 src/generate_stub.pl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/generate_stub.pl	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,636 @@
+#!/usr/bin/perl
+
+use strict;
+use Getopt::Std;
+use File::Path qw(make_path);
+
+# interface.h
+# typedef struct Worker {
+#         int id;
+#         struct Context* contexts;
+#         enum Code execute;
+#         enum Code taskSend;
+#         enum Code taskRecive;
+#         enum Code shutdown;
+#         struct Queue* tasks;
+#     } Worker;
+
+our($opt_o,$opt_d,$opt_h);
+getopts('o:d:h');
+
+# Output directory: "." unless -d names one; a missing -d directory is created.
+my $dir = $opt_d ? $opt_d : ".";
+unless (! $opt_d || -d $dir) {
+    make_path $dir;
+}
+
+# Only *.cbc arguments are processed; each is scanned for gear
+# declarations first and then its output file is generated.
+my @cbcSources = grep { /\.cbc$/ } @ARGV;
+for my $fn (@cbcSources) {
+    &getDataGear($fn);
+    &generateDataGear($fn);
+}
+
+my %var;
+my %code;
+my %dataGearVar;
+my %outputVar;       # output var initializer
+my %outputArgs;      # continuation's output variables
+my %dataGear;
+my %dataGearName;
+my %generic;
+my %dataGearVarType;
+my %codeGear;
+my $implementation;
+my $interface;
+
+# interface definition
+#
+# typedef struct Stack<Type, Impl>{
+#         Type* stack;
+#         Type* data;
+#         Type* data1;
+#         __code whenEmpty(...);
+#         __code clear(Impl* stack,__code next(...));
+#         __code push(Impl* stack,Type* data, __code next(...));
+#         __code pop(Impl* stack, __code next(Type*, ...));
+#         __code pop2(Impl* stack, Type** data, Type** data1, __code next(Type**, Type**, ...));
+#         __code isEmpty(Impl* stack, __code next(...), __code whenEmpty(...));
+#         __code get(Impl* stack, Type** data, __code next(...));
+#         __code get2(Impl* stack,..., __code next(...));
+#         __code next(...);
+# } Stack;
+#
+# calling example
+#
+# goto nodeStack->push((union Data*)node, stackTest3);
+#
+# generated meta level code
+#
+# Gearef(context, Stack)->stack = (union Data*)nodeStack;
+# Gearef(context, Stack)->data = (union Data*)node;
+# Gearef(context, Stack)->next = C_stackTest3;
+# goto meta(context, nodeStack->push);
+
+sub getDataGear {   # scans $filename for typedef'd gears, the create<Impl> constructor, and referenced .cbc/.h files
+    my ($filename) = @_;
+    my ($codeGearName, $name, $inTypedef);
+    open my $fd,"<",$filename or die("can't open $filename $!");
+    while (<$fd>) {
+        if (! $inTypedef) {
+            if (/^typedef struct (\w+)\s*<(.*)>/) {
+                $inTypedef = 1;
+                $name = $1;
+                $dataGear{$name} = $_;
+                $var{$name} = {};
+                $code{$name} = {};
+                $generic{$name} = [split(/,/,$2)];   # array ref of generic parameters, same shape as the [] used below
+            } elsif (/^typedef struct (\w+)/) {
+                $inTypedef = 1;
+                $name = $1;
+                $dataGear{$name} = $_;
+                $var{$name} = {};
+                $code{$name} = {};
+                $generic{$name} = [];
+            } elsif (/^(\w+)(\*)+ create(\w+)\(/) {
+                if (defined $interface) {   # only one constructor may be seen per run
+                   die "duplicate interface $interface\n";
+                }
+                $interface = $1;
+                $implementation = $3;
+                if ( -f "$interface.cbc") {
+                    &getDataGear("$interface.cbc");
+                }
+            } elsif(/^(.*)par goto (\w+)\((.*)\)/) {
+                my $codeGearName = $2;
+                if ($filename =~ /^(.*)\/(.*)/) {   # look for the target next to the current file
+                    $codeGearName = "$1/$codeGearName";
+                }
+                if ( -f "$codeGearName.cbc") {
+                    &getCodeGear("$codeGearName.cbc");
+                }
+            } elsif(/^#interface "(.*)"/) {
+                # use interface
+                my $interfaceHeader = $1;
+                next if ($interfaceHeader =~ /context.h/);
+                if (-f $interfaceHeader) {
+                    &getDataGear("$interfaceHeader");
+                    &getCodeGear("$interfaceHeader");
+                }
+            } elsif (/^\_\_code (\w+)\((.*)\)(.*)/) {
+                my $codeGearName = $1;
+                if ($filename =~ /^(.*)\/(.*)/) {
+                    $codeGearName = "$1/$codeGearName";
+                }
+                if ( -f "$codeGearName.cbc") {
+                    &getCodeGear("$codeGearName.cbc");
+                }
+            }
+            next;
+        }
+        # gather type name and type
+        $dataGear{$name} .= $_;   # keep the raw typedef text as well
+        if (/^\s*(.*)\s+(\w+);$/ ) {
+            my $ttype = $1;
+            my $tname = $2;
+            if ($ttype =~ /^(union|struct)?\s*(\w+)/) {   # reduce the type to its first bare word
+                $ttype = $2;
+            }
+            $var{$name}->{$tname} = $ttype;
+        }
+        if (/^}/) {   # a closing brace in column 0 ends the typedef
+            $inTypedef = 0;
+        }
+    }
+
+}
+
+sub getCodeGear {   # fills %code with interface method signatures and %codeGear with each CodeGear's inputs and outputs
+    my ($filename) = @_;
+    open my $fd,"<",$filename or die("can't open $filename $!");
+    my ($name,$impln);
+    while (<$fd>) {
+        if (/^(\w+)(\*)+ create(\w+)\(/) {
+            $name = $1;
+            $impln = $3;   # assigned but not read again in this sub
+        } elsif(/^typedef struct (.*)<.*>\s*{/) {
+            $name = $1;
+        }
+        if (defined $name) {   # once a name is set, only method declarations are examined
+            if (/^\s*\_\_code (\w+)\((.*)\);/) {
+                my $args = $2;
+                my $method = $1;
+                $code{$name}->{$method} = [];
+                while($args) {   # each successful substitution removes the matched argument from $args
+                    # replace comma
+                    $args =~ s/(^\s*,\s*)//;
+                    # continuation case
+                    if ($args =~ s/^(\s)*\_\_code\s+(\w+)\(([^)]*)\)//) {
+                        my $next = $2;
+                        my @args = split(/,/,$3);   # not used afterwards
+                        push(@{$code{$name}->{$method}},"\_\_code $next");
+                    } elsif ($args =~ s/^(struct|union)?\s*(\w+)(\**)\s+(\w+)//) {
+                        my $structType = $1;
+                        my $typeName = $2;
+                        my $ptrType = $3;
+                        my $varName = $4;
+                        my $typeField = lcfirst($typeName);
+                        push(@{$code{$name}->{$method}},"$typeName$ptrType $varName");
+                    } elsif ($args =~ s/(.*,)//) {   # unrecognised text: drop everything up to the last comma
+                    } else {
+                        last;
+                    }
+                }
+            }
+        } elsif (/^\_\_code (\w+)\((.*)\)(.*)/) {
+            my $codeGearName = $1;
+            my $args = $2;
+            my $inputCount = 0;
+            my $outputCount = 0;
+            my $inputIncFlag = 1;   # cleared once the first continuation argument is seen
+            while($args) {
+                if ($args =~ s/(^\s*,\s*)//) {
+                }
+                if ($args =~ s/^(\s)*\_\_code\s+(\w+)\((.*?)\)//) {
+                    $codeGear{$codeGearName}->{"code"}->{$2} = "\_\_code";
+                    $inputIncFlag = 0;
+                    my @outputs = split(/,/,$3);   # typed names inside the continuation are the outputs
+                    for my $output (@outputs) {
+                        if ($output =~ /\s*(struct|union)?\s*(\w+)(\*)?+\s(\w+)/) {
+                            my $type = $2;
+                            my $varName = $4;
+                            $codeGear{$codeGearName}->{"var"}->{$varName} = "$type $outputCount";   # stored as "type position"
+                            $outputCount++;
+                        }
+                    }
+                } elsif ($args =~ s/^(struct|union)?\s*(\w+)(\*)?+\s(\w+)// && $inputIncFlag) {
+                    my $type = $2;
+                    my $varName = $4;
+                    $codeGear{$codeGearName}->{"var"}->{$varName} = "$type $inputCount";   # stored as "type position"
+                    $inputCount++;
+                } elsif ($args =~ s/(.*,)//) {
+                } else {
+                    last;
+                }
+            }
+            $codeGear{$codeGearName}->{"input"} = $inputCount;
+            $codeGear{$codeGearName}->{"output"} = $outputCount;
+        }
+    }
+}
+
+sub generateStub {
+    # Write one stub CodeGear: header line, prepared body text, closing brace. Always returns 1.
+    my ($out, $gearName, $stubBody) = @_;
+    print {$out} "__code ", $gearName, "_stub(struct Context* context) {\n";
+    print {$out} $stubBody, "\n} \n\n";
+    return 1;
+}
+
+sub generateStubArgs {   # appends to $dataGearName{$codeGearName} the stub line that fetches one argument; returns 0 if already present
+    my($codeGearName, $varName, $typeName, $ptrType, $typeField, $interface,$output) = @_;
+    my $varname1 = $output?"O_$varName":$varName;   # outputs are tracked under an O_ prefix
+    for my $n ( @{$dataGearVar{$codeGearName}} ) {
+        # we already have it
+        return 0 if ( $n eq $varname1);
+    }
+    push @{$dataGearVar{$codeGearName}}, $varname1;
+    push @{$dataGearVarType{$codeGearName}}, $typeName;
+    if ($typeName eq $implementation) {   # this branch has no explicit return statement
+        # get implementation
+        $dataGearName{$codeGearName} .= "\t$typeName* $varName = ($typeName*)GearImpl(context, $interface, $varName);\n";
+    } else {
+        # interface var
+        for my $ivar (keys %{$var{$interface}}) {
+            #  input data gear field
+            if ($varName eq $ivar) {
+                if ($typeName eq $var{$interface}->{$ivar}) {   # both name and type must match the interface field
+                    if ($output) {
+                        $dataGearName{$codeGearName} .= "\t$typeName$ptrType* O_$varName = &Gearef(context, $interface)->$varName;\n";
+                        $outputVar{$codeGearName} .= "\t$typeName$ptrType $varName  __attribute__((unused)) = *O_$varName;\n";
+                        return 1;
+                    }
+                    $dataGearName{$codeGearName} .= "\t$typeName$ptrType $varName = Gearef(context, $interface)->$varName;\n";
+                    return 1;
+                }
+            }
+        }
+
+        # interface continuation
+        for my $cName (keys %{$code{$interface}}) {
+            if ($varName eq $cName) {
+                # continuation field
+                $dataGearName{$codeGearName} .= "\tenum Code $varName = Gearef(context, $interface)->$varName;\n";
+                return 1;
+            }
+        }
+
+        # par goto  var
+        for my $var (keys %{$codeGear{$codeGearName}->{"var"}}) {
+            #  input data gear field
+            if ($varName eq $var) {
+                my ($type, $count) = split(/\s/, $codeGear{$codeGearName}->{"var"}->{$var});   # entry format is "type position"
+                if ($typeName eq $type) {
+                    if ($output) {
+                        $dataGearName{$codeGearName} .= "\t$typeName$ptrType* O_$varName = ($typeName $ptrType*)&context->data[context->odg + $count];\n";
+                        $outputVar{$codeGearName} .= "\t$typeName$ptrType $varName = *O_$varName;\n";
+                        return 1;
+                    }
+                    $dataGearName{$codeGearName} .= "\t$typeName$ptrType $varName = &context->data[context->idg + $count]->$typeName;\n";
+                    return 1;
+                }
+            }
+        }
+
+        # par goto continuation
+        for my $cName (keys %{$codeGear{$codeGearName}->{"code"}}) {
+            if ($varName eq $cName) {
+                # continuation field
+                $dataGearName{$codeGearName} .= "\tenum Code $varName = context->next;\n";
+                return 1;
+            }
+        }
+
+        # par goto continuation
+        # global or local variable case
+        if ($typeName eq "Code") {
+            $dataGearName{$codeGearName} .= "\tenum $typeName$ptrType $varName = Gearef(context, $interface)->$varName;\n";
+            return 1;
+        }
+        $dataGearName{$codeGearName} .= "\t$typeName$ptrType $varName = Gearef(context, $typeName);\n";   # fallback when nothing above matched
+        return 1;
+    }
+}
+
+sub generateDataGear {    # translate one .cbc source into C: rewrite CodeGear headers and gotos, emit missing stubs
+    my ($filename) = @_;   # path of the .cbc file to read
+    open my $in,"<",$filename or die("can't open $filename $!");
+    # output file: $opt_o when given, otherwise the first unused "$dir/<name>.c[.N]"
+    my $fn;
+    if ($opt_o) {
+        $fn = $opt_o;
+    } else {
+        my $fn1 = $filename;
+        $fn1 =~ s/\.cbc$/.c/;   # anchored: only the file extension is rewritten
+        my $i = 1;
+        $fn = "$dir/$fn1";
+        while ( -f $fn) {
+            $fn = "$dir/$fn1.$i";
+            $i++;
+        }
+    }
+    if ( $fn =~ m=(.*)/[^/]+$= ) {
+        if (! -d $1) {
+            make_path $1;
+        }
+    }
+    open my $fd,">",$fn or die("can't write $fn $!");
+    # parser state; $prevCodeGearName is the CodeGear whose stub has not been written yet
+    my $prevCodeGearName;
+    my $inTypedef = 0;
+    my $inStub = 0;
+    my $hasParGoto = 0;
+    my $inMain = 0 ;
+    my %stub;
+    my $codeGearName;
+    my %localVarType;
+
+    while (<$in>) {
+        if (! $inTypedef && ! $inStub && ! $inMain) {
+            if (/^typedef struct (\w+) \{/) {
+                $inTypedef = 1;
+            } elsif (/^int main\((.*)\) \{/) {
+                $inMain = 1;
+            } elsif(/^#interface "(.*)"/) {
+                my $interfaceHeader = $1;
+                # #interface lines are not written to the output unless they name context.h
+                next unless ($interfaceHeader =~ /context.h/);
+            } elsif (/^\_\_code (\w+)\((.*)\)(.*)/) {
+                %localVarType = ();   # empty list, not {}: a hash ref here would leave one bogus key
+                $codeGearName = $1;
+                my $args = $2;
+                my $tail = $3;
+                if ($codeGearName =~ /_stub$/) {
+                    # don't touch an already existing stub
+                    $inStub = 1;
+                    $stub{$codeGearName} = 1;
+                    print $fd $_;
+                    next;
+                }
+                if (defined $prevCodeGearName) {
+                    # the stub is generated just before the next CodeGear
+                    if (defined $stub{$prevCodeGearName."_stub"}) {
+                        undef $prevCodeGearName;
+                    } else {
+                        &generateStub($fd,$prevCodeGearName,$dataGearName{$prevCodeGearName});
+                        $stub{$prevCodeGearName."_stub"} = 1;
+                    }
+                }
+                # analyzing CodeGear arguments
+                #      these arguments are extracted from the current context's argument DataGear Interface
+                #      and passed to the CodeGear
+                #      struct Implementation needs special handling
+                #      __code next(...)   --->   enum Code next
+                $prevCodeGearName = $codeGearName;
+                $dataGearVar{$codeGearName} = [];
+                $outputVar{$codeGearName} = "";
+                $outputArgs{$codeGearName} = {};
+                my $newArgs = "struct Context *context,";
+                if ($args=~/^struct Context\s*\*\s*context/) {
+                    $newArgs = "";
+                }
+                if (!$args){
+                    $newArgs = "struct Context *context";
+                }
+                while($args) {
+                    if ($args =~ s/(^\s*,\s*)//) {
+                        $newArgs .= $1;
+                    }
+                    # continuation case
+                    if ($args =~ s/^(\s)*\_\_code\s+(\w+)\(([^)]*)\)//) {
+                        my $next = $2;
+                        my @args = split(/,/,$3);
+                        if (&generateStubArgs($codeGearName, $next, "Code", "", $next, $interface,0) ) {
+                            $newArgs .= "enum Code $next";
+                        }
+                        # analyze continuation arguments
+                        #    output arguments are defined in the Interface; take pointers to them
+                        #    output arguments are put into the Interface DataGear just before the goto
+                        for my $arg (@args) {
+                            $arg =~ s/^\s*//;
+                            last if ($arg =~ /\.\.\./);
+                            $arg =~ s/^(struct|union)?\s*(\w+)(\**)\s(\w+)//;
+                            my $structType = $1;
+                            my $typeName = $2;
+                            my $ptrType = $3;
+                            my $varName = $4;
+                            my $typeField = lcfirst($typeName);
+                            push(@{$outputArgs{$codeGearName}->{$next}}, $varName);
+                            if (&generateStubArgs($codeGearName, $varName, $typeName, $ptrType, $typeField, $interface,1)) {
+                                $newArgs .= ",$structType $typeName **O_$varName";
+                            }
+                        }
+                    } elsif ($args =~ s/^(struct|union)?\s*(\w+)(\**)\s(\w+)//) {
+                        my $structType = $1;
+                        my $typeName = $2;
+                        my $ptrType = $3;
+                        my $varName = $4;
+                        my $typeField = lcfirst($typeName);
+                        $newArgs .= $&;    # assuming no duplicate
+                        &generateStubArgs($codeGearName, $varName, $typeName, $ptrType, $typeField, $interface,0);
+                    } elsif ($args =~ s/(.*,)//) {
+                        $newArgs .= $1;
+                    } else {
+                        $newArgs .= $args;
+                        last;
+                    }
+                }
+                # generate the goto statement from the stub to the CodeGear in the buffer
+                $dataGearName{$codeGearName} .= "\tgoto $codeGearName(context";
+                for my $arg ( @{$dataGearVar{$codeGearName}}) {
+                    $dataGearName{$codeGearName} .= ", $arg";
+                }
+                $dataGearName{$codeGearName} .= ");";
+                # generate the CodeGear header with the new arguments
+                print $fd "__code $codeGearName($newArgs)$tail\n";
+                if ($outputVar{$codeGearName} ne "") {
+                    # an output data variable can be used before it is written,
+                    # so it is initialized from the Gearef here
+                    print $fd $outputVar{$codeGearName};
+                }
+                next;
+            } elsif (/^(.*)goto (\w+)\-\>(\w+)\((.*)\);/) {
+                # handling goto statement
+                # convert it to the meta call form with two arguments, that is context and enum Code
+                my $prev = $1;
+                my $next = $2;
+                my $method = $3;
+                my $tmpArgs = $4;
+                # split the call-site arguments; they are matched positionally against the prototype below
+                my @args = split(/,/,$tmpArgs);
+                my @types = @{$dataGearVarType{$codeGearName}};
+                my $ntype;
+                my $ftype;
+                for my $v (@{$dataGearVar{$codeGearName}}) {   # find the type of the receiver among the CodeGear arguments
+                    my $t = shift @types;
+                    if ($v eq $next || $v eq "O_$next") {
+                        $ntype = $t;
+                        $ftype = lcfirst($ntype);
+                    }
+                }
+                if (!defined $ntype) {   # otherwise use the type recorded for a local variable
+                    $ntype = $localVarType{$next};
+                    $ftype = lcfirst($ntype);
+                }
+                print $fd "\tGearef(context, $ntype)->$ftype = (union Data*) $next;\n";
+                # put the interface arguments
+                my $prot = $code{$ntype}->{$method};
+                my $i = 1;
+                for my $arg (@args) {
+                    my $pType;
+                    my $pName;
+                    my $p = @$prot[$i];
+                    next if ($p eq $arg);
+                    $p =~ s/^(.*)\s(\w+)//;
+                    $pType = $1;
+                    $pName = $2;
+                    $arg =~ s/^(\s)*(\w+)/$2/;
+                    if ($pType =~ s/\_\_code$//) {
+                        if ($arg =~ /(\w+)\(.*\)/) {
+                            print $fd "\tGearef(context, $ntype)->$pName = $1;\n";
+                        } else {
+                            print $fd "\tGearef(context, $ntype)->$pName = C_$arg;\n";
+                        }
+                    } elsif ($pType =~ /Data\**$/){
+                        print $fd "\tGearef(context, $ntype)->$pName = (union $pType) $arg;\n";
+                    } else {
+                        print $fd "\tGearef(context, $ntype)->$pName = $arg;\n";
+                    }
+                    $i++;
+                }
+                print $fd "${prev}context->before = C_$codeGearName;\n";
+                print $fd "${prev}goto meta(context, $next->$method);\n";
+                next;
+            } elsif(/^(.*)par goto (\w+)\((.*)\);/) {
+                # handling par goto statement
+                # convert it to code that builds a task context and queues it on context->taskList
+                my $prev = $1;
+                my $codeGearName = $2;
+                my $args = $3;
+                my $inputCount = $codeGear{$codeGearName}->{'input'};
+                my $outputCount = $codeGear{$codeGearName}->{'output'};
+                my @iterateCounts;
+                # parse examples 'par goto(.., iterate(10), exit);'
+                if ($args =~ /iterate\((.*)?\),/) {
+                    @iterateCounts = split(/,/,$1);
+                    $inputCount--;
+                }
+                # replace iterate keyword
+                $args =~ s/iterate\((.*)?\),//;
+                my @dataGears = split(/,\s*/, $args);
+                my $nextCodeGear = pop(@dataGears);
+                if (! $hasParGoto) {
+                    $hasParGoto = 1;
+                    print $fd "${prev}struct Element* element;\n";
+                }
+                my $initTask = << "EOFEOF";
+                ${prev}context->task = NEW(struct Context);
+                ${prev}initContext(context->task);
+                ${prev}context->task->next = C_$codeGearName;
+                ${prev}context->task->idgCount = $inputCount;
+                ${prev}context->task->idg = context->task->dataNum;
+                ${prev}context->task->maxIdg = context->task->idg + $inputCount;
+                ${prev}context->task->odg = context->task->maxIdg;
+                ${prev}context->task->maxOdg = context->task->odg + $outputCount;
+EOFEOF
+                print $fd $initTask;
+                if (@iterateCounts) {
+                    print $fd "${prev}context->task->iterate = 0;\n";
+                    my $len = @iterateCounts;
+                    if ($len == 1) {
+                        print $fd "${prev}context->task->iterator = createMultiDimIterator(context, $iterateCounts[0], 1, 1);\n";
+                    } elsif ($len == 2) {
+                        print $fd "${prev}context->task->iterator = createMultiDimIterator(context, $iterateCounts[0], $iterateCounts[1], 1);\n";
+                    } elsif ($len == 3) {
+                        print $fd "${prev}context->task->iterator = createMultiDimIterator(context, $iterateCounts[0], $iterateCounts[1], $iterateCounts[2]);\n";
+                    }
+                }
+                for my $dataGear (@dataGears) {
+                    print $fd "${prev}GET_META($dataGear)->wait = createSynchronizedQueue(context);\n";
+                }
+                for my $i (0..$inputCount-1) {
+                    print $fd "${prev}context->task->data[context->task->idg+$i] = (union Data*)$dataGears[$i];\n";
+                }
+                for my $i (0..$outputCount-1) {
+                    print $fd "${prev}context->task->data[context->task->odg+$i] = (union Data*)$dataGears[$inputCount+$i];\n";
+                }
+                my $putTask = << "EOFEOF";
+                ${prev}element = &ALLOCATE(context, Element)->Element;
+                ${prev}element->data = (union Data*)context->task;
+                ${prev}element->next = context->taskList;
+                ${prev}context->taskList = element;
+EOFEOF
+                print $fd $putTask;
+                next;
+            } elsif (/^(.*)goto (\w+)\((.*)\);/) {
+                # handling goto statement
+                # convert it to the meta call form with two arguments, that is context and enum Code
+                my $prev = $1;
+                my $next = $2;
+                my @args = split(/,/, $3);
+                my $v = 0;
+                for my $n ( @{$dataGearVar{$codeGearName}} ) {
+                    # continuation arguments
+                    $v = 1  if ( $n eq $next);
+                }
+                if ($v || defined $code{$interface}->{$next}) {
+                    # write continuation's arguments into the interface arguments
+                    # we may need a commit for a shared DataGear
+                    for my $arg ( @{$outputArgs{$codeGearName}->{$next}} ) {
+                        my $v = shift(@args);
+                        print $fd "\t*O_$arg = $v;\n";
+                    }
+                    if ($hasParGoto) {
+                        print $fd "${prev}Gearef(context, TaskManager)->taskList = context->taskList;\n";
+                        print $fd "${prev}Gearef(context, TaskManager)->next1 = C_$next;\n";
+                        print $fd "${prev}goto meta(context, C_$next);\n";
+                    } else {
+                        print $fd "${prev}context->before = C_$codeGearName;\n";
+                        print $fd "${prev}goto meta(context, $next);\n";
+                    }
+                    next;
+                }
+                if ($hasParGoto) {
+                    print $fd "${prev}Gearef(context, TaskManager)->taskList = context->taskList;\n";
+                    print $fd "${prev}Gearef(context, TaskManager)->next1 = C_$next;\n";
+                    print $fd "${prev}goto parGotoMeta(context, C_$next);\n";
+                    next;
+                } elsif ($next eq "meta") {
+                    print $fd $_;
+                    next;
+                } else {
+                    print $fd "${prev}context->before = C_$codeGearName;\n";
+                    print $fd "${prev}goto meta(context, C_$next);\n";
+                    next;
+                }
+            } elsif(/^.*(struct|union)?\s(\w+)\*\s(\w+)\s?[=;]/) {   # local pointer declaration: remember its type
+                my $type    = $2;
+                my $varName = $3;
+                $localVarType{$varName} = $type;
+                s/new\s+(\w+)\(\)/\&ALLOCATE(context, $1)->$1/g;   # replacing new
+            } elsif(/^}/) {
+                $hasParGoto = 0;
+            } else {
+                s/new\s+(\w+)\(\)/\&ALLOCATE(context, $1)->$1/g;   # replacing new
+            }
+            # gather type name and type
+        } elsif ($inMain) {
+            if (/^(.*)goto start_code\(main_context\);/) {
+                print $fd $_;
+                next;
+            } elsif (/^(.*)goto (\w+)\((.*)\);/) {
+                my $prev = $1;
+                my $next = $2;
+                print $fd "${prev}struct Context* main_context = NEW(struct Context);\n";
+                print $fd "${prev}initContext(main_context);\n";
+                print $fd "${prev}main_context->next = C_$next;\n";
+                print $fd "${prev}goto start_code(main_context);\n";
+                next;
+            }
+        }
+        if (/^}/) {
+            $inStub = 0;
+            $inTypedef = 0;
+            $inMain = 0;
+        }
+        print $fd $_;
+    }
+    if (defined $prevCodeGearName) {   # the last CodeGear of the file still needs its stub
+        if (!defined $stub{$prevCodeGearName."_stub"}) {
+            $stub{$prevCodeGearName."_stub"} = &generateStub($fd,$prevCodeGearName,$dataGearName{$prevCodeGearName});
+        }
+    }
+}
+
+# end
diff -r a4cab67624f7 -r 9146d6017f18 src/helper_cuda.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/helper_cuda.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,1088 @@
+/**
+ * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
+ *
+ * Please refer to the NVIDIA end user license agreement (EULA) associated
+ * with this source code for terms and conditions that govern your use of
+ * this software. Any use, reproduction, disclosure, or distribution of
+ * this software and related documentation outside the terms of the EULA
+ * is strictly prohibited.
+ *
+ */
+
+////////////////////////////////////////////////////////////////////////////////
+// These are CUDA Helper functions for initialization and error checking
+
+#ifndef HELPER_CUDA_H
+#define HELPER_CUDA_H
+
+#pragma once
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "helper_string.h"
+
+#ifndef EXIT_WAIVED
+#define EXIT_WAIVED 2
+#endif
+
+// Note: your SDK sample is required to include the proper header files. Please
+// refer to the CUDA examples for the needed CUDA headers, which may change depending
+// on which CUDA functions are used.
+
+// CUDA Runtime error messages
+#ifndef __DRIVER_TYPES_H__
+static const char *_cudaGetErrorEnum(cudaError_t error)  // returns the enumerator's name as a string literal
+{
+    switch (error)
+    {
+        case cudaSuccess:
+            return "cudaSuccess";
+
+        case cudaErrorMissingConfiguration:
+            return "cudaErrorMissingConfiguration";
+
+        case cudaErrorMemoryAllocation:
+            return "cudaErrorMemoryAllocation";
+
+        case cudaErrorInitializationError:
+            return "cudaErrorInitializationError";
+
+        case cudaErrorLaunchFailure:
+            return "cudaErrorLaunchFailure";
+
+        case cudaErrorPriorLaunchFailure:
+            return "cudaErrorPriorLaunchFailure";
+
+        case cudaErrorLaunchTimeout:
+            return "cudaErrorLaunchTimeout";
+
+        case cudaErrorLaunchOutOfResources:
+            return "cudaErrorLaunchOutOfResources";
+
+        case cudaErrorInvalidDeviceFunction:
+            return "cudaErrorInvalidDeviceFunction";
+
+        case cudaErrorInvalidConfiguration:
+            return "cudaErrorInvalidConfiguration";
+
+        case cudaErrorInvalidDevice:
+            return "cudaErrorInvalidDevice";
+
+        case cudaErrorInvalidValue:
+            return "cudaErrorInvalidValue";
+
+        case cudaErrorInvalidPitchValue:
+            return "cudaErrorInvalidPitchValue";
+
+        case cudaErrorInvalidSymbol:
+            return "cudaErrorInvalidSymbol";
+
+        case cudaErrorMapBufferObjectFailed:
+            return "cudaErrorMapBufferObjectFailed";
+
+        case cudaErrorUnmapBufferObjectFailed:
+            return "cudaErrorUnmapBufferObjectFailed";
+
+        case cudaErrorInvalidHostPointer:
+            return "cudaErrorInvalidHostPointer";
+
+        case cudaErrorInvalidDevicePointer:
+            return "cudaErrorInvalidDevicePointer";
+
+        case cudaErrorInvalidTexture:
+            return "cudaErrorInvalidTexture";
+
+        case cudaErrorInvalidTextureBinding:
+            return "cudaErrorInvalidTextureBinding";
+
+        case cudaErrorInvalidChannelDescriptor:
+            return "cudaErrorInvalidChannelDescriptor";
+
+        case cudaErrorInvalidMemcpyDirection:
+            return "cudaErrorInvalidMemcpyDirection";
+
+        case cudaErrorAddressOfConstant:
+            return "cudaErrorAddressOfConstant";
+
+        case cudaErrorTextureFetchFailed:
+            return "cudaErrorTextureFetchFailed";
+
+        case cudaErrorTextureNotBound:
+            return "cudaErrorTextureNotBound";
+
+        case cudaErrorSynchronizationError:
+            return "cudaErrorSynchronizationError";
+
+        case cudaErrorInvalidFilterSetting:
+            return "cudaErrorInvalidFilterSetting";
+
+        case cudaErrorInvalidNormSetting:
+            return "cudaErrorInvalidNormSetting";
+
+        case cudaErrorMixedDeviceExecution:
+            return "cudaErrorMixedDeviceExecution";
+
+        case cudaErrorCudartUnloading:
+            return "cudaErrorCudartUnloading";
+
+        case cudaErrorUnknown:
+            return "cudaErrorUnknown";
+
+        case cudaErrorNotYetImplemented:
+            return "cudaErrorNotYetImplemented";
+
+        case cudaErrorMemoryValueTooLarge:
+            return "cudaErrorMemoryValueTooLarge";
+
+        case cudaErrorInvalidResourceHandle:
+            return "cudaErrorInvalidResourceHandle";
+
+        case cudaErrorNotReady:
+            return "cudaErrorNotReady";
+
+        case cudaErrorInsufficientDriver:
+            return "cudaErrorInsufficientDriver";
+
+        case cudaErrorSetOnActiveProcess:
+            return "cudaErrorSetOnActiveProcess";
+
+        case cudaErrorInvalidSurface:
+            return "cudaErrorInvalidSurface";
+
+        case cudaErrorNoDevice:
+            return "cudaErrorNoDevice";
+
+        case cudaErrorECCUncorrectable:
+            return "cudaErrorECCUncorrectable";
+
+        case cudaErrorSharedObjectSymbolNotFound:
+            return "cudaErrorSharedObjectSymbolNotFound";
+
+        case cudaErrorSharedObjectInitFailed:
+            return "cudaErrorSharedObjectInitFailed";
+
+        case cudaErrorUnsupportedLimit:
+            return "cudaErrorUnsupportedLimit";
+
+        case cudaErrorDuplicateVariableName:
+            return "cudaErrorDuplicateVariableName";
+
+        case cudaErrorDuplicateTextureName:
+            return "cudaErrorDuplicateTextureName";
+
+        case cudaErrorDuplicateSurfaceName:
+            return "cudaErrorDuplicateSurfaceName";
+
+        case cudaErrorDevicesUnavailable:
+            return "cudaErrorDevicesUnavailable";
+
+        case cudaErrorInvalidKernelImage:
+            return "cudaErrorInvalidKernelImage";
+
+        case cudaErrorNoKernelImageForDevice:
+            return "cudaErrorNoKernelImageForDevice";
+
+        case cudaErrorIncompatibleDriverContext:
+            return "cudaErrorIncompatibleDriverContext";
+
+        case cudaErrorPeerAccessAlreadyEnabled:
+            return "cudaErrorPeerAccessAlreadyEnabled";
+
+        case cudaErrorPeerAccessNotEnabled:
+            return "cudaErrorPeerAccessNotEnabled";
+
+        case cudaErrorDeviceAlreadyInUse:
+            return "cudaErrorDeviceAlreadyInUse";
+
+        case cudaErrorProfilerDisabled:
+            return "cudaErrorProfilerDisabled";
+
+        case cudaErrorProfilerNotInitialized:
+            return "cudaErrorProfilerNotInitialized";
+
+        case cudaErrorProfilerAlreadyStarted:
+            return "cudaErrorProfilerAlreadyStarted";
+
+        case cudaErrorProfilerAlreadyStopped:
+            return "cudaErrorProfilerAlreadyStopped";
+
+        /* Since CUDA 4.0*/
+        case cudaErrorAssert:
+            return "cudaErrorAssert";
+
+        case cudaErrorTooManyPeers:
+            return "cudaErrorTooManyPeers";
+
+        case cudaErrorHostMemoryAlreadyRegistered:
+            return "cudaErrorHostMemoryAlreadyRegistered";
+
+        case cudaErrorHostMemoryNotRegistered:
+            return "cudaErrorHostMemoryNotRegistered";
+
+        /* Since CUDA 5.0 */
+        case cudaErrorOperatingSystem:
+            return "cudaErrorOperatingSystem";
+
+        case cudaErrorPeerAccessUnsupported:
+            return "cudaErrorPeerAccessUnsupported";
+
+        case cudaErrorLaunchMaxDepthExceeded:
+            return "cudaErrorLaunchMaxDepthExceeded";
+
+        case cudaErrorLaunchFileScopedTex:
+            return "cudaErrorLaunchFileScopedTex";
+
+        case cudaErrorLaunchFileScopedSurf:
+            return "cudaErrorLaunchFileScopedSurf";
+
+        case cudaErrorSyncDepthExceeded:
+            return "cudaErrorSyncDepthExceeded";
+
+        case cudaErrorLaunchPendingCountExceeded:
+            return "cudaErrorLaunchPendingCountExceeded";
+
+        case cudaErrorNotPermitted:
+            return "cudaErrorNotPermitted";
+
+        case cudaErrorNotSupported:
+            return "cudaErrorNotSupported";
+
+        /* Since CUDA 6.0 */
+        case cudaErrorHardwareStackError:
+            return "cudaErrorHardwareStackError";
+
+        case cudaErrorIllegalInstruction:
+            return "cudaErrorIllegalInstruction";
+
+        case cudaErrorMisalignedAddress:
+            return "cudaErrorMisalignedAddress";
+
+        case cudaErrorInvalidAddressSpace:
+            return "cudaErrorInvalidAddressSpace";
+
+        case cudaErrorInvalidPc:
+            return "cudaErrorInvalidPc";
+
+        case cudaErrorIllegalAddress:
+            return "cudaErrorIllegalAddress";
+
+        /* Since CUDA 6.5*/
+        case cudaErrorInvalidPtx:
+            return "cudaErrorInvalidPtx";
+
+        case cudaErrorInvalidGraphicsContext:
+            return "cudaErrorInvalidGraphicsContext";
+
+        case cudaErrorStartupFailure:
+            return "cudaErrorStartupFailure";
+
+        case cudaErrorApiFailureBase:
+            return "cudaErrorApiFailureBase";
+
+        /* Since CUDA 8.0*/        
+        case cudaErrorNvlinkUncorrectable :   
+            return "cudaErrorNvlinkUncorrectable";
+    }
+    // Reached only when 'error' matches none of the cases above.
+    return "<unknown>";
+}
+#else
+// CUDA Driver API errors
+static const char *_cudaGetErrorEnum(CUresult error)  // returns the enumerator's name as a string literal
+{
+    switch (error)
+    {
+        case CUDA_SUCCESS:
+            return "CUDA_SUCCESS";
+
+        case CUDA_ERROR_INVALID_VALUE:
+            return "CUDA_ERROR_INVALID_VALUE";
+
+        case CUDA_ERROR_OUT_OF_MEMORY:
+            return "CUDA_ERROR_OUT_OF_MEMORY";
+
+        case CUDA_ERROR_NOT_INITIALIZED:
+            return "CUDA_ERROR_NOT_INITIALIZED";
+
+        case CUDA_ERROR_DEINITIALIZED:
+            return "CUDA_ERROR_DEINITIALIZED";
+
+        case CUDA_ERROR_PROFILER_DISABLED:
+            return "CUDA_ERROR_PROFILER_DISABLED";
+
+        case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
+            return "CUDA_ERROR_PROFILER_NOT_INITIALIZED";
+
+        case CUDA_ERROR_PROFILER_ALREADY_STARTED:
+            return "CUDA_ERROR_PROFILER_ALREADY_STARTED";
+
+        case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
+            return "CUDA_ERROR_PROFILER_ALREADY_STOPPED";
+
+        case CUDA_ERROR_NO_DEVICE:
+            return "CUDA_ERROR_NO_DEVICE";
+
+        case CUDA_ERROR_INVALID_DEVICE:
+            return "CUDA_ERROR_INVALID_DEVICE";
+
+        case CUDA_ERROR_INVALID_IMAGE:
+            return "CUDA_ERROR_INVALID_IMAGE";
+
+        case CUDA_ERROR_INVALID_CONTEXT:
+            return "CUDA_ERROR_INVALID_CONTEXT";
+
+        case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
+            return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT";
+
+        case CUDA_ERROR_MAP_FAILED:
+            return "CUDA_ERROR_MAP_FAILED";
+
+        case CUDA_ERROR_UNMAP_FAILED:
+            return "CUDA_ERROR_UNMAP_FAILED";
+
+        case CUDA_ERROR_ARRAY_IS_MAPPED:
+            return "CUDA_ERROR_ARRAY_IS_MAPPED";
+
+        case CUDA_ERROR_ALREADY_MAPPED:
+            return "CUDA_ERROR_ALREADY_MAPPED";
+
+        case CUDA_ERROR_NO_BINARY_FOR_GPU:
+            return "CUDA_ERROR_NO_BINARY_FOR_GPU";
+
+        case CUDA_ERROR_ALREADY_ACQUIRED:
+            return "CUDA_ERROR_ALREADY_ACQUIRED";
+
+        case CUDA_ERROR_NOT_MAPPED:
+            return "CUDA_ERROR_NOT_MAPPED";
+
+        case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
+            return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY";
+
+        case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
+            return "CUDA_ERROR_NOT_MAPPED_AS_POINTER";
+
+        case CUDA_ERROR_ECC_UNCORRECTABLE:
+            return "CUDA_ERROR_ECC_UNCORRECTABLE";
+
+        case CUDA_ERROR_UNSUPPORTED_LIMIT:
+            return "CUDA_ERROR_UNSUPPORTED_LIMIT";
+
+        case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
+            return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE";
+
+        case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
+            return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED";
+
+        case CUDA_ERROR_INVALID_PTX:
+            return "CUDA_ERROR_INVALID_PTX";
+
+        case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
+            return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT";
+
+        case CUDA_ERROR_NVLINK_UNCORRECTABLE:
+            return "CUDA_ERROR_NVLINK_UNCORRECTABLE";
+
+        case CUDA_ERROR_INVALID_SOURCE:
+            return "CUDA_ERROR_INVALID_SOURCE";
+
+        case CUDA_ERROR_FILE_NOT_FOUND:
+            return "CUDA_ERROR_FILE_NOT_FOUND";
+
+        case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
+            return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND";
+
+        case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
+            return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED";
+
+        case CUDA_ERROR_OPERATING_SYSTEM:
+            return "CUDA_ERROR_OPERATING_SYSTEM";
+
+        case CUDA_ERROR_INVALID_HANDLE:
+            return "CUDA_ERROR_INVALID_HANDLE";
+
+        case CUDA_ERROR_NOT_FOUND:
+            return "CUDA_ERROR_NOT_FOUND";
+
+        case CUDA_ERROR_NOT_READY:
+            return "CUDA_ERROR_NOT_READY";
+
+        case CUDA_ERROR_ILLEGAL_ADDRESS:
+            return "CUDA_ERROR_ILLEGAL_ADDRESS";
+
+        case CUDA_ERROR_LAUNCH_FAILED:
+            return "CUDA_ERROR_LAUNCH_FAILED";
+
+        case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
+            return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES";
+
+        case CUDA_ERROR_LAUNCH_TIMEOUT:
+            return "CUDA_ERROR_LAUNCH_TIMEOUT";
+
+        case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING:
+            return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING";
+
+        case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
+            return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED";
+
+        case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
+            return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED";
+
+        case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
+            return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE";
+
+        case CUDA_ERROR_CONTEXT_IS_DESTROYED:
+            return "CUDA_ERROR_CONTEXT_IS_DESTROYED";
+
+        case CUDA_ERROR_ASSERT:
+            return "CUDA_ERROR_ASSERT";
+
+        case CUDA_ERROR_TOO_MANY_PEERS:
+            return "CUDA_ERROR_TOO_MANY_PEERS";
+
+        case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
+            return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED";
+
+        case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
+            return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED";
+
+        case CUDA_ERROR_HARDWARE_STACK_ERROR:
+            return "CUDA_ERROR_HARDWARE_STACK_ERROR";
+
+        case CUDA_ERROR_ILLEGAL_INSTRUCTION:
+            return "CUDA_ERROR_ILLEGAL_INSTRUCTION";
+
+        case CUDA_ERROR_MISALIGNED_ADDRESS:
+            return "CUDA_ERROR_MISALIGNED_ADDRESS";
+
+        case CUDA_ERROR_INVALID_ADDRESS_SPACE:
+            return "CUDA_ERROR_INVALID_ADDRESS_SPACE";
+
+        case CUDA_ERROR_INVALID_PC:
+            return "CUDA_ERROR_INVALID_PC";
+
+        case CUDA_ERROR_NOT_PERMITTED:
+            return "CUDA_ERROR_NOT_PERMITTED";
+
+        case CUDA_ERROR_NOT_SUPPORTED:
+            return "CUDA_ERROR_NOT_SUPPORTED";
+
+        case CUDA_ERROR_UNKNOWN:
+            return "CUDA_ERROR_UNKNOWN";
+    }
+    // Reached only when 'error' matches none of the cases above.
+    return "<unknown>";
+}
+#endif
+
+#ifdef CUBLAS_API_H_
+// cuBLAS API errors
+static const char *_cudaGetErrorEnum(cublasStatus_t error)  // returns the enumerator's name as a string literal
+{
+    switch (error)
+    {
+        case CUBLAS_STATUS_SUCCESS:
+            return "CUBLAS_STATUS_SUCCESS";
+
+        case CUBLAS_STATUS_NOT_INITIALIZED:
+            return "CUBLAS_STATUS_NOT_INITIALIZED";
+
+        case CUBLAS_STATUS_ALLOC_FAILED:
+            return "CUBLAS_STATUS_ALLOC_FAILED";
+
+        case CUBLAS_STATUS_INVALID_VALUE:
+            return "CUBLAS_STATUS_INVALID_VALUE";
+
+        case CUBLAS_STATUS_ARCH_MISMATCH:
+            return "CUBLAS_STATUS_ARCH_MISMATCH";
+
+        case CUBLAS_STATUS_MAPPING_ERROR:
+            return "CUBLAS_STATUS_MAPPING_ERROR";
+
+        case CUBLAS_STATUS_EXECUTION_FAILED:
+            return "CUBLAS_STATUS_EXECUTION_FAILED";
+
+        case CUBLAS_STATUS_INTERNAL_ERROR:
+            return "CUBLAS_STATUS_INTERNAL_ERROR";
+
+        case CUBLAS_STATUS_NOT_SUPPORTED:
+            return "CUBLAS_STATUS_NOT_SUPPORTED";
+
+        case CUBLAS_STATUS_LICENSE_ERROR:
+            return "CUBLAS_STATUS_LICENSE_ERROR";
+    }
+    // Reached only when 'error' matches none of the cases above.
+    return "<unknown>";
+}
+#endif
+
+#ifdef _CUFFT_H_
+// cuFFT API errors: maps a cufftResult value to the name of its enumerator.
+static const char *_cudaGetErrorEnum(cufftResult error)
+{
+    switch (error)
+    {
+        case CUFFT_SUCCESS:
+            return "CUFFT_SUCCESS";
+
+        case CUFFT_INVALID_PLAN:
+            return "CUFFT_INVALID_PLAN";
+
+        case CUFFT_ALLOC_FAILED:
+            return "CUFFT_ALLOC_FAILED";
+
+        case CUFFT_INVALID_TYPE:
+            return "CUFFT_INVALID_TYPE";
+
+        case CUFFT_INVALID_VALUE:
+            return "CUFFT_INVALID_VALUE";
+
+        case CUFFT_INTERNAL_ERROR:
+            return "CUFFT_INTERNAL_ERROR";
+
+        case CUFFT_EXEC_FAILED:
+            return "CUFFT_EXEC_FAILED";
+
+        case CUFFT_SETUP_FAILED:
+            return "CUFFT_SETUP_FAILED";
+
+        case CUFFT_INVALID_SIZE:
+            return "CUFFT_INVALID_SIZE";
+
+        case CUFFT_UNALIGNED_DATA:
+            return "CUFFT_UNALIGNED_DATA";
+
+        case CUFFT_INCOMPLETE_PARAMETER_LIST:
+            return "CUFFT_INCOMPLETE_PARAMETER_LIST";
+
+        case CUFFT_INVALID_DEVICE:
+            return "CUFFT_INVALID_DEVICE";
+
+        case CUFFT_PARSE_ERROR:
+            return "CUFFT_PARSE_ERROR";
+
+        case CUFFT_NO_WORKSPACE:
+            return "CUFFT_NO_WORKSPACE";
+
+        case CUFFT_NOT_IMPLEMENTED:
+            return "CUFFT_NOT_IMPLEMENTED";
+
+        case CUFFT_LICENSE_ERROR:
+            return "CUFFT_LICENSE_ERROR";
+
+        case CUFFT_NOT_SUPPORTED:
+            return "CUFFT_NOT_SUPPORTED";
+    }
+    // Reached only when no case label above matched the value.
+    return "<unknown>";
+}
+#endif
+
+
+#ifdef CUSPARSEAPI
+// cuSPARSE API errors: maps a cusparseStatus_t value to the name of its enumerator.
+static const char *_cudaGetErrorEnum(cusparseStatus_t error)
+{
+    switch (error)
+    {
+        case CUSPARSE_STATUS_SUCCESS:
+            return "CUSPARSE_STATUS_SUCCESS";
+
+        case CUSPARSE_STATUS_NOT_INITIALIZED:
+            return "CUSPARSE_STATUS_NOT_INITIALIZED";
+
+        case CUSPARSE_STATUS_ALLOC_FAILED:
+            return "CUSPARSE_STATUS_ALLOC_FAILED";
+
+        case CUSPARSE_STATUS_INVALID_VALUE:
+            return "CUSPARSE_STATUS_INVALID_VALUE";
+
+        case CUSPARSE_STATUS_ARCH_MISMATCH:
+            return "CUSPARSE_STATUS_ARCH_MISMATCH";
+
+        case CUSPARSE_STATUS_MAPPING_ERROR:
+            return "CUSPARSE_STATUS_MAPPING_ERROR";
+
+        case CUSPARSE_STATUS_EXECUTION_FAILED:
+            return "CUSPARSE_STATUS_EXECUTION_FAILED";
+
+        case CUSPARSE_STATUS_INTERNAL_ERROR:
+            return "CUSPARSE_STATUS_INTERNAL_ERROR";
+
+        case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
+            return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
+    }
+    // Reached only when no case label above matched the value.
+    return "<unknown>";
+}
+#endif
+
+#ifdef CUSOLVER_COMMON_H_
+// cuSOLVER API errors: maps a cusolverStatus_t value to the name of its enumerator.
+static const char *_cudaGetErrorEnum(cusolverStatus_t error)
+{
+    switch (error)
+    {
+        case CUSOLVER_STATUS_SUCCESS:
+            return "CUSOLVER_STATUS_SUCCESS";
+        case CUSOLVER_STATUS_NOT_INITIALIZED:
+            return "CUSOLVER_STATUS_NOT_INITIALIZED";
+        case CUSOLVER_STATUS_ALLOC_FAILED:
+            return "CUSOLVER_STATUS_ALLOC_FAILED";
+        case CUSOLVER_STATUS_INVALID_VALUE:
+            return "CUSOLVER_STATUS_INVALID_VALUE";
+        case CUSOLVER_STATUS_ARCH_MISMATCH:
+            return "CUSOLVER_STATUS_ARCH_MISMATCH";
+        case CUSOLVER_STATUS_MAPPING_ERROR:
+            return "CUSOLVER_STATUS_MAPPING_ERROR";
+        case CUSOLVER_STATUS_EXECUTION_FAILED:
+            return "CUSOLVER_STATUS_EXECUTION_FAILED";
+        case CUSOLVER_STATUS_INTERNAL_ERROR:
+            return "CUSOLVER_STATUS_INTERNAL_ERROR";
+        case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
+            return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
+        case CUSOLVER_STATUS_NOT_SUPPORTED:
+            return "CUSOLVER_STATUS_NOT_SUPPORTED";
+        case CUSOLVER_STATUS_ZERO_PIVOT:
+            return "CUSOLVER_STATUS_ZERO_PIVOT";
+        case CUSOLVER_STATUS_INVALID_LICENSE:
+            return "CUSOLVER_STATUS_INVALID_LICENSE";
+    }
+
+    // Reached only when no case label above matched the value.
+    return "<unknown>";
+}
+#endif
+
+#ifdef CURAND_H_
+// cuRAND API errors: maps a curandStatus_t value to the name of its enumerator.
+static const char *_cudaGetErrorEnum(curandStatus_t error)
+{
+    switch (error)
+    {
+        case CURAND_STATUS_SUCCESS:
+            return "CURAND_STATUS_SUCCESS";
+
+        case CURAND_STATUS_VERSION_MISMATCH:
+            return "CURAND_STATUS_VERSION_MISMATCH";
+
+        case CURAND_STATUS_NOT_INITIALIZED:
+            return "CURAND_STATUS_NOT_INITIALIZED";
+
+        case CURAND_STATUS_ALLOCATION_FAILED:
+            return "CURAND_STATUS_ALLOCATION_FAILED";
+
+        case CURAND_STATUS_TYPE_ERROR:
+            return "CURAND_STATUS_TYPE_ERROR";
+
+        case CURAND_STATUS_OUT_OF_RANGE:
+            return "CURAND_STATUS_OUT_OF_RANGE";
+
+        case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
+            return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
+
+        case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
+            return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
+
+        case CURAND_STATUS_LAUNCH_FAILURE:
+            return "CURAND_STATUS_LAUNCH_FAILURE";
+
+        case CURAND_STATUS_PREEXISTING_FAILURE:
+            return "CURAND_STATUS_PREEXISTING_FAILURE";
+
+        case CURAND_STATUS_INITIALIZATION_FAILED:
+            return "CURAND_STATUS_INITIALIZATION_FAILED";
+
+        case CURAND_STATUS_ARCH_MISMATCH:
+            return "CURAND_STATUS_ARCH_MISMATCH";
+
+        case CURAND_STATUS_INTERNAL_ERROR:
+            return "CURAND_STATUS_INTERNAL_ERROR";
+    }
+    // Reached only when no case label above matched the value.
+    return "<unknown>";
+}
+#endif
+
+#ifdef NV_NPPIDEFS_H
+// NPP API errors: maps an NppStatus value to a name; the set of case labels depends on the NPP version macros.
+static const char *_cudaGetErrorEnum(NppStatus error)
+{
+    switch (error)
+    {
+        case NPP_NOT_SUPPORTED_MODE_ERROR:
+            return "NPP_NOT_SUPPORTED_MODE_ERROR";
+
+        case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
+            return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
+
+        case NPP_RESIZE_NO_OPERATION_ERROR:
+            return "NPP_RESIZE_NO_OPERATION_ERROR";
+
+        case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
+            return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
+
+#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
+        // Short-form enumerators; several are reported under the long-form spelling used in the #else branch.
+        case NPP_BAD_ARG_ERROR:
+            return "NPP_BAD_ARGUMENT_ERROR";
+
+        case NPP_COEFF_ERROR:
+            return "NPP_COEFFICIENT_ERROR";
+
+        case NPP_RECT_ERROR:
+            return "NPP_RECTANGLE_ERROR";
+
+        case NPP_QUAD_ERROR:
+            return "NPP_QUADRANGLE_ERROR";
+
+        case NPP_MEM_ALLOC_ERR:
+            return "NPP_MEMORY_ALLOCATION_ERROR";
+
+        case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
+            return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
+
+        case NPP_INVALID_INPUT:
+            return "NPP_INVALID_INPUT";
+
+        case NPP_POINTER_ERROR:
+            return "NPP_POINTER_ERROR";
+
+        case NPP_WARNING:
+            return "NPP_WARNING";
+
+        case NPP_ODD_ROI_WARNING:
+            return "NPP_ODD_ROI_WARNING";
+#else
+
+            // These are for CUDA 5.5 or higher
+        case NPP_BAD_ARGUMENT_ERROR:
+            return "NPP_BAD_ARGUMENT_ERROR";
+
+        case NPP_COEFFICIENT_ERROR:
+            return "NPP_COEFFICIENT_ERROR";
+
+        case NPP_RECTANGLE_ERROR:
+            return "NPP_RECTANGLE_ERROR";
+
+        case NPP_QUADRANGLE_ERROR:
+            return "NPP_QUADRANGLE_ERROR";
+
+        case NPP_MEMORY_ALLOCATION_ERR:
+            return "NPP_MEMORY_ALLOCATION_ERROR";
+
+        case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
+            return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
+
+        case NPP_INVALID_HOST_POINTER_ERROR:
+            return "NPP_INVALID_HOST_POINTER_ERROR";
+
+        case NPP_INVALID_DEVICE_POINTER_ERROR:
+            return "NPP_INVALID_DEVICE_POINTER_ERROR";
+#endif
+
+        case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
+            return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
+
+        case NPP_TEXTURE_BIND_ERROR:
+            return "NPP_TEXTURE_BIND_ERROR";
+
+        case NPP_WRONG_INTERSECTION_ROI_ERROR:
+            return "NPP_WRONG_INTERSECTION_ROI_ERROR";
+
+        case NPP_NOT_EVEN_STEP_ERROR:
+            return "NPP_NOT_EVEN_STEP_ERROR";
+
+        case NPP_INTERPOLATION_ERROR:
+            return "NPP_INTERPOLATION_ERROR";
+
+        case NPP_RESIZE_FACTOR_ERROR:
+            return "NPP_RESIZE_FACTOR_ERROR";
+
+        case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
+            return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
+
+
+#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
+        // NOTE(review): NPP_MEMCPY_ERR is reported as "NPP_MEMCPY_ERROR" while its siblings keep "_ERR" - confirm intended.
+        case NPP_MEMFREE_ERR:
+            return "NPP_MEMFREE_ERR";
+
+        case NPP_MEMSET_ERR:
+            return "NPP_MEMSET_ERR";
+
+        case NPP_MEMCPY_ERR:
+            return "NPP_MEMCPY_ERROR";
+
+        case NPP_MIRROR_FLIP_ERR:
+            return "NPP_MIRROR_FLIP_ERR";
+#else
+
+        case NPP_MEMFREE_ERROR:
+            return "NPP_MEMFREE_ERROR";
+
+        case NPP_MEMSET_ERROR:
+            return "NPP_MEMSET_ERROR";
+
+        case NPP_MEMCPY_ERROR:
+            return "NPP_MEMCPY_ERROR";
+
+        case NPP_MIRROR_FLIP_ERROR:
+            return "NPP_MIRROR_FLIP_ERROR";
+#endif
+
+        case NPP_ALIGNMENT_ERROR:
+            return "NPP_ALIGNMENT_ERROR";
+
+        case NPP_STEP_ERROR:
+            return "NPP_STEP_ERROR";
+
+        case NPP_SIZE_ERROR:
+            return "NPP_SIZE_ERROR";
+
+        case NPP_NULL_POINTER_ERROR:
+            return "NPP_NULL_POINTER_ERROR";
+
+        case NPP_CUDA_KERNEL_EXECUTION_ERROR:
+            return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
+
+        case NPP_NOT_IMPLEMENTED_ERROR:
+            return "NPP_NOT_IMPLEMENTED_ERROR";
+
+        case NPP_ERROR:
+            return "NPP_ERROR";
+
+        case NPP_SUCCESS:
+            return "NPP_SUCCESS";
+
+        case NPP_WRONG_INTERSECTION_QUAD_WARNING:
+            return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
+
+        case NPP_MISALIGNED_DST_ROI_WARNING:
+            return "NPP_MISALIGNED_DST_ROI_WARNING";
+
+        case NPP_AFFINE_QUAD_INCORRECT_WARNING:
+            return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
+
+        case NPP_DOUBLE_SIZE_WARNING:
+            return "NPP_DOUBLE_SIZE_WARNING";
+
+        case NPP_WRONG_INTERSECTION_ROI_WARNING:
+            return "NPP_WRONG_INTERSECTION_ROI_WARNING";
+
+#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
+        /* These are 6.0 or higher */
+        case NPP_LUT_PALETTE_BITSIZE_ERROR:
+            return "NPP_LUT_PALETTE_BITSIZE_ERROR";
+
+        case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
+            return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
+
+        case NPP_QUALITY_INDEX_ERROR:
+            return "NPP_QUALITY_INDEX_ERROR";
+
+        case NPP_CHANNEL_ORDER_ERROR:
+            return "NPP_CHANNEL_ORDER_ERROR";
+
+        case NPP_ZERO_MASK_VALUE_ERROR:
+            return "NPP_ZERO_MASK_VALUE_ERROR";
+
+        case NPP_NUMBER_OF_CHANNELS_ERROR:
+            return "NPP_NUMBER_OF_CHANNELS_ERROR";
+
+        case NPP_COI_ERROR:
+            return "NPP_COI_ERROR";
+
+        case NPP_DIVISOR_ERROR:
+            return "NPP_DIVISOR_ERROR";
+
+        case NPP_CHANNEL_ERROR:
+            return "NPP_CHANNEL_ERROR";
+
+        case NPP_STRIDE_ERROR:
+            return "NPP_STRIDE_ERROR";
+
+        case NPP_ANCHOR_ERROR:
+            return "NPP_ANCHOR_ERROR";
+
+        case NPP_MASK_SIZE_ERROR:
+            return "NPP_MASK_SIZE_ERROR";
+
+        case NPP_MOMENT_00_ZERO_ERROR:
+            return "NPP_MOMENT_00_ZERO_ERROR";
+
+        case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
+            return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
+
+        case NPP_THRESHOLD_ERROR:
+            return "NPP_THRESHOLD_ERROR";
+
+        case NPP_CONTEXT_MATCH_ERROR:
+            return "NPP_CONTEXT_MATCH_ERROR";
+
+        case NPP_FFT_FLAG_ERROR:
+            return "NPP_FFT_FLAG_ERROR";
+
+        case NPP_FFT_ORDER_ERROR:
+            return "NPP_FFT_ORDER_ERROR";
+
+        case NPP_SCALE_RANGE_ERROR:
+            return "NPP_SCALE_RANGE_ERROR";
+
+        case NPP_DATA_TYPE_ERROR:
+            return "NPP_DATA_TYPE_ERROR";
+
+        case NPP_OUT_OFF_RANGE_ERROR:
+            return "NPP_OUT_OFF_RANGE_ERROR";
+
+        case NPP_DIVIDE_BY_ZERO_ERROR:
+            return "NPP_DIVIDE_BY_ZERO_ERROR";
+
+        case NPP_RANGE_ERROR:
+            return "NPP_RANGE_ERROR";
+
+        case NPP_NO_MEMORY_ERROR:
+            return "NPP_NO_MEMORY_ERROR";
+
+        case NPP_ERROR_RESERVED:
+            return "NPP_ERROR_RESERVED";
+
+        case NPP_NO_OPERATION_WARNING:
+            return "NPP_NO_OPERATION_WARNING";
+
+        case NPP_DIVIDE_BY_ZERO_WARNING:
+            return "NPP_DIVIDE_BY_ZERO_WARNING";
+#endif
+
+#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000
+        /* These are 7.0 or higher */
+        case NPP_OVERFLOW_ERROR:
+            return "NPP_OVERFLOW_ERROR";
+
+        case NPP_CORRUPTED_DATA_ERROR:
+            return "NPP_CORRUPTED_DATA_ERROR";
+#endif
+    }
+    // Reached only when no case label above matched the value.
+    return "<unknown>";
+}
+#endif
+
+#ifdef __DRIVER_TYPES_H__ // DEVICE_RESET calls cudaDeviceReset() in this branch
+#ifndef DEVICE_RESET
+#define DEVICE_RESET cudaDeviceReset();
+#endif
+#else // without __DRIVER_TYPES_H__, DEVICE_RESET expands to nothing
+#ifndef DEVICE_RESET
+#define DEVICE_RESET
+#endif
+#endif // __DRIVER_TYPES_H__
+
+#ifdef __DRIVER_TYPES_H__ // check() variant taking a CUresult status
+static inline void check(CUresult result, char const *const func, const char *const file, int const line)
+{   // func: text of the checked call; file/line: call site printed in the message
+    if (result) // any non-zero status is reported and terminates the process
+    {
+        fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
+                file, line, (int)(result), _cudaGetErrorEnum(result), func);
+        // Make sure we call CUDA Device Reset before exiting
+        DEVICE_RESET
+        exit(EXIT_FAILURE);
+    }
+}
+#else // check() variant taking a cudaError_t status
+static inline void check(cudaError_t result, char const *const func, const char *const file, int const line)
+{   // func: text of the checked call; file/line: call site printed in the message
+    if (result) // any non-zero status is reported and terminates the process
+    {
+        fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
+                file, line, (int)(result), _cudaGetErrorEnum(result), func);
+        // Make sure we call CUDA Device Reset before exiting
+        DEVICE_RESET
+        exit(EXIT_FAILURE);
+    }
+}
+#endif
+
+#ifdef __DRIVER_TYPES_H__
+// Wraps a CUDA call: hands its status, the call text (#val) and the call site to check()
+#define checkCudaErrors(val)           check ( (val), #val, __FILE__, __LINE__ )
+
+// Reports the error returned by cudaGetLastError(), tagged with msg and the call site
+#define getLastCudaError(msg)      __getLastCudaError (msg, __FILE__, __LINE__)
+// If cudaGetLastError() is not cudaSuccess: print it to stderr, run DEVICE_RESET, exit(EXIT_FAILURE).
+inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
+{
+    cudaError_t err = cudaGetLastError();
+
+    if (cudaSuccess != err)
+    {
+        fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n",
+                file, line, errorMessage, (int)err, cudaGetErrorString(err));
+        DEVICE_RESET
+        exit(EXIT_FAILURE);
+    }
+}
+#endif
+
+#ifndef MAX
+#define MAX(a,b) ((a) > (b) ? (a) : (b)) // fully parenthesized; the larger argument is still evaluated twice
+#endif
+
+// Float to int conversion: shifts by 0.5 away from zero, then truncates with a cast
+inline int ftoi(float value)
+{
+    return (int)(value >= 0 ? value + 0.5 : value - 0.5);
+}
+
+// Beginning of GPU Architecture definitions
+inline int _ConvertSMVer2Cores(int major, int minor)
+{
+    // Lookup table: SM version -> number of cores per SM
+    typedef struct sSMtoCores
+    {
+        int SM; // 0xMm (hexadecimal notation), M = SM Major version, and m = SM minor version
+        int Cores;
+    } sSMtoCores;
+
+    sSMtoCores nGpuArchCoresPerSM[] =
+    {
+        { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
+        { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
+        { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
+        { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
+        { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
+        { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
+        { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
+        { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
+        { 0x53, 128}, // Maxwell Generation (SM 5.3) GM20x class
+        { 0x60, 64 }, // Pascal Generation (SM 6.0) GP100 class
+        { 0x61, 128}, // Pascal Generation (SM 6.1) GP10x class
+        { 0x62, 128}, // Pascal Generation (SM 6.2) GP10x class
+        {   -1, -1 }  // sentinel entry that ends the search below
+    };
+
+    // Pack major/minor into the same 0xMm form the table keys use
+    const int sm = (major << 4) + minor;
+    int index;
+    for (index = 0; nGpuArchCoresPerSM[index].SM != -1; index++)
+    {
+        if (nGpuArchCoresPerSM[index].SM == sm)
+        {
+            return nGpuArchCoresPerSM[index].Cores;
+        }
+    }
+
+    // No exact match: report it and fall back to the entry just before the sentinel
+    const int fallback = nGpuArchCoresPerSM[index-1].Cores;
+    printf("MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM\n", major, minor, fallback);
+    return fallback;
+}
+// end of GPU Architecture definitions
+
+
+// end of CUDA Helper Functions
+
+
+#endif
diff -r a4cab67624f7 -r 9146d6017f18 src/helper_string.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/helper_string.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,264 @@
+/**
+ * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
+ *
+ * Please refer to the NVIDIA end user license agreement (EULA) associated
+ * with this source code for terms and conditions that govern your use of
+ * this software. Any use, reproduction, disclosure, or distribution of
+ * this software and related documentation outside the terms of the EULA
+ * is strictly prohibited.
+ *
+ */
+
+// These are helper functions for the SDK samples (string parsing, timers, etc)
+#ifndef STRING_HELPER_H
+#define STRING_HELPER_H
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+#ifndef _CRT_SECURE_NO_DEPRECATE
+#define _CRT_SECURE_NO_DEPRECATE
+#endif
+#ifndef STRCASECMP
+#define STRCASECMP  _stricmp
+#endif
+#ifndef STRNCASECMP
+#define STRNCASECMP _strnicmp
+#endif
+#ifndef STRCPY
+#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
+#endif
+// In this branch FOPEN evaluates to fopen_s()'s return value, so FOPEN_FAIL tests for non-zero.
+#ifndef FOPEN
+#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode)
+#endif
+#ifndef FOPEN_FAIL
+#define FOPEN_FAIL(result) (result != 0)
+#endif
+#ifndef SSCANF
+#define SSCANF sscanf_s
+#endif
+#ifndef SPRINTF
+#define SPRINTF sprintf_s
+#endif
+#else // Non-Windows includes and definitions
+#include <string.h>
+#include <strings.h>
+// Note: the STRCPY below ignores its nLength argument and forwards to plain strcpy().
+#ifndef STRCASECMP
+#define STRCASECMP  strcasecmp
+#endif
+#ifndef STRNCASECMP
+#define STRNCASECMP strncasecmp
+#endif
+#ifndef STRCPY
+#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
+#endif
+// In this branch FOPEN evaluates to the assigned FILE pointer, so FOPEN_FAIL tests for NULL.
+#ifndef FOPEN
+#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode))
+#endif
+#ifndef FOPEN_FAIL
+#define FOPEN_FAIL(result) (result == NULL)
+#endif
+#ifndef SSCANF
+#define SSCANF sscanf
+#endif
+#ifndef SPRINTF
+#define SPRINTF sprintf
+#endif
+#endif // Windows / non-Windows selection
+
+#ifndef EXIT_WAIVED
+#define EXIT_WAIVED 2
+#endif
+
+#ifndef bool // only detects a macro named bool; a bool that is not a macro is not seen by this test
+typedef int bool;
+#define false 0
+#define true 1
+#endif
+
+// CUDA Utility Helper Functions
+inline int stringRemoveDelimiter(char delimiter, const char *string)
+{
+    // Length of the run of delimiter characters at the front of the string.
+    int skipped = 0;
+    for (; string[skipped] == delimiter; skipped++)
+    {
+    }
+
+    // Index of the final character (-1 for an empty string).
+    int last_index = (int)strlen(string) - 1;
+
+    // When at most one character is left after the run, 0 is returned
+    // instead of the run length; otherwise the result is the index of the
+    // first character that is not the delimiter.
+    return (skipped >= last_index) ? 0 : skipped;
+}
+
+inline int getFileExtension(char *filename, char **extension)
+{
+    // Scan backwards for the last '.', starting at the terminator; index 0 is never examined.
+    int dot = (int)strlen(filename);
+    // The "dot > 0" guard keeps the scan inside the buffer, including for an empty filename.
+    while (dot > 0 && filename[dot] != '.')
+    {
+        dot--;
+    }
+
+    // A dot found at index 0 or 1 (or no dot at all) is reported as "no extension".
+    if (dot < 2)
+    {
+        *extension = NULL;
+        return 0;
+    }
+    *extension = &filename[dot + 1]; // first character after the dot
+    return dot + 1;                  // offset of the extension within filename
+}
+
+// Returns true when some argv[1..argc-1], after leading '-' handling, equals string_ref ignoring case.
+inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
+{
+    bool bFound = false;
+
+    if (argc >= 1)
+    {
+        for (int i=1; i < argc; i++)
+        {
+            int string_start = stringRemoveDelimiter('-', argv[i]);
+            const char *string_argv = &argv[i][string_start];
+            // Only the text before an '=' (when present) takes part in the comparison.
+            const char *equal_pos = strchr(string_argv, '=');
+            int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);
+
+            int length = (int)strlen(string_ref);
+
+            if (length == argv_length && !STRNCASECMP(string_argv, string_ref, length))
+            {
+                bFound = true;
+                continue; // scanning goes on; later arguments never clear bFound
+            }
+        }
+    }
+
+    return bFound;
+}
+
+// Returns the atoi() value of the text following option string_ref, or 0 when the option is absent.
+inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
+{
+    bool bFound = false;
+    int value = -1;
+
+    if (argc >= 1)
+    {
+        for (int i=1; i < argc; i++)
+        {
+            int string_start = stringRemoveDelimiter('-', argv[i]);
+            const char *string_argv = &argv[i][string_start];
+            int length = (int)strlen(string_ref);
+            // Prefix comparison only: an argument that merely begins with string_ref also matches.
+            if (!STRNCASECMP(string_argv, string_ref, length))
+            {
+                if (length+1 <= (int)strlen(string_argv))
+                {
+                    int auto_inc = (string_argv[length] == '=') ? 1 : 0; // skip an optional '='
+                    value = atoi(&string_argv[length + auto_inc]);
+                }
+                else
+                {
+                    value = 0; // nothing follows the option name
+                }
+
+                bFound = true;
+                continue; // keep scanning: the last matching argument determines the result
+            }
+        }
+    }
+
+    if (bFound)
+    {
+        return value;
+    }
+    else
+    {
+        return 0;
+    }
+}
+
+inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref)
+{
+    bool bFound = false;
+    float value = -1;
+    // Returns the atof() value of the text following option string_ref, or 0 when the option is absent.
+    if (argc >= 1)
+    {
+        for (int i=1; i < argc; i++)
+        {
+            int string_start = stringRemoveDelimiter('-', argv[i]);
+            const char *string_argv = &argv[i][string_start];
+            int length = (int)strlen(string_ref);
+            // Prefix comparison only: an argument that merely begins with string_ref also matches.
+            if (!STRNCASECMP(string_argv, string_ref, length))
+            {
+                if (length+1 <= (int)strlen(string_argv))
+                {
+                    int auto_inc = (string_argv[length] == '=') ? 1 : 0; // skip an optional '='
+                    value = (float)atof(&string_argv[length + auto_inc]);
+                }
+                else
+                {
+                    value = 0.f; // nothing follows the option name
+                }
+
+                bFound = true;
+                continue; // keep scanning: the last matching argument determines the result
+            }
+        }
+    }
+
+    if (bFound)
+    {
+        return value;
+    }
+    else
+    {
+        return 0;
+    }
+}
+
+inline bool getCmdLineArgumentString(const int argc, const char **argv,
+                                     const char *string_ref, char **string_retval)
+{
+    bool bFound = false;
+    // On a match *string_retval points into argv (no copy is made); it is set to NULL when nothing matches.
+    if (argc >= 1)
+    {
+        for (int i=1; i < argc; i++)
+        {
+            int string_start = stringRemoveDelimiter('-', argv[i]);
+            char *string_argv = (char *)&argv[i][string_start];
+            int length = (int)strlen(string_ref);
+            // Skip one separator character after the name, but never step past the terminating NUL.
+            if (!STRNCASECMP(string_argv, string_ref, length))
+            {
+                *string_retval = &string_argv[string_argv[length] ? length+1 : length];
+                bFound = true;
+                continue; // keep scanning: the last matching argument determines the result
+            }
+        }
+    }
+
+    if (!bFound)
+    {
+        *string_retval = NULL;
+    }
+
+    return bFound;
+}
+
+
+#endif
diff -r a4cab67624f7 -r 9146d6017f18 src/lib/Gears/Context.pm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/Gears/Context.pm	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,191 @@
+package Gears::Context;
+
+use strict;
+use warnings;
+
+use Gears::Util;
+use Gears::Context::Template;
+use Carp qw/croak/;
+
+sub new {
+  my ($class, %args) = @_;
+  my $self = {
+    data_gears_with_count => {},
+    find_root => $args{find_root} // ".",    # defaults to "." when not supplied
+    output   => $args{output},
+  };
+  # compile_sources is optional; when given, each entry is passed to Gears::Util->file_checking.
+  if ($args{compile_sources}) {
+    $self->{compile_sources} = $args{compile_sources};
+    map { Gears::Util->file_checking($_); } @{$self->{compile_sources}};
+  }
+
+  return bless $self, $class;
+}
+
+# Scans every compile source line by line and records which data gear names each file mentions.
+sub extraction_dg_compile_sources {
+  my $self = shift;
+  my %counter;    # {interfaces}{name} and {impl}{name} => occurrence count; returned by reference
+  for my $cbc_file (@{$self->{compile_sources}}) {
+    open my $fh , '<', $cbc_file or croak "can't open $cbc_file: $!";
+    while (my $line = <$fh>) {    # the first pattern that matches handles the line
+       if ($line =~ /#interface\s*"(.*)\.h"/ || $line =~ /^\/\/\s*use\s*"(.*)\.h"/) {
+          $self->{data_gears_with_count}->{$1}->{caller}->{$cbc_file}++;
+          $counter{interfaces}->{$1}++;
+          next;
+       }
+       # Constructor at the start of a line: <Interface>* create<Implementation>(...)
+       if ($line =~ /^(\w+)(\*)+  *create(\w+)\(([^]]*)\)/) {
+          my $interface = $1;
+          my $implementation = $3;
+          $self->{data_gears_with_count}->{$interface}->{caller}->{$cbc_file}++;
+          $self->{data_gears_with_count}->{$implementation}->{caller}->{$cbc_file}++;
+          $counter{interfaces}->{$interface}++;
+          $counter{impl}->{$implementation}++;
+          next;
+       }
+       # Gearef(context, Name)
+       if ($line =~ /Gearef\(context,\s*(\w+)\)/) {
+          my $implementation = $1;
+          $counter{impl}->{$implementation}++;
+          $self->{data_gears_with_count}->{$implementation}->{caller}->{$cbc_file}++;
+          next;
+       }
+       # ALLOCATE_ARRAY(context, Name, n) and ALLOCATE_PTR_ARRAY(context, Name, n)
+       if ($line =~ /ALLOCATE_(?:PTR_)?ARRAY\(context,\s*(\w+),[\s\w]+\)/) {
+          my $implementation = $1;
+          $counter{impl}->{$implementation}++;
+          $self->{data_gears_with_count}->{$implementation}->{caller}->{$cbc_file}++;
+          next;
+       }
+       # ALLOCATE_DATA_GEAR(x, Name): only the second argument is recorded
+       if ($line =~ /ALLOCATE_DATA_GEAR\((\w+),\s*(\w+)\)/) {
+          my $implementation = $2;
+          $counter{impl}->{$implementation}++;
+          $self->{data_gears_with_count}->{$implementation}->{caller}->{$cbc_file}++;
+          next;
+       }
+       # Cast of a GearImpl(...) result; the implementation is counted but gets no caller entry. Example:
+       #TaskManagerImpl* taskManager = (TaskManagerImpl*)GearImpl(context, TaskManager, taskManager);
+       if ($line =~ /\((\w+)\*\)GearImpl\(context,\s*(\w+),\s*(\w+)\)/) {
+          my $interface = $2;
+          my $implementation = $1;
+          $self->{data_gears_with_count}->{$interface}->{caller}->{$cbc_file}++;
+          $counter{interfaces}->{$interface}++;
+          $counter{impl}->{$implementation}++;
+          next;
+       }
+       # Every "struct Name" on a __code line; the name is required so that $1 is always defined.
+       if ($line =~ /__code/) {
+         while ($line =~ /struct (\w+)/g) {
+           next if $1 eq "Context";
+           $self->{data_gears_with_count}->{$1}->{caller}->{$cbc_file}++;
+           next if (exists $counter{interfaces}->{$1});
+           $counter{impl}->{$1}++;
+         }
+       }
+    }
+    close $fh;
+  }
+  $counter{interfaces}->{Meta}++;    # Meta is always counted as an interface
+  $self->{data_gears_with_count}->{Meta}++;    # NOTE(review): plain count here, other entries are {caller} hashes - confirm
+  return \%counter;
+}
+
+sub set_data_gear_header_path {
+  my ($self, @requested) = @_;
+  # Explicit names win; otherwise use every recorded data gear except "Context".
+  my @data_gears_name = @requested
+    ? @requested
+    : grep { $_ ne "Context" } keys %{$self->{data_gears_with_count}};
+
+  # _find_headers is called as a plain function, not as a method.
+  return _find_headers($self->{find_root}, \@data_gears_name);
+}
+
+sub update_dg_each_header_path {
+  my ($self, $dgs, $dg2path) = @_;
+  # Stays undef when $dgs holds no names at all.
+  my $new_dgs;
+  for my $kind (keys %$dgs) {
+    for my $dg_name (keys %{$dgs->{$kind}}) {
+      # A false lookup result (missing, 0, '') aborts the whole conversion.
+      my $path = $dg2path->{$dg_name}
+        or croak "failed trans header $dg_name\n";
+      $new_dgs->{$kind}->{$dg_name} = $path;
+    }
+  }
+  return $new_dgs;
+}
+
+sub tree2create_context_h {
+  my ($self, $dg2path) = @_;
+  my $data_struct_str = $self->tree2data_struct_str($dg2path);
+  # Emission order: emit_top_header, emit_data_gears (with the generated text), emit_last_header.
+  my $output = $self->_select_output();
+  Gears::Context::Template->emit_top_header($output);
+  Gears::Context::Template->emit_data_gears($output,$data_struct_str);
+  Gears::Context::Template->emit_last_header($output);
+  close $output;    # this is STDOUT itself when the output target is 'stdout'
+}
+
+sub _select_output {
+  my $self = shift;    # returns the filehandle the generated text is written to
+  print "$self->{output}\n";    # NOTE(review): printed on STDOUT, so it precedes the output when the target is 'stdout'
+  if ($self->{output} eq  'stdout') {
+    return *STDOUT;
+  }
+  open my $fh, '>', $self->{output} or croak "can't open $self->{output} for writing: $!";
+  return $fh;
+}
+
+sub tree2data_struct_str {
+  my ($self, $dg_str) = @_;
+  my $data_struct_str = "";
+  for my $interface (sort keys %$dg_str) {
+    # The interface's own definition is emitted before any of its implementations.
+    $data_struct_str .= Gears::Util->h2context_str($dg_str->{$interface}->{elem});
+    my $impls = $dg_str->{$interface}->{impl} or next;
+    # Implementations follow in sorted-name order.
+    $data_struct_str .= Gears::Util->h2context_str($impls->{$_}) for sort keys %$impls;
+  }
+  return $data_struct_str;
+}
+
+sub createImplTree_from_header {
+  my ($self, $dg2path) = @_;
+  my %dg_str = ();
+  my $inters = $dg2path->{interfaces};
+  my $impls = $dg2path->{impl};
+  # Every interface header becomes the {elem} of its own node.
+  $dg_str{$_}->{elem} = Gears::Util->parse_interface($inters->{$_}) for keys %$inters;
+  # An implementation hangs under the interface named by its {isa}; without one it is its own node.
+  for my $name (keys %$impls) {
+    my $res = Gears::Util->parse($impls->{$name});
+    if ($res->{isa}) {
+      $dg_str{$res->{isa}}->{impl}->{$name} = $res;
+    } else {
+      $dg_str{$name}->{elem} = $res;
+    }
+  }
+  return \%dg_str;
+}
+
+sub _find_headers {
+  my ($search_bash_path, $targets) = @_;
+  # Each target starts out mapped to a count; a header found for it replaces the count with its path.
+  my %res;
+  $res{$_}++ for @$targets;
+
+  my $header_paths = Gears::Util->find_headers_path($search_bash_path);
+  for my $path (@$header_paths) {
+    # Skip paths without a "<name>.h" part instead of reading a stale or undefined $1.
+    my ($header_tile) = $path =~ /(\w+)\.h/ or next;
+    # Headers that were not asked for are ignored.
+    $res{$header_tile} = $path if exists $res{$header_tile};
+  }
+  return \%res;
+}
+
+1;
diff -r a4cab67624f7 -r 9146d6017f18 src/lib/Gears/Context/Template.pm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/Gears/Context/Template.pm	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,171 @@
+package Gears::Context::Template;
+use strict;
+use warnings;
+
+sub emit_top_header {
+  # Print the fixed opening part of the CONTEXT_H header to $out; the text below is emitted verbatim.
+  my ($class, $out) = @_;
+  print $out <<'EOFEOF';
+/* Context definition for llrb example */
+#ifndef CONTEXT_H
+#define CONTEXT_H
+#include <stdlib.h>
+#include <pthread.h>
+#ifdef USE_CUDAWorker
+#include <cuda.h>
+#include <driver_types.h>
+#include <cuda_runtime.h>
+#include "helper_cuda.h"
+#endif
+
+#define ALLOCATE_SIZE 20000000
+#define NEW(type) (type*)(calloc(1, sizeof(type)))
+#define NEWN(n, type) (type*)(calloc(n, sizeof(type)))
+
+#define ALLOC_DATA(context, dseg) ({\
+    Meta* meta = (Meta*)context->heap;\
+    meta->type = D_##dseg;\
+    meta->size = sizeof(dseg);\
+    meta->len = 1;\
+    context->heap += sizeof(Meta);\
+    context->data[D_##dseg] = context->heap; context->heap += sizeof(dseg); (dseg *)context->data[D_##dseg]; })
+
+#define ALLOC_DATA_TYPE(context, dseg, t) ({\
+    Meta* meta = (Meta*)context->heap;\
+    meta->type = D_##t;\
+    meta->size = sizeof(t);\
+    meta->len = 1;\
+    context->heap += sizeof(Meta);\
+    context->data[D_##dseg] = context->heap; context->heap += sizeof(t); (t *)context->data[D_##dseg]; })
+
+#define ALLOCATE(context, t) ({ \
+    Meta* meta = (Meta*)context->heap;\
+    context->heap += sizeof(Meta);\
+    union Data* data = context->heap; \
+    context->heap += sizeof(t); \
+    meta->type = D_##t; \
+    meta->size = sizeof(t);     \
+    meta->len = 1;\
+    data; })
+
+#define ALLOCATE_ARRAY(context, t, length) ({ \
+    Meta* meta = (Meta*)context->heap;\
+    context->heap += sizeof(Meta);\
+    union Data* data = context->heap; \
+    context->heap += sizeof(t)*length; \
+    meta->type = D_##t; \
+    meta->size = sizeof(t)*length; \
+    meta->len = length; \
+    data;   })
+
+#define ALLOCATE_PTR_ARRAY(context, dseg, length) ({\
+    Meta* meta = (Meta*)context->heap;\
+    context->heap += sizeof(Meta);\
+    union Data* data = context->heap; \
+    context->heap += sizeof(dseg *)*length; \
+    meta->type = D_##dseg; \
+    meta->size = sizeof(dseg *)*length; \
+    meta->len = length; \
+    data; })
+
+#define ALLOCATE_DATA_GEAR(context, t) ({ \
+        union Data* data = ALLOCATE(context, t); \
+        Meta* meta = GET_META(data); \
+        meta->wait = createSynchronizedQueue(context); \
+        data; })
+
+#define ALLOC(context, t) (&ALLOCATE(context, t)->t)
+
+#define GET_META(dseg) ((Meta*)(((void*)dseg) - sizeof(Meta)))
+#define GET_TYPE(dseg) (GET_META(dseg)->type)
+#define GET_SIZE(dseg) (GET_META(dseg)->size)
+#define GET_LEN(dseg) (GET_META(dseg)->len)
+#define GET_WAIT_LIST(dseg) (GET_META(dseg)->wait)
+
+#define Gearef(context, t) (&(context)->data[D_##t]->t)
+
+// (SingleLinkedStack *)context->data[D_Stack]->Stack.stack->Stack.stack
+
+#define GearImpl(context, intf, name) (Gearef(context, intf)->name->intf.name)
+
+#include "c/enumCode.h"
+
+enum Relational {
+    EQ,
+    GT,
+    LT,
+};
+
+#include "c/enumData.h"
+
+struct Context {
+    enum Code next;
+    struct Worker* worker;
+    struct TaskManager* taskManager;
+    int codeNum;
+    __code (**code) (struct Context*);
+    union Data **data;
+    void* heapStart;
+    void* heap;
+    long heapLimit;
+    int dataNum;
+
+    // task parameter
+    int idgCount; //number of waiting dataGear
+    int idg;
+    int maxIdg;
+    int odg;
+    int maxOdg;
+    int gpu; // GPU task
+    struct Context* task;
+    struct Element* taskList;
+#ifdef USE_CUDAWorker
+    int num_exec;
+    CUmodule module;
+    CUfunction function;
+#endif
+    /* multi dimension parameter */
+    int iterate;
+    struct Iterator* iterator;
+    enum Code before;
+};
+
+typedef int Int;
+#ifndef USE_CUDAWorker
+typedef unsigned long long CUdeviceptr;
+#endif
+EOFEOF
+}
+
+sub emit_data_gears {
+  my ($class, $out, $dgs) = @_;
+  # Open the union and emit the already-rendered struct definitions in $dgs.
+  print $out "union Data {\n" . $dgs;
+  # Fixed tail: the Context member, the closing marker comment and the typedef.
+  print $out <<'EOF';
+    struct Context Context;
+}; // union Data end       this is necessary for context generator
+typedef union Data Data;
+EOF
+}
+
+
+sub emit_last_header {  # prints the fixed closing part of the header (trailing includes, externs, #endif) to $out
+  my($class, $out) = @_;  # $class is not used; $out is the handle printed to
+  print $out <<'EOF';
+#include "c/typedefData.h"
+
+#include "c/extern.h"
+
+extern __code start_code(struct Context* context);
+extern __code exit_code(struct Context* context);
+extern __code meta(struct Context* context, enum Code next);
+//extern __code par_meta(struct Context* context, enum Code spawns, enum Code next);
+extern __code parGotoMeta(struct Context* context, enum Code next);
+extern void initContext(struct Context* context);
+
+#endif
+EOF
+}
+
+1;
diff -r a4cab67624f7 -r 9146d6017f18 src/lib/Gears/Util.pm
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lib/Gears/Util.pm	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,178 @@
+package Gears::Util;
+use strict;
+use warnings;
+use Carp qw/croak/;
+use File::Find;
+
+sub parse {
+  # Parse a header file into its IR hash; the result is returned without validation.
+  my ($class, $file_name) = @_;
+  return _parse_base($file_name);
+}
+
+sub parse_code_verbose {
+  # Like parse, but passes a true second argument (the verbose flag) to _parse_base.
+  my ($class, $file_name) = @_;
+  return _parse_base($file_name, 1);
+}
+
+sub parse_interface {
+  # Parse an interface header and insist that it declares a struct name.
+  # Croaks with 'invalid struct name' when the parser found none.
+  my ($class, $file_name) = @_;
+  my $ir = _parse_base($file_name);
+
+  croak 'invalid struct name' unless $ir->{name};
+  return $ir;
+}
+
+
+sub parse_impl {
+  # Parse an implementation header. Both the struct name and the interface it
+  # implements ({isa}) must have been found, otherwise croak.
+  my ($class, $file_name) = @_;
+  my $ir = _parse_base($file_name);
+
+  croak 'invalid struct name' if !$ir->{isa} || !$ir->{name};
+  return $ir;
+}
+
+sub _parse_base {
+  # Parse one header: the first line yields {name} and {isa}; the remaining lines are
+  # kept in {content}, minus comments, blank lines and the closing "} Name;" line.
+  my ($file,$code_verbose) = @_;   # NOTE(review): $code_verbose is accepted but not used here
+  my $ir  = {};
+
+  Gears::Util->file_checking($file);
+  open my $fh, '<', $file or croak "cannot open $file: $!";
+  my $line = <$fh>;
+
+  # An empty file has no first line; treat it as "no struct declaration found".
+  if (defined $line && $line =~ /typedef struct (\w+)\s?<.*>([\s\w{]+)/) {
+    die "invalied struct name $1" unless $1;
+    $ir->{name} = $1;
+    if ($2 =~ m|\s*impl\s*([\w+]+)\s*\{|) {
+      $ir->{isa} = $1;
+    }
+  }
+
+  my $closing = defined $ir->{name} ? qr/^\}\s*$ir->{name};/ : qr/^\}\s*;/;
+  while ($line = <$fh>) {
+    if ($line =~ m|\s*/\*|) {
+      # Skip up to the line that closes the comment; stop at EOF if it is never closed.
+      $line = <$fh> while (defined $line && $line !~ m|\*/|);
+      next;
+    }
+    next if ($line =~ /^\s+$/);
+    next if ($line =~ m[^\s*//]);
+    next if ($line =~ $closing);
+
+    # A code gear declaration is stored as an "enum Code" field of the same name.
+    if ($line =~ m|__code (\w+)\(([()\.\*\s\w,_]+)\)|) {
+      $line = "enum Code $1;\n";
+    }
+    push(@{$ir->{content}},$line);
+  }
+  return $ir;
+}
+
+sub parse_with_rewrite {
+  # Parse $file and split the parsed {content} into
+  #   {data}  - the lines that are not code gear fields, and
+  #   {codes} - one { name, args } hash per code gear, in declaration order.
+  my ($class, $file)  = @_;
+  my $ir = _parse_base($file);
+
+  my @data_gears = grep { $_ !~ /enum Code/ } @{$ir->{content}};
+  my @code_gears = map { /enum Code (\w+);/ ? $1 : () } @{$ir->{content}};
+
+  # The parsed content only keeps the gear names, so read the file a second
+  # time to recover each argument list. A name whose declaration is not found
+  # stays a plain string (as do all names after it).
+  open my $fh , '<', $file or croak "cannot open $file: $!";
+  my $i = 0;
+  while (($i < scalar @code_gears) && defined(my $line = <$fh>)) {
+      my $cg = $code_gears[$i];
+      if ($line =~ m|__code $cg\(([()\.\*\s\w,_]+)\)|) {
+        $code_gears[$i++] = { name => $cg, args => $1 };
+      }
+  }
+  $ir->{codes} = \@code_gears;
+  $ir->{data}  = \@data_gears;
+  return $ir;
+}
+
+sub file_checking {
+  # Croak with "invalid filepath" unless $file_name names an existing plain
+  # file (the -f test); otherwise do nothing.
+  my ($class, $file_name) = @_;
+  croak "invalid filepath :$file_name\n" unless -f $file_name;
+}
+
+sub slup {
+  # Return the whole content of $file as one string; croaks if the file cannot be opened.
+  my ($class,$file) = @_;
+  open my $fh, '<', $file or croak "cannot open $file: $!";
+  local $/;  # undefined record separator: one read returns the entire file
+  return scalar <$fh>;
+}
+
+sub find_header {
+  # Walk $find_path (default ".") recursively and return the path of a file
+  # whose path contains "/<header_name>.h". When several files match, the one
+  # visited last wins; '' is returned when nothing matches.
+  my ($class, $header_name, $find_path) = @_;
+  $find_path //= ".";
+
+  my $header_file = '';
+  my $remember_match = sub {
+    # With no_chdir, File::Find leaves the full path in $_, so the leading
+    # "/" in the pattern can match the directory separator.
+    $header_file = $_ if $_ =~ /\/$header_name\.h/;
+  };
+
+  # find() calls $remember_match for every entry below $find_path.
+  find({ wanted => $remember_match, no_chdir => 1 }, $find_path);
+
+  return $header_file;
+}
+
+sub find_headers_path {
+  # Recursively collect the paths ending in ".h" or ".dg" below $find_path (default ".").
+  my $class = shift;
+  my $find_path = shift // ".";
+  my @files;
+  # Anchored at the end: unanchored, "\.h" also matches everything under ".hg/", "x.html", etc.
+  find( { wanted => sub { push @files, $_ if /\.(?:h|dg)\z/ }, no_chdir => 1 }, $find_path);
+  return \@files;
+}
+
+sub h2context_str {
+  # Render a parsed header ({ name => ..., content => [lines] }) as the text
+  #     struct NAME {
+  #         ...content lines...
+  #     } NAME;
+  # indented for placement inside another declaration.
+  my ($class, $h2context) = @_;
+  my $space = '    ';
+
+  # Work on a copy so the caller's parsed lines are never modified.
+  my @content = @{ $h2context->{content} // [] };
+
+  # The leading whitespace of the first line is the indent removed from each line.
+  my ($content_space) = @content ? $content[0] =~ /^(\s*)/ : ('');
+
+  my $context = "${space}struct $h2context->{name} {\n";
+  for my $c (@content) {
+    # Strip the indent literally and only at the start of the line. Skip the
+    # substitution for an empty indent: an empty pattern would make Perl reuse
+    # the last successfully matched regex instead of matching nothing.
+    $c =~ s/^\Q$content_space\E// if length $content_space;
+    $context .= "${space}${space}$c";
+  }
+  $context .= "${space}} $h2context->{name};\n";
+  return $context;
+}
+
+
+1;
diff -r a4cab67624f7 -r 9146d6017f18 src/main.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/main.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,110 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include "../context.h"
+
+int cpu_num = 1;       // set by "-cpu N" in init(); passed to createTaskManagerImpl
+int length = 102400;   // set by "-l N" in init(); number of ints allocated for array_ptr
+int split = 8;         // set by "-s N" in init(); used as iterate(split) and in length/split
+int* array_ptr;        // allocated in main() and filled with array_ptr[i] = i
+int gpu_num = 0;       // set to 1 by "-cuda" in init(); passed to createTaskManagerImpl
+int CPU_ANY = -1;
+int CPU_CUDA = -1;     // set to 0 by "-cuda" in init()
+
+void *start_taskManager(struct Context *context) {
+    goto initDataGears(context, Gearef(context, LoopCounter), Gearef(context, TaskManager));
+    return 0;  // NOTE(review): presumably never reached, since a CbC goto does not return here — confirm
+}
+
+__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
+    // loopCounter->tree = createRedBlackTree(context);
+    loopCounter->i = 0;  // code3 reads this counter as the index into array_ptr
+    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0);
+    goto meta(context, C_code1);
+}
+
+__code initDataGears_stub(struct Context* context) {
+    struct TaskManager* taskManager =  Gearef(context, TaskManager);
+    taskManager->taskManager = 0;  // cleared here; initDataGears then stores the created task manager
+    struct LoopCounter* loopCounter = Gearef(context, LoopCounter);
+    goto initDataGears(context, loopCounter, taskManager);
+}
+
+__code code1(struct Time* time) {
+    printf("cpus:\t\t%d\n", cpu_num);  // report the run configuration before starting
+    printf("gpus:\t\t%d\n", gpu_num);
+    printf("length:\t\t%d\n", length);
+    printf("length/task:\t%d\n", length/split);  // NOTE(review): divides by zero if "-s 0" was given
+    /* puts("queue"); */
+    /* print_queue(context->data[ActiveQueue]->queue.first); */
+    /* puts("tree"); */
+    /* print_tree(context->data[Tree]->tree.root); */
+    /* puts("result"); */
+    time->time = (union Data*)createTimeImpl(context);
+    time->next = C_createTask1;  // NOTE(review): presumably the gear entered after Time.start — confirm
+    goto meta(context, time->time->Time.start);
+}
+
+__code code2(struct Time* time, struct TaskManager* taskManager) {
+    time->next = C_code3;  // NOTE(review): presumably the gear entered after Time.end — confirm
+    taskManager->next = time->time->Time.end;  // NOTE(review): presumably entered once shutdown is done — confirm
+    goto meta(context, taskManager->taskManager->TaskManager.shutdown);
+}
+
+__code code3(struct LoopCounter* loopCounter) {
+    int i = loopCounter->i;  // index of the element checked by this invocation
+
+    if (i < length) {
+        //printf("%d\n", array_ptr[i]);
+        if (array_ptr[i] == (i*2)) {  // each element is expected to be twice its index
+            loopCounter->i++;
+            goto meta(context, C_code3);
+        } else
+            puts("wrong result");  // a mismatch falls through to the exit below
+
+    }
+
+    goto meta(context, C_exit_code);
+}
+
+__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
+    Array* array = new Array();
+    // The same array is handed to both tasks started below: createArray and twice.
+    par goto createArray(array, __exit);
+
+    par goto twice(array, iterate(split), __exit);
+    goto code2();
+}
+
+void init(int argc, char** argv) {  // parse -cpu N, -l N, -s N and -cuda into the globals
+    for (int i = 1; argv[i]; ++i) {  // argv[argc] is NULL, so argv[i+1] is always readable here
+        if (argv[i+1] && strcmp(argv[i], "-cpu") == 0)  // a value option given last has no value: skip it
+            cpu_num = (int)atoi(argv[i+1]);
+        else if (argv[i+1] && strcmp(argv[i], "-l") == 0)
+            length = (int)atoi(argv[i+1]);
+        else if (argv[i+1] && strcmp(argv[i], "-s") == 0)
+            split = (int)atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-cuda") == 0) {
+            gpu_num = 1;
+            CPU_CUDA = 0;
+        }
+    }
+}
+
+
+int main(int argc, char** argv) {
+    init(argc, argv);
+    // Input for the example: array_ptr[i] == i before any task runs.
+    array_ptr = NEWN(length, int);
+    if (!array_ptr && length > 0) { perror("NEWN(array_ptr)"); return 1; }
+    for(int i=0; i<length; i++)
+        array_ptr[i]=i;
+
+    struct Context* main_context = NEW(struct Context);
+    if (!main_context) { perror("NEW(struct Context)"); return 1; }
+    initContext(main_context);
+    main_context->next = C_initDataGears;
+    goto start_code(main_context);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/Atomic.h
--- a/src/parallel_execution/Atomic.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-typedef struct Atomic<Impl>{
-    union Data* atomic;
-    union Data** ptr;
-    union Data* oldData;
-    union Data* newData;
-    __code checkAndSet(Impl* atomic, union Data** ptr, union Data* oldData, union Data* newData, __code next(...), __code fail(...));
-    __code next(...);
-    __code fail(...);
-} Atomic;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/AtomicReference.cbc
--- a/src/parallel_execution/AtomicReference.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-#include "../context.h"
-#interface "Atomic.h"
-#include <stdio.h>
-
-Atomic* createAtomicReference(struct Context* context) {
-    struct Atomic* atomic = new Atomic();
-    struct AtomicReference* atomicReference = new AtomicReference();
-    atomic->atomic = (union Data*)atomicReference;
-    atomic->checkAndSet = C_checkAndSetAtomicReference;
-    return atomic;
-}
-
-__code checkAndSetAtomicReference(struct AtomicReference* atomic, union Data** ptr, union Data* oldData, union Data* newData, __code next(...), __code fail(...)) {
-    if (__sync_bool_compare_and_swap(ptr, oldData, newData)) {
-        goto next(...);
-    }
-    goto fail(...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/Buffer.h
--- a/src/parallel_execution/Buffer.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-typedef struct Buffer<Impl>{
-        union Data* buffer;
-        union Data* data;
-        __code put(Impl* buffer, union Data* data, __code next(...));
-        __code take(Impl* buffer, __code next(union Data*, ...));
-        __code next(...);
-} Buffer;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/CMakeLists.txt
--- a/src/parallel_execution/CMakeLists.txt	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,142 +0,0 @@
-cmake_minimum_required(VERSION 3.8)
-
-set(USE_CUDA,0)
-# -DUSE_CUDA
-#  add_definitions("-Wall -g -O")
-
-set(CMAKE_C_COMPILER $ENV{CBC_COMPILER})
-add_definitions("-Wall -g")
-
-# -DCMAKE_BUILD_TYPE=Debug
-set(CMAKE_C_FLAGS_DEBUG "-O0")
-
-if (${USE_CUDA})
-    include_directories("/usr/local/cuda/include")
-    set(NVCCFLAG "-std=c++11" "-g" "-O0" )
-    if (UNIX AND NOT APPLE) # LINUX
-        set(CUDA_LINK_FLAGS "-L/usr/local/cuda/lib64 -lcuda -lcudart")
-    elseif (APPLE)
-        set(CUDA_LINK_FLAGS "-framework CUDA -lc++ -Wl,-search_paths_first -Wl,-headerpad_max_install_names /usr/local/cuda/lib/libcudart_static.a -Wl,-rpath,/usr/local/cuda/lib")
-    endif()
-    find_package(CUDA REQUIRED)
-    SET( CMAKE_EXE_LINKER_FLAGS  "${CMAKE_EXE_LINKER_FLAGS} ${CUDA_LINK_FLAGS}" )
-endif()
-
-macro( GearsCommand )
-    set( _OPTIONS_ARGS )
-    set( _ONE_VALUE_ARGS TARGET )
-    set( _MULTI_VALUE_ARGS SOURCES )
-    cmake_parse_arguments( _Gears "${_OPTIONS_ARGS}" "${_ONE_VALUE_ARGS}" "${_MULTI_VALUE_ARGS}" ${ARGN} )
-
-    set (_Gears_CSOURCES)
-    foreach(i ${_Gears_SOURCES})
-        if (${i} MATCHES "\\.cbc")
-            string(REGEX REPLACE "(.*).cbc" "c/\\1.c" j ${i})
-            add_custom_command (
-                OUTPUT    ${j}
-                DEPENDS   ${i}
-                COMMAND  "perl" "generate_stub.pl" "-o" ${j} ${i}
-            )
-        elseif (${i} MATCHES "\\.cu")
-            string(REGEX REPLACE "(.*).cu" "c/\\1.ptx" j ${i})
-            add_custom_command (
-                OUTPUT    ${j}
-                DEPENDS   ${i}
-                COMMAND  nvcc ${NVCCFLAG} -c -ptx -o ${j} ${i}
-            )
-        else()
-            set(j ${i})
-        endif()
-        list(APPEND _Gears_CSOURCES ${j})
-    endforeach(i)
-
-    add_custom_command (
-          OUTPUT    c/${_Gears_TARGET}-context.c
-          DEPENDS   ${_Gears_CSOURCES}
-          COMMAND  "perl" "generate_context.pl" "-o" ${_Gears_TARGET} "-w" ${_Gears_CSOURCES}
-    )
-    add_executable(${_Gears_TARGET} ${_Gears_CSOURCES} c/${_Gears_TARGET}-context.c)
-    target_link_libraries(${_Gears_TARGET} m pthread)
-endmacro()
-
-
-GearsCommand(
-  TARGET
-      twice
-  SOURCES
-      examples/twice/main.cbc examples/twice/createArray.cbc examples/twice/twice.cbc examples/twice/printArray.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc TimerImpl.cbc MultiDimIterator.cbc AtomicReference.cbc
-)
-
-GearsCommand(
-  TARGET
-      calc
-  SOURCES
-      examples/calc/calc.cbc examples/calc/add.cbc examples/calc/mult.cbc examples/calc/initIntegerDataGears.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc AtomicReference.cbc
-)
-
-GearsCommand(
-  TARGET
-      bitonicSort
-  SOURCES
-      examples/bitonicSort/bitonicSort.cbc examples/bitonicSort/bitonicSwap.cbc examples/bitonicSort/makeArray.cbc examples/bitonicSort/printArray.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc MultiDimIterator.cbc TimerImpl.cbc AtomicReference.cbc
-)
-
-if (${USE_CUDA})
-    GearsCommand(
-      TARGET
-          CUDAtwice
-      SOURCES 
-          examples/twice/main.cbc examples/twice/twice.cbc examples/twice/CUDAtwice.cu examples/twice/createArray.cbc examples/twice/printArray.cbc CPUWorker.cbc TimerImpl.cbc examples/twice/twice.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc CUDAWorker.cbc cuda.c MultiDimIterator.cbc CUDAExecutor.cbc AtomicReference.cbc 
-    )
-    set_target_properties(CUDAtwice PROPERTIES COMPILE_FLAGS "-Wall -g -DUSE_CUDAWorker=1")
-
-    GearsCommand(
-      TARGET
-          CUDAbitonicSort
-      SOURCES 
-          examples/bitonicSort/bitonicSort.cbc examples/bitonicSort/bitonicSwap.cbc examples/bitonicSort/CUDAbitonicSwap.cu examples/bitonicSort/makeArray.cbc examples/bitonicSort/printArray.cbc CPUWorker.cbc CUDAWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc cuda.c MultiDimIterator.cbc TimerImpl.cbc CUDAExecutor.cbc AtomicReference.cbc
-    )
-    set_target_properties(CUDAbitonicSort PROPERTIES COMPILE_FLAGS "-Wall -g -DUSE_CUDAWorker=1")
-endif()
-
-GearsCommand(
-  TARGET
-      queue_test
-  SOURCES 
-      test/queue_test.cbc SingleLinkedQueue.cbc
-)
-
-GearsCommand(
-  TARGET
-      stack_test
-  SOURCES 
-      test/stack_test.cbc SingleLinkedStack.cbc SingleLinkedQueue.cbc
-)
-
-GearsCommand(
-  TARGET
-      multiDimIterator_test
-  SOURCES
-      test/multiDimIterator_test.cbc test/printIterator.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc MultiDimIterator.cbc AtomicReference.cbc
-)
-
-#GearsCommand(
-#  TARGET
-#      sort
-#  SOURCES
-#      examples/bitonicSort/sort.cbc
-#)
-
-GearsCommand(
-  TARGET
-      rbtree
-  SOURCES
-      SingleLinkedQueue.cbc test/rbTree_test.cbc RedBlackTree.cbc SingleLinkedStack.cbc compare.c
-)
-
-GearsCommand(
-  TARGET
-      boundedBuffer
-  SOURCES
-  examples/boundedBuffer/main.cbc examples/boundedBuffer/initBuffer.cbc examples/boundedBuffer/SemaphoreImpl.cbc examples/boundedBuffer/BoundedBuffer.cbc examples/boundedBuffer/consumer.cbc examples/boundedBuffer/producer.cbc SpinLock.cbc CPUWorker.cbc TaskManagerImpl.cbc SingleLinkedQueue.cbc SynchronizedQueue.cbc MultiDimIterator.cbc AtomicReference.cbc
-)
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/CPUWorker.cbc
--- a/src/parallel_execution/CPUWorker.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,124 +0,0 @@
-#include "../context.h"
-#interface "TaskManager.h"
-#interface "Worker.h"
-#interface "Iterator.h"
-#interface "Queue.h"
-
-static void startWorker(Worker* worker);
-
-Worker* createCPUWorker(struct Context* context, int id, Queue* queue) {
-    struct Worker* worker = new Worker();
-    struct CPUWorker* cpuWorker = new CPUWorker();
-    worker->worker = (union Data*)cpuWorker;
-    worker->tasks = queue;
-    cpuWorker->id = id;
-    cpuWorker->loopCounter = 0;
-    worker->taskReceive = C_taskReceiveCPUWorker;
-    worker->shutdown = C_shutdownCPUWorker;
-    pthread_create(&worker->thread, NULL, (void*)&startWorker, worker);
-    return worker;
-}
-
-static void startWorker(struct Worker* worker) {
-    struct CPUWorker* cpuWorker = &worker->worker->CPUWorker;
-    cpuWorker->context = NEW(struct Context);
-    initContext(cpuWorker->context);
-    Gearef(cpuWorker->context, Worker)->worker = (union Data*)worker;
-    Gearef(cpuWorker->context, Worker)->tasks = worker->tasks;
-    goto meta(cpuWorker->context, worker->taskReceive);
-}
-
-__code taskReceiveCPUWorker(struct CPUWorker* worker, struct Queue* tasks) {
-    goto tasks->take(getTaskCPUWorker);
-}
-
-__code getTaskCPUWorker(struct CPUWorker* cpuWorker, struct Context* task, struct Worker* worker) {
-    if (!task) {
-        goto worker->shutdown(); // end thread
-    }
-    task->worker = worker;
-    enum Code taskCg = task->next;
-    task->next = C_odgCommitCPUWorker; // commit outputDG after task exec
-    goto meta(task, taskCg); // switch task context
-}
-
-__code getTaskCPUWorker_stub(struct Context* context) {
-    CPUWorker* cpuWorker = (CPUWorker*)GearImpl(context, Worker, worker);
-    Worker* worker = &Gearef(context,Worker)->worker->Worker;
-    struct Context* task = &Gearef(context, Queue)->data->Context;
-    goto getTaskCPUWorker(context, cpuWorker, task, worker);
-}
-
-__code odgCommitCPUWorker(struct CPUWorker* worker, struct Context* task) {
-    if (task->iterate) {
-        struct Iterator* iterator = task->iterator;
-        goto iterator->barrier(task, odgCommitCPUWorker1, odgCommitCPUWorker6);
-    } else {
-        goto odgCommitCPUWorker1();
-    }
-}
-
-__code odgCommitCPUWorker_stub(struct Context* context) {
-    // switch worker context
-    struct Context* workerContext = context->worker->worker->CPUWorker.context;
-    Gearef(workerContext, Worker)->worker = (union Data*)context->worker;
-    Gearef(workerContext, Worker)->task = context;
-    CPUWorker* cpuWorker = (CPUWorker*)GearImpl(workerContext, Worker, worker);
-    goto odgCommitCPUWorker(workerContext,
-                            cpuWorker,
-                            context);
-}
-
-__code odgCommitCPUWorker1(struct CPUWorker* worker, struct Context* task) {
-    int i = worker->loopCounter;
-    if (task->odg+i < task->maxOdg) {
-        goto odgCommitCPUWorker2();
-    }
-    worker->loopCounter = 0;
-    struct TaskManager* taskManager = task->taskManager;
-    goto taskManager->decrementTaskCount(odgCommitCPUWorker6);
-}
-
-__code odgCommitCPUWorker2(struct CPUWorker* worker, struct Context* task) {
-    int i = worker->loopCounter;
-    struct Queue* queue = GET_WAIT_LIST(task->data[task->odg+i]);
-    goto queue->isEmpty(odgCommitCPUWorker3, odgCommitCPUWorker5);
-}
-
-__code odgCommitCPUWorker3(struct CPUWorker* worker, struct Context* task) {
-    int i = worker->loopCounter;
-    struct Queue* queue = GET_WAIT_LIST(task->data[task->odg+i]);
-    goto queue->take(odgCommitCPUWorker4);
-}
-
-__code odgCommitCPUWorker4(struct CPUWorker* worker, struct Context* task, struct Context* waitTask) {
-    if (__sync_fetch_and_sub(&waitTask->idgCount, 1) == 1) { // atomic decrement idg counter(__sync_fetch_and_sub function return initial value of waitTask->idgCount point)
-        struct TaskManager* taskManager = waitTask->taskManager;
-        goto taskManager->spawn(waitTask, odgCommitCPUWorker2);
-    }
-    goto odgCommitCPUWorker2();
-}
-
-__code odgCommitCPUWorker4_stub(struct Context* context) {
-    CPUWorker* cpuWorker     = (CPUWorker*)GearImpl(context, Worker, worker);
-    struct Context* task     = Gearef(context, Worker)->task;
-    struct Context* waitTask = &Gearef(context, Queue)->data->Context;
-    goto odgCommitCPUWorker4(context,
-                             cpuWorker,
-                             task,
-                             waitTask);
-}
-
-__code odgCommitCPUWorker5(struct CPUWorker* worker, struct Context* task) {
-    worker->loopCounter++;
-    goto odgCommitCPUWorker1();
-}
-
-__code odgCommitCPUWorker6(struct CPUWorker* worker, struct Context* task) {
-    struct Worker* taskWorker = task->worker;
-    goto taskWorker->taskReceive(taskWorker->tasks);
-}
-
-__code shutdownCPUWorker(struct CPUWorker* worker) {
-    goto exit_code();
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/CUDAExecutor.cbc
--- a/src/parallel_execution/CUDAExecutor.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,108 +0,0 @@
-#include "../context.h"
-#interface "Executor.h"
-#interface "Timer.h"
-#include <stdio.h>
-#include <math.h>
-
-Executor* createCUDAExecutor(struct Context* context, CUdevice device) {
-    struct Executor* executor = new Executor();
-    struct CUDAExecutor* cudaExecutor = new CUDAExecutor();
-    cudaExecutor->timer = createTimerImpl(context);
-    checkCudaErrors(cuDeviceGetAttribute(&cudaExecutor->maxThreadPerBlock, CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, device));
-    executor->executor = (union Data*)cudaExecutor;
-    executor->read  = C_readCUDAExecutor;
-    executor->exec  = C_execCUDAExecutor;
-    executor->write = C_writeCUDAExecutor;
-    return executor;
-}
-
-__code readCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) {
-    struct CUDABuffer* buffer = executor->buffer;
-    int paramLen = buffer->inputLen + buffer->outputLen;
-    executor->kernelParams = (CUdeviceptr**)ALLOCATE_PTR_ARRAY(context, CUdeviceptr, paramLen);
-    for (int i = 0; i < paramLen; i++) {
-        CUdeviceptr* deviceptr = new CUdeviceptr();
-        // memory allocate
-        union Data* data = i < buffer->inputLen? buffer->inputData[i] : buffer->outputData[i-buffer->inputLen];
-        checkCudaErrors(cuMemAlloc(deviceptr, GET_SIZE(data)));
-        checkCudaErrors(cuMemcpyHtoD(*deviceptr, data, GET_SIZE(data)));
-        // Synchronous data transfer(host to device)
-        executor->kernelParams[i] = deviceptr;
-    }
-    // TODO: Implements pipeline
-    // goto next(...);
-    struct Timer* timer = executor->timer;
-    goto timer->start(execCUDAExecutor);
-}
-
-int computeblockDim(int count, int maxThreadPerBlock) {
-    return count < maxThreadPerBlock ? count : maxThreadPerBlock;
-}
-
-void calcBlockMaxThread(struct MultiDimIterator* iterator, struct CUDAExecutor* executor) {
-    executor->maxThreadPerBlockX = 1;
-    executor->maxThreadPerBlockY = 1;
-    executor->maxThreadPerBlockZ = 1;
-    // maxThreadPerBlockX * maxThreadPerBlockY * maxThreadPerBlockZ <= maxThreadPerBlock
-    if (iterator->x > 1 && iterator->y == 1 && iterator->z == 1) {
-        executor->maxThreadPerBlockX = executor->maxThreadPerBlock;
-        executor->maxThreadPerBlockY = 1;
-        executor->maxThreadPerBlockZ = 1;
-    } else if (iterator->x > 1 && iterator->y > 1 && iterator->z == 1) {
-        int ln_2 = log2(executor->maxThreadPerBlock);
-        int maxThread = 1 << (ln_2/2);
-        executor->maxThreadPerBlockX = maxThread;
-        executor->maxThreadPerBlockY = maxThread;
-        executor->maxThreadPerBlockZ = 1;
-    } else {
-        int ln_2 = log2(executor->maxThreadPerBlock);
-        int maxThread = 1 << (ln_2/3);
-        executor->maxThreadPerBlockX = maxThread * (1 << (ln_2%3));
-        executor->maxThreadPerBlockY = maxThread;
-        executor->maxThreadPerBlockZ = maxThread;
-    }
-}
-
-__code execCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) {
-    task->num_exec = 1;
-    if (task->iterate) {
-        struct MultiDimIterator* iterator = &task->iterator->iterator->MultiDimIterator;
-        calcBlockMaxThread(iterator, executor);
-        int blockDimX = computeblockDim(iterator->x, executor->maxThreadPerBlockX);
-        int blockDimY = computeblockDim(iterator->y, executor->maxThreadPerBlockY);
-        int blockDimZ = computeblockDim(iterator->z, executor->maxThreadPerBlockZ);
-        // launch kernel
-        checkCudaErrors(cuLaunchKernel(task->function,
-                    iterator->x/blockDimX, iterator->y/blockDimY, iterator->z/blockDimZ,
-                    blockDimX, blockDimY, blockDimZ,
-                    0, NULL, (void**)executor->kernelParams, NULL));
-    } else {
-        checkCudaErrors(cuLaunchKernel(task->function,
-                    1, 1, 1,
-                    1, 1, 1,
-                    0, NULL, (void**)executor->kernelParams, NULL));
-    }
-    // TODO: Implements pipeline
-    // goto next(...);
-    goto writeCUDAExecutor();
-}
-
-__code writeCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) {
-    // Asynchronous launch kernel
-    checkCudaErrors(cuCtxSynchronize());
-    struct Timer* timer = executor->timer;
-    goto timer->end(writeCUDAExecutor1);
-}
-
-__code writeCUDAExecutor1(struct CUDAExecutor* executor, struct Context* task, __code next(...)) {
-    //結果を取ってくるコマンドを入力する
-    struct CUDABuffer* buffer = executor->buffer;
-    int paramLen = buffer->inputLen + buffer->outputLen;
-    for (int i = 0; i < paramLen; i++) {
-        CUdeviceptr deviceptr =  *(executor->kernelParams[i]);
-        union Data* data = i < buffer->inputLen? buffer->inputData[i] : buffer->outputData[i-buffer->inputLen];
-        checkCudaErrors(cuMemcpyDtoH(data, deviceptr, GET_SIZE(data)));
-        cuMemFree(deviceptr);
-    }
-    goto next(...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/CUDAWorker.cbc
--- a/src/parallel_execution/CUDAWorker.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,131 +0,0 @@
-#include "../context.h"
-#interface "TaskManager.h"
-#interface "Worker.h"
-#interface "Iterator.h"
-#interface "Queue.h"
-
-extern void cudaInit(struct CUDAWorker *cudaWorker,int phase, int deviceNum);
-extern void cudaShutdown(CUDAWorker *cudaWorker);
-
-static void startCUDAWorker(Worker* worker);
-
-Worker* createCUDAWorker(struct Context* context, int id, Queue* queue, int deviceNum) {
-    struct Worker* worker = new Worker();
-    struct CUDAWorker* cudaWorker = new CUDAWorker();
-    worker->worker = (union Data*)cudaWorker;
-    worker->tasks = queue;
-    cudaWorker->id = id;
-    cudaWorker->loopCounter = 0;
-    cudaWorker->deviceNum = deviceNum;
-    worker->taskReceive = C_taskReceiveCUDAWorker;
-    worker->shutdown = C_shutdownCUDAWorker;
-    pthread_create(&worker->thread, NULL, (void*)&startCUDAWorker, worker);
-    return worker;
-}
-
-static void startCUDAWorker(Worker* worker) {
-    struct CUDAWorker* cudaWorker = &worker->worker->CUDAWorker;
-    cudaInit(cudaWorker, 0, cudaWorker->deviceNum);
-    cudaWorker->context  = NEW(struct Context);
-    initContext(cudaWorker->context);
-    cudaWorker->executor = createCUDAExecutor(cudaWorker->context, cudaWorker->device);
-    Gearef(cudaWorker->context, Worker)->worker = (union Data*)worker;
-    Gearef(cudaWorker->context, Worker)->tasks = worker->tasks;
-    goto meta(cudaWorker->context, worker->taskReceive);
-}
-
-__code taskReceiveCUDAWorker(struct Worker* worker, struct Queue* tasks) {
-    goto tasks->take(getTaskCUDAWorker);
-}
-
-__code getTaskCUDAWorker(struct CUDAWorker* cudaWorker, struct Context* task, struct Worker* worker) {
-    if (!task) {
-        goto worker->shutdown(); // end thread
-    }
-    task->worker = worker;
-    enum Code taskCg = task->next;
-    task->next = C_odgCommitCUDAWorker; // commit outputDG after task exec
-    goto meta(task, taskCg); // switch task context
-}
-
-__code getTaskCUDAWorker_stub(struct Context* context) {
-    CUDAWorker* cudaWorker = (CUDAWorker*)GearImpl(context, Worker, worker);
-    Worker* worker = &Gearef(context,Worker)->worker->Worker;
-    struct Context* task = &Gearef(context, Queue)->data->Context;
-    goto getTaskCUDAWorker(context, cudaWorker, task, worker);
-}
-
-__code odgCommitCUDAWorker(struct CUDAWorker* worker, struct Context* task) {
-    if (task->iterate) {
-        struct Iterator* iterator = task->iterator;
-        goto iterator->barrier(task, odgCommitCUDAWorker1, odgCommitCUDAWorker6);
-    } else {
-        goto odgCommitCUDAWorker1();
-    }
-}
-
-__code odgCommitCUDAWorker_stub(struct Context* context) {
-    // switch worker context
-    struct Context* workerContext = context->worker->worker->CUDAWorker.context;
-    Gearef(workerContext, Worker)->worker = (union Data*)context->worker;
-    Gearef(workerContext, Worker)->task = context;
-    CUDAWorker* cudaWorker = (CUDAWorker*)GearImpl(workerContext, Worker, worker);
-    goto odgCommitCUDAWorker(workerContext,
-                            cudaWorker,
-                            context);
-}
-
-__code odgCommitCUDAWorker1(struct CUDAWorker* worker, struct Context* task) {
-    int i = worker->loopCounter;
-    if (task->odg+i < task->maxOdg) {
-        goto odgCommitCUDAWorker2();
-    }
-    worker->loopCounter = 0;
-    struct TaskManager* taskManager = task->taskManager;
-    goto taskManager->decrementTaskCount(odgCommitCUDAWorker6);
-}
-
-__code odgCommitCUDAWorker2(struct CUDAWorker* worker, struct Context* task) {
-    int i = worker->loopCounter;
-    struct Queue* queue = GET_WAIT_LIST(task->data[task->odg+i]);
-    goto queue->isEmpty(odgCommitCUDAWorker3, odgCommitCUDAWorker5);
-}
-
-__code odgCommitCUDAWorker3(struct CUDAWorker* worker, struct Context* task) {
-    int i = worker->loopCounter;
-    struct Queue* queue = GET_WAIT_LIST(task->data[task->odg+i]);
-    goto queue->take(odgCommitCUDAWorker4);
-}
-
-__code odgCommitCUDAWorker4(struct CUDAWorker* worker, struct Context* task, struct Context* waitTask) {
-    if (__sync_fetch_and_sub(&waitTask->idgCount, 1) == 1) { // atomic decrement idg counter(__sync_fetch_and_sub function return initial value of waitTask->idgCount point)
-        struct TaskManager* taskManager = waitTask->taskManager;
-        goto taskManager->spawn(waitTask, odgCommitCUDAWorker2);
-    }
-    goto odgCommitCUDAWorker2();
-}
-
-__code odgCommitCUDAWorker4_stub(struct Context* context) {
-    CUDAWorker* cudaWorker     = (CUDAWorker*)GearImpl(context, Worker, worker);
-    struct Context* task     = Gearef(context, Worker)->task;
-    struct Context* waitTask = &Gearef(context, Queue)->data->Context;
-    goto odgCommitCUDAWorker4(context,
-                             cudaWorker,
-                             task,
-                             waitTask);
-}
-
-__code odgCommitCUDAWorker5(struct CUDAWorker* worker, struct Context* task) {
-    worker->loopCounter++;
-    goto odgCommitCUDAWorker1();
-}
-
-__code odgCommitCUDAWorker6(struct CUDAWorker* worker, struct Context* task) {
-    struct Worker* taskWorker = task->worker;
-    goto taskWorker->taskReceive(taskWorker->tasks);
-}
-
-__code shutdownCUDAWorker(struct CUDAWorker* worker) {
-    cudaShutdown(worker);
-    goto meta(context, C_exit_code);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/CodeGear.h
--- a/src/parallel_execution/CodeGear.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-typedef struct CodeGear<Impl>{
-        union Data* codeGear;
-        enum Code code;
-        __code code(struct Integer* input1, struct Integer* input2, __code next(struct Integer* output, ...));
-        __code setInfo(struct Context* codeGear, union Data** dataGears, __code next(...));
-        union Data* dataGears[10];
-        __code next(...);
-} CodeGear;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/Executor.h
--- a/src/parallel_execution/Executor.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-typedef struct Executor<Impl>{
-    union Data* Executor;
-    struct Context* task;
-    __code next(...);
-    __code read(Impl* executor, struct Context* task, __code next(...));
-    __code exec(Impl* executor, struct Context* task, __code next(...));
-    __code write(Impl* executor, struct Context* task, __code next(...));
-} Executor;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/Iterator.h
--- a/src/parallel_execution/Iterator.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-typedef struct Iterator<Impl>{
-        union Data* iterator;
-        struct Context* task;
-        int numGPU;
-        __code exec(Impl* iterator, struct Context* task, int numGPU, __code next(...));
-        __code barrier(Impl* iterator, struct Context* task, __code next(...), __code whenWait(...));
-        __code whenWait(...);
-        __code next(...);
-} Iterator;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/Lock.h
--- a/src/parallel_execution/Lock.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-typedef struct Lock<Impl>{
-        union Data* lock;
-        __code doLock(Impl* lock, __code next(...)); 
-        __code doUnlock(Impl* lock, __code next(...)); 
-        __code next(...);
-} Lock;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/LockImpl.cbc
--- a/src/parallel_execution/LockImpl.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,86 +0,0 @@
-#include "../context.h"
-#interface "Queue.h"
-#interface "Atomic.h"
-#interface "Lock.h"
-#interface "Worker.h"
-#interface "TaskManager.h"
-
-Lock* createLockImpl(struct Context* context) {
-    struct Lock* lock = new Lock();
-    struct LockImpl* lockImpl = new LockImpl();
-    lockImpl->lock = NULL;
-    lockImpl->waitThreadQueue = createSynchronizedQueue(context);
-    lockImpl->atomic = createAtomicReference(context);
-    lock->lock = (union Data*)lockImpl;
-    lock->doLock = C_doLockLockImpl;
-    lock->doUnlock = C_doUnlockLockImpl;
-    return lock;
-}
-
-__code doLockLockImpl(struct LockImpl* lock, __code next(...)) {
-    struct Atomic* atomic = lock->atomic;
-    goto atomic->checkAndSet(&lock->lock, NULL, 1, doLockLockImpl1, doLockLockImpl2);
-}
-
-__code doLockLockImpl1(struct LockImpl* lock, __code next(...)) {
-    lock->lockContext = context;
-    goto next(...);
-}
-
-__code doLockLockImpl2(struct LockImpl* lock, __code next(...)) {
-    struct Queue* queue = lock->waitThreadQueue;
-    context->next= C_doLockLockImpl;
-    printf("Put task\n");
-    goto queue->put(context, doLockLockImpl3);
-}
-
-__code doLockLockImpl3(struct LockImpl* lock, struct Worker* worker, __code next(...)) {
-    goto worker->taskReceive(); // goto shceduler
-}
-
-__code doLockLockImpl3_stub(struct Context* context) {
-    // switch worker context
-    struct Context* workerContext = context->worker->worker->CPUWorker.context;
-    LockImpl* lockImpl = (LockImpl*)GearImpl(context, Lock, lock);
-    goto doLockLockImpl3(workerContext,
-            lockImpl,
-            context->worker,
-            Gearef(context, Lock)->next);
-}
-
-__code doUnlockLockImpl(struct LockImpl* lock, __code next(...)) {
-    if (lock->lockContext == context) {
-        struct Atomic* atomic = lock->atomic;
-        goto atomic->checkAndSet(&lock->lock, 1, NULL, doUnlockLockImpl1, doUnlockLockImpl);
-    }
-    goto next(...);
-}
-
-__code doUnlockLockImpl1(struct LockImpl* lock, __code next(...)) {
-    struct Queue* queue = lock->waitThreadQueue;
-    goto queue->isEmpty(doUnlockLockImpl2, doUnlockLockImpl4);
-}
-
-__code doUnlockLockImpl2(struct LockImpl* lock, __code next(...)) {
-    struct Queue* queue = lock->waitThreadQueue;
-    printf("%p: Take task\n", lock);
-    goto queue->take(doUnlockLockImpl3);
-}
-
-__code doUnlockLockImpl3(struct LockImpl* lock, struct Context* waitTask, __code next(...)) {
-    struct TaskManager* taskManager = waitTask->taskManager;
-    goto taskManager->spawn(waitTask, next(...)); //notify
-}
-
-__code doUnlockLockImpl3_stub(struct Context* context) {
-    LockImpl* lockImpl = (LockImpl*)GearImpl(context, Lock, lock);
-    struct Context* waitTask = &Gearef(context, Queue)->data->Context;
-    goto doUnlockLockImpl3(context,
-            lockImpl,
-            waitTask,
-            Gearef(context, Lock)->next);
-}
-
-__code doUnlockLockImpl4(struct LockImpl* lock, __code next(...)) {
-    goto next(...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/MultiDimIterator.cbc
--- a/src/parallel_execution/MultiDimIterator.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,96 +0,0 @@
-#include "../context.h"
-#interface "Iterator.h"
-#interface "TaskManager.h"
-#include <stdio.h>
-
-Iterator* createMultiDimIterator(struct Context* context, int x, int y, int z) {
-    struct Iterator* iterator = new Iterator();
-    struct MultiDimIterator* multiDimIterator = new MultiDimIterator();
-    iterator->iterator = (union Data*)multiDimIterator;
-    iterator->exec = C_execMultiDimIterator;
-    iterator->barrier = C_barrierMultiDimIterator;
-    multiDimIterator->x = x;
-    multiDimIterator->y = y;
-    multiDimIterator->z = z;
-    multiDimIterator->count = x * y * z;
-    multiDimIterator->counterX = 0;
-    multiDimIterator->counterY = 0;
-    multiDimIterator->counterZ = 0;
-    return iterator;
-}
-
-/**
- * create iterateTask with index, that copy from task argument
- * @return created iterateTask
- * @param task      task of the copy source
- * @x     index
- */
-struct Context* createMultiDimIterateTask(struct Context* task, int x, int y, int z) {
-    struct Context* task1 = NEW(struct Context);
-    initContext(task1);
-    task1->taskManager = task->taskManager;
-    task1->next     = task->next;
-    task1->iterate  = 1;
-    task1->iterator = task->iterator;
-    task1->idgCount = task->idgCount;
-    task1->idg      = task->idg;
-    task1->maxIdg   = task->maxIdg;
-    for(int i = task1->idg; i < task1->maxIdg; i++) {
-        task1->data[i] = task->data[i];
-    }
-
-    // create index data gear and register input data to iterate task
-    struct MultiDim* multiDim = &ALLOCATE_DATA_GEAR(task1, MultiDim)->MultiDim;
-    multiDim->x = x;
-    multiDim->y = y;
-    multiDim->z = z;
-    task1->data[task1->maxIdg++] = (union Data*)multiDim;
-    task1->odg      = task->odg + 1;
-    task1->maxOdg   = task->maxOdg + 1;
-    for (int i = task1->odg; i < task1->maxOdg; i++) {
-        task1->data[i] = task->data[i-1];
-    }
-
-    return task1;
-}
-
-__code execMultiDimIterator(struct MultiDimIterator* iterator, struct Context* task, int numGPU, __code next(...)) {
-    // No GPU device
-    if (numGPU == 0) {
-        goto execMultiDimIterator1();
-    }
-    task->iterate = 1;
-    task->gpu = 1;
-    struct TaskManager* taskManager = task->taskManager;
-    goto taskManager->spawn(task, next(...));
-}
-
-__code execMultiDimIterator1(struct MultiDimIterator* iterator, struct Context* task, __code next(...)) {
-    int x = iterator->counterX;
-    int y = iterator->counterY;
-    int z = iterator->counterZ;
-    struct Context* iterateTask = createMultiDimIterateTask(task, x, y, z);
-    struct TaskManager* taskManager = task->taskManager;
-    goto taskManager->spawn(iterateTask, execMultiDimIterator2);
-}
-
-__code execMultiDimIterator2(struct MultiDimIterator* iterator, struct Context* task, __code next(...)) {
-    if (++iterator->counterX >= iterator->x) {
-        iterator->counterX = 0;
-        if (++iterator->counterY >= iterator->y) {
-            iterator->counterY = 0;
-            if (++iterator->counterZ >= iterator->z) {
-                iterator->counterZ = 0;
-                goto next(...);
-            }
-        }
-    }
-    goto execMultiDimIterator1();
-}
-
-__code barrierMultiDimIterator(struct MultiDimIterator* iterator, struct Context* task, __code next(...), __code whenWait(...)) {
-    if (task->gpu || __sync_fetch_and_sub(&iterator->count, 1) == 1) {
-        goto next(...);
-    }
-    goto whenWait(...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/Queue.h
--- a/src/parallel_execution/Queue.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-typedef struct Queue<Impl>{
-        union Data* queue;
-        union Data* data;
-        __code whenEmpty(...);
-        __code clear(Impl* queue, __code next(...));
-        __code put(Impl* queue, union Data* data, __code next(...));
-        __code take(Impl* queue, __code next(union Data*, ...));
-        __code isEmpty(Impl* queue, __code next(...), __code whenEmpty(...));
-        __code next(...);
-} Queue;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/RedBlackTree.agda
--- a/src/parallel_execution/RedBlackTree.agda	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,145 +0,0 @@
-module RedBlackTree where
-
-open import stack
-open import Level
-
-record TreeMethods {n m : Level } {a : Set n } {t : Set m } (treeImpl : Set n ) : Set (m Level.⊔ n) where
-  field
-    putImpl : treeImpl -> a -> (treeImpl -> t) -> t
-    getImpl  : treeImpl -> (treeImpl -> Maybe a -> t) -> t
-open TreeMethods
-
-record Tree  {n m : Level } {a : Set n } {t : Set m } (treeImpl : Set n ) : Set (m Level.⊔ n) where
-  field
-    tree : treeImpl
-    treeMethods : TreeMethods {n} {m} {a} {t} treeImpl
-  putTree : a -> (Tree treeImpl -> t) -> t
-  putTree d next = putImpl (treeMethods ) tree d (\t1 -> next (record {tree = t1 ; treeMethods = treeMethods} ))
-  getTree : (Tree treeImpl -> Maybe a -> t) -> t
-  getTree next = getImpl (treeMethods ) tree (\t1 d -> next (record {tree = t1 ; treeMethods = treeMethods} ) d )
-
-open Tree
-
-data Color {n : Level } : Set n where
-  Red   : Color
-  Black : Color
-
-data CompareResult {n : Level } : Set n where
-  LT : CompareResult
-  GT : CompareResult
-  EQ : CompareResult
-
-record Node {n : Level } (a k : Set n) : Set n where
-  inductive
-  field
-    key   : k
-    value : a
-    right : Maybe (Node a k)
-    left  : Maybe (Node a k)
-    color : Color {n}
-open Node
-
-record RedBlackTree {n m : Level } {t : Set m} (a k si : Set n) : Set (m Level.⊔ n) where
-  field
-    root : Maybe (Node a k)
-    nodeStack : Stack {n} {m} (Node a k) {t} si
-    compare : k -> k -> CompareResult {n}
-
-open RedBlackTree
-
-open Stack
-
---
--- put new node at parent node, and rebuild tree to the top
---
-{-# TERMINATING #-}   -- https://agda.readthedocs.io/en/v2.5.3/language/termination-checking.html
-replaceNode : {n m : Level } {t : Set m } {a k si : Set n} -> RedBlackTree {n} {m} {t} a k si -> Stack (Node a k) si -> Node a k -> Node a k -> (RedBlackTree {n} {m} {t} a k si -> t) -> t
-replaceNode {n} {m} {t} {a} {k} {si} tree s parent n0 next = popStack s (
-      \s grandParent -> replaceNode1 s grandParent ( compare tree (key parent) (key n0) ) )
-  where
-        replaceNode1 : Stack (Node a k) si -> Maybe ( Node a k ) -> CompareResult -> t
-        replaceNode1 s Nothing LT = next ( record tree { root = Just ( record parent { left = Just n0 ; color = Black } ) } )   
-        replaceNode1 s Nothing GT = next ( record tree { root = Just ( record parent { right = Just n0 ; color = Black } ) } )   
-        replaceNode1 s Nothing EQ = next ( record tree { root = Just ( record parent { right = Just n0 ; color = Black } ) } )   
-        replaceNode1 s (Just grandParent) result with result
-        ... | LT =  replaceNode tree s grandParent ( record parent { left = Just n0 } ) next
-        ... | GT =  replaceNode tree s grandParent ( record parent { right = Just n0 } ) next
-        ... | EQ =  next tree 
-
-rotateRight : {n m : Level } {t : Set m } {a k si : Set n} -> RedBlackTree {n} {m} {t} a k si -> Stack (Node  a k) {t} si -> Node a k -> Node a k -> Node a k -> (RedBlackTree {n} {m} {t} a k si -> t) -> t
-rotateRight {n} {m} {t} {a} {k} {si} tree s n0 parent grandParent next = {!!}
-
-rotateLeft : {n m : Level } {t : Set m } {a k si : Set n} -> RedBlackTree {n} {m} {t} a k si -> Stack (Node a k) {t} si -> Node a k -> Node a k -> Node a k -> (RedBlackTree {n} {m} {t} a k si -> t) -> t
-rotateLeft {n} {m} {t} {a} {k} {si} tree s n0 parent grandParent next = {!!}
-
-insertCase5 : {n m : Level } {t : Set m } {a k si : Set n} -> RedBlackTree {n} {m} {t} a k si -> Stack (Node a k) {t} si -> Node a k -> Node a k -> Node a k -> (RedBlackTree {n} {m} {t} a k si -> t) -> t
-insertCase5 {n} {m} {t} {a} {k} {si} tree s n0 parent grandParent next = {!!}
-
-insertCase4 : {n m : Level } {t : Set m } {a k si : Set n} -> RedBlackTree {n} {m} {t} a k si -> Stack (Node a k) {t} si -> Node a k -> Node a k -> Node a k -> (RedBlackTree {n} {m} {t} a k si -> t) -> t
-insertCase4 {n} {m} {t} {a} {k} {si} tree s n0 parent grandParent next = {!!}
-
-{-# TERMINATING #-}
-insertNode : {n m : Level } {t : Set m } {a k si : Set n} -> RedBlackTree {n} {m} {t} a k si -> Stack (Node a k) {t} si -> Node a k -> (RedBlackTree {n} {m} {t} a k si -> t) -> t
-insertNode {n} {m} {t} {a} {k} {si} tree s n0 next = get2Stack s (\ s d1 d2 -> insertCase1 s n0 d1 d2 )
-   where
-    insertCase1 : Stack (Node a k) si -> Node a k -> Maybe (Node a k) -> Maybe (Node a k) -> t    -- placed here to allow mutual recursion
-          -- http://agda.readthedocs.io/en/v2.5.2/language/mutual-recursion.html
-    insertCase3 : Stack (Node a k) si -> Node a k -> Node a k -> Node a k -> t
-    insertCase3 s n0 parent grandParent with left grandParent | right grandParent
-    ... | Nothing | Nothing = insertCase4 tree s n0 parent grandParent next
-    ... | Nothing | Just uncle  = insertCase4 tree s n0 parent grandParent next
-    ... | Just uncle | _  with compare tree ( key uncle ) ( key parent )
-    ...                   | EQ =  insertCase4 tree s n0 parent grandParent next
-    ...                   | _ with color uncle
-    ...                           | Red = pop2Stack s ( \s p0 p1 -> insertCase1 s ( 
-           record grandParent { color = Red ; left = Just ( record parent { color = Black ; left = Just n0 } )  ; right = Just ( record uncle { color = Black } ) }) p0 p1 )
-    ...                           | Black = insertCase4 tree s n0 parent grandParent next
-    insertCase2 : Stack (Node a k) si -> Node a k -> Node a k -> Node a k -> t
-    insertCase2 s n0 parent grandParent with color parent
-    ... | Black = replaceNode tree s grandParent n0 next
-    ... | Red = insertCase3 s n0 parent grandParent
-    insertCase1 s n0 Nothing Nothing = next tree
-    insertCase1 s n0 Nothing (Just grandParent) = replaceNode tree s grandParent n0 next
-    insertCase1 s n0 (Just grandParent) Nothing = replaceNode tree s grandParent n0 next
-    insertCase1 s n0 (Just parent) (Just grandParent) = insertCase2 s n0 parent grandParent
-      where
-
-findNode : {n m : Level } {a k si : Set n} {t : Set m} -> RedBlackTree {n} {m} {t} a k si -> Stack (Node a k) si -> (Node a k) -> (Node a k) -> (RedBlackTree {n} {m} {t} a k si -> Stack (Node a k) si -> Node a k -> t) -> t
-findNode {n} {m} {a} {k} {si} {t} tree s n0 n1 next = pushStack s n1 (\ s -> findNode1 s n1)
-  where
-    findNode2 : Stack (Node a k) si -> (Maybe (Node a k)) -> t
-    findNode2 s Nothing = next tree s n0
-    findNode2 s (Just n) = findNode tree s n0 n next
-    findNode1 : Stack (Node a k) si -> (Node a k)  -> t
-    findNode1 s n1 with (compare tree (key n0) (key n1))
-    ...                                | EQ = next tree s n0 
-    ...                                | GT = findNode2 s (right n1)
-    ...                                | LT = findNode2 s (left n1)
-
-
-leafNode : {n : Level } {a k : Set n}  -> k -> a -> Node a k
-leafNode k1 value = record {
-    key   = k1 ;
-    value = value ;
-    right = Nothing ;
-    left  = Nothing ;
-    color = Black 
-  }
-
-putRedBlackTree : {n m : Level } {a k si : Set n} {t : Set m} -> RedBlackTree {n} {m} {t} a k si -> k -> a -> (RedBlackTree {n} {m} {t} a k si -> t) -> t
-putRedBlackTree {n} {m} {a} {k} {si} {t} tree k1 value next with (root tree)
-...                                | Nothing = next (record tree {root = Just (leafNode k1 value) })
-...                                | Just n2  = findNode tree (nodeStack tree) (leafNode k1 value) n2 (\ tree1 s n1 -> insertNode tree1 s n1 next)
-
-getRedBlackTree : {n m : Level } {a k si : Set n} {t : Set m} -> RedBlackTree {n} {m} {t} a k si -> k -> (RedBlackTree {n} {m} {t} a k si -> (Maybe (Node a k)) -> t) -> t
-getRedBlackTree {_} {_} {a} {k} {_} {t} tree k1 cs = checkNode (root tree)
-  where
-    checkNode : Maybe (Node a k) -> t
-    checkNode Nothing = cs tree Nothing
-    checkNode (Just n) = search n
-      where
-        search : Node a k -> t
-        search n with compare tree k1 (key n)
-        search n | LT = checkNode (left n)
-        search n | GT = checkNode (right n)
-        search n | EQ = cs tree (Just n)
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/RedBlackTree.cbc
--- a/src/parallel_execution/RedBlackTree.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,602 +0,0 @@
-#include <stdio.h>
-
-#include "../context.h"
-#interface "Tree.h"
-#interface "Stack.h"
-
-extern enum Relational compare(struct Node* node1, struct Node* node2);
-
-Tree* createRedBlackTree(struct Context* context) {
-    struct Tree* tree = new Tree();
-    struct RedBlackTree* redBlackTree = new RedBlackTree();
-    tree->tree = (union Data*)redBlackTree;
-    redBlackTree->root = NULL;
-    redBlackTree->nodeStack = createSingleLinkedStack(context);
-    tree->put = C_putRedBlackTree;
-    tree->get = C_getRedBlackTree;
-    tree->remove = C_removeRedBlackTree;
-    // tree->clear = C_clearRedBlackTree;
-    return tree;
-}
-
-void printTree1(union Data* data) {
-    struct Node* node = &data->Node;
-    if (node == NULL) {
-        printf("NULL");
-    } else {
-        printf("key = %d (", node->key);
-        printTree1((union Data*)(node->right));
-        printf("), (");
-        printTree1((union Data*)(node->left));
-        printf(")");
-    }
-}
-
-void printTree(union Data* data) {
-    printTree1(data);
-    printf("\n");
-}
-
-__code putRedBlackTree(struct RedBlackTree* tree, struct Node* node) {
-    struct Node* newNode = &ALLOCATE(context, Node)->Node;
-    struct Node* root = tree->root;
-    printTree((union Data*)(tree->root));
-    tree->newNode = newNode;
-    tree->root = newNode; // this should done at stackClear
-    tree->parent = NULL;
-    if (root) {
-        tree->current = root;
-        tree->result = compare(tree->current, node);
-        tree->findNodeNext = C_insertNode;
-        goto findNode(tree);
-    }
-    goto insertNode(tree, node);
-}
-
-__code findNode(struct RedBlackTree* tree) {
-    struct Stack* nodeStack = tree->nodeStack;
-    struct Node* oldNode = tree->current;
-    struct Node* newNode = tree->newNode;
-    tree->previous = newNode;
-    *newNode = *oldNode;
-    goto nodeStack->push((union Data*)newNode, findNode1);
-}
-
-__code findNode1(struct RedBlackTree* tree, struct Node* node, __code next(...)) {
-    struct Node* oldNode = tree->current;
-    struct Node* newNode = tree->previous;
-    struct Node* newnewNode = &ALLOCATE(context, Node)->Node;
-    int result = tree->result;
-    if (result == EQ) {
-        newNode->value = node->value;
-        // go to stack clear
-        goto next(...);
-    } else if (result == GT) {
-        tree->current = oldNode->right;
-        newNode->right = newnewNode;
-    } else {
-        tree->current = oldNode->left;
-        newNode->left = newnewNode;
-    }
-    tree->newNode = newnewNode;
-    if (tree->current) {
-        tree->result = compare(tree->current, node);
-        goto findNode(tree);
-    }
-    goto meta(context, tree->findNodeNext);
-    //   gato tree->findNodeNext(tree, node);
-    
-}
-
-__code insertNode(struct RedBlackTree* tree, struct Node* node) {
-    struct Stack* nodeStack = tree->nodeStack;
-    struct Node* newNode = tree->newNode;
-    *newNode = *node;
-    newNode->color = Red;
-    tree->current = newNode;
-    goto nodeStack->get2(insertCase1);
-}
-
-__code insertCase1(struct RedBlackTree* tree, struct Node *parent, struct Node *grandparent) {
-    if (parent != NULL) {
-        tree->parent = parent;
-        tree->grandparent = grandparent;
-        goto insertCase2(tree);
-    }
-    tree->root->color = Black;
-    goto stackClear();
-}
-
-__code insertCase1_stub(struct Context* context) {
-    goto insertCase1(context, 
-        &Gearef(context, Tree)->tree->Tree.tree->RedBlackTree,
-        &context->data[D_Stack]->Stack.data->Node,
-        &context->data[D_Stack]->Stack.data1->Node);
-}
-
-__code insertCase2(struct RedBlackTree* tree) {
-    if (tree->parent->color == Black) {
-        goto stackClear();
-    }
-    goto insertCase3(tree);
-}
-
-__code insertCase3(struct RedBlackTree* tree) {
-    struct Stack* nodeStack = tree->nodeStack;
-    struct Node* uncle;
-
-    if (tree->grandparent->left == tree->parent) {
-        uncle = tree->grandparent->right;
-    } else {
-        uncle = tree->grandparent->left;
-    }
-
-    if (uncle && (uncle->color == Red)) {
-        // do insertcase1 on grandparent, stack must be pop by two
-        tree->parent->color = Black;
-        uncle->color = Black;
-        tree->grandparent->color = Red;
-        tree->current = tree->grandparent;
-        goto nodeStack->pop2(insertCase1);
-    }
-    goto insertCase4();
-}
-
-__code insertCase4(struct RedBlackTree* tree, struct RotateTree* rotateTree) {
-    struct Stack* nodeStack = tree->nodeStack;
-
-    if ((tree->current == tree->parent->right) && (tree->parent == tree->grandparent->left)) {
-        tree->current = tree->current->left;
-        tree->parent = tree->grandparent;
-
-        rotateTree->traverse = tree;
-        rotateTree->next = C_insertCase5;
-
-        goto nodeStack->pop(rotateLeft);
-    } else if ((tree->current == tree->parent->left) && (tree->parent == tree->grandparent->right)) {
-        tree->parent = tree->grandparent;
-        tree->current = tree->current->right;
-
-        rotateTree->traverse = tree;
-        rotateTree->next = C_insertCase5;
-
-        goto nodeStack->pop(rotateRight);
-    }
-
-    goto insertCase5();
-}
-
-__code insertCase5(struct RedBlackTree* tree) {
-    struct Stack* nodeStack = tree->nodeStack;
-    goto nodeStack->pop2(insertCase51);
-}
-
-__code insertCase51(struct RedBlackTree* tree, struct RotateTree* rotateTree, struct Node* parent, struct Node* grandparent) {
-    struct Node* current = tree->current;
-    tree->parent = parent;
-    tree->grandparent = grandparent;
-
-    parent->color = Black;
-    grandparent->color = Red;
-
-    tree->current = grandparent;
-
-    rotateTree->traverse = tree;
-    rotateTree->next = C_stackClear;
-
-    if ((current == parent->left) && (parent == grandparent->left)){
-        goto rotateRight();
-    } else {
-        goto rotateLeft();
-    }
-}
-
-__code insertCase51_stub(struct Context* context) {
-    struct Node* parent = &context->data[D_Stack]->Stack.data->Node;
-    struct Node* grandparent = &context->data[D_Stack]->Stack.data1->Node;
-    goto insertCase51(context,
-                      &Gearef(context, Tree)->tree->Tree.tree->RedBlackTree,
-                      Gearef(context, RotateTree),
-                      parent,
-                      grandparent);
-}
-
-__code rotateLeft(struct RedBlackTree* tree) {
-    struct Stack* nodeStack = tree->nodeStack;
-    goto nodeStack->get(rotateLeft1);
-}
-
-__code rotateLeft_stub(struct Context* context) {
-    struct RedBlackTree* traverse = context->data[D_RotateTree]->RotateTree.traverse;
-    goto rotateLeft(context, traverse);
-}
-    
-__code rotateLeft1(struct Node* node, struct RedBlackTree* tree, struct Node* parent, struct RotateTree* rotateTree) {
-    struct Node* tmp = node->right;
-
-    if (parent) {
-        if (node == parent->left)
-            parent->left = tmp;
-        else
-            parent->right = tmp;
-    } else {
-        tree->root = tmp;
-    }
-
-    node->right = tmp->left;
-    tmp->left = node;
-    tree->current = tmp;
-    
-    goto meta(context, rotateTree->next);
-}
-
-__code rotateLeft1_stub(struct Context* context) {
-    struct RedBlackTree* traverse = context->data[D_RotateTree]->RotateTree.traverse;
-    struct Node* parent = &context->data[D_Stack]->Stack.data->Node;
-    goto rotateLeft1(context,
-                    traverse->current,
-                    traverse,
-                    parent,
-                    Gearef(context, RotateTree));
-}
-
-__code rotateRight(struct RedBlackTree* tree) {
-    struct Stack* nodeStack = tree->nodeStack;
-    goto nodeStack->get(rotateRight1);
-}
-
-__code rotateRight_stub(struct Context* context) {
-    struct RedBlackTree* traverse = context->data[D_RotateTree]->RotateTree.traverse;
-    goto rotateLeft(context, traverse);
-}
-
-__code rotateRight1(struct Node* node, struct RedBlackTree* traverse,struct Node *parent,struct RotateTree *rotateTree) {
-    struct Node* tmp = node->left;
-    
-    if (parent) {
-        if (node == parent->left)
-            parent->left = tmp;
-        else
-            parent->right = tmp;
-    } else {
-        traverse->root = tmp;
-    }
-
-    node->left = tmp->right;
-    tmp->right = node;
-    traverse->current = tmp;
-    
-    goto meta(context, rotateTree->next);
-}
-
-__code rotateRight1_stub(struct Context* context) {
-    struct RedBlackTree* traverse = context->data[D_RotateTree]->RotateTree.traverse;
-    struct Node* parent = &context->data[D_Stack]->Stack.data->Node;
-    goto rotateRight1(context,
-                     traverse->current,
-                     traverse,
-                     parent,
-                     Gearef(context, RotateTree));
-}
-
-__code stackClear(struct RedBlackTree* tree, struct Stack* nodeStack, __code next(...)) {
-    tree->current = 0;
-    nodeStack->stack = (union Data*)tree->nodeStack;
-    nodeStack->next = next;
-    goto meta(context, tree->nodeStack->clear);
-}
-
-__code getRedBlackTree(struct RedBlackTree* tree, __code next(...)) {
-    if (tree->root) {
-        tree->current = tree->root;
-
-        goto search();
-    }
-
-    goto next(...);
-}
-
-__code search(struct RedBlackTree* tree, struct Node* node, __code next(...)) {
-    // compare(context, traverse, traverse->current->key, node->key);
-    tree->result = compare(tree->current, node);
-    if (tree->result == EQ) {
-        *node = *tree->current;
-        
-        goto meta(context, next);
-    } else if (tree->result == GT) {
-        tree->current = tree->current->right;
-    } else {
-        tree->current = tree->current->left;
-    }
-        
-    if (tree->current) {
-        goto meta(context, C_search);
-    }
-
-    goto next(...);
-}
-
-
-__code removeRedBlackTree(struct RedBlackTree* tree, struct Node* node, __code next(...)) {
-    struct Node* newNode = &ALLOCATE(context, Node)->Node;
-    struct Node* root = tree->root;
-    printTree((union Data*)(tree->root));
-    tree->newNode = newNode;
-    tree->root = newNode; // this should done at stackClear
-    tree->parent = NULL;
-    if (root) {
-        tree->current = root;
-        tree->result = compare(tree->current, node);
-        tree->findNodeNext = C_replaceNodeForDelete2;
-        goto findNode(tree);
-    }
-    goto next(...);
-}
-
-
-
-__code delete2(struct Node* current) {
-    if (current->color == Black) {
-        struct Node* child = current->right == NULL ? current->left : current->right;
-        current->color = child == NULL ? Black : child->color;
-
-        goto deleteCase1(current);
-    }
-
-    goto delete3(tree, current);
-}
-
-
-
-__code delete3(struct RedBlackTree* tree, struct Node* current, __code next(...)) {
-    struct Node* tmp = current->right == NULL ? current->left : current->right;
-    struct Stack* nodeStack = tree->nodeStack;
-
-    if (tree->parent) {
-        if (current == tree->parent->left)
-            tree->parent->left = tmp;
-        else
-            tree->parent->right = tmp;
-    } else {
-        tree->root = tmp;
-    }
-
-
-    if (tree->parent == NULL && tmp) {
-        tmp->color = Black;
-    }
-
-    current == tree->parent->left ? (tree->parent->left = NULL) : (tree->parent->right = NULL);
-
-    Gearef(context, Stack)->stack = (union Data*) nodeStack;
-    Gearef(context, Stack)->next = next;
-    goto meta(context, nodeStack->pop);
-
-//    gato nodeStack->pop(next);
-}
-
-
-
-__code replaceNodeForDelete2(struct RedBlackTree* tree, struct Node* newNode) {
-    if (tree->current->left && tree->current->right) {
-        tree->parent = newNode;
-        tree->current = newNode->left;
-        newNode->left = context->heap;
-
-
-        tree->parent = newNode;
-        
-        goto findMax1(tree,oldNode, newNode);
-    }
-
-    goto delete2(current);
-}
-
-
-__code findMax1(struct RedBlackTree* tree, struct Node* oldNode, struct Node* newNode) {
-    *newNode = *oldNode;
-
-    if (newNode->right) {
-        goto findMax2(tree, oldNode, newNode);
-    }
-    
-    tree->current = newNode;
-
-    goto delete2(current);
-}
-
-
-    
-
-__code findMax2(struct RedBlackTree* tree, struct Node* oldNode, struct Node* newNode) {
-    *newNode = *oldNode;
-
-    if (newNode->right->right) {
-        tree->current = newNode->right;
-        newNode->right = context->heap;
-
-        tree->parent = newNode;
-        
-        goto findMax2(tree, oldNode, newNode);
-    }
-
-    tree->current = newNode;
-    
-    goto delete2(tree,current);
-}
-    
-
-__code deleteCase1(struct RedBlackTree* tree, struct Node* current) {
-    if (tree->parent) {
-        goto deleteCase2(tree,current);
-    }
-
-    goto delete3(tree, current);
-}
-
-
-
-__code deleteCase2(struct RedBlackTree* tree, struct Node* current, struct RotateTree* rotateTree) {
-    struct Node* sibling = current == tree->parent->left ? tree->parent->right : tree->parent->left;
-    struct Stack* nodeStack = tree->nodeStack;
-    
-    if ((sibling == NULL ? Black : sibling->color) == Red) {
-        tree->parent->color = Red;
-        sibling->color = Black;
-
-        current == tree->parent->left ? (tree->parent->left = context->heap) : (tree->parent->right = context->heap);
-
-        struct Node* node = sibling;
-        
-        tree->current = tree->parent;
-
-        rotateTree->traverse = tree;
-        rotateTree->next = C_deleteCase3;
-
-        if (current == tree->parent->left) {
-            goto nodeStack->push((union Data*)node,rotateLeft);
-        } else {
-            goto nodeStack->push((union Data*)node,rotateRight);
-        }
-
-        goto deleteCase3(tree,current);
-    }
-}
-
-
-
-__code deleteCase3(struct RedBlackTree* tree, struct Node* current) {
-    struct Node* sibling = current == tree->parent->left ? tree->parent->right : tree->parent->left;
-    
-    if (tree->parent->color == Black &&
-        (sibling == NULL ? Black : sibling->color) == Black &&
-        (sibling->left == NULL ? Black : sibling->left->color) == Black &&
-        (sibling->right == NULL ? Black : sibling->right->color) == Black) {
-        sibling->color = Red;
-
-        tree->current = tree->parent;
-        goto deleteCase1(current);
-    }
-
-    goto deleteCase4(current);
-}
-
-
-
-__code deleteCase4(struct RedBlackTree* tree,struct Node* current) {
-    struct Node* sibling = current == tree->parent->left ? tree->parent->right : tree->parent->left;
-    
-    if (tree->parent->color == Red &&
-        (sibling == NULL ? Black : sibling->color) == Black &&
-        (sibling->left == NULL ? Black : sibling->left->color) == Black &&
-        (sibling->right == NULL ? Black : sibling->right->color) == Black) {
-        sibling->color = Red;
-        tree->parent->color = Black;
-
-        goto delete3(tree,current);
-    }
-
-    goto deleteCase5(tree,current);
-}
-
-
-
-__code deleteCase5(struct RedBlackTree* tree, struct Node* current, struct RotateTree* rotateTree) {
-    struct Node* sibling = current == tree->parent->left ? tree->parent->right : tree->parent->left;
-    struct Stack* nodeStack = tree->nodeStack;
-    // sibling->parent = tree->parent;
-    
-    if (current == tree->parent->left &&
-        (sibling == NULL ? Black : sibling->color) == Black &&
-        (sibling->left == NULL ? Black : sibling->left->color) == Red &&
-        (sibling->right == NULL ? Black : sibling->right->color) == Black) {
-        sibling->color = Red;
-        sibling->left->color = Black;
-        
-        // sibling == sibling->parent->left ? (sibling->parent->left = context->heap) : (sibling->parent->right = context->heap);
-        sibling == tree->parent->left ? (tree->parent->left = context->heap) : (tree->parent->right = context->heap);
-
-        struct Node* node = new Node();
-        node = sibling->left;
-
-        struct Node* tmp = node;
-        *tmp = *sibling;
-        tree->parent = current;
-        
-        tmp->left = context->heap;
-/*         struct Node* node = new Node(); */
-/*         node = *sibling->left; */
-        tree->parent = tmp;
-
-        tree->current = tmp;
-        
-
-        rotateTree->traverse = tree;
-        rotateTree->next = C_deleteCase6;
-
-        goto nodeStack->push((union Data*)node,rotateRight);
-    } else if (current == tree->parent->right &&
-               (sibling == NULL ? Black : sibling->color) == Black &&
-               (sibling->left == NULL ? Black : sibling->left->color) == Black &&
-               (sibling->right == NULL ? Black : sibling->right->color) == Red) {
-        sibling->color = Red;
-        sibling->right->color = Black;
-
-        sibling == tree->parent->left ? (tree->parent->left = context->heap) : (tree->parent->right = context->heap);
-
-        struct Node* node = new Node();
-        node = sibling->right;
-
-        struct Node* tmp = node;
-        *tmp = *sibling;
-        // tmp->parent = current;
-
-        tmp->right = context->heap;
-/*         struct Node* node = new Node(); */
-/*         node = *sibling->right; */
-        //node->parent = tmp;
-
-        tree->current = tmp;
-        
-
-        rotateTree->traverse = tree;
-        rotateTree->next = C_deleteCase6;
-
-        goto nodeStack->push((union Data*)node,rotateLeft);
-    }
-
-    goto deleteCase6(tree,current);
-}
-
-
-__code deleteCase6(struct RedBlackTree* tree, struct Node* current, struct RotateTree* rotateTree) {
-    struct Node* sibling = current == tree->parent->left ? tree->parent->right : tree->parent->left;
-    struct Stack* nodeStack = tree->nodeStack;
-    sibling == tree->parent->left ? (tree->parent->left = context->heap) : (tree->parent->right = context->heap);
-
-    struct Node* tmp = sibling;
-    // *tmp = *sibling;
-    tree->parent = current;
-
-    tmp->color = tree->parent->color;
-    tree->parent->color = Black;
-    
-    
-    if (current == tree->parent->left) {
-        tmp->right->color = Black;
-        tree->current = tree->parent;
-
-        rotateTree->traverse = tree;
-        rotateTree->next = C_delete3;
-
-        goto nodeStack->push((union Data*)tmp,rotateLeft);
-    } else {
-        tmp->left->color = Black;
-        tree->current = tree->parent;
-
-        rotateTree->traverse = tree;
-        rotateTree->next = C_delete3;
-
-        goto nodeStack->push((union Data*)tmp,rotateLeft);
-    }
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/RedBlackTreeReWright.cbc
--- a/src/parallel_execution/RedBlackTreeReWright.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,269 +0,0 @@
-#include <stdio.h>
-
-#include "../context.h"
-#include "../compare.c"
-#interface "Tree.h"
-#interface "Stack.h"
-
-extern enum Relational compare(struct Node* node1, struct Node* node2);
-
-
-Tree* createRedBlackTree(struct Context* context) {
-    struct Tree* tree = new Tree();
-    struct RedBlackTree* rbtree = new RedBlackTree();
-
-    tree->tree = (union Data*)rbtree;
-     rbtree->root = NULL;
-     rbtree->nodeStack = (union Data*)createSingleLinkedStack(context);
-     tree->put = C_putRedBlackTree;
-    // tree->get = C_getRedBlackTree;
-    // tree->remove = C_removeRedBlackTree;
-    // tree->clear = C_clearRedBlackTree;
-     return tree;
-}
-
-void printNode(struct Node* node) {
-  if (node == NULL) {
-    printf("leaf");
-  } else {
-    printf("((%d,%d (",node->color, node->key);
-    printNode(node->right);
-    printf(") (");
-    printNode(node->left);
-    printf(")");
-  }
-}
-
-void printTree(struct RedBlackTree* tree) {
-  printf("\n");
-  tree->current = tree->root;
-  printNode(tree->current);
-  printf(")\n");
-}
-
-__code putRedBlackTree(struct RedBlackTree* tree, struct Node* node, __code next(...)) {
-    printf("C_putRedBlackTree\n");
-    printf("value->%d,key->%d \n",node->value,node->key);
-    tree->previous = tree->newNode;
-    tree->newNode = node;
-    tree->newNode->color = Red;
-    tree->current = tree->root;
-    goto insertRBTree(node, tree);
-}
-
-__code stackClear(struct RedBlackTree* tree, struct Stack* nodeStack, __code next(...)) {
-   tree->current = 0;
-   nodeStack->stack = tree->nodeStack;
-   nodeStack->next = next;
-   goto meta(context, tree->nodeStack->clear);
-  }
-
-__code getRedBlackTree(struct RedBlackTree* tree, __code next(...)) {
-    if (tree->root) {
-        tree->current = tree->root;
-        goto insertRBTree();
-        // goto deleteRBTree();
-      }
-    goto next(...);
-}
-
-__code insertRBTree(struct Node* node, struct RedBlackTree* tree, struct Stack* stack, __code next(...)) {
-  // first case tree->current = root;
-  printf("C_insertRBTree\n");
-  printf("value->%d,key->%d\n",node->value,node->key);
-  printf("newNode value->%d,newNode key->%d\n",tree->newNode->value,tree->newNode->key);
-
-  if (tree->root == NULL) {
-    printf("insertRBTree_root eq NULL\n");
-    tree->root = tree->newNode;
-    tree->root->color = Black;
-    printf("tree->root->key = %d, tree->root->color = %d \n",tree->root->key,tree->root->color);
-    printTree(tree);
-    goto next(tree,...);
-  } else {
-    goto searchInsertLocation(node, tree, stack);
-  }
-}
-
-__code insertRBTree_stub(struct Context* context) {
-	Node* node = Gearef(context, Tree)->node;
-	RedBlackTree* tree = (RedBlackTree*)GearImpl(context, Tree, tree);
-	Stack* stack = createSingleLinkedStack(context);
-	enum Code next = Gearef(context, Tree)->next;
-	goto insertRBTree(context, node, tree, stack, next);
-} 
-
-__code searchInsertLocation(struct Node* node, struct RedBlackTree* tree) {
-  // first case tree->current = root; PreCase remove root=NULL case.don't exist firstCase tree->current=NULL
-  printf("C_searchInsertLocation\n");
-  printf("nownode->key %d , previous->key %d \n",tree->newNode->key,tree->previous->key);
-
-  tree->result = compare(tree->current, node);
-  printf("tree->current->key = %d, node->key %d\n",tree->current->key,node->key);
-  printf("compare (%d,%d)\n",tree->current,node);
-
-  Stack* stack = tree->nodeStack;
-
-  if (tree->current == NULL) {
-    printf("goto insertLocationBackInsert stack->pop\n");
-    goto stack->pop(insertLocationBackInsert);
-  }
-  if (tree->result == GT) {
-    printf("GT searchInsertLocation\n");
-    tree->current = tree->current->right;
-    goto stack->push(tree->newNode,insertLocationBackInsert);
-  } else if (tree->result == LT) {
-    printf("LT searchInsertLocation\n");
-    tree->current = tree->current->left;
-    goto stack->push(tree->newNode, searchInsertLocation);
-  } else if (tree->result == EQ) {
-    printf("already member this node : __code searchInsertLocation()\n");
-    goto meta(context, C_exit_code);
-  } else {
-    printf("$insert value tree : __code searchInsertLocation() \n");
-    goto meta(context, C_exit_code);
-  }
-}
-
-__code searchInsertLocation_stub(struct Context* context) {
-	Node* node = Gearef(context, Tree)->node;
-	RedBlackTree* tree = (RedBlackTree*)GearImpl(context, Tree, tree);
-  Stack* stack = (struct Stack*)Gearef(context, Stack)->stack;
-	goto searchInsertLocation(context, node, tree);
-}
-
-__code insertLocationBackInsert(struct RedBlackTree* tree, struct Node* node, struct Stack* stack) {
-  printf("C_insertLocationBackInsert\n");
-  struct Node* hoge = stack->data;
-  printf("stackpopdata%d\n",stack->data);
-  tree->current = tree->previous;
-  // tree->current = nodeStack->data;
-  // this CS is ones only backTrace, and insert node
-  tree->result = compare(tree->previous,tree->newNode);
-  printf("back,compare\n");
-  if (tree->result == GT) {
-    printf("GT\n");
-    tree->current->right = tree->newNode;
-    printTree(tree);
-    goto insertBalance(tree, stack, node, next);
-  } else if (tree->result == LT) {
-    printf("LT\n");
-    tree->current->left = tree->newNode;
-    goto insertBalance(tree, stack, node, next);
-  } else {
-    printf("error : __code insertLocationBackTrace() \n");
-    goto meta(context, C_exit_code);
-  }
-}
-
-__code insertLocationBackInsert_stub(struct Context* context) {
-	RedBlackTree* tree = (RedBlackTree*)GearImpl(context, Tree, tree);
-  SingleLinkedStack* singleLinkedStack = (SingleLinkedStack*)GearImpl(context, Stack, stack);
-	Node* node = Gearef(context, Tree)->node;
-  Stack* stack = (struct Stack*)Gearef(context, Stack)->stack;
-	goto insertLocationBackInsert(context, tree, node, stack);
-}
-
-__code insertBalance(struct RedBlackTree* tree, struct Node* nodeStack, struct Node* node, __code next(...)) {
-  printf("C_insertBalance\n");
-  struct Node* traceNode = tree->nodeStack->data;
-  tree->current = traceNode;
-  struct Stack* stack = tree->nodeStack;
-
-  // exit insertion code
-  if (tree->current == tree->root) {
-    tree->current->color = Black;
-    printTree(tree);
-    //printTree
-    goto next(tree,...);
-  }
-
-
-  //current color eq Red
-  if (tree->current->color == Red)
-    goto stack->pop(insertBalance);
-
-  // current color eq Black
-  if (tree->current->left->left || tree->current->left->right) {
-    goto insertBalanceLeft(tree,nodeStack);
-  } else if (tree->current->right->left || tree->current->right->right) {
-    goto insertBalanceRight(tree,nodeStack);
-  } else {
-    goto stack->pop(insertBalance);
-  }
-}
-
-__code insertBalanceLeft(struct RedBlackTree* tree, struct Node* nodeStack, struct Node* node) {
-  printf("C_insertBalanceLeft\n");
-  struct Stack* stack = tree->nodeStack;
-
-  if (tree->current->color == Black && tree->current->left->color == Red && tree->current->left->left->color == Red) {
-    struct Node* tmpCurrent  = tree->current;
-    struct Node* tmpLeft     = tree->current->left;
-    struct Node* tmpLeftLeft = tree->current->left->left;
-
-    tree->current = tmpLeft;
-    tree->current->right = tmpCurrent;
-    tree->current->left = tmpLeftLeft;
-    tree->current->right->left = tmpLeft->right;
-    tree->current->color = Red;
-    tree->current->left->color = Black;
-    tree->current->right->color = Black;
-    goto stack->pop(insertBalance);
-
-  } else if (tree->current->color == Black && tree->current->left->color == Red && tree->current->left->right->color == Red) {
-    struct Node* tmpCurrent   = tree->current;
-    struct Node* tmpLeft      = tree->current->left;
-    struct Node* tmpLeftRight = tree->current->left->right;
-
-    tree->current = tmpLeft;
-    tree->current->right = tmpCurrent;
-    tree->current->left = tmpLeftRight;
-    tree->current->right->left = tmpLeft->left;
-    tree->current->color = Red;
-    tree->current->left->color = Black;
-    tree->current->right->color = Black;
-    goto stack->pop(insertBalance);
-
-  }
-}
-
-__code insertBalanceRight(struct RedBlackTree* tree, struct Node* nodeStack, struct Node* node) {
-  printf("C_insertBalanceLeft\n");
-  struct Stack* stack = tree->nodeStack;
-
-  if (tree->current->color == Black && tree->current->right->color == Red && tree->current->right->right->color == Red) {
-    struct Node* tmpCurrent    = tree->current;
-    struct Node* tmpRight      = tree->current->right;
-    struct Node* tmpRightRight = tree->current->right->right;
-
-    tree->current = tmpRight;
-    tree->current->left = tmpCurrent;
-    tree->current->right = tmpRightRight;
-    tree->current->left->right = tmpRight->left;
-    tree->current->color = Red;
-    tree->current->left->color = Black;
-    tree->current->right->color = Black;
-    goto stack->pop(insertBalance);
-
-  } else if (tree->current->color == Black && tree->current->right->color == Red && tree->current->right->left->color == Red) {
-
-    struct Node* tmpCurrent = tree->current;
-    struct Node* tmpRight = tree->current->right;
-    struct Node* tmpRightLeft = tree->current->right->left;
-
-    tree->current = tmpRight;
-    tree->current->right = tmpCurrent;
-    tree->current->left = tmpRightLeft;
-    tree->current->left->right = tmpRight->right;
-    tree->current->color = Red;
-    tree->current->left->color = Black;
-    tree->current->right->color = Black;
-    goto stack->pop(insertBalance);
-
-  } else {
-    printf("unkwon error : __code insertBalanceRight() \n");
-    goto meta(context, C_exit_code);
-  }
-}
-// insertCode end
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/Semaphore.h
--- a/src/parallel_execution/Semaphore.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-typedef struct Semaphore<Impl>{
-        union Data* semaphore;
-        __code p(Impl* semaphore, __code next(...)); 
-        __code v(Impl* semaphore, __code next(...)); 
-        __code next(...);
-} Semaphore;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/SemaphoreImpl.cbc
--- a/src/parallel_execution/SemaphoreImpl.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,37 +0,0 @@
-#include "../context.h"
-#interface "semaphore.h"
-
-Semaphore* createSemaphoreImpl(struct Context* context, int n) {
-    struct Semaphore* semaphore = new Semaphore();
-    struct SemaphoreImpl* semaphoreImpl = new SemaphoreImpl();
-    semaphore->semaphore = (union Data*)semaphoreImpl;
-    semaphoreImpl->value =  n;
-    pthread_mutex_init(&semaphoreImpl->mutex, NULL);
-    pthread_cond_init(&semaphoreImpl->cond, NULL);
-    semaphore->p = C_pOperationSemaphoreImpl;
-    semaphore->v = C_vOperationSemaphoreImpl;
-    return semaphore;
-}
-
-__code pOperationSemaphoreImpl(struct SemaphoreImpl* semaphore, __code next(...)) {
-    pthread_mutex_lock(&semaphore->mutex);
-    goto meta(context, C_pOperationSemaphoreImpl1);
-}
-
-__code pOperationSemaphoreImpl1(struct SemaphoreImpl* semaphore, __code next(...)) {
-    if (semaphore->value == 0) {
-        pthread_cond_wait(&semaphore->cond, &semaphore->mutex);
-        goto meta(context, C_pOperationSemaphoreImpl1);
-    }
-    semaphore->value--;
-    pthread_mutex_unlock(&semaphore->mutex);
-    goto next(...);
-}
-
-__code vOperationSemaphoreImpl(struct SemaphoreImpl* semaphore, __code next(...)) {
-    pthread_mutex_lock(&semaphore->mutex);
-    semaphore->value++;
-    pthread_cond_signal(&semaphore->cond);
-    pthread_mutex_unlock(&semaphore->mutex);
-    goto next(...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/SingleLinkedQueue.cbc
--- a/src/parallel_execution/SingleLinkedQueue.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-#include "../context.h"
-#include <stdio.h>
-#interface "Queue.h"
-// use "Node.h"
-// use "Element.h"
-
-Queue* createSingleLinkedQueue(struct Context* context) {
-    struct Queue* queue = new Queue();
-    struct SingleLinkedQueue* singleLinkedQueue = new SingleLinkedQueue();
-    queue->queue = (union Data*)singleLinkedQueue;
-    singleLinkedQueue->top  = new Element();
-    singleLinkedQueue->last = singleLinkedQueue->top;
-    queue->take  = C_takeSingleLinkedQueue;
-    queue->put  = C_putSingleLinkedQueue;
-    queue->isEmpty = C_isEmptySingleLinkedQueue;
-    queue->clear = C_clearSingleLinkedQueue;
-    return queue;
-}
-
-void printQueue1(union Data* data) {
-    struct Node* node = &data->Element.data->Node;
-    if (node == NULL) {
-        printf("NULL");
-    } else {
-        printf("key = %d ,", node->key);
-        printQueue1((union Data*)data->Element.next);
-    }
-}
-
-void printQueue(union Data* data) {
-    printQueue1(data);
-    printf("\n");
-}
-
-__code clearSingleLinkedQueue(struct SingleLinkedQueue* queue, __code next(...)) {
-    queue->top = NULL;
-    goto next(...);
-}
-
-__code putSingleLinkedQueue(struct SingleLinkedQueue* queue, union Data* data, __code next(...)) {
-    Element* element = new Element();
-    element->data = data;
-    element->next = NULL;
-    queue->last->next  = element;
-    queue->last = element;
-    goto next(...);
-}
-
-__code takeSingleLinkedQueue(struct SingleLinkedQueue* queue, __code next(union Data* data, ...)) {
-    struct Element* top = queue->top;
-    struct Element* nextElement = top->next;
-    if (queue->top == queue->last) {
-        data = NULL;
-    } else {
-        queue->top = nextElement;
-        data = nextElement->data;
-    }
-    goto next(data, ...);
-}
-
-__code isEmptySingleLinkedQueue(struct SingleLinkedQueue* queue, __code next(...), __code whenEmpty(...)) {
-    if (queue->top == queue->last) {
-        goto whenEmpty(...);
-    } else {
-        goto next(...);
-    }
-}
-
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/SingleLinkedStack.cbc
--- a/src/parallel_execution/SingleLinkedStack.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,111 +0,0 @@
-#include "../context.h"
-#interface "Stack.h"
-#include <stdio.h>
-
-// typedef struct SingleLinkedStack {
-//     struct Element* top;
-// } SingleLinkedStack;
-
-Stack* createSingleLinkedStack(struct Context* context) {
-    struct Stack* stack = new Stack();
-    struct SingleLinkedStack* singleLinkedStack = new SingleLinkedStack();
-    stack->stack = (union Data*)singleLinkedStack;
-    singleLinkedStack->top = NULL;
-    stack->push = C_pushSingleLinkedStack;
-    stack->pop  = C_popSingleLinkedStack;
-    stack->pop2  = C_pop2SingleLinkedStack;
-    stack->get  = C_getSingleLinkedStack;
-    stack->get2  = C_get2SingleLinkedStack;
-    stack->isEmpty = C_isEmptySingleLinkedStack;
-    stack->clear = C_clearSingleLinkedStack;
-    return stack;
-}
-
-void printStack1(union Data* data) {
-    struct Node* node = &data->Element.data->Node;
-    if (node == NULL) {
-        printf("NULL");
-    } else {
-        printf("key = %d ,", node->key);
-        printStack1((union Data*)data->Element.next);
-    }
-}
-
-void printStack(union Data* data) {
-    printStack1(data);
-    printf("\n");
-}
-
-__code clearSingleLinkedStack(struct SingleLinkedStack* stack,__code next(...)) {
-    stack->top = NULL;
-    goto next(...);
-}
-
-__code pushSingleLinkedStack(struct SingleLinkedStack* stack, union Data* data, __code next(...)) {
-    Element* element = new Element();
-    element->next = stack->top;
-    element->data = data;
-    stack->top = element;
-    goto next(...);
-}
-
-__code popSingleLinkedStack(struct SingleLinkedStack* stack, __code next(union Data* data, ...)) {
-    if (stack->top) {
-        data = stack->top->data;
-        stack->top = stack->top->next;
-    } else {
-        data = NULL;
-    }
-    goto next(data, ...);
-}
-
-__code pop2SingleLinkedStack(struct SingleLinkedStack* stack, __code next(union Data* data, union Data* data1, ...)) {
-    if (stack->top) {
-        data = stack->top->data;
-        stack->top = stack->top->next;
-    } else {
-        data = NULL;
-    }
-    if (stack->top) {
-        data1 = stack->top->data;
-        stack->top = stack->top->next;
-    } else {
-        data1 = NULL;
-    }
-    goto next(data, data1, ...);
-}
-
-
-__code getSingleLinkedStack(struct SingleLinkedStack* stack, __code next(union Data* data, ...)) {
-    if (stack->top) {
-        data = stack->top->data;
-    } else {
-        data = NULL;
-    }
-    goto next(data, ...);
-}
-
-__code get2SingleLinkedStack(struct SingleLinkedStack* stack, __code next(union Data* data, union Data* data1, ...)) {
-    if (stack->top) {
-        data = stack->top->data;
-        if (stack->top->next) {
-            data1 = stack->top->next->data;
-        } else {
-            data1 = NULL;
-        }
-    } else {
-        data = NULL;
-        data1 = NULL;
-    }
-    goto next(data, data1, ...);
-}
-    
-__code isEmptySingleLinkedStack(struct SingleLinkedStack* stack, __code next(...), __code whenEmpty(...)) {
-    if (stack->top) {
-        goto next(...);
-    } else {
-        goto whenEmpty(...);
-    }
-}
-
-
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/SpinLock.cbc
--- a/src/parallel_execution/SpinLock.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-#include "../context.h"
-#interface "Atomic.h"
-#interface "Lock.h"
-
-Lock* createSpinLock(struct Context* context) {
-    struct Lock* lock = new Lock();
-    struct SpinLock* spinLock = new SpinLock();
-    spinLock->lock = NULL;
-    spinLock->atomic = createAtomicReference(context);
-    lock->lock = (union Data*)spinLock;
-    lock->doLock = C_doLockSpinLock;
-    lock->doUnlock = C_doUnlockSpinLock;
-    return lock;
-}
-
-__code doLockSpinLock(struct SpinLock* lock, __code next(...)) {
-    struct Atomic* atomic = lock->atomic;
-    goto atomic->checkAndSet(&lock->lock, NULL, 1, doLockSpinLock1, doLockSpinLock);
-}
-
-__code doLockSpinLock1(struct SpinLock* lock, __code next(...)) {
-    lock->lockContext = context;
-    goto next(...);
-}
-
-__code doUnlockSpinLock(struct SpinLock* lock, __code next(...)) {
-    if (lock->lockContext == context) {
-        struct Atomic* atomic = lock->atomic;
-        goto atomic->checkAndSet(&lock->lock, 1, NULL, next(...), doUnlockSpinLock);
-    }
-    goto next(...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/Stack.h
--- a/src/parallel_execution/Stack.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-typedef struct Stack<Type, Impl>{
-        union Data* stack;
-        union Data* data;
-        union Data* data1;
-        /* Type* stack; */
-        /* Type* data; */
-        /* Type* data1; */
-        __code whenEmpty(...);
-        __code clear(Impl* stack,__code next(...));
-        __code push(Impl* stack,Type* data, __code next(...));
-        __code pop(Impl* stack, __code next(Type* data, ...));
-        __code pop2(Impl* stack, __code next(Type* data, Type* data1, ...));
-        __code isEmpty(Impl* stack, __code next(...), __code whenEmpty(...));
-        __code get(Impl* stack, __code next(Type* data, ...));
-        __code get2(Impl* stack, __code next(Type* data, Type* data1, ...));
-        __code next(...);
-} Stack;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/SynchronizedQueue.cbc
--- a/src/parallel_execution/SynchronizedQueue.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,96 +0,0 @@
-#include "../context.h"
-#interface "Queue.h"
-#interface "Atomic.h"
-
-#include <stdio.h>
-
-/*
- * Non-blocking queue of Paper: Simple, Fast, and Practical Non-Blocking and Blocking Concurrent Queue Algorithms(https://www.research.ibm.com/people/m/michael/podc-1996.pdf).
- */
-
-Queue* createSynchronizedQueue(struct Context* context) {
-    struct Queue* queue = new Queue();
-    struct SynchronizedQueue* synchronizedQueue = new SynchronizedQueue();
-    synchronizedQueue->top = new Element(); // allocate a free node
-    synchronizedQueue->top->next = NULL;
-    synchronizedQueue->last = synchronizedQueue->top;
-    synchronizedQueue->atomic = createAtomicReference(context);
-    queue->queue = (union Data*)synchronizedQueue;
-    queue->take  = C_takeSynchronizedQueue;
-    queue->put  = C_putSynchronizedQueue;
-    queue->isEmpty = C_isEmptySynchronizedQueue;
-    queue->clear = C_clearSynchronizedQueue;
-    return queue;
-}
-
-__code clearSynchronizedQueue(struct SynchronizedQueue* queue, __code next(...)) {
-    struct Element* top = queue->top;
-    struct Atomic* atomic = queue->atomic;
-    goto atomic->checkAndSet(&queue->top, top, NULL, next(...), clearSynchronizedQueue);
-}
-
-__code putSynchronizedQueue(struct SynchronizedQueue* queue, union Data* data, __code next(...)) {
-    Element* element = new Element();
-    element->data = data;
-    element->next = NULL;
-    Element* last = queue->last;
-    Element* nextElement = last->next;
-    if (last != queue->last) {
-        goto putSynchronizedQueue();
-    }
-    if (nextElement == NULL) {
-        struct Atomic* atomic = queue->atomic;
-        goto atomic->checkAndSet(&last->next, nextElement, element, next(...), putSynchronizedQueue);
-    } else {
-        struct Atomic* atomic = queue->atomic;
-        goto atomic->checkAndSet(&queue->last, last, nextElement, putSynchronizedQueue, putSynchronizedQueue);
-    }
-}
-
-__code takeSynchronizedQueue(struct SynchronizedQueue* queue, __code next(union Data* data, ...)) {
-    struct Element* top = queue->top;
-    struct Element* last = queue->last;
-    struct Element* nextElement = top->next;
-    if (top != queue->top) {
-        goto takeSynchronizedQueue();
-    }
-    if (top == last) {
-        if (nextElement != NULL) {
-            struct Atomic* atomic = queue->atomic;
-            goto atomic->checkAndSet(&queue->last, last, nextElement, takeSynchronizedQueue, takeSynchronizedQueue);
-        }
-    } else {
-        struct Atomic* atomic = queue->atomic;
-        goto atomic->checkAndSet(&queue->top, top, nextElement, takeSynchronizedQueue1, takeSynchronizedQueue);
-    }
-    goto takeSynchronizedQueue();
-}
-
-__code takeSynchronizedQueue1(struct SynchronizedQueue* queue, __code next(union Data* data, ...), struct Element* nextElement) {
-    data = nextElement->data;
-    goto next(data, ...);
-}
-
-__code takeSynchronizedQueue1_stub(struct Context* context) {
-	SynchronizedQueue* queue = (SynchronizedQueue*)GearImpl(context, Queue, queue);
-	enum Code next = Gearef(context, Queue)->next;
-	Data** O_data = &Gearef(context, Queue)->data;
-	goto takeSynchronizedQueue1(context,
-                                queue,
-                                next,
-                                O_data,
-                                (struct Element*)Gearef(context, Atomic)->newData);
-}
-
-__code isEmptySynchronizedQueue(struct SynchronizedQueue* queue, __code next(...), __code whenEmpty(...)) {
-    struct Element* top = queue->top;
-    struct Element* last = queue->last;
-    struct Element* nextElement = top->next;
-    if (top != queue->top) {
-        goto isEmptySynchronizedQueue();
-    }
-    if (top == last && nextElement == NULL) {
-        goto whenEmpty(...);
-    }
-    goto next(...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/TaskIterator.cbc
--- a/src/parallel_execution/TaskIterator.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-#include "../context.h"
-#interface "Iterator.h"
-#include <stdio.h>
-
-createTaskIterator(struct Context, struct List list) {
-    struct Iterator* iterator = new Iterator();
-    struct TaskIterator* taskIterator = new TaskIterator();
-    iterator->itearot = (union Data*)taskIterator;
-    iterator->exec = C_execTaskIterator;
-    iterator->barrier = C_barrierTaskIterator;
-    taskIterator->taskList = list;
-}
-
-__code execTaskIterator(struct taskIterator* iterator, struct TaskManager* taskManager, struct Context* task, __code next(...)) {
-    if (iterator->list->next == null) {
-        goto next(...);
-    }
-    iterator->list = list->next;
-    struct Context* task = (struct Context*)iterator->list->data;
-    struct TaskManager taskManager = task->taskManager;
-    taskManager->spawn(task, C_execTaskIterator);
-}
-
-__code barrierTaskIterator(struct MultiDimIterator* iterator, struct Context* task, __code next(...), __code whenWait(...)) {
-    if (__sync_fetch_and_sub(&iterator->count, 1) == 1) {
-        goto next(...);
-    }
-    goto whenWait(...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/TaskManager.h
--- a/src/parallel_execution/TaskManager.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-typedef struct TaskManager<Impl>{
-    union Data* taskManager;
-    struct Context* task;
-    struct Element* taskList;
-    __code spawn(Impl* taskManager, struct Context* task, __code next(...));
-    __code spawnTasks(Impl* taskManagerImpl, struct Element* taskList, __code next1(...));
-    __code setWaitTask(Impl* taskManagerImpl, struct Context* task, __code next(...));
-    __code shutdown(Impl* taskManagerImpl, __code next(...));
-    __code incrementTaskCount(Impl* taskManagerImpl, __code next(...));
-    __code decrementTaskCount(Impl* taskManagerImpl, __code next(...));
-    __code next(...);
-    __code next1(...);
-} TaskManager;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/TaskManagerImpl.cbc
--- a/src/parallel_execution/TaskManagerImpl.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,206 +0,0 @@
-#include "../context.h"
-#interface "TaskManager.h"
-#interface "Iterator.h"
-#interface "Queue.h"
-#interface "Worker.h"
-
-#include <stdio.h>
-#include <unistd.h>
-
-void createWorkers(struct Context* context, TaskManagerImpl* taskManager);
-
-TaskManager* createTaskManagerImpl(struct Context* context, int numCPU, int numGPU, int numIO) {
-    struct TaskManager* taskManager = new TaskManager();
-    taskManager->spawnTasks = C_spawnTasksTaskManagerImpl;
-    taskManager->spawn = C_spawnTaskManagerImpl;
-    taskManager->shutdown  = C_shutdownTaskManagerImpl;
-    taskManager->incrementTaskCount = C_incrementTaskCountTaskManagerImpl;
-    taskManager->decrementTaskCount = C_decrementTaskCountTaskManagerImpl;
-    taskManager->setWaitTask = C_setWaitTaskTaskManagerImpl;
-    struct TaskManagerImpl* taskManagerImpl = new TaskManagerImpl();
-    // 0...numIO-1 IOProcessor
-    // numIO...numIO+numGPU-1 GPUProcessor
-    // numIO+numGPU...numIO+numGPU+numCPU-1 CPUProcessor
-    taskManagerImpl->io = 0;
-    taskManagerImpl->gpu = numIO;
-    taskManagerImpl->cpu = numIO+numGPU;
-    taskManagerImpl->maxCPU = numIO+numGPU+numCPU;
-    taskManagerImpl->numWorker = taskManagerImpl->maxCPU;
-    taskManagerImpl->sendGPUWorkerIndex = taskManagerImpl->gpu;
-    taskManagerImpl->sendCPUWorkerIndex = taskManagerImpl->cpu;
-    taskManagerImpl->taskCount = 0;
-    taskManagerImpl->loopCounter = 0;
-    createWorkers(context, taskManagerImpl);
-    taskManager->taskManager = (union Data*)taskManagerImpl;
-    return taskManager;
-}
-
-void createWorkers(struct Context* context, TaskManagerImpl* taskManager) {
-    int i = 0;
-    taskManager->workers = (Worker**)ALLOCATE_PTR_ARRAY(context, Worker, taskManager->maxCPU);
-    for (;i<taskManager->gpu;i++) {
-        Queue* queue = createSynchronizedQueue(context);
-        taskManager->workers[i] = (Worker*)createCPUWorker(context, i, queue);
-    }
-    for (;i<taskManager->cpu;i++) {
-        Queue* queue = createSynchronizedQueue(context);
-#ifdef USE_CUDAWorker
-        taskManager->workers[i] = (Worker*)createCUDAWorker(context, i, queue,0);
-#else
-        taskManager->workers[i] = (Worker*)createCPUWorker(context, i, queue);
-#endif
-    }
-    for (;i<taskManager->maxCPU;i++) {
-        Queue* queue = createSynchronizedQueue(context);
-        taskManager->workers[i] = (Worker*)createCPUWorker(context, i, queue);
-    }
-}
-
-__code spawnTasksTaskManagerImpl(struct TaskManagerImpl* taskManager, struct Element* taskList, __code next1(...)) {
-    taskManager->taskList = taskList;
-    goto spawnTasksTaskManagerImpl1();
-}
-
-__code spawnTasksTaskManagerImpl1(struct TaskManagerImpl* taskManagerImpl, struct TaskManager* taskManager) {
-    if (taskManagerImpl->taskList == NULL) {
-        goto spawnTasksTaskManagerImpl2();
-    }
-    struct Context* task = (struct Context*)taskManagerImpl->taskList->data;
-    taskManagerImpl->taskList = taskManagerImpl->taskList->next;
-    goto taskManager->setWaitTask(task, spawnTasksTaskManagerImpl1);
-}
-
-__code spawnTasksTaskManagerImpl1_stub(struct Context* context) {
-    TaskManagerImpl* taskManagerImpl = (TaskManagerImpl*)GearImpl(context, TaskManager, taskManager);
-    TaskManager* taskManager = &Gearef(context, TaskManager)->taskManager->TaskManager;
-    goto spawnTasksTaskManagerImpl1(context, taskManagerImpl, taskManager);
-}
-
-__code spawnTasksTaskManagerImpl2(struct TaskManagerImpl* taskManager, struct Element* taskList, __code next1(...)) {
-    taskManager->taskList = taskList;
-    goto spawnTasksTaskManagerImpl3();
-}
-
-__code spawnTasksTaskManagerImpl3(struct TaskManagerImpl* taskManagerImpl, __code next1(...), struct TaskManager* taskManager) {
-    if (taskManagerImpl->taskList == NULL) {
-        goto next1(...);
-    }
-    struct Context* task = (struct Context*)taskManagerImpl->taskList->data;
-    taskManagerImpl->taskList = taskManagerImpl->taskList->next;
-    goto taskManager->spawn(task, spawnTasksTaskManagerImpl3);
-}
-
-__code spawnTasksTaskManagerImpl3_stub(struct Context* context) {
-    TaskManagerImpl* taskManagerImpl = (TaskManagerImpl*)GearImpl(context, TaskManager, taskManager);
-    enum Code next1 = Gearef(context, TaskManager)->next1;
-    TaskManager* taskManager = &Gearef(context, TaskManager)->taskManager->TaskManager;
-    goto spawnTasksTaskManagerImpl3(context, taskManagerImpl, next1, taskManager);
-}
-
-__code setWaitTaskTaskManagerImpl(struct TaskManagerImpl* taskManager, struct Context* task, __code next(...)) {
-    int i = taskManager->loopCounter;
-    if (task->idg+i < task->maxIdg) {
-        struct Queue* queue = GET_WAIT_LIST(task->data[task->idg + i]);
-        taskManager->loopCounter++;
-        goto queue->put(task, setWaitTaskTaskManagerImpl);
-    }
-    taskManager->loopCounter = 0;
-    goto incrementTaskCountTaskManagerImpl();
-}
-
-__code incrementTaskCountTaskManagerImpl(struct TaskManagerImpl* taskManager, __code next(...)) {
-    __sync_fetch_and_add(&taskManager->taskCount, 1);
-    goto next(...);
-}
-
-__code decrementTaskCountTaskManagerImpl(struct TaskManagerImpl* taskManager, __code next(...)) {
-    __sync_fetch_and_sub(&taskManager->taskCount, 1);
-    goto next(...);
-}
-
-__code spawnTaskManagerImpl(struct TaskManagerImpl* taskManagerImpl, struct Context* task, __code next(...), struct TaskManager* taskManager) {
-    task->taskManager = taskManager;
-    if (task->idgCount == 0) {
-        // iterator task is normal task until spawned
-        if (task->iterator != NULL && task->iterate == 0) {
-            pthread_mutex_unlock(&taskManagerImpl->mutex);
-            struct Iterator* iterator = task->iterator;
-            goto iterator->exec(task, taskManagerImpl->cpu - taskManagerImpl->gpu, next(...));
-        }
-        goto taskSend();
-    }
-    pthread_mutex_unlock(&taskManagerImpl->mutex);
-    goto next(...);
-}
-
-__code spawnTaskManagerImpl_stub(struct Context* context) {
-    TaskManagerImpl* taskManagerImpl = (TaskManagerImpl*)GearImpl(context, TaskManager, taskManager);
-    struct Context* task = Gearef(context, TaskManager)->task;
-    TaskManager* taskManager = &Gearef(context, TaskManager)->taskManager->TaskManager;
-    goto spawnTaskManagerImpl(context,
-                              taskManagerImpl,
-                              task,
-                              Gearef(context, TaskManager)->next,
-                              taskManager);
-}
-
-
-__code taskSend(struct TaskManagerImpl* taskManager, struct Context* task, __code next(...)) {
-    // set workerId
-    if (task->gpu) {
-        goto taskSend1();
-    } else {
-        goto taskSend2();
-    }
-}
-
-__code taskSend1(struct TaskManagerImpl* taskManager, struct Context* task, __code next(...)) {
-    int workerId = taskManager->sendGPUWorkerIndex;
-    if (++taskManager->sendGPUWorkerIndex >= taskManager->cpu) {
-        taskManager->sendGPUWorkerIndex = taskManager->gpu;
-    }
-    pthread_mutex_unlock(&taskManager->mutex);
-    struct Queue* queue = taskManager->workers[workerId]->tasks;
-    goto queue->put(task, next(...));
-}
-
-__code taskSend2(struct TaskManagerImpl* taskManager, struct Context* task, __code next(...)) {
-    int workerId = taskManager->sendCPUWorkerIndex;
-    if (++taskManager->sendCPUWorkerIndex >= taskManager->maxCPU) {
-        taskManager->sendCPUWorkerIndex = taskManager->cpu;
-    }
-    pthread_mutex_unlock(&taskManager->mutex);
-    struct Queue* queue = taskManager->workers[workerId]->tasks;
-    goto queue->put(task, next(...));
-}
-
-__code shutdownTaskManagerImpl(struct TaskManagerImpl* taskManager, __code next(...)) {
-    if (taskManager->taskCount != 0) {
-        usleep(1000);
-        goto shutdownTaskManagerImpl();
-    }
-    int i = taskManager->loopCounter;
-    if (i < taskManager->numWorker) {
-        struct Queue* tasks = taskManager->workers[i]->tasks;
-        goto tasks->put(NULL, shutdownTaskManagerImpl1);
-    }
-
-    taskManager->loopCounter = 0;
-    goto shutdownTaskManagerImpl2();
-}
-
-__code shutdownTaskManagerImpl1(struct TaskManagerImpl* taskManager, __code next(...)) {
-    taskManager->loopCounter++;
-    goto shutdownTaskManagerImpl();
-}
-
-__code shutdownTaskManagerImpl2(struct TaskManagerImpl* taskManager, __code next(...)) {
-    int i = taskManager->loopCounter;
-    if (i < taskManager->numWorker) {
-        pthread_join(taskManager->workers[i]->thread, NULL);
-        taskManager->loopCounter++;
-        goto shutdownTaskManagerImpl2();
-    }
-    taskManager->loopCounter = 0;
-    goto next(...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/Timer.h
--- a/src/parallel_execution/Timer.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-typedef struct Timer<Impl>{
-        union Data* timer;
-        __code start(Impl* timer, __code next(...));
-        __code end(Impl* timer, __code next(...));
-        __code next(...);
-} Timer;
-
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/TimerImpl.cbc
--- a/src/parallel_execution/TimerImpl.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,30 +0,0 @@
-#include <stdio.h>
-#include <sys/time.h>
-
-#include "../context.h"
-#interface "Timer.h"
-
-Timer* createTimerImpl(struct Context* context) {
-    struct Timer* timer = new Timer();
-    struct TimerImpl* timerImpl = new TimerImpl();
-    timer->timer = (union Data*)timerImpl;
-    timer->start = C_startTimer;
-    timer->end = C_endTimer;
-    return timer;
-}
-
-__code startTimer(struct TimerImpl* timer, __code next(...)) {
-    struct timeval tv;
-    gettimeofday(&tv, NULL);
-
-    timer->time = tv.tv_sec + (double)tv.tv_usec*1e-6;
-
-    goto next(...);
-}
-
-__code endTimer(struct TimerImpl* timer, __code next(...)) {
-    struct timeval tv;
-    gettimeofday(&tv, NULL);
-    printf("%0.6f\n", (tv.tv_sec+(double)tv.tv_usec*1e-6) - timer->time);
-    goto next(...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/Todo
--- a/src/parallel_execution/Todo	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,144 +0,0 @@
-Fri May  4 20:06:48 JST 2018
-
-    par goto がある code segment は $inParGoto ではなく $hasParGoto にする
-    par goto があるばあいは goto meta ではなく goto parGotoMeta にする
-    taskList の処理も parGotoMeta で行う
-    
-    par goto の遅い理由を調べる
-        多分 Context と Synchronized Queue の生成に時間がかかってる？
-    
-    Perl スクリプトを一つにする
-    Context を生成するモジュールとstubを生成するモジュールをそれぞれオブジェクトとして作る
-    
-    Code Gear のプロトタイプを格納するオブジェクトをつくる
-
-Tue Aug  1 19:32:55 JST 2017
- 
-    DataGear の待ち合わせ
-    DataGear の Commit
-     
-    これらは、stubとgoto meta の部分で行う
-    
-    どれに対して行うかを実行時あるいはコンパイル時に指定する必要がある
-
-    一つの解決策は、 typedefのときにannotution してあげる
-    もう一つの解決策は, Data Gear の allocation 時に指定する
-    Code Gearのプロトタイプのなかで指定する事も考えられる
-    
-    par goto時に渡す continuation で同期をとっても良い, このときにはこのcontinuation を作成するinterfaceを作る必要がある
-
-    実行時に指定してしまうと、毎回フラグのチェックが必要になる。
-    これを abstract model checking を事前に行うことで, static なコードに置き換える事はできる
-
-    例題としては, chat、dining philosophers, map reduce
-
-Fri Apr 14 18:44:09 JST 2017
-
-    struct B {
-        A a;
-        .....
-    }
-    struct A {
-        __code init(..., __code next(A a, ...));
-    }
-    par goto A->init(a);
-    // meta level
-    task->code = C_init_A;
-    task->data[idg] = ...;
-    task->data[idg + 1] = ...;
-    task->data[odg] = ...;
-    task->next = C_writeToa;
-    goto meta(context, context->TaskManager->spawn)
-
-    // lambda version?
-    par goto A->init(\A -> a = A)
-
-    // meta level
-    par goto A->init(next = \A -> a = A)
-
-Wed Mar  1 18:25:36 JST 2017
-
-    parallel_executtion/test/ を .cbc に書き直す
-    rb_tree の stub をできるだけ取り外す
-    synchornizedQueue の meta部分を分離する
-    synchronizedQueue のバグをとる
-    GPU のバグとり
-    cbc++...?
-
-Sat Jan 28 16:10:28 JST 2017
-
-    stackからpopした後、呼び出される continuation は出力を受けとる。
-    出力を受けとる stub を生成する必要がある。
-    なので、CodeGear が、そのような interface で定義されたものかどうかを調べる必要がある。
-    Stackのnext(やisEmpty)に代入された時点でわかる。なので、あまり自明な見つける方法がない。 
-    引数の異なるnextは異なる名前を持つべきか? 持たなくてもできるが...
-
-         goto next(data, ...);                                       引数で渡された continuation に移動
-         goto nodeStack->push(newNode, replaceNode1);                Interface の呼び出し。(ここで replaceNode1 が stack の戻り値を受けることがわかる。
-         goto replaceNode(traverse, traverse->current, newNode);     普通のgoto
-         goto rotateTree->next(...);                                 DataGearに格納された continuation
-
-    などをチェックする必要がある。これらの型チェックは CbC level では行われない。(CbCはmeta levelだから)
-
-     戻り値の部分は interface に記述させるという手もあるな。
-
-
-Sun Jan 22 20:11:28 JST 2017
-
-    TaskManagerから必要なCPUWorkerを生成する
-    WorkerはcreateWorker時に新しくthreadを作る
-    
-    TaskManager->createTaskで新しいContextを生成する
-    この時点でWorkerを番号で指定する
-    このContextにGearefで値を設定していく
-    待ち合わせ用のDSを設定する
-    taskManager->spawnでWorkerにcontextを送る
-
-Fri Jan 13 17:47:40 JST 2017
-
-    Task は contextを直接使うことにする
-        DS には, まっているcontextをListを作る
-        context に実行中断中のCS の番号をいれるフィールドを用意する
-        待っているDS のcount
-    createTaskの手順
-        新しくcontextを作る
-            allocate 用のheap も用意
-            もとのcontextを全部copyする or 必要なものだけcopyする
-            待ち合わせのDS群を指定する
-            終わったあとの行き先を指定する(default は task_exit)
-            exception の行き先も必要な指定する
-            待っているDSが全部揃っていたら active Queueに入れる
-    task の実行
-        taskの実行後、 goto meta する直前で code gear commit を呼んで, Reader list を消化する
-        複数から参照されるDSは一旦localに書き出して, その後atomic に書き出す
-        複数から参照されるDSは何かしら宣言が必要
-            つまり DS には 一つ一つ owner がいる
-
-Mon Nov 28 17:39:39 JST 2016
-
-    Task,TaskManager,Workerのインターフェースの実装を作成する
-    Taskを一旦Treeに入れずに直接Queueに入れる
-
-    Task
-        CodeGen
-            IDataSeg
-            IDataSeg
-            ...
-        idsCount
-        nextTask(can be C_exit)
-            ODataSeg?
-
-    TaskManager
-        createWorker
-        spawn (any,cpu,GPU)
-        taskSend
-        activeQueue
-        shutdown
-        deadlockDetectid
-
-    SynchronizedQueue * Workerの数だけ
-
-    Worker
-        execute
-        taskRecive
-        shutdown
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/Tree.h
--- a/src/parallel_execution/Tree.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-typedef struct Tree<Type, Impl>{
-    /* future Code */
-    /* Type* tree; */
-    /* Type* node; */
-    union Data* tree;
-    struct Node* node;
-    __code put(Impl* tree,Type* node, __code next(...));
-    // __code get(Impl* tree, __code next(...));
-    __code remove(Impl* tree,Type* node, __code next(...));
-    // __code clearRedBlackTree();
-    __code next(...);
-} Tree;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/Worker.h
--- a/src/parallel_execution/Worker.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,10 +0,0 @@
-typedef struct Worker<Impl>{
-    union Data* worker;
-    struct Queue* tasks;
-    struct Context* task;
-    pthread_t thread;
-    struct TaskManager* taskManager;
-    __code taskReceive(Impl* worker, struct Queue* tasks);
-    __code shutdown(Impl* worker);
-    __code next(...);
-} Worker;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/auto_generate_context.pl
--- a/src/parallel_execution/auto_generate_context.pl	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-#!/usr/bin/env perl
-use strict;
-use warnings;
-use FindBin;
-use lib "$FindBin::Bin/lib";
-
-use Gears::Context;
-use Getopt::Std;
-my %opt;
-getopts("w" => \%opt);
-
-my $h = find_target_from_camke_list();
-
-my $target = shift;
-unless (exists $h->{$target}) {
-  map { print "$_\n" }  keys %$h;
-  print "invalid target name\n";
-  exit 0;
-}
-
-my @cbc_files;
-
-for my $file_name (split / /, $h->{$target}) {
-  if ($file_name =~ /^\s*$/) {
-    next;
-  }
-  chomp $file_name;
-  push(@cbc_files,$file_name);
-}
-
-my $output     = $opt{w} ? "$FindBin::Bin/context.h" : "stdout";
-my $gears      = Gears::Context->new(compile_sources => \@cbc_files, find_root => $FindBin::Bin, output => $output);
-my $data_gears = $gears->extraction_dg_compile_sources();
-my $g          = $gears->set_data_gear_header_path();
-
-my $dg2path    = $gears->update_dg_each_header_path($data_gears,$g);
-
-my $tree = $gears->createImplTree_from_header($dg2path);
-$gears->tree2create_context_h($tree);
-
-
-sub find_target_from_camke_list {
-  open my $fh, '<', "CMakeLists.txt";
-  my $in_gears = 0;
-  my $target;
-  my %res;
-
-  while (my $line = <$fh>) {
-    if ($in_gears == 1) {
-      $in_gears++;
-      next;
-    }
-
-    if ($in_gears == 2) {
-       $line =~ s/\s*(\w+)\s*/$1/g;
-       $target = $line;
-       chomp $target;
-       $in_gears++;
-       next;
-    }
-
-    if ($in_gears == 3) {
-      $in_gears++;
-      next;
-    }
-
-    if ($in_gears == 4) {
-      $res{$target} = $line;
-      chomp $res{$target};
-      $in_gears = 0;
-      next;
-    }
-
-    if ($line =~ /^GearsCommand\(/) {
-        $in_gears++;
-    }
-
-  }
-  return \%res;
-}
-
-#GearsCommand(
-#  TARGET
-#      rbtree
-#  SOURCES
-#      SingleLinkedQueue.cbc test/rbTree_test.cbc RedBlackTree.cbc SingleLinkedStack.cbc compare.c
-#)
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/compare.c
--- a/src/parallel_execution/compare.c	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-#include "context.h"
-
-enum Relational compare(struct Node* node1, struct Node* node2) {
-    int key1 = node1->key;
-    int key2 = node2->key;
-    if (key1 == key2) {
-        return EQ;
-    } else if (key1 < key2) {
-        return GT;
-    } else {
-        return LT;
-    }
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/cuda.c
--- a/src/parallel_execution/cuda.c	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,95 +0,0 @@
-#include <stdio.h>
-#include <sys/time.h>
-#include <string.h>
-#include <stdlib.h>
-
-// includes, project
-#include <driver_types.h>
-#include <cuda_runtime.h>
-#include <cuda.h>
-#include "helper_cuda.h"
-#include "pthread.h"
-
-#include "context.h"
-
-/*
-struct Context {
-    int next;
-    struct Worker* worker;
-    struct TaskManager* taskManager;
-    int codeNum;
-    void  (**code) (struct Context*);
-    void* heapStart;
-    void* heap;
-    long heapLimit;
-    int dataNum;
-    int idgCount; //number of waiting dataGear
-    int idg;
-    int maxIdg;
-    int odg;
-    int maxOdg;
-    int workerId;
-    struct Context* task;
-    struct Queue* tasks;
-    int num_exec;
-    CUmodule module;
-    CUfunction function;
-    union Data **data;
-
-    // multi dimension parameter
-    int iterate;
-    struct Iterator* iterator;
-};
-
-struct CUDAWorker {
-    CUdevice device;
-    CUcontext cuCtx;
-    pthread_t thread;
-    struct Context* context;
-    int id;
-    struct Queue* tasks;
-    int runFlag;
-    int next;
-    int numStream;
-    CUstream *stream;
-} CUDAWorker;
-
-struct LoopCounter {
-    int i;
-} LoopCounter;
-
-struct Array {
-    int size;
-    int index;
-    int prefix;
-    int* array;
-} Array;
-*/
-
-void cudaInit(struct CUDAWorker *cudaWorker,int phase, int deviceNum) {
-    // initialize and load kernel
-    cudaWorker->numStream = 1; // number of stream
-    //    cudaWorker->stream = NEWN(cudaWorker->numStream, CUstream );
-    if (phase==0)
-        checkCudaErrors(cuInit(0));
-    if (phase==0)
-        checkCudaErrors(cuDeviceGet(&cudaWorker->device, deviceNum));
-    if (phase==0)
-        checkCudaErrors(cuCtxCreate(&cudaWorker->cuCtx, CU_CTX_SCHED_SPIN, cudaWorker->device));
-    //    if (cudaWorker->num_stream) {
-    //        for (int i=0;i<cudaWorker->num_stream;i++)
-    //            checkCudaErrors(cuStreamCreate(&cudaWorker->stream[i],0));
-    //    }
-    printf("cuda Init: Done\n");
-}
-
-void cudaLoadFunction(struct Context* context, char* filename, char* function) {
-    checkCudaErrors(cuModuleLoad(&context->module, filename));
-    checkCudaErrors(cuModuleGetFunction(&context->function, context->module, function));
-}
-
-void cudaShutdown(struct CUDAWorker *worker) {
-    //    for (int i=0;i<worker->num_stream;i++)
-    //        checkCudaErrors(cuStreamDestroy(worker->stream[i]));
-    checkCudaErrors(cuCtxDestroy(worker->cuCtx));
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/bitonicSort/CUDAbitonicSwap.cu
--- a/src/parallel_execution/examples/bitonicSort/CUDAbitonicSwap.cu	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,32 +0,0 @@
-extern "C" {
-    struct Integer {
-        int value;
-    };
-    struct SortArray {
-        struct Integer *array;
-        int loopCounter;
-        int block;
-        int first;
-        int prefix;
-    };
-    __global__ void bitonicSwap(struct Integer* array, struct SortArray* sortArray) {
-        int block = sortArray->block;
-        int first = sortArray->first;
-        int prefix = sortArray->prefix;
-        int i = 0;
-C_bitonicSwap:
-        if (i < prefix) {
-            int index = i + (blockIdx.x * blockDim.x + threadIdx.x) * prefix;
-            int position = index/block;
-            int index1 = index+block*position;
-            int index2 = (first == 1)? ((block<<1)*(position+1))-(index1%block)-1 : index1+block;
-            if (array[index2].value < array[index1].value) {
-                struct Integer tmp = array[index1];
-                array[index1] = array[index2];
-                array[index2] = tmp;
-            }
-            i++;
-            goto C_bitonicSwap;
-        }
-    }
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/bitonicSort/SortArray.h
--- a/src/parallel_execution/examples/bitonicSort/SortArray.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-typedef struct SortArray<Impl>{
-    union Data* sortArray;
-    struct Integer *array;
-    int loopCounter;
-    int block;
-    int first;
-    int prefix;
-} SortArray;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/bitonicSort/bitonicSort.cbc
--- a/src/parallel_execution/examples/bitonicSort/bitonicSort.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,89 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <math.h>
-
-#include "../../../context.h"
-#interface "TaskManager.h"
-
-int cpu_num = 1;
-int length = 1024;
-int split  = 8;
-int gpu_num = 0;
-int CPU_ANY = -1;
-int CPU_CUDA = -1;
-
-__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    // loopCounter->tree = createRedBlackTree(context);
-    loopCounter->i = 0;
-    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0);
-    goto code1();
-}
-
-__code code1(struct LoopCounter* loopCounter) {
-    printf("cpus:\t\t%d\n", cpu_num);
-    printf("gpus:\t\t%d\n", gpu_num);
-    printf("length:\t\t%d\n", length);
-    printf("length/task:\t%d\n", length/split);
-    /* puts("queue"); */
-    /* print_queue(context->data[ActiveQueue]->queue.first); */
-    /* puts("tree"); */
-    /* print_tree(context->data[Tree]->tree.root); */
-    /* puts("result"); */
-
-    goto createTask1();
-}
-
-__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    struct SortArray* outputSortArray = new SortArray();
-    struct SortArray* inputSortArray = outputSortArray;
-    struct Timer* timer = createTimerImpl(context);
-
-    par goto makeArray(outputSortArray, timer, __exit);
-
-    for (int i=2; i <= length; i=2*i) {
-        int first = 1;
-        for (int j=i>>1; j > 0; j=j>>1) {
-            outputSortArray = new SortArray();
-            inputSortArray->prefix = length/2/split;
-            inputSortArray->block = j;
-            inputSortArray->first = first;
-            par goto bitonicSwap(inputSortArray, outputSortArray, iterate(split), __exit);
-            first = 0;
-            inputSortArray = outputSortArray;
-        }
-    }
-
-    par goto printArray(inputSortArray, timer, __exit);
-
-    goto code2();
-}
-
-__code code2(struct TaskManager* taskManager) {
-    goto taskManager->shutdown(exit_code);
-}
-
-__code code2_stub(struct Context* context) {
-    goto code2(context, &Gearef(context, TaskManager)->taskManager->TaskManager);
-}
-
-void init(int argc, char** argv) {
-    for (int i = 1; argv[i]; ++i) {
-        if (strcmp(argv[i], "-cpu") == 0)
-            cpu_num = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-l") == 0)
-            length = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-s") == 0)
-            split = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-cuda") == 0) {
-            gpu_num = 1;
-            CPU_CUDA = 0;
-        }
-    }
-}
-
-int main(int argc, char** argv) {
-    init(argc, argv);
-    goto initDataGears();
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/bitonicSort/bitonicSwap.cbc
--- a/src/parallel_execution/examples/bitonicSort/bitonicSwap.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,60 +0,0 @@
-#include "../../../context.h"
-#include <stdio.h>
-
-#ifdef USE_CUDAWorker
-extern void cudaLoadFunction(struct Context* context, char* filename, char* function);
-#endif
-
-__code bitonicSwap(struct SortArray* inputArray, struct MultiDim* multiDim, __code next(struct SortArray* output, ...), struct LoopCounter* loopCounter) {
-    int block = inputArray->block;
-    int first = inputArray->first;
-    if (loopCounter->i < inputArray->prefix) {
-        int index = loopCounter->i + multiDim->x * inputArray->prefix;
-        int position = index/block;
-        int index1 = index+block*position;
-        int index2 = (first == 1)? ((block<<1)*(position+1))-(index1%block)-1 : index1+block;
-        struct Integer* array = inputArray->array;
-        if (array[index2].value < array[index1].value) {
-            struct Integer tmp = array[index1];
-            array[index1] = array[index2];
-            array[index2] = tmp;
-        }
-        loopCounter->i++;
-        goto bitonicSwap();
-    }
-    loopCounter->i = 0;
-    output->array = inputArray->array;
-    goto next(output, ...);
-}
-
-__code bitonicSwap_stub(struct Context* context) {
-#ifdef USE_CUDAWorker
-    if (context->gpu) {
-        SortArray* inputSortArray  = &context->data[context->idg]->SortArray;
-        SortArray* outputSortArray = &context->data[context->odg]->SortArray;
-        CUDABuffer* buffer = &ALLOCATE(context, CUDABuffer)->CUDABuffer;
-        buffer->inputData = (union Data**)ALLOCATE_PTR_ARRAY(context, SortArray, 2);
-        buffer->inputData[0] = (union Data*)inputSortArray->array;
-        buffer->inputData[1] = (union Data*)inputSortArray;
-        buffer->outputData = NULL;
-        buffer->inputLen = 2;
-        buffer->outputLen = 0;
-        //continuationにそってGPUworkerに戻る
-        outputSortArray->array = inputSortArray->array;
-        Executor* executor = context->worker->worker->CUDAWorker.executor;
-        executor->executor->CUDAExecutor.buffer = buffer;
-        cudaLoadFunction(context, "c/examples/bitonicSort/CUDAbitonicSwap.ptx", "bitonicSwap");
-        Gearef(context, Executor)->executor = (union Data*)executor;
-        Gearef(context, Executor)->task = context;
-        Gearef(context, Executor)->next = context->next;
-        goto meta(context, executor->read);
-    }
-#endif
-    SortArray** O_output = (struct SortArray **)&context->data[context->odg];
-    goto bitonicSwap(context,
-            &context->data[context->idg]->SortArray,
-            &context->data[context->idg+1]->MultiDim,
-            context->next,
-            O_output,
-            Gearef(context, LoopCounter));
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/bitonicSort/makeArray.cbc
--- a/src/parallel_execution/examples/bitonicSort/makeArray.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,20 +0,0 @@
-#include "../../../context.h"
-#include <stdio.h>
-#interface "Timer.h"
-
-extern int length;
-__code makeArray(__code next(struct SortArray* output, struct Timer* output1, ...)){
-    if (output->loopCounter == 0){
-        output->array = (Integer*)ALLOCATE_ARRAY(context, Integer, length);
-        srand((unsigned) time(NULL));
-    }
-    if (output->loopCounter == GET_LEN(output->array)){
-        printf("created Array\n");
-        output->loopCounter = 0;
-        goto output1->start(next(...));
-    }
-    output->array[output->loopCounter].value = rand() % 1000;
-    //printf("%d\n", output->array[output->loopCounter]->value);
-    output->loopCounter++;
-    goto makeArray();
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/bitonicSort/printArray.cbc
--- a/src/parallel_execution/examples/bitonicSort/printArray.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-#include "../../../context.h"
-#interface "Timer.h"
-#include <stdio.h>
-
-__code printArray(struct SortArray* inputArray, struct Timer* inputTimer, __code next(...)){
-    goto inputTimer->end(printArray1);
-}
-
-__code printArray1(struct SortArray* inputArray, __code next(...)){
-    //printf("%d\n", inputArray->array[inputArray->loopCounter].value);
-    inputArray->loopCounter++;
-    if (inputArray->loopCounter == GET_LEN(inputArray->array)){
-        printf("sort completed\n");
-        inputArray->loopCounter = 0;
-        goto next(...);
-    }
-    if (inputArray->array[inputArray->loopCounter-1].value > inputArray->array[inputArray->loopCounter].value) {
-        printf("wrong result\n");
-        goto next(...);
-    }
-    goto printArray1();
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/bitonicSort/sort.cbc
--- a/src/parallel_execution/examples/bitonicSort/sort.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,95 +0,0 @@
-#include<stdio.h>
-#include <stdlib.h>
-#include "../../../context.h"
-#define LOGN 5
-#define MAX 1 << LOGN
-
-int main(int argc, char const* argv[]) {
-    struct Context* main_context = NEW(struct Context);
-    initContext(main_context);
-    main_context->next = C_sort_start;
-    goto start_code(main_context);
-}
-
-__code sort_start(struct SortArray* sortArray){
-    sortArray->sortArray = new SortArray();
-    sortArray->sortArray->array = (Integer**)ALLOCATE_PTR_ARRAY(context, Integer, MAX);//ALLOC_ARRAYはDSの配列なのでintではできない
-    sortArray->sortArray->loop_counter = 0;
-    sortArray->sortArray->loop_counter2 = 0;
-    sortArray->sortArray->loop_counter3 = 0;
-    srand((unsigned) time(NULL));
-    goto meta(context, C_make_array);
-}
-
-__code make_array(struct SortArray* sortArray){//乱数生成
-    if (sortArray->sortArray->loop_counter == MAX){//ループの終了→配列表示へ
-        sortArray->sortArray->loop_counter = 0;
-        goto meta(context, C_print);
-    }
-    struct Integer* integer = new Integer();
-    integer->value = rand() % 1000;
-    sortArray->sortArray->array[sortArray->sortArray->loop_counter] = integer;
-    sortArray->sortArray->loop_counter++;
-    goto meta(context, C_make_array);
-}
-
-__code print(struct SortArray* sortArray){//配列表示
-    if (sortArray->sortArray->loop_counter == MAX){//ループの終了→ソートへ
-        printf("\n");
-        if (sortArray->sortArray->sort_finish == 1){//ソート終わってたら終了
-            goto meta(context, C_exit_code);
-        }
-        sortArray->sortArray->loop_counter = 0;
-        sortArray->sortArray->loop_counter2 = 0;
-        goto meta(context, C_bitonic_sort);
-    }
-
-    printf("%d, ", sortArray->sortArray->array[sortArray->sortArray->loop_counter]->value);
-    sortArray->sortArray->loop_counter++;
-    goto meta(context, C_print);
-}
-
-__code bitonic_sort(struct SortArray* sortArray){//ソートの繰り返し
-    if (sortArray->sortArray->loop_counter >= LOGN){//ループの終了→配列表示へ
-        sortArray->sortArray->loop_counter = 0;
-        sortArray->sortArray->sort_finish = 1;
-        goto meta(context, C_print);
-    }
-    goto meta(context, C_kernel);
-}
-
-__code kernel(struct SortArray* sortArray){//繰り返し２
-    if (sortArray->sortArray->loop_counter2 > sortArray->sortArray->loop_counter){//ループの終了→上のループへ
-        sortArray->sortArray->loop_counter++;
-        sortArray->sortArray->loop_counter2 = 0;
-        goto meta(context, C_bitonic_sort);
-    }
-
-    goto meta(context, C_kernel2);
-}
-
-__code kernel2(struct SortArray* sortArray){//ソートの中身
-    int i = sortArray->sortArray->loop_counter3;
-
-    if (i >= GET_LEN(sortArray->sortArray->array)){//ループの終了→上のループへ
-        sortArray->sortArray->loop_counter2++;
-        sortArray->sortArray->loop_counter3 = 0;
-        goto meta(context, C_kernel);
-    }
-
-     goto meta(context, C_swap);
-}
-
-__code swap(struct SortArray* sortArray){//配列の要素を入れ替える
-    int i = sortArray->sortArray->loop_counter3;
-    int d = 1 << (sortArray->sortArray->loop_counter - sortArray->sortArray->loop_counter2);
-    int up = ((i >> sortArray->sortArray->loop_counter) & 2) == 0;
-
-    if ((i & d) == 0 && (sortArray->sortArray->array[i]->value > sortArray->sortArray->array[i | d]->value) == up) {
-        struct Integer *tmp = sortArray->sortArray->array[i];
-        sortArray->sortArray->array[i] = sortArray->sortArray->array[i | d];
-        sortArray->sortArray->array[i | d] = tmp;
-    }
-    sortArray->sortArray->loop_counter3++;
-    goto meta(context, C_kernel2);//上位のループへ
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/boundedBuffer/BoundedBuffer.cbc
--- a/src/parallel_execution/examples/boundedBuffer/BoundedBuffer.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,77 +0,0 @@
-#include "../../../context.h"
-#interface "Buffer.h"
-#interface "Semaphore.h"
-
-Buffer* createBoundedBuffer(struct Context* context, int size) {
-    struct Buffer* buffer = new Buffer();
-    struct BoundedBuffer* boundedBuffer = new BoundedBuffer();
-    boundedBuffer->top = new Element();
-    boundedBuffer->top->next = NULL;
-    boundedBuffer->last = boundedBuffer->top;
-    boundedBuffer->fullCount = createSemaphoreImpl(context, 0);
-    boundedBuffer->emptyCount = createSemaphoreImpl(context, size);
-    boundedBuffer->lock = createSemaphoreImpl(context, 1); // binary semaphore
-    buffer->buffer = (union Data*)boundedBuffer;
-    buffer->take = C_takeBoundedBuffer;
-    buffer->put = C_putBoundedBuffer;
-    return buffer;
-}
-
-__code putBoundedBuffer(struct BoundedBuffer* buffer, union Data* data, __code next(...)) {
-    struct Semaphore* semaphore = buffer->emptyCount;
-    goto semaphore->p(putBoundedBuffer1);
-}
-
-__code putBoundedBuffer1(struct BoundedBuffer* buffer, union Data* data, __code next(...)) {
-    struct Semaphore* semaphore = buffer->lock;
-    goto semaphore->p(putBoundedBuffer2);
-}
-
-__code putBoundedBuffer2(struct BoundedBuffer* buffer, union Data* data, __code next(...)) {
-    struct Element* element = new Element();
-    element->data = data;
-    element->next = NULL;
-    struct Element* last = buffer->last;
-    last->next = element;
-    buffer->last = element;
-    struct Semaphore* semaphore = buffer->lock;
-    goto semaphore->v(putBoundedBuffer3);
-}
-
-__code putBoundedBuffer3(struct BoundedBuffer* buffer, union Data* data, __code next(...)) {
-    struct Semaphore* semaphore = buffer->fullCount;
-    goto semaphore->v(putBoundedBuffer4);
-}
-
-__code putBoundedBuffer4(struct BoundedBuffer* buffer, union Data* data, __code next(...)) {
-    goto next(...);
-}
-
-__code takeBoundedBuffer(struct BoundedBuffer* buffer, __code next(union Data* data, ...)) {
-    struct Semaphore* semaphore = buffer->fullCount;
-    goto semaphore->p(takeBoundedBuffer1);
-}
-
-__code takeBoundedBuffer1(struct BoundedBuffer* buffer, __code next(union Data* data, ...)) {
-    struct Semaphore* semaphore = buffer->lock;
-    goto semaphore->p(takeBoundedBuffer2);
-}
-
-__code takeBoundedBuffer2(struct BoundedBuffer* buffer, __code next(union Data* data, ...)) {
-    struct Element* top = buffer->top;
-    struct Element* nextElement = top->next;
-    data = nextElement->data;
-    *O_data =data;
-    buffer->top = nextElement;
-    struct Semaphore* semaphore = buffer->lock;
-    goto semaphore->v(takeBoundedBuffer3);
-}
-
-__code takeBoundedBuffer3(struct BoundedBuffer* buffer, __code next(union Data* data, ...)) {
-    struct Semaphore* semaphore = buffer->emptyCount;
-    goto semaphore->v(takeBoundedBuffer4);
-}
-
-__code takeBoundedBuffer4(struct BoundedBuffer* buffer, __code next(union Data* data, ...)) {
-    goto next(data, ...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/boundedBuffer/SemaphoreImpl.cbc
--- a/src/parallel_execution/examples/boundedBuffer/SemaphoreImpl.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-#include "../../../context.h"
-#interface "Semaphore.h"
-#interface "Queue.h"
-#interface "TaskManager.h"
-#interface "Lock.h"
-
-Semaphore* createSemaphoreImpl(struct Context* context, int n) {
-    struct Semaphore* semaphore = new Semaphore();
-    struct SemaphoreImpl* semaphoreImpl = new SemaphoreImpl();
-    semaphore->semaphore = (union Data*)semaphoreImpl;
-    semaphoreImpl->value =  n;
-    semaphoreImpl->waitThreadQueue = createSingleLinkedQueue(context);
-    semaphoreImpl->lock = createSpinLock(context);
-    semaphore->p = C_pOperationSemaphoreImpl;
-    semaphore->v = C_vOperationSemaphoreImpl;
-    return semaphore;
-}
-
-__code pOperationSemaphoreImpl(struct SemaphoreImpl* semaphore, __code next(...)) {
-    struct Lock* lock = semaphore->lock;
-    goto lock->doLock(pOperationSemaphoreImpl1);
-}
-
-__code pOperationSemaphoreImpl1(struct SemaphoreImpl* semaphore, __code next(...)) {
-    if (semaphore->value == 0) {
-        context->next= C_pOperationSemaphoreImpl;
-        struct Queue* queue = semaphore->waitThreadQueue;
-        goto queue->put(context, pOperationSemaphoreImpl2); // put this context(thread, process)
-    }
-    semaphore->value--;
-    struct Lock* lock = semaphore->lock;
-    goto lock->doUnlock(next(...));
-}
-
-__code pOperationSemaphoreImpl2(struct SemaphoreImpl* semaphore, __code next(...)) {
-    struct Lock* lock = semaphore->lock;
-    goto lock->doUnlock(pOperationSemaphoreImpl3);
-}
-
-__code pOperationSemaphoreImpl3(struct SemaphoreImpl* semaphore, struct Worker* worker, __code next(...)) {
-    goto worker->taskReceive(); // goto shceduler
-}
-
-__code pOperationSemaphoreImpl3_stub(struct Context* context) {
-    // switch worker context
-    struct Context* workerContext = context->worker->worker->CPUWorker.context;
-    SemaphoreImpl* semaphoreImpl = (SemaphoreImpl*)GearImpl(context, Semaphore, semaphore);
-    goto pOperationSemaphoreImpl3(workerContext,
-                                  semaphoreImpl,
-                                  context->worker,
-                                  Gearef(context, Semaphore)->next);
-}
-
-__code vOperationSemaphoreImpl(struct SemaphoreImpl* semaphore, __code next(...)) {
-    struct Lock* lock = semaphore->lock;
-    goto lock->doLock(vOperationSemaphoreImpl1);
-}
-
-__code vOperationSemaphoreImpl1(struct SemaphoreImpl* semaphore, __code next(...)) {
-    semaphore->value++;
-    struct Queue* queue = semaphore->waitThreadQueue;
-    goto queue->isEmpty(vOperationSemaphoreImpl2, vOperationSemaphoreImpl4);
-}
-
-__code vOperationSemaphoreImpl2(struct SemaphoreImpl* semaphore, __code next(...)) {
-    struct Queue* queue = semaphore->waitThreadQueue;
-    goto queue->take(vOperationSemaphoreImpl3);
-}
-
-__code vOperationSemaphoreImpl3(struct SemaphoreImpl* semaphore, struct Context* waitTask, __code next(...)) {
-    struct TaskManager* taskManager = waitTask->taskManager;
-    goto taskManager->spawn(waitTask, vOperationSemaphoreImpl4); //notify
-}
-
-__code vOperationSemaphoreImpl3_stub(struct Context* context) {
-    SemaphoreImpl* semaphoreImpl = (SemaphoreImpl*)GearImpl(context, Semaphore, semaphore);
-    struct Context* waitTask = &Gearef(context, Queue)->data->Context;
-    goto vOperationSemaphoreImpl3(context,
-                                  semaphoreImpl,
-                                  waitTask,
-                                  Gearef(context, Semaphore)->next);
-}
-
-__code vOperationSemaphoreImpl4(struct SemaphoreImpl* semaphore, __code next(...)) {
-    struct Lock* lock = semaphore->lock;
-    goto lock->doUnlock(next(...));
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/boundedBuffer/consumer.cbc
--- a/src/parallel_execution/examples/boundedBuffer/consumer.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,25 +0,0 @@
-#include "../../../context.h"
-#include <stdio.h>
-#interface "Buffer.h"
-
-__code consumer(struct Buffer* buffer, struct Integer* length, __code next(...), struct LoopCounter* loopCounter) {
-    int i = loopCounter->i;
-    if (i < length->value) {
-        loopCounter->i++;
-        goto buffer->take(consumer1);
-    }
-    goto next(...);
-}
-
-__code consumer1(struct Buffer* buffer, struct Integer* length, __code next(...), struct Node* node) {
-    printf("getData %d\n", node->value->Integer.value);
-    goto consumer();
-}
-
-__code consumer1_stub(struct Context* context) {
-    goto consumer1(context,
-                   &context->data[context->idg]->Buffer,
-                   &context->data[context->idg+1]->Integer,
-                   context->next,
-                   &Gearef(context, Buffer)->data->Node);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/boundedBuffer/initBuffer.cbc
--- a/src/parallel_execution/examples/boundedBuffer/initBuffer.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-#include "../../../context.h"
-
-__code initBuffer(__code next(struct Buffer* output, struct Integer* output1, ...)) {
-    goto next(output, output1, ...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/boundedBuffer/main.cbc
--- a/src/parallel_execution/examples/boundedBuffer/main.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,71 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <assert.h>
-
-#include "../../../context.h"
-#interface "TaskManager.h"
-
-int cpu_num = 1;
-int length = 100;
-int buffer_size = 10;
-int gpu_num = 0;
-int CPU_ANY = -1;
-int CPU_CUDA = -1;
-
-__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    // loopCounter->tree = createRedBlackTree(context);
-    loopCounter->i = 0;
-    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0);
-    goto code1();
-}
-
-__code code1(struct Timer* timer) {
-    printf("cpus:\t\t%d\n", cpu_num);
-    printf("gpus:\t\t%d\n", gpu_num);
-    printf("length:\t\t%d\n", length);
-    goto createTask1();
-}
-
-__code code2(struct TaskManager* taskManager) {
-    goto taskManager->shutdown(exit_code);
-}
-
-__code code2_stub(struct Context* context) {
-    goto code2(context, &Gearef(context, TaskManager)->taskManager->TaskManager);
-}
-
-__code createTask1(struct TaskManager* taskManager) {
-    struct Buffer* buffer = createBoundedBuffer(context, buffer_size);
-    Integer* len = new Integer();
-    len->value = length;
-    par goto producer(buffer, len, __exit);
-    par goto producer(buffer, len, __exit);
-    par goto producer(buffer, len, __exit);
-    par goto consumer(buffer, len, __exit);
-    par goto consumer(buffer, len, __exit);
-    par goto consumer(buffer, len, __exit);
-    par goto initBuffer(buffer, len, __exit);
-    goto code2();
-}
-
-void init(int argc, char** argv) {
-    for (int i = 1; argv[i]; ++i) {
-        if (strcmp(argv[i], "-cpu") == 0)
-            cpu_num = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-l") == 0)
-            length = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-bufferSize") == 0)
-            buffer_size = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-cuda") == 0) {
-            gpu_num = 1;
-            CPU_CUDA = 0;
-        }
-    }
-}
-
-int main(int argc, char** argv) {
-    init(argc, argv);
-    goto initDataGears();
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/boundedBuffer/producer.cbc
--- a/src/parallel_execution/examples/boundedBuffer/producer.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-#include "../../../context.h"
-#interface "Buffer.h"
-
-__code producer(struct Buffer* buffer, struct Integer* length, __code next(...), struct LoopCounter* loopCounter) {
-    int i = loopCounter->i;
-    if (i < length->value) {
-        Node* node = new Node();
-        node->value = (union Data*)new Integer();
-        node->value->Integer.value = i;
-        loopCounter->i++;
-        goto buffer->put(node, producer);
-    }
-    goto next(...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/calc/add.cbc
--- a/src/parallel_execution/examples/calc/add.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-#include "../../../context.h"
-#include <stdio.h>
-__code add(struct Integer* input1, struct Integer* input2, __code next(struct Integer* output, ...)) {
-    output->value = input1->value + input2->value;
-    printf("%d + %d = %d\n", input1->value, input2->value, output->value);
-    goto next(output, ...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/calc/calc.cbc
--- a/src/parallel_execution/examples/calc/calc.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <assert.h>
-
-#include "../../../context.h"
-#interface "TaskManager.h"
-
-int cpu_num = 1;
-int length = 100;
-int gpu_num = 0;
-int CPU_ANY = -1;
-int CPU_CUDA = -1;
-
-__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    // loopCounter->tree = createRedBlackTree(context);
-    loopCounter->i = 0;
-    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0);
-    goto meta(context, C_code1);
-}
-
-__code code1(struct Timer* timer) {
-    printf("cpus:\t\t%d\n", cpu_num);
-    printf("gpus:\t\t%d\n", gpu_num);
-    printf("length:\t\t%d\n", length);
-    /* puts("queue"); */
-    /* print_queue(context->data[ActiveQueue]->queue.first); */
-    /* puts("tree"); */
-    /* print_tree(context->data[Tree]->tree.root); */
-    /* puts("result"); */
-
-    //time->next = C_code2;
-    goto meta(context, C_createTask1);
-    //goto meta(context, C_start_time);
-}
-
-__code code1_stub(struct Context* context) {
-    goto code1(context, Gearef(context, Timer));
-}
-
-
-__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    int i = loopCounter->i;
-
-    if (i < length) {
-        loopCounter->i++;
-        goto meta(context, C_createTask2);
-    }
-
-    loopCounter->i = 0;
-    taskManager->next = C_exit_code;
-    goto meta(context, taskManager->taskManager->TaskManager.shutdown);
-}
-
-__code createTask2(struct LoopCounter* loopCounter) {
-    Integer* integer1 = new Integer();
-    Integer* integer2 = new Integer();
-    Integer* integer3 = new Integer();
-    par goto mult(integer1, integer2, integer3, __exit);
-
-    Integer* integer4 = new Integer();
-    Integer* integer5 = new Integer();
-    par goto add(integer4, integer5, integer1, __exit);
-
-    par goto initIntegerDataGears(integer2, integer4, integer5, __exit);
-
-    goto createTask1();
-}
-
-void init(int argc, char** argv) {
-    for (int i = 1; argv[i]; ++i) {
-        if (strcmp(argv[i], "-cpu") == 0)
-            cpu_num = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-l") == 0)
-            length = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-cuda") == 0) {
-            gpu_num = 1;
-            CPU_CUDA = 0;
-        }
-    }
-}
-
-int main(int argc, char** argv) {
-    init(argc, argv);
-    goto initDataGears();
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/calc/initIntegerDataGears.cbc
--- a/src/parallel_execution/examples/calc/initIntegerDataGears.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-#include "../../../context.h"
-#include <stdio.h>
-__code initIntegerDataGears(__code next(struct Integer* output1, struct Integer* output2, struct Integer* output3, ...)) {
-    output1->value = 1;
-    output2->value = 2;
-    output3->value = 3;
-    goto next(output1, output2, output3, ...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/calc/mult.cbc
--- a/src/parallel_execution/examples/calc/mult.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-#include "../../../context.h"
-#include <stdio.h>
-__code mult(struct Integer* input1, struct Integer* input2, __code next(struct Integer* output, ...)) {
-    output->value = input1->value * input2->value;
-    printf("%d * %d = %d\n", input1->value, input2->value, output->value);
-    goto next(output, ...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/twice/CUDAtwice.cu
--- a/src/parallel_execution/examples/twice/CUDAtwice.cu	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-extern "C" {
-    struct Array {
-        int prefix;
-        int* array;
-    } Array;
-
-    __global__ void twice(int* array, struct Array* inputArray) {
-        int i = 0;
-        int prefix = inputArray->prefix;
-C_twice:
-        if (i < prefix) {
-            array[i+(blockIdx.x*blockDim.x+threadIdx.x)*prefix]->value = array[i+(blockIdx.x*blockDim.x+threadIdx.x)*prefix]->value*2;
-            i++;
-            goto C_twice;
-        }
-    }
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/twice/createArray.cbc
--- a/src/parallel_execution/examples/twice/createArray.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-#include <stdio.h>
-#include "../../../context.h"
-#interface "Timer.h"
-
-extern int length;
-extern int split;
-
-__code createArray(__code next(struct Array* output, struct Timer* output1, ...), struct LoopCounter* loopCounter) {
-    int i = loopCounter->i;
-    if (i == 0){
-        output->array = (Integer*)ALLOCATE_ARRAY(context, Integer, length);
-        output->prefix = length/split;
-    }
-    if (i == GET_LEN(output->array)){
-        printf("created Array\n");
-        loopCounter->i = 0;
-        goto output1->start(next(...));
-    }
-    output->array[i].value = i;
-    loopCounter->i++;
-    goto createArray();
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/twice/main.cbc
--- a/src/parallel_execution/examples/twice/main.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,75 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "../../../context.h"
-#interface "TaskManager.h"
-
-int cpu_num = 1;
-int length = 102400;
-int split = 8;
-int* array_ptr;
-int gpu_num = 0;
-int CPU_ANY = -1;
-int CPU_CUDA = -1;
-
-__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    // loopCounter->tree = createRedBlackTree(context);
-    loopCounter->i = 0;
-    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0);
-    goto code1();
-}
-
-__code code1(struct LoopCounter* loopCounter) {
-    printf("cpus:\t\t%d\n", cpu_num);
-    printf("gpus:\t\t%d\n", gpu_num);
-    printf("length:\t\t%d\n", length);
-    printf("length/task:\t%d\n", length/split);
-    /* puts("queue"); */
-    /* print_queue(context->data[ActiveQueue]->queue.first); */
-    /* puts("tree"); */
-    /* print_tree(context->data[Tree]->tree.root); */
-    /* puts("result"); */
-    goto createTask1();
-}
-
-
-__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    Array* array1 = new Array();
-    Array* array2 = new Array();
-    Timer* timer = createTimerImpl(context);
-
-    par goto createArray(array1, timer, __exit);
-    par goto twice(array1, array2, iterate(split), __exit);
-    par goto printArray(array2, timer, __exit);
-    goto code2();
-}
-
-__code code2(struct TaskManager* taskManager) {
-    goto taskManager->shutdown(exit_code);
-}
-
-__code code2_stub(struct Context* context) {
-    goto code2(context, &Gearef(context, TaskManager)->taskManager->TaskManager);
-}
-
-void init(int argc, char** argv) {
-    for (int i = 1; argv[i]; ++i) {
-        if (strcmp(argv[i], "-cpu") == 0)
-            cpu_num = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-l") == 0)
-            length = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-s") == 0)
-            split = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-cuda") == 0) {
-            gpu_num = 1;
-            CPU_CUDA = 0;
-        }
-    }
-}
-
-int main(int argc, char** argv) {
-    init(argc, argv);
-    goto initDataGears();
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/twice/printArray.cbc
--- a/src/parallel_execution/examples/twice/printArray.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,22 +0,0 @@
-#include "../../../context.h"
-#interface "Timer.h"
-#include <stdio.h>
-
-__code printArray(struct Array* array, struct Timer* inputTimer, __code next(...)){
-    goto inputTimer->end(printArray1);
-}
-
-__code printArray1(struct Array* array, __code next(...), struct LoopCounter* loopCounter){
-    int i = loopCounter->i;
-    //printf("%d\n", array->array[i]);
-    if (i < GET_LEN(array->array)) {
-        if (array->array[i].value == i*2) {
-            loopCounter->i++;
-            goto printArray1();
-        } else {
-            printf("wrong result\n");
-        }
-    }
-   loopCounter->i = 0;
-    goto next(...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/examples/twice/twice.cbc
--- a/src/parallel_execution/examples/twice/twice.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,53 +0,0 @@
-#include <stdio.h>
-
-#include "../../../context.h"
-
-#ifdef USE_CUDAWorker
-extern void cudaLoadFunction(struct Context* context, char* filename, char* function);
-#endif
-
-__code twice(struct Array* array, struct MultiDim* multiDim, __code next(struct Array* output, ...), struct LoopCounter* loopCounter) {
-    int i = loopCounter->i;
-    int index = multiDim->x;
-    if (i < array->prefix) {
-        array->array[i+index*array->prefix].value = array->array[i+index*array->prefix].value*2;
-        loopCounter->i++;
-
-        goto meta(context, C_twice);
-    }
-
-    loopCounter->i = 0;
-    output->array = array->array;
-    goto next(output, ...);
-}
-
-__code twice_stub(struct Context* context) {
-#ifdef USE_CUDAWorker
-    if (context->gpu) {
-        Array* inputArray  = &context->data[context->idg]->Array;
-        Array* outputArray = &context->data[context->odg]->Array;
-        CUDABuffer* buffer = &ALLOCATE(context, CUDABuffer)->CUDABuffer;
-        buffer->inputData = (union Data**)ALLOCATE_PTR_ARRAY(context, Array, 2);
-        buffer->inputData[0] = (union Data*)inputArray->array;
-        buffer->inputData[1] = (union Data*)inputArray;
-        buffer->outputData = NULL;
-        buffer->inputLen = 2;
-        buffer->outputLen = 0;
-        Executor* executor = context->worker->worker->CUDAWorker.executor;
-        executor->executor->CUDAExecutor.buffer = buffer;
-        cudaLoadFunction(context, "c/examples/twice/CUDAtwice.ptx", "twice");
-        outputArray->array = inputArray->array;
-        Gearef(context, Executor)->executor = (union Data*)executor;
-        Gearef(context, Executor)->task = context;
-        Gearef(context, Executor)->next = context->next;
-        goto meta(context, executor->read);
-    }
-#endif
-    Array** O_output = (struct Array **)&context->data[context->odg];
-    goto twice(context,
-               &context->data[context->idg]->Array,
-               &context->data[context->idg+1]->MultiDim,
-               context->next,
-               O_output,
-               Gearef(context, LoopCounter));
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/generate_context.pl
--- a/src/parallel_execution/generate_context.pl	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,249 +0,0 @@
-#!/usr/bin/perl
-
-use Getopt::Std;
-use strict;
-
-# 
-# generrate Gears OS context heaader and initializer from CbC sources
-#
-# CodeGear
-# 
-# get stub information from # *.c
-#     __code taskManager_stub(struct Context* context) {
-# 
-# generate CodeGear indexn in context.h
-#     C_taskManager,
-# 
-# generate CodeGear stub reference in context.h
-#     extern __code taskManager_stub(struct Context*);
-# 
-# generate CodeGear stub reference in $name-context.h for each module
-#     context->code[C_taskManager]   = taskManager_stub;
-# 
-# DataGear
-# 
-# get DataGear information from context.h
-#     struct Worker {
-#         int id;
-#         struct Context* contexts;
-#         enum Code execute;
-#         enum Code taskSend;
-#         enum Code taskRecive;
-#         enum Code shutdown;
-#         struct Queue* tasks;
-#     } Worker;
-# 
-# generate typedefs and DataGear index in context.h
-#     typedef struct Worker Worker;
-#     D_Worker,
-# 
-# generate DataGear allocator in context.h
-#      ALLOC_DATA(context, Worker);
-#
-
-my $ddir = "c";
-
-our($opt_o,$opt_d,$opt_h,$opt_w);
-getopts('o:d:hw');
-
-my $name = $opt_o?$opt_o:"gears";
-
-if ($opt_d) {
-    $ddir = $opt_d;
-}
-
-if ( ! -d $ddir) {
-    mkdir $ddir;
-}
-
-if ($opt_h) {
-    print "$0  [-d distdir] [-h]\n";
-    exit;
-}
-
-my %codeGear;
-my %dataGear;
-my %constructor;
-
-{
-  use FindBin;
-  use lib "$FindBin::Bin/lib";
-
-  use File::Spec;
-
-  use Gears::Context;
-  use Getopt::Std;
-
-  my $output     = $opt_w ? "$FindBin::Bin/context.h" : "stdout";
-
-  my @cbc_files;
-  map { push(@cbc_files,File::Spec->rel2abs($_)); }  @ARGV;
-  my $gears      = Gears::Context->new(compile_sources => \@cbc_files, find_root => $FindBin::Bin, output => $output);
-  my $data_gears = $gears->extraction_dg_compile_sources();
-  my $g          = $gears->set_data_gear_header_path();
-
-  #use DDP {deparse =>1};
-  my $dg2path    = $gears->update_dg_each_header_path($data_gears,$g);
-  my $tree = $gears->createImplTree_from_header($dg2path);
-  $gears->tree2create_context_h($tree);
-}
-
-# gather module Information for code table initialization
-for (@ARGV) {
-    next if (/context.c/);
-    &getStubInfo($_);
-}
-
-my (%mCodeGear) = (%codeGear);
-
-# anyway we gather all Gears Information
-while (<*.c test/*.c>) {
-    next if (/context.c/);
-    &getStubInfo($_);
-}
-
-&generateContext();
-
-sub getStubInfo {
-    my ($filename) = @_;
-    open my $fd,"<",$filename or die("can't open $filename $!");
-    while (<$fd>) {
-        if (/^__code (\w+)_stub\(struct  *Context *\* *context\)/) {
-            $codeGear{$1} = $filename;
-        } elsif (/^(\w+)(\*)+  *create(\w+)\(([^]]*)\)/) {
-            my $interface = $1;
-            my $implementation = $3;
-            my $constructorArgs = $4;
-            $constructor{$implementation} = [$interface, $constructorArgs];
-        }
-    }
-
-    open my $cx,"<","context.h" or die("can't open context.h $!");
-    my $inUnionData = 0;
-    while (<$cx>) {
-        if (! $inUnionData) {
-            if ( /^union Data/) {
-                $inUnionData = 1;
-            }
-            next;
-        }
-        last if (/union Data end/);
-        if (/struct (\w+) \{/) {
-            $dataGear{$1} = 'struct';
-        } elsif (/^\s{4}(\w+) (\w+);/) { # primitive type
-            $dataGear{$1} = 'primitive';
-        }
-        $dataGear{"Context"} = "struct";
-    }
-}
-
-sub generateContext {
-    $codeGear{"start_code"} = "$ddir/$name-context.c";
-    $codeGear{"exit_code"} = "$ddir/$name-context.c";
-    $mCodeGear{"start_code"} = "$ddir/$name-context.c";
-    $mCodeGear{"exit_code"} = "$ddir/$name-context.c";
-    open my $fd,">","$ddir/extern.h" or die("can't open $ddir/extern.h $!");
-    for my $code ( sort keys %codeGear ) {
-        print $fd "extern __code ${code}_stub(struct Context*);\n";
-    }
-    for my $impl ( sort keys %constructor ) {
-        my ($interface, $constructorArgs) = @{$constructor{$impl}};
-        print $fd "extern ${interface}* create${impl}($constructorArgs);\n";
-    }
-    print $fd "\n";
-
-    open my $fd,">","$ddir/enumCode.h" or die("can't open $ddir/enumCode.h $!");
-    print $fd "enum Code {\n";
-    for my $code ( sort keys %codeGear ) {
-        print $fd "    C_${code},\n";
-    }
-    print $fd "};\n";
-   
-    my $code_init = ''; 
-    for my $code ( sort keys %mCodeGear ) {
-        $code_init .=  "    context->code[C_${code}]    = ${code}_stub;\n";
-    }
-
-    my $data_num = keys(%dataGear);
-    $data_num++;
-my $context_c = << "EOFEOF";
-#include <stdlib.h>
-
-#include "../context.h"
-
-void initContext(struct Context* context) {
-    context->heapLimit = sizeof(union Data)*ALLOCATE_SIZE;
-    context->code = (__code(**) (struct Context*)) NEWN(ALLOCATE_SIZE, void*);
-    context->data = NEWN(ALLOCATE_SIZE, union Data*);
-    context->heapStart = NEWN(context->heapLimit, char);
-    context->heap = context->heapStart;
-    // context->codeNum = Exit;
-
-$code_init
-
-#include "dataGearInit.c"
-    context->dataNum = $data_num;
-}
-EOFEOF
-
-    open my $fd,">","$ddir/$name-context.c" or die("can't open $ddir/$name-context.c $!");
-    print $fd $context_c;
-
-my $meta_call = <<"EOFEOF";
-__code meta(struct Context* context, enum Code next) {
-    // printf("meta %d\\n",next);
-    goto (context->code[next])(context);
-}
-
-__code parGotoMeta(struct Context* context, enum Code next) {
-    context->task     = NULL;
-    context->taskList = NULL;
-    goto (context->code[Gearef(context, TaskManager)->taskManager->TaskManager.spawnTasks])(context);
-}
-
-__code start_code(struct Context* context) {
-    goto meta(context, context->next);
-}
-
-__code start_code_stub(struct Context* context) {
-    goto start_code(context);
-}
-
-__code exit_code(struct Context* context) {
-    free(context->code);
-    free(context->data);
-    free(context->heapStart);
-    goto exit(0);
-}
-
-__code exit_code_stub(struct Context* context) {
-    goto exit_code(context);
-}    
-
-// end context_c
-EOFEOF
-
-print $fd $meta_call;
-
-open my $fd,">","$ddir/enumData.h" or die("can't open $ddir/enumData.h $!");
-print $fd "enum DataType {\n";
-print $fd "    D_Code,\n";
-for my $data ( sort keys %dataGear ) {
-    print $fd "    D_${data},\n";
-}
-print $fd "};\n\n";
-
-open my $fd,">","$ddir/typedefData.h" or die("can't open $ddir/typedefData.h $!");
-for my $data ( sort keys %dataGear ) {
-    if ($dataGear{$data} eq 'struct') {
-        print $fd "typedef struct ${data} ${data};\n";
-    }
-}
-
-open my $fd,">","$ddir/dataGearInit.c" or die("can't open $ddir/dataGearInit.c $!");
-for my $data ( sort keys %dataGear ) {
-    print $fd "    ALLOC_DATA(context, ${data});\n";
-}
-}
-
-# end
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/generate_stub.pl
--- a/src/parallel_execution/generate_stub.pl	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,636 +0,0 @@
-#!/usr/bin/perl
-
-use strict;
-use Getopt::Std;
-use File::Path qw(make_path);
-
-# interface.h
-# typedef struct Worker {
-#         int id;
-#         struct Context* contexts;
-#         enum Code execute;
-#         enum Code taskSend;
-#         enum Code taskRecive;
-#         enum Code shutdown;
-#         struct Queue* tasks;
-#     } Worker;
-
-our($opt_o,$opt_d,$opt_h);
-getopts('o:d:h');
-
-my $dir = ".";
-if ($opt_d) {
-    $dir = $opt_d;
-    if (! -d $dir) {
-        make_path $dir;
-    }
-}
-
-for my $fn (@ARGV) {
-    next if ($fn !~ /\.cbc$/);
-    &getDataGear($fn);
-    &generateDataGear($fn);
-}
-
-my %var;
-my %code;
-my %dataGearVar;
-my %outputVar;       # output var initializer
-my %outputArgs;      # continuation's output variables
-my %dataGear;
-my %dataGearName;
-my %generic;
-my %dataGearVarType;
-my %codeGear;
-my $implementation;
-my $interface;
-
-# interface definision
-#
-# typedef struct Stack<Type, Impl>{
-#         Type* stack;
-#         Type* data;
-#         Type* data1;
-#         __code whenEmpty(...);
-#         __code clear(Impl* stack,__code next(...));
-#         __code push(Impl* stack,Type* data, __code next(...));
-#         __code pop(Impl* stack, __code next(Type*, ...));
-#         __code pop2(Impl* stack, Type** data, Type** data1, __code next(Type**, Type**, ...));
-#         __code isEmpty(Impl* stack, __code next(...), __code whenEmpty(...));
-#         __code get(Impl* stack, Type** data, __code next(...));
-#         __code get2(Impl* stack,..., __code next(...));
-#         __code next(...);
-# } Stack;
-#
-# calling example
-#
-# goto nodeStack->push((union Data*)node, stackTest3);
-#
-# generated meta level code
-#
-# Gearef(context, Stack)->stack = (union Data*)nodeStack;
-# Gearef(context, Stack)->data = (union Data*)node;
-# Gearef(context, Stack)->next = C_stackTest3;
-# goto meta(context, nodeStack->push);
-
-sub getDataGear {
-    my ($filename) = @_;
-    my ($codeGearName, $name, $inTypedef);
-    open my $fd,"<",$filename or die("can't open $filename $!");
-    while (<$fd>) {
-        if (! $inTypedef) {
-            if (/^typedef struct (\w+)\s*<(.*)>/) {
-                $inTypedef = 1;
-                $name = $1;
-                $dataGear{$name} = $_;
-                $var{$name} = {};
-                $code{$name} = {};
-                $generic{$name} = \split(/,/,$2);
-            } elsif (/^typedef struct (\w+)/) {
-                $inTypedef = 1;
-                $name = $1;
-                $dataGear{$name} = $_;
-                $var{$name} = {};
-                $code{$name} = {};
-                $generic{$name} = [];
-            } elsif (/^(\w+)(\*)+ create(\w+)\(/) {
-                if (defined $interface) {
-                   die "duplicate interface $interface\n";
-                }
-                $interface = $1;
-                $implementation = $3;
-                if ( -f "$interface.cbc") {
-                    &getDataGear("$interface.cbc");
-                }
-            } elsif(/^(.*)par goto (\w+)\((.*)\)/) {
-                my $codeGearName = $2;
-                if ($filename =~ /^(.*)\/(.*)/) {
-                    $codeGearName = "$1/$codeGearName";
-                }
-                if ( -f "$codeGearName.cbc") {
-                    &getCodeGear("$codeGearName.cbc");
-                }
-			} elsif(/^#interface "(.*)"/) {
-                # use interface
-                my $interfaceHeader = $1;
-                next if ($interfaceHeader =~ /context.h/);
-                if (-f $interfaceHeader) {
-                    &getDataGear("$interfaceHeader");
-                    &getCodeGear("$interfaceHeader");
-                }
-            } elsif (/^\_\_code (\w+)\((.*)\)(.*)/) {
-                my $codeGearName = $1;
-                if ($filename =~ /^(.*)\/(.*)/) {
-                    $codeGearName = "$1/$codeGearName";
-                }
-                if ( -f "$codeGearName.cbc") {
-                    &getCodeGear("$codeGearName.cbc");
-                }
-            }
-            next;
-        }
-        # gather type name and type
-        $dataGear{$name} .= $_;
-        if (/^\s*(.*)\s+(\w+);$/ ) {
-            my $ttype = $1;
-            my $tname = $2;
-            if ($ttype =~ /^(union|struct)?\s*(\w+)/) {
-                $ttype = $2;
-            }
-            $var{$name}->{$tname} = $ttype;
-        }
-        if (/^}/) {
-            $inTypedef = 0;
-        }
-    }
-
-}
-
-sub getCodeGear {
-    my ($filename) = @_;
-    open my $fd,"<",$filename or die("can't open $filename $!");
-    my ($name,$impln);
-    while (<$fd>) {
-        if (/^(\w+)(\*)+ create(\w+)\(/) {
-            $name = $1;
-            $impln = $3;
-        } elsif(/^typedef struct (.*)<.*>\s*{/) {
-            $name = $1;
-        }
-        if (defined $name) {
-            if (/^\s*\_\_code (\w+)\((.*)\);/) {
-                my $args = $2;
-                my $method = $1;
-                $code{$name}->{$method} = [];
-                while($args) {
-                    # replace comma
-                    $args =~ s/(^\s*,\s*)//;
-                    # continuation case
-                    if ($args =~ s/^(\s)*\_\_code\s+(\w+)\(([^)]*)\)//) {
-                        my $next = $2;
-                        my @args = split(/,/,$3);
-                        push(@{$code{$name}->{$method}},"\_\_code $next");
-                    } elsif ($args =~ s/^(struct|union)?\s*(\w+)(\**)\s+(\w+)//) {
-                        my $structType = $1;
-                        my $typeName = $2;
-                        my $ptrType = $3;
-                        my $varName = $4;
-                        my $typeField = lcfirst($typeName);
-                        push(@{$code{$name}->{$method}},"$typeName$ptrType $varName");
-                    } elsif ($args =~ s/(.*,)//) {
-                    } else {
-                        last;
-                    }
-                }
-            }
-        } elsif (/^\_\_code (\w+)\((.*)\)(.*)/) {
-            my $codeGearName = $1;
-            my $args = $2;
-            my $inputCount = 0;
-            my $outputCount = 0;
-            my $inputIncFlag = 1;
-            while($args) {
-                if ($args =~ s/(^\s*,\s*)//) {
-                }
-                if ($args =~ s/^(\s)*\_\_code\s+(\w+)\((.*?)\)//) {
-                    $codeGear{$codeGearName}->{"code"}->{$2} = "\_\_code";
-                    $inputIncFlag = 0;
-                    my @outputs = split(/,/,$3);
-                    for my $output (@outputs) {
-                        if ($output =~ /\s*(struct|union)?\s*(\w+)(\*)?+\s(\w+)/) {
-                            my $type = $2;
-                            my $varName = $4;
-                            $codeGear{$codeGearName}->{"var"}->{$varName} = "$type $outputCount";
-                            $outputCount++;
-                        }
-                    }
-                } elsif ($args =~ s/^(struct|union)?\s*(\w+)(\*)?+\s(\w+)// && $inputIncFlag) {
-                    my $type = $2;
-                    my $varName = $4;
-                    $codeGear{$codeGearName}->{"var"}->{$varName} = "$type $inputCount";
-                    $inputCount++;
-                } elsif ($args =~ s/(.*,)//) {
-                } else {
-                    last;
-                }
-            }
-            $codeGear{$codeGearName}->{"input"} = $inputCount;
-            $codeGear{$codeGearName}->{"output"} = $outputCount;
-        }
-    }
-}
-
-sub generateStub {
-    my($fd,$prevCodeGearName,$dataGearName) = @_;
-    print $fd "__code ", $prevCodeGearName ,"_stub(struct Context* context) {\n";
-    print $fd $dataGearName;
-    print $fd "\n} \n\n";
-    return 1;
-}
-
-sub generateStubArgs {
-    my($codeGearName, $varName, $typeName, $ptrType, $typeField, $interface,$output) = @_;
-    my $varname1 = $output?"O_$varName":$varName;
-    for my $n ( @{$dataGearVar{$codeGearName}} ) {
-        # we already have it
-        return 0 if ( $n eq $varname1);
-    }
-    push @{$dataGearVar{$codeGearName}}, $varname1;
-    push @{$dataGearVarType{$codeGearName}}, $typeName;
-    if ($typeName eq $implementation) {
-        # get implementation
-        $dataGearName{$codeGearName} .= "\t$typeName* $varName = ($typeName*)GearImpl(context, $interface, $varName);\n";
-    } else {
-        # interface var
-        for my $ivar (keys %{$var{$interface}}) {
-            #  input data gear field
-            if ($varName eq $ivar) {
-                if ($typeName eq $var{$interface}->{$ivar}) {
-                    if ($output) {
-                        $dataGearName{$codeGearName} .= "\t$typeName$ptrType* O_$varName = &Gearef(context, $interface)->$varName;\n";
-                        $outputVar{$codeGearName} .= "\t$typeName$ptrType $varName  __attribute__((unused)) = *O_$varName;\n";
-                        return 1;
-                    }
-                    $dataGearName{$codeGearName} .= "\t$typeName$ptrType $varName = Gearef(context, $interface)->$varName;\n";
-                    return 1;
-                }
-            }
-        }
-
-        # interface continuation
-        for my $cName (keys %{$code{$interface}}) {
-            if ($varName eq $cName) {
-                # continuation field
-                $dataGearName{$codeGearName} .= "\tenum Code $varName = Gearef(context, $interface)->$varName;\n";
-                return 1;
-            }
-        }
-
-        # par goto  var
-        for my $var (keys %{$codeGear{$codeGearName}->{"var"}}) {
-            #  input data gear field
-            if ($varName eq $var) {
-                my ($type, $count) = split(/\s/, $codeGear{$codeGearName}->{"var"}->{$var});
-                if ($typeName eq $type) {
-                    if ($output) {
-                        $dataGearName{$codeGearName} .= "\t$typeName$ptrType* O_$varName = ($typeName $ptrType*)&context->data[context->odg + $count];\n";
-                        $outputVar{$codeGearName} .= "\t$typeName$ptrType $varName = *O_$varName;\n";
-                        return 1;
-                    }
-                    $dataGearName{$codeGearName} .= "\t$typeName$ptrType $varName = &context->data[context->idg + $count]->$typeName;\n";
-                    return 1;
-                }
-            }
-        }
-
-        # par goto continuation
-        for my $cName (keys %{$codeGear{$codeGearName}->{"code"}}) {
-            if ($varName eq $cName) {
-                # continuation field
-                $dataGearName{$codeGearName} .= "\tenum Code $varName = context->next;\n";
-                return 1;
-            }
-        }
-
-        # par goto continuation
-        # global or local variable case
-        if ($typeName eq "Code") {
-            $dataGearName{$codeGearName} .= "\tenum $typeName$ptrType $varName = Gearef(context, $interface)->$varName;\n";
-            return 1;
-        }
-        $dataGearName{$codeGearName} .= "\t$typeName$ptrType $varName = Gearef(context, $typeName);\n";
-        return 1;
-    }
-}
-
-sub generateDataGear {
-    my ($filename) = @_;
-    open my $in,"<",$filename or die("can't open $filename $!");
-
-    my $fn;
-    if ($opt_o) {
-        $fn = $opt_o;
-    } else {
-        my $fn1 = $filename;
-        $fn1 =~ s/\.cbc/.c/;
-        my $i = 1;
-        $fn = "$dir/$fn1";
-        while ( -f $fn) {
-            $fn = "$dir/$fn1.$i";
-            $i++;
-        }
-    }
-    if ( $fn =~ m=(.*)/[^/]+$= ) {
-        if (! -d $1) {
-            make_path $1;
-        }
-    }
-    open my $fd,">",$fn or die("can't write $fn $!");
-
-    my $prevCodeGearName;
-    my $inTypedef = 0;
-    my $inStub = 0;
-    my $hasParGoto = 0;
-    my $inMain = 0 ;
-    my %stub;
-    my $codeGearName;
-    my %localVarType;
-
-    while (<$in>) {
-        if (! $inTypedef && ! $inStub && ! $inMain) {
-            if (/^typedef struct (\w+) \{/) {
-                $inTypedef = 1;
-            } elsif (/^int main\((.*)\) \{/) {
-                $inMain = 1;
-            } elsif(/^#interface "(.*)"/) {
-                my $interfaceHeader = $1;
-                # #interface not write
-                next unless ($interfaceHeader =~ /context.h/);
-            } elsif (/^\_\_code (\w+)\((.*)\)(.*)/) {
-                %localVarType = {};
-                $codeGearName = $1;
-                my $args = $2;
-                my $tail = $3;
-                if ($codeGearName =~ /_stub$/) {
-                    # don't touch already existing stub
-                    $inStub = 1;
-                    $stub{$codeGearName} = 1;
-                    print $fd $_;
-                    next;
-                }
-                if (defined $prevCodeGearName) {
-                    # stub is generated just before next CodeGear
-                    if (defined $stub{$prevCodeGearName."_stub"}) {
-                        undef $prevCodeGearName;
-                    } else {
-                        &generateStub($fd,$prevCodeGearName,$dataGearName{$prevCodeGearName});
-                        $stub{$prevCodeGearName."_stub"} = 1;
-                    }
-                }
-                # analyzing CodeGear argument
-                #      these arguments are extract from current context's arugment DataGear Interface
-                #      and passed to the CodeGear
-                #      struct Implementaion needs special handling
-                #      __code next(...)   --->   enum Code next
-                $prevCodeGearName = $codeGearName;
-                $dataGearVar{$codeGearName} = [];
-                $outputVar{$codeGearName} = "";
-                $outputArgs{$codeGearName} = {};
-                my $newArgs = "struct Context *context,";
-                if ($args=~/^struct Context\s*\*\s*context/) {
-                    $newArgs = "";
-                }
-                if (!$args){
-                    $newArgs = "struct Context *context";
-                }
-                while($args) {
-                    if ($args =~ s/(^\s*,\s*)//) {
-                        $newArgs .= $1;
-                    }
-                    # continuation case
-                    if ($args =~ s/^(\s)*\_\_code\s+(\w+)\(([^)]*)\)//) {
-                        my $next = $2;
-                        my @args = split(/,/,$3);
-                        if (&generateStubArgs($codeGearName, $next, "Code", "", $next, $interface,0) ) {
-                            $newArgs .= "enum Code $next";
-                        }
-                        # analyze continuation arguments
-                        #    output arguments are defined in the Interface take the pointer of these
-                        #    output arguments are put into the Interface DataGear just before the goto
-                        for my $arg (@args) {
-                            $arg =~ s/^\s*//;
-                            last if ($arg =~ /\.\.\./);
-                            $arg =~ s/^(struct|union)?\s*(\w+)(\**)\s(\w+)//;
-                            my $structType = $1;
-                            my $typeName = $2;
-                            my $ptrType = $3;
-                            my $varName = $4;
-                            my $typeField = lcfirst($typeName);
-                            push(@{$outputArgs{$codeGearName}->{$next}}, $varName);
-                            if (&generateStubArgs($codeGearName, $varName, $typeName, $ptrType, $typeField, $interface,1)) {
-                                $newArgs .= ",$structType $typeName **O_$varName";
-                            }
-                        }
-                    } elsif ($args =~ s/^(struct|union)?\s*(\w+)(\**)\s(\w+)//) {
-                        my $structType = $1;
-                        my $typeName = $2;
-                        my $ptrType = $3;
-                        my $varName = $4;
-                        my $typeField = lcfirst($typeName);
-                        $newArgs .= $&;    # assuming no duplicate
-                        &generateStubArgs($codeGearName, $varName, $typeName, $ptrType, $typeField, $interface,0);
-                    } elsif ($args =~ s/(.*,)//) {
-                        $newArgs .= $1;
-                    } else {
-                        $newArgs .= $args;
-                        last;
-                    }
-                }
-                # generate goto statement from stub to the CodeGear in the buffer
-                $dataGearName{$codeGearName} .= "\tgoto $codeGearName(context";
-                for my $arg ( @{$dataGearVar{$codeGearName}}) {
-                    $dataGearName{$codeGearName} .= ", $arg";
-                }
-                $dataGearName{$codeGearName} .= ");";
-                # generate CodeGear header with new arguments
-                print $fd "__code $codeGearName($newArgs)$tail\n";
-                if ($outputVar{$codeGearName} ne "") {
-                    # output data var can be use before write
-                    # it should be initialze by gearef
-                    print $fd $outputVar{$codeGearName};
-                }
-                next;
-            } elsif (/^(.*)goto (\w+)\-\>(\w+)\((.*)\);/) {
-                # handling goto statement
-                # convert it to the meta call form with two arugments, that is context and enum Code
-                my $prev = $1;
-                my $next = $2;
-                my $method = $3;
-                my $tmpArgs = $4;
-                #$tmpArgs =~ s/\(.*\)/\(\)/;
-                my @args = split(/,/,$tmpArgs);
-                my @types = @{$dataGearVarType{$codeGearName}};
-                my $ntype;
-                my $ftype;
-                for my $v (@{$dataGearVar{$codeGearName}}) {
-                    my $t = shift @types;
-                    if ($v eq $next || $v eq "O_$next") {
-                        $ntype = $t;
-                        $ftype = lcfirst($ntype);
-                    }
-                }
-                if (!defined $ntype) {
-                    $ntype = $localVarType{$next};
-                    $ftype = lcfirst($ntype);
-                }
-                print $fd "\tGearef(context, $ntype)->$ftype = (union Data*) $next;\n";
-                # Put interface argument
-                my $prot = $code{$ntype}->{$method};
-                my $i = 1;
-                for my $arg (@args) {
-                    my $pType;
-                    my $pName;
-                    my $p = @$prot[$i];
-                    next if ($p eq $arg);
-                    $p =~ s/^(.*)\s(\w+)//;
-                    $pType = $1;
-                    $pName = $2;
-                    $arg =~ s/^(\s)*(\w+)/$2/;
-                    if ($pType =~ s/\_\_code$//) {
-                        if ($arg =~ /(\w+)\(.*\)/) {
-                            print $fd "\tGearef(context, $ntype)->$pName = $1;\n";
-                        } else {
-                            print $fd "\tGearef(context, $ntype)->$pName = C_$arg;\n";
-                        }
-                    } elsif ($pType =~ /Data\**$/){
-                        print $fd "\tGearef(context, $ntype)->$pName = (union $pType) $arg;\n";
-                    } else {
-                        print $fd "\tGearef(context, $ntype)->$pName = $arg;\n";
-                    }
-                    $i++;
-                }
-                print $fd "${prev}context->before = C_$codeGearName;\n";
-                print $fd "${prev}goto meta(context, $next->$method);\n";
-                next;
-            } elsif(/^(.*)par goto (\w+)\((.*)\);/) {
-                # handling par goto statement
-                # convert it to the parallel
-                my $prev = $1;
-                my $codeGearName = $2;
-                my $args = $3;
-                my $inputCount = $codeGear{$codeGearName}->{'input'};
-                my $outputCount = $codeGear{$codeGearName}->{'output'};
-                my @iterateCounts;
-                # parse examples 'par goto(.., iterate(10), exit);'
-                if ($args =~ /iterate\((.*)?\),/) {
-                    @iterateCounts = split(/,/,$1);;
-                    $inputCount--;
-                }
-                # replace iterate keyword
-                $args =~ s/iterate\((.*)?\),//;
-                my @dataGears = split(/,\s*/, $args);
-                my $nextCodeGear = pop(@dataGears);
-                if (! $hasParGoto) {
-                    $hasParGoto = 1;
-                    print $fd "${prev}struct Element* element;\n";
-                }
-                my $initTask = << "EOFEOF";
-                ${prev}context->task = NEW(struct Context);
-                ${prev}initContext(context->task);
-                ${prev}context->task->next = C_$codeGearName;
-                ${prev}context->task->idgCount = $inputCount;
-                ${prev}context->task->idg = context->task->dataNum;
-                ${prev}context->task->maxIdg = context->task->idg + $inputCount;
-                ${prev}context->task->odg = context->task->maxIdg;
-                ${prev}context->task->maxOdg = context->task->odg + $outputCount;
-EOFEOF
-                print $fd $initTask;
-                if (@iterateCounts) {
-                    print $fd "${prev}context->task->iterate = 0;\n";
-                    my $len = @iterateCounts;
-                    if ($len == 1) {
-                        print $fd "${prev}context->task->iterator = createMultiDimIterator(context, $iterateCounts[0], 1, 1);\n";
-                    } elsif ($len == 2) {
-                        print $fd "${prev}context->task->iterator = createMultiDimIterator(context, $iterateCounts[0], $iterateCounts[1], 1);\n";
-                    } elsif ($len == 3) {
-                        print $fd "${prev}context->task->iterator = createMultiDimIterator(context, $iterateCounts[0], $iterateCounts[1], $iterateCounts[2]);\n";
-                    }
-                }
-                for my $dataGear (@dataGears) {
-                    print $fd "${prev}GET_META($dataGear)->wait = createSynchronizedQueue(context);\n";
-                }
-                for my $i (0..$inputCount-1) {
-                    print $fd "${prev}context->task->data[context->task->idg+$i] = (union Data*)@dataGears[$i];\n";
-                }
-                for my $i (0..$outputCount-1) {
-                    print $fd "${prev}context->task->data[context->task->odg+$i] = (union Data*)@dataGears[$inputCount+$i];\n";
-                }
-                my $putTask = << "EOFEOF";
-                ${prev}element = &ALLOCATE(context, Element)->Element;
-                ${prev}element->data = (union Data*)context->task;
-                ${prev}element->next = context->taskList;
-                ${prev}context->taskList = element;
-EOFEOF
-                print $fd $putTask;
-                next;
-            } elsif (/^(.*)goto (\w+)\((.*)\);/) {
-                # handling goto statement
-                # convert it to the meta call form with two arugments, that is context and enum Code
-                my $prev = $1;
-                my $next = $2;
-                my @args = split(/,/, $3);
-                my $v = 0;
-                for my $n ( @{$dataGearVar{$codeGearName}} ) {
-                    # continuation arguments
-                    $v = 1  if ( $n eq $next);
-                }
-                if ($v || defined $code{$interface}->{$next}) {
-                    # write continuation's arguments into the interface arguments
-                    # we may need a commit for a shared DataGear
-                    for my $arg ( @{$outputArgs{$codeGearName}->{$next}} ) {
-                        my $v = shift(@args);
-                        print $fd "\t*O_$arg = $v;\n";
-                    }
-                    if ($hasParGoto) {
-                        print $fd "${prev}Gearef(context, TaskManager)->taskList = context->taskList;\n";
-                        print $fd "${prev}Gearef(context, TaskManager)->next1 = C_$next;\n";
-                        print $fd "${prev}goto meta(context, C_$next);\n";
-                    } else {
-                        print $fd "${prev}context->before = C_$codeGearName;\n";
-                        print $fd "${prev}goto meta(context, $next);\n";
-                    }
-                    next;
-                }
-                if ($hasParGoto) {
-                    print $fd "${prev}Gearef(context, TaskManager)->taskList = context->taskList;\n";
-                    print $fd "${prev}Gearef(context, TaskManager)->next1 = C_$next;\n";
-                    print $fd "${prev}goto parGotoMeta(context, C_$next);\n";
-                    next;
-                } elsif ($next eq "meta") {
-                    print $fd $_;
-                    next;
-                } else {
-                    print $fd "${prev}context->before = C_$codeGearName;\n";
-                    print $fd "${prev}goto meta(context, C_$next);\n";
-                    next;
-                }
-            } elsif(/^.*(struct|union)?\s(\w+)\*\s(\w+)\s?[=;]/) {
-                my $type    = $2;
-                my $varName = $3;
-                $localVarType{$varName} = $type;
-                s/new\s+(\w+)\(\)/\&ALLOCATE(context, \1)->\1/g;   # replacing new
-            } elsif(/^}/) {
-                $hasParGoto = 0;
-            } else {
-                s/new\s+(\w+)\(\)/\&ALLOCATE(context, \1)->\1/g;   # replacing new
-            }
-            # gather type name and type
-        } elsif ($inMain) {
-            if (/^(.*)goto start_code\(main_context\);/) {
-                print $fd $_;
-                next;
-            } elsif (/^(.*)goto (\w+)\((.*)\);/) {
-                my $prev = $1;
-                my $next = $2;
-                print $fd "${prev}struct Context* main_context = NEW(struct Context);\n";
-                print $fd "${prev}initContext(main_context);\n";
-                print $fd "${prev}main_context->next = C_$next;\n";
-                print $fd "${prev}goto start_code(main_context);\n";
-                next;
-            }
-        }
-        if (/^}/) {
-            $inStub = 0;
-            $inTypedef = 0;
-            $inMain = 0;
-        }
-        print $fd $_;
-    }
-    if (defined $prevCodeGearName) {
-        if (!defined $stub{$prevCodeGearName."_stub"}) {
-            $stub{$prevCodeGearName."_stub"} = &generateStub($fd,$prevCodeGearName,$dataGearName{$codeGearName});
-        }
-    }
-}
-
-# end
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/helper_cuda.h
--- a/src/parallel_execution/helper_cuda.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1088 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-////////////////////////////////////////////////////////////////////////////////
-// These are CUDA Helper functions for initialization and error checking
-
-#ifndef HELPER_CUDA_H
-#define HELPER_CUDA_H
-
-#pragma once
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "helper_string.h"
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-// Note, it is required that your SDK sample to include the proper header files, please
-// refer the CUDA examples for examples of the needed CUDA headers, which may change depending
-// on which CUDA functions are used.
-
-// CUDA Runtime error messages
-#ifndef __DRIVER_TYPES_H__
-static const char *_cudaGetErrorEnum(cudaError_t error)
-{
-    switch (error)
-    {
-        case cudaSuccess:
-            return "cudaSuccess";
-
-        case cudaErrorMissingConfiguration:
-            return "cudaErrorMissingConfiguration";
-
-        case cudaErrorMemoryAllocation:
-            return "cudaErrorMemoryAllocation";
-
-        case cudaErrorInitializationError:
-            return "cudaErrorInitializationError";
-
-        case cudaErrorLaunchFailure:
-            return "cudaErrorLaunchFailure";
-
-        case cudaErrorPriorLaunchFailure:
-            return "cudaErrorPriorLaunchFailure";
-
-        case cudaErrorLaunchTimeout:
-            return "cudaErrorLaunchTimeout";
-
-        case cudaErrorLaunchOutOfResources:
-            return "cudaErrorLaunchOutOfResources";
-
-        case cudaErrorInvalidDeviceFunction:
-            return "cudaErrorInvalidDeviceFunction";
-
-        case cudaErrorInvalidConfiguration:
-            return "cudaErrorInvalidConfiguration";
-
-        case cudaErrorInvalidDevice:
-            return "cudaErrorInvalidDevice";
-
-        case cudaErrorInvalidValue:
-            return "cudaErrorInvalidValue";
-
-        case cudaErrorInvalidPitchValue:
-            return "cudaErrorInvalidPitchValue";
-
-        case cudaErrorInvalidSymbol:
-            return "cudaErrorInvalidSymbol";
-
-        case cudaErrorMapBufferObjectFailed:
-            return "cudaErrorMapBufferObjectFailed";
-
-        case cudaErrorUnmapBufferObjectFailed:
-            return "cudaErrorUnmapBufferObjectFailed";
-
-        case cudaErrorInvalidHostPointer:
-            return "cudaErrorInvalidHostPointer";
-
-        case cudaErrorInvalidDevicePointer:
-            return "cudaErrorInvalidDevicePointer";
-
-        case cudaErrorInvalidTexture:
-            return "cudaErrorInvalidTexture";
-
-        case cudaErrorInvalidTextureBinding:
-            return "cudaErrorInvalidTextureBinding";
-
-        case cudaErrorInvalidChannelDescriptor:
-            return "cudaErrorInvalidChannelDescriptor";
-
-        case cudaErrorInvalidMemcpyDirection:
-            return "cudaErrorInvalidMemcpyDirection";
-
-        case cudaErrorAddressOfConstant:
-            return "cudaErrorAddressOfConstant";
-
-        case cudaErrorTextureFetchFailed:
-            return "cudaErrorTextureFetchFailed";
-
-        case cudaErrorTextureNotBound:
-            return "cudaErrorTextureNotBound";
-
-        case cudaErrorSynchronizationError:
-            return "cudaErrorSynchronizationError";
-
-        case cudaErrorInvalidFilterSetting:
-            return "cudaErrorInvalidFilterSetting";
-
-        case cudaErrorInvalidNormSetting:
-            return "cudaErrorInvalidNormSetting";
-
-        case cudaErrorMixedDeviceExecution:
-            return "cudaErrorMixedDeviceExecution";
-
-        case cudaErrorCudartUnloading:
-            return "cudaErrorCudartUnloading";
-
-        case cudaErrorUnknown:
-            return "cudaErrorUnknown";
-
-        case cudaErrorNotYetImplemented:
-            return "cudaErrorNotYetImplemented";
-
-        case cudaErrorMemoryValueTooLarge:
-            return "cudaErrorMemoryValueTooLarge";
-
-        case cudaErrorInvalidResourceHandle:
-            return "cudaErrorInvalidResourceHandle";
-
-        case cudaErrorNotReady:
-            return "cudaErrorNotReady";
-
-        case cudaErrorInsufficientDriver:
-            return "cudaErrorInsufficientDriver";
-
-        case cudaErrorSetOnActiveProcess:
-            return "cudaErrorSetOnActiveProcess";
-
-        case cudaErrorInvalidSurface:
-            return "cudaErrorInvalidSurface";
-
-        case cudaErrorNoDevice:
-            return "cudaErrorNoDevice";
-
-        case cudaErrorECCUncorrectable:
-            return "cudaErrorECCUncorrectable";
-
-        case cudaErrorSharedObjectSymbolNotFound:
-            return "cudaErrorSharedObjectSymbolNotFound";
-
-        case cudaErrorSharedObjectInitFailed:
-            return "cudaErrorSharedObjectInitFailed";
-
-        case cudaErrorUnsupportedLimit:
-            return "cudaErrorUnsupportedLimit";
-
-        case cudaErrorDuplicateVariableName:
-            return "cudaErrorDuplicateVariableName";
-
-        case cudaErrorDuplicateTextureName:
-            return "cudaErrorDuplicateTextureName";
-
-        case cudaErrorDuplicateSurfaceName:
-            return "cudaErrorDuplicateSurfaceName";
-
-        case cudaErrorDevicesUnavailable:
-            return "cudaErrorDevicesUnavailable";
-
-        case cudaErrorInvalidKernelImage:
-            return "cudaErrorInvalidKernelImage";
-
-        case cudaErrorNoKernelImageForDevice:
-            return "cudaErrorNoKernelImageForDevice";
-
-        case cudaErrorIncompatibleDriverContext:
-            return "cudaErrorIncompatibleDriverContext";
-
-        case cudaErrorPeerAccessAlreadyEnabled:
-            return "cudaErrorPeerAccessAlreadyEnabled";
-
-        case cudaErrorPeerAccessNotEnabled:
-            return "cudaErrorPeerAccessNotEnabled";
-
-        case cudaErrorDeviceAlreadyInUse:
-            return "cudaErrorDeviceAlreadyInUse";
-
-        case cudaErrorProfilerDisabled:
-            return "cudaErrorProfilerDisabled";
-
-        case cudaErrorProfilerNotInitialized:
-            return "cudaErrorProfilerNotInitialized";
-
-        case cudaErrorProfilerAlreadyStarted:
-            return "cudaErrorProfilerAlreadyStarted";
-
-        case cudaErrorProfilerAlreadyStopped:
-            return "cudaErrorProfilerAlreadyStopped";
-
-        /* Since CUDA 4.0*/
-        case cudaErrorAssert:
-            return "cudaErrorAssert";
-
-        case cudaErrorTooManyPeers:
-            return "cudaErrorTooManyPeers";
-
-        case cudaErrorHostMemoryAlreadyRegistered:
-            return "cudaErrorHostMemoryAlreadyRegistered";
-
-        case cudaErrorHostMemoryNotRegistered:
-            return "cudaErrorHostMemoryNotRegistered";
-
-        /* Since CUDA 5.0 */
-        case cudaErrorOperatingSystem:
-            return "cudaErrorOperatingSystem";
-
-        case cudaErrorPeerAccessUnsupported:
-            return "cudaErrorPeerAccessUnsupported";
-
-        case cudaErrorLaunchMaxDepthExceeded:
-            return "cudaErrorLaunchMaxDepthExceeded";
-
-        case cudaErrorLaunchFileScopedTex:
-            return "cudaErrorLaunchFileScopedTex";
-
-        case cudaErrorLaunchFileScopedSurf:
-            return "cudaErrorLaunchFileScopedSurf";
-
-        case cudaErrorSyncDepthExceeded:
-            return "cudaErrorSyncDepthExceeded";
-
-        case cudaErrorLaunchPendingCountExceeded:
-            return "cudaErrorLaunchPendingCountExceeded";
-
-        case cudaErrorNotPermitted:
-            return "cudaErrorNotPermitted";
-
-        case cudaErrorNotSupported:
-            return "cudaErrorNotSupported";
-
-        /* Since CUDA 6.0 */
-        case cudaErrorHardwareStackError:
-            return "cudaErrorHardwareStackError";
-
-        case cudaErrorIllegalInstruction:
-            return "cudaErrorIllegalInstruction";
-
-        case cudaErrorMisalignedAddress:
-            return "cudaErrorMisalignedAddress";
-
-        case cudaErrorInvalidAddressSpace:
-            return "cudaErrorInvalidAddressSpace";
-
-        case cudaErrorInvalidPc:
-            return "cudaErrorInvalidPc";
-
-        case cudaErrorIllegalAddress:
-            return "cudaErrorIllegalAddress";
-
-        /* Since CUDA 6.5*/
-        case cudaErrorInvalidPtx:
-            return "cudaErrorInvalidPtx";
-
-        case cudaErrorInvalidGraphicsContext:
-            return "cudaErrorInvalidGraphicsContext";
-
-        case cudaErrorStartupFailure:
-            return "cudaErrorStartupFailure";
-
-        case cudaErrorApiFailureBase:
-            return "cudaErrorApiFailureBase";
-
-        /* Since CUDA 8.0*/        
-        case cudaErrorNvlinkUncorrectable :   
-            return "cudaErrorNvlinkUncorrectable";
-    }
-
-    return "<unknown>";
-}
-#else
-// CUDA Driver API errors
-static const char *_cudaGetErrorEnum(CUresult error)
-{
-    switch (error)
-    {
-        case CUDA_SUCCESS:
-            return "CUDA_SUCCESS";
-
-        case CUDA_ERROR_INVALID_VALUE:
-            return "CUDA_ERROR_INVALID_VALUE";
-
-        case CUDA_ERROR_OUT_OF_MEMORY:
-            return "CUDA_ERROR_OUT_OF_MEMORY";
-
-        case CUDA_ERROR_NOT_INITIALIZED:
-            return "CUDA_ERROR_NOT_INITIALIZED";
-
-        case CUDA_ERROR_DEINITIALIZED:
-            return "CUDA_ERROR_DEINITIALIZED";
-
-        case CUDA_ERROR_PROFILER_DISABLED:
-            return "CUDA_ERROR_PROFILER_DISABLED";
-
-        case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
-            return "CUDA_ERROR_PROFILER_NOT_INITIALIZED";
-
-        case CUDA_ERROR_PROFILER_ALREADY_STARTED:
-            return "CUDA_ERROR_PROFILER_ALREADY_STARTED";
-
-        case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
-            return "CUDA_ERROR_PROFILER_ALREADY_STOPPED";
-
-        case CUDA_ERROR_NO_DEVICE:
-            return "CUDA_ERROR_NO_DEVICE";
-
-        case CUDA_ERROR_INVALID_DEVICE:
-            return "CUDA_ERROR_INVALID_DEVICE";
-
-        case CUDA_ERROR_INVALID_IMAGE:
-            return "CUDA_ERROR_INVALID_IMAGE";
-
-        case CUDA_ERROR_INVALID_CONTEXT:
-            return "CUDA_ERROR_INVALID_CONTEXT";
-
-        case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
-            return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT";
-
-        case CUDA_ERROR_MAP_FAILED:
-            return "CUDA_ERROR_MAP_FAILED";
-
-        case CUDA_ERROR_UNMAP_FAILED:
-            return "CUDA_ERROR_UNMAP_FAILED";
-
-        case CUDA_ERROR_ARRAY_IS_MAPPED:
-            return "CUDA_ERROR_ARRAY_IS_MAPPED";
-
-        case CUDA_ERROR_ALREADY_MAPPED:
-            return "CUDA_ERROR_ALREADY_MAPPED";
-
-        case CUDA_ERROR_NO_BINARY_FOR_GPU:
-            return "CUDA_ERROR_NO_BINARY_FOR_GPU";
-
-        case CUDA_ERROR_ALREADY_ACQUIRED:
-            return "CUDA_ERROR_ALREADY_ACQUIRED";
-
-        case CUDA_ERROR_NOT_MAPPED:
-            return "CUDA_ERROR_NOT_MAPPED";
-
-        case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
-            return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY";
-
-        case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
-            return "CUDA_ERROR_NOT_MAPPED_AS_POINTER";
-
-        case CUDA_ERROR_ECC_UNCORRECTABLE:
-            return "CUDA_ERROR_ECC_UNCORRECTABLE";
-
-        case CUDA_ERROR_UNSUPPORTED_LIMIT:
-            return "CUDA_ERROR_UNSUPPORTED_LIMIT";
-
-        case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
-            return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE";
-
-        case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
-            return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED";
-
-        case CUDA_ERROR_INVALID_PTX:
-            return "CUDA_ERROR_INVALID_PTX";
-
-        case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
-            return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT";
-
-        case CUDA_ERROR_NVLINK_UNCORRECTABLE:
-            return "CUDA_ERROR_NVLINK_UNCORRECTABLE";
-
-        case CUDA_ERROR_INVALID_SOURCE:
-            return "CUDA_ERROR_INVALID_SOURCE";
-
-        case CUDA_ERROR_FILE_NOT_FOUND:
-            return "CUDA_ERROR_FILE_NOT_FOUND";
-
-        case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
-            return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND";
-
-        case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
-            return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED";
-
-        case CUDA_ERROR_OPERATING_SYSTEM:
-            return "CUDA_ERROR_OPERATING_SYSTEM";
-
-        case CUDA_ERROR_INVALID_HANDLE:
-            return "CUDA_ERROR_INVALID_HANDLE";
-
-        case CUDA_ERROR_NOT_FOUND:
-            return "CUDA_ERROR_NOT_FOUND";
-
-        case CUDA_ERROR_NOT_READY:
-            return "CUDA_ERROR_NOT_READY";
-
-        case CUDA_ERROR_ILLEGAL_ADDRESS:
-            return "CUDA_ERROR_ILLEGAL_ADDRESS";
-
-        case CUDA_ERROR_LAUNCH_FAILED:
-            return "CUDA_ERROR_LAUNCH_FAILED";
-
-        case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
-            return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES";
-
-        case CUDA_ERROR_LAUNCH_TIMEOUT:
-            return "CUDA_ERROR_LAUNCH_TIMEOUT";
-
-        case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING:
-            return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING";
-
-        case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
-            return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED";
-
-        case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
-            return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED";
-
-        case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
-            return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE";
-
-        case CUDA_ERROR_CONTEXT_IS_DESTROYED:
-            return "CUDA_ERROR_CONTEXT_IS_DESTROYED";
-
-        case CUDA_ERROR_ASSERT:
-            return "CUDA_ERROR_ASSERT";
-
-        case CUDA_ERROR_TOO_MANY_PEERS:
-            return "CUDA_ERROR_TOO_MANY_PEERS";
-
-        case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
-            return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED";
-
-        case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
-            return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED";
-
-        case CUDA_ERROR_HARDWARE_STACK_ERROR:
-            return "CUDA_ERROR_HARDWARE_STACK_ERROR";
-
-        case CUDA_ERROR_ILLEGAL_INSTRUCTION:
-            return "CUDA_ERROR_ILLEGAL_INSTRUCTION";
-
-        case CUDA_ERROR_MISALIGNED_ADDRESS:
-            return "CUDA_ERROR_MISALIGNED_ADDRESS";
-
-        case CUDA_ERROR_INVALID_ADDRESS_SPACE:
-            return "CUDA_ERROR_INVALID_ADDRESS_SPACE";
-
-        case CUDA_ERROR_INVALID_PC:
-            return "CUDA_ERROR_INVALID_PC";
-
-        case CUDA_ERROR_NOT_PERMITTED:
-            return "CUDA_ERROR_NOT_PERMITTED";
-
-        case CUDA_ERROR_NOT_SUPPORTED:
-            return "CUDA_ERROR_NOT_SUPPORTED";
-
-        case CUDA_ERROR_UNKNOWN:
-            return "CUDA_ERROR_UNKNOWN";
-    }
-
-    return "<unknown>";
-}
-#endif
-
-#ifdef CUBLAS_API_H_
-// cuBLAS API errors
-static const char *_cudaGetErrorEnum(cublasStatus_t error)
-{
-    switch (error)
-    {
-        case CUBLAS_STATUS_SUCCESS:
-            return "CUBLAS_STATUS_SUCCESS";
-
-        case CUBLAS_STATUS_NOT_INITIALIZED:
-            return "CUBLAS_STATUS_NOT_INITIALIZED";
-
-        case CUBLAS_STATUS_ALLOC_FAILED:
-            return "CUBLAS_STATUS_ALLOC_FAILED";
-
-        case CUBLAS_STATUS_INVALID_VALUE:
-            return "CUBLAS_STATUS_INVALID_VALUE";
-
-        case CUBLAS_STATUS_ARCH_MISMATCH:
-            return "CUBLAS_STATUS_ARCH_MISMATCH";
-
-        case CUBLAS_STATUS_MAPPING_ERROR:
-            return "CUBLAS_STATUS_MAPPING_ERROR";
-
-        case CUBLAS_STATUS_EXECUTION_FAILED:
-            return "CUBLAS_STATUS_EXECUTION_FAILED";
-
-        case CUBLAS_STATUS_INTERNAL_ERROR:
-            return "CUBLAS_STATUS_INTERNAL_ERROR";
-
-        case CUBLAS_STATUS_NOT_SUPPORTED:
-            return "CUBLAS_STATUS_NOT_SUPPORTED";
-
-        case CUBLAS_STATUS_LICENSE_ERROR:
-            return "CUBLAS_STATUS_LICENSE_ERROR";
-    }
-
-    return "<unknown>";
-}
-#endif
-
-#ifdef _CUFFT_H_
-// cuFFT API errors
-static const char *_cudaGetErrorEnum(cufftResult error)
-{
-    switch (error)
-    {
-        case CUFFT_SUCCESS:
-            return "CUFFT_SUCCESS";
-
-        case CUFFT_INVALID_PLAN:
-            return "CUFFT_INVALID_PLAN";
-
-        case CUFFT_ALLOC_FAILED:
-            return "CUFFT_ALLOC_FAILED";
-
-        case CUFFT_INVALID_TYPE:
-            return "CUFFT_INVALID_TYPE";
-
-        case CUFFT_INVALID_VALUE:
-            return "CUFFT_INVALID_VALUE";
-
-        case CUFFT_INTERNAL_ERROR:
-            return "CUFFT_INTERNAL_ERROR";
-
-        case CUFFT_EXEC_FAILED:
-            return "CUFFT_EXEC_FAILED";
-
-        case CUFFT_SETUP_FAILED:
-            return "CUFFT_SETUP_FAILED";
-
-        case CUFFT_INVALID_SIZE:
-            return "CUFFT_INVALID_SIZE";
-
-        case CUFFT_UNALIGNED_DATA:
-            return "CUFFT_UNALIGNED_DATA";
-
-        case CUFFT_INCOMPLETE_PARAMETER_LIST:
-            return "CUFFT_INCOMPLETE_PARAMETER_LIST";
-
-        case CUFFT_INVALID_DEVICE:
-            return "CUFFT_INVALID_DEVICE";
-
-        case CUFFT_PARSE_ERROR:
-            return "CUFFT_PARSE_ERROR";
-
-        case CUFFT_NO_WORKSPACE:
-            return "CUFFT_NO_WORKSPACE";
-
-        case CUFFT_NOT_IMPLEMENTED:
-            return "CUFFT_NOT_IMPLEMENTED";
-
-        case CUFFT_LICENSE_ERROR:
-            return "CUFFT_LICENSE_ERROR";
-
-        case CUFFT_NOT_SUPPORTED:
-            return "CUFFT_NOT_SUPPORTED";
-    }
-
-    return "<unknown>";
-}
-#endif
-
-
-#ifdef CUSPARSEAPI
-// cuSPARSE API errors
-static const char *_cudaGetErrorEnum(cusparseStatus_t error)
-{
-    switch (error)
-    {
-        case CUSPARSE_STATUS_SUCCESS:
-            return "CUSPARSE_STATUS_SUCCESS";
-
-        case CUSPARSE_STATUS_NOT_INITIALIZED:
-            return "CUSPARSE_STATUS_NOT_INITIALIZED";
-
-        case CUSPARSE_STATUS_ALLOC_FAILED:
-            return "CUSPARSE_STATUS_ALLOC_FAILED";
-
-        case CUSPARSE_STATUS_INVALID_VALUE:
-            return "CUSPARSE_STATUS_INVALID_VALUE";
-
-        case CUSPARSE_STATUS_ARCH_MISMATCH:
-            return "CUSPARSE_STATUS_ARCH_MISMATCH";
-
-        case CUSPARSE_STATUS_MAPPING_ERROR:
-            return "CUSPARSE_STATUS_MAPPING_ERROR";
-
-        case CUSPARSE_STATUS_EXECUTION_FAILED:
-            return "CUSPARSE_STATUS_EXECUTION_FAILED";
-
-        case CUSPARSE_STATUS_INTERNAL_ERROR:
-            return "CUSPARSE_STATUS_INTERNAL_ERROR";
-
-        case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
-            return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
-    }
-
-    return "<unknown>";
-}
-#endif
-
-#ifdef CUSOLVER_COMMON_H_
-//cuSOLVER API errors
-static const char *_cudaGetErrorEnum(cusolverStatus_t error)
-{
-   switch(error)
-   {
-       case CUSOLVER_STATUS_SUCCESS:
-           return "CUSOLVER_STATUS_SUCCESS";
-       case CUSOLVER_STATUS_NOT_INITIALIZED:
-           return "CUSOLVER_STATUS_NOT_INITIALIZED";
-       case CUSOLVER_STATUS_ALLOC_FAILED:
-           return "CUSOLVER_STATUS_ALLOC_FAILED";
-       case CUSOLVER_STATUS_INVALID_VALUE:
-           return "CUSOLVER_STATUS_INVALID_VALUE";
-       case CUSOLVER_STATUS_ARCH_MISMATCH:
-           return "CUSOLVER_STATUS_ARCH_MISMATCH";
-       case CUSOLVER_STATUS_MAPPING_ERROR:
-           return "CUSOLVER_STATUS_MAPPING_ERROR";
-       case CUSOLVER_STATUS_EXECUTION_FAILED:
-           return "CUSOLVER_STATUS_EXECUTION_FAILED";
-       case CUSOLVER_STATUS_INTERNAL_ERROR:
-           return "CUSOLVER_STATUS_INTERNAL_ERROR";
-       case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
-           return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
-       case CUSOLVER_STATUS_NOT_SUPPORTED :
-           return "CUSOLVER_STATUS_NOT_SUPPORTED ";
-       case CUSOLVER_STATUS_ZERO_PIVOT:
-           return "CUSOLVER_STATUS_ZERO_PIVOT";
-       case CUSOLVER_STATUS_INVALID_LICENSE:
-           return "CUSOLVER_STATUS_INVALID_LICENSE";
-    }
-
-    return "<unknown>";
-
-}
-#endif
-
-#ifdef CURAND_H_
-// cuRAND API errors
-static const char *_cudaGetErrorEnum(curandStatus_t error)
-{
-    switch (error)
-    {
-        case CURAND_STATUS_SUCCESS:
-            return "CURAND_STATUS_SUCCESS";
-
-        case CURAND_STATUS_VERSION_MISMATCH:
-            return "CURAND_STATUS_VERSION_MISMATCH";
-
-        case CURAND_STATUS_NOT_INITIALIZED:
-            return "CURAND_STATUS_NOT_INITIALIZED";
-
-        case CURAND_STATUS_ALLOCATION_FAILED:
-            return "CURAND_STATUS_ALLOCATION_FAILED";
-
-        case CURAND_STATUS_TYPE_ERROR:
-            return "CURAND_STATUS_TYPE_ERROR";
-
-        case CURAND_STATUS_OUT_OF_RANGE:
-            return "CURAND_STATUS_OUT_OF_RANGE";
-
-        case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
-            return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
-
-        case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
-            return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
-
-        case CURAND_STATUS_LAUNCH_FAILURE:
-            return "CURAND_STATUS_LAUNCH_FAILURE";
-
-        case CURAND_STATUS_PREEXISTING_FAILURE:
-            return "CURAND_STATUS_PREEXISTING_FAILURE";
-
-        case CURAND_STATUS_INITIALIZATION_FAILED:
-            return "CURAND_STATUS_INITIALIZATION_FAILED";
-
-        case CURAND_STATUS_ARCH_MISMATCH:
-            return "CURAND_STATUS_ARCH_MISMATCH";
-
-        case CURAND_STATUS_INTERNAL_ERROR:
-            return "CURAND_STATUS_INTERNAL_ERROR";
-    }
-
-    return "<unknown>";
-}
-#endif
-
-#ifdef NV_NPPIDEFS_H
-// NPP API errors
-static const char *_cudaGetErrorEnum(NppStatus error)
-{
-    switch (error)
-    {
-        case NPP_NOT_SUPPORTED_MODE_ERROR:
-            return "NPP_NOT_SUPPORTED_MODE_ERROR";
-
-        case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
-            return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
-
-        case NPP_RESIZE_NO_OPERATION_ERROR:
-            return "NPP_RESIZE_NO_OPERATION_ERROR";
-
-        case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
-            return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
-
-#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
-
-        case NPP_BAD_ARG_ERROR:
-            return "NPP_BAD_ARGUMENT_ERROR";
-
-        case NPP_COEFF_ERROR:
-            return "NPP_COEFFICIENT_ERROR";
-
-        case NPP_RECT_ERROR:
-            return "NPP_RECTANGLE_ERROR";
-
-        case NPP_QUAD_ERROR:
-            return "NPP_QUADRANGLE_ERROR";
-
-        case NPP_MEM_ALLOC_ERR:
-            return "NPP_MEMORY_ALLOCATION_ERROR";
-
-        case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
-            return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
-
-        case NPP_INVALID_INPUT:
-            return "NPP_INVALID_INPUT";
-
-        case NPP_POINTER_ERROR:
-            return "NPP_POINTER_ERROR";
-
-        case NPP_WARNING:
-            return "NPP_WARNING";
-
-        case NPP_ODD_ROI_WARNING:
-            return "NPP_ODD_ROI_WARNING";
-#else
-
-            // These are for CUDA 5.5 or higher
-        case NPP_BAD_ARGUMENT_ERROR:
-            return "NPP_BAD_ARGUMENT_ERROR";
-
-        case NPP_COEFFICIENT_ERROR:
-            return "NPP_COEFFICIENT_ERROR";
-
-        case NPP_RECTANGLE_ERROR:
-            return "NPP_RECTANGLE_ERROR";
-
-        case NPP_QUADRANGLE_ERROR:
-            return "NPP_QUADRANGLE_ERROR";
-
-        case NPP_MEMORY_ALLOCATION_ERR:
-            return "NPP_MEMORY_ALLOCATION_ERROR";
-
-        case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
-            return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
-
-        case NPP_INVALID_HOST_POINTER_ERROR:
-            return "NPP_INVALID_HOST_POINTER_ERROR";
-
-        case NPP_INVALID_DEVICE_POINTER_ERROR:
-            return "NPP_INVALID_DEVICE_POINTER_ERROR";
-#endif
-
-        case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
-            return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
-
-        case NPP_TEXTURE_BIND_ERROR:
-            return "NPP_TEXTURE_BIND_ERROR";
-
-        case NPP_WRONG_INTERSECTION_ROI_ERROR:
-            return "NPP_WRONG_INTERSECTION_ROI_ERROR";
-
-        case NPP_NOT_EVEN_STEP_ERROR:
-            return "NPP_NOT_EVEN_STEP_ERROR";
-
-        case NPP_INTERPOLATION_ERROR:
-            return "NPP_INTERPOLATION_ERROR";
-
-        case NPP_RESIZE_FACTOR_ERROR:
-            return "NPP_RESIZE_FACTOR_ERROR";
-
-        case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
-            return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
-
-
-#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
-
-        case NPP_MEMFREE_ERR:
-            return "NPP_MEMFREE_ERR";
-
-        case NPP_MEMSET_ERR:
-            return "NPP_MEMSET_ERR";
-
-        case NPP_MEMCPY_ERR:
-            return "NPP_MEMCPY_ERROR";
-
-        case NPP_MIRROR_FLIP_ERR:
-            return "NPP_MIRROR_FLIP_ERR";
-#else
-
-        case NPP_MEMFREE_ERROR:
-            return "NPP_MEMFREE_ERROR";
-
-        case NPP_MEMSET_ERROR:
-            return "NPP_MEMSET_ERROR";
-
-        case NPP_MEMCPY_ERROR:
-            return "NPP_MEMCPY_ERROR";
-
-        case NPP_MIRROR_FLIP_ERROR:
-            return "NPP_MIRROR_FLIP_ERROR";
-#endif
-
-        case NPP_ALIGNMENT_ERROR:
-            return "NPP_ALIGNMENT_ERROR";
-
-        case NPP_STEP_ERROR:
-            return "NPP_STEP_ERROR";
-
-        case NPP_SIZE_ERROR:
-            return "NPP_SIZE_ERROR";
-
-        case NPP_NULL_POINTER_ERROR:
-            return "NPP_NULL_POINTER_ERROR";
-
-        case NPP_CUDA_KERNEL_EXECUTION_ERROR:
-            return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
-
-        case NPP_NOT_IMPLEMENTED_ERROR:
-            return "NPP_NOT_IMPLEMENTED_ERROR";
-
-        case NPP_ERROR:
-            return "NPP_ERROR";
-
-        case NPP_SUCCESS:
-            return "NPP_SUCCESS";
-
-        case NPP_WRONG_INTERSECTION_QUAD_WARNING:
-            return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
-
-        case NPP_MISALIGNED_DST_ROI_WARNING:
-            return "NPP_MISALIGNED_DST_ROI_WARNING";
-
-        case NPP_AFFINE_QUAD_INCORRECT_WARNING:
-            return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
-
-        case NPP_DOUBLE_SIZE_WARNING:
-            return "NPP_DOUBLE_SIZE_WARNING";
-
-        case NPP_WRONG_INTERSECTION_ROI_WARNING:
-            return "NPP_WRONG_INTERSECTION_ROI_WARNING";
-
-#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
-        /* These are 6.0 or higher */
-        case NPP_LUT_PALETTE_BITSIZE_ERROR:
-            return "NPP_LUT_PALETTE_BITSIZE_ERROR";
-
-        case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
-            return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
-
-        case NPP_QUALITY_INDEX_ERROR:
-            return "NPP_QUALITY_INDEX_ERROR";
-
-        case NPP_CHANNEL_ORDER_ERROR:
-            return "NPP_CHANNEL_ORDER_ERROR";
-
-        case NPP_ZERO_MASK_VALUE_ERROR:
-            return "NPP_ZERO_MASK_VALUE_ERROR";
-
-        case NPP_NUMBER_OF_CHANNELS_ERROR:
-            return "NPP_NUMBER_OF_CHANNELS_ERROR";
-
-        case NPP_COI_ERROR:
-            return "NPP_COI_ERROR";
-
-        case NPP_DIVISOR_ERROR:
-            return "NPP_DIVISOR_ERROR";
-
-        case NPP_CHANNEL_ERROR:
-            return "NPP_CHANNEL_ERROR";
-
-        case NPP_STRIDE_ERROR:
-            return "NPP_STRIDE_ERROR";
-
-        case NPP_ANCHOR_ERROR:
-            return "NPP_ANCHOR_ERROR";
-
-        case NPP_MASK_SIZE_ERROR:
-            return "NPP_MASK_SIZE_ERROR";
-
-        case NPP_MOMENT_00_ZERO_ERROR:
-            return "NPP_MOMENT_00_ZERO_ERROR";
-
-        case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
-            return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
-
-        case NPP_THRESHOLD_ERROR:
-            return "NPP_THRESHOLD_ERROR";
-
-        case NPP_CONTEXT_MATCH_ERROR:
-            return "NPP_CONTEXT_MATCH_ERROR";
-
-        case NPP_FFT_FLAG_ERROR:
-            return "NPP_FFT_FLAG_ERROR";
-
-        case NPP_FFT_ORDER_ERROR:
-            return "NPP_FFT_ORDER_ERROR";
-
-        case NPP_SCALE_RANGE_ERROR:
-            return "NPP_SCALE_RANGE_ERROR";
-
-        case NPP_DATA_TYPE_ERROR:
-            return "NPP_DATA_TYPE_ERROR";
-
-        case NPP_OUT_OFF_RANGE_ERROR:
-            return "NPP_OUT_OFF_RANGE_ERROR";
-
-        case NPP_DIVIDE_BY_ZERO_ERROR:
-            return "NPP_DIVIDE_BY_ZERO_ERROR";
-
-        case NPP_RANGE_ERROR:
-            return "NPP_RANGE_ERROR";
-
-        case NPP_NO_MEMORY_ERROR:
-            return "NPP_NO_MEMORY_ERROR";
-
-        case NPP_ERROR_RESERVED:
-            return "NPP_ERROR_RESERVED";
-
-        case NPP_NO_OPERATION_WARNING:
-            return "NPP_NO_OPERATION_WARNING";
-
-        case NPP_DIVIDE_BY_ZERO_WARNING:
-            return "NPP_DIVIDE_BY_ZERO_WARNING";
-#endif
-
-#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000
-        /* These are 7.0 or higher */
-        case NPP_OVERFLOW_ERROR:
-            return "NPP_OVERFLOW_ERROR";
-
-        case NPP_CORRUPTED_DATA_ERROR:
-            return "NPP_CORRUPTED_DATA_ERROR";
-#endif
-    }
-
-    return "<unknown>";
-}
-#endif
-
-#ifdef __DRIVER_TYPES_H__
-#ifndef DEVICE_RESET
-#define DEVICE_RESET cudaDeviceReset();
-#endif
-#else
-#ifndef DEVICE_RESET
-#define DEVICE_RESET
-#endif
-#endif
-
-#ifdef __DRIVER_TYPES_H__
-static inline void check(CUresult result, char const *const func, const char *const file, int const line)
-{
-    if (result)
-    {
-        fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
-                file, line, (unsigned int)(result), _cudaGetErrorEnum(result), func);
-        DEVICE_RESET
-        // Make sure we call CUDA Device Reset before exiting
-        exit(EXIT_FAILURE);
-    }
-}
-#else
-static inline void check(cudaError_t result, char const *const func, const char *const file, int const line)
-{
-    if (result)
-    {
-        fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
-                file, line, (unsigned int)(result), _cudaGetErrorEnum(result), func);
-        DEVICE_RESET
-        // Make sure we call CUDA Device Reset before exiting
-        exit(EXIT_FAILURE);
-    }
-}
-#endif
-
-#ifdef __DRIVER_TYPES_H__
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(val)           check ( (val), #val, __FILE__, __LINE__ )
-
-// This will output the proper error string when calling cudaGetLastError
-#define getLastCudaError(msg)      __getLastCudaError (msg, __FILE__, __LINE__)
-
-inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
-{
-    cudaError_t err = cudaGetLastError();
-
-    if (cudaSuccess != err)
-    {
-        fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n",
-                file, line, errorMessage, (int)err, cudaGetErrorString(err));
-        DEVICE_RESET
-        exit(EXIT_FAILURE);
-    }
-}
-#endif
-
-#ifndef MAX
-#define MAX(a,b) (a > b ? a : b)
-#endif
-
-// Float To Int conversion
-inline int ftoi(float value)
-{
-    return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5));
-}
-
-// Beginning of GPU Architecture definitions
-inline int _ConvertSMVer2Cores(int major, int minor)
-{
-    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
-    typedef struct sSMtoCores
-    {
-        int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
-        int Cores;
-    } sSMtoCores;
-
-    sSMtoCores nGpuArchCoresPerSM[] =
-    {
-        { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
-        { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
-        { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
-        { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
-        { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
-        { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
-        { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
-        { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
-        { 0x53, 128}, // Maxwell Generation (SM 5.3) GM20x class
-        { 0x60, 64 }, // Pascal Generation (SM 6.0) GP100 class
-        { 0x61, 128}, // Pascal Generation (SM 6.1) GP10x class
-        { 0x62, 128}, // Pascal Generation (SM 6.2) GP10x class
-        {   -1, -1 }
-    };
-
-    int index = 0;
-
-    while (nGpuArchCoresPerSM[index].SM != -1)
-    {
-        if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
-        {
-            return nGpuArchCoresPerSM[index].Cores;
-        }
-
-        index++;
-    }
-
-    // If we don't find the values, we default use the previous one to run properly
-    printf("MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
-    return nGpuArchCoresPerSM[index-1].Cores;
-}
-// end of GPU Architecture definitions
-
-
-// end of CUDA Helper Functions
-
-
-#endif
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/helper_string.h
--- a/src/parallel_execution/helper_string.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,264 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-// These are helper functions for the SDK samples (string parsing, timers, etc)
-#ifndef STRING_HELPER_H
-#define STRING_HELPER_H
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-#ifndef _CRT_SECURE_NO_DEPRECATE
-#define _CRT_SECURE_NO_DEPRECATE
-#endif
-#ifndef STRCASECMP
-#define STRCASECMP  _stricmp
-#endif
-#ifndef STRNCASECMP
-#define STRNCASECMP _strnicmp
-#endif
-#ifndef STRCPY
-#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
-#endif
-
-#ifndef FOPEN
-#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode)
-#endif
-#ifndef FOPEN_FAIL
-#define FOPEN_FAIL(result) (result != 0)
-#endif
-#ifndef SSCANF
-#define SSCANF sscanf_s
-#endif
-#ifndef SPRINTF
-#define SPRINTF sprintf_s
-#endif
-#else // Linux Includes
-#include <string.h>
-#include <strings.h>
-
-#ifndef STRCASECMP
-#define STRCASECMP  strcasecmp
-#endif
-#ifndef STRNCASECMP
-#define STRNCASECMP strncasecmp
-#endif
-#ifndef STRCPY
-#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
-#endif
-
-#ifndef FOPEN
-#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode))
-#endif
-#ifndef FOPEN_FAIL
-#define FOPEN_FAIL(result) (result == NULL)
-#endif
-#ifndef SSCANF
-#define SSCANF sscanf
-#endif
-#ifndef SPRINTF
-#define SPRINTF sprintf
-#endif
-#endif
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-#ifndef bool
-typedef int bool;
-#define false 0
-#define true 1
-#endif
-
-// CUDA Utility Helper Functions
-inline int stringRemoveDelimiter(char delimiter, const char *string)
-{
-    int string_start = 0;
-
-    while (string[string_start] == delimiter)
-    {
-        string_start++;
-    }
-
-    if (string_start >= (int)strlen(string)-1)
-    {
-        return 0;
-    }
-
-    return string_start;
-}
-
-inline int getFileExtension(char *filename, char **extension)
-{
-    int string_length = (int)strlen(filename);
-
-    while (filename[string_length--] != '.')
-    {
-        if (string_length == 0)
-            break;
-    }
-
-    if (string_length > 0) string_length += 2;
-
-    if (string_length == 0)
-        *extension = NULL;
-    else
-        *extension = &filename[string_length];
-
-    return string_length;
-}
-
-
-inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
-{
-    bool bFound = false;
-
-    if (argc >= 1)
-    {
-        for (int i=1; i < argc; i++)
-        {
-            int string_start = stringRemoveDelimiter('-', argv[i]);
-            const char *string_argv = &argv[i][string_start];
-
-            const char *equal_pos = strchr(string_argv, '=');
-            int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);
-
-            int length = (int)strlen(string_ref);
-
-            if (length == argv_length && !STRNCASECMP(string_argv, string_ref, length))
-            {
-                bFound = true;
-                continue;
-            }
-        }
-    }
-
-    return bFound;
-}
-
-
-inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
-{
-    bool bFound = false;
-    int value = -1;
-
-    if (argc >= 1)
-    {
-        for (int i=1; i < argc; i++)
-        {
-            int string_start = stringRemoveDelimiter('-', argv[i]);
-            const char *string_argv = &argv[i][string_start];
-            int length = (int)strlen(string_ref);
-
-            if (!STRNCASECMP(string_argv, string_ref, length))
-            {
-                if (length+1 <= (int)strlen(string_argv))
-                {
-                    int auto_inc = (string_argv[length] == '=') ? 1 : 0;
-                    value = atoi(&string_argv[length + auto_inc]);
-                }
-                else
-                {
-                    value = 0;
-                }
-
-                bFound = true;
-                continue;
-            }
-        }
-    }
-
-    if (bFound)
-    {
-        return value;
-    }
-    else
-    {
-        return 0;
-    }
-}
-
-inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref)
-{
-    bool bFound = false;
-    float value = -1;
-
-    if (argc >= 1)
-    {
-        for (int i=1; i < argc; i++)
-        {
-            int string_start = stringRemoveDelimiter('-', argv[i]);
-            const char *string_argv = &argv[i][string_start];
-            int length = (int)strlen(string_ref);
-
-            if (!STRNCASECMP(string_argv, string_ref, length))
-            {
-                if (length+1 <= (int)strlen(string_argv))
-                {
-                    int auto_inc = (string_argv[length] == '=') ? 1 : 0;
-                    value = (float)atof(&string_argv[length + auto_inc]);
-                }
-                else
-                {
-                    value = 0.f;
-                }
-
-                bFound = true;
-                continue;
-            }
-        }
-    }
-
-    if (bFound)
-    {
-        return value;
-    }
-    else
-    {
-        return 0;
-    }
-}
-
-inline bool getCmdLineArgumentString(const int argc, const char **argv,
-                                     const char *string_ref, char **string_retval)
-{
-    bool bFound = false;
-
-    if (argc >= 1)
-    {
-        for (int i=1; i < argc; i++)
-        {
-            int string_start = stringRemoveDelimiter('-', argv[i]);
-            char *string_argv = (char *)&argv[i][string_start];
-            int length = (int)strlen(string_ref);
-
-            if (!STRNCASECMP(string_argv, string_ref, length))
-            {
-                *string_retval = &string_argv[length+1];
-                bFound = true;
-                continue;
-            }
-        }
-    }
-
-    if (!bFound)
-    {
-        *string_retval = NULL;
-    }
-
-    return bFound;
-}
-
-
-#endif
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/lib/Gears/Context.pm
--- a/src/parallel_execution/lib/Gears/Context.pm	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,191 +0,0 @@
-package Gears::Context;
-
-use strict;
-use warnings;
-
-use Gears::Util;
-use Gears::Context::Template;
-use Carp qw/croak/;
-
-sub new {
-  my ($class, %args) = @_;
-  my $self = {
-    data_gears_with_count => {},
-    find_root => $args{find_root} // ".",
-    output   => $args{output},
-  };
-
-  if ($args{compile_sources}) {
-    $self->{compile_sources} = $args{compile_sources};
-    map { Gears::Util->file_checking($_); } @{$self->{compile_sources}};
-  }
-
-  return bless $self, $class;
-}
-
-
-sub extraction_dg_compile_sources {
-  my $self = shift;
-  my %counter;
-  for my $cbc_file (@{$self->{compile_sources}}) {
-    open my $fh , '<', $cbc_file;
-    while (my $line = <$fh>) {
-       if ($line =~ /#interface\s*"(.*)\.h"/ || $line =~ /^\/\/\s*use\s*"(.*)\.h"/) {
-          $self->{data_gears_with_count}->{$1}->{caller}->{$cbc_file}++;
-          $counter{interfaces}->{$1}++;
-          next;
-       }
-
-       if ($line =~ /^(\w+)(\*)+  *create(\w+)\(([^]]*)\)/) {
-          my $interface = $1;
-          my $implementation = $3;
-          $self->{data_gears_with_count}->{$interface}->{caller}->{$cbc_file}++;
-          $self->{data_gears_with_count}->{$implementation}->{caller}->{$cbc_file}++;
-          $counter{interfaces}->{$interface}++;
-          $counter{impl}->{$implementation}++;
-          next;
-       }
-
-       if ($line =~ /Gearef\(context,\s*(\w+)\)/) {
-          my $implementation = $1;
-          $counter{impl}->{$implementation}++;
-          $self->{data_gears_with_count}->{$implementation}->{caller}->{$cbc_file}++;
-          next;
-       }
-
-       if ($line =~ /ALLOCATE_(?:PTR_)?ARRAY\(context,\s*(\w+),[\s\w]+\)/) {
-          my $implementation = $1;
-          $counter{impl}->{$implementation}++;
-          $self->{data_gears_with_count}->{$implementation}->{caller}->{$cbc_file}++;
-          next;
-       }
-
-       if ($line =~ /ALLOCATE_DATA_GEAR\((\w+),\s*(\w+)\)/) {
-          my $implementation = $2;
-          $counter{impl}->{$implementation}++;
-          $self->{data_gears_with_count}->{$implementation}->{caller}->{$cbc_file}++;
-          next;
-       }
-
-       #TaskManagerImpl* taskManager = (TaskManagerImpl*)GearImpl(context, TaskManager, taskManager);
-       if ($line =~ /\((\w+)\*\)GearImpl\(context,\s*(\w+),\s*(\w+)\)/) {
-          my $interface = $2;
-          my $implementation = $1;
-          $self->{data_gears_with_count}->{$interface}->{caller}->{$cbc_file}++;
-          $counter{interfaces}->{$interface}++;
-          $counter{impl}->{$implementation}++;
-          next;
-       }
-
-       if ($line =~ /__code/) {
-         while ($line =~ /struct (\w+)*/g) {
-           next if $1 eq "Context";
-           $self->{data_gears_with_count}->{$1}->{caller}->{$cbc_file}++;
-           next if (exists $counter{interfaces}->{$1});
-           $counter{impl}->{$1}++;
-         }
-       }
-    }
-    close $fh;
-  }
-  $counter{interfaces}->{Meta}++;
-  $self->{data_gears_with_count}->{Meta}++;
-  return \%counter;
-}
-
-sub set_data_gear_header_path {
-  my $self = shift;
-  my @data_gears_name;
-  if (@_) {
-    @data_gears_name = @_;
-  } else {
-    map { push (@data_gears_name,$_) if $_ ne "Context" } keys %{$self->{data_gears_with_count}};
-  }
-  return _find_headers($self->{find_root},\@data_gears_name);
-}
-
-sub update_dg_each_header_path {
-  my ($self, $dgs, $dg2path) = @_;
-  my $new_dgs;
-  for my $kind (keys %$dgs) {
-    for my $dg_name (keys %{$dgs->{$kind}}) {
-      if ($dg2path->{$dg_name}) {
-        $new_dgs->{$kind}->{$dg_name} = $dg2path->{$dg_name};
-      } else {
-        croak "failed trans header $dg_name\n";
-      }
-    }
-  }
-  return $new_dgs;
-}
-
-sub tree2create_context_h {
-  my ($self, $dg2path) = @_;
-  my $data_struct_str = $self->tree2data_struct_str($dg2path);
-
-  my $output = $self->_select_output();
-  Gears::Context::Template->emit_top_header($output);
-  Gears::Context::Template->emit_data_gears($output,$data_struct_str);
-  Gears::Context::Template->emit_last_header($output);
-  close $output;
-}
-
-sub _select_output {
-  my $self = shift;
-  print "$self->{output}\n";
-  if ($self->{output} eq  'stdout') {
-    return *STDOUT;
-  }
-  open my $fh, '>', $self->{output};
-  return $fh;
-}
-
-sub tree2data_struct_str {
-  my ($self, $dg_str) = @_;
-  my $data_struct_str  = "";
-  for my $interface (sort keys %$dg_str) {
-    $data_struct_str .= Gears::Util->h2context_str($dg_str->{$interface}->{elem});
-    next unless ($dg_str->{$interface}->{impl});
-    for my $impl (sort keys %{$dg_str->{$interface}->{impl}}) {
-       $data_struct_str .= Gears::Util->h2context_str($dg_str->{$interface}->{impl}->{$impl});
-    }
-  }
-  return $data_struct_str;
-}
-
-sub createImplTree_from_header {
-  my ($self, $dg2path) = @_;
-  my %dg_str = ();
-
-  my $inters = $dg2path->{interfaces};
-  my $impls = $dg2path->{impl};
-  map { $dg_str{$_}->{elem} = Gears::Util->parse_interface($inters->{$_}) } keys %$inters;
-
-  map {
-    my $res = Gears::Util->parse($impls->{$_});
-    if ($res->{isa}) {
-        $dg_str{$res->{isa}}->{impl}->{$_} = $res;
-    } else {
-        $dg_str{$_}->{elem} = $res;
-    }
-  } keys %$impls;
-  return \%dg_str;
-}
-
-sub _find_headers {
-  my ($search_bash_path, $targets) = @_;
-  my %res;
-  map { $res{$_}++ } @$targets;
-
-  my $header_paths = Gears::Util->find_headers_path($search_bash_path);
-  map {
-    /(\w+)\.h/;
-    my $header_tile = $1;
-    if (exists $res{$header_tile}){
-      $res{$header_tile} = $_;
-    }
-  } @$header_paths;
-  return \%res;
-}
-
-1;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/lib/Gears/Context/Template.pm
--- a/src/parallel_execution/lib/Gears/Context/Template.pm	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,171 +0,0 @@
-package Gears::Context::Template;
-use strict;
-use warnings;
-
-sub emit_top_header {
-  my ($class, $out) = @_;
-my $str =  << 'EOFEOF';
-/* Context definition for llrb example */
-#ifndef CONTEXT_H
-#define CONTEXT_H
-#include <stdlib.h>
-#include <pthread.h>
-#ifdef USE_CUDAWorker
-#include <cuda.h>
-#include <driver_types.h>
-#include <cuda_runtime.h>
-#include "helper_cuda.h"
-#endif
-
-#define ALLOCATE_SIZE 20000000
-#define NEW(type) (type*)(calloc(1, sizeof(type)))
-#define NEWN(n, type) (type*)(calloc(n, sizeof(type)))
-
-#define ALLOC_DATA(context, dseg) ({\
-    Meta* meta = (Meta*)context->heap;\
-    meta->type = D_##dseg;\
-    meta->size = sizeof(dseg);\
-    meta->len = 1;\
-    context->heap += sizeof(Meta);\
-    context->data[D_##dseg] = context->heap; context->heap += sizeof(dseg); (dseg *)context->data[D_##dseg]; })
-
-#define ALLOC_DATA_TYPE(context, dseg, t) ({\
-    Meta* meta = (Meta*)context->heap;\
-    meta->type = D_##t;\
-    meta->size = sizeof(t);\
-    meta->len = 1;\
-    context->heap += sizeof(Meta);\
-    context->data[D_##dseg] = context->heap; context->heap += sizeof(t); (t *)context->data[D_##dseg]; })
-
-#define ALLOCATE(context, t) ({ \
-    Meta* meta = (Meta*)context->heap;\
-    context->heap += sizeof(Meta);\
-    union Data* data = context->heap; \
-    context->heap += sizeof(t); \
-    meta->type = D_##t; \
-    meta->size = sizeof(t);     \
-    meta->len = 1;\
-    data; })
-
-#define ALLOCATE_ARRAY(context, t, length) ({ \
-    Meta* meta = (Meta*)context->heap;\
-    context->heap += sizeof(Meta);\
-    union Data* data = context->heap; \
-    context->heap += sizeof(t)*length; \
-    meta->type = D_##t; \
-    meta->size = sizeof(t)*length; \
-    meta->len = length; \
-    data;   })
-
-#define ALLOCATE_PTR_ARRAY(context, dseg, length) ({\
-    Meta* meta = (Meta*)context->heap;\
-    context->heap += sizeof(Meta);\
-    union Data* data = context->heap; \
-    context->heap += sizeof(dseg *)*length; \
-    meta->type = D_##dseg; \
-    meta->size = sizeof(dseg *)*length; \
-    meta->len = length; \
-    data; })
-
-#define ALLOCATE_DATA_GEAR(context, t) ({ \
-        union Data* data = ALLOCATE(context, t); \
-        Meta* meta = GET_META(data); \
-        meta->wait = createSynchronizedQueue(context); \
-        data; })
-
-#define ALLOC(context, t) (&ALLOCATE(context, t)->t)
-
-#define GET_META(dseg) ((Meta*)(((void*)dseg) - sizeof(Meta)))
-#define GET_TYPE(dseg) (GET_META(dseg)->type)
-#define GET_SIZE(dseg) (GET_META(dseg)->size)
-#define GET_LEN(dseg) (GET_META(dseg)->len)
-#define GET_WAIT_LIST(dseg) (GET_META(dseg)->wait)
-
-#define Gearef(context, t) (&(context)->data[D_##t]->t)
-
-// (SingleLinkedStack *)context->data[D_Stack]->Stack.stack->Stack.stack
-
-#define GearImpl(context, intf, name) (Gearef(context, intf)->name->intf.name)
-
-#include "c/enumCode.h"
-
-enum Relational {
-    EQ,
-    GT,
-    LT,
-};
-
-#include "c/enumData.h"
-
-struct Context {
-    enum Code next;
-    struct Worker* worker;
-    struct TaskManager* taskManager;
-    int codeNum;
-    __code (**code) (struct Context*);
-    union Data **data;
-    void* heapStart;
-    void* heap;
-    long heapLimit;
-    int dataNum;
-
-    // task parameter
-    int idgCount; //number of waiting dataGear
-    int idg;
-    int maxIdg;
-    int odg;
-    int maxOdg;
-    int gpu; // GPU task
-    struct Context* task;
-    struct Element* taskList;
-#ifdef USE_CUDAWorker
-    int num_exec;
-    CUmodule module;
-    CUfunction function;
-#endif
-    /* multi dimension parameter */
-    int iterate;
-    struct Iterator* iterator;
-    enum Code before;
-};
-
-typedef int Int;
-#ifndef USE_CUDAWorker
-typedef unsigned long long CUdeviceptr;
-#endif
-EOFEOF
-    print $out $str;
-}
-
-sub emit_data_gears {
-  my ($class, $out, $dgs) = @_;
-
-print $out "union Data {\n";
-print $out $dgs;
-print $out  <<'EOF';
-    struct Context Context;
-}; // union Data end       this is necessary for context generator
-typedef union Data Data;
-EOF
-}
-
-
-sub emit_last_header {
-  my($class, $out) = @_;
-  print $out <<'EOF';
-#include "c/typedefData.h"
-
-#include "c/extern.h"
-
-extern __code start_code(struct Context* context);
-extern __code exit_code(struct Context* context);
-extern __code meta(struct Context* context, enum Code next);
-//extern __code par_meta(struct Context* context, enum Code spawns, enum Code next);
-extern __code parGotoMeta(struct Context* context, enum Code next);
-extern void initContext(struct Context* context);
-
-#endif
-EOF
-}
-
-1;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/lib/Gears/Util.pm
--- a/src/parallel_execution/lib/Gears/Util.pm	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,178 +0,0 @@
-package Gears::Util;
-use strict;
-use warnings;
-use Carp qw/croak/;
-use File::Find;
-
-sub parse {
-  my ($class, $file_name) = @_;
-  my $ir = _parse_base($file_name);
-  return $ir;
-}
-
-sub parse_code_verbose {
-  my ($class, $file_name) = @_;
-  my $ir = _parse_base($file_name,1);
-  return $ir;
-}
-
-sub parse_interface {
-  my ($class, $file_name) = @_;
-  my $ir = _parse_base($file_name);
-  
-  unless ($ir->{name}) {
-    croak 'invalid struct name';
-  }
-  return $ir;
-}
-
-
-sub parse_impl {
-  my ($class, $file_name) = @_;
-  my $ir = _parse_base($file_name);
-
-  unless ($ir->{isa} && $ir->{name}) {
-    croak 'invalid struct name';
-  }
-  return $ir;
-}
-
-sub _parse_base {
-  my ($file,$code_verbose) = @_;
-  my $ir  = {};
-
-  Gears::Util->file_checking($file);
-  open my $fh, '<', $file;
-  my $line = <$fh>;
-
-  if ($line =~ /typedef struct (\w+)\s?<.*>([\s\w{]+)/) {
-    die "invalied struct name $1" unless $1;
-    $ir->{name} = $1;
-
-    if ($2 =~ m|\s*impl\s*([\w+]+)\s*{|) {
-      $ir->{isa} = $1;
-    }
-  }
-
-  while ($line = <$fh>) {
-    if ($line =~ m|\s*/\*|) {
-      while ( $line !~ m|\*/|) {
-        $line = <$fh>;
-        next;
-      }
-      next;
-    }
-    next if ($line =~ /^\s+$/);
-    next if ($line =~ m[^\s*//]);
-    next if ($line =~ m[^\}\s*$ir->{name};]);
-
-    if ($line =~ m|__code (\w+)\(([()\.\*\s\w,_]+)\)|) {
-      $line = "enum Code $1;\n";
-    }
-
-    push(@{$ir->{content}},$line);
-  }
-
-  return $ir;
-}
-
-sub parse_with_rewrite {
-  my ($class, $file)  = @_;
-  my $ir = _parse_base($file);
-
-  my @data_gears;
-  my @code_gears;
-  map { push (@data_gears, $_) unless ($_ =~ /enum Code/);} @{$ir->{content}};
-  map { push (@code_gears, $1) if ($_ =~ /enum Code (\w+);/);} @{$ir->{content}};
-
-  open my $fh , '<', $file;
-  my $i = 0;
-  while (($i < scalar @code_gears) && (my $line = <$fh>)) {
-      my $cg = $code_gears[$i];
-      if ($line =~ m|__code $cg\(([()\.\*\s\w,_]+)\)|) {
-        $code_gears[$i] = {
-          name => $cg,
-          args => $1, 
-        };
-        $i++;
-      }
-  }
-  $ir->{codes} = \@code_gears;
-  $ir->{data}  = \@data_gears;
-  return $ir;
-}
-
-sub file_checking {
-  my ($class, $file_name) = @_;
-  unless (-f $file_name) {
-    croak "invalid filepath :$file_name\n";
-  }
-}
-
-sub slup {
-  my ($class,$file) = @_;
-  open my $fh, '<', $file;
-  local $/;
-  my $f = <$fh>;
-  return $f;
-}
-
-sub find_header {
-  my $class = shift;
-  my $header_name = shift;
-
-  my $find_path = shift // ".";
-  my $header_file = '';
-
-  find(
-    {
-      wanted => sub {
-        if ($_ =~ /\/$header_name\.h/) {
-          $header_file = $_;
-        }
-      },
-      no_chdir => 1,
-    },
-    $find_path);
-  return $header_file;
-}
-
-sub find_headers_path {
-  my $class = shift;
-  my $find_path = shift // ".";
-
-  my @files;
-  find( { wanted => sub { push @files, $_ if /\.(?:h|dg)/ }, no_chdir => 1 }, $find_path);
-
-  return \@files;
-}
-
-sub h2context_str {
-  my ($class, $h2context) = @_;
-  my $context = '';
-  my $space = '    ';
-
-  $context =  "${space}struct $h2context->{name} {\n";
-  my $content_space;
-  if (exists $h2context->{content}){
-    my @chars = split //, $h2context->{content}->[0];
-    for my $w (@chars) {
-      last if ($w !~ /\s/);
-      $content_space .= $w;
-    }
-  }
-
-  unless (defined $content_space) {
-    $content_space = "";
-  }
-
-  for my $c (@{$h2context->{content}}) {
-    $c =~ s/$content_space//;
-    $context .= "${space}${space}$c";
-  }
-  $context .= "${space}} $h2context->{name};\n";
-  return $context;
-}
-
-
-1;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/main.cbc
--- a/src/parallel_execution/main.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,110 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include "../context.h"
-
-int cpu_num = 1;
-int length = 102400;
-int split = 8;
-int* array_ptr;
-int gpu_num = 0;
-int CPU_ANY = -1;
-int CPU_CUDA = -1;
-
-void *start_taskManager(struct Context *context) {
-    goto initDataGears(context, Gearef(context, LoopCounter), Gearef(context, TaskManager));
-    return 0;
-}
-
-__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    // loopCounter->tree = createRedBlackTree(context);
-    loopCounter->i = 0;
-    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0);
-    goto meta(context, C_code1);
-}
-
-__code initDataGears_stub(struct Context* context) {
-    struct TaskManager* taskManager =  Gearef(context, TaskManager);
-    taskManager->taskManager = 0;
-    struct LoopCounter* loopCounter = Gearef(context, LoopCounter);
-    goto initDataGears(context, loopCounter, taskManager);
-}
-
-__code code1(struct Time* time) {
-    printf("cpus:\t\t%d\n", cpu_num);
-    printf("gpus:\t\t%d\n", gpu_num);
-    printf("length:\t\t%d\n", length);
-    printf("length/task:\t%d\n", length/split);
-    /* puts("queue"); */
-    /* print_queue(context->data[ActiveQueue]->queue.first); */
-    /* puts("tree"); */
-    /* print_tree(context->data[Tree]->tree.root); */
-    /* puts("result"); */
-    time->time = (union Data*)createTimeImpl(context);
-    time->next = C_createTask1;
-    goto meta(context, time->time->Time.start);
-}
-
-__code code2(struct Time* time, struct TaskManager* taskManager) {
-    time->next = C_code3;
-    taskManager->next = time->time->Time.end;
-    goto meta(context, taskManager->taskManager->TaskManager.shutdown);
-}
-
-__code code3(struct LoopCounter* loopCounter) {
-    int i = loopCounter->i;
-
-    if (i < length) {
-        //printf("%d\n", array_ptr[i]);
-        if (array_ptr[i] == (i*2)) {
-            loopCounter->i++;
-            goto meta(context, C_code3);
-        } else
-            puts("wrong result");
-
-    }
-
-    goto meta(context, C_exit_code);
-}
-
-__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    Array* array = new Array();
-
-    par goto createArray(array, __exit);
-
-    par goto twice(array, iterate(split), __exit);
-    goto code2();
-}
-
-void init(int argc, char** argv) {
-    for (int i = 1; argv[i]; ++i) {
-        if (strcmp(argv[i], "-cpu") == 0)
-            cpu_num = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-l") == 0)
-            length = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-s") == 0)
-            split = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-cuda") == 0) {
-            gpu_num = 1;
-            CPU_CUDA = 0;
-        }
-    }
-}
-
-
-int main(int argc, char** argv) {
-    init(argc, argv);
-
-    array_ptr = NEWN(length, int);
-
-    for(int i=0; i<length; i++)
-        array_ptr[i]=i;
-
-    struct Context* main_context = NEW(struct Context);
-    initContext(main_context);
-    main_context->next = C_initDataGears;
-
-    goto start_code(main_context);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/impl/AtomicReference.h
--- a/src/parallel_execution/plautogen/impl/AtomicReference.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2 +0,0 @@
-typedef struct AtomicReference <Type, Isa> impl Atomic {
-} AtomicReference;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/impl/CUDAExecutor.h
--- a/src/parallel_execution/plautogen/impl/CUDAExecutor.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-typedef struct CUDAExecutor <Type, Isa> impl Executor {
-  CUdeviceptr** kernelParams;
-  struct CUDABuffer* buffer;
-  int maxThreadPerBlock;
-  int maxThreadPerBlockX;
-  int maxThreadPerBlockY;
-  int maxThreadPerBlockZ;
-  struct Timer* timer;
-} CUDAExecutor;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/impl/CUDAWorker.h
--- a/src/parallel_execution/plautogen/impl/CUDAWorker.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,14 +0,0 @@
-typedef struct CUDAWorker <Type, Isa> impl Worker {
-  CUdevice device;
-  CUcontext cuCtx;
-  struct Context* context;
-  int id;
-  int loopCounter;
-  int deviceNum;
-  struct Queue* tasks;
-  int runFlag;
-  __code next(...);
-  int numStream;
-  struct Executor* executor;
-  CUstream *stream;
-} CUDAWorker;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/impl/LockImpl.h
--- a/src/parallel_execution/plautogen/impl/LockImpl.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-typedef struct LockImpl <Type, Isa> impl Lock {
-  Int* lock;
-  struct Queue* waitThreadQueue;
-  struct Atomic* atomic;
-  struct Context* lockContext;
-} LockImpl;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/impl/MultiDimIterator.h
--- a/src/parallel_execution/plautogen/impl/MultiDimIterator.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,9 +0,0 @@
-typedef struct MultiDimIterator <Type, Isa> impl Iterator {
-  int x;
-  int y;
-  int z;
-  int count;
-  int counterX;
-  int counterY;
-  int counterZ;
-} MultiDimIterator;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/impl/RedBlackTree.h
--- a/src/parallel_execution/plautogen/impl/RedBlackTree.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-typedef struct RedBlackTree <Type, Isa> impl Tree {
-  struct Node* root;
-  struct Node* current; // reading node of original tree;
-  struct Node* previous; // parent of reading node of original tree;
-  struct Node* newNode; // writing node of new tree;
-  struct Node* parent;
-  struct Node* grandparent;
-  struct Stack* nodeStack;
-  __code findNodeNext(...);
-  int result;
-} RedBlackTree;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/impl/SemaphoreImpl.h
--- a/src/parallel_execution/plautogen/impl/SemaphoreImpl.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-typedef struct SemaphoreImpl <Type, Isa> impl Semaphore {
-  int value;
-  struct Lock* lock;
-  struct Queue* waitThreadQueue;
-} SemaphoreImpl;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/impl/SingleLinkedQueue.h
--- a/src/parallel_execution/plautogen/impl/SingleLinkedQueue.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-typedef struct SingleLinkedQueue <Type, Isa> impl Queue {
-  struct Element* top;
-  struct Element* last;
-} SingleLinkedQueue;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/impl/SingleLinkedStack.h
--- a/src/parallel_execution/plautogen/impl/SingleLinkedStack.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-typedef struct SingleLinkedStack<Type, Isa> impl Stack {
-  struct Element* top;
-} SingleLinkedStack;
-
-/*
-    // Stack Interface
-    struct Stack {
-        union Data* stack;
-        union Data* data;
-        union Data* data1;
-        enum Code whenEmpty;
-        enum Code clear;
-        enum Code push;
-        enum Code pop;
-        enum Code pop2;
-        enum Code isEmpty;
-        enum Code get;
-        enum Code get2;
-        enum Code next;
-    } Stack;
-    // Stack implementations
-    struct SingleLinkedStack {
-        struct Element* top;
-    } SingleLinkedStack;
-    */
-
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/impl/SpinLock.h
--- a/src/parallel_execution/plautogen/impl/SpinLock.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-typedef struct SpinLock <Type, Isa> impl Lock {
-  volatile Int* lock;
-  struct Atomic* atomic;
-  struct Context* lockContext;
-} SpinLock;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/impl/SynchronizedQueue.h
--- a/src/parallel_execution/plautogen/impl/SynchronizedQueue.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-typedef struct SynchronizedQueue <Type, Isa> impl Queue {
-  struct Element* top;
-  struct Element* last;
-  struct Atomic* atomic;
-} SynchronizedQueue;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/impl/TaskManagerImpl.h
--- a/src/parallel_execution/plautogen/impl/TaskManagerImpl.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
-typedef struct TaskManagerImpl <Type, Isa> impl TaskManager {
-  int numWorker;
-  int sendCPUWorkerIndex;
-  int sendGPUWorkerIndex;
-  int taskCount;
-  pthread_mutex_t mutex;
-  struct Queue* activeQueue;
-  struct Worker** workers;
-  struct Element* taskList;
-  int loopCounter;
-  int cpu;
-  int gpu;
-  int io;
-  int maxCPU;
-  __code next(...);
-} TaskManagerImpl;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/impl/TimerImpl.h
--- a/src/parallel_execution/plautogen/impl/TimerImpl.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-typedef struct TimerImpl <Type, Isa> impl Timer {
-  double time;
-} TimerImpl;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/interface/Allocate.h
--- a/src/parallel_execution/plautogen/interface/Allocate.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-typedef struct Allocate <Type, Impl> {
-  __code next(...);
-  long size;
-} Allocate;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/interface/Array.h
--- a/src/parallel_execution/plautogen/interface/Array.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-typedef struct Array <Type, Impl> {
-  int prefix;
-  struct Integer* array;
-} Array;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/interface/ArrayStack.h
--- a/src/parallel_execution/plautogen/interface/ArrayStack.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-typedef struct ArrayStack <Type, Impl> {
-  int size;
-  int limit;
-  struct Element* array;
-} ArrayStack;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/interface/BoundedBuffer.h
--- a/src/parallel_execution/plautogen/interface/BoundedBuffer.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-typedef struct BoundedBuffer <Type, Impl> {
-  struct Element* top;
-  struct Element* last;
-  struct Semaphore* fullCount;
-  struct Semaphore* emptyCount;
-  struct Semaphore* lock;
-} BoundedBuffer;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/interface/CPUWorker.h
--- a/src/parallel_execution/plautogen/interface/CPUWorker.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,7 +0,0 @@
-typedef struct CPUWorker <Type, Impl> {
-  pthread_mutex_t mutex;
-  pthread_cond_t cond;
-  struct Context* context;
-  int id;
-  int loopCounter;
-} CPUWorker;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/interface/CUDABuffer.h
--- a/src/parallel_execution/plautogen/interface/CUDABuffer.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-typedef struct CUDABuffer <Type, Impl> {
-  int inputLen;
-  int outputLen;
-  union Data** inputData;
-  union Data** outputData;
-} CUDABuffer;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/interface/Element.h
--- a/src/parallel_execution/plautogen/interface/Element.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,4 +0,0 @@
-typedef struct Element <Type, Impl> {
-  union Data* data;
-  struct Element* next;
-} Element;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/interface/Integer.h
--- a/src/parallel_execution/plautogen/interface/Integer.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-typedef struct Integer <Type, Impl> {
-  int value;
-} Integer;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/interface/LoopCounter.h
--- a/src/parallel_execution/plautogen/interface/LoopCounter.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-typedef struct LoopCounter <Type, Impl> {
-  int i;
-} LoopCounter;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/interface/Main.h
--- a/src/parallel_execution/plautogen/interface/Main.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-typedef struct Main <Type, Impl> {
-  __code code(...);
-  __code next(...);
-  struct Queue* args;
-} Main;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/interface/Memory.h
--- a/src/parallel_execution/plautogen/interface/Memory.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-typedef struct Memory <Type, Impl> {
-  union Data* adr;
-  int length;
-  union Data* body;
-  int hash;
-} Memory;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/interface/Meta.h
--- a/src/parallel_execution/plautogen/interface/Meta.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-typedef struct Meta <Type, Impl> {
-  enum DataType type;
-  long size;
-  long len;
-  struct Queue* wait; // tasks waiting this dataGear;
-} Meta;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/interface/MultiDim.h
--- a/src/parallel_execution/plautogen/interface/MultiDim.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-typedef struct MultiDim <Type, Impl> {
-  int x;
-  int y;
-  int z;
-} MultiDim;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/interface/Node.h
--- a/src/parallel_execution/plautogen/interface/Node.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,12 +0,0 @@
-typedef struct Node <Type, Impl> {
-  int key; // comparable data segment;
-  union Data* value;
-  struct Node* left;
-  struct Node* right;
-          // need to balancing
-  enum Color {
-      Red,
-      Black,
-      // Red eq 0,Black eq 1. enum name convert intager.
-  }color;
-} Node;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/plautogen/interface/RotateTree.h
--- a/src/parallel_execution/plautogen/interface/RotateTree.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-typedef struct RotateTree <Type, Impl> {
-  __code next(...);
-  struct RedBlackTree* traverse;
-  struct Tree* tree;
-} RotateTree;
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/stack.agda
--- a/src/parallel_execution/stack.agda	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,255 +0,0 @@
-open import Level renaming (suc to succ ; zero to Zero )
-module stack  where
-
-open import Relation.Binary.PropositionalEquality
-open import Relation.Binary.Core
-open import Data.Nat
-
-ex : 1 + 2 ≡ 3
-ex = refl
-
-data Bool {n : Level } : Set n where
-  True  : Bool
-  False : Bool
-
-record _∧_ {n : Level } (a : Set n) (b : Set n): Set n where
-  field
-    pi1 : a
-    pi2 : b
-
-data Maybe {n : Level } (a : Set n) : Set n where
-  Nothing : Maybe a
-  Just    : a -> Maybe a
-
-record StackMethods {n m : Level } (a : Set n ) {t : Set m }(stackImpl : Set n ) : Set (m Level.⊔ n) where
-  field
-    push : stackImpl -> a -> (stackImpl -> t) -> t
-    pop  : stackImpl -> (stackImpl -> Maybe a -> t) -> t
-    pop2 : stackImpl -> (stackImpl -> Maybe a -> Maybe a -> t) -> t
-    get  : stackImpl -> (stackImpl -> Maybe a -> t) -> t
-    get2 : stackImpl -> (stackImpl -> Maybe a -> Maybe a -> t) -> t
-open StackMethods
-
-record Stack {n m : Level } (a : Set n ) {t : Set m } (si : Set n ) : Set (m Level.⊔ n) where
-  field
-    stack : si
-    stackMethods : StackMethods {n} {m} a {t} si
-  pushStack :  a -> (Stack a si -> t) -> t
-  pushStack d next = push (stackMethods ) (stack ) d (\s1 -> next (record {stack = s1 ; stackMethods = stackMethods } ))
-  popStack : (Stack a si -> Maybe a  -> t) -> t
-  popStack next = pop (stackMethods ) (stack ) (\s1 d1 -> next (record {stack = s1 ; stackMethods = stackMethods }) d1 )
-  pop2Stack :  (Stack a si -> Maybe a -> Maybe a -> t) -> t
-  pop2Stack next = pop2 (stackMethods ) (stack ) (\s1 d1 d2 -> next (record {stack = s1 ; stackMethods = stackMethods }) d1 d2)
-  getStack :  (Stack a si -> Maybe a  -> t) -> t
-  getStack next = get (stackMethods ) (stack ) (\s1 d1 -> next (record {stack = s1 ; stackMethods = stackMethods }) d1 )
-  get2Stack :  (Stack a si -> Maybe a -> Maybe a -> t) -> t
-  get2Stack next = get2 (stackMethods ) (stack ) (\s1 d1 d2 -> next (record {stack = s1 ; stackMethods = stackMethods }) d1 d2)
-
-open Stack
-
-data Element {n : Level } (a : Set n) : Set n where
-  cons : a -> Maybe (Element a) -> Element a
-
-datum : {n : Level } {a : Set n} -> Element a -> a
-datum (cons a _) = a
-
-next : {n : Level } {a : Set n} -> Element a -> Maybe (Element a)
-next (cons _ n) = n
-
-
-{-
--- cannot define recrusive record definition. so use linked list with maybe.
-record Element {l : Level} (a : Set n l) : Set n (suc l) where
-  field
-    datum : a  -- `data` is reserved by Agda.
-    next : Maybe (Element a)
--}
-
-
-
-record SingleLinkedStack {n : Level } (a : Set n) : Set n where
-  field
-    top : Maybe (Element a)
-open SingleLinkedStack
-
-pushSingleLinkedStack : {n m : Level } {t : Set m } {Data : Set n} -> SingleLinkedStack Data -> Data -> (Code : SingleLinkedStack Data -> t) -> t
-pushSingleLinkedStack stack datum next = next stack1
-  where
-    element = cons datum (top stack)
-    stack1  = record {top = Just element}
-
-
-popSingleLinkedStack : {n m : Level } {t : Set m } {a  : Set n} -> SingleLinkedStack a -> (Code : SingleLinkedStack a -> (Maybe a) -> t) -> t
-popSingleLinkedStack stack cs with (top stack)
-...                                | Nothing = cs stack  Nothing
-...                                | Just d  = cs stack1 (Just data1)
-  where
-    data1  = datum d
-    stack1 = record { top = (next d) }
-
-pop2SingleLinkedStack : {n m : Level } {t : Set m } {a  : Set n} -> SingleLinkedStack a -> (Code : SingleLinkedStack a -> (Maybe a) -> (Maybe a) -> t) -> t
-pop2SingleLinkedStack {n} {m} {t} {a} stack cs with (top stack)
-...                                | Nothing = cs stack Nothing Nothing
-...                                | Just d = pop2SingleLinkedStack' {n} {m} stack cs
-  where
-    pop2SingleLinkedStack' : {n m : Level } {t : Set m }  -> SingleLinkedStack a -> (Code : SingleLinkedStack a -> (Maybe a) -> (Maybe a) -> t) -> t
-    pop2SingleLinkedStack' stack cs with (next d)
-    ...              | Nothing = cs stack Nothing Nothing
-    ...              | Just d1 = cs (record {top = (next d1)}) (Just (datum d)) (Just (datum d1))
-    
-
-getSingleLinkedStack : {n m : Level } {t : Set m } {a  : Set n} -> SingleLinkedStack a -> (Code : SingleLinkedStack a -> (Maybe a) -> t) -> t
-getSingleLinkedStack stack cs with (top stack)
-...                                | Nothing = cs stack  Nothing
-...                                | Just d  = cs stack (Just data1)
-  where
-    data1  = datum d
-
-get2SingleLinkedStack : {n m : Level } {t : Set m } {a  : Set n} -> SingleLinkedStack a -> (Code : SingleLinkedStack a -> (Maybe a) -> (Maybe a) -> t) -> t
-get2SingleLinkedStack {n} {m} {t} {a} stack cs with (top stack)
-...                                | Nothing = cs stack Nothing Nothing
-...                                | Just d = get2SingleLinkedStack' {n} {m} stack cs
-  where
-    get2SingleLinkedStack' : {n m : Level} {t : Set m } -> SingleLinkedStack a -> (Code : SingleLinkedStack a -> (Maybe a) -> (Maybe a) -> t) -> t
-    get2SingleLinkedStack' stack cs with (next d)
-    ...              | Nothing = cs stack Nothing Nothing
-    ...              | Just d1 = cs stack (Just (datum d)) (Just (datum d1))
-
-
-
-emptySingleLinkedStack : {n : Level } {a : Set n} -> SingleLinkedStack a
-emptySingleLinkedStack = record {top = Nothing}
-
------
--- Basic stack implementations are specifications of a Stack
---
-singleLinkedStackSpec : {n m : Level } {t : Set m } {a : Set n} -> StackMethods {n} {m} a {t} (SingleLinkedStack a)
-singleLinkedStackSpec = record {
-                                   push = pushSingleLinkedStack
-                                 ; pop  = popSingleLinkedStack
-                                 ; pop2 = pop2SingleLinkedStack
-                                 ; get  = getSingleLinkedStack
-                                 ; get2 = get2SingleLinkedStack
-                           }
-
-createSingleLinkedStack : {n m : Level } {t : Set m } {a : Set n} -> Stack {n} {m} a {t} (SingleLinkedStack a)
-createSingleLinkedStack = record {
-                             stack = emptySingleLinkedStack ;
-                             stackMethods = singleLinkedStackSpec 
-                           }
-
-----
---
--- proof of properties ( concrete cases )
---
-
-test01 : {n : Level } {a : Set n} -> SingleLinkedStack a -> Maybe a -> Bool {n}
-test01 stack _ with (top stack)
-...                  | (Just _) = True
-...                  | Nothing  = False
-
-
-test02 : {n : Level } {a : Set n} -> SingleLinkedStack a -> Bool
-test02 stack = popSingleLinkedStack stack test01
-
-test03 : {n : Level } {a : Set n} -> a ->  Bool
-test03 v = pushSingleLinkedStack emptySingleLinkedStack v test02
-
--- after a push and a pop, the stack is empty
-lemma : {n : Level} {A : Set n} {a : A} -> test03 a ≡ False
-lemma = refl
-
-testStack01 : {n m : Level } {a : Set n} -> a -> Bool {m}
-testStack01 v = pushStack createSingleLinkedStack v (
-   \s -> popStack s (\s1 d1 -> True))
-
--- after push 1 and 2, pop2 get 1 and 2
-
-testStack02 : {m : Level } ->  ( Stack  ℕ (SingleLinkedStack ℕ) -> Bool {m} ) -> Bool {m}
-testStack02 cs = pushStack createSingleLinkedStack 1 (
-   \s -> pushStack s 2 cs)
-
-
-testStack031 : (d1 d2 : ℕ ) -> Bool {Zero}
-testStack031 2 1 = True
-testStack031 _ _ = False
-
-testStack032 : (d1 d2 : Maybe ℕ) -> Bool {Zero}
-testStack032  (Just d1) (Just d2) = testStack031 d1 d2
-testStack032  _ _ = False
-
-testStack03 : {m : Level } -> Stack  ℕ (SingleLinkedStack ℕ) -> ((Maybe ℕ) -> (Maybe ℕ) -> Bool {m} ) -> Bool {m}
-testStack03 s cs = pop2Stack s (
-   \s d1 d2 -> cs d1 d2 )
-
-testStack04 : Bool
-testStack04 = testStack02 (\s -> testStack03 s testStack032)
-
-testStack05 : testStack04 ≡ True
-testStack05 = refl
-
-------
---
--- proof of properties with indefinite state of stack
---
--- this should be proved by properties of the stack inteface, not only by the implementation,
---    and the implementation have to provides the properties.
---
---    we cannot write "s ≡ s3", since level of the Set does not fit , but use stack s ≡ stack s3 is ok.
---    anyway some implementations may result s != s3
---  
-
-stackInSomeState : {l m : Level } {D : Set l} {t : Set m } (s : SingleLinkedStack D ) -> Stack {l} {m} D {t}  ( SingleLinkedStack  D )
-stackInSomeState s =  record { stack = s ; stackMethods = singleLinkedStackSpec }
-
-push->push->pop2 : {l : Level } {D : Set l} (x y : D ) (s : SingleLinkedStack D ) ->
-    pushStack ( stackInSomeState s )  x ( \s1 -> pushStack s1 y ( \s2 -> pop2Stack s2 ( \s3 y1 x1 -> (Just x ≡ x1 ) ∧ (Just y ≡ y1 ) ) ))
-push->push->pop2 {l} {D} x y s = record { pi1 = refl ; pi2 = refl }
-
-
-id : {n : Level} {A : Set n} -> A -> A
-id a = a
-
--- push a, n times
-
-n-push : {n : Level} {A : Set n} {a : A} -> ℕ -> SingleLinkedStack A -> SingleLinkedStack A
-n-push zero s            = s
-n-push {l} {A} {a} (suc n) s = pushSingleLinkedStack (n-push {l} {A} {a} n s) a (\s -> s ) 
-
-n-pop :  {n : Level}{A : Set n} {a : A} -> ℕ -> SingleLinkedStack A -> SingleLinkedStack A
-n-pop zero    s         = s
-n-pop  {_} {A} {a} (suc n) s = popSingleLinkedStack (n-pop {_} {A} {a} n s) (\s _ -> s )
-
-open ≡-Reasoning
-
-push-pop-equiv : {n : Level} {A : Set n} {a : A} (s : SingleLinkedStack A) -> (popSingleLinkedStack (pushSingleLinkedStack s a (\s -> s)) (\s _ -> s) ) ≡ s
-push-pop-equiv s = refl
-
-push-and-n-pop : {n : Level} {A : Set n} {a : A} (n : ℕ) (s : SingleLinkedStack A) -> n-pop {_} {A} {a} (suc n) (pushSingleLinkedStack s a id) ≡ n-pop {_} {A} {a} n s
-push-and-n-pop zero s            = refl
-push-and-n-pop {_} {A} {a} (suc n) s = begin
-   n-pop {_} {A} {a} (suc (suc n)) (pushSingleLinkedStack s a id)
-  ≡⟨ refl ⟩
-   popSingleLinkedStack (n-pop {_} {A} {a} (suc n) (pushSingleLinkedStack s a id)) (\s _ -> s)
-  ≡⟨ cong (\s -> popSingleLinkedStack s (\s _ -> s )) (push-and-n-pop n s) ⟩ 
-   popSingleLinkedStack (n-pop {_} {A} {a} n s) (\s _ -> s)
-  ≡⟨ refl ⟩
-    n-pop {_} {A} {a} (suc n) s
-  ∎
-  
-
-n-push-pop-equiv : {n : Level} {A : Set n} {a : A} (n : ℕ) (s : SingleLinkedStack A) -> (n-pop {_} {A} {a} n (n-push {_} {A} {a} n s)) ≡ s
-n-push-pop-equiv zero s            = refl
-n-push-pop-equiv {_} {A} {a} (suc n) s = begin
-    n-pop {_} {A} {a} (suc n) (n-push (suc n) s)
-  ≡⟨ refl ⟩
-    n-pop {_} {A} {a} (suc n) (pushSingleLinkedStack (n-push n s) a (\s -> s))
-  ≡⟨ push-and-n-pop n (n-push n s)  ⟩
-    n-pop {_} {A} {a} n (n-push n s)
-  ≡⟨ n-push-pop-equiv n s ⟩
-    s
-  ∎
-
-
-n-push-pop-equiv-empty : {n : Level} {A : Set n} {a : A} -> (n : ℕ) -> n-pop {_} {A} {a} n (n-push {_} {A} {a} n emptySingleLinkedStack)  ≡ emptySingleLinkedStack
-n-push-pop-equiv-empty n = n-push-pop-equiv n emptySingleLinkedStack
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/test/cbc_stack_test.c
--- a/src/parallel_execution/test/cbc_stack_test.c	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,62 +0,0 @@
-#include "../context.h"
-#include <assert.h>
-
-__code stack_test1(struct Context* context, struct Task* task, struct Stack* stack) {
-    task->code = C_stack_test1;
-    stack->next = C_stack_test2;
-    stack->data = (union Data*)task;
-    goto meta(context, stack->stack->Stack.push);
-}
-
-__code stack_test1_stub(struct Context* context) {
-    Task* task = &ALLOCATE(context, Task)->Task;
-    struct Stack* stack = &(createSingleLinkedStack(context)->Stack);
-    assert(stack->stack->SingleLinkedStack.top == NULL);
-    context->data[D_Stack]->Stack.stack = (union Data*)stack;
-    goto stack_test1(context,
-            task,
-            &context->data[D_Stack]->Stack);
-}
-
-__code stack_test2(struct Context* context, struct Task* task, struct Stack* stack) {
-    task->code = C_stack_test2;
-    stack->next = C_stack_test3;
-    stack->data = (union Data*)task;
-    goto meta(context, stack->stack->Stack.push);
-}
-
-__code stack_test2_stub(struct Context* context) {
-    assert(context->data[D_Stack]->Stack.stack->Stack.stack->SingleLinkedStack.top->data->Task.code == C_stack_test1);
-    Task* task = &ALLOCATE(context, Task)->Task;
-    goto stack_test2(context,
-            task,
-            &context->data[D_Stack]->Stack);
-}
-
-__code stack_test3(struct Context* context, struct Stack* stack) {
-    stack->next = C_stack_test4;
-    goto meta(context, stack->stack->Stack.pop);
-}
-
-__code stack_test3_stub(struct Context* context) {
-    assert(context->data[D_Stack]->Stack.stack->Stack.stack->SingleLinkedStack.top->data->Task.code == C_stack_test2);
-    goto stack_test3(context,
-            &context->data[D_Stack]->Stack);
-}
-
-__code stack_test4(struct Context* context) {
-    goto meta(context, C_exit_code);
-}
-
-__code stack_test4_stub(struct Context* context) {
-    assert(context->data[D_Stack]->Stack.data->Task.code == C_stack_test2);
-    assert(context->data[D_Stack]->Stack.stack->Stack.stack->SingleLinkedStack.top->data->Task.code == C_stack_test1);
-    goto stack_test4(context);
-}
-
-int main(int argc, char const* argv[]) {
-    struct Context* main_context = NEW(struct Context);
-    initContext(main_context);
-    main_context->next = C_stack_test1;
-    goto start_code(main_context);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/test/multiDimIterator_test.cbc
--- a/src/parallel_execution/test/multiDimIterator_test.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,78 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-#include <unistd.h>
-#interface "TaskManager.h"
-
-#include "../../context.h"
-
-int cpu_num = 1;
-int length = 1;
-int gpu_num = 0;
-int CPU_ANY = -1;
-int CPU_CUDA = -1;
-
-__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    // loopCounter->tree = createRedBlackTree(context);
-    loopCounter->i = 0;
-    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0);
-    goto code1();
-}
-
-__code code1(struct LoopCounter* loopCounter) {
-    printf("cpus:\t\t%d\n", cpu_num);
-    printf("gpus:\t\t%d\n", gpu_num);
-    printf("length:\t\t%d\n", length);
-    /* puts("queue"); */
-    /* print_queue(context->data[ActiveQueue]->queue.first); */
-    /* puts("tree"); */
-    /* print_tree(context->data[Tree]->tree.root); */
-    /* puts("result"); */
-
-    goto createTask1();
-}
-
-__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
-    int i = loopCounter->i;
-
-    if (i < length) {
-        loopCounter->i++;
-        goto createTask2();
-    }
-
-    loopCounter->i = 0;
-    taskManager->next = C_exit_code;
-    goto code2();
-}
-
-__code code2(struct TaskManager* taskManager) {
-    goto taskManager->shutdown(exit_code);
-}
-
-__code code2_stub(struct Context* context) {
-    goto code2(context, &Gearef(context, TaskManager)->taskManager->TaskManager);
-}
-
-__code createTask2(struct TaskManager* taskManager) {
-    par goto printIterator(iterate(2), __exit);
-    par goto printIterator(iterate(2, 2), __exit);
-    par goto printIterator(iterate(2, 2, 2), __exit);
-    goto createTask1();
-}
-
-void init(int argc, char** argv) {
-    for (int i = 1; argv[i]; ++i) {
-        if (strcmp(argv[i], "-cpu") == 0)
-            cpu_num = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-l") == 0)
-            length = (int)atoi(argv[i+1]);
-        else if (strcmp(argv[i], "-cuda") == 0) {
-            gpu_num = 1;
-            CPU_CUDA = 0;
-        }
-    }
-}
-
-int main(int argc, char** argv) {
-    goto initDataGears();
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/test/printIterator.cbc
--- a/src/parallel_execution/test/printIterator.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-#include "../../context.h"
-#include <stdio.h>
-__code printIterator(struct MultiDim* multiDim, __code next(...)) {
-    printf("x: %d, y: %d, z: %d\n", multiDim->x, multiDim->y, multiDim->z);
-    goto next(...);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/test/queue_test.cbc
--- a/src/parallel_execution/test/queue_test.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,51 +0,0 @@
-#include "../../context.h"
-#interface "Queue.h"
-#include <assert.h>
-
-__code queueTest1(struct Queue* queue) {
-    Node* node = new Node();
-    node->color = Red;
-    goto queue->put(node, queueTest2);
-}
-
-__code queueTest1_stub(struct Context* context) {
-    Queue* queue = createSingleLinkedQueue(context);
-    goto queueTest1(context, queue);
-}
-
-__code queueTest2(struct Queue* queue) {
-    Node* node = new Node();
-    node->color = Black;
-    goto queue->put(node, queueTest3);
-}
-
-__code queueTest2_stub(struct Context* context) {
-    SingleLinkedQueue* singleLinkedQueue = (SingleLinkedQueue*)GearImpl(context, Queue, queue);
-    assert(singleLinkedQueue->top->next->data->Node.color == Red);
-    assert(singleLinkedQueue->last->data->Node.color == Red);
-    Queue* queue = (struct Queue*)Gearef(context, Queue)->queue;
-    goto queueTest2(context, queue);
-}
-
-__code queueTest3(struct Queue* queue) {
-    goto queue->take(assert3);
-}
-
-__code queueTest3_stub(struct Context* context) {
-    SingleLinkedQueue* singleLinkedQueue = (SingleLinkedQueue*)GearImpl(context, Queue, queue);
-    assert(singleLinkedQueue->top->next->data->Node.color == Red);
-    assert(singleLinkedQueue->last->data->Node.color == Black);
-    Queue* queue = (struct Queue*)Gearef(context, Queue)->queue;
-    goto queueTest3(context, queue);
-}
-
-__code assert3(struct Queue* queue) {
-    SingleLinkedQueue* singleLinkedQueue = &queue->queue->Queue.queue->SingleLinkedQueue;
-    assert(singleLinkedQueue->top->next->data->Node.color == Black);
-    assert(singleLinkedQueue->last->data->Node.color == Black);
-    goto exit_code(context);
-}
-
-int main(int argc, char const* argv[]) {
-    goto queueTest1();
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/test/rbTree_test.cbc
--- a/src/parallel_execution/test/rbTree_test.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,87 +0,0 @@
-#include <stdio.h>
-#include "../../context.h"
-#interface "Tree.h"
-
-/* #include <assert.h> */
-
-__code rbTreeTest1(struct Tree* tree) {
-  printf("Test1\n");
-  Node* node = new Node();
-  node->value = (union Data*)new Int();
-  node->value->Int = 3;
-  node->key = 3;
-  printf("value->%d,key->%d\n",node->value->Int,node->key);
-  goto tree->put(node, rbTreeTest2);
-}
-
-__code rbTreeTest1_stub(struct Context* context) {
-  printf("test1_stub\n");
-  Tree* tree = createRedBlackTree(context);
-  goto rbTreeTest1(context,tree);
-}
-
-
-__code rbTreeTest2(struct Tree* tree) {
-  printf("Test2\n");
-  Node* node = new Node();
-  node->value = (union Data*)new Int();
-  node->value->Int = 4;
-  node->key = 4;
-  goto tree->put(node, rbTreeTest3);
-}
-
-__code rbTreeTest2_stub(struct Context* context) {
-  printf("test2_stub\n");
-  Tree* tree = (struct Tree*)Gearef(context, Tree)->tree;
-  goto rbTreeTest2(context,tree);
-}
-
-
-__code rbTreeTest3(struct Tree* tree) {
-  printf("test3\n");
-  Node* node = new Node();
-  node->value = (union Data*)new Int();
-  node->value->Int = 2;
-  node->key = 2;
-  goto tree->put(node, rbTreeTest4);
-}
-
-__code rbTreeTest3_stub(struct Context* context) {
-  Tree* tree = (struct Tree*)Gearef(context, Tree)->tree;
-  goto rbTreeTest3(context,tree);
-}
-
-__code rbTreeTest4(struct Tree* tree) {
-  printf("test4\n");
-  Node* node = new Node();
-  node->value = (union Data*)new Int();
-  node->value->Int = 8;
-  node->key = 8;
-  goto tree->put(node, rbTreeTest5);
-}
-
-__code rbTreeTest4_stub(struct Context* context) {
-  Tree* tree = (struct Tree*)Gearef(context, Tree)->tree;
-  goto rbTreeTest4(context,tree);
-}
-
-__code rbTreeTest5(struct Tree* tree) {
-  printf("test5\n");
-  Node* node = new Node();
-  node->value = (union Data*)new Int();
-  node->value->Int = 8;
-  node->key = 8;
-  goto tree->remove(node,exit_code);
-}
-
-__code rbTreeTest5_stub(struct Context* context) {
-  Tree* tree = (struct Tree*)Gearef(context, Tree)->tree;
-  goto rbTreeTest5(context,tree);
-}
-
-
-
-int main(int argc, char const* argv[]) {
-  printf("test_main\n");
-  goto rbTreeTest1();
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/test/stack_test.cbc
--- a/src/parallel_execution/test/stack_test.cbc	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,54 +0,0 @@
-#include "../../context.h"
-#interface "Stack.h"
-#include <assert.h>
-// use "TaskManager.h"
-
-__code stackTest1(struct Stack* stack) {
-    Node* node = new Node();
-    node->color = Red;
-    goto stack->push(node, stackTest2);
-}
-
-__code stackTest1_stub(struct Context* context) {
-    Stack* stack = createSingleLinkedStack(context);
-    goto stackTest1(context, stack);
-}
-
-__code stackTest2(struct Stack* stack) {
-    Node* node = new Node();
-    node->color = Black;
-    goto stack->push(node, stackTest3);
-}
-
-__code stackTest2_stub(struct Context* context) {
-    SingleLinkedStack* singleLinkedStack = (SingleLinkedStack*)GearImpl(context, Stack, stack);
-    assert(singleLinkedStack->top->data->Node.color == Red);
-    Stack* stack = (struct Stack*)Gearef(context, Stack)->stack;
-    goto stackTest2(context, stack);
-}
-
-__code stackTest3(struct Stack* stack) {
-    goto stack->pop(assert3);
-}
-
-__code stackTest3_stub(struct Context* context) {
-    /*
-        assert on stack implementation
-    */
-    SingleLinkedStack* singleLinkedStack = (SingleLinkedStack*)GearImpl(context, Stack, stack);
-    assert(singleLinkedStack->top->data->Node.color == Black);
-    Stack* stack = (struct Stack*)Gearef(context, Stack)->stack;
-    goto stackTest3(context, stack);
-} 
-
-__code assert3(struct Node* node, struct Stack* stack) {
-    /*
-        assert in normal level
-    */
-    assert(node->color == Red);
-    goto exit_code(0);
-}
-
-int main(int argc, char const* argv[]) {
-    goto stackTest1();
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/tmp_tool/_orig_context.h
--- a/src/parallel_execution/tmp_tool/_orig_context.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,454 +0,0 @@
-/* Context definition for llrb example */
-#ifndef CONTEXT_H
-#define CONTEXT_H
-#include <stdlib.h>
-#include <pthread.h>
-#ifdef USE_CUDAWorker
-#include <cuda.h>
-#include <driver_types.h>
-#include <cuda_runtime.h>
-#include "helper_cuda.h"
-#endif
-
-#define ALLOCATE_SIZE 20000000
-#define NEW(type) (type*)(calloc(1, sizeof(type)))
-#define NEWN(n, type) (type*)(calloc(n, sizeof(type)))
-
-#define ALLOC_DATA(context, dseg) ({\
-    Meta* meta = (Meta*)context->heap;\
-    meta->type = D_##dseg;\
-    meta->size = sizeof(dseg);\
-    meta->len = 1;\
-    context->heap += sizeof(Meta);\
-    context->data[D_##dseg] = context->heap; context->heap += sizeof(dseg); (dseg *)context->data[D_##dseg]; })
-
-#define ALLOC_DATA_TYPE(context, dseg, t) ({\
-    Meta* meta = (Meta*)context->heap;\
-    meta->type = D_##t;\
-    meta->size = sizeof(t);\
-    meta->len = 1;\
-    context->heap += sizeof(Meta);\
-    context->data[D_##dseg] = context->heap; context->heap += sizeof(t); (t *)context->data[D_##dseg]; })
-
-#define ALLOCATE(context, t) ({ \
-    Meta* meta = (Meta*)context->heap;\
-    context->heap += sizeof(Meta);\
-    union Data* data = context->heap; \
-    context->heap += sizeof(t); \
-    meta->type = D_##t; \
-    meta->size = sizeof(t);     \
-    meta->len = 1;\
-    data; })
-
-#define ALLOCATE_ARRAY(context, t, length) ({ \
-    Meta* meta = (Meta*)context->heap;\
-    context->heap += sizeof(Meta);\
-    union Data* data = context->heap; \
-    context->heap += sizeof(t)*length; \
-    meta->type = D_##t; \
-    meta->size = sizeof(t)*length; \
-    meta->len = length; \
-    data;   })
-
-#define ALLOCATE_PTR_ARRAY(context, dseg, length) ({\
-    Meta* meta = (Meta*)context->heap;\
-    context->heap += sizeof(Meta);\
-    union Data* data = context->heap; \
-    context->heap += sizeof(dseg *)*length; \
-    meta->type = D_##dseg; \
-    meta->size = sizeof(dseg *)*length; \
-    meta->len = length; \
-    data; })
-
-#define ALLOCATE_DATA_GEAR(context, t) ({ \
-        union Data* data = ALLOCATE(context, t); \
-        Meta* meta = GET_META(data); \
-        meta->wait = createSynchronizedQueue(context); \
-        data; })
-
-#define ALLOC(context, t) (&ALLOCATE(context, t)->t)
-
-#define GET_META(dseg) ((Meta*)(((void*)dseg) - sizeof(Meta)))
-#define GET_TYPE(dseg) (GET_META(dseg)->type)
-#define GET_SIZE(dseg) (GET_META(dseg)->size)
-#define GET_LEN(dseg) (GET_META(dseg)->len)
-#define GET_WAIT_LIST(dseg) (GET_META(dseg)->wait)
-
-#define Gearef(context, t) (&(context)->data[D_##t]->t)
-
-// (SingleLinkedStack *)context->data[D_Stack]->Stack.stack->Stack.stack
-
-#define GearImpl(context, intf, name) (Gearef(context, intf)->name->intf.name)
-
-#include "c/enumCode.h"
-
-enum Relational {
-    EQ,
-    GT,
-    LT,
-};
-
-#include "c/enumData.h"
-
-struct Context {
-    enum Code next;
-    struct Worker* worker;
-    struct TaskManager* taskManager;
-    int codeNum;
-    __code (**code) (struct Context*);
-    union Data **data;
-    void* heapStart;
-    void* heap;
-    long heapLimit;
-    int dataNum;
-
-    // task parameter
-    int idgCount; //number of waiting dataGear
-    int idg;
-    int maxIdg;
-    int odg;
-    int maxOdg;
-    int gpu; // GPU task
-    struct Context* task;
-    struct Element* taskList;
-#ifdef USE_CUDAWorker
-    int num_exec;
-    CUmodule module;
-    CUfunction function;
-#endif
-    /* multi dimension parameter */
-    int iterate;
-    struct Iterator* iterator;
-    enum Code before;
-};
-
-typedef int Int;
-#ifndef USE_CUDAWorker
-typedef unsigned long long CUdeviceptr;
-#endif
-union Data {
-    struct Meta {
-        enum DataType type;
-        long size;
-        long len;
-        struct Queue* wait; // tasks waiting this dataGear
-    } Meta;
-    struct Context Context;
-    struct Timer {
-        union Data* timer;
-        enum Code start;
-        enum Code end;
-        enum Code next;
-    } Timer;
-    struct TimerImpl {
-        double time;
-    } TimerImpl;
-    struct LoopCounter {
-        int i;
-    } LoopCounter;
-    struct TaskManager {
-        union Data* taskManager;
-        enum Code spawn;      // start NEW context on the worker
-        enum Code spawnTasks; // start NEW tasks on the worker
-        enum Code shutdown;
-        enum Code incrementTaskCount;
-        enum Code decrementTaskCount;
-        enum Code next;
-        enum Code next1;
-        enum Code setWaitTask;
-        struct Context* task;
-        struct Element* taskList;
-        union Data* data;
-    } TaskManager;
-    struct TaskManagerImpl {
-        enum Code next;
-        int numWorker;
-        int sendCPUWorkerIndex;
-        int sendGPUWorkerIndex;
-        int taskCount;
-        pthread_mutex_t mutex;
-        struct Queue* activeQueue;
-        struct Worker** workers;
-        struct Element* taskList;
-        int loopCounter;
-        int cpu;
-        int gpu;
-        int io;
-        int maxCPU;
-    } TaskManagerImpl;
-    struct Worker {
-        union Data* worker;
-        enum Code taskReceive;
-        enum Code shutdown;
-        enum Code next;
-        struct Queue* tasks;
-        pthread_t thread;
-        struct TaskManager* taskManager;
-        struct Context* task;
-    } Worker;
-    struct CPUWorker {
-        pthread_mutex_t mutex;
-        pthread_cond_t cond;
-        struct Context* context;
-        int id;
-        int loopCounter;
-    } CPUWorker;
-#ifdef USE_CUDAWorker
-    struct CUDAWorker {
-        CUdevice device;
-        CUcontext cuCtx;
-        struct Context* context;
-        int id;
-        int loopCounter;
-        int deviceNum;
-        struct Queue* tasks;
-        int runFlag;
-        enum Code next;
-        int numStream;
-        struct Executor* executor;
-        CUstream *stream;
-    } CUDAWorker;
-#else
-    struct CUDAWorker {
-    } CUDAWorker;
-#endif
-    struct Main {
-        enum Code code;
-        enum Code next;
-        struct Queue* args;
-    } Main;
-    // Queue Interface
-    struct Queue {
-        union Data* queue;
-        union Data* data;
-        enum Code whenEmpty;
-        enum Code clear;
-        enum Code put;
-        enum Code take;
-        enum Code isEmpty;
-        enum Code next;
-    } Queue;
-    struct SingleLinkedQueue {
-        struct Element* top;
-        struct Element* last;
-    } SingleLinkedQueue;
-    struct SynchronizedQueue {
-        struct Element* top;
-        struct Element* last;
-        struct Atomic* atomic;
-    } SynchronizedQueue;
-    // Stack Interface
-    struct Stack {
-        union Data* stack;
-        union Data* data;
-        union Data* data1;
-        enum Code whenEmpty;
-        enum Code clear;
-        enum Code push;
-        enum Code pop;
-        enum Code pop2;
-        enum Code isEmpty;
-        enum Code get;
-        enum Code get2;
-        enum Code next;
-    } Stack;
-    // Stack implementations
-    struct SingleLinkedStack {
-        struct Element* top;
-    } SingleLinkedStack;
-    struct ArrayStack {
-        int size;
-        int limit;
-        struct Element* array;
-    } ArrayStack;
-    // Stack implementation end
-    struct Element {
-        union Data* data;
-        struct Element* next;
-    } Element;
-    struct Array {
-        int prefix;
-        Int* array;
-    } Array;
-    struct Tree {
-        union Data* tree;
-        struct Node* node;
-        enum Code put;
-        enum Code get;
-        enum Code remove;
-        enum Code clear;
-        enum Code next;
-    } Tree;
-    struct RedBlackTree {
-        struct Node* root;
-        struct Node* current; // reading node of original tree
-        struct Node* previous; // parent of reading node of original tree
-        struct Node* newNode; // writing node of new tree
-        struct Node* parent;
-        struct Node* grandparent;
-        struct Stack* nodeStack;
-        enum Code findNodeNext;
-        int result;
-    } RedBlackTree;
-    struct RotateTree {
-        enum Code next;
-        struct RedBlackTree* traverse;
-        struct Tree* tree;
-    } RotateTree;
-    struct Node {
-        int key; // comparable data segment
-        union Data* value;
-        struct Node* left;
-        struct Node* right;
-        // need to balancing
-        enum Color {
-            Red,
-            Black,
-            // Red eq 0,Black eq 1. enum name convert intager.
-        } color;
-    } Node;
-    struct Atomic {
-        union Data* atomic;
-        union Data** ptr;
-        union Data* oldData;
-        union Data* newData;
-        enum Code checkAndSet;
-        enum Code next;
-        enum Code fail;
-    } Atomic;
-    struct AtomicReference {
-    } AtomicReference;
-    struct Semaphore {
-        union Data* semaphore;
-        enum Code p;
-        enum Code v;
-        enum Code next;
-    } Semaphore;
-    struct SemaphoreImpl {
-        int value;
-        struct Lock* lock;
-        struct Queue* waitThreadQueue;
-    } SemaphoreImpl;
-    struct Allocate {
-        enum Code next;
-        long size;
-    } Allocate;
-    struct Integer {
-        int value;
-    } Integer;
-    struct SortArray {
-        union Data* sortArray;
-        struct Integer *array; //Array arrayじゃできない？
-        int loopCounter;
-        int block;
-        int first;
-        int prefix;
-    } SortArray;
-    struct Iterator {
-        union Data* iterator;
-        struct Context* task;
-        int numGPU;
-        enum Code exec;
-        enum Code barrier;
-        enum Code whenWait;
-        enum Code next;
-    } Iterator;
-    struct MultiDimIterator {
-        int x;
-        int y;
-        int z;
-        int count;
-        int counterX;
-        int counterY;
-        int counterZ;
-    } MultiDimIterator;
-    struct MultiDim {
-        int x;
-        int y;
-        int z;
-    } MultiDim;
-    struct Executor {
-        union Data* executor;
-        struct Context* task;
-        enum Code read;
-        enum Code exec;
-        enum Code write;
-        enum Code next;
-    } Executor;
-#ifdef USE_CUDAWorker
-    struct CUDAExecutor {
-        CUdeviceptr** kernelParams;
-        struct CUDABuffer* buffer;
-        int maxThreadPerBlock;
-        int maxThreadPerBlockX;
-        int maxThreadPerBlockY;
-        int maxThreadPerBlockZ;
-        struct Timer* timer;
-    } CUDAExecutor;
-    struct CUDABuffer {
-        int inputLen;
-        int outputLen;
-        union Data** inputData;
-        union Data** outputData;
-    } CUDABuffer;
-    CUdeviceptr CUdeviceptr;
-#else
-    struct CUDAExecutor {
-    } CUDAExecutor;
-    struct CUDABuffer {
-    } CUDABuffer;
-    CUdeviceptr CUdeviceptr;
-#endif
-    Int Int;
-    struct Memory {
-        union Data* adr;
-        int length;
-        union Data* body;
-        int hash;
-    } Memory;
-    struct Buffer {
-        union Data* buffer;
-        union Data* data;
-        enum Code put;
-        enum Code take;
-        enum Code next;
-    } Buffer;
-    struct BoundedBuffer {
-        struct Element* top;
-        struct Element* last;
-        struct Semaphore* fullCount;
-        struct Semaphore* emptyCount;
-        struct Semaphore* lock;
-    } BoundedBuffer;
-    struct Lock {
-        union Data* lock;
-        enum Code doLock;
-        enum Code doUnlock;
-        enum Code next;
-    } Lock;
-    struct LockImpl {
-        Int* lock;
-        struct Queue* waitThreadQueue;
-        struct Atomic* atomic;
-        struct Context* lockContext;
-    } LockImpl;
-    struct SpinLock {
-        volatile Int* lock;
-        struct Atomic* atomic;
-        struct Context* lockContext;
-    } SpinLock;
-}; // union Data end       this is necessary for context generator
-typedef union Data Data;
-
-#include "c/typedefData.h"
-
-#include "c/extern.h"
-
-extern __code start_code(struct Context* context);
-extern __code exit_code(struct Context* context);
-extern __code meta(struct Context* context, enum Code next);
-//extern __code par_meta(struct Context* context, enum Code spawns, enum Code next);
-extern __code parGotoMeta(struct Context* context, enum Code next);
-extern void initContext(struct Context* context);
-
-#endif
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/tmp_tool/parse_cerate_each_context.pl
--- a/src/parallel_execution/tmp_tool/parse_cerate_each_context.pl	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,147 +0,0 @@
-#!/usr/bin/env perl
-use strict;
-use warnings;
-use DDP {deparse => 1};
-
-my @already_defined = qw/
-Atomic
-Buffer
-CodeGear
-Executor
-Iterator
-Lock
-Queue
-Semaphore
-Stack
-TaskManager
-Timer
-Tree
-Worker
-SingleLinkedStack
-SortArray
-/;
-
-my %already_defined_hash;
-map { $already_defined_hash{$_}++ } @already_defined;
-
-my $context = shift // "context.h";
-
-open my $fh, '<', $context;
-while (my $line = <$fh>) {
-  if ($line =~ /^union Data \{/) {
-    last;
-  }
-}
-
-my @context_cg_str = <$fh>;
-close($fh);
-chomp @context_cg_str;
-my $res = {};
-
-while (my $line = shift @context_cg_str) {
-  if ($line =~ /\s*struct\s*(\w+)\s*\{/) {
-    my $struct = $1;
-    if (exists $already_defined_hash{$struct}) {
-      next;
-    }
-    $line = shift @context_cg_str;
-    while ($line !~  /\}\s*$struct/) {
-        $line =~ s/\s+([\*\w ]+);/$1/;
-        push (@{$res->{$struct}},$line);
-        $line = shift @context_cg_str ;
-    }
-    unless (defined $res->{$struct}) {
-        push (@{$res->{$struct}},"");
-    }
-  }
-}
-
-map { print "$_\n" } keys %$res;
-my %impl2inter = (
-  SpinLock          => "Lock",
-  CUDAWorker        => "Worker",
-  RedBlackTree      => "Tree",
-  AtomicReference   => "Atomic",
-  CPUWoker          => "Woker",
-  MultiDimIterator  => "Iterator",
-  CUDAExecutor      => "Executor",
-  SingleLinkedStack => "Stack",
-  SingleLinkedQueue => "Queue",
-  SynchronizedQueue => "Queue",
-);
-
-for my $dg_name (keys %$res) {
-  if ($dg_name =~ /(\w+)Impl/) {
-      create_impl_file($dg_name,$res->{$dg_name},$1);
-      next;
-  }
-
-  if (exists $impl2inter{$dg_name}) {
-      create_impl_file($dg_name,$res->{$dg_name},$impl2inter{$dg_name});
-      next;
-  }
-  create_inter_file($dg_name,$res->{$dg_name});
-}
-
-sub create_impl_file {
-  my ($name, $contents,$interface) = @_;
-  my $str = "typedef struct $name <Type, Isa> impl $interface {\n";
-  create_file("impl/$name.h",$contents,$str,$name);
-}
-
-sub create_inter_file {
-  my ($name, $contents) = @_;
-  my $str = "typedef struct $name <Type, Impl> {\n";
-  create_file("interface/$name.h",$contents,$str,$name);
-}
-
-sub create_file {
-  my ($file_name, $contents, $str, $name) = @_;
-  my $space = "  ";
-  for my $co (@$contents) {
-    if ($co =~ /enum\s*Code\s*(\w+)/) {
-      $str .= "${space}__code $1(...);\n";
-      next;
-    }
-    chomp $co;
-    $str .= "${space}$co;\n";
-  }
-  open my $fh, '>', "$ENV{PWD}/plautogen/$file_name" or die "oops! $file_name\n";
-  print $fh $str;
-  print $fh "} $name;\n";
-  close $fh;
-}
-
-sub print_impl {
-  my ($out, $name, $cg_info) = @_;
-  print $out "typedef strcut $name<Impl, Bot> {\n";
-}
-
-__DATA__
-SpinLock
-Main
-CUDAExecutor
-TaskManagerImpl
-LockImpl
-MultiDim
-SynchronizedQueue
-ArrayStack
-LoopCounter
-TimerImpl
-Node
-CUDAWorker
-Memory
-SemaphoreImpl
-BoundedBuffer
-RotateTree
-CUDABuffer
-Array
-Allocate
-Meta
-SingleLinkedQueue
-CPUWorker
-Integer
-MultiDimIterator
-Element
-RedBlackTree
-
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/trans_impl.pl
--- a/src/parallel_execution/trans_impl.pl	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,180 +0,0 @@
-#!/usr/bin/env perl
-use strict;
-use warnings;
-
-use FindBin;
-use lib "$FindBin::Bin/lib";
-use Gears::Util;
-
-use Getopt::Std;
-
-my %opt;
-getopts("w" => \%opt);
-
-my $impl_file = shift or die 'require impl file';
-my $impl_ir         = Gears::Util->parse_with_rewrite($impl_file);
-my $interface_file  = Gears::Util->find_header($impl_ir->{isa},"$FindBin::Bin");
-
-my $inter_ir        = Gears::Util->parse_with_rewrite($interface_file);
-
-
-my $output_file = $impl_file;
-$output_file =~ s/\.h/.cbc/;
-open my $fh, '>', $output_file;
-my $stdout    = $fh;
-
-unless ($opt{w}) {
-    $stdout    = *STDOUT;
-}
-
-emit_include_part($stdout, $inter_ir->{name});
-emit_impl_header_in_comment($stdout, $impl_file);
-emit_constracutor($stdout,$impl_ir,$inter_ir);
-emit_code_gears($stdout,$impl_ir,$inter_ir);
-close $fh;
-
-sub emit_include_part {
-  my ($out, $interface) = @_;
-  print $out <<"EOF"
-#include "../context.h";
-#interface "$interface.h";
-
-EOF
-}
-
-sub emit_impl_header_in_comment {
-  my ($out, $impl_file) = @_;
-  my $line =  Gears::Util->slup($impl_file);
-  print $out "// ----\n";
-  map { print $out "// $_\n" } split /\n/, $line;
-  print $out "// ----\n\n";
-}
-
-sub emit_constracutor {
-  my ($out, $impl_ir, $inter_ir) = @_;
-
-  my @inter_data     = @{$inter_ir->{data}};
-  my @impl_data      = @{$impl_ir->{data}};
-  my $instance_inter = shift @inter_data;
-
-  if ($instance_inter =~ /union\s*Data\*\s*(\w+)/) {
-    $instance_inter = $1;
-  }
-
-  my $instance_impl  = lcfirst $impl_ir->{name};
-  $instance_impl     =~ s/([A-Z])/_\l$1/g;
-
-  print $out <<"EOF";
-$impl_ir->{isa}* create$impl_ir->{name}(struct Context* context) {
-    struct $impl_ir->{isa}* $instance_inter  = new $impl_ir->{isa}();
-    struct $impl_ir->{name}* $instance_impl = new $impl_ir->{name}();
-    $instance_inter->$instance_inter = (union Data*)$instance_impl;
-EOF
-
-  for my $datum (@impl_data) {
-        if ($datum =~ /\w+ \w+\* (\w+)/) {
-            print $out "    ${instance_impl}->$1 = NULL;\n";
-            next;
-        }
-        if ($datum =~ /\w+ \w+ (\w+)/) {
-            print $out "    ${instance_impl}->$1 = 0;\n";
-        }
-  }
-
-  for my $datum (@inter_data) {
-        if ($datum =~ /\w+ \w+\* (\w+)/) {
-            print $out "    ${instance_inter}->$1 = NULL;\n";
-            next;
-        }
-        if ($datum =~ /\w+ \w+ (\w+)/) {
-            print $out "    ${instance_inter}->$1 = 0;\n";
-        }
-  }
-
-  for my $code (@{$inter_ir->{codes}}) {
-      my $code_gear = $code->{name};
-      print $out "    ${instance_inter}->$code_gear = C_$code_gear$impl_ir->{name};\n"
-  }
-
-print $out "    return $instance_inter;\n";
-print $out "}\n";
-}
-
-
-sub emit_code_gears {
-  my ($out, $impl_ir, $inter_ir) = @_;
-  my $impl = $impl_ir->{name};
-  my $interface_name = $inter_ir->{name};
-
-  my @inter_data = @{$inter_ir->{data}};
-  my $instance_inter = shift @inter_data;
-  if ($instance_inter =~ /union\s*Data\*\s*(\w+)/) {
-    $instance_inter = $1;
-  }
-  my $instance_impl = lcfirst $impl_ir->{name};
-  $instance_impl =~ s/([A-Z])/_\l$1/g;
-  my $data_gear_types = {};
-
-  if (defined $impl_ir->{codes}) {
-    for my $cg (@{$impl_ir->{codes}}) {
-      my $data_gears = $cg->{args};
-      while ($data_gears =~ /Type\*\s*(\w+),/g) {
-          $data_gears =~ s/Type\*/struct $impl*/;
-      }
-
-      while ($data_gears =~ /Isa\*\s*(\w+),/g) {
-          $data_gears =~ s/Isa\*/struct $interface_name*/;
-      }
-      print $out "__code $cg->{name}$impl(";
-      print $out "$data_gears) {\n\n";
-
-      #__code next(...), __code whenEmpty(...)
-      my @cg = ();
-      while ($data_gears =~ /__code ([\w(\.)\*\s,]+?\)),?/g) {
-        push(@cg, $1);
-      }
-
-      if (@cg) {
-        if (@cg == 2) {
-          print $out "  if (:TODO:) {\n";
-          print $out "       goto ",shift(@cg),";\n";
-          print $out "  }\n";
-          print $out "  goto ",shift(@cg),";\n";
-        } else {
-          print $out "  goto ",shift(@cg),";\n";
-        }
-      }
-      print $out "}\n\n";
-    }
-  }
-
-  for my $code_ir (@{$inter_ir->{codes}}) {
-    my $data_gears = $code_ir->{args};
-    $data_gears =~ s/Impl/struct $impl/g;
-
-    while ($data_gears =~ /Type\*\s*(\w+),/g) {
-        $data_gears =~ s/Type\*/struct $interface_name*/;
-    }
-
-    print $out "__code $code_ir->{name}$impl(";
-    print $out "$data_gears) {\n\n";
-
-    #__code next(...), __code whenEmpty(...)
-    my @cg = ();
-    while ($data_gears =~ /__code ([\w(\.)\*\s,]+?\)),?/g) {
-      push(@cg, $1);
-    }
-
-    if (@cg) {
-      if (@cg == 2) {
-        print $out "  if (:TODO:) {\n";
-        print $out "       goto ",shift(@cg),";\n";
-        print $out "  }\n";
-        print $out "  goto ",shift(@cg),";\n";
-      } else {
-        print $out "  goto ",shift(@cg),";\n";
-      }
-    }
-    print $out "}\n\n";
-  }
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/update_context.pl
--- a/src/parallel_execution/update_context.pl	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-#!/usr/bin/env perl
-use strict;
-use warnings;
-use Getopt::Std;
-
-use FindBin;
-use lib "$FindBin::Bin/lib";
-use Gears::Util;
-
-my %opt;
-getopts("wc" => \%opt);
-
-my $interface_file = shift or die "require itnerface file";
-my $h2context = Gears::Util->parse_interface($interface_file);
-
-my $context = Gears::Util->h2context_str($h2context);
-
-if ($opt{c}) {
-  print "$context";
-  exit 0;
-}
-
-my ($first,$last) = slup_context_h($h2context->{name});
-
-if ($opt{w}) {
-  context_write(@{$first},$context,@{$last});
-} else {
-  context_dump(@{$first},$context,@{$last});
-}
-
-
-sub slup_context_h {
-  open my $fh, '<', 'context.h';
-  
-  my $data_gear_name = shift;
-
-  my @first_context_headers = ();
-  my @last_context_headers = ();
-  
-  while (my $line = <$fh>) {
-    if ( $line =~ /union Data end/) {
-      push(@last_context_headers, $line);
-      push(@last_context_headers, <$fh>);
-      last;
-    }
-    if ( $line =~ /struct $data_gear_name/) {
-      print "WARN! $data_gear_name struct already exists\n";
-      exit 1;
-    }
-    push(@first_context_headers, $line);
-  }
-  
-  close $fh;
-  
-  #print "@first_context_headers\n";
-  #print "@last_context_headers\n";
-  return (\@first_context_headers,\@last_context_headers);
-}
-
-sub context_dump {
-  for my $line (@_) {
-    print "$line";
-  }
-}
-
-sub context_write {
-  open my $fh, '>', "context.h";
-  for my $line (@_) {
-    print $fh "$line";
-  }
-  close $fh;
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/verifier/llrbContextWithVerifier.c
--- a/src/parallel_execution/verifier/llrbContextWithVerifier.c	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,46 +0,0 @@
-#include <stdio.h>
-#include "llrbContextWithVerifier.h"
-
-unsigned int min_height(struct Node* node, unsigned int height) {
-    if ((node->left == NULL) && (node->right == NULL)) return height;
-    if (node->left  == NULL) return min_height(node->right, height+1);
-    if (node->right == NULL) return min_height(node->left, height+1);
-
-    unsigned int left_min  = min_height(node->left, height+1);
-    unsigned int right_min = min_height(node->right, height+1);
-
-    if (left_min < right_min) {
-        return left_min;
-    } else {
-        return right_min;
-    }
-}
-
-unsigned int max_height(struct Node* node, unsigned int height) {
-    if ((node->left == NULL) && (node->right == NULL)) return height;
-    if (node->left  == NULL) return max_height(node->right, height+1);
-    if (node->right == NULL) return max_height(node->left, height+1);
-
-    unsigned int left_max  = max_height(node->left, height+1);
-    unsigned int right_max = max_height(node->right, height+1);
-
-    if (left_max > right_max) {
-        return left_max;
-    } else {
-        return right_max;
-    }
-}
-
-void verify_tree_height(struct Node* root) {
-    if (root == NULL) return;
-
-    unsigned int min_h = min_height(root, 1);
-    unsigned int max_h = max_height(root, 1);
-
-    if (max_h >= 2*min_h) {
-        printf("llrb-condition violated.\n");
-        printf("\tmin-height %u", min_h);
-        printf("\tmax-height %u", max_h);
-        exit(EXIT_FAILURE);
-    }
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/verifier/llrbContextWithVerifier.h
--- a/src/parallel_execution/verifier/llrbContextWithVerifier.h	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-#include "llrbContext.h"
-
-unsigned int min_height(struct Node* node, unsigned int height);
-unsigned int max_height(struct Node* node, unsigned int height);
-void verify_tree_height(struct Node* root);
diff -r a4cab67624f7 -r 9146d6017f18 src/parallel_execution/verifier/verify_put_cs.c
--- a/src/parallel_execution/verifier/verify_put_cs.c	Thu Jan 16 15:11:11 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,29 +0,0 @@
-/* Verification of LLRB-Tree height in put operations.
- * LLRB-Tree allows (max-height) <= 2*(min-height).
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <time.h>
-#include "llrbContextWithVerifier.h"
-
-__code meta(struct Context* context, enum Code next) {
-    if (next == Put) {
-        verify_tree_height(context->data[Tree]->tree.root);
-    }
-    goto (context->code[next])(context);
-}
-
-__code start_code(struct Context* context, enum Code next) {
-    unsigned int seed = (unsigned int)time(NULL);
-
-    printf("--- srand(%u)\n", seed);
-    goto meta(context, next);
-}
-
-__code exit_code(struct Context* context) {
-    free(context->code);
-    free(context->data);
-    free(context->heapStart);
-    goto exit(0);
-}
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/impl/AtomicReference.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/impl/AtomicReference.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,2 @@
+typedef struct AtomicReference <Type, Isa> impl Atomic {
+} AtomicReference;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/impl/CUDAExecutor.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/impl/CUDAExecutor.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,9 @@
+typedef struct CUDAExecutor <Type, Isa> impl Executor {
+  CUdeviceptr** kernelParams;
+  struct CUDABuffer* buffer;
+  int maxThreadPerBlock;
+  int maxThreadPerBlockX;
+  int maxThreadPerBlockY;
+  int maxThreadPerBlockZ;
+  struct Timer* timer;
+} CUDAExecutor;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/impl/CUDAWorker.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/impl/CUDAWorker.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,14 @@
+typedef struct CUDAWorker <Type, Isa> impl Worker {
+  CUdevice device;
+  CUcontext cuCtx;
+  struct Context* context;
+  int id;
+  int loopCounter;
+  int deviceNum;
+  struct Queue* tasks;
+  int runFlag;
+  __code next(...);
+  int numStream;
+  struct Executor* executor;
+  CUstream *stream;
+} CUDAWorker;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/impl/LockImpl.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/impl/LockImpl.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,6 @@
+typedef struct LockImpl <Type, Isa> impl Lock {
+  Int* lock;
+  struct Queue* waitThreadQueue;
+  struct Atomic* atomic;
+  struct Context* lockContext;
+} LockImpl;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/impl/MultiDimIterator.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/impl/MultiDimIterator.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,9 @@
+typedef struct MultiDimIterator <Type, Isa> impl Iterator {
+  int x;
+  int y;
+  int z;
+  int count;
+  int counterX;
+  int counterY;
+  int counterZ;
+} MultiDimIterator;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/impl/RedBlackTree.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/impl/RedBlackTree.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,11 @@
+typedef struct RedBlackTree <Type, Isa> impl Tree {
+  struct Node* root;
+  struct Node* current; // reading node of original tree
+  struct Node* previous; // parent of reading node of original tree
+  struct Node* newNode; // writing node of new tree
+  struct Node* parent;
+  struct Node* grandparent;
+  struct Stack* nodeStack;
+  __code findNodeNext(...);
+  int result;
+} RedBlackTree;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/impl/SemaphoreImpl.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/impl/SemaphoreImpl.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,5 @@
+typedef struct SemaphoreImpl <Type, Isa> impl Semaphore {
+  int value;
+  struct Lock* lock;
+  struct Queue* waitThreadQueue;
+} SemaphoreImpl;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/impl/SingleLinkedQueue.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/impl/SingleLinkedQueue.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,4 @@
+typedef struct SingleLinkedQueue <Type, Isa> impl Queue {
+  struct Element* top;
+  struct Element* last;
+} SingleLinkedQueue;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/impl/SingleLinkedStack.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/impl/SingleLinkedStack.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,26 @@
+typedef struct SingleLinkedStack<Type, Isa> impl Stack {
+  struct Element* top;
+} SingleLinkedStack;
+
+/*
+    // Stack Interface
+    struct Stack {
+        union Data* stack;
+        union Data* data;
+        union Data* data1;
+        enum Code whenEmpty;
+        enum Code clear;
+        enum Code push;
+        enum Code pop;
+        enum Code pop2;
+        enum Code isEmpty;
+        enum Code get;
+        enum Code get2;
+        enum Code next;
+    } Stack;
+    // Stack implementations
+    struct SingleLinkedStack {
+        struct Element* top;
+    } SingleLinkedStack;
+    */
+
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/impl/SpinLock.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/impl/SpinLock.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,5 @@
+typedef struct SpinLock <Type, Isa> impl Lock {
+  volatile Int* lock;
+  struct Atomic* atomic;
+  struct Context* lockContext;
+} SpinLock;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/impl/SynchronizedQueue.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/impl/SynchronizedQueue.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,5 @@
+typedef struct SynchronizedQueue <Type, Isa> impl Queue {
+  struct Element* top;
+  struct Element* last;
+  struct Atomic* atomic;
+} SynchronizedQueue;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/impl/TaskManagerImpl.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/impl/TaskManagerImpl.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,16 @@
+typedef struct TaskManagerImpl <Type, Isa> impl TaskManager {
+  int numWorker;
+  int sendCPUWorkerIndex;
+  int sendGPUWorkerIndex;
+  int taskCount;
+  pthread_mutex_t mutex;
+  struct Queue* activeQueue;
+  struct Worker** workers;
+  struct Element* taskList;
+  int loopCounter;
+  int cpu;
+  int gpu;
+  int io;
+  int maxCPU;
+  __code next(...);
+} TaskManagerImpl;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/impl/TimerImpl.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/impl/TimerImpl.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,3 @@
+typedef struct TimerImpl <Type, Isa> impl Timer {
+  double time;
+} TimerImpl;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/interface/Allocate.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/interface/Allocate.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,4 @@
+typedef struct Allocate <Type, Impl> {
+  __code next(...);
+  long size;
+} Allocate;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/interface/Array.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/interface/Array.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,4 @@
+typedef struct Array <Type, Impl> {
+  int prefix;
+  struct Integer* array;
+} Array;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/interface/ArrayStack.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/interface/ArrayStack.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,5 @@
+typedef struct ArrayStack <Type, Impl> {
+  int size;
+  int limit;
+  struct Element* array;
+} ArrayStack;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/interface/BoundedBuffer.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/interface/BoundedBuffer.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,7 @@
+typedef struct BoundedBuffer <Type, Impl> {
+  struct Element* top;
+  struct Element* last;
+  struct Semaphore* fullCount;
+  struct Semaphore* emptyCount;
+  struct Semaphore* lock;
+} BoundedBuffer;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/interface/CPUWorker.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/interface/CPUWorker.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,7 @@
+typedef struct CPUWorker <Type, Impl> {
+  pthread_mutex_t mutex;
+  pthread_cond_t cond;
+  struct Context* context;
+  int id;
+  int loopCounter;
+} CPUWorker;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/interface/CUDABuffer.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/interface/CUDABuffer.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,6 @@
+typedef struct CUDABuffer <Type, Impl> {
+  int inputLen;
+  int outputLen;
+  union Data** inputData;
+  union Data** outputData;
+} CUDABuffer;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/interface/Element.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/interface/Element.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,4 @@
+typedef struct Element <Type, Impl> {
+  union Data* data;
+  struct Element* next;
+} Element;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/interface/Integer.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/interface/Integer.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,3 @@
+typedef struct Integer <Type, Impl> {
+  int value;
+} Integer;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/interface/LoopCounter.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/interface/LoopCounter.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,3 @@
+typedef struct LoopCounter <Type, Impl> {
+  int i;
+} LoopCounter;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/interface/Main.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/interface/Main.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,5 @@
+typedef struct Main <Type, Impl> {
+  __code code(...);
+  __code next(...);
+  struct Queue* args;
+} Main;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/interface/Memory.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/interface/Memory.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,6 @@
+typedef struct Memory <Type, Impl> {
+  union Data* adr;
+  int length;
+  union Data* body;
+  int hash;
+} Memory;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/interface/Meta.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/interface/Meta.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,6 @@
+typedef struct Meta <Type, Impl> {
+  enum DataType type;
+  long size;
+  long len;
+  struct Queue* wait; // tasks waiting on this dataGear
+} Meta;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/interface/MultiDim.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/interface/MultiDim.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,5 @@
+typedef struct MultiDim <Type, Impl> {
+  int x;
+  int y;
+  int z;
+} MultiDim;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/interface/Node.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/interface/Node.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,12 @@
+typedef struct Node <Type, Impl> {
+  int key; // comparable data segment
+  union Data* value;
+  struct Node* left;
+  struct Node* right;
+          // needed for balancing
+  enum Color {
+      Red,
+      Black,
+      // Red is 0, Black is 1 (enumerators convert to integers).
+  }color;
+} Node;
diff -r a4cab67624f7 -r 9146d6017f18 src/plautogen/interface/RotateTree.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/plautogen/interface/RotateTree.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,5 @@
+typedef struct RotateTree <Type, Impl> {
+  __code next(...);
+  struct RedBlackTree* traverse;
+  struct Tree* tree;
+} RotateTree;
diff -r a4cab67624f7 -r 9146d6017f18 src/stack.agda
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/stack.agda	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,255 @@
+open import Level renaming (suc to succ ; zero to Zero )
+module stack  where
+
+open import Relation.Binary.PropositionalEquality
+open import Relation.Binary.Core
+open import Data.Nat
+
+ex : 1 + 2 ≡ 3
+ex = refl
+
+data Bool {n : Level } : Set n where
+  True  : Bool
+  False : Bool
+
+record _∧_ {n : Level } (a : Set n) (b : Set n): Set n where
+  field
+    pi1 : a
+    pi2 : b
+
+data Maybe {n : Level } (a : Set n) : Set n where
+  Nothing : Maybe a
+  Just    : a -> Maybe a
+
+record StackMethods {n m : Level } (a : Set n ) {t : Set m }(stackImpl : Set n ) : Set (m Level.⊔ n) where -- stack operations in continuation-passing style; t is the result type of every continuation
+  field
+    push : stackImpl -> a -> (stackImpl -> t) -> t -- the continuation receives the resulting stack
+    pop  : stackImpl -> (stackImpl -> Maybe a -> t) -> t -- the continuation receives the resulting stack and one optional value
+    pop2 : stackImpl -> (stackImpl -> Maybe a -> Maybe a -> t) -> t -- as pop, with two optional values
+    get  : stackImpl -> (stackImpl -> Maybe a -> t) -> t -- same shape as pop
+    get2 : stackImpl -> (stackImpl -> Maybe a -> Maybe a -> t) -> t -- same shape as pop2
+open StackMethods
+
+record Stack {n m : Level } (a : Set n ) {t : Set m } (si : Set n ) : Set (m Level.⊔ n) where -- pairs an implementation state with its method table
+  field
+    stack : si -- the implementation state
+    stackMethods : StackMethods {n} {m} a {t} si -- the operations that act on that state
+  pushStack :  a -> (Stack a si -> t) -> t -- each wrapper below calls the method on the stored state and rewraps the new state with the same method table
+  pushStack d next = push (stackMethods ) (stack ) d (\s1 -> next (record {stack = s1 ; stackMethods = stackMethods } ))
+  popStack : (Stack a si -> Maybe a  -> t) -> t
+  popStack next = pop (stackMethods ) (stack ) (\s1 d1 -> next (record {stack = s1 ; stackMethods = stackMethods }) d1 )
+  pop2Stack :  (Stack a si -> Maybe a -> Maybe a -> t) -> t
+  pop2Stack next = pop2 (stackMethods ) (stack ) (\s1 d1 d2 -> next (record {stack = s1 ; stackMethods = stackMethods }) d1 d2)
+  getStack :  (Stack a si -> Maybe a  -> t) -> t
+  getStack next = get (stackMethods ) (stack ) (\s1 d1 -> next (record {stack = s1 ; stackMethods = stackMethods }) d1 )
+  get2Stack :  (Stack a si -> Maybe a -> Maybe a -> t) -> t
+  get2Stack next = get2 (stackMethods ) (stack ) (\s1 d1 d2 -> next (record {stack = s1 ; stackMethods = stackMethods }) d1 d2)
+
+open Stack
+
+data Element {n : Level } (a : Set n) : Set n where
+  cons : a -> Maybe (Element a) -> Element a
+
+datum : {n : Level } {a : Set n} -> Element a -> a
+datum (cons a _) = a
+
+next : {n : Level } {a : Set n} -> Element a -> Maybe (Element a)
+next (cons _ n) = n
+
+
+{-
+-- A recursive record definition is not allowed, so use a linked list built with Maybe.
+record Element {l : Level} (a : Set n l) : Set n (suc l) where
+  field
+    datum : a  -- `data` is reserved by Agda.
+    next : Maybe (Element a)
+-}
+
+
+
+record SingleLinkedStack {n : Level } (a : Set n) : Set n where
+  field
+    top : Maybe (Element a)
+open SingleLinkedStack
+
+pushSingleLinkedStack : {n m : Level } {t : Set m } {Data : Set n} -> SingleLinkedStack Data -> Data -> (Code : SingleLinkedStack Data -> t) -> t
+pushSingleLinkedStack stack datum next = next stack1 -- hand the extended stack to the continuation
+  where
+    element = cons datum (top stack) -- new element that points at the previous top
+    stack1  = record {top = Just element}
+
+
+popSingleLinkedStack : {n m : Level } {t : Set m } {a  : Set n} -> SingleLinkedStack a -> (Code : SingleLinkedStack a -> (Maybe a) -> t) -> t
+popSingleLinkedStack stack cs with (top stack)
+...                                | Nothing = cs stack  Nothing -- empty stack: passed on unchanged with no value
+...                                | Just d  = cs stack1 (Just data1) -- otherwise: the top value and the stack without it
+  where
+    data1  = datum d
+    stack1 = record { top = (next d) }
+
+pop2SingleLinkedStack : {n m : Level } {t : Set m } {a  : Set n} -> SingleLinkedStack a -> (Code : SingleLinkedStack a -> (Maybe a) -> (Maybe a) -> t) -> t
+pop2SingleLinkedStack {n} {m} {t} {a} stack cs with (top stack)
+...                                | Nothing = cs stack Nothing Nothing -- empty stack: passed on unchanged with no values
+...                                | Just d = pop2SingleLinkedStack' {n} {m} stack cs -- at least one element: inspect the second one
+  where
+    pop2SingleLinkedStack' : {n m : Level } {t : Set m }  -> SingleLinkedStack a -> (Code : SingleLinkedStack a -> (Maybe a) -> (Maybe a) -> t) -> t
+    pop2SingleLinkedStack' stack cs with (next d)
+    ...              | Nothing = cs stack Nothing Nothing -- exactly one element: nothing is popped
+    ...              | Just d1 = cs (record {top = (next d1)}) (Just (datum d)) (Just (datum d1)) -- both removed; the former top is the first value
+    
+
+getSingleLinkedStack : {n m : Level } {t : Set m } {a  : Set n} -> SingleLinkedStack a -> (Code : SingleLinkedStack a -> (Maybe a) -> t) -> t
+getSingleLinkedStack stack cs with (top stack)
+...                                | Nothing = cs stack  Nothing -- empty stack: no value
+...                                | Just d  = cs stack (Just data1) -- top value; the stack itself is passed on unchanged
+  where
+    data1  = datum d
+
+get2SingleLinkedStack : {n m : Level } {t : Set m } {a  : Set n} -> SingleLinkedStack a -> (Code : SingleLinkedStack a -> (Maybe a) -> (Maybe a) -> t) -> t
+get2SingleLinkedStack {n} {m} {t} {a} stack cs with (top stack)
+...                                | Nothing = cs stack Nothing Nothing -- empty stack: no values
+...                                | Just d = get2SingleLinkedStack' {n} {m} stack cs -- at least one element: inspect the second one
+  where
+    get2SingleLinkedStack' : {n m : Level} {t : Set m } -> SingleLinkedStack a -> (Code : SingleLinkedStack a -> (Maybe a) -> (Maybe a) -> t) -> t
+    get2SingleLinkedStack' stack cs with (next d)
+    ...              | Nothing = cs stack Nothing Nothing -- exactly one element: reported as no values
+    ...              | Just d1 = cs stack (Just (datum d)) (Just (datum d1)) -- top two values; the stack is passed on unchanged
+
+
+
+emptySingleLinkedStack : {n : Level } {a : Set n} -> SingleLinkedStack a
+emptySingleLinkedStack = record {top = Nothing}
+
+-----
+-- Basic stack implementations are specifications of a Stack
+--
+singleLinkedStackSpec : {n m : Level } {t : Set m } {a : Set n} -> StackMethods {n} {m} a {t} (SingleLinkedStack a)
+singleLinkedStackSpec = record {
+                                   push = pushSingleLinkedStack
+                                 ; pop  = popSingleLinkedStack
+                                 ; pop2 = pop2SingleLinkedStack
+                                 ; get  = getSingleLinkedStack
+                                 ; get2 = get2SingleLinkedStack
+                           }
+
+createSingleLinkedStack : {n m : Level } {t : Set m } {a : Set n} -> Stack {n} {m} a {t} (SingleLinkedStack a)
+createSingleLinkedStack = record {
+                             stack = emptySingleLinkedStack ;
+                             stackMethods = singleLinkedStackSpec 
+                           }
+
+----
+--
+-- proof of properties ( concrete cases )
+--
+
+test01 : {n : Level } {a : Set n} -> SingleLinkedStack a -> Maybe a -> Bool {n}
+test01 stack _ with (top stack)
+...                  | (Just _) = True
+...                  | Nothing  = False
+
+
+test02 : {n : Level } {a : Set n} -> SingleLinkedStack a -> Bool
+test02 stack = popSingleLinkedStack stack test01
+
+test03 : {n : Level } {a : Set n} -> a ->  Bool
+test03 v = pushSingleLinkedStack emptySingleLinkedStack v test02
+
+-- after a push and a pop, the stack is empty
+lemma : {n : Level} {A : Set n} {a : A} -> test03 a ≡ False
+lemma = refl
+
+testStack01 : {n m : Level } {a : Set n} -> a -> Bool {m}
+testStack01 v = pushStack createSingleLinkedStack v (
+   \s -> popStack s (\s1 d1 -> True))
+
+-- after pushing 1 and then 2, pop2 yields 2 and then 1
+
+testStack02 : {m : Level } ->  ( Stack  ℕ (SingleLinkedStack ℕ) -> Bool {m} ) -> Bool {m}
+testStack02 cs = pushStack createSingleLinkedStack 1 (
+   \s -> pushStack s 2 cs)
+
+
+testStack031 : (d1 d2 : ℕ ) -> Bool {Zero}
+testStack031 2 1 = True
+testStack031 _ _ = False
+
+testStack032 : (d1 d2 : Maybe ℕ) -> Bool {Zero}
+testStack032  (Just d1) (Just d2) = testStack031 d1 d2
+testStack032  _ _ = False
+
+testStack03 : {m : Level } -> Stack  ℕ (SingleLinkedStack ℕ) -> ((Maybe ℕ) -> (Maybe ℕ) -> Bool {m} ) -> Bool {m}
+testStack03 s cs = pop2Stack s (
+   \s d1 d2 -> cs d1 d2 )
+
+testStack04 : Bool
+testStack04 = testStack02 (\s -> testStack03 s testStack032)
+
+testStack05 : testStack04 ≡ True
+testStack05 = refl
+
+------
+--
+-- proof of properties with indefinite state of stack
+--
+-- this should be proved from properties of the stack interface, not only from the implementation,
+--    and the implementation has to provide those properties.
+--
+--    we cannot write "s ≡ s3", since the level of the Set does not fit, but "stack s ≡ stack s3" is ok.
+--    anyway, some implementations may result in s != s3
+--  
+
+stackInSomeState : {l m : Level } {D : Set l} {t : Set m } (s : SingleLinkedStack D ) -> Stack {l} {m} D {t}  ( SingleLinkedStack  D )
+stackInSomeState s =  record { stack = s ; stackMethods = singleLinkedStackSpec }
+
+push->push->pop2 : {l : Level } {D : Set l} (x y : D ) (s : SingleLinkedStack D ) ->
+    pushStack ( stackInSomeState s )  x ( \s1 -> pushStack s1 y ( \s2 -> pop2Stack s2 ( \s3 y1 x1 -> (Just x ≡ x1 ) ∧ (Just y ≡ y1 ) ) ))
+push->push->pop2 {l} {D} x y s = record { pi1 = refl ; pi2 = refl }
+
+
+id : {n : Level} {A : Set n} -> A -> A
+id a = a
+
+-- push a, n times
+
+n-push : {n : Level} {A : Set n} {a : A} -> ℕ -> SingleLinkedStack A -> SingleLinkedStack A
+n-push zero s            = s
+n-push {l} {A} {a} (suc n) s = pushSingleLinkedStack (n-push {l} {A} {a} n s) a (\s -> s ) -- one more push of the implicit a on top of the n earlier pushes
+
+n-pop :  {n : Level}{A : Set n} {a : A} -> ℕ -> SingleLinkedStack A -> SingleLinkedStack A
+n-pop zero    s         = s
+n-pop  {_} {A} {a} (suc n) s = popSingleLinkedStack (n-pop {_} {A} {a} n s) (\s _ -> s ) -- one more pop after the n earlier pops; the popped value is discarded
+
+open ≡-Reasoning
+
+push-pop-equiv : {n : Level} {A : Set n} {a : A} (s : SingleLinkedStack A) -> (popSingleLinkedStack (pushSingleLinkedStack s a (\s -> s)) (\s _ -> s) ) ≡ s
+push-pop-equiv s = refl
+
+push-and-n-pop : {n : Level} {A : Set n} {a : A} (n : ℕ) (s : SingleLinkedStack A) -> n-pop {_} {A} {a} (suc n) (pushSingleLinkedStack s a id) ≡ n-pop {_} {A} {a} n s
+push-and-n-pop zero s            = refl
+push-and-n-pop {_} {A} {a} (suc n) s = begin
+   n-pop {_} {A} {a} (suc (suc n)) (pushSingleLinkedStack s a id)
+  ≡⟨ refl ⟩
+   popSingleLinkedStack (n-pop {_} {A} {a} (suc n) (pushSingleLinkedStack s a id)) (\s _ -> s)
+  ≡⟨ cong (\s -> popSingleLinkedStack s (\s _ -> s )) (push-and-n-pop n s) ⟩ 
+   popSingleLinkedStack (n-pop {_} {A} {a} n s) (\s _ -> s)
+  ≡⟨ refl ⟩
+    n-pop {_} {A} {a} (suc n) s
+  ∎
+  
+
+n-push-pop-equiv : {n : Level} {A : Set n} {a : A} (n : ℕ) (s : SingleLinkedStack A) -> (n-pop {_} {A} {a} n (n-push {_} {A} {a} n s)) ≡ s
+n-push-pop-equiv zero s            = refl
+n-push-pop-equiv {_} {A} {a} (suc n) s = begin
+    n-pop {_} {A} {a} (suc n) (n-push (suc n) s)
+  ≡⟨ refl ⟩
+    n-pop {_} {A} {a} (suc n) (pushSingleLinkedStack (n-push n s) a (\s -> s))
+  ≡⟨ push-and-n-pop n (n-push n s)  ⟩
+    n-pop {_} {A} {a} n (n-push n s)
+  ≡⟨ n-push-pop-equiv n s ⟩
+    s
+  ∎
+
+
+n-push-pop-equiv-empty : {n : Level} {A : Set n} {a : A} -> (n : ℕ) -> n-pop {_} {A} {a} n (n-push {_} {A} {a} n emptySingleLinkedStack)  ≡ emptySingleLinkedStack
+n-push-pop-equiv-empty n = n-push-pop-equiv n emptySingleLinkedStack
diff -r a4cab67624f7 -r 9146d6017f18 src/test/cbc_stack_test.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/test/cbc_stack_test.c	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,62 @@
+#include "../context.h"
+#include <assert.h>
+
+__code stack_test1(struct Context* context, struct Task* task, struct Stack* stack) { // first push; continues at C_stack_test2
+    task->code = C_stack_test1; // tag checked later by stack_test2_stub and stack_test4_stub
+    stack->next = C_stack_test2;
+    stack->data = (union Data*)task; // the task itself is the value handed to push
+    goto meta(context, stack->stack->Stack.push);
+}
+
+__code stack_test1_stub(struct Context* context) { // prepares the arguments of stack_test1
+    Task* task = &ALLOCATE(context, Task)->Task;
+    struct Stack* stack = &(createSingleLinkedStack(context)->Stack);
+    assert(stack->stack->SingleLinkedStack.top == NULL); // a newly created stack has no top element
+    context->data[D_Stack]->Stack.stack = (union Data*)stack; // stored where the later stubs read it back
+    goto stack_test1(context,
+            task,
+            &context->data[D_Stack]->Stack);
+}
+
+__code stack_test2(struct Context* context, struct Task* task, struct Stack* stack) { // second push; continues at C_stack_test3
+    task->code = C_stack_test2; // tag checked later by stack_test3_stub and stack_test4_stub
+    stack->next = C_stack_test3;
+    stack->data = (union Data*)task;
+    goto meta(context, stack->stack->Stack.push);
+}
+
+__code stack_test2_stub(struct Context* context) { // checks the first push, then prepares the second
+    assert(context->data[D_Stack]->Stack.stack->Stack.stack->SingleLinkedStack.top->data->Task.code == C_stack_test1); // top must be the task pushed by stack_test1
+    Task* task = &ALLOCATE(context, Task)->Task;
+    goto stack_test2(context,
+            task,
+            &context->data[D_Stack]->Stack);
+}
+
+__code stack_test3(struct Context* context, struct Stack* stack) { // pops once; continues at C_stack_test4
+    stack->next = C_stack_test4;
+    goto meta(context, stack->stack->Stack.pop);
+}
+
+__code stack_test3_stub(struct Context* context) { // checks the second push before popping
+    assert(context->data[D_Stack]->Stack.stack->Stack.stack->SingleLinkedStack.top->data->Task.code == C_stack_test2); // top must be the task pushed by stack_test2
+    goto stack_test3(context,
+            &context->data[D_Stack]->Stack);
+}
+
+__code stack_test4(struct Context* context) { // last gear of the test: continues at C_exit_code
+    goto meta(context, C_exit_code);
+}
+
+__code stack_test4_stub(struct Context* context) { // checks the result of the pop
+    assert(context->data[D_Stack]->Stack.data->Task.code == C_stack_test2); // the data slot must hold the task pushed last
+    assert(context->data[D_Stack]->Stack.stack->Stack.stack->SingleLinkedStack.top->data->Task.code == C_stack_test1); // the first task must be the top again
+    goto stack_test4(context);
+}
+
+int main(int argc, char const* argv[]) { // argc and argv are not used
+    struct Context* main_context = NEW(struct Context);
+    initContext(main_context);
+    main_context->next = C_stack_test1; // presumably the first code gear start_code dispatches to; confirm in start_code
+    goto start_code(main_context);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/test/multiDimIterator_test.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/test/multiDimIterator_test.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,78 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#interface "TaskManager.h"
+
+#include "../../context.h"
+
+int cpu_num = 1; // overwritten by init when -cpu is given
+int length = 1; // overwritten by init when -l is given; upper bound of the loop in createTask1
+int gpu_num = 0; // set to 1 by init when -cuda is given
+int CPU_ANY = -1; // not referenced elsewhere in this file
+int CPU_CUDA = -1; // set to 0 by init when -cuda is given
+
+__code initDataGears(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
+    // loopCounter->tree = createRedBlackTree(context);
+    loopCounter->i = 0; // counter advanced by createTask1
+    taskManager->taskManager = (union Data*)createTaskManagerImpl(context, cpu_num, gpu_num, 0); // built from the cpu_num and gpu_num globals; the meaning of the last argument is not visible here
+    goto code1();
+}
+
+__code code1(struct LoopCounter* loopCounter) {
+    printf("cpus:\t\t%d\n", cpu_num);
+    printf("gpus:\t\t%d\n", gpu_num);
+    printf("length:\t\t%d\n", length);
+    /* puts("queue"); */
+    /* print_queue(context->data[ActiveQueue]->queue.first); */
+    /* puts("tree"); */
+    /* print_tree(context->data[Tree]->tree.root); */
+    /* puts("result"); */
+
+    goto createTask1();
+}
+
+__code createTask1(struct LoopCounter* loopCounter, struct TaskManager* taskManager) {
+    int i = loopCounter->i; // value before the increment below
+
+    if (i < length) { // still below the configured length: spawn another round in createTask2
+        loopCounter->i++;
+        goto createTask2();
+    }
+
+    loopCounter->i = 0; // all rounds issued: reset the counter
+    taskManager->next = C_exit_code; // continuation stored for the task manager before code2 runs
+    goto code2();
+}
+
+__code code2(struct TaskManager* taskManager) {
+    goto taskManager->shutdown(exit_code);
+}
+
+__code code2_stub(struct Context* context) {
+    goto code2(context, &Gearef(context, TaskManager)->taskManager->TaskManager);
+}
+
+__code createTask2(struct TaskManager* taskManager) {
+    par goto printIterator(iterate(2), __exit);
+    par goto printIterator(iterate(2, 2), __exit);
+    par goto printIterator(iterate(2, 2, 2), __exit);
+    goto createTask1();
+}
+
+void init(int argc, char** argv) {
+    for (int i = 1; argv[i]; ++i) { // recognises -cpu N, -l N and -cuda; anything else is skipped
+        if (strcmp(argv[i], "-cpu") == 0 && argv[i+1]) // a trailing -cpu without a value is ignored rather than passing NULL to atoi
+            cpu_num = (int)atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-l") == 0 && argv[i+1]) // same guard for -l
+            length = (int)atoi(argv[i+1]);
+        else if (strcmp(argv[i], "-cuda") == 0) {
+            gpu_num = 1;
+            CPU_CUDA = 0;
+        }
+    }
+}
+
+int main(int argc, char** argv) { // NOTE review: init is defined above but never called in this file, so -cpu, -l and -cuda have no effect; confirm whether that is intended
+    goto initDataGears();
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/test/printIterator.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/test/printIterator.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,6 @@
+#include "../../context.h"
+#include <stdio.h>
+__code printIterator(struct MultiDim* multiDim, __code next(...)) {
+    printf("x: %d, y: %d, z: %d\n", multiDim->x, multiDim->y, multiDim->z); // prints the three coordinates, then continues with next
+    goto next(...);
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/test/queue_test.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/test/queue_test.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,51 @@
+#include "../../context.h"
+#interface "Queue.h"
+#include <assert.h>
+
+__code queueTest1(struct Queue* queue) {
+    Node* node = new Node();
+    node->color = Red; // the colour marks the first element for the assertions in the later stubs
+    goto queue->put(node, queueTest2);
+}
+
+__code queueTest1_stub(struct Context* context) {
+    Queue* queue = createSingleLinkedQueue(context); // the queue under test
+    goto queueTest1(context, queue);
+}
+
+__code queueTest2(struct Queue* queue) {
+    Node* node = new Node();
+    node->color = Black; // the colour marks the second element
+    goto queue->put(node, queueTest3);
+}
+
+__code queueTest2_stub(struct Context* context) {
+    SingleLinkedQueue* singleLinkedQueue = (SingleLinkedQueue*)GearImpl(context, Queue, queue);
+    assert(singleLinkedQueue->top->next->data->Node.color == Red); // after one put the element after top is the Red node; top itself is presumably a placeholder, confirm in SingleLinkedQueue
+    assert(singleLinkedQueue->last->data->Node.color == Red); // and it is also the last element
+    Queue* queue = (struct Queue*)Gearef(context, Queue)->queue;
+    goto queueTest2(context, queue);
+}
+
+__code queueTest3(struct Queue* queue) {
+    goto queue->take(assert3);
+}
+
+__code queueTest3_stub(struct Context* context) {
+    SingleLinkedQueue* singleLinkedQueue = (SingleLinkedQueue*)GearImpl(context, Queue, queue);
+    assert(singleLinkedQueue->top->next->data->Node.color == Red); // after two puts the Red node is still first
+    assert(singleLinkedQueue->last->data->Node.color == Black); // and the Black node is last
+    Queue* queue = (struct Queue*)Gearef(context, Queue)->queue;
+    goto queueTest3(context, queue);
+}
+
+__code assert3(struct Queue* queue) {
+    SingleLinkedQueue* singleLinkedQueue = &queue->queue->Queue.queue->SingleLinkedQueue;
+    assert(singleLinkedQueue->top->next->data->Node.color == Black); // after the take only the Black node is left at the front
+    assert(singleLinkedQueue->last->data->Node.color == Black); // and it is the last element as well
+    goto exit_code(context);
+}
+
+int main(int argc, char const* argv[]) {
+    goto queueTest1();
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/test/rbTree_test.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/test/rbTree_test.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,87 @@
+#include <stdio.h>
+#include "../../context.h"
+#interface "Tree.h"
+
+/* #include <assert.h> */
+
+__code rbTreeTest1(struct Tree* tree) {
+  printf("Test1\n");
+  Node* node = new Node();
+  node->value = (union Data*)new Int();
+  node->value->Int = 3;
+  node->key = 3; // first insertion: key 3 with value 3, then on to rbTreeTest2
+  printf("value->%d,key->%d\n",node->value->Int,node->key);
+  goto tree->put(node, rbTreeTest2);
+}
+
+__code rbTreeTest1_stub(struct Context* context) {
+  printf("test1_stub\n");
+  Tree* tree = createRedBlackTree(context); // the tree under test is created here; later stubs fetch a tree via Gearef instead
+  goto rbTreeTest1(context,tree);
+}
+
+
+__code rbTreeTest2(struct Tree* tree) {
+  printf("Test2\n");
+  Node* node = new Node();
+  node->value = (union Data*)new Int();
+  node->value->Int = 4;
+  node->key = 4; // second insertion: key 4 with value 4, then on to rbTreeTest3
+  goto tree->put(node, rbTreeTest3);
+}
+
+__code rbTreeTest2_stub(struct Context* context) {
+  printf("test2_stub\n");
+  Tree* tree = (struct Tree*)Gearef(context, Tree)->tree; // reads the tree pointer from the Tree slot of the context
+  goto rbTreeTest2(context,tree);
+}
+
+
+__code rbTreeTest3(struct Tree* tree) {
+  printf("test3\n");
+  Node* node = new Node();
+  node->value = (union Data*)new Int();
+  node->value->Int = 2;
+  node->key = 2;
+  goto tree->put(node, rbTreeTest4);
+}
+
+__code rbTreeTest3_stub(struct Context* context) {
+  Tree* tree = (struct Tree*)Gearef(context, Tree)->tree;
+  goto rbTreeTest3(context,tree);
+}
+
+__code rbTreeTest4(struct Tree* tree) {
+  printf("test4\n");
+  Node* node = new Node();
+  node->value = (union Data*)new Int();
+  node->value->Int = 8;
+  node->key = 8;
+  goto tree->put(node, rbTreeTest5);
+}
+
+__code rbTreeTest4_stub(struct Context* context) {
+  Tree* tree = (struct Tree*)Gearef(context, Tree)->tree;
+  goto rbTreeTest4(context,tree);
+}
+
+__code rbTreeTest5(struct Tree* tree) {
+  printf("test5\n");
+  Node* node = new Node();
+  node->value = (union Data*)new Int();
+  node->value->Int = 8;
+  node->key = 8; // same key that rbTreeTest4 inserted; this gear asks the tree to remove it and then exits
+  goto tree->remove(node,exit_code);
+}
+
+__code rbTreeTest5_stub(struct Context* context) {
+  Tree* tree = (struct Tree*)Gearef(context, Tree)->tree;
+  goto rbTreeTest5(context,tree);
+}
+
+
+
+int main(int argc, char const* argv[]) {
+  printf("test_main\n");
+  goto rbTreeTest1();
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/test/stack_test.cbc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/test/stack_test.cbc	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,54 @@
+#include "../../context.h"
+#interface "Stack.h"
+#include <assert.h>
+// use "TaskManager.h"
+
+__code stackTest1(struct Stack* stack) {
+    Node* node = new Node();
+    node->color = Red; // the colour marks the first pushed element for the later assertions
+    goto stack->push(node, stackTest2);
+}
+
+__code stackTest1_stub(struct Context* context) {
+    Stack* stack = createSingleLinkedStack(context); // the stack under test
+    goto stackTest1(context, stack);
+}
+
+__code stackTest2(struct Stack* stack) {
+    Node* node = new Node();
+    node->color = Black; // the colour marks the second pushed element
+    goto stack->push(node, stackTest3);
+}
+
+__code stackTest2_stub(struct Context* context) {
+    SingleLinkedStack* singleLinkedStack = (SingleLinkedStack*)GearImpl(context, Stack, stack);
+    assert(singleLinkedStack->top->data->Node.color == Red); // after the first push the Red node is on top
+    Stack* stack = (struct Stack*)Gearef(context, Stack)->stack;
+    goto stackTest2(context, stack);
+}
+
+__code stackTest3(struct Stack* stack) {
+    goto stack->pop(assert3);
+}
+
+__code stackTest3_stub(struct Context* context) {
+    /*
+        assertion against the stack implementation: after the second push the Black node is on top
+    */
+    SingleLinkedStack* singleLinkedStack = (SingleLinkedStack*)GearImpl(context, Stack, stack);
+    assert(singleLinkedStack->top->data->Node.color == Black);
+    Stack* stack = (struct Stack*)Gearef(context, Stack)->stack;
+    goto stackTest3(context, stack);
+}
+
+__code assert3(struct Node* node, struct Stack* stack) {
+    /*
+        assertion at the normal level: checks the node this gear receives after the pop
+    */
+    assert(node->color == Red); // NOTE review: the Black node was pushed last, so a pop would be expected to deliver Black; confirm which node is bound to this argument
+    goto exit_code(0);
+}
+
+int main(int argc, char const* argv[]) {
+    goto stackTest1();
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/tmp_tool/_orig_context.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/tmp_tool/_orig_context.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,454 @@
+/* Context definition for llrb example */
+#ifndef CONTEXT_H
+#define CONTEXT_H
+#include <stdlib.h>
+#include <pthread.h>
+#ifdef USE_CUDAWorker
+#include <cuda.h>
+#include <driver_types.h>
+#include <cuda_runtime.h>
+#include "helper_cuda.h"
+#endif
+
+#define ALLOCATE_SIZE 20000000
+#define NEW(type) (type*)(calloc(1, sizeof(type))) /* one zero-filled object; the calloc result is not checked */
+#define NEWN(n, type) (type*)(calloc(n, sizeof(type))) /* n zero-filled objects; the calloc result is not checked */
+
+#define ALLOC_DATA(context, dseg) ({ /* puts a Meta header and one dseg on the context heap, records the data in context->data[D_dseg] and yields it as dseg*; heapLimit is not consulted */ \
+    Meta* meta = (Meta*)context->heap;\
+    meta->type = D_##dseg;\
+    meta->size = sizeof(dseg);\
+    meta->len = 1;\
+    context->heap += sizeof(Meta);\
+    context->data[D_##dseg] = context->heap; context->heap += sizeof(dseg); (dseg *)context->data[D_##dseg]; })
+
+#define ALLOC_DATA_TYPE(context, dseg, t) ({ /* like ALLOC_DATA, but the object has type t while the slot used is context->data[D_dseg] */ \
+    Meta* meta = (Meta*)context->heap;\
+    meta->type = D_##t;\
+    meta->size = sizeof(t);\
+    meta->len = 1;\
+    context->heap += sizeof(Meta);\
+    context->data[D_##dseg] = context->heap; context->heap += sizeof(t); (t *)context->data[D_##dseg]; })
+
+#define ALLOCATE(context, t) ({ /* bump-allocates a Meta header followed by one t and yields the union Data* just after the header; no context->data slot is written */ \
+    Meta* meta = (Meta*)context->heap;\
+    context->heap += sizeof(Meta);\
+    union Data* data = context->heap; \
+    context->heap += sizeof(t); \
+    meta->type = D_##t; \
+    meta->size = sizeof(t);     \
+    meta->len = 1;\
+    data; })
+
+#define ALLOCATE_ARRAY(context, t, length) ({ /* bump-allocates a Meta header followed by length elements of t and yields the data pointer; length is evaluated three times */ \
+    Meta* meta = (Meta*)context->heap;\
+    context->heap += sizeof(Meta);\
+    union Data* data = context->heap; \
+    context->heap += sizeof(t)*(length); /* parenthesised so that an argument such as n+1 scales correctly */ \
+    meta->type = D_##t; \
+    meta->size = sizeof(t)*(length); \
+    meta->len = (length); \
+    data;   })
+
+#define ALLOCATE_PTR_ARRAY(context, dseg, length) ({ /* bump-allocates a Meta header followed by length pointers to dseg and yields the data pointer; length is evaluated three times */ \
+    Meta* meta = (Meta*)context->heap;\
+    context->heap += sizeof(Meta);\
+    union Data* data = context->heap; \
+    context->heap += sizeof(dseg *)*(length); /* parenthesised so that an argument such as n+1 scales correctly */ \
+    meta->type = D_##dseg; \
+    meta->size = sizeof(dseg *)*(length); \
+    meta->len = (length); \
+    data; })
+
+#define ALLOCATE_DATA_GEAR(context, t) ({ /* ALLOCATE, then stores the result of createSynchronizedQueue in the wait field of the header */ \
+        union Data* data = ALLOCATE(context, t); \
+        Meta* meta = GET_META(data); \
+        meta->wait = createSynchronizedQueue(context); \
+        data; })
+
+#define ALLOC(context, t) (&ALLOCATE(context, t)->t)
+
+#define GET_META(dseg) ((Meta*)(((char*)(dseg)) - sizeof(Meta))) /* the ALLOCATE macros place the header directly before the data; the argument is parenthesised and char* avoids arithmetic on void* */
+#define GET_TYPE(dseg) (GET_META(dseg)->type)
+#define GET_SIZE(dseg) (GET_META(dseg)->size)
+#define GET_LEN(dseg) (GET_META(dseg)->len)
+#define GET_WAIT_LIST(dseg) (GET_META(dseg)->wait)
+
+#define Gearef(context, t) (&(context)->data[D_##t]->t)
+
+// e.g. (SingleLinkedStack *)GearImpl(context, Stack, stack) reads context->data[D_Stack]->Stack.stack->Stack.stack
+
+#define GearImpl(context, intf, name) (Gearef(context, intf)->name->intf.name)
+
+#include "c/enumCode.h"
+
+enum Relational {
+    EQ,
+    GT,
+    LT,
+};
+
+#include "c/enumData.h"
+
+struct Context {
+    enum Code next;
+    struct Worker* worker;
+    struct TaskManager* taskManager;
+    int codeNum;
+    __code (**code) (struct Context*);
+    union Data **data;
+    void* heapStart;
+    void* heap;
+    long heapLimit;
+    int dataNum;
+
+    // task parameter
+    int idgCount; //number of waiting dataGear
+    int idg;
+    int maxIdg;
+    int odg;
+    int maxOdg;
+    int gpu; // GPU task
+    struct Context* task;
+    struct Element* taskList;
+#ifdef USE_CUDAWorker
+    int num_exec;
+    CUmodule module;
+    CUfunction function;
+#endif
+    /* multi dimension parameter */
+    int iterate;
+    struct Iterator* iterator;
+    enum Code before;
+};
+
+typedef int Int;
+#ifndef USE_CUDAWorker
+typedef unsigned long long CUdeviceptr;
+#endif
+union Data {
+    struct Meta {
+        enum DataType type;
+        long size;
+        long len;
+        struct Queue* wait; // tasks waiting this dataGear
+    } Meta;
+    struct Context Context;
+    struct Timer {
+        union Data* timer;
+        enum Code start;
+        enum Code end;
+        enum Code next;
+    } Timer;
+    struct TimerImpl {
+        double time;
+    } TimerImpl;
+    struct LoopCounter {
+        int i;
+    } LoopCounter;
+    struct TaskManager {
+        union Data* taskManager;
+        enum Code spawn;      // start NEW context on the worker
+        enum Code spawnTasks; // start NEW tasks on the worker
+        enum Code shutdown;
+        enum Code incrementTaskCount;
+        enum Code decrementTaskCount;
+        enum Code next;
+        enum Code next1;
+        enum Code setWaitTask;
+        struct Context* task;
+        struct Element* taskList;
+        union Data* data;
+    } TaskManager;
+    struct TaskManagerImpl {
+        enum Code next;
+        int numWorker;
+        int sendCPUWorkerIndex;
+        int sendGPUWorkerIndex;
+        int taskCount;
+        pthread_mutex_t mutex;
+        struct Queue* activeQueue;
+        struct Worker** workers;
+        struct Element* taskList;
+        int loopCounter;
+        int cpu;
+        int gpu;
+        int io;
+        int maxCPU;
+    } TaskManagerImpl;
+    struct Worker {
+        union Data* worker;
+        enum Code taskReceive;
+        enum Code shutdown;
+        enum Code next;
+        struct Queue* tasks;
+        pthread_t thread;
+        struct TaskManager* taskManager;
+        struct Context* task;
+    } Worker;
+    struct CPUWorker {
+        pthread_mutex_t mutex;
+        pthread_cond_t cond;
+        struct Context* context;
+        int id;
+        int loopCounter;
+    } CPUWorker;
+#ifdef USE_CUDAWorker
+    struct CUDAWorker {
+        CUdevice device;
+        CUcontext cuCtx;
+        struct Context* context;
+        int id;
+        int loopCounter;
+        int deviceNum;
+        struct Queue* tasks;
+        int runFlag;
+        enum Code next;
+        int numStream;
+        struct Executor* executor;
+        CUstream *stream;
+    } CUDAWorker;
+#else
+    struct CUDAWorker {
+    } CUDAWorker;
+#endif
+    struct Main {
+        enum Code code;
+        enum Code next;
+        struct Queue* args;
+    } Main;
+    // Queue Interface
+    struct Queue {
+        union Data* queue;
+        union Data* data;
+        enum Code whenEmpty;
+        enum Code clear;
+        enum Code put;
+        enum Code take;
+        enum Code isEmpty;
+        enum Code next;
+    } Queue;
+    struct SingleLinkedQueue {
+        struct Element* top;
+        struct Element* last;
+    } SingleLinkedQueue;
+    struct SynchronizedQueue {
+        struct Element* top;
+        struct Element* last;
+        struct Atomic* atomic;
+    } SynchronizedQueue;
+    // Stack Interface
+    struct Stack {
+        union Data* stack;
+        union Data* data;
+        union Data* data1;
+        enum Code whenEmpty;
+        enum Code clear;
+        enum Code push;
+        enum Code pop;
+        enum Code pop2;
+        enum Code isEmpty;
+        enum Code get;
+        enum Code get2;
+        enum Code next;
+    } Stack;
+    // Stack implementations
+    struct SingleLinkedStack {
+        struct Element* top;
+    } SingleLinkedStack;
+    struct ArrayStack {
+        int size;
+        int limit;
+        struct Element* array;
+    } ArrayStack;
+    // Stack implementation end
+    struct Element {
+        union Data* data;
+        struct Element* next;
+    } Element;
+    struct Array {
+        int prefix;
+        Int* array;
+    } Array;
+    struct Tree {
+        union Data* tree;
+        struct Node* node;
+        enum Code put;
+        enum Code get;
+        enum Code remove;
+        enum Code clear;
+        enum Code next;
+    } Tree;
+    struct RedBlackTree {
+        struct Node* root;
+        struct Node* current; // reading node of original tree
+        struct Node* previous; // parent of reading node of original tree
+        struct Node* newNode; // writing node of new tree
+        struct Node* parent;
+        struct Node* grandparent;
+        struct Stack* nodeStack;
+        enum Code findNodeNext;
+        int result;
+    } RedBlackTree;
+    struct RotateTree {
+        enum Code next;
+        struct RedBlackTree* traverse;
+        struct Tree* tree;
+    } RotateTree;
+    struct Node {
+        int key; // comparable data segment
+        union Data* value;
+        struct Node* left;
+        struct Node* right;
+        // needed for balancing
+        enum Color {
+            Red,
+            Black,
+            // Red is 0 and Black is 1: the enumerator names convert to these integers.
+        } color;
+    } Node;
+    struct Atomic {
+        union Data* atomic;
+        union Data** ptr;
+        union Data* oldData;
+        union Data* newData;
+        enum Code checkAndSet;
+        enum Code next;
+        enum Code fail;
+    } Atomic;
+    struct AtomicReference {
+    } AtomicReference;
+    struct Semaphore {
+        union Data* semaphore;
+        enum Code p;
+        enum Code v;
+        enum Code next;
+    } Semaphore;
+    struct SemaphoreImpl {
+        int value;
+        struct Lock* lock;
+        struct Queue* waitThreadQueue;
+    } SemaphoreImpl;
+    struct Allocate {
+        enum Code next;
+        long size;
+    } Allocate;
+    struct Integer {
+        int value;
+    } Integer;
+    struct SortArray {
+        union Data* sortArray;
+        struct Integer *array; // could this be declared as "Array array" instead?
+        int loopCounter;
+        int block;
+        int first;
+        int prefix;
+    } SortArray;
+    struct Iterator {
+        union Data* iterator;
+        struct Context* task;
+        int numGPU;
+        enum Code exec;
+        enum Code barrier;
+        enum Code whenWait;
+        enum Code next;
+    } Iterator;
+    struct MultiDimIterator {
+        int x;
+        int y;
+        int z;
+        int count;
+        int counterX;
+        int counterY;
+        int counterZ;
+    } MultiDimIterator;
+    struct MultiDim {
+        int x;
+        int y;
+        int z;
+    } MultiDim;
+    struct Executor {
+        union Data* executor;
+        struct Context* task;
+        enum Code read;
+        enum Code exec;
+        enum Code write;
+        enum Code next;
+    } Executor;
+#ifdef USE_CUDAWorker
+    struct CUDAExecutor {
+        CUdeviceptr** kernelParams;
+        struct CUDABuffer* buffer;
+        int maxThreadPerBlock;
+        int maxThreadPerBlockX;
+        int maxThreadPerBlockY;
+        int maxThreadPerBlockZ;
+        struct Timer* timer;
+    } CUDAExecutor;
+    struct CUDABuffer {
+        int inputLen;
+        int outputLen;
+        union Data** inputData;
+        union Data** outputData;
+    } CUDABuffer;
+    CUdeviceptr CUdeviceptr;
+#else
+    struct CUDAExecutor {
+    } CUDAExecutor;
+    struct CUDABuffer {
+    } CUDABuffer;
+    CUdeviceptr CUdeviceptr;
+#endif
+    Int Int;
+    struct Memory {
+        union Data* adr;
+        int length;
+        union Data* body;
+        int hash;
+    } Memory;
+    struct Buffer {
+        union Data* buffer;
+        union Data* data;
+        enum Code put;
+        enum Code take;
+        enum Code next;
+    } Buffer;
+    struct BoundedBuffer {
+        struct Element* top;
+        struct Element* last;
+        struct Semaphore* fullCount;
+        struct Semaphore* emptyCount;
+        struct Semaphore* lock;
+    } BoundedBuffer;
+    struct Lock {
+        union Data* lock;
+        enum Code doLock;
+        enum Code doUnlock;
+        enum Code next;
+    } Lock;
+    struct LockImpl {
+        Int* lock;
+        struct Queue* waitThreadQueue;
+        struct Atomic* atomic;
+        struct Context* lockContext;
+    } LockImpl;
+    struct SpinLock {
+        volatile Int* lock;
+        struct Atomic* atomic;
+        struct Context* lockContext;
+    } SpinLock;
+}; // union Data end       this is necessary for context generator
+typedef union Data Data;
+
+#include "c/typedefData.h"
+
+#include "c/extern.h"
+
+extern __code start_code(struct Context* context);
+extern __code exit_code(struct Context* context);
+extern __code meta(struct Context* context, enum Code next);
+//extern __code par_meta(struct Context* context, enum Code spawns, enum Code next);
+extern __code parGotoMeta(struct Context* context, enum Code next);
+extern void initContext(struct Context* context);
+
+#endif
diff -r a4cab67624f7 -r 9146d6017f18 src/tmp_tool/parse_cerate_each_context.pl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/tmp_tool/parse_cerate_each_context.pl	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,147 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use DDP {deparse => 1};
+
+my @already_defined = qw(
+  Atomic
+  Buffer
+  CodeGear
+  Executor
+  Iterator
+  Lock
+  Queue
+  Semaphore
+  Stack
+  TaskManager
+  Timer
+  Tree
+  Worker
+  SingleLinkedStack
+  SortArray
+);
+# Lookup table for the names above; the parser only tests whether a key exists.
+my %already_defined_hash;
+$already_defined_hash{$_}++ for @already_defined;
+
+my $context = shift // "context.h";   # header to scan; first CLI argument if given
+
+open my $fh, '<', $context or die "cannot open $context: $!\n";
+# Skip everything up to and including the opening line of union Data.
+while (my $line = <$fh>) {
+  last if $line =~ /^union Data \{/;
+}
+
+# Everything after that opening line is kept, one chomped line per element.
+my @context_cg_str = <$fh>;
+close($fh);
+chomp @context_cg_str;
+my $res = {};   # struct name => array ref of its member lines
+
+# Collect the member lines of every struct that is not listed in %already_defined_hash.
+while (defined(my $line = shift @context_cg_str)) {   # defined(): a blank line ("") must not end the scan
+  last if $line =~ /union Data end/;                  # end-of-union marker: nothing to parse after it
+  if ($line =~ /\s*struct\s*(\w+)\s*\{/) {
+    my $struct = $1;
+    next if exists $already_defined_hash{$struct};
+    $line = shift @context_cg_str;
+    # defined(): stop instead of looping forever when the closing "} Name" line is missing
+    while (defined $line && $line !~  /\}\s*$struct/) {
+        $line =~ s/\s+([\*\w ]+);/$1/;
+        push (@{$res->{$struct}},$line);
+        $line = shift @context_cg_str ;
+    }
+    # a struct without members still gets an entry, holding one empty string
+    push (@{$res->{$struct}},"") unless defined $res->{$struct};
+  }
+}
+
+print "$_\n" for keys %$res;
+my %impl2inter = (   # implementations that are not named "<Interface>Impl" => their interface
+  SpinLock          => "Lock",
+  CUDAWorker        => "Worker",
+  RedBlackTree      => "Tree",
+  AtomicReference   => "Atomic",
+  CPUWorker         => "Worker",
+  MultiDimIterator  => "Iterator",
+  CUDAExecutor      => "Executor",
+  SingleLinkedStack => "Stack",
+  SingleLinkedQueue => "Queue",
+  SynchronizedQueue => "Queue",
+);
+
+for my $dg_name (keys %$res) {
+  if ($dg_name =~ /(\w+)Impl/) {   # "FooImpl" is generated as an implementation of "Foo"
+      create_impl_file($dg_name,$res->{$dg_name},$1);
+      next;
+  }
+
+  if (exists $impl2inter{$dg_name}) {
+      create_impl_file($dg_name,$res->{$dg_name},$impl2inter{$dg_name});
+      next;
+  }
+  create_inter_file($dg_name,$res->{$dg_name});   # anything else is treated as an interface
+}
+
+# Hand create_file the impl/<name>.h path and the opening "typedef struct ... impl <interface> {" line.
+sub create_impl_file {
+  my ($impl_name, $members, $interface) = @_;
+  create_file("impl/$impl_name.h", $members, "typedef struct $impl_name <Type, Isa> impl $interface {\n", $impl_name);
+}
+
+# Hand create_file the interface/<name>.h path and the opening "typedef struct <name> <Type, Impl> {" line.
+sub create_inter_file {
+  my ($inter_name, $members) = @_;
+  create_file("interface/$inter_name.h", $members, "typedef struct $inter_name <Type, Impl> {\n", $inter_name);
+}
+
+# Write $ENV{PWD}/plautogen/<file_name>: the opening line in $str, one member per line, then "} <name>;".
+sub create_file {
+  my ($file_name, $contents, $str, $name) = @_;
+  my $indent = "  ";
+  for my $member (@$contents) {
+    if ($member =~ /enum\s*Code\s*(\w+)/) {   # an "enum Code x" member is emitted as "__code x(...);"
+      $str .= "${indent}__code $1(...);\n";
+    } else {
+      chomp $member;
+      $str .= "${indent}$member;\n";
+    }
+  }
+  open my $fh, '>', "$ENV{PWD}/plautogen/$file_name" or die "oops! $file_name\n";
+  print $fh $str, "} $name;\n";
+  close $fh;
+}
+
+sub print_impl {   # write the opening line of an impl typedef to $out
+  my ($out, $name, $cg_info) = @_;   # $cg_info is accepted but not used
+  print $out "typedef struct $name<Impl, Bot> {\n";
+}
+
+__DATA__
+SpinLock
+Main
+CUDAExecutor
+TaskManagerImpl
+LockImpl
+MultiDim
+SynchronizedQueue
+ArrayStack
+LoopCounter
+TimerImpl
+Node
+CUDAWorker
+Memory
+SemaphoreImpl
+BoundedBuffer
+RotateTree
+CUDABuffer
+Array
+Allocate
+Meta
+SingleLinkedQueue
+CPUWorker
+Integer
+MultiDimIterator
+Element
+RedBlackTree
+
diff -r a4cab67624f7 -r 9146d6017f18 src/trans_impl.pl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/trans_impl.pl	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,180 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+
+use FindBin;
+use lib "$FindBin::Bin/lib";
+use Gears::Util;
+
+use Getopt::Std;
+
+my %opt;
+getopts("w" => \%opt);
+
+my $impl_file = shift or die 'require impl file';
+my $impl_ir         = Gears::Util->parse_with_rewrite($impl_file);
+my $interface_file  = Gears::Util->find_header($impl_ir->{isa},"$FindBin::Bin");
+
+my $inter_ir        = Gears::Util->parse_with_rewrite($interface_file);
+
+# Without -w this is a dry run to STDOUT, so the .cbc file is only created/truncated with -w.
+# The substitution is anchored so an input not ending in ".h" can never be opened for writing.
+my $stdout = *STDOUT;
+if ($opt{w}) {
+    (my $output_file = $impl_file) =~ s/\.h$/.cbc/
+        or die "impl file name must end in .h: $impl_file\n";
+    open my $fh, '>', $output_file or die "cannot open $output_file: $!\n";
+    $stdout = $fh;
+}
+
+emit_include_part($stdout, $inter_ir->{name});
+emit_impl_header_in_comment($stdout, $impl_file);
+emit_constracutor($stdout,$impl_ir,$inter_ir);
+emit_code_gears($stdout,$impl_ir,$inter_ir);
+close $stdout or die "cannot close output: $!\n";
+
+sub emit_include_part {   # write the #include / #interface preamble of the generated .cbc to $out
+  my ($out, $interface) = @_;   # $interface: interface name without the ".h" suffix
+  print $out <<"EOF";
+#include "../context.h"
+#interface "$interface.h"
+
+EOF
+}
+
+sub emit_impl_header_in_comment {   # copy the impl header into $out as a block of // comment lines
+  my ($out, $impl_file) = @_;
+  my $header_text = Gears::Util->slup($impl_file);
+  print $out "// ----\n";
+  print $out "// $_\n" for split /\n/, $header_text;
+  print $out "// ----\n\n";
+}
+
+sub emit_constracutor {
+  my ($out, $impl_ir, $inter_ir) = @_;
+  # Writes the text of a create<Impl>() constructor to $out, built from the two parsed headers.
+  my @inter_data     = @{$inter_ir->{data}};
+  my @impl_data      = @{$impl_ir->{data}};
+  my $instance_inter = shift @inter_data;
+  # If the first interface field looks like "union Data* <name>", <name> becomes the interface variable name.
+  if ($instance_inter =~ /union\s*Data\*\s*(\w+)/) {
+    $instance_inter = $1;
+  }
+  # Impl variable name: first letter lowered, every later capital X rewritten to _x.
+  my $instance_impl  = lcfirst $impl_ir->{name};
+  $instance_impl     =~ s/([A-Z])/_\l$1/g;
+  # Function header, the two `new` allocations, and the link from the interface to the impl object.
+  print $out <<"EOF";
+$impl_ir->{isa}* create$impl_ir->{name}(struct Context* context) {
+    struct $impl_ir->{isa}* $instance_inter  = new $impl_ir->{isa}();
+    struct $impl_ir->{name}* $instance_impl = new $impl_ir->{name}();
+    $instance_inter->$instance_inter = (union Data*)$instance_impl;
+EOF
+  # Impl fields: "<word> <word>* name" is set to NULL, "<word> <word> name" to 0; other shapes are skipped.
+  for my $datum (@impl_data) {
+        if ($datum =~ /\w+ \w+\* (\w+)/) {
+            print $out "    ${instance_impl}->$1 = NULL;\n";
+            next;
+        }
+        if ($datum =~ /\w+ \w+ (\w+)/) {
+            print $out "    ${instance_impl}->$1 = 0;\n";
+        }
+  }
+  # Same treatment for the remaining interface fields (the first one was shifted off above).
+  for my $datum (@inter_data) {
+        if ($datum =~ /\w+ \w+\* (\w+)/) {
+            print $out "    ${instance_inter}->$1 = NULL;\n";
+            next;
+        }
+        if ($datum =~ /\w+ \w+ (\w+)/) {
+            print $out "    ${instance_inter}->$1 = 0;\n";
+        }
+  }
+  # Assign C_<gear><Impl> to the interface member named after each interface code gear.
+  for my $code (@{$inter_ir->{codes}}) {
+      my $code_gear = $code->{name};
+      print $out "    ${instance_inter}->$code_gear = C_$code_gear$impl_ir->{name};\n"
+  }
+  # Return the interface object and close the generated function.
+print $out "    return $instance_inter;\n";
+print $out "}\n";
+}
+
+
+sub emit_code_gears {
+  my ($out, $impl_ir, $inter_ir) = @_;
+  my $impl = $impl_ir->{name};
+  my $interface_name = $inter_ir->{name};
+  # Writes one empty __code skeleton per code gear to $out. The locals set up next are not read later in this sub.
+  my @inter_data = @{$inter_ir->{data}};
+  my $instance_inter = shift @inter_data;
+  if ($instance_inter =~ /union\s*Data\*\s*(\w+)/) {
+    $instance_inter = $1;
+  }
+  my $instance_impl = lcfirst $impl_ir->{name};
+  $instance_impl =~ s/([A-Z])/_\l$1/g;
+  my $data_gear_types = {};
+  # Part 1: skeletons for the code gears declared by the implementation header itself, if any.
+  if (defined $impl_ir->{codes}) {
+    for my $cg (@{$impl_ir->{codes}}) {
+      my $data_gears = $cg->{args};
+      while ($data_gears =~ /Type\*\s*(\w+),/g) {
+          $data_gears =~ s/Type\*/struct $impl*/;
+      }
+      # Type* was rewritten to struct <Impl>* above; Isa* is rewritten to struct <Interface>* below.
+      while ($data_gears =~ /Isa\*\s*(\w+),/g) {
+          $data_gears =~ s/Isa\*/struct $interface_name*/;
+      }
+      print $out "__code $cg->{name}$impl(";
+      print $out "$data_gears) {\n\n";
+      # Collect the continuation parameters from the argument list; they look like:
+      #__code next(...), __code whenEmpty(...)
+      my @cg = ();
+      while ($data_gears =~ /__code ([\w(\.)\*\s,]+?\)),?/g) {
+        push(@cg, $1);
+      }
+      # Exactly two continuations: if (:TODO:) goto first, else goto second. Otherwise only goto the first.
+      if (@cg) {
+        if (@cg == 2) {
+          print $out "  if (:TODO:) {\n";
+          print $out "       goto ",shift(@cg),";\n";
+          print $out "  }\n";
+          print $out "  goto ",shift(@cg),";\n";
+        } else {
+          print $out "  goto ",shift(@cg),";\n";
+        }
+      }
+      print $out "}\n\n";
+    }
+  }
+  # Part 2: skeletons for every code gear of the interface, with each "Impl" replaced by "struct <Impl>".
+  for my $code_ir (@{$inter_ir->{codes}}) {
+    my $data_gears = $code_ir->{args};
+    $data_gears =~ s/Impl/struct $impl/g;
+    # NOTE(review): here Type* maps to the interface struct, while Part 1 maps it to the impl struct; confirm intended.
+    while ($data_gears =~ /Type\*\s*(\w+),/g) {
+        $data_gears =~ s/Type\*/struct $interface_name*/;
+    }
+
+    print $out "__code $code_ir->{name}$impl(";
+    print $out "$data_gears) {\n\n";
+    # Collect the continuation parameters from the argument list; they look like:
+    #__code next(...), __code whenEmpty(...)
+    my @cg = ();
+    while ($data_gears =~ /__code ([\w(\.)\*\s,]+?\)),?/g) {
+      push(@cg, $1);
+    }
+    # Same goto skeleton as in Part 1.
+    if (@cg) {
+      if (@cg == 2) {
+        print $out "  if (:TODO:) {\n";
+        print $out "       goto ",shift(@cg),";\n";
+        print $out "  }\n";
+        print $out "  goto ",shift(@cg),";\n";
+      } else {
+        print $out "  goto ",shift(@cg),";\n";
+      }
+    }
+    print $out "}\n\n";
+  }
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/update_context.pl
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/update_context.pl	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,72 @@
+#!/usr/bin/env perl
+use strict;
+use warnings;
+use Getopt::Std;
+
+use FindBin;
+use lib "$FindBin::Bin/lib";
+use Gears::Util;
+
+my %opt;
+getopts("wc" => \%opt);   # -c: print only the generated text and exit; -w: rewrite context.h in place
+
+my $interface_file = shift or die "require interface file";
+my $h2context = Gears::Util->parse_interface($interface_file);
+
+my $context = Gears::Util->h2context_str($h2context);
+
+if ($opt{c}) {
+  print "$context";
+  exit 0;
+}
+# The generated text is placed between the part of context.h before the "union Data end" line and the rest.
+my ($first,$last) = slup_context_h($h2context->{name});
+
+if ($opt{w}) {
+  context_write(@{$first},$context,@{$last});
+} else {
+  context_dump(@{$first},$context,@{$last});
+}
+
+
+# Split ./context.h around the "union Data end" line; returns (\@lines_before, \@marker_line_and_rest).
+# Prints a warning and exits with status 1 if "struct <name> {" already appears before that line.
+sub slup_context_h {
+  my $data_gear_name = shift;
+  # fail loudly: otherwise an unreadable context.h is silently treated as an empty file
+  open my $fh, '<', 'context.h' or die "cannot open context.h: $!\n";
+
+  my @first_context_headers = ();
+  my @last_context_headers = ();
+
+  while (my $line = <$fh>) {
+    if ( $line =~ /union Data end/) {
+      push(@last_context_headers, $line);
+      push(@last_context_headers, <$fh>);
+      last;
+    }
+    # match only the definition "struct Name {", not "struct NameImpl {" or a member like "struct Name* x;"
+    if ( $line =~ /\bstruct\s+\Q$data_gear_name\E\s*\{/) {
+      print "WARN! $data_gear_name struct already exists\n";
+      exit 1;
+    }
+    push(@first_context_headers, $line);
+  }
+
+  close $fh;
+  return (\@first_context_headers,\@last_context_headers);
+}
+
+# Print the given lines, unchanged and in order, to the default output handle.
+sub context_dump {
+  my $text = join '', @_;
+  print $text;
+}
+
+sub context_write {   # overwrite ./context.h with the given lines; dies if the file cannot be written
+  open my $fh, '>', "context.h" or die "cannot write context.h: $!\n";
+  for my $line (@_) {
+    print $fh "$line";
+  }
+  close $fh or die "cannot close context.h: $!\n";   # a failed close can mean lost buffered output
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/verifier/llrbContextWithVerifier.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/verifier/llrbContextWithVerifier.c	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,46 @@
+#include <stdio.h>
+#include "llrbContextWithVerifier.h"
+
+/* Depth of the shallowest leaf below node, where node itself is at depth `height`; node must not be NULL. */
+unsigned int min_height(struct Node* node, unsigned int height) {
+    struct Node* lhs = node->left;
+    struct Node* rhs = node->right;
+    unsigned int below = height + 1;
+
+    if (lhs == NULL && rhs == NULL) return height;      /* leaf */
+    if (lhs == NULL) return min_height(rhs, below);     /* only a right subtree */
+    if (rhs == NULL) return min_height(lhs, below);     /* only a left subtree */
+
+    unsigned int via_left  = min_height(lhs, below);
+    unsigned int via_right = min_height(rhs, below);
+    return (via_left < via_right) ? via_left : via_right;
+}
+
+/* Depth of the deepest leaf below node, where node itself is at depth `height`; node must not be NULL. */
+unsigned int max_height(struct Node* node, unsigned int height) {
+    struct Node* lhs = node->left;
+    struct Node* rhs = node->right;
+    unsigned int below = height + 1;
+
+    if (lhs == NULL && rhs == NULL) return height;      /* leaf */
+    if (lhs == NULL) return max_height(rhs, below);     /* only a right subtree */
+    if (rhs == NULL) return max_height(lhs, below);     /* only a left subtree */
+
+    unsigned int via_left  = max_height(lhs, below);
+    unsigned int via_right = max_height(rhs, below);
+    return (via_left > via_right) ? via_left : via_right;
+}
+
+void verify_tree_height(struct Node* root) {   /* report and exit if the tree breaks the LLRB height bound */
+    if (root == NULL) return;   /* nothing to check for an empty tree */
+    /* Shallowest and deepest leaf, counting the root itself as height 1. */
+    unsigned int min_h = min_height(root, 1);
+    unsigned int max_h = max_height(root, 1);
+    /* max == 2*min is still legal (max-height <= 2*min-height); only a strictly larger max is a violation. */
+    if (max_h > 2*min_h) {
+        printf("llrb-condition violated.\n");
+        printf("\tmin-height %u", min_h);
+        printf("\tmax-height %u", max_h);
+        exit(EXIT_FAILURE);
+    }
+}
diff -r a4cab67624f7 -r 9146d6017f18 src/verifier/llrbContextWithVerifier.h
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/verifier/llrbContextWithVerifier.h	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,5 @@
+#include "llrbContext.h"
+
+unsigned int min_height(struct Node* node, unsigned int height);   /* depth of the shallowest leaf under node; node is at depth `height` */
+unsigned int max_height(struct Node* node, unsigned int height);   /* depth of the deepest leaf under node; node is at depth `height` */
+void verify_tree_height(struct Node* root);   /* prints a diagnostic and exits when max/min leaf depth break the LLRB bound; NULL root is accepted */
diff -r a4cab67624f7 -r 9146d6017f18 src/verifier/verify_put_cs.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/verifier/verify_put_cs.c	Thu Jan 16 15:12:06 2020 +0900
@@ -0,0 +1,29 @@
+/* Verification of LLRB-Tree height in put operations.
+ * LLRB-Tree allows (max-height) <= 2*(min-height).
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include "llrbContextWithVerifier.h"
+
+__code meta(struct Context* context, enum Code next) {   /* runs the height check, then continues to `next` */
+    if (next == Put) {   /* only when the upcoming code gear is Put */
+        verify_tree_height(context->data[Tree]->tree.root);
+    }
+    goto (context->code[next])(context);   /* jump to the code gear stored at context->code[next] */
+}
+
+__code start_code(struct Context* context, enum Code next) {   /* seeds rand(), logs the seed, continues via meta */
+    unsigned int seed = (unsigned int)time(NULL);
+    srand(seed);   /* the log line below reports this seed, so it must actually be installed */
+    printf("--- srand(%u)\n", seed);
+    goto meta(context, next);
+}
+
+__code exit_code(struct Context* context) {   /* frees the context's code, data and heapStart buffers, then exits */
+    free(context->code);
+    free(context->data);
+    free(context->heapStart);
+    goto exit(0);   /* leaves the process with status 0 */
+}