changeset 589:a4cab67624f7

remove old file
author anatofuz <anatofuz@cr.ie.u-ryukyu.ac.jp>
date Thu, 16 Jan 2020 15:11:11 +0900
parents 78e10562b210
children 9146d6017f18
files src/CMakeLists.txt src/include/allocate.h src/include/context.h src/include/origin_cs.h src/llrb/CMakeLists.txt src/llrb/allocate.c src/llrb/compare.c src/llrb/include/llrbContext.h src/llrb/include/stack.h src/llrb/llrb.c src/llrb/llrbContext.c src/llrb/main.c src/llrb/origin_cs.c src/llrb/stack.c src/test/CMakeLists.txt src/test/Cudasample_cpu.cu src/test/Cudasample_gpu.cu src/test/OpenCL_gpu.c src/test/helper_cuda.h src/test/helper_string.h src/test/main.cu src/test/multiply.cu src/test/test.c src/test/twice.cc src/test/vectorAddDrv.cc src/test/vectorAdd_kernel.cu
diffstat 26 files changed, 0 insertions(+), 4301 deletions(-) [+]
line wrap: on
line diff
--- a/src/CMakeLists.txt	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,18 +0,0 @@
-cmake_minimum_required(VERSION 3.3)
-
-# output compile log
-set(CMAKE_VERBOSE_MAKEFILE 1)
-
-# set compiler
-set(CMAKE_C_COMPILER $ENV{CBC_COMPILER})
-
-# compile option
-add_definitions("-Wall -g -O0")
-
-include_directories(include)
-include_directories($ENV{CUDA_PATH})
-add_subdirectory(allocate)
-add_subdirectory(list)
-add_subdirectory(llrb)
-add_subdirectory(synchronizedQueue)
-add_subdirectory(parallel_execution)
--- a/src/include/allocate.h	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,13 +0,0 @@
-__code allocate();
-__code meta_allocate();
-
-__code allocate(struct Context* context) {
-    goto meta_allocate(context);
-}
-
-__code meta_allocate(struct Context* context) {
-    context->data[++context->dataNum] = context->heap;
-    context->heap += context->data[0]->allocate.size;
-
-    goto meta(context, context->next[--context->current]);
-}
--- a/src/include/context.h	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-typedef struct Context_st {
-    void* ds;
-    void* mds;
-    void* ds_heap;
-    void* mds_heap;
-    __code (*next)();
-    void* head;
-} Context;
--- a/src/include/origin_cs.h	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,3 +0,0 @@
-extern __code start_code(struct Context* context, enum Code next);
-extern __code exit_code(struct Context* context);
-extern __code meta(struct Context* context, enum Code next);
--- a/src/llrb/CMakeLists.txt	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,28 +0,0 @@
-cmake_minimum_required(VERSION 2.8)
-
-add_definitions("-Wall -g -O0")
-
-set(CMAKE_C_COMPILER $ENV{CbC_Clang}/clang)
-
-include_directories(include)
-add_executable(llrb
-               main.c
-               llrb.c
-               llrbContext.c
-               allocate.c
-               compare.c
-               stack.c
-               origin_cs.c
-)
-
-
-add_executable(llrb_with_put_verify
-               main.c
-               llrb.c
-               llrbContext.c
-               allocate.c
-               compare.c
-               stack.c
-               verifier/llrbContextWithVerifier.c
-               verifier/verify_put_cs.c
-)
--- a/src/llrb/allocate.c	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,6 +0,0 @@
-#include "llrbContext.h"
-
-void allocator(struct Context* context) {
-    context->data[++context->dataNum] = context->heap;
-    context->heap += context->data[Allocate]->allocate.size;
-}
--- a/src/llrb/compare.c	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,11 +0,0 @@
-#include "llrbContext.h"
-
-void compare(struct Context* context, struct Tree* tree, int key1, int key2) {
-    if (key1 == key2) {
-        tree->result = EQ;
-    } else if (key1 < key2) {
-        tree->result = GT;
-    } else {
-        tree->result = LT;
-    }
-}
--- a/src/llrb/include/llrbContext.h	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,108 +0,0 @@
-/* Context definition for llrb example */
-#include "stack.h"
-
-#define ALLOCATE_SIZE 1000
-
-enum Code {
-    Code1,
-    Code2,
-    Code3,
-    Code4,
-    Code5,
-    Find,
-    Not_find,
-    Code6,
-    Allocator,
-    Put,
-    Replace,
-    Insert,
-    Compare,
-    RotateL,
-    RotateR,
-    SetTree,
-    InsertCase1,
-    InsertCase2,
-    InsertCase3,
-    InsertCase4,
-    InsertCase4_1,
-    InsertCase4_2,
-    InsertCase5,
-    StackClear,
-    Get,
-    Search,
-    Delete,
-    Delete1,
-    Delete2,
-    Delete3,
-    Replace_d1,
-    Replace_d2,
-    FindMax1,
-    FindMax2,
-    DeleteCase1,
-    DeleteCase2,
-    DeleteCase3,
-    DeleteCase4,
-    DeleteCase5,
-    DeleteCase6,
-    Exit,
-};
-
-enum Relational {
-    EQ,
-    GT,
-    LT,
-};
-
-enum UniqueData {
-    Allocate,
-    Tree,
-    Node,
-};
-
-struct Context {
-    enum Code next;
-    int codeNum;
-    __code (**code) (struct Context*);
-    void* heapStart;
-    void* heap;
-    long heapLimit;
-    int dataNum;
-    stack_ptr code_stack;
-    stack_ptr node_stack;
-    union Data **data;
-};
-
-union Data {
-    struct Comparable { // inteface
-        enum Code compare;
-        union Data* data;
-    } compare;
-    struct Count {
-        enum Code next;
-        long i;
-    } count;
-    struct Tree {
-        enum Code next;
-        struct Node* root;
-        struct Node* current;
-        struct Node* deleted;
-        int result;
-    } tree;
-    struct Node {
-        // need to tree
-        enum Code next;
-        int key; // comparable data segment
-        int value;
-        struct Node* left;
-        struct Node* right;
-        // need to balancing
-        enum Color {
-            Red,
-            Black,
-        } color;
-    } node;
-    struct Allocate {
-        enum Code next;
-        long size;
-    } allocate;
-};
--- a/src/llrb/include/stack.h	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,16 +0,0 @@
-#include <stdlib.h>
-
-typedef struct {
-    size_t size;
-    int max;
-    int num;
-    void* data;
-} stack, *stack_ptr;
-
-extern stack_ptr stack_init();
-extern stack_ptr stack_realloc();
-extern void stack_free();
-extern int stack_push();
-extern int stack_pop();
-extern int isMax();
-extern int isEmpty();
--- a/src/llrb/llrb.c	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,669 +0,0 @@
-#include <stdio.h>
-
-#include "llrbContext.h"
-#include "origin_cs.h"
-
-extern void allocator(struct Context* context);
-extern void compare(struct Context* context, struct Tree* tree, int key1, int key2);
-
-extern int num;
-
-__code put(struct Context* context, struct Tree* tree, struct Node* root, struct Allocate* allocate) {
-    allocate->size = sizeof(struct Node);
-    allocator(context);
-    
-    stack_push(context->code_stack, &context->next);
-
-    context->next = StackClear;
-    stack_push(context->code_stack, &context->next);
-    
-    tree->root = &context->data[context->dataNum]->node;
-    
-    if (root) {
-        tree->current = root;
-        compare(context, tree, tree->current->key, context->data[Node]->node.key);
-
-        goto meta(context, Replace);
-    }
-
-    goto meta(context, Insert);
-}
-
-__code put_stub(struct Context* context) {
-    goto put(context, &context->data[Tree]->tree, context->data[Tree]->tree.root, &context->data[Allocate]->allocate);
-}
-
-__code replaceNode(struct Context* context, struct Tree* tree, struct Node* oldNode, struct Node* newNode, int result) {
-    *newNode = *oldNode;
-    stack_push(context->node_stack, &newNode);
-
-    if (result == EQ) {
-        newNode->value = context->data[Node]->node.value;
-
-        stack_pop(context->code_stack, &context->next);
-        goto meta(context, context->next);
-    } else if (result == GT) {
-        tree->current = oldNode->right;
-        newNode->right = context->heap;
-    } else {
-        tree->current = oldNode->left;
-        newNode->left = context->heap;
-    }
-
-    context->data[Allocate]->allocate.size = sizeof(struct Node);
-    allocator(context);
-
-    if (tree->current) {
-        compare(context, tree, tree->current->key, context->data[Node]->node.key);
-        goto meta(context, Replace);
-    }
-    
-    goto meta(context, Insert);
-}
-
-__code replaceNode_stub(struct Context* context) {
-    goto replaceNode(context, &context->data[Tree]->tree, context->data[Tree]->tree.current, &context->data[context->dataNum]->node, context->data[Tree]->tree.result);
-}
-
-__code insertNode(struct Context* context, struct Tree* tree, struct Node* node, struct Node* newNode) {
-    node->color = Red;
-    *newNode = *node;
-    
-    tree->current = newNode;
-
-    goto meta(context, InsertCase1);
-}
-
-__code insertNode_stub(struct Context* context) {
-    goto insertNode(context, &context->data[Tree]->tree, &context->data[Node]->node, &context->data[context->dataNum]->node);
-}
-
-__code insertCase1(struct Context* context, struct Tree* tree, struct Node* current) {
-    if (!isEmpty(context->node_stack))
-        goto meta(context, InsertCase2);
-
-    tree->root->color = Black;
-
-    stack_pop(context->code_stack, &context->next);
-    goto meta(context, context->next);
-}
-
-__code insert1_stub(struct Context* context) {
-    goto insertCase1(context, &context->data[Tree]->tree, context->data[Tree]->tree.current);
-}
-
-__code insertCase2(struct Context* context, struct Node* current) {
-    struct Node* parent;
-    stack_pop(context->node_stack, &parent);
-    
-    if (parent->color == Black) {
-        stack_pop(context->code_stack, &context->next);
-        goto meta(context, context->next);
-    }
-    
-    stack_push(context->node_stack, &parent);
-    goto meta(context, InsertCase3);
-}
-
-__code insert2_stub(struct Context* context) {
-    goto insertCase2(context, context->data[Tree]->tree.current);
-}
-
-__code insertCase3(struct Context* context, struct Tree* tree, struct Node* current) {
-    struct Node* parent;
-    struct Node* uncle;
-    struct Node* grandparent;
-
-    stack_pop(context->node_stack, &parent);
-    stack_pop(context->node_stack, &grandparent);
-
-    if (grandparent->left == parent)
-        uncle = grandparent->right;
-    else
-        uncle = grandparent->left;
-
-    if (uncle && (uncle->color == Red)) {
-        parent->color = Black;
-        uncle->color = Black;
-        grandparent->color = Red;
-        tree->current = grandparent;
-        goto meta(context, InsertCase1);
-    }
-
-    stack_push(context->node_stack, &grandparent);
-    stack_push(context->node_stack, &parent);
-
-    goto meta(context, InsertCase4);
-}
-
-__code insert3_stub(struct Context* context) {
-    goto insertCase3(context, &context->data[Tree]->tree, context->data[Tree]->tree.current);
-}
-
-__code insertCase4(struct Context* context, struct Tree* tree, struct Node* current) {
-    struct Node* parent;
-    struct Node* grandparent;
-
-    stack_pop(context->node_stack, &parent);
-    stack_pop(context->node_stack, &grandparent);
-
-    stack_push(context->node_stack, &grandparent);
-
-    tree->current = parent;
-
-    if ((current == parent->right) && (parent == grandparent->left)) {
-        context->next = InsertCase4_1;
-
-        stack_push(context->code_stack, &context->next);
-        goto meta(context, RotateL);
-    } else if ((current == parent->left) && (parent == grandparent->right)) {
-        context->next = InsertCase4_2;
-        
-        stack_push(context->code_stack, &context->next);
-        goto meta(context, RotateR);
-    }
-
-    stack_push(context->node_stack, &parent);
-    tree->current = current;
-    goto meta(context, InsertCase5);
-}
-
-__code insert4_stub(struct Context* context) {
-    goto insertCase4(context, &context->data[Tree]->tree, context->data[Tree]->tree.current);
-}
-
-__code insertCase4_1(struct Context* context, struct Tree* tree) {
-    stack_push(context->node_stack, &tree->current);
-    tree->current = tree->current->left;
-    goto meta(context, InsertCase5);
-}
-
-__code insert4_1_stub(struct Context* context) {
-    goto insertCase4_1(context, &context->data[Tree]->tree);
-}   
-
-__code insertCase4_2(struct Context* context, struct Tree* tree) {
-    stack_push(context->node_stack, &tree->current);
-    tree->current = tree->current->right;
-    goto meta(context, InsertCase5);
-}
-
-__code insert4_2_stub(struct Context* context) {
-    goto insertCase4_2(context, &context->data[Tree]->tree);
-}   
-
-__code insertCase5(struct Context* context, struct Tree* tree, struct Node* current) {
-    struct Node* parent;
-    struct Node* grandparent;
-
-    stack_pop(context->node_stack, &parent);
-    stack_pop(context->node_stack, &grandparent);
-    
-    parent->color = Black;
-    grandparent->color = Red;
-
-    tree->current = grandparent;
-
-    if ((current == parent->left) && (parent == grandparent->left))
-        goto meta(context, RotateR);
-    else
-        goto meta(context, RotateL);
-}
-
-__code insert5_stub(struct Context* context) {
-    goto insertCase5(context, &context->data[Tree]->tree, context->data[Tree]->tree.current);
-}
-
-__code rotateLeft(struct Context* context, struct Node* node, struct Tree* tree) {
-    struct Node* tmp = node->right;
-    struct Node* parent = 0;
-    
-    stack_pop(context->node_stack, &parent);
-
-    if (parent) {
-        if (node == parent->left)
-            parent->left = tmp;
-        else
-            parent->right = tmp;
-    } else {
-        tree->root = tmp;
-    }
-
-    stack_push(context->node_stack, &parent);
-    
-    node->right = tmp->left;
-    tmp->left = node;
-    tree->current = tmp;
-    
-    stack_pop(context->code_stack, &context->next);
-    goto meta(context, context->next);
-}
-    
-__code rotateLeft_stub(struct Context* context) {
-    goto rotateLeft(context, context->data[Tree]->tree.current, &context->data[Tree]->tree);
-}
-
-__code rotateRight(struct Context* context, struct Node* node, struct Tree* tree) {
-    struct Node* tmp = node->left;
-    struct Node* parent = 0;
-    
-    stack_pop(context->node_stack, &parent);
-
-    if (parent) {
-        if (node == parent->left)
-            parent->left = tmp;
-        else
-            parent->right = tmp;
-    } else {
-        tree->root = tmp;
-    }
-
-    stack_push(context->node_stack, &parent);
-    
-    node->left = tmp->right;
-    tmp->right = node;
-    tree->current = tmp;
-    
-    stack_pop(context->code_stack, &context->next);
-    goto meta(context, context->next);
-}
-
-__code rotateRight_stub(struct Context* context) {
-    goto rotateRight(context, context->data[Tree]->tree.current, &context->data[Tree]->tree);
-}
-
-__code stackClear(struct Context* context, stack_ptr node_stack, struct Tree* tree) {
-    if (stack_pop(node_stack, &tree->current) == 0)
-        goto meta(context, StackClear);
-
-    tree->current = 0;
-
-    stack_pop(context->code_stack, &context->next);
-    goto meta(context, context->next);
-}
-
-__code stackClear_stub(struct Context* context) {
-    goto stackClear(context, context->node_stack, &context->data[Tree]->tree);
-}
-    
-
-/* /\* __code get(struct Context* context, struct Tree* tree) { *\/ */
-/* /\*     if (tree->root) { *\/ */
-/* /\*         tree->current = tree->root; *\/ */
-
-/* /\*         goto meta(context, Search); *\/ */
-/* /\*     } *\/ */
-
-/* /\*     stack_pop(context->code_stack, &context->next); *\/ */
-/* /\*     goto meta(context, context->next); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code get_stub(struct Context* context) { *\/ */
-/* /\*     goto get(context, &context->data[Tree]->tree); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code search(struct Context* context, struct Tree* tree, struct Node* node) { *\/ */
-/* /\*     compare(context, tree, tree->current->key, node->key); *\/ */
-    
-/* /\*     if (tree->result == EQ) { *\/ */
-/* /\*         *node = *tree->current; *\/ */
-        
-/* /\*         goto meta(context, context->next); *\/ */
-/* /\*     } else if (tree->result == GT) { *\/ */
-/* /\*         tree->current = tree->current->right; *\/ */
-/* /\*     } else { *\/ */
-/* /\*         tree->current = tree->current->left; *\/ */
-/* /\*     } *\/ */
-        
-/* /\*     if (tree->current) *\/ */
-/* /\*         goto meta(context, Search); *\/ */
-
-/* /\*     stack_pop(context->code_stack, &context->next); *\/ */
-/* /\*     goto meta(context, context->next); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code search_stub(struct Context* context) { *\/ */
-/* /\*     goto search(context, &context->data[Tree]->tree, &context->data[Node]->node); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code delete(struct Context* context, struct Tree* tree) { *\/ */
-/* /\*     if (tree->root) { *\/ */
-/* /\*         stack_push(context->code_stack, &context->next); *\/ */
-/* /\*         context->next = Delete1; *\/ */
-/* /\*         goto meta(context, Get); *\/ */
-/* /\*     } *\/ */
-
-/* /\*     goto meta(context, context->next); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code delete_stub(struct Context* context) { *\/ */
-/* /\*     goto delete(context, &context->data[Tree]->tree); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code delete1(struct Context* context, struct Tree* tree, struct Allocate* allocate) { *\/ */
-/* /\*     allocate->size = sizeof(struct Node); *\/ */
-/* /\*     allocator(context); *\/ */
-    
-/* /\*     struct Node* root = tree->root; *\/ */
-
-/* /\*     tree->root = &context->data[context->dataNum]->node; *\/ */
-/* /\*     tree->current = root; *\/ */
-
-/* /\*     compare(context, tree, tree->current->key, context->data[Node]->node.key); *\/ */
-    
-/* /\*     goto meta(context, Replace_d1); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code delete1_stub(struct Context* context) { *\/ */
-/* /\*     goto delete1(context, &context->data[Tree]->tree, &context->data[Allocate]->allocate); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code delete2(struct Context* context, struct Node* current) { *\/ */
-/* /\*     if (current->color == Black) { *\/ */
-/* /\*         struct Node* child = current->right == NULL ? current->left : current->right; *\/ */
-/* /\*         current->color = child == NULL ? Black : child->color; *\/ */
-
-/* /\*         goto meta(context, DeleteCase1); *\/ */
-/* /\*     } *\/ */
-
-/* /\*     goto meta(context, Delete3); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code delete2_stub(struct Context* context) { *\/ */
-/* /\*     goto delete2(context, context->data[Tree]->tree.current); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code delete3(struct Context* context, struct Tree* tree, struct Node* current) { *\/ */
-/* /\*     struct Node* tmp = current->right == NULL ? current->left : current->right; *\/ */
-
-/* /\*     if (current->parent) { *\/ */
-/* /\*         if (current == current->parent->left) *\/ */
-/* /\*             current->parent->left = tmp; *\/ */
-/* /\*         else *\/ */
-/* /\*             current->parent->right = tmp; *\/ */
-/* /\*     } else { *\/ */
-/* /\*         tree->root = tmp; *\/ */
-/* /\*     } *\/ */
-
-/* /\*     if (tmp) *\/ */
-/* /\*         tmp->parent = current->parent; *\/ */
-
-/* /\*     if (current->parent == NULL && tmp) *\/ */
-/* /\*         tmp->color = Black; *\/ */
-
-/* /\*     current == current->parent->left ? (current->parent->left = NULL) : (current->parent->right = NULL); *\/ */
-
-/* /\*     stack_pop(context->code_stack, &context->next); *\/ */
-/* /\*     goto meta(context, context->next); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code delete3_stub(struct Context* context) { *\/ */
-/* /\*     goto delete3(context, &context->data[Tree]->tree, context->data[Tree]->tree.current); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code replaceNodeForDelete1(struct Context* context, struct Tree* tree, struct Node* oldNode, struct Node* newNode, int result) { *\/ */
-/* /\*     *newNode = *oldNode; *\/ */
-
-/* /\*     if (result == EQ) *\/ */
-/* /\*         goto meta(context, Replace_d2); *\/ */
-/* /\*     else if (result == GT) *\/ */
-/* /\*         tree->current = newNode->right; *\/ */
-/* /\*     else *\/ */
-/* /\*         tree->current = newNode->left; *\/ */
-
-/* /\*     tree->current->parent = newNode; *\/ */
-    
-/* /\*     if (tree->current->left == NULL && tree->current->right == NULL) *\/ */
-/* /\*         goto meta(context, Delete2); *\/ */
-    
-/* /\*     if (result == GT) *\/ */
-/* /\*         newNode->right = context->heap; *\/ */
-/* /\*     else if (result == LT) *\/ */
-/* /\*         newNode->left = context->heap; *\/ */
-    
-/* /\*     allocator(context); *\/ */
-    
-/* /\*     compare(context, tree, tree->current->key, context->data[Node]->node.key); *\/ */
-    
-/* /\*     goto meta(context, Replace_d1); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code replaceNodeForDelete1_stub(struct Context* context) { *\/ */
-/* /\*     goto replaceNodeForDelete1(context, &context->data[Tree]->tree, context->data[Tree]->tree.current, &context->data[context->dataNum]->node, context->data[Tree]->tree.result); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code replaceNodeForDelete2(struct Context* context, struct Tree* tree, struct Node* newNode) { *\/ */
-/* /\*     if (tree->current->left && tree->current->right) { *\/ */
-/* /\*         newNode->left->parent = newNode; *\/ */
-/* /\*         tree->current = newNode->left; *\/ */
-/* /\*         newNode->left = context->heap; *\/ */
-/* /\*         tree->deleted = newNode; *\/ */
-
-/* /\*         allocator(context); *\/ */
-/* /\*         tree->current->parent = newNode; *\/ */
-        
-/* /\*         goto meta(context, FindMax1); *\/ */
-/* /\*     } *\/ */
-
-/* /\*     goto meta(context, Delete2); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code replaceNodeForDelete2_stub(struct Context* context) { *\/ */
-/* /\*     goto replaceNodeForDelete2(context, &context->data[Tree]->tree, &context->data[context->dataNum]->node); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code findMax1(struct Context* context, struct Tree* tree, struct Node* oldNode, struct Node* newNode) { *\/ */
-/* /\*     *newNode = *oldNode; *\/ */
-
-/* /\*     if (newNode->right) *\/ */
-/* /\*         goto meta(context, FindMax2); *\/ */
-    
-/* /\*     tree->deleted->key = newNode->key; *\/ */
-/* /\*     tree->deleted->value = newNode->value; *\/ */
-
-/* /\*     tree->current = newNode; *\/ */
-
-/* /\*     goto meta(context, Delete2); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code findMax1_stub(struct Context* context) { *\/ */
-/* /\*     goto findMax1(context, &context->data[Tree]->tree, context->data[Tree]->tree.current, &context->data[context->dataNum]->node); *\/ */
-/* /\* } *\/ */
-    
-
-/* /\* __code findMax2(struct Context* context, struct Tree* tree, struct Node* oldNode, struct Node* newNode) { *\/ */
-/* /\*     *newNode = *oldNode; *\/ */
-
-/* /\*     if (newNode->right->right) { *\/ */
-/* /\*         tree->current = newNode->right; *\/ */
-/* /\*         newNode->right = context->heap; *\/ */
-
-/* /\*         allocator(context); *\/ */
-/* /\*         tree->current->parent = newNode; *\/ */
-        
-/* /\*         goto meta(context, FindMax2); *\/ */
-/* /\*     } *\/ */
-
-/* /\*     tree->deleted->key = newNode->right->key; *\/ */
-/* /\*     tree->deleted->value = newNode->right->value; *\/ */
-
-/* /\*     tree->current = newNode; *\/ */
-    
-/* /\*     goto meta(context, Delete2); *\/ */
-/* /\* } *\/ */
-    
-/* /\* __code findMax2_stub(struct Context* context) { *\/ */
-/* /\*     goto findMax2(context, &context->data[Tree]->tree, context->data[Tree]->tree.current, &context->data[context->dataNum]->node); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code deleteCase1(struct Context* context, struct Node* current) { *\/ */
-/* /\*     if (current->parent) *\/ */
-/* /\*         goto meta(context, DeleteCase2); *\/ */
-
-/* /\*     goto meta(context, Delete3); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code deleteCase1_stub(struct Context* context) { *\/ */
-/* /\*     goto deleteCase1(context, context->data[Tree]->tree.current); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code deleteCase2(struct Context* context, struct Tree* tree, struct Node* current) { *\/ */
-/* /\*     struct Node* sibling = current == current->parent->left ? current->parent->right : current->parent->left; *\/ */
-    
-/* /\*     if ((sibling == NULL ? Black : sibling->color) == Red) { *\/ */
-/* /\*         current->parent->color = Red; *\/ */
-/* /\*         sibling->color = Black; *\/ */
-
-/* /\*         current == current->parent->left ? (current->parent->left = context->heap) : (current->parent->right = context->heap); *\/ */
-/* /\*         allocator(context); *\/ */
-/* /\*         context->data[context->dataNum]->node = *sibling; *\/ */
-        
-/* /\*         tree->current = current->parent; *\/ */
-        
-/* /\*         context->next = DeleteCase3; *\/ */
-/* /\*         stack_push(context->code_stack, &context->next); *\/ */
-
-/* /\*         if (current == current->parent->left) *\/ */
-/* /\*             goto meta(context, RotateL); *\/ */
-/* /\*         else *\/ */
-/* /\*             goto meta(context, RotateR); *\/ */
-/* /\*     } *\/ */
-
-/* /\*     goto meta(context, DeleteCase3); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code deleteCase2_stub(struct Context* context) { *\/ */
-/* /\*     goto deleteCase2(context, &context->data[Tree]->tree, context->data[Tree]->tree.current); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code deleteCase3(struct Context* context, struct Tree* tree, struct Node* current) { *\/ */
-/* /\*     struct Node* sibling = current == current->parent->left ? current->parent->right : current->parent->left; *\/ */
-    
-/* /\*     if (current->parent->color == Black && *\/ */
-/* /\*         (sibling == NULL ? Black : sibling->color) == Black && *\/ */
-/* /\*         (sibling->left == NULL ? Black : sibling->left->color) == Black && *\/ */
-/* /\*         (sibling->right == NULL ? Black : sibling->right->color) == Black) { *\/ */
-/* /\*         sibling->color = Red; *\/ */
-
-/* /\*         tree->current = current->parent; *\/ */
-/* /\*         goto meta(context, DeleteCase1); *\/ */
-/* /\*     } *\/ */
-
-/* /\*     goto meta(context, DeleteCase4); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code deleteCase3_stub(struct Context* context) { *\/ */
-/* /\*     goto deleteCase3(context, &context->data[Tree]->tree, context->data[Tree]->tree.current); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code deleteCase4(struct Context* context, struct Node* current) { *\/ */
-/* /\*     struct Node* sibling = current == current->parent->left ? current->parent->right : current->parent->left; *\/ */
-    
-/* /\*     if (current->parent->color == Red && *\/ */
-/* /\*         (sibling == NULL ? Black : sibling->color) == Black && *\/ */
-/* /\*         (sibling->left == NULL ? Black : sibling->left->color) == Black && *\/ */
-/* /\*         (sibling->right == NULL ? Black : sibling->right->color) == Black) { *\/ */
-/* /\*         sibling->color = Red; *\/ */
-/* /\*         current->parent->color = Black; *\/ */
-
-/* /\*         goto meta(context, Delete3); *\/ */
-/* /\*     } *\/ */
-
-/* /\*     goto meta(context, DeleteCase5); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code deleteCase4_stub(struct Context* context) { *\/ */
-/* /\*     goto deleteCase4(context, context->data[Tree]->tree.current); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code deleteCase5(struct Context* context, struct Tree* tree, struct Node* current) { *\/ */
-/* /\*     struct Node* sibling = current == current->parent->left ? current->parent->right : current->parent->left; *\/ */
-/* /\*     sibling->parent = current->parent; *\/ */
-    
-/* /\*     if (current == current->parent->left && *\/ */
-/* /\*         (sibling == NULL ? Black : sibling->color) == Black && *\/ */
-/* /\*         (sibling->left == NULL ? Black : sibling->left->color) == Red && *\/ */
-/* /\*         (sibling->right == NULL ? Black : sibling->right->color) == Black) { *\/ */
-/* /\*         sibling->color = Red; *\/ */
-/* /\*         sibling->left->color = Black; *\/ */
-        
-/* /\*         sibling == sibling->parent->left ? (sibling->parent->left = context->heap) : (sibling->parent->right = context->heap); *\/ */
-/* /\*         allocator(context); *\/ */
-/* /\*         struct Node* tmp = &context->data[context->dataNum]->node; *\/ */
-/* /\*         *tmp = *sibling; *\/ */
-/* /\*         tmp->parent = current; *\/ */
-        
-/* /\*         tmp->left = context->heap; *\/ */
-/* /\*         allocator(context); *\/ */
-/* /\*         context->data[context->dataNum]->node = *sibling->left; *\/ */
-/* /\*         context->data[context->dataNum]->node.parent = tmp; *\/ */
-
-/* /\*         tree->current = tmp; *\/ */
-        
-/* /\*         context->next = DeleteCase6; *\/ */
-/* /\*         stack_push(context->code_stack, &context->next); *\/ */
-
-/* /\*         goto meta(context, RotateR); *\/ */
-/* /\*     } else if (current == current->parent->right && *\/ */
-/* /\*                (sibling == NULL ? Black : sibling->color) == Black && *\/ */
-/* /\*                (sibling->left == NULL ? Black : sibling->left->color) == Black && *\/ */
-/* /\*                (sibling->right == NULL ? Black : sibling->right->color) == Red) { *\/ */
-/* /\*         sibling->color = Red; *\/ */
-/* /\*         sibling->right->color = Black; *\/ */
-
-/* /\*         sibling == sibling->parent->left ? (sibling->parent->left = context->heap) : (sibling->parent->right = context->heap); *\/ */
-/* /\*         allocator(context); *\/ */
-/* /\*         struct Node* tmp = &context->data[context->dataNum]->node; *\/ */
-/* /\*         *tmp = *sibling; *\/ */
-/* /\*         tmp->parent = current; *\/ */
-
-/* /\*         tmp->right = context->heap; *\/ */
-/* /\*         allocator(context); *\/ */
-/* /\*         context->data[context->dataNum]->node = *sibling->right; *\/ */
-/* /\*         context->data[context->dataNum]->node.parent = tmp; *\/ */
-
-/* /\*         tree->current = tmp; *\/ */
-
-/* /\*         context->next = DeleteCase6; *\/ */
-/* /\*         stack_push(context->code_stack, &context->next); *\/ */
-/* /\*         goto meta(context, RotateL); *\/ */
-/* /\*     } *\/ */
-
-/* /\*     goto meta(context, DeleteCase6); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code deleteCase5_stub(struct Context* context) { *\/ */
-/* /\*     goto deleteCase5(context, &context->data[Tree]->tree, context->data[Tree]->tree.current); *\/ */
-/* /\* } *\/ */
-
-/* /\* __code deleteCase6(struct Context* context, struct Tree* tree, struct Node* current) { *\/ */
-/* /\*     struct Node* sibling = current == current->parent->left ? current->parent->right : current->parent->left; *\/ */
-
-/* /\*     sibling == sibling->parent->left ? (sibling->parent->left = context->heap) : (sibling->parent->right = context->heap); *\/ */
-/* /\*     allocator(context); *\/ */
-/* /\*     struct Node* tmp = &context->data[context->dataNum]->node; *\/ */
-/* /\*     *tmp = *sibling; *\/ */
-/* /\*     tmp->parent = current; *\/ */
-
-/* /\*     tmp->color = current->parent->color; *\/ */
-/* /\*     current->parent->color = Black; *\/ */
-    
-/* /\*     context->next = Delete3; *\/ */
-/* /\*     stack_push(context->code_stack, &context->next); *\/ */
-    
-/* /\*     if (current == current->parent->left) { *\/ */
-/* /\*         tmp->right->color = Black; *\/ */
-/* /\*         tree->current = current->parent; *\/ */
-
-/* /\*         goto meta(context, RotateL); *\/ */
-/* /\*     } else { *\/ */
-/* /\*         tmp->left->color = Black; *\/ */
-/* /\*         tree->current = current->parent; *\/ */
-
-/* /\*         goto meta(context, RotateR); *\/ */
-/* /\*     } *\/ */
-/* /\* } *\/ */
-
-/* /\* __code deleteCase6_stub(struct Context* context) { *\/ */
-/* /\*     goto deleteCase6(context, &context->data[Tree]->tree, context->data[Tree]->tree.current); *\/ */
-/* /\* } *\/ */
--- a/src/llrb/llrbContext.c	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,115 +0,0 @@
-#include <stdlib.h>
-
-#include "llrbContext.h"
-
-extern __code code1_stub(struct Context*);
-extern __code code2_stub(struct Context*);
-extern __code code3_stub(struct Context*);
-extern __code code4(struct Context*);
-extern __code code5(struct Context*);
-extern __code find(struct Context*);
-extern __code not_find(struct Context*);
-extern __code code6(struct Context*);
-extern __code meta(struct Context*);
-extern __code put_stub(struct Context*);
-extern __code replaceNode_stub(struct Context*);
-extern __code insertNode_stub(struct Context*);
-extern __code rotateLeft_stub(struct Context*);
-extern __code rotateRight_stub(struct Context*);
-extern __code colorFlip_stub(struct Context*);
-extern __code fixUp_stub(struct Context*);
-extern __code changeReference_stub(struct Context*);
-extern __code insert1_stub(struct Context*);
-extern __code insert2_stub(struct Context*);
-extern __code insert3_stub(struct Context*);
-extern __code insert4_stub(struct Context*);
-extern __code insert4_1_stub(struct Context*);
-extern __code insert4_2_stub(struct Context*);
-extern __code insert5_stub(struct Context*);
-extern __code stackClear_stub(struct Context*);
-extern __code get_stub(struct Context*);
-extern __code search_stub(struct Context*);
-extern __code delete_stub(struct Context*);
-extern __code delete1_stub(struct Context*);
-extern __code delete2_stub(struct Context*);
-extern __code delete3_stub(struct Context*);
-extern __code replaceNodeForDelete1_stub(struct Context*);
-extern __code replaceNodeForDelete2_stub(struct Context*);
-extern __code findMax1_stub(struct Context*);
-extern __code findMax2_stub(struct Context*);
-extern __code deleteCase1_stub(struct Context*);
-extern __code deleteCase2_stub(struct Context*);
-extern __code deleteCase3_stub(struct Context*);
-extern __code deleteCase4_stub(struct Context*);
-extern __code deleteCase5_stub(struct Context*);
-extern __code deleteCase6_stub(struct Context*);
-extern __code exit_code(struct Context*);
-
-__code initLLRBContext(struct Context* context, int num) {
-    context->heapLimit = sizeof(union Data)*ALLOCATE_SIZE;
-    context->code = malloc(sizeof(__code*)*ALLOCATE_SIZE);
-    context->data = malloc(sizeof(union Data*)*ALLOCATE_SIZE);
-    context->heapStart = malloc(context->heapLimit);
-
-    context->codeNum = Exit;
-
-    context->code[Code1]      = code1_stub;
-    context->code[Code2]      = code2_stub;
-    context->code[Code3]      = code3_stub;
-    context->code[Code4]      = code4;
-    context->code[Code5]      = code5;
-    context->code[Find]       = find;
-    context->code[Not_find]   = not_find;
-    context->code[Code6]      = code6;
-    context->code[Put]        = put_stub;
-    context->code[Replace]    = replaceNode_stub;
-    context->code[Insert]     = insertNode_stub;
-    context->code[RotateL]    = rotateLeft_stub;
-    context->code[RotateR]    = rotateRight_stub;
-    context->code[InsertCase1]   = insert1_stub;
-    context->code[InsertCase2]   = insert2_stub;
-    context->code[InsertCase3]   = insert3_stub;
-    context->code[InsertCase4]   = insert4_stub;
-    context->code[InsertCase4_1] = insert4_1_stub;
-    context->code[InsertCase4_2] = insert4_2_stub;
-    context->code[InsertCase5]   = insert5_stub;
-    context->code[StackClear]    = stackClear_stub;
-    /* context->code[Get]        = get_stub; */
-    /* context->code[Search]        = search_stub; */
-    /* context->code[Delete]        = delete_stub; */
-    /* context->code[Delete1]       = delete1_stub; */
-    /* context->code[Delete2]       = delete2_stub; */
-    /* context->code[Delete3]       = delete3_stub; */
-    /* context->code[Replace_d1] = replaceNodeForDelete1_stub; */
-    /* context->code[Replace_d2] = replaceNodeForDelete2_stub; */
-    /* context->code[FindMax1]    = findMax1_stub; */
-    /* context->code[FindMax2]    = findMax2_stub; */
-    /* context->code[DeleteCase1]   = deleteCase1_stub; */
-    /* context->code[DeleteCase2]   = deleteCase2_stub; */
-    /* context->code[DeleteCase3]   = deleteCase3_stub; */
-    /* context->code[DeleteCase4]   = deleteCase4_stub; */
-    /* context->code[DeleteCase5]   = deleteCase5_stub; */
-    /* context->code[DeleteCase6]   = deleteCase6_stub; */
-    context->code[Exit]       = exit_code;
-    
-    context->heap = context->heapStart;
-
-    context->data[Allocate] = context->heap;
-    context->heap += sizeof(struct Allocate);
-
-    context->data[Tree] = context->heap;
-    context->heap += sizeof(struct Tree);
-
-    context->data[Node] = context->heap;
-    context->heap += sizeof(struct Node);
-
-    context->dataNum = Node;
-    
-    struct Tree* tree = &context->data[Tree]->tree;
-    tree->root = 0;
-    tree->current = 0;
-    tree->deleted = 0;
-    
-    context->node_stack = stack_init(sizeof(struct Node*), 100);
-    context->code_stack = stack_init(sizeof(enum Code), 100);
-}
--- a/src/llrb/main.c	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,136 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <sys/time.h>
-
-#include "llrbContext.h"
-#include "origin_cs.h"
-
-static double st_time;
-static double ed_time;
-static clock_t c1,c2;
-
-int num;
-
-extern __code initLLRBContext(struct Context* context, int);
-extern void allocator(struct Context* context);
-
-static double getTime() {
-    struct timeval tv;
-    gettimeofday(&tv, NULL);
-    return tv.tv_sec + (double)tv.tv_usec*1e-6;
-}
-
-void print_tree(struct Node* node, int n) {
-    if (node != 0) {
-        print_tree(node->left, n+1);
-        for (int i=0;i<n;i++)
-            printf("  ");
-        printf("key=%d value=%d color=%s\t%p\n", node->key, node->value,/* n, */node->color==0? "R":"B", node);
-        print_tree(node->right, n+1);
-    }
-}
-
-/* 
-__code code1(Allocate allocate) {
-    allocate.size = sizeof(long);
-    allocate.next = Code2;
-    goto Allocate(allocate);
-}
-*/
-
-__code code1(struct Context* context, struct Allocate *allocate) {
-    allocate->size = sizeof(struct Count);
-    allocator(context);
-    goto meta(context, Code2);
-}
-
-__code code1_stub(struct Context* context) {
-    goto code1(context, &context->data[Allocate]->allocate);
-}
-
-/*
-__code code2(Allocate allocate, Count count) {
-    count.count = 0;
-    goto code3(count);
-}
-*/
-
-__code code2(struct Context* context, struct Count* count) {
-    count->i = num;
-    goto meta(context, Code3);
-}
-
-__code code2_stub(struct Context* context) {
-    goto code2(context, &context->data[context->dataNum]->count);
-}
-
-__code code3(struct Context* context, struct Node* node, struct Count* count) {
-    if (count->i == 0) {
-        goto meta(context, Code4);
-    }
-
-    print_tree(context->data[Tree]->tree.root, 0);
-    puts("");
-    context->next = Code3;
-    node->key = rand()%100+1;
-    node->value = count->i;
-    
-    count->i--;
-    goto meta(context, Put);
-}
-
-__code code3_stub(struct Context* context) {
-    goto code3(context, &context->data[Node]->node, &context->data[3]->count);
-}
-
-__code code4(struct Context* context) {
-    puts("---before---");
-    print_tree(context->data[Tree]->tree.root, 0);
-    
-    struct Node* node = &context->data[Node]->node;
-    node->key = 4;
-    
-    context->next = Code5;
-
-    goto meta(context, Exit);
-}
-
-__code code5(struct Context* context) {
-    puts("---after---");
-    print_tree(context->data[Tree]->tree.root, 0);
-    puts("--Number of Data--");
-    printf("%d\n", context->dataNum);
-
-    goto meta(context, Exit);
-}
-
-__code find(struct Context* context) {
-    context->data[Node]->node.key = 2;
-    context->next = Not_find;
-
-    goto meta(context, Get);
-}
-
-__code not_find(struct Context* context) {
-    context->data[Node]->node.key = 10;
-    context->next = Code6;
-
-    printf("%p\n", context->data[Tree]->tree.current);
-    context->data[Tree]->tree.current = 0;
-    goto meta(context, Get);
-}
-
-__code code6(struct Context* context) {
-    printf("%p\n", context->data[Tree]->tree.current);
-
-    stack_free(context->node_stack);
-
-    goto meta(context, Exit);
-}    
-
-int main(int argc, char** argv) {
-    num = (int)atoi(argv[1]);
-    struct Context* context = (struct Context*)malloc(sizeof(struct Context));
-    initLLRBContext(context, num);
-    goto start_code(context, Code1);
-}
--- a/src/llrb/origin_cs.c	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,17 +0,0 @@
-#include <stdlib.h>
-#include "llrbContext.h"
-
-__code meta(struct Context* context, enum Code next) {
-    goto (context->code[next])(context);
-}
-
-__code start_code(struct Context* context, enum Code next) {
-    goto meta(context, next);
-}
-
-__code exit_code(struct Context* context) {
-    free(context->code);
-    free(context->data);
-    free(context->heapStart);
-    goto exit(0);
-}
--- a/src/llrb/stack.c	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,68 +0,0 @@
-#include <string.h>
-#include "stack.h"
-
-stack_ptr stack_init(size_t size, int max) {
-    stack_ptr stack_ptr;
-    
-    if ((stack_ptr = calloc(1, sizeof(stack))) == NULL)
-        return NULL;
-    
-    if ((stack_ptr->data = calloc(max, size)) == NULL) {
-        free(stack_ptr);
-        return NULL;
-    }
-
-    stack_ptr->size = size;
-    stack_ptr->max = max;
-    stack_ptr->num = 0;
-
-    return stack_ptr;
-}
-
-stack_ptr stack_realloc(stack_ptr stack_ptr, int max) {
-    if (stack_ptr == NULL)
-        return NULL;
-
-    if ((stack_ptr->data = realloc(stack_ptr->data, stack_ptr->size*max)) == NULL)
-        return NULL;
-
-    stack_ptr->max = max;
-
-    return stack_ptr;
-}
-
-void stack_free(stack_ptr stack_ptr) {
-    if (stack_ptr != NULL && stack_ptr->data != NULL) {
-        free(stack_ptr->data);
-        free(stack_ptr);
-    }
-}
-    
-int stack_push(stack_ptr stack_ptr, void* data) {
-    if (stack_ptr->max <= stack_ptr->num)
-        return -1;
-
-    memcpy((char*)stack_ptr->data+stack_ptr->num*stack_ptr->size,  data, stack_ptr->size);
-    stack_ptr->num++;
-
-    return 0;
-}
-
-int stack_pop(stack_ptr stack_ptr, void* data) {
-    if (stack_ptr->num == 0)
-        return -1;
-
-    stack_ptr->num--;
-
-    memcpy(data, (char*)stack_ptr->data+stack_ptr->num*stack_ptr->size, stack_ptr->size);
-    
-    return 0;
-}
-
-int isMax(const stack_ptr stack_ptr) {
-    return stack_ptr->max<=stack_ptr->num;
-}
-
-int isEmpty(const stack_ptr stack_ptr) {
-    return stack_ptr->num<=0;
-}
--- a/src/test/CMakeLists.txt	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,45 +0,0 @@
-cmake_minimum_required(VERSION 2.8)
-
-add_definitions("-Wall -g -O0")
-
-set(NVCCFLAG "-std=c++11" "-g" "-O0" )
-
-include_directories("/usr/local/cuda/include")
-
-set(CMAKE_C_COMPILER $ENV{CBC_COMPILER})
-
-set(CUDA_LINK_FLAGS "-framework CUDA -lc++ -Wl,-search_paths_first -Wl,-headerpad_max_install_names /Developer/NVIDIA/CUDA-8.0/lib/libcudart_static.a -Wl,-rpath,/usr/local/cuda/lib") 
-# for linux use -lcuda
-
-SET( CMAKE_EXE_LINKER_FLAGS  "${CMAKE_EXE_LINKER_FLAGS} ${CUDA_LINK_FLAGS}" )
-
-cmake_minimum_required(VERSION 2.8)
-find_package(CUDA REQUIRED)
-
-add_custom_command(OUTPUT main.o
-   DEPENDS main.cu                          
-   COMMAND nvcc ${NVCCFLAG} -c  main.cu 
-)
-
-add_executable(cudaExmple main.o test.c)
-
-add_custom_command(OUTPUT multiply.ptx
-   DEPENDS multiply.cu                          
-   COMMAND nvcc ${NVCCFLAG}  -c  multiply.cu  -ptx
-)
-
-add_executable(twiceExample twice.cc multiply.ptx )
-
-add_custom_command(OUTPUT vectorAdd_kernel.ptx
-   DEPENDS vectorAdd_kernel.cu                          
-   COMMAND nvcc ${NVCCFLAG}  -c  vectorAdd_kernel.cu  -ptx
-)
-
-add_executable(vectorExample vectorAddDrv.cc vectorAdd_kernel.ptx)
-
-# to compile these, comment out CMAKE_C_COMPILER
-# cuda_add_executable(Cudasample_gpu Cudasample_gpu.cu)
-# cuda_add_executable(Cudasample_cpu Cudasample_cpu.cu)
-
-# target_link_libraries(twiceExample ${CUDA_LIBRARIES} ${MPI_LIBRARIES} ${OPENGL_LIBRARIES})
-
--- a/src/test/Cudasample_cpu.cu	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-#include <stdio.h>
-
-int main(void)
-{       
-        int b;
-      
-        for (b = 99; b >= 0; b--) {
-                switch (b) {
-                case 0:
-                        printf("No more bottles of beer on the wall, no more bottles of beer.\n");
-                        printf("Go to the store and buy some more, 99 bottles of beer on the wall.\n");
-                        break;
-                case 1:
-                        printf("1 bottle of beer on the wall, 1 bottle of beer.\n");
-                        printf("Take one down and pass it around, no more bottles of beer on the wall\n");
-                        break;
-                default:
-                        printf("%d bottles of beer on the wall, %d bottles of beer.\n", b, b);
-                        printf("Take one down and pass it around, %d %s of beer on the wall.\n"
-                                ,b - 1
-                                ,((b - 1) > 1)? "bottles" : "bottle");
-                        break;
-                }
-        }               
-        return 0;
-}      
\ No newline at end of file
--- a/src/test/Cudasample_gpu.cu	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,72 +0,0 @@
-#include <stdio.h>
-
-#define SIZE_TEXT (sizeof(text)-1)
-#define SIZE_END (sizeof(end)-1)
-
-__device__ char text[] =
-"__ bottles of beer on the wall, __ bottles of beer!\n"
-"Take one down, and pass it around, ## bottles of beer on the wall!\n\n";
-       
-__device__ char end[] =
-"01 bottle of beer on the wall, 01 bottle of beer.\n"
-"Take one down and pass it around, no more bottles of beer on the wall.\n"
-"\n"
-"No more bottles of beer on the wall, no more bottles of beer.\n"
-"Go to the store and buy some more, 99 bottles of beer on the wall.";
-
-
-__global__
-void bottle99(char *addr){
-    int x = threadIdx.x;
-       addr += x * SIZE_TEXT;
-    int bottle = 99 - x;
-       if (bottle == 1) {
-        for (int i=0; i<SIZE_END; i++) {
-               addr[i] = end[i];
-           }
-         addr[SIZE_END] = '\0';
-           } else {
-               char c1 = (bottle/10) + '0';
-               char c2 = (bottle%10) + '0';
-
-               char d1 = ((bottle-1)/10) + '0';
-               char d2 = ((bottle-1)%10) + '0';
-      
-           for (int i=0; i<SIZE_TEXT; i++) {
-               int c = text[i];
-               if (c == '_') {
-                  addr[i] = c1;
-                  addr[i+1] = c2;
-                  i++;
-               } else if (c == '#') {
-
-            addr[i] = d1;
-               addr[i+1] = d2;
-                    i++;
-               } else {
-
-                 addr[i] = text[i];
-            }
-        }
-    }
-}
-      
-int main()
-{
-    char *buffer;
-    char *d_buffer;
-      
-    int size = SIZE_TEXT * 98 + SIZE_END + 1;
-
-    buffer = new char[size];
-    cudaMalloc((void**)&d_buffer, size);
-
-    bottle99<<<1, 99>>>(d_buffer);
-
-    cudaMemcpy(buffer, d_buffer, size, cudaMemcpyDeviceToHost);
-    cudaFree(d_buffer);
-
-    puts(buffer);
-    free(buffer);               
-       }
-  
\ No newline at end of file
--- a/src/test/OpenCL_gpu.c	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,91 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
- 
-#ifdef __APPLE__
-#include <OpenCL/opencl.h>
-#else
-#include <CL/cl.h>
-#endif
- 
-#define MEM_SIZE (128)
-#define MAX_SOURCE_SIZE (0x100000)
- 
-int main()
-{
-    cl_device_id device_id = NULL;
-    cl_context context = NULL;
-    cl_command_queue command_queue = NULL;
-    cl_mem memobj = NULL;
-    cl_program program = NULL;
-    cl_kernel kernel = NULL;
-    cl_platform_id platform_id = NULL;
-    cl_uint ret_num_devices;
-    cl_uint ret_num_platforms;
-    cl_int ret;
- 
-    char string[MEM_SIZE];
- 
-    FILE *fp;
-    char fileName[] = "./hello.cl";
-    char *source_str;
-    size_t source_size;
- 
-/* Load the source code containing the kernel*/
-    fp = fopen(fileName, "r");
-    if (!fp) {
-        fprintf(stderr, "Failed to load kernel.\n");
-        exit(1);
-    }
-    source_str = (char*)malloc(MAX_SOURCE_SIZE);
-    source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);
-    fclose(fp);
- 
-/* Get Platform and Device Info */
-    ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
-    ret = clGetDeviceIDs(platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);
- 
-/* Create OpenCL context */
-    context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);
- 
-/* Create Command Queue */
-    command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
- 
-/* Create Memory Buffer */
-    memobj = clCreateBuffer(context, CL_MEM_READ_WRITE,MEM_SIZE * sizeof(char), NULL, &ret);
- 
-/* Create Kernel Program from the source */
-    program = clCreateProgramWithSource(context, 1, (const char **)&source_str,
-                                        (const size_t *)&source_size, &ret);
- 
-/* Build Kernel Program */
-    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
- 
-/* Create OpenCL Kernel */
-    kernel = clCreateKernel(program, "hello", &ret);
- 
-/* Set OpenCL Kernel Parameters */
-    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&memobj);
- 
-/* Execute OpenCL Kernel */
-    ret = clEnqueueTask(command_queue, kernel, 0, NULL,NULL);
- 
-/* Copy results from the memory buffer */
-    ret = clEnqueueReadBuffer(command_queue, memobj, CL_TRUE, 0,
-                              MEM_SIZE * sizeof(char),string, 0, NULL, NULL);
- 
-/* Display Result */
-    puts(string);
- 
-/* Finalization */
-    ret = clFlush(command_queue);
-    ret = clFinish(command_queue);
-    ret = clReleaseKernel(kernel);
-    ret = clReleaseProgram(program);
-    ret = clReleaseMemObject(memobj);
-    ret = clReleaseCommandQueue(command_queue);
-    ret = clReleaseContext(context);
- 
-    free(source_str);
- 
-    return 0;
-}
--- a/src/test/helper_cuda.h	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,1283 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-////////////////////////////////////////////////////////////////////////////////
-// These are CUDA Helper functions for initialization and error checking
-
-#ifndef HELPER_CUDA_H
-#define HELPER_CUDA_H
-
-#pragma once
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include "helper_string.h"
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-// Note, it is required that your SDK sample to include the proper header files, please
-// refer the CUDA examples for examples of the needed CUDA headers, which may change depending
-// on which CUDA functions are used.
-
-// CUDA Runtime error messages
-#ifdef __DRIVER_TYPES_H__
-static const char *_cudaGetErrorEnum(cudaError_t error)
-{
-    switch (error)
-    {
-        case cudaSuccess:
-            return "cudaSuccess";
-
-        case cudaErrorMissingConfiguration:
-            return "cudaErrorMissingConfiguration";
-
-        case cudaErrorMemoryAllocation:
-            return "cudaErrorMemoryAllocation";
-
-        case cudaErrorInitializationError:
-            return "cudaErrorInitializationError";
-
-        case cudaErrorLaunchFailure:
-            return "cudaErrorLaunchFailure";
-
-        case cudaErrorPriorLaunchFailure:
-            return "cudaErrorPriorLaunchFailure";
-
-        case cudaErrorLaunchTimeout:
-            return "cudaErrorLaunchTimeout";
-
-        case cudaErrorLaunchOutOfResources:
-            return "cudaErrorLaunchOutOfResources";
-
-        case cudaErrorInvalidDeviceFunction:
-            return "cudaErrorInvalidDeviceFunction";
-
-        case cudaErrorInvalidConfiguration:
-            return "cudaErrorInvalidConfiguration";
-
-        case cudaErrorInvalidDevice:
-            return "cudaErrorInvalidDevice";
-
-        case cudaErrorInvalidValue:
-            return "cudaErrorInvalidValue";
-
-        case cudaErrorInvalidPitchValue:
-            return "cudaErrorInvalidPitchValue";
-
-        case cudaErrorInvalidSymbol:
-            return "cudaErrorInvalidSymbol";
-
-        case cudaErrorMapBufferObjectFailed:
-            return "cudaErrorMapBufferObjectFailed";
-
-        case cudaErrorUnmapBufferObjectFailed:
-            return "cudaErrorUnmapBufferObjectFailed";
-
-        case cudaErrorInvalidHostPointer:
-            return "cudaErrorInvalidHostPointer";
-
-        case cudaErrorInvalidDevicePointer:
-            return "cudaErrorInvalidDevicePointer";
-
-        case cudaErrorInvalidTexture:
-            return "cudaErrorInvalidTexture";
-
-        case cudaErrorInvalidTextureBinding:
-            return "cudaErrorInvalidTextureBinding";
-
-        case cudaErrorInvalidChannelDescriptor:
-            return "cudaErrorInvalidChannelDescriptor";
-
-        case cudaErrorInvalidMemcpyDirection:
-            return "cudaErrorInvalidMemcpyDirection";
-
-        case cudaErrorAddressOfConstant:
-            return "cudaErrorAddressOfConstant";
-
-        case cudaErrorTextureFetchFailed:
-            return "cudaErrorTextureFetchFailed";
-
-        case cudaErrorTextureNotBound:
-            return "cudaErrorTextureNotBound";
-
-        case cudaErrorSynchronizationError:
-            return "cudaErrorSynchronizationError";
-
-        case cudaErrorInvalidFilterSetting:
-            return "cudaErrorInvalidFilterSetting";
-
-        case cudaErrorInvalidNormSetting:
-            return "cudaErrorInvalidNormSetting";
-
-        case cudaErrorMixedDeviceExecution:
-            return "cudaErrorMixedDeviceExecution";
-
-        case cudaErrorCudartUnloading:
-            return "cudaErrorCudartUnloading";
-
-        case cudaErrorUnknown:
-            return "cudaErrorUnknown";
-
-        case cudaErrorNotYetImplemented:
-            return "cudaErrorNotYetImplemented";
-
-        case cudaErrorMemoryValueTooLarge:
-            return "cudaErrorMemoryValueTooLarge";
-
-        case cudaErrorInvalidResourceHandle:
-            return "cudaErrorInvalidResourceHandle";
-
-        case cudaErrorNotReady:
-            return "cudaErrorNotReady";
-
-        case cudaErrorInsufficientDriver:
-            return "cudaErrorInsufficientDriver";
-
-        case cudaErrorSetOnActiveProcess:
-            return "cudaErrorSetOnActiveProcess";
-
-        case cudaErrorInvalidSurface:
-            return "cudaErrorInvalidSurface";
-
-        case cudaErrorNoDevice:
-            return "cudaErrorNoDevice";
-
-        case cudaErrorECCUncorrectable:
-            return "cudaErrorECCUncorrectable";
-
-        case cudaErrorSharedObjectSymbolNotFound:
-            return "cudaErrorSharedObjectSymbolNotFound";
-
-        case cudaErrorSharedObjectInitFailed:
-            return "cudaErrorSharedObjectInitFailed";
-
-        case cudaErrorUnsupportedLimit:
-            return "cudaErrorUnsupportedLimit";
-
-        case cudaErrorDuplicateVariableName:
-            return "cudaErrorDuplicateVariableName";
-
-        case cudaErrorDuplicateTextureName:
-            return "cudaErrorDuplicateTextureName";
-
-        case cudaErrorDuplicateSurfaceName:
-            return "cudaErrorDuplicateSurfaceName";
-
-        case cudaErrorDevicesUnavailable:
-            return "cudaErrorDevicesUnavailable";
-
-        case cudaErrorInvalidKernelImage:
-            return "cudaErrorInvalidKernelImage";
-
-        case cudaErrorNoKernelImageForDevice:
-            return "cudaErrorNoKernelImageForDevice";
-
-        case cudaErrorIncompatibleDriverContext:
-            return "cudaErrorIncompatibleDriverContext";
-
-        case cudaErrorPeerAccessAlreadyEnabled:
-            return "cudaErrorPeerAccessAlreadyEnabled";
-
-        case cudaErrorPeerAccessNotEnabled:
-            return "cudaErrorPeerAccessNotEnabled";
-
-        case cudaErrorDeviceAlreadyInUse:
-            return "cudaErrorDeviceAlreadyInUse";
-
-        case cudaErrorProfilerDisabled:
-            return "cudaErrorProfilerDisabled";
-
-        case cudaErrorProfilerNotInitialized:
-            return "cudaErrorProfilerNotInitialized";
-
-        case cudaErrorProfilerAlreadyStarted:
-            return "cudaErrorProfilerAlreadyStarted";
-
-        case cudaErrorProfilerAlreadyStopped:
-            return "cudaErrorProfilerAlreadyStopped";
-
-        /* Since CUDA 4.0*/
-        case cudaErrorAssert:
-            return "cudaErrorAssert";
-
-        case cudaErrorTooManyPeers:
-            return "cudaErrorTooManyPeers";
-
-        case cudaErrorHostMemoryAlreadyRegistered:
-            return "cudaErrorHostMemoryAlreadyRegistered";
-
-        case cudaErrorHostMemoryNotRegistered:
-            return "cudaErrorHostMemoryNotRegistered";
-
-        /* Since CUDA 5.0 */
-        case cudaErrorOperatingSystem:
-            return "cudaErrorOperatingSystem";
-
-        case cudaErrorPeerAccessUnsupported:
-            return "cudaErrorPeerAccessUnsupported";
-
-        case cudaErrorLaunchMaxDepthExceeded:
-            return "cudaErrorLaunchMaxDepthExceeded";
-
-        case cudaErrorLaunchFileScopedTex:
-            return "cudaErrorLaunchFileScopedTex";
-
-        case cudaErrorLaunchFileScopedSurf:
-            return "cudaErrorLaunchFileScopedSurf";
-
-        case cudaErrorSyncDepthExceeded:
-            return "cudaErrorSyncDepthExceeded";
-
-        case cudaErrorLaunchPendingCountExceeded:
-            return "cudaErrorLaunchPendingCountExceeded";
-
-        case cudaErrorNotPermitted:
-            return "cudaErrorNotPermitted";
-
-        case cudaErrorNotSupported:
-            return "cudaErrorNotSupported";
-
-        /* Since CUDA 6.0 */
-        case cudaErrorHardwareStackError:
-            return "cudaErrorHardwareStackError";
-
-        case cudaErrorIllegalInstruction:
-            return "cudaErrorIllegalInstruction";
-
-        case cudaErrorMisalignedAddress:
-            return "cudaErrorMisalignedAddress";
-
-        case cudaErrorInvalidAddressSpace:
-            return "cudaErrorInvalidAddressSpace";
-
-        case cudaErrorInvalidPc:
-            return "cudaErrorInvalidPc";
-
-        case cudaErrorIllegalAddress:
-            return "cudaErrorIllegalAddress";
-
-        /* Since CUDA 6.5*/
-        case cudaErrorInvalidPtx:
-            return "cudaErrorInvalidPtx";
-
-        case cudaErrorInvalidGraphicsContext:
-            return "cudaErrorInvalidGraphicsContext";
-
-        case cudaErrorStartupFailure:
-            return "cudaErrorStartupFailure";
-
-        case cudaErrorApiFailureBase:
-            return "cudaErrorApiFailureBase";
-
-        /* Since CUDA 8.0*/        
-        case cudaErrorNvlinkUncorrectable :   
-            return "cudaErrorNvlinkUncorrectable";
-    }
-
-    return "<unknown>";
-}
-#endif
-
-#ifdef __cuda_cuda_h__
-// CUDA Driver API errors
-const char *_cudaGetErrorEnum(CUresult error)
-{
-    switch (error)
-    {
-        case CUDA_SUCCESS:
-            return "CUDA_SUCCESS";
-
-        case CUDA_ERROR_INVALID_VALUE:
-            return "CUDA_ERROR_INVALID_VALUE";
-
-        case CUDA_ERROR_OUT_OF_MEMORY:
-            return "CUDA_ERROR_OUT_OF_MEMORY";
-
-        case CUDA_ERROR_NOT_INITIALIZED:
-            return "CUDA_ERROR_NOT_INITIALIZED";
-
-        case CUDA_ERROR_DEINITIALIZED:
-            return "CUDA_ERROR_DEINITIALIZED";
-
-        case CUDA_ERROR_PROFILER_DISABLED:
-            return "CUDA_ERROR_PROFILER_DISABLED";
-
-        case CUDA_ERROR_PROFILER_NOT_INITIALIZED:
-            return "CUDA_ERROR_PROFILER_NOT_INITIALIZED";
-
-        case CUDA_ERROR_PROFILER_ALREADY_STARTED:
-            return "CUDA_ERROR_PROFILER_ALREADY_STARTED";
-
-        case CUDA_ERROR_PROFILER_ALREADY_STOPPED:
-            return "CUDA_ERROR_PROFILER_ALREADY_STOPPED";
-
-        case CUDA_ERROR_NO_DEVICE:
-            return "CUDA_ERROR_NO_DEVICE";
-
-        case CUDA_ERROR_INVALID_DEVICE:
-            return "CUDA_ERROR_INVALID_DEVICE";
-
-        case CUDA_ERROR_INVALID_IMAGE:
-            return "CUDA_ERROR_INVALID_IMAGE";
-
-        case CUDA_ERROR_INVALID_CONTEXT:
-            return "CUDA_ERROR_INVALID_CONTEXT";
-
-        case CUDA_ERROR_CONTEXT_ALREADY_CURRENT:
-            return "CUDA_ERROR_CONTEXT_ALREADY_CURRENT";
-
-        case CUDA_ERROR_MAP_FAILED:
-            return "CUDA_ERROR_MAP_FAILED";
-
-        case CUDA_ERROR_UNMAP_FAILED:
-            return "CUDA_ERROR_UNMAP_FAILED";
-
-        case CUDA_ERROR_ARRAY_IS_MAPPED:
-            return "CUDA_ERROR_ARRAY_IS_MAPPED";
-
-        case CUDA_ERROR_ALREADY_MAPPED:
-            return "CUDA_ERROR_ALREADY_MAPPED";
-
-        case CUDA_ERROR_NO_BINARY_FOR_GPU:
-            return "CUDA_ERROR_NO_BINARY_FOR_GPU";
-
-        case CUDA_ERROR_ALREADY_ACQUIRED:
-            return "CUDA_ERROR_ALREADY_ACQUIRED";
-
-        case CUDA_ERROR_NOT_MAPPED:
-            return "CUDA_ERROR_NOT_MAPPED";
-
-        case CUDA_ERROR_NOT_MAPPED_AS_ARRAY:
-            return "CUDA_ERROR_NOT_MAPPED_AS_ARRAY";
-
-        case CUDA_ERROR_NOT_MAPPED_AS_POINTER:
-            return "CUDA_ERROR_NOT_MAPPED_AS_POINTER";
-
-        case CUDA_ERROR_ECC_UNCORRECTABLE:
-            return "CUDA_ERROR_ECC_UNCORRECTABLE";
-
-        case CUDA_ERROR_UNSUPPORTED_LIMIT:
-            return "CUDA_ERROR_UNSUPPORTED_LIMIT";
-
-        case CUDA_ERROR_CONTEXT_ALREADY_IN_USE:
-            return "CUDA_ERROR_CONTEXT_ALREADY_IN_USE";
-
-        case CUDA_ERROR_PEER_ACCESS_UNSUPPORTED:
-            return "CUDA_ERROR_PEER_ACCESS_UNSUPPORTED";
-
-        case CUDA_ERROR_INVALID_PTX:
-            return "CUDA_ERROR_INVALID_PTX";
-
-        case CUDA_ERROR_INVALID_GRAPHICS_CONTEXT:
-            return "CUDA_ERROR_INVALID_GRAPHICS_CONTEXT";
-
-        case CUDA_ERROR_NVLINK_UNCORRECTABLE:
-            return "CUDA_ERROR_NVLINK_UNCORRECTABLE";
-
-        case CUDA_ERROR_INVALID_SOURCE:
-            return "CUDA_ERROR_INVALID_SOURCE";
-
-        case CUDA_ERROR_FILE_NOT_FOUND:
-            return "CUDA_ERROR_FILE_NOT_FOUND";
-
-        case CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND:
-            return "CUDA_ERROR_SHARED_OBJECT_SYMBOL_NOT_FOUND";
-
-        case CUDA_ERROR_SHARED_OBJECT_INIT_FAILED:
-            return "CUDA_ERROR_SHARED_OBJECT_INIT_FAILED";
-
-        case CUDA_ERROR_OPERATING_SYSTEM:
-            return "CUDA_ERROR_OPERATING_SYSTEM";
-
-        case CUDA_ERROR_INVALID_HANDLE:
-            return "CUDA_ERROR_INVALID_HANDLE";
-
-        case CUDA_ERROR_NOT_FOUND:
-            return "CUDA_ERROR_NOT_FOUND";
-
-        case CUDA_ERROR_NOT_READY:
-            return "CUDA_ERROR_NOT_READY";
-
-        case CUDA_ERROR_ILLEGAL_ADDRESS:
-            return "CUDA_ERROR_ILLEGAL_ADDRESS";
-
-        case CUDA_ERROR_LAUNCH_FAILED:
-            return "CUDA_ERROR_LAUNCH_FAILED";
-
-        case CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES:
-            return "CUDA_ERROR_LAUNCH_OUT_OF_RESOURCES";
-
-        case CUDA_ERROR_LAUNCH_TIMEOUT:
-            return "CUDA_ERROR_LAUNCH_TIMEOUT";
-
-        case CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING:
-            return "CUDA_ERROR_LAUNCH_INCOMPATIBLE_TEXTURING";
-
-        case CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED:
-            return "CUDA_ERROR_PEER_ACCESS_ALREADY_ENABLED";
-
-        case CUDA_ERROR_PEER_ACCESS_NOT_ENABLED:
-            return "CUDA_ERROR_PEER_ACCESS_NOT_ENABLED";
-
-        case CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE:
-            return "CUDA_ERROR_PRIMARY_CONTEXT_ACTIVE";
-
-        case CUDA_ERROR_CONTEXT_IS_DESTROYED:
-            return "CUDA_ERROR_CONTEXT_IS_DESTROYED";
-
-        case CUDA_ERROR_ASSERT:
-            return "CUDA_ERROR_ASSERT";
-
-        case CUDA_ERROR_TOO_MANY_PEERS:
-            return "CUDA_ERROR_TOO_MANY_PEERS";
-
-        case CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED:
-            return "CUDA_ERROR_HOST_MEMORY_ALREADY_REGISTERED";
-
-        case CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED:
-            return "CUDA_ERROR_HOST_MEMORY_NOT_REGISTERED";
-
-        case CUDA_ERROR_HARDWARE_STACK_ERROR:
-            return "CUDA_ERROR_HARDWARE_STACK_ERROR";
-
-        case CUDA_ERROR_ILLEGAL_INSTRUCTION:
-            return "CUDA_ERROR_ILLEGAL_INSTRUCTION";
-
-        case CUDA_ERROR_MISALIGNED_ADDRESS:
-            return "CUDA_ERROR_MISALIGNED_ADDRESS";
-
-        case CUDA_ERROR_INVALID_ADDRESS_SPACE:
-            return "CUDA_ERROR_INVALID_ADDRESS_SPACE";
-
-        case CUDA_ERROR_INVALID_PC:
-            return "CUDA_ERROR_INVALID_PC";
-
-        case CUDA_ERROR_NOT_PERMITTED:
-            return "CUDA_ERROR_NOT_PERMITTED";
-
-        case CUDA_ERROR_NOT_SUPPORTED:
-            return "CUDA_ERROR_NOT_SUPPORTED";
-
-        case CUDA_ERROR_UNKNOWN:
-            return "CUDA_ERROR_UNKNOWN";
-    }
-
-    return "<unknown>";
-}
-#endif
-
-#ifdef CUBLAS_API_H_
-// cuBLAS API errors
-static const char *_cudaGetErrorEnum(cublasStatus_t error)
-{
-    switch (error)
-    {
-        case CUBLAS_STATUS_SUCCESS:
-            return "CUBLAS_STATUS_SUCCESS";
-
-        case CUBLAS_STATUS_NOT_INITIALIZED:
-            return "CUBLAS_STATUS_NOT_INITIALIZED";
-
-        case CUBLAS_STATUS_ALLOC_FAILED:
-            return "CUBLAS_STATUS_ALLOC_FAILED";
-
-        case CUBLAS_STATUS_INVALID_VALUE:
-            return "CUBLAS_STATUS_INVALID_VALUE";
-
-        case CUBLAS_STATUS_ARCH_MISMATCH:
-            return "CUBLAS_STATUS_ARCH_MISMATCH";
-
-        case CUBLAS_STATUS_MAPPING_ERROR:
-            return "CUBLAS_STATUS_MAPPING_ERROR";
-
-        case CUBLAS_STATUS_EXECUTION_FAILED:
-            return "CUBLAS_STATUS_EXECUTION_FAILED";
-
-        case CUBLAS_STATUS_INTERNAL_ERROR:
-            return "CUBLAS_STATUS_INTERNAL_ERROR";
-
-        case CUBLAS_STATUS_NOT_SUPPORTED:
-            return "CUBLAS_STATUS_NOT_SUPPORTED";
-
-        case CUBLAS_STATUS_LICENSE_ERROR:
-            return "CUBLAS_STATUS_LICENSE_ERROR";
-    }
-
-    return "<unknown>";
-}
-#endif
-
-#ifdef _CUFFT_H_
-// cuFFT API errors
-static const char *_cudaGetErrorEnum(cufftResult error)
-{
-    switch (error)
-    {
-        case CUFFT_SUCCESS:
-            return "CUFFT_SUCCESS";
-
-        case CUFFT_INVALID_PLAN:
-            return "CUFFT_INVALID_PLAN";
-
-        case CUFFT_ALLOC_FAILED:
-            return "CUFFT_ALLOC_FAILED";
-
-        case CUFFT_INVALID_TYPE:
-            return "CUFFT_INVALID_TYPE";
-
-        case CUFFT_INVALID_VALUE:
-            return "CUFFT_INVALID_VALUE";
-
-        case CUFFT_INTERNAL_ERROR:
-            return "CUFFT_INTERNAL_ERROR";
-
-        case CUFFT_EXEC_FAILED:
-            return "CUFFT_EXEC_FAILED";
-
-        case CUFFT_SETUP_FAILED:
-            return "CUFFT_SETUP_FAILED";
-
-        case CUFFT_INVALID_SIZE:
-            return "CUFFT_INVALID_SIZE";
-
-        case CUFFT_UNALIGNED_DATA:
-            return "CUFFT_UNALIGNED_DATA";
-
-        case CUFFT_INCOMPLETE_PARAMETER_LIST:
-            return "CUFFT_INCOMPLETE_PARAMETER_LIST";
-
-        case CUFFT_INVALID_DEVICE:
-            return "CUFFT_INVALID_DEVICE";
-
-        case CUFFT_PARSE_ERROR:
-            return "CUFFT_PARSE_ERROR";
-
-        case CUFFT_NO_WORKSPACE:
-            return "CUFFT_NO_WORKSPACE";
-
-        case CUFFT_NOT_IMPLEMENTED:
-            return "CUFFT_NOT_IMPLEMENTED";
-
-        case CUFFT_LICENSE_ERROR:
-            return "CUFFT_LICENSE_ERROR";
-
-        case CUFFT_NOT_SUPPORTED:
-            return "CUFFT_NOT_SUPPORTED";
-    }
-
-    return "<unknown>";
-}
-#endif
-
-
-#ifdef CUSPARSEAPI
-// cuSPARSE API errors
-static const char *_cudaGetErrorEnum(cusparseStatus_t error)
-{
-    switch (error)
-    {
-        case CUSPARSE_STATUS_SUCCESS:
-            return "CUSPARSE_STATUS_SUCCESS";
-
-        case CUSPARSE_STATUS_NOT_INITIALIZED:
-            return "CUSPARSE_STATUS_NOT_INITIALIZED";
-
-        case CUSPARSE_STATUS_ALLOC_FAILED:
-            return "CUSPARSE_STATUS_ALLOC_FAILED";
-
-        case CUSPARSE_STATUS_INVALID_VALUE:
-            return "CUSPARSE_STATUS_INVALID_VALUE";
-
-        case CUSPARSE_STATUS_ARCH_MISMATCH:
-            return "CUSPARSE_STATUS_ARCH_MISMATCH";
-
-        case CUSPARSE_STATUS_MAPPING_ERROR:
-            return "CUSPARSE_STATUS_MAPPING_ERROR";
-
-        case CUSPARSE_STATUS_EXECUTION_FAILED:
-            return "CUSPARSE_STATUS_EXECUTION_FAILED";
-
-        case CUSPARSE_STATUS_INTERNAL_ERROR:
-            return "CUSPARSE_STATUS_INTERNAL_ERROR";
-
-        case CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
-            return "CUSPARSE_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
-    }
-
-    return "<unknown>";
-}
-#endif
-
-#ifdef CUSOLVER_COMMON_H_
-//cuSOLVER API errors
-static const char *_cudaGetErrorEnum(cusolverStatus_t error)
-{
-   switch(error)
-   {
-       case CUSOLVER_STATUS_SUCCESS:
-           return "CUSOLVER_STATUS_SUCCESS";
-       case CUSOLVER_STATUS_NOT_INITIALIZED:
-           return "CUSOLVER_STATUS_NOT_INITIALIZED";
-       case CUSOLVER_STATUS_ALLOC_FAILED:
-           return "CUSOLVER_STATUS_ALLOC_FAILED";
-       case CUSOLVER_STATUS_INVALID_VALUE:
-           return "CUSOLVER_STATUS_INVALID_VALUE";
-       case CUSOLVER_STATUS_ARCH_MISMATCH:
-           return "CUSOLVER_STATUS_ARCH_MISMATCH";
-       case CUSOLVER_STATUS_MAPPING_ERROR:
-           return "CUSOLVER_STATUS_MAPPING_ERROR";
-       case CUSOLVER_STATUS_EXECUTION_FAILED:
-           return "CUSOLVER_STATUS_EXECUTION_FAILED";
-       case CUSOLVER_STATUS_INTERNAL_ERROR:
-           return "CUSOLVER_STATUS_INTERNAL_ERROR";
-       case CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED:
-           return "CUSOLVER_STATUS_MATRIX_TYPE_NOT_SUPPORTED";
-       case CUSOLVER_STATUS_NOT_SUPPORTED :
-           return "CUSOLVER_STATUS_NOT_SUPPORTED ";
-       case CUSOLVER_STATUS_ZERO_PIVOT:
-           return "CUSOLVER_STATUS_ZERO_PIVOT";
-       case CUSOLVER_STATUS_INVALID_LICENSE:
-           return "CUSOLVER_STATUS_INVALID_LICENSE";
-    }
-
-    return "<unknown>";
-
-}
-#endif
-
-#ifdef CURAND_H_
-// cuRAND API errors
-static const char *_cudaGetErrorEnum(curandStatus_t error)
-{
-    switch (error)
-    {
-        case CURAND_STATUS_SUCCESS:
-            return "CURAND_STATUS_SUCCESS";
-
-        case CURAND_STATUS_VERSION_MISMATCH:
-            return "CURAND_STATUS_VERSION_MISMATCH";
-
-        case CURAND_STATUS_NOT_INITIALIZED:
-            return "CURAND_STATUS_NOT_INITIALIZED";
-
-        case CURAND_STATUS_ALLOCATION_FAILED:
-            return "CURAND_STATUS_ALLOCATION_FAILED";
-
-        case CURAND_STATUS_TYPE_ERROR:
-            return "CURAND_STATUS_TYPE_ERROR";
-
-        case CURAND_STATUS_OUT_OF_RANGE:
-            return "CURAND_STATUS_OUT_OF_RANGE";
-
-        case CURAND_STATUS_LENGTH_NOT_MULTIPLE:
-            return "CURAND_STATUS_LENGTH_NOT_MULTIPLE";
-
-        case CURAND_STATUS_DOUBLE_PRECISION_REQUIRED:
-            return "CURAND_STATUS_DOUBLE_PRECISION_REQUIRED";
-
-        case CURAND_STATUS_LAUNCH_FAILURE:
-            return "CURAND_STATUS_LAUNCH_FAILURE";
-
-        case CURAND_STATUS_PREEXISTING_FAILURE:
-            return "CURAND_STATUS_PREEXISTING_FAILURE";
-
-        case CURAND_STATUS_INITIALIZATION_FAILED:
-            return "CURAND_STATUS_INITIALIZATION_FAILED";
-
-        case CURAND_STATUS_ARCH_MISMATCH:
-            return "CURAND_STATUS_ARCH_MISMATCH";
-
-        case CURAND_STATUS_INTERNAL_ERROR:
-            return "CURAND_STATUS_INTERNAL_ERROR";
-    }
-
-    return "<unknown>";
-}
-#endif
-
-#ifdef NV_NPPIDEFS_H
-// NPP API errors
-static const char *_cudaGetErrorEnum(NppStatus error)
-{
-    switch (error)
-    {
-        case NPP_NOT_SUPPORTED_MODE_ERROR:
-            return "NPP_NOT_SUPPORTED_MODE_ERROR";
-
-        case NPP_ROUND_MODE_NOT_SUPPORTED_ERROR:
-            return "NPP_ROUND_MODE_NOT_SUPPORTED_ERROR";
-
-        case NPP_RESIZE_NO_OPERATION_ERROR:
-            return "NPP_RESIZE_NO_OPERATION_ERROR";
-
-        case NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY:
-            return "NPP_NOT_SUFFICIENT_COMPUTE_CAPABILITY";
-
-#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
-
-        case NPP_BAD_ARG_ERROR:
-            return "NPP_BAD_ARGUMENT_ERROR";
-
-        case NPP_COEFF_ERROR:
-            return "NPP_COEFFICIENT_ERROR";
-
-        case NPP_RECT_ERROR:
-            return "NPP_RECTANGLE_ERROR";
-
-        case NPP_QUAD_ERROR:
-            return "NPP_QUADRANGLE_ERROR";
-
-        case NPP_MEM_ALLOC_ERR:
-            return "NPP_MEMORY_ALLOCATION_ERROR";
-
-        case NPP_HISTO_NUMBER_OF_LEVELS_ERROR:
-            return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
-
-        case NPP_INVALID_INPUT:
-            return "NPP_INVALID_INPUT";
-
-        case NPP_POINTER_ERROR:
-            return "NPP_POINTER_ERROR";
-
-        case NPP_WARNING:
-            return "NPP_WARNING";
-
-        case NPP_ODD_ROI_WARNING:
-            return "NPP_ODD_ROI_WARNING";
-#else
-
-            // These are for CUDA 5.5 or higher
-        case NPP_BAD_ARGUMENT_ERROR:
-            return "NPP_BAD_ARGUMENT_ERROR";
-
-        case NPP_COEFFICIENT_ERROR:
-            return "NPP_COEFFICIENT_ERROR";
-
-        case NPP_RECTANGLE_ERROR:
-            return "NPP_RECTANGLE_ERROR";
-
-        case NPP_QUADRANGLE_ERROR:
-            return "NPP_QUADRANGLE_ERROR";
-
-        case NPP_MEMORY_ALLOCATION_ERR:
-            return "NPP_MEMORY_ALLOCATION_ERROR";
-
-        case NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR:
-            return "NPP_HISTOGRAM_NUMBER_OF_LEVELS_ERROR";
-
-        case NPP_INVALID_HOST_POINTER_ERROR:
-            return "NPP_INVALID_HOST_POINTER_ERROR";
-
-        case NPP_INVALID_DEVICE_POINTER_ERROR:
-            return "NPP_INVALID_DEVICE_POINTER_ERROR";
-#endif
-
-        case NPP_LUT_NUMBER_OF_LEVELS_ERROR:
-            return "NPP_LUT_NUMBER_OF_LEVELS_ERROR";
-
-        case NPP_TEXTURE_BIND_ERROR:
-            return "NPP_TEXTURE_BIND_ERROR";
-
-        case NPP_WRONG_INTERSECTION_ROI_ERROR:
-            return "NPP_WRONG_INTERSECTION_ROI_ERROR";
-
-        case NPP_NOT_EVEN_STEP_ERROR:
-            return "NPP_NOT_EVEN_STEP_ERROR";
-
-        case NPP_INTERPOLATION_ERROR:
-            return "NPP_INTERPOLATION_ERROR";
-
-        case NPP_RESIZE_FACTOR_ERROR:
-            return "NPP_RESIZE_FACTOR_ERROR";
-
-        case NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR:
-            return "NPP_HAAR_CLASSIFIER_PIXEL_MATCH_ERROR";
-
-
-#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) <= 0x5000
-
-        case NPP_MEMFREE_ERR:
-            return "NPP_MEMFREE_ERR";
-
-        case NPP_MEMSET_ERR:
-            return "NPP_MEMSET_ERR";
-
-        case NPP_MEMCPY_ERR:
-            return "NPP_MEMCPY_ERROR";
-
-        case NPP_MIRROR_FLIP_ERR:
-            return "NPP_MIRROR_FLIP_ERR";
-#else
-
-        case NPP_MEMFREE_ERROR:
-            return "NPP_MEMFREE_ERROR";
-
-        case NPP_MEMSET_ERROR:
-            return "NPP_MEMSET_ERROR";
-
-        case NPP_MEMCPY_ERROR:
-            return "NPP_MEMCPY_ERROR";
-
-        case NPP_MIRROR_FLIP_ERROR:
-            return "NPP_MIRROR_FLIP_ERROR";
-#endif
-
-        case NPP_ALIGNMENT_ERROR:
-            return "NPP_ALIGNMENT_ERROR";
-
-        case NPP_STEP_ERROR:
-            return "NPP_STEP_ERROR";
-
-        case NPP_SIZE_ERROR:
-            return "NPP_SIZE_ERROR";
-
-        case NPP_NULL_POINTER_ERROR:
-            return "NPP_NULL_POINTER_ERROR";
-
-        case NPP_CUDA_KERNEL_EXECUTION_ERROR:
-            return "NPP_CUDA_KERNEL_EXECUTION_ERROR";
-
-        case NPP_NOT_IMPLEMENTED_ERROR:
-            return "NPP_NOT_IMPLEMENTED_ERROR";
-
-        case NPP_ERROR:
-            return "NPP_ERROR";
-
-        case NPP_SUCCESS:
-            return "NPP_SUCCESS";
-
-        case NPP_WRONG_INTERSECTION_QUAD_WARNING:
-            return "NPP_WRONG_INTERSECTION_QUAD_WARNING";
-
-        case NPP_MISALIGNED_DST_ROI_WARNING:
-            return "NPP_MISALIGNED_DST_ROI_WARNING";
-
-        case NPP_AFFINE_QUAD_INCORRECT_WARNING:
-            return "NPP_AFFINE_QUAD_INCORRECT_WARNING";
-
-        case NPP_DOUBLE_SIZE_WARNING:
-            return "NPP_DOUBLE_SIZE_WARNING";
-
-        case NPP_WRONG_INTERSECTION_ROI_WARNING:
-            return "NPP_WRONG_INTERSECTION_ROI_WARNING";
-
-#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x6000
-        /* These are 6.0 or higher */
-        case NPP_LUT_PALETTE_BITSIZE_ERROR:
-            return "NPP_LUT_PALETTE_BITSIZE_ERROR";
-
-        case NPP_ZC_MODE_NOT_SUPPORTED_ERROR:
-            return "NPP_ZC_MODE_NOT_SUPPORTED_ERROR";
-
-        case NPP_QUALITY_INDEX_ERROR:
-            return "NPP_QUALITY_INDEX_ERROR";
-
-        case NPP_CHANNEL_ORDER_ERROR:
-            return "NPP_CHANNEL_ORDER_ERROR";
-
-        case NPP_ZERO_MASK_VALUE_ERROR:
-            return "NPP_ZERO_MASK_VALUE_ERROR";
-
-        case NPP_NUMBER_OF_CHANNELS_ERROR:
-            return "NPP_NUMBER_OF_CHANNELS_ERROR";
-
-        case NPP_COI_ERROR:
-            return "NPP_COI_ERROR";
-
-        case NPP_DIVISOR_ERROR:
-            return "NPP_DIVISOR_ERROR";
-
-        case NPP_CHANNEL_ERROR:
-            return "NPP_CHANNEL_ERROR";
-
-        case NPP_STRIDE_ERROR:
-            return "NPP_STRIDE_ERROR";
-
-        case NPP_ANCHOR_ERROR:
-            return "NPP_ANCHOR_ERROR";
-
-        case NPP_MASK_SIZE_ERROR:
-            return "NPP_MASK_SIZE_ERROR";
-
-        case NPP_MOMENT_00_ZERO_ERROR:
-            return "NPP_MOMENT_00_ZERO_ERROR";
-
-        case NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR:
-            return "NPP_THRESHOLD_NEGATIVE_LEVEL_ERROR";
-
-        case NPP_THRESHOLD_ERROR:
-            return "NPP_THRESHOLD_ERROR";
-
-        case NPP_CONTEXT_MATCH_ERROR:
-            return "NPP_CONTEXT_MATCH_ERROR";
-
-        case NPP_FFT_FLAG_ERROR:
-            return "NPP_FFT_FLAG_ERROR";
-
-        case NPP_FFT_ORDER_ERROR:
-            return "NPP_FFT_ORDER_ERROR";
-
-        case NPP_SCALE_RANGE_ERROR:
-            return "NPP_SCALE_RANGE_ERROR";
-
-        case NPP_DATA_TYPE_ERROR:
-            return "NPP_DATA_TYPE_ERROR";
-
-        case NPP_OUT_OFF_RANGE_ERROR:
-            return "NPP_OUT_OFF_RANGE_ERROR";
-
-        case NPP_DIVIDE_BY_ZERO_ERROR:
-            return "NPP_DIVIDE_BY_ZERO_ERROR";
-
-        case NPP_RANGE_ERROR:
-            return "NPP_RANGE_ERROR";
-
-        case NPP_NO_MEMORY_ERROR:
-            return "NPP_NO_MEMORY_ERROR";
-
-        case NPP_ERROR_RESERVED:
-            return "NPP_ERROR_RESERVED";
-
-        case NPP_NO_OPERATION_WARNING:
-            return "NPP_NO_OPERATION_WARNING";
-
-        case NPP_DIVIDE_BY_ZERO_WARNING:
-            return "NPP_DIVIDE_BY_ZERO_WARNING";
-#endif
-
-#if ((NPP_VERSION_MAJOR << 12) + (NPP_VERSION_MINOR << 4)) >= 0x7000
-        /* These are 7.0 or higher */
-        case NPP_OVERFLOW_ERROR:
-            return "NPP_OVERFLOW_ERROR";
-
-        case NPP_CORRUPTED_DATA_ERROR:
-            return "NPP_CORRUPTED_DATA_ERROR";
-#endif
-    }
-
-    return "<unknown>";
-}
-#endif
-
-#ifdef __DRIVER_TYPES_H__
-#ifndef DEVICE_RESET
-#define DEVICE_RESET cudaDeviceReset();
-#endif
-#else
-#ifndef DEVICE_RESET
-#define DEVICE_RESET
-#endif
-#endif
-
-template< typename T >
-void check(T result, char const *const func, const char *const file, int const line)
-{
-    if (result)
-    {
-        fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n",
-                file, line, static_cast<unsigned int>(result), _cudaGetErrorEnum(result), func);
-        DEVICE_RESET
-        // Make sure we call CUDA Device Reset before exiting
-        exit(EXIT_FAILURE);
-    }
-}
-
-#ifdef __DRIVER_TYPES_H__
-// This will output the proper CUDA error strings in the event that a CUDA host call returns an error
-#define checkCudaErrors(val)           check ( (val), #val, __FILE__, __LINE__ )
-
-// This will output the proper error string when calling cudaGetLastError
-#define getLastCudaError(msg)      __getLastCudaError (msg, __FILE__, __LINE__)
-
-inline void __getLastCudaError(const char *errorMessage, const char *file, const int line)
-{
-    cudaError_t err = cudaGetLastError();
-
-    if (cudaSuccess != err)
-    {
-        fprintf(stderr, "%s(%i) : getLastCudaError() CUDA error : %s : (%d) %s.\n",
-                file, line, errorMessage, (int)err, cudaGetErrorString(err));
-        DEVICE_RESET
-        exit(EXIT_FAILURE);
-    }
-}
-#endif
-
-#ifndef MAX
-#define MAX(a,b) (a > b ? a : b)
-#endif
-
-// Float To Int conversion
-inline int ftoi(float value)
-{
-    return (value >= 0 ? (int)(value + 0.5) : (int)(value - 0.5));
-}
-
-// Beginning of GPU Architecture definitions
-inline int _ConvertSMVer2Cores(int major, int minor)
-{
-    // Defines for GPU Architecture types (using the SM version to determine the # of cores per SM
-    typedef struct
-    {
-        int SM; // 0xMm (hexidecimal notation), M = SM Major version, and m = SM minor version
-        int Cores;
-    } sSMtoCores;
-
-    sSMtoCores nGpuArchCoresPerSM[] =
-    {
-        { 0x20, 32 }, // Fermi Generation (SM 2.0) GF100 class
-        { 0x21, 48 }, // Fermi Generation (SM 2.1) GF10x class
-        { 0x30, 192}, // Kepler Generation (SM 3.0) GK10x class
-        { 0x32, 192}, // Kepler Generation (SM 3.2) GK10x class
-        { 0x35, 192}, // Kepler Generation (SM 3.5) GK11x class
-        { 0x37, 192}, // Kepler Generation (SM 3.7) GK21x class
-        { 0x50, 128}, // Maxwell Generation (SM 5.0) GM10x class
-        { 0x52, 128}, // Maxwell Generation (SM 5.2) GM20x class
-        { 0x53, 128}, // Maxwell Generation (SM 5.3) GM20x class
-        { 0x60, 64 }, // Pascal Generation (SM 6.0) GP100 class
-        { 0x61, 128}, // Pascal Generation (SM 6.1) GP10x class
-        { 0x62, 128}, // Pascal Generation (SM 6.2) GP10x class
-        {   -1, -1 }
-    };
-
-    int index = 0;
-
-    while (nGpuArchCoresPerSM[index].SM != -1)
-    {
-        if (nGpuArchCoresPerSM[index].SM == ((major << 4) + minor))
-        {
-            return nGpuArchCoresPerSM[index].Cores;
-        }
-
-        index++;
-    }
-
-    // If we don't find the values, we default use the previous one to run properly
-    printf("MapSMtoCores for SM %d.%d is undefined.  Default to use %d Cores/SM\n", major, minor, nGpuArchCoresPerSM[index-1].Cores);
-    return nGpuArchCoresPerSM[index-1].Cores;
-}
-// end of GPU Architecture definitions
-
-#ifdef __CUDA_RUNTIME_H__
-// General GPU Device CUDA Initialization
-inline int gpuDeviceInit(int devID)
-{
-    int device_count;
-    checkCudaErrors(cudaGetDeviceCount(&device_count));
-
-    if (device_count == 0)
-    {
-        fprintf(stderr, "gpuDeviceInit() CUDA error: no devices supporting CUDA.\n");
-        exit(EXIT_FAILURE);
-    }
-
-    if (devID < 0)
-    {
-        devID = 0;
-    }
-
-    if (devID > device_count-1)
-    {
-        fprintf(stderr, "\n");
-        fprintf(stderr, ">> %d CUDA capable GPU device(s) detected. <<\n", device_count);
-        fprintf(stderr, ">> gpuDeviceInit (-device=%d) is not a valid GPU device. <<\n", devID);
-        fprintf(stderr, "\n");
-        return -devID;
-    }
-
-    cudaDeviceProp deviceProp;
-    checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
-
-    if (deviceProp.computeMode == cudaComputeModeProhibited)
-    {
-        fprintf(stderr, "Error: device is running in <Compute Mode Prohibited>, no threads can use ::cudaSetDevice().\n");
-        return -1;
-    }
-
-    if (deviceProp.major < 1)
-    {
-        fprintf(stderr, "gpuDeviceInit(): GPU device does not support CUDA.\n");
-        exit(EXIT_FAILURE);
-    }
-
-    checkCudaErrors(cudaSetDevice(devID));
-    printf("gpuDeviceInit() CUDA Device [%d]: \"%s\n", devID, deviceProp.name);
-
-    return devID;
-}
-
-// This function returns the best GPU (with maximum GFLOPS)
-inline int gpuGetMaxGflopsDeviceId()
-{
-    int current_device     = 0, sm_per_multiproc  = 0;
-    int max_perf_device    = 0;
-    int device_count       = 0, best_SM_arch      = 0;
-    int devices_prohibited = 0;
-    
-    unsigned long long max_compute_perf = 0;
-    cudaDeviceProp deviceProp;
-    cudaGetDeviceCount(&device_count);
-    
-    checkCudaErrors(cudaGetDeviceCount(&device_count));
-
-    if (device_count == 0)
-    {
-        fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: no devices supporting CUDA.\n");
-        exit(EXIT_FAILURE);
-    }
-
-    // Find the best major SM Architecture GPU device
-    while (current_device < device_count)
-    {
-        cudaGetDeviceProperties(&deviceProp, current_device);
-
-        // If this GPU is not running on Compute Mode prohibited, then we can add it to the list
-        if (deviceProp.computeMode != cudaComputeModeProhibited)
-        {
-            if (deviceProp.major > 0 && deviceProp.major < 9999)
-            {
-                best_SM_arch = MAX(best_SM_arch, deviceProp.major);
-            }
-        }
-        else
-        {
-            devices_prohibited++;
-        }
-
-        current_device++;
-    }
-
-    if (devices_prohibited == device_count)
-    {
-    	fprintf(stderr, "gpuGetMaxGflopsDeviceId() CUDA error: all devices have compute mode prohibited.\n");
-    	exit(EXIT_FAILURE);
-    }
-
-    // Find the best CUDA capable GPU device
-    current_device = 0;
-
-    while (current_device < device_count)
-    {
-        cudaGetDeviceProperties(&deviceProp, current_device);
-
-        // If this GPU is not running on Compute Mode prohibited, then we can add it to the list
-        if (deviceProp.computeMode != cudaComputeModeProhibited)
-        {
-            if (deviceProp.major == 9999 && deviceProp.minor == 9999)
-            {
-                sm_per_multiproc = 1;
-            }
-            else
-            {
-                sm_per_multiproc = _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor);
-            }
-
-            unsigned long long compute_perf  = (unsigned long long) deviceProp.multiProcessorCount * sm_per_multiproc * deviceProp.clockRate;
-
-            if (compute_perf  > max_compute_perf)
-            {
-                // If we find GPU with SM major > 2, search only these
-                if (best_SM_arch > 2)
-                {
-                    // If our device==dest_SM_arch, choose this, or else pass
-                    if (deviceProp.major == best_SM_arch)
-                    {
-                        max_compute_perf  = compute_perf;
-                        max_perf_device   = current_device;
-                    }
-                }
-                else
-                {
-                    max_compute_perf  = compute_perf;
-                    max_perf_device   = current_device;
-                }
-            }
-        }
-
-        ++current_device;
-    }
-
-    return max_perf_device;
-}
-
-
-// Initialization code to find the best CUDA Device
-inline int findCudaDevice(int argc, const char **argv)
-{
-    cudaDeviceProp deviceProp;
-    int devID = 0;
-
-    // If the command-line has a device number specified, use it
-    if (checkCmdLineFlag(argc, argv, "device"))
-    {
-        devID = getCmdLineArgumentInt(argc, argv, "device=");
-
-        if (devID < 0)
-        {
-            printf("Invalid command line parameter\n ");
-            exit(EXIT_FAILURE);
-        }
-        else
-        {
-            devID = gpuDeviceInit(devID);
-
-            if (devID < 0)
-            {
-                printf("exiting...\n");
-                exit(EXIT_FAILURE);
-            }
-        }
-    }
-    else
-    {
-        // Otherwise pick the device with highest Gflops/s
-        devID = gpuGetMaxGflopsDeviceId();
-        checkCudaErrors(cudaSetDevice(devID));
-        checkCudaErrors(cudaGetDeviceProperties(&deviceProp, devID));
-        printf("GPU Device %d: \"%s\" with compute capability %d.%d\n\n", devID, deviceProp.name, deviceProp.major, deviceProp.minor);
-    }
-
-    return devID;
-}
-
-// General check for CUDA GPU SM Capabilities
-inline bool checkCudaCapabilities(int major_version, int minor_version)
-{
-    cudaDeviceProp deviceProp;
-    deviceProp.major = 0;
-    deviceProp.minor = 0;
-    int dev;
-
-    checkCudaErrors(cudaGetDevice(&dev));
-    checkCudaErrors(cudaGetDeviceProperties(&deviceProp, dev));
-
-    if ((deviceProp.major > major_version) ||
-        (deviceProp.major == major_version && deviceProp.minor >= minor_version))
-    {
-        printf("  Device %d: <%16s >, Compute SM %d.%d detected\n", dev, deviceProp.name, deviceProp.major, deviceProp.minor);
-        return true;
-    }
-    else
-    {
-        printf("  No GPU device was found that can support CUDA compute capability %d.%d.\n", major_version, minor_version);
-        return false;
-    }
-}
-#endif
-
-// end of CUDA Helper Functions
-
-
-#endif
--- a/src/test/helper_string.h	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,527 +0,0 @@
-/**
- * Copyright 1993-2013 NVIDIA Corporation.  All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-// These are helper functions for the SDK samples (string parsing, timers, etc)
-#ifndef STRING_HELPER_H
-#define STRING_HELPER_H
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <fstream>
-#include <string>
-
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-#ifndef _CRT_SECURE_NO_DEPRECATE
-#define _CRT_SECURE_NO_DEPRECATE
-#endif
-#ifndef STRCASECMP
-#define STRCASECMP  _stricmp
-#endif
-#ifndef STRNCASECMP
-#define STRNCASECMP _strnicmp
-#endif
-#ifndef STRCPY
-#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
-#endif
-
-#ifndef FOPEN
-#define FOPEN(fHandle,filename,mode) fopen_s(&fHandle, filename, mode)
-#endif
-#ifndef FOPEN_FAIL
-#define FOPEN_FAIL(result) (result != 0)
-#endif
-#ifndef SSCANF
-#define SSCANF sscanf_s
-#endif
-#ifndef SPRINTF
-#define SPRINTF sprintf_s
-#endif
-#else // Linux Includes
-#include <string.h>
-#include <strings.h>
-
-#ifndef STRCASECMP
-#define STRCASECMP  strcasecmp
-#endif
-#ifndef STRNCASECMP
-#define STRNCASECMP strncasecmp
-#endif
-#ifndef STRCPY
-#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
-#endif
-
-#ifndef FOPEN
-#define FOPEN(fHandle,filename,mode) (fHandle = fopen(filename, mode))
-#endif
-#ifndef FOPEN_FAIL
-#define FOPEN_FAIL(result) (result == NULL)
-#endif
-#ifndef SSCANF
-#define SSCANF sscanf
-#endif
-#ifndef SPRINTF
-#define SPRINTF sprintf
-#endif
-#endif
-
-#ifndef EXIT_WAIVED
-#define EXIT_WAIVED 2
-#endif
-
-// CUDA Utility Helper Functions
-inline int stringRemoveDelimiter(char delimiter, const char *string)
-{
-    int string_start = 0;
-
-    while (string[string_start] == delimiter)
-    {
-        string_start++;
-    }
-
-    if (string_start >= (int)strlen(string)-1)
-    {
-        return 0;
-    }
-
-    return string_start;
-}
-
-inline int getFileExtension(char *filename, char **extension)
-{
-    int string_length = (int)strlen(filename);
-
-    while (filename[string_length--] != '.')
-    {
-        if (string_length == 0)
-            break;
-    }
-
-    if (string_length > 0) string_length += 2;
-
-    if (string_length == 0)
-        *extension = NULL;
-    else
-        *extension = &filename[string_length];
-
-    return string_length;
-}
-
-
-inline bool checkCmdLineFlag(const int argc, const char **argv, const char *string_ref)
-{
-    bool bFound = false;
-
-    if (argc >= 1)
-    {
-        for (int i=1; i < argc; i++)
-        {
-            int string_start = stringRemoveDelimiter('-', argv[i]);
-            const char *string_argv = &argv[i][string_start];
-
-            const char *equal_pos = strchr(string_argv, '=');
-            int argv_length = (int)(equal_pos == 0 ? strlen(string_argv) : equal_pos - string_argv);
-
-            int length = (int)strlen(string_ref);
-
-            if (length == argv_length && !STRNCASECMP(string_argv, string_ref, length))
-            {
-                bFound = true;
-                continue;
-            }
-        }
-    }
-
-    return bFound;
-}
-
-// This function wraps the CUDA Driver API into a template function
-template <class T>
-inline bool getCmdLineArgumentValue(const int argc, const char **argv, const char *string_ref, T *value)
-{
-    bool bFound = false;
-
-    if (argc >= 1)
-    {
-        for (int i=1; i < argc; i++)
-        {
-            int string_start = stringRemoveDelimiter('-', argv[i]);
-            const char *string_argv = &argv[i][string_start];
-            int length = (int)strlen(string_ref);
-
-            if (!STRNCASECMP(string_argv, string_ref, length))
-            {
-                if (length+1 <= (int)strlen(string_argv))
-                {
-                    int auto_inc = (string_argv[length] == '=') ? 1 : 0;
-                    *value = (T)atoi(&string_argv[length + auto_inc]);
-                }
-
-                bFound = true;
-                i=argc;
-            }
-        }
-    }
-
-    return bFound;
-}
-
-inline int getCmdLineArgumentInt(const int argc, const char **argv, const char *string_ref)
-{
-    bool bFound = false;
-    int value = -1;
-
-    if (argc >= 1)
-    {
-        for (int i=1; i < argc; i++)
-        {
-            int string_start = stringRemoveDelimiter('-', argv[i]);
-            const char *string_argv = &argv[i][string_start];
-            int length = (int)strlen(string_ref);
-
-            if (!STRNCASECMP(string_argv, string_ref, length))
-            {
-                if (length+1 <= (int)strlen(string_argv))
-                {
-                    int auto_inc = (string_argv[length] == '=') ? 1 : 0;
-                    value = atoi(&string_argv[length + auto_inc]);
-                }
-                else
-                {
-                    value = 0;
-                }
-
-                bFound = true;
-                continue;
-            }
-        }
-    }
-
-    if (bFound)
-    {
-        return value;
-    }
-    else
-    {
-        return 0;
-    }
-}
-
-inline float getCmdLineArgumentFloat(const int argc, const char **argv, const char *string_ref)
-{
-    bool bFound = false;
-    float value = -1;
-
-    if (argc >= 1)
-    {
-        for (int i=1; i < argc; i++)
-        {
-            int string_start = stringRemoveDelimiter('-', argv[i]);
-            const char *string_argv = &argv[i][string_start];
-            int length = (int)strlen(string_ref);
-
-            if (!STRNCASECMP(string_argv, string_ref, length))
-            {
-                if (length+1 <= (int)strlen(string_argv))
-                {
-                    int auto_inc = (string_argv[length] == '=') ? 1 : 0;
-                    value = (float)atof(&string_argv[length + auto_inc]);
-                }
-                else
-                {
-                    value = 0.f;
-                }
-
-                bFound = true;
-                continue;
-            }
-        }
-    }
-
-    if (bFound)
-    {
-        return value;
-    }
-    else
-    {
-        return 0;
-    }
-}
-
-inline bool getCmdLineArgumentString(const int argc, const char **argv,
-                                     const char *string_ref, char **string_retval)
-{
-    bool bFound = false;
-
-    if (argc >= 1)
-    {
-        for (int i=1; i < argc; i++)
-        {
-            int string_start = stringRemoveDelimiter('-', argv[i]);
-            char *string_argv = (char *)&argv[i][string_start];
-            int length = (int)strlen(string_ref);
-
-            if (!STRNCASECMP(string_argv, string_ref, length))
-            {
-                *string_retval = &string_argv[length+1];
-                bFound = true;
-                continue;
-            }
-        }
-    }
-
-    if (!bFound)
-    {
-        *string_retval = NULL;
-    }
-
-    return bFound;
-}
-
-//////////////////////////////////////////////////////////////////////////////
-//! Find the path for a file assuming that
-//! files are found in the searchPath.
-//!
-//! @return the path if succeeded, otherwise 0
-//! @param filename         name of the file
-//! @param executable_path  optional absolute path of the executable
-//////////////////////////////////////////////////////////////////////////////
-inline char *sdkFindFilePath(const char *filename, const char *executable_path)
-{
-    // <executable_name> defines a variable that is replaced with the name of the executable
-
-    // Typical relative search paths to locate needed companion files (e.g. sample input data, or JIT source files)
-    // The origin for the relative search may be the .exe file, a .bat file launching an .exe, a browser .exe launching the .exe or .bat, etc
-    const char *searchPath[] =
-    {
-        "./",                                       // same dir
-        "./<executable_name>_data_files/",
-        "./common/",                                // "/common/" subdir
-        "./common/data/",                           // "/common/data/" subdir
-        "./data/",                                  // "/data/" subdir
-        "./src/",                                   // "/src/" subdir
-        "./src/<executable_name>/data/",            // "/src/<executable_name>/data/" subdir
-        "./inc/",                                   // "/inc/" subdir
-        "./0_Simple/",                              // "/0_Simple/" subdir
-        "./1_Utilities/",                           // "/1_Utilities/" subdir
-        "./2_Graphics/",                            // "/2_Graphics/" subdir
-        "./3_Imaging/",                             // "/3_Imaging/" subdir
-        "./4_Finance/",                             // "/4_Finance/" subdir
-        "./5_Simulations/",                         // "/5_Simulations/" subdir
-        "./6_Advanced/",                            // "/6_Advanced/" subdir
-        "./7_CUDALibraries/",                       // "/7_CUDALibraries/" subdir
-        "./8_Android/",                             // "/8_Android/" subdir
-        "./samples/",                               // "/samples/" subdir
-
-        "./0_Simple/<executable_name>/data/",        // "/0_Simple/<executable_name>/data/" subdir
-        "./1_Utilities/<executable_name>/data/",     // "/1_Utilities/<executable_name>/data/" subdir
-        "./2_Graphics/<executable_name>/data/",      // "/2_Graphics/<executable_name>/data/" subdir
-        "./3_Imaging/<executable_name>/data/",       // "/3_Imaging/<executable_name>/data/" subdir
-        "./4_Finance/<executable_name>/data/",       // "/4_Finance/<executable_name>/data/" subdir
-        "./5_Simulations/<executable_name>/data/",   // "/5_Simulations/<executable_name>/data/" subdir
-        "./6_Advanced/<executable_name>/data/",      // "/6_Advanced/<executable_name>/data/" subdir
-        "./7_CUDALibraries/<executable_name>/",      // "/7_CUDALibraries/<executable_name>/" subdir
-        "./7_CUDALibraries/<executable_name>/data/", // "/7_CUDALibraries/<executable_name>/data/" subdir
-
-        "../",                                      // up 1 in tree
-        "../common/",                               // up 1 in tree, "/common/" subdir
-        "../common/data/",                          // up 1 in tree, "/common/data/" subdir
-        "../data/",                                 // up 1 in tree, "/data/" subdir
-        "../src/",                                  // up 1 in tree, "/src/" subdir
-        "../inc/",                                  // up 1 in tree, "/inc/" subdir
-
-        "../0_Simple/<executable_name>/data/",       // up 1 in tree, "/0_Simple/<executable_name>/" subdir
-        "../1_Utilities/<executable_name>/data/",    // up 1 in tree, "/1_Utilities/<executable_name>/" subdir
-        "../2_Graphics/<executable_name>/data/",     // up 1 in tree, "/2_Graphics/<executable_name>/" subdir
-        "../3_Imaging/<executable_name>/data/",      // up 1 in tree, "/3_Imaging/<executable_name>/" subdir
-        "../4_Finance/<executable_name>/data/",      // up 1 in tree, "/4_Finance/<executable_name>/" subdir
-        "../5_Simulations/<executable_name>/data/",  // up 1 in tree, "/5_Simulations/<executable_name>/" subdir
-        "../6_Advanced/<executable_name>/data/",     // up 1 in tree, "/6_Advanced/<executable_name>/" subdir
-        "../7_CUDALibraries/<executable_name>/data/",// up 1 in tree, "/7_CUDALibraries/<executable_name>/" subdir
-        "../8_Android/<executable_name>/data/",      // up 1 in tree, "/8_Android/<executable_name>/" subdir
-        "../samples/<executable_name>/data/",        // up 1 in tree, "/samples/<executable_name>/" subdir
-        "../../",                                        // up 2 in tree
-        "../../common/",                                 // up 2 in tree, "/common/" subdir
-        "../../common/data/",                            // up 2 in tree, "/common/data/" subdir
-        "../../data/",                                   // up 2 in tree, "/data/" subdir
-        "../../src/",                                    // up 2 in tree, "/src/" subdir
-        "../../inc/",                                    // up 2 in tree, "/inc/" subdir
-        "../../sandbox/<executable_name>/data/",         // up 2 in tree, "/sandbox/<executable_name>/" subdir
-        "../../0_Simple/<executable_name>/data/",        // up 2 in tree, "/0_Simple/<executable_name>/" subdir
-        "../../1_Utilities/<executable_name>/data/",     // up 2 in tree, "/1_Utilities/<executable_name>/" subdir
-        "../../2_Graphics/<executable_name>/data/",      // up 2 in tree, "/2_Graphics/<executable_name>/" subdir
-        "../../3_Imaging/<executable_name>/data/",       // up 2 in tree, "/3_Imaging/<executable_name>/" subdir
-        "../../4_Finance/<executable_name>/data/",       // up 2 in tree, "/4_Finance/<executable_name>/" subdir
-        "../../5_Simulations/<executable_name>/data/",   // up 2 in tree, "/5_Simulations/<executable_name>/" subdir
-        "../../6_Advanced/<executable_name>/data/",      // up 2 in tree, "/6_Advanced/<executable_name>/" subdir
-        "../../7_CUDALibraries/<executable_name>/data/", // up 2 in tree, "/7_CUDALibraries/<executable_name>/" subdir
-        "../../8_Android/<executable_name>/data/",       // up 2 in tree, "/8_Android/<executable_name>/" subdir
-        "../../samples/<executable_name>/data/",         // up 2 in tree, "/samples/<executable_name>/" subdir
-        "../../../",                                        // up 3 in tree
-        "../../../src/<executable_name>/",                  // up 3 in tree, "/src/<executable_name>/" subdir
-        "../../../src/<executable_name>/data/",             // up 3 in tree, "/src/<executable_name>/data/" subdir
-        "../../../src/<executable_name>/src/",              // up 3 in tree, "/src/<executable_name>/src/" subdir
-        "../../../src/<executable_name>/inc/",              // up 3 in tree, "/src/<executable_name>/inc/" subdir
-        "../../../sandbox/<executable_name>/",              // up 3 in tree, "/sandbox/<executable_name>/" subdir
-        "../../../sandbox/<executable_name>/data/",         // up 3 in tree, "/sandbox/<executable_name>/data/" subdir
-        "../../../sandbox/<executable_name>/src/",          // up 3 in tree, "/sandbox/<executable_name>/src/" subdir
-        "../../../sandbox/<executable_name>/inc/",          // up 3 in tree, "/sandbox/<executable_name>/inc/" subdir
-        "../../../0_Simple/<executable_name>/data/",        // up 3 in tree, "/0_Simple/<executable_name>/" subdir
-        "../../../1_Utilities/<executable_name>/data/",     // up 3 in tree, "/1_Utilities/<executable_name>/" subdir
-        "../../../2_Graphics/<executable_name>/data/",      // up 3 in tree, "/2_Graphics/<executable_name>/" subdir
-        "../../../3_Imaging/<executable_name>/data/",       // up 3 in tree, "/3_Imaging/<executable_name>/" subdir
-        "../../../4_Finance/<executable_name>/data/",       // up 3 in tree, "/4_Finance/<executable_name>/" subdir
-        "../../../5_Simulations/<executable_name>/data/",   // up 3 in tree, "/5_Simulations/<executable_name>/" subdir
-        "../../../6_Advanced/<executable_name>/data/",      // up 3 in tree, "/6_Advanced/<executable_name>/" subdir
-        "../../../7_CUDALibraries/<executable_name>/data/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir
-        "../../../8_Android/<executable_name>/data/",       // up 3 in tree, "/8_Android/<executable_name>/" subdir
-        "../../../0_Simple/<executable_name>/",        // up 3 in tree, "/0_Simple/<executable_name>/" subdir
-        "../../../1_Utilities/<executable_name>/",     // up 3 in tree, "/1_Utilities/<executable_name>/" subdir
-        "../../../2_Graphics/<executable_name>/",      // up 3 in tree, "/2_Graphics/<executable_name>/" subdir
-        "../../../3_Imaging/<executable_name>/",       // up 3 in tree, "/3_Imaging/<executable_name>/" subdir
-        "../../../4_Finance/<executable_name>/",       // up 3 in tree, "/4_Finance/<executable_name>/" subdir
-        "../../../5_Simulations/<executable_name>/",   // up 3 in tree, "/5_Simulations/<executable_name>/" subdir
-        "../../../6_Advanced/<executable_name>/",      // up 3 in tree, "/6_Advanced/<executable_name>/" subdir
-        "../../../7_CUDALibraries/<executable_name>/", // up 3 in tree, "/7_CUDALibraries/<executable_name>/" subdir
-        "../../../8_Android/<executable_name>/",       // up 3 in tree, "/8_Android/<executable_name>/" subdir
-        "../../../samples/<executable_name>/data/",         // up 3 in tree, "/samples/<executable_name>/" subdir
-        "../../../common/",                                 // up 3 in tree, "../../../common/" subdir
-        "../../../common/data/",                            // up 3 in tree, "../../../common/data/" subdir
-        "../../../data/",                                   // up 3 in tree, "../../../data/" subdir
-        "../../../../",                                // up 4 in tree
-        "../../../../src/<executable_name>/",          // up 4 in tree, "/src/<executable_name>/" subdir
-        "../../../../src/<executable_name>/data/",     // up 4 in tree, "/src/<executable_name>/data/" subdir
-        "../../../../src/<executable_name>/src/",      // up 4 in tree, "/src/<executable_name>/src/" subdir
-        "../../../../src/<executable_name>/inc/",      // up 4 in tree, "/src/<executable_name>/inc/" subdir
-        "../../../../sandbox/<executable_name>/",      // up 4 in tree, "/sandbox/<executable_name>/" subdir
-        "../../../../sandbox/<executable_name>/data/", // up 4 in tree, "/sandbox/<executable_name>/data/" subdir
-        "../../../../sandbox/<executable_name>/src/",  // up 4 in tree, "/sandbox/<executable_name>/src/" subdir
-        "../../../../sandbox/<executable_name>/inc/",   // up 4 in tree, "/sandbox/<executable_name>/inc/" subdir
-        "../../../../0_Simple/<executable_name>/data/",     // up 4 in tree, "/0_Simple/<executable_name>/" subdir
-        "../../../../1_Utilities/<executable_name>/data/",  // up 4 in tree, "/1_Utilities/<executable_name>/" subdir
-        "../../../../2_Graphics/<executable_name>/data/",   // up 4 in tree, "/2_Graphics/<executable_name>/" subdir
-        "../../../../3_Imaging/<executable_name>/data/",    // up 4 in tree, "/3_Imaging/<executable_name>/" subdir
-        "../../../../4_Finance/<executable_name>/data/",    // up 4 in tree, "/4_Finance/<executable_name>/" subdir
-        "../../../../5_Simulations/<executable_name>/data/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir
-        "../../../../6_Advanced/<executable_name>/data/",   // up 4 in tree, "/6_Advanced/<executable_name>/" subdir
-        "../../../../7_CUDALibraries/<executable_name>/data/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir
-        "../../../../8_Android/<executable_name>/data/",    // up 4 in tree, "/8_Android/<executable_name>/" subdir
-        "../../../../0_Simple/<executable_name>/",     // up 4 in tree, "/0_Simple/<executable_name>/" subdir
-        "../../../../1_Utilities/<executable_name>/",  // up 4 in tree, "/1_Utilities/<executable_name>/" subdir
-        "../../../../2_Graphics/<executable_name>/",   // up 4 in tree, "/2_Graphics/<executable_name>/" subdir
-        "../../../../3_Imaging/<executable_name>/",    // up 4 in tree, "/3_Imaging/<executable_name>/" subdir
-        "../../../../4_Finance/<executable_name>/",    // up 4 in tree, "/4_Finance/<executable_name>/" subdir
-        "../../../../5_Simulations/<executable_name>/",// up 4 in tree, "/5_Simulations/<executable_name>/" subdir
-        "../../../../6_Advanced/<executable_name>/",   // up 4 in tree, "/6_Advanced/<executable_name>/" subdir
-        "../../../../7_CUDALibraries/<executable_name>/", // up 4 in tree, "/7_CUDALibraries/<executable_name>/" subdir
-        "../../../../8_Android/<executable_name>/",    // up 4 in tree, "/8_Android/<executable_name>/" subdir
-        "../../../../samples/<executable_name>/data/",      // up 4 in tree, "/samples/<executable_name>/" subdir
-        "../../../../common/",                              // up 4 in tree, "../../../common/" subdir
-        "../../../../common/data/",                         // up 4 in tree, "../../../common/data/" subdir
-        "../../../../data/",                                // up 4 in tree, "../../../data/" subdir
-        "../../../../../",                                // up 5 in tree
-        "../../../../../src/<executable_name>/",          // up 5 in tree, "/src/<executable_name>/" subdir
-        "../../../../../src/<executable_name>/data/",     // up 5 in tree, "/src/<executable_name>/data/" subdir
-        "../../../../../src/<executable_name>/src/",      // up 5 in tree, "/src/<executable_name>/src/" subdir
-        "../../../../../src/<executable_name>/inc/",      // up 5 in tree, "/src/<executable_name>/inc/" subdir
-        "../../../../../sandbox/<executable_name>/",      // up 5 in tree, "/sandbox/<executable_name>/" subdir
-        "../../../../../sandbox/<executable_name>/data/", // up 5 in tree, "/sandbox/<executable_name>/data/" subdir
-        "../../../../../sandbox/<executable_name>/src/",  // up 5 in tree, "/sandbox/<executable_name>/src/" subdir
-        "../../../../../sandbox/<executable_name>/inc/",   // up 5 in tree, "/sandbox/<executable_name>/inc/" subdir
-        "../../../../../0_Simple/<executable_name>/data/",     // up 5 in tree, "/0_Simple/<executable_name>/" subdir
-        "../../../../../1_Utilities/<executable_name>/data/",  // up 5 in tree, "/1_Utilities/<executable_name>/" subdir
-        "../../../../../2_Graphics/<executable_name>/data/",   // up 5 in tree, "/2_Graphics/<executable_name>/" subdir
-        "../../../../../3_Imaging/<executable_name>/data/",    // up 5 in tree, "/3_Imaging/<executable_name>/" subdir
-        "../../../../../4_Finance/<executable_name>/data/",    // up 5 in tree, "/4_Finance/<executable_name>/" subdir
-        "../../../../../5_Simulations/<executable_name>/data/",// up 5 in tree, "/5_Simulations/<executable_name>/" subdir
-        "../../../../../6_Advanced/<executable_name>/data/",   // up 5 in tree, "/6_Advanced/<executable_name>/" subdir
-        "../../../../../7_CUDALibraries/<executable_name>/data/", // up 5 in tree, "/7_CUDALibraries/<executable_name>/" subdir
-        "../../../../../8_Android/<executable_name>/data/",    // up 5 in tree, "/8_Android/<executable_name>/" subdir
-        "../../../../../samples/<executable_name>/data/",      // up 5 in tree, "/samples/<executable_name>/" subdir
-        "../../../../../common/",                         // up 5 in tree, "../../../common/" subdir
-        "../../../../../common/data/",                    // up 5 in tree, "../../../common/data/" subdir
-    };
-
-    // Extract the executable name
-    std::string executable_name;
-
-    if (executable_path != 0)
-    {
-        executable_name = std::string(executable_path);
-
-#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
-        // Windows path delimiter
-        size_t delimiter_pos = executable_name.find_last_of('\\');
-        executable_name.erase(0, delimiter_pos + 1);
-
-        if (executable_name.rfind(".exe") != std::string::npos)
-        {
-            // we strip .exe, only if the .exe is found
-            executable_name.resize(executable_name.size() - 4);
-        }
-
-#else
-        // Linux & OSX path delimiter
-        size_t delimiter_pos = executable_name.find_last_of('/');
-        executable_name.erase(0,delimiter_pos+1);
-#endif
-    }
-
-    // Loop over all search paths and return the first hit
-    for (unsigned int i = 0; i < sizeof(searchPath)/sizeof(char *); ++i)
-    {
-        std::string path(searchPath[i]);
-        size_t executable_name_pos = path.find("<executable_name>");
-
-        // If there is executable_name variable in the searchPath
-        // replace it with the value
-        if (executable_name_pos != std::string::npos)
-        {
-            if (executable_path != 0)
-            {
-                path.replace(executable_name_pos, strlen("<executable_name>"), executable_name);
-            }
-            else
-            {
-                // Skip this path entry if no executable argument is given
-                continue;
-            }
-        }
-
-#ifdef _DEBUG
-        printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str());
-#endif
-
-        // Test if the file exists
-        path.append(filename);
-        FILE *fp;
-        FOPEN(fp, path.c_str(), "rb");
-
-        if (fp != NULL)
-        {
-            fclose(fp);
-            // File found
-            // returning an allocated array here for backwards compatibility reasons
-            char *file_path = (char *) malloc(path.length() + 1);
-            STRCPY(file_path, path.length() + 1, path.c_str());
-            return file_path;
-        }
-
-        if (fp)
-        {
-            fclose(fp);
-        }
-    }
-
-    // File not found
-    return 0;
-}
-
-#endif
--- a/src/test/main.cu	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,368 +0,0 @@
-/*
- * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-/*
- * Quadro and Tesla GPUs with compute capability >= 2.0 can overlap two memcopies
- * with kernel execution. This sample illustrates the usage of CUDA streams to
- * achieve overlapping of kernel execution with copying data to and from the device.
- *
- * Additionally, this sample uses CUDA events to measure elapsed time for
- * CUDA calls.  Events are a part of CUDA API and provide a system independent
- * way to measure execution times on CUDA devices with approximately 0.5
- * microsecond precision.
- *
- * Elapsed times are averaged over nreps repetitions (10 by default).
- *
-*/
-
-const char *sSDKname = "simpleMultiCopy";
-
-// includes, system
-#include <stdio.h>
-
-extern "C" {
-extern void test1();
-}
-// include CUDA
-#include <cuda.h>
-#include <cuda_runtime.h>
-
-// includes, project
-//#include <helper_cuda.h>
-//#include <helper_functions.h>  // helper for shared that are common to CUDA Samples
-
-#include "helper_cuda.h"
-
-// includes, kernels
-// Declare the CUDA kernels here and main() code that is needed to launch
-// Compute workload on the system
-__global__ void incKernel(int *g_out, int *g_in, int N, int inner_reps)
-{
-    int idx = blockIdx.x * blockDim.x + threadIdx.x;
-
-    if (idx < N)
-    {
-        for (int i=0; i<inner_reps; ++i)
-        {
-            g_out[idx] = g_in[idx] + 1;
-        }
-    }
-}
-
-#define STREAM_COUNT 4
-
-// Uncomment to simulate data source/sink IO times
-//#define SIMULATE_IO
-
-int *h_data_source;
-int *h_data_sink;
-
-int *h_data_in[STREAM_COUNT];
-int *d_data_in[STREAM_COUNT];
-
-int *h_data_out[STREAM_COUNT];
-int *d_data_out[STREAM_COUNT];
-
-
-cudaEvent_t cycleDone[STREAM_COUNT];
-cudaStream_t stream[STREAM_COUNT];
-
-cudaEvent_t start, stop;
-
-int N = 1 << 22;
-int nreps = 10;                 // number of times each experiment is repeated
-int inner_reps = 5;
-
-int memsize;
-
-dim3 block(512);
-dim3 grid;
-
-int thread_blocks;
-
-float processWithStreams(int streams_used);
-void init();
-bool test();
-
-////////////////////////////////////////////////////////////////////////////////
-// Program main
-////////////////////////////////////////////////////////////////////////////////
-int main(int argc, char *argv[])
-{
-    int cuda_device = 0;
-    float scale_factor;
-    cudaDeviceProp deviceProp;
-
-    test1();
-    printf("[%s] - Starting...\n", sSDKname);
-
-        // Otherwise pick the device with the highest Gflops/s
-        cuda_device = 0;
-        checkCudaErrors(cudaSetDevice(cuda_device));
-        checkCudaErrors(cudaGetDeviceProperties(&deviceProp, cuda_device));
-        printf("> Using CUDA device [%d]: %s\n", cuda_device, deviceProp.name);
-
-    checkCudaErrors(cudaGetDeviceProperties(&deviceProp, cuda_device));
-    printf("[%s] has %d MP(s) x %d (Cores/MP) = %d (Cores)\n",
-           deviceProp.name, deviceProp.multiProcessorCount,
-           _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor),
-           _ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * deviceProp.multiProcessorCount);
-
-    // Anything that is less than 32 Cores will have scaled down workload
-    scale_factor = max((32.0f / (_ConvertSMVer2Cores(deviceProp.major, deviceProp.minor) * (float)deviceProp.multiProcessorCount)), 1.0f);
-    N = (int)((float)N / scale_factor);
-
-    printf("> Device name: %s\n", deviceProp.name);
-    printf("> CUDA Capability %d.%d hardware with %d multi-processors\n",
-           deviceProp.major, deviceProp.minor,
-           deviceProp.multiProcessorCount);
-    printf("> scale_factor = %.2f\n", 1.0f/scale_factor);
-    printf("> array_size   = %d\n\n", N);
-
-    memsize = N * sizeof(int);
-
-    thread_blocks = N / block.x;
-
-    grid.x = thread_blocks % 65535;
-    grid.y = (thread_blocks / 65535 + 1);
-
-
-    // Allocate resources
-
-    h_data_source = (int *) malloc(memsize);
-    h_data_sink = (int *) malloc(memsize);
-
-    for (int i =0; i<STREAM_COUNT; ++i)
-    {
-
-        checkCudaErrors(cudaHostAlloc(&h_data_in[i], memsize,
-                                      cudaHostAllocDefault));
-        checkCudaErrors(cudaMalloc(&d_data_in[i], memsize));
-
-        checkCudaErrors(cudaHostAlloc(&h_data_out[i], memsize,
-                                      cudaHostAllocDefault));
-        checkCudaErrors(cudaMalloc(&d_data_out[i], memsize));
-
-        checkCudaErrors(cudaStreamCreate(&stream[i]));
-        checkCudaErrors(cudaEventCreate(&cycleDone[i]));
-
-        cudaEventRecord(cycleDone[i], stream[i]);
-    }
-
-    cudaEventCreate(&start);
-    cudaEventCreate(&stop);
-
-    init();
-
-    // Kernel warmup
-    incKernel<<<grid, block>>>(d_data_out[0], d_data_in[0], N, inner_reps);
-
-
-    // Time copies and kernel
-    cudaEventRecord(start,0);
-    checkCudaErrors(cudaMemcpyAsync(d_data_in[0], h_data_in[0], memsize,
-                                    cudaMemcpyHostToDevice,0));
-    cudaEventRecord(stop,0);
-    cudaEventSynchronize(stop);
-
-    float memcpy_h2d_time;
-    cudaEventElapsedTime(&memcpy_h2d_time, start, stop);
-
-    cudaEventRecord(start,0);
-    checkCudaErrors(cudaMemcpyAsync(h_data_out[0], d_data_out[0], memsize,
-                                    cudaMemcpyDeviceToHost, 0));
-    cudaEventRecord(stop,0);
-    cudaEventSynchronize(stop);
-
-    float memcpy_d2h_time;
-    cudaEventElapsedTime(&memcpy_d2h_time, start, stop);
-
-    cudaEventRecord(start,0);
-    incKernel<<<grid, block,0,0>>>(d_data_out[0], d_data_in[0], N, inner_reps);
-    cudaEventRecord(stop,0);
-    cudaEventSynchronize(stop);
-
-    float kernel_time;
-    cudaEventElapsedTime(&kernel_time, start, stop);
-
-    printf("\n");
-    printf("Relevant properties of this CUDA device\n");
-    printf("(%s) Can overlap one CPU<>GPU data transfer with GPU kernel execution (device property \"deviceOverlap\")\n", deviceProp.deviceOverlap ? "X" : " ");
-    //printf("(%s) Can execute several GPU kernels simultaneously (compute capability >= 2.0)\n", deviceProp.major >= 2 ? "X": " ");
-    printf("(%s) Can overlap two CPU<>GPU data transfers with GPU kernel execution\n"
-           "    (Compute Capability >= 2.0 AND (Tesla product OR Quadro 4000/5000/6000/K5000)\n",
-           (deviceProp.major >= 2 && deviceProp.asyncEngineCount > 1)
-           ? "X" : " ");
-
-    printf("\n");
-    printf("Measured timings (throughput):\n");
-    printf(" Memcpy host to device\t: %f ms (%f GB/s)\n",
-           memcpy_h2d_time, (memsize * 1e-6)/ memcpy_h2d_time);
-    printf(" Memcpy device to host\t: %f ms (%f GB/s)\n",
-           memcpy_d2h_time, (memsize * 1e-6)/ memcpy_d2h_time);
-    printf(" Kernel\t\t\t: %f ms (%f GB/s)\n",
-           kernel_time, (inner_reps *memsize * 2e-6)/ kernel_time);
-
-    printf("\n");
-    printf("Theoretical limits for speedup gained from overlapped data transfers:\n");
-    printf("No overlap at all (transfer-kernel-transfer): %f ms \n",
-           memcpy_h2d_time + memcpy_d2h_time + kernel_time);
-    printf("Compute can overlap with one transfer: %f ms\n",
-           max((memcpy_h2d_time + memcpy_d2h_time), kernel_time));
-    printf("Compute can overlap with both data transfers: %f ms\n",
-           max(max(memcpy_h2d_time,memcpy_d2h_time), kernel_time));
-
-    // Process pipelined work
-    float serial_time = processWithStreams(1);
-    float overlap_time = processWithStreams(STREAM_COUNT);
-
-    printf("\nAverage measured timings over %d repetitions:\n", nreps);
-    printf(" Avg. time when execution fully serialized\t: %f ms\n",
-           serial_time / nreps);
-    printf(" Avg. time when overlapped using %d streams\t: %f ms\n",
-           STREAM_COUNT, overlap_time / nreps);
-    printf(" Avg. speedup gained (serialized - overlapped)\t: %f ms\n",
-           (serial_time - overlap_time) / nreps);
-
-    printf("\nMeasured throughput:\n");
-    printf(" Fully serialized execution\t\t: %f GB/s\n",
-           (nreps * (memsize * 2e-6))/ serial_time);
-    printf(" Overlapped using %d streams\t\t: %f GB/s\n",
-           STREAM_COUNT, (nreps * (memsize * 2e-6))/ overlap_time);
-
-    // Verify the results, we will use the results for final output
-    bool bResults = test();
-
-    // Free resources
-
-    free(h_data_source);
-    free(h_data_sink);
-
-    for (int i =0; i<STREAM_COUNT; ++i)
-    {
-
-        cudaFreeHost(h_data_in[i]);
-        cudaFree(d_data_in[i]);
-
-        cudaFreeHost(h_data_out[i]);
-        cudaFree(d_data_out[i]);
-
-        cudaStreamDestroy(stream[i]);
-        cudaEventDestroy(cycleDone[i]);
-    }
-
-    cudaEventDestroy(start);
-    cudaEventDestroy(stop);
-
-    // Test result
-    exit(bResults ? EXIT_SUCCESS : EXIT_FAILURE);
-}
-
-float processWithStreams(int streams_used)
-{
-
-    int current_stream = 0;
-
-    float time;
-
-    // Do processing in a loop
-    //
-    // Note: All memory commands are processed in the order  they are issued,
-    // independent of the stream they are enqueued in. Hence the pattern by
-    // which the copy and kernel commands are enqueued in the stream
-    // has an influence on the achieved overlap.
-
-    cudaEventRecord(start, 0);
-
-    for (int i=0; i<nreps; ++i)
-    {
-        int next_stream = (current_stream + 1) % streams_used;
-
-#ifdef SIMULATE_IO
-        // Store the result
-        memcpy(h_data_sink, h_data_out[current_stream],memsize);
-
-        // Read new input
-        memcpy(h_data_in[next_stream], h_data_source, memsize);
-#endif
-
-        // Ensure that processing and copying of the last cycle has finished
-        cudaEventSynchronize(cycleDone[next_stream]);
-
-        // Process current frame
-        incKernel<<<grid, block, 0, stream[current_stream]>>>(
-            d_data_out[current_stream],
-            d_data_in[current_stream],
-            N,
-            inner_reps);
-
-        // Upload next frame
-        checkCudaErrors(cudaMemcpyAsync(
-                            d_data_in[next_stream],
-                            h_data_in[next_stream],
-                            memsize,
-                            cudaMemcpyHostToDevice,
-                            stream[next_stream]));
-
-        // Download current frame
-        checkCudaErrors(cudaMemcpyAsync(
-                            h_data_out[current_stream],
-                            d_data_out[current_stream],
-                            memsize,
-                            cudaMemcpyDeviceToHost,
-                            stream[current_stream]));
-
-        checkCudaErrors(cudaEventRecord(
-                            cycleDone[current_stream],
-                            stream[current_stream]));
-
-        current_stream = next_stream;
-    }
-
-    cudaEventRecord(stop, 0);
-
-    cudaDeviceSynchronize();
-
-    cudaEventElapsedTime(&time, start, stop);
-
-    return time;
-
-}
-
-void init()
-{
-    for (int i=0; i<N; ++i)
-    {
-        h_data_source[i] = 0;
-    }
-
-    for (int i =0; i<STREAM_COUNT; ++i)
-    {
-        memcpy(h_data_in[i], h_data_source, memsize);
-    }
-}
-
-
-bool test()
-{
-
-    bool passed = true;
-
-    for (int j =0; j<STREAM_COUNT; ++j)
-    {
-        for (int i =0; i<N; ++i)
-        {
-            passed &= (h_data_out[j][i] == 1);
-        }
-    }
-
-    return passed;
-}
--- a/src/test/multiply.cu	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-extern "C" {
-    __global__ void multiply(float* A, float* B, float* C) {
-//        printf("%d %d\n",i[0],i[1]);
-        int index = blockIdx.x * blockDim.x + threadIdx.x;
-        C[index] = A[index] * B[0];
-    }
-
-}
--- a/src/test/test.c	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,5 +0,0 @@
-#include <stdio.h>
-
-void test1() {
-    printf("test\n");
-}
--- a/src/test/twice.cc	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,182 +0,0 @@
-#include <stdio.h>
-#include <sys/time.h>
-#include <string.h>
-#include <stdlib.h>
-extern "C" {
-#include <pthread.h>
-}
-
-#include <cuda.h>
-
-#include <cuda_runtime.h>
-#include "helper_cuda.h"
-
-#define LENGTH (10)
-#define THREAD (10)
-
-double
-getTime() {
-    struct timeval tv;
-    gettimeofday(&tv, NULL);
-    return tv.tv_sec + (double)tv.tv_usec*1e-6;
-}
-
-void
-check_data(float* A, float B, float* C) {
-    for (int i=0; i<LENGTH*THREAD; i++) {
-        if (A[i]*B!=C[i]) {
-            puts("multiply failure.");
-            return;
-        }
-    }
-    puts("success.");
-}
-
-void print_result(float* C) {
-    for (int i=0; i<LENGTH*THREAD; i++) {
-        printf("%f\n",C[i]);
-    }
-}
-
-int num_stream = 1; // number of stream
-int num_exec = 16; // number of executed kernel
-
-static void *start_cuda(void *) ;
-
-int main(int args, char* argv[]) {
-    
-    for (int i=1;argv[i];i++) {
-        if (strcmp(argv[i], "--stream") == 0 || strcmp(argv[i], "-s") == 0) {
-            num_stream = atoi(argv[++i]);
-        }
-        if (strcmp(argv[i], "--numExec") == 0 || strcmp(argv[i], "-e") == 0) {
-            num_exec = atoi(argv[++i]);
-        }
-    }
-#if 0
-    start_cuda(NULL);
-#else
-    pthread_t thread;
-    pthread_create(&thread, NULL, start_cuda, NULL);
-    pthread_join(thread,NULL);
-#endif
-    return 0;
-}
-
-static void *start_cuda(void *args) {
-    // initialize and load kernel
-    CUdevice device;
-    CUcontext context;
-    CUmodule module;
-    CUfunction function;
-    CUstream stream[num_stream];
-
-    checkCudaErrors(cuInit(0));
-    checkCudaErrors(cuDeviceGet(&device, 0));
-    checkCudaErrors(cuCtxCreate(&context, CU_CTX_SCHED_SPIN, device));
-    checkCudaErrors(cuModuleLoad(&module, "multiply.ptx"));
-    checkCudaErrors(cuModuleGetFunction(&function, module, "multiply"));
-    if (num_stream) {
-        for (int i=0;i<num_stream;i++)
-            checkCudaErrors(cuStreamCreate(&stream[i],0));
-    }
-
-    // memory allocate
-    CUdeviceptr devA;
-    CUdeviceptr devB[num_exec];
-    CUdeviceptr devOut[num_exec];
-
-    checkCudaErrors(cuMemAlloc(&devA, LENGTH*THREAD*sizeof(float)));
-    for (int i=0;i<num_exec;i++) {
-        checkCudaErrors(cuMemAlloc(&devB[i], sizeof(float)));
-        checkCudaErrors(cuMemAlloc(&devOut[i], LENGTH*THREAD*sizeof(float)));
-    }
-
-    // input buffer
-    float* A = new float[LENGTH*THREAD];
-    float* B = new float[num_exec];
-
-    for (int i=0; i<LENGTH*THREAD; i++)
-        A[i] = (float)(i+1000);
-
-    // output buffer
-    float** result = new float* [num_exec];
-
-    for (int i=0;i<num_exec;i++)
-        result[i] = new float[LENGTH*THREAD];
-
-    // Synchronous data transfer(host to device)
-    checkCudaErrors(cuMemcpyHtoD(devA, A, LENGTH*THREAD*sizeof(float)));
-    
-    // Asynchronous data transfer(host to device)
-    int cur = 0;
-
-     for (int i=0;i<num_exec;i++,cur++) {
-         if (num_stream <= cur)
-             cur = 0;
-         B[i] = (float)(i+1);
-         if (num_stream) {
-             checkCudaErrors(cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]));
-         } else {
-             checkCudaErrors(cuMemcpyHtoD(devB[i], &B[i], sizeof(float)));
-         }
-     }
-
-    cur = 0;
-
-    // Asynchronous launch kernel
-    for (int i=0;i<num_exec;i++,cur++) {
-        if (num_stream <= cur)
-            cur=0;
-        //B[i] = (float)(i+1);
-        //cuMemcpyHtoDAsync(devB[i], &B[i], sizeof(float), stream[cur]);
-        void* args[] = {&devA, &devB[i], &devOut[i]};
-        checkCudaErrors(cuLaunchKernel(function,
-                       LENGTH, 1, 1,
-                       THREAD, 1, 1,
-                                 0, num_stream ? stream[cur] : NULL , args, NULL));
-        //cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]);
-    }
-
-    cur = 0;
-
-    
-    // Asynchronous data transfer(device to host)
-     for (int i=0;i<num_exec;i++,cur++) {
-         if (num_stream <= cur)
-             cur = 0;
-         if (num_stream) {
-             checkCudaErrors(cuMemcpyDtoHAsync(result[i], devOut[i], LENGTH*THREAD*sizeof(float), stream[cur]));
-         } else {
-             checkCudaErrors(cuMemcpyDtoH(result[i], devOut[i], LENGTH*THREAD*sizeof(float)));
-         }
-     }
-    
-    // wait for stream
-    for (int i=0;i<num_stream;i++)
-        checkCudaErrors(cuStreamSynchronize(stream[i]));
-    
-    //printf("%0.6f\n",getTime()-start);
-
-    for (int i=0;i<num_exec;i++)
-        check_data(A,(float)(i+1),result[i]);
-
-    // memory release
-    checkCudaErrors(cuMemFree(devA));
-    for (int i=0;i<num_exec;i++) {
-        checkCudaErrors(cuMemFree(devB[i]));
-        checkCudaErrors(cuMemFree(devOut[i]));
-    }
-    for (int i=0;i<num_stream;i++)
-        checkCudaErrors(cuStreamDestroy(stream[i]));
-    checkCudaErrors(cuModuleUnload(module));
-    checkCudaErrors(cuCtxDestroy(context));
-
-    delete[] A;
-    delete[] B;
-    for (int i=0;i<num_exec;i++)
-        delete[] result[i];
-    delete[] result;
-    return 0;
-}
-
--- a/src/test/vectorAddDrv.cc	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,451 +0,0 @@
-/*
- * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-/* Vector addition: C = A + B.
- *
- * This sample is a very basic sample that implements element by element
- * vector addition. It is the same as the sample illustrating Chapter 3
- * of the programming guide with some additions like error checking.
- *
- */
-
-// Includes
-#include <stdio.h>
-#include <string.h>
-#include <iostream>
-#include <cstring>
-#include <math.h>
-
-// includes, project
-#include <driver_types.h>
-#include <cuda_runtime.h>
-#include <cuda.h>
-#include "helper_cuda.h"
-
-// includes, CUDA
-#include <builtin_types.h>
-
-#define PTX_FILE "vectorAdd_kernel.ptx"
-
-
-using namespace std;
-
-// Variables
-CUdevice cuDevice;
-CUcontext cuContext;
-CUmodule cuModule;
-CUfunction vecAdd_kernel;
-float *h_A;
-float *h_B;
-float *h_C;
-CUdeviceptr d_A;
-CUdeviceptr d_B;
-CUdeviceptr d_C;
-bool noprompt = false;
-
-// Functions
-void Cleanup(bool);
-CUresult CleanupNoFailure();
-void RandomInit(float *, int);
-bool findModulePath(const char *, string &, char **, string &);
-void ParseArguments(int, char **);
-
-int *pArgc = NULL;
-char **pArgv = NULL;
-
-
-// Host code
-int main(int argc, char **argv)
-{
-    pArgc = &argc;
-    pArgv = argv;
-
-    printf("Vector Addition (Driver API)\n");
-    int N = 50000, devID = 0;
-    size_t  size = N * sizeof(float);
-
-    ParseArguments(argc, argv);
-
-    // Initialize
-    checkCudaErrors(cuInit(0));
-
-    // This assumes that the user is attempting to specify a explicit device -device=n
-    if (argc > 1)
-    {
-        bool bFound = false;
-
-        for (int param=0; param < argc; param++)
-        {
-            int string_start = 0;
-
-            while (argv[param][string_start] == '-')
-            {
-                string_start++;
-            }
-
-            char *string_argv = &argv[param][string_start];
-
-            if (!strncmp(string_argv, "device", 6))
-            {
-                int len=(int)strlen(string_argv);
-
-                while (string_argv[len] != '=')
-                {
-                    len--;
-                }
-
-                devID = atoi(&string_argv[++len]);
-                bFound = true;
-            }
-
-            if (bFound)
-            {
-                break;
-            }
-        }
-    }
-
-    // Get number of devices supporting CUDA
-    int deviceCount = 0;
-    checkCudaErrors(cuDeviceGetCount(&deviceCount));
-    if (deviceCount == 0)
-    {
-        printf("There is no device supporting CUDA.\n");
-        Cleanup(false);
-    }
-
-    if (devID < 0)
-    {
-        devID = 0;
-    }
-
-    if (devID > deviceCount-1)
-    {
-        fprintf(stderr, "(Device=%d) invalid GPU device.  %d GPU device(s) detected.\nexiting...\n", devID, deviceCount);
-        CleanupNoFailure();
-        exit(EXIT_SUCCESS);
-    }
-    else
-    {
-        int major, minor;
-        char deviceName[100];
-        checkCudaErrors(cuDeviceComputeCapability(&major, &minor, devID));
-        checkCudaErrors(cuDeviceGetName(deviceName, 256, devID));
-        printf("> Using Device %d: \"%s\" with Compute %d.%d capability\n", devID, deviceName, major, minor);
-    }
-
-    // pick up device with zero ordinal (default, or devID)
-    checkCudaErrors(cuDeviceGet(&cuDevice, devID));
-    // Create context
-    checkCudaErrors(cuCtxCreate(&cuContext, 0, cuDevice));
-    // first search for the module path before we load the results
-    string module_path, ptx_source;
-
-    if (!findModulePath(PTX_FILE, module_path, argv, ptx_source))
-    {
-        if (!findModulePath("vectorAdd_kernel.cubin", module_path, argv, ptx_source))
-        {
-            printf("> findModulePath could not find <vectorAdd> ptx or cubin\n");
-            Cleanup(false);
-        }
-    }
-    else
-    {
-        printf("> initCUDA loading module: <%s>\n", module_path.c_str());
-    }
-
-    // Create module from binary file (PTX or CUBIN)
-    if (module_path.rfind("ptx") != string::npos)
-    {
-        // in this branch we use compilation with parameters
-        const unsigned int jitNumOptions = 3;
-        CUjit_option *jitOptions = new CUjit_option[jitNumOptions];
-        void **jitOptVals = new void *[jitNumOptions];
-
-        // set up size of compilation log buffer
-        jitOptions[0] = CU_JIT_INFO_LOG_BUFFER_SIZE_BYTES;
-        int jitLogBufferSize = 1024;
-        jitOptVals[0] = (void *)(size_t)jitLogBufferSize;
-
-        // set up pointer to the compilation log buffer
-        jitOptions[1] = CU_JIT_INFO_LOG_BUFFER;
-        char *jitLogBuffer = new char[jitLogBufferSize];
-        jitOptVals[1] = jitLogBuffer;
-
-        // set up pointer to set the Maximum # of registers for a particular kernel
-        jitOptions[2] = CU_JIT_MAX_REGISTERS;
-        int jitRegCount = 32;
-        jitOptVals[2] = (void *)(size_t)jitRegCount;
-
-        checkCudaErrors(cuModuleLoadDataEx(&cuModule, ptx_source.c_str(), jitNumOptions, jitOptions, (void **)jitOptVals));
-
-        printf("> PTX JIT log:\n%s\n", jitLogBuffer);
-    }
-    else
-    {
-        checkCudaErrors(cuModuleLoad(&cuModule, module_path.c_str()));
-    }
-
-    // Get function handle from module
-    checkCudaErrors(cuModuleGetFunction(&vecAdd_kernel, cuModule, "VecAdd_kernel"));
-
-    // Allocate input vectors h_A and h_B in host memory
-    h_A = (float *)malloc(size);
-    if (h_A == 0) { Cleanup(false); }
-
-    h_B = (float *)malloc(size);
-    if (h_B == 0) { Cleanup(false); }
-
-    h_C = (float *)malloc(size);
-    if (h_C == 0) { Cleanup(false); }
-
-    // Initialize input vectors
-    RandomInit(h_A, N);
-    RandomInit(h_B, N);
-
-    // Allocate vectors in device memory
-    checkCudaErrors(cuMemAlloc(&d_A, size));
-    checkCudaErrors(cuMemAlloc(&d_B, size));
-    checkCudaErrors(cuMemAlloc(&d_C, size));
-
-    // Copy vectors from host memory to device memory
-    checkCudaErrors(cuMemcpyHtoD(d_A, h_A, size));
-    checkCudaErrors(cuMemcpyHtoD(d_B, h_B, size));
-
-#if 1
-
-    if (1)
-    {
-        // This is the new CUDA 4.0 API for Kernel Parameter Passing and Kernel Launch (simpler method)
-
-        // Grid/Block configuration
-        int threadsPerBlock = 256;
-        int blocksPerGrid   = (N + threadsPerBlock - 1) / threadsPerBlock;
-
-        void *args[] = { &d_A, &d_B, &d_C, &N };
-
-        // Launch the CUDA kernel
-        checkCudaErrors(cuLaunchKernel(vecAdd_kernel,  blocksPerGrid, 1, 1,
-                               threadsPerBlock, 1, 1,
-                               0,
-                               NULL, args, NULL));
-    }
-    else
-    {
-        // This is the new CUDA 4.0 API for Kernel Parameter Passing and Kernel Launch (advanced method)
-        int offset = 0;
-        void *argBuffer[16];
-        *((CUdeviceptr *)&argBuffer[offset]) = d_A;
-        offset += sizeof(d_A);
-        *((CUdeviceptr *)&argBuffer[offset]) = d_B;
-        offset += sizeof(d_B);
-        *((CUdeviceptr *)&argBuffer[offset]) = d_C;
-        offset += sizeof(d_C);
-        *((int *)&argBuffer[offset]) = N;
-        offset += sizeof(N);
-
-        // Grid/Block configuration
-        int threadsPerBlock = 256;
-        int blocksPerGrid   = (N + threadsPerBlock - 1) / threadsPerBlock;
-
-        // Launch the CUDA kernel
-        checkCudaErrors(cuLaunchKernel(vecAdd_kernel,  blocksPerGrid, 1, 1,
-                               threadsPerBlock, 1, 1,
-                               0,
-                               NULL, NULL, argBuffer));
-    }
-
-#else
-    {
-        char argBuffer[256];
-
-        // pass in launch parameters (not actually de-referencing CUdeviceptr).  CUdeviceptr is
-        // storing the value of the parameters
-        *((CUdeviceptr *)&argBuffer[offset]) = d_A;
-        offset += sizeof(d_A);
-        *((CUdeviceptr *)&argBuffer[offset]) = d_B;
-        offset += sizeof(d_B);
-        *((CUdeviceptr *)&argBuffer[offset]) = d_C;
-        offset += sizeof(d_C);
-        *((int *)&argBuffer[offset]) = N;
-        offset += sizeof(N);
-
-        void *kernel_launch_config[5] =
-        {
-            CU_LAUNCH_PARAM_BUFFER_POINTER, argBuffer,
-            CU_LAUNCH_PARAM_BUFFER_SIZE,    &offset,
-            CU_LAUNCH_PARAM_END
-        };
-
-        // Grid/Block configuration
-        int threadsPerBlock = 256;
-        int blocksPerGrid   = (N + threadsPerBlock - 1) / threadsPerBlock;
-
-        // Launch the CUDA kernel
-        checkCudaErrors(cuLaunchKernel(vecAdd_kernel,  blocksPerGrid, 1, 1,
-                               threadsPerBlock, 1, 1,
-                               0, 0,
-                               NULL, (void **)&kernel_launch_config));
-    }
-#endif
-
-#ifdef _DEBUG
-    checkCudaErrors(cuCtxSynchronize());
-#endif
-
-    // Copy result from device memory to host memory
-    // h_C contains the result in host memory
-    checkCudaErrors(cuMemcpyDtoH(h_C, d_C, size));
-
-    // Verify result
-    int i;
-
-    for (i = 0; i < N; ++i)
-    {
-        float sum = h_A[i] + h_B[i];
-
-        if (fabs(h_C[i] - sum) > 1e-7f)
-        {
-            break;
-        }
-    }
-
-    printf("%s\n", (i==N) ? "Result = PASS" : "Result = FAIL");
-
-    exit((i==N) ? EXIT_SUCCESS : EXIT_FAILURE);
-}
-
-CUresult CleanupNoFailure()
-{
-    CUresult error;
-
-    // Free device memory
-    if (d_A)
-    {
-        error = cuMemFree(d_A);
-    }
-
-    if (d_B)
-    {
-        error = cuMemFree(d_B);
-    }
-
-    if (d_C)
-    {
-        error = cuMemFree(d_C);
-    }
-
-    // Free host memory
-    if (h_A)
-    {
-        free(h_A);
-    }
-
-    if (h_B)
-    {
-        free(h_B);
-    }
-
-    if (h_C)
-    {
-        free(h_C);
-    }
-
-    error = cuCtxDestroy(cuContext);
-
-    return error;
-}
-
-void Cleanup(bool noError)
-{
-    CUresult error;
-    error = CleanupNoFailure();
-
-    if (!noError || error != CUDA_SUCCESS)
-    {
-        printf("Function call failed\nFAILED\n");
-        exit(EXIT_FAILURE);
-    }
-
-    if (!noprompt)
-    {
-        printf("\nPress ENTER to exit...\n");
-        fflush(stdout);
-        fflush(stderr);
-        getchar();
-    }
-}
-
-
-// Allocates an array with random float entries.
-void RandomInit(float *data, int n)
-{
-    for (int i = 0; i < n; ++i)
-    {
-        data[i] = rand() / (float)RAND_MAX;
-    }
-}
-
-bool inline
-findModulePath(const char *module_file, string &module_path, char **argv, string &ptx_source)
-{
-    char *actual_path = sdkFindFilePath(module_file, argv[0]);
-
-    if (actual_path)
-    {
-        module_path = actual_path;
-    }
-    else
-    {
-        printf("> findModulePath file not found: <%s> \n", module_file);
-        return false;
-    }
-
-    if (module_path.empty())
-    {
-        printf("> findModulePath could not find file: <%s> \n", module_file);
-        return false;
-    }
-    else
-    {
-        printf("> findModulePath found file at <%s>\n", module_path.c_str());
-
-        if (module_path.rfind(".ptx") != string::npos)
-        {
-            FILE *fp = fopen(module_path.c_str(), "rb");
-            fseek(fp, 0, SEEK_END);
-            int file_size = ftell(fp);
-            char *buf = new char[file_size+1];
-            fseek(fp, 0, SEEK_SET);
-            fread(buf, sizeof(char), file_size, fp);
-            fclose(fp);
-            buf[file_size] = '\0';
-            ptx_source = buf;
-            delete[] buf;
-        }
-
-        return true;
-    }
-}
-
-// Parse program arguments
-void ParseArguments(int argc, char **argv)
-{
-    for (int i = 0; i < argc; ++i)
-    {
-        if (strcmp(argv[i], "--noprompt") == 0 ||
-            strcmp(argv[i], "-noprompt") == 0)
-        {
-            noprompt = true;
-            break;
-        }
-    }
-}
--- a/src/test/vectorAdd_kernel.cu	Thu Jan 16 14:44:03 2020 +0900
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,27 +0,0 @@
-/*
- * Copyright 1993-2015 NVIDIA Corporation.  All rights reserved.
- *
- * Please refer to the NVIDIA end user license agreement (EULA) associated
- * with this source code for terms and conditions that govern your use of
- * this software. Any use, reproduction, disclosure, or distribution of
- * this software and related documentation outside the terms of the EULA
- * is strictly prohibited.
- *
- */
-
-/* Vector addition: C = A + B.
- *
- * This sample is a very basic sample that implements element by element
- * vector addition. It is the same as the sample illustrating Chapter 3
- * of the programming guide with some additions like error checking.
- *
- */
-
-// Device code
-extern "C" __global__ void VecAdd_kernel(const float *A, const float *B, float *C, int N)
-{
-    int i = blockDim.x * blockIdx.x + threadIdx.x;
-
-    if (i < N)
-        C[i] = A[i] + B[i];
-}