view TaskManager/Cuda/CudaScheduler.h @ 2014:8c618e912c88 draft

optimization data transfer. wrong result
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Tue, 01 Jul 2014 17:04:01 +0900
parents d43c2b7932ea
children 6bf6450bd45a
line wrap: on
line source

#ifndef INCLUDE_CUDA_SCHEDULER
#define INCLUDE_CUDA_SCHEDULER

#include "MainScheduler.h"
#include "FifoDmaManager.h"
#include "CudaThreads.h"
#include "HTask.h"
#include "TaskManager.h"
#include <cuda.h>
#include <map>

// Task table for the CUDA backend; defined in another translation unit.
// NOTE(review): presumably indexed by task command id -- confirm against the definition.
extern TaskObject cuda_task_list[MAX_TASK_OBJECT];

// NOTE(review): a using-directive in a header leaks into every file that
// includes it. The class below relies on it for the unqualified `map`.
using namespace std;

// Length of the per-stage arrays (kernel, cudabuffer, flag) in CudaScheduler.
#define STAGE 8

/**
 * Scheduler built on MainScheduler that uses the CUDA Driver API
 * (CUdevice / CUcontext / CUstream / CUfunction).
 *
 * It keeps STAGE slots of per-stage state: one kernel handle, one CudaBuffer
 * and one HTask flag per slot.
 * NOTE(review): the slots look like a software pipeline over task lists
 * (read / exec / write) -- confirm against the implementation file.
 */
class CudaScheduler : public MainScheduler {
 public:
    // Per-stage bundle of device buffers, a stream and kernel arguments.
    typedef struct cudabuffer {
        // (sic: "allocate") NOTE(review): presumably the capacity of the
        // memin/memout/kernelParams arrays -- confirm.
        int allcate_size;
        // NOTE(review): presumably the number of entries in use in memin -- confirm.
        int in_size;
        // NOTE(review): presumably the number of entries in use in memout -- confirm.
        int out_size;
        // Array of device pointers for input data.
        CUdeviceptr* memin;
        // Array of device pointers for output data.
        CUdeviceptr* memout;
        // CUDA stream associated with this buffer set.
        CUstream stream;
        // NOTE(review): presumably the argument array handed to the kernel
        // launch call -- confirm.
        void** kernelParams;
    } CudaBuffer, *CudaBufferPtr;
    CudaScheduler();
    virtual ~CudaScheduler();
    // NOTE(review): presumably sets up `device` and `context` -- confirm.
    void init_gpu();
    // NOTE(review): presumably waits for the work of stage `cur` using buffer
    // set `m` for `taskList` -- confirm.
    void wait_for_event(CudaBufferPtr m, TaskListPtr taskList, int cur);
    void run();
    
    // platform platform;
    // A platform object is needed in OpenCL because OpenCL supports GPUs from
    // multiple vendors.
    // With CUDA only NVIDIA is targeted, so it is probably unnecessary?
    CUdevice device;
    int ret_num_devices;
    CUcontext context;
    // command_queue command_queue;
    // CUDA has no direct equivalent of an OpenCL command_queue.
    // Apparently the closest approximation would be the CUDA Stream mechanism.
    int ret;
    memaddr reply;
    // Counterpart of OpenCL's cl_kernel; one handle per stage.
    CUfunction kernel[STAGE];
    // One buffer set per stage.
    CudaBuffer cudabuffer[STAGE];
    
    // record transmitted data.
    // `transmitted` maps a ListElement to a device-pointer slot and
    // `reverse_map` is the inverse lookup.
    map<ListElement*, CUdeviceptr*> transmitted;
    map<CUdeviceptr*, ListElement*> reverse_map;

    // One HTask flag per stage.
    HTask::htask_flag flag[STAGE];
    
 private:
    // NOTE(review): presumably loads the kernel registered for task id `cmd`
    // and returns a status code -- confirm.
    int load_kernel(int cmd);
    // NOTE(review): presumably allocates `size` bytes of device memory into
    // mem[i] -- confirm.
    void createBuffer(CudaBufferPtr cudabuffer, CUdeviceptr* mem, int i, size_t size);
    void initCudaBuffer(CudaBufferPtr m);
    void destroyCudaBuffer(CudaBufferPtr m);
    // NOTE(review): presumably the error path for a failed CUDA call with
    // status `ret` at stage `cur` -- confirm.
    void CudaTaskError(CudaBufferPtr cudabuffer, int cur, TaskListPtr taskList, int ret);
    int read(TaskPtr nextTask, TaskListPtr tasklist);
    void exec(TaskListPtr tasklist, int cur);
    TaskPtr write(TaskPtr nextTask, TaskListPtr tasklist);
};

// Registration entry point used by CudaSchedRegister; defined in another
// translation unit. It is declared ahead of the macro that expands to it and
// inside the include guard, so everything this header provides is guarded.
extern void cuda_register_task(int cmd, const char* filename, const char* functionname);

// Forwards its three arguments to
// cuda_register_task(str, filename, functionname).
//
// NOTE: the expansion already ends with ';'. That is kept for compatibility
// with call sites that omit their own semicolon. As a consequence
// `if (c) CudaSchedRegister(...); else ...` does not compile without braces.
#define CudaSchedRegister(str, filename, functionname)  \
    cuda_register_task(str, filename, functionname);

#endif // INCLUDE_CUDA_SCHEDULER