view WordCount/main.cc @ 5:ef8efbd04df9 default tip

minor change
author Yutaka_Kinjyo
date Thu, 28 Jul 2011 11:10:28 +0900
parents 641eef31681e
children
line wrap: on
line source

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <fcntl.h>
#include <unistd.h>
#include <OpenCL/opencl.h>
#include <oclUtils.h>

/* Number of int slots the host reserves per work-item in the output buffer
 * (main() documents them as: word_num, line_num, head, tail). */
#define OUT_PARAM_NUM 4
/* Number of leading slots per work-item that main() sums up and prints. */
#define PRINT_PARAM_NUM 2

/* A memory-mapped file: where the mapping starts and how long it is. */
typedef struct {
    caddr_t file_mmap;  /* address returned by mmap() */
    off_t size;         /* length in bytes, taken from fstat()'s st_size */
} st_mmap_t;

/* Parameter record that main() copies into a device buffer and passes to the
 * kernel as argument 1.
 * NOTE(review): presumably the kernel declares a struct with the same layout
 * -- confirm against kernel/word_count.cl before changing any field. */
typedef struct {
    int size;         /* bytes per chunk (main() stores divi_size here) */
    int remain_size;  /* bytes left over after dividing the file into full chunks */
} param_t;


/*
 * Map the whole of `filename` read-only into memory.
 *
 * Returns the mapping address together with the file size. The mapping is
 * private (MAP_PRIVATE) and starts at file offset 0. The file descriptor is
 * closed before returning; the mapping remains valid after the close.
 *
 * Any failure (open, fstat or mmap) prints a diagnostic to stderr and
 * terminates the process with a failure status, so a caller that gets a
 * return value always gets a usable mapping.
 */
static st_mmap_t
my_mmap(char *filename)
{
    st_mmap_t st_mmap;
    struct stat sb;

    /* open() signals failure with -1; 0 is a perfectly valid descriptor. */
    int fd = open(filename, O_RDONLY);
    if (fd < 0) {
        fprintf(stderr,"can't open %s\n",filename);
        exit(EXIT_FAILURE);
    }

    /* Without a successful fstat() the size below would be garbage. */
    if (fstat(fd,&sb) != 0) {
        fprintf(stderr,"can't fstat %s\n",filename);
        close(fd);
        exit(EXIT_FAILURE);
    }

    st_mmap.size = sb.st_size;

    printf("file size %d\n",(int)st_mmap.size);

    void *addr = mmap(NULL, st_mmap.size, PROT_READ, MAP_PRIVATE, fd, (off_t)0);
    if (addr == MAP_FAILED) {
        fprintf(stderr,"Can't mmap file\n");
        perror(NULL);
        close(fd);
        exit(EXIT_FAILURE);
    }

    /* The descriptor is no longer needed once the mapping exists. */
    close(fd);

    st_mmap.file_mmap = (caddr_t)addr;
    return st_mmap;
}



/*
 * Count the words and lines of a text file with the OpenCL "word_count" kernel.
 *
 * Command line:
 *   -file <path>    text file to count (required)
 *   -type gpu|cpu   OpenCL device type (default: CL_DEVICE_TYPE_DEFAULT)
 *   -help           print the usage line
 *
 * The input is split into chunks of divi_size bytes and one work-item is
 * launched per chunk. The host reads back OUT_PARAM_NUM ints per work-item,
 * sums the first PRINT_PARAM_NUM of them and prints the two totals.
 *
 * Returns 0 on success or when only the usage line is printed, and a failure
 * status when the kernel source cannot be loaded or memory cannot be
 * allocated. OpenCL status codes are handed to oclCheckError() from oclUtils.h.
 */
int main(int args, char *argv[]) 
{

    char *filename = NULL;
    cl_device_type dev_type = CL_DEVICE_TYPE_DEFAULT;

    /* argv[args] is guaranteed to be NULL, so reading argv[i+1] is safe here,
     * but it may be NULL when an option is the last argument. */
    for (int i = 1; i < args; ++i) {
        if (strcmp(argv[i], "-file") == 0) {
            filename = argv[i+1];
        } else if (strcmp(argv[i], "-help") == 0) {
            printf("Usage: ./word_count [-file filename]\n");
        } else if (strcmp(argv[i], "-type") == 0) {
            const char *type_name = argv[i+1];

            /* "-type" without a value keeps the default device type. */
            if (type_name != NULL) {
                if (strcmp(type_name, "gpu") == 0) {
                    dev_type = CL_DEVICE_TYPE_GPU;
                } else if (strcmp(type_name, "cpu") == 0) {
                    dev_type = CL_DEVICE_TYPE_CPU;
                }
            }
        }
    }

    if (filename == NULL) {
        printf("Usage: ./word_count [-file filename]\n");
        return 0;
    }

    // Map the requested file into memory.
    st_mmap_t st_mmap = my_mmap(filename);

    // Load the kernel source file into a heap buffer.
    const char *kernel_filename =  "./kernel/word_count.cl";
    int fd = open(kernel_filename, O_RDONLY);
    if (fd < 0) {   /* open() fails with -1, not 0 */
        fprintf(stderr,"can't open %s\n",kernel_filename);
        return EXIT_FAILURE;
    }

    struct stat sb;
    if (fstat(fd,&sb) != 0) {
        fprintf(stderr,"can't fstat %s\n",kernel_filename);
        close(fd);
        return EXIT_FAILURE;
    }

    size_t source_size = (size_t)sb.st_size;
    /* One extra byte keeps the allocation non-empty and lets us NUL-terminate. */
    char *source_str = (char *)malloc(source_size + 1);
    if (source_str == NULL) {
        fprintf(stderr,"can't allocate memory for %s\n",kernel_filename);
        close(fd);
        return EXIT_FAILURE;
    }

    /* read() may return fewer bytes than requested, so loop until done. */
    size_t loaded = 0;
    while (loaded < source_size) {
        ssize_t got = read(fd, source_str + loaded, source_size - loaded);
        if (got < 0) {
            fprintf(stderr,"can't read %s\n",kernel_filename);
            free(source_str);
            close(fd);
            return EXIT_FAILURE;
        }
        if (got == 0) {
            break;  /* file is shorter than fstat() reported */
        }
        loaded += (size_t)got;
    }
    close(fd);

    source_size = loaded;
    source_str[source_size] = '\0';

    cl_platform_id platform_id = NULL;
    cl_uint ret_num_platforms = 0;
    cl_int ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);
    oclCheckError(ret, CL_SUCCESS);

    cl_device_id device_id = NULL;
    cl_uint ret_num_devices = 0;
    // dev_type can be CL_DEVICE_TYPE_CPU or CL_DEVICE_TYPE_GPU;
    // what CL_DEVICE_TYPE_DEFAULT selects is up to the platform.
    ret = clGetDeviceIDs( platform_id, dev_type, 
                          1, &device_id, &ret_num_devices);

    oclCheckError(ret, CL_SUCCESS);

    // Create the OpenCL context and its command queue.
    cl_context context = clCreateContext( NULL, 1, &device_id, NULL, NULL, &ret);
    oclCheckError(ret, CL_SUCCESS);
    cl_command_queue command_queue = clCreateCommandQueue(context, device_id, 0, &ret);
    oclCheckError(ret, CL_SUCCESS);

    // Create the program object from the kernel source.
    cl_program program = clCreateProgramWithSource(context, 1, (const char **)&source_str,
                                                   (const size_t *)&source_size, &ret);
    /* Check now: the next call overwrites ret. */
    oclCheckError(ret, CL_SUCCESS);

    // Build the kernel program; dump the build log on failure.
    ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);

    if (ret != CL_SUCCESS) {
        oclLogBuildInfo(program, device_id);
    }

    oclCheckError(ret, CL_SUCCESS);

    // Select the kernel function to run (the "task").
    cl_kernel kernel = clCreateKernel(program, "word_count", &ret);
    oclCheckError(ret, CL_SUCCESS);

    // Memory object holding the text to be counted.
    cl_mem text_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY, 
                                        st_mmap.size, NULL, &ret);
    oclCheckError(ret, CL_SUCCESS);

    ret = clEnqueueWriteBuffer(command_queue, text_memobj,
                               CL_TRUE, 0, st_mmap.size, (char*)st_mmap.file_mmap,
                               0, NULL, NULL);

    oclCheckError(ret, CL_SUCCESS);
    
    /*
     * Degree of parallelism: derived from the size of the input file,
     * one work-item per divi_size-byte chunk (rounded up).
     */

    // Open question from the original author: how should divi_size be chosen?
    int divi_size = 1024;
    size_t global_work_size = st_mmap.size / divi_size;
    int remain_size = (int)(st_mmap.size % divi_size);
    if (remain_size > 0) {
        global_work_size += 1;
    }

    /* 
     * Output array: per work-item, the word count, the line count and the
     * flags for the split points, in the order
     * word_num, line_num, head, tail
     */

    size_t out_size = sizeof(int) * OUT_PARAM_NUM * global_work_size;
    cl_mem out_memobj = clCreateBuffer(context, CL_MEM_WRITE_ONLY, 
                                       out_size, NULL, &ret);

    oclCheckError(ret, CL_SUCCESS);

    // Memory object for the kernel parameters.
    cl_mem param_memobj = clCreateBuffer(context, CL_MEM_READ_ONLY, 
                                         sizeof(param_t), NULL, &ret);
    oclCheckError(ret, CL_SUCCESS);

    param_t param;
    param.size        = divi_size;
    param.remain_size = remain_size;

    ret = clEnqueueWriteBuffer(command_queue, param_memobj,
                               CL_TRUE, 0, sizeof(param_t), &param,
                               0, NULL, NULL);

    oclCheckError(ret, CL_SUCCESS);

    // Set the kernel arguments.
    // (Original author's note: this might also work without a memory object.)
    ret = clSetKernelArg(kernel, 0, sizeof(cl_mem), (void *)&text_memobj);
    ret |= clSetKernelArg(kernel, 1, sizeof(cl_mem), (void *)&param_memobj);
    ret |= clSetKernelArg(kernel, 2, sizeof(cl_mem), (void *)&out_memobj);

    oclCheckError(ret, CL_SUCCESS);

    /* 
     * Run the kernel.
     * To run on a single work-item instead of in parallel, clEnqueueTask
     * (a simplified clEnqueueNDRangeKernel) can be used.
     */

    // global_work_size is passed as an array with one entry per dimension;
    // this launch is one-dimensional.
    ret = clEnqueueNDRangeKernel(command_queue, kernel, 1, NULL, &global_work_size, NULL, 0, NULL, NULL);

    oclCheckError(ret, CL_SUCCESS);

    int *out_data = (int*)malloc(out_size);
    if (out_data == NULL) {
        fprintf(stderr,"can't allocate memory for the result\n");
        return EXIT_FAILURE;
    }

    cl_event ev;

    // Read back the results.
    // CL_FALSE is non-blocking, CL_TRUE is blocking.
    ret = clEnqueueReadBuffer(command_queue, out_memobj, CL_FALSE, 0, 
                              out_size, out_data, 0, NULL, &ev);

    /* Check before waiting: ev is only initialised if the enqueue succeeded. */
    oclCheckError(ret, CL_SUCCESS);

    // Wait for the work tied to the event object to complete.
    // Only one event here, but a list of several can be waited on.
    ret = clWaitForEvents(1, &ev);
    oclCheckError(ret, CL_SUCCESS);

    printf("global_work_size %d\n", (int)global_work_size);

    int word_line_num[PRINT_PARAM_NUM];

    for (int i = 0; i < PRINT_PARAM_NUM; i++) {
        word_line_num[i] = 0;
    } 

    /* Slot positions inside one work-item record (word_num, line_num, head, tail). */
    const int head_slot = PRINT_PARAM_NUM;
    const int tail_slot = PRINT_PARAM_NUM + 1;

    for (size_t i = 0; i < global_work_size; i++) {

        const int *record = &out_data[i * OUT_PARAM_NUM];

        for (int j = 0; j < PRINT_PARAM_NUM; j++) {
            word_line_num[j] += record[j];
        }

        // Flag check across the boundary between this chunk and the next one:
        // this chunk's tail flag is 1 and the next chunk's head flag is 0.
        // NOTE(review): presumably this is a word ending exactly at the chunk
        // boundary that neither work-item counted -- confirm against the kernel.
        if (i != global_work_size-1) {
            const int *next_record = record + OUT_PARAM_NUM;

            if ((record[tail_slot] == 1) && (next_record[head_slot] == 0)) {
                word_line_num[0] += 1;
            }
        }
    }

    printf("%d %d \n", word_line_num[0], word_line_num[1]);

    clFlush(command_queue);
    clFinish(command_queue);
    clReleaseKernel(kernel);
    clReleaseProgram(program);
    clReleaseMemObject(text_memobj);
    clReleaseMemObject(param_memobj);
    clReleaseMemObject(out_memobj);
    clReleaseCommandQueue(command_queue);
    clReleaseContext(context);
    clReleaseEvent(ev);

    free(source_str);
    free(out_data);
    /* The input mapping was never released in the original code. */
    munmap(st_mmap.file_mmap, st_mmap.size);

    return 0;

}