# HG changeset patch # User Tatsuki IHA # Date 1518464117 -32400 # Node ID c0b6ce2ed8209696ca7872a3bfa5a6794c6887e6 # Parent b78533641f9b019cd2b529e55cf56bc4e16e2553 Add comment diff -r b78533641f9b -r c0b6ce2ed820 src/parallel_execution/CUDAExecutor.cbc --- a/src/parallel_execution/CUDAExecutor.cbc Tue Feb 06 05:14:55 2018 +0900 +++ b/src/parallel_execution/CUDAExecutor.cbc Tue Feb 13 04:35:17 2018 +0900 @@ -43,6 +43,7 @@ executor->maxThreadPerBlockX = 1; executor->maxThreadPerBlockY = 1; executor->maxThreadPerBlockZ = 1; + // maxThreadPerBlockX * maxThreadPerBlockY * maxThreadPerBlockZ <= maxThreadPerBlock if (iterator->x > 1 && iterator->y == 1 && iterator->z == 1) { executor->maxThreadPerBlockX = executor->maxThreadPerBlock; executor->maxThreadPerBlockY = 1; @@ -63,7 +64,6 @@ } __code execCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) { - // Asynchronous launch kernel task->num_exec = 1; if (task->iterate) { struct MultiDimIterator* iterator = &task->iterator->iterator->MultiDimIterator; @@ -71,6 +71,7 @@ int blockDimX = computeblockDim(iterator->x, executor->maxThreadPerBlockX); int blockDimY = computeblockDim(iterator->y, executor->maxThreadPerBlockY); int blockDimZ = computeblockDim(iterator->z, executor->maxThreadPerBlockZ); + // launch kernel checkCudaErrors(cuLaunchKernel(task->function, iterator->x/blockDimX, iterator->y/blockDimY, iterator->z/blockDimZ, blockDimX, blockDimY, blockDimZ, @@ -87,7 +88,7 @@ } __code writeCUDAExecutor(struct CUDAExecutor* executor, struct Context* task, __code next(...)) { - // wait for stream + // kernel launch is asynchronous, so wait for it to finish checkCudaErrors(cuCtxSynchronize()); struct Timer* timer = executor->timer; goto timer->end(writeCUDAExecutor1);