changeset 2035:33af6d6e1bfc draft

add bitonic sort
author Shohei KOKUBO <e105744@ie.u-ryukyu.ac.jp>
date Thu, 05 Feb 2015 23:48:49 +0900
parents ce7a3887d09b
children 6fafe38d0508
files example/bitonic_sort/Func.h example/bitonic_sort/Makefile example/bitonic_sort/Makefile.cell example/bitonic_sort/Makefile.cuda example/bitonic_sort/Makefile.def example/bitonic_sort/Makefile.gpu example/bitonic_sort/Makefile.linux example/bitonic_sort/Makefile.macosx example/bitonic_sort/main.cc example/bitonic_sort/ppe/swap.cc example/bitonic_sort/ppe/swap.h example/bitonic_sort/ppe/task_init.cc example/bitonic_sort/spe/swap.cc example/bitonic_sort/spe/swap.h
diffstat 14 files changed, 531 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/bitonic_sort/Func.h	Thu Feb 05 23:48:49 2015 +0900
@@ -0,0 +1,4 @@
+enum {
+#include "SysTasks.h"
+    SWAP,
+};
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/bitonic_sort/Makefile	Thu Feb 05 23:48:49 2015 +0900
@@ -0,0 +1,32 @@
+default: macosx 
+macosx: FORCE
+	@echo "Make for Mac OS X"
+	@$(MAKE) -f Makefile.macosx
+
+fifo64: FORCE
+	@echo "Make for Mac OS X 64bit mode"
+	@$(MAKE) -f Makefile.fifo ABIBIT=64
+
+linux: FORCE
+	@echo "Make for Linux"
+	@$(MAKE) -f Makefile.linux
+
+cell: FORCE
+	@echo "Make for PS3 (Cell)"
+	@$(MAKE) -f Makefile.cell
+
+gpu: FORCE
+	@echo "Make for OpenCL"
+	@$(MAKE) -f Makefile.gpu
+
+cuda: FORCE
+	@echo "Make for CUDA"
+	@$(MAKE) -f Makefile.cuda
+
+FORCE:
+
+clean:
+	@$(MAKE) -f Makefile.macosx clean
+	@$(MAKE) -f Makefile.linux clean
+	@$(MAKE) -f Makefile.gpu clean
+	@$(MAKE) -f Makefile.cuda clean
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/bitonic_sort/Makefile.cell	Thu Feb 05 23:48:49 2015 +0900
@@ -0,0 +1,39 @@
+include ./Makefile.def
+
+SRCS_TMP = $(wildcard *.cc)
+SRCS_EXCLUDE =   # 除外するファイルを書く
+SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
+OBJS = $(SRCS:.cc=.o)
+
+TASK_DIR  = ppe
+TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc)
+TASK_SRCS_EXCLUDE = 
+TASK_SRCS = $(filter-out $(TASK_DIR)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP))
+TASK_OBJS = $(TASK_SRCS:.cc=.o)
+
+LIBS += -lCellManager -lspe2 -lpthread -Wl,--gc-sections 
+
+.SUFFIXES: .cc .o
+
+.cc.o:
+	$(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@
+
+all: $(TARGET) speobject
+
+$(TARGET): $(OBJS) $(TASK_OBJS)
+	$(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS)
+
+speobject:
+	cd spe; $(MAKE)
+
+link:
+	$(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS)
+
+debug: $(TARGET)
+	sudo ppu-gdb ./$(TARGET) 
+
+clean:
+	rm -f $(TARGET) $(OBJS) $(TASK_OBJS)
+	rm -f *~ \#*
+	rm -f ppe/*~ ppe/\#*
+	cd spe; $(MAKE) clean
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/bitonic_sort/Makefile.cuda	Thu Feb 05 23:48:49 2015 +0900
@@ -0,0 +1,51 @@
+include ./Makefile.def
+
+SRCS_TMP = $(wildcard *.cc)
+SRCS_EXCLUDE =  # 除外するファイルを書く
+SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
+OBJS = $(SRCS:.cc=.o)
+
+TASK_DIR  = ppe
+CUDA_TASK_DIR = cuda
+
+TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc)
+TASK_SRCS_EXCLUDE = 
+TASK_SRCS = $(filter-out $(TASK_DIR)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP)) $(wildcard $(CUDA_TASK_DIR)/*.cc)
+TASK_OBJS = $(TASK_SRCS:.cc=.o)
+
+CUDA_SRCS_TMP = $(wildcard $(CUDA_TASK_DIR)/*.cu)
+CUDA_SRCS_EXCLUDE = # 除外するファイルを書く
+CUDA_SRCS = $(filter-out $(CUDA_TASK_DIR)/$(CUDA_SRCS_EXCLUDE),$(CUDA_SRCS_TMP))
+CUDA_OBJS = $(CUDA_SRCS:.cu=.ptx)
+
+CFLAGS += -D__CERIUM_CUDA__ -DGPU
+LIBS += `sdl-config --libs` -lCudaManager -F/Library/Frameworks -framework CUDA
+
+INCLUDE += -I$(CUDA_PATH)
+
+NVCC = nvcc
+NVCCFLAGS = -ptx -arch=sm_20
+
+.SUFFIXES: .cc .o .cu .ptx
+
+.cc.o:
+	$(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@
+
+.cu.ptx:
+	$(NVCC) $(NVCCFLAGS) $< -o $@
+
+all: $(TARGET) $(CUDA_OBJS)
+
+$(TARGET): $(OBJS) $(TASK_OBJS) $(CUDA_OBJS)
+	$(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS)
+
+link:
+	$(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS)
+
+debug: $(TARGET)
+	sudo ppu-gdb ./$(TARGET) 
+
+clean:
+	rm -f $(TARGET) $(OBJS) $(TASK_OBJS) $(CUDA_OBJS)
+	rm -f *~ \#*
+	rm -f cuda/*~ cuda/\#*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/bitonic_sort/Makefile.def	Thu Feb 05 23:48:49 2015 +0900
@@ -0,0 +1,16 @@
+TARGET = bitonic_sort
+
+# include/library path
+# ex  macosx
+#CERIUM = /Users/gongo/Source/Concurrency/Game_project/Cerium
+
+CERIUM = ../../../Cerium
+
+CC      = clang++
+OPT = -g -O0
+#CXX = clang++
+CFLAGS  =  -Wall  $(OPT)
+#CXXFLAGS  = ${CFLAGS}
+
+INCLUDE = -I. -I.. -I${CERIUM}/include/TaskManager 
+LIBS = -L${CERIUM}/TaskManager
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/bitonic_sort/Makefile.gpu	Thu Feb 05 23:48:49 2015 +0900
@@ -0,0 +1,44 @@
+include ./Makefile.def
+
+SRCS_TMP = $(wildcard *.cc)
+SRCS_EXCLUDE = # 除外するファイルを書く
+SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
+OBJS = $(SRCS:.cc=.o)
+
+GPU_TASK_DIR  = gpu
+GPU_TASK_SRCS_TMP = $(wildcard $(GPU_TASK_DIR)/*.cc)
+GPU_TASK_SRCS_EXCLUDE =
+GPU_TASK_SRCS = $(filter-out $(GPU_TASK_DIR)/$(GPU_TASK_SRCS_EXCLUDE),$(GPU_TASK_SRCS_TMP))
+GPU_TASK_OBJS = $(GPU_TASK_SRCS:.cc=.o)
+
+TASK_DIR = ppe
+TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc)
+TASK_SRCS_EXCLUDE = 
+TASK_SRCS = $(filter-out $(GPU_TASK_DIR)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP))
+TASK_OBJS = $(TASK_SRCS:.cc=.o)
+
+CFLAGS  += -DGPU
+
+LIBS += `sdl-config --libs` -lGpuManager -framework opencl 
+
+.SUFFIXES: .cc .o
+
+.cc.o:
+	$(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@
+
+all: $(TARGET)
+
+$(TARGET): $(OBJS) $(TASK_OBJS) $(GPU_TASK_OBJS)
+	$(CC) $(OPT) -o $@ $(OBJS) $(TASK_OBJS) $(GPU_TASK_OBJS) $(LIBS)
+
+link:
+	$(CC) $(OPT) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(GPU_TASK_OBJS) $(LIBS)
+
+debug: $(TARGET)
+	sudo gdb ./$(TARGET)
+
+clean:
+	rm -f $(TARGET) $(OBJS) $(TASK_OBJS) $(GPU_TASK_OBJS)
+	rm -f *~ \#*
+	rm -f ppe/*~ ppe/\#*
+	rm -f gpu/*~ gpu/\#*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/bitonic_sort/Makefile.linux	Thu Feb 05 23:48:49 2015 +0900
@@ -0,0 +1,64 @@
+include ./Makefile.def
+
+SRCS_TMP = $(wildcard *.cc)
+SRCS_EXCLUDE = # 除外するファイルを書く
+SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
+OBJS = $(SRCS:.cc=.o)
+
+TASK_DIR  = ppe
+TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc)
+TASK_SRCS_EXCLUDE =
+TASK_SRCS = $(filter-out $(TASK_DIR)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP))
+TASK_OBJS = $(TASK_SRCS:.cc=.o)
+
+CC      = clang++
+CC += $(ABI)
+# CFLAGS  = -g -Wall# -O9 #-DDEBUG
+
+INCLUDE =  -I. -I.. -I${CERIUM}/include/TaskManager
+LIBS = -L${CERIUM}/TaskManager -lFifoManager -lrt
+
+.SUFFIXES: .cc .o
+
+.cc.o:
+	$(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@
+
+all: $(TARGET)
+
+$(TARGET): $(OBJS) $(TASK_OBJS)
+	$(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS)
+
+link:
+	$(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS)
+
+hoge:
+	cpus=0;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=1;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=2;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=3;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=4;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=5;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=6;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=7;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=8;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=9;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=10;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=11;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=12;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=13;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=14;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=15;./$(TARGET) -file lena512.pgm -cpu $$cpus
+	cpus=16;./$(TARGET) -file lena512.pgm -cpu $$cpus
+
+debug: $(TARGET)
+	sudo gdb ./$(TARGET)
+
+test:
+	./$(TARGET) -file lena512.pgm -cpu 1
+	./$(TARGET) -file lena512.pgm -cpu 4
+clean:
+	rm -f $(TARGET) $(OBJS) $(TASK_OBJS)
+	rm -f *~ \#*
+	rm -f ppe/*~ ppe/\#*
+	rm -f spe/*~ spe/\#*
+	rm -f gpu/*~ gpu/\#*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/bitonic_sort/Makefile.macosx	Thu Feb 05 23:48:49 2015 +0900
@@ -0,0 +1,43 @@
+include ./Makefile.def
+
+
+SRCS_TMP = $(wildcard *.cc)
+SRCS_EXCLUDE = # 除外するファイルを書く
+SRCS = $(filter-out $(SRCS_EXCLUDE),$(SRCS_TMP))
+OBJS = $(SRCS:.cc=.o)
+
+TASK_DIR  = ppe
+TASK_SRCS_TMP = $(wildcard $(TASK_DIR)/*.cc)
+TASK_SRCS_EXCLUDE =
+TASK_SRCS = $(filter-out $(TASK_DIR)/$(TASK_SRCS_EXCLUDE),$(TASK_SRCS_TMP))
+TASK_OBJS = $(TASK_SRCS:.cc=.o)
+
+CC      = clang++
+CC += $(ABI)
+# CFLAGS  = -g -Wall# -O9 #-DDEBUG
+
+INCLUDE =  -I. -I.. -I${CERIUM}/include/TaskManager
+LIBS = -L${CERIUM}/TaskManager -lFifoManager `sdl-config --libs`
+
+.SUFFIXES: .cc .o
+
+.cc.o:
+	$(CC) $(CFLAGS) $(INCLUDE) -c $< -o $@
+
+all: $(TARGET)
+
+$(TARGET): $(OBJS) $(TASK_OBJS)
+	$(CC) -o $@ $(OBJS) $(TASK_OBJS) $(LIBS)
+
+link:
+	$(CC) -o $(TARGET) $(OBJS) $(TASK_OBJS) $(LIBS)
+
+debug: $(TARGET)
+	sudo gdb ./$(TARGET)
+
+clean:
+	rm -f $(TARGET) $(OBJS) $(TASK_OBJS)
+	rm -f *~ \#*
+	rm -f ppe/*~ ppe/\#*
+	rm -f spe/*~ spe/\#*
+	rm -f gpu/*~ gpu/\#*
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/bitonic_sort/main.cc	Thu Feb 05 23:48:49 2015 +0900
@@ -0,0 +1,148 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <sys/time.h>
+#include <string.h>
+#include "TaskManager.h"
+#include "GpuScheduler.h"
+#include "SchedTask.h"
+#include "Func.h"
+
+extern void task_init();
+#ifdef GPU
+extern void gpu_task_init();
+#endif
+
+void TMend(TaskManager *);
+
+static double st_time;
+static double ed_time;
+
+CPU_TYPE spe_cpu = SPE_ANY;
+
+int length = 1024;
+int* data;
+
+bool first = false;
+
+static double
+getTime()
+{
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+    return tv.tv_sec + (double)tv.tv_usec*1e-6;
+}
+
+static void
+print()
+{
+    for (int i=0; i<length; i++) {
+        printf("%d ", data[i]);
+    }
+    puts("");
+}
+
+static void
+check_data()
+{
+    for (int i=0; i<length-1; i++) {
+        if (data[i+1] < data[i]) {
+            printf("Data are not sorted at %d. %d > %d \n",i , data[i], data[i+1]);
+            return;
+        }
+    }
+    puts("Data are sorted");
+}
+            
+
+const char *usr_help_str = "Usage: ./bitonic_sort [option]\n \
+options\n                                                    \
+  -cpu     Number of SPE used (default 1)\n                  \
+  -l, --length  Sorted number of data (default 1200)\n       \
+  -h, --help    Print this message\n";
+
+void
+init(int argc, char**argv){
+    for (int i = 1; argv[i]; ++i) {
+        if (strcmp(argv[i], "-g") == 0) {
+            spe_cpu = GPU_0;
+        } else if (strcmp(argv[i], "-any") == 0) {
+            spe_cpu = ANY_ANY;
+        } else if (strcmp(argv[i], "--length") == 0 || strcmp(argv[i], "-l") == 0) {
+            length = (int)atoi(argv[++i]);
+        }
+    }
+}
+
+void run_start(TaskManager* manager) {
+    int dummy = 0;
+    int exp = 0;
+    for (int i=0;; i++) {
+        if (length <= (1<<i)) {
+            exp = i;
+            dummy = (1<<i) - length;
+            length = 1<<i;
+            break;
+        }
+    }
+
+    data = (int*)malloc(length*sizeof(int*));
+    for (int i=0; i<length; i++) {
+        if (i<dummy) {
+            data[i] = 0;
+        } else {
+            data[i] = manager->get_random()%1000000;
+        }
+    }
+
+    //print();
+
+    HTask* wait;
+    HTask* swap;
+    int stage = 1;
+    int block = 1;
+
+    for (int i=stage; 1<<(stage-1) < length; i=stage) {
+        for (int j=i, k=block; 0 < j; j--, k=k/2) {
+            swap = manager->create_task(SWAP);
+            swap->set_inData(0, data, length*sizeof(int*));
+            swap->set_outData(0, data, length*sizeof(int*));
+            swap->set_param(0, (long)block);
+            swap->set_param(1, (long)1<<(j-1));
+            swap->set_param(2, (long)k);
+            swap->set_cpu(spe_cpu);
+            swap->flip();
+            if (first)
+                swap->wait_for(wait);
+            swap->iterate(length/2);
+            wait = swap;
+        }
+        stage++;
+        block = block<<1;
+        first = true;
+    }
+}
+    
+
+int TMmain(TaskManager *manager, int argc, char** argv) {
+    task_init();
+#ifdef GPU
+    gpu_task_init();
+#endif
+    init(argc, argv);
+    run_start(manager);
+    st_time = getTime();
+    manager->set_TMend(TMend);
+    return 0;
+}
+
+void
+TMend(TaskManager *manager)
+{
+    ed_time = getTime();
+    printf("%0.6f\n",ed_time-st_time);
+    //print();
+    check_data();
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/bitonic_sort/ppe/swap.cc	Thu Feb 05 23:48:49 2015 +0900
@@ -0,0 +1,33 @@
+#include "swap.h"
+
+SchedDefineTask1(swap,swap);
+
+static int
+swap(SchedTask* s, void* rbuf,void* wbuf)
+{
+    int* inData = (int*)s->get_input(rbuf, 0);
+    int* outData = (int*)s->get_output(wbuf, 0);
+    int index = s->x;
+    long block = (long)s->get_param(0);
+    long dist = (long)s->get_param(1);
+    long sp = (long)s->get_param(2);
+
+    int x = index/block;
+    int y = index/sp;
+    int i = index+sp*y;
+    int temp = inData[i];
+
+    if (x%2 == 0) {
+        if (inData[i+dist] < inData[i]) {
+            outData[i] = inData[i+dist];
+            outData[i+dist] = temp;
+        }
+    } else {
+        if (inData[i] < inData[i+dist]) {
+            outData[i] = inData[i+dist];
+            outData[i+dist] = temp;
+        }
+    }
+    
+    return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/bitonic_sort/ppe/swap.h	Thu Feb 05 23:48:49 2015 +0900
@@ -0,0 +1,8 @@
+#ifndef INCLUDED_TASK_SWAP
+#define INCLUDED_TASK_SWAP
+
+#ifndef INCLUDED_SCHED_TASK
+#include "SchedTask.h"
+#endif
+
+#endif
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/bitonic_sort/ppe/task_init.cc	Thu Feb 05 23:48:49 2015 +0900
@@ -0,0 +1,10 @@
+#include "Func.h"
+#include "Scheduler.h"
+
+SchedExternTask(swap);
+
+void
+task_init(void)
+{
+    SchedRegisterTask(SWAP, swap);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/bitonic_sort/spe/swap.cc	Thu Feb 05 23:48:49 2015 +0900
@@ -0,0 +1,31 @@
+#include "swap.h"
+
+SchedDefineTask1(swap,swap);
+
+static int
+swap(SchedTask* s, void* rbuf,void* wbuf)
+{
+    int* inData = (int*)s->get_input(rbuf, 0);
+    int* outData = (int*)s->get_output(wbuf, 0);
+    int index = s->x;
+    int block = (int)s->get_param(0);
+    int dist = (int)s->get_param(1);
+
+    int x = index/(block/2);
+    int i = index+block*x;
+    int temp = inData[i];
+
+    if (x%2 == 0) {
+        if (inData[i+dist] < inData[i]) {
+            outData[i] = inData[i+dist];
+            outData[i+dist] = temp;
+        }
+    } else {
+        if (inData[i] < inData[i+dist]) {
+            outData[i] = inData[i+dist];
+            outData[i+dist] = temp;
+        }
+    }
+        
+    return 0;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/example/bitonic_sort/spe/swap.h	Thu Feb 05 23:48:49 2015 +0900
@@ -0,0 +1,8 @@
+#ifndef INCLUDED_TASK_SWAP
+#define INCLUDED_TASK_SWAP
+
+#ifndef INCLUDED_SCHED_TASK
+#include "SchedTask.h"
+#endif
+
+#endif