# HG changeset patch # User Yutaka Kinjyo # Date 1282822686 -32400 # Node ID 33630c6ff445cf90d4d1f44e82a98b10f2f7f26a # Parent 0a6c5ee89a4cde41766cb93af0a098cfe53fa5ac change pipeline and TaskArray fast diff -r 0a6c5ee89a4c -r 33630c6ff445 Renderer/Engine/spe/spe-main.cc --- a/Renderer/Engine/spe/spe-main.cc Mon Aug 09 00:09:00 2010 +0900 +++ b/Renderer/Engine/spe/spe-main.cc Thu Aug 26 20:38:06 2010 +0900 @@ -11,8 +11,8 @@ SchedExternTask(DrawSpanRenew); SchedExternTask(DrawBack); -SchedExternTask(ChainCal); -SchedExternTask(ChainInit); +//SchedExternTask(ChainCal); +//SchedExternTask(ChainInit); SchedExternTask(CreateSpan); SchedExternTask(CreatePolygon); @@ -23,9 +23,9 @@ SchedExternTask(InitKey); SchedExternTask(UpdateKey); -SchedExternTask(PropertyTask); -SchedExternTask(UniverseTask); -SchedExternTask(ChainTask); +//SchedExternTask(PropertyTask); +//SchedExternTask(UniverseTask); +//SchedExternTask(ChainTask); void task_init(Scheduler *s) @@ -38,8 +38,8 @@ SchedRegister( LoadTexture); SchedRegister( DrawSpan); - SchedRegister( ChainCal); - SchedRegister( ChainInit); + //SchedRegister( ChainCal); + //SchedRegister( ChainInit); SchedRegister( DrawBack); SchedRegister( CreateSpan); @@ -51,7 +51,7 @@ SchedRegister( InitKey); SchedRegister( UpdateKey); - SchedRegister( PropertyTask); - SchedRegister( UniverseTask); - SchedRegister( ChainTask); + //SchedRegister( PropertyTask); + //SchedRegister( UniverseTask); + //SchedRegister( ChainTask); } diff -r 0a6c5ee89a4c -r 33630c6ff445 Renderer/Engine/viewerFB.cc --- a/Renderer/Engine/viewerFB.cc Mon Aug 09 00:09:00 2010 +0900 +++ b/Renderer/Engine/viewerFB.cc Thu Aug 26 20:38:06 2010 +0900 @@ -99,7 +99,7 @@ Uint32 * ViewerFB::video_init(TaskManager *manager, int bpp, int width, int height) { - Uint32 sdl_flag = default_sdl_flag ; + Uint32 sdl_flag = default_sdl_flag | SDL_INIT_VIDEO; if (SDL_Init(sdl_flag) < 0) { fprintf(stderr,"Couldn't initialize SDL: %s\n",SDL_GetError()); diff -r 0a6c5ee89a4c -r 33630c6ff445 TaskManager/Cell/spe/CellDmaManager.cc --- a/TaskManager/Cell/spe/CellDmaManager.cc Mon Aug 09 00:09:00 2010 +0900 +++ b/TaskManager/Cell/spe/CellDmaManager.cc Thu Aug 26 20:38:06 2010 +0900 @@ -16,6 +16,8 @@ { if (size == 0) return ; mfc_get((volatile void *)buf, addr, size, mask, 0, 0); + + } /** @@ -34,12 +36,35 @@ void CellDmaManager::dma_wait(uint32 mask) { + (this->*start_dmawait_profile)(); mfc_write_tag_mask(1 << mask); mfc_write_tag_update_all(); mfc_read_tag_status(); __asm__ __volatile__(" sync"); (this->*end_dmawait_profile)(&global_wait_time); + +} + +void CellDmaManager::dma_wait(uint32 mask, int cmd) +{ + + unsigned long long wait = 0; + + (this->*start_dmawait_profile)(); + mfc_write_tag_mask(1 << mask); + mfc_write_tag_update_all(); + mfc_read_tag_status(); + __asm__ __volatile__(" sync"); + (this->*end_dmawait_profile)(&wait); + + global_wait_time += wait; + + if (cmd == TaskArray) { + task_array_load_time += wait; + } else if (cmd == TaskArray1) { + } + } void CellDmaManager::mail_write(memaddr data) @@ -92,8 +117,10 @@ global_busy_time = 0; global_mail_time = 0; global_wait_time = 0; + task_array_load_time = 0; start_dmawait_profile = &CellDmaManager::do_start_dmawait_profile; - end_dmawait_profile = &CellDmaManager::do_end_dmawait_profile; + end_dmawait_profile = &CellDmaManager::do_end_dmawait_profile; + } void @@ -116,12 +143,14 @@ spu_writech(SPU_WrDec, 0xffffffff); } + void CellDmaManager::do_end_dmawait_profile(unsigned long long *counter) { wait_time = spu_readch(SPU_RdDec); *counter += 0xffffffff - wait_time; busy_time = wait_time; + } void CellDmaManager::null_start_dmawait_profile() {} @@ -146,12 +175,17 @@ s->printf("spu%d: busy_time = %lld" " wait_time = %lld(%.3g%%), " " mail_time = %lld(%.3g%%), " - " busy_ratio = %.3g%%\n", cpu, global_busy_time, - global_wait_time, d, global_mail_time, m, r); + " busy_ratio = %.3g%%" + " array_load_time = %lld\n" + ,cpu, global_busy_time, + global_wait_time, d, global_mail_time, m, r, + task_array_load_time); global_busy_time = 0; global_mail_time = 0; global_wait_time = 0; + task_array_load_time = 0; + } diff -r 0a6c5ee89a4c -r 33630c6ff445 TaskManager/Cell/spe/CellDmaManager.h --- a/TaskManager/Cell/spe/CellDmaManager.h Mon Aug 09 00:09:00 2010 +0900 +++ b/TaskManager/Cell/spe/CellDmaManager.h Thu Aug 26 20:38:06 2010 +0900 @@ -26,11 +26,13 @@ /* variables */ unsigned int wait_time, busy_time; unsigned long long global_busy_time, global_wait_time, global_mail_time; + unsigned long long task_array_load_time; /* functions */ void dma_load(void *buf, memaddr addr, uint32 size, uint32 mask); void dma_store(void *buf, memaddr addr, uint32 size, uint32 mask); void dma_wait(uint32 mask) ; + void dma_wait(uint32 mask, int cmd) ; void (CellDmaManager::*start_dmawait_profile)(); void (CellDmaManager::*end_dmawait_profile)(unsigned long long *counter); void start_profile(); diff -r 0a6c5ee89a4c -r 33630c6ff445 TaskManager/kernel/ppe/HTask.cc --- a/TaskManager/kernel/ppe/HTask.cc Mon Aug 09 00:09:00 2010 +0900 +++ b/TaskManager/kernel/ppe/HTask.cc Thu Aug 26 20:38:06 2010 +0900 @@ -76,6 +76,7 @@ { r_size = Task::calc_size(num_param, num_inData, num_outData)*num_task; rbuf = (memaddr) mimpl->allocate(r_size); + //printf("r_size %d\n",r_size); bzero(rbuf,r_size); Task *task = (Task*)rbuf; task->init(id, num_param,num_inData,num_outData); diff -r 0a6c5ee89a4c -r 33630c6ff445 TaskManager/kernel/ppe/Task.h --- a/TaskManager/kernel/ppe/Task.h Mon Aug 09 00:09:00 2010 +0900 +++ b/TaskManager/kernel/ppe/Task.h Thu Aug 26 20:38:06 2010 +0900 @@ -56,7 +56,13 @@ outData_count = outs; inData_offset = round_up16(sizeof(memaddr)*params); outData_offset = round_up16(inData_offset+sizeof(ListElement)*ins); - task_size = round_up16(sizeof(Task)+outData_offset+sizeof(ListElement)*outs); + //task_size = round_up16(sizeof(Task)+outData_offset+sizeof(ListElement)*outs); + + task_size = round_up16(sizeof(Task)) + + round_up16(sizeof(memaddr)*params) + + round_up16(sizeof(ListElement)*ins) + + round_up16(sizeof(ListElement)*outs); + } int size() { diff -r 0a6c5ee89a4c -r 33630c6ff445 TaskManager/kernel/schedule/DmaManager.h --- a/TaskManager/kernel/schedule/DmaManager.h Mon Aug 09 00:09:00 2010 +0900 +++ b/TaskManager/kernel/schedule/DmaManager.h Thu Aug 26 20:38:06 2010 +0900 @@ -7,6 +7,7 @@ enum dma_tag { DMA_READ = 25, + DMA_READ2 = 26, DMA_WRITE = 27, // DMA_READ_IN_LIST = 29, // DMA_READ_OUT_LIST = 30, @@ -25,6 +26,7 @@ virtual void dma_load(void *buf, memaddr addr, uint32 size, uint32 mask) {} virtual void dma_store(void *buf,memaddr addr, uint32 size, uint32 mask) {} virtual void dma_wait(uint32 mask) {} + virtual void dma_wait(uint32 mask, int cmd) {} virtual void show_dma_wait(Scheduler *s, int cpu) {} virtual void start_profile() {} diff -r 0a6c5ee89a4c -r 33630c6ff445 TaskManager/kernel/schedule/SchedTask.cc --- a/TaskManager/kernel/schedule/SchedTask.cc Mon Aug 09 00:09:00 2010 +0900 +++ b/TaskManager/kernel/schedule/SchedTask.cc Thu Aug 26 20:38:06 2010 +0900 @@ -134,7 +134,8 @@ } if (nextTask->command==TaskArray) { // Start Task Array - return new SchedTaskArrayLoad(scheduler, nextSched); + int dma_tag_switch = 0; + return new SchedTaskArrayLoad(scheduler, nextSched, dma_tag_switch); } return nextSched; } else { diff -r 0a6c5ee89a4c -r 33630c6ff445 TaskManager/kernel/schedule/SchedTaskArray.cc --- a/TaskManager/kernel/schedule/SchedTaskArray.cc Mon Aug 09 00:09:00 2010 +0900 +++ b/TaskManager/kernel/schedule/SchedTaskArray.cc Thu Aug 26 20:38:06 2010 +0900 @@ -4,7 +4,7 @@ #include "TaskManagerImpl.h" -SchedTaskArray::SchedTaskArray(Scheduler *s, SchedTaskBase *savedTask_, Task *curTask_, Task *_array) +SchedTaskArray::SchedTaskArray(Scheduler *s, SchedTaskBase *savedTask_, Task *curTask_, Task *_array, int tag) { savedTask = savedTask_; atask = curTask_; @@ -22,6 +22,9 @@ cur_index = -1; task = 0; + + this->tag = tag; + } /** @@ -51,6 +54,8 @@ cur_index = -1; task = 0; + this->tag = 0; + } /** @@ -97,7 +102,7 @@ // load Input Data readbuf = manager->allocate(inListData.size); // inListData.print(); - scheduler->dma_loadList(&inListData, readbuf, DMA_READ); + scheduler->dma_loadList(&inListData, readbuf, (DMA_READ + this->tag)); bound(&inListData); } @@ -125,7 +130,7 @@ // printf("bad %x\n",outListData.element); //} } - scheduler->dma_wait(DMA_READ); + scheduler->dma_wait((DMA_READ + this->tag)); run(this, get_input(readbuf, 0), get_output(writebuf, 0)); free(readbuf); // 書き込む領域がなければ無視 @@ -165,12 +170,13 @@ Task *next = atask->next(); if (next < last()) { // Task List が残っているので、次を準備 - return new SchedTaskArray(scheduler, savedTask, next, array); + //scheduler->printf("hog\n"); + return new SchedTaskArray(scheduler, savedTask, next, array, this->tag^1); } else { //このTaskArrayは終わったが、Pipeline 上にread の TaskArray が残っているので //1ステージを稼ぐ必要がある - + //scheduler->printf("auau\n"); return new SchedTaskArrayNop(scheduler, savedTask, next, array); } diff -r 0a6c5ee89a4c -r 33630c6ff445 TaskManager/kernel/schedule/SchedTaskArray.h --- a/TaskManager/kernel/schedule/SchedTaskArray.h Mon Aug 09 00:09:00 2010 +0900 +++ b/TaskManager/kernel/schedule/SchedTaskArray.h Thu Aug 26 20:38:06 2010 +0900 @@ -11,7 +11,7 @@ BASE_NEW_DELETE(SchedTaskArray); /* constructor */ - SchedTaskArray(Scheduler *s, SchedTaskBase *savedTask_, Task *task_, Task *array_); + SchedTaskArray(Scheduler *s, SchedTaskBase *savedTask_, Task *task_, Task *array_, int tag); /* constructor for old style task */ SchedTaskArray(Scheduler *s, SchedTaskBase *savedTask_); diff -r 0a6c5ee89a4c -r 33630c6ff445 TaskManager/kernel/schedule/SchedTaskArrayLoad.cc --- a/TaskManager/kernel/schedule/SchedTaskArrayLoad.cc Mon Aug 09 00:09:00 2010 +0900 +++ b/TaskManager/kernel/schedule/SchedTaskArrayLoad.cc Thu Aug 26 20:38:06 2010 +0900 @@ -4,12 +4,15 @@ #include "TaskManagerImpl.h" -SchedTaskArrayLoad::SchedTaskArrayLoad(Scheduler *s, SchedTaskBase *savedTask_) +SchedTaskArrayLoad::SchedTaskArrayLoad(Scheduler *s, SchedTaskBase *savedTask_, int dma_tag_switch) { scheduler = s; savedTask = savedTask_; task = savedTask->task; cur_index = -2; + + this->tag = dma_tag_switch; + } SchedTaskArrayLoad::~SchedTaskArrayLoad() {} @@ -21,12 +24,12 @@ // load Task Array Data readbuf = manager->allocate(task->r_size); scheduler->dma_load(readbuf, task->rbuf,task->r_size, DMA_READ); + scheduler->dma_wait(DMA_READ); } void SchedTaskArrayLoad::exec() { // もっと、はやめに DMA_READ して、最初のcodeをloadするべきか? - scheduler->dma_wait(DMA_READ); } // to empty the pipeline void @@ -35,8 +38,9 @@ SchedTaskBase* SchedTaskArrayLoad::next(Scheduler *scheduler, SchedTaskBase *p) { + Task *nextTask = (Task *)readbuf; - return new SchedTaskArray(scheduler, savedTask, nextTask, nextTask); + return new SchedTaskArray(scheduler, savedTask, nextTask, nextTask, this->tag); } /* end */ diff -r 0a6c5ee89a4c -r 33630c6ff445 TaskManager/kernel/schedule/SchedTaskArrayLoad.h --- a/TaskManager/kernel/schedule/SchedTaskArrayLoad.h Mon Aug 09 00:09:00 2010 +0900 +++ b/TaskManager/kernel/schedule/SchedTaskArrayLoad.h Thu Aug 26 20:38:06 2010 +0900 @@ -8,7 +8,7 @@ class SchedTaskArrayLoad : public SchedTask { public: /* constructor */ - SchedTaskArrayLoad(Scheduler *s, SchedTaskBase *savedTask_); + SchedTaskArrayLoad(Scheduler *s, SchedTaskBase *savedTask_, int dma_tag_switch); virtual ~SchedTaskArrayLoad(); BASE_NEW_DELETE(SchedTaskArrayLoad); diff -r 0a6c5ee89a4c -r 33630c6ff445 TaskManager/kernel/schedule/SchedTaskBase.h --- a/TaskManager/kernel/schedule/SchedTaskBase.h Mon Aug 09 00:09:00 2010 +0900 +++ b/TaskManager/kernel/schedule/SchedTaskBase.h Thu Aug 26 20:38:06 2010 +0900 @@ -93,6 +93,8 @@ // Task の、Tasklist での位置。(task = &list[cur_index-1]) int cur_index; + int tag; + memaddr params_addr; memaddr waiter; Task *array; diff -r 0a6c5ee89a4c -r 33630c6ff445 TaskManager/kernel/schedule/SchedTaskList.cc --- a/TaskManager/kernel/schedule/SchedTaskList.cc Mon Aug 09 00:09:00 2010 +0900 +++ b/TaskManager/kernel/schedule/SchedTaskList.cc Thu Aug 26 20:38:06 2010 +0900 @@ -28,6 +28,7 @@ list = scheduler->get_curListBuf(); scheduler->dma_load(list, params_addr, sizeof(TaskList), DMA_READ_TASKLIST); + } diff -r 0a6c5ee89a4c -r 33630c6ff445 TaskManager/kernel/schedule/Scheduler.cc --- a/TaskManager/kernel/schedule/Scheduler.cc Mon Aug 09 00:09:00 2010 +0900 +++ b/TaskManager/kernel/schedule/Scheduler.cc Thu Aug 26 20:38:06 2010 +0900 @@ -74,14 +74,17 @@ SchedTaskBase* task3 = new SchedNop(); // main loop do { - task3->write(); - task2->exec(); - task1->read(); + + task3->write(); + task1->read(); + task2->exec(); + delete task3; task3 = task2; task2 = task1; task1 = task1->next(this, 0); + } while (task1); delete task3; @@ -123,10 +126,17 @@ void Scheduler::dma_wait(uint32 mask) { + //printf("%d\n",mask); connector->dma_wait(mask); } void +Scheduler::dma_wait(uint32 mask, int cmd) +{ + connector->dma_wait(mask,cmd); +} + +void Scheduler::dma_loadList(ListDataPtr list, void *buff, uint32 mask) { connector->dma_loadList(list, buff, mask); diff -r 0a6c5ee89a4c -r 33630c6ff445 TaskManager/kernel/schedule/Scheduler.h --- a/TaskManager/kernel/schedule/Scheduler.h Mon Aug 09 00:09:00 2010 +0900 +++ b/TaskManager/kernel/schedule/Scheduler.h Thu Aug 26 20:38:06 2010 +0900 @@ -119,6 +119,7 @@ void dma_load(void *buf, memaddr addr, uint32 size, uint32 mask); void dma_store(void *buf,memaddr addr, uint32 size, uint32 mask); void dma_wait(uint32 mask); + void dma_wait(uint32 mask, int cmd); void show_dma_wait() { connector->show_dma_wait(this, id); }; void start_profile() { connector->start_profile(); }; void mail_write(memaddr data); diff -r 0a6c5ee89a4c -r 33630c6ff445 example/word_count/main.cc --- a/example/word_count/main.cc Mon Aug 09 00:09:00 2010 +0900 +++ b/example/word_count/main.cc Thu Aug 26 20:38:06 2010 +0900 @@ -17,6 +17,7 @@ int use_task_array = 1; int use_compat = 0; int array_task_num = 8; +int spe_num = 1; const char *usr_help_str = "Usage: ./word_count [-a -c -s] [-cpu spe_num] [-file filename]\n"; @@ -82,58 +83,105 @@ if (task_count<=0) return; } + + if (use_task_array) { + + int spl = spe_num * array_task_num; + int loop = (task_count + spl - 1) / spl; + + for (int i = 0; i < loop; i += 1) { + + if (spl > w->task_num) { + spe_num = 1; + array_task_num = w->task_num; + } + + + HTask **task_array = (HTask**)manager->allocate(sizeof(HTask*)*spe_num); + Task **t_exec = (Task**)manager->allocate(sizeof(Task*)*spe_num); + + for (int k = 0; k < spe_num; k++) { + task_array[k] = manager->create_task_array(TASK_EXEC,array_task_num,0,1,1); + t_exec[k] = 0; + if (all) { + w->t_print->wait_for(task_array[k]); + } else { + t_next->wait_for(task_array[k]); + } + } + + + for (int j = 0; j < array_task_num; j++) { + for (int k = 0; k < spe_num; k++) { + + int a = w->task_spwaned++; + + if (w->size < size) size = w->size; + + t_exec[k] = task_array[k]->next_task_array(TASK_EXEC,t_exec[k]); + t_exec[k]->set_inData(0,w->file_mmap + a*w->division_size, size); + t_exec[k]->set_outData(0,w->o_data + a*w->out_size, w->division_out_size); + + w->size -= size; + w->task_num--; + } + } + + for (int k = 0; k < spe_num; k++) { + task_array[k]->spawn_task_array(t_exec[k]->next()); + task_array[k]->set_cpu(SPE_ANY); + task_array[k]->spawn(); + } + + } + + return; + + } + + for (int i = 0; i < task_count; i += array_task_num) { - - HTask *task_array; - if (use_task_array) { - int task_num = (w->size+size-1)/size; - if (task_num>array_task_num) task_num = array_task_num; - task_array = manager->create_task_array(TASK_EXEC,task_num,0,1,1); - if (!all) { - t_next->wait_for(task_array); - } else { - w->t_print->wait_for(task_array); - } - } - - Task *t_exec = 0; - HTask *h_exec = 0; + + HTask *h_exec = 0; for (int j = 0; j < array_task_num; j++) { int i = w->task_spwaned++; if (w->size < size) size = w->size; if (size==0) break; - if (use_task_array) { - t_exec = task_array->next_task_array(TASK_EXEC,t_exec); - t_exec->set_inData(0,w->file_mmap + i*w->division_size, size); - t_exec->set_outData(0,w->o_data + i*w->out_size, w->division_out_size); - } else if (use_compat) { + + if (use_compat) { h_exec = manager->create_task(TASK_EXEC); h_exec->set_inData(0,w->file_mmap + i*w->division_size, size); h_exec->set_outData(0,w->o_data + i*w->out_size, w->division_out_size); - t_next->wait_for(h_exec); - + if (all) { + w->t_print->wait_for(h_exec); + } else { + t_next->wait_for(h_exec); + } + h_exec->set_cpu(SPE_ANY); h_exec->spawn(); + } else { h_exec = manager->create_task(TASK_EXEC, (memaddr)(w->file_mmap + i*w->division_size), size, (memaddr)(w->o_data + i*w->out_size), w->division_out_size); - t_next->wait_for(h_exec); + + if (all) { + w->t_print->wait_for(h_exec); + } else { + t_next->wait_for(h_exec); + } + h_exec->set_cpu(SPE_ANY); h_exec->spawn(); } w->size -= size; w->task_num--; } - if (use_task_array) { - task_array->spawn_task_array(t_exec->next()); - task_array->set_cpu(SPE_ANY); - task_array->spawn(); - } else { - //if (!all) t_next->wait_for(h_exec); - } - } + + } + } /** @@ -269,6 +317,8 @@ use_compat = 0; } else if (strcmp(argv[i], "-anum") == 0) { array_task_num = atoi(argv[i+1]); + } else if (strcmp(argv[i], "-cpu") == 0) { + spe_num = atoi(argv[i+1]); } } if (filename==0) {