diff --git a/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/device/sliceGMHOST_device.cu b/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/device/sliceGMHOST_device.cu index 4b6b811..216a2d2 100755 --- a/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/device/sliceGMHOST_device.cu +++ b/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/device/sliceGMHOST_device.cu @@ -22,12 +22,21 @@ static __device__ float f(float x); * tabGM est un tableau promu, qui a autant de case que de thread * */ -__global__ void reductionIntraThreadGMHOST(float* tabGM , int nbSlice) - { +__global__ void reductionIntraThreadGMHOST(float* tabGM , int nbSlice) { const int NB_THREAD = Thread2D::nbThread(); const int TID = Thread2D::tid(); - // TODO SliceGMHOST + const float delta_x = 1.f / (float)nbSlice; + + int s = TID; + tabGM[TID] = 0.f; + + while(s < nbSlice) { + float xi = s * delta_x; + tabGM[TID] += f(xi); + + s+=NB_THREAD; + } // Conseils : // @@ -43,13 +52,10 @@ __global__ void reductionIntraThreadGMHOST(float* tabGM , int nbSlice) |* Private *| \*-------------------------------------*/ -__device__ float f(float x) - { - // TODO SliceGMHOST - return -1; - } +__device__ float f(float x) { + return 4.f / (1.f + x * x); +} /*----------------------------------------------------------------------*\ |* End *| \*---------------------------------------------------------------------*/ - diff --git a/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/host/SliceGMHOST.cu b/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/host/SliceGMHOST.cu index d1ec9e3..f07ba8e 100755 --- a/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/host/SliceGMHOST.cu +++ b/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/host/SliceGMHOST.cu @@ -29,20 +29,19 @@ extern __global__ void reductionIntraThreadGMHOST(float* tabGM,int nbSlice); SliceGMHOST::SliceGMHOST(Grid grid , int nbSlice , double* ptrPiHat , bool isVerbose) : RunnableGPU(grid, "SliceGM_HOST_" + to_string(nbSlice), isVerbose), // classe parente -// - nbSlice(nbSlice), // - ptrPiHat(ptrPiHat) // - { - this->nTabGM = -1; // TODO SliceGMHOST // le nombre de case de tabGM. Indication : grid.threadCounts() donne le nombre de thread ed la grille - this->sizeTabGM = -1; // TODO SliceGMHOST // la taille en octet de tabGM [octet] + nbSlice(nbSlice), + ptrPiHat(ptrPiHat) +{ - // TODO SliceGMHOST - } + this->nTabGM = grid.threadCounts(); // le nombre de case de tabGM. Indication : grid.threadCounts() donne le nombre de thread de la grille + this->sizeTabGM = nTabGM * sizeof(float); // la taille en octet de tabGM [octet] -SliceGMHOST::~SliceGMHOST(void) - { - // TODO SliceGMHOST - } + GM::malloc(&tabGM, sizeTabGM); +} + +SliceGMHOST::~SliceGMHOST(void) { + GM::free(tabGM); +} /*--------------------------------------*\ |* Methode *| @@ -60,9 +59,9 @@ SliceGMHOST::~SliceGMHOST(void) * * */ -void SliceGMHOST::run() - { - // TODO SliceGMHOST // call the kernel +void SliceGMHOST::run() { + + reductionIntraThreadGMHOST<<>>(tabGM, nbSlice); // Indication: // dg et db sont stokcer dans la classe parente @@ -70,7 +69,7 @@ void SliceGMHOST::run() // exemple : reductionIntraThreadGMHOST<<>>(...) reductionHost(); - } +} /*--------------------------------------*\ |* Private *| @@ -88,7 +87,15 @@ void SliceGMHOST::reductionHost() // TODO SliceGMHOST + float tab[nTabGM]; + GM::memcpyDToH(tab, tabGM, sizeTabGM); + for (int i = 0; i < nTabGM; i++) { + *ptrPiHat += (double)tab[i]; } + const double delta_x = 1.f / (float)nbSlice; + *ptrPiHat *= delta_x; +} + /*----------------------------------------------------------------------*\ |* End *| diff --git a/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/host/SliceGMHost_BestGrid.h b/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/host/SliceGMHost_BestGrid.h index c1a453a..251bd40 100755 --- a/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/host/SliceGMHost_BestGrid.h +++ b/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/host/SliceGMHost_BestGrid.h @@ -11,29 +11,27 @@ |* Impelmentation *| \*---------------------------------------------------------------------*/ -namespace sliceGMHost - { +namespace sliceGMHost { - class BestGrid - { +class BestGrid { - public: +public: - static Grid get() - { - const int MP = Hardware::getMPCount(); + static Grid get() { + const int MP = Hardware::getMPCount(); + const int CORE_MP = Hardware::getCoreCountMP(); - // TODO SliceGMHOST grid + dim3 dg(MP, 1, 1); + dim3 db(CORE_MP, 2, 1); // produit <=1024 + Grid grid(dg, db); - // to remove once coded - { - Couts::redln("aie aie aie, your best grid won t build itself"); - assert(false); - } - } + return grid; - }; - } + } + +}; + +} /*----------------------------------------------------------------------*\ |* End *| diff --git a/Student_Cuda/src/main/main.cpp b/Student_Cuda/src/main/main.cpp index f8f3358..c47ed23 100755 --- a/Student_Cuda/src/main/main.cpp +++ b/Student_Cuda/src/main/main.cpp @@ -29,7 +29,7 @@ int main(int argc , char** argv) // public { cudaContext.deviceId = 0; // in [0,2] width Server Cuda3 - cudaContext.launchMode = LaunchModeMOO::USE; // USE TEST BENCHMARK FORCEBRUT + cudaContext.launchMode = LaunchModeMOO::TEST; // USE TEST BENCHMARK FORCEBRUT cudaContext.deviceDriver = DeviceDriver::LOAD_ALL; // LOAD_CURRENT LOAD_ALL cudaContext.deviceInfo = DeviceInfo::ALL_SIMPLE; // NONE ALL ALL_SIMPLE CURRENT @@ -49,4 +49,3 @@ int main(int argc , char** argv) /*----------------------------------------------------------------------*\ |* End *| \*---------------------------------------------------------------------*/ - diff --git a/Student_Cuda/src/main/mainBenchmark.cpp b/Student_Cuda/src/main/mainBenchmark.cpp index c38c71e..6c1c85a 100755 --- a/Student_Cuda/src/main/mainBenchmark.cpp +++ b/Student_Cuda/src/main/mainBenchmark.cpp @@ -182,4 +182,3 @@ void addvectorTristream() /*----------------------------------------------------------------------*\ |* End *| \*---------------------------------------------------------------------*/ - diff --git a/Student_Cuda/src/main/mainTest.cpp b/Student_Cuda/src/main/mainTest.cpp index e8d1f2a..88e25d5 100755 --- a/Student_Cuda/src/main/mainTest.cpp +++ b/Student_Cuda/src/main/mainTest.cpp @@ -55,8 +55,8 @@ int mainTest() void slice() { VTSliceGMHOST test1; - VTSliceGM test2; - VTSliceSM test3; + // VTSliceGM test2; + // VTSliceSM test3; test1.run(); @@ -96,4 +96,3 @@ void vectorStream() /*----------------------------------------------------------------------*\ |* End *| \*---------------------------------------------------------------------*/ - diff --git a/Student_Cuda/src/main/mainUse.cpp b/Student_Cuda/src/main/mainUse.cpp index 3f7b27d..0d84384 100755 --- a/Student_Cuda/src/main/mainUse.cpp +++ b/Student_Cuda/src/main/mainUse.cpp @@ -67,8 +67,8 @@ int mainUse() void slice(bool& isOk) { SliceGmHostUse sliceGmHostUse(IS_VERBOSE); - SliceGmUse sliceGmUse(IS_VERBOSE); - SliceSmUse sliceSmUse(IS_VERBOSE); + // SliceGmUse sliceGmUse(IS_VERBOSE); + // SliceSmUse sliceSmUse(IS_VERBOSE); isOk &= sliceGmHostUse.isOk(IS_VERBOSE); // isOk &= sliceGmUse.isOk(IS_VERBOSE); @@ -213,4 +213,3 @@ void print(bool isSuccess) /*----------------------------------------------------------------------*\ |* End *| \*---------------------------------------------------------------------*/ -