feat(lab01): add Slice GM Host lab

feat(warmup): add solutions
feat(omp): add lab omp realised during classroom
2025-10-18 16:39:18 +02:00 · 2025-10-18 16:37:06 +02:00 · 2025-10-18 16:36:29 +02:00
21 changed files with 223 additions and 130 deletions
--- a/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/device/sliceGMHOST_device.cu
+++ b/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/device/sliceGMHOST_device.cu
@@ -22,12 +22,21 @@ static __device__ float f(float x);
 * tabGM est un tableau promu, qui a autant de case que de thread
 * </pre>
 */
-__global__ void reductionIntraThreadGMHOST(float* tabGM , int nbSlice)
+__global__ void reductionIntraThreadGMHOST(float* tabGM , int nbSlice) {
    {
    const int NB_THREAD = Thread2D::nbThread();
    const int TID = Thread2D::tid();
-    // TODO SliceGMHOST
+    const float delta_x = 1.f / (float)nbSlice;
    int s = TID;
    tabGM[TID] = 0.f;
    while(s < nbSlice) {
        float xi = s * delta_x;
       	tabGM[TID] += f(xi);
        s+=NB_THREAD;
    }
    // Conseils :
    //
@@ -43,13 +52,10 @@ __global__ void reductionIntraThreadGMHOST(float* tabGM , int nbSlice)
 |*		Private			*|
 \*-------------------------------------*/
-__device__ float f(float x)
+__device__ float f(float x) {
-    {
+    return 4.f / (1.f + x * x);
-    // TODO SliceGMHOST
+}
    return -1;
    }
 /*----------------------------------------------------------------------*\
 |*			End	 					*|
 \*---------------------------------------------------------------------*/
--- a/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/host/SliceGMHOST.cu
+++ b/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/host/SliceGMHOST.cu
@@ -29,20 +29,19 @@ extern __global__ void reductionIntraThreadGMHOST(float* tabGM,int nbSlice);
 SliceGMHOST::SliceGMHOST(Grid grid , int nbSlice , double* ptrPiHat , bool isVerbose) :
 	RunnableGPU(grid, "SliceGM_HOST_" + to_string(nbSlice), isVerbose), // classe parente
-//
+	nbSlice(nbSlice),
-	nbSlice(nbSlice), //
+	ptrPiHat(ptrPiHat)
-	ptrPiHat(ptrPiHat) //
+{
    {
    this->nTabGM = -1; // TODO SliceGMHOST // le nombre de case de tabGM. Indication :  grid.threadCounts() donne le nombre de thread ed la grille
    this->sizeTabGM = -1; //  TODO SliceGMHOST // la taille en octet de tabGM [octet]
-    // TODO SliceGMHOST
+    this->nTabGM = grid.threadCounts(); // le nombre de case de tabGM. Indication :  grid.threadCounts() donne le nombre de thread de la grille
-    }
+    this->sizeTabGM = nTabGM * sizeof(float); // la taille en octet de tabGM [octet]
-SliceGMHOST::~SliceGMHOST(void)
+    GM::malloc(&tabGM, sizeTabGM);
-    {
+}
-    // TODO SliceGMHOST
+
-    }
+SliceGMHOST::~SliceGMHOST(void) {
    GM::free(tabGM);
 }
 /*--------------------------------------*\
 |*		Methode			*|
@@ -60,9 +59,9 @@ SliceGMHOST::~SliceGMHOST(void)
 *
 * </pre>
 */
-void SliceGMHOST::run()
+void SliceGMHOST::run() {
-    {
+
-    // TODO SliceGMHOST // call the kernel
+    reductionIntraThreadGMHOST<<<dg, db>>>(tabGM, nbSlice);
    // Indication:
    // 		dg et db sont stokcer dans la classe parente
@@ -70,7 +69,7 @@ void SliceGMHOST::run()
    // 		exemple : reductionIntraThreadGMHOST<<<dg,db>>>(...)
    reductionHost();
-    }
+}
 /*--------------------------------------*\
 |*		Private			*|
@@ -88,7 +87,15 @@ void SliceGMHOST::reductionHost()
    // TODO SliceGMHOST
    float tab[nTabGM];
    GM::memcpyDToH(tab, tabGM, sizeTabGM);
    for (int i = 0; i < nTabGM; i++) {
        *ptrPiHat += (double)tab[i];
    }
    const double delta_x = 1.f / (float)nbSlice;
    *ptrPiHat *= delta_x;
 }
 /*----------------------------------------------------------------------*\
 |*			End	 					*|
--- a/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/host/SliceGMHost_BestGrid.h
+++ b/Student_Cuda/src/core/01_student/03_Slice/01_Slice_GM_Host/host/SliceGMHost_BestGrid.h
@@ -11,29 +11,27 @@
 |*			Impelmentation 					*|
 \*---------------------------------------------------------------------*/
-namespace sliceGMHost
+namespace sliceGMHost {
    {
-    class BestGrid
+class BestGrid {
 	{
-	public:
+public:
-	    static Grid get()
+    static Grid get() {
 		{
  		const int MP = Hardware::getMPCount();
        const int CORE_MP = Hardware::getCoreCountMP();
-		// TODO SliceGMHOST grid
+        dim3 dg(MP, 1, 1);
        dim3 db(CORE_MP, 2, 1);   // produit <=1024
        Grid grid(dg, db);
        return grid;
 		// to remove once coded
 		    {
 		    Couts::redln("aie aie aie, your best grid won t build itself");
 		    assert(false);
 		    }
 	}
-	};
+};
-    }
+
 }
 /*----------------------------------------------------------------------*\
 |*			End	 					*|
--- a/Student_Cuda/src/main/main.cpp
+++ b/Student_Cuda/src/main/main.cpp
@@ -29,7 +29,7 @@ int main(int argc , char** argv)
    // public
 	{
 	cudaContext.deviceId = 0; //  in [0,2] width Server Cuda3
-	cudaContext.launchMode = LaunchModeMOO::USE; // USE TEST  BENCHMARK  FORCEBRUT
+	cudaContext.launchMode = LaunchModeMOO::TEST; // USE TEST  BENCHMARK  FORCEBRUT
 	cudaContext.deviceDriver = DeviceDriver::LOAD_ALL; // LOAD_CURRENT   LOAD_ALL
 	cudaContext.deviceInfo = DeviceInfo::ALL_SIMPLE;   // NONE  ALL  ALL_SIMPLE  CURRENT
@@ -49,4 +49,3 @@ int main(int argc , char** argv)
 /*----------------------------------------------------------------------*\
 |*			End	 					*|
 \*---------------------------------------------------------------------*/
--- a/Student_Cuda/src/main/mainBenchmark.cpp
+++ b/Student_Cuda/src/main/mainBenchmark.cpp
@@ -182,4 +182,3 @@ void addvectorTristream()
 /*----------------------------------------------------------------------*\
 |*			End	 					*|
 \*---------------------------------------------------------------------*/
--- a/Student_Cuda/src/main/mainTest.cpp
+++ b/Student_Cuda/src/main/mainTest.cpp
@@ -55,8 +55,8 @@ int mainTest()
 void slice()
    {
    VTSliceGMHOST test1;
-    VTSliceGM test2;
+    // VTSliceGM test2;
-    VTSliceSM test3;
+    // VTSliceSM test3;
    test1.run();
@@ -96,4 +96,3 @@ void vectorStream()
 /*----------------------------------------------------------------------*\
 |*			End	 					*|
 \*---------------------------------------------------------------------*/
--- a/Student_Cuda/src/main/mainUse.cpp
+++ b/Student_Cuda/src/main/mainUse.cpp
@@ -67,8 +67,8 @@ int mainUse()
 void slice(bool& isOk)
    {
    SliceGmHostUse sliceGmHostUse(IS_VERBOSE);
-    SliceGmUse sliceGmUse(IS_VERBOSE);
+    // SliceGmUse sliceGmUse(IS_VERBOSE);
-    SliceSmUse sliceSmUse(IS_VERBOSE);
+    // SliceSmUse sliceSmUse(IS_VERBOSE);
    isOk &= sliceGmHostUse.isOk(IS_VERBOSE);
 //    isOk &= sliceGmUse.isOk(IS_VERBOSE);
@@ -213,4 +213,3 @@ void print(bool isSuccess)
 /*----------------------------------------------------------------------*\
 |*			End	 					*|
 \*---------------------------------------------------------------------*/
--- a/Student_Cuda_Warmup/src/core/01_addArray/00_procedurale/addArrayProcedurale.cu
+++ b/Student_Cuda_Warmup/src/core/01_addArray/00_procedurale/addArrayProcedurale.cu
@@ -9,6 +9,7 @@
 #include "cudas.h"
 #include "GM.h"
 #include "Kernel.h"
 #include "Hardware.h"
 using std::cout;
 using std::endl;
@@ -32,9 +33,34 @@ static __global__ void kaddArray(float* ptrGMV1 , float* ptrGMV2 , float* ptrGMW
 * ptrW receptionne le resultat
 * n nombre de case
 */
-__host__ bool addArray_procedurale(float* ptrV1 , float* ptrV2 , float* ptrW , int n) // __host__ optional
+__host__ void addArray_procedurale(float* ptrV1 , float* ptrV2 , float* ptrW , int n) // __host__ optional
    {
    // Host-side driver for kaddArray: allocates three device buffers of n floats,
    // uploads ptrV1 and ptrV2, launches the kernel, downloads the result into ptrW
    // and releases the device buffers.
    // NOTE(review): GM::malloc / GM::memcpy* / GM::free are project wrappers; whether they
    // check CUDA error codes is not visible here - confirm.
-    // TODO addArray
+    size_t sizeVector = sizeof(float) * n;
    // Device-side (global memory) counterparts of ptrV1, ptrV2 and ptrW.
    float* ptrGMV1;
    float* ptrGMV2;
    float* ptrGMW;
    GM::malloc(&ptrGMV1, sizeVector);
    GM::malloc(&ptrGMV2, sizeVector);
    GM::malloc(&ptrGMW, sizeVector);
    // Only the two inputs are uploaded; ptrGMW is written entirely by the kernel.
    GM::memcpyHToD(ptrGMV1, ptrV1, sizeVector);
    GM::memcpyHToD(ptrGMV2, ptrV2, sizeVector);
    // Launch geometry derived from the hardware: MP x 3 blocks of CORE_MP x 2 threads.
    const int MP = Hardware::getMPCount();
    const int CORE_MP = Hardware::getCoreCountMP();
    dim3 dg(MP, 3, 1);
    dim3 db(CORE_MP, 2, 1); // threads per block (product of the dimensions) must stay <= 1024
    kaddArray<<<dg,db>>>( ptrGMV1 , ptrGMV2 , ptrGMW , n);
    // Presumably a blocking copy that also waits for the kernel to finish - verify against GM.
    GM::memcpyDToH(ptrW, ptrGMW, sizeVector);
    GM::free(ptrGMV1);
    GM::free(ptrGMV2);
    GM::free(ptrGMW);
    }
 /*--------------------------------------*\
@@ -50,8 +76,12 @@ __global__ void kaddArray(float* ptrGMV1 , float* ptrGMV2 , float* ptrGMW , int
    const int TID = Thread2D::tid();
    // pattern entrelacement
-
+    int s = TID;
-    // TODO addArray
+    while (s < n)
 	{
 	ptrGMW[s] = ptrGMV1[s] + ptrGMV2[s];
 	s += NB_THREAD;
 	}
    }
 /*----------------------------------------------------------------------*\
--- a/Student_Cuda_Warmup/src/core/01_addArray/00_procedurale/useAddArrayProcedurale.cpp
+++ b/Student_Cuda_Warmup/src/core/01_addArray/00_procedurale/useAddArrayProcedurale.cpp
@@ -37,16 +37,17 @@ bool exemple_addArray_procedurale()
 	ArrayTools::print(ptrV2, n);
 	cout << "--------------------------------------------------------------------" << endl;
 	ArrayTools::print(ptrW, n);
 	cout<<endl;
 	}
    // Delete
 	{
 	delete ptrV1;
-	delete ptrV1;
+	delete ptrV2;
 	delete ptrW;
 	}
-    return isOk;
+    return true;
    }
 /*----------------------------------------------------------------------*\
--- a/Student_Cuda_Warmup/src/core/01_addArray/01_poo/01_pattern_entrelacement/device/addArray_device.cu
+++ b/Student_Cuda_Warmup/src/core/01_addArray/01_poo/01_pattern_entrelacement/device/addArray_device.cu
@@ -11,23 +11,18 @@
 /**
 * output : void required, because kernel is asynchrone
 */
-__global__ void addArray(float* ptrGMV1 , float* ptrGMV2 , float* ptrGMW , int n)
+__global__ void addArray(float* ptrGMV1 , float* ptrGMV2 , float* ptrGMW , int n) {
    {
    // Element-wise sum: ptrGMW[s] = ptrGMV1[s] + ptrGMV2[s] for every s in [0, n).
    // NB_THREAD and TID come from the project helper Thread2D (presumably the total thread
    // count of the launch and this thread's flat index - confirm against Thread2D).
    const int NB_THREAD = Thread2D::nbThread();
    const int TID = Thread2D::tid();
    // Debug, optional
 //    if (TID == 0)
 //	{
 //	printf("Coucou from device tid = %d", TID);
 //	}
    // Interleaving pattern: each thread starts at index TID and advances by NB_THREAD,
    // so the loop bound s < n keeps every access inside the arrays.
-
+    int s = TID;
-    // TODO addArray
+    while (s < n) {
    	ptrGMW[s] = ptrGMV1[s] + ptrGMV2[s];
    	s += NB_THREAD;
 	}
 }
 /*----------------------------------------------------------------------*\
 |*			End	 					*|
 \*---------------------------------------------------------------------*/
--- a/Student_Cuda_Warmup/src/core/01_addArray/01_poo/01_pattern_entrelacement/host/AddArray.cu
+++ b/Student_Cuda_Warmup/src/core/01_addArray/01_poo/01_pattern_entrelacement/host/AddArray.cu
@@ -12,7 +12,6 @@ using std::endl;
 using std::to_string;
 using std::string;
 /*--------------------------------------*\
 |*		Imported	 	*|
 \*-------------------------------------*/
@@ -33,26 +32,24 @@ AddArray::AddArray(const Grid& grid , float* ptrV1 , float* ptrV2 , float* ptrW
 	ptrW(ptrW), //
 	n(n), //
 	dg(grid.dg), //
-	db(grid.db)
+	db(grid.db) {
-    {
+        this->sizeVector = sizeof(float) * n;
    this->sizeVector = -1; // TODO addArray // octet
        // MM (malloc Device)
    	{
           	GM::malloc(&ptrGMV1, sizeVector);
-	// TODO addArray
+           	GM::malloc(&ptrGMV2, sizeVector);
           	GM::malloc(&ptrGMW, sizeVector);
    	}
    }
-AddArray::~AddArray()
+AddArray::~AddArray() {
    {
    //MM (device free)
 	{
 	GM::free(ptrGMV1);
-	// TODO addArray
+	GM::free(ptrGMV2);
-	}
+	GM::free(ptrGMW);
-    }
+}
 /*--------------------------------------*\
 |*		Methode			*|
@@ -61,23 +58,23 @@ AddArray::~AddArray()
 /**
 * override
 */
-void AddArray::run()
+void AddArray::run() {
    {
    // MM (copy Host->Device)
    {
    	GM::memcpyHToD(ptrGMV1, ptrV1, sizeVector);
-	// TODO addArray
+    	GM::memcpyHToD(ptrGMV2, ptrV2, sizeVector);
 	}
-    // TODO addArray // call kernel // assynchrone
+
    addArray<<<dg,db>>>( ptrGMV1 , ptrGMV2 , ptrGMW , n);
    //Kernel::synchronize();// inutile
    // MM (Device -> Host)
 	{
-	// TODO addArray // MM barier de synchronisation implicite
+	    GM::memcpyDToH(ptrW, ptrGMW, sizeVector);
 	}
 	}
 }
 /*----------------------------------------------------------------------*\
 |*			End	 					*|
--- a/Student_Cuda_Warmup/src/core/01_addArray/01_poo/01_pattern_entrelacement/host/AddArray.h
+++ b/Student_Cuda_Warmup/src/core/01_addArray/01_poo/01_pattern_entrelacement/host/AddArray.h
@@ -9,8 +9,7 @@
 \*---------------------------------------------------------------------*/
-class AddArray
+class AddArray {
    {
 	/*--------------------------------------*\
 	|*		Constructor		*|
 	 \*-------------------------------------*/
--- a/Student_Cuda_Warmup/src/core/01_addArray/01_poo/01_pattern_entrelacement/useAddArrayObject.cpp
+++ b/Student_Cuda_Warmup/src/core/01_addArray/01_poo/01_pattern_entrelacement/useAddArrayObject.cpp
@@ -45,12 +45,13 @@ bool exemple_addArray_object()
 	ArrayTools::print(ptrV2, n);
 	cout << "--------------------------------------------------------------------" << endl;
 	ArrayTools::print(ptrW, n);
 	cout<<endl;
 	}
    // Delete
 	{
 	delete ptrV1;
-	delete ptrV1;
+	delete ptrV2;
 	delete ptrW;
 	}
@@ -69,16 +70,10 @@ static Grid createGrid()
    const int MP = Hardware::getMPCount();
    const int CORE_MP = Hardware::getCoreCountMP();
-    dim3 dg(1, 1, 1); // TODO addArray
+    dim3 dg(MP, 2, 1);
-    dim3 db(1, 1, 1); // TODO addArray  // produit <=1024
+    dim3 db(CORE_MP, 3, 1);   // produit <=1024
    Grid grid(dg, db);
    // to remove once coded
 	{
 	Couts::redln("aie aie aie, your best grid won t build itself");
 	assert(false);
 	}
    return grid;
    }
--- a/Student_Cuda_Warmup/src/core/01_addArray/01_poo/02_pattern_11/useAddArray11.cpp
+++ b/Student_Cuda_Warmup/src/core/01_addArray/01_poo/02_pattern_11/useAddArray11.cpp
@@ -45,12 +45,13 @@ bool exemple_addArray_11()
 	ArrayTools::print(ptrV2, n);
 	cout << "--------------------------------------------------------------------" << endl;
 	ArrayTools::print(ptrW, n);
 	cout<<endl;
 	}
    // Delete
 	{
 	delete ptrV1;
-	delete ptrV1;
+	delete ptrV2;
 	delete ptrW;
 	}
--- a/Student_Cuda_Warmup/src/main/main.cpp
+++ b/Student_Cuda_Warmup/src/main/main.cpp
@@ -2,6 +2,7 @@
 #include <stdlib.h>
 #include "Couts.h"
 #include "Hardware.h"
 using std::cerr;
 using std::cout;
@@ -29,18 +30,30 @@ static void scalar(bool& isOk);
 int main(int argc , char** argv)
    {
    // Entry point: optionally initialises the CUDA device, runs the enabled example
    // groups and returns EXIT_SUCCESS only if every one of them left isOk true.
    // Optional
 	{
 	Hardware::loadCudaDriverAll();
 	Hardware::printAllSimple();
 	const int DEVICE_ID = 0;
 	Hardware::setDevice(DEVICE_ID);
 	}
    // NOTE(review): IS_VERBOSE is not read anywhere in the visible body of main.
    const int IS_VERBOSE = true;
    // run
 	{
 	bool isOk = true;
 	// Comment out what you do not need below
-    scalar(isOk);	 // inside the method below, comment out what you do not want to run
+	//scalar(isOk);	 // inside the method below, comment out what you do not want to run
 	array(isOk);	// inside the method below, comment out what you do not want to run
 	Couts::statusln(isOk);
 	return isOk ? EXIT_SUCCESS : EXIT_FAILURE;
 	}
    }
 /*-------------------------------------*\
 |*		Private 		*|
@@ -56,7 +69,7 @@ static void array(bool& isOk)
    {
    isOk &= exemple_addArray_procedurale();
    isOk &= exemple_addArray_object();
-    isOk &= exemple_addArray_11();
+    // isOk &= exemple_addArray_11();
    }
 /*----------------------------------------------------------------------*\
--- a/Student_OMP/src/core/omp/02_Slice/01_pi_sequentiel.cpp
+++ b/Student_OMP/src/core/omp/02_Slice/01_pi_sequentiel.cpp
@@ -34,14 +34,14 @@ bool isPiSequentiel_OK(int n)
 \*-------------------------------------*/
 double piSequentiel(int n) {
    // Sequential reference implementation: left-endpoint Riemann sum of fpi over [0, 1)
    // using n slices of width 1/n. Returns sum(fpi(i/n), i = 0..n-1) / n.
    // fpi is defined elsewhere in the project (presumably 4/(1+x^2), whose integral on
    // [0,1] is pi - confirm).
-    const double delta_x = 1 / (double)n;
+    const double dx = 1.0 / (double)n;
    double sum = 0;
    for (int i = 0; i < n; i++) {
-    	double xi = i * delta_x;
+    	double xi = i * dx;
    	sum += fpi(xi);
 	}
    // The slice width is factored out of the loop and applied once to the whole sum.
-    return sum * delta_x;
+    return sum * dx;
    }
 /*----------------------------------------------------------------------*\
--- a/Student_OMP/src/core/omp/02_Slice/02_pi_entrelacer_promotionTab.cpp
+++ b/Student_OMP/src/core/omp/02_Slice/02_pi_entrelacer_promotionTab.cpp
@@ -45,22 +45,20 @@ double piOMPEntrelacerPromotionTab(int n)
    double sum[NB_THREAD];
    // Reduction intra thread
-#pragma omp parallel
+#pragma omp parallel {
    {
    const int TID = Omps::getTid();
    int s = TID;
    double sum_thread = 0;
-    while (s < n)
+    while (s < n) {
 	{
    	double xi = s * delta_x;
    	sum_thread += fpi(xi);
    	s += NB_THREAD;
 	}
    sum[TID] = sum_thread;
-    }
+}
 double sumTotal = 0;
--- a/Student_OMP/src/core/omp/02_Slice/03_pi_entrelacer_critique.cpp
+++ b/Student_OMP/src/core/omp/02_Slice/03_pi_entrelacer_critique.cpp
@@ -37,8 +37,29 @@ bool isPiOMPEntrelacerCritical_Ok(int n)
 double piOMPEntrelacerCritical(int n)
    {
    // Riemann sum of fpi over [0, 1) with n slices, parallelised with OpenMP.
    // Each thread accumulates the slices s = TID, TID + NB_THREAD, ... into a private
    // partial sum, then adds it to the shared total inside a critical section.
    // Returns total * (1/n).
    // NOTE(review): assumes the parallel region runs with exactly NB_THREAD threads, as
    // configured by Omps::setAndGetNaturalGranularity() - confirm against Omps.
-    //TODO
+    const double dx = 1.0 / (double)n;
-    return -1;
+    const int NB_THREAD = Omps::setAndGetNaturalGranularity();
    // The shared accumulator must be floating point: with an int, every
    // "total += sum_thread" truncates the double partial sum to an integer, and the
    // double -> int conversion is undefined once the value exceeds the int range.
    double total = 0;
#pragma omp parallel
    {
	const int TID = Omps::getTid();
	int s = TID;
	double sum_thread = 0;
	while (s < n)
	    {
	    double xi = s*dx;
	    sum_thread += fpi(xi);
	    s+= NB_THREAD;
	    }
	// One critical section per thread (not per slice): contention stays negligible.
#pragma omp critical
	{
	total += sum_thread;
	}
    }
    return total * dx;
    }
 /*----------------------------------------------------------------------*\
--- a/Student_OMP/src/core/omp/02_Slice/04_pi_entrelacer_atomic.cpp
+++ b/Student_OMP/src/core/omp/02_Slice/04_pi_entrelacer_atomic.cpp
@@ -40,8 +40,26 @@ bool isPiOMPEntrelacerAtomic_Ok(int n)
 */
 double piOMPEntrelacerAtomic(int n)
    {
    // Riemann sum of fpi over [0, 1) with n slices, parallelised with OpenMP.
    // Each thread accumulates the slices s = TID, TID + NB_THREAD, ... into a private
    // partial sum, then adds it to the shared total with an atomic update.
    // Returns total * (1/n).
    // NOTE(review): assumes the parallel region runs with exactly NB_THREAD threads, as
    // configured by Omps::setAndGetNaturalGranularity() - confirm against Omps.
-    // TODO
+    const double dx = 1.0 / (double)n;
-    return -1;
+    const int NB_THREAD = Omps::setAndGetNaturalGranularity();
    // The shared accumulator must be floating point: with an int, every
    // "total += sum_thread" truncates the double partial sum to an integer, and the
    // double -> int conversion is undefined once the value exceeds the int range.
    double total = 0;
#pragma omp parallel
	{
	const int TID = Omps::getTid();
	int s = TID;
	double sum_thread = 0;
	while (s < n)
	    {
	    double xi = s * dx;
	    sum_thread += fpi(xi);
	    s += NB_THREAD;
	    }
	// A single atomic update per thread publishes the private partial sum.
#pragma omp atomic
	total += sum_thread;
	}
    return total * dx;
    }
 /*----------------------------------------------------------------------*\
--- a/Student_OMP/src/core/omp/02_Slice/07_pi_for_promotionTab.cpp
+++ b/Student_OMP/src/core/omp/02_Slice/07_pi_for_promotionTab.cpp
@@ -43,7 +43,18 @@ bool isPiOMPforPromotionTab_Ok(int n)
 */
 double piOMPforPromotionTab(int n)
    {
-    //TODO
+    const double dx = 1.0 / (double)n;
    const int NB_THREAD = Omps::setAndGetNaturalGranularity();
    double total[NB_THREAD] = {0.0};
 #pragma omp parallel for
    for(int i = 0; i < n; i++) {
 	const int TID = Omps::getTid();
 //	double xi = s*dx;
 //	total[TID] += fpi(xi);
    }
    return -1;
    }
--- a/Student_OMP/src/core/omp/02_Slice/08_pi_for_reduction.cpp
+++ b/Student_OMP/src/core/omp/02_Slice/08_pi_for_reduction.cpp
@@ -41,11 +41,18 @@ bool isPiOMPforReduction_Ok(int n)
 */
 double piOMPforReduction(int n)
    {
    // Riemann sum of fpi over [0, 1) with n slices of width 1/n, parallelised with an
    // OpenMP "parallel for" and a "+" reduction on sum. Returns sum * (1/n).
-    //TODO
+    const double dx = 1.0 / (double)n;
-    return -1;
+    double sum = 0;
    // reduction(+:sum): each thread accumulates into its own private copy of sum and
    // OpenMP combines the copies into the shared variable at the end of the loop.
#pragma omp parallel for reduction(+:sum)
    for (int i = 0; i < n; i++)
	{
	double xi = i * dx;
	sum += fpi(xi);
	}
    return sum * dx;
    }
 /*----------------------------------------------------------------------*\
 |*			End	 					*|
 \*---------------------------------------------------------------------*/
Author	SHA1	Message	Date
Klagarge	dcd3df8f89	feat(lab01): add Slice GM Host lab	2025-10-18 16:39:18 +02:00
Klagarge	f800b4f395	feat(warmup): add solutions	2025-10-18 16:37:06 +02:00
Klagarge	f8ae49d666	feat(omp): add lab omp realised during classroom	2025-10-18 16:36:29 +02:00