feat(sliceSM): add sliceSM

2025-11-24 23:21:59 +01:00
parent cb6856ccde
commit bf9a0a2941
11 changed files with 79 additions and 61 deletions
--- a/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/device/sliceSM_device.cu
+++ b/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/device/sliceSM_device.cu
@@ -17,16 +17,21 @@ static __device__ float f(float x);
 |*			Implementation 					*|
 \*---------------------------------------------------------------------*/

-__global__ void sliceSM(int nbSlice , float* ptrPiHatGM)
-    {
+__global__
+void sliceSM(int nbSlice , float* ptrPiHatGM) { // kernel: per-thread partial sums in shared memory, then reduction towards ptrPiHatGM
    // TODO SliceSM

    // Receive tabSM
+    extern __shared__ float tabSM[]; // dynamic shared memory, sized by the third launch parameter
    // ReductionIntraThread
+    reductionIntraThread(tabSM, nbSlice); // each thread writes its own partial sum at tabSM[tidLocal]
+    __syncthreads(); // every partial sum must be stored before any thread reads another thread's slot
    // Reduction of tabSM (use tools ReductionAdd)
+    ReductionAdd::reduce(tabSM, ptrPiHatGM); // presumably combines the block's partial sums and adds them to *ptrPiHatGM -- confirm against ReductionAdd
+

    // __syncthreads(); needed? where? not at the end in any case
-    }
+}

 /*--------------------------------------*\
 |*		Private			*|
@@ -35,19 +40,33 @@ __global__ void sliceSM(int nbSlice , float* ptrPiHatGM)
 /**
 * Fills the SM: each thread sums f over its own slices and stores that partial sum in its tabSM slot.
 */
-void reductionIntraThread(float* tabSM , int nbSlice)
-    {
+static
+__device__
+void reductionIntraThread(float* tabSM , int nbSlice) { // tabSM: indexed by tidLocal, one slot per thread; nbSlice: number of slices of [0,1]
    // TODO SliceSM

    // Warning: both TID and TID_LOCAL must be used
-    }
+    const int TID = Thread2D::tid(); // index of the first slice handled by this thread
+    const int localTID = Thread2D::tidLocal(); // slot of this thread in tabSM
+    const int NB_THREAD = Thread2D::nbThread(); // step between two slices handled by the same thread

-__device__ float f(float x)
-    {
-    return 4.f / (1.f + x * x);
+    const float delta_x = 1.f / (float)nbSlice; // width of one slice
+    float sum = 0.f; // register accumulator: no shared-memory read-modify-write per slice
+
+    int s = TID;
+    while (s < nbSlice) {
+        sum += f(s * delta_x); // f evaluated at the left edge of slice s
+        s += NB_THREAD;
    }
+
+    tabSM[localTID] = sum; // single store; a thread without any slice stores 0.f
+}
+
+__device__ float f(float x) {
+    const float denominator = 1.f + x * x; // 1 + x^2
+    return 4.f / denominator; // integrand 4 / (1 + x^2)
+}

 /*----------------------------------------------------------------------*\
 |*			End	 					*|
 \*---------------------------------------------------------------------*/
-
--- a/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM.cu
+++ b/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM.cu
@@ -25,34 +25,31 @@ extern __global__ void sliceSM(int nbSlice,float* ptrPiHatGM);
 \*-------------------------------------*/

 SliceSM::SliceSM(const Grid& grid , int nbSlice , double* ptrPiHat , bool isVerbose) :
-	RunnableGPU(grid, "SliceSM_" + to_string(nbSlice), isVerbose), // classe parente
-	//
-	ptrPiHat(ptrPiHat), //
-	nbSlice(nbSlice) //
-    {
-    this->sizeSM = -1; //TODO SliceSM
+	RunnableGPU(grid, "SliceSM_" + to_string(nbSlice), isVerbose), // parent class
+	ptrPiHat(ptrPiHat), // host destination of the pi estimate, written by run()
+	nbSlice(nbSlice) {
+    this->sizeSM = grid.threadByBlock() * sizeof(float); // [octet] one float of shared memory per thread of a block

    // MM
 	{
 	// TODO SliceSM (do not forget to zero it, with mallocfloat0 for example)
-
+	GM::mallocFloat0(&ptrPiHatGM); // device-side float, zero-initialised (see the tip below)
 	// Tip:		There is a dedicated method to allocate a float on the device side and initialise it to zero
 	//
 	//		   GM::mallocfloat0(&ptrPiHatGM);
 	}
-    }
+}

-SliceSM::~SliceSM(void)
-    {
+SliceSM::~SliceSM(void) { // releases the device-side result buffer
    //TODO SliceSM
-    }
+    GM::free(ptrPiHatGM); // frees the float obtained from GM::mallocFloat0 in the constructor
+}

 /*--------------------------------------*\
 |*		Methode			*|
 \*-------------------------------------*/

-void SliceSM::run()
-    {
+void SliceSM::run() { // launches the kernel, copies the device result back and scales it into *ptrPiHat
    // Step 1 : launch the kernel
    // Step 2 : fetch the result on the host side (for example with memcpyDToH_float)
    // Step 3 : finalize the computation of PI
@@ -60,7 +57,12 @@ void SliceSM::run()
    // Solution : double result; // and bring it back into result, transfer and then finalize into ptrPiHat

    // TODO SliceSM
-    }
+    sliceSM<<<dg,db,this->sizeSM>>>(this->nbSlice, this->ptrPiHatGM); // sizeSM bytes of dynamic shared memory per block; NOTE(review): ptrPiHatGM is zeroed only in the constructor -- if the kernel adds into it, a second run() on the same object would accumulate; confirm
+    float result; // host copy of the device-side sum
+    GM::memcpyDToH_float(&result, this->ptrPiHatGM); // presumably a blocking device-to-host copy, so the kernel has finished before result is read -- confirm
+    const double delta_x = 1.0 / (double) this->nbSlice; // width of one slice
+    *this->ptrPiHat = (double) result * delta_x; // device sum times the slice width
+}

 /////////////////////////
 //	Rappel:
--- a/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM.h
+++ b/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM.h
@@ -8,8 +8,7 @@
 |*			Declaration 					*|
 \*---------------------------------------------------------------------*/

-class SliceSM: public RunnableGPU
-    {
+class SliceSM: public RunnableGPU {
 	/*--------------------------------------*\
 	|*		Constructor		*|
 	 \*-------------------------------------*/
@@ -51,7 +50,7 @@ class SliceSM: public RunnableGPU
 	size_t sizeSM; // [octet]
 	float* ptrPiHatGM;

-    };
+};

 /*----------------------------------------------------------------------*\
 |*			End	 					*|
--- a/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM_BestGrid.h
+++ b/Student_Cuda/src/core/01_student/03_Slice/03_Slice_SM/host/SliceSM_BestGrid.h
@@ -11,29 +11,26 @@
 |*			Implementation 					*|
 \*---------------------------------------------------------------------*/

-namespace sliceSM
-    {
+namespace sliceSM {

-    class BestGrid
-	{
+    class BestGrid { // grid heuristic for SliceSM

 	public:

-	    static Grid get()
-		{
-		const int MP = Hardware::getMPCount();
+	    static Grid get() { // builds a Grid from two Hardware queries
+    		const int MP = Hardware::getMPCount(); // presumably the number of multiprocessors of the device
+            const int CORE_MP = Hardware::getCoreCountMP(); // presumably the number of cores per multiprocessor

-		// TODO SliceGMHOST grid
+    		// Grid for SliceSM: dg scales with MP, db with CORE_MP
+            dim3 dg(MP, 6, 1); // MP x 6 blocks
+            dim3 db(CORE_MP, 2, 1); // 2 * CORE_MP threads per block; NOTE(review): confirm ReductionAdd accepts this block size (tree reductions often need a power of two)
+            Grid grid(dg, db);

-		// to remove once coded
-		    {
-		    Couts::redln("aie aie aie, your best grid won t build itself");
-		    assert(false);
-		    }
+            return grid;
 		}

 	};
-    }
+}

 /*----------------------------------------------------------------------*\
 |*			End	 					*|