feat(sliceSM): add sliceSM

This commit is contained in:
2025-11-24 23:21:59 +01:00
parent cb6856ccde
commit bf9a0a2941
11 changed files with 79 additions and 61 deletions

View File

@@ -17,16 +17,21 @@ static __device__ float f(float x);
|* Implementation *|
\*---------------------------------------------------------------------*/
/**
 * Kernel: every thread stores a partial sum of f over its slices in shared memory,
 * then the block reduces these partial sums with ReductionAdd into ptrPiHatGM.
 *
 * Shared memory: dynamic, one float per thread of the block (size given as the
 * third launch parameter).
 *
 * nbSlice    : number of slices of the integration interval
 * ptrPiHatGM : device float receiving the reduced sum
 *              NOTE(review): presumably ReductionAdd::reduce adds into this value,
 *              so it has to be zero before the launch - confirm against ReductionAdd.
 */
__global__
void sliceSM(int nbSlice , float* ptrPiHatGM) {
    // Dynamic shared memory, one cell per thread of the block
    extern __shared__ float tabSM[];

    // Each thread fills its own cell of tabSM
    reductionIntraThread(tabSM, nbSlice);

    // Every cell must be written before the reduction reads cells of other threads
    __syncthreads();

    // Reduction of tabSM into ptrPiHatGM
    ReductionAdd::reduce(tabSM, ptrPiHatGM);

    // No barrier needed here: nothing in this kernel reads tabSM after the reduction
}
/*--------------------------------------*\
|* Private *|
@@ -35,19 +40,33 @@ __global__ void sliceSM(int nbSlice , float* ptrPiHatGM)
/**
 * Fills the shared memory: the calling thread sums f(s * dx) over the slices
 * s = TID, TID + NB_THREAD, TID + 2 * NB_THREAD, ... (while s < nbSlice) and stores
 * the result in tabSM[TID_LOCAL].
 *
 * tabSM   : shared memory array, indexed with Thread2D::tidLocal()
 * nbSlice : number of slices; dx = 1 / nbSlice
 *
 * Warning: the slices are selected with Thread2D::tid(), the tabSM cell with
 * Thread2D::tidLocal().
 */
static
__device__
void reductionIntraThread(float* tabSM , int nbSlice) {
    const int TID = Thread2D::tid();
    const int TID_LOCAL = Thread2D::tidLocal();
    const int NB_THREAD = Thread2D::nbThread();

    const float DX = 1.f / (float)nbSlice;

    // Accumulate in a local variable and write tabSM only once, instead of a
    // read-modify-write of shared memory at every iteration.
    // The cell is always written (0 when the thread has no slice).
    float sumThread = 0.f;
    for (int s = TID; s < nbSlice; s += NB_THREAD) {
        const float xs = s * DX;
        sumThread += f(xs);
    }

    tabSM[TID_LOCAL] = sumThread;
}
/**
 * Function summed by the slice kernel: f(x) = 4 / (1 + x^2).
 */
__device__
float f(float x) {
    const float denominator = 1.f + x * x;
    return 4.f / denominator;
}
/*----------------------------------------------------------------------*\
|* End *|
\*---------------------------------------------------------------------*/

View File

@@ -25,34 +25,31 @@ extern __global__ void sliceSM(int nbSlice,float* ptrPiHatGM);
\*-------------------------------------*/
/**
 * grid      : launch grid, forwarded to RunnableGPU and used to size the shared memory
 * nbSlice   : number of slices
 * ptrPiHat  : host-side output, written by run()
 * isVerbose : forwarded to RunnableGPU
 */
SliceSM::SliceSM(const Grid& grid , int nbSlice , double* ptrPiHat , bool isVerbose) :
	RunnableGPU(grid, "SliceSM_" + to_string(nbSlice), isVerbose), // parent class
	ptrPiHat(ptrPiHat),
	nbSlice(nbSlice) {
    // Shared memory per block [byte]: one float per thread of the block
    this->sizeSM = grid.threadByBlock() * sizeof(float);

    // MM
    {
	// Device float for the result, allocated with GM::mallocFloat0
	// (the result must start at zero)
	GM::mallocFloat0(&ptrPiHatGM);
    }
}
/**
 * Releases the device float allocated in the constructor.
 */
SliceSM::~SliceSM(void) {
    GM::free(ptrPiHatGM);
}
/*--------------------------------------*\
|* Methode *|
\*-------------------------------------*/
/**
 * Launches the kernel, brings the float result back to the host and writes the
 * final value (result * 1/nbSlice) as a double into *ptrPiHat.
 *
 * NOTE(review): ptrPiHatGM is only zeroed in the constructor. If the reduction
 * accumulates into it, calling run() more than once on the same instance would
 * sum the results of all the calls - confirm how RunnableGPU calls run().
 */
void SliceSM::run() {
    // Step 1: launch the kernel, with sizeSM bytes of dynamic shared memory per block
    sliceSM<<<dg,db,this->sizeSM>>>(this->nbSlice, this->ptrPiHatGM);

    // Step 2: bring the result back to the host.
    // The device value is a float while ptrPiHat points to a double, hence the
    // intermediate float.
    float result = 0.f;
    GM::memcpyDToH_float(&result, this->ptrPiHatGM);

    // Step 3: finalize the computation of PI in double precision
    const double delta_x = 1.0 / (double) this->nbSlice;
    *this->ptrPiHat = (double) result * delta_x;
}
/////////////////////////
// Rappel:

View File

@@ -8,8 +8,7 @@
|* Declaration *|
\*---------------------------------------------------------------------*/
class SliceSM: public RunnableGPU
{
class SliceSM: public RunnableGPU {
/*--------------------------------------*\
|* Constructor *|
\*-------------------------------------*/
@@ -51,7 +50,7 @@ class SliceSM: public RunnableGPU
size_t sizeSM; // [octet]
float* ptrPiHatGM;
};
};
/*----------------------------------------------------------------------*\
|* End *|

View File

@@ -11,29 +11,26 @@
|* Implementation *|
\*---------------------------------------------------------------------*/
namespace sliceSM {

/**
 * Provides the launch grid used for the sliceSM kernel.
 */
class BestGrid {
    public:

	/**
	 * Builds the grid from the hardware description:
	 *   dg = (MP, 6, 1)       with MP      = Hardware::getMPCount()
	 *   db = (CORE_MP, 2, 1)  with CORE_MP = Hardware::getCoreCountMP()
	 *
	 * NOTE(review): the factors 6 and 2 are presumably tuned empirically - confirm.
	 */
	static Grid get() {
	    const int MP = Hardware::getMPCount();
	    const int CORE_MP = Hardware::getCoreCountMP();

	    dim3 dg(MP, 6, 1);
	    dim3 db(CORE_MP, 2, 1);
	    Grid grid(dg, db);

	    return grid;
	}

};

}
/*----------------------------------------------------------------------*\
|* End *|