Compare commits


3 Commits

SHA1         Message                                            Date
dcd3df8f89   feat(lab01): add Slice GM Host lab                 2025-10-18 16:39:18 +02:00
f800b4f395   feat(warmup): add solutions                        2025-10-18 16:37:06 +02:00
f8ae49d666   feat(omp): add lab omp realised during classroom   2025-10-18 16:36:29 +02:00
21 changed files with 223 additions and 130 deletions

View File

@@ -22,12 +22,21 @@ static __device__ float f(float x);
  * tabGM is a promoted array, with one slot per thread
  * </pre>
  */
-__global__ void reductionIntraThreadGMHOST(float* tabGM , int nbSlice)
-{
+__global__ void reductionIntraThreadGMHOST(float* tabGM , int nbSlice) {
 	const int NB_THREAD = Thread2D::nbThread();
 	const int TID = Thread2D::tid();
-	// TODO SliceGMHOST
+	const float delta_x = 1.f / (float)nbSlice;
+
+	int s = TID;
+	tabGM[TID] = 0.f;
+	while (s < nbSlice) {
+		float xi = s * delta_x;
+		tabGM[TID] += f(xi);
+		s += NB_THREAD;
+	}
 
 	// Hints:
 	//
@@ -43,13 +52,10 @@ __global__ void reductionIntraThreadGMHOST(float* tabGM , int nbSlice)
 |* Private                             *|
 \*-------------------------------------*/
 
-__device__ float f(float x)
-{
-	// TODO SliceGMHOST
-	return -1;
+__device__ float f(float x) {
+	return 4.f / (1.f + x * x);
 }
 
 /*----------------------------------------------------------------------*\
 |* End                                                                  *|
 \*---------------------------------------------------------------------*/
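For reference, the completed kernel and f together evaluate a left Riemann sum of the integral that defines pi:

    \int_0^1 \frac{4}{1+x^2}\,dx = 4\arctan(1) = \pi,
    \qquad
    \pi \approx \Delta x \sum_{i=0}^{n-1} f(i\,\Delta x),
    \quad \Delta x = \frac{1}{n}

with n = nbSlice; each thread accumulates the slices i congruent to TID modulo NB_THREAD, and the per-thread partial sums left in tabGM are combined on the host.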

View File

@@ -29,19 +29,18 @@ extern __global__ void reductionIntraThreadGMHOST(float* tabGM,int nbSlice);
 SliceGMHOST::SliceGMHOST(Grid grid , int nbSlice , double* ptrPiHat , bool isVerbose) :
 	RunnableGPU(grid, "SliceGM_HOST_" + to_string(nbSlice), isVerbose), // parent class
 	nbSlice(nbSlice), //
 	ptrPiHat(ptrPiHat) //
 {
-	this->nTabGM = -1; // TODO SliceGMHOST // number of slots of tabGM. Hint: grid.threadCounts() gives the grid's thread count
-	this->sizeTabGM = -1; // TODO SliceGMHOST // size of tabGM in bytes
-	// TODO SliceGMHOST
+	this->nTabGM = grid.threadCounts(); // number of slots of tabGM. Hint: grid.threadCounts() gives the grid's thread count
+	this->sizeTabGM = nTabGM * sizeof(float); // size of tabGM in bytes
+
+	GM::malloc(&tabGM, sizeTabGM);
 }
 
-SliceGMHOST::~SliceGMHOST(void)
-{
-	// TODO SliceGMHOST
+SliceGMHOST::~SliceGMHOST(void) {
+	GM::free(tabGM);
 }
 
 /*--------------------------------------*\
@@ -60,9 +59,9 @@ SliceGMHOST::~SliceGMHOST(void)
  *
  * </pre>
  */
-void SliceGMHOST::run()
-{
-	// TODO SliceGMHOST // call the kernel
+void SliceGMHOST::run() {
+	reductionIntraThreadGMHOST<<<dg, db>>>(tabGM, nbSlice);
 
 	// Hint:
 	// dg and db are stored in the parent class
@@ -88,7 +87,15 @@ void SliceGMHOST::reductionHost()
 	// TODO SliceGMHOST
+	float tab[nTabGM];
+	GM::memcpyDToH(tab, tabGM, sizeTabGM);
+
+	for (int i = 0; i < nTabGM; i++) {
+		*ptrPiHat += (double)tab[i];
+	}
+
+	const double delta_x = 1.0 / (double)nbSlice; // compute in double, not float
+	*ptrPiHat *= delta_x;
 }
 
 /*----------------------------------------------------------------------*\
 |* End                                                                  *|

View File

@@ -11,28 +11,26 @@
 |* Implementation                                                       *|
 \*---------------------------------------------------------------------*/
 
-namespace sliceGMHost
-{
+namespace sliceGMHost {
 
-class BestGrid
-{
+class BestGrid {
 
 	public:
 
-	static Grid get()
-	{
+	static Grid get() {
 		const int MP = Hardware::getMPCount();
+		const int CORE_MP = Hardware::getCoreCountMP();
 
-		// TODO SliceGMHOST grid
+		dim3 dg(MP, 1, 1);
+		dim3 db(CORE_MP, 2, 1); // product <= 1024
+		Grid grid(dg, db);
+
+		return grid;
-
-		// to remove once coded
-		{
-		Couts::redln("aie aie aie, your best grid won t build itself");
-		assert(false);
-		}
 	}
 };
 }
 
 /*----------------------------------------------------------------------*\
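The product <= 1024 comment refers to the hardware cap on threads per block. A minimal guard one could add (hypothetical helper, assuming the standard CUDA limit; not part of the lab framework):

#include <cassert>
#include <cuda_runtime.h>

// Hypothetical guard: the product of the block dimensions may not
// exceed 1024 threads per block on current CUDA hardware.
static void checkBlockDim(dim3 db)
{
	assert(db.x * db.y * db.z <= 1024);
}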

View File

@@ -29,7 +29,7 @@ int main(int argc , char** argv)
 	// public
 	{
 		cudaContext.deviceId = 0; // in [0,2] with the Cuda3 server
-		cudaContext.launchMode = LaunchModeMOO::USE; // USE TEST BENCHMARK FORCEBRUT
+		cudaContext.launchMode = LaunchModeMOO::TEST; // USE TEST BENCHMARK FORCEBRUT
 		cudaContext.deviceDriver = DeviceDriver::LOAD_ALL; // LOAD_CURRENT LOAD_ALL
 		cudaContext.deviceInfo = DeviceInfo::ALL_SIMPLE; // NONE ALL ALL_SIMPLE CURRENT
@@ -49,4 +49,3 @@ int main(int argc , char** argv)
 /*----------------------------------------------------------------------*\
 |* End                                                                  *|
 \*---------------------------------------------------------------------*/
-

View File

@@ -182,4 +182,3 @@ void addvectorTristream()
 /*----------------------------------------------------------------------*\
 |* End                                                                  *|
 \*---------------------------------------------------------------------*/
-

View File

@@ -55,8 +55,8 @@ int mainTest()
 void slice()
 {
 	VTSliceGMHOST test1;
-	VTSliceGM test2;
-	VTSliceSM test3;
+	// VTSliceGM test2;
+	// VTSliceSM test3;
 
 	test1.run();
@@ -96,4 +96,3 @@ void vectorStream()
 /*----------------------------------------------------------------------*\
 |* End                                                                  *|
 \*---------------------------------------------------------------------*/
-

View File

@@ -67,8 +67,8 @@ int mainUse()
 void slice(bool& isOk)
 {
 	SliceGmHostUse sliceGmHostUse(IS_VERBOSE);
-	SliceGmUse sliceGmUse(IS_VERBOSE);
-	SliceSmUse sliceSmUse(IS_VERBOSE);
+	// SliceGmUse sliceGmUse(IS_VERBOSE);
+	// SliceSmUse sliceSmUse(IS_VERBOSE);
 
 	isOk &= sliceGmHostUse.isOk(IS_VERBOSE);
 	// isOk &= sliceGmUse.isOk(IS_VERBOSE);
@@ -213,4 +213,3 @@ void print(bool isSuccess)
 /*----------------------------------------------------------------------*\
 |* End                                                                  *|
 \*---------------------------------------------------------------------*/
-

View File

@@ -9,6 +9,7 @@
 #include "cudas.h"
 #include "GM.h"
 #include "Kernel.h"
+#include "Hardware.h"
 
 using std::cout;
 using std::endl;
@@ -32,9 +33,34 @@ static __global__ void kaddArray(float* ptrGMV1 , float* ptrGMV2 , float* ptrGMW
  * ptrW receives the result
  * n: number of elements
  */
-__host__ bool addArray_procedurale(float* ptrV1 , float* ptrV2 , float* ptrW , int n) // __host__ optional
-{
-	// TODO addArray
+__host__ void addArray_procedurale(float* ptrV1 , float* ptrV2 , float* ptrW , int n) // __host__ optional
+{
+	size_t sizeVector = sizeof(float) * n;
+
+	float* ptrGMV1;
+	float* ptrGMV2;
+	float* ptrGMW;
+
+	GM::malloc(&ptrGMV1, sizeVector);
+	GM::malloc(&ptrGMV2, sizeVector);
+	GM::malloc(&ptrGMW, sizeVector);
+
+	GM::memcpyHToD(ptrGMV1, ptrV1, sizeVector);
+	GM::memcpyHToD(ptrGMV2, ptrV2, sizeVector);
+
+	const int MP = Hardware::getMPCount();
+	const int CORE_MP = Hardware::getCoreCountMP();
+	dim3 dg(MP, 3, 1);
+	dim3 db(CORE_MP, 2, 1); // product <= 1024
+
+	kaddArray<<<dg,db>>>( ptrGMV1 , ptrGMV2 , ptrGMW , n);
+
+	GM::memcpyDToH(ptrW, ptrGMW, sizeVector);
+
+	GM::free(ptrGMV1);
+	GM::free(ptrGMV2);
+	GM::free(ptrGMW);
 }
 
 /*--------------------------------------*\
@@ -50,8 +76,12 @@ __global__ void kaddArray(float* ptrGMV1 , float* ptrGMV2 , float* ptrGMW , int
 	const int TID = Thread2D::tid();
 
 	// interleaved pattern
-	// TODO addArray
+	int s = TID;
+	while (s < n)
+	{
+		ptrGMW[s] = ptrGMV1[s] + ptrGMV2[s];
+		s += NB_THREAD;
+	}
 }
 
 /*----------------------------------------------------------------------*\
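For readers without the course framework, here is a self-contained sketch of the same interleaved (grid-stride) pattern using only the raw CUDA runtime API. Names and the <<<128, 256>>> launch shape are illustrative, not the lab's values:

#include <cstdio>
#include <cuda_runtime.h>

// Grid-stride ("interleaved") vector add: any grid size covers all n
// elements, because each thread jumps by the total thread count.
__global__ void kAdd(const float* v1, const float* v2, float* w, int n)
{
	const int nbThread = gridDim.x * blockDim.x;   // total threads launched
	int s = blockIdx.x * blockDim.x + threadIdx.x; // this thread's first index
	while (s < n)
	{
		w[s] = v1[s] + v2[s];
		s += nbThread;                             // jump by the grid size
	}
}

int main()
{
	const int n = 1 << 20;
	const size_t size = n * sizeof(float);
	float *v1 = new float[n], *v2 = new float[n], *w = new float[n];
	for (int i = 0; i < n; i++) { v1[i] = float(i); v2[i] = float(2 * i); }

	float *d1, *d2, *dw;
	cudaMalloc(&d1, size); cudaMalloc(&d2, size); cudaMalloc(&dw, size);
	cudaMemcpy(d1, v1, size, cudaMemcpyHostToDevice);
	cudaMemcpy(d2, v2, size, cudaMemcpyHostToDevice);

	kAdd<<<128, 256>>>(d1, d2, dw, n);               // asynchronous launch
	cudaMemcpy(w, dw, size, cudaMemcpyDeviceToHost); // blocking: implicit sync

	printf("w[42] = %f\n", w[42]);                   // expect 126.0
	cudaFree(d1); cudaFree(d2); cudaFree(dw);
	delete[] v1; delete[] v2; delete[] w;
	return 0;
}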

View File

@@ -37,16 +37,17 @@ bool exemple_addArray_procedurale()
 		ArrayTools::print(ptrV2, n);
 		cout << "--------------------------------------------------------------------" << endl;
 		ArrayTools::print(ptrW, n);
+		cout << endl;
 	}
 
 	// Delete
 	{
 		delete ptrV1;
-		delete ptrV1;
+		delete ptrV2;
 		delete ptrW;
 	}
 
-	return isOk;
+	return true;
 }
 
 /*----------------------------------------------------------------------*\

View File

@@ -11,23 +11,18 @@
 /**
  * output: void required, because a kernel launch is asynchronous
  */
-__global__ void addArray(float* ptrGMV1 , float* ptrGMV2 , float* ptrGMW , int n)
-{
+__global__ void addArray(float* ptrGMV1 , float* ptrGMV2 , float* ptrGMW , int n) {
 	const int NB_THREAD = Thread2D::nbThread();
 	const int TID = Thread2D::tid();
 
-	// Debug, optional
-	// if (TID == 0)
-	// {
-	// printf("Coucou from device tid = %d", TID);
-	// }
-
 	// interleaved pattern
-	// TODO addArray
+	int s = TID;
+	while (s < n) {
+		ptrGMW[s] = ptrGMV1[s] + ptrGMV2[s];
+		s += NB_THREAD;
+	}
 }
 
 /*----------------------------------------------------------------------*\
 |* End                                                                  *|
 \*---------------------------------------------------------------------*/

View File

@@ -12,7 +12,6 @@ using std::endl;
 using std::to_string;
 using std::string;
-
 /*--------------------------------------*\
 |* Imported                             *|
 \*-------------------------------------*/
@@ -33,25 +32,23 @@ AddArray::AddArray(const Grid& grid , float* ptrV1 , float* ptrV2 , float* ptrW
 	ptrW(ptrW), //
 	n(n), //
 	dg(grid.dg), //
-	db(grid.db)
-{
-	this->sizeVector = -1; // TODO addArray // bytes
+	db(grid.db) {
+	this->sizeVector = sizeof(float) * n;
 
 	// MM (malloc Device)
 	{
 		GM::malloc(&ptrGMV1, sizeVector);
-		// TODO addArray
+		GM::malloc(&ptrGMV2, sizeVector);
+		GM::malloc(&ptrGMW, sizeVector);
 	}
 }
 
-AddArray::~AddArray()
-{
+AddArray::~AddArray() {
 	// MM (device free)
-	{
 	GM::free(ptrGMV1);
-		// TODO addArray
-	}
+	GM::free(ptrGMV2);
+	GM::free(ptrGMW);
 }
 
 /*--------------------------------------*\
@@ -61,21 +58,21 @@ AddArray::~AddArray()
 /**
  * override
  */
-void AddArray::run()
-{
+void AddArray::run() {
 	// MM (copy Host -> Device)
 	{
 		GM::memcpyHToD(ptrGMV1, ptrV1, sizeVector);
-		// TODO addArray
+		GM::memcpyHToD(ptrGMV2, ptrV2, sizeVector);
 	}
 
-	// TODO addArray // call kernel // asynchronous
+	addArray<<<dg,db>>>( ptrGMV1 , ptrGMV2 , ptrGMW , n);
 
 	//Kernel::synchronize(); // not needed
 
 	// MM (Device -> Host)
 	{
-		// TODO addArray // MM implicit synchronization barrier
+		GM::memcpyDToH(ptrW, ptrGMW, sizeVector);
 	}
 }
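The "implicit synchronization barrier" comment deserves a word: a kernel launch returns immediately, and it is the blocking device-to-host copy that waits for the kernel to finish. A minimal sketch of run() in raw CUDA runtime calls, under the assumption that GM::memcpyDToH wraps a blocking cudaMemcpy:

// Host -> Device
cudaMemcpy(ptrGMV1, ptrV1, sizeVector, cudaMemcpyHostToDevice);
cudaMemcpy(ptrGMV2, ptrV2, sizeVector, cudaMemcpyHostToDevice);

addArray<<<dg, db>>>(ptrGMV1, ptrGMV2, ptrGMW, n); // asynchronous launch

// Device -> Host: cudaMemcpy blocks until the kernel has finished,
// so no explicit cudaDeviceSynchronize() is needed here.
cudaMemcpy(ptrW, ptrGMW, sizeVector, cudaMemcpyDeviceToHost);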

View File

@@ -9,8 +9,7 @@
 \*---------------------------------------------------------------------*/
 
-class AddArray
-{
+class AddArray {
 
 /*--------------------------------------*\
 |* Constructor                          *|
 \*-------------------------------------*/

View File

@@ -45,12 +45,13 @@ bool exemple_addArray_object()
 		ArrayTools::print(ptrV2, n);
 		cout << "--------------------------------------------------------------------" << endl;
 		ArrayTools::print(ptrW, n);
+		cout << endl;
 	}
 
 	// Delete
 	{
 		delete ptrV1;
-		delete ptrV1;
+		delete ptrV2;
 		delete ptrW;
 	}
@@ -69,16 +70,10 @@ static Grid createGrid()
 	const int MP = Hardware::getMPCount();
 	const int CORE_MP = Hardware::getCoreCountMP();
 
-	dim3 dg(1, 1, 1); // TODO addArray
-	dim3 db(1, 1, 1); // TODO addArray // product <= 1024
+	dim3 dg(MP, 2, 1);
+	dim3 db(CORE_MP, 3, 1); // product <= 1024
 	Grid grid(dg, db);
 
-	// to remove once coded
-	{
-	Couts::redln("aie aie aie, your best grid won t build itself");
-	assert(false);
-	}
-
 	return grid;
 }

View File

@@ -45,12 +45,13 @@ bool exemple_addArray_11()
 		ArrayTools::print(ptrV2, n);
 		cout << "--------------------------------------------------------------------" << endl;
 		ArrayTools::print(ptrW, n);
+		cout << endl;
 	}
 
 	// Delete
 	{
 		delete ptrV1;
-		delete ptrV1;
+		delete ptrV2;
 		delete ptrW;
 	}

View File

@@ -2,6 +2,7 @@
 #include <stdlib.h>
 
 #include "Couts.h"
+#include "Hardware.h"
 
 using std::cerr;
 using std::cout;
@@ -29,18 +30,30 @@ static void scalar(bool& isOk);
 int main(int argc , char** argv)
 {
+	// Optional
+	{
+		Hardware::loadCudaDriverAll();
+		Hardware::printAllSimple();
+
+		const int DEVICE_ID = 0;
+		Hardware::setDevice(DEVICE_ID);
+	}
+
 	const int IS_VERBOSE = true;
 
+	// run
+	{
 	bool isOk = true;
 
 	// Comment out whatever you do not need below
-	scalar(isOk); // comment out, in the method below, whatever you do not want to run
+	//scalar(isOk); // comment out, in the method below, whatever you do not want to run
 	array(isOk); // comment out, in the method below, whatever you do not want to run
 
 	Couts::statusln(isOk);
 
 	return isOk ? EXIT_SUCCESS : EXIT_FAILURE;
+	}
 }
 
 /*-------------------------------------*\
 |* Private                             *|
@@ -56,7 +69,7 @@ static void array(bool& isOk)
 {
 	isOk &= exemple_addArray_procedurale();
 	isOk &= exemple_addArray_object();
-	isOk &= exemple_addArray_11();
+	// isOk &= exemple_addArray_11();
 }
 
 /*----------------------------------------------------------------------*\

View File

@@ -34,14 +34,14 @@ bool isPiSequentiel_OK(int n)
 \*-------------------------------------*/
 
 double piSequentiel(int n) {
-	const double delta_x = 1 / (double)n;
+	const double dx = 1.0 / (double)n;
 	double sum = 0;
 	for (int i = 0; i < n; i++) {
-		double xi = i * delta_x;
+		double xi = i * dx;
 		sum += fpi(xi);
 	}
-	return sum * delta_x;
+	return sum * dx;
 }
 
 /*----------------------------------------------------------------------*\

View File

@@ -45,16 +45,14 @@ double piOMPEntrelacerPromotionTab(int n)
 	double sum[NB_THREAD];
 
 	// intra-thread reduction
 	#pragma omp parallel
 	{
 		const int TID = Omps::getTid();
 
 		int s = TID;
 		double sum_thread = 0;
-		while (s < n)
-		{
+		while (s < n) {
 			double xi = s * delta_x;
 			sum_thread += fpi(xi);
 			s += NB_THREAD;

View File

@@ -37,8 +37,29 @@ bool isPiOMPEntrelacerCritical_Ok(int n)
 double piOMPEntrelacerCritical(int n)
 {
-	//TODO
-	return -1;
+	const double dx = 1.0 / (double)n;
+	const int NB_THREAD = Omps::setAndGetNaturalGranularity();
+	double total = 0; // double, not int: the partial sums are fractional
+
+	#pragma omp parallel
+	{
+		const int TID = Omps::getTid();
+
+		int s = TID;
+		double sum_thread = 0;
+		while (s < n)
+		{
+			double xi = s * dx;
+			sum_thread += fpi(xi);
+			s += NB_THREAD;
+		}
+
+		#pragma omp critical
+		{
+			total += sum_thread;
+		}
+	}
+
+	return total * dx;
 }
 
 /*----------------------------------------------------------------------*\

View File

@@ -40,8 +40,26 @@ bool isPiOMPEntrelacerAtomic_Ok(int n)
  */
 double piOMPEntrelacerAtomic(int n)
 {
-	// TODO
-	return -1;
+	const double dx = 1.0 / (double)n;
+	const int NB_THREAD = Omps::setAndGetNaturalGranularity();
+	double total = 0; // double, not int: the partial sums are fractional
+
+	#pragma omp parallel
+	{
+		const int TID = Omps::getTid();
+
+		int s = TID;
+		double sum_thread = 0;
+		while (s < n)
+		{
+			double xi = s * dx;
+			sum_thread += fpi(xi);
+			s += NB_THREAD;
+		}
+
+		#pragma omp atomic
+		total += sum_thread;
+	}
+
+	return total * dx;
 }
 
 /*----------------------------------------------------------------------*\

View File

@@ -43,7 +43,18 @@ bool isPiOMPforPromotionTab_Ok(int n)
  */
 double piOMPforPromotionTab(int n)
 {
-	//TODO
-	return -1;
+	const double dx = 1.0 / (double)n;
+	const int NB_THREAD = Omps::setAndGetNaturalGranularity();
+
+	double total[NB_THREAD];
+	for (int i = 0; i < NB_THREAD; i++) total[i] = 0.0; // a VLA cannot be brace-initialized
+
+	#pragma omp parallel for
+	for (int i = 0; i < n; i++) {
+		const int TID = Omps::getTid();
+		double xi = i * dx;
+		total[TID] += fpi(xi); // promotion: one accumulator slot per thread
+	}
+
+	double sum = 0;
+	for (int i = 0; i < NB_THREAD; i++) sum += total[i];
+	return sum * dx;
 }

View File

@@ -41,11 +41,18 @@ bool isPiOMPforReduction_Ok(int n)
  */
 double piOMPforReduction(int n)
 {
-	//TODO
-	return -1;
+	const double dx = 1.0 / (double)n;
+	double sum = 0;
+
+	#pragma omp parallel for reduction(+:sum)
+	for (int i = 0; i < n; i++)
+	{
+		double xi = i * dx;
+		sum += fpi(xi);
+	}
+
+	return sum * dx;
 }
 
 /*----------------------------------------------------------------------*\
 |* End                                                                  *|
 \*---------------------------------------------------------------------*/
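As a cross-check, the reduction version runs unchanged outside the lab framework. A self-contained sketch, replacing Omps with plain OpenMP and re-declaring fpi locally under the assumption that it is 4/(1+x*x), as in the CUDA kernel above:

#include <cstdio>
#include <omp.h>

static double fpi(double x) { return 4.0 / (1.0 + x * x); } // assumed integrand

int main()
{
	const int n = 10 * 1000 * 1000;
	const double dx = 1.0 / (double)n;

	double sum = 0;
	// reduction(+:sum) gives each thread a private copy of sum and
	// combines them at the end of the loop, with no critical section.
	#pragma omp parallel for reduction(+:sum)
	for (int i = 0; i < n; i++)
	{
		double xi = i * dx;
		sum += fpi(xi);
	}

	printf("pi ~= %.10f\n", sum * dx); // left Riemann sum converges to pi
	return 0;
}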