d4/da1/Variant3_8cpp_source.html

 #include "CellFaceData.h"

 #include "exahype2/CellData.h"

 #include "kernels/AderSolver/BufferSizes.h"

 #include "kernels/AderSolver/FaceIntegral.h"

 #include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"

 #include "kernels/AderSolver/MaxScaledEigenvalue.h"

 #include "kernels/AderSolver/RiemannSolver.h"

 #include "repositories/SolverRepository.h"

 #include "Utils.h"

 #include "Variants.h"


 #include <cstring>


 // Log device of variant 3; runBenchmarks() hands it to reportRuntime().
 tarch::logging::Log variant3::_log("variants3::");


 // Makes the benchmark helpers used below (initInputData(), reportRuntime(),
 // NumberOfInputEntriesPerCell, NumberOfOutputEntriesPerCell, ...) visible
 // without qualification.
 using namespace benchmarks::exahype2::kernelbenchmarks;


 /**
  * Runs the fused space-time predictor / volume integral kernel once for a
  * single cell and stores the elapsed calendar time in the measurement.
  *
  * @param myCellData  Cell container. QIn, cellCentre and cellSize are read at
  *                    cellId; t and dt are read from the container itself.
  * @param myFaceData  Face container. Its QIn/QOut buffers at faceId are handed
  *                    to the kernel through the lQhbnd/lFhbnd pointer tables.
  * @param cellId      Index into myCellData.
  * @param faceId      Index into myFaceData.
  * @param measurement Receives the calendar time of the kernel call.
  */
 inline void initialTask(
   exahype2::CellData<SolverPrecision, SolverPrecision>*     myCellData,
   exahype2::CellFaceData<SolverPrecision, SolverPrecision>* myFaceData,
   const int                               cellId,
   const int                               faceId,
   tarch::timing::Measurement&             measurement
 ) {
   // The faces of a cell are assumed to follow each other in typical Peano
   // order, e.g. left, lower, right, upper. Table entries [0, DIMENSIONS)
   // point getBndFaceSize()/getBndFluxSize() entries into their buffer, the
   // entries [DIMENSIONS, 2*DIMENSIONS) point at the start of their buffer.
   SolverPrecision* lQhbnd[2 * DIMENSIONS];
   SolverPrecision* lFhbnd[2 * DIMENSIONS];
   for (int axis = 0; axis < DIMENSIONS; axis++) {
     lQhbnd[axis]              = &myFaceData->QIn[faceId][axis][kernels::AderSolver::getBndFaceSize()];
     lQhbnd[axis + DIMENSIONS] = &myFaceData->QIn[faceId][DIMENSIONS + axis][0];
     lFhbnd[axis]              = &myFaceData->QOut[faceId][axis][kernels::AderSolver::getBndFluxSize()];
     lFhbnd[axis + DIMENSIONS] = &myFaceData->QOut[faceId][DIMENSIONS + axis][0];
   }

   tarch::timing::Watch stopwatch("::runBenchmarks", "assessKernel(...)", false);

   // The value returned by the kernel is not evaluated in this task.
   int iterationCount = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
     repositories::instanceOfAderSolver,
     lQhbnd,
     lFhbnd,
     myCellData->QIn[cellId],
     myCellData->cellCentre[cellId],
     myCellData->cellSize[cellId],
     myCellData->t,
     myCellData->dt
   );

   stopwatch.stop();
   measurement.setValue(stopwatch.getCalendarTime());
 }


 /**
  * Runs the Riemann solver on one face buffer pair of a cell and stores the
  * elapsed calendar time in the measurement.
  *
  * @param myFaceData    Face container; QIn/QOut, t, dt, cellCentre and
  *                      cellSize are all read at faceId.
  * @param faceId        Index into myFaceData.
  * @param faceDirection Selects the face buffer and is forwarded to the solver
  *                      as direction; the coordinate axis that is shifted is
  *                      faceDirection % DIMENSIONS.
  * @param measurement   Receives the calendar time of this task.
  */
 inline void firstTask(
   exahype2::CellFaceData<SolverPrecision, SolverPrecision>* myFaceData,
   const int                               faceId,
   const int                               faceDirection,
   tarch::timing::Measurement&             measurement
 ) {
   // The stopwatch is created first, so the set-up below is part of the timing.
   tarch::timing::Watch stopwatch("::runBenchmarks", "assessKernel(...)", false);

   // Shift the cell centre by half a cell width along the face's axis.
   const int axis = faceDirection % DIMENSIONS;
   tarch::la::Vector<DIMENSIONS, double> faceCenter = myFaceData->cellCentre[faceId];
   faceCenter[axis] += 0.5 * myFaceData->cellSize[faceId][axis];

   // Each face buffer is addressed as two consecutive halves.
   SolverPrecision* const fluxFirstHalf      = &myFaceData->QOut[faceId][faceDirection][0];
   SolverPrecision* const fluxSecondHalf     = &myFaceData->QOut[faceId][faceDirection][kernels::AderSolver::getBndFluxSize()];
   SolverPrecision* const solutionFirstHalf  = &myFaceData->QIn[faceId][faceDirection][0];
   SolverPrecision* const solutionSecondHalf = &myFaceData->QIn[faceId][faceDirection][kernels::AderSolver::getBndFaceSize()];

   // We are assuming that there are no boundaries. A real ExaHyPE application
   // will have boundaries, but we can still assume that the corresponding
   // faces will have received data.
   kernels::AderSolver::riemannSolver<SolverPrecision>(
     repositories::instanceOfAderSolver,
     fluxFirstHalf,
     fluxSecondHalf,
     solutionFirstHalf,
     solutionSecondHalf,
     myFaceData->t[faceId],
     myFaceData->dt[faceId],
     faceCenter,
     myFaceData->cellSize[faceId],
     faceDirection,
     false,
     0
   );

   stopwatch.stop();
   measurement.setValue(stopwatch.getCalendarTime());
 }


 /**
  * Per-cell task: applies the face integrals for both faces of every
  * coordinate direction, stores the maximum scaled eigenvalue of the cell and
  * then runs the fused space-time predictor / volume integral kernel. The
  * calendar time of the whole sequence is stored in the measurement.
  *
  * @param myCellData  Cell container. QIn, cellCentre and cellSize are read at
  *                    cellId, maxEigenvalue[cellId] is overwritten; t and dt
  *                    are read from the container itself.
  * @param myFaceData  Face container whose QIn/QOut buffers at faceId are used.
  * @param cellId      Index into myCellData.
  * @param faceId      Index into myFaceData.
  * @param measurement Receives the calendar time of this task.
  */
 inline void secondTask(
   exahype2::CellData<SolverPrecision, SolverPrecision>*     myCellData,
   exahype2::CellFaceData<SolverPrecision, SolverPrecision>* myFaceData,
   const int                               cellId,
   const int                               faceId,
   tarch::timing::Measurement&             measurement
 ) {
   // The faces of a cell are assumed to follow each other in typical Peano
   // order, e.g. left, lower, right, upper. Table entries [0, DIMENSIONS)
   // point getBndFaceSize()/getBndFluxSize() entries into their buffer, the
   // entries [DIMENSIONS, 2*DIMENSIONS) point at the start of their buffer.
   SolverPrecision* lQhbnd[2 * DIMENSIONS];
   SolverPrecision* lFhbnd[2 * DIMENSIONS];
   for (int axis = 0; axis < DIMENSIONS; axis++) {
     lQhbnd[axis]              = &myFaceData->QIn[faceId][axis][kernels::AderSolver::getBndFaceSize()];
     lQhbnd[axis + DIMENSIONS] = &myFaceData->QIn[faceId][DIMENSIONS + axis][0];
     lFhbnd[axis]              = &myFaceData->QOut[faceId][axis][kernels::AderSolver::getBndFluxSize()];
     lFhbnd[axis + DIMENSIONS] = &myFaceData->QOut[faceId][DIMENSIONS + axis][0];
   }

   tarch::timing::Watch stopwatch("::runBenchmarks", "assessKernel(...)", false);

   for (int direction = 0; direction < DIMENSIONS; direction++) {
     const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][direction];

     // Negative face (orientation 0): reads the second half of the buffer.
     kernels::AderSolver::faceIntegral(
       myCellData->QIn[cellId],
       &myFaceData->QOut[faceId][direction][kernels::AderSolver::getBndFluxSize()],
       direction,
       0,
       inverseDxDirection,
       myCellData->dt
     );

     // Positive face (orientation 1): reads the start of the buffer.
     kernels::AderSolver::faceIntegral(
       myCellData->QIn[cellId],
       &myFaceData->QOut[faceId][direction + DIMENSIONS][0],
       direction,
       1,
       inverseDxDirection,
       myCellData->dt
     );
   }

   myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
     repositories::instanceOfAderSolver,
     myCellData->QIn[cellId],
     myCellData->cellCentre[cellId],
     myCellData->cellSize[cellId],
     myCellData->t,
     myCellData->dt
   );

   // The value returned by the kernel is not evaluated in this task.
   int iterationCount = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
     repositories::instanceOfAderSolver,
     lQhbnd,
     lFhbnd,
     myCellData->QIn[cellId],
     myCellData->cellCentre[cellId],
     myCellData->cellSize[cellId],
     myCellData->t,
     myCellData->dt
   );

   stopwatch.stop();
   measurement.setValue(stopwatch.getCalendarTime());
 }


 /**
  * Sets up numberOfCells cells plus their face buffers, runs initialTask()
  * once per cell, and then repeatedly executes the firstTask()/secondTask()
  * sweeps while recording compute and launch timings, which are finally
  * passed to reportRuntime(). With WITH_OPENMP defined the measurement is
  * repeated for every entry of NumberOfLaunchingThreads; otherwise it runs
  * once with numberOfThreads == 1.
  *
  * @param numberOfCells Number of cells (and face sets) to allocate and process.
  * @param timeStamp     Time stamp assigned to the cell and face data.
  * @param timeStepSize  Time step size assigned to the cell and face data.
  * @param cellCenter    Centre assigned to every cell.
  * @param cellSize      Size assigned to every cell.
  */
 void variant3::runBenchmarks(
   int                                         numberOfCells,
   double                                      timeStamp,
   double                                      timeStepSize,
   const tarch::la::Vector<DIMENSIONS, double> cellCenter,
   const tarch::la::Vector<DIMENSIONS, double> cellSize
 ) {
   tarch::timing::Measurement timingComputeKernel;

   exahype2::CellData<SolverPrecision, SolverPrecision>     cellData(numberOfCells);
   exahype2::CellFaceData<SolverPrecision, SolverPrecision> cellFaceData(numberOfCells);

   // t and dt exist once per CellData container, so they are set outside the
   // per-cell loop.
   cellData.t  = timeStamp;
   cellData.dt = timeStepSize;

   for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
     cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
       NumberOfInputEntriesPerCell,
       tarch::MemoryLocation::Heap
     );
     // The output buffer has to exist before it is zeroed below and freed at
     // the end of this function; writing through a null pointer with
     // std::memset is undefined behaviour.
     cellData.QOut[cellIndex] = tarch::allocateMemory<SolverPrecision>(
       NumberOfOutputEntriesPerCell,
       tarch::MemoryLocation::Heap
     );
     cellData.cellCentre[cellIndex]    = cellCenter;
     cellData.cellSize[cellIndex]      = cellSize;
     cellData.maxEigenvalue[cellIndex] = 0.0;

     initInputData(cellData.QIn[cellIndex], cellCenter, cellSize);
     std::memset(cellData.QOut[cellIndex], 0, NumberOfOutputEntriesPerCell * sizeof(SolverPrecision));

     // One buffer per coordinate direction, each sized for two face halves.
     for (int i = 0; i < DIMENSIONS; i++) {
       cellFaceData.QIn[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
         2 * kernels::AderSolver::getBndFaceSize(),
         tarch::MemoryLocation::Heap
       );
       cellFaceData.QOut[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
         2 * kernels::AderSolver::getBndFluxSize(),
         tarch::MemoryLocation::Heap
       );
     }

     // Stitch faces together by connecting the left and right faces of any
     // given cell: entry i + DIMENSIONS aliases the buffer of entry i.
     for (int i = 0; i < DIMENSIONS; i++) {
       cellFaceData.QIn[cellIndex][i + DIMENSIONS]  = cellFaceData.QIn[cellIndex][i];
       cellFaceData.QOut[cellIndex][i + DIMENSIONS] = cellFaceData.QOut[cellIndex][i];
     }

     cellFaceData.t[cellIndex]          = timeStamp;
     cellFaceData.dt[cellIndex]         = timeStepSize;
     cellFaceData.cellCentre[cellIndex] = cellCenter;
     cellFaceData.cellSize[cellIndex]   = cellSize;
   }

   int numberOfThreads = 1;

   #if defined(WITH_OPENMP)
   for (int threadIndex = 0; threadIndex < static_cast<int>(NumberOfLaunchingThreads.size()); threadIndex++) {
     numberOfThreads = NumberOfLaunchingThreads[threadIndex];
   #endif

     // Initial task; its timings are discarded by the erase() below.
     for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
       initialTask(&cellData, &cellFaceData, cellIndex, cellIndex, timingComputeKernel);
     }

     timingComputeKernel.erase();
     tarch::timing::Measurement timingKernelLaunch;

     // NOTE(review): the inclusive bound executes NumberOfSamples + 1 sweeps;
     // confirm that this is intended.
     for (int sample = 0; sample <= NumberOfSamples; sample++) {

       tarch::timing::Watch watchKernelLaunch("::runBenchmarks", "assessKernel(...)", false);

       // NOTE(review): this is `omp parallel` without a worksharing `for`, so
       // every thread of the team executes the complete cell loop instead of a
       // share of it, and all threads update timingComputeKernel; confirm that
       // this is the intended launch pattern.
       #if defined(WITH_OPENMP)
       #pragma omp parallel num_threads(NumberOfLaunchingThreads[threadIndex])
       #endif
       for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
         // We're not actually simulating a real grid, instead we are making
         // each cell its own neighbour, as though it were a whole periodic
         // domain. One Riemann solve per coordinate direction, so that the
         // third face pair is also handled when DIMENSIONS == 3.
         for (int direction = 0; direction < DIMENSIONS; direction++) {
           firstTask(&cellFaceData, cellIndex, direction, timingComputeKernel);
         }
       }

       #if defined(WITH_OPENMP)
       #pragma omp parallel num_threads(NumberOfLaunchingThreads[threadIndex])
       #endif
       for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
         cellData.maxEigenvalue[cellIndex] = 0.0;
         secondTask(&cellData, &cellFaceData, cellIndex, cellIndex, timingComputeKernel);
       }

       watchKernelLaunch.stop();
       timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
     }

     reportRuntime("variant 3", timingComputeKernel, timingKernelLaunch, numberOfCells, numberOfThreads, _log);
     // allocateAndStoreOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
     // validateOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);

   #if defined(WITH_OPENMP)
   }//threadIndex
   #endif

   for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
     tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
     tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);

     // Only the first DIMENSIONS face entries own memory; the remaining ones
     // alias them and must not be freed a second time.
     for (int i = 0; i < DIMENSIONS; i++) {
       tarch::freeMemory(cellFaceData.QIn[cellIndex][i], tarch::MemoryLocation::Heap);
       tarch::freeMemory(cellFaceData.QOut[cellIndex][i], tarch::MemoryLocation::Heap);
     }
   }
 }

CellFaceData.h

Utils.h

firstTask
void firstTask(exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int faceId, const int faceDirection, tarch::timing::Measurement &measurement)
Definition: Variant3.cpp:99

secondTask
void secondTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition: Variant3.cpp:134

initialTask
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition: Variant3.cpp:44

Variants.h

timeStamp
constexpr double timeStamp
Definition: KernelBenchmarks-main.cpp:46

cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellCenter
Definition: KernelBenchmarks-main.cpp:49

cellSize
const tarch::la::Vector< DIMENSIONS, double > cellSize
Definition: KernelBenchmarks-main.cpp:51

timeStepSize
constexpr double timeStepSize
Definition: KernelBenchmarks-main.cpp:47

timingComputeKernel
tarch::timing::Measurement timingComputeKernel
Definition: KernelBenchmarks-main.cpp:84

benchmarks::exahype2::kernelbenchmarks
Definition: Utils.h:12

benchmarks::exahype2::kernelbenchmarks::NumberOfInputEntriesPerCell
constexpr int NumberOfInputEntriesPerCell
Definition: Utils.h:14

benchmarks::exahype2::kernelbenchmarks::reportRuntime
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
Definition: Utils.h:68

benchmarks::exahype2::kernelbenchmarks::NumberOfOutputEntriesPerCell
constexpr int NumberOfOutputEntriesPerCell
Definition: Utils.h:20

benchmarks::exahype2::kernelbenchmarks::initInputData
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
Definition: Utils.h:30

variant1::_log
tarch::logging::Log _log
This is variant 1 of the fused kernels.

variant3::runBenchmarks
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Definition: Variant3.cpp:225

variant3::_log
tarch::logging::Log _log
This is variant 3 of the fused kernels.

exahype2::CellFaceData
Represents the faces of one cell, with a total of 2*Dim faces per cell For ADER QIn will contain the ...
Definition: CellFaceData.h:20

exahype2::CellFaceData::cellSize
tarch::la::Vector< DIMENSIONS, double > * cellSize
Definition: CellFaceData.h:27

exahype2::CellFaceData::cellCentre
tarch::la::Vector< DIMENSIONS, double > * cellCentre
Definition: CellFaceData.h:26

exahype2::CellFaceData::dt
double * dt
Definition: CellFaceData.h:30

exahype2::CellFaceData::QOut
outType *(* QOut)[2 *DIMENSIONS]
Out values.
Definition: CellFaceData.h:60

exahype2::CellFaceData::QIn
inType *(* QIn)[2 *DIMENSIONS]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
Definition: CellFaceData.h:25

exahype2::CellFaceData::t
double * t
Definition: CellFaceData.h:29