28 #include "exahype2/CellData.h"
29 #include "kernels/AderSolver/BufferSizes.h"
30 #include "kernels/AderSolver/FaceIntegral.h"
31 #include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
32 #include "kernels/AderSolver/MaxScaledEigenvalue.h"
33 #include "kernels/AderSolver/RiemannSolver.h"
34 #include "repositories/SolverRepository.h"
45 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
49 tarch::timing::Measurement& measurement
54 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
55 &myFaceData->
QIn[faceId][0][kernels::AderSolver::getBndFaceSize()],
56 &myFaceData->
QIn[faceId][1][kernels::AderSolver::getBndFaceSize()],
58 &myFaceData->
QIn[faceId][2][kernels::AderSolver::getBndFaceSize()],
60 &myFaceData->
QIn[faceId][DIMENSIONS + 0][0],
61 &myFaceData->
QIn[faceId][DIMENSIONS + 1][0]
64 &myFaceData->
QIn[faceId][DIMENSIONS + 2][0]
68 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
69 &myFaceData->
QOut[faceId][0][kernels::AderSolver::getBndFluxSize()],
70 &myFaceData->
QOut[faceId][1][kernels::AderSolver::getBndFluxSize()],
72 &myFaceData->
QOut[faceId][2][kernels::AderSolver::getBndFluxSize()],
74 &myFaceData->
QOut[faceId][DIMENSIONS + 0][0],
75 &myFaceData->
QOut[faceId][DIMENSIONS + 1][0]
78 &myFaceData->
QOut[faceId][DIMENSIONS + 2][0]
82 tarch::timing::Watch watchKernelCompute(
"::runBenchmarks",
"assessKernel(...)",
false);
84 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
85 repositories::instanceOfAderSolver,
88 myCellData->QIn[cellId],
89 myCellData->cellCentre[cellId],
90 myCellData->cellSize[cellId],
95 watchKernelCompute.stop();
96 measurement.setValue(watchKernelCompute.getCalendarTime());
102 const int faceDirection,
103 tarch::timing::Measurement& measurement
106 tarch::timing::Watch watchKernelCompute(
"::runBenchmarks",
"assessKernel(...)",
false);
108 tarch::la::Vector<DIMENSIONS, double> faceCenter = myFaceData->
cellCentre[faceId];
109 faceCenter[faceDirection % DIMENSIONS] += 0.5 * myFaceData->
cellSize[faceId][faceDirection % DIMENSIONS];
115 kernels::AderSolver::riemannSolver<SolverPrecision>(
116 repositories::instanceOfAderSolver,
117 &myFaceData->
QOut[faceId][faceDirection][0],
118 &myFaceData->
QOut[faceId][faceDirection][kernels::AderSolver::getBndFluxSize()],
119 &myFaceData->
QIn[faceId][faceDirection][0],
120 &myFaceData->
QIn[faceId][faceDirection][kernels::AderSolver::getBndFaceSize()],
121 myFaceData->
t[faceId],
122 myFaceData->
dt[faceId],
130 watchKernelCompute.stop();
131 measurement.setValue(watchKernelCompute.getCalendarTime());
135 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
139 tarch::timing::Measurement& measurement
144 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
145 &myFaceData->
QIn[faceId][0][kernels::AderSolver::getBndFaceSize()],
146 &myFaceData->
QIn[faceId][1][kernels::AderSolver::getBndFaceSize()],
148 &myFaceData->
QIn[faceId][2][kernels::AderSolver::getBndFaceSize()],
150 &myFaceData->
QIn[faceId][DIMENSIONS + 0][0],
151 &myFaceData->
QIn[faceId][DIMENSIONS + 1][0]
154 &myFaceData->
QIn[faceId][DIMENSIONS + 2][0]
158 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
159 &myFaceData->
QOut[faceId][0][kernels::AderSolver::getBndFluxSize()],
160 &myFaceData->
QOut[faceId][1][kernels::AderSolver::getBndFluxSize()],
162 &myFaceData->
QOut[faceId][2][kernels::AderSolver::getBndFluxSize()],
164 &myFaceData->
QOut[faceId][DIMENSIONS + 0][0],
165 &myFaceData->
QOut[faceId][DIMENSIONS + 1][0]
168 &myFaceData->
QOut[faceId][DIMENSIONS + 2][0]
172 tarch::timing::Watch watchKernelCompute(
"::runBenchmarks",
"assessKernel(...)",
false);
174 for (
int d = 0; d < DIMENSIONS; d++) {
175 const int direction = d;
177 const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
179 kernels::AderSolver::faceIntegral(
180 myCellData->QIn[cellId],
181 &myFaceData->
QOut[faceId][d][kernels::AderSolver::getBndFluxSize()],
189 kernels::AderSolver::faceIntegral(
190 myCellData->QIn[cellId],
191 &myFaceData->
QOut[faceId][d + DIMENSIONS][0],
200 myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
201 repositories::instanceOfAderSolver,
202 myCellData->QIn[cellId],
203 myCellData->cellCentre[cellId],
204 myCellData->cellSize[cellId],
209 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
210 repositories::instanceOfAderSolver,
213 myCellData->QIn[cellId],
214 myCellData->cellCentre[cellId],
215 myCellData->cellSize[cellId],
220 watchKernelCompute.stop();
221 measurement.setValue(watchKernelCompute.getCalendarTime());
229 const tarch::la::Vector<DIMENSIONS, double>
cellCenter,
230 const tarch::la::Vector<DIMENSIONS, double>
cellSize
235 exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
238 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
239 cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
241 tarch::MemoryLocation::Heap
245 cellData.QOut[cellIndex] =
nullptr;
247 cellData.cellSize[cellIndex] =
cellSize;
248 cellData.maxEigenvalue[cellIndex] = 0.0;
253 for (
int i = 0; i < DIMENSIONS; i++) {
254 cellFaceData.
QIn[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
255 2 * kernels::AderSolver::getBndFaceSize(),
256 tarch::MemoryLocation::Heap
258 cellFaceData.
QOut[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
259 2 * kernels::AderSolver::getBndFluxSize(),
260 tarch::MemoryLocation::Heap
264 for (
int i = 0; i < DIMENSIONS; i++) {
265 cellFaceData.
QIn[cellIndex][i + DIMENSIONS] = cellFaceData.
QIn[cellIndex][i];
266 cellFaceData.
QOut[cellIndex][i + DIMENSIONS] = cellFaceData.
QOut[cellIndex][i];
277 int numberOfThreads = 1;
279 #if defined(WITH_OPENMP)
280 for (
int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
281 numberOfThreads = NumberOfLaunchingThreads[threadIndex];
285 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
290 tarch::timing::Measurement timingKernelLaunch;
292 for (
int sample = 0; sample <= NumberOfSamples; sample++) {
294 tarch::timing::Watch watchKernelLaunch(
"::runBenchmarks",
"assessKernel(...)",
false);
296 #if defined(WITH_OPENMP)
297 #pragma omp parallel num_threads(NumberOfLaunchingThreads[threadIndex])
299 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
306 #if defined(WITH_OPENMP)
307 #pragma omp parallel num_threads(NumberOfLaunchingThreads[threadIndex])
309 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
310 cellData.maxEigenvalue[cellIndex] = 0.0;
314 watchKernelLaunch.stop();
315 timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
323 #if defined(WITH_OPENMP)
327 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
328 tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
329 tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
331 for (
int i = 0; i < DIMENSIONS; i++) {
332 tarch::freeMemory(cellFaceData.
QIn[cellIndex][i], tarch::MemoryLocation::Heap);
333 tarch::freeMemory(cellFaceData.
QOut[cellIndex][i], tarch::MemoryLocation::Heap);
void firstTask(exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int faceId, const int faceDirection, tarch::timing::Measurement &measurement)
void secondTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
constexpr int NumberOfOutputEntriesPerCell
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
tarch::logging::Log _log
This is variant 1 of the fused kernels.
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
tarch::logging::Log _log
This is variant 3 of the fused kernels.
Represents the faces of one cell, with a total of 2*Dim faces per cell. For ADER, QIn will contain the ...
tarch::la::Vector< DIMENSIONS, double > * cellSize
tarch::la::Vector< DIMENSIONS, double > * cellCentre
outType *(* QOut)[2 *DIMENSIONS]
Out values.
inType *(* QIn)[2 *DIMENSIONS]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.