30 #include "exahype2/CellData.h"
31 #include "kernels/AderSolver/BufferSizes.h"
32 #include "kernels/AderSolver/FaceIntegral.h"
33 #include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
34 #include "kernels/AderSolver/MaxScaledEigenvalue.h"
35 #include "kernels/AderSolver/RiemannSolver.h"
36 #include "repositories/SolverRepository.h"
47 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
51 tarch::timing::Measurement& measurement
56 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
57 &myFaceData->
QIn[faceId][0][kernels::AderSolver::getBndFaceSize()],
58 &myFaceData->
QIn[faceId][1][kernels::AderSolver::getBndFaceSize()],
60 &myFaceData->
QIn[faceId][2][kernels::AderSolver::getBndFaceSize()],
62 &myFaceData->
QIn[faceId][DIMENSIONS + 0][0],
63 &myFaceData->
QIn[faceId][DIMENSIONS + 1][0]
66 &myFaceData->
QIn[faceId][DIMENSIONS + 2][0]
70 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
71 &myFaceData->
QOut[faceId][0][kernels::AderSolver::getBndFluxSize()],
72 &myFaceData->
QOut[faceId][1][kernels::AderSolver::getBndFluxSize()],
74 &myFaceData->
QOut[faceId][2][kernels::AderSolver::getBndFluxSize()],
76 &myFaceData->
QOut[faceId][DIMENSIONS + 0][0],
77 &myFaceData->
QOut[faceId][DIMENSIONS + 1][0]
80 &myFaceData->
QOut[faceId][DIMENSIONS + 2][0]
84 tarch::timing::Watch watchKernelCompute(
"::runBenchmarks",
"assessKernel(...)",
false);
86 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
87 repositories::instanceOfAderSolver,
90 myCellData->QIn[cellId],
91 myCellData->cellCentre[cellId],
92 myCellData->cellSize[cellId],
97 watchKernelCompute.stop();
98 measurement.setValue(watchKernelCompute.getCalendarTime());
103 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
107 tarch::timing::Measurement& measurement
114 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
115 &myFaceData->
QIn[faceId][0][kernels::AderSolver::getBndFaceSize()],
116 &myFaceData->
QIn[faceId][1][kernels::AderSolver::getBndFaceSize()],
118 &myFaceData->
QIn[faceId][2][kernels::AderSolver::getBndFaceSize()],
120 &myFaceData->
QIn[faceId][DIMENSIONS + 0][0],
121 &myFaceData->
QIn[faceId][DIMENSIONS + 1][0]
124 &myFaceData->
QIn[faceId][DIMENSIONS + 2][0]
128 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
129 &myFaceData->
QOut[faceId][0][kernels::AderSolver::getBndFluxSize()],
130 &myFaceData->
QOut[faceId][1][kernels::AderSolver::getBndFluxSize()],
132 &myFaceData->
QOut[faceId][2][kernels::AderSolver::getBndFluxSize()],
134 &myFaceData->
QOut[faceId][DIMENSIONS + 0][0],
135 &myFaceData->
QOut[faceId][DIMENSIONS + 1][0]
138 &myFaceData->
QOut[faceId][DIMENSIONS + 2][0]
142 tarch::timing::Watch watchKernelCompute(
"::runBenchmarks",
"assessKernel(...)",
false);
144 for (
int d = 0; d < DIMENSIONS; d++) {
145 const int direction = d;
147 kernels::AderSolver::riemannSolver<SolverPrecision>(
148 repositories::instanceOfAderSolver,
149 &myFaceData->
QOut[faceId][d][0],
150 &myFaceData->
QOut[faceId][d][kernels::AderSolver::getBndFluxSize()],
151 &myFaceData->
QIn[faceId][d][0],
152 &myFaceData->
QIn[faceId][d][kernels::AderSolver::getBndFaceSize()],
153 0.5 * (myFaceData->
t[faceId] + myFaceData->
t[faceId]),
154 0.5 * (myFaceData->
dt[faceId] + myFaceData->
dt[faceId]),
162 const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
164 kernels::AderSolver::faceIntegral(
165 myCellData->QIn[cellId],
166 &myFaceData->
QOut[faceId][d][kernels::AderSolver::getBndFluxSize()],
173 tarch::la::Vector<DIMENSIONS, double> faceCentre = myFaceData->
cellCentre[faceId];
174 faceCentre[d] += 0.5 * myFaceData->
cellSize[faceId][d];
176 kernels::AderSolver::riemannSolver<SolverPrecision>(
177 repositories::instanceOfAderSolver,
178 &myFaceData->
QOut[faceId][d + DIMENSIONS][0],
179 &myFaceData->
QOut[faceId][d + DIMENSIONS][kernels::AderSolver::getBndFluxSize()],
180 &myFaceData->
QIn[faceId][d + DIMENSIONS][0],
181 &myFaceData->
QIn[faceId][d + DIMENSIONS][kernels::AderSolver::getBndFaceSize()],
182 0.5 * (myFaceData->
t[faceId] + myFaceData->
t[faceId]),
183 0.5 * (myFaceData->
dt[faceId] + myFaceData->
dt[faceId]),
191 faceCentre[d] -= myFaceData->
cellSize[faceId][d];
193 kernels::AderSolver::faceIntegral(
194 myCellData->QIn[cellId],
195 &myFaceData->
QOut[faceId][d + DIMENSIONS][0],
204 myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
205 repositories::instanceOfAderSolver,
206 myCellData->QIn[cellId],
207 myCellData->cellCentre[cellId],
208 myCellData->cellSize[cellId],
213 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
214 repositories::instanceOfAderSolver,
217 myCellData->QIn[cellId],
218 myCellData->cellCentre[cellId],
219 myCellData->cellSize[cellId],
224 watchKernelCompute.stop();
225 measurement.setValue(watchKernelCompute.getCalendarTime());
233 const tarch::la::Vector<DIMENSIONS, double>
cellCenter,
234 const tarch::la::Vector<DIMENSIONS, double>
cellSize
239 exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
242 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
243 cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
245 tarch::MemoryLocation::Heap
249 cellData.QOut[cellIndex] =
nullptr;
251 cellData.cellSize[cellIndex] =
cellSize;
252 cellData.maxEigenvalue[cellIndex] = 0.0;
257 for (
int i = 0; i < 2 * DIMENSIONS; i++) {
258 cellFaceData.
QIn[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
259 2 * kernels::AderSolver::getBndFaceSize(),
260 tarch::MemoryLocation::Heap
262 cellFaceData.
QOut[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
263 2 * kernels::AderSolver::getBndFluxSize(),
264 tarch::MemoryLocation::Heap
276 int numberOfThreads = 1;
278 #if defined(WITH_OPENMP)
279 for (
int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
280 numberOfThreads = NumberOfLaunchingThreads[threadIndex];
284 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
289 tarch::timing::Measurement timingKernelLaunch;
291 for (
int sample = 0; sample <= NumberOfSamples; sample++) {
293 #if defined(WITH_OPENMP)
294 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
296 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
300 for (
int d = 0; d < DIMENSIONS; d++) {
303 &cellFaceData.
QIn[cellIndex][d][kernels::AderSolver::getBndFaceSize()],
304 kernels::AderSolver::getBndFaceSize(),
305 &cellFaceData.
QIn[cellIndex][d + DIMENSIONS][kernels::AderSolver::getBndFaceSize()]
308 &cellFaceData.
QOut[cellIndex][d][kernels::AderSolver::getBndFluxSize()],
309 kernels::AderSolver::getBndFluxSize(),
310 &cellFaceData.
QOut[cellIndex][d + DIMENSIONS][kernels::AderSolver::getBndFluxSize()]
315 &cellFaceData.
QIn[cellIndex][d + DIMENSIONS][0],
316 kernels::AderSolver::getBndFaceSize(),
317 &cellFaceData.
QIn[cellIndex][d][0]
320 &cellFaceData.
QOut[cellIndex][d + DIMENSIONS][0],
321 kernels::AderSolver::getBndFluxSize(),
322 &cellFaceData.
QOut[cellIndex][d][0]
327 tarch::timing::Watch watchKernelLaunch(
"::runBenchmarks",
"assessKernel(...)",
false);
329 #if defined(WITH_OPENMP)
330 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
332 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
333 cellData.maxEigenvalue[cellIndex] = 0.0;
337 watchKernelLaunch.stop();
338 timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
346 #if defined(WITH_OPENMP)
350 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
351 tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
352 tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
354 for (
int i = 0; i < 2 * DIMENSIONS; i++) {
355 tarch::freeMemory(cellFaceData.
QIn[cellIndex][i], tarch::MemoryLocation::Heap);
356 tarch::freeMemory(cellFaceData.
QOut[cellIndex][i], tarch::MemoryLocation::Heap);
void runKernels(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
constexpr int NumberOfOutputEntriesPerCell
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
tarch::logging::Log _log
This is variant 1 of the fused kernels.
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
tarch::logging::Log _log
This is variant 2 of the fused kernels.
Represents the faces of one cell, with a total of 2*Dim faces per cell. For ADER, QIn will contain the ...
tarch::la::Vector< DIMENSIONS, double > * cellSize
tarch::la::Vector< DIMENSIONS, double > * cellCentre
outType *(* QOut)[2 *DIMENSIONS]
Out values.
inType *(* QIn)[2 *DIMENSIONS]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.