25 #include "exahype2/CellData.h"
26 #include "kernels/AderSolver/BufferSizes.h"
27 #include "kernels/AderSolver/FaceIntegral.h"
28 #include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
29 #include "kernels/AderSolver/MaxScaledEigenvalue.h"
30 #include "kernels/AderSolver/RiemannSolver.h"
32 #include "repositories/SolverRepository.h"
43 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
47 tarch::timing::Measurement& measurement
54 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
55 myFaceData->
QIn[faceId + 0][1],
56 myFaceData->
QIn[faceId + 1][1],
58 myFaceData->
QIn[faceId + 2][1],
60 myFaceData->
QIn[faceId + DIMENSIONS + 0][0],
61 myFaceData->
QIn[faceId + DIMENSIONS + 1][0]
64 myFaceData->
QIn[faceId + DIMENSIONS + 2][0]
68 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
69 myFaceData->
QOut[faceId + 0][1],
70 myFaceData->
QOut[faceId + 1][1],
72 myFaceData->
QOut[faceId + 2][1],
74 myFaceData->
QOut[faceId + DIMENSIONS + 0][0],
75 myFaceData->
QOut[faceId + DIMENSIONS + 1][0]
78 myFaceData->
QOut[faceId + DIMENSIONS + 2][0]
82 tarch::timing::Watch watchKernelCompute(
"::runBenchmarks",
"assessKernel(...)",
false);
84 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
85 repositories::instanceOfAderSolver,
88 myCellData->QIn[cellId],
89 myCellData->cellCentre[cellId],
90 myCellData->cellSize[cellId],
95 watchKernelCompute.stop();
96 measurement.setValue(watchKernelCompute.getCalendarTime());
101 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
105 tarch::timing::Measurement& measurement
112 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
113 myFaceData->
QIn[faceId + 0][1],
114 myFaceData->
QIn[faceId + 1][1],
116 myFaceData->
QIn[faceId + 2][1],
118 myFaceData->
QIn[faceId + DIMENSIONS + 0][0],
119 myFaceData->
QIn[faceId + DIMENSIONS + 1][0]
122 myFaceData->
QIn[faceId + DIMENSIONS + 2][0]
126 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
127 myFaceData->
QOut[faceId + 0][1],
128 myFaceData->
QOut[faceId + 1][1],
130 myFaceData->
QOut[faceId + 2][1],
132 myFaceData->
QOut[faceId + DIMENSIONS + 0][0],
133 myFaceData->
QOut[faceId + DIMENSIONS + 1][0]
136 myFaceData->
QOut[faceId + DIMENSIONS + 2][0]
140 tarch::timing::Watch watchKernelCompute(
"::runBenchmarks",
"assessKernel(...)",
false);
142 for (
int d = 0; d < DIMENSIONS; d++) {
143 const int direction = d;
145 kernels::AderSolver::riemannSolver<SolverPrecision>(
146 repositories::instanceOfAderSolver,
147 myFaceData->
QOut[faceId + d][0],
148 myFaceData->
QOut[faceId + d][1],
149 myFaceData->
QIn[faceId + d][0],
150 myFaceData->
QIn[faceId + d][1],
151 myFaceData->
t[faceId + d],
152 myFaceData->
dt[faceId + d],
160 const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
162 kernels::AderSolver::faceIntegral(
163 myCellData->QIn[cellId],
164 myFaceData->
QOut[faceId + d][1],
171 kernels::AderSolver::riemannSolver<SolverPrecision>(
172 repositories::instanceOfAderSolver,
173 myFaceData->
QOut[faceId + d + DIMENSIONS][0],
174 myFaceData->
QOut[faceId + d + DIMENSIONS][1],
175 myFaceData->
QIn[faceId + d + DIMENSIONS][0],
176 myFaceData->
QIn[faceId + d + DIMENSIONS][1],
177 myFaceData->
t[faceId + d + DIMENSIONS],
178 myFaceData->
dt[faceId + d + DIMENSIONS],
179 myFaceData->
faceCentre[faceId + d + DIMENSIONS],
180 myFaceData->
faceSize[faceId + d + DIMENSIONS],
187 kernels::AderSolver::faceIntegral(
188 myCellData->QIn[cellId],
189 myFaceData->
QOut[faceId + DIMENSIONS][0],
198 myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
199 repositories::instanceOfAderSolver,
200 myCellData->QIn[cellId],
201 myCellData->cellCentre[cellId],
202 myCellData->cellSize[cellId],
207 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
208 repositories::instanceOfAderSolver,
211 myCellData->QIn[cellId],
212 myCellData->cellCentre[cellId],
213 myCellData->cellSize[cellId],
218 watchKernelCompute.stop();
219 measurement.setValue(watchKernelCompute.getCalendarTime());
227 const tarch::la::Vector<DIMENSIONS, double>
cellCenter,
228 const tarch::la::Vector<DIMENSIONS, double>
cellSize
233 exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
236 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
237 cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
239 tarch::MemoryLocation::Heap
243 cellData.QOut[cellIndex] =
nullptr;
245 cellData.cellSize[cellIndex] =
cellSize;
246 cellData.maxEigenvalue[cellIndex] = 0.0;
255 for (
int faceIndex = 0; faceIndex < 2 * DIMENSIONS * numberOfCells; faceIndex++) {
257 faceData.
QIn[faceIndex][0] = tarch::allocateMemory<SolverPrecision>(
258 kernels::AderSolver::getBndFaceSize(),
259 tarch::MemoryLocation::Heap
261 faceData.
QIn[faceIndex][1] = tarch::allocateMemory<SolverPrecision>(
262 kernels::AderSolver::getBndFaceSize(),
263 tarch::MemoryLocation::Heap
265 faceData.
QOut[faceIndex][0] = tarch::allocateMemory<SolverPrecision>(
266 kernels::AderSolver::getBndFluxSize(),
267 tarch::MemoryLocation::Heap
269 faceData.
QOut[faceIndex][1] = tarch::allocateMemory<SolverPrecision>(
270 kernels::AderSolver::getBndFluxSize(),
271 tarch::MemoryLocation::Heap
280 int numberOfThreads = 1;
282 #if defined(WITH_OPENMP)
283 for (
int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
284 numberOfThreads = NumberOfLaunchingThreads[threadIndex];
288 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
293 cellIndex * 2 * DIMENSIONS,
300 tarch::timing::Measurement timingKernelLaunch;
302 for (
int sample = 0; sample <= NumberOfSamples; sample++) {
304 #if defined(WITH_OPENMP)
305 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
307 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
311 for (
int d = 0; d < DIMENSIONS; d++) {
314 faceData.
QIn[2 * DIMENSIONS * cellIndex + d][1],
315 kernels::AderSolver::getBndFaceSize(),
316 faceData.
QIn[2 * DIMENSIONS * cellIndex + d + DIMENSIONS][1]
319 faceData.
QOut[2 * DIMENSIONS * cellIndex + d][1],
320 kernels::AderSolver::getBndFluxSize(),
321 faceData.
QOut[2 * DIMENSIONS * cellIndex + d + DIMENSIONS][1]
326 faceData.
QIn[2 * DIMENSIONS * cellIndex + d + DIMENSIONS][0],
327 kernels::AderSolver::getBndFaceSize(),
328 faceData.
QIn[2 * DIMENSIONS * cellIndex + d][0]
331 faceData.
QOut[2 * DIMENSIONS * cellIndex + d + DIMENSIONS][0],
332 kernels::AderSolver::getBndFluxSize(),
333 faceData.
QOut[2 * DIMENSIONS * cellIndex + d][0]
338 tarch::timing::Watch watchKernelLaunch(
"::runBenchmarks",
"assessKernel(...)",
false);
340 #if defined(WITH_OPENMP)
341 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
343 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
344 cellData.maxEigenvalue[cellIndex] = 0.0;
349 cellIndex * 2 * DIMENSIONS,
355 watchKernelLaunch.stop();
356 timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
364 #if defined(WITH_OPENMP)
368 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
369 tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
370 tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
372 for (
int faceIndex = 0; faceIndex < 2 * DIMENSIONS * numberOfCells; faceIndex++) {
373 tarch::freeMemory(faceData.
QIn[faceIndex][0], tarch::MemoryLocation::Heap);
374 tarch::freeMemory(faceData.
QIn[faceIndex][1], tarch::MemoryLocation::Heap);
375 tarch::freeMemory(faceData.
QOut[faceIndex][0], tarch::MemoryLocation::Heap);
376 tarch::freeMemory(faceData.
QOut[faceIndex][1], tarch::MemoryLocation::Heap);
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
void runKernels(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
tarch::logging::Log _log
This is variant 1 of the fused kernels.
tarch::logging::Log _log
This is variant 4 of the fused kernels.
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Represents the sides of one face, with 2 sides (left and right) to a face. For ADER, QIn will contain t...
tarch::la::Vector< DIMENSIONS, double > * faceSize
inType *(* QIn)[2]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
tarch::la::Vector< DIMENSIONS, double > * faceCentre
outType *(* QOut)[2]
Out values.