31 #include "exahype2/CellData.h"
32 #include "kernels/AderSolver/BufferSizes.h"
33 #include "kernels/AderSolver/FaceIntegral.h"
34 #include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
35 #include "kernels/AderSolver/MaxScaledEigenvalue.h"
36 #include "kernels/AderSolver/RiemannSolver.h"
38 #include "repositories/SolverRepository.h"
49 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
53 tarch::timing::Measurement& measurement
60 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
61 myFaceData->
QIn[faceId + 0][1],
62 myFaceData->
QIn[faceId + 1][1],
64 myFaceData->
QIn[faceId + 2][1],
66 myFaceData->
QIn[faceId + DIMENSIONS + 0][0],
67 myFaceData->
QIn[faceId + DIMENSIONS + 1][0]
70 myFaceData->
QIn[faceId + DIMENSIONS + 2][0]
74 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
75 myFaceData->
QOut[faceId + 0][1],
76 myFaceData->
QOut[faceId + 1][1],
78 myFaceData->
QOut[faceId + 2][1],
80 myFaceData->
QOut[faceId + DIMENSIONS + 0][0],
81 myFaceData->
QOut[faceId + DIMENSIONS + 1][0]
84 myFaceData->
QOut[faceId + DIMENSIONS + 2][0]
88 tarch::timing::Watch watchKernelCompute(
"::runBenchmarks",
"assessKernel(...)",
false);
90 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
91 repositories::instanceOfAderSolver,
94 myCellData->QIn[cellId],
95 myCellData->cellCentre[cellId],
96 myCellData->cellSize[cellId],
101 watchKernelCompute.stop();
102 measurement.setValue(watchKernelCompute.getCalendarTime());
108 const int faceDirection,
109 tarch::timing::Measurement& measurement
112 tarch::timing::Watch watchKernelCompute(
"::runBenchmarks",
"assessKernel(...)",
false);
118 kernels::AderSolver::riemannSolver<SolverPrecision>(
119 repositories::instanceOfAderSolver,
120 myFaceData->
QOut[faceId][1],
121 myFaceData->
QOut[faceId][0],
122 myFaceData->
QIn[faceId][1],
123 myFaceData->
QIn[faceId][0],
124 myFaceData->
t[faceId],
125 myFaceData->
dt[faceId],
133 watchKernelCompute.stop();
134 measurement.setValue(watchKernelCompute.getCalendarTime());
139 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
143 tarch::timing::Measurement& measurement
150 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
151 myFaceData->
QIn[faceId + 0][1],
152 myFaceData->
QIn[faceId + 1][1],
154 myFaceData->
QIn[faceId + 2][1],
156 myFaceData->
QIn[faceId + DIMENSIONS + 0][0],
157 myFaceData->
QIn[faceId + DIMENSIONS + 1][0]
160 myFaceData->
QIn[faceId + DIMENSIONS + 2][0]
164 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
165 myFaceData->
QOut[faceId + 0][1],
166 myFaceData->
QOut[faceId + 1][1],
168 myFaceData->
QOut[faceId + 2][1],
170 myFaceData->
QOut[faceId + DIMENSIONS + 0][0],
171 myFaceData->
QOut[faceId + DIMENSIONS + 1][0]
174 myFaceData->
QOut[faceId + DIMENSIONS + 2][0]
178 tarch::timing::Watch watchKernelCompute(
"::runBenchmarks",
"assessKernel(...)",
false);
180 for (
int d = 0; d < DIMENSIONS; d++) {
181 const int direction = d;
183 const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
185 kernels::AderSolver::faceIntegral(
186 myCellData->QIn[cellId],
187 myFaceData->
QOut[faceId + d][1],
195 kernels::AderSolver::faceIntegral(
196 myCellData->QIn[cellId],
197 myFaceData->
QOut[faceId + DIMENSIONS][0],
206 myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
207 repositories::instanceOfAderSolver,
208 myCellData->QIn[cellId],
209 myCellData->cellCentre[cellId],
210 myCellData->cellSize[cellId],
215 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
216 repositories::instanceOfAderSolver,
219 myCellData->QIn[cellId],
220 myCellData->cellCentre[cellId],
221 myCellData->cellSize[cellId],
226 watchKernelCompute.stop();
227 measurement.setValue(watchKernelCompute.getCalendarTime());
235 const tarch::la::Vector<DIMENSIONS, double>
cellCenter,
236 const tarch::la::Vector<DIMENSIONS, double>
cellSize
241 exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
244 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
245 cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
247 tarch::MemoryLocation::Heap
251 cellData.QOut[cellIndex] =
nullptr;
253 cellData.cellSize[cellIndex] =
cellSize;
254 cellData.maxEigenvalue[cellIndex] = 0.0;
276 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
277 const int faceIndex = 2 * DIMENSIONS * cellIndex;
278 for (
int d = 0; d < DIMENSIONS; d++) {
280 faceData.
QIn[faceIndex + d][1] = tarch::allocateMemory<SolverPrecision>(
281 kernels::AderSolver::getBndFaceSize(),
282 tarch::MemoryLocation::Heap
284 faceData.
QIn[faceIndex + d + DIMENSIONS][0] = tarch::allocateMemory<SolverPrecision>(
285 kernels::AderSolver::getBndFaceSize(),
286 tarch::MemoryLocation::Heap
289 faceData.
QOut[faceIndex + d][1] = tarch::allocateMemory<SolverPrecision>(
290 kernels::AderSolver::getBndFluxSize(),
291 tarch::MemoryLocation::Heap
293 faceData.
QOut[faceIndex + d + DIMENSIONS][0] = tarch::allocateMemory<SolverPrecision>(
294 kernels::AderSolver::getBndFluxSize(),
295 tarch::MemoryLocation::Heap
299 faceData.
QIn[faceIndex + d][0] = faceData.
QIn[faceIndex + d + DIMENSIONS][0];
300 faceData.
QIn[faceIndex + d + DIMENSIONS][1] = faceData.
QIn[faceIndex + d][1];
302 faceData.
QOut[faceIndex + d][0] = faceData.
QOut[faceIndex + d + DIMENSIONS][0];
303 faceData.
QOut[faceIndex + d + DIMENSIONS][1] = faceData.
QOut[faceIndex + d][1];
310 faceData.
t[faceIndex + d + DIMENSIONS] =
timeStamp;
317 int numberOfThreads = 1;
319 #if defined(WITH_OPENMP)
320 for (
int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
321 numberOfThreads = NumberOfLaunchingThreads[threadIndex];
325 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
330 cellIndex * 2 * DIMENSIONS,
337 tarch::timing::Measurement timingKernelLaunch;
339 for (
int sample = 0; sample <= NumberOfSamples; sample++) {
341 tarch::timing::Watch watchKernelLaunch(
"::runBenchmarks",
"assessKernel(...)",
false);
343 #if defined(WITH_OPENMP)
344 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
346 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
348 for (
int d = 0; d < DIMENSIONS; d++) {
355 #if defined(WITH_OPENMP)
356 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
358 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
359 cellData.maxEigenvalue[cellIndex] = 0.0;
364 cellIndex * 2 * DIMENSIONS,
370 watchKernelLaunch.stop();
371 timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
379 #if defined(WITH_OPENMP)
383 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
384 tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
385 tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
387 const int faceIndex = 2 * DIMENSIONS * cellIndex;
388 for (
int d = 0; d < DIMENSIONS; d++) {
390 tarch::freeMemory(faceData.
QIn[faceIndex + d][1], tarch::MemoryLocation::Heap);
391 tarch::freeMemory(faceData.
QIn[faceIndex + d + DIMENSIONS][0], tarch::MemoryLocation::Heap);
392 tarch::freeMemory(faceData.
QOut[faceIndex + d][1], tarch::MemoryLocation::Heap);
393 tarch::freeMemory(faceData.
QOut[faceIndex + d + DIMENSIONS][0], tarch::MemoryLocation::Heap);
void firstTask(exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int faceId, const int faceDirection, tarch::timing::Measurement &measurement)
void secondTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
tarch::logging::Log _log
This is variant 1 of the fused kernels.
tarch::logging::Log _log
This is variant 6 of the fused kernels.
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Represents the sides of one face, with 2 sides (left and right) to a face. For ADER, QIn will contain t...
tarch::la::Vector< DIMENSIONS, double > * faceSize
inType *(* QIn)[2]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
tarch::la::Vector< DIMENSIONS, double > * faceCentre
outType *(* QOut)[2]
Out values.