24 #include "exahype2/CellData.h"
25 #include "kernels/AderSolver/BufferSizes.h"
26 #include "kernels/AderSolver/FaceIntegral.h"
27 #include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
28 #include "kernels/AderSolver/MaxScaledEigenvalue.h"
29 #include "kernels/AderSolver/RiemannSolver.h"
31 #include "repositories/SolverRepository.h"
// Fragment of a benchmark task routine. The head of the signature and several
// parameters are missing from this listing; the visible parameters match the
// initialTask(...) declaration, but they match secondTask(...) as well -- TODO confirm.
// Visible behaviour: set up two pointer tables into the face buffers, run one
// fusedSpaceTimePredictorVolumeIntegral call for cell `cellId`, and record the
// watch's calendar time in `measurement`.
42 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
46 tarch::timing::Measurement& measurement
// Table of 2*DIMENSIONS pointers into myFaceData->QIn: slot [1] of faces
// faceId+0..2, followed by slot [0] of faces faceId+DIMENSIONS+0..2.
// NOTE(review): the "+ 2" entries follow an entry without a trailing comma and
// listing lines 56/58/61/62/64-66 are missing here -- presumably they hold
// preprocessor guards for the third dimension and the closing brace; confirm
// against the real file.
53 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
54 myFaceData->
QIn[faceId + 0][1],
55 myFaceData->
QIn[faceId + 1][1],
57 myFaceData->
QIn[faceId + 2][1],
59 myFaceData->
QIn[faceId + DIMENSIONS + 0][0],
60 myFaceData->
QIn[faceId + DIMENSIONS + 1][0]
63 myFaceData->
QIn[faceId + DIMENSIONS + 2][0]
// Same layout as lQhbnd, but pointing into myFaceData->QOut.
67 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
68 myFaceData->
QOut[faceId + 0][1],
69 myFaceData->
QOut[faceId + 1][1],
71 myFaceData->
QOut[faceId + 2][1],
73 myFaceData->
QOut[faceId + DIMENSIONS + 0][0],
74 myFaceData->
QOut[faceId + DIMENSIONS + 1][0]
77 myFaceData->
QOut[faceId + DIMENSIONS + 2][0]
// Watch labelled "::runBenchmarks" / "assessKernel(...)"; the meaning of the
// third constructor argument (false) is not visible in this listing.
81 tarch::timing::Watch watchKernelCompute(
"::runBenchmarks",
"assessKernel(...)",
false);
// Predictor/volume-integral kernel on this cell's QIn, cell centre and cell
// size. Further arguments (listing lines 85-86, 90-93) are not shown;
// presumably lQhbnd/lFhbnd and the time stamp/step are among them -- TODO confirm.
// The returned iteration count is stored; no use of it is visible here.
83 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
84 repositories::instanceOfAderSolver,
87 myCellData->QIn[cellId],
88 myCellData->cellCentre[cellId],
89 myCellData->cellSize[cellId],
// Stop the watch and hand its calendar time to the caller-supplied measurement.
94 watchKernelCompute.stop();
95 measurement.setValue(watchKernelCompute.getCalendarTime());
// Fragment of a benchmark task routine; the parameter names match the
// firstTask(...) declaration (head of the signature is missing from this listing).
// Visible behaviour: time a single riemannSolver call across the pair of faces
// leftFaceId / rightFaceId and record the watch's calendar time in `measurement`.
100 const int leftFaceId,
101 const int rightFaceId,
102 const int faceDirection,
103 tarch::timing::Measurement& measurement
// Watch labelled "::runBenchmarks" / "assessKernel(...)"; the meaning of the
// third constructor argument (false) is not visible in this listing.
106 tarch::timing::Watch watchKernelCompute(
"::runBenchmarks",
"assessKernel(...)",
false);
// Riemann solve: uses slot [1] of the left face and slot [0] of the right face,
// for both QOut and QIn. Time stamp, time step size and face size are the
// arithmetic mean of the two faces' values. Listing lines 120 and 122-126 are
// missing; presumably faceDirection is passed there -- TODO confirm.
112 kernels::AderSolver::riemannSolver<SolverPrecision>(
113 repositories::instanceOfAderSolver,
114 myFaceData->
QOut[leftFaceId][1],
115 myFaceData->
QOut[rightFaceId][0],
116 myFaceData->
QIn[leftFaceId][1],
117 myFaceData->
QIn[rightFaceId][0],
118 0.5 * (myFaceData->
t[leftFaceId] + myFaceData->
t[rightFaceId]),
119 0.5 * (myFaceData->
dt[leftFaceId] + myFaceData->
dt[rightFaceId]),
121 0.5 * (myFaceData->
faceSize[leftFaceId] + myFaceData->
faceSize[rightFaceId]),
// Stop the watch and hand its calendar time to the caller-supplied measurement.
127 watchKernelCompute.stop();
128 measurement.setValue(watchKernelCompute.getCalendarTime());
// Fragment of a benchmark task routine. The head of the signature and several
// parameters are missing from this listing; the visible parameters match the
// secondTask(...) declaration, but they match initialTask(...) as well -- TODO confirm.
// Visible behaviour, all under one watch: per-direction faceIntegral calls,
// an update of maxEigenvalue[cellId] via maxScaledEigenvalue, and one
// fusedSpaceTimePredictorVolumeIntegral call; the watch's calendar time is
// then recorded in `measurement`.
133 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
137 tarch::timing::Measurement& measurement
// Table of 2*DIMENSIONS pointers into myFaceData->QIn: slot [1] of faces
// faceId+0..2, followed by slot [0] of faces faceId+DIMENSIONS+0..2.
// NOTE(review): the "+ 2" entries follow an entry without a trailing comma and
// listing lines 147/149/152/153/155-157 are missing here -- presumably they hold
// preprocessor guards for the third dimension and the closing brace; confirm
// against the real file.
144 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
145 myFaceData->
QIn[faceId + 0][1],
146 myFaceData->
QIn[faceId + 1][1],
148 myFaceData->
QIn[faceId + 2][1],
150 myFaceData->
QIn[faceId + DIMENSIONS + 0][0],
151 myFaceData->
QIn[faceId + DIMENSIONS + 1][0]
154 myFaceData->
QIn[faceId + DIMENSIONS + 2][0]
// Same layout as lQhbnd, but pointing into myFaceData->QOut.
158 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
159 myFaceData->
QOut[faceId + 0][1],
160 myFaceData->
QOut[faceId + 1][1],
162 myFaceData->
QOut[faceId + 2][1],
164 myFaceData->
QOut[faceId + DIMENSIONS + 0][0],
165 myFaceData->
QOut[faceId + DIMENSIONS + 1][0]
168 myFaceData->
QOut[faceId + DIMENSIONS + 2][0]
// Watch labelled "::runBenchmarks" / "assessKernel(...)"; the meaning of the
// third constructor argument (false) is not visible in this listing.
172 tarch::timing::Watch watchKernelCompute(
"::runBenchmarks",
"assessKernel(...)",
false);
// One pass per coordinate direction; the closing brace of this loop is not
// part of the listing, so its exact extent is an assumption.
174 for (
int d = 0; d < DIMENSIONS; d++) {
175 const int direction = d;
// Reciprocal of the cell extent along direction d.
177 const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
// First face integral of this direction: reads QOut[faceId + d][1].
// Remaining arguments (listing lines 182-188) are not shown.
179 kernels::AderSolver::faceIntegral(
180 myCellData->QIn[cellId],
181 myFaceData->
QOut[faceId + d][1],
// Second face integral of this direction.
// NOTE(review): the index is faceId + DIMENSIONS with no "+ d", so every
// direction reads the same face buffer, whereas the lFhbnd table above uses
// faceId + DIMENSIONS + k. Looks like a missing "+ d" -- confirm intent.
189 kernels::AderSolver::faceIntegral(
190 myCellData->QIn[cellId],
191 myFaceData->
QOut[faceId + DIMENSIONS][0],
// Store the kernel's result as this cell's maxEigenvalue (further arguments,
// listing lines 205-208, are not shown).
200 myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
201 repositories::instanceOfAderSolver,
202 myCellData->QIn[cellId],
203 myCellData->cellCentre[cellId],
204 myCellData->cellSize[cellId],
// Predictor/volume-integral kernel on this cell's QIn, cell centre and cell
// size; arguments on listing lines 211-212 and 216-219 are not shown.
// The returned iteration count is stored; no use of it is visible here.
209 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
210 repositories::instanceOfAderSolver,
213 myCellData->QIn[cellId],
214 myCellData->cellCentre[cellId],
215 myCellData->cellSize[cellId],
// Stop the watch and hand its calendar time to the caller-supplied measurement.
220 watchKernelCompute.stop();
221 measurement.setValue(watchKernelCompute.getCalendarTime());
// Fragment of the benchmark driver; the parameter names match the
// runBenchmarks(...) declaration (head of the signature is missing from this
// listing). Visible phases: allocate cell and face buffers, run the timed
// sample loop (optionally once per configured OpenMP thread count), free the buffers.
// Both vectors are taken by const value.
229 const tarch::la::Vector<DIMENSIONS, double>
cellCenter,
230 const tarch::la::Vector<DIMENSIONS, double>
cellSize
// Container for numberOfCells cells.
235 exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
// Per cell: heap-allocate QIn (the size argument on listing line 240 is not
// shown), leave QOut as nullptr, copy in the cell size and reset maxEigenvalue.
238 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
239 cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
241 tarch::MemoryLocation::Heap
245 cellData.QOut[cellIndex] =
nullptr;
247 cellData.cellSize[cellIndex] =
cellSize;
248 cellData.maxEigenvalue[cellIndex] = 0.0;
// Per face (2*DIMENSIONS faces per cell): heap-allocate both QIn slots with
// getBndFaceSize() entries and both QOut slots with getBndFluxSize() entries.
// The declaration of faceData is not part of this listing.
257 for (
int faceIndex = 0; faceIndex < 2 * DIMENSIONS * numberOfCells; faceIndex++) {
259 faceData.
QIn[faceIndex][0] = tarch::allocateMemory<SolverPrecision>(
260 kernels::AderSolver::getBndFaceSize(),
261 tarch::MemoryLocation::Heap
263 faceData.
QIn[faceIndex][1] = tarch::allocateMemory<SolverPrecision>(
264 kernels::AderSolver::getBndFaceSize(),
265 tarch::MemoryLocation::Heap
267 faceData.
QOut[faceIndex][0] = tarch::allocateMemory<SolverPrecision>(
268 kernels::AderSolver::getBndFluxSize(),
269 tarch::MemoryLocation::Heap
271 faceData.
QOut[faceIndex][1] = tarch::allocateMemory<SolverPrecision>(
272 kernels::AderSolver::getBndFluxSize(),
273 tarch::MemoryLocation::Heap
// Defaults to one thread; with WITH_OPENMP defined the benchmark is repeated
// for every entry of NumberOfLaunchingThreads.
// NOTE(review): threadIndex is an int compared against .size(); if that
// returns an unsigned type this is a signed/unsigned comparison -- confirm.
282 int numberOfThreads = 1;
284 #if defined(WITH_OPENMP)
285 for (
int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
286 numberOfThreads = NumberOfLaunchingThreads[threadIndex];
// Untimed per-cell pass before sampling; the callee is not shown (presumably
// initialTask, which takes a faceId such as cellIndex * 2 * DIMENSIONS -- TODO confirm).
290 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
295 cellIndex * 2 * DIMENSIONS,
302 tarch::timing::Measurement timingKernelLaunch;
// NOTE(review): "<=" makes this loop run NumberOfSamples + 1 times; confirm
// whether the extra iteration is intended (e.g. as a warm-up run).
304 for (
int sample = 0; sample <= NumberOfSamples; sample++) {
// Watch covering both per-cell sweeps of this sample.
306 tarch::timing::Watch watchKernelLaunch(
"::runBenchmarks",
"assessKernel(...)",
false);
// First sweep: for every cell and direction d, an elided call receives the
// face ids 2*DIMENSIONS*cellIndex + d + DIMENSIONS and 2*DIMENSIONS*cellIndex + d
// (presumably firstTask's leftFaceId/rightFaceId -- TODO confirm).
308 #if defined(WITH_OPENMP)
309 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
311 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
313 for (
int d = 0; d < DIMENSIONS; d++) {
318 2 * DIMENSIONS * cellIndex + d + DIMENSIONS,
319 2 * DIMENSIONS * cellIndex + d,
// Second sweep: reset maxEigenvalue per cell, then an elided call receives
// cellIndex * 2 * DIMENSIONS (presumably secondTask's faceId -- TODO confirm).
326 #if defined(WITH_OPENMP)
327 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
329 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
330 cellData.maxEigenvalue[cellIndex] = 0.0;
335 cellIndex * 2 * DIMENSIONS,
// Record this sample's calendar time.
341 watchKernelLaunch.stop();
342 timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
350 #if defined(WITH_OPENMP)
// Release the per-cell buffers.
// NOTE(review): in the visible code QOut[cellIndex] is only ever set to
// nullptr, so unless an elided line reassigns it this call receives a null
// pointer -- confirm tarch::freeMemory accepts that.
354 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
355 tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
356 tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
// Release the four buffers of every face, mirroring the allocation loop above.
358 for (
int faceIndex = 0; faceIndex < 2 * DIMENSIONS * numberOfCells; faceIndex++) {
359 tarch::freeMemory(faceData.
QIn[faceIndex][0], tarch::MemoryLocation::Heap);
360 tarch::freeMemory(faceData.
QIn[faceIndex][1], tarch::MemoryLocation::Heap);
361 tarch::freeMemory(faceData.
QOut[faceIndex][0], tarch::MemoryLocation::Heap);
362 tarch::freeMemory(faceData.
QOut[faceIndex][1], tarch::MemoryLocation::Heap);
void secondTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
void firstTask(exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int leftFaceId, const int rightFaceId, const int faceDirection, tarch::timing::Measurement &measurement)
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
tarch::logging::Log _log
This is variant 1 of the fused kernels.
tarch::logging::Log _log
This is variant 5 of the fused kernels.
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Represents the sides of one face, with 2 sides (left and right) to a face. For ADER QIn will contain t...
tarch::la::Vector< DIMENSIONS, double > * faceSize
inType *(* QIn)[2]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
tarch::la::Vector< DIMENSIONS, double > * faceCentre
outType *(* QOut)[2]
Out values.