// Visit every index produced by dfor(index, Order + 1) and call the solver's
// initialCondition() for it.
// NOTE(review): the destination array and the remaining call arguments are
// not visible here - confirm how linearisedIndex is used to address them.
95 int linearisedIndex = 0;
96 dfor(index, AderSolver::Order + 1) {
97 repositories::instanceOfAderSolver.initialCondition(
// Presumably the position argument: getQuadraturePoint() receives Order + 1
// and the solver's double-precision quadrature node table.
99 ::exahype2::dg::getQuadraturePoint(
103 repositories::instanceOfAderSolver.Order + 1,
104 kernels::AderSolver::Quadrature<double>::nodes
// Advance the running offset by the number of values per visited index
// (unknowns plus auxiliary variables).
109 linearisedIndex += AderSolver::NumberOfUnknowns + AderSolver::NumberOfAuxiliaryVariables;
// Q is indexed as Q[cellIndex][i]; maxEigenvalue holds one value per cell.
160 const double*
const* Q,
161 const double*
const maxEigenvalue,
162 const int numberOfCells
// Compile-time switch: a non-positive Accuracy disables validation and the
// function returns immediately.
164 if constexpr (Accuracy <= 0.0)
return;
// Largest absolute deviation from the reference data seen so far.
166 double maxDifference = 0.0;
// NOTE(review): this changes the precision of the global std::cerr stream and
// the visible code does not restore it - confirm that is intended.
168 std::cerr.precision(16);
169 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
// Compare each entry with the stored reference validQ using tolerance
// Accuracy (the loop over i is not visible here).
171 if (not tarch::la::equals(Q[cellIndex][i],
validQ[cellIndex][i], Accuracy)) {
173 logError(
"validateOutcome(...)",
175 <<
"cell " << cellIndex <<
": "
176 <<
"Q[" << i <<
"]!=validQ[" << i <<
"] ("
// Record the deviation of this entry.
184 maxDifference = std::max(maxDifference, std::abs(Q[cellIndex][i] -
validQ[cellIndex][i]));
// Same tolerance check for the per-cell maximum eigenvalue against
// validMaxEigenvalue.
188 if (not tarch::la::equals(maxEigenvalue[cellIndex],
validMaxEigenvalue[cellIndex], Accuracy)) {
190 logError(
"validateOutcome(...)",
192 <<
"maxEigenvalue[" << cellIndex <<
"]!=validMaxEigenvalue[" << cellIndex <<
"] ("
198 maxDifference = std::max(maxDifference, std::abs(maxEigenvalue[cellIndex] -
validMaxEigenvalue[cellIndex]));
// Summary message: maximum deviation, the admissible accuracy and the number
// of mismatching entries (errors).
// NOTE(review): the condition guarding this message is not visible here.
204 logError(
"validateOutcome(...)",
205 "max difference of outcome from all cells is "
207 <<
" (admissible accuracy="
209 <<
" for " << errors <<
" entries"
// kernelIdentificator labels the report; timingKernelLaunch supplies the
// measured launch time.
223 const std::string& kernelIdentificator,
224 const tarch::timing::Measurement& timingKernelLaunch,
// The report is assembled in a string stream and emitted through a single
// logInfo() call at the end.
227 std::stringstream ss;
229 ss << kernelIdentificator <<
":\n\t";
// Measured launch value as reported by the measurement object ...
233 ss << timingKernelLaunch.getValue() <<
" |\n\t";
// ... followed by the same value divided by the number of cells.
234 ss << (timingKernelLaunch.getValue() / numberOfCells );
// Finally append the measurement's own string representation.
235 ss <<
" |\n\t" << timingKernelLaunch.toString();
236 logInfo(
"reportRuntime()", ss.str());
/**
 * Runs the kernel sequence for a single cell and times it.
 *
 * The visible calls are, in order: the fused space-time predictor / volume
 * integral, then per dimension a Riemann solve and two face integrals, and
 * finally the maximum scaled eigenvalue.
 *
 * @param device       Device identifier. NOTE(review): not used in the lines
 *                     visible here - confirm.
 * @param cellData     Cell container; QIn[cellIndex] is passed to the kernels.
 * @param cellIndex    Index of the cell to process.
 * @param measurement  Receives the calendar time reported by the local watch
 *                     after it has been stopped.
 * @return The value returned by maxScaledEigenvalue() for this cell.
 */
242double runKernels(
int device, exahype2::CellData<double, double>& cellData,
int cellIndex, tarch::timing::Measurement& measurement){
// Number of doubles that fit into one ALIGNMENT-byte unit.
247 constexpr int AlignmentDoubles = ALIGNMENT /
sizeof(double);
// Round the per-face sizes up to the next multiple of AlignmentDoubles so
// that consecutive face slices stay a whole number of alignment units apart.
248 constexpr int PaddedBndFaceSize = ((kernels::AderSolver::getBndFaceSize() + AlignmentDoubles - 1) / AlignmentDoubles) * AlignmentDoubles;
249 constexpr int PaddedBndFluxSize = ((kernels::AderSolver::getBndFluxSize() + AlignmentDoubles - 1) / AlignmentDoubles) * AlignmentDoubles;
// One aligned buffer for all 2 * DIMENSIONS faces; lQhbnd holds pointers at
// multiples of PaddedBndFaceSize into it.
251 double boundaryData[2 * DIMENSIONS * PaddedBndFaceSize] __attribute__((aligned(ALIGNMENT)));
252 double* lQhbnd[2 * DIMENSIONS] = {
254 boundaryData + PaddedBndFaceSize,
255 boundaryData + 2 * PaddedBndFaceSize,
256 boundaryData + 3 * PaddedBndFaceSize
// NOTE(review): the leading comma suggests entries 4 and 5 are conditional
// (presumably three-dimensional builds only) - the guard is not visible.
258 ,boundaryData + 4 * PaddedBndFaceSize,
259 boundaryData + 5 * PaddedBndFaceSize
// Same layout for the boundary fluxes, using PaddedBndFluxSize as the stride.
263 double boundaryFlux[2 * DIMENSIONS * PaddedBndFluxSize] __attribute__((aligned(ALIGNMENT)));
264 double* lFhbnd[2 * DIMENSIONS] = {
266 boundaryFlux + PaddedBndFluxSize,
267 boundaryFlux + 2 * PaddedBndFluxSize,
268 boundaryFlux + 3 * PaddedBndFluxSize
270 ,boundaryFlux + 4 * PaddedBndFluxSize,
271 boundaryFlux + 5 * PaddedBndFluxSize
// Watch whose calendar time is handed to `measurement` at the end.
275 tarch::timing::Watch watchKernelCompute(
"::runBenchmarks",
"assessKernel(...)",
false);
// Step 1: fused predictor / volume integral on this cell's input data; its
// int result is kept in numberOfIterations.
277 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<double, double, double>(
278 repositories::instanceOfAderSolver,
279 lQhbnd, lFhbnd, cellData.QIn[cellIndex],
// Step 2: per coordinate direction, solve the Riemann problem and apply the
// face integrals.
283 for (
int d = 0; d < DIMENSIONS; d++) {
284 const int direction = d;
// Starts from the cell centre; any shift onto the face is not visible here.
288 tarch::la::Vector<DIMENSIONS, double> faceCentre =
CellCenter;
// The visible arguments use the face entries at index d + DIMENSIONS.
291 kernels::AderSolver::riemannSolver<double>(
292 repositories::instanceOfAderSolver,
293 lFhbnd[d+DIMENSIONS],
295 lQhbnd[d+DIMENSIONS],
// Reciprocal of the cell extent along direction d.
306 const double inverseDxDirection = 1.0 /
CellSize[d];
// Two face integrals per direction, both operating on cellData.QIn[cellIndex].
308 kernels::AderSolver::faceIntegral(
309 cellData.QIn[cellIndex],
319 kernels::AderSolver::faceIntegral(
320 cellData.QIn[cellIndex],
321 lFhbnd[d+DIMENSIONS],
// Step 3: maximum scaled eigenvalue of the cell; this is the return value.
330 double maxEigenvalue = kernels::AderSolver::maxScaledEigenvalue(
331 repositories::instanceOfAderSolver,
332 cellData.QIn[cellIndex],
// Stop timing and publish the elapsed calendar time to the caller.
339 watchKernelCompute.stop();
340 measurement.setValue(watchKernelCompute.getCalendarTime());
342 return maxEigenvalue;
// Container holding the data of all numberOfCells cells for this run.
352 exahype2::CellData<double, double> cellData(numberOfCells);
// Per-cell setup: no output buffer (QOut = nullptr), the common CellSize and
// a zero maximum eigenvalue.
// NOTE(review): the setup of QIn is not visible here.
353 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
355 cellData.QOut[cellIndex] =
nullptr;
357 cellData.cellSize[cellIndex] =
CellSize;
358 cellData.maxEigenvalue[cellIndex] = 0.0;
// Local helper capturing by reference: executeKernels is the per-cell kernel
// launcher to measure, markerName the label passed on to reportRuntime().
366 auto assessKernel = [&](
367 std::function<double(
int device, exahype2::CellData<double, double>& cellData,
int cellIndex, tarch::timing::Measurement& measurement)> executeKernels,
368 const std::string& markerName,
372 tarch::timing::Measurement timingKernelLaunch;
// NOTE(review): the comparison is `<=`; whether this yields NumberOfSamples or
// NumberOfSamples + 1 passes depends on sample's start value, which is not
// visible here.
375 while (sample <= NumberOfSamples) {
// In OpenMP builds a parallel region with NumberOfLaunchingThreads threads is
// opened.
377 #if defined(WITH_OPENMP)
378 #pragma omp parallel num_threads(NumberOfLaunchingThreads)
// Reset the stored eigenvalue of every cell.
380 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
381 cellData.maxEigenvalue[cellIndex] = 0.0;
// Time the launch and store the calendar time in timingKernelLaunch.
386 tarch::timing::Watch watchKernelLaunch(
"::runBenchmarks",
"assessKernel(...)",
false);
388 watchKernelLaunch.stop();
389 timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
399 reportRuntime(markerName, timingKernelLaunch, numberOfCells);
// The host variant is assessed only when AssessHostKernels is set at compile
// time.
404 if constexpr (AssessHostKernels) {
406 "host, stateless, batched, AoS, serial",
407 tarch::accelerator::Device::DefaultDevice
// Release the per-cell buffers as heap memory.
// NOTE(review): QOut entries were set to nullptr during setup above; confirm
// that tarch::freeMemory accepts a null pointer if they are never allocated.
411 for (
int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
412 tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
413 tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
/**
 * Benchmark entry point.
 *
 * Initialises peano4 with the benchmark's domain offset and size, sets up
 * the log filters, starts the simulation, logs the benchmark configuration,
 * iterates over the entries of NumberOfCellsToStudy and finally calls
 * finishSimulation().
 *
 * @param argc  Argument count, forwarded by address to peano4::init().
 * @param argv  Argument vector, forwarded by address to peano4::init().
 */
417int main(
int argc,
char** argv) {
418 peano4::init(&argc, &argv, benchmarks::exahype2::kernelbenchmarks::DomainOffset, benchmarks::exahype2::kernelbenchmarks::DomainSize);
419 repositories::initLogFilters();
420 repositories::startSimulation();
// Log the configuration the measurements were taken with.
424 "number of compute threads: "
425 << tarch::multicore::Core::getInstance().getNumberOfThreads()
429 "number of threads launching compute kernels: "
430 << NumberOfLaunchingThreads
434 "number of unknowns: "
435 << AderSolver::NumberOfUnknowns
439 "number of auxiliary variables: "
440 << AderSolver::NumberOfAuxiliaryVariables
444 "number of finite volumes per axis per cell: "
449 "number of samples per measurement: "
454 "performing accuracy checks with precision: "
// SYCL builds additionally print environment-variable hints for the user.
457#if defined(WITH_GPU_SYCL)
460 "set SYCL_DEVICE_FILTER=gpu or ONEAPI_DEVICE_SELECTOR=cuda:0 when using SYCL on the device"
464 "set SYCL_PI_TRACE=2 in case of runtime errors"
// One pass per configured cell count.
// NOTE(review): `i` is an int compared against size(), which is presumably an
// unsigned type - confirm, and consider a range-based for loop.
474 for (
int i = 0; i < NumberOfCellsToStudy.size(); i++) {
475 logInfo(
"main()",
"number of cells: " << NumberOfCellsToStudy[i]);
484 repositories::finishSimulation();
void freeOutcome(const int numberOfCells)
const tarch::la::Vector< DIMENSIONS, double > CellCenter
void runBenchmarks(int numberOfCells)
Run the benchmark for one particular number of cells.
int main(int argc, char **argv)
double runKernels(int device, exahype2::CellData< double, double > &cellData, int cellIndex, tarch::timing::Measurement &measurement)
void allocateAndStoreOutcome(const double *const *Q, const double *const maxEigenvalue, const int numberOfCells)
Allocates and stores the outcome of one compute kernel.
constexpr double TimeStamp
void validateOutcome(const double *const *Q, const double *const maxEigenvalue, const int numberOfCells)
Validates data against the pre-stored simulation outcome.
tarch::timing::Measurement timingComputeKernel
double * validMaxEigenvalue
tarch::logging::Log _log("::")
constexpr int NumberOfFiniteVolumesPerCell
constexpr double TimeStepSize
const tarch::la::Vector< DIMENSIONS, double > CellSize
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.