Peano
Variant4.cpp
Go to the documentation of this file.
1 
25 #include "exahype2/CellData.h"
26 #include "kernels/AderSolver/BufferSizes.h"
27 #include "kernels/AderSolver/FaceIntegral.h"
28 #include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
29 #include "kernels/AderSolver/MaxScaledEigenvalue.h"
30 #include "kernels/AderSolver/RiemannSolver.h"
31 #include "LRFaceData.h"
32 #include "repositories/SolverRepository.h"
33 #include "Utils.h"
34 #include "Variants.h"
35 
36 #include <cstring>
37 
38 tarch::logging::Log variant4::_log("variants4::");
39 
41 
42 inline void initialTask(
43  exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
45  const int cellId,
46  const int faceId,
47  tarch::timing::Measurement& measurement
48 ) {
49 
50  // Assume that the faces for a given cell are one after another in FaceData
51  // in typical Peano order, e.g. left, lower, right, upper
52  // so faceId is the left, lower (front) face,
53  // faceId+1 is the one after this, and so on.
54  SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
55  myFaceData->QIn[faceId + 0][1],
56  myFaceData->QIn[faceId + 1][1],
57 #if DIMENSIONS == 3
58  myFaceData->QIn[faceId + 2][1],
59 #endif
60  myFaceData->QIn[faceId + DIMENSIONS + 0][0],
61  myFaceData->QIn[faceId + DIMENSIONS + 1][0]
62 #if DIMENSIONS == 3
63  ,
64  myFaceData->QIn[faceId + DIMENSIONS + 2][0]
65 #endif
66  };
67 
68  SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
69  myFaceData->QOut[faceId + 0][1],
70  myFaceData->QOut[faceId + 1][1],
71 #if DIMENSIONS == 3
72  myFaceData->QOut[faceId + 2][1],
73 #endif
74  myFaceData->QOut[faceId + DIMENSIONS + 0][0],
75  myFaceData->QOut[faceId + DIMENSIONS + 1][0]
76 #if DIMENSIONS == 3
77  ,
78  myFaceData->QOut[faceId + DIMENSIONS + 2][0]
79 #endif
80  };
81 
82  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
83 
84  int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
85  repositories::instanceOfAderSolver,
86  lQhbnd,
87  lFhbnd,
88  myCellData->QIn[cellId],
89  myCellData->cellCentre[cellId],
90  myCellData->cellSize[cellId],
91  myCellData->t,
92  myCellData->dt
93  );
94 
95  watchKernelCompute.stop();
96  measurement.setValue(watchKernelCompute.getCalendarTime());
97 }
98 
99 
101  exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
103  const int cellId,
104  const int faceId,
105  tarch::timing::Measurement& measurement
106 ) {
107 
108  // Assume that the faces for a given cell are one after another
109  // in typical Peano order, e.g. left, lower, right, upper
110  // Assume that the faces for a given cell are one after another
111  // in typical Peano order, e.g. left, lower, right, upper
112  SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
113  myFaceData->QIn[faceId + 0][1],
114  myFaceData->QIn[faceId + 1][1],
115 #if DIMENSIONS == 3
116  myFaceData->QIn[faceId + 2][1],
117 #endif
118  myFaceData->QIn[faceId + DIMENSIONS + 0][0],
119  myFaceData->QIn[faceId + DIMENSIONS + 1][0]
120 #if DIMENSIONS == 3
121  ,
122  myFaceData->QIn[faceId + DIMENSIONS + 2][0]
123 #endif
124  };
125 
126  SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
127  myFaceData->QOut[faceId + 0][1],
128  myFaceData->QOut[faceId + 1][1],
129 #if DIMENSIONS == 3
130  myFaceData->QOut[faceId + 2][1],
131 #endif
132  myFaceData->QOut[faceId + DIMENSIONS + 0][0],
133  myFaceData->QOut[faceId + DIMENSIONS + 1][0]
134 #if DIMENSIONS == 3
135  ,
136  myFaceData->QOut[faceId + DIMENSIONS + 2][0]
137 #endif
138  };
139 
140  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
141 
142  for (int d = 0; d < DIMENSIONS; d++) {
143  const int direction = d;
144 
145  kernels::AderSolver::riemannSolver<SolverPrecision>(
146  repositories::instanceOfAderSolver,
147  myFaceData->QOut[faceId + d][0],
148  myFaceData->QOut[faceId + d][1],
149  myFaceData->QIn[faceId + d][0],
150  myFaceData->QIn[faceId + d][1],
151  myFaceData->t[faceId + d],
152  myFaceData->dt[faceId + d],
153  myFaceData->faceCentre[faceId + d],
154  myFaceData->faceSize[faceId + d],
155  direction,
156  false,
157  0
158  );
159 
160  const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
161  // Negative face
162  kernels::AderSolver::faceIntegral(
163  myCellData->QIn[cellId],
164  myFaceData->QOut[faceId + d][1],
165  direction,
166  0,
167  inverseDxDirection,
168  myCellData->dt
169  );
170 
171  kernels::AderSolver::riemannSolver<SolverPrecision>(
172  repositories::instanceOfAderSolver,
173  myFaceData->QOut[faceId + d + DIMENSIONS][0],
174  myFaceData->QOut[faceId + d + DIMENSIONS][1],
175  myFaceData->QIn[faceId + d + DIMENSIONS][0],
176  myFaceData->QIn[faceId + d + DIMENSIONS][1],
177  myFaceData->t[faceId + d + DIMENSIONS],
178  myFaceData->dt[faceId + d + DIMENSIONS],
179  myFaceData->faceCentre[faceId + d + DIMENSIONS],
180  myFaceData->faceSize[faceId + d + DIMENSIONS],
181  direction,
182  false,
183  0
184  );
185 
186  // Positive face
187  kernels::AderSolver::faceIntegral(
188  myCellData->QIn[cellId],
189  myFaceData->QOut[faceId + DIMENSIONS][0],
190  direction,
191  1,
192  inverseDxDirection,
193  myCellData->dt
194  );
195 
196  } // for d
197 
198  myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
199  repositories::instanceOfAderSolver,
200  myCellData->QIn[cellId],
201  myCellData->cellCentre[cellId],
202  myCellData->cellSize[cellId],
203  myCellData->t,
204  myCellData->dt
205  );
206 
207  int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
208  repositories::instanceOfAderSolver,
209  lQhbnd,
210  lFhbnd,
211  myCellData->QIn[cellId],
212  myCellData->cellCentre[cellId],
213  myCellData->cellSize[cellId],
214  myCellData->t,
215  myCellData->dt
216  );
217 
218  watchKernelCompute.stop();
219  measurement.setValue(watchKernelCompute.getCalendarTime());
220 }
221 
222 
224  int numberOfCells,
225  double timeStamp,
226  double timeStepSize,
227  const tarch::la::Vector<DIMENSIONS, double> cellCenter,
228  const tarch::la::Vector<DIMENSIONS, double> cellSize
229 ) {
230 
231  tarch::timing::Measurement timingComputeKernel;
232 
233  exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
234  exahype2::FaceData<SolverPrecision, SolverPrecision> faceData(2 * DIMENSIONS * numberOfCells);
235 
236  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
237  cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
239  tarch::MemoryLocation::Heap
240  );
241  cellData.t = timeStamp;
242  cellData.dt = timeStepSize;
243  cellData.QOut[cellIndex] = nullptr;
244  cellData.cellCentre[cellIndex] = cellCenter;
245  cellData.cellSize[cellIndex] = cellSize;
246  cellData.maxEigenvalue[cellIndex] = 0.0;
247 
248  initInputData(cellData.QIn[cellIndex], cellCenter, cellSize);
249  }
250 
251  // We need one instance of face per face of a cell, e.g.
252  // 2*DIMENSIONS*numberOfCells faces.
253  // Here we just number them in the Peano order for each cell,
254  // so face (2*DIMENSIONS*i+j) will be the jth face of the ith cell
255  for (int faceIndex = 0; faceIndex < 2 * DIMENSIONS * numberOfCells; faceIndex++) {
256  // Allocating data for the left and right sides of a given face
257  faceData.QIn[faceIndex][0] = tarch::allocateMemory<SolverPrecision>(
258  kernels::AderSolver::getBndFaceSize(),
259  tarch::MemoryLocation::Heap
260  );
261  faceData.QIn[faceIndex][1] = tarch::allocateMemory<SolverPrecision>(
262  kernels::AderSolver::getBndFaceSize(),
263  tarch::MemoryLocation::Heap
264  );
265  faceData.QOut[faceIndex][0] = tarch::allocateMemory<SolverPrecision>(
266  kernels::AderSolver::getBndFluxSize(),
267  tarch::MemoryLocation::Heap
268  );
269  faceData.QOut[faceIndex][1] = tarch::allocateMemory<SolverPrecision>(
270  kernels::AderSolver::getBndFluxSize(),
271  tarch::MemoryLocation::Heap
272  );
273 
274  faceData.t[faceIndex] = timeStamp;
275  faceData.dt[faceIndex] = timeStepSize;
276  faceData.faceCentre[faceIndex] = cellCenter;
277  faceData.faceSize[faceIndex] = cellSize;
278  }
279 
280  int numberOfThreads = 1;
281 
282  #if defined(WITH_OPENMP)
283  for (int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
284  numberOfThreads = NumberOfLaunchingThreads[threadIndex];
285  #endif
286 
287  // Initial task
288  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
289  initialTask(
290  &cellData,
291  &faceData,
292  cellIndex,
293  cellIndex * 2 * DIMENSIONS, // just passing the first of a cells faces, it can get the others by pointer
294  // arithmetic
296  );
297  }
298 
299  timingComputeKernel.erase();
300  tarch::timing::Measurement timingKernelLaunch;
301 
302  for (int sample = 0; sample <= NumberOfSamples; sample++) {
303 
304  #if defined(WITH_OPENMP)
305  #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
306  #endif
307  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
308 
309  // Copy data from one side of each face to the other
310  // Here we just copy data from one side of a given cell's face to the opposite face of the same cell
311  for (int d = 0; d < DIMENSIONS; d++) {
312  // Copy inner negative face into outer positive face
313  std::copy_n(
314  faceData.QIn[2 * DIMENSIONS * cellIndex + d][1],
315  kernels::AderSolver::getBndFaceSize(),
316  faceData.QIn[2 * DIMENSIONS * cellIndex + d + DIMENSIONS][1]
317  );
318  std::copy_n(
319  faceData.QOut[2 * DIMENSIONS * cellIndex + d][1],
320  kernels::AderSolver::getBndFluxSize(),
321  faceData.QOut[2 * DIMENSIONS * cellIndex + d + DIMENSIONS][1]
322  );
323 
324  // Copy inner positive face into outer negative face
325  std::copy_n(
326  faceData.QIn[2 * DIMENSIONS * cellIndex + d + DIMENSIONS][0],
327  kernels::AderSolver::getBndFaceSize(),
328  faceData.QIn[2 * DIMENSIONS * cellIndex + d][0]
329  );
330  std::copy_n(
331  faceData.QOut[2 * DIMENSIONS * cellIndex + d + DIMENSIONS][0],
332  kernels::AderSolver::getBndFluxSize(),
333  faceData.QOut[2 * DIMENSIONS * cellIndex + d][0]
334  );
335  }
336  }
337 
338  tarch::timing::Watch watchKernelLaunch("::runBenchmarks", "assessKernel(...)", false);
339 
340  #if defined(WITH_OPENMP)
341  #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
342  #endif
343  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
344  cellData.maxEigenvalue[cellIndex] = 0.0;
345  runKernels(
346  &cellData,
347  &faceData,
348  cellIndex,
349  cellIndex * 2 * DIMENSIONS, // just passing the first of a cells faces, it can get the others by pointer
350  // arithmetic
352  );
353  }
354 
355  watchKernelLaunch.stop();
356  timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
357  }
358 
359 
360  reportRuntime("variant 4", timingComputeKernel, timingKernelLaunch, numberOfCells, numberOfThreads, _log);
361  // allocateAndStoreOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
362  // validateOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
363 
364  #if defined(WITH_OPENMP)
365  }//threadIndex
366  #endif
367 
368  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
369  tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
370  tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
371  }
372  for (int faceIndex = 0; faceIndex < 2 * DIMENSIONS * numberOfCells; faceIndex++) {
373  tarch::freeMemory(faceData.QIn[faceIndex][0], tarch::MemoryLocation::Heap);
374  tarch::freeMemory(faceData.QIn[faceIndex][1], tarch::MemoryLocation::Heap);
375  tarch::freeMemory(faceData.QOut[faceIndex][0], tarch::MemoryLocation::Heap);
376  tarch::freeMemory(faceData.QOut[faceIndex][1], tarch::MemoryLocation::Heap);
377  }
378 }
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition: Variant4.cpp:42
void runKernels(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition: Variant4.cpp:100
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
Definition: Utils.h:14
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
Definition: Utils.h:68
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
Definition: Utils.h:30
tarch::logging::Log _log
This is variant 1 of the fused kernels.
tarch::logging::Log _log
This is variant 4 of the fused kernels.
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Definition: Variant4.cpp:223
Represents the sides of one face, with 2 sides (left and right) to a face For ADER QIn will contain t...
Definition: LRFaceData.h:20
tarch::la::Vector< DIMENSIONS, double > * faceSize
Definition: LRFaceData.h:27
inType *(* QIn)[2]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
Definition: LRFaceData.h:25
tarch::la::Vector< DIMENSIONS, double > * faceCentre
Definition: LRFaceData.h:26
outType *(* QOut)[2]
Out values.
Definition: LRFaceData.h:60