Peano
Variant5.cpp
Go to the documentation of this file.
1 
24 #include "exahype2/CellData.h"
25 #include "kernels/AderSolver/BufferSizes.h"
26 #include "kernels/AderSolver/FaceIntegral.h"
27 #include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
28 #include "kernels/AderSolver/MaxScaledEigenvalue.h"
29 #include "kernels/AderSolver/RiemannSolver.h"
30 #include "LRFaceData.h"
31 #include "repositories/SolverRepository.h"
32 #include "Utils.h"
33 #include "Variants.h"
34 
35 #include <cstring>
36 
37 tarch::logging::Log variant5::_log("variants5::");
38 
40 
41 inline void initialTask(
42  exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
44  const int cellId,
45  const int faceId,
46  tarch::timing::Measurement& measurement
47 ) {
48 
49  // Assume that the faces for a given cell are one after another in FaceData
50  // in typical Peano order, e.g. left, lower, right, upper
51  // so faceId is the left, lower (front) face,
52  // faceId+1 is the one after this, and so on.
53  SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
54  myFaceData->QIn[faceId + 0][1],
55  myFaceData->QIn[faceId + 1][1],
56 #if DIMENSIONS == 3
57  myFaceData->QIn[faceId + 2][1],
58 #endif
59  myFaceData->QIn[faceId + DIMENSIONS + 0][0],
60  myFaceData->QIn[faceId + DIMENSIONS + 1][0]
61 #if DIMENSIONS == 3
62  ,
63  myFaceData->QIn[faceId + DIMENSIONS + 2][0]
64 #endif
65  };
66 
67  SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
68  myFaceData->QOut[faceId + 0][1],
69  myFaceData->QOut[faceId + 1][1],
70 #if DIMENSIONS == 3
71  myFaceData->QOut[faceId + 2][1],
72 #endif
73  myFaceData->QOut[faceId + DIMENSIONS + 0][0],
74  myFaceData->QOut[faceId + DIMENSIONS + 1][0]
75 #if DIMENSIONS == 3
76  ,
77  myFaceData->QOut[faceId + DIMENSIONS + 2][0]
78 #endif
79  };
80 
81  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
82 
83  int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
84  repositories::instanceOfAderSolver,
85  lQhbnd,
86  lFhbnd,
87  myCellData->QIn[cellId],
88  myCellData->cellCentre[cellId],
89  myCellData->cellSize[cellId],
90  myCellData->t,
91  myCellData->dt
92  );
93 
94  watchKernelCompute.stop();
95  measurement.setValue(watchKernelCompute.getCalendarTime());
96 }
97 
98 inline void firstTask(
100  const int leftFaceId,
101  const int rightFaceId,
102  const int faceDirection,
103  tarch::timing::Measurement& measurement
104 ) {
105 
106  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
107 
108  // We are assuming that there are no boundaries okay
109  // In a real ExaHyPE application boundaries will exist
110  // but we can still assume that the corresponding faces
111  // will have received data
112  kernels::AderSolver::riemannSolver<SolverPrecision>(
113  repositories::instanceOfAderSolver,
114  myFaceData->QOut[leftFaceId][1], // lFhbnd[d+DIMENSIONS],
115  myFaceData->QOut[rightFaceId][0],
116  myFaceData->QIn[leftFaceId][1], // lFhbnd[d+DIMENSIONS],
117  myFaceData->QIn[rightFaceId][0],
118  0.5 * (myFaceData->t[leftFaceId] + myFaceData->t[rightFaceId]),
119  0.5 * (myFaceData->dt[leftFaceId] + myFaceData->dt[rightFaceId]),
120  0.5 * (myFaceData->faceCentre[leftFaceId] + myFaceData->faceCentre[rightFaceId]),
121  0.5 * (myFaceData->faceSize[leftFaceId] + myFaceData->faceSize[rightFaceId]),
122  faceDirection,
123  false,
124  0
125  );
126 
127  watchKernelCompute.stop();
128  measurement.setValue(watchKernelCompute.getCalendarTime());
129 }
130 
131 
132 inline void secondTask(
133  exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
135  const int cellId,
136  const int faceId,
137  tarch::timing::Measurement& measurement
138 ) {
139 
140  // Assume that the faces for a given cell are one after another
141  // in typical Peano order, e.g. left, lower, right, upper
142  // Assume that the faces for a given cell are one after another
143  // in typical Peano order, e.g. left, lower, right, upper
144  SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
145  myFaceData->QIn[faceId + 0][1],
146  myFaceData->QIn[faceId + 1][1],
147 #if DIMENSIONS == 3
148  myFaceData->QIn[faceId + 2][1],
149 #endif
150  myFaceData->QIn[faceId + DIMENSIONS + 0][0],
151  myFaceData->QIn[faceId + DIMENSIONS + 1][0]
152 #if DIMENSIONS == 3
153  ,
154  myFaceData->QIn[faceId + DIMENSIONS + 2][0]
155 #endif
156  };
157 
158  SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
159  myFaceData->QOut[faceId + 0][1],
160  myFaceData->QOut[faceId + 1][1],
161 #if DIMENSIONS == 3
162  myFaceData->QOut[faceId + 2][1],
163 #endif
164  myFaceData->QOut[faceId + DIMENSIONS + 0][0],
165  myFaceData->QOut[faceId + DIMENSIONS + 1][0]
166 #if DIMENSIONS == 3
167  ,
168  myFaceData->QOut[faceId + DIMENSIONS + 2][0]
169 #endif
170  };
171 
172  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
173 
174  for (int d = 0; d < DIMENSIONS; d++) {
175  const int direction = d;
176 
177  const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
178  // Negative face
179  kernels::AderSolver::faceIntegral(
180  myCellData->QIn[cellId],
181  myFaceData->QOut[faceId + d][1],
182  direction,
183  0,
184  inverseDxDirection,
185  myCellData->dt
186  );
187 
188  // Positive face
189  kernels::AderSolver::faceIntegral(
190  myCellData->QIn[cellId],
191  myFaceData->QOut[faceId + DIMENSIONS][0],
192  direction,
193  1,
194  inverseDxDirection,
195  myCellData->dt
196  );
197 
198  } // for d
199 
200  myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
201  repositories::instanceOfAderSolver,
202  myCellData->QIn[cellId],
203  myCellData->cellCentre[cellId],
204  myCellData->cellSize[cellId],
205  myCellData->t,
206  myCellData->dt
207  );
208 
209  int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
210  repositories::instanceOfAderSolver,
211  lQhbnd,
212  lFhbnd,
213  myCellData->QIn[cellId],
214  myCellData->cellCentre[cellId],
215  myCellData->cellSize[cellId],
216  myCellData->t,
217  myCellData->dt
218  );
219 
220  watchKernelCompute.stop();
221  measurement.setValue(watchKernelCompute.getCalendarTime());
222 }
223 
224 
226  int numberOfCells,
227  double timeStamp,
228  double timeStepSize,
229  const tarch::la::Vector<DIMENSIONS, double> cellCenter,
230  const tarch::la::Vector<DIMENSIONS, double> cellSize
231 ) {
232 
233  tarch::timing::Measurement timingComputeKernel;
234 
235  exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
236  exahype2::FaceData<SolverPrecision, SolverPrecision> faceData(2 * DIMENSIONS * numberOfCells);
237 
238  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
239  cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
241  tarch::MemoryLocation::Heap
242  );
243  cellData.t = timeStamp;
244  cellData.dt = timeStepSize;
245  cellData.QOut[cellIndex] = nullptr;
246  cellData.cellCentre[cellIndex] = cellCenter;
247  cellData.cellSize[cellIndex] = cellSize;
248  cellData.maxEigenvalue[cellIndex] = 0.0;
249 
250  initInputData(cellData.QIn[cellIndex], cellCenter, cellSize);
251  }
252 
253  // We need one instance of face per face of a cell, e.g.
254  // 2*DIMENSIONS*numberOfCells faces.
255  // Here we just number them in the Peano order for each cell,
256  // so face (2*DIMENSIONS*i+j) will be the jth face of the ith cell
257  for (int faceIndex = 0; faceIndex < 2 * DIMENSIONS * numberOfCells; faceIndex++) {
258  // Allocating data for the left and right sides of a given face
259  faceData.QIn[faceIndex][0] = tarch::allocateMemory<SolverPrecision>(
260  kernels::AderSolver::getBndFaceSize(),
261  tarch::MemoryLocation::Heap
262  );
263  faceData.QIn[faceIndex][1] = tarch::allocateMemory<SolverPrecision>(
264  kernels::AderSolver::getBndFaceSize(),
265  tarch::MemoryLocation::Heap
266  );
267  faceData.QOut[faceIndex][0] = tarch::allocateMemory<SolverPrecision>(
268  kernels::AderSolver::getBndFluxSize(),
269  tarch::MemoryLocation::Heap
270  );
271  faceData.QOut[faceIndex][1] = tarch::allocateMemory<SolverPrecision>(
272  kernels::AderSolver::getBndFluxSize(),
273  tarch::MemoryLocation::Heap
274  );
275 
276  faceData.t[faceIndex] = timeStamp;
277  faceData.dt[faceIndex] = timeStepSize;
278  faceData.faceCentre[faceIndex] = cellCenter;
279  faceData.faceSize[faceIndex] = cellSize;
280  }
281 
282  int numberOfThreads = 1;
283 
284  #if defined(WITH_OPENMP)
285  for (int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
286  numberOfThreads = NumberOfLaunchingThreads[threadIndex];
287  #endif
288 
289  // Initial task
290  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
291  initialTask(
292  &cellData,
293  &faceData,
294  cellIndex,
295  cellIndex * 2 * DIMENSIONS, // just passing the first of a cells faces, it can get the others by pointer
296  // arithmetic
298  );
299  }
300 
301  timingComputeKernel.erase();
302  tarch::timing::Measurement timingKernelLaunch;
303 
304  for (int sample = 0; sample <= NumberOfSamples; sample++) {
305 
306  tarch::timing::Watch watchKernelLaunch("::runBenchmarks", "assessKernel(...)", false);
307 
308  #if defined(WITH_OPENMP)
309  #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
310  #endif
311  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
312 
313  for (int d = 0; d < DIMENSIONS; d++) {
314  // Stitch negative and positive face together and just
315  // perform the Riemann kernel on that.
316  firstTask(
317  &faceData,
318  2 * DIMENSIONS * cellIndex + d + DIMENSIONS,
319  2 * DIMENSIONS * cellIndex + d,
320  d,
322  );
323  }
324  }
325 
326  #if defined(WITH_OPENMP)
327  #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
328  #endif
329  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
330  cellData.maxEigenvalue[cellIndex] = 0.0;
331  secondTask(
332  &cellData,
333  &faceData,
334  cellIndex,
335  cellIndex * 2 * DIMENSIONS, // just passing the first of a cells faces, it can get the others by pointer
336  // arithmetic
338  );
339  }
340 
341  watchKernelLaunch.stop();
342  timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
343  }
344 
345 
346  reportRuntime("variant 5", timingComputeKernel, timingKernelLaunch, numberOfCells, numberOfThreads, _log);
347  // allocateAndStoreOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
348  // validateOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
349 
350  #if defined(WITH_OPENMP)
351  }//threadIndex
352  #endif
353 
354  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
355  tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
356  tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
357  }
358  for (int faceIndex = 0; faceIndex < 2 * DIMENSIONS * numberOfCells; faceIndex++) {
359  tarch::freeMemory(faceData.QIn[faceIndex][0], tarch::MemoryLocation::Heap);
360  tarch::freeMemory(faceData.QIn[faceIndex][1], tarch::MemoryLocation::Heap);
361  tarch::freeMemory(faceData.QOut[faceIndex][0], tarch::MemoryLocation::Heap);
362  tarch::freeMemory(faceData.QOut[faceIndex][1], tarch::MemoryLocation::Heap);
363  }
364 }
void secondTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition: Variant5.cpp:132
void firstTask(exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int leftFaceId, const int rightFaceId, const int faceDirection, tarch::timing::Measurement &measurement)
Definition: Variant5.cpp:98
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition: Variant5.cpp:41
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
Definition: Utils.h:14
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
Definition: Utils.h:68
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
Definition: Utils.h:30
tarch::logging::Log _log
This is variant 1 of the fused kernels.
tarch::logging::Log _log
This is variant 5 of the fused kernels.
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Definition: Variant5.cpp:225
Represents the sides of one face, with 2 sides (left and right) to a face For ADER QIn will contain t...
Definition: LRFaceData.h:20
tarch::la::Vector< DIMENSIONS, double > * faceSize
Definition: LRFaceData.h:27
inType *(* QIn)[2]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
Definition: LRFaceData.h:25
tarch::la::Vector< DIMENSIONS, double > * faceCentre
Definition: LRFaceData.h:26
outType *(* QOut)[2]
Out values.
Definition: LRFaceData.h:60