Peano
Variant6.cpp
Go to the documentation of this file.
1 
31 #include "exahype2/CellData.h"
32 #include "kernels/AderSolver/BufferSizes.h"
33 #include "kernels/AderSolver/FaceIntegral.h"
34 #include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
35 #include "kernels/AderSolver/MaxScaledEigenvalue.h"
36 #include "kernels/AderSolver/RiemannSolver.h"
37 #include "LRFaceData.h"
38 #include "repositories/SolverRepository.h"
39 #include "Utils.h"
40 #include "Variants.h"
41 
42 #include <cstring>
43 
44 tarch::logging::Log variant6::_log("variants6::");
45 
47 
48 inline void initialTask(
49  exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
51  const int cellId,
52  const int faceId,
53  tarch::timing::Measurement& measurement
54 ) {
55 
56  // Assume that the faces for a given cell are one after another in FaceData
57  // in typical Peano order, e.g. left, lower, right, upper
58  // so faceId is the left, lower (front) face,
59  // faceId+1 is the one after this, and so on.
60  SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
61  myFaceData->QIn[faceId + 0][1],
62  myFaceData->QIn[faceId + 1][1],
63 #if DIMENSIONS == 3
64  myFaceData->QIn[faceId + 2][1],
65 #endif
66  myFaceData->QIn[faceId + DIMENSIONS + 0][0],
67  myFaceData->QIn[faceId + DIMENSIONS + 1][0]
68 #if DIMENSIONS == 3
69  ,
70  myFaceData->QIn[faceId + DIMENSIONS + 2][0]
71 #endif
72  };
73 
74  SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
75  myFaceData->QOut[faceId + 0][1],
76  myFaceData->QOut[faceId + 1][1],
77 #if DIMENSIONS == 3
78  myFaceData->QOut[faceId + 2][1],
79 #endif
80  myFaceData->QOut[faceId + DIMENSIONS + 0][0],
81  myFaceData->QOut[faceId + DIMENSIONS + 1][0]
82 #if DIMENSIONS == 3
83  ,
84  myFaceData->QOut[faceId + DIMENSIONS + 2][0]
85 #endif
86  };
87 
88  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
89 
90  int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
91  repositories::instanceOfAderSolver,
92  lQhbnd,
93  lFhbnd,
94  myCellData->QIn[cellId],
95  myCellData->cellCentre[cellId],
96  myCellData->cellSize[cellId],
97  myCellData->t,
98  myCellData->dt
99  );
100 
101  watchKernelCompute.stop();
102  measurement.setValue(watchKernelCompute.getCalendarTime());
103 }
104 
105 inline void firstTask(
107  const int faceId,
108  const int faceDirection,
109  tarch::timing::Measurement& measurement
110 ) {
111 
112  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
113 
114  // We are assuming that there are no boundaries okay
115  // In a real ExaHyPE application boundaries will exist
116  // but we can still assume that the corresponding faces
117  // will have received data
118  kernels::AderSolver::riemannSolver<SolverPrecision>(
119  repositories::instanceOfAderSolver,
120  myFaceData->QOut[faceId][1],
121  myFaceData->QOut[faceId][0],
122  myFaceData->QIn[faceId][1],
123  myFaceData->QIn[faceId][0],
124  myFaceData->t[faceId],
125  myFaceData->dt[faceId],
126  myFaceData->faceCentre[faceId],
127  myFaceData->faceSize[faceId],
128  faceDirection,
129  false,
130  0
131  );
132 
133  watchKernelCompute.stop();
134  measurement.setValue(watchKernelCompute.getCalendarTime());
135 }
136 
137 
138 inline void secondTask(
139  exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
141  const int cellId,
142  const int faceId,
143  tarch::timing::Measurement& measurement
144 ) {
145 
146  // Assume that the faces for a given cell are one after another
147  // in typical Peano order, e.g. left, lower, right, upper
148  // Assume that the faces for a given cell are one after another
149  // in typical Peano order, e.g. left, lower, right, upper
150  SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
151  myFaceData->QIn[faceId + 0][1],
152  myFaceData->QIn[faceId + 1][1],
153 #if DIMENSIONS == 3
154  myFaceData->QIn[faceId + 2][1],
155 #endif
156  myFaceData->QIn[faceId + DIMENSIONS + 0][0],
157  myFaceData->QIn[faceId + DIMENSIONS + 1][0]
158 #if DIMENSIONS == 3
159  ,
160  myFaceData->QIn[faceId + DIMENSIONS + 2][0]
161 #endif
162  };
163 
164  SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
165  myFaceData->QOut[faceId + 0][1],
166  myFaceData->QOut[faceId + 1][1],
167 #if DIMENSIONS == 3
168  myFaceData->QOut[faceId + 2][1],
169 #endif
170  myFaceData->QOut[faceId + DIMENSIONS + 0][0],
171  myFaceData->QOut[faceId + DIMENSIONS + 1][0]
172 #if DIMENSIONS == 3
173  ,
174  myFaceData->QOut[faceId + DIMENSIONS + 2][0]
175 #endif
176  };
177 
178  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
179 
180  for (int d = 0; d < DIMENSIONS; d++) {
181  const int direction = d;
182 
183  const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
184  // Negative face
185  kernels::AderSolver::faceIntegral(
186  myCellData->QIn[cellId],
187  myFaceData->QOut[faceId + d][1],
188  direction,
189  0,
190  inverseDxDirection,
191  myCellData->dt
192  );
193 
194  // Positive face
195  kernels::AderSolver::faceIntegral(
196  myCellData->QIn[cellId],
197  myFaceData->QOut[faceId + DIMENSIONS][0],
198  direction,
199  1,
200  inverseDxDirection,
201  myCellData->dt
202  );
203 
204  } // for d
205 
206  myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
207  repositories::instanceOfAderSolver,
208  myCellData->QIn[cellId],
209  myCellData->cellCentre[cellId],
210  myCellData->cellSize[cellId],
211  myCellData->t,
212  myCellData->dt
213  );
214 
215  int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
216  repositories::instanceOfAderSolver,
217  lQhbnd,
218  lFhbnd,
219  myCellData->QIn[cellId],
220  myCellData->cellCentre[cellId],
221  myCellData->cellSize[cellId],
222  myCellData->t,
223  myCellData->dt
224  );
225 
226  watchKernelCompute.stop();
227  measurement.setValue(watchKernelCompute.getCalendarTime());
228 }
229 
230 
232  int numberOfCells,
233  double timeStamp,
234  double timeStepSize,
235  const tarch::la::Vector<DIMENSIONS, double> cellCenter,
236  const tarch::la::Vector<DIMENSIONS, double> cellSize
237 ) {
238 
239  tarch::timing::Measurement timingComputeKernel;
240 
241  exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
242  exahype2::FaceData<SolverPrecision, SolverPrecision> faceData(2 * DIMENSIONS * numberOfCells);
243 
244  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
245  cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
247  tarch::MemoryLocation::Heap
248  );
249  cellData.t = timeStamp;
250  cellData.dt = timeStepSize;
251  cellData.QOut[cellIndex] = nullptr;
252  cellData.cellCentre[cellIndex] = cellCenter;
253  cellData.cellSize[cellIndex] = cellSize;
254  cellData.maxEigenvalue[cellIndex] = 0.0;
255 
256  initInputData(cellData.QIn[cellIndex], cellCenter, cellSize);
257  }
258 
259  // We need one instance of face per face of a cell, e.g.
260  // 2*DIMENSIONS*numberOfCells faces.
261  // Here we just number them in the Peano order for each cell,
262  // so face (2*DIMENSIONS*i+j) will be the jth face of the ith cell
263 
276  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
277  const int faceIndex = 2 * DIMENSIONS * cellIndex;
278  for (int d = 0; d < DIMENSIONS; d++) {
279  // Allocating data for the right side of the left face and the left side of the right face
280  faceData.QIn[faceIndex + d][1] = tarch::allocateMemory<SolverPrecision>(
281  kernels::AderSolver::getBndFaceSize(),
282  tarch::MemoryLocation::Heap
283  );
284  faceData.QIn[faceIndex + d + DIMENSIONS][0] = tarch::allocateMemory<SolverPrecision>(
285  kernels::AderSolver::getBndFaceSize(),
286  tarch::MemoryLocation::Heap
287  );
288 
289  faceData.QOut[faceIndex + d][1] = tarch::allocateMemory<SolverPrecision>(
290  kernels::AderSolver::getBndFluxSize(),
291  tarch::MemoryLocation::Heap
292  );
293  faceData.QOut[faceIndex + d + DIMENSIONS][0] = tarch::allocateMemory<SolverPrecision>(
294  kernels::AderSolver::getBndFluxSize(),
295  tarch::MemoryLocation::Heap
296  );
297 
298  // Stitching these faces together
299  faceData.QIn[faceIndex + d][0] = faceData.QIn[faceIndex + d + DIMENSIONS][0];
300  faceData.QIn[faceIndex + d + DIMENSIONS][1] = faceData.QIn[faceIndex + d][1];
301 
302  faceData.QOut[faceIndex + d][0] = faceData.QOut[faceIndex + d + DIMENSIONS][0];
303  faceData.QOut[faceIndex + d + DIMENSIONS][1] = faceData.QOut[faceIndex + d][1];
304 
305  faceData.t[faceIndex + d] = timeStamp;
306  faceData.dt[faceIndex + d] = timeStepSize;
307  faceData.faceCentre[faceIndex + d] = cellCenter;
308  faceData.faceSize[faceIndex + d] = cellSize;
309 
310  faceData.t[faceIndex + d + DIMENSIONS] = timeStamp;
311  faceData.dt[faceIndex + d + DIMENSIONS] = timeStepSize;
312  faceData.faceCentre[faceIndex + d + DIMENSIONS] = cellCenter;
313  faceData.faceSize[faceIndex + d + DIMENSIONS] = cellSize;
314  }
315  }
316 
317  int numberOfThreads = 1;
318 
319  #if defined(WITH_OPENMP)
320  for (int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
321  numberOfThreads = NumberOfLaunchingThreads[threadIndex];
322  #endif
323 
324  // Initial task
325  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
326  initialTask(
327  &cellData,
328  &faceData,
329  cellIndex,
330  cellIndex * 2 * DIMENSIONS, // just passing the first of a cells faces, it can get the others by pointer
331  // arithmetic
333  );
334  }
335 
336  timingComputeKernel.erase();
337  tarch::timing::Measurement timingKernelLaunch;
338 
339  for (int sample = 0; sample <= NumberOfSamples; sample++) {
340 
341  tarch::timing::Watch watchKernelLaunch("::runBenchmarks", "assessKernel(...)", false);
342 
343  #if defined(WITH_OPENMP)
344  #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
345  #endif
346  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
347 
348  for (int d = 0; d < DIMENSIONS; d++) {
349  // We have stitched the negative and positive faces together and therefore
350  // only need to perform the Riemann kernel on one of them, as the other points to the same data.
351  firstTask(&faceData, 2 * DIMENSIONS * cellIndex + d, d, timingComputeKernel);
352  }
353  }
354 
355  #if defined(WITH_OPENMP)
356  #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
357  #endif
358  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
359  cellData.maxEigenvalue[cellIndex] = 0.0;
360  secondTask(
361  &cellData,
362  &faceData,
363  cellIndex,
364  cellIndex * 2 * DIMENSIONS, // just passing the first of a cells faces, it can get the others by pointer
365  // arithmetic
367  );
368  }
369 
370  watchKernelLaunch.stop();
371  timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
372  }
373 
374 
375  reportRuntime("variant 6", timingComputeKernel, timingKernelLaunch, numberOfCells, numberOfThreads, _log);
376  // allocateAndStoreOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
377  // validateOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
378 
379  #if defined(WITH_OPENMP)
380  }//threadIndex
381  #endif
382 
383  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
384  tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
385  tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
386 
387  const int faceIndex = 2 * DIMENSIONS * cellIndex;
388  for (int d = 0; d < DIMENSIONS; d++) {
389  // Freeing data for the right side of the left face and the left side of the right face
390  tarch::freeMemory(faceData.QIn[faceIndex + d][1], tarch::MemoryLocation::Heap);
391  tarch::freeMemory(faceData.QIn[faceIndex + d + DIMENSIONS][0], tarch::MemoryLocation::Heap);
392  tarch::freeMemory(faceData.QOut[faceIndex + d][1], tarch::MemoryLocation::Heap);
393  tarch::freeMemory(faceData.QOut[faceIndex + d + DIMENSIONS][0], tarch::MemoryLocation::Heap);
394  }
395  }
396 }
void firstTask(exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int faceId, const int faceDirection, tarch::timing::Measurement &measurement)
Definition: Variant6.cpp:105
void secondTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition: Variant6.cpp:138
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition: Variant6.cpp:48
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
Definition: Utils.h:14
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
Definition: Utils.h:68
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
Definition: Utils.h:30
tarch::logging::Log _log
This is variant 1 of the fused kernels.
tarch::logging::Log _log
This is variant 6 of the fused kernels.
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Definition: Variant6.cpp:231
Represents the sides of one face, with 2 sides (left and right) to a face For ADER QIn will contain t...
Definition: LRFaceData.h:20
tarch::la::Vector< DIMENSIONS, double > * faceSize
Definition: LRFaceData.h:27
inType *(* QIn)[2]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
Definition: LRFaceData.h:25
tarch::la::Vector< DIMENSIONS, double > * faceCentre
Definition: LRFaceData.h:26
outType *(* QOut)[2]
Out values.
Definition: LRFaceData.h:60