Peano
Variant3.cpp
Go to the documentation of this file.
1 
27 #include "CellFaceData.h"
28 #include "exahype2/CellData.h"
29 #include "kernels/AderSolver/BufferSizes.h"
30 #include "kernels/AderSolver/FaceIntegral.h"
31 #include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
32 #include "kernels/AderSolver/MaxScaledEigenvalue.h"
33 #include "kernels/AderSolver/RiemannSolver.h"
34 #include "repositories/SolverRepository.h"
35 #include "Utils.h"
36 #include "Variants.h"
37 
38 #include <cstring>
39 
40 tarch::logging::Log variant3::_log("variants3::");
41 
43 
44 inline void initialTask(
45  exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
47  const int cellId,
48  const int faceId,
49  tarch::timing::Measurement& measurement
50 ) {
51 
52  // Assume that the faces for a given cell are one after another
53  // in typical Peano order, e.g. left, lower, right, upper
54  SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
55  &myFaceData->QIn[faceId][0][kernels::AderSolver::getBndFaceSize()],
56  &myFaceData->QIn[faceId][1][kernels::AderSolver::getBndFaceSize()],
57 #if DIMENSIONS == 3
58  &myFaceData->QIn[faceId][2][kernels::AderSolver::getBndFaceSize()],
59 #endif
60  &myFaceData->QIn[faceId][DIMENSIONS + 0][0],
61  &myFaceData->QIn[faceId][DIMENSIONS + 1][0]
62 #if DIMENSIONS == 3
63  ,
64  &myFaceData->QIn[faceId][DIMENSIONS + 2][0]
65 #endif
66  };
67 
68  SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
69  &myFaceData->QOut[faceId][0][kernels::AderSolver::getBndFluxSize()],
70  &myFaceData->QOut[faceId][1][kernels::AderSolver::getBndFluxSize()],
71 #if DIMENSIONS == 3
72  &myFaceData->QOut[faceId][2][kernels::AderSolver::getBndFluxSize()],
73 #endif
74  &myFaceData->QOut[faceId][DIMENSIONS + 0][0],
75  &myFaceData->QOut[faceId][DIMENSIONS + 1][0]
76 #if DIMENSIONS == 3
77  ,
78  &myFaceData->QOut[faceId][DIMENSIONS + 2][0]
79 #endif
80  };
81 
82  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
83 
84  int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
85  repositories::instanceOfAderSolver,
86  lQhbnd,
87  lFhbnd,
88  myCellData->QIn[cellId],
89  myCellData->cellCentre[cellId],
90  myCellData->cellSize[cellId],
91  myCellData->t,
92  myCellData->dt
93  );
94 
95  watchKernelCompute.stop();
96  measurement.setValue(watchKernelCompute.getCalendarTime());
97 }
98 
99 inline void firstTask(
101  const int faceId,
102  const int faceDirection,
103  tarch::timing::Measurement& measurement
104 ) {
105 
106  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
107 
108  tarch::la::Vector<DIMENSIONS, double> faceCenter = myFaceData->cellCentre[faceId];
109  faceCenter[faceDirection % DIMENSIONS] += 0.5 * myFaceData->cellSize[faceId][faceDirection % DIMENSIONS];
110 
111  // We are assuming that there are no boundaries okay
112  // In a real ExaHyPE application boundaries will exist
113  // but we can still assume that the corresponding faces
114  // will have received data
115  kernels::AderSolver::riemannSolver<SolverPrecision>(
116  repositories::instanceOfAderSolver,
117  &myFaceData->QOut[faceId][faceDirection][0],
118  &myFaceData->QOut[faceId][faceDirection][kernels::AderSolver::getBndFluxSize()],
119  &myFaceData->QIn[faceId][faceDirection][0],
120  &myFaceData->QIn[faceId][faceDirection][kernels::AderSolver::getBndFaceSize()],
121  myFaceData->t[faceId],
122  myFaceData->dt[faceId],
123  faceCenter,
124  myFaceData->cellSize[faceId],
125  faceDirection,
126  false,
127  0
128  );
129 
130  watchKernelCompute.stop();
131  measurement.setValue(watchKernelCompute.getCalendarTime());
132 }
133 
134 inline void secondTask(
135  exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
137  const int cellId,
138  const int faceId,
139  tarch::timing::Measurement& measurement
140 ) {
141 
142  // Assume that the faces for a given cell are one after another
143  // in typical Peano order, e.g. left, lower, right, upper
144  SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
145  &myFaceData->QIn[faceId][0][kernels::AderSolver::getBndFaceSize()],
146  &myFaceData->QIn[faceId][1][kernels::AderSolver::getBndFaceSize()],
147 #if DIMENSIONS == 3
148  &myFaceData->QIn[faceId][2][kernels::AderSolver::getBndFaceSize()],
149 #endif
150  &myFaceData->QIn[faceId][DIMENSIONS + 0][0],
151  &myFaceData->QIn[faceId][DIMENSIONS + 1][0]
152 #if DIMENSIONS == 3
153  ,
154  &myFaceData->QIn[faceId][DIMENSIONS + 2][0]
155 #endif
156  };
157 
158  SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
159  &myFaceData->QOut[faceId][0][kernels::AderSolver::getBndFluxSize()],
160  &myFaceData->QOut[faceId][1][kernels::AderSolver::getBndFluxSize()],
161 #if DIMENSIONS == 3
162  &myFaceData->QOut[faceId][2][kernels::AderSolver::getBndFluxSize()],
163 #endif
164  &myFaceData->QOut[faceId][DIMENSIONS + 0][0],
165  &myFaceData->QOut[faceId][DIMENSIONS + 1][0]
166 #if DIMENSIONS == 3
167  ,
168  &myFaceData->QOut[faceId][DIMENSIONS + 2][0]
169 #endif
170  };
171 
172  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
173 
174  for (int d = 0; d < DIMENSIONS; d++) {
175  const int direction = d;
176 
177  const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
178  // Negative face
179  kernels::AderSolver::faceIntegral(
180  myCellData->QIn[cellId],
181  &myFaceData->QOut[faceId][d][kernels::AderSolver::getBndFluxSize()],
182  direction,
183  0,
184  inverseDxDirection,
185  myCellData->dt
186  );
187 
188  // Positive face
189  kernels::AderSolver::faceIntegral(
190  myCellData->QIn[cellId],
191  &myFaceData->QOut[faceId][d + DIMENSIONS][0],
192  direction,
193  1,
194  inverseDxDirection,
195  myCellData->dt
196  );
197 
198  } // for d
199 
200  myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
201  repositories::instanceOfAderSolver,
202  myCellData->QIn[cellId],
203  myCellData->cellCentre[cellId],
204  myCellData->cellSize[cellId],
205  myCellData->t,
206  myCellData->dt
207  );
208 
209  int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
210  repositories::instanceOfAderSolver,
211  lQhbnd,
212  lFhbnd,
213  myCellData->QIn[cellId],
214  myCellData->cellCentre[cellId],
215  myCellData->cellSize[cellId],
216  myCellData->t,
217  myCellData->dt
218  );
219 
220  watchKernelCompute.stop();
221  measurement.setValue(watchKernelCompute.getCalendarTime());
222 }
223 
224 
226  int numberOfCells,
227  double timeStamp,
228  double timeStepSize,
229  const tarch::la::Vector<DIMENSIONS, double> cellCenter,
230  const tarch::la::Vector<DIMENSIONS, double> cellSize
231 ) {
232 
233  tarch::timing::Measurement timingComputeKernel;
234 
235  exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
237 
238  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
239  cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
241  tarch::MemoryLocation::Heap
242  );
243  cellData.t = timeStamp;
244  cellData.dt = timeStepSize;
245  cellData.QOut[cellIndex] = nullptr;
246  cellData.cellCentre[cellIndex] = cellCenter;
247  cellData.cellSize[cellIndex] = cellSize;
248  cellData.maxEigenvalue[cellIndex] = 0.0;
249 
250  initInputData(cellData.QIn[cellIndex], cellCenter, cellSize);
251  std::memset(cellData.QOut[cellIndex], 0.0, NumberOfOutputEntriesPerCell * sizeof(SolverPrecision));
252 
253  for (int i = 0; i < DIMENSIONS; i++) {
254  cellFaceData.QIn[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
255  2 * kernels::AderSolver::getBndFaceSize(),
256  tarch::MemoryLocation::Heap
257  );
258  cellFaceData.QOut[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
259  2 * kernels::AderSolver::getBndFluxSize(),
260  tarch::MemoryLocation::Heap
261  );
262  }
263  // Stitch faces together by connecting the left and right faces of any given cell
264  for (int i = 0; i < DIMENSIONS; i++) {
265  cellFaceData.QIn[cellIndex][i + DIMENSIONS] = cellFaceData.QIn[cellIndex][i];
266  cellFaceData.QOut[cellIndex][i + DIMENSIONS] = cellFaceData.QOut[cellIndex][i];
267  }
268 
269  cellFaceData.t[cellIndex] = timeStamp;
270  cellFaceData.dt[cellIndex] = timeStepSize;
271  cellFaceData.cellCentre[cellIndex] = cellCenter;
272  cellFaceData.cellSize[cellIndex] = cellSize;
273 
274  std::memset(cellData.QOut[cellIndex], 0.0, NumberOfOutputEntriesPerCell * sizeof(SolverPrecision));
275  }
276 
277  int numberOfThreads = 1;
278 
279  #if defined(WITH_OPENMP)
280  for (int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
281  numberOfThreads = NumberOfLaunchingThreads[threadIndex];
282  #endif
283 
284  // Initial task
285  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
286  initialTask(&cellData, &cellFaceData, cellIndex, cellIndex, timingComputeKernel);
287  }
288 
289  timingComputeKernel.erase();
290  tarch::timing::Measurement timingKernelLaunch;
291 
292  for (int sample = 0; sample <= NumberOfSamples; sample++) {
293 
294  tarch::timing::Watch watchKernelLaunch("::runBenchmarks", "assessKernel(...)", false);
295 
296  #if defined(WITH_OPENMP)
297  #pragma omp parallel num_threads(NumberOfLaunchingThreads[threadIndex])
298  #endif
299  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
300  // We're not actually simulating a real grid, instead
301  // we are making each cell it's own neighbour, as though
302  // it were a whole periodic domain
303  firstTask(&cellFaceData, cellIndex, 0, timingComputeKernel);
304  firstTask(&cellFaceData, cellIndex, 1, timingComputeKernel);
305  }
306  #if defined(WITH_OPENMP)
307  #pragma omp parallel num_threads(NumberOfLaunchingThreads[threadIndex])
308  #endif
309  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
310  cellData.maxEigenvalue[cellIndex] = 0.0;
311  secondTask(&cellData, &cellFaceData, cellIndex, cellIndex, timingComputeKernel);
312  }
313 
314  watchKernelLaunch.stop();
315  timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
316  }
317 
318 
319  reportRuntime("variant 3", timingComputeKernel, timingKernelLaunch, numberOfCells, numberOfThreads, _log);
320  // allocateAndStoreOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
321  // validateOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
322 
323  #if defined(WITH_OPENMP)
324  }//threadIndex
325  #endif
326 
327  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
328  tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
329  tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
330 
331  for (int i = 0; i < DIMENSIONS; i++) {
332  tarch::freeMemory(cellFaceData.QIn[cellIndex][i], tarch::MemoryLocation::Heap);
333  tarch::freeMemory(cellFaceData.QOut[cellIndex][i], tarch::MemoryLocation::Heap);
334  }
335  }
336 }
void firstTask(exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int faceId, const int faceDirection, tarch::timing::Measurement &measurement)
Definition: Variant3.cpp:99
void secondTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition: Variant3.cpp:134
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition: Variant3.cpp:44
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
Definition: Utils.h:14
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
Definition: Utils.h:68
constexpr int NumberOfOutputEntriesPerCell
Definition: Utils.h:20
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
Definition: Utils.h:30
tarch::logging::Log _log
This is variant 1 of the fused kernels.
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Definition: Variant3.cpp:225
tarch::logging::Log _log
This is variant 3 of the fused kernels.
Represents the faces of one cell, with a total of 2*Dim faces per cell For ADER QIn will contain the ...
Definition: CellFaceData.h:20
tarch::la::Vector< DIMENSIONS, double > * cellSize
Definition: CellFaceData.h:27
tarch::la::Vector< DIMENSIONS, double > * cellCentre
Definition: CellFaceData.h:26
outType *(* QOut)[2 *DIMENSIONS]
Out values.
Definition: CellFaceData.h:60
inType *(* QIn)[2 *DIMENSIONS]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
Definition: CellFaceData.h:25