Peano
Variant1.cpp
Go to the documentation of this file.
1 
28 #include "CellFaceData.h"
29 #include "exahype2/CellData.h"
30 #include "kernels/AderSolver/BufferSizes.h"
31 #include "kernels/AderSolver/FaceIntegral.h"
32 #include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
33 #include "kernels/AderSolver/MaxScaledEigenvalue.h"
34 #include "kernels/AderSolver/RiemannSolver.h"
35 #include "repositories/SolverRepository.h"
36 #include "Utils.h"
37 #include "Variants.h"
38 
39 #include <cstring>
40 
41 tarch::logging::Log variant1::_log("variants1::");
42 
44 
45 inline void initialTask(
46  exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
48  const int cellId,
49  const int faceId,
50  tarch::timing::Measurement& measurement
51 ) {
52 
53  // Assume that the faces for a given cell are one after another
54  // in typical Peano order, e.g. left, lower, right, upper
55  SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
56  myFaceData->QIn[faceId][0],
57  myFaceData->QIn[faceId][1],
58 #if DIMENSIONS == 3
59  myFaceData->QIn[faceId][2],
60 #endif
61  myFaceData->QIn[faceId][DIMENSIONS + 0],
62  myFaceData->QIn[faceId][DIMENSIONS + 1]
63 #if DIMENSIONS == 3
64  ,
65  myFaceData->QIn[faceId][DIMENSIONS + 2]
66 #endif
67  };
68 
69  SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
70  myFaceData->QOut[faceId][0],
71  myFaceData->QOut[faceId][1],
72 #if DIMENSIONS == 3
73  myFaceData->QOut[faceId][2],
74 #endif
75  myFaceData->QOut[faceId][DIMENSIONS + 0],
76  myFaceData->QOut[faceId][DIMENSIONS + 1]
77 #if DIMENSIONS == 3
78  ,
79  myFaceData->QOut[faceId][DIMENSIONS + 2]
80 #endif
81  };
82 
83  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
84 
85  int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
86  repositories::instanceOfAderSolver,
87  lQhbnd,
88  lFhbnd,
89  myCellData->QIn[cellId],
90  myCellData->cellCentre[cellId],
91  myCellData->cellSize[cellId],
92  myCellData->t,
93  myCellData->dt
94  );
95 
96  watchKernelCompute.stop();
97  measurement.setValue(watchKernelCompute.getCalendarTime());
98 }
99 
100 inline void firstTask(
102  const int leftFaceId,
103  const int rightFaceId,
104  const int faceDirection,
105  tarch::timing::Measurement& measurement
106 ) {
107 
108  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
109 
110  // We are assuming that there are no boundaries okay
111  // In a real ExaHyPE application boundaries will exist
112  // but we can still assume that the corresponding faces
113  // will have received data
114  kernels::AderSolver::riemannSolver<SolverPrecision>(
115  repositories::instanceOfAderSolver,
116  myFaceData->QOut[leftFaceId][faceDirection + DIMENSIONS], // lFhbnd[d+DIMENSIONS],
117  myFaceData->QOut[rightFaceId][faceDirection],
118  myFaceData->QIn[leftFaceId][faceDirection + DIMENSIONS], // lFhbnd[d+DIMENSIONS],
119  myFaceData->QIn[rightFaceId][faceDirection],
120  0.5 * (myFaceData->t[leftFaceId] + myFaceData->t[rightFaceId]),
121  0.5 * (myFaceData->dt[leftFaceId] + myFaceData->dt[rightFaceId]),
122  0.5 * (myFaceData->cellCentre[leftFaceId] + myFaceData->cellCentre[rightFaceId]),
123  0.5 * (myFaceData->cellSize[leftFaceId] + myFaceData->cellSize[rightFaceId]),
124  faceDirection,
125  false,
126  0
127  );
128 
129  watchKernelCompute.stop();
130  measurement.setValue(watchKernelCompute.getCalendarTime());
131 }
132 
133 inline void secondTask(
134  exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
136  const int cellId,
137  const int faceId,
138  tarch::timing::Measurement& measurement
139 ) {
140 
141  // Assume that the faces for a given cell are one after another
142  // in typical Peano order, e.g. left, lower, right, upper
143  SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
144  myFaceData->QIn[faceId][0],
145  myFaceData->QIn[faceId][1],
146 #if DIMENSIONS == 3
147  myFaceData->QIn[faceId][2],
148 #endif
149  myFaceData->QIn[faceId][DIMENSIONS + 0],
150  myFaceData->QIn[faceId][DIMENSIONS + 1]
151 #if DIMENSIONS == 3
152  ,
153  myFaceData->QIn[faceId][DIMENSIONS + 2]
154 #endif
155  };
156 
157  SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
158  myFaceData->QOut[faceId][0],
159  myFaceData->QOut[faceId][1],
160 #if DIMENSIONS == 3
161  myFaceData->QOut[faceId][2],
162 #endif
163  myFaceData->QOut[faceId][DIMENSIONS + 0],
164  myFaceData->QOut[faceId][DIMENSIONS + 1]
165 #if DIMENSIONS == 3
166  ,
167  myFaceData->QOut[faceId][DIMENSIONS + 2]
168 #endif
169  };
170 
171  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
172 
173  for (int d = 0; d < DIMENSIONS; d++) {
174  const int direction = d;
175 
176  const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
177  // Negative face
178  kernels::AderSolver::faceIntegral(
179  myCellData->QIn[cellId],
180  myFaceData->QOut[faceId][d],
181  direction,
182  0,
183  inverseDxDirection,
184  myCellData->dt
185  );
186 
187  // Positive face
188  kernels::AderSolver::faceIntegral(
189  myCellData->QIn[cellId],
190  myFaceData->QOut[faceId][d + DIMENSIONS],
191  direction,
192  1,
193  inverseDxDirection,
194  myCellData->dt
195  );
196 
197  } // for d
198 
199  myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
200  repositories::instanceOfAderSolver,
201  myCellData->QIn[cellId],
202  myCellData->cellCentre[cellId],
203  myCellData->cellSize[cellId],
204  myCellData->t,
205  myCellData->dt
206  );
207 
208  int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
209  repositories::instanceOfAderSolver,
210  lQhbnd,
211  lFhbnd,
212  myCellData->QIn[cellId],
213  myCellData->cellCentre[cellId],
214  myCellData->cellSize[cellId],
215  myCellData->t,
216  myCellData->dt
217  );
218 
219  watchKernelCompute.stop();
220  measurement.setValue(watchKernelCompute.getCalendarTime());
221 }
222 
223 
225  int numberOfCells,
226  double timeStamp,
227  double timeStepSize,
228  const tarch::la::Vector<DIMENSIONS, double> cellCenter,
229  const tarch::la::Vector<DIMENSIONS, double> cellSize
230 ) {
231 
232  tarch::timing::Measurement timingComputeKernel;
233 
234  exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
236 
237  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
238  cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
240  tarch::MemoryLocation::Heap
241  );
242  cellData.t = timeStamp;
243  cellData.dt = timeStepSize;
244  cellData.QOut[cellIndex] = nullptr; // tarch::allocateMemory<SolverPrecision>(NumberOfOutputEntriesPerCell,
245  // tarch::MemoryLocation::Heap);
246  cellData.cellCentre[cellIndex] = cellCenter;
247  cellData.cellSize[cellIndex] = cellSize;
248  cellData.maxEigenvalue[cellIndex] = 0.0;
249 
250  initInputData(cellData.QIn[cellIndex], cellCenter, cellSize);
251  std::memset(cellData.QOut[cellIndex], 0.0, NumberOfOutputEntriesPerCell * sizeof(SolverPrecision));
252 
253  for (int i = 0; i < 2 * DIMENSIONS; i++) {
254  cellFaceData.QIn[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
255  kernels::AderSolver::getBndFaceSize(),
256  tarch::MemoryLocation::Heap
257  );
258  cellFaceData.QOut[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
259  kernels::AderSolver::getBndFluxSize(),
260  tarch::MemoryLocation::Heap
261  );
262  }
263 
264  cellFaceData.t[cellIndex] = timeStamp;
265  cellFaceData.dt[cellIndex] = timeStepSize;
266  cellFaceData.cellCentre[cellIndex] = cellCenter;
267  cellFaceData.cellSize[cellIndex] = cellSize;
268 
269  std::memset(cellData.QOut[cellIndex], 0.0, NumberOfOutputEntriesPerCell * sizeof(SolverPrecision));
270  }
271 
272  int numberOfThreads = 1;
273 
274  #if defined(WITH_OPENMP)
275  for (int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
276  numberOfThreads = NumberOfLaunchingThreads[threadIndex];
277  #endif
278 
279  // Initial task
280  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
281  initialTask(&cellData, &cellFaceData, cellIndex, cellIndex, timingComputeKernel);
282  }
283 
284  timingComputeKernel.erase();
285  tarch::timing::Measurement timingKernelLaunch;
286 
287  for (int sample = 0; sample <= NumberOfSamples; sample++) {
288 
289  tarch::timing::Watch watchKernelLaunch("::runBenchmarks", "assessKernel(...)", false);
290 
291  #if defined(WITH_OPENMP)
292  #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
293  #endif
294  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
295  // We're not actually simulating a real grid, instead
296  // we are making each cell it's own neighbour, as though
297  // it were a whole periodic domain
298  firstTask(&cellFaceData, cellIndex, cellIndex, 0, timingComputeKernel);
299  firstTask(&cellFaceData, cellIndex, cellIndex, 1, timingComputeKernel);
300  }
301  #if defined(WITH_OPENMP)
302  #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
303  #endif
304  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
305  cellData.maxEigenvalue[cellIndex] = 0.0;
306  secondTask(&cellData, &cellFaceData, cellIndex, cellIndex, timingComputeKernel);
307  }
308 
309  watchKernelLaunch.stop();
310  timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
311  }
312 
313  reportRuntime("variant 1", timingComputeKernel, timingKernelLaunch, numberOfCells, numberOfThreads, _log);
314  // allocateAndStoreOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
315  // validateOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
316 
317  #if defined(WITH_OPENMP)
318  }//threadIndex
319  #endif
320 
321  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
322  tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
323  tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
324 
325  for (int i = 0; i < 2 * DIMENSIONS; i++) {
326  tarch::freeMemory(cellFaceData.QIn[cellIndex][i], tarch::MemoryLocation::Heap);
327  tarch::freeMemory(cellFaceData.QOut[cellIndex][i], tarch::MemoryLocation::Heap);
328  }
329  }
330 }
void secondTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition: Variant1.cpp:133
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition: Variant1.cpp:45
void firstTask(exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int leftFaceId, const int rightFaceId, const int faceDirection, tarch::timing::Measurement &measurement)
Definition: Variant1.cpp:100
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
Definition: Utils.h:14
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
Definition: Utils.h:68
constexpr int NumberOfOutputEntriesPerCell
Definition: Utils.h:20
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
Definition: Utils.h:30
tarch::logging::Log _log
This is variant 1 of the fused kernels.
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Definition: Variant1.cpp:224
Represents the faces of one cell, with a total of 2*Dim faces per cell For ADER QIn will contain the ...
Definition: CellFaceData.h:20
tarch::la::Vector< DIMENSIONS, double > * cellSize
Definition: CellFaceData.h:27
tarch::la::Vector< DIMENSIONS, double > * cellCentre
Definition: CellFaceData.h:26
outType *(* QOut)[2 *DIMENSIONS]
Out values.
Definition: CellFaceData.h:60
inType *(* QIn)[2 *DIMENSIONS]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
Definition: CellFaceData.h:25