Peano
Variant2.cpp
Go to the documentation of this file.
1 
29 #include "CellFaceData.h"
30 #include "exahype2/CellData.h"
31 #include "kernels/AderSolver/BufferSizes.h"
32 #include "kernels/AderSolver/FaceIntegral.h"
33 #include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
34 #include "kernels/AderSolver/MaxScaledEigenvalue.h"
35 #include "kernels/AderSolver/RiemannSolver.h"
36 #include "repositories/SolverRepository.h"
37 #include "Utils.h"
38 #include "Variants.h"
39 
40 #include <cstring>
41 
42 tarch::logging::Log variant2::_log("variants3::");
43 
45 
46 inline void initialTask(
47  exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
49  const int cellId,
50  const int faceId,
51  tarch::timing::Measurement& measurement
52 ) {
53 
54  // Assume that the faces for a given cell are one after another
55  // in typical Peano order, e.g. left, lower, right, upper
56  SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
57  &myFaceData->QIn[faceId][0][kernels::AderSolver::getBndFaceSize()],
58  &myFaceData->QIn[faceId][1][kernels::AderSolver::getBndFaceSize()],
59 #if DIMENSIONS == 3
60  &myFaceData->QIn[faceId][2][kernels::AderSolver::getBndFaceSize()],
61 #endif
62  &myFaceData->QIn[faceId][DIMENSIONS + 0][0],
63  &myFaceData->QIn[faceId][DIMENSIONS + 1][0]
64 #if DIMENSIONS == 3
65  ,
66  &myFaceData->QIn[faceId][DIMENSIONS + 2][0]
67 #endif
68  };
69 
70  SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
71  &myFaceData->QOut[faceId][0][kernels::AderSolver::getBndFluxSize()],
72  &myFaceData->QOut[faceId][1][kernels::AderSolver::getBndFluxSize()],
73 #if DIMENSIONS == 3
74  &myFaceData->QOut[faceId][2][kernels::AderSolver::getBndFluxSize()],
75 #endif
76  &myFaceData->QOut[faceId][DIMENSIONS + 0][0],
77  &myFaceData->QOut[faceId][DIMENSIONS + 1][0]
78 #if DIMENSIONS == 3
79  ,
80  &myFaceData->QOut[faceId][DIMENSIONS + 2][0]
81 #endif
82  };
83 
84  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
85 
86  int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
87  repositories::instanceOfAderSolver,
88  lQhbnd,
89  lFhbnd,
90  myCellData->QIn[cellId],
91  myCellData->cellCentre[cellId],
92  myCellData->cellSize[cellId],
93  myCellData->t,
94  myCellData->dt
95  );
96 
97  watchKernelCompute.stop();
98  measurement.setValue(watchKernelCompute.getCalendarTime());
99 }
100 
101 
103  exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
105  const int cellId,
106  const int faceId,
107  tarch::timing::Measurement& measurement
108 ) {
109 
110  // Assume that the faces for a given cell are one after another
111  // in typical Peano order, e.g. left, lower, right, upper
112  // Assume that the faces for a given cell are one after another
113  // in typical Peano order, e.g. left, lower, right, upper
114  SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
115  &myFaceData->QIn[faceId][0][kernels::AderSolver::getBndFaceSize()],
116  &myFaceData->QIn[faceId][1][kernels::AderSolver::getBndFaceSize()],
117 #if DIMENSIONS == 3
118  &myFaceData->QIn[faceId][2][kernels::AderSolver::getBndFaceSize()],
119 #endif
120  &myFaceData->QIn[faceId][DIMENSIONS + 0][0],
121  &myFaceData->QIn[faceId][DIMENSIONS + 1][0]
122 #if DIMENSIONS == 3
123  ,
124  &myFaceData->QIn[faceId][DIMENSIONS + 2][0]
125 #endif
126  };
127 
128  SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
129  &myFaceData->QOut[faceId][0][kernels::AderSolver::getBndFluxSize()],
130  &myFaceData->QOut[faceId][1][kernels::AderSolver::getBndFluxSize()],
131 #if DIMENSIONS == 3
132  &myFaceData->QOut[faceId][2][kernels::AderSolver::getBndFluxSize()],
133 #endif
134  &myFaceData->QOut[faceId][DIMENSIONS + 0][0],
135  &myFaceData->QOut[faceId][DIMENSIONS + 1][0]
136 #if DIMENSIONS == 3
137  ,
138  &myFaceData->QOut[faceId][DIMENSIONS + 2][0]
139 #endif
140  };
141 
142  tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
143 
144  for (int d = 0; d < DIMENSIONS; d++) {
145  const int direction = d;
146 
147  kernels::AderSolver::riemannSolver<SolverPrecision>(
148  repositories::instanceOfAderSolver,
149  &myFaceData->QOut[faceId][d][0], // lFhbnd[d+DIMENSIONS],
150  &myFaceData->QOut[faceId][d][kernels::AderSolver::getBndFluxSize()],
151  &myFaceData->QIn[faceId][d][0], // lFhbnd[d+DIMENSIONS],
152  &myFaceData->QIn[faceId][d][kernels::AderSolver::getBndFaceSize()],
153  0.5 * (myFaceData->t[faceId] + myFaceData->t[faceId]),
154  0.5 * (myFaceData->dt[faceId] + myFaceData->dt[faceId]),
155  0.5 * (myFaceData->cellCentre[faceId] + myFaceData->cellCentre[faceId]),
156  0.5 * (myFaceData->cellSize[faceId] + myFaceData->cellSize[faceId]),
157  direction,
158  false,
159  0
160  );
161 
162  const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
163  // Negative face
164  kernels::AderSolver::faceIntegral(
165  myCellData->QIn[cellId],
166  &myFaceData->QOut[faceId][d][kernels::AderSolver::getBndFluxSize()],
167  direction,
168  0,
169  inverseDxDirection,
170  myCellData->dt
171  );
172 
173  tarch::la::Vector<DIMENSIONS, double> faceCentre = myFaceData->cellCentre[faceId];
174  faceCentre[d] += 0.5 * myFaceData->cellSize[faceId][d];
175 
176  kernels::AderSolver::riemannSolver<SolverPrecision>(
177  repositories::instanceOfAderSolver,
178  &myFaceData->QOut[faceId][d + DIMENSIONS][0], // lFhbnd[d+DIMENSIONS],
179  &myFaceData->QOut[faceId][d + DIMENSIONS][kernels::AderSolver::getBndFluxSize()],
180  &myFaceData->QIn[faceId][d + DIMENSIONS][0], // lFhbnd[d+DIMENSIONS],
181  &myFaceData->QIn[faceId][d + DIMENSIONS][kernels::AderSolver::getBndFaceSize()],
182  0.5 * (myFaceData->t[faceId] + myFaceData->t[faceId]),
183  0.5 * (myFaceData->dt[faceId] + myFaceData->dt[faceId]),
184  faceCentre,
185  myFaceData->cellSize[faceId],
186  direction,
187  false,
188  0
189  );
190 
191  faceCentre[d] -= myFaceData->cellSize[faceId][d];
192  // Positive face
193  kernels::AderSolver::faceIntegral(
194  myCellData->QIn[cellId],
195  &myFaceData->QOut[faceId][d + DIMENSIONS][0],
196  direction,
197  1,
198  inverseDxDirection,
199  myCellData->dt
200  );
201 
202  } // for d
203 
204  myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
205  repositories::instanceOfAderSolver,
206  myCellData->QIn[cellId],
207  myCellData->cellCentre[cellId],
208  myCellData->cellSize[cellId],
209  myCellData->t,
210  myCellData->dt
211  );
212 
213  int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
214  repositories::instanceOfAderSolver,
215  lQhbnd,
216  lFhbnd,
217  myCellData->QIn[cellId],
218  myCellData->cellCentre[cellId],
219  myCellData->cellSize[cellId],
220  myCellData->t,
221  myCellData->dt
222  );
223 
224  watchKernelCompute.stop();
225  measurement.setValue(watchKernelCompute.getCalendarTime());
226 }
227 
228 
230  int numberOfCells,
231  double timeStamp,
232  double timeStepSize,
233  const tarch::la::Vector<DIMENSIONS, double> cellCenter,
234  const tarch::la::Vector<DIMENSIONS, double> cellSize
235 ) {
236 
237  tarch::timing::Measurement timingComputeKernel;
238 
239  exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
241 
242  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
243  cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
245  tarch::MemoryLocation::Heap
246  );
247  cellData.t = timeStamp;
248  cellData.dt = timeStepSize;
249  cellData.QOut[cellIndex] = nullptr;
250  cellData.cellCentre[cellIndex] = cellCenter;
251  cellData.cellSize[cellIndex] = cellSize;
252  cellData.maxEigenvalue[cellIndex] = 0.0;
253 
254  initInputData(cellData.QIn[cellIndex], cellCenter, cellSize);
255  std::memset(cellData.QOut[cellIndex], 0.0, NumberOfOutputEntriesPerCell * sizeof(SolverPrecision));
256 
257  for (int i = 0; i < 2 * DIMENSIONS; i++) {
258  cellFaceData.QIn[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
259  2 * kernels::AderSolver::getBndFaceSize(),
260  tarch::MemoryLocation::Heap
261  );
262  cellFaceData.QOut[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
263  2 * kernels::AderSolver::getBndFluxSize(),
264  tarch::MemoryLocation::Heap
265  );
266  }
267 
268  cellFaceData.t[cellIndex] = timeStamp;
269  cellFaceData.dt[cellIndex] = timeStepSize;
270  cellFaceData.cellCentre[cellIndex] = cellCenter;
271  cellFaceData.cellSize[cellIndex] = cellSize;
272 
273  std::memset(cellData.QOut[cellIndex], 0.0, NumberOfOutputEntriesPerCell * sizeof(SolverPrecision));
274  }
275 
276  int numberOfThreads = 1;
277 
278  #if defined(WITH_OPENMP)
279  for (int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
280  numberOfThreads = NumberOfLaunchingThreads[threadIndex];
281  #endif
282 
283  // Initial task
284  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
285  initialTask(&cellData, &cellFaceData, cellIndex, cellIndex, timingComputeKernel);
286  }
287 
288  timingComputeKernel.erase();
289  tarch::timing::Measurement timingKernelLaunch;
290 
291  for (int sample = 0; sample <= NumberOfSamples; sample++) {
292 
293  #if defined(WITH_OPENMP)
294  #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
295  #endif
296  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
297 
298  // Copy data from one side of each face to the other
299  // Here we just copy data from one side of a given cell's face to the opposite face of the same cell
300  for (int d = 0; d < DIMENSIONS; d++) {
301  // Copy inner negative face into outer positive face
302  std::copy_n(
303  &cellFaceData.QIn[cellIndex][d][kernels::AderSolver::getBndFaceSize()],
304  kernels::AderSolver::getBndFaceSize(),
305  &cellFaceData.QIn[cellIndex][d + DIMENSIONS][kernels::AderSolver::getBndFaceSize()]
306  );
307  std::copy_n(
308  &cellFaceData.QOut[cellIndex][d][kernels::AderSolver::getBndFluxSize()],
309  kernels::AderSolver::getBndFluxSize(),
310  &cellFaceData.QOut[cellIndex][d + DIMENSIONS][kernels::AderSolver::getBndFluxSize()]
311  );
312 
313  // Copy inner positive face into outer negative face
314  std::copy_n(
315  &cellFaceData.QIn[cellIndex][d + DIMENSIONS][0],
316  kernels::AderSolver::getBndFaceSize(),
317  &cellFaceData.QIn[cellIndex][d][0]
318  );
319  std::copy_n(
320  &cellFaceData.QOut[cellIndex][d + DIMENSIONS][0],
321  kernels::AderSolver::getBndFluxSize(),
322  &cellFaceData.QOut[cellIndex][d][0]
323  );
324  }
325  }
326 
327  tarch::timing::Watch watchKernelLaunch("::runBenchmarks", "assessKernel(...)", false);
328 
329  #if defined(WITH_OPENMP)
330  #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
331  #endif
332  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
333  cellData.maxEigenvalue[cellIndex] = 0.0;
334  runKernels(&cellData, &cellFaceData, cellIndex, cellIndex, timingComputeKernel);
335  }
336 
337  watchKernelLaunch.stop();
338  timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
339 
340  }
341 
342  reportRuntime("variant 2", timingComputeKernel, timingKernelLaunch, numberOfCells, numberOfThreads, _log);
343  // allocateAndStoreOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
344  // validateOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
345 
346  #if defined(WITH_OPENMP)
347  }//threadIndex
348  #endif
349 
350  for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
351  tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
352  tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
353 
354  for (int i = 0; i < 2 * DIMENSIONS; i++) {
355  tarch::freeMemory(cellFaceData.QIn[cellIndex][i], tarch::MemoryLocation::Heap);
356  tarch::freeMemory(cellFaceData.QOut[cellIndex][i], tarch::MemoryLocation::Heap);
357  }
358  }
359 }
void runKernels(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition: Variant2.cpp:102
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition: Variant2.cpp:46
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
Definition: Utils.h:14
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
Definition: Utils.h:68
constexpr int NumberOfOutputEntriesPerCell
Definition: Utils.h:20
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
Definition: Utils.h:30
tarch::logging::Log _log
This is variant 1 of the fused kernels.
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Definition: Variant2.cpp:229
tarch::logging::Log _log
This is variant 2 of the fused kernels.
Represents the faces of one cell, with a total of 2*Dim faces per cell For ADER QIn will contain the ...
Definition: CellFaceData.h:20
tarch::la::Vector< DIMENSIONS, double > * cellSize
Definition: CellFaceData.h:27
tarch::la::Vector< DIMENSIONS, double > * cellCentre
Definition: CellFaceData.h:26
outType *(* QOut)[2 *DIMENSIONS]
Out values.
Definition: CellFaceData.h:60
inType *(* QIn)[2 *DIMENSIONS]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
Definition: CellFaceData.h:25