Peano
Loading...
Searching...
No Matches
Variant6.cpp
Go to the documentation of this file.
1
30
31#include "exahype2/CellData.h"
32#include "kernels/AderSolver/BufferSizes.h"
33#include "kernels/AderSolver/FaceIntegral.h"
34#include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
35#include "kernels/AderSolver/MaxScaledEigenvalue.h"
36#include "kernels/AderSolver/RiemannSolver.h"
37#include "LRFaceData.h"
38#include "repositories/SolverRepository.h"
39#include "Utils.h"
40#include "Variants.h"
41
42#include <cstring>
43
44tarch::logging::Log variant6::_log("variants6::");
45
47
48inline void initialTask(
49 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
51 const int cellId,
52 const int faceId,
53 tarch::timing::Measurement& measurement
54) {
55
56 // Assume that the faces for a given cell are one after another in FaceData
57 // in typical Peano order, e.g. left, lower, right, upper
58 // so faceId is the left, lower (front) face,
59 // faceId+1 is the one after this, and so on.
60 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
61 myFaceData->QIn[faceId + 0][1],
62 myFaceData->QIn[faceId + 1][1],
63#if DIMENSIONS == 3
64 myFaceData->QIn[faceId + 2][1],
65#endif
66 myFaceData->QIn[faceId + DIMENSIONS + 0][0],
67 myFaceData->QIn[faceId + DIMENSIONS + 1][0]
68#if DIMENSIONS == 3
69 ,
70 myFaceData->QIn[faceId + DIMENSIONS + 2][0]
71#endif
72 };
73
74 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
75 myFaceData->QOut[faceId + 0][1],
76 myFaceData->QOut[faceId + 1][1],
77#if DIMENSIONS == 3
78 myFaceData->QOut[faceId + 2][1],
79#endif
80 myFaceData->QOut[faceId + DIMENSIONS + 0][0],
81 myFaceData->QOut[faceId + DIMENSIONS + 1][0]
82#if DIMENSIONS == 3
83 ,
84 myFaceData->QOut[faceId + DIMENSIONS + 2][0]
85#endif
86 };
87
88 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
89
90 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
91 repositories::instanceOfAderSolver,
92 lQhbnd,
93 lFhbnd,
94 myCellData->QIn[cellId],
95 myCellData->cellCentre[cellId],
96 myCellData->cellSize[cellId],
97 myCellData->t,
98 myCellData->dt
99 );
100
101 watchKernelCompute.stop();
102 measurement.setValue(watchKernelCompute.getCalendarTime());
103}
104
105inline void firstTask(
107 const int faceId,
108 const int faceDirection,
109 tarch::timing::Measurement& measurement
110) {
111
112 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
113
114 // We are assuming that there are no boundaries okay
115 // In a real ExaHyPE application boundaries will exist
116 // but we can still assume that the corresponding faces
117 // will have received data
118 kernels::AderSolver::riemannSolver<SolverPrecision>(
119 repositories::instanceOfAderSolver,
120 myFaceData->QOut[faceId][1],
121 myFaceData->QOut[faceId][0],
122 myFaceData->QIn[faceId][1],
123 myFaceData->QIn[faceId][0],
124 myFaceData->t[faceId],
125 myFaceData->dt[faceId],
126 myFaceData->faceCentre[faceId],
127 myFaceData->faceSize[faceId],
128 faceDirection,
129 false,
130 0
131 );
132
133 watchKernelCompute.stop();
134 measurement.setValue(watchKernelCompute.getCalendarTime());
135}
136
137
138inline void secondTask(
139 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
141 const int cellId,
142 const int faceId,
143 tarch::timing::Measurement& measurement
144) {
145
146 // Assume that the faces for a given cell are one after another
147 // in typical Peano order, e.g. left, lower, right, upper
148 // Assume that the faces for a given cell are one after another
149 // in typical Peano order, e.g. left, lower, right, upper
150 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
151 myFaceData->QIn[faceId + 0][1],
152 myFaceData->QIn[faceId + 1][1],
153#if DIMENSIONS == 3
154 myFaceData->QIn[faceId + 2][1],
155#endif
156 myFaceData->QIn[faceId + DIMENSIONS + 0][0],
157 myFaceData->QIn[faceId + DIMENSIONS + 1][0]
158#if DIMENSIONS == 3
159 ,
160 myFaceData->QIn[faceId + DIMENSIONS + 2][0]
161#endif
162 };
163
164 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
165 myFaceData->QOut[faceId + 0][1],
166 myFaceData->QOut[faceId + 1][1],
167#if DIMENSIONS == 3
168 myFaceData->QOut[faceId + 2][1],
169#endif
170 myFaceData->QOut[faceId + DIMENSIONS + 0][0],
171 myFaceData->QOut[faceId + DIMENSIONS + 1][0]
172#if DIMENSIONS == 3
173 ,
174 myFaceData->QOut[faceId + DIMENSIONS + 2][0]
175#endif
176 };
177
178 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
179
180 for (int d = 0; d < DIMENSIONS; d++) {
181 const int direction = d;
182
183 const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
184 // Negative face
185 kernels::AderSolver::faceIntegral(
186 myCellData->QIn[cellId],
187 myFaceData->QOut[faceId + d][1],
188 direction,
189 0,
190 inverseDxDirection,
191 myCellData->dt
192 );
193
194 // Positive face
195 kernels::AderSolver::faceIntegral(
196 myCellData->QIn[cellId],
197 myFaceData->QOut[faceId + DIMENSIONS][0],
198 direction,
199 1,
200 inverseDxDirection,
201 myCellData->dt
202 );
203
204 } // for d
205
206 myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
207 repositories::instanceOfAderSolver,
208 myCellData->QIn[cellId],
209 myCellData->cellCentre[cellId],
210 myCellData->cellSize[cellId],
211 myCellData->t,
212 myCellData->dt
213 );
214
215 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
216 repositories::instanceOfAderSolver,
217 lQhbnd,
218 lFhbnd,
219 myCellData->QIn[cellId],
220 myCellData->cellCentre[cellId],
221 myCellData->cellSize[cellId],
222 myCellData->t,
223 myCellData->dt
224 );
225
226 watchKernelCompute.stop();
227 measurement.setValue(watchKernelCompute.getCalendarTime());
228}
229
230
232 int numberOfCells,
233 double timeStamp,
234 double timeStepSize,
235 const tarch::la::Vector<DIMENSIONS, double> cellCenter,
236 const tarch::la::Vector<DIMENSIONS, double> cellSize
237) {
238
239 tarch::timing::Measurement timingComputeKernel;
240
241 exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
242 exahype2::FaceData<SolverPrecision, SolverPrecision> faceData(2 * DIMENSIONS * numberOfCells);
243
244 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
245 cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
247 tarch::MemoryLocation::Heap
248 );
249 cellData.t = timeStamp;
250 cellData.dt = timeStepSize;
251 cellData.QOut[cellIndex] = nullptr;
252 cellData.cellCentre[cellIndex] = cellCenter;
253 cellData.cellSize[cellIndex] = cellSize;
254 cellData.maxEigenvalue[cellIndex] = 0.0;
255
256 initInputData(cellData.QIn[cellIndex], cellCenter, cellSize);
257 }
258
259 // We need one instance of face per face of a cell, e.g.
260 // 2*DIMENSIONS*numberOfCells faces.
261 // Here we just number them in the Peano order for each cell,
262 // so face (2*DIMENSIONS*i+j) will be the jth face of the ith cell
263
276 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
277 const int faceIndex = 2 * DIMENSIONS * cellIndex;
278 for (int d = 0; d < DIMENSIONS; d++) {
279 // Allocating data for the right side of the left face and the left side of the right face
280 faceData.QIn[faceIndex + d][1] = tarch::allocateMemory<SolverPrecision>(
281 kernels::AderSolver::getBndFaceSize(),
282 tarch::MemoryLocation::Heap
283 );
284 faceData.QIn[faceIndex + d + DIMENSIONS][0] = tarch::allocateMemory<SolverPrecision>(
285 kernels::AderSolver::getBndFaceSize(),
286 tarch::MemoryLocation::Heap
287 );
288
289 faceData.QOut[faceIndex + d][1] = tarch::allocateMemory<SolverPrecision>(
290 kernels::AderSolver::getBndFluxSize(),
291 tarch::MemoryLocation::Heap
292 );
293 faceData.QOut[faceIndex + d + DIMENSIONS][0] = tarch::allocateMemory<SolverPrecision>(
294 kernels::AderSolver::getBndFluxSize(),
295 tarch::MemoryLocation::Heap
296 );
297
298 // Stitching these faces together
299 faceData.QIn[faceIndex + d][0] = faceData.QIn[faceIndex + d + DIMENSIONS][0];
300 faceData.QIn[faceIndex + d + DIMENSIONS][1] = faceData.QIn[faceIndex + d][1];
301
302 faceData.QOut[faceIndex + d][0] = faceData.QOut[faceIndex + d + DIMENSIONS][0];
303 faceData.QOut[faceIndex + d + DIMENSIONS][1] = faceData.QOut[faceIndex + d][1];
304
305 faceData.t[faceIndex + d] = timeStamp;
306 faceData.dt[faceIndex + d] = timeStepSize;
307 faceData.faceCentre[faceIndex + d] = cellCenter;
308 faceData.faceSize[faceIndex + d] = cellSize;
309
310 faceData.t[faceIndex + d + DIMENSIONS] = timeStamp;
311 faceData.dt[faceIndex + d + DIMENSIONS] = timeStepSize;
312 faceData.faceCentre[faceIndex + d + DIMENSIONS] = cellCenter;
313 faceData.faceSize[faceIndex + d + DIMENSIONS] = cellSize;
314 }
315 }
316
317 int numberOfThreads = 1;
318
319 #if defined(WITH_OPENMP)
320 for (int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
321 numberOfThreads = NumberOfLaunchingThreads[threadIndex];
322 #endif
323
324 // Initial task
325 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
327 &cellData,
328 &faceData,
329 cellIndex,
330 cellIndex * 2 * DIMENSIONS, // just passing the first of a cells faces, it can get the others by pointer
331 // arithmetic
333 );
334 }
335
336 timingComputeKernel.erase();
337 tarch::timing::Measurement timingKernelLaunch;
338
339 for (int sample = 0; sample <= NumberOfSamples; sample++) {
340
341 tarch::timing::Watch watchKernelLaunch("::runBenchmarks", "assessKernel(...)", false);
342
343 #if defined(WITH_OPENMP)
344 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
345 #endif
346 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
347
348 for (int d = 0; d < DIMENSIONS; d++) {
349 // We have stitched the negative and positive faces together and therefore
350 // only need to perform the Riemann kernel on one of them, as the other points to the same data.
351 firstTask(&faceData, 2 * DIMENSIONS * cellIndex + d, d, timingComputeKernel);
352 }
353 }
354
355 #if defined(WITH_OPENMP)
356 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
357 #endif
358 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
359 cellData.maxEigenvalue[cellIndex] = 0.0;
361 &cellData,
362 &faceData,
363 cellIndex,
364 cellIndex * 2 * DIMENSIONS, // just passing the first of a cells faces, it can get the others by pointer
365 // arithmetic
367 );
368 }
369
370 watchKernelLaunch.stop();
371 timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
372 }
373
374
375 reportRuntime("variant 6", timingComputeKernel, timingKernelLaunch, numberOfCells, numberOfThreads, _log);
376 // allocateAndStoreOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
377 // validateOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
378
379 #if defined(WITH_OPENMP)
380 }//threadIndex
381 #endif
382
383 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
384 tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
385 tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
386
387 const int faceIndex = 2 * DIMENSIONS * cellIndex;
388 for (int d = 0; d < DIMENSIONS; d++) {
389 // Freeing data for the right side of the left face and the left side of the right face
390 tarch::freeMemory(faceData.QIn[faceIndex + d][1], tarch::MemoryLocation::Heap);
391 tarch::freeMemory(faceData.QIn[faceIndex + d + DIMENSIONS][0], tarch::MemoryLocation::Heap);
392 tarch::freeMemory(faceData.QOut[faceIndex + d][1], tarch::MemoryLocation::Heap);
393 tarch::freeMemory(faceData.QOut[faceIndex + d + DIMENSIONS][0], tarch::MemoryLocation::Heap);
394 }
395 }
396}
void firstTask(exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int faceId, const int faceDirection, tarch::timing::Measurement &measurement)
Definition Variant6.cpp:105
void secondTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition Variant6.cpp:138
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition Variant6.cpp:48
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
Definition Utils.h:14
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
Definition Utils.h:68
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
Definition Utils.h:30
tarch::logging::Log _log
This is variant 6 of the fused kernels.
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Definition Variant6.cpp:231
Represents the sides of one face, with 2 sides (left and right) to a face For ADER QIn will contain t...
Definition LRFaceData.h:20
tarch::la::Vector< DIMENSIONS, double > * faceSize
Definition LRFaceData.h:27
inType *(* QIn)[2]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
Definition LRFaceData.h:25
tarch::la::Vector< DIMENSIONS, double > * faceCentre
Definition LRFaceData.h:26
outType *(* QOut)[2]
Out values.
Definition LRFaceData.h:60