Peano
Loading...
Searching...
No Matches
Variant5.cpp
Go to the documentation of this file.
1
23
24#include "exahype2/CellData.h"
25#include "kernels/AderSolver/BufferSizes.h"
26#include "kernels/AderSolver/FaceIntegral.h"
27#include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
28#include "kernels/AderSolver/MaxScaledEigenvalue.h"
29#include "kernels/AderSolver/RiemannSolver.h"
30#include "LRFaceData.h"
31#include "repositories/SolverRepository.h"
32#include "Utils.h"
33#include "Variants.h"
34
35#include <cstring>
36
37tarch::logging::Log variant5::_log("variants5::");
38
40
41inline void initialTask(
42 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
44 const int cellId,
45 const int faceId,
46 tarch::timing::Measurement& measurement
47) {
48
49 // Assume that the faces for a given cell are one after another in FaceData
50 // in typical Peano order, e.g. left, lower, right, upper
51 // so faceId is the left, lower (front) face,
52 // faceId+1 is the one after this, and so on.
53 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
54 myFaceData->QIn[faceId + 0][1],
55 myFaceData->QIn[faceId + 1][1],
56#if DIMENSIONS == 3
57 myFaceData->QIn[faceId + 2][1],
58#endif
59 myFaceData->QIn[faceId + DIMENSIONS + 0][0],
60 myFaceData->QIn[faceId + DIMENSIONS + 1][0]
61#if DIMENSIONS == 3
62 ,
63 myFaceData->QIn[faceId + DIMENSIONS + 2][0]
64#endif
65 };
66
67 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
68 myFaceData->QOut[faceId + 0][1],
69 myFaceData->QOut[faceId + 1][1],
70#if DIMENSIONS == 3
71 myFaceData->QOut[faceId + 2][1],
72#endif
73 myFaceData->QOut[faceId + DIMENSIONS + 0][0],
74 myFaceData->QOut[faceId + DIMENSIONS + 1][0]
75#if DIMENSIONS == 3
76 ,
77 myFaceData->QOut[faceId + DIMENSIONS + 2][0]
78#endif
79 };
80
81 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
82
83 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
84 repositories::instanceOfAderSolver,
85 lQhbnd,
86 lFhbnd,
87 myCellData->QIn[cellId],
88 myCellData->cellCentre[cellId],
89 myCellData->cellSize[cellId],
90 myCellData->t,
91 myCellData->dt
92 );
93
94 watchKernelCompute.stop();
95 measurement.setValue(watchKernelCompute.getCalendarTime());
96}
97
98inline void firstTask(
100 const int leftFaceId,
101 const int rightFaceId,
102 const int faceDirection,
103 tarch::timing::Measurement& measurement
104) {
105
106 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
107
108 // We are assuming that there are no boundaries okay
109 // In a real ExaHyPE application boundaries will exist
110 // but we can still assume that the corresponding faces
111 // will have received data
112 kernels::AderSolver::riemannSolver<SolverPrecision>(
113 repositories::instanceOfAderSolver,
114 myFaceData->QOut[leftFaceId][1], // lFhbnd[d+DIMENSIONS],
115 myFaceData->QOut[rightFaceId][0],
116 myFaceData->QIn[leftFaceId][1], // lFhbnd[d+DIMENSIONS],
117 myFaceData->QIn[rightFaceId][0],
118 0.5 * (myFaceData->t[leftFaceId] + myFaceData->t[rightFaceId]),
119 0.5 * (myFaceData->dt[leftFaceId] + myFaceData->dt[rightFaceId]),
120 0.5 * (myFaceData->faceCentre[leftFaceId] + myFaceData->faceCentre[rightFaceId]),
121 0.5 * (myFaceData->faceSize[leftFaceId] + myFaceData->faceSize[rightFaceId]),
122 faceDirection,
123 false,
124 0
125 );
126
127 watchKernelCompute.stop();
128 measurement.setValue(watchKernelCompute.getCalendarTime());
129}
130
131
132inline void secondTask(
133 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
135 const int cellId,
136 const int faceId,
137 tarch::timing::Measurement& measurement
138) {
139
140 // Assume that the faces for a given cell are one after another
141 // in typical Peano order, e.g. left, lower, right, upper
142 // Assume that the faces for a given cell are one after another
143 // in typical Peano order, e.g. left, lower, right, upper
144 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
145 myFaceData->QIn[faceId + 0][1],
146 myFaceData->QIn[faceId + 1][1],
147#if DIMENSIONS == 3
148 myFaceData->QIn[faceId + 2][1],
149#endif
150 myFaceData->QIn[faceId + DIMENSIONS + 0][0],
151 myFaceData->QIn[faceId + DIMENSIONS + 1][0]
152#if DIMENSIONS == 3
153 ,
154 myFaceData->QIn[faceId + DIMENSIONS + 2][0]
155#endif
156 };
157
158 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
159 myFaceData->QOut[faceId + 0][1],
160 myFaceData->QOut[faceId + 1][1],
161#if DIMENSIONS == 3
162 myFaceData->QOut[faceId + 2][1],
163#endif
164 myFaceData->QOut[faceId + DIMENSIONS + 0][0],
165 myFaceData->QOut[faceId + DIMENSIONS + 1][0]
166#if DIMENSIONS == 3
167 ,
168 myFaceData->QOut[faceId + DIMENSIONS + 2][0]
169#endif
170 };
171
172 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
173
174 for (int d = 0; d < DIMENSIONS; d++) {
175 const int direction = d;
176
177 const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
178 // Negative face
179 kernels::AderSolver::faceIntegral(
180 myCellData->QIn[cellId],
181 myFaceData->QOut[faceId + d][1],
182 direction,
183 0,
184 inverseDxDirection,
185 myCellData->dt
186 );
187
188 // Positive face
189 kernels::AderSolver::faceIntegral(
190 myCellData->QIn[cellId],
191 myFaceData->QOut[faceId + DIMENSIONS][0],
192 direction,
193 1,
194 inverseDxDirection,
195 myCellData->dt
196 );
197
198 } // for d
199
200 myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
201 repositories::instanceOfAderSolver,
202 myCellData->QIn[cellId],
203 myCellData->cellCentre[cellId],
204 myCellData->cellSize[cellId],
205 myCellData->t,
206 myCellData->dt
207 );
208
209 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
210 repositories::instanceOfAderSolver,
211 lQhbnd,
212 lFhbnd,
213 myCellData->QIn[cellId],
214 myCellData->cellCentre[cellId],
215 myCellData->cellSize[cellId],
216 myCellData->t,
217 myCellData->dt
218 );
219
220 watchKernelCompute.stop();
221 measurement.setValue(watchKernelCompute.getCalendarTime());
222}
223
224
226 int numberOfCells,
227 double timeStamp,
228 double timeStepSize,
229 const tarch::la::Vector<DIMENSIONS, double> cellCenter,
230 const tarch::la::Vector<DIMENSIONS, double> cellSize
231) {
232
233 tarch::timing::Measurement timingComputeKernel;
234
235 exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
236 exahype2::FaceData<SolverPrecision, SolverPrecision> faceData(2 * DIMENSIONS * numberOfCells);
237
238 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
239 cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
241 tarch::MemoryLocation::Heap
242 );
243 cellData.t = timeStamp;
244 cellData.dt = timeStepSize;
245 cellData.QOut[cellIndex] = nullptr;
246 cellData.cellCentre[cellIndex] = cellCenter;
247 cellData.cellSize[cellIndex] = cellSize;
248 cellData.maxEigenvalue[cellIndex] = 0.0;
249
250 initInputData(cellData.QIn[cellIndex], cellCenter, cellSize);
251 }
252
253 // We need one instance of face per face of a cell, e.g.
254 // 2*DIMENSIONS*numberOfCells faces.
255 // Here we just number them in the Peano order for each cell,
256 // so face (2*DIMENSIONS*i+j) will be the jth face of the ith cell
257 for (int faceIndex = 0; faceIndex < 2 * DIMENSIONS * numberOfCells; faceIndex++) {
258 // Allocating data for the left and right sides of a given face
259 faceData.QIn[faceIndex][0] = tarch::allocateMemory<SolverPrecision>(
260 kernels::AderSolver::getBndFaceSize(),
261 tarch::MemoryLocation::Heap
262 );
263 faceData.QIn[faceIndex][1] = tarch::allocateMemory<SolverPrecision>(
264 kernels::AderSolver::getBndFaceSize(),
265 tarch::MemoryLocation::Heap
266 );
267 faceData.QOut[faceIndex][0] = tarch::allocateMemory<SolverPrecision>(
268 kernels::AderSolver::getBndFluxSize(),
269 tarch::MemoryLocation::Heap
270 );
271 faceData.QOut[faceIndex][1] = tarch::allocateMemory<SolverPrecision>(
272 kernels::AderSolver::getBndFluxSize(),
273 tarch::MemoryLocation::Heap
274 );
275
276 faceData.t[faceIndex] = timeStamp;
277 faceData.dt[faceIndex] = timeStepSize;
278 faceData.faceCentre[faceIndex] = cellCenter;
279 faceData.faceSize[faceIndex] = cellSize;
280 }
281
282 int numberOfThreads = 1;
283
284 #if defined(WITH_OPENMP)
285 for (int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
286 numberOfThreads = NumberOfLaunchingThreads[threadIndex];
287 #endif
288
289 // Initial task
290 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
292 &cellData,
293 &faceData,
294 cellIndex,
295 cellIndex * 2 * DIMENSIONS, // just passing the first of a cells faces, it can get the others by pointer
296 // arithmetic
298 );
299 }
300
301 timingComputeKernel.erase();
302 tarch::timing::Measurement timingKernelLaunch;
303
304 for (int sample = 0; sample <= NumberOfSamples; sample++) {
305
306 tarch::timing::Watch watchKernelLaunch("::runBenchmarks", "assessKernel(...)", false);
307
308 #if defined(WITH_OPENMP)
309 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
310 #endif
311 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
312
313 for (int d = 0; d < DIMENSIONS; d++) {
314 // Stitch negative and positive face together and just
315 // perform the Riemann kernel on that.
316 firstTask(
317 &faceData,
318 2 * DIMENSIONS * cellIndex + d + DIMENSIONS,
319 2 * DIMENSIONS * cellIndex + d,
320 d,
322 );
323 }
324 }
325
326 #if defined(WITH_OPENMP)
327 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
328 #endif
329 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
330 cellData.maxEigenvalue[cellIndex] = 0.0;
332 &cellData,
333 &faceData,
334 cellIndex,
335 cellIndex * 2 * DIMENSIONS, // just passing the first of a cells faces, it can get the others by pointer
336 // arithmetic
338 );
339 }
340
341 watchKernelLaunch.stop();
342 timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
343 }
344
345
346 reportRuntime("variant 5", timingComputeKernel, timingKernelLaunch, numberOfCells, numberOfThreads, _log);
347 // allocateAndStoreOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
348 // validateOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
349
350 #if defined(WITH_OPENMP)
351 }//threadIndex
352 #endif
353
354 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
355 tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
356 tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
357 }
358 for (int faceIndex = 0; faceIndex < 2 * DIMENSIONS * numberOfCells; faceIndex++) {
359 tarch::freeMemory(faceData.QIn[faceIndex][0], tarch::MemoryLocation::Heap);
360 tarch::freeMemory(faceData.QIn[faceIndex][1], tarch::MemoryLocation::Heap);
361 tarch::freeMemory(faceData.QOut[faceIndex][0], tarch::MemoryLocation::Heap);
362 tarch::freeMemory(faceData.QOut[faceIndex][1], tarch::MemoryLocation::Heap);
363 }
364}
void secondTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition Variant5.cpp:132
void firstTask(exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int leftFaceId, const int rightFaceId, const int faceDirection, tarch::timing::Measurement &measurement)
Definition Variant5.cpp:98
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::FaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition Variant5.cpp:41
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
Definition Utils.h:14
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
Definition Utils.h:68
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
Definition Utils.h:30
tarch::logging::Log _log
This is variant 5 of the fused kernels.
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Definition Variant5.cpp:225
Represents the sides of one face, with 2 sides (left and right) to a face For ADER QIn will contain t...
Definition LRFaceData.h:20
tarch::la::Vector< DIMENSIONS, double > * faceSize
Definition LRFaceData.h:27
inType *(* QIn)[2]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.
Definition LRFaceData.h:25
tarch::la::Vector< DIMENSIONS, double > * faceCentre
Definition LRFaceData.h:26
outType *(* QOut)[2]
Out values.
Definition LRFaceData.h:60