Peano
Loading...
Searching...
No Matches
Variant3.cpp
Go to the documentation of this file.
1
26
27#include "CellFaceData.h"
28#include "exahype2/CellData.h"
29#include "kernels/AderSolver/BufferSizes.h"
30#include "kernels/AderSolver/FaceIntegral.h"
31#include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
32#include "kernels/AderSolver/MaxScaledEigenvalue.h"
33#include "kernels/AderSolver/RiemannSolver.h"
34#include "repositories/SolverRepository.h"
35#include "Utils.h"
36#include "Variants.h"
37
38#include <cstring>
39
40tarch::logging::Log variant3::_log("variants3::");
41
43
44inline void initialTask(
45 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
47 const int cellId,
48 const int faceId,
49 tarch::timing::Measurement& measurement
50) {
51
52 // Assume that the faces for a given cell are one after another
53 // in typical Peano order, e.g. left, lower, right, upper
54 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
55 &myFaceData->QIn[faceId][0][kernels::AderSolver::getBndFaceSize()],
56 &myFaceData->QIn[faceId][1][kernels::AderSolver::getBndFaceSize()],
57#if DIMENSIONS == 3
58 &myFaceData->QIn[faceId][2][kernels::AderSolver::getBndFaceSize()],
59#endif
60 &myFaceData->QIn[faceId][DIMENSIONS + 0][0],
61 &myFaceData->QIn[faceId][DIMENSIONS + 1][0]
62#if DIMENSIONS == 3
63 ,
64 &myFaceData->QIn[faceId][DIMENSIONS + 2][0]
65#endif
66 };
67
68 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
69 &myFaceData->QOut[faceId][0][kernels::AderSolver::getBndFluxSize()],
70 &myFaceData->QOut[faceId][1][kernels::AderSolver::getBndFluxSize()],
71#if DIMENSIONS == 3
72 &myFaceData->QOut[faceId][2][kernels::AderSolver::getBndFluxSize()],
73#endif
74 &myFaceData->QOut[faceId][DIMENSIONS + 0][0],
75 &myFaceData->QOut[faceId][DIMENSIONS + 1][0]
76#if DIMENSIONS == 3
77 ,
78 &myFaceData->QOut[faceId][DIMENSIONS + 2][0]
79#endif
80 };
81
82 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
83
84 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
85 repositories::instanceOfAderSolver,
86 lQhbnd,
87 lFhbnd,
88 myCellData->QIn[cellId],
89 myCellData->cellCentre[cellId],
90 myCellData->cellSize[cellId],
91 myCellData->t,
92 myCellData->dt
93 );
94
95 watchKernelCompute.stop();
96 measurement.setValue(watchKernelCompute.getCalendarTime());
97}
98
99inline void firstTask(
101 const int faceId,
102 const int faceDirection,
103 tarch::timing::Measurement& measurement
104) {
105
106 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
107
108 tarch::la::Vector<DIMENSIONS, double> faceCenter = myFaceData->cellCentre[faceId];
109 faceCenter[faceDirection % DIMENSIONS] += 0.5 * myFaceData->cellSize[faceId][faceDirection % DIMENSIONS];
110
111 // We are assuming that there are no boundaries okay
112 // In a real ExaHyPE application boundaries will exist
113 // but we can still assume that the corresponding faces
114 // will have received data
115 kernels::AderSolver::riemannSolver<SolverPrecision>(
116 repositories::instanceOfAderSolver,
117 &myFaceData->QOut[faceId][faceDirection][0],
118 &myFaceData->QOut[faceId][faceDirection][kernels::AderSolver::getBndFluxSize()],
119 &myFaceData->QIn[faceId][faceDirection][0],
120 &myFaceData->QIn[faceId][faceDirection][kernels::AderSolver::getBndFaceSize()],
121 myFaceData->t[faceId],
122 myFaceData->dt[faceId],
123 faceCenter,
124 myFaceData->cellSize[faceId],
125 faceDirection,
126 false,
127 0
128 );
129
130 watchKernelCompute.stop();
131 measurement.setValue(watchKernelCompute.getCalendarTime());
132}
133
134inline void secondTask(
135 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
137 const int cellId,
138 const int faceId,
139 tarch::timing::Measurement& measurement
140) {
141
142 // Assume that the faces for a given cell are one after another
143 // in typical Peano order, e.g. left, lower, right, upper
144 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
145 &myFaceData->QIn[faceId][0][kernels::AderSolver::getBndFaceSize()],
146 &myFaceData->QIn[faceId][1][kernels::AderSolver::getBndFaceSize()],
147#if DIMENSIONS == 3
148 &myFaceData->QIn[faceId][2][kernels::AderSolver::getBndFaceSize()],
149#endif
150 &myFaceData->QIn[faceId][DIMENSIONS + 0][0],
151 &myFaceData->QIn[faceId][DIMENSIONS + 1][0]
152#if DIMENSIONS == 3
153 ,
154 &myFaceData->QIn[faceId][DIMENSIONS + 2][0]
155#endif
156 };
157
158 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
159 &myFaceData->QOut[faceId][0][kernels::AderSolver::getBndFluxSize()],
160 &myFaceData->QOut[faceId][1][kernels::AderSolver::getBndFluxSize()],
161#if DIMENSIONS == 3
162 &myFaceData->QOut[faceId][2][kernels::AderSolver::getBndFluxSize()],
163#endif
164 &myFaceData->QOut[faceId][DIMENSIONS + 0][0],
165 &myFaceData->QOut[faceId][DIMENSIONS + 1][0]
166#if DIMENSIONS == 3
167 ,
168 &myFaceData->QOut[faceId][DIMENSIONS + 2][0]
169#endif
170 };
171
172 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
173
174 for (int d = 0; d < DIMENSIONS; d++) {
175 const int direction = d;
176
177 const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
178 // Negative face
179 kernels::AderSolver::faceIntegral(
180 myCellData->QIn[cellId],
181 &myFaceData->QOut[faceId][d][kernels::AderSolver::getBndFluxSize()],
182 direction,
183 0,
184 inverseDxDirection,
185 myCellData->dt
186 );
187
188 // Positive face
189 kernels::AderSolver::faceIntegral(
190 myCellData->QIn[cellId],
191 &myFaceData->QOut[faceId][d + DIMENSIONS][0],
192 direction,
193 1,
194 inverseDxDirection,
195 myCellData->dt
196 );
197
198 } // for d
199
200 myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
201 repositories::instanceOfAderSolver,
202 myCellData->QIn[cellId],
203 myCellData->cellCentre[cellId],
204 myCellData->cellSize[cellId],
205 myCellData->t,
206 myCellData->dt
207 );
208
209 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
210 repositories::instanceOfAderSolver,
211 lQhbnd,
212 lFhbnd,
213 myCellData->QIn[cellId],
214 myCellData->cellCentre[cellId],
215 myCellData->cellSize[cellId],
216 myCellData->t,
217 myCellData->dt
218 );
219
220 watchKernelCompute.stop();
221 measurement.setValue(watchKernelCompute.getCalendarTime());
222}
223
224
226 int numberOfCells,
227 double timeStamp,
228 double timeStepSize,
229 const tarch::la::Vector<DIMENSIONS, double> cellCenter,
230 const tarch::la::Vector<DIMENSIONS, double> cellSize
231) {
232
233 tarch::timing::Measurement timingComputeKernel;
234
235 exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
237
238 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
239 cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
241 tarch::MemoryLocation::Heap
242 );
243 cellData.t = timeStamp;
244 cellData.dt = timeStepSize;
245 cellData.QOut[cellIndex] = nullptr;
246 cellData.cellCentre[cellIndex] = cellCenter;
247 cellData.cellSize[cellIndex] = cellSize;
248 cellData.maxEigenvalue[cellIndex] = 0.0;
249
250 initInputData(cellData.QIn[cellIndex], cellCenter, cellSize);
251 std::memset(cellData.QOut[cellIndex], 0.0, NumberOfOutputEntriesPerCell * sizeof(SolverPrecision));
252
253 for (int i = 0; i < DIMENSIONS; i++) {
254 cellFaceData.QIn[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
255 2 * kernels::AderSolver::getBndFaceSize(),
256 tarch::MemoryLocation::Heap
257 );
258 cellFaceData.QOut[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
259 2 * kernels::AderSolver::getBndFluxSize(),
260 tarch::MemoryLocation::Heap
261 );
262 }
263 // Stitch faces together by connecting the left and right faces of any given cell
264 for (int i = 0; i < DIMENSIONS; i++) {
265 cellFaceData.QIn[cellIndex][i + DIMENSIONS] = cellFaceData.QIn[cellIndex][i];
266 cellFaceData.QOut[cellIndex][i + DIMENSIONS] = cellFaceData.QOut[cellIndex][i];
267 }
268
269 cellFaceData.t[cellIndex] = timeStamp;
270 cellFaceData.dt[cellIndex] = timeStepSize;
271 cellFaceData.cellCentre[cellIndex] = cellCenter;
272 cellFaceData.cellSize[cellIndex] = cellSize;
273
274 std::memset(cellData.QOut[cellIndex], 0.0, NumberOfOutputEntriesPerCell * sizeof(SolverPrecision));
275 }
276
277 int numberOfThreads = 1;
278
279 #if defined(WITH_OPENMP)
280 for (int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
281 numberOfThreads = NumberOfLaunchingThreads[threadIndex];
282 #endif
283
284 // Initial task
285 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
286 initialTask(&cellData, &cellFaceData, cellIndex, cellIndex, timingComputeKernel);
287 }
288
289 timingComputeKernel.erase();
290 tarch::timing::Measurement timingKernelLaunch;
291
292 for (int sample = 0; sample <= NumberOfSamples; sample++) {
293
294 tarch::timing::Watch watchKernelLaunch("::runBenchmarks", "assessKernel(...)", false);
295
296 #if defined(WITH_OPENMP)
297 #pragma omp parallel num_threads(NumberOfLaunchingThreads[threadIndex])
298 #endif
299 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
300 // We're not actually simulating a real grid, instead
301 // we are making each cell it's own neighbour, as though
302 // it were a whole periodic domain
303 firstTask(&cellFaceData, cellIndex, 0, timingComputeKernel);
304 firstTask(&cellFaceData, cellIndex, 1, timingComputeKernel);
305 }
306 #if defined(WITH_OPENMP)
307 #pragma omp parallel num_threads(NumberOfLaunchingThreads[threadIndex])
308 #endif
309 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
310 cellData.maxEigenvalue[cellIndex] = 0.0;
311 secondTask(&cellData, &cellFaceData, cellIndex, cellIndex, timingComputeKernel);
312 }
313
314 watchKernelLaunch.stop();
315 timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
316 }
317
318
319 reportRuntime("variant 3", timingComputeKernel, timingKernelLaunch, numberOfCells, numberOfThreads, _log);
320 // allocateAndStoreOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
321 // validateOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
322
323 #if defined(WITH_OPENMP)
324 }//threadIndex
325 #endif
326
327 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
328 tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
329 tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
330
331 for (int i = 0; i < DIMENSIONS; i++) {
332 tarch::freeMemory(cellFaceData.QIn[cellIndex][i], tarch::MemoryLocation::Heap);
333 tarch::freeMemory(cellFaceData.QOut[cellIndex][i], tarch::MemoryLocation::Heap);
334 }
335 }
336}
void firstTask(exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int faceId, const int faceDirection, tarch::timing::Measurement &measurement)
Definition Variant3.cpp:99
void secondTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition Variant3.cpp:134
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition Variant3.cpp:44
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
Definition Utils.h:14
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
Definition Utils.h:68
constexpr int NumberOfOutputEntriesPerCell
Definition Utils.h:20
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
Definition Utils.h:30
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Definition Variant3.cpp:225
tarch::logging::Log _log
This is variant 3 of the fused kernels.
Represents the faces of one cell, with a total of 2*Dim faces per cell For ADER QIn will contain the ...
tarch::la::Vector< DIMENSIONS, double > * cellSize
tarch::la::Vector< DIMENSIONS, double > * cellCentre
outType *(* QOut)[2 *DIMENSIONS]
Out values.
inType *(* QIn)[2 *DIMENSIONS]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.