Peano
Loading...
Searching...
No Matches
Variant2.cpp
Go to the documentation of this file.
1
28
29#include "CellFaceData.h"
30#include "exahype2/CellData.h"
31#include "kernels/AderSolver/BufferSizes.h"
32#include "kernels/AderSolver/FaceIntegral.h"
33#include "kernels/AderSolver/FusedSpaceTimePredictorVolumeIntegral.h"
34#include "kernels/AderSolver/MaxScaledEigenvalue.h"
35#include "kernels/AderSolver/RiemannSolver.h"
36#include "repositories/SolverRepository.h"
37#include "Utils.h"
38#include "Variants.h"
39
40#include <cstring>
41
42tarch::logging::Log variant2::_log("variants3::");
43
45
46inline void initialTask(
47 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
49 const int cellId,
50 const int faceId,
51 tarch::timing::Measurement& measurement
52) {
53
54 // Assume that the faces for a given cell are one after another
55 // in typical Peano order, e.g. left, lower, right, upper
56 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
57 &myFaceData->QIn[faceId][0][kernels::AderSolver::getBndFaceSize()],
58 &myFaceData->QIn[faceId][1][kernels::AderSolver::getBndFaceSize()],
59#if DIMENSIONS == 3
60 &myFaceData->QIn[faceId][2][kernels::AderSolver::getBndFaceSize()],
61#endif
62 &myFaceData->QIn[faceId][DIMENSIONS + 0][0],
63 &myFaceData->QIn[faceId][DIMENSIONS + 1][0]
64#if DIMENSIONS == 3
65 ,
66 &myFaceData->QIn[faceId][DIMENSIONS + 2][0]
67#endif
68 };
69
70 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
71 &myFaceData->QOut[faceId][0][kernels::AderSolver::getBndFluxSize()],
72 &myFaceData->QOut[faceId][1][kernels::AderSolver::getBndFluxSize()],
73#if DIMENSIONS == 3
74 &myFaceData->QOut[faceId][2][kernels::AderSolver::getBndFluxSize()],
75#endif
76 &myFaceData->QOut[faceId][DIMENSIONS + 0][0],
77 &myFaceData->QOut[faceId][DIMENSIONS + 1][0]
78#if DIMENSIONS == 3
79 ,
80 &myFaceData->QOut[faceId][DIMENSIONS + 2][0]
81#endif
82 };
83
84 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
85
86 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
87 repositories::instanceOfAderSolver,
88 lQhbnd,
89 lFhbnd,
90 myCellData->QIn[cellId],
91 myCellData->cellCentre[cellId],
92 myCellData->cellSize[cellId],
93 myCellData->t,
94 myCellData->dt
95 );
96
97 watchKernelCompute.stop();
98 measurement.setValue(watchKernelCompute.getCalendarTime());
99}
100
101
103 exahype2::CellData<SolverPrecision, SolverPrecision>* myCellData,
105 const int cellId,
106 const int faceId,
107 tarch::timing::Measurement& measurement
108) {
109
110 // Assume that the faces for a given cell are one after another
111 // in typical Peano order, e.g. left, lower, right, upper
112 // Assume that the faces for a given cell are one after another
113 // in typical Peano order, e.g. left, lower, right, upper
114 SolverPrecision* lQhbnd[2 * DIMENSIONS] = {
115 &myFaceData->QIn[faceId][0][kernels::AderSolver::getBndFaceSize()],
116 &myFaceData->QIn[faceId][1][kernels::AderSolver::getBndFaceSize()],
117#if DIMENSIONS == 3
118 &myFaceData->QIn[faceId][2][kernels::AderSolver::getBndFaceSize()],
119#endif
120 &myFaceData->QIn[faceId][DIMENSIONS + 0][0],
121 &myFaceData->QIn[faceId][DIMENSIONS + 1][0]
122#if DIMENSIONS == 3
123 ,
124 &myFaceData->QIn[faceId][DIMENSIONS + 2][0]
125#endif
126 };
127
128 SolverPrecision* lFhbnd[2 * DIMENSIONS] = {
129 &myFaceData->QOut[faceId][0][kernels::AderSolver::getBndFluxSize()],
130 &myFaceData->QOut[faceId][1][kernels::AderSolver::getBndFluxSize()],
131#if DIMENSIONS == 3
132 &myFaceData->QOut[faceId][2][kernels::AderSolver::getBndFluxSize()],
133#endif
134 &myFaceData->QOut[faceId][DIMENSIONS + 0][0],
135 &myFaceData->QOut[faceId][DIMENSIONS + 1][0]
136#if DIMENSIONS == 3
137 ,
138 &myFaceData->QOut[faceId][DIMENSIONS + 2][0]
139#endif
140 };
141
142 tarch::timing::Watch watchKernelCompute("::runBenchmarks", "assessKernel(...)", false);
143
144 for (int d = 0; d < DIMENSIONS; d++) {
145 const int direction = d;
146
147 kernels::AderSolver::riemannSolver<SolverPrecision>(
148 repositories::instanceOfAderSolver,
149 &myFaceData->QOut[faceId][d][0], // lFhbnd[d+DIMENSIONS],
150 &myFaceData->QOut[faceId][d][kernels::AderSolver::getBndFluxSize()],
151 &myFaceData->QIn[faceId][d][0], // lFhbnd[d+DIMENSIONS],
152 &myFaceData->QIn[faceId][d][kernels::AderSolver::getBndFaceSize()],
153 0.5 * (myFaceData->t[faceId] + myFaceData->t[faceId]),
154 0.5 * (myFaceData->dt[faceId] + myFaceData->dt[faceId]),
155 0.5 * (myFaceData->cellCentre[faceId] + myFaceData->cellCentre[faceId]),
156 0.5 * (myFaceData->cellSize[faceId] + myFaceData->cellSize[faceId]),
157 direction,
158 false,
159 0
160 );
161
162 const double inverseDxDirection = 1.0 / myCellData->cellSize[cellId][d];
163 // Negative face
164 kernels::AderSolver::faceIntegral(
165 myCellData->QIn[cellId],
166 &myFaceData->QOut[faceId][d][kernels::AderSolver::getBndFluxSize()],
167 direction,
168 0,
169 inverseDxDirection,
170 myCellData->dt
171 );
172
173 tarch::la::Vector<DIMENSIONS, double> faceCentre = myFaceData->cellCentre[faceId];
174 faceCentre[d] += 0.5 * myFaceData->cellSize[faceId][d];
175
176 kernels::AderSolver::riemannSolver<SolverPrecision>(
177 repositories::instanceOfAderSolver,
178 &myFaceData->QOut[faceId][d + DIMENSIONS][0], // lFhbnd[d+DIMENSIONS],
179 &myFaceData->QOut[faceId][d + DIMENSIONS][kernels::AderSolver::getBndFluxSize()],
180 &myFaceData->QIn[faceId][d + DIMENSIONS][0], // lFhbnd[d+DIMENSIONS],
181 &myFaceData->QIn[faceId][d + DIMENSIONS][kernels::AderSolver::getBndFaceSize()],
182 0.5 * (myFaceData->t[faceId] + myFaceData->t[faceId]),
183 0.5 * (myFaceData->dt[faceId] + myFaceData->dt[faceId]),
184 faceCentre,
185 myFaceData->cellSize[faceId],
186 direction,
187 false,
188 0
189 );
190
191 faceCentre[d] -= myFaceData->cellSize[faceId][d];
192 // Positive face
193 kernels::AderSolver::faceIntegral(
194 myCellData->QIn[cellId],
195 &myFaceData->QOut[faceId][d + DIMENSIONS][0],
196 direction,
197 1,
198 inverseDxDirection,
199 myCellData->dt
200 );
201
202 } // for d
203
204 myCellData->maxEigenvalue[cellId] = kernels::AderSolver::maxScaledEigenvalue(
205 repositories::instanceOfAderSolver,
206 myCellData->QIn[cellId],
207 myCellData->cellCentre[cellId],
208 myCellData->cellSize[cellId],
209 myCellData->t,
210 myCellData->dt
211 );
212
213 int numberOfIterations = kernels::AderSolver::fusedSpaceTimePredictorVolumeIntegral<SolverPrecision, SolverPrecision, SolverPrecision>(
214 repositories::instanceOfAderSolver,
215 lQhbnd,
216 lFhbnd,
217 myCellData->QIn[cellId],
218 myCellData->cellCentre[cellId],
219 myCellData->cellSize[cellId],
220 myCellData->t,
221 myCellData->dt
222 );
223
224 watchKernelCompute.stop();
225 measurement.setValue(watchKernelCompute.getCalendarTime());
226}
227
228
230 int numberOfCells,
231 double timeStamp,
232 double timeStepSize,
233 const tarch::la::Vector<DIMENSIONS, double> cellCenter,
234 const tarch::la::Vector<DIMENSIONS, double> cellSize
235) {
236
237 tarch::timing::Measurement timingComputeKernel;
238
239 exahype2::CellData<SolverPrecision, SolverPrecision> cellData(numberOfCells);
241
242 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
243 cellData.QIn[cellIndex] = tarch::allocateMemory<SolverPrecision>(
245 tarch::MemoryLocation::Heap
246 );
247 cellData.t = timeStamp;
248 cellData.dt = timeStepSize;
249 cellData.QOut[cellIndex] = nullptr;
250 cellData.cellCentre[cellIndex] = cellCenter;
251 cellData.cellSize[cellIndex] = cellSize;
252 cellData.maxEigenvalue[cellIndex] = 0.0;
253
254 initInputData(cellData.QIn[cellIndex], cellCenter, cellSize);
255 std::memset(cellData.QOut[cellIndex], 0.0, NumberOfOutputEntriesPerCell * sizeof(SolverPrecision));
256
257 for (int i = 0; i < 2 * DIMENSIONS; i++) {
258 cellFaceData.QIn[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
259 2 * kernels::AderSolver::getBndFaceSize(),
260 tarch::MemoryLocation::Heap
261 );
262 cellFaceData.QOut[cellIndex][i] = tarch::allocateMemory<SolverPrecision>(
263 2 * kernels::AderSolver::getBndFluxSize(),
264 tarch::MemoryLocation::Heap
265 );
266 }
267
268 cellFaceData.t[cellIndex] = timeStamp;
269 cellFaceData.dt[cellIndex] = timeStepSize;
270 cellFaceData.cellCentre[cellIndex] = cellCenter;
271 cellFaceData.cellSize[cellIndex] = cellSize;
272
273 std::memset(cellData.QOut[cellIndex], 0.0, NumberOfOutputEntriesPerCell * sizeof(SolverPrecision));
274 }
275
276 int numberOfThreads = 1;
277
278 #if defined(WITH_OPENMP)
279 for (int threadIndex = 0; threadIndex < NumberOfLaunchingThreads.size(); threadIndex++) {
280 numberOfThreads = NumberOfLaunchingThreads[threadIndex];
281 #endif
282
283 // Initial task
284 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
285 initialTask(&cellData, &cellFaceData, cellIndex, cellIndex, timingComputeKernel);
286 }
287
288 timingComputeKernel.erase();
289 tarch::timing::Measurement timingKernelLaunch;
290
291 for (int sample = 0; sample <= NumberOfSamples; sample++) {
292
293 #if defined(WITH_OPENMP)
294 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
295 #endif
296 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
297
298 // Copy data from one side of each face to the other
299 // Here we just copy data from one side of a given cell's face to the opposite face of the same cell
300 for (int d = 0; d < DIMENSIONS; d++) {
301 // Copy inner negative face into outer positive face
302 std::copy_n(
303 &cellFaceData.QIn[cellIndex][d][kernels::AderSolver::getBndFaceSize()],
304 kernels::AderSolver::getBndFaceSize(),
305 &cellFaceData.QIn[cellIndex][d + DIMENSIONS][kernels::AderSolver::getBndFaceSize()]
306 );
307 std::copy_n(
308 &cellFaceData.QOut[cellIndex][d][kernels::AderSolver::getBndFluxSize()],
309 kernels::AderSolver::getBndFluxSize(),
310 &cellFaceData.QOut[cellIndex][d + DIMENSIONS][kernels::AderSolver::getBndFluxSize()]
311 );
312
313 // Copy inner positive face into outer negative face
314 std::copy_n(
315 &cellFaceData.QIn[cellIndex][d + DIMENSIONS][0],
316 kernels::AderSolver::getBndFaceSize(),
317 &cellFaceData.QIn[cellIndex][d][0]
318 );
319 std::copy_n(
320 &cellFaceData.QOut[cellIndex][d + DIMENSIONS][0],
321 kernels::AderSolver::getBndFluxSize(),
322 &cellFaceData.QOut[cellIndex][d][0]
323 );
324 }
325 }
326
327 tarch::timing::Watch watchKernelLaunch("::runBenchmarks", "assessKernel(...)", false);
328
329 #if defined(WITH_OPENMP)
330 #pragma omp parallel for num_threads(NumberOfLaunchingThreads[threadIndex])
331 #endif
332 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
333 cellData.maxEigenvalue[cellIndex] = 0.0;
334 runKernels(&cellData, &cellFaceData, cellIndex, cellIndex, timingComputeKernel);
335 }
336
337 watchKernelLaunch.stop();
338 timingKernelLaunch.setValue(watchKernelLaunch.getCalendarTime());
339
340 }
341
342 reportRuntime("variant 2", timingComputeKernel, timingKernelLaunch, numberOfCells, numberOfThreads, _log);
343 // allocateAndStoreOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
344 // validateOutcome(cellData.QOut, cellData.maxEigenvalue, numberOfCells);
345
346 #if defined(WITH_OPENMP)
347 }//threadIndex
348 #endif
349
350 for (int cellIndex = 0; cellIndex < numberOfCells; cellIndex++) {
351 tarch::freeMemory(cellData.QIn[cellIndex], tarch::MemoryLocation::Heap);
352 tarch::freeMemory(cellData.QOut[cellIndex], tarch::MemoryLocation::Heap);
353
354 for (int i = 0; i < 2 * DIMENSIONS; i++) {
355 tarch::freeMemory(cellFaceData.QIn[cellIndex][i], tarch::MemoryLocation::Heap);
356 tarch::freeMemory(cellFaceData.QOut[cellIndex][i], tarch::MemoryLocation::Heap);
357 }
358 }
359}
void runKernels(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition Variant2.cpp:102
void initialTask(exahype2::CellData< SolverPrecision, SolverPrecision > *myCellData, exahype2::CellFaceData< SolverPrecision, SolverPrecision > *myFaceData, const int cellId, const int faceId, tarch::timing::Measurement &measurement)
Definition Variant2.cpp:46
constexpr double timeStamp
const tarch::la::Vector< DIMENSIONS, double > cellCenter
const tarch::la::Vector< DIMENSIONS, double > cellSize
constexpr double timeStepSize
tarch::timing::Measurement timingComputeKernel
constexpr int NumberOfInputEntriesPerCell
Definition Utils.h:14
void reportRuntime(const std::string &kernelIdentificator, const tarch::timing::Measurement &timingComputeKernel, const tarch::timing::Measurement &timingKernelLaunch, int numberOfCells, int numberOfThreads, tarch::logging::Log _log)
Reports the runtime and throughput of the benchmarks.
Definition Utils.h:68
constexpr int NumberOfOutputEntriesPerCell
Definition Utils.h:20
void initInputData(SolverPrecision *Q, const tarch::la::Vector< DIMENSIONS, double > CellCenter, const tarch::la::Vector< DIMENSIONS, double > CellSize)
Set input data.
Definition Utils.h:30
void runBenchmarks(int numberOfCells, double timeStamp, double timeStepSize, const tarch::la::Vector< DIMENSIONS, double > cellCenter, const tarch::la::Vector< DIMENSIONS, double > cellSize)
Definition Variant2.cpp:229
tarch::logging::Log _log
This is variant 2 of the fused kernels.
Represents the faces of one cell, with a total of 2*Dim faces per cell For ADER QIn will contain the ...
tarch::la::Vector< DIMENSIONS, double > * cellSize
tarch::la::Vector< DIMENSIONS, double > * cellCentre
outType *(* QOut)[2 *DIMENSIONS]
Out values.
inType *(* QIn)[2 *DIMENSIONS]
QIn may not be const, as some kernels delete it straightaway once the input data has been handled.